File gcc-amdfam10-suse-3.patch of Package gcc41
Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c.orig
+++ gcc/config/i386/i386.c
@@ -851,12 +851,71 @@ const int x86_arch_always_fancy_math_387
    this option on P4 brings over 20% SPECfp regression, while enabling it on
    K8 brings roughly 2.4% regression that can be partly masked by careful scheduling
    of moves.  */
+
+/*
+Code generation for scalar reg-reg moves:
+  if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
+    movaps reg, reg
+  else
+    movss reg, reg
+
+  if (x86_sse_partial_reg_dependency == true)
+    movapd reg, reg
+  else
+    movsd reg, reg
+
+Code generation for scalar loads of double precision data:
+  if (x86_sse_split_regs == true)
+    movlpd mem, reg      (gas syntax)
+  else
+    movsd mem, reg
+
+Code generation for unaligned packed loads of single precision data:
+  if (x86_sse_partial_reg_dependency == true)
+    {
+      if (x86_sse_unaligned_move_optimal)
+        {
+          movups mem, reg
+        }
+      else
+        {
+          xorps  reg3, reg3
+          movaps reg3, reg2
+          movlps mem, reg2
+          movhps mem+8, reg2
+        }
+    }
+  else
+    {
+      movlps mem, reg
+      movhps mem+8, reg
+    }
+Code generation for unaligned packed loads of double precision data:
+  if (x86_sse_split_regs == true)
+    {
+      movlpd mem, reg
+      movhpd mem+8, reg
+    }
+  else
+    {
+      if (x86_sse_unaligned_move_optimal)
+        {
+          movupd mem, reg
+        }
+      else
+        {
+          movsd  mem, reg2
+          movhpd mem+8, reg2
+        }
+    }
+*/
 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC | m_AMDFAM10;
 /* Set for machines where the type and dependencies are resolved on SSE
    register parts instead of whole registers, so we may maintain just
    lower part of scalar values in proper format leaving the upper part
    undefined.  */
 const int x86_sse_split_regs = m_ATHLON_K8;
+const int x86_sse_unaligned_move_optimal = m_AMDFAM10;
 const int x86_sse_typeless_stores = m_ATHLON_K8 | m_AMDFAM10;
 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
 const int x86_use_ffreep = m_ATHLON_K8 | m_AMDFAM10;
@@ -8923,6 +8982,13 @@ ix86_expand_vector_move_misalign (enum m
     }
   else
     {
+      if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
+        {
+          op0 = gen_lowpart (V2DFmode, op0);
+          op1 = gen_lowpart (V2DFmode, op1);
+          emit_insn (gen_sse2_movupd (op0, op1));
+          return;
+        }
       /* ??? Not sure about the best option for the Intel chips.
          The following would seem to satisfy; the register is
          entirely cleared, breaking the dependency chain.  We
@@ -8942,7 +9008,16 @@ ix86_expand_vector_move_misalign (enum m
       else
         {
           if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
+            {
+              if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
+                {
+                  op0 = gen_lowpart (V4SFmode, op0);
+                  op1 = gen_lowpart (V4SFmode, op1);
+                  emit_insn (gen_sse_movups (op0, op1));
+                  return;
+                }
             emit_move_insn (op0, CONST0_RTX (mode));
+            }
           else
             emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));

Index: gcc/config/i386/i386.h
===================================================================
--- gcc/config/i386/i386.h.orig
+++ gcc/config/i386/i386.h
@@ -158,6 +158,7 @@ extern const int x86_accumulate_outgoing
 extern const int x86_epilogue_using_move, x86_decompose_lea;
 extern const int x86_arch_always_fancy_math_387, x86_shift1;
 extern const int x86_sse_partial_reg_dependency, x86_sse_split_regs;
+extern const int x86_sse_unaligned_move_optimal;
 extern const int x86_sse_typeless_stores, x86_sse_load0_by_pxor;
 extern const int x86_use_ffreep;
 extern const int x86_inter_unit_moves, x86_schedule;
@@ -207,6 +208,8 @@ extern int x86_prefetch_sse;
 #define TARGET_PARTIAL_REG_DEPENDENCY (x86_partial_reg_dependency & TUNEMASK)
 #define TARGET_SSE_PARTIAL_REG_DEPENDENCY \
         (x86_sse_partial_reg_dependency & TUNEMASK)
+#define TARGET_SSE_UNALIGNED_MOVE_OPTIMAL \
+        (x86_sse_unaligned_move_optimal & TUNEMASK)
 #define TARGET_SSE_SPLIT_REGS (x86_sse_split_regs & TUNEMASK)
 #define TARGET_SSE_TYPELESS_STORES (x86_sse_typeless_stores & TUNEMASK)
 #define TARGET_SSE_LOAD0_BY_PXOR (x86_sse_load0_by_pxor & TUNEMASK)
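For context, here is a minimal, hypothetical C test case (not part of the patch) that exercises the code path changed above. Assuming a GCC built with this patch, compiling it with roughly "gcc -O2 -ftree-vectorize -march=amdfam10 -S" should make the vectorizer emit potentially misaligned V2DF loads and stores; those go through ix86_expand_vector_move_misalign, where TARGET_SSE_UNALIGNED_MOVE_OPTIMAL (set for m_AMDFAM10) now selects a single movupd per access instead of the movsd/movhpd pair described in the comment block. The function name and flags are illustrative only; the V4SF path is analogous, using movups.

/* Hypothetical test case, not part of the patch.  With a patched
   compiler and -O2 -ftree-vectorize -march=amdfam10, the vectorized
   loop performs possibly misaligned 16-byte double accesses, which
   on AMDFAM10 should now be emitted as single movupd instructions
   rather than movsd/movhpd pairs.  */
void
scale (double *a, const double *b, long n)
{
  long i;

  for (i = 0; i < n; i++)
    a[i] = 2.0 * b[i];
}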