Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
Please log in to access the resource
openSUSE:11.4
gcc41
gcc-amdfam10-suse-12.patch
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File gcc-amdfam10-suse-12.patch of Package gcc41
Index: gcc/config/i386/i386.c =================================================================== --- gcc/config/i386/i386.c.orig +++ gcc/config/i386/i386.c @@ -938,6 +938,9 @@ const int x86_cmpxchg = ~m_386; const int x86_xadd = ~m_386; const int x86_pad_returns = m_ATHLON_K8 | m_GENERIC | m_AMDFAM10; +/* Use Vector Converts instead of Scalar Converts. Added for AMDFAM10 */ +const int x86_use_vector_converts = m_AMDFAM10; + /* In case the average insn count for single function invocation is lower than this constant, emit fast (but longer) prologue and epilogue code. */ Index: gcc/config/i386/i386.h =================================================================== --- gcc/config/i386/i386.h.orig +++ gcc/config/i386/i386.h @@ -168,6 +168,7 @@ extern const int x86_use_incdec; extern const int x86_pad_returns; extern const int x86_partial_flag_reg_stall; extern int x86_prefetch_sse; +extern const int x86_use_vector_converts; #define TARGET_USE_LEAVE (x86_use_leave & TUNEMASK) #define TARGET_PUSH_MEMORY (x86_push_memory & TUNEMASK) @@ -217,6 +218,7 @@ extern int x86_prefetch_sse; #define TARGET_PROLOGUE_USING_MOVE (x86_prologue_using_move & TUNEMASK) #define TARGET_EPILOGUE_USING_MOVE (x86_epilogue_using_move & TUNEMASK) #define TARGET_PREFETCH_SSE (x86_prefetch_sse) +#define TARGET_USE_VECTOR_CONVERTS (x86_use_vector_converts & TUNEMASK) #define TARGET_SHIFT1 (x86_shift1 & TUNEMASK) #define TARGET_USE_FFREEP (x86_use_ffreep & TUNEMASK) #define TARGET_REP_MOVL_OPTIMAL (x86_rep_movl_optimal & TUNEMASK) Index: gcc/config/i386/i386.md =================================================================== --- gcc/config/i386/i386.md.orig +++ gcc/config/i386/i386.md @@ -162,6 +162,11 @@ (UNSPEC_INSERTQI 133) (UNSPEC_INSERTQ 134) + ; Other AMDFAM10 Patterns + (UNSPEC_CVTSI2SS_AMDFAM10 140) + (UNSPEC_CVTSI2SD_AMDFAM10 141) + (UNSPEC_MOVDSI2SF_AMDFAM10 142) + (UNSPEC_MOVDSI2DF_AMDFAM10 143) ]) (define_constants @@ -4474,7 +4479,46 @@ [(set (match_operand:SF 0 "register_operand" 
"") (float:SF (match_operand:SI 1 "nonimmediate_operand" "")))] "TARGET_80387 || TARGET_SSE_MATH" - "") + " + { + /* For converting SI to SF, the following code is faster in AMDFAM10 + mov mem32, reg32 + movd xmm, mem32 + cvtdq2ps xmm,xmm (movd and cvtdq2ps both require SSE2) + */ + + if (TARGET_USE_VECTOR_CONVERTS && !optimize_size + && (GET_CODE (operands[1]) != MEM) && TARGET_SSE2 && TARGET_SSE_MATH + && optimize ) + { + rtx tmp; + tmp = assign_386_stack_local (SImode, SLOT_TEMP); + emit_move_insn (tmp, operands[1]); + emit_insn (gen_sse2_movdsi2sf_amdfam10 (operands[0], tmp)); + emit_insn (gen_sse2_cvtdq2ps_amdfam10 (operands[0], operands[0])); + DONE; + } + } + ") + +(define_insn "sse2_cvtdq2ps_amdfam10" + [(set (match_operand:SF 0 "register_operand" "=x") + (unspec:SF [(match_operand:SF 1 "register_operand" "x")] + UNSPEC_CVTSI2SS_AMDFAM10))] + "TARGET_SSE2 && TARGET_USE_VECTOR_CONVERTS" + "cvtdq2ps\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "V4SF")]) + +(define_insn "sse2_movdsi2sf_amdfam10" + [(set (match_operand:SF 0 "register_operand" "=x") + (unspec:SF [(match_operand:SI 1 "memory_operand" "m")] + UNSPEC_MOVDSI2SF_AMDFAM10))] + "TARGET_SSE2 && TARGET_USE_VECTOR_CONVERTS" + "movd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "SF")]) + (define_insn "*floatsisf2_mixed" [(set (match_operand:SF 0 "register_operand" "=f#x,?f#x,x#f,x#f") @@ -4589,7 +4633,45 @@ [(set (match_operand:DF 0 "register_operand" "") (float:DF (match_operand:SI 1 "nonimmediate_operand" "")))] "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" - "") + " + { + /* For converting SI to DF, the following code is faster in AMDFAM10 + mov mem32, reg32 + movd xmm, mem32 + cvtdq2pd xmm,xmm (movd and cvtdq2pd both require SSE2) + */ + + if (TARGET_USE_VECTOR_CONVERTS && !optimize_size + && (GET_CODE (operands[1]) != MEM) && TARGET_SSE2 && TARGET_SSE_MATH + && optimize) + { + rtx tmp; + tmp = assign_386_stack_local (SImode, SLOT_TEMP); + emit_move_insn (tmp, operands[1]); + emit_insn (gen_sse2_movdsi2df_amdfam10 (operands[0], tmp)); + emit_insn 
(gen_sse2_cvtdq2pd_amdfam10 (operands[0], operands[0])); + DONE; + } + } + ") + +(define_insn "sse2_cvtdq2pd_amdfam10" + [(set (match_operand:DF 0 "register_operand" "=Y") + (unspec:DF [(match_operand:DF 1 "register_operand" "Y")] + UNSPEC_CVTSI2SD_AMDFAM10))] + "TARGET_SSE2 && TARGET_USE_VECTOR_CONVERTS" + "cvtdq2pd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "DF")]) + +(define_insn "sse2_movdsi2df_amdfam10" + [(set (match_operand:DF 0 "register_operand" "=Y") + (unspec:DF [(match_operand:SI 1 "memory_operand" "m")] + UNSPEC_MOVDSI2DF_AMDFAM10))] + "TARGET_SSE2 && TARGET_USE_VECTOR_CONVERTS" + "movd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "SF")]) (define_insn "*floatsidf2_mixed" [(set (match_operand:DF 0 "register_operand" "=f#Y,?f#Y,Y#f,Y#f")
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an openSUSE project.
Sign Up
Log In
Places
Places
All Projects
Status Monitor