Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
DISCONTINUED:openSUSE:11.2
libgcj41
gcc-amdfam10-suse-20.patch
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File gcc-amdfam10-suse-20.patch of Package libgcj41
Index: gcc/Makefile.in =================================================================== --- gcc/Makefile.in.orig +++ gcc/Makefile.in @@ -1974,7 +1974,7 @@ tree-data-ref.o: tree-data-ref.c $(CONFI $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) \ $(TREE_DATA_REF_H) $(SCEV_H) tree-pass.h tree-chrec.h tree-vect-analyze.o: tree-vect-analyze.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ - $(TM_H) $(GGC_H) $(OPTABS_H) $(TREE_H) $(BASIC_BLOCK_H) \ + $(TM_H) $(GGC_H) $(OPTABS_H) $(TREE_H) $(TARGET_H) $(BASIC_BLOCK_H) \ $(DIAGNOSTIC_H) $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) \ tree-vectorizer.h $(TREE_DATA_REF_H) $(SCEV_H) $(EXPR_H) tree-chrec.h tree-vect-transform.o: tree-vect-transform.c $(CONFIG_H) $(SYSTEM_H) \ Index: gcc/target-def.h =================================================================== --- gcc/target-def.h.orig +++ gcc/target-def.h @@ -352,6 +352,10 @@ Foundation, 51 Franklin Street, Fifth Fl #define TARGET_VECTOR_MODE_SUPPORTED_P hook_bool_mode_false #endif +#ifndef TARGET_VECTOR_MISALIGN_SUPPORTED_P +#define TARGET_VECTOR_MISALIGN_SUPPORTED_P hook_bool_mode_false +#endif + #ifndef TARGET_VECTOR_OPAQUE_P #define TARGET_VECTOR_OPAQUE_P hook_bool_tree_false #endif @@ -587,6 +591,7 @@ Foundation, 51 Franklin Street, Fifth Fl TARGET_VALID_POINTER_MODE, \ TARGET_SCALAR_MODE_SUPPORTED_P, \ TARGET_VECTOR_MODE_SUPPORTED_P, \ + TARGET_VECTOR_MISALIGN_SUPPORTED_P, \ TARGET_VECTOR_OPAQUE_P, \ TARGET_RTX_COSTS, \ TARGET_ADDRESS_COST, \ Index: gcc/target.h =================================================================== --- gcc/target.h.orig +++ gcc/target.h @@ -440,6 +440,9 @@ struct gcc_target for further details. */ bool (* vector_mode_supported_p) (enum machine_mode mode); + /* True if misaligned load-execute vector operations are allowed. */ + bool (* vector_misalign_supported_p) (enum machine_mode mode); + /* True if a vector is opaque. 
*/ bool (* vector_opaque_p) (tree); Index: gcc/tree-vect-analyze.c =================================================================== --- gcc/tree-vect-analyze.c.orig +++ gcc/tree-vect-analyze.c @@ -25,6 +25,7 @@ Software Foundation, 51 Franklin Street, #include "tm.h" #include "ggc.h" #include "tree.h" +#include "target.h" #include "basic-block.h" #include "diagnostic.h" #include "tree-flow.h" @@ -709,7 +710,7 @@ vect_compute_data_ref_alignment (struct tree stmt = DR_STMT (dr); stmt_vec_info stmt_info = vinfo_for_stmt (stmt); tree ref = DR_REF (dr); - tree vectype; + tree vectype = STMT_VINFO_VECTYPE (stmt_info); tree base, base_addr; bool base_aligned; tree misalign; @@ -718,6 +719,13 @@ vect_compute_data_ref_alignment (struct if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "vect_compute_data_ref_alignment:"); + /* Check if target cares about alignment at all. */ + if (targetm.vector_misalign_supported_p ((int) TYPE_MODE (vectype))) + { + DR_MISALIGNMENT (dr) = 0; + return true; + } + /* Initialize misalignment to unknown. */ DR_MISALIGNMENT (dr) = -1; @@ -725,7 +733,6 @@ vect_compute_data_ref_alignment (struct aligned_to = DR_ALIGNED_TO (dr); base_addr = DR_BASE_ADDRESS (dr); base = build_fold_indirect_ref (base_addr); - vectype = STMT_VINFO_VECTYPE (stmt_info); alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT); if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0) Index: gcc/tree-vect-transform.c =================================================================== --- gcc/tree-vect-transform.c.orig +++ gcc/tree-vect-transform.c @@ -1693,7 +1693,12 @@ vectorizable_load (tree stmt, block_stmt vec_dest = vect_create_destination_var (scalar_dest, vectype); data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false); - if (aligned_access_p (dr)) + /* If misaligned load-execute is supported, + treat it as aligned when the target supports it. 
*/ + if ((targetm.vector_misalign_supported_p ((int) TYPE_MODE (vectype)) + && alignment_support_cheme == dr_aligned) + || (!targetm.vector_misalign_supported_p ((int) TYPE_MODE (vectype)) + && aligned_access_p (dr))) data_ref = build_fold_indirect_ref (data_ref); else { Index: gcc/tree-vectorizer.c =================================================================== --- gcc/tree-vectorizer.c.orig +++ gcc/tree-vectorizer.c @@ -1577,20 +1577,25 @@ vect_supportable_dr_alignment (struct da tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))); enum machine_mode mode = (int) TYPE_MODE (vectype); + /* Definitely aligned access. */ if (aligned_access_p (dr)) return dr_aligned; /* Possibly unaligned access. */ - if (DR_IS_READ (dr)) { + /* Check if target cares about alignment at all. */ + if (targetm.vector_misalign_supported_p (mode)) + return dr_unaligned_supported; + + /* Check if target can do software pipeline. */ if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing && (!targetm.vectorize.builtin_mask_for_load || targetm.vectorize.builtin_mask_for_load ())) return dr_unaligned_software_pipeline; + /* If can't software pipeline the loads, check for misaligned loads. */ if (movmisalign_optab->handlers[mode].insn_code != CODE_FOR_nothing) - /* Can't software pipeline the loads, but can at least do them. */ return dr_unaligned_supported; } Index: gcc/config/i386/crtmisalignsse.c =================================================================== --- /dev/null +++ gcc/config/i386/crtmisalignsse.c @@ -0,0 +1,147 @@ +/* + * Copyright (C) 2006 Free Software Foundation, Inc. + * + * This file is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. 
+ * + * In addition to the permissions in the GNU General Public License, the + * Free Software Foundation gives you unlimited permission to link the + * compiled version of this file with other programs, and to distribute + * those programs without any restriction coming from the use of this + * file. (The General Public License restrictions do apply in other + * respects; for example, they cover modification of the file, and + * distribution when not linked into another program.) + * + * This file is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + * As a special exception, if you link this library with files + * compiled with GCC to produce an executable, this does not cause + * the resulting executable to be covered by the GNU General Public License. + * This exception does not however invalidate any other reasons why + * the executable file might be covered by the GNU General Public License. + */ + +#define CPUID_LARGEST_FN (0x00000000) /* Vendor ID and largest supported basic function. */ +#define CPUID_ID (0x00000001) /* Family, model and stepping. */ +#define CPUID_LARGEST_EX (0x80000000) /* Largest supported extended function. */ +#define CPUID_EX_FEATURES (0x80000001) /* Extended features. */ + +#define CPUID_SSEMISALIGN (1 << 7) /* Misaligned SSE support. */ + +#define CPUID_AMD0 (0x68747541) /* Auth */ +#define CPUID_AMD1 (0x69746E65) /* enti */ +#define CPUID_AMD2 (0x444D4163) /* cAMD */ + +#define CPUID_FAMILY_EX (20) /* Extended CPU family. */ +#define CPUID_FAMILY_EX_MASK (255) +#define CPUID_FAMILY (8) /* CPU family. 
*/ +#define CPUID_FAMILY_MASK (15) +#define CPUID_MODEL_EX (16) /* Extended CPU model. */ +#define CPUID_MODEL_EX_MASK (15) +#define CPUID_MODEL (4) /* CPU model. */ +#define CPUID_MODEL_MASK (15) +#define CPUID_STEP (0) /* CPU stepping. */ +#define CPUID_STEP_MASK (15) + +#define FAMILY_AMD_GH (0x10) /* GH */ +#define MODEL_AMD_GH_A (0) /* GH rev. A */ + +#define MXCSR_MM_A (1 << 16) /* Misaligned SSE mask (GH rev. A). */ +#define MXCSR_MM (1 << 17) /* Misaligned SSE mask. */ + +#define FLAG_ID (1 << 21) /* CPUID support. */ + +#include <sys/types.h> +#include <stdint.h> + +#define CPUID(a) /* Execute CPUID. */ \ + __asm __volatile ("xchgl %%ebx, %1;" /* Save %ebx. */ \ + "cpuid;" \ + "xchgl %%ebx, %1;" /* Restore %ebx. */ \ + : "=a" (eax), "=r" (ebx), "=c" (ecx), "=d" (edx) \ + : "a" (a)); + +static void __attribute__ ((constructor)) +check_misalign_sse (void) +{ + uint32_t eax, ebx, ecx, edx; + uint32_t cpuid [3], family, model, step; + +#ifndef __x86_64__ + /* All 64-bit targets support CPUID; + only check explicitly for 32-bit ones. */ + + /* Check if we can use CPUID. */ + __asm __volatile ("pushf;" /* Save flags. */ + "mov (%%esp), %0;" /* Copy flags into register. */ + "mov %0, %1;" /* Save'em. */ + "xor %2, %0;" /* Flip ID bit. */ + "push %0;" /* Try modified flags. */ + "popf;" + "pushf;" /* Read flags into register again. */ + "pop %0;" + "popf;" /* Restore flags. */ + : "=&r" (eax), "=&r" (ebx) + : "i" (FLAG_ID) + : "memory"); + + /* Check if flipping ID bit stuck. */ + if (((eax ^ ebx) & FLAG_ID) == 0) + return; +#endif + + /* Get the highest supported basic function. */ + CPUID (CPUID_LARGEST_FN); + + /* Check if basic functions used below are supported. */ + if (eax < CPUID_ID) + return; + + cpuid[0] = ebx; + cpuid[1] = edx; + cpuid[2] = ecx; + + /* Get the highest supported extended function (saving vendor string). */ + CPUID (CPUID_LARGEST_EX); + + /* Check if extended functions used below are supported. 
*/ + if (eax < CPUID_EX_FEATURES) + return; + + /* Get the extended features. */ + CPUID (CPUID_EX_FEATURES); + + /* Check if misaligned SSE is supported. */ + if ((ecx & CPUID_SSEMISALIGN) == 0) + return; + + /* Get the CPU ID. */ + CPUID (CPUID_ID); + + family = ((eax >> CPUID_FAMILY_EX) & CPUID_FAMILY_EX_MASK) /* calculate family */ + + ((eax >> CPUID_FAMILY) & CPUID_FAMILY_MASK); + model = (((eax >> CPUID_MODEL_EX) & CPUID_MODEL_EX_MASK) << CPUID_MODEL) /* calculate model */ + + ((eax >> CPUID_MODEL) & CPUID_MODEL_MASK); + step = ((eax >> CPUID_STEP) & CPUID_STEP_MASK); /* calculate stepping */ + + /* Enable misaligned SSE. */ + uint32_t mxcsr = __builtin_ia32_stmxcsr (); + + if ( cpuid [0] == CPUID_AMD0 && cpuid [1] == CPUID_AMD1 && cpuid [2] == CPUID_AMD2 \ + && family == FAMILY_AMD_GH && model == MODEL_AMD_GH_A) + mxcsr |= MXCSR_MM_A; /* AMD GH rev. A */ + else + mxcsr |= MXCSR_MM; /* Others */ + + __builtin_ia32_ldmxcsr (mxcsr); +} Index: gcc/config/i386/i386.c =================================================================== --- gcc/config/i386/i386.c.orig +++ gcc/config/i386/i386.c @@ -1219,6 +1219,7 @@ static void ix86_setup_incoming_varargs tree, int *, int); static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *); static bool ix86_vector_mode_supported_p (enum machine_mode); +static bool ix86_vector_misalign_supported_p (enum machine_mode); static int ix86_address_cost (rtx); static bool ix86_cannot_force_const_mem (rtx); @@ -1446,6 +1447,9 @@ static void x86_64_elf_select_section (t #undef TARGET_VECTOR_MODE_SUPPORTED_P #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p +#undef TARGET_VECTOR_MISALIGN_SUPPORTED_P +#define TARGET_VECTOR_MISALIGN_SUPPORTED_P ix86_vector_misalign_supported_p + #ifdef HAVE_AS_TLS #undef TARGET_ASM_OUTPUT_DWARF_DTPREL #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel @@ -1500,8 +1504,8 @@ ix86_handle_option (size_t code, const c case OPT_msse: if (!value) { - target_flags &= 
~(MASK_SSE2 | MASK_SSE3 | MASK_SSE4A); - target_flags_explicit |= MASK_SSE2 | MASK_SSE3 | MASK_SSE4A; + target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSE4A | MASK_SSE_MISALIGN); + target_flags_explicit |= MASK_SSE2 | MASK_SSE3 | MASK_SSE4A | MASK_SSE_MISALIGN; } return true; @@ -1589,7 +1593,8 @@ override_options (void) PTA_SSSE3 = 256, PTA_POPCNT= 512, PTA_ABM = 1024, - PTA_SSE4A = 2048 + PTA_SSE4A = 2048, + PTA_SSE_MISALIGN = 4096 } flags; } const processor_alias_table[] = @@ -1643,8 +1648,8 @@ override_options (void) {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ }, {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ }, {"amdfam10", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT - | PTA_3DNOW_A | PTA_SSE | PTA_SSE2| PTA_SSE3 | PTA_POPCNT - | PTA_ABM | PTA_SSE4A}, + | PTA_3DNOW_A | PTA_SSE | PTA_SSE2| PTA_SSE3 | PTA_SSE4A + | PTA_POPCNT | PTA_ABM}, }; int const pta_size = ARRAY_SIZE (processor_alias_table); @@ -1799,6 +1804,9 @@ override_options (void) if (processor_alias_table[i].flags & PTA_SSE4A && !(target_flags_explicit & MASK_SSE4A)) target_flags |= MASK_SSE4A; + if (processor_alias_table[i].flags & PTA_SSE_MISALIGN + && !(target_flags_explicit & MASK_SSE_MISALIGN)) + target_flags |= MASK_SSE_MISALIGN; if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT)) error ("CPU you selected does not support x86-64 " "instruction set"); @@ -18878,6 +18886,17 @@ ix86_vector_mode_supported_p (enum machi return true; if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode)) return true; + + return false; +} + +/* Implements target hook vector_misalign_supported_p. 
*/ +static bool +ix86_vector_misalign_supported_p (enum machine_mode mode) +{ + if (TARGET_SSE_MISALIGN && ix86_vector_mode_supported_p (mode)) + return true; + return false; } Index: gcc/config/i386/i386.opt =================================================================== --- gcc/config/i386/i386.opt.orig +++ gcc/config/i386/i386.opt @@ -205,6 +205,10 @@ msse4a Target Report Mask(SSE4A) Support new AMDFAM10 SSE4A built-in functions and code generation +mssemisalign +Target Report Mask(SSE_MISALIGN) +Support misaligned memory in vector operations. + mpopcnt Target Report Mask(POPCNT) Support new AMDFAM10 Advanced Bit Manipulation (ABM) popcount built-in functions and code generation Index: gcc/config/i386/linux.h =================================================================== --- gcc/config/i386/linux.h.orig +++ gcc/config/i386/linux.h @@ -130,6 +130,7 @@ Boston, MA 02110-1301, USA. */ #undef ENDFILE_SPEC #define ENDFILE_SPEC \ "%{ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \ + %{mssemisalign:crtmisalignsse.o%s} \ %{shared|pie:crtendS.o%s;:crtend.o%s} crtn.o%s" /* A C statement (sans semicolon) to output to the stdio stream Index: gcc/config/i386/linux64.h =================================================================== --- gcc/config/i386/linux64.h.orig +++ gcc/config/i386/linux64.h @@ -68,6 +68,7 @@ Boston, MA 02110-1301, USA. */ #undef ENDFILE_SPEC #define ENDFILE_SPEC \ "%{ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \ + %{mssemisalign:crtmisalignsse.o%s} \ %{shared|pie:crtendS.o%s;:crtend.o%s} crtn.o%s" #define MULTILIB_DEFAULTS { "m64" } Index: gcc/config/i386/sse.md =================================================================== --- gcc/config/i386/sse.md.orig +++ gcc/config/i386/sse.md @@ -70,11 +70,23 @@ else return "pxor\t%0, %0"; case 1: + if (get_attr_mode (insn) == MODE_V4SF) + return TARGET_SSE_MISALIGN && MEM_P (operands[1]) + ? 
"movups\t{%1, %0|%0, %1}" + : "movaps\t{%1, %0|%0, %1}"; + else + return TARGET_SSE_MISALIGN && MEM_P (operands[1]) + ? "movdqu\t{%1, %0|%0, %1}" + : "movdqa\t{%1, %0|%0, %1}"; case 2: if (get_attr_mode (insn) == MODE_V4SF) - return "movaps\t{%1, %0|%0, %1}"; + return TARGET_SSE_MISALIGN + ? "movups\t{%1, %0|%0, %1}" + : "movaps\t{%1, %0|%0, %1}"; else - return "movdqa\t{%1, %0|%0, %1}"; + return TARGET_SSE_MISALIGN + ? "movdqu\t{%1, %0|%0, %1}" + : "movdqa\t{%1, %0|%0, %1}"; default: gcc_unreachable (); } @@ -83,7 +95,6 @@ (set (attr "mode") (cond [(eq (symbol_ref "TARGET_SSE2") (const_int 0)) (const_string "V4SF") - (eq_attr "alternative" "0,1") (if_then_else (ne (symbol_ref "optimize_size") @@ -156,11 +167,23 @@ else return "xorpd\t%0, %0"; case 1: + if (get_attr_mode (insn) == MODE_V4SF) + return TARGET_SSE_MISALIGN && MEM_P (operands[1]) + ? "movups\t{%1, %0|%0, %1}" + : "movaps\t{%1, %0|%0, %1}"; + else + return TARGET_SSE_MISALIGN && MEM_P (operands[1]) + ? "movupd\t{%1, %0|%0, %1}" + : "movapd\t{%1, %0|%0, %1}"; case 2: if (get_attr_mode (insn) == MODE_V4SF) - return "movaps\t{%1, %0|%0, %1}"; + return TARGET_SSE_MISALIGN + ? "movups\t{%1, %0|%0, %1}" + : "movaps\t{%1, %0|%0, %1}"; else - return "movapd\t{%1, %0|%0, %1}"; + return TARGET_SSE_MISALIGN + ? 
"movupd\t{%1, %0|%0, %1}" + : "movapd\t{%1, %0|%0, %1}"; default: gcc_unreachable (); } Index: gcc/config/i386/t-crtfm =================================================================== --- gcc/config/i386/t-crtfm.orig +++ gcc/config/i386/t-crtfm @@ -1,6 +1,11 @@ -EXTRA_PARTS += crtfastmath.o +EXTRA_PARTS += crtfastmath.o crtmisalignsse.o $(T)crtfastmath.o: $(srcdir)/config/i386/crtfastmath.c $(GCC_PASSES) $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) -msse -c \ $(srcdir)/config/i386/crtfastmath.c \ -o $(T)crtfastmath$(objext) + +$(T)crtmisalignsse.o: $(srcdir)/config/i386/crtmisalignsse.c $(GCC_PASSES) stmp-int-hdrs + $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) -msse -c \ + $(srcdir)/config/i386/crtmisalignsse.c \ + -o $(T)crtmisalignsse$(objext) Index: gcc/config/i386/t-linux64 =================================================================== --- gcc/config/i386/t-linux64.orig +++ gcc/config/i386/t-linux64 @@ -12,7 +12,7 @@ LIBGCC = stmp-multilib INSTALL_LIBGCC = install-multilib EXTRA_MULTILIB_PARTS=crtbegin.o crtend.o crtbeginS.o crtendS.o \ - crtbeginT.o crtfastmath.o + crtbeginT.o crtfastmath.o crtmisalignsse.o # The pushl in CTOR initialization interferes with frame pointer elimination. # crtend*.o cannot be compiled without -fno-asynchronous-unwind-tables, Index: gcc/doc/invoke.texi =================================================================== --- gcc/doc/invoke.texi.orig +++ gcc/doc/invoke.texi @@ -525,7 +525,7 @@ Objective-C and Objective-C++ Dialects}. 
-mmmx -msse -msse2 -msse3 -mssse3 -msse4a -m3dnow -mpopcnt -mabm @gol -mthreads -mno-align-stringops -minline-all-stringops @gol -mpush-args -maccumulate-outgoing-args -m128bit-long-double @gol --m96bit-long-double -mregparm=@var{num} -msseregparm @gol +-m96bit-long-double -mregparm=@var{num} -msseregparm -mssemisalign @gol -momit-leaf-frame-pointer -mno-red-zone -mno-tls-direct-seg-refs @gol -mcmodel=@var{code-model} @gol -m32 -m64 -mlarge-data-threshold=@var{num}} @@ -9319,6 +9319,14 @@ function by using the function attribute modules with the same value, including any libraries. This includes the system libraries and startup modules. +@item -mssemisalign +@opindex mssemisalign +Allow the vectorizer to use operands in memory even if not +guaranteed to be aligned. + +@strong{Warning:} if you use this switch then the resulting modules +must be run only on processors which support this feature. + @item -mpreferred-stack-boundary=@var{num} @opindex mpreferred-stack-boundary Attempt to keep the stack boundary aligned to a 2 raised to @var{num}
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor