Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
DISCONTINUED:openSUSE:11.2
libgcj41
gcc-amdfam10-suse-1.patch
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File gcc-amdfam10-suse-1.patch of Package libgcj41
Index: gcc/config/i386/ammintrin.h =================================================================== --- /dev/null +++ gcc/config/i386/ammintrin.h @@ -0,0 +1,69 @@ +/* Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING. If not, write to + the Free Software Foundation, 51 Franklin Street, Fifth Floor, + Boston, MA 02110-1301, USA. */ + +/* As a special exception, if you include this header file into source + files compiled by GCC, this header file does not by itself cause + the resulting executable to be covered by the GNU General Public + License. This exception does not however invalidate any other + reasons why the executable file might be covered by the GNU General + Public License. */ + +/* Implemented from the specification included in the AMD Programmers Manual + Update, version 2.x. 
*/ + +#ifndef _AMMINTRIN_H_INCLUDED +#define _AMMINTRIN_H_INCLUDED + +#ifdef __SSE4A__ +#include <pmmintrin.h> + +static __inline void __attribute__((__always_inline__)) +_mm_stream_sd (double * __P, __m128d __Y) +{ + __builtin_ia32_movntsd (__P, (__v2df) __Y); +} + +static __inline void __attribute__((__always_inline__)) +_mm_stream_ss (float * __P, __m128 __Y) +{ + __builtin_ia32_movntss (__P, (__v4sf) __Y); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_extract_si64 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_extrq ((__v2di) __X, (__v16qi) __Y); +} + +#define _mm_extracti_si64(X, I, L) \ +((__m128i) __builtin_ia32_extrqi ((__v2di)(X), I, L)) + +static __inline __m128i __attribute__((__always_inline__)) +_mm_insert_si64 (__m128i __X,__m128i __Y) +{ + return (__m128i) __builtin_ia32_insertq ((__v2di)__X, (__v2di)__Y); +} + +#define _mm_inserti_si64(X, Y, I, L) \ +((__m128i) __builtin_ia32_insertqi ((__v2di)(X), (__v2di)(Y), I, L)) + + +#endif /* __SSE4A__ */ + +#endif /* _AMMINTRIN_H_INCLUDED */ Index: gcc/config/i386/i386.c =================================================================== --- gcc/config/i386/i386.c.orig +++ gcc/config/i386/i386.c @@ -1371,16 +1371,24 @@ ix86_handle_option (size_t code, const c case OPT_msse: if (!value) { - target_flags &= ~(MASK_SSE2 | MASK_SSE3); - target_flags_explicit |= MASK_SSE2 | MASK_SSE3; + target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSE4A); + target_flags_explicit |= MASK_SSE2 | MASK_SSE3 | MASK_SSE4A; } return true; case OPT_msse2: if (!value) { - target_flags &= ~MASK_SSE3; - target_flags_explicit |= MASK_SSE3; + target_flags &= ~(MASK_SSE3 | MASK_SSE4A); + target_flags_explicit |= MASK_SSE3 | MASK_SSE4A; + } + return true; + + case OPT_msse3: + if (!value) + { + target_flags &= ~MASK_SSE4A; + target_flags_explicit |= MASK_SSE4A; } return true; @@ -1448,7 +1456,10 @@ override_options (void) PTA_3DNOW = 32, PTA_3DNOW_A = 64, PTA_64BIT = 128, - PTA_SSSE3 = 256 + PTA_SSSE3 
= 256, + PTA_POPCNT= 512, + PTA_ABM = 1024, + PTA_SSE4A = 2048 } flags; } const processor_alias_table[] = @@ -1501,6 +1512,9 @@ override_options (void) | PTA_3DNOW_A | PTA_SSE | PTA_SSE2}, {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ }, {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ }, + {"amdfam10", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT + | PTA_3DNOW_A | PTA_SSE | PTA_SSE2| PTA_SSE3 | PTA_POPCNT + | PTA_ABM | PTA_SSE4A}, }; int const pta_size = ARRAY_SIZE (processor_alias_table); @@ -1646,6 +1660,15 @@ override_options (void) target_flags |= MASK_SSSE3; if (processor_alias_table[i].flags & PTA_PREFETCH_SSE) x86_prefetch_sse = true; + if (processor_alias_table[i].flags & PTA_POPCNT + && !(target_flags_explicit & MASK_POPCNT)) + target_flags |= MASK_POPCNT; + if (processor_alias_table[i].flags & PTA_ABM + && !(target_flags_explicit & MASK_ABM)) + target_flags |= MASK_ABM; + if (processor_alias_table[i].flags & PTA_SSE4A + && !(target_flags_explicit & MASK_SSE4A)) + target_flags |= MASK_SSE4A; if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT)) error ("CPU you selected does not support x86-64 " "instruction set"); @@ -1837,6 +1860,10 @@ override_options (void) if (TARGET_SSSE3) target_flags |= MASK_SSE3; + /* Turn on SSE3 builtins for -msse4a. */ + if (TARGET_SSE4A) + target_flags |= MASK_SSE3; + /* Turn on SSE2 builtins for -msse3. */ if (TARGET_SSE3) target_flags |= MASK_SSE2; @@ -1856,6 +1883,10 @@ override_options (void) if (TARGET_3DNOW) target_flags |= MASK_MMX; + /* Turn on POPCNT builtins for -mabm. */ + if (TARGET_ABM) + target_flags |= MASK_POPCNT; + if (TARGET_64BIT) { if (TARGET_ALIGN_DOUBLE) @@ -14294,6 +14325,14 @@ enum ix86_builtins IX86_BUILTIN_PABSW128, IX86_BUILTIN_PABSD128, + /* AMDFAM10 - SSE4A New Instructions. 
*/ + IX86_BUILTIN_MOVNTSD, + IX86_BUILTIN_MOVNTSS, + IX86_BUILTIN_EXTRQI, + IX86_BUILTIN_EXTRQ, + IX86_BUILTIN_INSERTQI, + IX86_BUILTIN_INSERTQ, + IX86_BUILTIN_VEC_INIT_V2SI, IX86_BUILTIN_VEC_INIT_V4HI, IX86_BUILTIN_VEC_INIT_V8QI, @@ -15036,6 +15075,16 @@ ix86_init_mmx_sse_builtins (void) = build_function_type_list (void_type_node, pchar_type_node, V16QI_type_node, NULL_TREE); + tree v2di_ftype_v2di_unsigned_unsigned + = build_function_type_list (V2DI_type_node, V2DI_type_node, + unsigned_type_node, unsigned_type_node, NULL_TREE); + tree v2di_ftype_v2di_v2di_unsigned_unsigned + = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node, + unsigned_type_node, unsigned_type_node, NULL_TREE); + tree v2di_ftype_v2di_v16qi + = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node, + NULL_TREE); + tree float80_type; tree float128_type; tree ftype; @@ -15387,6 +15436,14 @@ ix86_init_mmx_sse_builtins (void) def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int, IX86_BUILTIN_PALIGNR); + /* AMDFAM10 SSE4A New built-ins */ + def_builtin (MASK_SSE4A, "__builtin_ia32_movntsd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD); + def_builtin (MASK_SSE4A, "__builtin_ia32_movntss", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS); + def_builtin (MASK_SSE4A, "__builtin_ia32_extrqi", v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI); + def_builtin (MASK_SSE4A, "__builtin_ia32_extrq", v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ); + def_builtin (MASK_SSE4A, "__builtin_ia32_insertqi", v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI); + def_builtin (MASK_SSE4A, "__builtin_ia32_insertq", v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ); + /* Access to the vec_init patterns. 
*/ ftype = build_function_type_list (V2SI_type_node, integer_type_node, integer_type_node, NULL_TREE); @@ -15875,9 +15932,9 @@ ix86_expand_builtin (tree exp, rtx targe enum insn_code icode; tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0); tree arglist = TREE_OPERAND (exp, 1); - tree arg0, arg1, arg2; - rtx op0, op1, op2, pat; - enum machine_mode tmode, mode0, mode1, mode2, mode3; + tree arg0, arg1, arg2, arg3; + rtx op0, op1, op2, op3, pat; + enum machine_mode tmode, mode0, mode1, mode2, mode3,mode4; unsigned int fcode = DECL_FUNCTION_CODE (fndecl); switch (fcode) @@ -16300,6 +16357,114 @@ ix86_expand_builtin (tree exp, rtx targe emit_insn (pat); return target; + case IX86_BUILTIN_MOVNTSD: + return ix86_expand_store_builtin (CODE_FOR_sse4a_movntsd, arglist); + + case IX86_BUILTIN_MOVNTSS: + return ix86_expand_store_builtin (CODE_FOR_sse4a_movntss, arglist); + + case IX86_BUILTIN_INSERTQ: + case IX86_BUILTIN_EXTRQ: + icode = (fcode == IX86_BUILTIN_EXTRQ + ? CODE_FOR_sse4a_extrq + : CODE_FOR_sse4a_insertq); + arg0 = TREE_VALUE (arglist); + arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); + op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); + tmode = insn_data[icode].operand[0].mode; + mode1 = insn_data[icode].operand[1].mode; + mode2 = insn_data[icode].operand[2].mode; + if (! (*insn_data[icode].operand[1].predicate) (op0, mode1)) + op0 = copy_to_mode_reg (mode1, op0); + if (! (*insn_data[icode].operand[2].predicate) (op1, mode2)) + op1 = copy_to_mode_reg (mode2, op1); + if (optimize || target == 0 + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + pat = GEN_FCN (icode) (target, op0, op1); + if (! 
pat) + return 0; + emit_insn (pat); + return target; + + case IX86_BUILTIN_EXTRQI: + icode = CODE_FOR_sse4a_extrqi; + arg0 = TREE_VALUE (arglist); + arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); + op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); + op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0); + tmode = insn_data[icode].operand[0].mode; + mode1 = insn_data[icode].operand[1].mode; + mode2 = insn_data[icode].operand[2].mode; + mode3 = insn_data[icode].operand[3].mode; + if (! (*insn_data[icode].operand[1].predicate) (op0, mode1)) + op0 = copy_to_mode_reg (mode1, op0); + if (! (*insn_data[icode].operand[2].predicate) (op1, mode2)) + { + error ("index mask must be an immediate"); + return gen_reg_rtx (tmode); + } + if (! (*insn_data[icode].operand[3].predicate) (op2, mode3)) + { + error ("length mask must be an immediate"); + return gen_reg_rtx (tmode); + } + if (optimize || target == 0 + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + pat = GEN_FCN (icode) (target, op0, op1, op2); + if (! pat) + return 0; + emit_insn (pat); + return target; + + case IX86_BUILTIN_INSERTQI: + icode = CODE_FOR_sse4a_insertqi; + arg0 = TREE_VALUE (arglist); + arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); + arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist)))); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); + op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); + op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0); + op3 = expand_expr (arg3, NULL_RTX, VOIDmode, 0); + tmode = insn_data[icode].operand[0].mode; + mode1 = insn_data[icode].operand[1].mode; + mode2 = insn_data[icode].operand[2].mode; + mode3 = insn_data[icode].operand[3].mode; + mode4 = insn_data[icode].operand[4].mode; + + if (! 
(*insn_data[icode].operand[1].predicate) (op0, mode1)) + op0 = copy_to_mode_reg (mode1, op0); + + if (! (*insn_data[icode].operand[2].predicate) (op1, mode2)) + op1 = copy_to_mode_reg (mode2, op1); + + if (! (*insn_data[icode].operand[3].predicate) (op2, mode3)) + { + error ("index mask must be an immediate"); + return gen_reg_rtx (tmode); + } + if (! (*insn_data[icode].operand[4].predicate) (op3, mode4)) + { + error ("length mask must be an immediate"); + return gen_reg_rtx (tmode); + } + if (optimize || target == 0 + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + pat = GEN_FCN (icode) (target, op0, op1, op2, op3); + if (! pat) + return 0; + emit_insn (pat); + return target; + case IX86_BUILTIN_VEC_INIT_V2SI: case IX86_BUILTIN_VEC_INIT_V4HI: case IX86_BUILTIN_VEC_INIT_V8QI: Index: gcc/config/i386/i386.h =================================================================== --- gcc/config/i386/i386.h.orig +++ gcc/config/i386/i386.h @@ -387,6 +387,8 @@ extern int x86_prefetch_sse; builtin_define ("__SSE3__"); \ if (TARGET_SSSE3) \ builtin_define ("__SSSE3__"); \ + if (TARGET_SSE4A) \ + builtin_define ("__SSE4A__"); \ if (TARGET_SSE_MATH && TARGET_SSE) \ builtin_define ("__SSE_MATH__"); \ if (TARGET_SSE_MATH && TARGET_SSE2) \ Index: gcc/config/i386/i386.md =================================================================== --- gcc/config/i386/i386.md.orig +++ gcc/config/i386/i386.md @@ -154,6 +154,14 @@ (UNSPEC_PSHUFB 120) (UNSPEC_PSIGN 121) (UNSPEC_PALIGNR 122) + + ; For SSE4A support + (UNSPEC_MOVNTS 130) + (UNSPEC_EXTRQI 131) + (UNSPEC_EXTRQ 132) + (UNSPEC_INSERTQI 133) + (UNSPEC_INSERTQ 134) + ]) (define_constants @@ -14546,7 +14554,31 @@ [(set (match_dup 0) (xor:SI (match_dup 0) (const_int 31))) (clobber (reg:CC FLAGS_REG))])] "" - "") + " + { + if (TARGET_ABM) + { + emit_insn (gen_clzsi2_abm (operands[0], operands[1])); + DONE; + } + } + ") + +(define_insn "clzsi2_abm" + [(set 
(match_operand:SI 0 "register_operand" "=r") + (clz:SI (match_operand:SI 1 "nonimmediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_ABM" + "lzcnt {%1, %0|%0, %1}" + [(set_attr "prefix_0f" "1")]) + +(define_insn "popcountsi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (popcount:SI (match_operand:SI 1 "nonimmediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_POPCNT" + "popcnt {%1, %0|%0, %1}" + [(set_attr "prefix_0f" "1")]) (define_insn "*bsr" [(set (match_operand:SI 0 "register_operand" "=r") @@ -14566,8 +14598,36 @@ (parallel [(set (match_dup 0) (xor:DI (match_dup 0) (const_int 63))) (clobber (reg:CC FLAGS_REG))])] - "TARGET_64BIT" - "") + "" + " + { + if (TARGET_ABM && TARGET_64BIT) /* A DImode lzcnt needs a 64-bit register; on 32-bit targets fall through to FAIL. */ + { + emit_insn (gen_clzdi2_abm (operands[0], operands[1])); + DONE; + } + else if (TARGET_64BIT) + ; + else + FAIL; + } + ") + +(define_insn "clzdi2_abm" + [(set (match_operand:DI 0 "register_operand" "=r") + (clz:DI (match_operand:DI 1 "nonimmediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_ABM && TARGET_64BIT" + "lzcnt {%1, %0|%0, %1}" + [(set_attr "prefix_0f" "1")]) + +(define_insn "popcountdi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (popcount:DI (match_operand:DI 1 "nonimmediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_POPCNT && TARGET_64BIT" + "popcnt {%1, %0|%0, %1}" + [(set_attr "prefix_0f" "1")]) (define_insn "*bsr_rex64" [(set (match_operand:DI 0 "register_operand" "=r") Index: gcc/config/i386/i386.opt =================================================================== --- gcc/config/i386/i386.opt.orig +++ gcc/config/i386/i386.opt @@ -201,6 +201,18 @@ mssse3 Target Report Mask(SSSE3) Support MMX, SSE, SSE2, SSE3 and SSSE3 built-in functions and code generation +msse4a +Target Report Mask(SSE4A) +Support new AMDFAM10 SSE4A built-in functions and code generation + +mpopcnt +Target Report Mask(POPCNT) +Support new AMDFAM10 Advanced Bit Manipulation (ABM) popcount built-in functions and code generation + +mabm +Target Report Mask(ABM) 
+Support new AMDFAM10 Advanced Bit Manipulation (ABM) built-in functions and code generation + msseregparm Target RejectNegative Mask(SSEREGPARM) Use SSE register passing conventions for SF and DF mode Index: gcc/config/i386/sse.md =================================================================== --- gcc/config/i386/sse.md.orig +++ gcc/config/i386/sse.md @@ -4498,3 +4498,83 @@ "pabs<mmxvecsize>\t{%1, %0|%0, %1}"; [(set_attr "type" "sselog1") (set_attr "mode" "DI")]) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; AMD SSE4A instructions +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(define_insn "sse4a_movntsd" + [(set (match_operand:V2DF 0 "memory_operand" "=m") + (vec_merge:V2DF + (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")] + UNSPEC_MOVNTS) + (match_dup 1) + (const_int 1)))] + "TARGET_SSE4A" + "movntsd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "DF")]) + +(define_insn "sse4a_movntss" + [(set (match_operand:V4SF 0 "memory_operand" "=m") + (vec_merge:V4SF + (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")] + UNSPEC_MOVNTS) + (match_dup 1) + (const_int 1)))] + "TARGET_SSE4A" + "movntss\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "SF")]) + +(define_insn "sse4a_extrqi" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") + (match_operand 2 "const_int_operand" "") + (match_operand 3 "const_int_operand" "")] + UNSPEC_EXTRQI))] + "TARGET_SSE4A" +{ + return "extrq\t{%3, %2, %0|%0, %2, %3}"; +} + [(set_attr "type" "sse") + (set_attr "mode" "TI")]) + +(define_insn "sse4a_extrq" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") + (match_operand:V16QI 2 "register_operand" "x")] + UNSPEC_EXTRQ))] + "TARGET_SSE4A" +{ + return "extrq\t{%2, %0|%0, %2}"; +} + [(set_attr "type" "sse") + (set_attr "mode" "TI")]) + 
+(define_insn "sse4a_insertqi" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") + (match_operand:V2DI 2 "register_operand" "x") + (match_operand 3 "const_int_operand" "") + (match_operand 4 "const_int_operand" "")] + UNSPEC_INSERTQI))] + "TARGET_SSE4A" +{ + return "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"; +} + [(set_attr "type" "sse") + (set_attr "mode" "TI")]) + +(define_insn "sse4a_insertq" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") + (match_operand:V2DI 2 "register_operand" "x")] + UNSPEC_INSERTQ))] + "TARGET_SSE4A" +{ + return "insertq\t{%2, %0|%0, %2}"; +} + [(set_attr "type" "sse") + (set_attr "mode" "TI")]) Index: gcc/config.gcc =================================================================== --- gcc/config.gcc.orig +++ gcc/config.gcc @@ -264,12 +264,12 @@ xscale-*-*) i[34567]86-*-*) cpu_type=i386 extra_headers="mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h - pmmintrin.h tmmintrin.h" + pmmintrin.h tmmintrin.h ammintrin.h" ;; x86_64-*-*) cpu_type=i386 extra_headers="mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h - pmmintrin.h tmmintrin.h" + pmmintrin.h tmmintrin.h ammintrin.h" need_64bit_hwint=yes ;; ia64-*-*) Index: gcc/doc/extend.texi =================================================================== --- gcc/doc/extend.texi.orig +++ gcc/doc/extend.texi @@ -6906,6 +6906,18 @@ v4si __builtin_ia32_pabsd128 (v4si) v8hi __builtin_ia32_pabsw128 (v8hi) @end smallexample +The following built-in functions are available when @option{-msse4a} is used. +All of them generate the machine instruction that is part of the name with XMM registers. 
+@smallexample +void _mm_stream_sd (double*,__m128d); +void _mm_stream_ss (float*,__m128); +__m128i _mm_extract_si64 (__m128i, __m128i); +__m128i _mm_extracti_si64 (__m128i, int, int); +__m128i _mm_insert_si64 (__m128i, __m128i); +__m128i _mm_inserti_si64 (__m128i, __m128i, int, int); +@end smallexample + + The following built-in functions are available when @option{-m3dnow} is used. All of them generate the machine instruction that is part of the name. Index: gcc/doc/invoke.texi =================================================================== --- gcc/doc/invoke.texi.orig +++ gcc/doc/invoke.texi @@ -522,7 +522,7 @@ Objective-C and Objective-C++ Dialects}. -mno-fp-ret-in-387 -msoft-float -msvr3-shlib @gol -mno-wide-multiply -mrtd -malign-double @gol -mpreferred-stack-boundary=@var{num} @gol --mmmx -msse -msse2 -msse3 -mssse3 -m3dnow @gol +-mmmx -msse -msse2 -msse3 -mssse3 -msse4a -m3dnow -mpopcnt -mabm @gol -mthreads -mno-align-stringops -minline-all-stringops @gol -mpush-args -maccumulate-outgoing-args -m128bit-long-double @gol -m96bit-long-double -mregparm=@var{num} -msseregparm @gol @@ -9073,6 +9073,10 @@ instruction set support. @item k8, opteron, athlon64, athlon-fx AMD K8 core based CPUs with x86-64 instruction set support. (This supersets MMX, SSE, SSE2, 3dNOW!, enhanced 3dNOW! and 64-bit instruction set extensions.) +@item amdfam10 +AMD Family 10 core based CPUs with x86-64 instruction set support. (This supersets +MMX, SSE, SSE2, SSE3, SSE4A, 3dNOW!, enhanced 3dNOW!, ABM and 64-bit instruction set +extensions.) @item winchip-c6 IDT Winchip C6 CPU, dealt in same way as i486 with additional MMX instruction set support. 
@@ -9350,6 +9354,8 @@ preferred alignment to @option{-mpreferr @itemx -mno-sse3 @item -mssse3 @itemx -mno-ssse3 +@item -msse4a +@itemx -mno-sse4a @item -m3dnow @itemx -mno-3dnow @opindex mmmx @@ -9358,11 +9364,15 @@ preferred alignment to @option{-mpreferr @opindex mno-sse @opindex m3dnow @opindex mno-3dnow +@item -mpopcnt +@itemx -mno-popcnt +@item -mabm +@itemx -mno-abm These switches enable or disable the use of instructions in the MMX, -SSE, SSE2, SSE3, SSSE3 or 3DNow! extended instruction sets. -These extensions are also available as built-in functions: see -@ref{X86 Built-in Functions}, for details of the functions enabled and -disabled by these switches. +SSE, SSE2, SSE3, SSSE3, SSE4A, ABM or 3DNow! extended instruction sets. +These extensions are also available as built-in functions: see +@ref{X86 Built-in Functions}, for details of the functions enabled +and disabled by these switches. To have SSE/SSE2 instructions generated automatically from floating-point code (as opposed to 387 instructions), see @option{-mfpmath=sse}.
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor