Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
Please login to access the resource
DISCONTINUED:openSUSE:11.2:Update
libgcj43
ibm-cell-split
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File ibm-cell-split of Package libgcj43
2008-08-27 Trevor Smigiel <Trevor_Smigiel@playstation.sony.com> Improve code generated for loads and stores on SPU. * doc/tm.texi (SPLIT_BEFORE_CSE2) : Document. * tree-pass.h (pass_split_before_cse2) : Declare. * final.c (rest_of_clean_state) : Initialize split0_completed. * recog.c (split0_completed) : Define. (gate_handle_split_before_cse2, rest_of_handle_split_before_cse2) : New functions. (pass_split_before_cse2) : New pass. * rtl.h (split0_completed) : Declare. * passes.c (init_optimization_passes) : Add pass_split_before_cse2 before pass_cse2 . * config/spu/spu-protos.h (spu_legitimate_address) : Add for_split argument. (aligned_mem_p, spu_valid_move) : Remove prototypes. (spu_split_load, spu_split_store) : Change return type to int. * config/spu/predicates.md (spu_mem_operand) : Remove. (spu_dest_operand) : Add. * config/spu/spu-builtins.md (spu_lqd, spu_lqx, spu_lqa, spu_lqr, spu_stqd, spu_stqx, spu_stqa, spu_stqr) : Remove AND operation. * config/spu/spu.c (regno_aligned_for_load) : Remove. (reg_aligned_for_addr, address_needs_split) : New functions. (spu_legitimate_address, spu_expand_mov, spu_split_load, spu_split_store) : Update. (spu_init_expanders) : Pregenerate a couple of pseudo-registers. * config/spu/spu.h (REG_ALIGN, SPLIT_BEFORE_CSE2) : Define. (GO_IF_LEGITIMATE_ADDRESS) : Update for spu_legitimate_address. * config/spu/spu.md ("_mov<mode>", "_movdi", "_movti") : Update predicates. ("load", "store") : Change to define_split. testsuite/ * testsuite/gcc.target/spu/split0-1.c : Add test. 
diff -crNp -x .svn gcc-4_3-orig/gcc/config/spu/predicates.md gcc-4_3/gcc/config/spu/predicates.md *** gcc-4_3-orig/gcc/config/spu/predicates.md 2008-09-11 13:44:34.000000000 +0200 --- gcc-4_3/gcc/config/spu/predicates.md 2008-09-10 20:09:59.000000000 +0200 *************** *** 39,52 **** (ior (not (match_code "subreg")) (match_test "valid_subreg (op)")))) - (define_predicate "spu_mem_operand" - (and (match_operand 0 "memory_operand") - (match_test "reload_in_progress || reload_completed || aligned_mem_p (op)"))) - (define_predicate "spu_mov_operand" ! (ior (match_operand 0 "spu_mem_operand") (match_operand 0 "spu_nonmem_operand"))) (define_predicate "call_operand" (and (match_code "mem") (match_test "(!TARGET_LARGE_MEM && satisfies_constraint_S (op)) --- 39,52 ---- (ior (not (match_code "subreg")) (match_test "valid_subreg (op)")))) (define_predicate "spu_mov_operand" ! (ior (match_operand 0 "memory_operand") (match_operand 0 "spu_nonmem_operand"))) + (define_predicate "spu_dest_operand" + (ior (match_operand 0 "memory_operand") + (match_operand 0 "spu_reg_operand"))) + (define_predicate "call_operand" (and (match_code "mem") (match_test "(!TARGET_LARGE_MEM && satisfies_constraint_S (op)) diff -crNp -x .svn gcc-4_3-orig/gcc/config/spu/spu-builtins.md gcc-4_3/gcc/config/spu/spu-builtins.md *** gcc-4_3-orig/gcc/config/spu/spu-builtins.md 2008-09-11 13:44:34.000000000 +0200 --- gcc-4_3/gcc/config/spu/spu-builtins.md 2008-09-10 20:09:59.000000000 +0200 *************** *** 23,31 **** (define_expand "spu_lqd" [(set (match_operand:TI 0 "spu_reg_operand" "") ! (mem:TI (and:SI (plus:SI (match_operand:SI 1 "spu_reg_operand" "") ! (match_operand:SI 2 "spu_nonmem_operand" "")) ! (const_int -16))))] "" { if (GET_CODE (operands[2]) == CONST_INT --- 23,30 ---- (define_expand "spu_lqd" [(set (match_operand:TI 0 "spu_reg_operand" "") ! (mem:TI (plus:SI (match_operand:SI 1 "spu_reg_operand" "") ! 
(match_operand:SI 2 "spu_nonmem_operand" ""))))] "" { if (GET_CODE (operands[2]) == CONST_INT *************** *** 42,57 **** (define_expand "spu_lqx" [(set (match_operand:TI 0 "spu_reg_operand" "") ! (mem:TI (and:SI (plus:SI (match_operand:SI 1 "spu_reg_operand" "") ! (match_operand:SI 2 "spu_reg_operand" "")) ! (const_int -16))))] "" "") (define_expand "spu_lqa" [(set (match_operand:TI 0 "spu_reg_operand" "") ! (mem:TI (and:SI (match_operand:SI 1 "immediate_operand" "") ! (const_int -16))))] "" { if (GET_CODE (operands[1]) == CONST_INT --- 41,54 ---- (define_expand "spu_lqx" [(set (match_operand:TI 0 "spu_reg_operand" "") ! (mem:TI (plus:SI (match_operand:SI 1 "spu_reg_operand" "") ! (match_operand:SI 2 "spu_reg_operand" ""))))] "" "") (define_expand "spu_lqa" [(set (match_operand:TI 0 "spu_reg_operand" "") ! (mem:TI (match_operand:SI 1 "immediate_operand" "")))] "" { if (GET_CODE (operands[1]) == CONST_INT *************** *** 61,75 **** (define_expand "spu_lqr" [(set (match_operand:TI 0 "spu_reg_operand" "") ! (mem:TI (and:SI (match_operand:SI 1 "address_operand" "") ! (const_int -16))))] "" "") (define_expand "spu_stqd" ! [(set (mem:TI (and:SI (plus:SI (match_operand:SI 1 "spu_reg_operand" "") ! (match_operand:SI 2 "spu_nonmem_operand" "")) ! (const_int -16))) (match_operand:TI 0 "spu_reg_operand" "r,r"))] "" { --- 58,70 ---- (define_expand "spu_lqr" [(set (match_operand:TI 0 "spu_reg_operand" "") ! (mem:TI (match_operand:SI 1 "address_operand" "")))] "" "") (define_expand "spu_stqd" ! [(set (mem:TI (plus:SI (match_operand:SI 1 "spu_reg_operand" "") ! (match_operand:SI 2 "spu_nonmem_operand" ""))) (match_operand:TI 0 "spu_reg_operand" "r,r"))] "" { *************** *** 86,101 **** }) (define_expand "spu_stqx" ! [(set (mem:TI (and:SI (plus:SI (match_operand:SI 1 "spu_reg_operand" "") ! (match_operand:SI 2 "spu_reg_operand" "")) ! (const_int -16))) (match_operand:TI 0 "spu_reg_operand" "r"))] "" "") (define_expand "spu_stqa" ! 
[(set (mem:TI (and:SI (match_operand:SI 1 "immediate_operand" "") ! (const_int -16))) (match_operand:TI 0 "spu_reg_operand" "r"))] "" { --- 81,94 ---- }) (define_expand "spu_stqx" ! [(set (mem:TI (plus:SI (match_operand:SI 1 "spu_reg_operand" "") ! (match_operand:SI 2 "spu_reg_operand" ""))) (match_operand:TI 0 "spu_reg_operand" "r"))] "" "") (define_expand "spu_stqa" ! [(set (mem:TI (match_operand:SI 1 "immediate_operand" "")) (match_operand:TI 0 "spu_reg_operand" "r"))] "" { *************** *** 105,112 **** }) (define_expand "spu_stqr" ! [(set (mem:TI (and:SI (match_operand:SI 1 "address_operand" "") ! (const_int -16))) (match_operand:TI 0 "spu_reg_operand" ""))] "" "") --- 98,104 ---- }) (define_expand "spu_stqr" ! [(set (mem:TI (match_operand:SI 1 "address_operand" "")) (match_operand:TI 0 "spu_reg_operand" ""))] "" "") diff -crNp -x .svn gcc-4_3-orig/gcc/config/spu/spu.c gcc-4_3/gcc/config/spu/spu.c *** gcc-4_3-orig/gcc/config/spu/spu.c 2008-09-11 13:44:34.000000000 +0200 --- gcc-4_3/gcc/config/spu/spu.c 2008-09-11 13:45:00.000000000 +0200 *************** static tree spu_build_builtin_va_list (v *** 120,128 **** static void spu_va_start (tree, rtx); static tree spu_gimplify_va_arg_expr (tree valist, tree type, tree * pre_p, tree * post_p); - static int regno_aligned_for_load (int regno); static int store_with_one_insn_p (rtx mem); static int mem_is_padded_component_ref (rtx x); static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p); static void spu_asm_globalize_label (FILE * file, const char *name); static unsigned char spu_rtx_costs (rtx x, int code, int outer_code, --- 120,128 ---- static void spu_va_start (tree, rtx); static tree spu_gimplify_va_arg_expr (tree valist, tree type, tree * pre_p, tree * post_p); static int store_with_one_insn_p (rtx mem); static int mem_is_padded_component_ref (rtx x); + static int reg_aligned_for_addr (rtx x, int aligned); static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p); static 
void spu_asm_globalize_label (FILE * file, const char *name); static unsigned char spu_rtx_costs (rtx x, int code, int outer_code, *************** spu_legitimate_constant_p (rtx x) *** 2857,2880 **** /* Valid address are: - symbol_ref, label_ref, const - reg ! - reg + const, where either reg or const is 16 byte aligned - reg + reg, alignment doesn't matter The alignment matters in the reg+const case because lqd and stqd ! ignore the 4 least significant bits of the const. (TODO: It might be ! preferable to allow any alignment and fix it up when splitting.) */ int ! spu_legitimate_address (enum machine_mode mode ATTRIBUTE_UNUSED, ! rtx x, int reg_ok_strict) { ! if (mode == TImode && GET_CODE (x) == AND ! && GET_CODE (XEXP (x, 1)) == CONST_INT ! && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) -16) x = XEXP (x, 0); switch (GET_CODE (x)) { - case SYMBOL_REF: case LABEL_REF: ! return !TARGET_LARGE_MEM; case CONST: if (!TARGET_LARGE_MEM && GET_CODE (XEXP (x, 0)) == PLUS) --- 2857,2908 ---- /* Valid address are: - symbol_ref, label_ref, const - reg ! - reg + const, where const is 16 byte aligned - reg + reg, alignment doesn't matter The alignment matters in the reg+const case because lqd and stqd ! ignore the 4 least significant bits of the const. ! ! Addresses are handled in 4 phases. ! 1) from the beginning of rtl expansion until the split0 pass. Any ! address is acceptable. ! 2) The split0 pass. It is responsible for making every load and store ! valid. It calls legitimate_address with FOR_SPLIT set to 1. This ! is where non-16-byte aligned loads/stores are split into multiple ! instructions to extract or insert just the part we care about. ! 3) From the split0 pass to the beginning of reload. During this ! phase the constant part of an address must be 16 byte aligned, and ! we don't allow any loads/store of less than 4 bytes. We also ! allow a mask of -16 to be part of the address as an optimization. ! 4) From reload until the end. Reload can change the modes of loads ! 
and stores to something smaller than 4-bytes which we need to allow ! now, and it also adjusts the address to match. So in this phase we ! allow that special case. Still allow addresses with a mask of -16. ! ! FOR_SPLIT is only set to 1 for phase 2, otherwise it is 0. */ int ! spu_legitimate_address (enum machine_mode mode, rtx x, int reg_ok_strict, ! int for_split) { ! int aligned = (split0_completed || for_split) ! && !reload_in_progress && !reload_completed; ! int const_aligned = split0_completed || for_split; ! if (GET_MODE_SIZE (mode) >= 16) ! aligned = 0; ! else if (aligned && GET_MODE_SIZE (mode) < 4) ! return 0; ! if (split0_completed ! && (GET_CODE (x) == AND ! && GET_CODE (XEXP (x, 1)) == CONST_INT ! && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16 ! && !CONSTANT_P (XEXP (x, 0)))) x = XEXP (x, 0); switch (GET_CODE (x)) { case LABEL_REF: ! return !TARGET_LARGE_MEM && !aligned; ! ! case SYMBOL_REF: ! return !TARGET_LARGE_MEM && (!aligned || ALIGNED_SYMBOL_REF_P (x)); case CONST: if (!TARGET_LARGE_MEM && GET_CODE (XEXP (x, 0)) == PLUS) *************** spu_legitimate_address (enum machine_mod *** 2882,2903 **** rtx sym = XEXP (XEXP (x, 0), 0); rtx cst = XEXP (XEXP (x, 0), 1); - /* Accept any symbol_ref + constant, assuming it does not - wrap around the local store addressability limit. */ if (GET_CODE (sym) == SYMBOL_REF && GET_CODE (cst) == CONST_INT) ! return 1; } return 0; case CONST_INT: return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff; case SUBREG: x = XEXP (x, 0); ! gcc_assert (GET_CODE (x) == REG); case REG: ! return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict); case PLUS: case LO_SUM: --- 2910,2939 ---- rtx sym = XEXP (XEXP (x, 0), 0); rtx cst = XEXP (XEXP (x, 0), 1); if (GET_CODE (sym) == SYMBOL_REF && GET_CODE (cst) == CONST_INT) ! { ! /* Check for alignment if required. */ ! if (!aligned) ! return 1; ! if ((INTVAL (cst) & 15) == 0 && ALIGNED_SYMBOL_REF_P (sym)) ! return 1; ! } } return 0; case CONST_INT: + /* We don't test alignment here. 
For an absolute address we + assume the user knows what they are doing. */ return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff; case SUBREG: x = XEXP (x, 0); ! if (GET_CODE (x) != REG) ! return 0; case REG: ! return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict) ! && reg_aligned_for_addr (x, 0); case PLUS: case LO_SUM: *************** spu_legitimate_address (enum machine_mod *** 2908,2928 **** op0 = XEXP (op0, 0); if (GET_CODE (op1) == SUBREG) op1 = XEXP (op1, 0); - /* We can't just accept any aligned register because CSE can - change it to a register that is not marked aligned and then - recog will fail. So we only accept frame registers because - they will only be changed to other frame registers. */ if (GET_CODE (op0) == REG && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict) && GET_CODE (op1) == CONST_INT && INTVAL (op1) >= -0x2000 && INTVAL (op1) <= 0x1fff ! && (regno_aligned_for_load (REGNO (op0)) || (INTVAL (op1) & 15) == 0)) return 1; if (GET_CODE (op0) == REG && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict) && GET_CODE (op1) == REG ! && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict)) return 1; } break; --- 2944,2972 ---- op0 = XEXP (op0, 0); if (GET_CODE (op1) == SUBREG) op1 = XEXP (op1, 0); if (GET_CODE (op0) == REG && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict) && GET_CODE (op1) == CONST_INT && INTVAL (op1) >= -0x2000 && INTVAL (op1) <= 0x1fff ! && reg_aligned_for_addr (op0, 0) ! && (!const_aligned ! || (INTVAL (op1) & 15) == 0 ! || ((reload_in_progress || reload_completed) ! && GET_MODE_SIZE (mode) < 4 ! && (INTVAL (op1) & 15) == 4 - GET_MODE_SIZE (mode)) ! /* Some passes create a fake register for testing valid ! addresses, be more lenient when we see those. ivopts ! and reload do it. */ ! || REGNO (op0) == LAST_VIRTUAL_REGISTER + 1 ! || REGNO (op0) == LAST_VIRTUAL_REGISTER + 2)) return 1; if (GET_CODE (op0) == REG && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict) + && reg_aligned_for_addr (op0, 0) && GET_CODE (op1) == REG ! && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict) ! 
&& reg_aligned_for_addr (op1, 0)) return 1; } break; *************** spu_legitimize_address (rtx x, rtx oldx *** 2960,2966 **** else if (GET_CODE (op1) != REG) op1 = force_reg (Pmode, op1); x = gen_rtx_PLUS (Pmode, op0, op1); ! if (spu_legitimate_address (mode, x, 0)) return x; } return NULL_RTX; --- 3004,3010 ---- else if (GET_CODE (op1) != REG) op1 = force_reg (Pmode, op1); x = gen_rtx_PLUS (Pmode, op0, op1); ! if (spu_legitimate_address (mode, x, 0, 0)) return x; } return NULL_RTX; *************** spu_conditional_register_usage (void) *** 3385,3444 **** } } ! /* This is called to decide when we can simplify a load instruction. We ! must only return true for registers which we know will always be ! aligned. Taking into account that CSE might replace this reg with ! another one that has not been marked aligned. ! So this is really only true for frame, stack and virtual registers, ! which we know are always aligned and should not be adversely effected ! by CSE. */ static int ! regno_aligned_for_load (int regno) ! { ! return regno == FRAME_POINTER_REGNUM ! || (frame_pointer_needed && regno == HARD_FRAME_POINTER_REGNUM) ! || regno == ARG_POINTER_REGNUM ! || regno == STACK_POINTER_REGNUM ! || (regno >= FIRST_VIRTUAL_REGISTER ! && regno <= LAST_VIRTUAL_REGISTER); ! } ! ! /* Return TRUE when mem is known to be 16-byte aligned. */ ! int ! aligned_mem_p (rtx mem) { ! if (MEM_ALIGN (mem) >= 128) return 1; ! if (GET_MODE_SIZE (GET_MODE (mem)) >= 16) ! return 1; ! if (GET_CODE (XEXP (mem, 0)) == PLUS) ! { ! rtx p0 = XEXP (XEXP (mem, 0), 0); ! rtx p1 = XEXP (XEXP (mem, 0), 1); ! if (regno_aligned_for_load (REGNO (p0))) ! { ! if (GET_CODE (p1) == REG && regno_aligned_for_load (REGNO (p1))) ! return 1; ! if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0) ! return 1; ! } ! } ! else if (GET_CODE (XEXP (mem, 0)) == REG) ! { ! if (regno_aligned_for_load (REGNO (XEXP (mem, 0)))) ! return 1; ! } ! else if (ALIGNED_SYMBOL_REF_P (XEXP (mem, 0))) ! return 1; ! 
else if (GET_CODE (XEXP (mem, 0)) == CONST) ! { ! rtx p0 = XEXP (XEXP (XEXP (mem, 0), 0), 0); ! rtx p1 = XEXP (XEXP (XEXP (mem, 0), 0), 1); ! if (GET_CODE (p0) == SYMBOL_REF ! && GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0) ! return 1; ! } ! return 0; } /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF --- 3429,3444 ---- } } ! /* This is called any time we inspect the alignment of a register for ! addresses. */ static int ! reg_aligned_for_addr (rtx x, int aligned) { ! int regno = ! REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x); ! if (!aligned) return 1; ! return REGNO_POINTER_ALIGN (regno) >= 128; } /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF *************** spu_encode_section_info (tree decl, rtx *** 3467,3475 **** static int store_with_one_insn_p (rtx mem) { rtx addr = XEXP (mem, 0); ! if (GET_MODE (mem) == BLKmode) return 0; /* Only static objects. */ if (GET_CODE (addr) == SYMBOL_REF) { --- 3467,3478 ---- static int store_with_one_insn_p (rtx mem) { + enum machine_mode mode = GET_MODE (mem); rtx addr = XEXP (mem, 0); ! if (mode == BLKmode) return 0; + if (GET_MODE_SIZE (mode) >= 16) + return 1; /* Only static objects. */ if (GET_CODE (addr) == SYMBOL_REF) { *************** store_with_one_insn_p (rtx mem) *** 3493,3498 **** --- 3496,3517 ---- return 0; } + /* Return 1 when the address is not valid for a simple load and store as + required by the '_mov*' patterns. We could make this less strict + for loads, but we prefer mem's to look the same so they are more + likely to be merged. 
*/ + static int + address_needs_split (rtx mem) + { + if (GET_MODE_SIZE (GET_MODE (mem)) < 16 + && (GET_MODE_SIZE (GET_MODE (mem)) < 4 + || !(store_with_one_insn_p (mem) + || mem_is_padded_component_ref (mem)))) + return 1; + + return 0; + } + int spu_expand_mov (rtx * ops, enum machine_mode mode) { *************** spu_expand_mov (rtx * ops, enum machine_ *** 3540,3564 **** } else { - if (GET_CODE (ops[0]) == MEM) - { - if (!spu_valid_move (ops)) - { - emit_insn (gen_store (ops[0], ops[1], gen_reg_rtx (TImode), - gen_reg_rtx (TImode))); - return 1; - } - } - else if (GET_CODE (ops[1]) == MEM) - { - if (!spu_valid_move (ops)) - { - emit_insn (gen_load - (ops[0], ops[1], gen_reg_rtx (TImode), - gen_reg_rtx (SImode))); - return 1; - } - } /* Catch the SImode immediates greater than 0x7fffffff, and sign extend them. */ if (GET_CODE (ops[1]) == CONST_INT) --- 3559,3564 ---- *************** spu_expand_mov (rtx * ops, enum machine_ *** 3574,3580 **** return 0; } ! void spu_split_load (rtx * ops) { enum machine_mode mode = GET_MODE (ops[0]); --- 3574,3580 ---- return 0; } ! int spu_split_load (rtx * ops) { enum machine_mode mode = GET_MODE (ops[0]); *************** spu_split_load (rtx * ops) *** 3582,3587 **** --- 3582,3598 ---- int rot_amt; addr = XEXP (ops[1], 0); + gcc_assert (GET_CODE (addr) != AND); + + if (!address_needs_split (ops[1])) + { + addr = XEXP (ops[1], 0); + if (spu_legitimate_address (mode, addr, 0, 1)) + return 0; + ops[1] = change_address (ops[1], VOIDmode, force_reg (Pmode, addr)); + emit_move_insn (ops[0], ops[1]); + return 1; + } rot = 0; rot_amt = 0; *************** spu_split_load (rtx * ops) *** 3599,3610 **** */ p0 = XEXP (addr, 0); p1 = XEXP (addr, 1); ! if (REG_P (p0) && !regno_aligned_for_load (REGNO (p0))) { ! if (REG_P (p1) && !regno_aligned_for_load (REGNO (p1))) { ! emit_insn (gen_addsi3 (ops[3], p0, p1)); ! rot = ops[3]; } else rot = p0; --- 3610,3641 ---- */ p0 = XEXP (addr, 0); p1 = XEXP (addr, 1); ! 
if (!reg_aligned_for_addr (p0, 1)) { ! if (GET_CODE (p1) == REG && !reg_aligned_for_addr (p1, 1)) { ! rot = gen_reg_rtx (SImode); ! emit_insn (gen_addsi3 (rot, p0, p1)); ! } ! else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15)) ! { ! if (INTVAL (p1) > 0 ! && INTVAL (p1) * BITS_PER_UNIT < REG_ALIGN (p0)) ! { ! rot = gen_reg_rtx (SImode); ! emit_insn (gen_addsi3 (rot, p0, p1)); ! addr = p0; ! } ! else ! { ! rtx x = gen_reg_rtx (SImode); ! emit_move_insn (x, p1); ! if (!spu_arith_operand (p1, SImode)) ! p1 = x; ! rot = gen_reg_rtx (SImode); ! emit_insn (gen_addsi3 (rot, p0, p1)); ! addr = gen_rtx_PLUS (Pmode, p0, x); ! } } else rot = p0; *************** spu_split_load (rtx * ops) *** 3614,3629 **** if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15)) { rot_amt = INTVAL (p1) & 15; ! p1 = GEN_INT (INTVAL (p1) & -16); ! addr = gen_rtx_PLUS (SImode, p0, p1); } ! else if (REG_P (p1) && !regno_aligned_for_load (REGNO (p1))) rot = p1; } } else if (GET_CODE (addr) == REG) { ! if (!regno_aligned_for_load (REGNO (addr))) rot = addr; } else if (GET_CODE (addr) == CONST) --- 3645,3665 ---- if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15)) { rot_amt = INTVAL (p1) & 15; ! if (INTVAL (p1) & -16) ! { ! p1 = GEN_INT (INTVAL (p1) & -16); ! addr = gen_rtx_PLUS (SImode, p0, p1); ! } ! else ! addr = p0; } ! else if (GET_CODE (p1) == REG && !reg_aligned_for_addr (p1, 1)) rot = p1; } } else if (GET_CODE (addr) == REG) { ! if (!reg_aligned_for_addr (addr, 1)) rot = addr; } else if (GET_CODE (addr) == CONST) *************** spu_split_load (rtx * ops) *** 3642,3648 **** addr = XEXP (XEXP (addr, 0), 0); } else ! rot = addr; } else if (GET_CODE (addr) == CONST_INT) { --- 3678,3687 ---- addr = XEXP (XEXP (addr, 0), 0); } else ! { ! rot = gen_reg_rtx (Pmode); ! emit_move_insn (rot, addr); ! } } else if (GET_CODE (addr) == CONST_INT) { *************** spu_split_load (rtx * ops) *** 3650,3656 **** addr = GEN_INT (rot_amt & -16); } else if (!ALIGNED_SYMBOL_REF_P (addr)) ! 
rot = addr; if (GET_MODE_SIZE (mode) < 4) rot_amt += GET_MODE_SIZE (mode) - 4; --- 3689,3698 ---- addr = GEN_INT (rot_amt & -16); } else if (!ALIGNED_SYMBOL_REF_P (addr)) ! { ! rot = gen_reg_rtx (Pmode); ! emit_move_insn (rot, addr); ! } if (GET_MODE_SIZE (mode) < 4) rot_amt += GET_MODE_SIZE (mode) - 4; *************** spu_split_load (rtx * ops) *** 3659,3673 **** if (rot && rot_amt) { ! emit_insn (gen_addsi3 (ops[3], rot, GEN_INT (rot_amt))); ! rot = ops[3]; rot_amt = 0; } ! load = ops[2]; ! addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16)); ! mem = change_address (ops[1], TImode, addr); emit_insn (gen_movti (load, mem)); --- 3701,3715 ---- if (rot && rot_amt) { ! rtx x = gen_reg_rtx (SImode); ! emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt))); ! rot = x; rot_amt = 0; } ! load = gen_reg_rtx (TImode); ! mem = change_address (ops[1], TImode, copy_rtx (addr)); emit_insn (gen_movti (load, mem)); *************** spu_split_load (rtx * ops) *** 3676,3698 **** else if (rot_amt) emit_insn (gen_rotlti3 (load, load, GEN_INT (rot_amt * 8))); ! if (reload_completed) ! emit_move_insn (ops[0], gen_rtx_REG (GET_MODE (ops[0]), REGNO (load))); ! else ! emit_insn (gen_spu_convert (ops[0], load)); } ! void spu_split_store (rtx * ops) { enum machine_mode mode = GET_MODE (ops[0]); ! rtx pat = ops[2]; ! rtx reg = ops[3]; rtx addr, p0, p1, p1_lo, smem; int aform; int scalar; addr = XEXP (ops[0], 0); if (GET_CODE (addr) == PLUS) { --- 3718,3748 ---- else if (rot_amt) emit_insn (gen_rotlti3 (load, load, GEN_INT (rot_amt * 8))); ! emit_insn (gen_spu_convert (ops[0], load)); ! return 1; } ! int spu_split_store (rtx * ops) { enum machine_mode mode = GET_MODE (ops[0]); ! 
rtx reg; rtx addr, p0, p1, p1_lo, smem; int aform; int scalar; + if (!address_needs_split (ops[0])) + { + addr = XEXP (ops[0], 0); + if (spu_legitimate_address (mode, addr, 0, 1)) + return 0; + ops[0] = change_address (ops[0], VOIDmode, force_reg (Pmode, addr)); + emit_move_insn (ops[0], ops[1]); + return 1; + } + addr = XEXP (ops[0], 0); + gcc_assert (GET_CODE (addr) != AND); if (GET_CODE (addr) == PLUS) { *************** spu_split_store (rtx * ops) *** 3704,3710 **** unaligned reg + aligned reg => lqx, c?x, shuf, stqx unaligned reg + unaligned reg => lqx, c?x, shuf, stqx unaligned reg + aligned const => lqd, c?d, shuf, stqx ! unaligned reg + unaligned const -> not allowed by legitimate address */ aform = 0; p0 = XEXP (addr, 0); --- 3754,3760 ---- unaligned reg + aligned reg => lqx, c?x, shuf, stqx unaligned reg + unaligned reg => lqx, c?x, shuf, stqx unaligned reg + aligned const => lqd, c?d, shuf, stqx ! unaligned reg + unaligned const -> lqx, c?d, shuf, stqx */ aform = 0; p0 = XEXP (addr, 0); *************** spu_split_store (rtx * ops) *** 3712,3719 **** if (GET_CODE (p0) == REG && GET_CODE (p1) == CONST_INT) { p1_lo = GEN_INT (INTVAL (p1) & 15); ! p1 = GEN_INT (INTVAL (p1) & -16); ! addr = gen_rtx_PLUS (SImode, p0, p1); } } else if (GET_CODE (addr) == REG) --- 3762,3781 ---- if (GET_CODE (p0) == REG && GET_CODE (p1) == CONST_INT) { p1_lo = GEN_INT (INTVAL (p1) & 15); ! if (reg_aligned_for_addr (p0, 1)) ! { ! p1 = GEN_INT (INTVAL (p1) & -16); ! if (p1 == const0_rtx) ! addr = p0; ! else ! addr = gen_rtx_PLUS (SImode, p0, p1); ! } ! else ! { ! rtx x = gen_reg_rtx (SImode); ! emit_move_insn (x, p1); ! addr = gen_rtx_PLUS (SImode, p0, x); ! } } } else if (GET_CODE (addr) == REG) *************** spu_split_store (rtx * ops) *** 3730,3760 **** p1_lo = addr; if (ALIGNED_SYMBOL_REF_P (addr)) p1_lo = const0_rtx; ! else if (GET_CODE (addr) == CONST) { ! if (GET_CODE (XEXP (addr, 0)) == PLUS ! && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0)) ! 
&& GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT) ! { ! HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1)); ! if ((v & -16) != 0) ! addr = gen_rtx_CONST (Pmode, ! gen_rtx_PLUS (Pmode, ! XEXP (XEXP (addr, 0), 0), ! GEN_INT (v & -16))); ! else ! addr = XEXP (XEXP (addr, 0), 0); ! p1_lo = GEN_INT (v & 15); ! } } else if (GET_CODE (addr) == CONST_INT) { p1_lo = GEN_INT (INTVAL (addr) & 15); addr = GEN_INT (INTVAL (addr) & -16); } } ! addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16)); scalar = store_with_one_insn_p (ops[0]); if (!scalar) --- 3792,3825 ---- p1_lo = addr; if (ALIGNED_SYMBOL_REF_P (addr)) p1_lo = const0_rtx; ! else if (GET_CODE (addr) == CONST ! && GET_CODE (XEXP (addr, 0)) == PLUS ! && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0)) ! && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT) { ! HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1)); ! if ((v & -16) != 0) ! addr = gen_rtx_CONST (Pmode, ! gen_rtx_PLUS (Pmode, ! XEXP (XEXP (addr, 0), 0), ! GEN_INT (v & -16))); ! else ! addr = XEXP (XEXP (addr, 0), 0); ! p1_lo = GEN_INT (v & 15); } else if (GET_CODE (addr) == CONST_INT) { p1_lo = GEN_INT (INTVAL (addr) & 15); addr = GEN_INT (INTVAL (addr) & -16); } + else + { + p1_lo = gen_reg_rtx (SImode); + emit_move_insn (p1_lo, addr); + } } ! reg = gen_reg_rtx (TImode); scalar = store_with_one_insn_p (ops[0]); if (!scalar) *************** spu_split_store (rtx * ops) *** 3764,3774 **** possible, and copying the flags will prevent that in certain cases, e.g. consider the volatile flag. */ rtx lmem = change_address (ops[0], TImode, copy_rtx (addr)); set_mem_alias_set (lmem, 0); emit_insn (gen_movti (reg, lmem)); ! if (!p0 || regno_aligned_for_load (REGNO (p0))) p0 = stack_pointer_rtx; if (!p1_lo) p1_lo = const0_rtx; --- 3829,3840 ---- possible, and copying the flags will prevent that in certain cases, e.g. consider the volatile flag. 
*/ + rtx pat = gen_reg_rtx (TImode); rtx lmem = change_address (ops[0], TImode, copy_rtx (addr)); set_mem_alias_set (lmem, 0); emit_insn (gen_movti (reg, lmem)); ! if (!p0 || reg_aligned_for_addr (p0, 1)) p0 = stack_pointer_rtx; if (!p1_lo) p1_lo = const0_rtx; *************** spu_split_store (rtx * ops) *** 3776,3792 **** emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode)))); emit_insn (gen_shufb (reg, ops[1], reg, pat)); } - else if (reload_completed) - { - if (GET_CODE (ops[1]) == REG) - emit_move_insn (reg, gen_rtx_REG (GET_MODE (reg), REGNO (ops[1]))); - else if (GET_CODE (ops[1]) == SUBREG) - emit_move_insn (reg, - gen_rtx_REG (GET_MODE (reg), - REGNO (SUBREG_REG (ops[1])))); - else - abort (); - } else { if (GET_CODE (ops[1]) == REG) --- 3842,3847 ---- *************** spu_split_store (rtx * ops) *** 3798,3812 **** } if (GET_MODE_SIZE (mode) < 4 && scalar) ! emit_insn (gen_shlqby_ti ! (reg, reg, GEN_INT (4 - GET_MODE_SIZE (mode)))); ! smem = change_address (ops[0], TImode, addr); /* We can't use the previous alias set because the memory has changed size and can potentially overlap objects of other types. */ set_mem_alias_set (smem, 0); emit_insn (gen_movti (smem, reg)); } /* Return TRUE if X is MEM which is a struct member reference --- 3853,3868 ---- } if (GET_MODE_SIZE (mode) < 4 && scalar) ! emit_insn (gen_ashlti3 ! (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode)))); ! smem = change_address (ops[0], TImode, copy_rtx (addr)); /* We can't use the previous alias set because the memory has changed size and can potentially overlap objects of other types. 
*/ set_mem_alias_set (smem, 0); emit_insn (gen_movti (smem, reg)); + return 1; } /* Return TRUE if X is MEM which is a struct member reference *************** fix_range (const char *const_str) *** 3905,3941 **** } } - int - spu_valid_move (rtx * ops) - { - enum machine_mode mode = GET_MODE (ops[0]); - if (!register_operand (ops[0], mode) && !register_operand (ops[1], mode)) - return 0; - - /* init_expr_once tries to recog against load and store insns to set - the direct_load[] and direct_store[] arrays. We always want to - consider those loads and stores valid. init_expr_once is called in - the context of a dummy function which does not have a decl. */ - if (cfun->decl == 0) - return 1; - - /* Don't allows loads/stores which would require more than 1 insn. - During and after reload we assume loads and stores only take 1 - insn. */ - if (GET_MODE_SIZE (mode) < 16 && !reload_in_progress && !reload_completed) - { - if (GET_CODE (ops[0]) == MEM - && (GET_MODE_SIZE (mode) < 4 - || !(store_with_one_insn_p (ops[0]) - || mem_is_padded_component_ref (ops[0])))) - return 0; - if (GET_CODE (ops[1]) == MEM - && (GET_MODE_SIZE (mode) < 4 || !aligned_mem_p (ops[1]))) - return 0; - } - return 1; - } - /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that can be generated using the fsmbi instruction. */ int --- 3961,3966 ---- *************** spu_sms_res_mii (struct ddg *g) *** 5577,5588 **** void spu_init_expanders (void) ! { ! /* HARD_FRAME_REGISTER is only 128 bit aligned when ! * frame_pointer_needed is true. We don't know that until we're ! * expanding the prologue. */ if (cfun) ! REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8; } static enum machine_mode --- 5602,5627 ---- void spu_init_expanders (void) ! { if (cfun) ! { ! rtx r0, r1; ! /* HARD_FRAME_REGISTER is only 128 bit aligned when ! frame_pointer_needed is true. We don't know that until we're ! expanding the prologue. */ ! REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8; ! ! 
/* A number of passes use LAST_VIRTUAL_REGISTER+1 and ! LAST_VIRTUAL_REGISTER+2 to test the back-end. We want to ! handle those cases specially, so we reserve those two registers ! here by generating them. */ ! r0 = gen_reg_rtx (SImode); ! r1 = gen_reg_rtx (SImode); ! mark_reg_pointer (r0, 128); ! mark_reg_pointer (r1, 128); ! gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1 ! && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2); ! } } static enum machine_mode diff -crNp -x .svn gcc-4_3-orig/gcc/config/spu/spu.h gcc-4_3/gcc/config/spu/spu.h *** gcc-4_3-orig/gcc/config/spu/spu.h 2008-09-11 13:44:34.000000000 +0200 --- gcc-4_3/gcc/config/spu/spu.h 2008-09-11 13:45:00.000000000 +0200 *************** enum reg_class { *** 254,259 **** --- 254,264 ---- #define INT_REG_OK_FOR_BASE_P(X,STRICT) \ ((!(STRICT) || REGNO_OK_FOR_BASE_P (REGNO (X)))) + #define REG_ALIGN(X) \ + (REG_POINTER(X) \ + ? REGNO_POINTER_ALIGN (ORIGINAL_REGNO (X)) \ + : 0) + #define PREFERRED_RELOAD_CLASS(X,CLASS) (CLASS) #define CLASS_MAX_NREGS(CLASS, MODE) \ *************** targetm.resolve_overloaded_builtin = spu *** 439,445 **** #endif #define GO_IF_LEGITIMATE_ADDRESS(MODE, X, ADDR) \ ! { if (spu_legitimate_address (MODE, X, REG_OK_STRICT_FLAG)) \ goto ADDR; \ } --- 444,450 ---- #endif #define GO_IF_LEGITIMATE_ADDRESS(MODE, X, ADDR) \ ! { if (spu_legitimate_address (MODE, X, REG_OK_STRICT_FLAG, 0)) \ goto ADDR; \ } *************** targetm.resolve_overloaded_builtin = spu *** 633,635 **** --- 638,642 ---- extern GTY(()) rtx spu_compare_op0; extern GTY(()) rtx spu_compare_op1; + #define SPLIT_BEFORE_CSE2 1 + diff -crNp -x .svn gcc-4_3-orig/gcc/config/spu/spu.md gcc-4_3/gcc/config/spu/spu.md *** gcc-4_3-orig/gcc/config/spu/spu.md 2008-09-11 13:44:34.000000000 +0200 --- gcc-4_3/gcc/config/spu/spu.md 2008-09-11 13:45:00.000000000 +0200 *************** *** 273,280 **** (define_split [(set (match_operand 0 "spu_reg_operand") (match_operand 1 "immediate_operand"))] ! ! 
"" [(set (match_dup 0) (high (match_dup 1))) (set (match_dup 0) --- 273,279 ---- (define_split [(set (match_operand 0 "spu_reg_operand") (match_operand 1 "immediate_operand"))] ! "split0_completed" [(set (match_dup 0) (high (match_dup 1))) (set (match_dup 0) *************** *** 311,319 **** ;; move internal (define_insn "_mov<mode>" ! [(set (match_operand:MOV 0 "spu_nonimm_operand" "=r,r,r,r,r,m") (match_operand:MOV 1 "spu_mov_operand" "r,A,f,j,m,r"))] ! "spu_valid_move (operands)" "@ ori\t%0,%1,0 il%s1\t%0,%S1 --- 310,319 ---- ;; move internal (define_insn "_mov<mode>" ! [(set (match_operand:MOV 0 "spu_dest_operand" "=r,r,r,r,r,m") (match_operand:MOV 1 "spu_mov_operand" "r,A,f,j,m,r"))] ! "register_operand(operands[0], <MODE>mode) ! || register_operand(operands[1], <MODE>mode)" "@ ori\t%0,%1,0 il%s1\t%0,%S1 *************** *** 331,339 **** "iohl\t%0,%2@l") (define_insn "_movdi" ! [(set (match_operand:DI 0 "spu_nonimm_operand" "=r,r,r,r,r,m") (match_operand:DI 1 "spu_mov_operand" "r,a,f,k,m,r"))] ! "spu_valid_move (operands)" "@ ori\t%0,%1,0 il%d1\t%0,%D1 --- 331,340 ---- "iohl\t%0,%2@l") (define_insn "_movdi" ! [(set (match_operand:DI 0 "spu_dest_operand" "=r,r,r,r,r,m") (match_operand:DI 1 "spu_mov_operand" "r,a,f,k,m,r"))] ! "register_operand(operands[0], DImode) ! || register_operand(operands[1], DImode)" "@ ori\t%0,%1,0 il%d1\t%0,%D1 *************** *** 344,352 **** [(set_attr "type" "fx2,fx2,shuf,shuf,load,store")]) (define_insn "_movti" ! [(set (match_operand:TI 0 "spu_nonimm_operand" "=r,r,r,r,r,m") (match_operand:TI 1 "spu_mov_operand" "r,U,f,l,m,r"))] ! "spu_valid_move (operands)" "@ ori\t%0,%1,0 il%t1\t%0,%T1 --- 345,354 ---- [(set_attr "type" "fx2,fx2,shuf,shuf,load,store")]) (define_insn "_movti" ! [(set (match_operand:TI 0 "spu_dest_operand" "=r,r,r,r,r,m") (match_operand:TI 1 "spu_mov_operand" "r,U,f,l,m,r"))] ! "register_operand(operands[0], TImode) ! 
|| register_operand(operands[1], TImode)" "@ ori\t%0,%1,0 il%t1\t%0,%T1 *************** *** 356,384 **** stq%p0\t%1,%0" [(set_attr "type" "fx2,fx2,shuf,shuf,load,store")]) ! (define_insn_and_split "load" ! [(set (match_operand 0 "spu_reg_operand" "=r") ! (match_operand 1 "memory_operand" "m")) ! (clobber (match_operand:TI 2 "spu_reg_operand" "=&r")) ! (clobber (match_operand:SI 3 "spu_reg_operand" "=&r"))] ! "GET_MODE(operands[0]) == GET_MODE(operands[1])" ! "#" ! "" [(set (match_dup 0) (match_dup 1))] ! { spu_split_load(operands); DONE; }) ! (define_insn_and_split "store" ! [(set (match_operand 0 "memory_operand" "=m") ! (match_operand 1 "spu_reg_operand" "r")) ! (clobber (match_operand:TI 2 "spu_reg_operand" "=&r")) ! (clobber (match_operand:TI 3 "spu_reg_operand" "=&r"))] ! "GET_MODE(operands[0]) == GET_MODE(operands[1])" ! "#" ! "" [(set (match_dup 0) (match_dup 1))] ! { spu_split_store(operands); DONE; }) ;; Operand 3 is the number of bytes. 1:b 2:h 4:w 8:d --- 358,382 ---- stq%p0\t%1,%0" [(set_attr "type" "fx2,fx2,shuf,shuf,load,store")]) ! (define_split ! [(set (match_operand 0 "spu_reg_operand") ! (match_operand 1 "memory_operand"))] ! "GET_MODE(operands[0]) == GET_MODE(operands[1]) && !split0_completed" [(set (match_dup 0) (match_dup 1))] ! { if (spu_split_load(operands)) ! DONE; ! }) ! (define_split ! [(set (match_operand 0 "memory_operand") ! (match_operand 1 "spu_reg_operand"))] ! "GET_MODE(operands[0]) == GET_MODE(operands[1]) && !split0_completed" [(set (match_dup 0) (match_dup 1))] ! { if (spu_split_store(operands)) ! DONE; ! }) ;; Operand 3 is the number of bytes. 
1:b 2:h 4:w 8:d diff -crNp -x .svn gcc-4_3-orig/gcc/config/spu/spu-protos.h gcc-4_3/gcc/config/spu/spu-protos.h *** gcc-4_3-orig/gcc/config/spu/spu-protos.h 2008-09-11 13:44:34.000000000 +0200 --- gcc-4_3/gcc/config/spu/spu-protos.h 2008-09-10 20:09:59.000000000 +0200 *************** extern int arith_immediate_p (rtx op, en *** 54,60 **** extern int spu_constant_address_p (rtx x); extern int spu_legitimate_constant_p (rtx x); extern int spu_legitimate_address (enum machine_mode mode, rtx x, ! int reg_ok_strict); extern rtx spu_legitimize_address (rtx x, rtx oldx, enum machine_mode mode); extern int spu_initial_elimination_offset (int from, int to); extern rtx spu_function_value (const_tree type, const_tree func); --- 54,60 ---- extern int spu_constant_address_p (rtx x); extern int spu_legitimate_constant_p (rtx x); extern int spu_legitimate_address (enum machine_mode mode, rtx x, ! int reg_ok_strict, int for_split); extern rtx spu_legitimize_address (rtx x, rtx oldx, enum machine_mode mode); extern int spu_initial_elimination_offset (int from, int to); extern rtx spu_function_value (const_tree type, const_tree func); *************** extern void spu_setup_incoming_varargs ( *** 64,74 **** tree type, int *pretend_size, int no_rtl); extern void spu_conditional_register_usage (void); - extern int aligned_mem_p (rtx mem); extern int spu_expand_mov (rtx * ops, enum machine_mode mode); ! extern void spu_split_load (rtx * ops); ! extern void spu_split_store (rtx * ops); ! extern int spu_valid_move (rtx * ops); extern int fsmbi_const_p (rtx x); extern int cpat_const_p (rtx x, enum machine_mode mode); extern rtx gen_cpat_const (rtx * ops); --- 64,72 ---- tree type, int *pretend_size, int no_rtl); extern void spu_conditional_register_usage (void); extern int spu_expand_mov (rtx * ops, enum machine_mode mode); ! extern int spu_split_load (rtx * ops); ! 
extern int spu_split_store (rtx * ops); extern int fsmbi_const_p (rtx x); extern int cpat_const_p (rtx x, enum machine_mode mode); extern rtx gen_cpat_const (rtx * ops); diff -crNp -x .svn gcc-4_3-orig/gcc/doc/tm.texi gcc-4_3/gcc/doc/tm.texi *** gcc-4_3-orig/gcc/doc/tm.texi 2008-09-11 13:44:35.000000000 +0200 --- gcc-4_3/gcc/doc/tm.texi 2008-09-11 13:45:01.000000000 +0200 *************** to the functions in @file{libgcc} that p *** 10372,10374 **** --- 10372,10386 ---- call stack unwinding. It is used in declarations in @file{unwind-generic.h} and the associated definitions of those functions. @end defmac + + @defmac SPLIT_BEFORE_CSE2 + This macro determines whether to use an additional split pass before the + second CSE pass. @code{split0_completed} will be set after this pass is + completed. + + For example, the Cell SPU target uses this for better optimization of + the multiple instructions required to do simple loads and stores. The + optimizations before this pass work better on simple memory + instructions, and the optimizations right after this pass (e.g., CSE and + combine) are able to optimize the split instructions. + @end defmac diff -crNp -x .svn gcc-4_3-orig/gcc/final.c gcc-4_3/gcc/final.c *** gcc-4_3-orig/gcc/final.c 2008-09-11 13:44:35.000000000 +0200 --- gcc-4_3/gcc/final.c 2008-09-10 20:09:59.000000000 +0200 *************** rest_of_clean_state (void) *** 4238,4243 **** --- 4238,4246 ---- #ifdef STACK_REGS regstack_completed = 0; #endif + #ifdef SPLIT_BEFORE_CSE2 + split0_completed = 0; + #endif /* Clear out the insn_length contents now that they are no longer valid.
*/ diff -crNp -x .svn gcc-4_3-orig/gcc/passes.c gcc-4_3/gcc/passes.c *** gcc-4_3-orig/gcc/passes.c 2008-09-11 13:44:35.000000000 +0200 --- gcc-4_3/gcc/passes.c 2008-09-10 20:09:59.000000000 +0200 *************** init_optimization_passes (void) *** 715,720 **** --- 715,721 ---- } NEXT_PASS (pass_web); NEXT_PASS (pass_jump_bypass); + NEXT_PASS (pass_split_before_cse2); NEXT_PASS (pass_cse2); NEXT_PASS (pass_rtl_dse1); NEXT_PASS (pass_rtl_fwprop_addr); diff -crNp -x .svn gcc-4_3-orig/gcc/recog.c gcc-4_3/gcc/recog.c *** gcc-4_3-orig/gcc/recog.c 2008-09-11 13:44:35.000000000 +0200 --- gcc-4_3/gcc/recog.c 2008-09-10 20:09:59.000000000 +0200 *************** int reload_completed; *** 94,99 **** --- 94,104 ---- /* Nonzero after thread_prologue_and_epilogue_insns has run. */ int epilogue_completed; + #ifdef SPLIT_BEFORE_CSE2 + /* Nonzero after split0 pass has run. */ + int split0_completed; + #endif + /* Initialize data used by the function `recog'. This must be called once in the compilation of a function before any insn recognition may be done in the function. 
*/ *************** struct tree_opt_pass pass_split_for_shor *** 3497,3500 **** --- 3502,3541 ---- 0 /* letter */ }; + static bool + gate_handle_split_before_cse2 (void) + { + #ifdef SPLIT_BEFORE_CSE2 + return SPLIT_BEFORE_CSE2; + #else + return 0; + #endif + } + + static unsigned int + rest_of_handle_split_before_cse2 (void) + { + #ifdef SPLIT_BEFORE_CSE2 + split_all_insns_noflow (); + split0_completed = 1; + #endif + return 0; + } + + struct tree_opt_pass pass_split_before_cse2 = + { + "split0", /* name */ + gate_handle_split_before_cse2, /* gate */ + rest_of_handle_split_before_cse2, /* execute */ + NULL, /* sub */ + NULL, /* next */ + 0, /* static_pass_number */ + 0, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_dump_func, /* todo_flags_finish */ + 0 /* letter */ + }; diff -crNp -x .svn gcc-4_3-orig/gcc/rtl.h gcc-4_3/gcc/rtl.h *** gcc-4_3-orig/gcc/rtl.h 2008-09-11 13:44:36.000000000 +0200 --- gcc-4_3/gcc/rtl.h 2008-09-10 20:09:59.000000000 +0200 *************** extern int reload_completed; *** 1997,2002 **** --- 1997,2007 ---- /* Nonzero after thread_prologue_and_epilogue_insns has run. */ extern int epilogue_completed; + #ifdef SPLIT_BEFORE_CSE2 + /* Nonzero after the split0 pass has completed. */ + extern int split0_completed; + #endif + /* Set to 1 while reload_as_needed is operating. Required by some machines to handle any generated moves differently. */ diff -crNp -x .svn gcc-4_3-orig/gcc/testsuite/gcc.target/spu/split0-1.c gcc-4_3/gcc/testsuite/gcc.target/spu/split0-1.c *** gcc-4_3-orig/gcc/testsuite/gcc.target/spu/split0-1.c 1970-01-01 01:00:00.000000000 +0100 --- gcc-4_3/gcc/testsuite/gcc.target/spu/split0-1.c 2008-09-10 20:09:59.000000000 +0200 *************** *** 0 **** --- 1,17 ---- + /* Make sure there are only 2 loads. 
*/ + /* { dg-do compile { target spu-*-* } } */ + /* { dg-options "-O2" } */ + /* { dg-final { scan-assembler-times "lqd \\$\[0-9\]+,0\\(\\$\[0-9\]+\\)" 1 } } */ + /* { dg-final { scan-assembler-times "lqd \\$\[0-9\]+,16\\(\\$\[0-9\]+\\)" 1 } } */ + /* { dg-final { scan-assembler-times "lq\[dx\]" 2 } } */ + + struct __attribute__ ((__aligned__(16))) S { + int a, b, c, d; + int e, f, g, h; + }; + + int + f(struct S *s) + { + return s->a + s->b + s->c + s->d + s->e + s->f + s->g + s->h; + } diff -crNp -x .svn gcc-4_3-orig/gcc/tree-pass.h gcc-4_3/gcc/tree-pass.h *** gcc-4_3-orig/gcc/tree-pass.h 2008-09-11 13:44:36.000000000 +0200 --- gcc-4_3/gcc/tree-pass.h 2008-09-10 20:09:59.000000000 +0200 *************** extern struct tree_opt_pass pass_rtl_dol *** 385,390 **** --- 385,391 ---- extern struct tree_opt_pass pass_rtl_loop_done; extern struct tree_opt_pass pass_web; + extern struct tree_opt_pass pass_split_before_cse2; extern struct tree_opt_pass pass_cse2; extern struct tree_opt_pass pass_df_initialize_opt; extern struct tree_opt_pass pass_df_initialize_no_opt;
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor