Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
home:lafenghu
libffi43
s390-long-loop-prediction-1
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File s390-long-loop-prediction-1 of Package libffi43
Index: gcc/config/s390/s390.c =================================================================== --- gcc/config/s390/s390.c.orig 2009-11-20 13:52:36.000000000 +0100 +++ gcc/config/s390/s390.c 2009-11-20 13:52:37.000000000 +0100 @@ -356,6 +356,10 @@ struct machine_function GTY(()) #define REGNO_PAIR_OK(REGNO, MODE) \ (HARD_REGNO_NREGS ((REGNO), (MODE)) == 1 || !((REGNO) & 1)) +/* That's the read ahead of the dynamic branch prediction unit in + bytes on a z10 CPU. */ +#define Z10_PREDICT_DISTANCE 384 + static enum machine_mode s390_libgcc_cmp_return_mode (void) { @@ -9599,6 +9603,66 @@ s390_optimize_prologue (void) } } +/* On z10 the dynamic branch prediction must see the backward jump in + a window of 384 bytes. If not it falls back to the static + prediction. This function rearranges the loop backward branch in a + way which makes the static prediction always correct. The function + returns true if it added an instruction. */ +static bool +s390_z10_fix_long_loop_prediction (rtx insn) +{ + rtx set = single_set (insn); + rtx code_label, label_ref, new_label; + rtx uncond_jump; + rtx cur_insn; + rtx tmp; + int distance; + + /* This will exclude branch on count and branch on index patterns + since these are correctly statically predicted. */ + if (!set + || SET_DEST (set) != pc_rtx + || GET_CODE (SET_SRC(set)) != IF_THEN_ELSE) + return false; + + label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ? 
+ XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2)); + + gcc_assert (GET_CODE (label_ref) == LABEL_REF); + + code_label = XEXP (label_ref, 0); + + if (INSN_ADDRESSES (INSN_UID (code_label)) == -1 + || INSN_ADDRESSES (INSN_UID (insn)) == -1 + || (INSN_ADDRESSES (INSN_UID (insn)) + - INSN_ADDRESSES (INSN_UID (code_label)) < Z10_PREDICT_DISTANCE)) + return false; + + for (distance = 0, cur_insn = PREV_INSN (insn); + distance < Z10_PREDICT_DISTANCE - 6; + distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn)) + if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn)) + return false; + + new_label = gen_label_rtx (); + uncond_jump = emit_jump_insn_after ( + gen_rtx_SET (VOIDmode, pc_rtx, + gen_rtx_LABEL_REF (VOIDmode, code_label)), + insn); + emit_label_after (new_label, uncond_jump); + + tmp = XEXP (SET_SRC (set), 1); + XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2); + XEXP (SET_SRC (set), 2) = tmp; + INSN_CODE (insn) = -1; + + XEXP (label_ref, 0) = new_label; + JUMP_LABEL (insn) = new_label; + JUMP_LABEL (uncond_jump) = code_label; + + return true; +} + /* Returns 1 if INSN reads the value of REG for purposes not related to addressing of memory, and 0 otherwise. */ static int @@ -9681,97 +9745,87 @@ s390_swap_cmp (rtx cond, rtx *op0, rtx * if that register's value is delivered via a bypass, then the pipeline recycles, thereby causing significant performance decline. This function locates such situations and exchanges the two - operands of the compare. */ -static void -s390_z10_optimize_cmp (void) + operands of the compare. The function return true whenever it + added an insn. 
*/ +static bool +s390_z10_optimize_cmp (rtx insn) { - rtx insn, prev_insn, next_insn; - int added_NOPs = 0; + rtx prev_insn, next_insn; + bool insn_added_p = false; + rtx cond, *op0, *op1; - for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + if (GET_CODE (PATTERN (insn)) == PARALLEL) { - rtx cond, *op0, *op1; - - if (!INSN_P (insn) || INSN_CODE (insn) <= 0) - continue; - - if (GET_CODE (PATTERN (insn)) == PARALLEL) - { - /* Handle compare and branch and branch on count - instructions. */ - rtx pattern = single_set (insn); - - if (!pattern - || SET_DEST (pattern) != pc_rtx - || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE) - continue; + /* Handle compare and branch and branch on count + instructions. */ + rtx pattern = single_set (insn); + + if (!pattern + || SET_DEST (pattern) != pc_rtx + || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE) + return false; - cond = XEXP (SET_SRC (pattern), 0); - op0 = &XEXP (cond, 0); - op1 = &XEXP (cond, 1); - } - else if (GET_CODE (PATTERN (insn)) == SET) - { - rtx src, dest; + cond = XEXP (SET_SRC (pattern), 0); + op0 = &XEXP (cond, 0); + op1 = &XEXP (cond, 1); + } + else if (GET_CODE (PATTERN (insn)) == SET) + { + rtx src, dest; - /* Handle normal compare instructions. */ - src = SET_SRC (PATTERN (insn)); - dest = SET_DEST (PATTERN (insn)); + /* Handle normal compare instructions. */ + src = SET_SRC (PATTERN (insn)); + dest = SET_DEST (PATTERN (insn)); - if (!REG_P (dest) - || !CC_REGNO_P (REGNO (dest)) - || GET_CODE (src) != COMPARE) - continue; + if (!REG_P (dest) + || !CC_REGNO_P (REGNO (dest)) + || GET_CODE (src) != COMPARE) + return false; - /* s390_swap_cmp will try to find the conditional - jump when passing NULL_RTX as condition. */ - cond = NULL_RTX; - op0 = &XEXP (src, 0); - op1 = &XEXP (src, 1); - } - else - continue; + /* s390_swap_cmp will try to find the conditional + jump when passing NULL_RTX as condition. 
*/ + cond = NULL_RTX; + op0 = &XEXP (src, 0); + op1 = &XEXP (src, 1); + } + else + return false; - if (!REG_P (*op0) || !REG_P (*op1)) - continue; + if (!REG_P (*op0) || !REG_P (*op1)) + return false; - /* Swap the COMPARE arguments and its mask if there is a - conflicting access in the previous insn. */ - prev_insn = PREV_INSN (insn); + /* Swap the COMPARE arguments and its mask if there is a + conflicting access in the previous insn. */ + prev_insn = PREV_INSN (insn); + if (prev_insn != NULL_RTX && INSN_P (prev_insn) + && reg_referenced_p (*op1, PATTERN (prev_insn))) + s390_swap_cmp (cond, op0, op1, insn); + + /* Check if there is a conflict with the next insn. If there + was no conflict with the previous insn, then swap the + COMPARE arguments and its mask. If we already swapped + the operands, or if swapping them would cause a conflict + with the previous insn, issue a NOP after the COMPARE in + order to separate the two instuctions. */ + next_insn = NEXT_INSN (insn); + if (next_insn != NULL_RTX && INSN_P (next_insn) + && s390_non_addr_reg_read_p (*op1, next_insn)) + { if (prev_insn != NULL_RTX && INSN_P (prev_insn) - && reg_referenced_p (*op1, PATTERN (prev_insn))) - s390_swap_cmp (cond, op0, op1, insn); - - /* Check if there is a conflict with the next insn. If there - was no conflict with the previous insn, then swap the - COMPARE arguments and its mask. If we already swapped - the operands, or if swapping them would cause a conflict - with the previous insn, issue a NOP after the COMPARE in - order to separate the two instuctions. 
*/ - next_insn = NEXT_INSN (insn); - if (next_insn != NULL_RTX && INSN_P (next_insn) - && s390_non_addr_reg_read_p (*op1, next_insn)) + && s390_non_addr_reg_read_p (*op0, prev_insn)) { - if (prev_insn != NULL_RTX && INSN_P (prev_insn) - && s390_non_addr_reg_read_p (*op0, prev_insn)) - { - if (REGNO (*op1) == 0) - emit_insn_after (gen_nop1 (), insn); - else - emit_insn_after (gen_nop (), insn); - added_NOPs = 1; - } + if (REGNO (*op1) == 0) + emit_insn_after (gen_nop1 (), insn); else - s390_swap_cmp (cond, op0, op1, insn); + emit_insn_after (gen_nop (), insn); + insn_added_p = true; } + else + s390_swap_cmp (cond, op0, op1, insn); } - - /* Adjust branches if we added new instructions. */ - if (added_NOPs) - shorten_branches (get_insns ()); + return insn_added_p; } - /* Perform machine-dependent processing. */ static void @@ -9885,10 +9939,33 @@ s390_reorg (void) /* Try to optimize prologue and epilogue further. */ s390_optimize_prologue (); - /* Eliminate z10-specific pipeline recycles related to some compare - instructions. */ + /* Walk over the insns and do some z10 specific changes. */ if (s390_tune == PROCESSOR_2097_Z10) - s390_z10_optimize_cmp (); + { + rtx insn; + bool insn_added_p = false; + + /* The insn lengths and addresses have to be up to date for the + following manipulations. */ + shorten_branches (get_insns ()); + + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + if (!INSN_P (insn) || INSN_CODE (insn) <= 0) + continue; + + if (JUMP_P (insn)) + insn_added_p |= s390_z10_fix_long_loop_prediction (insn); + + if (GET_CODE (PATTERN (insn)) == PARALLEL + || GET_CODE (PATTERN (insn)) == SET) + insn_added_p |= s390_z10_optimize_cmp (insn); + } + + /* Adjust branches if we added new instructions. 
*/ + if (insn_added_p) + shorten_branches (get_insns ()); + } } Index: gcc/config/s390/s390.md =================================================================== --- gcc/config/s390/s390.md.orig 2009-11-20 13:52:34.000000000 +0100 +++ gcc/config/s390/s390.md 2009-11-20 13:52:37.000000000 +0100 @@ -1073,6 +1073,64 @@ (const_int 6) (const_int 12)))]) ; 8 byte for clr/jg ; 10 byte for clgr/jg +; And now the same two patterns as above but with a negated CC mask. + +; cij, cgij, crj, cgrj, cfi, cgfi, cr, cgr +; The following instructions do a complementary access of their second +; operand (z01 only): crj_c, cgrjc, cr, cgr +(define_insn "*icmp_and_br_signed_<mode>" + [(set (pc) + (if_then_else (match_operator 0 "s390_signed_integer_comparison" + [(match_operand:GPR 1 "register_operand" "d,d") + (match_operand:GPR 2 "nonmemory_operand" "d,C")]) + (pc) + (label_ref (match_operand 3 "" "")))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_Z10" +{ + if (get_attr_length (insn) == 6) + return which_alternative ? + "c<g>ij%D0\t%1,%c2,%l3" : "c<g>rj%D0\t%1,%2,%l3"; + else + return which_alternative ? 
+ "c<g>fi\t%1,%c2\;jg%D0\t%l3" : "c<g>r\t%1,%2\;jg%D0\t%l3"; +} + [(set_attr "op_type" "RIE") + (set_attr "type" "branch") + (set_attr "z10prop" "z10_super_c,z10_super") + (set (attr "length") + (if_then_else (lt (abs (minus (pc) (match_dup 3))) (const_int 60000)) + (const_int 6) (const_int 12)))]) ; 8 byte for cr/jg + ; 10 byte for cgr/jg + +; clij, clgij, clrj, clgrj, clfi, clgfi, clr, clgr +; The following instructions do a complementary access of their second +; operand (z10 only): clrj, clgrj, clr, clgr +(define_insn "*icmp_and_br_unsigned_<mode>" + [(set (pc) + (if_then_else (match_operator 0 "s390_unsigned_integer_comparison" + [(match_operand:GPR 1 "register_operand" "d,d") + (match_operand:GPR 2 "nonmemory_operand" "d,I")]) + (pc) + (label_ref (match_operand 3 "" "")))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_Z10" +{ + if (get_attr_length (insn) == 6) + return which_alternative ? + "cl<g>ij%D0\t%1,%b2,%l3" : "cl<g>rj%D0\t%1,%2,%l3"; + else + return which_alternative ? + "cl<g>fi\t%1,%b2\;jg%D0\t%l3" : "cl<g>r\t%1,%2\;jg%D0\t%l3"; +} + [(set_attr "op_type" "RIE") + (set_attr "type" "branch") + (set_attr "z10prop" "z10_super_c,z10_super") + (set (attr "length") + (if_then_else (lt (abs (minus (pc) (match_dup 3))) (const_int 60000)) + (const_int 6) (const_int 12)))]) ; 8 byte for clr/jg + ; 10 byte for clgr/jg + ;; ;;- Move instructions. ;;
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an openSUSE project.
Sign Up
Log In
Places
Places
All Projects
Status Monitor