Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
home:Ledest:erlang:26
erlang
1103-Optimize-division-by-powers-of-two.patch
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File 1103-Optimize-division-by-powers-of-two.patch of Package erlang
From 16dacd03bd0334f09d52383513f7bd9e45f1d6fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org> Date: Sat, 29 Jul 2023 14:17:18 +0200 Subject: [PATCH 3/7] Optimize division by powers of two We used to replace division by a power of two with a right shift only when the dividend was known to be a positive integer. Extend the implementation to do right shift when the range of the dividend is unknown. --- erts/emulator/beam/jit/arm/beam_asm.hpp | 9 ++ erts/emulator/beam/jit/arm/instr_arith.cpp | 159 +++++++++++++++------ erts/emulator/beam/jit/x86/instr_arith.cpp | 55 ++++++- erts/emulator/test/small_SUITE.erl | 5 +- 4 files changed, 181 insertions(+), 47 deletions(-) diff --git a/erts/emulator/beam/jit/arm/beam_asm.hpp b/erts/emulator/beam/jit/arm/beam_asm.hpp index ab162f951a..38f1c6875f 100644 --- a/erts/emulator/beam/jit/arm/beam_asm.hpp +++ b/erts/emulator/beam/jit/arm/beam_asm.hpp @@ -1128,6 +1128,15 @@ protected: const a64::Gp rhs_reg, const Label next); + void emit_div_rem_literal(Sint divisor, + const ArgSource &Dividend, + arm::Gp dividend, + arm::Gp quotient, + arm::Gp remainder, + const Label &generic, + bool need_div, + bool need_rem); + void emit_div_rem(const ArgLabel &Fail, const ArgSource &LHS, const ArgSource &RHS, diff --git a/erts/emulator/beam/jit/arm/instr_arith.cpp b/erts/emulator/beam/jit/arm/instr_arith.cpp index 8ca898b675..a14ad5cbaf 100644 --- a/erts/emulator/beam/jit/arm/instr_arith.cpp +++ b/erts/emulator/beam/jit/arm/instr_arith.cpp @@ -853,6 +853,97 @@ void BeamGlobalAssembler::emit_int_div_rem_body_shared() { } } +void BeamModuleAssembler::emit_div_rem_literal(Sint divisor, + const ArgSource &Dividend, + arm::Gp dividend, + arm::Gp quotient, + arm::Gp remainder, + const Label &generic, + bool need_div, + bool need_rem) { + arm::Gp small_tag = TMP6; + bool small_dividend = !generic.isValid(); + + ASSERT(divisor != (Sint)0); + + if (!small_dividend) { + a.and_(small_tag, dividend, imm(_TAG_IMMED1_MASK)); + a.cmp(small_tag, imm(_TAG_IMMED1_SMALL)); + a.b_ne(generic); + } + + if (Support::isPowerOf2(divisor)) { + arm::Gp original_dividend = dividend; + int shift = Support::ctz<Eterm>(divisor); + + if (need_div && small_dividend) { + mov_imm(small_tag, _TAG_IMMED1_SMALL); + } + + ERTS_CT_ASSERT(_TAG_IMMED1_SMALL == _TAG_IMMED1_MASK); + if (std::get<0>(getClampedRange(Dividend)) >= 0) { + /* Positive dividend. */ + if (need_div) { + comment("optimized div by replacing with right shift"); + if (need_rem && quotient == dividend) { + original_dividend = TMP5; + a.mov(original_dividend, dividend); + } + a.orr(quotient, small_tag, dividend, arm::lsr(shift)); + } + if (need_rem) { + auto mask = Support::lsbMask<Uint>(shift + _TAG_IMMED1_SIZE); + comment("optimized rem by replacing with masking"); + a.and_(remainder, original_dividend, imm(mask)); + } + } else { + /* Negative dividend. */ + if (need_div) { + comment("optimized div by replacing with right shift"); + } + if (divisor == 2) { + ERTS_CT_ASSERT(_TAG_IMMED1_SMALL == _TAG_IMMED1_MASK); + a.add(TMP3, dividend, dividend, arm::lsr(63)); + } else { + add(TMP1, dividend, (divisor - 1) << _TAG_IMMED1_SIZE); + a.cmp(dividend, imm(0)); + a.csel(TMP3, TMP1, dividend, imm(arm::CondCode::kLT)); + } + if (need_div) { + if (need_rem && quotient == dividend) { + original_dividend = TMP5; + a.mov(original_dividend, dividend); + } + a.orr(quotient, small_tag, TMP3, arm::asr(shift)); + } + if (need_rem) { + Uint mask = (Uint)-1 << (shift + _TAG_IMMED1_SIZE); + comment("optimized rem by replacing with subtraction"); + a.and_(TMP1, TMP3, imm(mask)); + a.sub(remainder, original_dividend, TMP1); + } + } + } else { + a.asr(TMP1, dividend, imm(_TAG_IMMED1_SIZE)); + mov_imm(TMP2, divisor); + a.sdiv(quotient, TMP1, TMP2); + if (need_rem) { + a.msub(remainder, quotient, TMP2, TMP1); + } + + if (small_dividend) { + mov_imm(small_tag, _TAG_IMMED1_SMALL); + } + const arm::Shift tagShift = arm::lsl(_TAG_IMMED1_SIZE); + if (need_div) { + a.orr(quotient, small_tag, quotient, tagShift); + } + if (need_rem) { + a.orr(remainder, small_tag, remainder, tagShift); + } + } +} + void BeamModuleAssembler::emit_div_rem(const ArgLabel &Fail, const ArgSource &LHS, const ArgSource &RHS, @@ -865,52 +956,26 @@ void BeamModuleAssembler::emit_div_rem(const ArgLabel &Fail, if (RHS.isSmall()) { divisor = RHS.as<ArgSmall>().getSigned(); + if (divisor == -1) { + divisor = 0; + } } - if (always_small(LHS) && divisor != (Sint)0 && divisor != (Sint)-1) { + if (always_small(LHS) && divisor != 0) { auto lhs = load_source(LHS, ARG3); auto quotient = init_destination(Quotient, ARG1); auto remainder = init_destination(Remainder, ARG2); + Label invalidLabel; /* Intentionally not initialized */ comment("skipped test for smalls operands and overflow"); - if (Support::isPowerOf2(divisor) && - std::get<0>(getClampedRange(LHS)) >= 0) { - int trailing_bits = Support::ctz<Eterm>(divisor); - arm::Gp LHS_reg = lhs.reg; - if (need_div) { - comment("optimized div by replacing with right shift"); - ERTS_CT_ASSERT(_TAG_IMMED1_SMALL == _TAG_IMMED1_MASK); - if (need_rem && quotient.reg == lhs.reg) { - LHS_reg = TMP1; - a.mov(LHS_reg, lhs.reg); - } - a.lsr(quotient.reg, lhs.reg, imm(trailing_bits)); - a.orr(quotient.reg, quotient.reg, imm(_TAG_IMMED1_SMALL)); - } - if (need_rem) { - comment("optimized rem by replacing with masking"); - auto mask = Support::lsbMask<Uint>(trailing_bits + - _TAG_IMMED1_SIZE); - a.and_(remainder.reg, LHS_reg, imm(mask)); - } - } else { - a.asr(TMP1, lhs.reg, imm(_TAG_IMMED1_SIZE)); - mov_imm(TMP2, divisor); - a.sdiv(quotient.reg, TMP1, TMP2); - if (need_rem) { - a.msub(remainder.reg, quotient.reg, TMP2, TMP1); - } - - mov_imm(TMP3, _TAG_IMMED1_SMALL); - const arm::Shift tagShift = arm::lsl(_TAG_IMMED1_SIZE); - if (need_div) { - a.orr(quotient.reg, TMP3, quotient.reg, tagShift); - } - if (need_rem) { - a.orr(remainder.reg, TMP3, remainder.reg, tagShift); - } - } - + emit_div_rem_literal(divisor, + LHS, + lhs.reg, + quotient.reg, + remainder.reg, + invalidLabel, + need_div, + need_rem); if (need_div) { flush_var(quotient); } @@ -918,11 +983,24 @@ void BeamModuleAssembler::emit_div_rem(const ArgLabel &Fail, flush_var(remainder); } } else { + Label generic = a.newLabel(), done = a.newLabel(); auto [lhs, rhs] = load_sources(LHS, ARG2, RHS, ARG3); + if (divisor != (Sint)0) { + emit_div_rem_literal(divisor, + LHS, + lhs.reg, + ARG1, + ARG2, + generic, + need_div, + need_rem); + a.b(done); + } + + a.bind(generic); mov_var(ARG2, lhs); mov_var(ARG3, rhs); - if (Fail.get() != 0) { fragment_call(ga->get_int_div_rem_guard_shared()); a.b_eq(resolve_beam_label(Fail, disp1MB)); @@ -931,6 +1009,7 @@ void BeamModuleAssembler::emit_div_rem(const ArgLabel &Fail, fragment_call(ga->get_int_div_rem_body_shared()); } + a.bind(done); if (need_div) { mov_arg(Quotient, ARG1); } diff --git a/erts/emulator/beam/jit/x86/instr_arith.cpp b/erts/emulator/beam/jit/x86/instr_arith.cpp index fdb021fa7c..56c4eb06d3 100644 --- a/erts/emulator/beam/jit/x86/instr_arith.cpp +++ b/erts/emulator/beam/jit/x86/instr_arith.cpp @@ -652,10 +652,10 @@ void BeamModuleAssembler::emit_div_rem(const ArgLabel &Fail, divisor = RHS.as<ArgSmall>().getSigned(); } - if (divisor != (Sint)0 && divisor != (Sint)-1) { + mov_arg(x86::rax, LHS); + + if (divisor != 0 && divisor != -1) { /* There is no possibility of overflow. */ - a.mov(ARG6, imm(divisor)); - mov_arg(x86::rax, LHS); if (always_small(LHS)) { comment("skipped test for small dividend since it is always small"); need_generic = false; @@ -672,10 +672,9 @@ void BeamModuleAssembler::emit_div_rem(const ArgLabel &Fail, a.short_().jne(generic_div); } - /* Sign-extend and divide. The result is implicitly placed in - * RAX and the remainder in RDX (ARG3). */ if (Support::isPowerOf2(divisor) && std::get<0>(getClampedRange(LHS)) >= 0) { + /* Unsigned integer division. */ int trailing_bits = Support::ctz<Eterm>(divisor); if (need_rem) { @@ -692,8 +691,52 @@ void BeamModuleAssembler::emit_div_rem(const ArgLabel &Fail, a.shr(x86::rax, imm(trailing_bits)); a.or_(x86::rax, imm(_TAG_IMMED1_SMALL)); } + } else if (Support::isPowerOf2(divisor)) { + /* Signed integer division. */ + int shift = Support::ctz<Eterm>(divisor); + Sint offset = (divisor - 1) << _TAG_IMMED1_SIZE; + + if (need_rem) { + a.mov(x86::rdx, x86::rax); + ASSERT(x86::rdx != ARG1); + } + + if (need_div) { + comment("optimized div by replacing with right shift"); + } + + if (divisor == 2) { + ERTS_CT_ASSERT(_TAG_IMMED1_SMALL == _TAG_IMMED1_MASK); + a.mov(ARG1, x86::rax); + a.shr(ARG1, imm(63)); + a.add(x86::rax, ARG1); + } else { + if (Support::isInt32(offset)) { + a.lea(ARG1, x86::qword_ptr(x86::rax, offset)); + } else { + a.mov(ARG1, offset); + a.add(ARG1, x86::rax); + } + a.test(x86::rax, x86::rax); + a.cmovs(x86::rax, ARG1); + } + + if (need_rem) { + Uint mask = (Uint)-1 << (shift + _TAG_IMMED1_SIZE); + comment("optimized rem by replacing with subtraction"); + mov_imm(ARG1, mask); + a.and_(ARG1, x86::rax); + a.sub(x86::rdx, ARG1); + } + + if (need_div) { + ERTS_CT_ASSERT(_TAG_IMMED1_SMALL == _TAG_IMMED1_MASK); + a.sar(x86::rax, imm(shift)); + a.or_(x86::rax, imm(_TAG_IMMED1_SMALL)); + } } else { comment("divide with inlined code"); + a.mov(ARG6, imm(divisor)); a.sar(x86::rax, imm(_TAG_IMMED1_SIZE)); a.cqo(); a.idiv(ARG6); @@ -723,7 +766,7 @@ void BeamModuleAssembler::emit_div_rem(const ArgLabel &Fail, a.bind(generic_div); if (need_generic) { mov_arg(ARG4, RHS); /* Done first as mov_arg may clobber ARG1 */ - mov_arg(ARG1, LHS); + a.mov(ARG1, x86::rax); if (Fail.get() != 0) { safe_fragment_call(ga->get_int_div_rem_guard_shared()); diff --git a/erts/emulator/test/small_SUITE.erl b/erts/emulator/test/small_SUITE.erl index c8a1b2fbf2..bb4d69e355 100644 --- a/erts/emulator/test/small_SUITE.erl +++ b/erts/emulator/test/small_SUITE.erl @@ -28,7 +28,7 @@ test_bitwise/1, test_bsl/1, element/1, range_optimization/1]). --export([mul_add/0]). +-export([mul_add/0, division/0]). -include_lib("common_test/include/ct.hrl"). @@ -713,6 +713,8 @@ madd(_, _, _, _) -> error. %% Test that the JIT only omits the overflow check when it's safe. +division() -> + [{timetrap, {minutes, 5}}]. division(_Config) -> _ = rand:uniform(), %Seed generator io:format("Seed: ~p", [rand:export_seed()]), @@ -945,6 +947,7 @@ gen_div_function({Name,{A,B}}) -> R = X rem Y, {Q, R}. "). + test_division([{Name,{A,B}}|T], Mod) -> F = fun Mod:Name/3, try -- 2.35.3
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor