Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
home:Ledest:erlang:24
erlang
2962-Improve-seeding.patch
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File 2962-Improve-seeding.patch of Package erlang
From f9d11572a323dbb959c776276a04def368424dc8 Mon Sep 17 00:00:00 2001 From: Raimo Niskanen <raimo@erlang.org> Date: Tue, 10 May 2022 14:52:42 +0200 Subject: [PATCH 2/8] Improve seeding The seed function should not produce the same generator state for any two user input seeds. Restrict the user input and use reversible (bijectional) mappings to produce the seed. Suggest using multiply-and-shift for range capping in the documentation and tests. This will be revised in a later commit since it is only a good idea if the high bits are good. --- lib/stdlib/doc/src/rand.xml | 64 ++++++++++++------ lib/stdlib/src/rand.erl | 76 +++++++++++---------- lib/stdlib/test/rand_SUITE.erl | 118 +++++++++++++++++++++++++++++---- 3 files changed, 190 insertions(+), 68 deletions(-) diff --git a/lib/stdlib/doc/src/rand.xml b/lib/stdlib/doc/src/rand.xml index 1cf2cca6fd..10e2133720 100644 --- a/lib/stdlib/doc/src/rand.xml +++ b/lib/stdlib/doc/src/rand.xml @@ -912,16 +912,12 @@ end.</pre> Because the generator uses a multiplier that is a power of 2 it gets statistical flaws for collision tests and birthday spacings tests in 2 and 3 dimensions, - so the state should be scrambled, to create - an output value. - </p><p> - The quality of the state bits <c><anno>CX1</anno></c> - as a random value is far from good, but if speed is - much more important than these imperfections, the lowest - say 16 bits of the generator state could be used - without scrambling. + and even these caveats apply only to the MWC "digit", + that is the low 32 bits (due to the multiplier) of + the generator state. </p> <p> + To create an output value, the state should be scrambled. Function <seemfa marker="#mwc59_fast_value/1"> <c>mwc59_fast_value</c> @@ -984,14 +980,27 @@ end.</pre> an 8-bit xorshift which masks the statistical imperfecions of the base generator <seemfa marker="#mwc59/1"><c>mwc59</c></seemfa> - enough that the 32 highest bits are of decent quality. + enough that the 32 low bits are of decent quality. + </p> + <p> + Be careful to not accidentaly create a bignum + when handling the value <c><anno>V</anno></c>. </p> <p> To extract a power of two number it is recommended - to shift down the high bits, but for an arbitrary - range a `rem` operation on the whole value - can be used. Be careful to not accidentaly create - a bignum when handling the value <c><anno>V</anno></c>. + to use the high of the decent 32 bits. + </p> + <p> + For an arbitrary range less than 32 bits + a <c>rem</c> operation on the whole value can be used, + but that is a rather slow operation. + </p> + <p> + For a small arbitrary range less than about 16 bits + (to not get too much bias and to avoid bignums) + multiply-and-shift can be used, + which is much faster than using <c>rem</c>: + <c>(Range*(<anno>V</anno> band ((1 bsl 32)-1))) bsr 32</c>. </p> <p> It is not recommended to generate numbers @@ -1012,17 +1021,32 @@ end.</pre> <seemfa marker="#mwc59/1"><c>mwc59</c></seemfa> enough that all 59 bits are of very good quality. </p> + <p> + Be careful to not accidentaly create a bignum + when handling the value <c><anno>V</anno></c>. + </p> <p> To extract a power of two number it is recommended - to shift down the high bits, but for an arbitrary - range a `rem` operation on the whole value - can be used. Be careful to not accidentaly create - a bignum when handling the value <c><anno>V</anno></c>. + to shift down the high bits. + </p> + <p> + For an arbitrary range a <c>rem</c> operation + on the whole value can be used but that is + a rather slow operation. Beware of bias + in the generated numbers if generating in a range + above about 2^30 (getting too close to 2^59). </p> <p> - As usual: beware of bias in the generated numbers if - generating on range too close to 2^59 that is not - a power of 2. + For a small arbitrary range less than about 20 bits + (to not get too much bias and to avoid bignums) + multiply-and-shift can be used, + which is much faster than using <c>rem</c>. + Example for range 1000000; + the range is 20 bits, we use 39 bits from the generator, + adding up to 59 bits, which is not a bignum: + <c>(1000000 * (<anno>V</anno> bsr (59-39))) bsr 39</c>. + <em> + </em> </p> </desc> </func> diff --git a/lib/stdlib/src/rand.erl b/lib/stdlib/src/rand.erl index adefa8ab89..95fb011160 100644 --- a/lib/stdlib/src/rand.erl +++ b/lib/stdlib/src/rand.erl @@ -1508,6 +1508,18 @@ mwc59(CX0) -> % when is_integer(CX0), 1 =< CX0, CX0 < ?MWC59_P -> X = ?MASK(?MWC59_B, CX), ?MWC59_A * X + C. +%%% %% Verification by equivalent MCG generator +%%% mwc59_r(CX1) -> +%%% (CX1 bsl ?MWC59_B) rem ?MWC59_P. % Reverse +%%% %%% (CX1 * ?MWC59_A) rem ?MWC59_P. % Forward +%%% +%%% mwc59(CX0, 0) -> +%%% CX0; +%%% mwc59(CX0, N) -> +%%% CX1 = mwc59(CX0), +%%% CX0 = mwc59_r(CX1), +%%% mwc59(CX1, N - 1). + -spec mwc59_fast_value(CX :: mwc59_state()) -> V :: 0..?MASK(59). mwc59_fast_value(CX1) -> % when is_integer(CX1), 1 =< CX1, CX1 < ?MWC59_P -> CX = ?MASK(59, CX1), @@ -1526,33 +1538,25 @@ mwc59_float(CX1) -> -spec mwc59_seed() -> CX :: mwc59_state(). mwc59_seed() -> {A1, A2, A3} = default_seed(), - {X1,_} = splitmix64_next(A1), - {X2,_} = splitmix64_next(A2), - {X3,_} = splitmix64_next(A3), - mwc59_seed_state(X1 bxor X2 bxor X3). - --spec mwc59_seed(S :: integer()) -> CX :: mwc59_state(). -mwc59_seed(S) when is_integer(S) -> - {X,_} = splitmix64_next(mod(?BIT(64), S)), - mwc59_seed_state(X). - -mwc59_seed_state(X) -> - %% Ensure non-zero carry and within state range - ?BIT(?MWC59_B) bor ?MASK(58, X). - + X1 = hash57(A1), + X2 = hash57(A2), + X3 = hash57(A3), + ?BIT(58) bor (X1 bxor X2 bxor X3). +-spec mwc59_seed(S :: 0..?MASK(57)) -> CX :: mwc59_state(). +mwc59_seed(S) when is_integer(S), 0 =< S, S =< ?MASK(57) -> + ?BIT(58) bor hash57(S). -%%% %% Verification by equivalent MCG generator -%%% mwc59_r(CX1) -> -%%% (CX1 bsl ?MWC59_B) rem ?MWC59_P. % Reverse -%%% %%% (CX1 * ?MWC59_A) rem ?MWC59_P. % Forward -%%% -%%% mwc59(CX0, 0) -> -%%% CX0; -%%% mwc59(CX0, N) -> -%%% CX1 = mwc59(CX0), -%%% CX0 = mwc59_r(CX1), -%%% mwc59(CX1, N - 1). +%% Constants a'la SplitMix64, MurMurHash, etc. +%% Not that critical, just mix the bits using bijections +%% (reversible mappings) to not have any two user input seeds +%% become the same generator start state. +%% +hash57(X) -> + X0 = ?MASK(57, X), + X1 = ?MASK(57, (X0 bxor (X0 bsr 29)) * 16#151afd7ed558ccd), + X2 = ?MASK(57, (X1 bxor (X1 bsr 29)) * 16#0ceb9fe1a85ec53), + X2 bxor (X2 bsr 29). %% ===================================================================== @@ -1983,17 +1987,17 @@ bc(V, B, N) when B =< V -> N; bc(V, B, N) -> bc(V, B bsr 1, N - 1). -%% Non-negative rem -mod(Q, X) when 0 =< X, X < Q -> - X; -mod(Q, X) -> - Y = X rem Q, - if - Y < 0 -> - Y + Q; - true -> - Y - end. +%%% %% Non-negative rem +%%% mod(Q, X) when 0 =< X, X < Q -> +%%% X; +%%% mod(Q, X) -> +%%% Y = X rem Q, +%%% if +%%% Y < 0 -> +%%% Y + Q; +%%% true -> +%%% Y +%%% end. make_float(S, E, M) -> diff --git a/lib/stdlib/test/rand_SUITE.erl b/lib/stdlib/test/rand_SUITE.erl index 04449a3d29..34da9c25a9 100644 --- a/lib/stdlib/test/rand_SUITE.erl +++ b/lib/stdlib/test/rand_SUITE.erl @@ -211,16 +211,30 @@ api_eq_1(S00) -> %% Verify mwc59 behaviour %% mwc59_api(Config) when is_list(Config) -> - mwc59_api(1, 1000000). + try rand:mwc59_seed(-1) of + CX1 -> + error({bad_return, CX1}) + catch + error : function_clause -> + try rand:mwc59_seed(1 bsl 57) of + CX2 -> + error({bad_return, CX2}) + catch + error : function_clause -> + Seed = 324109835948422043, + Seed = rand:mwc59_seed(1), + mwc59_api(Seed, 1000000) + end + end. mwc59_api(CX0, 0) -> - CX = 216355295181821136, + CX = 394988924775693874, {CX, CX} = {CX0, CX}, V0 = rand:mwc59_fast_value(CX0), - V = 262716604851324112, + V = 446733510867799090, {V, V} = {V0, V}, W0 = rand:mwc59_value(CX0), - W = 528437219928775120, + W = 418709302640385298, {W, W} = {W0, W}, F0 = rand:mwc59_float(CX0), F = (W bsr (59-53)) * (1 / (1 bsl 53)), @@ -1162,11 +1176,88 @@ do_measure(Iterations) -> case (V rem Range) + 1 of X when is_integer(X), 0 < X, X =< Range -> St1 - end + end end end, {exsp,mod}, Iterations, TMarkUniformRange10000, OverheadUniformRange1000), + _ = + measure_1( + fun (_Mod, _State) -> + Range = 10000, + fun (St0) -> + St1 = rand:mwc59(St0), + %% Fixpoint inversion, slightly skewed + case + ( (Range * ((St1 band ((1 bsl 32)-1)))) + bsr 32 ) + + 1 + of + R when is_integer(R), 0 < R, R =< Range -> + St1 + end + end + end, + {mwc59,raw_mas}, Iterations, + TMarkUniformRange10000, OverheadUniformRange1000), + _ = + measure_1( + fun (_Mod, _State) -> + Range = 10000, + fun (St0) -> + St1 = rand:mwc59(St0), + %% Fixpoint inversion, slightly skewed + case + ( (Range * + ((rand:mwc59_fast_value(St1) + band ((1 bsl 32)-1) )) ) + bsr 32 ) + + 1 + of + R when is_integer(R), 0 < R, R =< Range -> + St1 + end + end + end, + {mwc59,fast_mas}, Iterations, + TMarkUniformRange10000, OverheadUniformRange1000), + _ = + measure_1( + fun (_Mod, _State) -> + Range = 10000, % 14 bits + fun (St0) -> + St1 = rand:mwc59(St0), + %% Fixpoint inversion, slightly skewed + case + ( (Range * + (rand:mwc59_value(St1) bsr 14) ) + bsr (59-14) ) + + 1 + of + R when is_integer(R), 0 < R, R =< Range -> + St1 + end + end + end, + {mwc59,value_mas}, Iterations, + TMarkUniformRange10000, OverheadUniformRange1000), + _ = + measure_1( + fun (_Mod, _State) -> + Range = 10000, + fun (St0) -> + {V,St1} = rand:exsp_next(St0), + %% Fixpoint inversion, slightly skewed + case + ((Range * (V bsr 14)) bsr (58-14)) + 1 + of + X when is_integer(X), 0 < X, X =< Range -> + St1 + end + end + end, + {exsp,mas}, Iterations, + TMarkUniformRange10000, OverheadUniformRange1000), _ = measure_1( fun (_Mod, _State) -> @@ -1231,13 +1322,16 @@ do_measure(Iterations) -> Range = 1 bsl 32, fun (St0) -> St1 = rand:mwc59(St0), - case rand:mwc59_fast_value(St1) bsr (59-32) of + case + rand:mwc59_fast_value(St1) + band ((1 bsl 32)-1) + of R when is_integer(R), 0 =< R, R < Range -> St1 end end end, - {mwc59,fast_shift}, Iterations, + {mwc59,fast_mask}, Iterations, TMarkUniform32Bit, OverheadUniform32Bit), _ = measure_1( @@ -1419,7 +1513,7 @@ do_measure(Iterations) -> end end end, - splitmix64_inline, Iterations, + {splitmix64,next}, Iterations, TMarkUniformFullRange, OverheadUniformFullRange), _ = measure_1( @@ -1525,7 +1619,7 @@ do_measure(Iterations) -> end end end, - splitmix64_inline, Iterations, + {splitmix64,next}, Iterations, TMarkUniform64Bit, OverheadUniform64Bit), %% ByteSize = 16, % At about 100 bytes crypto_bytes breaks even to exsss @@ -1745,8 +1839,6 @@ measure_init(Alg) -> {?MODULE, undefined}; system_time -> {?MODULE, undefined}; - splitmix64_inline -> - {rand, erlang:unique_integer()}; procdict -> {rand, rand:seed(exsss)}; {Name, Tag} -> @@ -1758,7 +1850,9 @@ measure_init(Alg) -> {rand, rand:mwc59_seed()}; exsp -> {_, S} = rand:seed_s(exsp), - {rand, S} + {rand, S}; + splitmix64 -> + {rand, erlang:unique_integer()} end; _ -> {rand, rand:seed_s(Alg)} -- 2.35.3
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor