File 2962-Improve-seeding.patch of Package erlang

Overview Repositories Revisions Requests Users Attributes Meta

File 2962-Improve-seeding.patch of Package erlang

From f9d11572a323dbb959c776276a04def368424dc8 Mon Sep 17 00:00:00 2001
From: Raimo Niskanen <raimo@erlang.org>
Date: Tue, 10 May 2022 14:52:42 +0200
Subject: [PATCH 2/8] Improve seeding

The seed function should not produce the same generator state
for any two user input seeds.  Restrict the user input and use
reversible (bijectional) mappings to produce the seed.

Suggest using multiply-and-shift for range capping in the
documentation and tests.  This will be revised in a later
commit since it is only a good idea if the high bits are good.
---
 lib/stdlib/doc/src/rand.xml    |  64 ++++++++++++------
 lib/stdlib/src/rand.erl        |  76 +++++++++++----------
 lib/stdlib/test/rand_SUITE.erl | 118 +++++++++++++++++++++++++++++----
 3 files changed, 190 insertions(+), 68 deletions(-)

diff --git a/lib/stdlib/doc/src/rand.xml b/lib/stdlib/doc/src/rand.xml
index 1cf2cca6fd..10e2133720 100644
--- a/lib/stdlib/doc/src/rand.xml
+++ b/lib/stdlib/doc/src/rand.xml
@@ -912,16 +912,12 @@ end.</pre>
 Because the generator uses a multiplier that is
 a power of 2 it gets statistical flaws for collision tests
 and birthday spacings tests in 2 and 3 dimensions,
- so the state should be scrambled, to create
- an output value.
- 
- The quality of the state bits <c><anno>CX1</anno></c>
- as a random value is far from good, but if speed is
- much more important than these imperfections, the lowest
- say 16 bits of the generator state could be used
- without scrambling.
+ and even these caveats apply only to the MWC "digit",
+ that is the low 32 bits (due to the multiplier) of
+ the generator state.
 
 
+ To create an output value, the state should be scrambled.
 Function
 <seemfa marker="#mwc59_fast_value/1">
 <c>mwc59_fast_value</c>
@@ -984,14 +980,27 @@ end.</pre>
 an 8-bit xorshift which masks
 the statistical imperfecions of the base generator
 <seemfa marker="#mwc59/1"><c>mwc59</c></seemfa>
- enough that the 32 highest bits are of decent quality.
+ enough that the 32 low bits are of decent quality.
+ 
+ 
+ Be careful to not accidentaly create a bignum
+ when handling the value <c><anno>V</anno></c>.
 
 
 To extract a power of two number it is recommended
- to shift down the high bits, but for an arbitrary
- range a `rem` operation on the whole value
- can be used. Be careful to not accidentaly create
- a bignum when handling the value <c><anno>V</anno></c>.
+ to use the high of the decent 32 bits.
+ 
+ 
+ For an arbitrary range less than 32 bits
+ a <c>rem</c> operation on the whole value can be used,
+ but that is a rather slow operation.
+ 
+ 
+ For a small arbitrary range less than about 16 bits
+ (to not get too much bias and to avoid bignums)
+ multiply-and-shift can be used,
+ which is much faster than using <c>rem</c>:
+ <c>(Range*(<anno>V</anno>&nbsp;band&nbsp;((1&nbsp;bsl&nbsp;32)-1)))&nbsp;bsr&nbsp;32</c>.
 
 
 It is not recommended to generate numbers
@@ -1012,17 +1021,32 @@ end.</pre>
 <seemfa marker="#mwc59/1"><c>mwc59</c></seemfa>
 enough that all 59 bits are of very good quality.
 
+ 
+ Be careful to not accidentaly create a bignum
+ when handling the value <c><anno>V</anno></c>.
+ 
 
 To extract a power of two number it is recommended
- to shift down the high bits, but for an arbitrary
- range a `rem` operation on the whole value
- can be used. Be careful to not accidentaly create
- a bignum when handling the value <c><anno>V</anno></c>.
+ to shift down the high bits.
+ 
+ 
+ For an arbitrary range a <c>rem</c> operation
+ on the whole value can be used but that is
+ a rather slow operation. Beware of bias
+ in the generated numbers if generating in a range
+ above about 2^30 (getting too close to 2^59).
 
 
- As usual: beware of bias in the generated numbers if
- generating on range too close to 2^59 that is not
- a power of 2.
+ For a small arbitrary range less than about 20 bits
+ (to not get too much bias and to avoid bignums)
+ multiply-and-shift can be used,
+ which is much faster than using <c>rem</c>.
+ Example for range 1000000;
+ the range is 20 bits, we use 39 bits from the generator,
+ adding up to 59 bits, which is not a bignum:
+ <c>(1000000&nbsp;*&nbsp;(<anno>V</anno>&nbsp;bsr&nbsp;(59-39)))&nbsp;bsr&nbsp;39</c>.
+ 
+ 
 
 </desc>
 </func>
diff --git a/lib/stdlib/src/rand.erl b/lib/stdlib/src/rand.erl
index adefa8ab89..95fb011160 100644
--- a/lib/stdlib/src/rand.erl
+++ b/lib/stdlib/src/rand.erl
@@ -1508,6 +1508,18 @@ mwc59(CX0) -> % when is_integer(CX0), 1 =< CX0, CX0 < ?MWC59_P ->
 X = ?MASK(?MWC59_B, CX),
 ?MWC59_A * X + C.
 
+%%% %% Verification by equivalent MCG generator
+%%% mwc59_r(CX1) ->
+%%% (CX1 bsl ?MWC59_B) rem ?MWC59_P. % Reverse
+%%% %%% (CX1 * ?MWC59_A) rem ?MWC59_P. % Forward
+%%%
+%%% mwc59(CX0, 0) ->
+%%% CX0;
+%%% mwc59(CX0, N) ->
+%%% CX1 = mwc59(CX0),
+%%% CX0 = mwc59_r(CX1),
+%%% mwc59(CX1, N - 1).
+
 -spec mwc59_fast_value(CX :: mwc59_state()) -> V :: 0..?MASK(59).
 mwc59_fast_value(CX1) -> % when is_integer(CX1), 1 =< CX1, CX1 < ?MWC59_P ->
 CX = ?MASK(59, CX1),
@@ -1526,33 +1538,25 @@ mwc59_float(CX1) ->
 -spec mwc59_seed() -> CX :: mwc59_state().
 mwc59_seed() ->
 {A1, A2, A3} = default_seed(),
- {X1,_} = splitmix64_next(A1),
- {X2,_} = splitmix64_next(A2),
- {X3,_} = splitmix64_next(A3),
- mwc59_seed_state(X1 bxor X2 bxor X3).
-
--spec mwc59_seed(S :: integer()) -> CX :: mwc59_state().
-mwc59_seed(S) when is_integer(S) ->
- {X,_} = splitmix64_next(mod(?BIT(64), S)),
- mwc59_seed_state(X).
-
-mwc59_seed_state(X) ->
- %% Ensure non-zero carry and within state range
- ?BIT(?MWC59_B) bor ?MASK(58, X).
-
+ X1 = hash57(A1),
+ X2 = hash57(A2),
+ X3 = hash57(A3),
+ ?BIT(58) bor (X1 bxor X2 bxor X3).
 
+-spec mwc59_seed(S :: 0..?MASK(57)) -> CX :: mwc59_state().
+mwc59_seed(S) when is_integer(S), 0 =< S, S =< ?MASK(57) ->
+ ?BIT(58) bor hash57(S).
 
-%%% %% Verification by equivalent MCG generator
-%%% mwc59_r(CX1) ->
-%%% (CX1 bsl ?MWC59_B) rem ?MWC59_P. % Reverse
-%%% %%% (CX1 * ?MWC59_A) rem ?MWC59_P. % Forward
-%%%
-%%% mwc59(CX0, 0) ->
-%%% CX0;
-%%% mwc59(CX0, N) ->
-%%% CX1 = mwc59(CX0),
-%%% CX0 = mwc59_r(CX1),
-%%% mwc59(CX1, N - 1).
+%% Constants a'la SplitMix64, MurMurHash, etc.
+%% Not that critical, just mix the bits using bijections
+%% (reversible mappings) to not have any two user input seeds
+%% become the same generator start state.
+%%
+hash57(X) ->
+ X0 = ?MASK(57, X),
+ X1 = ?MASK(57, (X0 bxor (X0 bsr 29)) * 16#151afd7ed558ccd),
+ X2 = ?MASK(57, (X1 bxor (X1 bsr 29)) * 16#0ceb9fe1a85ec53),
+ X2 bxor (X2 bsr 29).
 
 
 %% =====================================================================
@@ -1983,17 +1987,17 @@ bc(V, B, N) when B =< V -> N;
 bc(V, B, N) -> bc(V, B bsr 1, N - 1).
 
 
-%% Non-negative rem
-mod(Q, X) when 0 =< X, X < Q ->
- X;
-mod(Q, X) ->
- Y = X rem Q,
- if
- Y < 0 ->
- Y + Q;
- true ->
- Y
- end.
+%%% %% Non-negative rem
+%%% mod(Q, X) when 0 =< X, X < Q ->
+%%% X;
+%%% mod(Q, X) ->
+%%% Y = X rem Q,
+%%% if
+%%% Y < 0 ->
+%%% Y + Q;
+%%% true ->
+%%% Y
+%%% end.
 
 
 make_float(S, E, M) ->
diff --git a/lib/stdlib/test/rand_SUITE.erl b/lib/stdlib/test/rand_SUITE.erl
index 04449a3d29..34da9c25a9 100644
--- a/lib/stdlib/test/rand_SUITE.erl
+++ b/lib/stdlib/test/rand_SUITE.erl
@@ -211,16 +211,30 @@ api_eq_1(S00) ->
 %% Verify mwc59 behaviour
 %%
 mwc59_api(Config) when is_list(Config) ->
- mwc59_api(1, 1000000).
+ try rand:mwc59_seed(-1) of
+ CX1 ->
+ error({bad_return, CX1})
+ catch
+ error : function_clause ->
+ try rand:mwc59_seed(1 bsl 57) of
+ CX2 ->
+ error({bad_return, CX2})
+ catch
+ error : function_clause ->
+ Seed = 324109835948422043,
+ Seed = rand:mwc59_seed(1),
+ mwc59_api(Seed, 1000000)
+ end
+ end.
 
 mwc59_api(CX0, 0) ->
- CX = 216355295181821136,
+ CX = 394988924775693874,
 {CX, CX} = {CX0, CX},
 V0 = rand:mwc59_fast_value(CX0),
- V = 262716604851324112,
+ V = 446733510867799090,
 {V, V} = {V0, V},
 W0 = rand:mwc59_value(CX0),
- W = 528437219928775120,
+ W = 418709302640385298,
 {W, W} = {W0, W},
 F0 = rand:mwc59_float(CX0),
 F = (W bsr (59-53)) * (1 / (1 bsl 53)),
@@ -1162,11 +1176,88 @@ do_measure(Iterations) ->
 case (V rem Range) + 1 of
 X when is_integer(X), 0 < X, X =< Range ->
 St1
- end
+ end
 end
 end,
 {exsp,mod}, Iterations,
 TMarkUniformRange10000, OverheadUniformRange1000),
+ _ =
+ measure_1(
+ fun (_Mod, _State) ->
+ Range = 10000,
+ fun (St0) ->
+ St1 = rand:mwc59(St0),
+ %% Fixpoint inversion, slightly skewed
+ case
+ ( (Range * ((St1 band ((1 bsl 32)-1))))
+ bsr 32 )
+ + 1
+ of
+ R when is_integer(R), 0 < R, R =< Range ->
+ St1
+ end
+ end
+ end,
+ {mwc59,raw_mas}, Iterations,
+ TMarkUniformRange10000, OverheadUniformRange1000),
+ _ =
+ measure_1(
+ fun (_Mod, _State) ->
+ Range = 10000,
+ fun (St0) ->
+ St1 = rand:mwc59(St0),
+ %% Fixpoint inversion, slightly skewed
+ case
+ ( (Range *
+ ((rand:mwc59_fast_value(St1)
+ band ((1 bsl 32)-1) )) )
+ bsr 32 )
+ + 1
+ of
+ R when is_integer(R), 0 < R, R =< Range ->
+ St1
+ end
+ end
+ end,
+ {mwc59,fast_mas}, Iterations,
+ TMarkUniformRange10000, OverheadUniformRange1000),
+ _ =
+ measure_1(
+ fun (_Mod, _State) ->
+ Range = 10000, % 14 bits
+ fun (St0) ->
+ St1 = rand:mwc59(St0),
+ %% Fixpoint inversion, slightly skewed
+ case
+ ( (Range *
+ (rand:mwc59_value(St1) bsr 14) )
+ bsr (59-14) )
+ + 1
+ of
+ R when is_integer(R), 0 < R, R =< Range ->
+ St1
+ end
+ end
+ end,
+ {mwc59,value_mas}, Iterations,
+ TMarkUniformRange10000, OverheadUniformRange1000),
+ _ =
+ measure_1(
+ fun (_Mod, _State) ->
+ Range = 10000,
+ fun (St0) ->
+ {V,St1} = rand:exsp_next(St0),
+ %% Fixpoint inversion, slightly skewed
+ case
+ ((Range * (V bsr 14)) bsr (58-14)) + 1
+ of
+ X when is_integer(X), 0 < X, X =< Range ->
+ St1
+ end
+ end
+ end,
+ {exsp,mas}, Iterations,
+ TMarkUniformRange10000, OverheadUniformRange1000),
 _ =
 measure_1(
 fun (_Mod, _State) ->
@@ -1231,13 +1322,16 @@ do_measure(Iterations) ->
 Range = 1 bsl 32,
 fun (St0) ->
 St1 = rand:mwc59(St0),
- case rand:mwc59_fast_value(St1) bsr (59-32) of
+ case
+ rand:mwc59_fast_value(St1)
+ band ((1 bsl 32)-1)
+ of
 R when is_integer(R), 0 =< R, R < Range ->
 St1
 end
 end
 end,
- {mwc59,fast_shift}, Iterations,
+ {mwc59,fast_mask}, Iterations,
 TMarkUniform32Bit, OverheadUniform32Bit),
 _ =
 measure_1(
@@ -1419,7 +1513,7 @@ do_measure(Iterations) ->
 end
 end
 end,
- splitmix64_inline, Iterations,
+ {splitmix64,next}, Iterations,
 TMarkUniformFullRange, OverheadUniformFullRange),
 _ =
 measure_1(
@@ -1525,7 +1619,7 @@ do_measure(Iterations) ->
 end
 end
 end,
- splitmix64_inline, Iterations,
+ {splitmix64,next}, Iterations,
 TMarkUniform64Bit, OverheadUniform64Bit),
 %%
 ByteSize = 16, % At about 100 bytes crypto_bytes breaks even to exsss
@@ -1745,8 +1839,6 @@ measure_init(Alg) ->
 {?MODULE, undefined};
 system_time ->
 {?MODULE, undefined};
- splitmix64_inline ->
- {rand, erlang:unique_integer()};
 procdict ->
 {rand, rand:seed(exsss)};
 {Name, Tag} ->
@@ -1758,7 +1850,9 @@ measure_init(Alg) ->
 {rand, rand:mwc59_seed()};
 exsp ->
 {_, S} = rand:seed_s(exsp),
- {rand, S}
+ {rand, S};
+ splitmix64 ->
+ {rand, erlang:unique_integer()}
 end;
 _ ->
 {rand, rand:seed_s(Alg)}
-- 
2.35.3

Places

File 2962-Improve-seeding.patch of Package erlang

Places