File 2191-Implement-fast-niche-PRNGs.patch of Package erlang (Revision 224487e47114a0ea2cb48daa80098e75)
Currently displaying revision 224487e47114a0ea2cb48daa80098e75 , Show latest
1199
1
From 10fd8779f5620aac9974f13235a6be7353368fe5 Mon Sep 17 00:00:00 2001
2
From: Raimo Niskanen <raimo@erlang.org>
3
Date: Mon, 14 Mar 2022 15:30:02 +0100
4
Subject: [PATCH 01/11] Implement fast niche PRNGs
5
6
* Add a warm-up set to measure/1
7
* Present measure/1 overhead better
8
* Measure process dictionary variant
9
* Remove redundant mask operations on the argument
10
to splitmix64_next/1
11
12
Implement, document and measure niche PRNGs:
13
* splitmix64_next
14
* exsp_next & exsp_jump
15
* mcg35
16
* lcg35
17
---
18
lib/stdlib/doc/src/rand.xml | 244 ++++++++++++++++++++--
19
lib/stdlib/src/rand.erl | 169 ++++++++++++----
20
lib/stdlib/test/rand_SUITE.erl | 360 +++++++++++++++++++++++++++++++--
21
3 files changed, 704 insertions(+), 69 deletions(-)
22
23
diff --git a/lib/stdlib/doc/src/rand.xml b/lib/stdlib/doc/src/rand.xml
24
index 4a9d2d1ea4..361692f33c 100644
25
--- a/lib/stdlib/doc/src/rand.xml
26
+++ b/lib/stdlib/doc/src/rand.xml
27
28
non-overlapping sequences for parallel computations.
29
The jump functions perform calculations
30
equivalent to perform a large number of repeated calls
31
- for calculating new states.
32
+ for calculating new states, but execute in a time
33
+ roughly equivalent to one regular iteration per generator bit.
34
</p>
35
36
<p>
37
38
If this is a problem; to generate a boolean with these algorithms
39
use something like this:
40
</p>
41
- <pre>(rand:uniform(16) > 8)</pre>
42
+ <pre>(rand:uniform(256) > 128) % -> boolean()</pre>
43
+ <pre>((rand:uniform(256) - 1) bsr 7) % -> 0 | 1</pre>
44
<p>
45
- And for a general range, with <c>N = 1</c> for <c>exrop</c>,
46
+ For a general range, with <c>N = 1</c> for <c>exrop</c>,
47
and <c>N = 3</c> for <c>exs1024s</c>:
48
</p>
49
<pre>(((rand:uniform(Range bsl N) - 1) bsr N) + 1)</pre>
50
51
<name name="dummy_state"/>
52
<desc><p>Algorithm specific internal state</p></desc>
53
</datatype>
54
+ <datatype>
55
+ <name name="splitmix64_state"/>
56
+ <desc><p>Algorithm specific state</p></desc>
57
+ </datatype>
58
+ <datatype>
59
+ <name name="uint58"/>
60
+ <desc><p>0 .. (2^58 - 1)</p></desc>
61
+ </datatype>
62
+ <datatype>
63
+ <name name="uint64"/>
64
+ <desc><p>0 .. (2^64 - 1)</p></desc>
65
+ </datatype>
66
+ <datatype>
67
+ <name name="mcg35_state"/>
68
+ <desc><p>1 .. ((2^35 - 31) - 1)</p></desc>
69
+ </datatype>
70
+ <datatype>
71
+ <name name="lcg35_state"/>
72
+ <desc><p>0 .. (2^35 - 1)</p></desc>
73
+ </datatype>
74
</datatypes>
75
76
+
77
<funcs>
78
+ <fsdescription>
79
+ <title>Plug-in framework API</title>
80
+ </fsdescription>
81
<func>
82
<name name="bytes" arity="1" since="OTP 24.0"/>
83
<fsummary>Return a random binary.</fsummary>
84
- <desc><marker id="bytes-1"/>
85
+ <desc>
86
<p>
87
Returns, for a specified integer <c><anno>N</anno> >= 0</c>,
88
a <c>binary()</c> with that number of random bytes.
89
90
<func>
91
<name name="bytes_s" arity="2" since="OTP 24.0"/>
92
<fsummary>Return a random binary.</fsummary>
93
- <desc><marker id="bytes-1"/>
94
+ <desc>
95
<p>
96
Returns, for a specified integer <c><anno>N</anno> >= 0</c>
97
and a state, a <c>binary()</c> with that number of random bytes,
98
99
<func>
100
<name name="export_seed" arity="0" since="OTP 18.0"/>
101
<fsummary>Export the random number generation state.</fsummary>
102
- <desc><marker id="export_seed-0"/>
103
+ <desc>
104
<p>Returns the random number state in an external format.
105
To be used with <seemfa marker="#seed/1"><c>seed/1</c></seemfa>.</p>
106
</desc>
107
108
<func>
109
<name name="export_seed_s" arity="1" since="OTP 18.0"/>
110
<fsummary>Export the random number generation state.</fsummary>
111
- <desc><marker id="export_seed_s-1"/>
112
+ <desc>
113
<p>Returns the random number generator state in an external format.
114
To be used with <seemfa marker="#seed/1"><c>seed/1</c></seemfa>.</p>
115
</desc>
116
117
<name name="jump" arity="0" since="OTP 20.0"/>
118
<fsummary>Return the seed after performing jump calculation
119
to the state in the process dictionary.</fsummary>
120
- <desc><marker id="jump-0" />
121
+ <desc>
122
<p>Returns the state
123
after performing jump calculation
124
to the state in the process dictionary.</p>
125
126
<func>
127
<name name="jump" arity="1" since="OTP 20.0"/>
128
<fsummary>Return the seed after performing jump calculation.</fsummary>
129
- <desc><marker id="jump-1" />
130
+ <desc>
131
<p>Returns the state after performing jump calculation
132
to the given state. </p>
133
<p>This function generates a <c>not_implemented</c> error exception
134
135
<name name="seed" arity="1" clause_i="2" since="OTP 24.0"/>
136
<fsummary>Seed random number generator.</fsummary>
137
<desc>
138
- <marker id="seed-1"/>
139
<p>
140
Seeds random number generation with the specifed algorithm and
141
time-dependent data if <c><anno>AlgOrStateOrExpState</anno></c>
142
143
<func>
144
<name name="uniform" arity="0" since="OTP 18.0"/>
145
<fsummary>Return a random float.</fsummary>
146
- <desc><marker id="uniform-0"/>
147
+ <desc>
148
<p>
149
Returns a random float uniformly distributed in the value
150
range <c>0.0 =< <anno>X</anno> < 1.0</c> and
151
152
<func>
153
<name name="uniform_real" arity="0" since="OTP 21.0"/>
154
<fsummary>Return a random float.</fsummary>
155
- <desc><marker id="uniform_real-0"/>
156
+ <desc>
157
<p>
158
Returns a random float
159
uniformly distributed in the value range
160
161
<func>
162
<name name="uniform" arity="1" since="OTP 18.0"/>
163
<fsummary>Return a random integer.</fsummary>
164
- <desc><marker id="uniform-1"/>
165
+ <desc>
166
<p>Returns, for a specified integer <c><anno>N</anno> >= 1</c>,
167
a random integer uniformly distributed in the value range
168
<c>1 =< <anno>X</anno> =< <anno>N</anno></c> and
169
170
</desc>
171
</func>
172
</funcs>
173
+
174
+
175
+ <funcs>
176
+ <fsdescription>
177
+ <title>Niche algorithms API</title>
178
+ </fsdescription>
179
+ <func>
180
+ <name name="splitmix64_next" arity="1" since="OTP 25.0"/>
181
+ <fsummary>Return a random integer and new state.</fsummary>
182
+ <desc>
183
+ <p>
184
+ Returns a random 64-bit integer <c><anno>X</anno></c>
185
+ and a new generator state <c><anno>NewAlgState</anno></c>,
186
+ according to the SplitMix64 algorithm.
187
+ </p>
188
+ <p>
189
+ This generator is used internally in the <c>rand</c>
190
+ module for seeding other generators since it is of a
191
+ quite different breed which reduces the probability for
192
+ creating an accidentally bad seed.
193
+ </p>
194
+ </desc>
195
+ </func>
196
+ <func>
197
+ <name name="exsp_next" arity="1" since="OTP 25.0"/>
198
+ <fsummary>Return a random integer and new state.</fsummary>
199
+ <desc>
200
+ <p>
201
+ Returns a random 58-bit integer <c><anno>X</anno></c>
202
+ and a new generator state <c><anno>NewAlgState</anno></c>,
203
+ according to the Xorshift116+ algorithm.
204
+ </p>
205
+ <p>
206
+ This is an API function into the internal implementation of the
207
+ <seeerl marker="#algorithms"><c>exsp</c></seeerl>
208
+ algorithm that enables using it without the overhead
209
+ of the plug-in framework, which might be useful
210
+ for time critial applications.
211
+ On a typical 64 bit Erlang VM this approach executes
212
+ in just above 30% (1/3) of the time
213
+ for the default algorithm through
214
+ this module's normal plug-in framework.
215
+ </p>
216
+ <p>
217
+ To seed this generator use
218
+ <seemfa marker="#seed_s/1">
219
+ <c>{_, <anno>AlgState</anno>} = rand:seed_s(exsp)</c>
220
+ </seemfa>
221
+ or
222
+ <seemfa marker="#seed_s/1">
223
+ <c>{_, <anno>AlgState</anno>} = rand:seed_s(exsp, Seed)</c>
224
+ </seemfa>
225
+ with a specific <c>Seed</c>.
226
+ </p>
227
+ <note>
228
+ <p>
229
+ This function offers no help in generating a number
230
+ on a selected range, nor in generating a floating point number.
231
+ It is easy to accidentally mess up the fairly good
232
+ statistical properties of this generator when doing either.
233
+ Note also the caveat about weak low bits that
234
+ this generator suffers from.
235
+ The generator is exported in this form primarily for speed.
236
+ </p>
237
+ </note>
238
+ </desc>
239
+ </func>
240
+ <func>
241
+ <name name="exsp_jump" arity="1" since="OTP 25.0"/>
242
+ <fsummary>Return the new state as from 2^64 iterations.</fsummary>
243
+ <desc>
244
+ <p>
245
+ Returns a new generator state equivalent of the state
246
+ after iterating over
247
+ <seemfa marker="#exsp_next/1"><c>exsp_next/1</c></seemfa>
248
+ 2^64 times.
249
+ </p>
250
+ <p>
251
+ See the description of jump functions
252
+ at the top of this module description.
253
+ </p>
254
+ </desc>
255
+ </func>
256
+ <func>
257
+ <name name="mcg35" arity="1" since="OTP 25.0"/>
258
+ <fsummary>Return a new generator state / number.</fsummary>
259
+ <desc>
260
+ <p>
261
+ Returns a new generator state which is also
262
+ the generated number, <c><anno>X1</anno></c>,
263
+ according to a classical Multiplicative Congruential Generator
264
+ (a.k.a Multiplicative Linear Congruential Generator,
265
+ Lehmer random number generator,
266
+ Park-Miller random number generator).
267
+ </p>
268
+ <p>
269
+ This generator uses the modulus 2^35 - 31
270
+ and the multiplication constant 185852
271
+ from the paper "Tables of Linear Congruential Generators
272
+ of different sizes and good lattice structure" by
273
+ Pierre L'Ecuyer (1997) and they are selected
274
+ for speed to keep the computation under
275
+ the Erlang bignum limit.
276
+ </p>
277
+ <p>
278
+ The generator may be written as
279
+ <c><anno>X1</anno> = (185852*<anno>X0</anno>) rem ((1 bsl 35)-31)</c>,
280
+ but the properties of the chosen constants has allowed
281
+ an optimization of the otherwise expensive <c>rem</c> operation.
282
+ </p>
283
+ <p>
284
+ On a typical 64 bit Erlang VM this generator executes
285
+ in just below 10% (1/10) of the time
286
+ for the default algorithm in this module.
287
+ </p>
288
+ <note>
289
+ <p>
290
+ This generator is only suitable for insensitive
291
+ special niche applications since it has
292
+ a short period (2^35 - 32),
293
+ few bits (under 35),
294
+ is not a power of 2 generator (range 1 .. (2^35 - 32)),
295
+ offers no help in generating numbers on a specified range,
296
+ etc...
297
+ </p>
298
+ <p>
299
+ But for pseudo random load distribution
300
+ and such it might be useful, since it is very fast.
301
+ It normally beats even the known-to-be-fast trick
302
+ <c>erlang:phash2(erlang:unique_integer())</c>.
303
+ </p>
304
+ </note>
305
+ </desc>
306
+ </func>
307
+ <func>
308
+ <name name="lcg35" arity="1" since="OTP 25.0"/>
309
+ <fsummary>Return a new generator state / number.</fsummary>
310
+ <desc>
311
+ <p>
312
+ Returns a new generator state which is also
313
+ the generated number, <c><anno>X1</anno></c>,
314
+ according to a classical Linear Congruential Generator,
315
+ a power of 2 mixed congruential generator.
316
+ </p>
317
+ <p>
318
+ This generator uses the modulus 2^35
319
+ and the multiplication constant 15319397
320
+ from the paper "Tables of Linear Congruential Generators
321
+ of different sizes and good lattice structure" by
322
+ Pierre L'Ecuyer (1997) and they are selected
323
+ for speed to keep the computation under
324
+ the Erlang bignum limit.
325
+ The addition constant has been selected to
326
+ 15366142135 (it has to be odd) which looks
327
+ more interesting than simply 1.
328
+ </p>
329
+ <p>
330
+ The generator may be written as
331
+ <c><anno>X1</anno> = ((15319397*<anno>X0</anno>) + 15366142135) band ((1 bsl 35)-1)</c>.
332
+ </p>
333
+ <p>
334
+ On a typical 64 bit Erlang VM this generator executes
335
+ in just below 7% (1/15) of the time
336
+ for the default algorithm in this module,
337
+ which is the fastest generator the author has seen.
338
+ It can hardly be beaten by even a BIF
339
+ implementation of any generator since the execution
340
+ time is close to the overhead of a BIF call.
341
+ </p>
342
+ <note>
343
+ <p>
344
+ This generator is only suitable for insensitive
345
+ special niche applications since it has
346
+ a short period (2^35), few bits (35),
347
+ offers no help in generating numbers on a specified range,
348
+ and has, among others, the known statistical artifact
349
+ that the lowest bit simply alternates,
350
+ the next to lowest has a period of 4,
351
+ and so on, so it is only the highest bits
352
+ that achieve any form of statistical quality.
353
+ </p>
354
+ <p>
355
+ But for pseudo random load distribution
356
+ and such it might be useful, since it is extremely fast.
357
+ The <seemfa marker="#mcg35/1"><c>mcg35/1</c></seemfa>
358
+ generator above has got less statistical artifacts,
359
+ but instead other pecularities since it is not
360
+ a power of 2 generator.
361
+ </p>
362
+ </note>
363
+ </desc>
364
+ </func>
365
+ </funcs>
366
</erlref>
367
diff --git a/lib/stdlib/src/rand.erl b/lib/stdlib/src/rand.erl
368
index b2c3d4a557..d9ae09100e 100644
369
--- a/lib/stdlib/src/rand.erl
370
+++ b/lib/stdlib/src/rand.erl
371
372
normal/0, normal/2, normal_s/1, normal_s/3
373
]).
374
375
+%% Utilities
376
+-export([exsp_next/1, exsp_jump/1, splitmix64_next/1, mcg35/1, lcg35/1]).
377
+
378
%% Test, dev and internal
379
-export([exro928_jump_2pow512/1, exro928_jump_2pow20/1,
380
exro928_seed/1, exro928_next/1, exro928_next_state/1,
381
382
%% Debug
383
-export([make_float/3, float2str/1, bc64/1]).
384
385
--compile({inline, [exs64_next/1, exsplus_next/1, exsss_next/1,
386
+-compile({inline, [exs64_next/1, exsp_next/1, exsss_next/1,
387
exs1024_next/1, exs1024_calc/2,
388
exro928_next_state/4,
389
exrop_next/1, exrop_next_s/2,
390
391
-export_type(
392
[exsplus_state/0, exro928_state/0, exrop_state/0, exs1024_state/0,
393
exs64_state/0, dummy_state/0]).
394
+-export_type(
395
+ [uint58/0, uint64/0, splitmix64_state/0, mcg35_state/0, lcg35_state/0]).
396
397
%% =====================================================================
398
%% Range macro and helper
399
400
{#{type=>exs64, max=>?MASK(64), next=>fun exs64_next/1},
401
fun exs64_seed/1};
402
mk_alg(exsplus) ->
403
- {#{type=>exsplus, max=>?MASK(58), next=>fun exsplus_next/1,
404
+ {#{type=>exsplus, max=>?MASK(58), next=>fun exsp_next/1,
405
jump=>fun exsplus_jump/1},
406
fun exsplus_seed/1};
407
mk_alg(exsp) ->
408
- {#{type=>exsp, bits=>58, weak_low_bits=>1, next=>fun exsplus_next/1,
409
+ {#{type=>exsp, bits=>58, weak_low_bits=>1, next=>fun exsp_next/1,
410
uniform=>fun exsp_uniform/1, uniform_n=>fun exsp_uniform/2,
411
jump=>fun exsplus_jump/1},
412
fun exsplus_seed/1};
413
414
[R] = seed64_nz(1, L),
415
R;
416
exs64_seed(A) when is_integer(A) ->
417
- [R] = seed64(1, ?MASK(64, A)),
418
+ [R] = seed64(1, A),
419
R;
420
%%
421
%% Traditional integer triplet seed
422
423
[S0,S1] = seed58_nz(2, L),
424
[S0|S1];
425
exsplus_seed(X) when is_integer(X) ->
426
- [S0,S1] = seed58(2, ?MASK(64, X)),
427
+ [S0,S1] = seed58(2, X),
428
[S0|S1];
429
%%
430
%% Traditional integer triplet seed
431
exsplus_seed({A1, A2, A3}) ->
432
- {_, R1} = exsplus_next(
433
+ {_, R1} = exsp_next(
434
[?MASK(58, (A1 * 4294967197) + 1)|
435
?MASK(58, (A2 * 4294967231) + 1)]),
436
- {_, R2} = exsplus_next(
437
+ {_, R2} = exsp_next(
438
[?MASK(58, (A3 * 4294967279) + 1)|
439
tl(R1)]),
440
R2.
441
442
[S0,S1] = seed58_nz(2, L),
443
[S0|S1];
444
exsss_seed(X) when is_integer(X) ->
445
- [S0,S1] = seed58(2, ?MASK(64, X)),
446
+ [S0,S1] = seed58(2, X),
447
[S0|S1];
448
%%
449
%% Seed from traditional integer triple - mix into splitmix
450
exsss_seed({A1, A2, A3}) ->
451
- {_, X0} = seed58(?MASK(64, A1)),
452
- {S0, X1} = seed58(?MASK(64, A2) bxor X0),
453
- {S1, _} = seed58(?MASK(64, A3) bxor X1),
454
+ {_, X0} = seed58(A1),
455
+ {S0, X1} = seed58(A2 bxor X0),
456
+ {S1, _} = seed58(A3 bxor X1),
457
[S0|S1].
458
459
%% Advance Xorshift116 state one step
460
461
?MASK(58, V_b + ?BSL(58, V_b, 3)) % V_b * 9
462
end).
463
464
--dialyzer({no_improper_lists, exsplus_next/1}).
465
466
%% Advance state and generate 58bit unsigned integer
467
--spec exsplus_next(exsplus_state()) -> {uint58(), exsplus_state()}.
468
-exsplus_next([S1|S0]) ->
469
+%%
470
+-dialyzer({no_improper_lists, exsp_next/1}).
471
+-spec exsp_next(AlgState :: exsplus_state()) ->
472
+ {X :: uint58(), NewAlgState :: exsplus_state()}.
473
+exsp_next([S1|S0]) ->
474
%% Note: members s0 and s1 are swapped here
475
NewS1 = ?exs_next(S0, S1, S1_1),
476
{?MASK(58, S0 + NewS1), [S0|NewS1]}.
477
-%% %% Note: members s0 and s1 are swapped here
478
-%% S11 = S1 bxor ?BSL(58, S1, 24),
479
-%% S12 = S11 bxor S0 bxor (S11 bsr 11) bxor (S0 bsr 41),
480
-%% {?MASK(58, S0 + S12), [S0|S12]}.
481
482
-dialyzer({no_improper_lists, exsss_next/1}).
483
484
485
%% Note: members s0 and s1 are swapped here
486
NewS1 = ?exs_next(S0, S1, S1_1),
487
{?scramble_starstar(S0, V_0, V_1), [S0|NewS1]}.
488
-%% {?MASK(58, S0 + NewS1), [S0|NewS1]}.
489
490
exsp_uniform({AlgHandler, R0}) ->
491
- {I, R1} = exsplus_next(R0),
492
+ {I, R1} = exsp_next(R0),
493
%% Waste the lowest bit since it is of lower
494
%% randomness quality than the others
495
{(I bsr (58-53)) * ?TWO_POW_MINUS53, {AlgHandler, R1}}.
496
497
{(I bsr (58-53)) * ?TWO_POW_MINUS53, {AlgHandler, R1}}.
498
499
exsp_uniform(Range, {AlgHandler, R}) ->
500
- {V, R1} = exsplus_next(R),
501
+ {V, R1} = exsp_next(R),
502
MaxMinusRange = ?BIT(58) - Range,
503
?uniform_range(Range, AlgHandler, R1, V, MaxMinusRange, I).
504
505
506
?uniform_range(Range, AlgHandler, R1, V, MaxMinusRange, I).
507
508
509
-%% This is the jump function for the exs* generators,
510
+%% This is the jump function for the exs... generators,
511
%% i.e the Xorshift116 generators, equivalent
512
%% to 2^64 calls to next/1; it can be used to generate 2^52
513
%% non-overlapping subsequences for parallel computations.
514
515
-spec exsplus_jump({alg_handler(), exsplus_state()}) ->
516
{alg_handler(), exsplus_state()}.
517
exsplus_jump({AlgHandler, S}) ->
518
+ {AlgHandler, exsp_jump(S)}.
519
+
520
+-dialyzer({no_improper_lists, exsp_jump/1}).
521
+-spec exsp_jump(AlgState :: exsplus_state()) ->
522
+ NewAlgState :: exsplus_state().
523
+exsp_jump(S) ->
524
{S1, AS1} = exsplus_jump(S, [0|0], ?JUMPCONST1, ?JUMPELEMLEN),
525
{_, AS2} = exsplus_jump(S1, AS1, ?JUMPCONST2, ?JUMPELEMLEN),
526
- {AlgHandler, AS2}.
527
+ AS2.
528
529
-dialyzer({no_improper_lists, exsplus_jump/4}).
530
exsplus_jump(S, AS, _, 0) ->
531
{S, AS};
532
exsplus_jump(S, [AS0|AS1], J, N) ->
533
- {_, NS} = exsplus_next(S),
534
+ {_, NS} = exsp_next(S),
535
case ?MASK(1, J) of
536
1 ->
537
[S0|S1] = S,
538
539
exs1024_seed(L) when is_list(L) ->
540
{seed64_nz(16, L), []};
541
exs1024_seed(X) when is_integer(X) ->
542
- {seed64(16, ?MASK(64, X)), []};
543
+ {seed64(16, X), []};
544
%%
545
%% Seed from traditional triple, remain backwards compatible
546
exs1024_seed({A1, A2, A3}) ->
547
548
exro928_seed(L) when is_list(L) ->
549
{seed58_nz(16, L), []};
550
exro928_seed(X) when is_integer(X) ->
551
- {seed58(16, ?MASK(64, X)), []};
552
+ {seed58(16, X), []};
553
%%
554
%% Seed from traditional integer triple - mix into splitmix
555
exro928_seed({A1, A2, A3}) ->
556
- {S0, X0} = seed58(?MASK(64, A1)),
557
- {S1, X1} = seed58(?MASK(64, A2) bxor X0),
558
- {S2, X2} = seed58(?MASK(64, A3) bxor X1),
559
+ {S0, X0} = seed58(A1),
560
+ {S1, X1} = seed58(A2 bxor X0),
561
+ {S2, X2} = seed58(A3 bxor X1),
562
{[S0,S1,S2|seed58(13, X2)], []}.
563
564
565
566
[S0,S1] = seed58_nz(2, L),
567
[S0|S1];
568
exrop_seed(X) when is_integer(X) ->
569
- [S0,S1] = seed58(2, ?MASK(64, X)),
570
+ [S0,S1] = seed58(2, X),
571
[S0|S1];
572
%%
573
%% Traditional integer triplet seed
574
575
%% =====================================================================
576
%% dummy "PRNG": Benchmark dummy overhead reference
577
%%
578
-%% As fast as possible - return a constant; to measure overhead.
579
+%% As fast as possible - return something daft and update state;
580
+%% to measure plug-in framework overhead.
581
%%
582
%% =====================================================================
583
584
--opaque dummy_state() :: 0..?MASK(58).
585
+-type dummy_state() :: uint58().
586
587
-dummy_uniform(_Range, State) -> {1, State}.
588
-dummy_next(R) -> {?BIT(57), R}.
589
-dummy_uniform(State) -> {0.5, State}.
590
+dummy_uniform(_Range, {AlgHandler,R}) ->
591
+ {1, {AlgHandler,(R bxor ?MASK(58))}}. % 1 is always in Range
592
+dummy_next(R) ->
593
+ {R, R bxor ?MASK(58)}.
594
+dummy_uniform({AlgHandler,R}) ->
595
+ {0.5, {AlgHandler,(R bxor ?MASK(58))}}. % Perfect mean value
596
597
-%% Serious looking seed, to avoid at least seed test failure
598
+%% Serious looking seed, to avoid rand_SUITE seed test failure
599
%%
600
dummy_seed(L) when is_list(L) ->
601
case L of
602
[X] when is_integer(X) ->
603
?MASK(58, X);
604
[X|_] when is_integer(X) ->
605
- erlang:error(too_many_seed_integer);
606
+ erlang:error(too_many_seed_integers);
607
[_|_] ->
608
erlang:error(non_integer_seed)
609
end;
610
611
{Z3, _} = splitmix64_next(A3 bxor X2),
612
?MASK(58, Z3).
613
614
+
615
+%% =====================================================================
616
+%% mcg35 PRNG: Multiplicative Linear Congruential Generator
617
+%%
618
+%% Parameters by Pierre L'Ecuyer from the paper:
619
+%% TABLES OF LINEAR CONGRUENTIAL GENERATORS
620
+%% OF DIFFERENT SIZES AND GOOD LATTICE STRUCTURE
621
+%%
622
+%% X1 = (A * X0) rem M
623
+%%
624
+%% A = 185852
625
+%% M = 2^B - D
626
+%% B = 35, D = 31
627
+%%
628
+%% X cannot, and never will be 0.
629
+%% Take X1 as output value with range 1..(M-1).
630
+%%
631
+%% Use this generator for speed, not for quality.
632
+%% The calculation should avoid bignum operations,
633
+%% so A * X0 should not become a bignum.
634
+%% M and A has been selected accordingly.
635
+%% The period is M - 1 since 0 cannot occur.
636
+%%
637
+%% =====================================================================
638
+-define(MCG35_A, (185852)).
639
+-define(MCG35_B, (35)).
640
+-define(MCG35_D, (31)).
641
+-define(MCG35_M, (?BIT(?MCG35_B) - ?MCG35_D)).
642
+
643
+-type mcg35_state() :: 1..(?MCG35_M-1).
644
+
645
+-spec mcg35(X0 :: mcg35_state()) -> X1 :: mcg35_state().
646
+mcg35(X0) ->
647
+ X = ?MCG35_A * X0,
648
+ %% rem M = rem (2^B - D), optimization to avoid 'rem'
649
+ X1 = ?MASK(?MCG35_B, X) + ?MCG35_D*(X bsr ?MCG35_B),
650
+ if
651
+ X1 < ?MCG35_M -> X1;
652
+ true -> X1 - ?MCG35_M
653
+ end.
654
+
655
+
656
+%% =====================================================================
657
+%% lcg35 PRNG: Multiplicative Linear Congruential Generator
658
+%%
659
+%% Parameters by Pierre L'Ecuyer from the paper:
660
+%% TABLES OF LINEAR CONGRUENTIAL GENERATORS
661
+%% OF DIFFERENT SIZES AND GOOD LATTICE STRUCTURE
662
+%%
663
+%% X1 = (A * X0 + C) rem M
664
+%%
665
+%% M = 2^35, A = 15319397, C = 1
666
+%%
667
+%% Choosing C = 15366142135, which is an odd value
668
+%% about 2^35 / sqrt(5) gives a perhaps nicer value in
669
+%% the sequence after 0 (-> C).
670
+%%
671
+%% Take X1 as output value with range 1..(M-1).
672
+%%
673
+%% Use this generator for speed, not for quality.
674
+%% The calculation should avoid bignum operations,
675
+%% so A * X0 + C should not become a bignum.
676
+%% M, A and C has been selected accordingly.
677
+%% The period is M.
678
+%%
679
+%% =====================================================================
680
+-define(LCG35_A, (15319397)).
681
+-define(LCG35_C, (15366142135)). % (1 bsl 35) / sqrt(5), odd
682
+-define(LCG35_B, 35). % Number of bits
683
+
684
+-type lcg35_state() :: 0..?MASK(?LCG35_B).
685
+
686
+-spec lcg35(X0 :: lcg35_state()) -> X1 :: lcg35_state().
687
+lcg35(X0) ->
688
+ ?MASK(?LCG35_B, ?LCG35_A * X0 + ?LCG35_C).
689
+
690
+
691
%% =====================================================================
692
%% Mask and fill state list, ensure not all zeros
693
%% =====================================================================
694
695
ZX
696
end.
697
698
+%% =====================================================================
699
%% The SplitMix64 generator:
700
%%
701
%% uint64_t splitmix64_next() {
702
703
%% return z ^ (z >> 31);
704
%% }
705
%%
706
+
707
+-type splitmix64_state() :: uint64().
708
+
709
+-spec splitmix64_next(AlgState :: integer()) ->
710
+ {X :: uint64(), NewAlgState :: splitmix64_state()}.
711
splitmix64_next(X_0) ->
712
X = ?MASK(64, X_0 + 16#9e3779b97f4a7c15),
713
Z_0 = ?MASK(64, (X bxor (X bsr 30)) * 16#bf58476d1ce4e5b9),
714
diff --git a/lib/stdlib/test/rand_SUITE.erl b/lib/stdlib/test/rand_SUITE.erl
715
index ff0ad72d99..da721ca900 100644
716
--- a/lib/stdlib/test/rand_SUITE.erl
717
+++ b/lib/stdlib/test/rand_SUITE.erl
718
719
[seed, interval_int, interval_float,
720
bytes_count,
721
api_eq,
722
+ mcg35_api, mcg35_rem, lcg35_api, exsp_next_api, exsp_jump_api,
723
reference,
724
{group, basic_stats},
725
{group, distr_stats},
726
727
728
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
729
730
+-define(MCG35_M, ((1 bsl 35) - 31)).
731
+
732
+%% Verify mcg35 behaviour
733
+%%
734
+mcg35_api(Config) when is_list(Config) ->
735
+ mcg35_api(1, 1000000).
736
+
737
+mcg35_api(X0, 0) ->
738
+ X = 30203473714,
739
+ {X, X} = {X0, X},
740
+ ok;
741
+mcg35_api(X0, N)
742
+ when is_integer(X0), 1 =< X0, X0 < ?MCG35_M ->
743
+ mcg35_api(rand:mcg35(X0), N - 1).
744
+
745
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
746
+
747
+%% Verify the 'rem' optimization in mcg35
748
+%%
749
+mcg35_rem(Config) when is_list(Config) ->
750
+ {_, X0} = rand:seed_s(dummy),
751
+ mcg35_rem((X0 rem (?MCG35_M - 1)) + 1, 10000000).
752
+
753
+mcg35_rem(_X0, 0) ->
754
+ ok;
755
+mcg35_rem(X0, N) ->
756
+ X1 = (185852 * X0) rem ?MCG35_M,
757
+ {X1, X1, X0, N} = {rand:mcg35(X0), X1, X0, N},
758
+ mcg35_rem(X1, N - 1).
759
+
760
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
761
+
762
+%% Verify lcg35 behaviour
763
+%%
764
+lcg35_api(Config) when is_list(Config) ->
765
+ lcg35_api(0, 1000000).
766
+
767
+lcg35_api(X0, 0) ->
768
+ X = 19243690048,
769
+ {X, X} = {X0, X},
770
+ ok;
771
+lcg35_api(X0, N)
772
+ when is_integer(X0), 0 =< X0, X0 < 1 bsl 35 ->
773
+ lcg35_api(rand:lcg35(X0), N - 1).
774
+
775
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
776
+
777
+%% Verify exsp_next behaviour
778
+%%
779
+exsp_next_api(Config) when is_list(Config) ->
780
+ {_, AlgState} = State = rand:seed_s(exsp, 87654321),
781
+ exsp_next_api(State, AlgState, 1000000).
782
+
783
+exsp_next_api(_State, _AlgState, 0) ->
784
+ ok;
785
+exsp_next_api(State, AlgState, N) ->
786
+ {X, NewState} = rand:uniform_s(1 bsl 58, State),
787
+ {Y, NewAlgState} = rand:exsp_next(AlgState),
788
+ Y1 = Y + 1,
789
+ {X, X, N} = {Y1, X, N},
790
+ exsp_next_api(NewState, NewAlgState, N - 1).
791
+
792
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
793
+
794
+%% Verify exsp_jump behaviour
795
+%%
796
+exsp_jump_api(Config) when is_list(Config) ->
797
+ {_, AlgState} = State = rand:seed_s(exsp, 12345678),
798
+ exsp_jump_api(State, AlgState, 10000).
799
+
800
+exsp_jump_api(_State, _AlgState, 0) ->
801
+ ok;
802
+exsp_jump_api(State, AlgState, N) ->
803
+ {X, NewState} = rand:uniform_s(1 bsl 58, State),
804
+ {Y, NewAlgState} = rand:exsp_next(AlgState),
805
+ Y1 = Y + 1,
806
+ {X, X, N} = {Y1, X, N},
807
+ exsp_jump_api(
808
+ rand:jump(NewState),
809
+ rand:exsp_jump(NewAlgState),
810
+ N - 1).
811
+
812
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
813
+
814
%% Check that uniform/1 returns values within the proper interval.
815
interval_int(Config) when is_list(Config) ->
816
Algs = [default|algs()],
817
818
end,
819
%%
820
ct:pal("~nRNG uniform integer range 10000 performance~n",[]),
821
- _ =
822
+ [TMarkUniformRange10000,OverheadUniformRange1000|_] =
823
measure_1(
824
fun (_) -> 10000 end,
825
fun (State, Range, Mod) ->
826
827
State)
828
end,
829
Algs),
830
+ _ =
831
+ measure_1(
832
+ fun (_) -> 10000 end,
833
+ fun (State, Range, _) ->
834
+ measure_loop(
835
+ fun (State0) ->
836
+ State1 = rand:mcg35(State0),
837
+ %% Just a 'rem' with slightly skewed distribution
838
+ case (State1 rem Range) + 1 of
839
+ R when is_integer(R), 0 < R, R =< Range ->
840
+ State1
841
+ end
842
+ end,
843
+ State)
844
+ end,
845
+ mcg35_inline, TMarkUniformRange10000, OverheadUniformRange1000),
846
+ _ =
847
+ measure_1(
848
+ fun (_) -> 10000 end,
849
+ fun (State, Range, _) ->
850
+ measure_loop(
851
+ fun (State0) ->
852
+ State1 = rand:lcg35(State0),
853
+ %% Just a 'rem' with slightly skewed distribution
854
+ case (State1 rem Range) + 1 of
855
+ R when is_integer(R), 0 < R, R =< Range ->
856
+ State1
857
+ end
858
+ end,
859
+ State)
860
+ end,
861
+ lcg35_inline, TMarkUniformRange10000, OverheadUniformRange1000),
862
+ _ =
863
+ measure_1(
864
+ fun (_) -> 10000 end,
865
+ fun (State, Range, _) ->
866
+ measure_loop(
867
+ fun (State0) ->
868
+ {V,State1} = rand:exsp_next(State0),
869
+ %% Just a 'rem' with slightly skewed distribution
870
+ case (V rem Range) + 1 of
871
+ R when is_integer(R), 0 < R, R =< Range ->
872
+ State1
873
+ end
874
+ end,
875
+ State)
876
+ end,
877
+ exsp_inline, TMarkUniformRange10000, OverheadUniformRange1000),
878
+ _ =
879
+ measure_1(
880
+ fun (_) -> 10000 end,
881
+ fun (State, Range, _) ->
882
+ measure_loop(
883
+ fun (State0) ->
884
+ %% Just a 'rem' with slightly skewed distribution
885
+ case
886
+ erlang:phash2(erlang:unique_integer(), Range)
887
+ of
888
+ R
889
+ when is_integer(R), 0 =< R, R < Range ->
890
+ State0
891
+ end
892
+ end,
893
+ State)
894
+ end,
895
+ unique_phash2, TMarkUniformRange10000, OverheadUniformRange1000),
896
%%
897
ct:pal("~nRNG uniform integer 32 bit performance~n",[]),
898
- _ =
899
+ [TMarkUniform32Bit,OverheadUniform32Bit|_] =
900
measure_1(
901
fun (_) -> 1 bsl 32 end,
902
fun (State, Range, Mod) ->
903
904
State)
905
end,
906
Algs),
907
+ _ =
908
+ measure_1(
909
+ fun (_) -> 1 bsl 32 end,
910
+ fun (State, Range, _) ->
911
+ measure_loop(
912
+ fun (State0) ->
913
+ case rand:lcg35(State0) bsr (35-32) of
914
+ R when is_integer(R), 0 =< R, R < Range ->
915
+ R
916
+ end
917
+ end,
918
+ State)
919
+ end,
920
+ lcg35_inline, TMarkUniform32Bit, OverheadUniform32Bit),
921
+ _ =
922
+ measure_1(
923
+ fun (_) -> 1 bsl 32 end,
924
+ fun (State, Range, _) ->
925
+ measure_loop(
926
+ fun (State0) ->
927
+ {V, State1} = rand:exsp_next(State0),
928
+ case V bsr (58-32) of
929
+ R when is_integer(R), 0 =< R, R < Range ->
930
+ State1
931
+ end
932
+ end,
933
+ State)
934
+ end,
935
+ exsp_inline, TMarkUniform32Bit, OverheadUniform32Bit),
936
+ _ =
937
+ measure_1(
938
+ fun (_) -> 1 bsl 32 end,
939
+ fun (State, Range, _) ->
940
+ measure_loop(
941
+ fun (State0) ->
942
+ case
943
+ erlang:phash2(erlang:unique_integer(), Range)
944
+ of
945
+ R
946
+ when is_integer(R), 0 =< R, R < Range ->
947
+ State0
948
+ end
949
+ end,
950
+ State)
951
+ end,
952
+ unique_phash2, TMarkUniform32Bit, OverheadUniform32Bit),
953
%%
954
ct:pal("~nRNG uniform integer half range performance~n",[]),
955
_ =
956
957
Algs),
958
%%
959
ct:pal("~nRNG uniform integer full range performance~n",[]),
960
- [TMarkUniformFullRange|_] =
961
+ [TMarkUniformFullRange,OverheadUniformFullRange|_] =
962
measure_1(
963
fun (State) -> half_range(State) bsl 1 end,
964
fun (State, Range, Mod) ->
965
966
Algs ++ [dummy]),
967
_ =
968
measure_1(
969
- fun (_) -> 0 end,
970
- fun (State, _, _) ->
971
+ fun (_) -> ((1 bsl 35) - 31) - 1 end,
972
+ fun (State, Range, _) ->
973
+ measure_loop(
974
+ fun (State0) ->
975
+ case rand:mcg35(State0) of
976
+ R when is_integer(R), 1 =< R, R =< Range ->
977
+ R
978
+ end
979
+ end,
980
+ State)
981
+ end,
982
+ mcg35_inline, TMarkUniformFullRange, OverheadUniformFullRange),
983
+ _ =
984
+ measure_1(
985
+ fun (_) -> 1 bsl 35 end,
986
+ fun (State, Range, _) ->
987
+ measure_loop(
988
+ fun (State0) ->
989
+ case rand:lcg35(State0) of
990
+ R when is_integer(R), 0 =< R, R < Range ->
991
+ R
992
+ end
993
+ end,
994
+ State)
995
+ end,
996
+ lcg35_inline, TMarkUniformFullRange, OverheadUniformFullRange),
997
+ _ =
998
+ measure_1(
999
+ fun (_) -> 1 bsl 58 end,
1000
+ fun (State, Range, _) ->
1001
+ measure_loop(
1002
+ fun (State0) ->
1003
+ {V, State1} = rand:exsp_next(State0),
1004
+ case V of
1005
+ V when is_integer(V), 0 =< V, V < Range ->
1006
+ State1
1007
+ end
1008
+ end,
1009
+ State)
1010
+ end,
1011
+ exsp_inline, TMarkUniformFullRange, OverheadUniformFullRange),
1012
+ _ =
1013
+ measure_1(
1014
+ fun (_) -> 1 bsl 27 end,
1015
+ fun (State, Range, _) ->
1016
measure_loop(
1017
- fun (State0) -> State0 end,
1018
+ fun (State0) ->
1019
+ case erlang:phash2(erlang:unique_integer()) of
1020
+ R
1021
+ when is_integer(R), 0 =< R, R < Range ->
1022
+ State0
1023
+ end
1024
+ end,
1025
State)
1026
end,
1027
- benchmark_dummy, TMarkUniformFullRange),
1028
+ unique_phash2, TMarkUniformFullRange, OverheadUniformFullRange),
1029
_ =
1030
measure_1(
1031
fun (State) -> half_range(State) bsl 1 end,
1032
1033
end,
1034
State)
1035
end,
1036
- procdict, TMarkUniformFullRange),
1037
+ procdict, TMarkUniformFullRange, OverheadUniformFullRange),
1038
%%
1039
ct:pal("~nRNG uniform integer full range + 1 performance~n",[]),
1040
_ =
1041
1042
end),
1043
%%
1044
ct:pal("~nRNG uniform float performance~n",[]),
1045
- _ =
1046
+ [TMarkUniformFloat,OverheadUniformFloat|_] =
1047
measure_1(
1048
fun (_) -> 0 end,
1049
fun (State, _, Mod) ->
1050
1051
State)
1052
end,
1053
Algs),
1054
+ _ =
1055
+ measure_1(
1056
+ fun (_) -> 0 end,
1057
+ fun (State, _, _) ->
1058
+ measure_loop(
1059
+ fun (State0) ->
1060
+ State1 = rand:mcg35(State0),
1061
+ %% Too few bits for full mantissa
1062
+ case (State1 - 1) * (1/((1 bsl 35) - 31)) of
1063
+ R when is_float(R), 0.0 =< R, R < 1.0 ->
1064
+ State1
1065
+ end
1066
+ end,
1067
+ State)
1068
+ end,
1069
+ mcg35_inline, TMarkUniformFloat, OverheadUniformFloat),
1070
+ _ =
1071
+ measure_1(
1072
+ fun (_) -> 0 end,
1073
+ fun (State, _, _) ->
1074
+ measure_loop(
1075
+ fun (State0) ->
1076
+ State1 = rand:lcg35(State0),
1077
+ %% Too few bits for full mantissa
1078
+ case State1 * (1/(1 bsl 35)) of
1079
+ R when is_float(R), 0.0 =< R, R < 1.0 ->
1080
+ State1
1081
+ end
1082
+ end,
1083
+ State)
1084
+ end,
1085
+ lcg35_inline, TMarkUniformFloat, OverheadUniformFloat),
1086
+ _ =
1087
+ measure_1(
1088
+ fun (_) -> 0 end,
1089
+ fun (State, _, _) ->
1090
+ measure_loop(
1091
+ fun (State0) ->
1092
+ {V,State1} = rand:exsp_next(State0),
1093
+ case V * (1/(1 bsl 58)) of
1094
+ R when is_float(R), 0.0 =< R, R < 1.0 ->
1095
+ State1
1096
+ end
1097
+ end,
1098
+ State)
1099
+ end,
1100
+ exsp_inline, TMarkUniformFloat, OverheadUniformFloat),
1101
%%
1102
ct:pal("~nRNG uniform_real float performance~n",[]),
1103
_ =
1104
1105
Algs),
1106
%%
1107
ct:pal("~nRNG normal float performance~n",[]),
1108
- [TMarkNormalFloat|_] =
1109
+ [TMarkNormalFloat, OverheadNormalFloat|_] =
1110
measure_1(
1111
fun (_) -> 0 end,
1112
fun (State, _, Mod) ->
1113
1114
end,
1115
State)
1116
end,
1117
- exsss, TMarkNormalFloat),
1118
+ exsss, TMarkNormalFloat, OverheadNormalFloat),
1119
ok.
1120
1121
-define(LOOP_MEASURE, (?LOOP div 5)).
1122
1123
ok.
1124
1125
measure_1(RangeFun, Fun, Algs) ->
1126
- TMark = measure_1(RangeFun, Fun, hd(Algs), undefined),
1127
- [TMark] ++
1128
- [measure_1(RangeFun, Fun, Alg, TMark) || Alg <- tl(Algs)].
1129
+ WMark = measure_1(RangeFun, Fun, hd(Algs), warm_up, 0),
1130
+ Overhead =
1131
+ measure_1(
1132
+ fun (_) -> 1 end,
1133
+ fun (State, Range, _) ->
1134
+ measure_loop(
1135
+ fun (State0)
1136
+ when
1137
+ is_integer(State0),
1138
+ 1 =< State0,
1139
+ State0 =< Range ->
1140
+ State0
1141
+ end,
1142
+ State)
1143
+ end,
1144
+ overhead, WMark, 0),
1145
+ TMark = measure_1(RangeFun, Fun, hd(Algs), undefined, Overhead),
1146
+ [TMark,Overhead] ++
1147
+ [measure_1(RangeFun, Fun, Alg, TMark, Overhead) || Alg <- tl(Algs)].
1148
1149
-measure_1(RangeFun, Fun, Alg, TMark) ->
1150
+measure_1(RangeFun, Fun, Alg, TMark, Overhead) ->
1151
Parent = self(),
1152
{Mod, State} =
1153
case Alg of
1154
+ overhead ->
1155
+ {?MODULE, 1};
1156
crypto64 ->
1157
{rand, crypto64_seed()};
1158
crypto_cache ->
1159
1160
{?MODULE, ignored_state};
1161
crypto_bytes_cached ->
1162
{?MODULE, <<>>};
1163
- benchmark_dummy ->
1164
+ unique_phash2 ->
1165
{?MODULE, ignored_state};
1166
+ mcg35_inline ->
1167
+ {_, S} = rand:seed_s(dummy),
1168
+ {?MODULE, (S rem ((1 bsl 35)-31 - 1)) + 1};
1169
+ lcg35_inline ->
1170
+ {_, S} = rand:seed_s(dummy),
1171
+ {?MODULE, S bsr (58-35)};
1172
+ exsp_inline ->
1173
+ {_, S} = rand:seed_s(exsp),
1174
+ {?MODULE, S};
1175
procdict ->
1176
- {rand, rand:seed(default)};
1177
+ {rand, rand:seed(exsss)};
1178
_ ->
1179
{rand, rand:seed_s(Alg)}
1180
end,
1181
Range = RangeFun(State),
1182
Pid = spawn_link(
1183
fun() ->
1184
- {Time, ok} = timer:tc(fun () -> Fun(State, Range, Mod) end),
1185
+ {T, ok} = timer:tc(fun () -> Fun(State, Range, Mod) end),
1186
+ Time = T - Overhead,
1187
Percent =
1188
case TMark of
1189
+ warm_up -> TMark;
1190
undefined -> 100;
1191
- _ -> (Time * 100 + 50) div TMark
1192
+ _ -> (Time * 100 + 50) div TMark
1193
end,
1194
io:format(
1195
"~.20w: ~p ns ~p% [16#~.16b]~n",
1196
--
1197
2.34.1
1198
1199