Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
home:Ledest:erlang:26
erlang
1041-erts-Refactor-internal-term-hashing.patch
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File 1041-erts-Refactor-internal-term-hashing.patch of Package erlang
From ef0d257501d7a9687e0a3e1be1f37b1ac4334b7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?John=20H=C3=B6gberg?= <john@erlang.org> Date: Tue, 16 May 2023 15:30:02 +0200 Subject: [PATCH] erts: Refactor internal term hashing Improve performance and hash quality by using MurmurHash3 instead of our hand-rolled Jenkins96 derivative, and reduce the chances of needing collision nodes in maps by making the hash width follow the word size, allowing an extra 8 levels of collision before that happens on 64-bit platforms. --- erts/emulator/beam/beam_common.c | 8 +- erts/emulator/beam/beam_common.h | 2 +- erts/emulator/beam/emu/map_instrs.tab | 2 +- erts/emulator/beam/emu/ops.tab | 4 +- erts/emulator/beam/erl_bif_info.c | 2 +- erts/emulator/beam/erl_bif_persistent.c | 2 +- erts/emulator/beam/erl_bits.c | 14 +- erts/emulator/beam/erl_bits.h | 4 +- erts/emulator/beam/erl_db_hash.c | 2 +- erts/emulator/beam/erl_map.c | 229 +++-- erts/emulator/beam/erl_map.h | 41 +- erts/emulator/beam/erl_nif.c | 2 +- erts/emulator/beam/erl_proc_sig_queue.c | 2 +- erts/emulator/beam/erl_process_dict.c | 18 +- erts/emulator/beam/erl_process_dict.h | 7 +- erts/emulator/beam/erl_term_hashing.c | 794 +++++++++++------- erts/emulator/beam/erl_term_hashing.h | 30 +- erts/emulator/beam/erl_trace.c | 2 +- erts/emulator/beam/global.h | 6 +- .../beam/jit/arm/beam_asm_global.hpp.pl | 2 +- erts/emulator/beam/jit/arm/instr_map.cpp | 89 +- erts/emulator/beam/jit/arm/ops.tab | 4 +- erts/emulator/beam/jit/beam_jit_common.cpp | 2 +- .../beam/jit/x86/beam_asm_global.hpp.pl | 2 +- erts/emulator/beam/jit/x86/instr_map.cpp | 116 ++- erts/emulator/beam/jit/x86/ops.tab | 4 +- erts/emulator/test/map_SUITE.erl | 230 ++++- erts/emulator/test/nif_SUITE.erl | 2 +- erts/emulator/test/persistent_term_SUITE.erl | 248 +++--- 29 files changed, 1163 insertions(+), 707 deletions(-) diff --git a/erts/emulator/beam/beam_common.c b/erts/emulator/beam/beam_common.c index 6eb97fc864..0cd6a5a5db 100644 --- a/erts/emulator/beam/beam_common.c +++ b/erts/emulator/beam/beam_common.c @@ -1877,7 +1877,7 @@ is_function2(Eterm Term, Uint arity) Eterm get_map_element(Eterm map, Eterm key) { - Uint32 hx; + erts_ihash_t hx; const Eterm *vs; if (is_flatmap(map)) { flatmap_t *mp; @@ -1910,7 +1910,7 @@ Eterm get_map_element(Eterm map, Eterm key) return vs ? *vs : THE_NON_VALUE; } -Eterm get_map_element_hash(Eterm map, Eterm key, Uint32 hx) +Eterm get_map_element_hash(Eterm map, Eterm key, erts_ihash_t hx) { const Eterm *vs; @@ -2082,7 +2082,7 @@ erts_gc_update_map_assoc(Process* p, Eterm* reg, Uint live, map = reg[live]; if (is_not_flatmap(map)) { - Uint32 hx; + erts_ihash_t hx; Eterm val; ASSERT(is_hashmap(map)); @@ -2325,7 +2325,7 @@ erts_gc_update_map_exact(Process* p, Eterm* reg, Uint live, map = reg[live]; if (is_not_flatmap(map)) { - Uint32 hx; + erts_ihash_t hx; Eterm val; /* apparently the compiler does not emit is_map instructions, diff --git a/erts/emulator/beam/beam_common.h b/erts/emulator/beam/beam_common.h index 0349d488ac..ee7a41c9b8 100644 --- a/erts/emulator/beam/beam_common.h +++ b/erts/emulator/beam/beam_common.h @@ -268,7 +268,7 @@ Eterm erts_gc_update_map_assoc(Process* p, Eterm* reg, Uint live, Eterm erts_gc_update_map_exact(Process* p, Eterm* reg, Uint live, Uint n, const Eterm* data); Eterm get_map_element(Eterm map, Eterm key); -Eterm get_map_element_hash(Eterm map, Eterm key, Uint32 hx); +Eterm get_map_element_hash(Eterm map, Eterm key, erts_ihash_t hx); int raw_raise(Eterm stacktrace, Eterm exc_class, Eterm value, Process *c_p); void erts_sanitize_freason(Process* c_p, Eterm exc); Eterm add_stacktrace(Process* c_p, Eterm Value, Eterm exc); diff --git a/erts/emulator/beam/emu/map_instrs.tab b/erts/emulator/beam/emu/map_instrs.tab index 9cd64662f8..9afc8a2226 100644 --- a/erts/emulator/beam/emu/map_instrs.tab +++ b/erts/emulator/beam/emu/map_instrs.tab @@ -104,7 +104,7 @@ i_get_map_elements(Fail, Src, N) { $FAIL($Fail); } else { const Eterm *v; - Uint32 hx; + erts_ihash_t hx; ASSERT(is_hashmap(map)); while(n--) { hx = fs[2]; diff --git a/erts/emulator/beam/emu/ops.tab b/erts/emulator/beam/emu/ops.tab index 7c4447d145..0615f32382 100644 --- a/erts/emulator/beam/emu/ops.tab +++ b/erts/emulator/beam/emu/ops.tab @@ -1013,7 +1013,7 @@ bif1 Fail=f Bif S1 Dst => i_bif1 S1 Fail Bif Dst bif2 p Bif S1 S2 Dst => i_bif2_body S2 S1 Bif Dst bif2 Fail=f Bif S1 S2 Dst => i_bif2 S2 S1 Fail Bif Dst -i_get_hash c I d +i_get_hash c W d i_get s d self xy @@ -1573,7 +1573,7 @@ i_get_map_elements f? s I * i_get_map_element_hash Fail Src=c Key Hash Dst => move Src x | i_get_map_element_hash Fail x Key Hash Dst -i_get_map_element_hash f? xy c I xy +i_get_map_element_hash f? xy c W xy i_get_map_element Fail Src=c Key Dst => move Src x | i_get_map_element Fail x Key Dst diff --git a/erts/emulator/beam/erl_bif_info.c b/erts/emulator/beam/erl_bif_info.c index 4ba368ec7c..07265c1402 100644 --- a/erts/emulator/beam/erl_bif_info.c +++ b/erts/emulator/beam/erl_bif_info.c @@ -4527,7 +4527,7 @@ BIF_RETTYPE erts_debug_get_internal_state_1(BIF_ALIST_1) BIF_RET(erts_debug_reader_groups_map(BIF_P, (int) groups)); } else if (ERTS_IS_ATOM_STR("internal_hash", tp[1])) { - Uint hash = (Uint) make_internal_hash(tp[2], 0); + Uint hash = (Uint) erts_internal_hash(tp[2]); Uint hsz = 0; Eterm* hp; erts_bld_uint(NULL, &hsz, hash); diff --git a/erts/emulator/beam/erl_bif_persistent.c b/erts/emulator/beam/erl_bif_persistent.c index 08e15f55d6..e811e96e49 100644 --- a/erts/emulator/beam/erl_bif_persistent.c +++ b/erts/emulator/beam/erl_bif_persistent.c @@ -971,8 +971,8 @@ cleanup_trap_data(Binary *bp) static Uint lookup(HashTable* hash_table, Eterm key, Eterm *bucket) { + erts_ihash_t idx = erts_internal_hash(key); Uint mask = hash_table->mask; - Uint32 idx = make_internal_hash(key, 0); Eterm term; while (1) { diff --git a/erts/emulator/beam/erl_bits.c b/erts/emulator/beam/erl_bits.c index 8b6698b6ae..13e5107127 100644 --- a/erts/emulator/beam/erl_bits.c +++ b/erts/emulator/beam/erl_bits.c @@ -2162,13 +2162,13 @@ erts_cmp_bits(byte* a_ptr, size_t a_offs, byte* b_ptr, size_t b_offs, size_t siz void -erts_copy_bits(byte* src, /* Base pointer to source. */ - size_t soffs, /* Bit offset for source relative to src. */ - int sdir, /* Direction: 1 (forward) or -1 (backward). */ - byte* dst, /* Base pointer to destination. */ - size_t doffs, /* Bit offset for destination relative to dst. */ - int ddir, /* Direction: 1 (forward) or -1 (backward). */ - size_t n) /* Number of bits to copy. */ +erts_copy_bits(const byte* src, /* Base pointer to source. */ + size_t soffs, /* Bit offset for source relative to src. */ + int sdir, /* Direction: 1 (forward) or -1 (backward). */ + byte* dst, /* Base pointer to destination. */ + size_t doffs, /* Bit offset for destination relative to dst. */ + int ddir, /* Direction: 1 (forward) or -1 (backward). */ + size_t n) /* Number of bits to copy. */ { Uint lmask; Uint rmask; diff --git a/erts/emulator/beam/erl_bits.h b/erts/emulator/beam/erl_bits.h index 1d95536a68..e7cc6017a3 100644 --- a/erts/emulator/beam/erl_bits.h +++ b/erts/emulator/beam/erl_bits.h @@ -192,8 +192,8 @@ Eterm erts_bs_init_writable(Process* p, Eterm sz); /* * Common utilities. */ -void erts_copy_bits(byte* src, size_t soffs, int sdir, - byte* dst, size_t doffs,int ddir, size_t n); +void erts_copy_bits(const byte* src, size_t soffs, int sdir, + byte* dst, size_t doffs, int ddir, size_t n); int erts_cmp_bits(byte* a_ptr, size_t a_offs, byte* b_ptr, size_t b_offs, size_t size); /* diff --git a/erts/emulator/beam/erl_db_hash.c b/erts/emulator/beam/erl_db_hash.c index fa8a8c15ec..fd61747f17 100644 --- a/erts/emulator/beam/erl_db_hash.c +++ b/erts/emulator/beam/erl_db_hash.c @@ -276,7 +276,7 @@ static ERTS_INLINE int is_pseudo_deleted(HashDbTerm* p) /* optimised version of make_hash (normal case? atomic key) */ #define MAKE_HASH(term) \ ((is_atom(term) ? (atom_tab(atom_val(term))->slot.bucket.hvalue) : \ - make_internal_hash(term, 0)) & MAX_HASH_MASK) + erts_internal_hash(term)) & MAX_HASH_MASK) # define GET_LOCK_MASK(NUMBER_OF_LOCKS) ((NUMBER_OF_LOCKS)-1) diff --git a/erts/emulator/beam/erl_map.c b/erts/emulator/beam/erl_map.c index 337567406b..431fcf66e8 100644 --- a/erts/emulator/beam/erl_map.c +++ b/erts/emulator/beam/erl_map.c @@ -77,12 +77,14 @@ /* for hashmap_from_list/1 */ typedef struct { - Uint32 hx; - Uint32 skip; + erts_ihash_t hx; + Uint skip; Uint i; Eterm val; } hxnode_t; +/* Reverses the path element/slot order of `hash` */ +static ERTS_INLINE erts_ihash_t swizzle_map_hash(erts_ihash_t hash); static Eterm flatmap_merge(Process *p, Eterm nodeA, Eterm nodeB); static BIF_RETTYPE map_merge_mixed(Process *p, Eterm flat, Eterm tree, int swap_args); @@ -92,7 +94,7 @@ static BIF_RETTYPE hashmap_merge(Process *p, Eterm nodeA, Eterm nodeB, int swap_ static Export hashmap_merge_trap_export; static BIF_RETTYPE maps_merge_trap_1(BIF_ALIST_1); static Uint hashmap_subtree_size(Eterm node); -static Eterm hashmap_delete(Process *p, Uint32 hx, Eterm key, Eterm node, Eterm *value); +static Eterm hashmap_delete(Process *p, erts_ihash_t hx, Eterm key, Eterm node, Eterm *value); static Eterm flatmap_from_validated_list(Process *p, Eterm list, Eterm fill_value, Uint size); static Eterm hashmap_from_unsorted_array(ErtsHeapFactory*, hxnode_t *hxns, Uint n, int reject_dupkeys, ErtsAlcType_t temp_memory_allocator); static Eterm hashmap_from_sorted_unique_array(ErtsHeapFactory*, hxnode_t *hxns, Uint n, ErtsAlcType_t temp_memory_allocator); @@ -101,18 +103,12 @@ static Eterm hashmap_info(Process *p, Eterm node); static Eterm hashmap_bld_tuple_uint(Uint **hpp, Uint *szp, Uint n, Uint nums[]); static int hxnodecmp(const void* a, const void* b); static int hxnodecmpkey(const void* a, const void* b); -#define swizzle32(D,S) \ - do { \ - (D) = ((S) & 0x0000000f) << 28 | ((S) & 0x000000f0) << 20 \ - | ((S) & 0x00000f00) << 12 | ((S) & 0x0000f000) << 4 \ - | ((S) & 0x000f0000) >> 4 | ((S) & 0x00f00000) >> 12 \ - | ((S) & 0x0f000000) >> 20 | ((S) & 0xf0000000) >> 28; \ - } while(0) #define cdepth(V1,V2) (hashmap_clz((V1) ^ (V2)) >> 2) -#define maskval(V,L) (((V) >> ((7 - (L))*4)) & 0xf) +#define maskval(V,L) (((V) >> (((HAMT_MAX_LEVEL - 1) - (L)) * 4)) & 0xF) #define DBG_PRINT(X) /*erts_printf X*/ #define HALLOC_EXTRA 200 + /* ******************************* * ** Yielding C Fun (YCF) Note ** * ******************************* @@ -191,7 +187,7 @@ erts_map_size(Eterm map) const Eterm * erts_maps_get(Eterm key, Eterm map) { - Uint32 hx; + erts_ihash_t hx; if (is_flatmap(map)) { Eterm *ks, *vs; flatmap_t *mp; @@ -495,8 +491,8 @@ static Eterm hashmap_from_validated_list(Process *p, Eterm res; Eterm key; Eterm value; - Uint32 sw; - Uint32 hx; + erts_ihash_t sw; + erts_ihash_t hx; Uint ix = 0; hxnode_t *hxns; ErtsHeapFactory *factory; @@ -528,7 +524,7 @@ static Eterm hashmap_from_validated_list(Process *p, value = kv[2]; } hx = hashmap_restore_hash(0,key); - swizzle32(sw,hx); + sw = swizzle_map_hash(hx); hxns[ix].hx = sw; hxns[ix].val = CONS(hp, key, value); hp += 2; hxns[ix].skip = 1; /* will be reassigned in from_array */ @@ -649,7 +645,7 @@ BIF_RETTYPE maps_from_keys_2(BIF_ALIST_2) { Eterm erts_hashmap_from_array(ErtsHeapFactory* factory, Eterm *leafs, Uint n, int reject_dupkeys) { - Uint32 sw, hx; + erts_ihash_t sw, hx; Uint ix; hxnode_t *hxns; Eterm res; @@ -659,7 +655,7 @@ Eterm erts_hashmap_from_array(ErtsHeapFactory* factory, Eterm *leafs, Uint n, for (ix = 0; ix < n; ix++) { hx = hashmap_make_hash(*leafs); - swizzle32(sw,hx); + sw = swizzle_map_hash(hx); hxns[ix].hx = sw; hxns[ix].val = make_list(leafs); hxns[ix].skip = 1; @@ -734,7 +730,7 @@ Eterm erts_map_from_ks_and_vs(ErtsHeapFactory *factory, Eterm *ks, Eterm *vs, Ui Eterm erts_hashmap_from_ks_and_vs_extra(ErtsHeapFactory *factory, Eterm *ks, Eterm *vs, Uint n, Eterm key, Eterm value) { - Uint32 sw, hx; + erts_ihash_t sw, hx; Uint i,sz; hxnode_t *hxns; Eterm *hp, res; @@ -748,7 +744,7 @@ Eterm erts_hashmap_from_ks_and_vs_extra(ErtsHeapFactory *factory, for(i = 0; i < n; i++) { hx = hashmap_make_hash(ks[i]); - swizzle32(sw,hx); + sw = swizzle_map_hash(hx); hxns[i].hx = sw; hxns[i].val = CONS(hp, ks[i], vs[i]); hp += 2; hxns[i].skip = 1; /* will be reassigned in from_array */ @@ -757,7 +753,7 @@ Eterm erts_hashmap_from_ks_and_vs_extra(ErtsHeapFactory *factory, if (key != THE_NON_VALUE) { hx = hashmap_make_hash(key); - swizzle32(sw,hx); + sw = swizzle_map_hash(hx); hxns[i].hx = sw; hxns[i].val = CONS(hp, key, value); hp += 2; hxns[i].skip = 1; @@ -839,23 +835,26 @@ static Eterm hashmap_from_unsorted_array(ErtsHeapFactory* factory, } if (cx > 1) { - /* recursive decompose array */ - res = hashmap_from_sorted_unique_array(factory, hxns, cx, + /* recursive decompose array */ + res = hashmap_from_sorted_unique_array(factory, hxns, cx, temp_memory_allocator); } else { - Eterm *hp; + Eterm slot; + Eterm *hp; - /* we only have one item, either because n was 1 or - * because we hade multiples of the same key. - * - * hash value has been swizzled, need to drag it down to get the - * correct slot. */ + /* We only have one item, either because n was 1 or because we have + * multiples of the same key. + * + * As the hash value has been swizzled, we need to drag it down to get + * the correct slot. */ + slot = hxns[0].hx >> ((HAMT_MAX_LEVEL - 1) * 4); + ASSERT(slot < 16); - hp = erts_produce_heap(factory, HAMT_HEAD_BITMAP_SZ(1), 0); - hp[0] = MAP_HEADER_HAMT_HEAD_BITMAP(1 << ((hxns[0].hx >> 0x1c) & 0xf)); - hp[1] = 1; - hp[2] = hxns[0].val; - res = make_hashmap(hp); + hp = erts_produce_heap(factory, HAMT_HEAD_BITMAP_SZ(1), 0); + hp[0] = MAP_HEADER_HAMT_HEAD_BITMAP(1 << slot); + hp[1] = 1; + hp[2] = hxns[0].val; + res = make_hashmap(hp); } return res; @@ -943,9 +942,9 @@ static Eterm hashmap_from_chunked_array(ErtsHeapFactory *factory, hxnode_t *hxns Uint dc; Uint slot; Uint elems; - Uint32 v; - Uint32 vp; - Uint32 vn; + erts_ihash_t v; + erts_ihash_t vp; + erts_ihash_t vn; Uint32 hdr; Uint bp; Uint sz; @@ -978,7 +977,7 @@ static Eterm hashmap_from_chunked_array(ErtsHeapFactory *factory, hxnode_t *hxns if (n == 1) { res = hxns[0].val; v = hxns[0].hx; - for (d = 7; d > 0; d--) { + for (d = HAMT_MAX_LEVEL-1; d > 0; d--) { slot = maskval(v,d); hp = erts_produce_heap(factory, HAMT_NODE_BITMAP_SZ(1), HALLOC_EXTRA); hp[0] = MAP_HEADER_HAMT_NODE_BITMAP(1 << slot); @@ -1015,7 +1014,7 @@ static Eterm hashmap_from_chunked_array(ErtsHeapFactory *factory, hxnode_t *hxns res = hxns[ix].val; if (hxns[ix].skip > 1) { - dc = 7; + dc = HAMT_MAX_LEVEL - 1; /* build collision nodes */ while (dc > d) { hp = erts_produce_heap(factory, HAMT_NODE_BITMAP_SZ(1), HALLOC_EXTRA); @@ -1045,7 +1044,7 @@ static Eterm hashmap_from_chunked_array(ErtsHeapFactory *factory, hxnode_t *hxns if (hxns[ix].skip > 1) { int wat = (d > dn) ? d : dn; - dc = 7; + dc = HAMT_MAX_LEVEL - 1; /* build collision nodes */ while (dc > wat) { hp = erts_produce_heap(factory, HAMT_NODE_BITMAP_SZ(1), HALLOC_EXTRA); @@ -1114,7 +1113,7 @@ static Eterm hashmap_from_chunked_array(ErtsHeapFactory *factory, hxnode_t *hxns res = hxns[ix].val; if (hxns[ix].skip > 1) { - dc = 7; + dc = HAMT_MAX_LEVEL - 1; /* build collision nodes */ while (dc > dn) { hp = erts_produce_heap(factory, HAMT_NODE_BITMAP_SZ(1), HALLOC_EXTRA); @@ -1400,7 +1399,7 @@ static Eterm flatmap_merge(Process *p, Eterm map1, Eterm map2) { /* Reshape map to a hashmap if the map exceeds the limit */ if (n > MAP_SMALL_MAP_LIMIT) { - Uint32 hx,sw; + erts_ihash_t hx,sw; Uint i; Eterm res; hxnode_t *hxns; @@ -1415,7 +1414,7 @@ static Eterm flatmap_merge(Process *p, Eterm map1, Eterm map2) { for (i = 0; i < n; i++) { hx = hashmap_make_hash(ks[i]); - swizzle32(sw,hx); + sw = swizzle_map_hash(hx); hxns[i].hx = sw; hxns[i].val = CONS(hp, ks[i], vs[i]); hp += 2; hxns[i].skip = 1; @@ -1440,7 +1439,7 @@ static Eterm map_merge_mixed(Process *p, Eterm flat, Eterm tree, int swap_args) flatmap_t *mp; Uint n, i; hxnode_t *hxns; - Uint32 sw, hx; + erts_ihash_t sw, hx; ErtsHeapFactory factory; /* convert flat to tree */ @@ -1461,7 +1460,7 @@ static Eterm map_merge_mixed(Process *p, Eterm flat, Eterm tree, int swap_args) for (i = 0; i < n; i++) { hx = hashmap_make_hash(ks[i]); - swizzle32(sw,hx); + sw = swizzle_map_hash(hx); hxns[i].hx = sw; hxns[i].val = CONS(hp, ks[i], vs[i]); hp += 2; hxns[i].skip = 1; @@ -1574,7 +1573,7 @@ static BIF_RETTYPE hashmap_merge(Process *p, Eterm map_A, Eterm map_B, PSTACK_DECLARE(s, 4); HashmapMergeContext local_ctx; struct HashmapMergePStackType* sp; - Uint32 hx; + erts_ihash_t hx; Eterm res = THE_NON_VALUE; Eterm hdrA, hdrB; Eterm *hp, *nhp; @@ -1875,29 +1874,35 @@ static Uint hashmap_subtree_size(Eterm node) { return size; } - -static int hash_cmp(Uint32 ha, Uint32 hb) +static int hash_cmp(erts_ihash_t ha, erts_ihash_t hb) { - int i; - for (i=0; i<8; i++) { - int cmp = (int)(ha & 0xF) - (int)(hb & 0xF); - if (cmp) - return cmp; - ha >>= 4; - hb >>= 4; + for (int i = 0; i < HAMT_MAX_LEVEL; i++) { + int cmp = (int)(ha & 0xF) - (int)(hb & 0xF); + + if (cmp) { + return cmp; + } + + ha >>= 4; + hb >>= 4; } + return 0; } int hashmap_key_hash_cmp(Eterm* ap, Eterm* bp) { if (ap && bp) { - Uint32 ha, hb; - ASSERT(CMP_TERM(CAR(ap), CAR(bp)) != 0); + erts_ihash_t ha, hb; + + ASSERT(CMP_TERM(CAR(ap), CAR(bp)) != 0); + ha = hashmap_make_hash(CAR(ap)); hb = hashmap_make_hash(CAR(bp)); + return hash_cmp(ha, hb); } + ASSERT(ap || bp); return ap ? -1 : 1; } @@ -1949,7 +1954,7 @@ BIF_RETTYPE maps_remove_2(BIF_ALIST_2) { */ int erts_maps_take(Process *p, Eterm key, Eterm map, Eterm *res, Eterm *value) { - Uint32 hx; + erts_ihash_t hx; Eterm ret; if (is_flatmap(map)) { Sint n; @@ -2041,7 +2046,7 @@ found_key: } int erts_maps_update(Process *p, Eterm key, Eterm value, Eterm map, Eterm *res) { - Uint32 hx; + erts_ihash_t hx; if (is_flatmap(map)) { Sint n,i; Eterm* hp,*shp; @@ -2110,7 +2115,7 @@ found_key: } Eterm erts_maps_put(Process *p, Eterm key, Eterm value, Eterm map) { - Uint32 hx; + erts_ihash_t hx; Eterm res; if (is_flatmap(map)) { Sint n,i; @@ -2427,7 +2432,7 @@ Eterm* hashmap_iterator_prev(ErtsWStack* s) { } const Eterm * -erts_hashmap_get(Uint32 hx, Eterm key, Eterm node) +erts_hashmap_get(erts_ihash_t hx, Eterm key, Eterm node) { Eterm *ptr, hdr; Uint ix, lvl = 0; @@ -2469,6 +2474,7 @@ erts_hashmap_get(Uint32 hx, Eterm key, Eterm node) } while (!is_arity_value(hdr)); /* collision node */ + ASSERT(lvl == HAMT_MAX_LEVEL); ix = arityval(hdr); ASSERT(ix > 1); do { @@ -2479,7 +2485,7 @@ erts_hashmap_get(Uint32 hx, Eterm key, Eterm node) return NULL; } -Eterm erts_hashmap_insert(Process *p, Uint32 hx, Eterm key, Eterm value, +Eterm erts_hashmap_insert(Process *p, erts_ihash_t hx, Eterm key, Eterm value, Eterm map, int is_update) { Uint size, upsz; Eterm *hp, res = THE_NON_VALUE; @@ -2508,13 +2514,14 @@ Eterm erts_hashmap_insert(Process *p, Uint32 hx, Eterm key, Eterm value, } -int erts_hashmap_insert_down(Uint32 hx, Eterm key, Eterm value, Eterm node, Uint *sz, +int erts_hashmap_insert_down(erts_ihash_t hx, Eterm key, Eterm value, Eterm node, Uint *sz, Uint *update_size, ErtsEStack *sp, int is_update) { Eterm *ptr; Eterm hdr, ckey; - Uint32 ix, cix, bp, hval, chx; + Uint32 ix, cix, bp, hval; Uint slot, lvl = 0, clvl; Uint size = 0, n = 0; + erts_ihash_t chx; *update_size = 1; @@ -2881,7 +2888,7 @@ static Eterm hashmap_values(Process* p, Eterm node) { } #endif /* INCLUDE_YCF_TRANSFORMED_ONLY_FUNCTIONS */ -static Eterm hashmap_delete(Process *p, Uint32 hx, Eterm key, +static Eterm hashmap_delete(Process *p, erts_ihash_t hx, Eterm key, Eterm map, Eterm *value) { Eterm *hp = NULL, *nhp = NULL, *hp_end = NULL; Eterm *ptr; @@ -2961,6 +2968,7 @@ static Eterm hashmap_delete(Process *p, Uint32 hx, Eterm key, goto not_found; default: /* collision node */ ERTS_ASSERT(is_arity_value(hdr)); + ASSERT(lvl == HAMT_MAX_LEVEL); n = arityval(hdr); ASSERT(n >= 2); for (slot = 0; slot < n; slot++) { @@ -3521,6 +3529,7 @@ static Eterm hashmap_info(Process *p, Eterm node) { break; default: /* collision node */ ERTS_ASSERT(is_arity_value(hdr)); + ASSERT(clvl == HAMT_MAX_LEVEL); ncollision++; sz = arityval(hdr); ASSERT(sz >= 2); @@ -4025,30 +4034,86 @@ BIF_RETTYPE erts_internal_map_next_3(BIF_ALIST_3) { } } -/* implementation of builtin emulations */ +/* Implementation of builtin emulations */ + +#if defined(ARCH_64) && (ERTS_AT_LEAST_GCC_VSN__(5, 1, 0) || \ + __has_builtin(__builtin_bswap64)) +# define hashmap_byte_swap(N) __builtin_bswap64((Uint64)(N)) +#elif defined(ARCH_32) && (ERTS_AT_LEAST_GCC_VSN__(5, 1, 0) || \ + __has_builtin(__builtin_bswap32)) +# define hashmap_byte_swap(N) __builtin_bswap32((Uint32)(N)) +#elif defined(_MSC_VER) && _MSC_VER >= 1900 +/* UCRT intrinsics are spread throughout the ordinary C headers, strangely + * enough. */ +# include <stdlib.h> + +# if defined(ARCH_64) +# define hashmap_byte_swap(N) _byteswap_uint64((Uint64)(N)) +# elif defined(ARCH_32) +# define hashmap_byte_swap(N) _byteswap_ulong((Uint32)(N)) +# endif +#else +/* No byte-swap intrinsic available. Fall back to C and hope that the compiler + * turns it into something efficient. */ +static ERTS_INLINE erts_ihash_t hashmap_byte_swap(erts_ihash_t hash) { + erts_ihash_t result = 0; + + for (int i = 0; i < sizeof(hash); i++) { + ERTS_CT_ASSERT(CHAR_BIT == 8); + result |= (((hash) >> i * 8) & 0xFF) << ((sizeof(hash) - i - 1) * 8); + } + + return result; +} +#endif + +static ERTS_INLINE erts_ihash_t swizzle_map_hash(erts_ihash_t hash) { + const erts_ihash_t mask = (erts_ihash_t)0xF0F0F0F0F0F0F0F0ull; + erts_ihash_t result; + + /* ABCDEFGH -> GHEFCDAB */ + result = hashmap_byte_swap(hash); + + /* GHEFCDAB -> HGFEDCBA */ + return ((result & mask)) >> 4 | ((result & (mask >> 4)) << 4); +} -#if !ERTS_AT_LEAST_GCC_VSN__(3, 4, 0) /* Count leading zeros emulation */ -Uint32 hashmap_clz(Uint32 x) { - Uint32 y; +#ifndef hashmap_clz +erts_ihash_t hashmap_clz(erts_ihash_t x) { + erts_ihash_t y; + +#if defined(ARCH_64) + int n = 64; + + y = x >> 32; if (y != 0) { n = n - 32; x = y; } +#elif defined(ARCH_32) int n = 32; - y = x >>16; if (y != 0) {n = n -16; x = y;} - y = x >> 8; if (y != 0) {n = n - 8; x = y;} - y = x >> 4; if (y != 0) {n = n - 4; x = y;} - y = x >> 2; if (y != 0) {n = n - 2; x = y;} - y = x >> 1; if (y != 0) return n - 2; +#endif + + y = x >> 16; if (y != 0) { n = n - 16; x = y; } + y = x >> 8; if (y != 0) { n = n - 8; x = y; } + y = x >> 4; if (y != 0) { n = n - 4; x = y; } + y = x >> 2; if (y != 0) { n = n - 2; x = y; } + y = x >> 1; if (y != 0) { return n - 2; } + return n - x; } - -const Uint32 SK5 = 0x55555555, SK3 = 0x33333333; -const Uint32 SKF0 = 0xF0F0F0F, SKFF = 0xFF00FF; +#endif /* CTPOP emulation */ -Uint32 hashmap_bitcount(Uint32 x) { - x -= ((x >> 1 ) & SK5); - x = (x & SK3 ) + ((x >> 2 ) & SK3 ); - x = (x & SKF0) + ((x >> 4 ) & SKF0); - x += x >> 8; - return (x + (x >> 16)) & 0x3F; +#ifndef hashmap_bitcount +erts_ihash_t hashmap_bitcount(erts_ihash_t x) { + const erts_ihash_t SK55 = (erts_ihash_t)0x5555555555555555ull; + const erts_ihash_t SK33 = (erts_ihash_t)0x3333333333333333ull; + const erts_ihash_t SK0F = (erts_ihash_t)0x0F0F0F0F0F0F0F0Full; + const erts_ihash_t SK01 = (erts_ihash_t)0x0101010101010101ull; + + x -= ((x >> 1) & SK55); + x = (x & SK33) + ((x >> 2) & SK33); + x = ((x + (x >> 4)) & SK0F); + x *= SK01; + + return x >> (sizeof(erts_ihash_t) - 1) * CHAR_BIT; } #endif diff --git a/erts/emulator/beam/erl_map.h b/erts/emulator/beam/erl_map.h index f046ae2093..27415a639e 100644 --- a/erts/emulator/beam/erl_map.h +++ b/erts/emulator/beam/erl_map.h @@ -23,14 +23,31 @@ #define __ERL_MAP_H__ #include "sys.h" +#include "erl_term_hashing.h" /* intrinsic wrappers */ -#if ERTS_AT_LEAST_GCC_VSN__(3, 4, 0) -#define hashmap_clz(x) ((Uint32) __builtin_clz((unsigned int)(x))) -#define hashmap_bitcount(x) ((Uint32) __builtin_popcount((unsigned int) (x))) +#if ERTS_AT_LEAST_GCC_VSN__(3, 4, 0) || __has_builtin(__builtin_clz) +# if defined(ARCH_64) +# define hashmap_clz(x) \ + ((erts_ihash_t)__builtin_clzl((erts_ihash_t)(x))) +# elif defined(ARCH_32) +# define hashmap_clz(x) \ + ((erts_ihash_t)__builtin_clz((erts_ihash_t)(x))) +# endif #else -Uint32 hashmap_clz(Uint32 x); -Uint32 hashmap_bitcount(Uint32 x); +erts_ihash_t hashmap_clz(erts_ihash_t x); +#endif + +#if ERTS_AT_LEAST_GCC_VSN__(3, 4, 0) || __has_builtin(__builtin_popcount) +# if defined(ARCH_64) +# define hashmap_bitcount(x) \ + ((erts_ihash_t)__builtin_popcountl((erts_ihash_t)(x))) +# elif defined(ARCH_32) +# define hashmap_bitcount(x) \ + ((erts_ihash_t)__builtin_popcount((erts_ihash_t)(x))) +# endif +#else +erts_ihash_t hashmap_bitcount(erts_ihash_t x); #endif /* MAP */ @@ -56,10 +73,10 @@ typedef struct flatmap_s { /* the head-node is a bitmap or array with an untagged size */ #define hashmap_size(x) (((hashmap_head_t*) hashmap_val(x))->size) -#define hashmap_make_hash(Key) make_map_hash(Key) +#define hashmap_make_hash(Key) erts_map_hash(Key) #define hashmap_restore_hash(Lvl, Key) \ - (ASSERT(Lvl < 8), \ + (ASSERT(Lvl < HAMT_MAX_LEVEL), \ hashmap_make_hash(Key) >> (4*(Lvl))) #define hashmap_shift_hash(Hx, Lvl, Key) \ @@ -85,9 +102,9 @@ int erts_maps_update(Process *p, Eterm key, Eterm value, Eterm map, Eterm *re int erts_maps_remove(Process *p, Eterm key, Eterm map, Eterm *res); int erts_maps_take(Process *p, Eterm key, Eterm map, Eterm *res, Eterm *value); -Eterm erts_hashmap_insert(Process *p, Uint32 hx, Eterm key, Eterm value, +Eterm erts_hashmap_insert(Process *p, erts_ihash_t hx, Eterm key, Eterm value, Eterm node, int is_update); -int erts_hashmap_insert_down(Uint32 hx, Eterm key, Eterm value, Eterm node, Uint *sz, +int erts_hashmap_insert_down(erts_ihash_t hx, Eterm key, Eterm value, Eterm node, Uint *sz, Uint *upsz, struct ErtsEStack_ *sp, int is_update); Eterm erts_hashmap_insert_up(Eterm *hp, Eterm key, Eterm value, Uint upsz, struct ErtsEStack_ *sp); @@ -110,7 +127,7 @@ Eterm erts_hashmap_from_ks_and_vs_extra(ErtsHeapFactory *factory, const Eterm *erts_maps_get(Eterm key, Eterm map); -const Eterm *erts_hashmap_get(Uint32 hx, Eterm key, Eterm map); +const Eterm *erts_hashmap_get(erts_ihash_t hx, Eterm key, Eterm map); Sint erts_map_size(Eterm map); @@ -191,9 +208,9 @@ typedef struct hashmap_head_s { #define HAMT_SUBTAG_HEAD_BITMAP ((MAP_HEADER_TAG_HAMT_HEAD_BITMAP << _HEADER_ARITY_OFFS) | MAP_SUBTAG) #define HAMT_SUBTAG_HEAD_FLATMAP ((MAP_HEADER_TAG_FLATMAP_HEAD << _HEADER_ARITY_OFFS) | MAP_SUBTAG) -#define hashmap_index(hash) (((Uint32)hash) & 0xf) +#define hashmap_index(hash) ((hash) & 0xf) -#define HAMT_MAX_LEVEL 8 +#define HAMT_MAX_LEVEL ((sizeof(erts_ihash_t) * CHAR_BIT) / 4) /* hashmap heap size: [one cons cell + one list term in parent node] per key diff --git a/erts/emulator/beam/erl_nif.c b/erts/emulator/beam/erl_nif.c index a9661dc780..119138eaab 100644 --- a/erts/emulator/beam/erl_nif.c +++ b/erts/emulator/beam/erl_nif.c @@ -1535,7 +1535,7 @@ ErlNifUInt64 enif_hash(ErlNifHash type, Eterm term, ErlNifUInt64 salt) { switch (type) { case ERL_NIF_INTERNAL_HASH: - return make_internal_hash(term, (Uint32) salt); + return erts_internal_salted_hash(term, (erts_ihash_t)salt); case ERL_NIF_PHASH2: /* It appears that make_hash2 doesn't always react to seasoning * as well as it should. Therefore, let's make it ignore the salt diff --git a/erts/emulator/beam/erl_proc_sig_queue.c b/erts/emulator/beam/erl_proc_sig_queue.c index ee1d345fb3..d6fc9bcfd4 100644 --- a/erts/emulator/beam/erl_proc_sig_queue.c +++ b/erts/emulator/beam/erl_proc_sig_queue.c @@ -8686,7 +8686,7 @@ erts_proc_sig_queue_try_enqueue_to_buffer(Eterm from, /* Use the sender id to hash to an outer signal queue buffer. This * guarantees that all signals from the same process are ordered in * send order. */ - slot = make_internal_hash(from, 0) % + slot = erts_internal_hash(from) % ERTS_PROC_SIG_INQ_BUFFERED_NR_OF_BUFFERS; buffer = &buffers->slots[slot]; diff --git a/erts/emulator/beam/erl_process_dict.c b/erts/emulator/beam/erl_process_dict.c index 4ffaf35edc..b2027fe207 100644 --- a/erts/emulator/beam/erl_process_dict.c +++ b/erts/emulator/beam/erl_process_dict.c @@ -53,11 +53,11 @@ /* Hash utility macros */ #define HASH_RANGE(PDict) ((PDict)->usedSlots) -#define MAKE_HASH(Term) \ - ((is_small(Term)) ? (Uint32) unsigned_val(Term) : \ - ((is_atom(Term)) ? \ - (Uint32) atom_val(Term) : \ - make_internal_hash(Term, 0))) +#define MAKE_HASH(Term) \ + ((is_small(Term)) ? (erts_ihash_t) unsigned_val(Term) : \ + ((is_atom(Term)) ? \ + (erts_ihash_t) atom_val(Term) : \ + erts_internal_hash(Term))) #define PD_SZ2BYTES(Sz) (sizeof(ProcDict) + ((Sz) - 1)*sizeof(Eterm)) @@ -103,7 +103,7 @@ static void grow(Process *p); static void array_shrink(ProcDict **ppd, unsigned int need); static void ensure_array_size(ProcDict**, unsigned int size); -static unsigned int pd_hash_value_to_ix(ProcDict *pdict, Uint32 hx); +static unsigned int pd_hash_value_to_ix(ProcDict *pdict, erts_ihash_t hx); static unsigned int next_array_size(unsigned int need); /* @@ -441,12 +441,12 @@ static void pd_hash_erase_all(Process *p) } } -Uint32 erts_pd_make_hx(Eterm key) +erts_ihash_t erts_pd_make_hx(Eterm key) { return MAKE_HASH(key); } -Eterm erts_pd_hash_get_with_hx(Process *p, Uint32 hx, Eterm id) +Eterm erts_pd_hash_get_with_hx(Process *p, erts_ihash_t hx, Eterm id) { unsigned int hval; ProcDict *pd = p->dictionary; @@ -1003,7 +1003,7 @@ static void ensure_array_size(ProcDict **ppdict, unsigned int size) ** Basic utilities */ -static unsigned int pd_hash_value_to_ix(ProcDict *pdict, Uint32 hx) +static unsigned int pd_hash_value_to_ix(ProcDict *pdict, erts_ihash_t hx) { Uint high; diff --git a/erts/emulator/beam/erl_process_dict.h b/erts/emulator/beam/erl_process_dict.h index 3ff2354f91..a79de56faf 100644 --- a/erts/emulator/beam/erl_process_dict.h +++ b/erts/emulator/beam/erl_process_dict.h @@ -20,7 +20,9 @@ #ifndef _ERL_PROCESS_DICT_H #define _ERL_PROCESS_DICT_H + #include "sys.h" +#include "erl_term_hashing.h" typedef struct proc_dict { unsigned int sizeMask; @@ -43,7 +45,8 @@ void erts_deep_dictionary_dump(fmtfn_t to, void *to_arg, Eterm erts_dictionary_copy(ErtsHeapFactory *hfact, ProcDict *pd, Uint reserve_size); Eterm erts_pd_hash_get(struct process *p, Eterm id); -Uint32 erts_pd_make_hx(Eterm key); -Eterm erts_pd_hash_get_with_hx(Process *p, Uint32 hx, Eterm id); + +erts_ihash_t erts_pd_make_hx(Eterm key); +Eterm erts_pd_hash_get_with_hx(Process *p, erts_ihash_t hx, Eterm id); #endif diff --git a/erts/emulator/beam/erl_term_hashing.c b/erts/emulator/beam/erl_term_hashing.c index 848757c2f2..e13c5e9773 100644 --- a/erts/emulator/beam/erl_term_hashing.c +++ b/erts/emulator/beam/erl_term_hashing.c @@ -32,14 +32,6 @@ #include "erl_binary.h" #include "erl_bits.h" -#ifdef ERL_INTERNAL_HASH_CRC32C -# if defined(__x86_64__) -# include <immintrin.h> -# elif defined(__aarch64__) -# include <arm_acle.h> -# endif -#endif - /* *\ * * \* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ @@ -1532,6 +1524,16 @@ make_hash2_helper(Eterm term_param, const int can_trap, Eterm* state_mref_write_ #undef TRAP_LOCATION_NO_CTX } +#undef HASH_MAP_TAIL +#undef HASH_MAP_PAIR + +#undef UINT32_HASH_2 +#undef UINT32_HASH +#undef SINT32_HASH + +#undef HCONST +#undef MIX + Uint32 make_hash2(Eterm term) { @@ -1559,124 +1561,256 @@ trapping_make_hash2(Eterm term, Eterm* state_mref_write_back, Process* p) * hashmap implementation that now uses collision nodes at the bottom of * the HAMT when all hash bits are exhausted. * - */ + * The underlying hash primitive is the public-domain `MurmurHash3` by Austin + * Appleby, which has been modified to work incrementally over our terms rather + * than plain byte arrays. It provides a decent 128-bit hash with good + * performance on most hardware, only narrowly losing to variants that use + * specialized instructions (e.g. SHA3 or AES) that are much harder to + * maintain. + * + * Note that we only implement the 64-bit variant of MurmurHash and skip the + * 32-bit optimized version, as the difference in performance appears to be + * modest on the most popular 32-bit platform (ARM). It should not be terribly + * difficult to adapt this for both versions if that becomes a problem. */ + +enum { + IHASH_TYPE_IMMEDIATE = 1, + IHASH_TYPE_ARRAY_ELEMENT, + IHASH_TYPE_CAR, + IHASH_TYPE_CDR, + IHASH_TYPE_STRING, + IHASH_TYPE_TUPLE, + IHASH_TYPE_FLATMAP, + IHASH_TYPE_HASHMAP_HEAD_ARRAY, + IHASH_TYPE_HASHMAP_HEAD_BITMAP, + IHASH_TYPE_HASHMAP_NODE, + IHASH_TYPE_BINARY, + IHASH_TYPE_LOCAL_FUN, + IHASH_TYPE_EXTERNAL_FUN, + IHASH_TYPE_NEG_BIGNUM, + IHASH_TYPE_POS_BIGNUM, + IHASH_TYPE_LOCAL_REF, + IHASH_TYPE_EXTERNAL_REF, + IHASH_TYPE_EXTERNAL_PID, + IHASH_TYPE_EXTERNAL_PORT, + IHASH_TYPE_FLOAT +}; + +#define IHASH_CAR_MARKER (_make_header(1,_TAG_HEADER_REF)) +#define IHASH_CDR_MARKER (_make_header(2,_TAG_HEADER_REF)) + +#define ROTL64(x, y) (x << y) | (x >> (64 - y)); + +static const Uint64 IHASH_C1 = 0x87C37B91114253D5ull; +static const Uint64 IHASH_C2 = 0x4CF5AD432745937Full; + +#define IHASH_MIX_ALPHA(Expr) \ + do { \ + Uint64 expr = (Uint64)(Expr); \ + expr *= IHASH_C1; \ + expr = ROTL64(expr, 31); \ + expr *= IHASH_C2; \ + hash_alpha ^= expr; \ + hash_alpha = ROTL64(hash_alpha, 27) \ + hash_alpha += hash_beta; \ + hash_alpha = hash_alpha * 5 + 0x52DCE729ull; \ + hash_ticks += 1; \ + } while(0) + +#define IHASH_MIX_ALPHA_2F32(Expr1, Expr2) \ + IHASH_MIX_ALPHA((Uint64)(Expr1) | ((Uint64)(Expr2) << 32)) + +#define IHASH_MIX_BETA(Expr) \ + do { \ + Uint64 expr = (Uint64)(Expr); \ + expr *= IHASH_C2; \ + expr = ROTL64(expr, 33); \ + expr *= IHASH_C1; \ + hash_beta ^= expr; \ + hash_beta = ROTL64(hash_beta, 31); \ + hash_beta += hash_alpha; \ + hash_beta = hash_beta * 5 + 0x38495AB5ull; \ + hash_ticks += 1; \ + } while(0) + +#define IHASH_MIX_BETA_2F32(Expr1, Expr2) \ + IHASH_MIX_BETA((Uint64)(Expr1) | ((Uint64)(Expr2) << 32)) + +#ifdef ARCH_64 +# define IHASH_MIX_IMMEDIATE(term) \ + do { \ + IHASH_MIX_ALPHA(IHASH_TYPE_IMMEDIATE); \ + IHASH_MIX_BETA(term); \ + } while(0) +#else +# define IHASH_MIX_IMMEDIATE(term) \ + IHASH_MIX_ALPHA_2F32(IHASH_TYPE_IMMEDIATE, term); +#endif -/* Use a better mixing function if available. */ -#if defined(ERL_INTERNAL_HASH_CRC32C) -# undef MIX -# if defined(__x86_64__) -# define MIX(a,b,c) \ - do { \ - Uint32 initial_hash = c; \ - c = __builtin_ia32_crc32si(c, a); \ - c = __builtin_ia32_crc32si(c + initial_hash, b); \ - } while(0) -# elif defined(__aarch64__) -# define MIX(a,b,c) \ - do { \ - Uint32 initial_hash = c; \ - c = __crc32cw(c, a); \ - c = __crc32cw(c + initial_hash, b); \ - } while(0) -# else -# error "No suitable CRC32 intrinsic available." -# endif +/* Pushes a term to the stack, optionally handling it up-front if it's an + * immediate to speed up `{atom(), immed()}` keys in maps. We hash the presence + * of non-immediates to ensure that terms with a different internal order hash + * differently. + * + * Take for example `{a,{},b,{}}` and `{{},a,{},b}`. This will be processed in + * the order `a,b,{},{}` in both cases as the non-immediates are deferred. If + * we don't hash the order of the terms, they will always hash equally. */ +#define IHASH_PUSH_TERM(stack, term) \ + do { \ + if (ERTS_LIKELY(is_immed(term))) { \ + IHASH_MIX_IMMEDIATE(term); \ + } else { \ + IHASH_MIX_ALPHA(IHASH_TYPE_ARRAY_ELEMENT); \ + ESTACK_PUSH(stack, (term)); \ + } \ + } while(0) + +/* Endian-agnostic 64-bit read. This helps the compiler generate optimized code + * in a hot loop where the data is unlikely to be properly aligned, saving us + * from having to wrangle that manually. */ +static ERTS_FORCE_INLINE +Uint64 read_u64(const byte *data) { + Uint64 value = 0; + + for (int i = 0; i < sizeof(Uint64); i++) { +#ifdef WORDS_BIGENDIAN + value = ((Uint64)data[i]) | (value << CHAR_BIT); +#else + value |= ((Uint64)data[i]) << (i * CHAR_BIT); #endif + } -#define CONST_HASH(AConst) \ - do { /* Lightweight mixing of constant (type info) */ \ - hash ^= AConst; \ - hash = (hash << 17) ^ (hash >> (32-17)); \ - } while (0) + return value; +} -/* - * Start with salt, 32-bit prime number, to avoid getting same hash as phash2 - * which can cause bad hashing in distributed ETS tables for example. - */ -#define INTERNAL_HASH_SALT 3432918353U +static Uint64 ihash_mix64(Uint64 input) +{ + Uint64 hash = input; + + hash ^= hash >> 33; + hash *= 0xFF51AFD7ED558CCDull; + hash ^= hash >> 33; + hash *= 0xC4CEB9FE1A85EC53ull; + hash ^= hash >> 33; + + /* Inverse, if needed for testing. The constants are the modular inverse of + * the ones above (over 1 << 64). + * + * hash ^= hash >> 33; + * hash *= 0x9CB4B2F8129337DBull; + * hash ^= hash >> 33; + * hash *= 0x4F74430C22A54005ull; + * hash ^= hash >> 33; */ -Uint32 -make_internal_hash(Eterm term, Uint32 salt) + return hash; +} + +static erts_ihash_t +make_internal_hash(Eterm term, erts_ihash_t salt) { - Uint32 hash = salt ^ INTERNAL_HASH_SALT; + Uint64 hash_alpha, hash_beta; + Uint hash_ticks; - /* Optimization. Simple cases before declaration of estack. */ - if (primary_tag(term) == TAG_PRIMARY_IMMED1) { - #if ERTS_SIZEOF_ETERM == 8 - UINT32_HASH_2((Uint32)term, (Uint32)(term >> 32), HCONST); - #elif ERTS_SIZEOF_ETERM == 4 - UINT32_HASH(term, HCONST); - #else - # error "No you don't" - #endif - return hash; - } - { - Eterm tmp; DECLARE_ESTACK(s); + hash_alpha = (Uint64)salt; + hash_beta = (Uint64)salt; + hash_ticks = 0; + for (;;) { switch (primary_tag(term)) { case TAG_PRIMARY_LIST: { - int c = 0; - Uint32 sh = 0; - Eterm* ptr = list_val(term); - while (is_byte(*ptr)) { - /* Optimization for strings. */ - sh = (sh << 8) + unsigned_val(*ptr); - if (c == 3) { - UINT32_HASH(sh, HCONST_4); - c = sh = 0; - } else { - c++; - } - term = CDR(ptr); - if (is_not_list(term)) + const Eterm *cell; + UWord value = 0; + int bytes = 0; + + /* Optimization for strings. */ + while (is_list(term)) { + cell = list_val(term); + + if (!is_byte(CAR(cell))) { break; - ptr = list_val(term); + } + + value = (value << 8) | unsigned_val(CAR(cell)); + bytes++; + + if ((bytes % 4) == 0) { + IHASH_MIX_ALPHA_2F32(IHASH_TYPE_STRING | (bytes << 8), + value); + value = 0; + bytes = 0; + } + + term = CDR(cell); + } + + if (bytes > 0) { + IHASH_MIX_ALPHA_2F32(IHASH_TYPE_STRING | (bytes << 8), value); } - if (c > 0) - UINT32_HASH_2(sh, (Uint32)c, HCONST_22); if (is_list(term)) { - tmp = CDR(ptr); - CONST_HASH(HCONST_17); /* Hash CAR in cons cell */ - ESTACK_PUSH(s, tmp); - if (is_not_list(tmp)) { - ESTACK_PUSH(s, HASH_CDR); + Eterm head, tail; + + cell = list_val(term); + head = CAR(cell); + tail = CDR(cell); + + if (is_immed(head)) { + IHASH_MIX_ALPHA_2F32(IHASH_TYPE_IMMEDIATE, IHASH_TYPE_CAR); + IHASH_MIX_BETA(head); + + if (is_not_list(tail)) { + IHASH_MIX_ALPHA(IHASH_TYPE_CDR); + } + + term = tail; + } else { + ESTACK_PUSH(s, tail); + if (is_not_list(tail)) { + ESTACK_PUSH(s, IHASH_CDR_MARKER); + } + + IHASH_MIX_ALPHA(IHASH_TYPE_CAR); + term = head; } - term = CAR(ptr); } + + continue; } break; case TAG_PRIMARY_BOXED: { Eterm hdr = *boxed_val(term); ASSERT(is_header(hdr)); + switch (hdr & _TAG_HEADER_MASK) { case ARITYVAL_SUBTAG: { - int i; - int arity = header_arity(hdr); - Eterm* elem = tuple_val(term); - UINT32_HASH(arity, HCONST_9); - if (arity == 0) /* Empty tuple */ - goto pop_next; - for (i = arity; ; i--) { - term = elem[i]; - if (i == 1) - break; - ESTACK_PUSH(s, term); + const Eterm *elements = &tuple_val(term)[0]; + const int arity = header_arity(hdr); + + IHASH_MIX_ALPHA(IHASH_TYPE_TUPLE); + IHASH_MIX_BETA(arity); + + if (arity > 0) { + for (int i = 1; i < arity; i++) { + IHASH_PUSH_TERM(s, elements[i]); + } + + term = elements[arity]; + continue; } + + goto pop_next; } break; case MAP_SUBTAG: { - Eterm* ptr = boxed_val(term) + 1; + const Eterm *elements = &boxed_val(term)[1]; Uint size; - int i; /* * We rely on key-value iteration order being constant @@ -1685,79 +1819,113 @@ make_internal_hash(Eterm term, Uint32 salt) switch (hdr & _HEADER_MAP_SUBTAG_MASK) { case HAMT_SUBTAG_HEAD_FLATMAP: { - flatmap_t *mp = (flatmap_t *)flatmap_val(term); - Eterm *ks = flatmap_get_keys(mp); - Eterm *vs = flatmap_get_values(mp); - size = flatmap_get_size(mp); - UINT32_HASH(size, HCONST_16); - if (size == 0) - goto pop_next; + const flatmap_t *mp = (const flatmap_t *)flatmap_val(term); + const Eterm *ks = flatmap_get_keys(mp); + const Eterm *vs = flatmap_get_values(mp); + size = flatmap_get_size(mp); + + IHASH_MIX_ALPHA(IHASH_TYPE_FLATMAP); + IHASH_MIX_BETA(size); + + if (size > 0) { + for (int i = 0; i < size - 1; i++) { + IHASH_PUSH_TERM(s, vs[i]); + IHASH_PUSH_TERM(s, ks[i]); + } - for (i = size - 1; i >= 0; i--) { - ESTACK_PUSH(s, vs[i]); - ESTACK_PUSH(s, ks[i]); + IHASH_PUSH_TERM(s, vs[size - 1]); + term = ks[size - 1]; + continue; } + goto pop_next; } case HAMT_SUBTAG_HEAD_ARRAY: + size = *elements++; + + IHASH_MIX_ALPHA(IHASH_TYPE_HASHMAP_HEAD_ARRAY); + IHASH_MIX_BETA(size); + + if (size == 0) { + goto pop_next; + } + break; case HAMT_SUBTAG_HEAD_BITMAP: - size = *ptr++; - UINT32_HASH(size, HCONST_16); - if (size == 0) + size = *elements++; + + IHASH_MIX_ALPHA(IHASH_TYPE_HASHMAP_HEAD_BITMAP); + IHASH_MIX_BETA(size); + + if (size == 0) { goto pop_next; + } + break; + case HAMT_SUBTAG_NODE_BITMAP: + IHASH_MIX_ALPHA(IHASH_TYPE_HASHMAP_NODE); + break; } + switch (hdr & _HEADER_MAP_SUBTAG_MASK) { case HAMT_SUBTAG_HEAD_ARRAY: - i = 16; + size = 16; break; case HAMT_SUBTAG_HEAD_BITMAP: case HAMT_SUBTAG_NODE_BITMAP: - i = hashmap_bitcount(MAP_HEADER_VAL(hdr)); + size = hashmap_bitcount(MAP_HEADER_VAL(hdr)); break; default: erts_exit(ERTS_ERROR_EXIT, "bad header"); } - while (i) { - if (is_list(*ptr)) { - Eterm* cons = list_val(*ptr); - ESTACK_PUSH(s, CDR(cons)); - ESTACK_PUSH(s, CAR(cons)); - } - else { - ASSERT(is_boxed(*ptr)); - /* no special treatment of collision nodes needed, - hash them as the tuples they are */ - ESTACK_PUSH(s, *ptr); + + for (int i = 0; i < size; i++) { + if (is_list(elements[i])) { + /* [Key | Value] */ + const Eterm *cons = list_val(elements[i]); + IHASH_PUSH_TERM(s, CDR(cons)); + IHASH_PUSH_TERM(s, CAR(cons)); + } else { + /* Child or collision node. We don't need to treat the + * latter in any special way, and can hash them as the + * tuples they are. */ + ASSERT(is_boxed(elements[i])); + ESTACK_PUSH(s, elements[i]); } - i--; ptr++; } + goto pop_next; } break; case FUN_SUBTAG: { - ErlFunThing* funp = (ErlFunThing *) fun_val(term); + const ErlFunThing *funp = (const ErlFunThing*)fun_val(term); if (is_local_fun(funp)) { - ErlFunEntry* fe = funp->entry.fun; + const ErlFunEntry *fe = funp->entry.fun; Uint num_free = funp->num_free; - UINT32_HASH_2(num_free, fe->module, HCONST_20); - UINT32_HASH_2(fe->index, fe->old_uniq, HCONST_21); - if (num_free == 0) { - goto pop_next; - } else { - Eterm* bptr = funp->env + num_free - 1; - while (num_free-- > 1) { - term = *bptr--; - ESTACK_PUSH(s, term); + + IHASH_MIX_ALPHA_2F32(IHASH_TYPE_LOCAL_FUN, num_free); + IHASH_MIX_BETA_2F32(fe->index, fe->old_uniq); + + IHASH_MIX_ALPHA(IHASH_TYPE_IMMEDIATE); + IHASH_MIX_BETA(fe->module); + + if (num_free > 0) { + for (int i = 0; i < num_free - 1; i++) { + IHASH_PUSH_TERM(s, funp->env[i]); } - term = *bptr; + + term = funp->env[num_free - 1]; + continue; } + + goto pop_next; } else { ASSERT(is_external_fun(funp) && funp->next == NULL); /* Assumes Export entries never move */ - POINTER_HASH(funp->entry.exp, HCONST_14); + IHASH_MIX_ALPHA(IHASH_TYPE_EXTERNAL_FUN); + IHASH_MIX_BETA((UWord)funp->entry.exp); + goto pop_next; } } @@ -1766,249 +1934,303 @@ make_internal_hash(Eterm term, Uint32 salt) case HEAP_BINARY_SUBTAG: case SUB_BINARY_SUBTAG: { - byte* bptr; - Uint sz = binary_size(term); - Uint32 con = HCONST_13 + hash; - Uint bitoffs; - Uint bitsize; + Uint bit_offset, bit_size, byte_size; + const byte *data; + + ERTS_GET_BINARY_BYTES(term, data, bit_offset, bit_size); + byte_size = binary_size(term); + + IHASH_MIX_ALPHA_2F32(IHASH_TYPE_BINARY, bit_size); + IHASH_MIX_BETA(byte_size); + + if (byte_size > 0 || bit_size > 0) { + const byte *bytes = data; + Uint64 value; + Uint it; + + if (ERTS_UNLIKELY(bit_offset != 0)) { + byte *tmp = (byte*)erts_alloc(ERTS_ALC_T_TMP, + byte_size + (bit_size != 0)); + erts_copy_bits(data, bit_offset, 1, tmp, 0, 1, + byte_size * 8 + bit_size); + bytes = tmp; + } - ERTS_GET_BINARY_BYTES(term, bptr, bitoffs, bitsize); - if (sz == 0 && bitsize == 0) { - hash = con; - } else { - if (bitoffs == 0) { - hash = block_hash(bptr, sz, con); - if (bitsize > 0) { - UINT32_HASH_2(bitsize, (bptr[sz] >> (8 - bitsize)), - HCONST_15); + for (it = 0; + (it + sizeof(Uint64[2])) <= byte_size; + it += sizeof(Uint64[2])) { + IHASH_MIX_ALPHA(read_u64(&bytes[it])); + IHASH_MIX_BETA(read_u64(&bytes[it + sizeof(Uint64)])); + } + + value = 0; + switch(byte_size % sizeof(Uint64[2])) + { + case 15: value ^= ((Uint64)bytes[it + 14]) << 0x30; + case 14: value ^= ((Uint64)bytes[it + 13]) << 0x28; + case 13: value ^= ((Uint64)bytes[it + 12]) << 0x20; + case 12: value ^= ((Uint64)bytes[it + 11]) << 0x18; + case 11: value ^= ((Uint64)bytes[it + 10]) << 0x10; + case 10: value ^= ((Uint64)bytes[it + 9]) << 0x08; + case 9: value ^= ((Uint64)bytes[it + 8]) << 0x00; + { + value *= IHASH_C2; + value = ROTL64(value, 33); + value *= IHASH_C1; + hash_beta ^= value; + value = 0; + /* !! FALL THROUGH !! */ } - } else { - byte* buf = (byte *) erts_alloc(ERTS_ALC_T_TMP, - sz + (bitsize != 0)); - erts_copy_bits(bptr, bitoffs, 1, buf, 0, 1, sz*8+bitsize); - hash = block_hash(buf, sz, con); - if (bitsize > 0) { - UINT32_HASH_2(bitsize, (buf[sz] >> (8 - bitsize)), - HCONST_15); + case 8: value ^= ((Uint64)bytes[it + 7]) << 0x38; + case 7: value ^= ((Uint64)bytes[it + 6]) << 0x30; + case 6: value ^= ((Uint64)bytes[it + 5]) << 0x28; + case 5: value ^= ((Uint64)bytes[it + 4]) << 0x20; + case 4: value ^= ((Uint64)bytes[it + 3]) << 0x18; + case 3: value ^= ((Uint64)bytes[it + 2]) << 0x10; + case 2: value ^= ((Uint64)bytes[it + 1]) << 0x08; + case 1: value ^= ((Uint64)bytes[it + 0]) << 0x00; + { + value *= IHASH_C1; + value = ROTL64(value, 31); + value *= IHASH_C2; + hash_alpha ^= value; + break; } - erts_free(ERTS_ALC_T_TMP, (void *) buf); + }; + + if (bit_size > 0) { + IHASH_MIX_ALPHA(bytes[byte_size] >> (8 - bit_size)); + } + + if (bytes != data) { + erts_free(ERTS_ALC_T_TMP, (void *)bytes); } } + goto pop_next; } break; case POS_BIG_SUBTAG: case NEG_BIG_SUBTAG: { - Eterm* ptr = big_val(term); - Uint i = 0; - Uint n = BIG_SIZE(ptr); - Uint32 con = BIG_SIGN(ptr) ? HCONST_10 : HCONST_11; -#if D_EXP == 16 - do { - Uint32 x, y; - x = i < n ? BIG_DIGIT(ptr, i++) : 0; - x += (Uint32)(i < n ? BIG_DIGIT(ptr, i++) : 0) << 16; - y = i < n ? BIG_DIGIT(ptr, i++) : 0; - y += (Uint32)(i < n ? BIG_DIGIT(ptr, i++) : 0) << 16; - UINT32_HASH_2(x, y, con); - } while (i < n); -#elif D_EXP == 32 - do { - Uint32 x, y; - x = i < n ? BIG_DIGIT(ptr, i++) : 0; - y = i < n ? BIG_DIGIT(ptr, i++) : 0; - UINT32_HASH_2(x, y, con); - } while (i < n); -#elif D_EXP == 64 - do { - Uint t; - Uint32 x, y; - ASSERT(i < n); - t = BIG_DIGIT(ptr, i++); - x = t & 0xffffffff; - y = t >> 32; - UINT32_HASH_2(x, y, con); - } while (i < n); -#else -#error "unsupported D_EXP size" -#endif + const Eterm *ptr = big_val(term); + int i, n; + + /* `n` must fit in a signed int. */ + ERTS_CT_ASSERT((1ull << 31) > (Uint64)BIG_ARITY_MAX); + n = BIG_SIZE(ptr); + ASSERT(n < BIG_ARITY_MAX); + + IHASH_MIX_ALPHA_2F32((BIG_SIGN(ptr) ? + IHASH_TYPE_NEG_BIGNUM : + IHASH_TYPE_POS_BIGNUM), + n); + + for (i = 0; (i + 2) <= n; i += 2) { + IHASH_MIX_ALPHA(BIG_DIGIT(ptr, i+0)); + IHASH_MIX_BETA(BIG_DIGIT(ptr, i+1)); + } + + if (i < n) { + IHASH_MIX_BETA(BIG_DIGIT(ptr, i)); + } + goto pop_next; } break; case REF_SUBTAG: { Uint32 *numbers = internal_ref_numbers(term); ASSERT(internal_ref_no_numbers(term) >= 3); - UINT32_HASH(numbers[0], HCONST_7); - UINT32_HASH_2(numbers[1], numbers[2], HCONST_8); + + IHASH_MIX_ALPHA_2F32(IHASH_TYPE_LOCAL_REF, numbers[0]); + IHASH_MIX_BETA_2F32(numbers[1], numbers[2]); + if (is_internal_pid_ref(term)) { #ifdef ARCH_64 ASSERT(internal_ref_no_numbers(term) == 5); - UINT32_HASH_2(numbers[3], numbers[4], HCONST_9); + IHASH_MIX_ALPHA_2F32(numbers[3], numbers[4]); #else ASSERT(internal_ref_no_numbers(term) == 4); - UINT32_HASH(numbers[3], HCONST_9); + IHASH_MIX_ALPHA(numbers[3]); #endif } + goto pop_next; } case EXTERNAL_REF_SUBTAG: { - ExternalThing* thing = external_thing_ptr(term); - Uint n = external_thing_ref_no_numbers(thing); - Uint32 *numbers = external_thing_ref_numbers(thing); + const ExternalThing* thing = external_thing_ptr(term); + const Uint32 *numbers; + int i, n; /* Can contain 0 to 5 32-bit numbers... */ + n = external_thing_ref_no_numbers(thing); + numbers = external_thing_ref_numbers(thing); + ASSERT(n <= 5); - /* See limitation #2 */ - switch (n) { - case 5: { - Uint32 num4 = numbers[4]; - if (0) { - case 4: - num4 = 0; - /* Fall through... */ - } - UINT32_HASH_2(numbers[3], num4, HCONST_9); - /* Fall through... */ - } - case 3: { - Uint32 num2 = numbers[2]; - if (0) { - case 2: - num2 = 0; - /* Fall through... */ - } - UINT32_HASH_2(numbers[1], num2, HCONST_8); - /* Fall through... */ + IHASH_MIX_ALPHA_2F32(IHASH_TYPE_EXTERNAL_REF, n); + + for (i = 0; (i + 2) <= n; i += 2) { + IHASH_MIX_BETA_2F32(numbers[i], numbers[i + 1]); } - case 1: -#ifdef ARCH_64 - POINTER_HASH(thing->node, HCONST_7); - UINT32_HASH(numbers[0], HCONST_7); -#else - UINT32_HASH_2(thing->node, numbers[0], HCONST_7); -#endif - break; - case 0: - POINTER_HASH(thing->node, HCONST_7); - break; - default: - ASSERT(!"Invalid amount of external reference numbers"); - break; + + if (i < n) { + IHASH_MIX_BETA(numbers[i]); } + + IHASH_MIX_ALPHA((UWord)thing->node); goto pop_next; } case EXTERNAL_PID_SUBTAG: { - ExternalThing* thing = external_thing_ptr(term); + const ExternalThing *thing = external_thing_ptr(term); /* See limitation #2 */ - POINTER_HASH(thing->node, HCONST_5); - UINT32_HASH_2(thing->data.pid.num, thing->data.pid.ser, HCONST_5); + IHASH_MIX_ALPHA(IHASH_TYPE_EXTERNAL_PID); + IHASH_MIX_BETA((UWord)thing->node); + IHASH_MIX_ALPHA_2F32(thing->data.pid.num, thing->data.pid.ser); goto pop_next; } case EXTERNAL_PORT_SUBTAG: { - ExternalThing* thing = external_thing_ptr(term); + const ExternalThing *thing = external_thing_ptr(term); /* See limitation #2 */ - POINTER_HASH(thing->node, HCONST_6); - UINT32_HASH_2(thing->data.ui32[0], thing->data.ui32[1], HCONST_6); + IHASH_MIX_ALPHA(IHASH_TYPE_EXTERNAL_PORT); + IHASH_MIX_BETA((UWord)thing->node); +#ifdef ARCH_64 + IHASH_MIX_ALPHA(thing->data.port.id); +#else + IHASH_MIX_ALPHA_2F32(thing->data.port.low, + thing->data.port.high); +#endif goto pop_next; } case FLOAT_SUBTAG: { FloatDef ff; + GET_DOUBLE(term, ff); + if (ff.fd == 0.0f) { /* ensure positive 0.0 */ ff.fd = erts_get_positive_zero_float(); } - UINT32_HASH_2(ff.fw[0], ff.fw[1], HCONST_12); + + IHASH_MIX_ALPHA(IHASH_TYPE_FLOAT); + IHASH_MIX_BETA_2F32(ff.fw[0], ff.fw[1]); + goto pop_next; } default: - erts_exit(ERTS_ERROR_EXIT, "Invalid tag in make_internal_hash(0x%X, %lu)\n", term, salt); + erts_exit(ERTS_ERROR_EXIT, + "Invalid tag in make_internal_hash(0x%X, _, %i)\n", + term); } } break; case TAG_PRIMARY_IMMED1: - #if ERTS_SIZEOF_ETERM == 8 - UINT32_HASH_2((Uint32)term, (Uint32)(term >> 32), HCONST); - #else - UINT32_HASH(term, HCONST); - #endif + IHASH_MIX_IMMEDIATE(term); goto pop_next; default: - erts_exit(ERTS_ERROR_EXIT, "Invalid tag in make_internal_hash(0x%X, %lu)\n", term, salt); + erts_exit(ERTS_ERROR_EXIT, + "Invalid tag in make_internal_hash(0x%X, _, %i)\n", + term); pop_next: if (ESTACK_ISEMPTY(s)) { DESTROY_ESTACK(s); - return hash; + hash_alpha ^= hash_ticks; + hash_beta ^= hash_ticks; + + hash_alpha += hash_beta; + hash_beta += hash_alpha; + + hash_alpha = ihash_mix64(hash_alpha); + hash_beta = ihash_mix64(hash_beta); + + hash_alpha += hash_beta; + hash_beta += hash_alpha; + + return (erts_ihash_t)(hash_alpha ^ hash_beta); } term = ESTACK_POP(s); switch (term) { - case HASH_CDR: - CONST_HASH(HCONST_18); /* Hash CDR i cons cell */ - goto pop_next; - default: - break; + case IHASH_CAR_MARKER: + /* Hash CAR in cons cell */ + IHASH_MIX_BETA(IHASH_TYPE_CAR); + term = ESTACK_POP(s); + continue; + case IHASH_CDR_MARKER: + /* Hash CDR in cons cell */ + IHASH_MIX_BETA(IHASH_TYPE_CDR); + term = ESTACK_POP(s); + continue; } } } - } - } #ifdef DBG_HASHMAP_COLLISION_BONANZA -Uint32 erts_dbg_hashmap_collision_bonanza(Uint32 hash, Eterm key) +erts_ihash_t erts_dbg_hashmap_collision_bonanza(erts_ihash_t hash, Eterm key) { -/*{ - static Uint32 hashvec[7] = { - 0x02345678, - 0x12345678, - 0xe2345678, - 0xf2345678, - 0x12abcdef, - 0x13abcdef, - 0xcafebabe - }; - hash = hashvec[hash % (sizeof(hashvec) / sizeof(hashvec[0]))]; - }*/ - const Uint32 bad_hash = (hash & 0x12482481) * 1442968193; - const Uint32 bad_bits = hash % 67; - if (bad_bits < 32) { + /* Keep only 8 bits to ensure a high collision rate (1/256). */ + erts_ihash_t bad_hash = (hash & 0x12482481u); + erts_ihash_t bad_bits; + + switch (sizeof(erts_ihash_t) * CHAR_BIT) { + case 64: + bad_hash *= UWORD_CONSTANT(11400714819323198485); + bad_hash ^= (bad_hash >> 31); + bad_bits = hash % 137; + break; + case 32: + bad_hash *= UWORD_CONSTANT(2654435769); + bad_hash ^= (bad_hash >> 15); + bad_bits = hash % 67; + break; + default: + ASSERT(!"Unknown sizeof(erts_ihash_t)"); + } + + (void)key; + + if (bad_bits < (sizeof(erts_ihash_t) * CHAR_BIT)) { /* Mix in a number of high good bits to get "randomly" close - to the collision nodes */ - const Uint32 bad_mask = (1 << bad_bits) - 1; - return (hash & ~bad_mask) | (bad_hash & bad_mask); + * to the collision nodes */ + const erts_ihash_t bad_mask = (1 << bad_bits) - 1; + bad_hash = (hash & ~bad_mask) | (bad_hash & bad_mask); } + return bad_hash; } #endif -/* Term hash function for hashmaps */ -Uint32 make_map_hash(Eterm key) { - Uint32 hash; +erts_ihash_t erts_internal_salted_hash(Eterm term, erts_ihash_t salt) { + if (ERTS_LIKELY(is_immed(term))) { + /* Fast path for immediates. The vast majority of calls land here. */ + return ihash_mix64(term + salt); + } + + return make_internal_hash(term, salt); +} + +erts_ihash_t erts_internal_hash(Eterm term) { + if (ERTS_LIKELY(is_immed(term))) { + return ihash_mix64(term); + } - hash = make_internal_hash(key, 0); + return make_internal_hash(term, 0); +} + +/* Term hash function for hashmaps, identical to erts_internal_hash except in + * certain debug configurations that weaken the hash. */ +erts_ihash_t erts_map_hash(Eterm key) { + erts_ihash_t hash = erts_internal_hash(key); #ifdef DBG_HASHMAP_COLLISION_BONANZA hash = erts_dbg_hashmap_collision_bonanza(hash, key); #endif + return hash; } - -#undef CONST_HASH -#undef HASH_MAP_TAIL -#undef HASH_MAP_PAIR -#undef HASH_CDR - -#undef UINT32_HASH_2 -#undef UINT32_HASH -#undef SINT32_HASH - -#undef HCONST -#undef MIX diff --git a/erts/emulator/beam/erl_term_hashing.h b/erts/emulator/beam/erl_term_hashing.h index 8a898b7c52..dacd944d4d 100644 --- a/erts/emulator/beam/erl_term_hashing.h +++ b/erts/emulator/beam/erl_term_hashing.h @@ -24,11 +24,25 @@ #include "sys.h" #include "erl_drv_nif.h" -#if (defined(__aarch64__) && defined(__ARM_FEATURE_CRC32)) || \ - (defined(__x86_64__) && defined(__SSE4_2__)) -# define ERL_INTERNAL_HASH_CRC32C +/* Internal hash routines that can be changed at will. */ + +typedef UWord erts_ihash_t; + +erts_ihash_t erts_internal_salted_hash(Eterm term, erts_ihash_t salt); +erts_ihash_t erts_internal_hash(Eterm term); +erts_ihash_t erts_map_hash(Eterm term); + +#ifdef DEBUG +# define DBG_HASHMAP_COLLISION_BONANZA +#endif + +#ifdef DBG_HASHMAP_COLLISION_BONANZA +erts_ihash_t erts_dbg_hashmap_collision_bonanza(erts_ihash_t hash, Eterm key); #endif +/* Portable hash routines whose results should be bug-compatible across + * versions. */ + typedef struct { Uint32 a,b,c; } ErtsBlockHashHelperCtx; @@ -52,14 +66,7 @@ typedef struct { Uint32 make_hash2(Eterm); Uint32 trapping_make_hash2(Eterm, Eterm*, struct process*); Uint32 make_hash(Eterm); -Uint32 make_internal_hash(Eterm, Uint32 salt); -#ifdef DEBUG -# define DBG_HASHMAP_COLLISION_BONANZA -#endif -#ifdef DBG_HASHMAP_COLLISION_BONANZA -Uint32 erts_dbg_hashmap_collision_bonanza(Uint32 hash, Eterm key); -#endif -Uint32 make_map_hash(Eterm key); + void erts_block_hash_init(ErtsBlockHashState *state, const byte *ptr, Uint len, @@ -75,5 +82,4 @@ int erts_iov_block_hash(Uint32 *hashp, Uint *sizep, ErtsIovBlockHashState *state); - #endif diff --git a/erts/emulator/beam/erl_trace.c b/erts/emulator/beam/erl_trace.c index 2dd6c99d4c..d689b653f5 100644 --- a/erts/emulator/beam/erl_trace.c +++ b/erts/emulator/beam/erl_trace.c @@ -3130,7 +3130,7 @@ static int tracer_cmp_fun(void* a, void* b) static HashValue tracer_hash_fun(void* obj) { - return make_internal_hash(((ErtsTracerNif*)obj)->module, 0); + return erts_internal_hash(((ErtsTracerNif*)obj)->module); } static void *tracer_alloc_fun(void* tmpl) diff --git a/erts/emulator/beam/global.h b/erts/emulator/beam/global.h index 3170cebe95..75db8fe792 100644 --- a/erts/emulator/beam/global.h +++ b/erts/emulator/beam/global.h @@ -734,8 +734,8 @@ do { \ typedef struct ErtsPStack_ { byte* pstart; - int offs; /* "stack pointer" as byte offset from pstart */ - int size; /* allocated size in bytes */ + SWord offs; /* "stack pointer" as byte offset from pstart */ + SWord size; /* allocated size in bytes */ ErtsAlcType_t alloc_type; }ErtsPStack; @@ -746,7 +746,7 @@ void erl_grow_pstack(ErtsPStack* s, void* default_pstack, unsigned need_bytes); #define PSTACK_DECLARE(s, DEF_PSTACK_SIZE) \ PSTACK_TYPE PSTK_DEF_STACK(s)[DEF_PSTACK_SIZE]; \ ErtsPStack s = { (byte*)PSTK_DEF_STACK(s), /* pstart */ \ - -(int)sizeof(PSTACK_TYPE), /* offs */ \ + -(SWord)sizeof(PSTACK_TYPE), /* offs */ \ DEF_PSTACK_SIZE*sizeof(PSTACK_TYPE), /* size */ \ ERTS_ALC_T_ESTACK /* alloc_type */ \ } diff --git a/erts/emulator/beam/jit/arm/beam_asm_global.hpp.pl b/erts/emulator/beam/jit/arm/beam_asm_global.hpp.pl index b2478bd56d..59524b32c7 100644 --- a/erts/emulator/beam/jit/arm/beam_asm_global.hpp.pl +++ b/erts/emulator/beam/jit/arm/beam_asm_global.hpp.pl @@ -94,7 +94,6 @@ my @beam_global_funcs = qw( i_bxor_body_shared int_div_rem_body_shared int_div_rem_guard_shared - internal_hash_helper is_in_range_shared is_ge_lt_shared minus_body_shared @@ -194,6 +193,7 @@ $decl_emit_funcs void emit_bif_element_helper(Label fail); void emit_bif_tuple_size_helper(Label fail); + void emit_internal_hash_helper(); void emit_flatmap_get_element(); void emit_hashmap_get_element(); diff --git a/erts/emulator/beam/jit/arm/instr_map.cpp b/erts/emulator/beam/jit/arm/instr_map.cpp index 8d7ad6f45f..daecdb80ef 100644 --- a/erts/emulator/beam/jit/arm/instr_map.cpp +++ b/erts/emulator/beam/jit/arm/instr_map.cpp @@ -29,58 +29,32 @@ extern "C" #include "beam_common.h" } -static const Uint32 INTERNAL_HASH_SALT = 3432918353; -static const Uint32 HCONST = 0x9E3779B9; - -/* ARG6 = lower 32 - * ARG7 = upper 32 - * - * Helper function for calculating the internal hash of keys before looking - * them up in a map. +/* ARG2 = term * - * This is essentially just a manual expansion of the `UINT32_HASH_2` macro. - * Whenever the internal hash algorithm is updated, this and all of its users - * must follow suit. + * Helper for calculating the internal hash of keys before looking them up in a + * map. This is a manual expansion of `erts_internal_hash`, and all changes to + * that function must be mirrored here. * - * Result is returned in ARG3. All arguments are clobbered. */ + * Result in ARG3. Clobbers TMP1. */ void BeamGlobalAssembler::emit_internal_hash_helper() { - a64::Gp hash = ARG3.w(), lower = ARG6.w(), upper = ARG7.w(), - constant = ARG8.w(); - - mov_imm(hash, INTERNAL_HASH_SALT); - mov_imm(constant, HCONST); - - a.add(lower, lower, constant); - a.add(upper, upper, constant); - -#if defined(ERL_INTERNAL_HASH_CRC32C) - a.crc32cw(lower, hash, lower); - a.add(hash, hash, lower); - a.crc32cw(hash, hash, upper); -#else - using rounds = - std::initializer_list<std::tuple<a64::Gp, a64::Gp, a64::Gp, int>>; - for (const auto &round : rounds{{lower, upper, hash, 13}, - {upper, hash, lower, -8}, - {hash, lower, upper, 13}, - {lower, upper, hash, 12}, - {upper, hash, lower, -16}, - {hash, lower, upper, 5}, - {lower, upper, hash, 3}, - {upper, hash, lower, -10}, - {hash, lower, upper, 15}}) { - const auto &[r_a, r_b, r_c, shift] = round; - - a.sub(r_a, r_a, r_b); - a.sub(r_a, r_a, r_c); - - if (shift > 0) { - a.eor(r_a, r_a, r_c, arm::lsr(shift)); - } else { - a.eor(r_a, r_a, r_c, arm::lsl(-shift)); - } - } -#endif + a64::Gp key = ARG2, key_hash = ARG3; + + /* key_hash = key ^ (key >> 33); */ + a.eor(key_hash, key, key, arm::lsr(33)); + + /* key_hash *= 0xFF51AFD7ED558CCDull */ + mov_imm(TMP1, 0xFF51AFD7ED558CCDull); + a.mul(key_hash, key_hash, TMP1); + + /* key_hash ^= key_hash >> 33; */ + a.eor(key_hash, key_hash, key_hash, arm::lsr(33)); + + /* key_hash *= 0xC4CEB9FE1A85EC53ull */ + mov_imm(TMP1, 0xC4CEB9FE1A85EC53ull); + a.mul(key_hash, key_hash, TMP1); + + /* key_hash ^= key_hash >> 33; */ + a.eor(key_hash, key_hash, key_hash, arm::lsr(33)); #ifdef DBG_HASHMAP_COLLISION_BONANZA emit_enter_runtime_frame(); @@ -99,8 +73,6 @@ void BeamGlobalAssembler::emit_internal_hash_helper() { emit_leave_runtime(); emit_leave_runtime_frame(); #endif - - a.ret(a64::x30); } /* ARG1 = untagged hash map root @@ -171,7 +143,7 @@ void BeamGlobalAssembler::emit_hashmap_get_element() { * word. */ a.ldr(header_val, arm::Mem(node).post(sizeof(Eterm))); - /* After 8 nodes we've run out of the 32 bits we started with + /* After 8/16 nodes we've run out of the hash bits we've started with * and we end up in a collision node. */ a.cmp(depth, imm(HAMT_MAX_LEVEL)); a.b_ne(node_loop); @@ -375,15 +347,9 @@ void BeamGlobalAssembler::emit_i_get_map_element_shared() { a.bind(hashmap); { - emit_enter_runtime_frame(); - - /* Calculate the internal hash of ARG2 before diving into the HAMT. */ - a.mov(ARG6.w(), ARG2.w()); - a.lsr(ARG7, ARG2, imm(32)); - a.bl(labels[internal_hash_helper]); - - emit_leave_runtime_frame(); - + /* Calculate the internal hash of the key before diving into the + * HAMT. */ + emit_internal_hash_helper(); emit_hashmap_get_element(); } } @@ -527,7 +493,6 @@ void BeamGlobalAssembler::emit_i_get_map_element_hash_shared() { a.and_(TMP1, ARG4, imm(_HEADER_MAP_SUBTAG_MASK)); a.cmp(TMP1, imm(HAMT_SUBTAG_HEAD_FLATMAP)); a.b_ne(hashmap); - emit_flatmap_get_element(); a.bind(hashmap); diff --git a/erts/emulator/beam/jit/arm/ops.tab b/erts/emulator/beam/jit/arm/ops.tab index 6d27a18c2d..3989f5139b 100644 --- a/erts/emulator/beam/jit/arm/ops.tab +++ b/erts/emulator/beam/jit/arm/ops.tab @@ -786,7 +786,7 @@ nofail_bif2 S1=d S2 Bif Dst | is_ne_exact_bif(Bif) => bif_is_ne_exact S1 S2 Dst nofail_bif2 S1 S2 Bif Dst | is_ge_bif(Bif) => bif_is_ge S1 S2 Dst nofail_bif2 S1 S2 Bif Dst | is_lt_bif(Bif) => bif_is_lt S1 S2 Dst -i_get_hash c I d +i_get_hash c W d i_get s d self d @@ -1283,7 +1283,7 @@ i_get_map_elements f s I * i_get_map_element_hash Fail Src=c Key Hash Dst => move Src x | i_get_map_element_hash Fail x Key Hash Dst -i_get_map_element_hash f S c I S +i_get_map_element_hash f S c W S i_get_map_element Fail Src=c Key Dst => move Src x | i_get_map_element Fail x Key Dst diff --git a/erts/emulator/beam/jit/beam_jit_common.cpp b/erts/emulator/beam/jit/beam_jit_common.cpp index 3200f75407..04ee598abc 100644 --- a/erts/emulator/beam/jit/beam_jit_common.cpp +++ b/erts/emulator/beam/jit/beam_jit_common.cpp @@ -744,8 +744,8 @@ Uint beam_jit_get_map_elements(Eterm map, ASSERT(is_hashmap(map)); while (n--) { + erts_ihash_t hx; const Eterm *v; - Uint32 hx; hx = fs[2]; ASSERT(hx == hashmap_make_hash(fs[0])); diff --git a/erts/emulator/beam/jit/x86/beam_asm_global.hpp.pl b/erts/emulator/beam/jit/x86/beam_asm_global.hpp.pl index af16da1eee..3c620462b3 100755 --- a/erts/emulator/beam/jit/x86/beam_asm_global.hpp.pl +++ b/erts/emulator/beam/jit/x86/beam_asm_global.hpp.pl @@ -88,7 +88,6 @@ my @beam_global_funcs = qw( int_div_rem_guard_shared is_in_range_shared is_ge_lt_shared - internal_hash_helper minus_body_shared minus_guard_shared new_map_shared @@ -184,6 +183,7 @@ $decl_emit_funcs x86::Mem emit_i_length_common(Label fail, int state_size); + void emit_internal_hash_helper(); void emit_flatmap_get_element(); void emit_hashmap_get_element(); diff --git a/erts/emulator/beam/jit/x86/instr_map.cpp b/erts/emulator/beam/jit/x86/instr_map.cpp index 5f89077ba6..110054881e 100644 --- a/erts/emulator/beam/jit/x86/instr_map.cpp +++ b/erts/emulator/beam/jit/x86/instr_map.cpp @@ -29,63 +29,55 @@ extern "C" #include "beam_common.h" } -static const Uint32 INTERNAL_HASH_SALT = 3432918353; -static const Uint32 HCONST = 0x9E3779B9; - -/* - * ARG4 = lower 32 - * ARG5 = upper 32 +/* ARG2 = term * - * Helper function for calculating the internal hash of keys before looking - * them up in a map. + * Helper for calculating the internal hash of keys before looking them up in a + * map. This is a manual expansion of `erts_internal_hash`, and all changes to + * that function must be mirrored here. * - * This is essentially just a manual expansion of the `UINT32_HASH_2` macro. - * Whenever the internal hash algorithm is updated, this and all of its users - * must follow suit. - * - * Result is returned in ARG3. */ + * Result in ARG3. */ void BeamGlobalAssembler::emit_internal_hash_helper() { - x86::Gp hash = ARG3d, lower = ARG4d, upper = ARG5d; - - a.mov(hash, imm(INTERNAL_HASH_SALT)); - a.add(lower, imm(HCONST)); - a.add(upper, imm(HCONST)); - -#if defined(ERL_INTERNAL_HASH_CRC32C) - a.mov(ARG6d, hash); - a.crc32(hash, lower); - a.add(hash, ARG6d); - a.crc32(hash, upper); -#else - using rounds = - std::initializer_list<std::tuple<x86::Gp, x86::Gp, x86::Gp, int>>; - for (const auto &round : rounds{{lower, upper, hash, 13}, - {upper, hash, lower, -8}, - {hash, lower, upper, 13}, - {lower, upper, hash, 12}, - {upper, hash, lower, -16}, - {hash, lower, upper, 5}, - {lower, upper, hash, 3}, - {upper, hash, lower, -10}, - {hash, lower, upper, 15}}) { - const auto &[r_a, r_b, r_c, shift] = round; - - a.sub(r_a, r_b); - a.sub(r_a, r_c); - - /* We have no use for the type constant anymore, reuse its register for - * the `a ^= r_c << shift` expression. */ - a.mov(ARG6d, r_c); - - if (shift > 0) { - a.shr(ARG6d, imm(shift)); - } else { - a.shl(ARG6d, imm(-shift)); - } + x86::Gp key = ARG2, key_hash = ARG3; - a.xor_(r_a, ARG6d); + /* Unsigned multiplication instructions on x86 either use RDX as an + * implicit source or clobber it. Sigh. */ + if (key == x86::rdx) { + a.mov(TMP_MEM1q, x86::rdx); + } else { + ASSERT(key_hash == x86::rdx); + } + + /* key_hash = key ^ (key >> 33); */ + a.mov(ARG4, ARG2); + a.shr(ARG4, imm(33)); + a.mov(x86::rdx, ARG2); + a.xor_(x86::rdx, ARG4); + + /* `RDX * ARG6` storing a 128 bit result in ARG4:RDX. We only want the + * lower 64 bits in RDX. + * + * key_hash *= 0xFF51AFD7ED558CCDull */ + mov_imm(ARG6, 0xFF51AFD7ED558CCDull); + a.mulx(ARG4, x86::rdx, ARG6); + + /* key_hash ^= key_hash >> 33; */ + a.mov(ARG4, x86::rdx); + a.shr(ARG4, imm(33)); + a.xor_(x86::rdx, ARG4); + + /* key_hash *= 0xC4CEB9FE1A85EC53ull */ + mov_imm(ARG6, 0xC4CEB9FE1A85EC53ull); + a.mulx(ARG4, x86::rdx, ARG6); + + /* key_hash ^= key_hash >> 33; */ + a.mov(ARG4, x86::rdx); + a.shr(ARG4, imm(33)); + a.xor_(x86::rdx, ARG4); + + if (key == x86::rdx) { + a.mov(key_hash, x86::rdx); + a.mov(key, TMP_MEM1q); } -#endif #ifdef DBG_HASHMAP_COLLISION_BONANZA a.mov(TMP_MEM1q, ARG1); @@ -97,14 +89,12 @@ void BeamGlobalAssembler::emit_internal_hash_helper() { runtime_call<2>(erts_dbg_hashmap_collision_bonanza); emit_leave_runtime(); - a.mov(ARG3d, RETd); + a.mov(ARG3, RET); a.mov(ARG1, TMP_MEM1q); a.mov(ARG2, TMP_MEM2q); a.mov(RET, TMP_MEM3q); #endif - - a.ret(); } /* ARG1 = hash map root, ARG2 = key, ARG3 = key hash, RETd = node header @@ -113,7 +103,7 @@ void BeamGlobalAssembler::emit_internal_hash_helper() { void BeamGlobalAssembler::emit_hashmap_get_element() { Label node_loop = a.newLabel(); - x86::Gp node = ARG1, key = ARG2, key_hash = ARG3d, header_val = RETd, + x86::Gp node = ARG1, key = ARG2, key_hash = ARG3, header_val = RETd, index = ARG4d, depth = ARG5d; const int header_shift = @@ -131,7 +121,7 @@ void BeamGlobalAssembler::emit_hashmap_get_element() { /* Find out which child we should follow, and shift the hash for the * next round. */ - a.mov(index, key_hash); + a.mov(index, key_hash.r32()); a.and_(index, imm(0xF)); a.shr(key_hash, imm(4)); a.inc(depth); @@ -171,7 +161,7 @@ void BeamGlobalAssembler::emit_hashmap_get_element() { /* Nope, we have to search another node. */ a.mov(header_val, emit_boxed_val(node, 0, sizeof(Uint32))); - /* After 8 nodes we've run out of the 32 bits we started with + /* After 8/16 nodes we've run out of the hash bits we've started with * and we end up in a collision node. */ a.test(depth, imm(HAMT_MAX_LEVEL - 1)); a.short_().jnz(node_loop); @@ -390,13 +380,9 @@ void BeamGlobalAssembler::emit_i_get_map_element_shared() { a.bind(hashmap); { - /* Calculate the internal hash of ARG2 before diving into the HAMT. */ - a.mov(ARG5, ARG2); - a.shr(ARG5, imm(32)); - a.mov(ARG4d, ARG2d); - - a.call(labels[internal_hash_helper]); - + /* Calculate the internal hash of the key before diving into the + * HAMT. */ + emit_internal_hash_helper(); emit_hashmap_get_element(); } } diff --git a/erts/emulator/beam/jit/x86/ops.tab b/erts/emulator/beam/jit/x86/ops.tab index ef57021d83..de7ac5961f 100644 --- a/erts/emulator/beam/jit/x86/ops.tab +++ b/erts/emulator/beam/jit/x86/ops.tab @@ -751,7 +751,7 @@ nofail_bif2 S1=d S2 Bif Dst | is_ne_exact_bif(Bif) => bif_is_ne_exact S1 S2 Dst nofail_bif2 S1 S2 Bif Dst | is_ge_bif(Bif) => bif_is_ge S1 S2 Dst nofail_bif2 S1 S2 Bif Dst | is_lt_bif(Bif) => bif_is_lt S1 S2 Dst -i_get_hash c I d +i_get_hash c W d i_get s d self d @@ -1244,7 +1244,7 @@ i_get_map_elements f s I * i_get_map_element_hash Fail Src=c Key Hash Dst => move Src x | i_get_map_element_hash Fail x Key Hash Dst -i_get_map_element_hash f S c I S +i_get_map_element_hash f S c W S i_get_map_element Fail Src=c Key Dst => move Src x | i_get_map_element Fail x Key Dst diff --git a/erts/emulator/test/map_SUITE.erl b/erts/emulator/test/map_SUITE.erl index 5d7546c1a4..ef4db545b4 100644 --- a/erts/emulator/test/map_SUITE.erl +++ b/erts/emulator/test/map_SUITE.erl @@ -97,6 +97,10 @@ %% Benchmarks -export([benchmarks/1]). +%% Helper for generating new colliding keys after the internal hashing +%% algorithm changes. +-export([find_colliding_keys/1]). + -include_lib("stdlib/include/ms_transform.hrl"). -include_lib("common_test/include/ct_event.hrl"). @@ -3559,45 +3563,213 @@ minor_gcs() -> {minor_gcs, GCS} = lists:keyfind(minor_gcs, 1, Info), GCS. -%% Generate a map with N (or N+1) keys that has an abnormal heap demand. -%% Done by finding keys that collide in the first 32-bit hash. +%% Generate a map with N (or N+1) keys that have an abnormal heap demand. Done +%% by finding keys that collide in the first 32 bits of the hash. fatmap(N) -> - %%erts_debug:set_internal_state(available_internal_state, true), - Table = ets:new(void, [bag, private]), - - Seed0 = rand:seed_s(exsplus, {4711, 3141592, 2718281}), - Seed1 = fatmap_populate(Table, Seed0, (1 bsl 16)), - Keys = fatmap_generate(Table, Seed1, N, []), - ets:delete(Table), - maps:from_list([{K,K} || K <- Keys]). + Groups0 = colliding_keys(), + Groups = lists:nthtail(length(Groups0) - (N div 2), Groups0), + Keys = lists:append([[A, B] || [A, B | _Rest] <- Groups]), + maps:from_keys(Keys, []). + +colliding_keys() -> + %% Collide to 8 levels, anything more than this takes way too long to + %% generate. + Mask = 16#FFFFFFFF, + + %% Collisions found by find_colliding_keys(Mask) below. When regenerating + %% keys, make sure to run it outside testing as it might time-trap + %% otherwise. + %% + %% io:format("Finding new colliding keys for mask ~p~n", [Mask]), + %% io:format("Colliding keys\n\t~p\n", [find_colliding_keys(Mask)]), + ByMethod = #{ + %% 64-bit internal hash of `0` + 15677855740172624429 => + [[-4294967296,-3502771103,1628104549], + [-2750312253,-2208396507,-2147483648,1926198452,3660971145], + [-2542330914,-1089175976,-1073741824,290495829], + [-2155350068,0], + [1073741824,2807978463,3625918826], + [-1032333168,-705082324,1541401419,1594347321,2147483648, + 2266580263,2823045213], + [-2465550512,3221225472], + [2854075383,651030299,-1581781966,-3419595364,-4294967295], + [3351133532,968011333,-2217176682,-4294967294], + [598547769,-1379599129,-4294967293], + [-649195724,-4294967292], + [2943767758,-645518858,-875893937,-1294474094,-4294967291], + [3255309205,-2208705073,-4294967290], + [2162086262,-3745041100,-4294967288], + [-36087602,-1146855151,-1687820340,-3221225471], + [4177844763,3846951687,3485974116,3175597814,590007752, + -3221225470], + [3264460518,1553643847,1183174568,-3221225469], + [-577423597,-3221225468,-3522984153], + [3855876603,3019389034,-1323003840,-2576022240,-3221225467], + [-471176452,-3221225466], + [-1122194611,-3221225465,-4210494386], + [3603262778,994932591,-1788155141,-1921175318,-3221225464], + [3836440544,-1003007187,-2147483647], + [-2051344765,-2147483646], + [3650711544,-2147483645,-2799381711,-3556915274], + [3489936963,1240642555,-2147483644,-3957745840], + [1085161678,-2052366093,-2147483643,-3483479006], + [1939744936,-2147483642,-3856508363], + [-566163246,-2060332302,-2147483641,-4230104575], + [1203359280,237551462,-1073741823], + [1727228961,-813544803,-1073741822,-1309725727,-1666872574, + -2203000992], + [3698637395,3362609925,876970478,-714241238,-1073741821], + [1765842640,-354951691,-566902540,-1073741820], + [3963091352,2371749084,591553116,-1073741819], + [-1073741817,-2715118400], + [-1073741816,-3224015310], + [2762405117,1,-2123671186], + [2470477117,2,-331878960,-2322233731], + [3815926349,2088957086,3], + [1968999576,870968367,4,-1268233288,-3048698020], + [979559827,5], + [946684365,753214037,6,-2648059890], + [3790852688,2964822264,2830450758,7,-3580232887], + [1073741825,-3356417243,-3706053980], + [1073741827,-2621798828], + [1073741828,-2347690873], + [2090309310,1073741830,-1375115411,-2016799213,-4267952630], + [1073741831,672032559], + [1073741832,-2577014530,-3065907606], + [3796535022,2351766515,2147483649,-2136894649], + [2280176922,2147483650], + [4198987324,3244673818,2147483651,270823276,-2880202587], + [3880317786,3256588678,2670024934,2147483652,-2327563310, + -3284218582,-3844717086], + [2178108296,2147483653,-3361345880], + [2954325696,2147483654,-1059451308,-1331847237], + [3189358149,2147483655,-1477948284,-1669797549,-3362853705, + -3928750615], + [2147483656,471953932,-355892383], + [3221225473,-3995083753,-4092880912], + [3221225474,-2207482759,-3373076062], + [3221225475,2400978919,2246389041,1052806668,-781893221, + -1811850779], + [3221225476,-245369539,-1842612521], + [3221225477,688232807], + [3221225478,209327542,-2793530395], + [3221225479,-2303080520,-4225327222], + [4216539003,3221225480]], + + %% 32-bit internal hash of `0` + 416211501 => + [[-55973163,-134217697],[43918753,-134217684], + [107875525,-134217667],[-30291033,-134217663], + [-40285269,-111848095],[35020004,-111848056], + [-44437601,-111848046],[103325476,-69901823,-111848030], + [126809757,-111848012],[-92672406,-111848005], + [-64199103,-111847990],[102238942,-111847982], + [62106519,-89478468],[-89478462,-128994853], + [-67899866,-89478412],[-45432484,-89478397], + [120764819,-89478387],[9085208,-89478382], + [10859155,-89478369],[45834467,-67108863], + [-67108857,-124327693],[104597114,-67108847], + [11918558,-67108783],[50986187,-67108760], + [113683827,64978564,-67108752], + [111972669,-67108751],[27085194,-44739227], + [46760231,-44739221],[101248827,-44739220], + [30692154,-44739176],[33768394,-44739117], + [-12083942,-44739116],[-22369572,-112420685], + [-22369568,-98812798],[-22369550,-78759395], + [47792095,-22369543],[9899495,-22369540], + [99744593,-22369511],[76325343,52], + [122425143,68],[21651445,74], + [129537216,119],[125,-110161190], + [80229747,22369626],[22369629,-55742042], + [128416574,22369631],[105267606,22369643], + [22369693,-2286278],[126622985,22369698], + [22369701,-13725583],[22369728,-22765683], + [22369731,-54786216],[22369740,-65637968], + [44739246,12048008],[44739259,-26636781], + [126966693,44739272],[44739274,-130215175], + [44739277,15051453],[44739292,17890441], + [44739301,-72627814],[106949249,44739322], + [44739323,-56882381],[67108879,-111259055], + [67108888,37627968],[67108894,-53291767], + [67108896,-127782577],[67108908,-1014167], + [82796148,67108959],[67108962,-71355523], + [67108984,-62077338,-77539719],[126106374,89478485], + [89478488,85703113],[132215738,89478495], + [89478515,-122049151],[89478518,-22611374], + [94050181,89478530],[89478547,42736340], + [89478553,86641584],[129419863,111848199], + [111848217,-32493354],[112586988,111848229]] + }, + + HashKey = internal_hash(0), + #{ HashKey := Keys } = ByMethod, + + verify_colliding_keys(Keys, Mask). + +verify_colliding_keys([[K | Ks]=Group | Gs], Mask) -> + Hash = internal_hash(K) band Mask, + [Hash] = lists:usort([(internal_hash(Key) band Mask) || Key <- Ks]), + [Group | verify_colliding_keys(Gs, Mask)]; +verify_colliding_keys([], _Mask) -> + []. + +%% Use this function to (re)generate the list in colliding_keys/0. This takes +%% several hours to run so you may want to run it overnight. +find_colliding_keys(Mask) -> + NumScheds = erlang:system_info(schedulers_online), + %% Stay below the limit for smalls on 32-bit platforms to prevent the + %% search from taking forever due to bignums. + Start = -(1 bsl 27), + End = -Start, + Range = End - Start, + Step = Range div NumScheds, + timer:tc(fun() -> + ckf_spawn(NumScheds, NumScheds, Start, End, Step, Mask, []) + end). + +ckf_spawn(0, _NumScheds, _Start, _End, _Step, _Mask, Refs) -> + lists:append(ckf_await(Refs)); +ckf_spawn(N, NumScheds, Start, End, Step, Mask, Refs) -> + Keys = [Start + Z + (N - 1) * Step || Z <- lists:seq(1, 128)], + {_, Ref} = spawn_monitor(fun() -> + exit(ckf_finder(Start, End, Mask, Keys)) + end), + ckf_spawn(N - 1, NumScheds, Start, End, Step, Mask, [Ref | Refs]). + +ckf_await([Ref | Refs]) -> + receive + {'DOWN', Ref, _, _, []} -> + %% Ignore empty slices. + ckf_await(Refs); + {'DOWN', Ref, _, _, Collisions} -> + [Collisions | ckf_await(Refs)] + end; +ckf_await([]) -> + []. -fatmap_populate(_, Seed, 0) -> Seed; -fatmap_populate(Table, Seed, N) -> - {I, NextSeed} = rand:uniform_s(1 bsl 48, Seed), - Hash = internal_hash(I), - ets:insert(Table, [{Hash, I}]), - fatmap_populate(Table, NextSeed, N-1). +ckf_finder(Start, End, Mask, Keys) -> + [ckf_finder_1(Start, End, Mask, Key) || Key <- Keys]. +ckf_finder_1(Start, End, Mask, Key) -> + true = Key >= Start, true = Key < End, %Assertion. + Target = internal_hash(Key) band Mask, + ckf_finder_2(Start, End, Mask, Target, []). -fatmap_generate(_, _, N, Acc) when N =< 0 -> +ckf_finder_2(Same, Same, _Mask, _Target, [_]) -> + %% Key collided with itself, ignore it. + []; +ckf_finder_2(Same, Same, _Mask, _Target, Acc) -> Acc; -fatmap_generate(Table, Seed, N0, Acc0) -> - {I, NextSeed} = rand:uniform_s(1 bsl 48, Seed), - Hash = internal_hash(I), - case ets:member(Table, Hash) of - true -> - NewKeys = [I | ets:lookup_element(Table, Hash, 2)], - Acc1 = lists:usort(Acc0 ++ NewKeys), - N1 = N0 - (length(Acc1) - length(Acc0)), - fatmap_generate(Table, NextSeed, N1, Acc1); - false -> - fatmap_generate(Table, NextSeed, N0, Acc0) +ckf_finder_2(Next, End, Mask, Target, Acc) -> + case (internal_hash(Next) band Mask) =:= Target of + true -> ckf_finder_2(Next + 1, End, Mask, Target, [Next | Acc]); + false -> ckf_finder_2(Next + 1, End, Mask, Target, Acc) end. internal_hash(Term) -> erts_debug:get_internal_state({internal_hash, Term}). - %% map external_format (fannerl). fannerl() -> <<131,116,0,0,0,28,100,0,13,108,101,97,114,110,105,110,103,95,114, diff --git a/erts/emulator/test/nif_SUITE.erl b/erts/emulator/test/nif_SUITE.erl index 359646ca5c..1fb9ca8933 100644 --- a/erts/emulator/test/nif_SUITE.erl +++ b/erts/emulator/test/nif_SUITE.erl @@ -3747,7 +3747,7 @@ test_bit_distribution_fitness(Integers, BitSize) -> (FailureText =:= [] orelse ct:fail(FailureText)). -nif_hash_result_bitsize(internal) -> 32; +nif_hash_result_bitsize(internal) -> erlang:system_info(wordsize) * 8; nif_hash_result_bitsize(phash2) -> 27. unique(List) -> diff --git a/erts/emulator/test/persistent_term_SUITE.erl b/erts/emulator/test/persistent_term_SUITE.erl index 39dd2d3428..178eb295ee 100644 --- a/erts/emulator/test/persistent_term_SUITE.erl +++ b/erts/emulator/test/persistent_term_SUITE.erl @@ -40,10 +40,6 @@ %% -export([test_init_restart_cmd/1]). -%% Test writing helper --export([find_colliding_keys/0]). - - suite() -> [{ct_hooks,[ts_install_cth]}, {timetrap,{minutes,10}}]. @@ -603,120 +599,144 @@ collisions_delete([], _) -> ok. colliding_keys() -> - %% Collisions found by find_colliding_keys() below - %% ct:timetrap({minutes, 60}), - %% ct:pal("Colliding keys = ~p", [find_colliding_keys()]), - Collisions = - #{ - %% Collisions for Jenkins96 hashing. - 1268203079 => [[77674392,148027], - [103370644,950908], - [106444046,870178], - [22217246,735880], - [18088843,694607], - [63426007,612179], - [117354942,906431], - [121434305,94282311,816072], - [118441466,93873772,783366], - [124338174,1414801,123089], - [20240282,17113486,923647], - [126495528,61463488,164994], - [125341723,5729072,445539], - [127450932,80442669,348245], - [123354692,85724182,14241288,180793], - [99159367,65959274,61680971,289939], - [107637580,104512101,62639807,181644], - [139547511,51654420,2062545,151944], - [88078274,73031465,53388204,428872], - [141314238,75761379,55699508,861797], - [88045216,59272943,21030492,180903]], - %% Collisions for CRC32-C hashing. - 1982459178 => [[-4294967296,654663773], - [-3758096384,117792861], - [-3221225472,1728405597], - [-2684354560,1191534685], - [-2147483648,2706162303], - [-1610612736,2169291391], - [-1073741824,3779904127], - [-536870912,3243033215], - [-3640303523,0], - [-4177174435,536870912], - [-2566561699,1073741824], - [-3103432611,1610612736], - [-1588804993,2147483648], - [-2125675905,2684354560], - [-515063169,3221225472], - [-1051934081,3758096384]] - }, - - Key = internal_hash(2), - ct:pal("internal_hash(2) = ~p", [Key]), - #{ Key := L } = Collisions, - - %% Verify that the keys still collide (this will fail if the - %% internal hash function has been changed). - case erlang:system_info(wordsize) of - 8 -> - verify_colliding_keys(L); - 4 -> - %% Not guaranteed to collide on a 32-bit system. - ok - end, - - L. - -verify_colliding_keys([[K|Ks]|Gs]) -> - Hash = internal_hash(K), - [Hash] = lists:usort([internal_hash(Key) || Key <- Ks]), - verify_colliding_keys(Gs); -verify_colliding_keys([]) -> - ok. + Mask = 16#FFFFFFFF, + + %% Collisions found by find_colliding_keys(Mask) in `map_SUITE`. + ByMethod = #{ + %% 64-bit internal hash of `0` + 15677855740172624429 => + [[-4294967296,-3502771103,1628104549], + [-2750312253,-2208396507,-2147483648,1926198452,3660971145], + [-2542330914,-1089175976,-1073741824,290495829], + [-2155350068,0], + [1073741824,2807978463,3625918826], + [-1032333168,-705082324,1541401419,1594347321,2147483648, + 2266580263,2823045213], + [-2465550512,3221225472], + [2854075383,651030299,-1581781966,-3419595364,-4294967295], + [3351133532,968011333,-2217176682,-4294967294], + [598547769,-1379599129,-4294967293], + [-649195724,-4294967292], + [2943767758,-645518858,-875893937,-1294474094,-4294967291], + [3255309205,-2208705073,-4294967290], + [2162086262,-3745041100,-4294967288], + [-36087602,-1146855151,-1687820340,-3221225471], + [4177844763,3846951687,3485974116,3175597814,590007752, + -3221225470], + [3264460518,1553643847,1183174568,-3221225469], + [-577423597,-3221225468,-3522984153], + [3855876603,3019389034,-1323003840,-2576022240,-3221225467], + [-471176452,-3221225466], + [-1122194611,-3221225465,-4210494386], + [3603262778,994932591,-1788155141,-1921175318,-3221225464], + [3836440544,-1003007187,-2147483647], + [-2051344765,-2147483646], + [3650711544,-2147483645,-2799381711,-3556915274], + [3489936963,1240642555,-2147483644,-3957745840], + [1085161678,-2052366093,-2147483643,-3483479006], + [1939744936,-2147483642,-3856508363], + [-566163246,-2060332302,-2147483641,-4230104575], + [1203359280,237551462,-1073741823], + [1727228961,-813544803,-1073741822,-1309725727,-1666872574, + -2203000992], + [3698637395,3362609925,876970478,-714241238,-1073741821], + [1765842640,-354951691,-566902540,-1073741820], + [3963091352,2371749084,591553116,-1073741819], + [-1073741817,-2715118400], + [-1073741816,-3224015310], + [2762405117,1,-2123671186], + [2470477117,2,-331878960,-2322233731], + [3815926349,2088957086,3], + [1968999576,870968367,4,-1268233288,-3048698020], + [979559827,5], + [946684365,753214037,6,-2648059890], + [3790852688,2964822264,2830450758,7,-3580232887], + [1073741825,-3356417243,-3706053980], + [1073741827,-2621798828], + [1073741828,-2347690873], + [2090309310,1073741830,-1375115411,-2016799213,-4267952630], + [1073741831,672032559], + [1073741832,-2577014530,-3065907606], + [3796535022,2351766515,2147483649,-2136894649], + [2280176922,2147483650], + [4198987324,3244673818,2147483651,270823276,-2880202587], + [3880317786,3256588678,2670024934,2147483652,-2327563310, + -3284218582,-3844717086], + [2178108296,2147483653,-3361345880], + [2954325696,2147483654,-1059451308,-1331847237], + [3189358149,2147483655,-1477948284,-1669797549,-3362853705, + -3928750615], + [2147483656,471953932,-355892383], + [3221225473,-3995083753,-4092880912], + [3221225474,-2207482759,-3373076062], + [3221225475,2400978919,2246389041,1052806668,-781893221, + -1811850779], + [3221225476,-245369539,-1842612521], + [3221225477,688232807], + [3221225478,209327542,-2793530395], + [3221225479,-2303080520,-4225327222], + [4216539003,3221225480]], + + %% 32-bit internal hash of `0` + 416211501 => + [[-55973163,-134217697],[43918753,-134217684], + [107875525,-134217667],[-30291033,-134217663], + [-40285269,-111848095],[35020004,-111848056], + [-44437601,-111848046],[103325476,-69901823,-111848030], + [126809757,-111848012],[-92672406,-111848005], + [-64199103,-111847990],[102238942,-111847982], + [62106519,-89478468],[-89478462,-128994853], + [-67899866,-89478412],[-45432484,-89478397], + [120764819,-89478387],[9085208,-89478382], + [10859155,-89478369],[45834467,-67108863], + [-67108857,-124327693],[104597114,-67108847], + [11918558,-67108783],[50986187,-67108760], + [113683827,64978564,-67108752], + [111972669,-67108751],[27085194,-44739227], + [46760231,-44739221],[101248827,-44739220], + [30692154,-44739176],[33768394,-44739117], + [-12083942,-44739116],[-22369572,-112420685], + [-22369568,-98812798],[-22369550,-78759395], + [47792095,-22369543],[9899495,-22369540], + [99744593,-22369511],[76325343,52], + [122425143,68],[21651445,74], + [129537216,119],[125,-110161190], + [80229747,22369626],[22369629,-55742042], + [128416574,22369631],[105267606,22369643], + [22369693,-2286278],[126622985,22369698], + [22369701,-13725583],[22369728,-22765683], + [22369731,-54786216],[22369740,-65637968], + [44739246,12048008],[44739259,-26636781], + [126966693,44739272],[44739274,-130215175], + [44739277,15051453],[44739292,17890441], + [44739301,-72627814],[106949249,44739322], + [44739323,-56882381],[67108879,-111259055], + [67108888,37627968],[67108894,-53291767], + [67108896,-127782577],[67108908,-1014167], + [82796148,67108959],[67108962,-71355523], + [67108984,-62077338,-77539719],[126106374,89478485], + [89478488,85703113],[132215738,89478495], + [89478515,-122049151],[89478518,-22611374], + [94050181,89478530],[89478547,42736340], + [89478553,86641584],[129419863,111848199], + [111848217,-32493354],[112586988,111848229]] + }, + + HashKey = internal_hash(0), + #{ HashKey := Keys } = ByMethod, + + verify_colliding_keys(Keys, Mask). + +verify_colliding_keys([[K | Ks]=Group | Gs], Mask) -> + Hash = internal_hash(K) band Mask, + [Hash] = lists:usort([(internal_hash(Key) band Mask) || Key <- Ks]), + [Group | verify_colliding_keys(Gs, Mask)]; +verify_colliding_keys([], _Mask) -> + []. internal_hash(Term) -> erts_debug:get_internal_state({internal_hash,Term}). -%% Use this function to (re)generate the list in colliding_keys/0 -%% -%% Grab a coffee, it will take a while. -find_colliding_keys() -> - erts_debug:set_internal_state(available_internal_state, true), - NumScheds = erlang:system_info(schedulers_online), - Start = -(1 bsl 32), - End = -Start, - Range = End - Start, - Step = Range div NumScheds, - timer:tc(fun() -> fck_spawn(NumScheds, NumScheds, Start, End, Step, []) end). - -fck_spawn(0, _NumScheds, _Start, _End, _Step, Refs) -> - fck_await(Refs); -fck_spawn(N, NumScheds, Start, End, Step, Refs) -> - Key = Start + (N - 1) * Step, - {_, Ref} = spawn_monitor(fun() -> exit(fck_finder(Start, End, Key)) end), - fck_spawn(N - 1, NumScheds, Start, End, Step, [Ref | Refs]). - -fck_await([Ref | Refs]) -> - receive - {'DOWN', Ref, _, _, [_Initial]} -> - %% Ignore slices where the initial value only collided with itself. - fck_await(Refs); - {'DOWN', Ref, _, _, Collisions} -> - [Collisions | fck_await(Refs)] - end; -fck_await([]) -> - []. - -fck_finder(Start, End, Key) -> - true = Key >= Start, true = Key < End, %Assertion. - fck_finder_1(Start, End, internal_hash(Key)). - -fck_finder_1(Same, Same, _Target) -> - []; -fck_finder_1(Next, End, Target) -> - case internal_hash(Next) =:= Target of - true -> [Next | fck_finder_1(Next + 1, End, Target)]; - false -> fck_finder_1(Next + 1, End, Target) - end. - %% OTP-17700 Bug skipped refc++ of shared magic reference shared_magic_ref(_Config) -> Ref = atomics:new(10, []), -- 2.35.3
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor