Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
home:Ledest:erlang:24
erlang
2351-kernel-Add-a-Global-testcase-showing-weakn...
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File 2351-kernel-Add-a-Global-testcase-showing-weakness-of-alg.patch of Package erlang
From a769d10343ba676e550aa218a896fb88d3456ed5 Mon Sep 17 00:00:00 2001 From: Hans Bolinder <hasse@erlang.org> Date: Mon, 7 Jun 2021 07:18:12 +0200 Subject: [PATCH 1/2] kernel: Add a Global testcase showing weakness of algo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The scenario of the testcase is described in https://erlang.org/pipermail/erlang-questions/2020-October/100034.html: Dániel Szoboszlay dszoboszlay@REDACTED Mon Oct 12 11:05:19 CEST 2020 Hi, Global can indeed end up in inconsistent states if some nodes get disconnected from each other (so you're no longer running on a fully connected mesh). Since, when registering a global name on node X, the change is only propagated to nodes that X is directly connected to, you can end up in a situation where X and Y are connected together, so they will both know about the name, and Y and Z are connected together but X and Z are not, so Z never gets the update. When two nodes (re)connect, they only compare the names they locally know about. So it is a bit tricky, but you can actually end up in a situation where all nodes are connected, yet the global name databases are inconsistent. You will need at least 4 nodes for this scenario to happen (e.g. A, B, C & D): 1. All nodes are connected initially. 2. A gets disconnected from C. 3. A registers process X under some name: this gets propagated to B & D, but not C. 4. B gets disconnected from D. 5. B re-registers process Y under some name: this gets propagated to A & C, but not D, so on D the name still belongs to X. 6. A reconnects to C; since they both know the name belongs to Y, they will inform their half of the network about the new node, but won't issue any global name updates. 7. You have all 4 nodes connected again, but A, B & C believe the name belongs to Y, while D believes it belongs to X. 
So this can happen, if you know how global works you can understand how it can happen, but I don't think it would be expected by many people to actually happen. :) global:sync() is not really meant to resolve this error. The only solution I know about is to manually compare global name registrations shortly after you see a new node connecting. Cheers, Daniel --- lib/kernel/test/global_SUITE.erl | 87 ++++++++++++++++++++++++++++++-- 1 file changed, 84 insertions(+), 3 deletions(-) diff --git a/lib/kernel/test/global_SUITE.erl b/lib/kernel/test/global_SUITE.erl index bdc4d9ce16..9c4e272684 100644 --- a/lib/kernel/test/global_SUITE.erl +++ b/lib/kernel/test/global_SUITE.erl @@ -44,6 +44,7 @@ mass_death/1, garbage_messages/1, ring_line/1, + flaw1/1, lost_connection/1, lost_connection2/1, global_disconnect/1 @@ -141,7 +142,7 @@ all() -> simple_resolve2, simple_resolve3, leftover_name, re_register_name, name_exit, external_nodes, many_nodes, sync_0, global_groups_change, register_1, both_known_1, - lost_unregister, mass_death, garbage_messages, + lost_unregister, mass_death, garbage_messages, flaw1, lost_connection, lost_connection2, global_disconnect ] end. @@ -490,8 +491,9 @@ write_high_level_trace(Nodes, Config) -> Node <- Nodes], Dir = proplists:get_value(priv_dir, Config), DataFile = filename:join([Dir, lists:concat(["global_", ?testcase])]), - ?P("High-level trace on:" - "~n ~p", [DataFile]), + io:format("\n\nAnalyze high level trace like this:\n"), + io:format("global_trace:dd(~p, [{show_state, 0, 10}]). % 10 seconds\n", + [DataFile]), file:write_file(DataFile, term_to_binary({high_level_trace, When, Data})). lock_global2(Id, Parent) -> @@ -4281,6 +4283,84 @@ garbage_messages(Config) when is_list(Config) -> init_condition(Config), ok. +%% This is scenario outlined in +%% https://erlang.org/pipermail/erlang-questions/2020-October/100034.html. +%% It illustrates that the algorithm of Global is flawed. 
+flaw1(Config) -> + Timeout = 360, + ct:timetrap({seconds,Timeout}), + init_high_level_trace(Timeout), + init_condition(Config), + OrigNames = global:registered_names(), + + [A, B, C, D] = OtherNodes = start_nodes([a, b, c, d], peer, Config), + Nodes = lists:sort([node() | OtherNodes]), + wait_for_ready_net(Config), + + F1 = + fun(S0) -> + ct:sleep(100), + Str = "************", + S = Str ++ " " ++ lists:flatten(S0) ++ " " ++ Str, + io:format("~s\n", [S]), + [begin + RNs = rpc:call(N, global, registered_names, []), + W = rpc:call(N, global, whereis_name, [x]), + io:format(" === ~w ===\n", [N]), + io:format(" registered names: ~p", [RNs]), + io:format(" where is x: ~p", [W]) + end || N <- OtherNodes] + end, + F1("start"), + + true = rpc:call(A, erlang, disconnect_node, [C]), + F1("after disconnecting c from a"), + + Pid = self(), + yes = rpc:call(A, global, register_name, [x, Pid]), + F1(io_lib:format("after registering x as ~p on a", [Pid])), + + true = rpc:call(B, erlang, disconnect_node, [D]), + F1("after disconnecting d from b"), + + Pid2 = whereis(global_name_server), + yes = rpc:call(B, global, re_register_name, [x, Pid2]), + F1(io_lib:format("after re_register_name x as ~p on b", [Pid2])), + + pong = rpc:call(A, net_adm, ping, [C]), + F1("finished after ping c from a"), + + pong = rpc:call(B, net_adm, ping, [D]), + + timer:sleep(1000), + + %% "You have all 4 nodes connected again, but A, B & C believe the + %% name belongs to Y, while D believes it belongs to X." 
+ Pid2 = rpc:call(A, global, whereis_name, [x]), + Pid2 = rpc:call(B, global, whereis_name, [x]), + Pid2 = rpc:call(C, global, whereis_name, [x]), + Pid = rpc:call(D, global, whereis_name, [x]), + + lists:foreach(fun(N) -> + rpc:call(N, ?MODULE, stop_tracer, []) + end, Nodes), + _ = rpc:call(A, global, unregister_name, [x]), + + F1("after unregistering x on node a"), + + %% _ = rpc:call(B, global, unregister_name, [y]), + %% F1("after unregistering y on node b"), + + _ = rpc:call(C, global, unregister_name, [x]), + F1("after unregistering x on node c"), + + ct:sleep(100), + OrigNames = global:registered_names(), + write_high_level_trace(Config), + stop_nodes(OtherNodes), + init_condition(Config), + ok. + global_disconnect(Config) when is_list(Config) -> Timeout = 30, ct:timetrap({seconds,Timeout}), -- 2.26.2
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor