Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
home:Ledest:erlang:23
erlang
0530-Correct-printing-of-Unicode-atoms-in-crash...
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File 0530-Correct-printing-of-Unicode-atoms-in-crash-dumps.patch of Package erlang
From 5fc8d39e2c976f88d06c26de23422bee6f5ad2ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org> Date: Mon, 31 May 2021 08:11:10 +0200 Subject: [PATCH] Correct printing of Unicode atoms in crash dumps Atoms containing Unicode code points greater than 255 would not be printed correctly in crash dumps. This commit corrects the printing of Unicode atoms done by the runtime-internal function `erts_printf()` (used when producing crash dumps) and also updates `crashdump_viewer` to correctly handle such atoms in function names in stack backtraces. --- erts/emulator/beam/erl_printf_term.c | 95 +++++++++++++++++-- lib/observer/src/crashdump_viewer.erl | 3 +- lib/observer/src/observer_html_lib.erl | 13 ++- .../test/crashdump_helper_unicode.erl | 8 +- lib/observer/test/crashdump_viewer_SUITE.erl | 19 +++- 5 files changed, 123 insertions(+), 15 deletions(-) diff --git a/erts/emulator/beam/erl_printf_term.c b/erts/emulator/beam/erl_printf_term.c index 60bb09f0c6..47bbf588be 100644 --- a/erts/emulator/beam/erl_printf_term.c +++ b/erts/emulator/beam/erl_printf_term.c @@ -139,8 +139,6 @@ do { \ #define IS_CNTRL(c) ((c) < ' ' || (c) >= 127) #endif -#define IS_PRINT(c) (!IS_CNTRL(c)) - /* return 0 if list is not a non-empty flat list of printable characters */ static int @@ -185,7 +183,36 @@ static int is_printable_ascii(byte* bytep, Uint bytesize, Uint bitoffs) return 1; } -/* print a atom doing what quoting is necessary */ +/* + * Helper function for print_atom_name(). Not generally useful. + */ +static ERTS_INLINE int latin1_char(int c1, int c2) +{ + if ((c1 & 0x80) == 0) { + /* Plain old 7-bit ASCII. */ + return c1; + } else if ((c1 & 0xE0) == 0xC0) { + /* Unicode code points from 0x80 through 0x7FF. */ + ASSERT((c2 & 0xC0) == 0x80); + return (c1 & 0x1F) << 6 | (c2 & 0x3F); + } else if ((c1 & 0xC0) == 0x80) { + /* A continutation byte in a utf8 sequence. Pretend that it is + * a character that is allowed in an atom. */ + return 'a'; + } else { + /* The start of a utf8 sequence comprising three or four + * bytes. Always needs quoting. */ + return 0; + } +} + +/* + * Print a atom, quoting it if necessary. + * + * Atoms are encoded in utf8. Since we have full control over creation + * of atoms, the utf8 encoding is always correct and there is no need + * to check for errors. + */ static int print_atom_name(fmtfn_t fn, void* arg, Eterm atom, long *dcount) { int n, i; @@ -195,6 +222,7 @@ static int print_atom_name(fmtfn_t fn, void* arg, Eterm atom, long *dcount) byte *s; byte *cpos; int c; + int lc; res = 0; i = atom_val(atom); @@ -212,27 +240,47 @@ static int print_atom_name(fmtfn_t fn, void* arg, Eterm atom, long *dcount) *dcount -= atom_tab(i)->len; if (n == 0) { + /* The empty atom: '' */ PRINT_STRING(res, fn, arg, "''"); return res; } + /* + * Find out whether the atom will need quoting. Quoting is not necessary + * if the following applies: + * + * - The first character is a lowercase letter in the Latin-1 code + * block (0-255). + * + * - All other characters are either alphanumeric characters in + * the Latin-1 code block or the character '_'. + */ need_quote = 0; cpos = s; pos = n - 1; - c = *cpos++; - if (!IS_LOWER(c)) + lc = latin1_char(c, *cpos); + if (!IS_LOWER(lc)) need_quote++; else { while (pos--) { c = *cpos++; - if (!IS_ALNUM(c) && (c != '_')) { + lc = latin1_char(c, *cpos); + if (!IS_ALNUM(lc) && lc != '_') { need_quote++; break; } } } + + /* + * Now output the atom, including single quotes if needed. + * + * Control characters (including the range 128-159) must + * be specially printed. Therefore, we must do a partial + * decoding of the utf8 encoding. + */ cpos = s; pos = n; if (need_quote) @@ -249,12 +297,40 @@ static int print_atom_name(fmtfn_t fn, void* arg, Eterm atom, long *dcount) case '\b': PRINT_STRING(res, fn, arg, "\\b"); break; case '\v': PRINT_STRING(res, fn, arg, "\\v"); break; default: - if (IS_CNTRL(c)) { + if (c < ' ') { + /* ASCII control character (0-31). */ PRINT_CHAR(res, fn, arg, '\\'); PRINT_UWORD(res, fn, arg, 'o', 1, 3, (ErlPfUWord) c); - } - else + } else if (c >= 0x80) { + /* A multi-byte utf8-encoded code point. Determine the + * length of the sequence. */ + int n; + if ((c & 0xE0) == 0xC0) { + n = 2; + } else if ((c & 0xF0) == 0xE0) { + n = 3; + } else { + ASSERT((c & 0xF8) == 0xF0); + n = 4; + } + ASSERT(pos - n + 1 >= 0); + + if (c == 0xC2 && *cpos < 0xA0) { + /* Extended ASCII control character (128-159). */ + ASSERT(pos > 0); + ASSERT(0x80 <= *cpos); + PRINT_CHAR(res, fn, arg, '\\'); + PRINT_UWORD(res, fn, arg, 'o', 1, 3, (ErlPfUWord) *cpos); + pos--, cpos++; + } else { + PRINT_BUF(res, fn, arg, cpos-1, n); + cpos += n - 1; + pos -= n - 1; + } + } else { + /* Printable ASCII character. */ PRINT_CHAR(res, fn, arg, (char) c); + } break; } } @@ -263,7 +339,6 @@ static int print_atom_name(fmtfn_t fn, void* arg, Eterm atom, long *dcount) return res; } - #define PRT_BAR ((Eterm) 0) #define PRT_COMMA ((Eterm) 1) #define PRT_CLOSE_LIST ((Eterm) 2) diff --git a/lib/observer/src/crashdump_viewer.erl b/lib/observer/src/crashdump_viewer.erl index 2e3403325c..9ad281beeb 100644 --- a/lib/observer/src/crashdump_viewer.erl +++ b/lib/observer/src/crashdump_viewer.erl @@ -2943,7 +2943,8 @@ parse_term([$p|Line0], _, D) -> %Port. {Port,Line} = get_id(Line0), {['#CDVPort'|Port],Line,D}; parse_term([$S|Str0], _, D) -> %Information string. - Str = lists:reverse(skip_blanks(lists:reverse(Str0))), + Str1 = byte_list_to_string(Str0), + Str = lists:reverse(skip_blanks(lists:reverse(Str1))), {Str,[],D}; parse_term([$D|Line0], DecodeOpts, D) -> %DistExternal try diff --git a/lib/observer/src/observer_html_lib.erl b/lib/observer/src/observer_html_lib.erl index 4c92a8faab..4cce12dde8 100644 --- a/lib/observer/src/observer_html_lib.erl +++ b/lib/observer/src/observer_html_lib.erl @@ -138,8 +138,17 @@ msgq_table(Tab,Msg0, Id, Even, Colors) -> tr(color(Even, Colors),[td(integer_to_list(Id)), td(pre(Msg))]). stackdump_table(Tab,{Label0,Term0},Even, Colors) -> - Label = io_lib:format("~w",[Label0]), - Term = all_or_expand(Tab,Term0), + Label = io_lib:format("~ts",[Label0]), + Term = case atom_to_list(Label0) of + "y" ++ _ -> + %% Any term is possible, including huge ones. + all_or_expand(Tab,Term0); + _ -> + %% Return address or catch tag. It is known to be a + %% flat list, shortish, possibly containing characters + %% greater than 255. + href_proc_port(Term0) + end, tr(color(Even, Colors), [td("VALIGN=center",pre(Label)), td(pre(Term))]). dict_table(Tab,{Key0,Value0}, Even, Colors) -> diff --git a/lib/observer/test/crashdump_helper_unicode.erl b/lib/observer/test/crashdump_helper_unicode.erl index 60c3d20315..59cebef4fb 100644 --- a/lib/observer/test/crashdump_helper_unicode.erl +++ b/lib/observer/test/crashdump_helper_unicode.erl @@ -1,6 +1,6 @@ -module(crashdump_helper_unicode). -behaviour(gen_server). --export([start/0, init/1, handle_call/3, handle_cast/2]). +-export([start/0, init/1, handle_call/3, handle_cast/2, 'спутник'/0]). -record(state, {s,a,b,lb}). start() -> @@ -8,6 +8,9 @@ start() -> init([]) -> process_flag(trap_exit, true), + process_flag(save_calls, 10), + erlang:yield(), + ?MODULE:'спутник'(), ets:new('tab_αβ',[set,named_table]), Bin = <<"bin αβ"/utf8>>, LongBin = <<"long bin αβ - a utf8 binary which can be expanded αβ"/utf8>>, @@ -20,3 +23,6 @@ handle_call(_Info, _From, State) -> {reply, ok, State}. handle_cast(_Info, State) -> {noreply, State}. + +'спутник'() -> + ok. diff --git a/lib/observer/test/crashdump_viewer_SUITE.erl b/lib/observer/test/crashdump_viewer_SUITE.erl index e4c4c4decb..7ed58a143c 100644 --- a/lib/observer/test/crashdump_viewer_SUITE.erl +++ b/lib/observer/test/crashdump_viewer_SUITE.erl @@ -593,9 +593,17 @@ special(File,Procs) -> #proc{pid=Pid0} = lists:keyfind("'unicode_reg_name_αβ'",#proc.name,Procs), Pid = pid_to_list(Pid0), - {ok,#proc{},[]} = crashdump_viewer:proc_details(Pid), + {ok,Proc,[]} = crashdump_viewer:proc_details(Pid), + #proc{last_calls=LastCalls,stack_dump=Stk} = Proc, io:format(" unicode registered name ok",[]), + ["crashdump_helper_unicode:'спутник'/0", + "ets:new/2"|_] = lists:reverse(LastCalls), + io:format(" last calls ok",[]), + + verify_unicode_stack(Stk), + io:format(" unicode stack values ok",[]), + {ok,[#ets_table{id="'tab_αβ'",name="'tab_αβ'"}],[]} = crashdump_viewer:ets_tables(Pid), io:format(" unicode table name ok",[]), @@ -653,6 +661,15 @@ special(File,Procs) -> end, ok. +verify_unicode_stack([{_,{state,Str,Atom,Bin,LongBin}}|_]) -> + 'unicode_atom_αβ' = Atom, + "unicode_string_αβ" = Str, + <<"bin αβ"/utf8>> = Bin, + <<"long bin αβ - a utf8 binary which can be expanded αβ"/utf8>> = LongBin, + ok; +verify_unicode_stack([_|T]) -> + verify_unicode_stack(T). + verify_binaries([H|T1], [H|T2]) -> %% Heap binary. verify_binaries(T1, T2); -- 2.26.2
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor