Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
home:Ledest:erlang:23
erlang
5861-erl_scan-Stricten-tests-for-integers.patch
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File 5861-erl_scan-Stricten-tests-for-integers.patch of Package erlang
From f2c7365440f549233bdc45a769c7a909064442ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org> Date: Thu, 26 May 2022 10:07:52 +0200 Subject: [PATCH 1/7] erl_scan: Stricten tests for integers erl_scan should not accept floating point numbers in strings. Adding these stricter integer tests will also allow the JIT to generate more compact native code. While at it, also rewrite old-style `catch` to `try`...`catch`. --- lib/stdlib/src/erl_scan.erl | 233 +++++++++++++++++++---------- lib/stdlib/test/erl_scan_SUITE.erl | 7 +- 2 files changed, 156 insertions(+), 84 deletions(-) diff --git a/lib/stdlib/src/erl_scan.erl b/lib/stdlib/src/erl_scan.erl index f2e9d2d7b9..e7e2582c03 100644 --- a/lib/stdlib/src/erl_scan.erl +++ b/lib/stdlib/src/erl_scan.erl @@ -264,15 +264,15 @@ string_thing(_) -> "string". -define(WHITE_SPACE(C), is_integer(C) andalso (C >= $\000 andalso C =< $\s orelse C >= $\200 andalso C =< $\240)). --define(DIGIT(C), C >= $0 andalso C =< $9). --define(CHAR(C), is_integer(C), C >= 0). +-define(DIGIT(C), (is_integer(C) andalso $0 =< C andalso C =< $9)). +-define(CHAR(C), (is_integer(C) andalso 0 =< C andalso C < 16#110000)). -define(UNICODE(C), - is_integer(C) andalso + (is_integer(C) andalso (C >= 0 andalso C < 16#D800 orelse C > 16#DFFF andalso C < 16#FFFE orelse - C > 16#FFFF andalso C =< 16#10FFFF)). + C > 16#FFFF andalso C =< 16#10FFFF))). --define(UNI255(C), C >= 0, C =< 16#ff). +-define(UNI255(C), (is_integer(C) andalso 0 =< C andalso C =< 16#ff)). options(Opts0) when is_list(Opts0) -> Opts = lists:foldr(fun expand_opt/2, [], Opts0), @@ -363,7 +363,7 @@ string1(Cs, St, Line, Col, Toks) -> Error end. -scan(Cs, St, Line, Col, Toks, _) -> +scan(Cs, #erl_scan{}=St, Line, Col, Toks, _) -> scan1(Cs, St, Line, Col, Toks). scan1([$\s|Cs], St, Line, Col, Toks) when St#erl_scan.ws -> @@ -374,10 +374,6 @@ scan1([$\n|Cs], St, Line, Col, Toks) when St#erl_scan.ws -> scan_newline(Cs, St, Line, Col, Toks); scan1([$\n|Cs], St, Line, Col, Toks) -> skip_white_space(Cs, St, Line+1, new_column(Col, 1), Toks, 0); -scan1([C|Cs], St, Line, Col, Toks) when C >= $A, C =< $Z -> - scan_variable(Cs, St, Line, Col, Toks, [C]); -scan1([C|Cs], St, Line, Col, Toks) when C >= $a, C =< $z -> - scan_atom(Cs, St, Line, Col, Toks, [C]); %% Optimization: some very common punctuation characters: scan1([$,|Cs], St, Line, Col, Toks) -> tok2(Cs, St, Line, Col, Toks, ",", ',', 1); @@ -397,11 +393,17 @@ scan1([$;|Cs], St, Line, Col, Toks) -> tok2(Cs, St, Line, Col, Toks, ";", ';', 1); scan1([$_=C|Cs], St, Line, Col, Toks) -> scan_variable(Cs, St, Line, Col, Toks, [C]); -%% More punctuation characters below. scan1([$\%|Cs], St, Line, Col, Toks) when not St#erl_scan.comment -> skip_comment(Cs, St, Line, Col, Toks, 1); scan1([$\%=C|Cs], St, Line, Col, Toks) -> scan_comment(Cs, St, Line, Col, Toks, [C]); +%% More punctuation characters below. +scan1([C|_], _St, _Line, _Col0, _Toks) when not ?CHAR(C) -> + error({not_character,C}); +scan1([C|Cs], St, Line, Col, Toks) when C >= $A, C =< $Z -> + scan_variable(Cs, St, Line, Col, Toks, [C]); +scan1([C|Cs], St, Line, Col, Toks) when C >= $a, C =< $z -> + scan_atom(Cs, St, Line, Col, Toks, [C]); scan1([C|Cs], St, Line, Col, Toks) when ?DIGIT(C) -> scan_number(Cs, St, Line, Col, Toks, [C], no_underscore); scan1("..."++Cs, St, Line, Col, Toks) -> @@ -557,39 +559,49 @@ scan1([]=Cs, _St, Line, Col, Toks) -> scan1(eof=Cs, _St, Line, Col, Toks) -> {ok,Toks,Cs,Line,Col}. +scan_atom_fun(Cs, #erl_scan{}=St, Line, Col, Toks, Ncs) -> + scan_atom(Cs, St, Line, Col, Toks, Ncs). + scan_atom(Cs0, St, Line, Col, Toks, Ncs0) -> case scan_name(Cs0, Ncs0) of {more,Ncs} -> - {more,{[],Col,Toks,Line,Ncs,fun scan_atom/6}}; + {more,{[],Col,Toks,Line,Ncs,fun scan_atom_fun/6}}; {Wcs,Cs} -> - case catch list_to_atom(Wcs) of - Name when is_atom(Name) -> + try list_to_atom(Wcs) of + Name -> case (St#erl_scan.resword_fun)(Name) of true -> tok2(Cs, St, Line, Col, Toks, Wcs, Name); false -> tok3(Cs, St, Line, Col, Toks, atom, Wcs, Name) - end; - _Error -> + end + catch + _:_ -> Ncol = incr_column(Col, length(Wcs)), scan_error({illegal,atom}, Line, Col, Line, Ncol, Cs) end end. +scan_variable_fun(Cs, #erl_scan{}=St, Line, Col, Toks, Ncs) -> + scan_variable(Cs, St, Line, Col, Toks, Ncs). + scan_variable(Cs0, St, Line, Col, Toks, Ncs0) -> case scan_name(Cs0, Ncs0) of {more,Ncs} -> - {more,{[],Col,Toks,Line,Ncs,fun scan_variable/6}}; + {more,{[],Col,Toks,Line,Ncs,fun scan_variable_fun/6}}; {Wcs,Cs} -> - case catch list_to_atom(Wcs) of - Name when is_atom(Name) -> - tok3(Cs, St, Line, Col, Toks, var, Wcs, Name); - _Error -> + try list_to_atom(Wcs) of + Name -> + tok3(Cs, St, Line, Col, Toks, var, Wcs, Name) + catch + _:_ -> Ncol = incr_column(Col, length(Wcs)), scan_error({illegal,var}, Line, Col, Line, Ncol, Cs) end end. +scan_name([C|_]=Cs, Ncs) when not ?CHAR(C) -> + {lists:reverse(Ncs),Cs}; scan_name([C|Cs], Ncs) when C >= $a, C =< $z -> scan_name(Cs, [C|Ncs]); scan_name([C|Cs], Ncs) when C >= $A, C =< $Z -> @@ -663,20 +675,31 @@ scan_newline([], _St, Line, Col, Toks) -> scan_newline(Cs, St, Line, Col, Toks) -> scan_nl_white_space(Cs, St, Line, Col, Toks, "\n"). +scan_nl_spcs_fun(Cs, #erl_scan{}=St, Line, Col, Toks, N) + when is_integer(N) -> + scan_nl_spcs(Cs, St, Line, Col, Toks, N). + scan_nl_spcs([$\s|Cs], St, Line, Col, Toks, N) when N < 17 -> scan_nl_spcs(Cs, St, Line, Col, Toks, N+1); scan_nl_spcs([]=Cs, _St, Line, Col, Toks, N) -> - {more,{Cs,Col,Toks,Line,N,fun scan_nl_spcs/6}}; + {more,{Cs,Col,Toks,Line,N,fun scan_nl_spcs_fun/6}}; scan_nl_spcs(Cs, St, Line, Col, Toks, N) -> newline_end(Cs, St, Line, Col, Toks, N, nl_spcs(N)). +scan_nl_tabs_fun(Cs, #erl_scan{}=St, Line, Col, Toks, N) + when is_integer(N) -> + scan_nl_tabs(Cs, St, Line, Col, Toks, N). + scan_nl_tabs([$\t|Cs], St, Line, Col, Toks, N) when N < 11 -> scan_nl_tabs(Cs, St, Line, Col, Toks, N+1); scan_nl_tabs([]=Cs, _St, Line, Col, Toks, N) -> - {more,{Cs,Col,Toks,Line,N,fun scan_nl_tabs/6}}; + {more,{Cs,Col,Toks,Line,N,fun scan_nl_tabs_fun/6}}; scan_nl_tabs(Cs, St, Line, Col, Toks, N) -> newline_end(Cs, St, Line, Col, Toks, N, nl_tabs(N)). +scan_nl_white_space_fun(Cs, #erl_scan{}=St, Line, Col, Toks, Ncs) -> + scan_nl_white_space(Cs, St, Line, Col, Toks, Ncs). + %% Note: returning {more,Cont} is meaningless here; one could just as %% well return several tokens. But since tokens() scans up to a full %% stop anyway, nothing is gained by not collecting all white spaces. @@ -689,10 +712,11 @@ scan_nl_white_space([$\n|Cs], St, Line, Col, Toks, Ncs0) -> Anno = anno(Line, Col, St, Ncs), Token = {white_space,Anno,Ncs}, scan_newline(Cs, St, Line+1, new_column(Col, length(Ncs)), [Token|Toks]); -scan_nl_white_space([C|Cs], St, Line, Col, Toks, Ncs) when ?WHITE_SPACE(C) -> +scan_nl_white_space([C|Cs], St, Line, Col, Toks, Ncs) + when ?WHITE_SPACE(C) -> scan_nl_white_space(Cs, St, Line, Col, Toks, [C|Ncs]); scan_nl_white_space([]=Cs, _St, Line, Col, Toks, Ncs) -> - {more,{Cs,Col,Toks,Line,Ncs,fun scan_nl_white_space/6}}; + {more,{Cs,Col,Toks,Line,Ncs,fun scan_nl_white_space_fun/6}}; scan_nl_white_space(Cs, #erl_scan{text = false}=St, Line, no_col=Col, Toks, Ncs) -> Anno = anno(Line), @@ -706,40 +730,54 @@ scan_nl_white_space(Cs, St, Line, Col, Toks, Ncs0) -> newline_end(Cs, #erl_scan{text = false}=St, Line, no_col=Col, Toks, _N, Ncs) -> scan1(Cs, St, Line+1, Col, [{white_space,anno(Line),Ncs}|Toks]); -newline_end(Cs, St, Line, Col, Toks, N, Ncs) -> +newline_end(Cs, #erl_scan{}=St, Line, Col, Toks, N, Ncs) -> Anno = anno(Line, Col, St, Ncs), scan1(Cs, St, Line+1, new_column(Col, N), [{white_space,Anno,Ncs}|Toks]). +scan_spcs_fun(Cs, #erl_scan{}=St, Line, Col, Toks, N) + when is_integer(N), N >= 1 -> + scan_spcs(Cs, St, Line, Col, Toks, N). + scan_spcs([$\s|Cs], St, Line, Col, Toks, N) when N < 16 -> scan_spcs(Cs, St, Line, Col, Toks, N+1); scan_spcs([]=Cs, _St, Line, Col, Toks, N) -> - {more,{Cs,Col,Toks,Line,N,fun scan_spcs/6}}; + {more,{Cs,Col,Toks,Line,N,fun scan_spcs_fun/6}}; scan_spcs(Cs, St, Line, Col, Toks, N) -> white_space_end(Cs, St, Line, Col, Toks, N, spcs(N)). +scan_tabs_fun(Cs, #erl_scan{}=St, Line, Col, Toks, N) + when is_integer(N), N >= 1 -> + scan_tabs(Cs, St, Line, Col, Toks, N). + scan_tabs([$\t|Cs], St, Line, Col, Toks, N) when N < 10 -> scan_tabs(Cs, St, Line, Col, Toks, N+1); scan_tabs([]=Cs, _St, Line, Col, Toks, N) -> - {more,{Cs,Col,Toks,Line,N,fun scan_tabs/6}}; + {more,{Cs,Col,Toks,Line,N,fun scan_tabs_fun/6}}; scan_tabs(Cs, St, Line, Col, Toks, N) -> white_space_end(Cs, St, Line, Col, Toks, N, tabs(N)). +skip_white_space_fun(Cs, #erl_scan{}=St, Line, Col, Toks, N) -> + skip_white_space(Cs, St, Line, Col, Toks, N). + skip_white_space([$\n|Cs], St, Line, Col, Toks, _N) -> skip_white_space(Cs, St, Line+1, new_column(Col, 1), Toks, 0); skip_white_space([C|Cs], St, Line, Col, Toks, N) when ?WHITE_SPACE(C) -> skip_white_space(Cs, St, Line, Col, Toks, N+1); skip_white_space([]=Cs, _St, Line, Col, Toks, N) -> - {more,{Cs,Col,Toks,Line,N,fun skip_white_space/6}}; + {more,{Cs,Col,Toks,Line,N,fun skip_white_space_fun/6}}; skip_white_space(Cs, St, Line, Col, Toks, N) -> scan1(Cs, St, Line, incr_column(Col, N), Toks). +scan_white_space_fun(Cs, #erl_scan{}=St, Line, Col, Toks, Ncs) -> + scan_white_space(Cs, St, Line, Col, Toks, Ncs). + %% Maybe \t and \s should break the loop. scan_white_space([$\n|_]=Cs, St, Line, Col, Toks, Ncs) -> white_space_end(Cs, St, Line, Col, Toks, length(Ncs), lists:reverse(Ncs)); scan_white_space([C|Cs], St, Line, Col, Toks, Ncs) when ?WHITE_SPACE(C) -> scan_white_space(Cs, St, Line, Col, Toks, [C|Ncs]); scan_white_space([]=Cs, _St, Line, Col, Toks, Ncs) -> - {more,{Cs,Col,Toks,Line,Ncs,fun scan_white_space/6}}; + {more,{Cs,Col,Toks,Line,Ncs,fun scan_white_space_fun/6}}; scan_white_space(Cs, St, Line, Col, Toks, Ncs) -> white_space_end(Cs, St, Line, Col, Toks, length(Ncs), lists:reverse(Ncs)). @@ -778,7 +816,7 @@ scan_char([], _St, Line, Col, Toks) -> scan_char(eof, _St, Line, Col, _Toks) -> scan_error(char, Line, Col, Line, incr_column(Col, 1), eof). -scan_string(Cs, St, Line, Col, Toks, {Wcs,Str,Line0,Col0}) -> +scan_string(Cs, #erl_scan{}=St, Line, Col, Toks, {Wcs,Str,Line0,Col0}) -> case scan_string0(Cs, St, Line, Col, $\", Str, Wcs) of %" {more,Ncs,Nline,Ncol,Nstr,Nwcs} -> State = {Nwcs,Nstr,Line0,Col0}, @@ -793,7 +831,7 @@ scan_string(Cs, St, Line, Col, Toks, {Wcs,Str,Line0,Col0}) -> scan1(Ncs, St, Nline, Ncol, [{string,Anno,Nwcs}|Toks]) end. -scan_qatom(Cs, St, Line, Col, Toks, {Wcs,Str,Line0,Col0}) -> +scan_qatom(Cs, #erl_scan{}=St, Line, Col, Toks, {Wcs,Str,Line0,Col0}) -> case scan_string0(Cs, St, Line, Col, $\', Str, Wcs) of %' {more,Ncs,Nline,Ncol,Nstr,Nwcs} -> State = {Nwcs,Nstr,Line0,Col0}, @@ -803,12 +841,13 @@ scan_qatom(Cs, St, Line, Col, Toks, {Wcs,Str,Line0,Col0}) -> {error,Nline,Ncol,Nwcs,Ncs} -> Estr = string:slice(Nwcs, 0, 16), % Expanded escape chars. scan_error({string,$\',Estr}, Line0, Col0, Nline, Ncol, Ncs); %' - {Ncs,Nline,Ncol,Nstr,Nwcs} -> - case catch list_to_atom(Nwcs) of + {Ncs,Nline,Ncol,Nstr,Nwcs} -> + try list_to_atom(Nwcs) of A when is_atom(A) -> Anno = anno(Line0, Col0, St, Nstr), - scan1(Ncs, St, Nline, Ncol, [{atom,Anno,A}|Toks]); - _ -> + scan1(Ncs, St, Nline, Ncol, [{atom,Anno,A}|Toks]) + catch + _:_ -> scan_error({illegal,atom}, Line0, Col0, Nline, Ncol, Ncs) end end. @@ -884,10 +923,11 @@ scan_string1([]=Cs, Line, Col, _Q, Str, Wcs) -> scan_string1(eof, Line, Col, _Q, _Str, Wcs) -> {error,Line,Col,lists:reverse(Wcs),eof}. --define(OCT(C), C >= $0, C =< $7). --define(HEX(C), C >= $0 andalso C =< $9 orelse - C >= $A andalso C =< $F orelse - C >= $a andalso C =< $f). +-define(OCT(C), (is_integer(C) andalso $0 =< C andalso C =< $7)). +-define(HEX(C), (is_integer(C) andalso + (C >= $0 andalso C =< $9 orelse + C >= $A andalso C =< $F orelse + C >= $a andalso C =< $f))). %% \<1-3> octal digits scan_escape([O1,O2,O3|Cs], Col) when ?OCT(O1), ?OCT(O2), ?OCT(O3) -> @@ -939,26 +979,31 @@ scan_escape([], _Col) -> scan_escape(eof, Col) -> {eof,Col}. -scan_hex([C|Cs], no_col=Col, Wcs) when ?HEX(C) -> - scan_hex(Cs, Col, [C|Wcs]); scan_hex([C|Cs], Col, Wcs) when ?HEX(C) -> - scan_hex(Cs, Col+1, [C|Wcs]); + scan_hex(Cs, incr_column(Col, 1), [C|Wcs]); scan_hex(Cs, Col, Wcs) -> - scan_esc_end(Cs, Col, Wcs, 16, "x{"). + scan_hex_end(Cs, Col, Wcs, "x{"). -scan_esc_end([$}|Cs], Col, Wcs0, B, Str0) -> +scan_hex_end([$}|Cs], Col, [], _Str) -> + %% Empty escape sequence. + {error,Cs,{illegal,character},incr_column(Col, 1)}; +scan_hex_end([$}|Cs], Col, Wcs0, Str0) -> Wcs = lists:reverse(Wcs0), - case catch erlang:list_to_integer(Wcs, B) of + try list_to_integer(Wcs, 16) of Val when ?UNICODE(Val) -> {Val,Str0++Wcs++[$}],Cs,incr_column(Col, 1)}; - _ -> + _Val -> + {error,Cs,{illegal,character},incr_column(Col, 1)} + catch + error:system_limit -> + %% Extremely unlikely to occur in practice. {error,Cs,{illegal,character},incr_column(Col, 1)} end; -scan_esc_end([], _Col, _Wcs, _B, _Str0) -> +scan_hex_end([], _Col, _Wcs, _Str0) -> more; -scan_esc_end(eof, Col, _Wcs, _B, _Str0) -> +scan_hex_end(eof, Col, _Wcs, _Str0) -> {eof,Col}; -scan_esc_end(Cs, Col, _Wcs, _B, _Str0) -> +scan_hex_end(Cs, Col, _Wcs, _Str0) -> {error,Cs,{illegal,character},Col}. escape_char($n) -> $\n; % \n = LF @@ -972,7 +1017,7 @@ caret_char_code($?) -> 16#7f; caret_char_code(C) when $@ =< C, C =< $_; $a =< C, C =< $z -> C band 16#1f; caret_char_code(_) -> error. -scan_number(Cs, St, Line, Col, Toks, {Ncs, Us}) -> +scan_number(Cs, #erl_scan{}=St, Line, Col, Toks, {Ncs, Us}) -> scan_number(Cs, St, Line, Col, Toks, Ncs, Us). scan_number([C|Cs], St, Line, Col, Toks, Ncs, Us) when ?DIGIT(C) -> @@ -988,22 +1033,28 @@ scan_number([$.]=Cs, _St, Line, Col, Toks, Ncs, Us) -> {more,{Cs,Col,Toks,Line,{Ncs,Us},fun scan_number/6}}; scan_number([$#|Cs]=Cs0, St, Line, Col, Toks, Ncs0, Us) -> Ncs = lists:reverse(Ncs0), - case catch list_to_integer(remove_digit_separators(Ncs, Us)) of - B when B >= 2, B =< 1+$Z-$A+10 -> + try list_to_integer(remove_digit_separators(Ncs, Us)) of + B when is_integer(B), 2 =< B, B =< 1+$Z-$A+10 -> Bcs = Ncs++[$#], scan_based_int(Cs, St, Line, Col, Toks, B, [], Bcs, no_underscore); - B -> + B when is_integer(B) -> Len = length(Ncs), scan_error({base,B}, Line, Col, Line, incr_column(Col, Len), Cs0) + catch + error:system_limit -> + %% Extremely unlikely to occur in practice. + scan_error({illegal,base}, Line, Col, Line, Col, Cs0) end; scan_number([]=Cs, _St, Line, Col, Toks, Ncs, Us) -> {more,{Cs,Col,Toks,Line,{Ncs,Us},fun scan_number/6}}; scan_number(Cs, St, Line, Col, Toks, Ncs0, Us) -> Ncs = lists:reverse(Ncs0), - case catch list_to_integer(remove_digit_separators(Ncs, Us)) of - N when is_integer(N) -> - tok3(Cs, St, Line, Col, Toks, integer, Ncs, N); - _ -> + try list_to_integer(remove_digit_separators(Ncs, Us), 10) of + N -> + tok3(Cs, St, Line, Col, Toks, integer, Ncs, N) + catch + error:system_limit -> + %% Extremely unlikely to occur in practice. Ncol = incr_column(Col, length(Ncs)), scan_error({illegal,integer}, Line, Col, Line, Ncol, Cs) end. @@ -1014,11 +1065,14 @@ remove_digit_separators(Number, with_underscore) -> [C || C <- Number, C =/= $_]. -define(BASED_DIGIT(C, B), - ((?DIGIT(C) andalso C < $0 + B) - orelse (C >= $A andalso B > 10 andalso C < $A + B - 10) - orelse (C >= $a andalso B > 10 andalso C < $a + B - 10))). - -scan_based_int(Cs, St, Line, Col, Toks, {B,NCs,BCs,Us}) -> + (is_integer(C) + andalso + ((?DIGIT(C) andalso C < $0 + B) + orelse (C >= $A andalso B > 10 andalso C < $A + B - 10) + orelse (C >= $a andalso B > 10 andalso C < $a + B - 10)))). + +scan_based_int(Cs, #erl_scan{}=St, Line, Col, Toks, {B,NCs,BCs,Us}) + when is_integer(B), 2 =< B, B =< 1+$Z-$A+10 -> scan_based_int(Cs, St, Line, Col, Toks, B, NCs, BCs, Us). scan_based_int([C|Cs], St, Line, Col, Toks, B, Ncs, Bcs, Us) when @@ -1032,18 +1086,25 @@ scan_based_int([$_]=Cs, _St, Line, Col, Toks, B, NCs, BCs, Us) -> {more,{Cs,Col,Toks,Line,{B,NCs,BCs,Us},fun scan_based_int/6}}; scan_based_int([]=Cs, _St, Line, Col, Toks, B, NCs, BCs, Us) -> {more,{Cs,Col,Toks,Line,{B,NCs,BCs,Us},fun scan_based_int/6}}; -scan_based_int(Cs, St, Line, Col, Toks, B, Ncs0, Bcs, Us) -> +scan_based_int(Cs, _St, Line, Col, _Toks, _B, [], Bcs, _Us) -> + %% No actual digits following the base. + Len = length(Bcs), + Ncol = incr_column(Col, Len), + scan_error({illegal,integer}, Line, Col, Line, Ncol, Cs); +scan_based_int(Cs, St, Line, Col, Toks, B, Ncs0, [_|_]=Bcs, Us) -> Ncs = lists:reverse(Ncs0), - case catch erlang:list_to_integer(remove_digit_separators(Ncs, Us), B) of - N when is_integer(N) -> - tok3(Cs, St, Line, Col, Toks, integer, Bcs++Ncs, N); - _ -> + try list_to_integer(remove_digit_separators(Ncs, Us), B) of + N -> + tok3(Cs, St, Line, Col, Toks, integer, Bcs++Ncs, N) + catch + error:system_limit -> + %% Extremely unlikely to occur in practice. Len = length(Bcs)+length(Ncs), Ncol = incr_column(Col, Len), scan_error({illegal,integer}, Line, Col, Line, Ncol, Cs) end. -scan_fraction(Cs, St, Line, Col, Toks, {Ncs,Us}) -> +scan_fraction(Cs, #erl_scan{}=St, Line, Col, Toks, {Ncs,Us}) -> scan_fraction(Cs, St, Line, Col, Toks, Ncs, Us). scan_fraction([C|Cs], St, Line, Col, Toks, Ncs, Us) when ?DIGIT(C) -> @@ -1060,7 +1121,7 @@ scan_fraction([]=Cs, _St, Line, Col, Toks, Ncs, Us) -> scan_fraction(Cs, St, Line, Col, Toks, Ncs, Us) -> float_end(Cs, St, Line, Col, Toks, Ncs, Us). -scan_exponent_sign(Cs, St, Line, Col, Toks, {Ncs, Us}) -> +scan_exponent_sign(Cs, #erl_scan{}=St, Line, Col, Toks, {Ncs, Us}) -> scan_exponent_sign(Cs, St, Line, Col, Toks, Ncs, Us). scan_exponent_sign([C|Cs], St, Line, Col, Toks, Ncs, Us) when @@ -1071,7 +1132,7 @@ scan_exponent_sign([]=Cs, _St, Line, Col, Toks, Ncs, Us) -> scan_exponent_sign(Cs, St, Line, Col, Toks, Ncs, Us) -> scan_exponent(Cs, St, Line, Col, Toks, Ncs, Us). -scan_exponent(Cs, St, Line, Col, Toks, {Ncs, Us}) -> +scan_exponent(Cs, #erl_scan{}=St, Line, Col, Toks, {Ncs, Us}) -> scan_exponent(Cs, St, Line, Col, Toks, Ncs, Us). scan_exponent([C|Cs], St, Line, Col, Toks, Ncs, Us) when ?DIGIT(C) -> @@ -1088,14 +1149,18 @@ scan_exponent(Cs, St, Line, Col, Toks, Ncs, Us) -> float_end(Cs, St, Line, Col, Toks, Ncs0, Us) -> Ncs = lists:reverse(Ncs0), - case catch list_to_float(remove_digit_separators(Ncs, Us)) of - F when is_float(F) -> - tok3(Cs, St, Line, Col, Toks, float, Ncs, F); - _ -> + try list_to_float(remove_digit_separators(Ncs, Us)) of + F -> + tok3(Cs, St, Line, Col, Toks, float, Ncs, F) + catch + _:_ -> Ncol = incr_column(Col, length(Ncs)), scan_error({illegal,float}, Line, Col, Line, Ncol, Cs) end. +skip_comment_fun(Cs, #erl_scan{}=St, Line, Col, Toks, N) -> + skip_comment(Cs, St, Line, Col, Toks, N). + skip_comment([C|Cs], St, Line, Col, Toks, N) when C =/= $\n, ?CHAR(C) -> case ?UNICODE(C) of true -> @@ -1105,11 +1170,15 @@ skip_comment([C|Cs], St, Line, Col, Toks, N) when C =/= $\n, ?CHAR(C) -> scan_error({illegal,character}, Line, Col, Line, Ncol, Cs) end; skip_comment([]=Cs, _St, Line, Col, Toks, N) -> - {more,{Cs,Col,Toks,Line,N,fun skip_comment/6}}; + {more,{Cs,Col,Toks,Line,N,fun skip_comment_fun/6}}; skip_comment(Cs, St, Line, Col, Toks, N) -> scan1(Cs, St, Line, incr_column(Col, N), Toks). -scan_comment([C|Cs], St, Line, Col, Toks, Ncs) when C =/= $\n, ?CHAR(C) -> +scan_comment_fun(Cs, #erl_scan{}=St, Line, Col, Toks, Ncs) -> + scan_comment(Cs, St, Line, Col, Toks, Ncs). + +scan_comment([C|Cs], St, Line, Col, Toks, Ncs) + when C =/= $\n, ?CHAR(C) -> case ?UNICODE(C) of true -> scan_comment(Cs, St, Line, Col, Toks, [C|Ncs]); @@ -1118,33 +1187,33 @@ scan_comment([C|Cs], St, Line, Col, Toks, Ncs) when C =/= $\n, ?CHAR(C) -> scan_error({illegal,character}, Line, Col, Line, Ncol, Cs) end; scan_comment([]=Cs, _St, Line, Col, Toks, Ncs) -> - {more,{Cs,Col,Toks,Line,Ncs,fun scan_comment/6}}; + {more,{Cs,Col,Toks,Line,Ncs,fun scan_comment_fun/6}}; scan_comment(Cs, St, Line, Col, Toks, Ncs0) -> Ncs = lists:reverse(Ncs0), tok3(Cs, St, Line, Col, Toks, comment, Ncs, Ncs). tok2(Cs, #erl_scan{text = false}=St, Line, no_col=Col, Toks, _Wcs, P) -> scan1(Cs, St, Line, Col, [{P,anno(Line)}|Toks]); -tok2(Cs, St, Line, Col, Toks, Wcs, P) -> +tok2(Cs, #erl_scan{}=St, Line, Col, Toks, Wcs, P) -> Anno = anno(Line, Col, St, Wcs), scan1(Cs, St, Line, incr_column(Col, length(Wcs)), [{P,Anno}|Toks]). tok2(Cs, #erl_scan{text = false}=St, Line, no_col=Col, Toks, _Wcs, P, _N) -> scan1(Cs, St, Line, Col, [{P,anno(Line)}|Toks]); -tok2(Cs, St, Line, Col, Toks, Wcs, P, N) -> +tok2(Cs, #erl_scan{}=St, Line, Col, Toks, Wcs, P, N) -> Anno = anno(Line, Col, St, Wcs), scan1(Cs, St, Line, incr_column(Col, N), [{P,Anno}|Toks]). tok3(Cs, #erl_scan{text = false}=St, Line, no_col=Col, Toks, Item, _S, Sym) -> scan1(Cs, St, Line, Col, [{Item,anno(Line),Sym}|Toks]); -tok3(Cs, St, Line, Col, Toks, Item, String, Sym) -> +tok3(Cs, #erl_scan{}=St, Line, Col, Toks, Item, String, Sym) -> Token = {Item,anno(Line, Col, St, String),Sym}, scan1(Cs, St, Line, incr_column(Col, length(String)), [Token|Toks]). tok3(Cs, #erl_scan{text = false}=St, Line, no_col=Col, Toks, Item, _String, Sym, _Length) -> scan1(Cs, St, Line, Col, [{Item,anno(Line),Sym}|Toks]); -tok3(Cs, St, Line, Col, Toks, Item, String, Sym, Length) -> +tok3(Cs, #erl_scan{}=St, Line, Col, Toks, Item, String, Sym, Length) -> Token = {Item,anno(Line, Col, St, String),Sym}, scan1(Cs, St, Line, incr_column(Col, Length), [Token|Toks]). diff --git a/lib/stdlib/test/erl_scan_SUITE.erl b/lib/stdlib/test/erl_scan_SUITE.erl index ee8bc8420f..6c4694bebe 100644 --- a/lib/stdlib/test/erl_scan_SUITE.erl +++ b/lib/stdlib/test/erl_scan_SUITE.erl @@ -676,6 +676,8 @@ illegal() -> crashes() -> {'EXIT',_} = (catch {foo, erl_scan:string([-1])}), % type error + {'EXIT',_} = (catch erl_scan:string("'a" ++ [999999999] ++ "c'")), + {'EXIT',_} = (catch {foo, erl_scan:string("$"++[-1])}), {'EXIT',_} = (catch {foo, erl_scan:string("$\\"++[-1])}), {'EXIT',_} = (catch {foo, erl_scan:string("$\\^"++[-1])}), @@ -698,6 +700,7 @@ crashes() -> (catch {foo, erl_scan:string("% foo"++[a],{1,1})}), {'EXIT',_} = (catch {foo, erl_scan:string([3.0])}), % type error + {'EXIT',_} = (catch {foo, erl_scan:string("A" ++ [999999999])}), ok. @@ -867,8 +870,8 @@ unicode() -> erl_scan:string([1089]), {error,{{1,1},erl_scan,{illegal,character}},{1,2}} = erl_scan:string([1089], {1,1}), - {error,{{1,3},erl_scan,{illegal,character}},{1,4}} = - erl_scan:string("'a" ++ [999999999] ++ "c'", {1,1}), + {error,{{1,1},erl_scan,{illegal,character}},{1,2}} = + erl_scan:string([16#D800], {1,1}), test("\"a"++[1089]++"b\""), {error,{1,erl_scan,{illegal,character}},1} = -- 2.35.3
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor