File 5861-erl_scan-Stricten-tests-for-integers.patch of Package erlang

Overview Repositories Revisions Requests Users Attributes Meta

File 5861-erl_scan-Stricten-tests-for-integers.patch of Package erlang

From f2c7365440f549233bdc45a769c7a909064442ad Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Thu, 26 May 2022 10:07:52 +0200
Subject: [PATCH 1/7] erl_scan: Stricten tests for integers

erl_scan should not accept floating point numbers in strings. Adding
these stricter integer tests will also allow the JIT to generate
more compact native code.

While at it, also rewrite old-style `catch` to `try`...`catch`.
---
 lib/stdlib/src/erl_scan.erl        | 233 +++++++++++++++++++----------
 lib/stdlib/test/erl_scan_SUITE.erl |   7 +-
 2 files changed, 156 insertions(+), 84 deletions(-)

diff --git a/lib/stdlib/src/erl_scan.erl b/lib/stdlib/src/erl_scan.erl
index f2e9d2d7b9..e7e2582c03 100644
--- a/lib/stdlib/src/erl_scan.erl
+++ b/lib/stdlib/src/erl_scan.erl
@@ -264,15 +264,15 @@ string_thing(_) -> "string".
 -define(WHITE_SPACE(C),
         is_integer(C) andalso
          (C >= $\000 andalso C =< $\s orelse C >= $\200 andalso C =< $\240)).
--define(DIGIT(C), C >= $0 andalso C =< $9).
--define(CHAR(C), is_integer(C), C >= 0).
+-define(DIGIT(C), (is_integer(C) andalso $0 =< C andalso C =< $9)).
+-define(CHAR(C), (is_integer(C) andalso 0 =< C andalso C < 16#110000)).
 -define(UNICODE(C),
-        is_integer(C) andalso
+        (is_integer(C) andalso
          (C >= 0 andalso C < 16#D800 orelse
           C > 16#DFFF andalso C < 16#FFFE orelse
-          C > 16#FFFF andalso C =< 16#10FFFF)).
+          C > 16#FFFF andalso C =< 16#10FFFF))).
 
--define(UNI255(C), C >= 0, C =< 16#ff).
+-define(UNI255(C), (is_integer(C) andalso 0 =< C andalso C =< 16#ff)).
 
 options(Opts0) when is_list(Opts0) ->
     Opts = lists:foldr(fun expand_opt/2, [], Opts0),
@@ -363,7 +363,7 @@ string1(Cs, St, Line, Col, Toks) ->
             Error
     end.
 
-scan(Cs, St, Line, Col, Toks, _) ->
+scan(Cs, #erl_scan{}=St, Line, Col, Toks, _) ->
     scan1(Cs, St, Line, Col, Toks).
 
 scan1([$\s|Cs], St, Line, Col, Toks) when St#erl_scan.ws ->
@@ -374,10 +374,6 @@ scan1([$\n|Cs], St, Line, Col, Toks) when St#erl_scan.ws ->
     scan_newline(Cs, St, Line, Col, Toks);
 scan1([$\n|Cs], St, Line, Col, Toks) ->
     skip_white_space(Cs, St, Line+1, new_column(Col, 1), Toks, 0);
-scan1([C|Cs], St, Line, Col, Toks) when C >= $A, C =< $Z ->
-    scan_variable(Cs, St, Line, Col, Toks, [C]);
-scan1([C|Cs], St, Line, Col, Toks) when C >= $a, C =< $z ->
-    scan_atom(Cs, St, Line, Col, Toks, [C]);
 %% Optimization: some very common punctuation characters:
 scan1([$,|Cs], St, Line, Col, Toks) ->
     tok2(Cs, St, Line, Col, Toks, ",", ',', 1);
@@ -397,11 +393,17 @@ scan1([$;|Cs], St, Line, Col, Toks) ->
     tok2(Cs, St, Line, Col, Toks, ";", ';', 1);
 scan1([$_=C|Cs], St, Line, Col, Toks) ->
     scan_variable(Cs, St, Line, Col, Toks, [C]);
-%% More punctuation characters below.
 scan1([$\%|Cs], St, Line, Col, Toks) when not St#erl_scan.comment ->
     skip_comment(Cs, St, Line, Col, Toks, 1);
 scan1([$\%=C|Cs], St, Line, Col, Toks) ->
     scan_comment(Cs, St, Line, Col, Toks, [C]);
+%% More punctuation characters below.
+scan1([C|_], _St, _Line, _Col0, _Toks) when not ?CHAR(C) ->
+    error({not_character,C});
+scan1([C|Cs], St, Line, Col, Toks) when C >= $A, C =< $Z ->
+    scan_variable(Cs, St, Line, Col, Toks, [C]);
+scan1([C|Cs], St, Line, Col, Toks) when C >= $a, C =< $z ->
+    scan_atom(Cs, St, Line, Col, Toks, [C]);
 scan1([C|Cs], St, Line, Col, Toks) when ?DIGIT(C) ->
     scan_number(Cs, St, Line, Col, Toks, [C], no_underscore);
 scan1("..."++Cs, St, Line, Col, Toks) ->
@@ -557,39 +559,49 @@ scan1([]=Cs, _St, Line, Col, Toks) ->
 scan1(eof=Cs, _St, Line, Col, Toks) ->
     {ok,Toks,Cs,Line,Col}.
 
+scan_atom_fun(Cs, #erl_scan{}=St, Line, Col, Toks, Ncs) ->
+    scan_atom(Cs, St, Line, Col, Toks, Ncs).
+
 scan_atom(Cs0, St, Line, Col, Toks, Ncs0) ->
     case scan_name(Cs0, Ncs0) of
         {more,Ncs} ->
-            {more,{[],Col,Toks,Line,Ncs,fun scan_atom/6}};
+            {more,{[],Col,Toks,Line,Ncs,fun scan_atom_fun/6}};
         {Wcs,Cs} ->
-            case catch list_to_atom(Wcs) of
-                Name when is_atom(Name) ->
+            try list_to_atom(Wcs) of
+                Name ->
                     case (St#erl_scan.resword_fun)(Name) of
                         true ->
                             tok2(Cs, St, Line, Col, Toks, Wcs, Name);
                         false ->
                             tok3(Cs, St, Line, Col, Toks, atom, Wcs, Name)
-                    end;
-                _Error ->
+                    end
+            catch
+                _:_ ->
                     Ncol = incr_column(Col, length(Wcs)),
                     scan_error({illegal,atom}, Line, Col, Line, Ncol, Cs)
             end
     end.
 
+scan_variable_fun(Cs, #erl_scan{}=St, Line, Col, Toks, Ncs) ->
+    scan_variable(Cs, St, Line, Col, Toks, Ncs).
+
 scan_variable(Cs0, St, Line, Col, Toks, Ncs0) ->
     case scan_name(Cs0, Ncs0) of
         {more,Ncs} ->
-            {more,{[],Col,Toks,Line,Ncs,fun scan_variable/6}};
+            {more,{[],Col,Toks,Line,Ncs,fun scan_variable_fun/6}};
         {Wcs,Cs} ->
-            case catch list_to_atom(Wcs) of
-                Name when is_atom(Name) ->
-                    tok3(Cs, St, Line, Col, Toks, var, Wcs, Name);
-                _Error ->
+            try list_to_atom(Wcs) of
+                Name ->
+                    tok3(Cs, St, Line, Col, Toks, var, Wcs, Name)
+            catch
+                _:_ ->
                     Ncol = incr_column(Col, length(Wcs)),
                     scan_error({illegal,var}, Line, Col, Line, Ncol, Cs)
             end
     end.
 
+scan_name([C|_]=Cs, Ncs) when not ?CHAR(C) ->
+    {lists:reverse(Ncs),Cs};
 scan_name([C|Cs], Ncs) when C >= $a, C =< $z ->
     scan_name(Cs, [C|Ncs]);
 scan_name([C|Cs], Ncs) when C >= $A, C =< $Z ->
@@ -663,20 +675,31 @@ scan_newline([], _St, Line, Col, Toks) ->
 scan_newline(Cs, St, Line, Col, Toks) ->
     scan_nl_white_space(Cs, St, Line, Col, Toks, "\n").
 
+scan_nl_spcs_fun(Cs, #erl_scan{}=St, Line, Col, Toks, N)
+  when is_integer(N) ->
+    scan_nl_spcs(Cs, St, Line, Col, Toks, N).
+
 scan_nl_spcs([$\s|Cs], St, Line, Col, Toks, N) when N < 17 ->
     scan_nl_spcs(Cs, St, Line, Col, Toks, N+1);
 scan_nl_spcs([]=Cs, _St, Line, Col, Toks, N) ->
-    {more,{Cs,Col,Toks,Line,N,fun scan_nl_spcs/6}};
+    {more,{Cs,Col,Toks,Line,N,fun scan_nl_spcs_fun/6}};
 scan_nl_spcs(Cs, St, Line, Col, Toks, N) ->
     newline_end(Cs, St, Line, Col, Toks, N, nl_spcs(N)).
 
+scan_nl_tabs_fun(Cs, #erl_scan{}=St, Line, Col, Toks, N)
+  when is_integer(N) ->
+    scan_nl_tabs(Cs, St, Line, Col, Toks, N).
+
 scan_nl_tabs([$\t|Cs], St, Line, Col, Toks, N) when N < 11 ->
     scan_nl_tabs(Cs, St, Line, Col, Toks, N+1);
 scan_nl_tabs([]=Cs, _St, Line, Col, Toks, N) ->
-    {more,{Cs,Col,Toks,Line,N,fun scan_nl_tabs/6}};
+    {more,{Cs,Col,Toks,Line,N,fun scan_nl_tabs_fun/6}};
 scan_nl_tabs(Cs, St, Line, Col, Toks, N) ->
     newline_end(Cs, St, Line, Col, Toks, N, nl_tabs(N)).
 
+scan_nl_white_space_fun(Cs, #erl_scan{}=St, Line, Col, Toks, Ncs) ->
+    scan_nl_white_space(Cs, St, Line, Col, Toks, Ncs).
+
 %% Note: returning {more,Cont} is meaningless here; one could just as
 %% well return several tokens. But since tokens() scans up to a full
 %% stop anyway, nothing is gained by not collecting all white spaces.
@@ -689,10 +712,11 @@ scan_nl_white_space([$\n|Cs], St, Line, Col, Toks, Ncs0) ->
     Anno = anno(Line, Col, St, Ncs),
     Token = {white_space,Anno,Ncs},
     scan_newline(Cs, St, Line+1, new_column(Col, length(Ncs)), [Token|Toks]);
-scan_nl_white_space([C|Cs], St, Line, Col, Toks, Ncs) when ?WHITE_SPACE(C) ->
+scan_nl_white_space([C|Cs], St, Line, Col, Toks, Ncs)
+  when ?WHITE_SPACE(C) ->
     scan_nl_white_space(Cs, St, Line, Col, Toks, [C|Ncs]);
 scan_nl_white_space([]=Cs, _St, Line, Col, Toks, Ncs) ->
-    {more,{Cs,Col,Toks,Line,Ncs,fun scan_nl_white_space/6}};
+    {more,{Cs,Col,Toks,Line,Ncs,fun scan_nl_white_space_fun/6}};
 scan_nl_white_space(Cs, #erl_scan{text = false}=St, Line, no_col=Col,
                     Toks, Ncs) ->
     Anno = anno(Line),
@@ -706,40 +730,54 @@ scan_nl_white_space(Cs, St, Line, Col, Toks, Ncs0) ->
 newline_end(Cs, #erl_scan{text = false}=St, Line, no_col=Col,
             Toks, _N, Ncs) ->
     scan1(Cs, St, Line+1, Col, [{white_space,anno(Line),Ncs}|Toks]);
-newline_end(Cs, St, Line, Col, Toks, N, Ncs) ->
+newline_end(Cs, #erl_scan{}=St, Line, Col, Toks, N, Ncs) ->
     Anno = anno(Line, Col, St, Ncs),
     scan1(Cs, St, Line+1, new_column(Col, N), [{white_space,Anno,Ncs}|Toks]).
 
+scan_spcs_fun(Cs, #erl_scan{}=St, Line, Col, Toks, N)
+  when is_integer(N), N >= 1 ->
+    scan_spcs(Cs, St, Line, Col, Toks, N).
+
 scan_spcs([$\s|Cs], St, Line, Col, Toks, N) when N < 16 ->
     scan_spcs(Cs, St, Line, Col, Toks, N+1);
 scan_spcs([]=Cs, _St, Line, Col, Toks, N) ->
-    {more,{Cs,Col,Toks,Line,N,fun scan_spcs/6}};
+    {more,{Cs,Col,Toks,Line,N,fun scan_spcs_fun/6}};
 scan_spcs(Cs, St, Line, Col, Toks, N) ->
     white_space_end(Cs, St, Line, Col, Toks, N, spcs(N)).
 
+scan_tabs_fun(Cs, #erl_scan{}=St, Line, Col, Toks, N)
+  when is_integer(N), N >= 1 ->
+    scan_tabs(Cs, St, Line, Col, Toks, N).
+
 scan_tabs([$\t|Cs], St, Line, Col, Toks, N) when N < 10 ->
     scan_tabs(Cs, St, Line, Col, Toks, N+1);
 scan_tabs([]=Cs, _St, Line, Col, Toks, N) ->
-    {more,{Cs,Col,Toks,Line,N,fun scan_tabs/6}};
+    {more,{Cs,Col,Toks,Line,N,fun scan_tabs_fun/6}};
 scan_tabs(Cs, St, Line, Col, Toks, N) ->
     white_space_end(Cs, St, Line, Col, Toks, N, tabs(N)).
 
+skip_white_space_fun(Cs, #erl_scan{}=St, Line, Col, Toks, N) ->
+    skip_white_space(Cs, St, Line, Col, Toks, N).
+
 skip_white_space([$\n|Cs], St, Line, Col, Toks, _N) ->
     skip_white_space(Cs, St, Line+1, new_column(Col, 1), Toks, 0);
 skip_white_space([C|Cs], St, Line, Col, Toks, N) when ?WHITE_SPACE(C) ->
     skip_white_space(Cs, St, Line, Col, Toks, N+1);
 skip_white_space([]=Cs, _St, Line, Col, Toks, N) ->
-    {more,{Cs,Col,Toks,Line,N,fun skip_white_space/6}};
+    {more,{Cs,Col,Toks,Line,N,fun skip_white_space_fun/6}};
 skip_white_space(Cs, St, Line, Col, Toks, N) ->
     scan1(Cs, St, Line, incr_column(Col, N), Toks).
 
+scan_white_space_fun(Cs, #erl_scan{}=St, Line, Col, Toks, Ncs) ->
+    scan_white_space(Cs, St, Line, Col, Toks, Ncs).
+
 %% Maybe \t and \s should break the loop.
 scan_white_space([$\n|_]=Cs, St, Line, Col, Toks, Ncs) ->
     white_space_end(Cs, St, Line, Col, Toks, length(Ncs), lists:reverse(Ncs));
 scan_white_space([C|Cs], St, Line, Col, Toks, Ncs) when ?WHITE_SPACE(C) ->
     scan_white_space(Cs, St, Line, Col, Toks, [C|Ncs]);
 scan_white_space([]=Cs, _St, Line, Col, Toks, Ncs) ->
-    {more,{Cs,Col,Toks,Line,Ncs,fun scan_white_space/6}};
+    {more,{Cs,Col,Toks,Line,Ncs,fun scan_white_space_fun/6}};
 scan_white_space(Cs, St, Line, Col, Toks, Ncs) ->
     white_space_end(Cs, St, Line, Col, Toks, length(Ncs), lists:reverse(Ncs)).
 
@@ -778,7 +816,7 @@ scan_char([], _St, Line, Col, Toks) ->
 scan_char(eof, _St, Line, Col, _Toks) ->
     scan_error(char, Line, Col, Line, incr_column(Col, 1), eof).
 
-scan_string(Cs, St, Line, Col, Toks, {Wcs,Str,Line0,Col0}) ->
+scan_string(Cs, #erl_scan{}=St, Line, Col, Toks, {Wcs,Str,Line0,Col0}) ->
     case scan_string0(Cs, St, Line, Col, $\", Str, Wcs) of %"
         {more,Ncs,Nline,Ncol,Nstr,Nwcs} ->
             State = {Nwcs,Nstr,Line0,Col0},
@@ -793,7 +831,7 @@ scan_string(Cs, St, Line, Col, Toks, {Wcs,Str,Line0,Col0}) ->
             scan1(Ncs, St, Nline, Ncol, [{string,Anno,Nwcs}|Toks])
     end.
 
-scan_qatom(Cs, St, Line, Col, Toks, {Wcs,Str,Line0,Col0}) ->
+scan_qatom(Cs, #erl_scan{}=St, Line, Col, Toks, {Wcs,Str,Line0,Col0}) ->
     case scan_string0(Cs, St, Line, Col, $\', Str, Wcs) of %'
         {more,Ncs,Nline,Ncol,Nstr,Nwcs} ->
             State = {Nwcs,Nstr,Line0,Col0},
@@ -803,12 +841,13 @@ scan_qatom(Cs, St, Line, Col, Toks, {Wcs,Str,Line0,Col0}) ->
         {error,Nline,Ncol,Nwcs,Ncs} ->
             Estr = string:slice(Nwcs, 0, 16), % Expanded escape chars.
             scan_error({string,$\',Estr}, Line0, Col0, Nline, Ncol, Ncs); %'
-        {Ncs,Nline,Ncol,Nstr,Nwcs} ->
-            case catch list_to_atom(Nwcs) of
+            {Ncs,Nline,Ncol,Nstr,Nwcs} ->
+            try list_to_atom(Nwcs) of
                 A when is_atom(A) ->
                     Anno = anno(Line0, Col0, St, Nstr),
-                    scan1(Ncs, St, Nline, Ncol, [{atom,Anno,A}|Toks]);
-                _ ->
+                    scan1(Ncs, St, Nline, Ncol, [{atom,Anno,A}|Toks])
+            catch
+                _:_ ->
                     scan_error({illegal,atom}, Line0, Col0, Nline, Ncol, Ncs)
             end
     end.
@@ -884,10 +923,11 @@ scan_string1([]=Cs, Line, Col, _Q, Str, Wcs) ->
 scan_string1(eof, Line, Col, _Q, _Str, Wcs) ->
     {error,Line,Col,lists:reverse(Wcs),eof}.
 
--define(OCT(C), C >= $0, C =< $7).
--define(HEX(C), C >= $0 andalso C =< $9 orelse
-                C >= $A andalso C =< $F orelse
-                C >= $a andalso C =< $f).
+-define(OCT(C), (is_integer(C) andalso $0 =< C andalso C =< $7)).
+-define(HEX(C), (is_integer(C) andalso
+                 (C >= $0 andalso C =< $9 orelse
+                  C >= $A andalso C =< $F orelse
+                  C >= $a andalso C =< $f))).
 
 %% \<1-3> octal digits
 scan_escape([O1,O2,O3|Cs], Col) when ?OCT(O1), ?OCT(O2), ?OCT(O3) ->
@@ -939,26 +979,31 @@ scan_escape([], _Col) ->
 scan_escape(eof, Col) ->
     {eof,Col}.
 
-scan_hex([C|Cs], no_col=Col, Wcs) when ?HEX(C) ->
-    scan_hex(Cs, Col, [C|Wcs]);
 scan_hex([C|Cs], Col, Wcs) when ?HEX(C) ->
-    scan_hex(Cs, Col+1, [C|Wcs]);
+    scan_hex(Cs, incr_column(Col, 1), [C|Wcs]);
 scan_hex(Cs, Col, Wcs) ->
-    scan_esc_end(Cs, Col, Wcs, 16, "x{").
+    scan_hex_end(Cs, Col, Wcs, "x{").
 
-scan_esc_end([$}|Cs], Col, Wcs0, B, Str0) ->
+scan_hex_end([$}|Cs], Col, [], _Str) ->
+    %% Empty escape sequence.
+    {error,Cs,{illegal,character},incr_column(Col, 1)};
+scan_hex_end([$}|Cs], Col, Wcs0, Str0) ->
     Wcs = lists:reverse(Wcs0),
-    case catch erlang:list_to_integer(Wcs, B) of
+    try list_to_integer(Wcs, 16) of
         Val when ?UNICODE(Val) ->
             {Val,Str0++Wcs++[$}],Cs,incr_column(Col, 1)};
-        _ ->
+        _Val ->
+            {error,Cs,{illegal,character},incr_column(Col, 1)}
+    catch
+        error:system_limit ->
+            %% Extremely unlikely to occur in practice.
             {error,Cs,{illegal,character},incr_column(Col, 1)}
     end;
-scan_esc_end([], _Col, _Wcs, _B, _Str0) ->
+scan_hex_end([], _Col, _Wcs, _Str0) ->
     more;
-scan_esc_end(eof, Col, _Wcs, _B, _Str0) ->
+scan_hex_end(eof, Col, _Wcs, _Str0) ->
     {eof,Col};
-scan_esc_end(Cs, Col, _Wcs, _B, _Str0) ->
+scan_hex_end(Cs, Col, _Wcs, _Str0) ->
     {error,Cs,{illegal,character},Col}.
 
 escape_char($n) -> $\n;                         % \n = LF
@@ -972,7 +1017,7 @@ caret_char_code($?) -> 16#7f;
 caret_char_code(C) when $@ =< C, C =< $_; $a =< C, C =< $z -> C band 16#1f;
 caret_char_code(_) -> error.
 
-scan_number(Cs, St, Line, Col, Toks, {Ncs, Us}) ->
+scan_number(Cs, #erl_scan{}=St, Line, Col, Toks, {Ncs, Us}) ->
     scan_number(Cs, St, Line, Col, Toks, Ncs, Us).
 
 scan_number([C|Cs], St, Line, Col, Toks, Ncs, Us) when ?DIGIT(C) ->
@@ -988,22 +1033,28 @@ scan_number([$.]=Cs, _St, Line, Col, Toks, Ncs, Us) ->
     {more,{Cs,Col,Toks,Line,{Ncs,Us},fun scan_number/6}};
 scan_number([$#|Cs]=Cs0, St, Line, Col, Toks, Ncs0, Us) ->
     Ncs = lists:reverse(Ncs0),
-    case catch list_to_integer(remove_digit_separators(Ncs, Us)) of
-        B when B >= 2, B =< 1+$Z-$A+10 ->
+    try list_to_integer(remove_digit_separators(Ncs, Us)) of
+        B when is_integer(B), 2 =< B, B =< 1+$Z-$A+10 ->
             Bcs = Ncs++[$#],
             scan_based_int(Cs, St, Line, Col, Toks, B, [], Bcs, no_underscore);
-        B ->
+        B when is_integer(B) ->
             Len = length(Ncs),
             scan_error({base,B}, Line, Col, Line, incr_column(Col, Len), Cs0)
+    catch
+        error:system_limit ->
+            %% Extremely unlikely to occur in practice.
+            scan_error({illegal,base}, Line, Col, Line, Col, Cs0)
     end;
 scan_number([]=Cs, _St, Line, Col, Toks, Ncs, Us) ->
     {more,{Cs,Col,Toks,Line,{Ncs,Us},fun scan_number/6}};
 scan_number(Cs, St, Line, Col, Toks, Ncs0, Us) ->
     Ncs = lists:reverse(Ncs0),
-    case catch list_to_integer(remove_digit_separators(Ncs, Us)) of
-        N when is_integer(N) ->
-            tok3(Cs, St, Line, Col, Toks, integer, Ncs, N);
-        _ ->
+    try list_to_integer(remove_digit_separators(Ncs, Us), 10) of
+        N ->
+            tok3(Cs, St, Line, Col, Toks, integer, Ncs, N)
+    catch
+        error:system_limit ->
+            %% Extremely unlikely to occur in practice.
             Ncol = incr_column(Col, length(Ncs)),
             scan_error({illegal,integer}, Line, Col, Line, Ncol, Cs)
     end.
@@ -1014,11 +1065,14 @@ remove_digit_separators(Number, with_underscore) ->
     [C || C <- Number, C =/= $_].
 
 -define(BASED_DIGIT(C, B),
-        ((?DIGIT(C) andalso C < $0 + B)
-         orelse (C >= $A andalso B > 10 andalso C < $A + B - 10)
-         orelse (C >= $a andalso B > 10 andalso C < $a + B - 10))).
-
-scan_based_int(Cs, St, Line, Col, Toks, {B,NCs,BCs,Us}) ->
+        (is_integer(C)
+         andalso
+           ((?DIGIT(C) andalso C < $0 + B)
+            orelse (C >= $A andalso B > 10 andalso C < $A + B - 10)
+            orelse (C >= $a andalso B > 10 andalso C < $a + B - 10)))).
+
+scan_based_int(Cs, #erl_scan{}=St, Line, Col, Toks, {B,NCs,BCs,Us})
+  when is_integer(B), 2 =< B, B =< 1+$Z-$A+10 ->
     scan_based_int(Cs, St, Line, Col, Toks, B, NCs, BCs, Us).
 
 scan_based_int([C|Cs], St, Line, Col, Toks, B, Ncs, Bcs, Us) when
@@ -1032,18 +1086,25 @@ scan_based_int([$_]=Cs, _St, Line, Col, Toks, B, NCs, BCs, Us) ->
     {more,{Cs,Col,Toks,Line,{B,NCs,BCs,Us},fun scan_based_int/6}};
 scan_based_int([]=Cs, _St, Line, Col, Toks, B, NCs, BCs, Us) ->
     {more,{Cs,Col,Toks,Line,{B,NCs,BCs,Us},fun scan_based_int/6}};
-scan_based_int(Cs, St, Line, Col, Toks, B, Ncs0, Bcs, Us) ->
+scan_based_int(Cs, _St, Line, Col, _Toks, _B, [], Bcs, _Us) ->
+    %% No actual digits following the base.
+    Len = length(Bcs),
+    Ncol = incr_column(Col, Len),
+    scan_error({illegal,integer}, Line, Col, Line, Ncol, Cs);
+scan_based_int(Cs, St, Line, Col, Toks, B, Ncs0, [_|_]=Bcs, Us) ->
     Ncs = lists:reverse(Ncs0),
-    case catch erlang:list_to_integer(remove_digit_separators(Ncs, Us), B) of
-        N when is_integer(N) ->
-            tok3(Cs, St, Line, Col, Toks, integer, Bcs++Ncs, N);
-        _ ->
+    try list_to_integer(remove_digit_separators(Ncs, Us), B) of
+        N ->
+            tok3(Cs, St, Line, Col, Toks, integer, Bcs++Ncs, N)
+    catch
+        error:system_limit ->
+            %% Extremely unlikely to occur in practice.
             Len = length(Bcs)+length(Ncs),
             Ncol = incr_column(Col, Len),
             scan_error({illegal,integer}, Line, Col, Line, Ncol, Cs)
     end.
 
-scan_fraction(Cs, St, Line, Col, Toks, {Ncs,Us}) ->
+scan_fraction(Cs, #erl_scan{}=St, Line, Col, Toks, {Ncs,Us}) ->
     scan_fraction(Cs, St, Line, Col, Toks, Ncs, Us).
 
 scan_fraction([C|Cs], St, Line, Col, Toks, Ncs, Us) when ?DIGIT(C) ->
@@ -1060,7 +1121,7 @@ scan_fraction([]=Cs, _St, Line, Col, Toks, Ncs, Us) ->
 scan_fraction(Cs, St, Line, Col, Toks, Ncs, Us) ->
     float_end(Cs, St, Line, Col, Toks, Ncs, Us).
 
-scan_exponent_sign(Cs, St, Line, Col, Toks, {Ncs, Us}) ->
+scan_exponent_sign(Cs, #erl_scan{}=St, Line, Col, Toks, {Ncs, Us}) ->
     scan_exponent_sign(Cs, St, Line, Col, Toks, Ncs, Us).
 
 scan_exponent_sign([C|Cs], St, Line, Col, Toks, Ncs, Us) when
@@ -1071,7 +1132,7 @@ scan_exponent_sign([]=Cs, _St, Line, Col, Toks, Ncs, Us) ->
 scan_exponent_sign(Cs, St, Line, Col, Toks, Ncs, Us) ->
     scan_exponent(Cs, St, Line, Col, Toks, Ncs, Us).
 
-scan_exponent(Cs, St, Line, Col, Toks, {Ncs, Us}) ->
+scan_exponent(Cs, #erl_scan{}=St, Line, Col, Toks, {Ncs, Us}) ->
     scan_exponent(Cs, St, Line, Col, Toks, Ncs, Us).
 
 scan_exponent([C|Cs], St, Line, Col, Toks, Ncs, Us) when ?DIGIT(C) ->
@@ -1088,14 +1149,18 @@ scan_exponent(Cs, St, Line, Col, Toks, Ncs, Us) ->
 
 float_end(Cs, St, Line, Col, Toks, Ncs0, Us) ->
     Ncs = lists:reverse(Ncs0),
-    case catch list_to_float(remove_digit_separators(Ncs, Us)) of
-        F when is_float(F) ->
-            tok3(Cs, St, Line, Col, Toks, float, Ncs, F);
-        _ ->
+    try list_to_float(remove_digit_separators(Ncs, Us)) of
+        F ->
+            tok3(Cs, St, Line, Col, Toks, float, Ncs, F)
+    catch
+        _:_ ->
             Ncol = incr_column(Col, length(Ncs)),
             scan_error({illegal,float}, Line, Col, Line, Ncol, Cs)
     end.
 
+skip_comment_fun(Cs, #erl_scan{}=St, Line, Col, Toks, N) ->
+    skip_comment(Cs, St, Line, Col, Toks, N).
+
 skip_comment([C|Cs], St, Line, Col, Toks, N) when C =/= $\n, ?CHAR(C) ->
     case ?UNICODE(C) of
         true ->
@@ -1105,11 +1170,15 @@ skip_comment([C|Cs], St, Line, Col, Toks, N) when C =/= $\n, ?CHAR(C) ->
             scan_error({illegal,character}, Line, Col, Line, Ncol, Cs)
     end;
 skip_comment([]=Cs, _St, Line, Col, Toks, N) ->
-    {more,{Cs,Col,Toks,Line,N,fun skip_comment/6}};
+    {more,{Cs,Col,Toks,Line,N,fun skip_comment_fun/6}};
 skip_comment(Cs, St, Line, Col, Toks, N) ->
     scan1(Cs, St, Line, incr_column(Col, N), Toks).
 
-scan_comment([C|Cs], St, Line, Col, Toks, Ncs) when C =/= $\n, ?CHAR(C) ->
+scan_comment_fun(Cs, #erl_scan{}=St, Line, Col, Toks, Ncs) ->
+    scan_comment(Cs, St, Line, Col, Toks, Ncs).
+
+scan_comment([C|Cs], St, Line, Col, Toks, Ncs)
+  when C =/= $\n, ?CHAR(C) ->
     case ?UNICODE(C) of
         true ->
             scan_comment(Cs, St, Line, Col, Toks, [C|Ncs]);
@@ -1118,33 +1187,33 @@ scan_comment([C|Cs], St, Line, Col, Toks, Ncs) when C =/= $\n, ?CHAR(C) ->
             scan_error({illegal,character}, Line, Col, Line, Ncol, Cs)
     end;
 scan_comment([]=Cs, _St, Line, Col, Toks, Ncs) ->
-    {more,{Cs,Col,Toks,Line,Ncs,fun scan_comment/6}};
+    {more,{Cs,Col,Toks,Line,Ncs,fun scan_comment_fun/6}};
 scan_comment(Cs, St, Line, Col, Toks, Ncs0) ->
     Ncs = lists:reverse(Ncs0),
     tok3(Cs, St, Line, Col, Toks, comment, Ncs, Ncs).
 
 tok2(Cs, #erl_scan{text = false}=St, Line, no_col=Col, Toks, _Wcs, P) ->
     scan1(Cs, St, Line, Col, [{P,anno(Line)}|Toks]);
-tok2(Cs, St, Line, Col, Toks, Wcs, P) ->
+tok2(Cs, #erl_scan{}=St, Line, Col, Toks, Wcs, P) ->
     Anno = anno(Line, Col, St, Wcs),
     scan1(Cs, St, Line, incr_column(Col, length(Wcs)), [{P,Anno}|Toks]).
 
 tok2(Cs, #erl_scan{text = false}=St, Line, no_col=Col, Toks, _Wcs, P, _N) ->
     scan1(Cs, St, Line, Col, [{P,anno(Line)}|Toks]);
-tok2(Cs, St, Line, Col, Toks, Wcs, P, N) ->
+tok2(Cs, #erl_scan{}=St, Line, Col, Toks, Wcs, P, N) ->
     Anno = anno(Line, Col, St, Wcs),
     scan1(Cs, St, Line, incr_column(Col, N), [{P,Anno}|Toks]).
 
 tok3(Cs, #erl_scan{text = false}=St, Line, no_col=Col, Toks, Item, _S, Sym) ->
     scan1(Cs, St, Line, Col, [{Item,anno(Line),Sym}|Toks]);
-tok3(Cs, St, Line, Col, Toks, Item, String, Sym) ->
+tok3(Cs, #erl_scan{}=St, Line, Col, Toks, Item, String, Sym) ->
     Token = {Item,anno(Line, Col, St, String),Sym},
     scan1(Cs, St, Line, incr_column(Col, length(String)), [Token|Toks]).
 
 tok3(Cs, #erl_scan{text = false}=St, Line, no_col=Col, Toks, Item,
      _String, Sym, _Length) ->
     scan1(Cs, St, Line, Col, [{Item,anno(Line),Sym}|Toks]);
-tok3(Cs, St, Line, Col, Toks, Item, String, Sym, Length) ->
+tok3(Cs, #erl_scan{}=St, Line, Col, Toks, Item, String, Sym, Length) ->
     Token = {Item,anno(Line, Col, St, String),Sym},
     scan1(Cs, St, Line, incr_column(Col, Length), [Token|Toks]).
 
diff --git a/lib/stdlib/test/erl_scan_SUITE.erl b/lib/stdlib/test/erl_scan_SUITE.erl
index ee8bc8420f..6c4694bebe 100644
--- a/lib/stdlib/test/erl_scan_SUITE.erl
+++ b/lib/stdlib/test/erl_scan_SUITE.erl
@@ -676,6 +676,8 @@ illegal() ->
 
 crashes() ->
     {'EXIT',_} = (catch {foo, erl_scan:string([-1])}), % type error
+    {'EXIT',_} = (catch erl_scan:string("'a" ++ [999999999] ++ "c'")),
+
     {'EXIT',_} = (catch {foo, erl_scan:string("$"++[-1])}),
     {'EXIT',_} = (catch {foo, erl_scan:string("$\\"++[-1])}),
     {'EXIT',_} = (catch {foo, erl_scan:string("$\\^"++[-1])}),
@@ -698,6 +700,7 @@ crashes() ->
          (catch {foo, erl_scan:string("% foo"++[a],{1,1})}),
 
     {'EXIT',_} = (catch {foo, erl_scan:string([3.0])}), % type error
+    {'EXIT',_} = (catch {foo, erl_scan:string("A" ++ [999999999])}),
 
     ok.
 
@@ -867,8 +870,8 @@ unicode() ->
         erl_scan:string([1089]),
     {error,{{1,1},erl_scan,{illegal,character}},{1,2}} =
         erl_scan:string([1089], {1,1}),
-    {error,{{1,3},erl_scan,{illegal,character}},{1,4}} =
-        erl_scan:string("'a" ++ [999999999] ++ "c'", {1,1}),
+    {error,{{1,1},erl_scan,{illegal,character}},{1,2}} =
+        erl_scan:string([16#D800], {1,1}),
 
     test("\"a"++[1089]++"b\""),
     {error,{1,erl_scan,{illegal,character}},1} =
-- 
2.35.3

Places

File 5861-erl_scan-Stricten-tests-for-integers.patch of Package erlang

Places