Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
home:Ledest:erlang:24
erlang
2291-Regular-expression-replacement-with-a-func...
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File 2291-Regular-expression-replacement-with-a-function.patch of Package erlang
From 94191a89750f06474c69305c56b11b36ed369999 Mon Sep 17 00:00:00 2001 From: Jan Uhlig <juhlig@hnc-agency.org> Date: Wed, 24 Aug 2022 12:25:20 +0200 Subject: [PATCH] Regular expression replacement with a function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With this change, `re:replace/3,4` also accepts a function for the Replacement argument, for cases when more complex processing is required to generate a replacement. The given function will be called with the complete match and a list of subexpression matches as arguments, and the returned value will be inserted into the result. Co-authored-by: Maria Scott <maria-12648430@hnc-agency.org> Co-authored-by: Björn Gustavsson <bgustavsson@gmail.com> --- lib/stdlib/doc/src/re.xml | 40 ++++++++++++++++++++++++++++++++---- lib/stdlib/src/re.erl | 34 ++++++++++++++++++++++-------- lib/stdlib/test/re_SUITE.erl | 14 +++++++++++-- 3 files changed, 74 insertions(+), 14 deletions(-) diff --git a/lib/stdlib/doc/src/re.xml b/lib/stdlib/doc/src/re.xml index e16ef12f16..43bdb142f4 100644 --- a/lib/stdlib/doc/src/re.xml +++ b/lib/stdlib/doc/src/re.xml @@ -75,6 +75,9 @@ <datatype> <name name="compile_option"/> </datatype> + <datatype> + <name name="replace_fun"/> + </datatype> </datatypes> <funcs> @@ -363,7 +366,7 @@ elements with Replacement.</fsummary> <desc> <p>Replaces the matched part of the <c><anno>Subject</anno></c> string - with the contents of <c><anno>Replacement</anno></c>.</p> + with <c><anno>Replacement</anno></c>.</p> <p>The permissible options are the same as for <seemfa marker="#run/3"><c>run/3</c></seemfa>, except that option<c> capture</c> is not allowed. Instead a <c>{return, @@ -378,8 +381,8 @@ <c>unicode</c> compilation option is specified to this function, both the regular expression and <c><anno>Subject</anno></c> are to specified as valid Unicode <c>charlist()</c>s.</p> - <p>The replacement string can contain the special character - <c>&</c>, which inserts the whole matching expression in the + <p>If the replacement is given as a string, it can contain the special + character <c>&</c>, which inserts the whole matching expression in the result, and the special sequence <c>\</c>N (where N is an integer > 0), <c>\g</c>N, or <c>\g{</c>N<c>}</c>, resulting in the subexpression number N, is inserted in the result. If no subexpression with that @@ -401,6 +404,35 @@ re:replace("abcd","c","[\\&]",[{return,list}]).</code> <p>gives</p> <code> "ab[&]d"</code> + <p>If the replacement is given as a fun, it will be called with the + whole matching expression as the first argument and a list of subexpression + matches in the order in which they appear in the regular expression. + The returned value will be inserted in the result.</p> + <p><em>Example:</em></p> + <code> +re:replace("abcd", ".(.)", fun(Whole, [<<C>>]) -> <<$#, Whole/binary, $-, (C - $a + $A), $#>> end, [{return, list}]).</code> + <p>gives</p> + <code> +"#ab-B#cd"</code> + <note> + <p>Non-matching optional subexpressions will not be included in the list + of subexpression matches if they are the last subexpressions in the + regular expression.</p> + <p><em>Example:</em></p> + <p>The regular expression <c>"(a)(b)?(c)?"</c> ("a", optionally followed + by "b", optionally followed by "c") will create the following subexpression + lists:</p> + <list> + <item><c>[<<"a">>, <<"b">>, <<"c">>]</c> + when applied to the string <c>"abc"</c></item> + <item><c>[<<"a">>, <<>>, <<"c">>]</c> + when applied to the string <c>"acx"</c></item> + <item><c>[<<"a">>, <<"b">>]</c> + when applied to the string <c>"abx"</c></item> + <item><c>[<<"a">>]</c> + when applied to the string <c>"axx"</c></item> + </list> + </note> <p>As with <c>run/3</c>, compilation errors raise the <c>badarg</c> exception. <seemfa marker="#compile/2"><c>compile/2</c></seemfa> can be used to get more information about the error.</p> @@ -972,7 +1004,7 @@ re:run("ABCabcdABC",".*(?<FOO>abcd).*",[{capture,['FOO']}]).</code> <p>Here the empty binary (<c><<>></c>) represents the unassigned subpattern. In the <c>binary</c> case, some information about the matching is therefore lost, as - <c><<>></c> can + <c><<>></c> can also be an empty string captured.</p> <p>If differentiation between empty matches and non-existing subpatterns is necessary, use the <c>type</c> <c>index</c> and do diff --git a/lib/stdlib/src/re.erl b/lib/stdlib/src/re.erl index 863bbeb652..3a3eca8f44 100644 --- a/lib/stdlib/src/re.erl +++ b/lib/stdlib/src/re.erl @@ -31,6 +31,8 @@ | bsr_anycrlf | bsr_unicode | no_start_optimize | ucp | never_utf. +-type replace_fun() :: fun((binary(), [binary()]) -> iodata() | unicode:charlist()). + %%% BIFs -export([internal_run/4]). @@ -353,7 +355,7 @@ compile_split(_,_) -> -spec replace(Subject, RE, Replacement) -> iodata() | unicode:charlist() when Subject :: iodata() | unicode:charlist(), RE :: mp() | iodata(), - Replacement :: iodata() | unicode:charlist(). + Replacement :: iodata() | unicode:charlist() | replace_fun(). replace(Subject,RE,Replacement) -> try @@ -366,7 +368,7 @@ replace(Subject,RE,Replacement) -> -spec replace(Subject, RE, Replacement, Options) -> iodata() | unicode:charlist() when Subject :: iodata() | unicode:charlist(), RE :: mp() | iodata() | unicode:charlist(), - Replacement :: iodata() | unicode:charlist(), + Replacement :: iodata() | unicode:charlist() | replace_fun(), Options :: [Option], Option :: anchored | global | notbol | noteol | notempty | notempty_atstart @@ -380,11 +382,11 @@ replace(Subject,RE,Replacement) -> replace(Subject,RE,Replacement,Options) -> try - {NewOpt,Convert} = process_repl_params(Options,iodata), - Unicode = check_for_unicode(RE, Options), - FlatSubject = to_binary(Subject, Unicode), - FlatReplacement = to_binary(Replacement, Unicode), - IoList = do_replace(FlatSubject,Subject,RE,FlatReplacement,NewOpt), + {NewOpt,Convert} = process_repl_params(Options,iodata), + Unicode = check_for_unicode(RE, Options), + FlatSubject = to_binary(Subject, Unicode), + Replacement1 = normalize_replacement(Replacement, Unicode), + IoList = do_replace(FlatSubject,Subject,RE,Replacement1,NewOpt), case Convert of iodata -> IoList; @@ -412,6 +414,10 @@ replace(Subject,RE,Replacement,Options) -> badarg_with_info([Subject,RE,Replacement,Options]) end. +normalize_replacement(Replacement, _Unicode) when is_function(Replacement, 2) -> + Replacement; +normalize_replacement(Replacement, Unicode) -> + to_binary(Replacement, Unicode). do_replace(FlatSubject,Subject,RE,Replacement,Options) -> case re:run(FlatSubject,RE,Options) of @@ -512,7 +518,9 @@ precomp_repl(<<X,Rest/binary>>) -> [<<X,BHead/binary>> | T0]; Other -> [<<X>> | Other] - end. + end; +precomp_repl(Repl) when is_function(Repl) -> + Repl. @@ -540,6 +548,16 @@ do_mlist(Whole,Subject,Pos,Repl,[[{MPos,Count} | Sub] | Tail]) [NewData | do_mlist(Whole,Rest,Pos+EatLength,Repl,Tail)]. +do_replace(Subject, Repl, SubExprs0) when is_function(Repl) -> + All = binary:part(Subject, hd(SubExprs0)), + SubExprs1 = + [ if + Pos >= 0, Len > 0 -> + binary:part(Subject, Pos, Len); + true -> + <<>> + end || {Pos, Len} <- tl(SubExprs0) ], + Repl(All, SubExprs1); do_replace(_,[Bin],_) when is_binary(Bin) -> Bin; do_replace(Subject,Repl,SubExprs0) -> diff --git a/lib/stdlib/test/re_SUITE.erl b/lib/stdlib/test/re_SUITE.erl index 09a65d8fdd..fc6e977942 100644 --- a/lib/stdlib/test/re_SUITE.erl +++ b/lib/stdlib/test/re_SUITE.erl @@ -22,7 +22,7 @@ -export([all/0, suite/0,groups/0,init_per_suite/1, end_per_suite/1, init_per_group/2,end_per_group/2, pcre/1,compile_options/1, run_options/1,combined_options/1,replace_autogen/1, - global_capture/1,replace_input_types/1,replace_return/1, + global_capture/1,replace_input_types/1,replace_with_fun/1,replace_return/1, split_autogen/1,split_options/1,split_specials/1, error_handling/1,pcre_cve_2008_2371/1,re_version/1, pcre_compile_workspace_overflow/1,re_infinite_loop/1, @@ -42,7 +42,7 @@ suite() -> all() -> [pcre, compile_options, run_options, combined_options, replace_autogen, global_capture, replace_input_types, - replace_return, split_autogen, split_options, + replace_with_fun, replace_return, split_autogen, split_options, split_specials, error_handling, pcre_cve_2008_2371, pcre_compile_workspace_overflow, re_infinite_loop, re_backwards_accented, opt_dupnames, opt_all_names, @@ -365,6 +365,16 @@ replace_input_types(Config) when is_list(Config) -> <<"a",208,128,"cd">> = re:replace(<<"abcd">>,"b","\x{400}",[{return,binary},unicode]), ok. +%% Test replace with a replacement function. +replace_with_fun(Config) when is_list(Config) -> + <<"ABCD">> = re:replace("abcd", ".", fun(<<C>>, []) -> <<(C - $a + $A)>> end, [global, {return, binary}]), + <<"AbCd">> = re:replace("abcd", ".", fun(<<C>>, []) when (C - $a) rem 2 =:= 0 -> <<(C - $a + $A)>>; (C, []) -> C end, [global, {return, binary}]), + <<"b-ad-c">> = re:replace("abcd", "(.)(.)", fun(_, [A, B]) -> <<B/binary, $-, A/binary>> end, [global, {return, binary}]), + <<"#ab-B#cd">> = re:replace("abcd", ".(.)", fun(Whole, [<<C>>]) -> <<$#, Whole/binary, $-, (C - $a + $A), $#>> end, [{return, binary}]), + <<"#ab#cd">> = re:replace("abcd", ".(x)?(.)", fun(Whole, [<<>>, _]) -> <<$#, Whole/binary, $#>> end, [{return, binary}]), + <<"#ab#cd">> = re:replace("abcd", ".(.)(x)?", fun(Whole, [_]) -> <<$#, Whole/binary, $#>> end, [{return, binary}]), + ok. + %% Test return options of replace together with global searching. replace_return(Config) when is_list(Config) -> {'EXIT',{badarg,_}} = (catch re:replace("na","(a","")), -- 2.35.3
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor