Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
home:Ledest:erlang:24
erlang
3421-stdlib-add-quote-unquote-functions-in-uri_...
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File 3421-stdlib-add-quote-unquote-functions-in-uri_string.patch of Package erlang
From 3d980d91c8fc81b02cd5cf85595bbf569ff9652e Mon Sep 17 00:00:00 2001 From: Jakub Witczak <kuba@erlang.org> Date: Wed, 9 Feb 2022 16:34:37 +0100 Subject: [PATCH 1/2] stdlib: add quote, unquote functions in uri_string - functions for percent encoding and decoding user data - intended as utility functions for preparing URI components --- lib/common_test/test/erl2html2_SUITE.erl | 4 +- lib/inets/src/http_lib/http_uri.erl | 4 +- lib/inets/src/http_server/httpd_util.erl | 11 +- lib/inets/src/inets_app/inets.app.src | 2 +- lib/stdlib/doc/src/uri_string.xml | 79 ++++++++++++- lib/stdlib/doc/src/uri_string_usage.xml | 11 +- lib/stdlib/src/uri_string.erl | 30 ++++- lib/stdlib/test/uri_string_SUITE.erl | 139 ++++++++++++++++++++++- 8 files changed, 258 insertions(+), 22 deletions(-) diff --git a/lib/common_test/test/erl2html2_SUITE.erl b/lib/common_test/test/erl2html2_SUITE.erl index b2336ff0bc..bac028350a 100644 --- a/lib/common_test/test/erl2html2_SUITE.erl +++ b/lib/common_test/test/erl2html2_SUITE.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2012-2017. All Rights Reserved. +%% Copyright Ericsson AB 2012-2022. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -262,7 +262,7 @@ sax_event({startElement,_Uri,"a",_QN,Attrs},{Exports,LastExprFuncs,PrevLine}) -> {match,[FStr,EndStr]} = re:run(Name,"^(.*)-(last_expr|[0-9]+)$", [{capture,all_but_first,list}]), - F = list_to_atom(http_uri:decode(FStr)), + F = list_to_atom(uri_string:unquote(FStr)), case EndStr of "last_expr" -> true = lists:member(F,LastExprFuncs), diff --git a/lib/inets/src/http_lib/http_uri.erl b/lib/inets/src/http_lib/http_uri.erl index 5799163271..5795280635 100644 --- a/lib/inets/src/http_lib/http_uri.erl +++ b/lib/inets/src/http_lib/http_uri.erl @@ -64,8 +64,8 @@ -deprecated({parse, 1, "use uri_string functions instead"}). -deprecated({parse, 2, "use uri_string functions instead"}). --deprecated({encode, 1, "use uri_string functions instead"}). --deprecated({decode, 1, "use uri_string functions instead"}). +-deprecated({encode, 1, "use uri_string:quote function instead"}). +-deprecated({decode, 1, "use uri_string:unquote function instead"}). -deprecated({scheme_defaults, 0, "use uri_string functions instead"}). diff --git a/lib/inets/src/http_server/httpd_util.erl b/lib/inets/src/http_server/httpd_util.erl index 70d5e776bb..c1f273af79 100644 --- a/lib/inets/src/http_server/httpd_util.erl +++ b/lib/inets/src/http_server/httpd_util.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1997-2021. All Rights Reserved. +%% Copyright Ericsson AB 1997-2022. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -398,10 +398,11 @@ month(12) -> "Dec". %% decode_hex decode_hex(URI) -> - http_uri:decode(URI). + uri_string:unquote(URI). encode_hex(URI) -> - http_uri:encode(URI). + SafeChars = "!$()*", %% characters not encoded by deprecated http_uri:encode/1 + uri_string:quote(URI, SafeChars). %% flatlength flatlength(List) -> diff --git a/lib/stdlib/doc/src/uri_string.xml b/lib/stdlib/doc/src/uri_string.xml index 4fe24e656f..08fc09e150 100644 --- a/lib/stdlib/doc/src/uri_string.xml +++ b/lib/stdlib/doc/src/uri_string.xml @@ -4,7 +4,7 @@ <erlref> <header> <copyright> - <year>2017</year><year>2020</year> + <year>2017</year><year>2022</year> <holder>Ericsson AB. All Rights Reserved.</holder> </copyright> <legalnotice> @@ -84,9 +84,14 @@ <item>Dissecting form-urlencoded query strings into a list of key-value pairs<br></br> <seemfa marker="#dissect_query/1"><c>dissect_query/1</c></seemfa> </item> - <item>Decoding percent-encoded triplets<br></br> + <item>Decoding percent-encoded triplets in URI map or a specific component of URI<br></br> <seemfa marker="#percent_decode/1"><c>percent_decode/1</c></seemfa> </item> + <item>Preparing and retrieving application specific data included in URI components<br></br> + <seemfa marker="#quote/1"><c>quote/1</c></seemfa> + <seemfa marker="#quote/2"><c>quote/2</c></seemfa> + <seemfa marker="#unquote/1"><c>unquote/1</c></seemfa> + </item> </list> <p>There are four different encodings present during the handling of URIs:</p> <list type="bulleted"> @@ -110,6 +115,15 @@ character encoding and it is usually defined by the protocol or surrounding text. This library takes the same assumption, binary and percent-encoding are handled as one configuration unit, they cannot be set to different values.</p> + <p>Quoting functions are intended to be used by URI producing application + during component preparation or retrieval phase to avoid conflicts between + data and characters used in URI syntax. Quoting functions use percent + encoding, but with different rules than for example during execution of + <c>recompose/1</c>. It is user responsibility to provide quoting + functions with application data only and using their output to combine an + URI component.<br></br>Quoting functions can for instance be used for constructing a path + component with a segment containing '/' character which should not collide with + '/' used as general delimiter in path component.</p> </description> <datatypes> @@ -357,6 +371,48 @@ </desc> </func> + <func> + <name name="quote" arity="1" since="OTP 25.0"/> + <fsummary>Percent encode characters out of unreserved set.</fsummary> + <desc> + <p>Replaces characters out of unreserved set with their percent encoded equivalents.</p> + <p>Unreserved characters defined in + <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url> are not quoted.</p> + <p><em>Example:</em></p> + <pre> +1> <input><![CDATA[uri_string:quote("SomeId/04").]]></input> +<![CDATA["SomeId%2F04"]]> +2> <input><![CDATA[uri_string:quote(<<"SomeId/04">>).]]></input> +<![CDATA[<<"SomeId%2F04">>]]> + </pre> + + <warning><p>Function is not aware about any URI component context and + should not be used on whole URI. If applied more than once on the same + data, might produce unexpected results.</p></warning> + </desc> + </func> + + <func> + <name name="quote" arity="2" since="OTP 25.0"/> + <fsummary>Percent encode characters out of unreserved set and user defined + safe list.</fsummary> + <desc> + <p>Same as <c>quote/1</c>, but <c><anno>Safe</anno></c> allows user to + provide a list of characters to be protected from encoding.</p> + <p><em>Example:</em></p> + <pre> +1> <input><![CDATA[uri_string:quote("SomeId/04", "/").]]></input> +<![CDATA["SomeId/04"]]> +2> <input><![CDATA[uri_string:quote(<<"SomeId/04">>, "/").]]></input> +<![CDATA[<<"SomeId/04">>]]> + </pre> + + <warning><p>Function is not aware about any URI component context and + should not be used on whole URI. If applied more than once on the same + data, might produce unexpected results.</p></warning> + </desc> + </func> + <func> <name name="recompose" arity="1" since="OTP 21.0"/> <fsummary>Recompose URI.</fsummary> @@ -445,5 +501,24 @@ </desc> </func> + <func> + <name name="unquote" arity="1" since="OTP 25.0"/> + <fsummary>Percent decode characters.</fsummary> + <desc> + <p>Percent decode characters.</p> + + <p><em>Example:</em></p> + <pre> +1> <input><![CDATA[uri_string:unquote("SomeId%2F04").]]></input> +<![CDATA["SomeId/04"]]> +2> <input><![CDATA[uri_string:unquote(<<"SomeId%2F04">>).]]></input> +<![CDATA[<<"SomeId/04">>]]> + </pre> + + <warning><p>Function is not aware about any URI component context and + should not be used on whole URI. If applied more than once on the same + data, might produce unexpected results.</p></warning> + </desc> + </func> </funcs> </erlref> diff --git a/lib/stdlib/doc/src/uri_string_usage.xml b/lib/stdlib/doc/src/uri_string_usage.xml index 72851096b7..31e2c23c45 100644 --- a/lib/stdlib/doc/src/uri_string_usage.xml +++ b/lib/stdlib/doc/src/uri_string_usage.xml @@ -4,8 +4,7 @@ <chapter> <header> <copyright> - <year>2020</year> - <year>2020</year> + <year>2022</year> <holder>Ericsson AB. All Rights Reserved.</holder> </copyright> <legalnotice> @@ -294,12 +293,14 @@ and it happens when converting a <seetype marker="uri_string#uri_map"><c>uri_map()</c></seetype> into a <seetype marker="uri_string#uri_string"><c>uri_string()</c></seetype>. - There is no equivalent to a raw percent-encoding function as percent-encoding - shall be applied on the component level using different sets of allowed characters. - Applying percent-encoding directly on an input URI would not be safe just as in + Applying any percent-encoding directly on an input URI would not be safe just as in the case of <seemfa marker="uri_string#percent_decode/1"><c>uri_string:percent_decode/1</c></seemfa>, the output could be an invalid URI. + Quoting functions allow users to perform raw percent encoding and decoding + on application data which cannot be handled automatically by + <c>uri_string:recompose/1</c>. For example in scenario when user would + need to use '/' or sub-delimeter as data rather than delimeter in a path component. </p> </note> </section> diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl index 05b66c12d6..dbbb835036 100644 --- a/lib/stdlib/src/uri_string.erl +++ b/lib/stdlib/src/uri_string.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2017-2020. All Rights Reserved. +%% Copyright Ericsson AB 2017-2022. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -237,7 +237,10 @@ recompose/1, resolve/2, resolve/3, - transcode/2]). + transcode/2, + quote/1, + quote/2, + unquote/1]). -export_type([error/0, uri_map/0, uri_string/0]). @@ -518,6 +521,29 @@ percent_decode(URI) when is_list(URI) orelse is_binary(URI) -> raw_decode(URI). +-spec quote(Data) -> QuotedData when + Data :: unicode:chardata(), + QuotedData :: unicode:chardata(). +quote(D) -> + encode(D, fun is_unreserved/1). + +-spec quote(Data, Safe) -> QuotedData when + Data :: unicode:chardata(), + Safe :: string(), + QuotedData :: unicode:chardata(). +quote(D, Safe) -> + UnreservedOrSafe = + fun(C) -> + is_unreserved(C) orelse lists:member(C, Safe) + end, + encode(D, UnreservedOrSafe). + +-spec unquote(QuotedData) -> Data when + QuotedData :: unicode:chardata(), + Data :: unicode:chardata(). +unquote(D) -> + raw_decode(D). + %%------------------------------------------------------------------------- %% Functions for working with the query part of a URI as a list %% of key/value pairs. diff --git a/lib/stdlib/test/uri_string_SUITE.erl b/lib/stdlib/test/uri_string_SUITE.erl index 04d7a23eab..6a6bff2688 100644 --- a/lib/stdlib/test/uri_string_SUITE.erl +++ b/lib/stdlib/test/uri_string_SUITE.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2008-2020. All Rights Reserved. +%% Copyright Ericsson AB 2008-2022. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -20,6 +20,8 @@ -module(uri_string_SUITE). -include_lib("common_test/include/ct.hrl"). +-compile({nowarn_deprecated_function, [{http_uri, encode, 1}]}). +-compile({nowarn_deprecated_function, [{http_uri, decode, 1}]}). -export([all/0, suite/0,groups/0, normalize/1, normalize_map/1, normalize_return_map/1, normalize_negative/1, @@ -53,7 +55,8 @@ interop_query_latin1/1, interop_query_utf8/1, regression_parse/1, regression_recompose/1, regression_normalize/1, recompose_host_relative_path/1, - recompose_host_absolute_path/1 + recompose_host_absolute_path/1, + quote/1 ]). @@ -148,7 +151,8 @@ all() -> regression_recompose, regression_normalize, recompose_host_relative_path, - recompose_host_absolute_path + recompose_host_absolute_path, + quote ]. groups() -> @@ -1354,3 +1358,132 @@ recompose_host_absolute_path(_Config) -> path => [<<"/f">>,<<"oo">>]}), ok. +%%------------------------------------------------------------------------- +%% Quote tests +%%------------------------------------------------------------------------- +quote(_Config) -> + TestQuote = + fun(Unquoted, Quoted) -> + Quoted = uri_string:quote(Unquoted) + end, + + [TestQuote(U, Q) || #{unquoted := U, quoted := Q} <- get_quote_data()], + [TestQuote(U, Q) || #{unquoted_b := U, quoted_b := Q} <- get_quote_data()], + + Head = fun([H | _]) -> H; + (<<H, _/binary>>) -> H + end, + + TestQuoteUnquote = + fun(Unquoted) -> + %% case below should be removed when functions used are removed + case Head(Unquoted) =< 127 of + true -> + Unquoted = http_uri:decode(http_uri:encode(Unquoted)); + _ -> + ok + end, + Unquoted = uri_string:unquote(uri_string:quote(Unquoted)) + end, + [TestQuoteUnquote(U) || #{unquoted := U} <- get_quote_data()], + [TestQuoteUnquote(U) || #{unquoted_b := U} <- get_quote_data()], + + TestQuoteWithSafeList = + fun(Unquoted, Quoted) -> + Safe = "!$()*", %% characters not encoded by old http_uri:encode + Result = uri_string:quote(Unquoted, Safe), + %% case below should be removed when function used are removed + case Head(Unquoted) =< 127 of + true -> + Result = http_uri:encode(Unquoted); + _ -> + ok + end, + case lists:member(Head(Unquoted), Safe) of + true -> + Unquoted = Result; + false -> + Quoted = Result + end + end, + [TestQuoteWithSafeList(U, Q) || #{unquoted := U, quoted := Q} <- get_quote_data()], + [TestQuoteWithSafeList(U, Q) || #{unquoted_b := U, quoted_b := Q} <- get_quote_data()], + + + ComposePath = fun (PathSegments, Safe) -> + lists:join("/", [uri_string:quote(S, Safe) || + S <- PathSegments]) + end, + + %% / used as data see GH-5368 + ExampleURI1 = "https://internal.api.com/devices/Ethernet0%2F4", + ExampleURI1 = uri_string:recompose( + #{scheme => "https", + host => "internal.api.com", + path => ComposePath(["devices", "Ethernet0/4"], + "")}), + + %% sub-delims as data + %% in this example ComposePath must treat sub-delims and '%' as safe character + %% to avoid re-encoding encoded characters + ExampleURI2 = "yeti://localhost/folder/file.txt,version=1%2C1", + ExampleURI2 = uri_string:recompose( + #{scheme => "yeti", + host => "localhost", + path => ComposePath(["folder", "file.txt,version=" ++ + uri_string:quote("1,1")], + ",=%")}), + + %% percent character as data + ExampleURI3 = "yeti://localhost/folder/file_with_%25.txt", + ExampleURI3 = uri_string:recompose( + #{scheme => "yeti", + host => "localhost", + path => ComposePath(["folder", "file_with_" ++ + uri_string:quote("%") ++ ".txt"], + "%")}), + ok. + +get_quote_data() -> + [%% reserved/gen-delims + #{unquoted => ":", quoted => "%3A", unquoted_b =><<":">>, quoted_b=> <<"%3A">>}, + #{unquoted => "/", quoted => "%2F", unquoted_b =><<"/">>, quoted_b=> <<"%2F">>}, + #{unquoted => "?", quoted => "%3F", unquoted_b =><<"?">>, quoted_b=> <<"%3F">>}, + #{unquoted => "#", quoted => "%23", unquoted_b =><<"#">>, quoted_b=> <<"%23">>}, + #{unquoted => "[", quoted => "%5B", unquoted_b =><<"[">>, quoted_b=> <<"%5B">>}, + #{unquoted => "]", quoted => "%5D", unquoted_b =><<"]">>, quoted_b=> <<"%5D">>}, + #{unquoted => "@", quoted => "%40", unquoted_b =><<"@">>, quoted_b=> <<"%40">>}, + %% reserved/sub-delims + #{unquoted => "!", quoted => "%21", unquoted_b =><<"!">>, quoted_b=> <<"%21">>}, + #{unquoted => "$", quoted => "%24", unquoted_b =><<"$">>, quoted_b=> <<"%24">>}, + #{unquoted => "&", quoted => "%26", unquoted_b =><<"&">>, quoted_b=> <<"%26">>}, + #{unquoted => "'", quoted => "%27", unquoted_b =><<"'">>, quoted_b=> <<"%27">>}, + #{unquoted => "(", quoted => "%28", unquoted_b =><<"(">>, quoted_b=> <<"%28">>}, + #{unquoted => ")", quoted => "%29", unquoted_b =><<")">>, quoted_b=> <<"%29">>}, + #{unquoted => "*", quoted => "%2A", unquoted_b =><<"*">>, quoted_b=> <<"%2A">>}, + #{unquoted => "+", quoted => "%2B", unquoted_b =><<"+">>, quoted_b=> <<"%2B">>}, + #{unquoted => ",", quoted => "%2C", unquoted_b =><<",">>, quoted_b=> <<"%2C">>}, + #{unquoted => ";", quoted => "%3B", unquoted_b =><<";">>, quoted_b=> <<"%3B">>}, + #{unquoted => "=", quoted => "%3D", unquoted_b =><<"=">>, quoted_b=> <<"%3D">>}, + %% other not unreserved + #{unquoted => "<", quoted => "%3C", unquoted_b =><<"<">>, quoted_b=> <<"%3C">>}, + #{unquoted => ">", quoted => "%3E", unquoted_b =><<">">>, quoted_b=> <<"%3E">>}, + #{unquoted => "\"", quoted => "%22", unquoted_b =><<"\"">>, quoted_b=> <<"%22">>}, + #{unquoted => "{", quoted => "%7B", unquoted_b =><<"{">>, quoted_b=> <<"%7B">>}, + #{unquoted => "}", quoted => "%7D", unquoted_b =><<"}">>, quoted_b=> <<"%7D">>}, + #{unquoted => "|", quoted => "%7C", unquoted_b =><<"|">>, quoted_b=> <<"%7C">>}, + #{unquoted => "\\", quoted => "%5C", unquoted_b =><<"\\">>, quoted_b=> <<"%5C">>}, + #{unquoted => "^", quoted => "%5E", unquoted_b =><<"^">>, quoted_b=> <<"%5E">>}, + #{unquoted => "%", quoted => "%25", unquoted_b =><<"%">>, quoted_b=> <<"%25">>}, + #{unquoted => " ", quoted => "%20", unquoted_b =><<" ">>, quoted_b=> <<"%20">>}, + %% non-ASCII + #{unquoted => "örebro", quoted => "%C3%B6rebro", + unquoted_b =><<"örebro"/utf8>>, quoted_b=> <<"%C3%B6rebro">>}, + #{unquoted => "Łódź", quoted => "%C5%81%C3%B3d%C5%BA", + unquoted_b =><<"Łódź"/utf8>>, quoted_b=> <<"%C5%81%C3%B3d%C5%BA">>}, + %% unreserved non alpha, non digit characters + #{unquoted => "-", quoted => "-", unquoted_b =><<"-">>, quoted_b=> <<"-">>}, + #{unquoted => ".", quoted => ".", unquoted_b =><<".">>, quoted_b=> <<".">>}, + #{unquoted => "_", quoted => "_", unquoted_b =><<"_">>, quoted_b=> <<"_">>}, + #{unquoted => "~", quoted => "~", unquoted_b =><<"~">>, quoted_b=> <<"~">>} + ]. -- 2.34.1
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor