From db7d6821966db3db573fe362aabe03f063ae7b0c Mon Sep 17 00:00:00 2001 From: "J. King" Date: Fri, 22 May 2020 12:35:19 -0400 Subject: [PATCH] Use stricter pattern with non-special URIs --- lib/Url.php | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/lib/Url.php b/lib/Url.php index 687641e..5110c8b 100644 --- a/lib/Url.php +++ b/lib/Url.php @@ -43,6 +43,20 @@ class Url implements UriInterface { (\?[^\#]*)? # query part (\#.*)? # fragment part $>six +PCRE; + protected const STRICT_URI_PATTERN = <<<'PCRE' +<^ +(?: + (?: + ([a-z][a-z0-9\.\-\+]*): | # absolute URI + :?(?=//) # scheme-relative URI + ) + (//?[^/\?\#]*)? # authority part +)? +([^\?\#]*) # path part +(\?[^\#]*)? # query part +(\#.*)? # fragment part +$>six PCRE; protected const HOST_PATTERN = '/^(\[[a-f0-9:\.]*\]|[^:]*)(:[^\/]*)?$/si'; protected const USER_PATTERN = '/^([^:]*)(?::(.*))?$/s'; @@ -97,8 +111,9 @@ PCRE; public function __construct(string $url, string $baseUrl = null) { $url = str_replace(["\t", "\n", "\r"], "", trim($url, self::WHITESPACE_CHARS)); $base = null; + $pattern = self::URI_PATTERN; reprocess: - if (preg_match(self::URI_PATTERN, $url, $match)) { + if (preg_match($pattern, $url, $match)) { [$url, $scheme, $authority, $path, $query, $fragment] = array_pad($match, 6, ""); // if the URI is not unambigously a URL, parse the base URI if (!$base && $baseUrl && (!$scheme || substr($authority, 0, 2) !== "//")) { @@ -106,6 +121,11 @@ PCRE; } // set the scheme; use the base scheme if necessary $this->setScheme($scheme ?: ($base->scheme ?? "")); + // if the scheme is non-special, re-process with a stricter pattern + if (!$this->specialScheme && $pattern !== self::STRICT_URI_PATTERN) { + $pattern = self::STRICT_URI_PATTERN; + goto reprocess; + } // make various checks to see if the authority should actually be the starts of the path if ($authority && !in_array($authority[1] ?? "", ["/", "\\"])) { // the URI is something like x:/example.com/