From 1dfbff3c2217af3eaaaec7f8d9ca567b9e105062 Mon Sep 17 00:00:00 2001 From: "J. King" Date: Mon, 11 May 2020 19:45:18 -0400 Subject: [PATCH] Mostly fix host parsing --- lib/Url.php | 90 ++++++++++++++++++++++++--- tests/cases/Util/Url/Psr7TestCase.php | 2 +- 2 files changed, 84 insertions(+), 8 deletions(-) diff --git a/lib/Url.php b/lib/Url.php index 1914210..dec16d8 100644 --- a/lib/Url.php +++ b/lib/Url.php @@ -50,6 +50,7 @@ PCRE; protected const IPV6_PATTERN = '/^\[[^\]]+\]$/i'; protected const PORT_PATTERN = '/^\d*$/'; protected const FORBIDDEN_HOST_PATTERN = '/[\x{00}\t\n\r #%\/:\?@\[\]\\\]/'; + protected const FORBIDDEN_OPAQUE_HOST_PATTERN = '/[\x{00}\t\n\r #\/:\?@\[\]\\\]/'; // forbidden host excluding % protected const WINDOWS_AUTHORITY_PATTERN = '/^[\/\\\\]{1,2}[a-zA-Z][:|]$/'; protected const WINDOWS_PATH_PATTERN = '/(?:^|\/)([a-zA-Z])[:|]($|[\/#\?].*)/'; protected const WHITESPACE_CHARS = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20"; @@ -336,7 +337,7 @@ PCRE; if ($this->scheme === "file" && strtolower($value) === "localhost") { $this->host = ""; } else { - $this->host = $this->normalizeHost($value); + $this->host = $this->parseHost($value); } } @@ -464,18 +465,28 @@ PCRE; } } - protected function normalizeHost(?string $host): ?string { + protected function parseHost(?string $host): ?string { if (strlen($host ?? "")) { - if ($host[0] === "[" && $host[-1] === "]") { + if ($host[0] === "[") { + if ($host[-1] !== "]") { + throw new \InvalidArgumentException("Invalid host in URL"); + } // normalize IPv6 addresses - $addr = $this->normalizeIPv6(substr($host, 1, strlen($host) - 2)); + $addr = $this->parseIPv6(substr($host, 1, strlen($host) - 2)); if ($addr !== null) { return "[".$addr."]"; } else { throw new \InvalidArgumentException("Invalid host in URL"); } + } elseif (!$this->specialScheme) { + // simply apply percent-encoding where necessary to hosts for non-special schemes + if (preg_match(self::FORBIDDEN_OPAQUE_HOST_PATTERN, $host)) { + throw new \InvalidArgumentException("Invalid host in URL"); + } + return $this->percentEncode($host, "C0"); } - $idn = idn_to_ascii($host, \IDNA_NONTRANSITIONAL_TO_ASCII | \IDNA_CHECK_BIDI | \IDNA_CHECK_CONTEXTJ, \INTL_IDNA_VARIANT_UTS46); + $host = rawurldecode($host); + $idn = $this->parseIPv4($host) ?? idn_to_ascii($host, \IDNA_NONTRANSITIONAL_TO_ASCII | \IDNA_CHECK_BIDI | \IDNA_CHECK_CONTEXTJ, \INTL_IDNA_VARIANT_UTS46); if ( $idn === false || preg_match(self::FORBIDDEN_HOST_PATTERN, $idn) @@ -483,14 +494,79 @@ PCRE; ) { throw new \InvalidArgumentException("Invalid host in URL"); } - return $idn; + return strtolower($idn); } elseif ($this->specialScheme && $this->scheme !== "file") { throw new \InvalidArgumentException("Invalid host in URL"); } return $host; } - protected function normalizeIPv6(string $input): ?string { + protected function parseIPv4(string $input) { + // first parse the address; this is a literal implementation of https://url.spec.whatwg.org/#concept-ipv4-parser + assert(strlen($input)); + $input = explode(".", $input); + if ($input[sizeof($input) - 1] === "" && sizeof($input) > 1) { + array_pop($input); + } + if (sizeof($input) > 4) { + return null; + } + $numbers = []; + foreach ($input as $p) { + if ($p === "") { + return null; + } + $result = $this->parseIPv4Number($p); + if (!is_int($result)) { + return null; + } else { + $numbers[] = $result; + } + } + $ipv4 = array_pop($numbers); + $counter = 0; + if ($ipv4 >= 256 ** (5 - (sizeof($numbers) + 1))) { + return false; + } + foreach ($numbers as $n) { + if ($n > 255) { + return false; + } + $ipv4 += $n * 256 ** (3 - $counter); + $counter++; + } + // now re-serialize the address + $out = []; + for ($a = 0; $a < 4; $a++) { + $out[] = $ipv4 % 256; + $ipv4 = floor($ipv4 / 256); + } + return implode(".", array_reverse($out)); + } + + protected function parseIPv4Number(string $n): ?int { + if ($n === "") { + return 0; + } elseif (preg_match("/^0x/i", $n)) { + $n = substr($n, 2); + $r = 16; + } elseif ($n[0] === "0") { + $n = substr($n, 1); + $r = 8; + } else { + $r = 10; + } + if ( + ($r === 10 && preg_match("/[^0-9]/", $n)) + || ($r === 8 && preg_match("/[^0-7]/", $n)) + || ($r === 16 && preg_match("/[^0-9a-fA-F]/", $n)) + ) { + return null; + } + return (int) base_convert($n, $r, 10); + } + + protected function parseIPv6(string $input): ?string { // first parse the address; this is a literal implementation of https://url.spec.whatwg.org/#concept-ipv6-parser $addr = array_fill(0, 8, 0); $pieceIndex = 0; diff --git a/tests/cases/Util/Url/Psr7TestCase.php b/tests/cases/Util/Url/Psr7TestCase.php index 72eed41..93fcf7a 100644 --- a/tests/cases/Util/Url/Psr7TestCase.php +++ b/tests/cases/Util/Url/Psr7TestCase.php @@ -261,8 +261,8 @@ abstract class Psr7TestCase extends TestCase { */ public function testToString($scheme, $user, $pass, $host, $port, $path, $query, $fragment, $expected) { $uri = $this->createUri() - ->withHost($host) ->withScheme($scheme) + ->withHost($host) ->withUserInfo($user, $pass) ->withPort($port) ->withPath($path)