From d4802bcdb6593878b8a58973536b5e74536428d4 Mon Sep 17 00:00:00 2001 From: "J. King" Date: Tue, 27 Aug 2019 15:18:02 -0400 Subject: [PATCH] Handle IDNs While IPv6 address normalization was originally planned, it was deemed too much effort for such a niche feature; IPv6 addresses are instead passed through unmodified. --- lib/Misc/URL.php | 22 +++++++--------------- tests/cases/Misc/TestURL.php | 3 +++ 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/lib/Misc/URL.php b/lib/Misc/URL.php index c8589b0..da47006 100644 --- a/lib/Misc/URL.php +++ b/lib/Misc/URL.php @@ -24,15 +24,16 @@ class URL { * Normalizations performed are: * * - Lowercasing scheme - * - Lowercasing host names - * - IDN normalization (IDN rather than punycode is returned) - * - IPv6 address normalization + * - Lowercasing ASCII host names + * - IDN normalization * - Resolution of relative path segments * - Discarding empty path segments * - Discarding empty queries * - %-encoding normalization * - Fragment discarding * + * It does NOT perform IPv6 address normalization, nor does it drop trailing slashes from paths + * * @param string $url The URL to normalize. Relative URLs are returned unchanged * @param string $u Username to add to the URL, replacing any existing credentials * @param string $p Password to add to the URL, if a username is specified @@ -56,11 +57,7 @@ class URL { } $out .= "@"; } - if ($host[0] === "[") { - $out .= self::normalizeIPv6($host); - } else { - $out .= self::normalizeHost($host); - } + $out .= self::normalizeHost($host); $out .= isset($port) ? ":$port" : ""; $out .= self::normalizePath($path ?? 
""); if (isset($query) && strlen($query)) { @@ -75,13 +72,8 @@ class URL { } protected static function normalizeHost(string $host): string { - // stub - return $host; - } - - protected static function normalizeIPv6(string $addr): string { - // stub - return $addr; + $idn = idn_to_ascii($host, \IDNA_NONTRANSITIONAL_TO_ASCII, \INTL_IDNA_VARIANT_UTS46); + return $idn !== false ? idn_to_utf8($idn, \IDNA_NONTRANSITIONAL_TO_UNICODE, \INTL_IDNA_VARIANT_UTS46) : $host; } /** Normalizes the whole path segment to remove empty segments and relative segments */ diff --git a/tests/cases/Misc/TestURL.php b/tests/cases/Misc/TestURL.php index b6c95b2..2a8a773 100644 --- a/tests/cases/Misc/TestURL.php +++ b/tests/cases/Misc/TestURL.php @@ -58,6 +58,9 @@ class TestURL extends \JKingWeb\Arsse\Test\AbstractTest { ["http://example.com/a/../b/", "http://example.com/b/"], ["http://example.com/.a/", "http://example.com/.a/"], ["http://example.com/..a/", "http://example.com/..a/"], + ["http://日本.example.com/", "http://日本.example.com/"], + ["http://EXAMPLE.COM/", "http://example.com/"], + ["http://É.example.com/", "http://é.example.com/"], ]; } }