diff --git a/lib/Misc/URL.php b/lib/Misc/URL.php index 0b0d8d4..a4908d4 100644 --- a/lib/Misc/URL.php +++ b/lib/Misc/URL.php @@ -11,7 +11,7 @@ namespace JKingWeb\Arsse\Misc; */ class URL { - /** Normalizes an absolute URL + /** Normalizes a URL * * Normalizations performed are: * @@ -27,32 +27,35 @@ class URL { * * It does NOT drop trailing slashes from paths, nor does it perform Unicode normalization or context-aware percent-encoding normalization * - * @param string $url The URL to normalize. Relative URLs are returned unchanged + * @param string $url The URL to normalize * @param string $u Username to add to the URL, replacing any existing credentials * @param string $p Password to add to the URL, if a username is specified */ public static function normalize(string $url, string $u = null, string $p = null): string { extract(parse_url($url)); - if (!isset($scheme) || !isset($host) || !strlen($host)) { - return $url; + $out = ""; + if (isset($scheme)) { + $out .= strtolower($scheme).":"; } - $out = strtolower($scheme)."://"; - if (strlen($u ?? "")) { - $out .= self::normalizeEncoding(rawurlencode($u)); - if (strlen($p ?? "")) { - $out .= ":".self::normalizeEncoding(rawurlencode($p)); - } - $out .= "@"; - } elseif (strlen($user ?? "")) { - $out .= self::normalizeEncoding($user); - if (strlen($pass ?? "")) { - $out .= ":".self::normalizeEncoding($pass); + if (isset($host)) { + $out .= "//"; + if (strlen($u ?? "")) { + $out .= self::normalizeEncoding(rawurlencode($u)); + if (strlen($p ?? "")) { + $out .= ":".self::normalizeEncoding(rawurlencode($p)); + } + $out .= "@"; + } elseif (strlen($user ?? "")) { + $out .= self::normalizeEncoding($user); + if (strlen($pass ?? "")) { + $out .= ":".self::normalizeEncoding($pass); + } + $out .= "@"; } - $out .= "@"; + $out .= self::normalizeHost($host); + $out .= isset($port) ? ":$port" : ""; } - $out .= self::normalizeHost($host); - $out .= isset($port) ? ":$port" : ""; - $out .= self::normalizePath($path ?? ""); + $out .= self::normalizePath($path ?? "", isset($host)); if (isset($query) && strlen($query)) { $out .= "?".self::normalizeEncoding($query); } @@ -114,8 +117,10 @@ class URL { } /** Normalizes the whole path segment to remove empty segments and relative segments */ - protected static function normalizePath(string $path): string { + protected static function normalizePath(string $path, bool $hasHost): string { $parts = explode("/", self::normalizeEncoding($path)); + $absolute = ($hasHost || $path[0] === "/"); + $index = (substr($path, -1) === "/"); $out = []; foreach($parts as $p) { switch ($p) { @@ -129,6 +134,8 @@ class URL { $out[] = $p; } } - return str_replace("//", "/", "/".implode("/", $out).(substr($path, -1) === "/" ? "/" : "")); + $out = implode("/", $out); + $out = ($absolute ? "/" : "").$out.($index ? "/" : ""); + return str_replace("//", "/", $out); } } diff --git a/tests/cases/Misc/TestURL.php b/tests/cases/Misc/TestURL.php index 44bfd05..9d06933 100644 --- a/tests/cases/Misc/TestURL.php +++ b/tests/cases/Misc/TestURL.php @@ -21,10 +21,6 @@ class TestURL extends \JKingWeb\Arsse\Test\AbstractTest { public function provideNormalizations() { return [ - ["/", "/"], - ["//example.com/", "//example.com/"], - ["/ ", "/ "], - ["//EXAMPLE.COM/", "//EXAMPLE.COM/"], ["http://example.com/", "http://example.com/"], ["HTTP://example.com/", "http://example.com/"], ["http://example.com", "http://example.com/"], @@ -46,6 +42,7 @@ class TestURL extends \JKingWeb\Arsse\Test\AbstractTest { ["http://user:pass@example.com/", "http://user:pass@example.com/", "", "p"], ["http://example.com/", "http://example.com/", "", "p"], ["http://example.com/path", "http://example.com/path"], + ["http://example.com/PATH", "http://example.com/PATH"], ["http://example.com/path/", "http://example.com/path/"], ["http://example.com/path/.", "http://example.com/path"], ["http://example.com/path/./", "http://example.com/path/"], @@ -69,6 +66,13 @@ class TestURL extends \JKingWeb\Arsse\Test\AbstractTest { ["http://example.com/%", "http://example.com/%25"], ["http://example.com/%a", "http://example.com/%25a"], ["http://example.com/%za", "http://example.com/%25za"], + ["//EXAMPLE.COM/", "//example.com/"], + ["//EXAMPLE.COM/", "//u:p@example.com/", "u", "p"], + ["/ ", "/%20"], + ["/ ", "/%20", "u", "p"], + ["EXAMPLE.COM/", "EXAMPLE.COM/"], + ["EXAMPLE.COM", "EXAMPLE.COM"], + [" ", "%20"], ]; } }