Browse Source

Handle ports, paths, and credentials correctly

microsub
J. King 5 years ago
parent
commit
dc750acf07
  1. 69
      lib/Misc/URL.php
  2. 46
      tests/cases/Misc/TestURL.php

69
lib/Misc/URL.php

@ -6,7 +6,37 @@
declare(strict_types=1);
namespace JKingWeb\Arsse\Misc;
/**
* A collection of functions for manipulating URLs
*/
class URL {
/** User component */
const P_USER = 1;
/** Password component */
const P_PASS = 2;
/** Path segment component */
const P_PATH = 3;
/** Full query component */
const P_QUERY = 4;
/** Normalizes an absolute URL
*
* Normalizations performed are:
*
* - Lowercasing scheme
* - Lowercasing host names
* - IDN normalization (IDN rather than punycode is returned)
* - IPv6 address normalization
* - Resolution of relative path segments
* - Discarding empty path segments
* - Discarding empty queries
* - %-encoding normalization
* - Fragment discarding
*
* @param string $url The URL to normalize. Relative URLs are returned unchanged
* @param string|null $u Username to add to the URL, replacing any existing credentials; if null or empty, credentials already present in the URL are kept
* @param string|null $p Password to add to the URL; only used if a non-empty username is specified
*/
public static function normalize(string $url, string $u = null, string $p = null): string {
extract(parse_url($url));
if (!isset($scheme) || !isset($host) || !strlen($host)) {
@ -14,15 +44,15 @@ class URL {
}
$out = strtolower($scheme)."://";
if (strlen($u ?? "")) {
$out .= self::normalizePart($u, self::P_USER, false);
$out .= self::normalizePart(rawurlencode($u), self::P_USER, false);
if (strlen($p ?? "")) {
$out .= ":".self::normalizePart($p, self::P_PASS, false);
$out .= ":".self::normalizePart(rawurlencode($p), self::P_PASS, false);
}
$out .= "@";
} elseif (strlen($username ?? "")) {
$out .= self::normalizePart($username, self::P_USER);
if (strlen($password ?? "")) {
$out .= ":".self::normalizePart($username, self::P_PASS);
} elseif (strlen($user ?? "")) {
$out .= self::normalizePart($user, self::P_USER);
if (strlen($pass ?? "")) {
$out .= ":".self::normalizePart($pass, self::P_PASS);
}
$out .= "@";
}
@ -31,11 +61,8 @@ class URL {
} else {
$out .= self::normalizeHost($host);
}
if (isset($path)) {
$out .= self::normalizePath($path);
} else {
$out .= "/";
}
$out .= isset($port) ? ":$port" : "";
$out .= self::normalizePath($path ?? "");
if (isset($query) && strlen($query)) {
$out .= "?".self::normalizePart($query, self::P_QUERY);
}
@ -57,10 +84,22 @@ class URL {
return $addr;
}
/** Normalizes the whole path segment to remove empty segments and relative segments
 *
 * Empty ("") and same-directory (".") segments are dropped, and each
 * parent-directory ("..") segment removes the most recently retained
 * segment. Every other segment is passed through normalizePart() with
 * the P_PATH component type.
 *
 * The result always starts with a slash; a trailing slash is appended
 * when the input ends with one. If no segments are retained the result
 * is the root path "/".
 *
 * @param string $path The raw path component of a URL; may be an empty string
 * @return string The normalized path
 */
protected static function normalizePath(string $path): string {
    $out = [];
    foreach (explode("/", $path) as $p) {
        switch ($p) {
            case "":
            case ".":
                // empty and same-directory segments contribute nothing
                break;
            case "..":
                // step up one level; array_pop() on an empty array is a no-op,
                // so the path can never climb above the root
                array_pop($out);
                break;
            default:
                $out[] = self::normalizePart($p, self::P_PATH);
        }
    }
    // preserve a trailing slash from the input; when no segments were retained
    // this produces "//", which the replacement collapses to the root path
    return str_replace("//", "/", "/".implode("/", $out).(substr($path, -1) === "/" ? "/" : ""));
}
}

46
tests/cases/Misc/TestURL.php

@ -15,17 +15,49 @@ class TestURL extends \JKingWeb\Arsse\Test\AbstractTest {
}
/** Checks that a URL normalizes to the expected string, optionally supplying replacement credentials
 *
 * @dataProvider provideNormalizations
 * @param string $url The URL to normalize
 * @param string $exp The expected result of normalization
 * @param string|null $user Username forwarded to URL::normalize(), if the data set provides one
 * @param string|null $pass Password forwarded to URL::normalize(), if the data set provides one
 */
public function testNormalizeAUrl(string $url, string $exp, string $user = null, string $pass = null) {
    $this->assertSame($exp, URL::normalize($url, $user, $pass));
}
/** Supplies data sets for testNormalizeAUrl
 *
 * Each row holds, in order: the input URL, the expected normalized URL,
 * and optionally a username and a password to pass along with the URL.
 */
public function provideNormalizations() {
    return [
        // relative URLs, scheme and host handling
        ["/", "/"],
        ["//example.com/", "//example.com/"],
        ["http://[::1]/", "http://[::1]/"],
        ["http://example.com/", "http://example.com/"],
        ["HTTP://example.com/", "http://example.com/"],
        ["http://example.com", "http://example.com/"],
        // ports
        ["http://example.com:/", "http://example.com/"],
        ["HTTP://example.com:80/", "http://example.com:80/"],
        ["HTTP://example.com:80", "http://example.com:80/"],
        // queries and fragments
        ["http://example.com/?", "http://example.com/"],
        ["http://example.com?", "http://example.com/"],
        ["http://example.com/#fragment", "http://example.com/"],
        ["http://example.com#fragment", "http://example.com/"],
        ["http://example.com?#", "http://example.com/"],
        ["http://example.com/?key=value", "http://example.com/?key=value"],
        // credentials, both embedded in the URL and supplied separately
        ["http://example.com/", "http://user:pass@example.com/", "user", "pass"],
        ["http://example.com/", "http://user@example.com/", "user"],
        ["http://user:pass@example.com/", "http://user:pass@example.com/"],
        ["http://user@example.com/", "http://user@example.com/"],
        ["http://user:pass@example.com/", "http://u:p@example.com/", "u", "p"],
        ["http://user:pass@example.com/", "http://u@example.com/", "u"],
        ["http://user:pass@example.com/", "http://user:pass@example.com/", "", "p"],
        ["http://example.com/", "http://example.com/", "", "p"],
        // paths, including relative and empty segments
        ["http://example.com/path", "http://example.com/path"],
        ["http://example.com/path/", "http://example.com/path/"],
        ["http://example.com/path/.", "http://example.com/path"],
        ["http://example.com/path/./", "http://example.com/path/"],
        ["http://example.com/path/..", "http://example.com/"],
        ["http://example.com/path/../", "http://example.com/"],
        ["http://example.com/a/b/..", "http://example.com/a"],
        ["http://example.com/a/b/../", "http://example.com/a/"],
        ["http://example.com/../", "http://example.com/"],
        ["http://example.com////", "http://example.com/"],
        ["http://example.com/a/./b/", "http://example.com/a/b/"],
        ["http://example.com/a/../b/", "http://example.com/b/"],
        ["http://example.com/.a/", "http://example.com/.a/"],
        ["http://example.com/..a/", "http://example.com/..a/"],
    ];
}
}

Loading…
Cancel
Save