Browse Source

Cleanup and file: scheme fixes

master
J. King 4 years ago
parent
commit
4bd162931c
  1. 73
      lib/Url.php
  2. 2
      tests/cases/Util/Url/UrlTest.php

73
lib/Url.php

@ -54,6 +54,7 @@ PCRE;
protected const IPV6_PATTERN = '/^\[[^\]]+\]$/i';
protected const PORT_PATTERN = '/^\d*$/';
protected const FORBIDDEN_HOST_PATTERN = '/[\x{00}\t\n\r #%\/:\?@\[\]\\\]/';
protected const WINDOWS_PATH_PATTERN = '/(?:^|\/)([a-zA-Z])[:|]($|[\/#\?].*)/';
protected const WHITESPACE_CHARS = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20";
protected const PERCENT_ENCODE_SETS = [
'C0' => "",
@ -62,12 +63,6 @@ PCRE;
'userinfo' => " \"<>`?#{}/:;=@[\]^|",
'query' => " \"<>#", // single-quote as well if scheme is special
];
protected const ESCAPE_CHARS = [
'user' => [":", "@", "/", "?", "#"],
'pass' => [":", "@", "/", "?", "#"],
'path' => ["?", "#"],
'query' => ["#"],
];
protected const SPECIAL_SCHEMES = [
'ftp' => 21,
'file' => null,
@ -101,13 +96,17 @@ PCRE;
public function __construct(string $url, string $baseUrl = null) {
$url = str_replace(["\t", "\n", "\r"], "", trim($url, self::WHITESPACE_CHARS));
$base = null;
reprocess:
if (preg_match(self::URI_PATTERN, $url, $match)) {
[$url, $scheme, $authority, $path, $query, $fragment] = array_pad($match, 6, "");
if (!$base && $baseUrl && ($scheme || substr($authority, 0, 2) !== "//")) {
$base = new static($baseUrl);
}
$this->setScheme($scheme);
if ($authority && !in_array($authority[1] ?? "", ["/", "\\"])) {
// the URI is something like x:/example.com/
if ($baseUrl && ($base = new static($baseUrl)) && $this->scheme === $base->scheme && !$base->isUrn()) {
if ($base && $this->scheme === $base->scheme && !$base->isUrn()) {
// URI is a relative URL; add authority to path instead
$path = $authority.$path;
$authority = "";
@ -125,7 +124,7 @@ PCRE;
}
} elseif ($scheme && !$authority) {
// the URI is something like x:example.com/
if ($baseUrl && ($base = new static($baseUrl)) && $this->scheme === $base->scheme && !$base->isUrn()) {
if ($base && $this->scheme === $base->scheme && !$base->isUrn()) {
// URI is a relative URL; continue processing
} elseif ($this->scheme === "file") {
// URI is an absolute file: URL; add the authority delimiter and default authority to the URL and reprocess
@ -155,7 +154,7 @@ PCRE;
$this->setPort($match[2] ?? "");
}
}
if (!$scheme && $baseUrl) {
if (!$scheme && $base) {
// the effective URL scheme must be known to correctly process the path
$base = $base ?? new static($baseUrl);
$this->setScheme($base->scheme);
@ -167,12 +166,9 @@ PCRE;
if ($fragment) {
$this->setFragment(substr($fragment, 1));
}
if ((!$this->scheme || ($this->host === null && $this->specialScheme)) && strlen($baseUrl ?? "")) {
if ((!$scheme || ($this->host === null && $this->specialScheme)) && strlen($baseUrl ?? "")) {
$this->resolve($base ?? new static($baseUrl));
}
if ($this->scheme === "file" && !($this->host === "" || $this->host === "localhost")) {
throw new \InvalidArgumentException("Invalid authority for file: scheme");
}
} else {
throw new \InvalidArgumentException("String is not a valid URI");
}
@ -321,6 +317,8 @@ PCRE;
protected function setPort(string $value): void {
if (!strlen($value)) {
$this->port = null;
} elseif ($this->scheme === "file") {
throw new \InvalidArgumentException("Port in file: scheme must always be null");
} elseif (preg_match(self::PORT_PATTERN, (string) $value) && (int) $value <= 0xFFFF) {
$value = (int) $value;
if ($this->specialScheme && $value === self::SPECIAL_SCHEMES[$this->scheme]) {
@ -337,6 +335,9 @@ PCRE;
if ($this->specialScheme) {
$value = str_replace("\\", "/", $value);
}
if ($this->scheme === "file" && preg_match(self::WINDOWS_PATH_PATTERN, $value, $match)) {
$value = "/".$match[1].":".$match[2];
}
$this->path = $this->percentEncode($value, $this->isUrn() ? "C0" : "path");
}
@ -405,52 +406,6 @@ PCRE;
}
}
/** Normalizes the percent-encoding of a URL component
 *
 * The input is scanned byte by byte. A "%" followed by two hexadecimal
 * digits is decoded to the byte it represents; any other "%" is treated
 * as a literal percent sign. Each resulting byte is then emitted as follows:
 *
 * - bytes below 0x21, above 0x7E, and the percent sign itself are always
 *   percent-encoded
 * - ASCII letters, digits, ".", "_", "~" and "-" are always emitted literally
 * - any other byte keeps the form it had in the input, except that bytes
 *   listed in ESCAPE_CHARS for the given part are always percent-encoded
 *
 * Percent-encoded output always uses two upper-case hexadecimal digits.
 *
 * @param string $data The URL component to normalize
 * @param string|null $part The key into ESCAPE_CHARS naming the component being processed; unknown or null values select no extra delimiters
 * @return string The normalized component
 */
protected function normalizeEncoding(string $data, ?string $part = null): string {
    $pos = 0;
    $end = strlen($data);
    $out = "";
    // delimiters which must be escaped in this part, if any
    $esc = self::ESCAPE_CHARS[$part ?? ""] ?? [];
    // process each character in sequence
    while ($pos < $end) {
        $c = $data[$pos];
        if ($c === "%") {
            // the % character signals an encoded character...
            $d = substr($data, $pos + 1, 2);
            if (!preg_match("/^[0-9a-fA-F]{2}$/", $d)) {
                // unless there are fewer than two characters left in the string or the two characters are not hex digits
                $d = ord($c);
            } else {
                $d = (int) hexdec($d);
                // skip over the two hex digits which were just consumed
                $pos += 2;
            }
        } else {
            $d = ord($c);
        }
        $dc = chr($d);
        // always pad to two digits: dechex() alone would yield e.g. "%9" rather than "%09" for a tab
        $encoded = sprintf("%%%02X", $d);
        if ($d < 0x21 || $d > 0x7E || $d === 0x25) {
            // these characters are always encoded
            $out .= $encoded;
        } elseif (preg_match("/[a-zA-Z0-9\._~-]/", $dc)) {
            // these characters are never encoded
            $out .= $dc;
        } elseif ($c === "%" || in_array($dc, $esc, true)) {
            // characters which were encoded in the input stay encoded, and delimiters of the part being processed are always encoded
            $out .= $encoded;
        } else {
            // everything else is passed through as-is
            $out .= $c;
        }
        $pos++;
    }
    return $out;
}
/** Normalizes a hostname per IDNA:2008 */
protected function normalizeHost(?string $host): ?string {
if (strlen($host ?? "")) {

2
tests/cases/Util/Url/UrlTest.php

@ -18,7 +18,7 @@ class UrlTest extends Psr7TestCase {
}
/** @dataProvider provideStandardParsingTests */
public function xtestParsePerWhatwgRules(string $input, string $base, ?string $exp): void {
public function testParsePerWhatwgRules(string $input, string $base, ?string $exp): void {
if (in_array($input, self::INCOMPLETE_STD_INPUT)) {
$this->markTestIncomplete();
}

Loading…
Cancel
Save