J. King
4 years ago
5 changed files with 362 additions and 3 deletions
@ -0,0 +1,339 @@ |
|||
<?php |
|||
/** @license MIT |
|||
* Copyright 2018 J. King et al. |
|||
* See LICENSE and AUTHORS files for details */ |
|||
|
|||
declare(strict_types=1); |
|||
namespace JKingWeb\Lax; |
|||
|
|||
use Psr\Http\Message\UriInterface; |
|||
|
|||
/** Normalized URI representation, compatible with the PSR-7 URI interface |
|||
* |
|||
* The following features are implemented: |
|||
* |
|||
* - The full PSR-7 `UriInterface` interface |
|||
* - Correct handling of both URLs and URNs |
|||
* - Relative URL resolution |
|||
* - Encoding normalization |
|||
* - Scheme normalization |
|||
* - IDNA normalization |
|||
* - IPv6 address normalization |
|||
* - Empty query and fragment removal |
|||
* |
|||
* Some things this class does not do: |
|||
* |
|||
* - Handle non-standard schemes (e.g. ed2k) |
|||
* - Collapse paths |
|||
* - Drop default ports |
|||
* |
|||
* This class should not be used with XML namespace URIs, |
|||
* as the normalizations performed will change the values |
|||
* of some namespaces. |
|||
*/ |
|||
class Url implements UriInterface { |
|||
protected const URI_PATTERN = <<<'PCRE' |
|||
<^ |
|||
(?: |
|||
(?: |
|||
([a-z][a-z0-9\.\-\+]*): | # absolute URI |
|||
:?(?=//) # scheme-relative URI |
|||
) |
|||
(//[^/\?\#]*)? # authority part |
|||
)? |
|||
([^\?\#]*) # path part |
|||
(\?[^\#]*)? # query part |
|||
(\#.*)? # fragment part |
|||
$>six |
|||
PCRE; |
|||
protected const AUTHORITY_PATTERN = <<<'PCRE' |
|||
<^ |
|||
// |
|||
(?: |
|||
([^@:]*) # username part |
|||
(?::([^@]*))? # password part |
|||
@ |
|||
)? |
|||
( |
|||
\[[a-f0-9:]*\] | # IPv6 address |
|||
[^:]+ # domain or IPv4 address |
|||
) |
|||
(?: |
|||
:(\d*) # port part |
|||
)? |
|||
$>six |
|||
PCRE; |
|||
protected const SCHEME_PATTERN = "<^[a-z][a-z0-9\.\-\+]*$>i"; |
|||
protected const PORT_PATTERN = "<^\d+$>"; |
|||
protected const ESCAPE_CHARS = [ |
|||
'user' => [":", "@", "/", "?", "#"], |
|||
'pass' => ["@", "/", "?", "#"], |
|||
'path' => ["?", "#"], |
|||
'query' => ["#"], |
|||
]; |
|||
|
|||
protected $scheme = null; |
|||
protected $host = null; |
|||
protected $port = null; |
|||
protected $user = ""; |
|||
protected $pass = ""; |
|||
protected $path = null; |
|||
protected $query = null; |
|||
protected $fragment = null; |
|||
|
|||
public static function fromUri(UriInterface $uri): self { |
|||
return ($uri instanceof self) ? $uri : new self((string) $uri); |
|||
} |
|||
|
|||
public function __construct(string $url, ?UriInterface $baseUrl = null) { |
|||
if (preg_match(self::URI_PATTERN, $url, $match)) { |
|||
[$url, $scheme, $authority, $path, $query, $fragment] = $match; |
|||
foreach (["scheme", "path", "query", "fragment"] as $part) { |
|||
if (strlen($$part)) { |
|||
if ($part === "query" || $part === "fragment") { |
|||
$$part = substr($$part, 1); |
|||
} |
|||
$this->__set($part, $$part); |
|||
} |
|||
} |
|||
if (strlen($authority)) { |
|||
if (preg_match(self::AUTHORITY_PATTERN, $authority, $match)) { |
|||
[$authority, $user, $pass, $host, $port] = $match; |
|||
foreach (["user", "pass", "host", "port"] as $part) { |
|||
$this->__set($part, $$part); |
|||
} |
|||
} |
|||
} |
|||
if ($baseUrl && !strlen($this->scheme)) { |
|||
$this->resolve(self::fromUri($baseUrl)); |
|||
} |
|||
foreach (["scheme", "path", "query", "fragment"] as $part) { |
|||
$this->$part = $this->$part ?? ""; |
|||
} |
|||
} else { |
|||
throw new \InvalidArgumentException("String is not a valid URI"); |
|||
} |
|||
} |
|||
|
|||
public function getAuthority() { |
|||
$host = $this->getHost(); |
|||
if (strlen($host) > 0) { |
|||
$userInfo = $this->getUserInfo(); |
|||
$port = $this->getPort(); |
|||
return (strlen($userInfo) ? $userInfo."@" : "").$host.(!is_null($port) ? ":".$port : ""); |
|||
} |
|||
return ""; |
|||
} |
|||
|
|||
public function getFragment() { |
|||
return $this->fragment ?? ""; |
|||
} |
|||
|
|||
public function getHost() { |
|||
return $this->host ?? ""; |
|||
} |
|||
|
|||
public function getPath() { |
|||
return $this->path ?? ""; |
|||
} |
|||
|
|||
public function getPort() { |
|||
return $this->port; |
|||
} |
|||
|
|||
public function getQuery() { |
|||
return $this->query ?? ""; |
|||
} |
|||
|
|||
public function getScheme() { |
|||
return $this->scheme ?? ""; |
|||
} |
|||
|
|||
public function getUserInfo() { |
|||
if (strlen($this->user ?? "")) { |
|||
return $this->user.(strlen($this->pass ?? "") ? ":".$this->pass : ""); |
|||
} |
|||
return ""; |
|||
} |
|||
|
|||
public function withFragment($fragment) { |
|||
$out = clone $this; |
|||
$out->fragment = $fragment; |
|||
return $out; |
|||
} |
|||
|
|||
public function withHost($host) { |
|||
$out = clone $this; |
|||
$out->host = $host; |
|||
return $out; |
|||
} |
|||
|
|||
public function withPath($path) { |
|||
$out = clone $this; |
|||
$out->path = $path; |
|||
return $out; |
|||
} |
|||
|
|||
public function withPort($port) { |
|||
$out = clone $this; |
|||
$out->port = $port; |
|||
return $out; |
|||
} |
|||
|
|||
public function withQuery($query) { |
|||
$out = clone $this; |
|||
$out->query = $query; |
|||
return $out; |
|||
} |
|||
|
|||
public function withScheme($scheme) { |
|||
$out = clone $this; |
|||
$out->scheme = $scheme; |
|||
return $out; |
|||
} |
|||
|
|||
public function withUserInfo($user, $password = null) { |
|||
$out = clone $this; |
|||
$out->user = $user; |
|||
$out->pass = $password; |
|||
return $out; |
|||
} |
|||
|
|||
public function __toString() { |
|||
$out = ""; |
|||
if (is_null($this->host)) { |
|||
$out .= strlen($this->scheme) ? $this->scheme.":" : ""; |
|||
$out .= $this->path; |
|||
} else { |
|||
$out .= $this->scheme."://"; |
|||
$out .= $this->getAuthority(); |
|||
$out .= ($this->path[0] ?? "") === "/" ? "" : "/"; |
|||
$out .= preg_replace("<^/{2,}/>", "/", $this->path); |
|||
} |
|||
$out .= strlen($this->query) ? "?".$this->query : ""; |
|||
$out .= strlen($this->fragment) ? "#".$this->fragment : ""; |
|||
return $out; |
|||
} |
|||
|
|||
public function __get(string $name) { |
|||
return $this->$name; |
|||
} |
|||
|
|||
public function __set(string $name, $value): void { |
|||
switch ($name) { |
|||
case "host": |
|||
$this->host = $this->normalizeHost($value); |
|||
break; |
|||
case "port": |
|||
if (preg_match(self::PORT_PATTERN, (string) $value, $match)) { |
|||
$this->port = strlen($match[0]) ? (int) $value : null; |
|||
} else { |
|||
throw new \InvalidArgumentException("Port must be an integer or null"); |
|||
} |
|||
break; |
|||
case "scheme": |
|||
if (preg_match(self::SCHEME_PATTERN, $value)) { |
|||
$this->scheme = strtolower($value); |
|||
} else { |
|||
throw new \InvalidArgumentException("Invalid scheme specified"); |
|||
} |
|||
break; |
|||
default: |
|||
$this->$name = $this->normalizeEncoding((string) $value, $name); |
|||
} |
|||
} |
|||
|
|||
protected function resolve(self $base): void { |
|||
[$scheme, $host, $user, $pass, $port, $path, $query, $fragment] = [$base->scheme, $base->host, $base->user, $base->pass, $base->port, $base->path, $base->query, $base->fragment]; |
|||
if (strlen($scheme) && is_null($host)) { |
|||
throw new \InvalidArgumentException("URL base must not be a Uniform Resource Name"); |
|||
} |
|||
$this->scheme = $this->scheme ?? $scheme; |
|||
if (is_null($this->host)) { |
|||
$this->host = $host; |
|||
$this->user = $user; |
|||
$this->pass = $pass; |
|||
$this->port = $port; |
|||
if (is_null($this->path)) { |
|||
$this->path = $path; |
|||
if (is_null($this->query)) { |
|||
$this->query = $query; |
|||
if (is_null($this->fragment)) { |
|||
$this->fragment = $fragment; |
|||
} |
|||
} |
|||
} elseif(strlen($path)) { |
|||
if ($this->path[0] !== "/") { |
|||
if ($path[-1] === "/") { |
|||
$this->path = $path.$this->path; |
|||
} else { |
|||
$this->path = substr($path, 0, (int) strrpos($path, "/")).$this->path; |
|||
} |
|||
} |
|||
} |
|||
} |
|||
} |
|||
|
|||
protected function normalizeEncoding(string $data, string $part = null): string { |
|||
$pos = 0; |
|||
$end = strlen($data); |
|||
$out = ""; |
|||
$esc = self::ESCAPE_CHARS[$part] ?? []; |
|||
// process each character in sequence |
|||
while ($pos < $end) { |
|||
$c = $data[$pos]; |
|||
if ($c === "%") { |
|||
// the % character signals an encoded character... |
|||
$d = substr($data, $pos + 1, 2); |
|||
if (!preg_match("/^[0-9a-fA-F]{2}$/", $d)) { |
|||
// unless there are fewer than two characters left in the string or the two characters are not hex digits |
|||
$d = ord($c); |
|||
} else { |
|||
$d = hexdec($d); |
|||
$pos += 2; |
|||
} |
|||
} else { |
|||
$d = ord($c); |
|||
} |
|||
$dc = chr($d); |
|||
if ($d < 0x21 || $d > 0x7E || $d == 0x25) { |
|||
// these characters are always encoded |
|||
$out .= "%".strtoupper(dechex($d)); |
|||
} elseif (preg_match("/[a-zA-Z0-9\._~-]/", $dc)) { |
|||
// these characters are never encoded |
|||
$out .= $dc; |
|||
} else { |
|||
// these characters are passed through as-is... |
|||
if ($c === "%") { |
|||
$out .= "%".strtoupper(dechex($d)); |
|||
} else { |
|||
// unless the part we're processing has delimiters which must be escaped |
|||
if (in_array($dc, $esc)) { |
|||
$out .= "%".strtoupper(dechex($d)); |
|||
} else { |
|||
$out .= $c; |
|||
} |
|||
} |
|||
} |
|||
$pos++; |
|||
} |
|||
return $out; |
|||
} |
|||
|
|||
/** Normalizes a hostname per IDNA:2008 */ |
|||
protected function normalizeHost(?string $host): ?string { |
|||
$host = trim($host); |
|||
if (!is_null($host) && strlen($host)) { |
|||
if ($host[0] === "[" && substr($host, -1) === "]") { |
|||
// normalize IPv6 addresses |
|||
$addr = @inet_pton(substr($host, 1, strlen($host) - 2)); |
|||
if ($addr !== false) { |
|||
return "[".inet_ntop($addr)."]"; |
|||
} |
|||
} |
|||
$idn = idn_to_ascii($host, \IDNA_NONTRANSITIONAL_TO_ASCII, \INTL_IDNA_VARIANT_UTS46); |
|||
$host = $idn !== false ? idn_to_utf8($idn, \IDNA_NONTRANSITIONAL_TO_UNICODE, \INTL_IDNA_VARIANT_UTS46) : $host; |
|||
} |
|||
return $host; |
|||
} |
|||
} |
@ -0,0 +1,17 @@ |
|||
<?php |
|||
/** @license MIT |
|||
* Copyright 2018 J. King |
|||
* See LICENSE and AUTHORS files for details */ |
|||
|
|||
declare(strict_types=1); |
|||
namespace JKingWeb\Lax\TestCase\Util; |
|||
|
|||
use JKingWeb\Lax\Url; |
|||
|
|||
/** @covers JKingWeb\Lax\Url<extended> */ |
|||
class UrlTest extends \PHPUnit\Framework\TestCase { |
|||
public function testTemp(): void { |
|||
$url = "https://me:secret@example.com:443/file?question#bit"; |
|||
$this->assertSame((string) new Url("https://me:secret@example.com:443/file?question#bit"), $url); |
|||
} |
|||
} |
Loading…
Reference in new issue