Browse Source

Prototype URL class

master
J. King 4 years ago
parent
commit
31822ed23a
  1. 2
      lib/Parser/JSON/Entry.php
  2. 339
      lib/Url.php
  3. 2
      tests/cases/JSON/JSONTest.php
  4. 17
      tests/cases/Util/UrlTest.php
  5. 5
      tests/phpunit.dist.xml

2
lib/Parser/JSON/Entry.php

@ -141,7 +141,7 @@ class Entry implements \JKingWeb\Lax\Parser\Entry {
public function getEnclosures(): EnclosureCollection {
$out = new EnclosureCollection;
foreach ($this->fetchMember("attachments", "array") as $attachment) {
foreach ($this->fetchMember("attachments", "array") ?? [] as $attachment) {
$url = $this->fetchUrl("url", $attachment);
if ($url) {
$m = new Enclosure;

339
lib/Url.php

@ -0,0 +1,339 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace JKingWeb\Lax;
use Psr\Http\Message\UriInterface;
/** Normalized URI representation, compatible with the PSR-7 URI interface
*
* The following features are implemented:
*
* - The full PSR-7 `UriInterface` interface
* - Correct handling of both URLs and URNs
* - Relative URL resolution
* - Encoding normalization
* - Scheme normalization
* - IDNA normalization
* - IPv6 address normalization
* - Empty query and fragment removal
*
* Some things this class does not do:
*
* - Handle non-standard schemes (e.g. ed2k)
* - Collapse paths
* - Drop default ports
*
* This class should not be used with XML namespace URIs,
* as the normalizations performed will change the values
* of some namespaces.
*/
class Url implements UriInterface {
    /** Splits a URI reference into scheme (1), authority (2), path (3), query (4), and fragment (5)
     *
     * The query and fragment captures include their leading "?" and "#" delimiters.
     */
    protected const URI_PATTERN = <<<'PCRE'
        <^
        (?:
        (?:
        ([a-z][a-z0-9\.\-\+]*): | # absolute URI
        :?(?=//) # scheme-relative URI
        )
        (//[^/\?\#]*)? # authority part
        )?
        ([^\?\#]*) # path part
        (\?[^\#]*)? # query part
        (\#.*)? # fragment part
        $>six
PCRE;
    /** Splits an authority (including its leading "//") into username (1), password (2), host (3), and port (4) */
    protected const AUTHORITY_PATTERN = <<<'PCRE'
        <^
        //
        (?:
        ([^@:]*) # username part
        (?::([^@]*))? # password part
        @
        )?
        (
        \[[a-f0-9:]*\] | # IPv6 address
        [^:]+ # domain or IPv4 address
        )
        (?:
        :(\d*) # port part
        )?
        $>six
PCRE;
    protected const SCHEME_PATTERN = "<^[a-z][a-z0-9\.\-\+]*$>i";
    protected const PORT_PATTERN = "<^\d+$>";
    /** Delimiters which must be percent-encoded when they occur literally within a given URL part */
    protected const ESCAPE_CHARS = [
        'user' => [":", "@", "/", "?", "#"],
        'pass' => ["@", "/", "?", "#"],
        'path' => ["?", "#"],
        'query' => ["#"],
    ];

    // During construction a null part means "absent from the input";
    // this is what lets resolve() tell an absent query from an empty one
    protected $scheme = null;
    protected $host = null;
    protected $port = null;
    protected $user = "";
    protected $pass = "";
    protected $path = null;
    protected $query = null;
    protected $fragment = null;

    /** Returns the supplied URI as an instance of this class, re-parsing its string form if it is of a foreign class */
    public static function fromUri(UriInterface $uri): self {
        return ($uri instanceof self) ? $uri : new self((string) $uri);
    }

    /** Parses and normalizes a URI reference, optionally resolving it against a base URL
     *
     * @param string $url The URI or relative reference to parse
     * @param UriInterface|null $baseUrl The base against which to resolve $url; consulted only if $url has no scheme
     * @throws \InvalidArgumentException if the string cannot be parsed, if a component is invalid, or if the base is a URN
     */
    public function __construct(string $url, ?UriInterface $baseUrl = null) {
        if (!preg_match(self::URI_PATTERN, $url, $match)) {
            throw new \InvalidArgumentException("String is not a valid URI");
        }
        // PCRE omits trailing groups which did not participate in the match,
        // so the result must be padded before it can be destructured safely
        [, $scheme, $authority, $path, $query, $fragment] = array_pad($match, 6, "");
        if (strlen($scheme)) {
            $this->__set("scheme", $scheme);
        }
        if (strlen($path)) {
            $this->__set("path", $path);
        }
        // the query and fragment captures carry their delimiter, which is not part of the value;
        // a bare delimiter yields an empty-but-present value
        if (strlen($query)) {
            $this->__set("query", (string) substr($query, 1));
        }
        if (strlen($fragment)) {
            $this->__set("fragment", (string) substr($fragment, 1));
        }
        if (strlen($authority) && preg_match(self::AUTHORITY_PATTERN, $authority, $match)) {
            // the port group (and only the port group) may be missing from the end of the match
            [, $user, $pass, $host, $port] = array_pad($match, 5, "");
            $this->__set("user", $user);
            $this->__set("pass", $pass);
            $this->__set("host", $host);
            $this->__set("port", $port);
        }
        if ($baseUrl !== null && !strlen($this->scheme ?? "")) {
            $this->resolve(self::fromUri($baseUrl));
        }
        // once resolution is done the distinction between absent and empty is no longer needed
        foreach (["scheme", "path", "query", "fragment"] as $part) {
            $this->$part = $this->$part ?? "";
        }
    }

    /** Returns the "[user-info@]host[:port]" authority, or an empty string if there is no host */
    public function getAuthority() {
        $host = $this->getHost();
        if (!strlen($host)) {
            return "";
        }
        $userInfo = $this->getUserInfo();
        $port = $this->getPort();
        return (strlen($userInfo) ? $userInfo."@" : "").$host.(!is_null($port) ? ":".$port : "");
    }

    /** Returns the fragment without its leading "#", or an empty string */
    public function getFragment() {
        return $this->fragment ?? "";
    }

    /** Returns the normalized host, or an empty string */
    public function getHost() {
        return $this->host ?? "";
    }

    /** Returns the path, or an empty string */
    public function getPath() {
        return $this->path ?? "";
    }

    /** Returns the port as an integer, or null if none is set; default ports are not dropped */
    public function getPort() {
        return $this->port;
    }

    /** Returns the query without its leading "?", or an empty string */
    public function getQuery() {
        return $this->query ?? "";
    }

    /** Returns the lowercase scheme without its trailing ":", or an empty string */
    public function getScheme() {
        return $this->scheme ?? "";
    }

    /** Returns "user[:password]", or an empty string if there is no username */
    public function getUserInfo() {
        if (!strlen($this->user ?? "")) {
            return "";
        }
        return $this->user.(strlen($this->pass ?? "") ? ":".$this->pass : "");
    }

    /** Returns a copy with the specified fragment, normalized; an empty value removes the fragment */
    public function withFragment($fragment) {
        $out = clone $this;
        $out->__set("fragment", $fragment);
        return $out;
    }

    /** Returns a copy with the specified host, normalized; an empty value removes the host */
    public function withHost($host) {
        $out = clone $this;
        $host = (string) $host;
        // the rest of the class uses a null host to signify "no authority"
        $out->host = strlen($host) ? $out->normalizeHost($host) : null;
        return $out;
    }

    /** Returns a copy with the specified path, normalized */
    public function withPath($path) {
        $out = clone $this;
        $out->__set("path", $path);
        return $out;
    }

    /** Returns a copy with the specified port; null removes the port
     *
     * @throws \InvalidArgumentException if the port is neither null nor a string of digits
     */
    public function withPort($port) {
        $out = clone $this;
        $out->__set("port", $port);
        return $out;
    }

    /** Returns a copy with the specified query, normalized; an empty value removes the query */
    public function withQuery($query) {
        $out = clone $this;
        $out->__set("query", $query);
        return $out;
    }

    /** Returns a copy with the specified scheme, lowercased; an empty value removes the scheme
     *
     * @throws \InvalidArgumentException if the scheme is not syntactically valid
     */
    public function withScheme($scheme) {
        $out = clone $this;
        $scheme = (string) $scheme;
        if (strlen($scheme)) {
            $out->__set("scheme", $scheme);
        } else {
            $out->scheme = "";
        }
        return $out;
    }

    /** Returns a copy with the specified username and optional password, both normalized */
    public function withUserInfo($user, $password = null) {
        $out = clone $this;
        $out->__set("user", $user);
        $out->__set("pass", $password ?? "");
        return $out;
    }

    /** Serializes the URL, omitting an empty scheme, query, or fragment */
    public function __toString() {
        $scheme = $this->scheme ?? "";
        $path = $this->path ?? "";
        // a scheme-relative URL has no scheme and must not be printed with a dangling ":"
        $out = strlen($scheme) ? $scheme.":" : "";
        if (is_null($this->host)) {
            $out .= $path;
        } else {
            // with an authority the path always starts with exactly one slash
            $path = preg_replace("<^/{2,}>", "/", $path);
            $out .= "//".$this->getAuthority();
            $out .= (($path[0] ?? "") === "/" ? "" : "/").$path;
        }
        $out .= strlen($this->query ?? "") ? "?".$this->query : "";
        $out .= strlen($this->fragment ?? "") ? "#".$this->fragment : "";
        return $out;
    }

    /** Exposes the protected URL parts for reading */
    public function __get(string $name) {
        return $this->$name;
    }

    /** Normalizes and stores a URL part
     *
     * Hosts pass through normalizeHost(), schemes are validated and lowercased,
     * ports are validated and cast to integers (an empty or null port is stored as null),
     * and everything else has its percent-encoding normalized.
     *
     * @throws \InvalidArgumentException if a scheme or port is invalid
     */
    public function __set(string $name, $value): void {
        switch ($name) {
            case "host":
                $this->host = $this->normalizeHost($value);
                break;
            case "port":
                $port = (string) $value;
                if ($port === "") {
                    // "http://example.com:/" is valid and simply has no port
                    $this->port = null;
                } elseif (preg_match(self::PORT_PATTERN, $port)) {
                    $this->port = (int) $port;
                } else {
                    throw new \InvalidArgumentException("Port must be an integer or null");
                }
                break;
            case "scheme":
                if (preg_match(self::SCHEME_PATTERN, $value)) {
                    $this->scheme = strtolower($value);
                } else {
                    throw new \InvalidArgumentException("Invalid scheme specified");
                }
                break;
            default:
                $this->$name = $this->normalizeEncoding((string) $value, $name);
        }
    }

    /** Fills in the parts missing from this relative reference using an absolute base, per RFC 3986 section 5.2
     *
     * Dot segments are not removed from the merged path.
     *
     * @throws \InvalidArgumentException if the base has a scheme but no host (i.e. is a URN)
     */
    protected function resolve(self $base): void {
        if (strlen($base->scheme ?? "") && is_null($base->host)) {
            throw new \InvalidArgumentException("URL base must not be a Uniform Resource Name");
        }
        $this->scheme = $this->scheme ?? $base->scheme;
        if (!is_null($this->host)) {
            // a reference with its own authority takes nothing else from the base
            return;
        }
        $this->host = $base->host;
        $this->user = $base->user;
        $this->pass = $base->pass;
        $this->port = $base->port;
        $basePath = $base->path ?? "";
        if (is_null($this->path)) {
            $this->path = $basePath;
            // the query is inherited only when the reference has neither path nor query;
            // the fragment is never inherited from the base
            if (is_null($this->query)) {
                $this->query = $base->query;
            }
        } elseif ($this->path[0] !== "/") {
            if (!strlen($basePath)) {
                // a base with an authority and an empty path merges as "/" + reference
                if (!is_null($base->host)) {
                    $this->path = "/".$this->path;
                }
            } else {
                // keep everything up to and including the base path's last slash
                $slash = strrpos($basePath, "/");
                $dir = ($slash === false) ? "" : substr($basePath, 0, $slash + 1);
                $this->path = $dir.$this->path;
            }
        }
    }

    /** Normalizes the percent-encoding of a URL part
     *
     * Control characters, spaces, non-ASCII bytes, and stray "%" signs are always encoded;
     * unreserved characters are always decoded; other characters retain whatever form they
     * had in the input, unless they are literal delimiters listed for the part in ESCAPE_CHARS.
     * Hexadecimal digits are emitted in upper case.
     *
     * @param string $data The raw value of the part
     * @param string|null $part The name of the part, used to look up delimiters which must be escaped
     */
    protected function normalizeEncoding(string $data, ?string $part = null): string {
        $esc = self::ESCAPE_CHARS[$part] ?? [];
        $end = strlen($data);
        $out = "";
        for ($pos = 0; $pos < $end; $pos++) {
            $c = $data[$pos];
            $d = ord($c);
            $wasEncoded = false;
            if ($c === "%") {
                // a % sign followed by two hex digits is an encoded byte; otherwise it is a literal % sign
                $hex = (string) substr($data, $pos + 1, 2);
                if (preg_match("/^[0-9a-fA-F]{2}$/", $hex)) {
                    $d = hexdec($hex);
                    $pos += 2;
                    $wasEncoded = true;
                }
            }
            $dc = chr($d);
            if ($d < 0x21 || $d > 0x7E || $d === 0x25) {
                // these bytes are always encoded; the value must be padded to two digits
                $out .= sprintf("%%%02X", $d);
            } elseif (preg_match("/[a-zA-Z0-9\._~-]/", $dc)) {
                // unreserved characters are never encoded
                $out .= $dc;
            } elseif ($wasEncoded || in_array($dc, $esc, true)) {
                // encoded input stays encoded, and literal delimiters of this part must be escaped
                $out .= sprintf("%%%02X", $d);
            } else {
                $out .= $c;
            }
        }
        return $out;
    }

    /** Normalizes a hostname per IDNA:2008
     *
     * Bracketed IP literals are canonicalized with inet_pton()/inet_ntop(); anything else
     * is round-tripped through the IDNA conversion functions. Input which cannot be converted
     * is returned trimmed but otherwise unchanged.
     */
    protected function normalizeHost(?string $host): ?string {
        if (is_null($host)) {
            return null;
        }
        $host = trim($host);
        if (!strlen($host)) {
            return $host;
        }
        if ($host[0] === "[" && substr($host, -1) === "]") {
            // normalize IPv6 addresses; inet_pton() emits a warning for invalid input, hence the suppression
            $addr = @inet_pton(substr($host, 1, strlen($host) - 2));
            if ($addr !== false) {
                return "[".inet_ntop($addr)."]";
            }
        }
        $idn = idn_to_ascii($host, \IDNA_NONTRANSITIONAL_TO_ASCII, \INTL_IDNA_VARIANT_UTS46);
        if ($idn === false) {
            return $host;
        }
        $utf = idn_to_utf8($idn, \IDNA_NONTRANSITIONAL_TO_UNICODE, \INTL_IDNA_VARIANT_UTS46);
        return ($utf !== false) ? $utf : $host;
    }
}

2
tests/cases/JSON/TestJSONFeed.php → tests/cases/JSON/JSONTest.php

@ -16,7 +16,7 @@ use JKingWeb\Lax\Person\Collection as PersonCollection;
/** @covers JKingWeb\Lax\Parser\JSON\Feed<extended> */
class TestJSON extends \PHPUnit\Framework\TestCase {
class JSONTest extends \PHPUnit\Framework\TestCase {
/** @dataProvider provideJSONFeedVersion1 */
public function testJSONFeedVersion1($input, string $type, $output): void {
if (is_object($input)) {

17
tests/cases/Util/UrlTest.php

@ -0,0 +1,17 @@
<?php
/** @license MIT
* Copyright 2018 J. King
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace JKingWeb\Lax\TestCase\Util;
use JKingWeb\Lax\Url;
/** @covers JKingWeb\Lax\Url<extended> */
class UrlTest extends \PHPUnit\Framework\TestCase {
    /** A fully-specified URL which is already in normalized form must survive a round trip unchanged */
    public function testTemp(): void {
        $url = "https://me:secret@example.com:443/file?question#bit";
        // assertSame() takes the expected value first, so that failure messages read correctly
        $this->assertSame($url, (string) new Url($url));
    }
}

5
tests/phpunit.dist.xml

@ -20,7 +20,10 @@
<testsuites>
<testsuite name="JSON">
<directory suffix=".php">cases/JSON</directory>
<directory>cases/JSON</directory>
</testsuite>
<testsuite name="Util">
<directory>cases/Util</directory>
</testsuite>
</testsuites>
</phpunit>

Loading…
Cancel
Save