Browse Source

Implement RFC 8288 Web Linking; more tests to come

master
J. King 4 years ago
parent
commit
3096b41811
  1. 10
      lib/Link/Collection.php
  2. 28
      lib/Link/Link.php
  3. 8
      lib/Metadata.php
  4. 102
      lib/Parser/HTTP/Message.php
  5. 8
      lib/Url.php
  6. 24
      tests/cases/AbstractParserTestCase.php
  7. 18
      tests/cases/HTTP/http.yaml

10
lib/Link/Collection.php

@ -0,0 +1,10 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Lax\Link;
/**
 * Collection type used to hold Link objects.
 *
 * It adds no members of its own; all behaviour comes from the base
 * \MensBeam\Lax\Collection class.
 */
class Collection extends \MensBeam\Lax\Collection {
}

28
lib/Link/Link.php

@ -0,0 +1,28 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Lax\Link;
/**
 * Plain data holder describing one link: a typed relation between a
 * subject (the anchor) and a target URL, plus optional target attributes.
 *
 * Every property other than $rev and $attr is null until it is assigned.
 */
class Link {
    /**
     * @var bool True when the relation is a reverse ("rev") relation, false for a forward one
     */
    public $rev = false;

    /**
     * @var string|null The relation type of the link
     */
    public $rel = null;

    /**
     * @var \MensBeam\Lax\Url|null The URL of the link's target
     */
    public $url = null;

    /**
     * @var \MensBeam\Lax\Url|null The URL of the link's subject (its context)
     */
    public $anchor = null;

    /**
     * @var \MensBeam\Lax\MimeType|null The media (content) type of the target resource
     */
    public $type = null;

    /**
     * @var string|null The title of the target resource
     */
    public $title = null;

    /**
     * @var string|null The language of the target resource
     */
    public $lang = null;

    /**
     * @var string|null The media queries applicable to the target resource
     */
    public $media = null;

    /**
     * @var array Any extended attributes which are not covered by the other properties
     */
    public $attr = [];
}

8
lib/Metadata.php

@ -6,6 +6,8 @@
declare(strict_types=1);
namespace MensBeam\Lax;
use MensBeam\Lax\Link\Collection as LinkCollection;
class Metadata {
/** @var bool */
public $cached = false;
@ -25,4 +27,10 @@ class Metadata {
public $maxAge;
/** @var \DateInterval */
public $age;
/** @var \MensBeam\Lax\Link\Collection */
public $links;

/**
 * Creates the metadata object with an empty link collection, so that
 * $links is always a collection rather than null.
 */
public function __construct() {
    $this->links = new LinkCollection();
}
}

102
lib/Parser/HTTP/Message.php

@ -10,15 +10,23 @@ use MensBeam\Lax\Date;
use MensBeam\Lax\MimeType;
use MensBeam\Lax\Url;
use MensBeam\Lax\Feed as FeedStruct;
use MensBeam\Lax\Link\Link;
use MensBeam\Lax\Link\Collection as LinkCollection;
use Psr\Http\Message\MessageInterface;
class Message {
protected const TYPE_PATTERN = '/^[A-Za-z0-9!#$%&\'*+\-\.\^_`|~]+\/[A-Za-z0-9!#$%&\'*+\-\.\^_`|~]+\s*(;.*)?$/s';
protected const DATE_PATTERN = '/^(?|(Mon|Tue|Wed|Thu|Fri|Sat|Sun), \d\d (?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) \d{4} \d\d:\d\d:\d\d GMT|((?:Mon|Tues|Wednes|Thurs|Fri|Satur|Sun)day), \d\d-(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-\d\d \d\d:\d\d:\d\d GMT|(Mon|Tue|Wed|Thu|Fri|Sat|Sun) (?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) (?:\d\d| \d) \d\d:\d\d:\d\d \d{4})$/';
protected const CCON_PATTERN = '/[,\s]*(?:[^=,]*)(?:=(?:"(?:\\\"|[^"])*(?:"|$)[^,]*|[^,]*))?/';
protected const CCON_PATTERN = '/[,\s]*[^=,]*(?:=(?:"(?:\\\"|[^"])*(?:"|$)[^,]*|[^,]*))?/';
protected const DSEC_PATTERN = '/^\d+$/';
protected const ETAG_PATTERN = '/^.+$/';
protected const DTOK_PATTERN = '/^(?|(\d+)|"((?:\\\(?=\d)|\d)+)".*)$/';
/**
 * Matches one link in a "Link" header, skipping leading commas and whitespace.
 * Group 1 is the text between the angle brackets; group 2 is the raw run of
 * semicolon-introduced parameters which follows it (possibly empty).
 * getLinks() applies this repeatedly to the header line.
 */
protected const LINK_PATTERN = '/[,\s]*<([^>]*)>((?:;\s*[^=,;]*(?:=(?:"(?:\\\"|[^"])*(?:"|$)[^,;]*|[^,;]*))?)*)/';
/**
 * Matches one link parameter, skipping leading semicolons and whitespace.
 * Group 1 is the parameter name; group 2 (optional) is everything after the
 * equals sign, still quoted and escaped if the value was a quoted string.
 * parseParams() applies this repeatedly to the parameter string.
 */
protected const LPRM_PATTERN = '/[;\s]*([^=;]*)(?:=("(?:\\\"|[^"])*(?:"|$)[^;]*|[^;]*))?/';
/** Accepts a non-empty string made up only of the listed token characters; parseParams() uses it to validate parameter names */
protected const NAME_PATTERN = '/^[A-Za-z0-9!#$%&\'*+\-\.\^_`|~]+$/s';
/** Accepts a non-empty unquoted parameter value consisting of tabs and characters in the ranges 0x20-0x7E and 0x80-0xFF */
protected const BVAL_PATTERN = '/^[\t\x{20}-\x{7E}\x{80}-\x{FF}]+$/su';
/**
 * Matches a quoted parameter value at the start of the string. Group 1 is the
 * content between the quotation marks, with backslash escapes still in place.
 * The closing quotation mark may be missing if the string ends first.
 */
protected const QVAL_PATTERN = '/^"((?:\\\"|[\t !\x{23}-\x{7E}\x{80}-\x{FF}])*)(?:"|$)/su';
/** Matches a backslash followed by any one character (group 1); parseParams() replaces each match with that character to undo escaping */
protected const VESC_PATTERN = '/\\\(.)/s';
protected const SDAY_MAP = ["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"];
protected const FDAY_MAP = ["Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"];
@ -43,12 +51,13 @@ class Message {
$feed->meta->etag = $this->getEtag();
$feed->meta->age = $this->getAge();
$feed->meta->maxAge = $this->getMaxAge();
$feed->meta->links = $this->getLinks();
return $feed;
}
protected function parseHeader(string $name, string $pattern, bool $multi = false): ?array {
if ($multi) {
if (preg_match_all($pattern, $this->msg->getHeaderLine($name), $match)) {
if (preg_match_all($pattern, $this->msg->getHeaderLine($name), $match, \PREG_SET_ORDER)) {
return $match;
}
} else {
@ -113,8 +122,8 @@ class Message {
$out = 0;
$maxAge = 0;
$sharedMaxAge = 0;
foreach ($this->parseHeader("Cache-Control", self::CCON_PATTERN, true)[0] ?? [] as $t) {
$t = explode("=", trim($t, ", \t"), 2);
foreach ($this->parseHeader("Cache-Control", self::CCON_PATTERN, true) ?? [] as $t) {
$t = explode("=", trim($t[0], ", \t"), 2);
$k = strtolower($t[0]);
if (($k === "max-age" || $k === "s-maxage") && isset($t[1]) && strlen($t[1])) {
if (preg_match(self::DTOK_PATTERN, $t[1], $match)) {
@ -136,4 +145,89 @@ class Message {
}
return null;
}
/**
 * Builds the collection of links described by the message's "Link" header.
 *
 * Each link found in the header is expanded into one Link object per
 * distinct relation: forward relations come from the "rel" parameter and
 * reverse relations (flagged with $rev = true) from the "rev" parameter.
 * A link is skipped if it has no usable parameters, no relations at all,
 * or a target which cannot be turned into a URL.
 *
 * @return LinkCollection The links found, possibly empty
 */
public function getLinks(): LinkCollection {
    $links = new LinkCollection;
    $headers = $this->parseHeader("Link", self::LINK_PATTERN, true) ?? [];
    foreach ($headers as $header) {
        $params = $this->parseParams($header[2]);
        if (!$params) {
            continue;
        }
        $template = new Link;
        $template->url = Url::fromString($header[1], $this->url);
        // relations are normalized and deduplicated before use
        $forward = $this->normalizeRelations($params['rel'] ?? "");
        $reverse = $this->normalizeRelations($params['rev'] ?? "");
        if (!$template->url || (!$forward && !$reverse)) {
            // nothing useful can be said about a link without a valid target or any relation
            continue;
        }
        // fill in everything which is shared between all relations of this link
        $template->anchor = Url::fromString($params['anchor'] ?? "", $this->url);
        $template->type = MimeType::parse($params['type'] ?? "");
        $template->title = $params['title'] ?? null;
        $template->media = $params['media'] ?? null;
        $template->lang = $params['hreflang'] ?? null;
        // whatever remains after removing the parameters handled above is kept as extended attributes
        unset(
            $params['rel'],
            $params['rev'],
            $params['title'],
            $params['media'],
            $params['anchor'],
            $params['type'],
            $params['hreflang']
        );
        $template->attr = $params;
        // emit one copy of the template per relation: forward relations first, then reverse ones
        foreach ([[$forward, false], [$reverse, true]] as [$relations, $isReverse]) {
            foreach ($relations as $relation) {
                $link = clone $template;
                $link->rel = $relation;
                $link->rev = $isReverse;
                $links[] = $link;
            }
        }
    }
    return $links;
}
/**
 * Parses the parameter portion of a single link into a name => value map.
 *
 * Parameter names are validated against NAME_PATTERN and lower-cased.
 * Quoted values have their quotation marks removed and backslash escapes
 * resolved; unquoted values have trailing whitespace removed. Parameters
 * with an invalid name or value (including an empty value) are dropped.
 *
 * Per RFC 8288, whitespace surrounding the equals sign is discarded, and
 * when a parameter name occurs more than once only the first valid
 * occurrence is retained; later occurrences are ignored.
 *
 * @param string $params The raw parameter string, e.g. `; rel="alternate"; type=text/html`
 * @return array Map of lower-cased parameter names to their decoded string values
 */
protected function parseParams(string $params): array {
    $out = [];
    if (!preg_match_all(self::LPRM_PATTERN, $params, $matches, \PREG_SET_ORDER)) {
        return $out;
    }
    foreach ($matches as $match) {
        // the value group is absent for a parameter with no equals sign, so pad it to an empty string
        [, $name, $value] = array_pad($match, 3, "");
        // whitespace is permitted on either side of the equals sign and carries no meaning
        $name = rtrim($name, "\t\r\n ");
        $value = ltrim($value, "\t\r\n ");
        if (!preg_match(self::NAME_PATTERN, $name)) {
            continue;
        }
        $name = strtolower($name);
        if (isset($out[$name])) {
            // occurrences after the first must be ignored
            continue;
        }
        if (strlen($value) && $value[0] === '"') {
            if (!preg_match(self::QVAL_PATTERN, $value, $quoted)) {
                continue;
            }
            // strip the backslash from each escaped character
            $value = preg_replace(self::VESC_PATTERN, '$1', $quoted[1]);
        } else {
            $value = rtrim($value, "\t\r\n ");
            if (!preg_match(self::BVAL_PATTERN, $value)) {
                continue;
            }
        }
        $out[$name] = $value;
    }
    return $out;
}
/**
 * Splits a whitespace-separated list of link relations into a deduplicated
 * list of normalized relation strings.
 *
 * A relation which parses as a URL with a scheme is replaced by the string
 * form of that URL; any other relation is lower-cased.
 *
 * @param string $relations The raw value of a "rel" or "rev" parameter
 * @return array The distinct normalized relations; keys are those left by array_unique() and may have gaps
 */
protected function normalizeRelations(string $relations): array {
    $trimmed = trim($relations, "\t\r\n ");
    if ($trimmed === "") {
        return [];
    }
    $normalized = [];
    foreach (preg_split('/\s+/', $trimmed) as $token) {
        $url = Url::fromString($token);
        $hasScheme = $url && strlen($url->getScheme());
        $normalized[] = $hasScheme ? (string) $url : strtolower($token);
    }
    return array_unique($normalized);
}
}

8
lib/Url.php

@ -86,6 +86,14 @@ PCRE;
return ($uri instanceof self) ? $uri : new self((string) $uri);
}
/**
 * Creates a URL object from a string without throwing on failure.
 *
 * This wraps the constructor: if construction throws an
 * \InvalidArgumentException, null is returned instead.
 *
 * @param string $url The URL string to parse
 * @param string|null $baseUrl An optional base URL, passed through to the constructor unchanged
 * @return static|null The new instance, or null if the constructor rejected the input
 */
public static function fromString(string $url, ?string $baseUrl = null): ?self {
    try {
        return new static($url, $baseUrl);
    } catch (\InvalidArgumentException $e) {
        return null;
    }
}
public function __construct(string $url, string $baseUrl = null) {
if (preg_match(self::URI_PATTERN, $url, $match)) {
[$url, $scheme, $authority, $path, $query, $fragment] = array_pad($match, 6, "");

24
tests/cases/AbstractParserTestCase.php

@ -14,9 +14,11 @@ use MensBeam\Lax\Entry;
use MensBeam\Lax\Metadata;
use MensBeam\Lax\Schedule;
use MensBeam\Lax\MimeType;
use MensBeam\Lax\Link\Link;
use MensBeam\Lax\Person\Person;
use MensBeam\Lax\Category\Category;
use MensBeam\Lax\Enclosure\Enclosure;
use MensBeam\Lax\Link\Collection as LinkCollection;
use MensBeam\Lax\Person\Collection as PersonCollection;
use MensBeam\Lax\Category\Collection as CategoryCollection;
use MensBeam\Lax\Enclosure\Collection as EnclosureCollection;
@ -178,9 +180,11 @@ class AbstractParserTestCase extends \PHPUnit\Framework\TestCase {
$m = new Metadata;
foreach ($meta as $k => $v) {
if ($k === "url") {
$m->$k = new Url($v);
$m->$k = $this->makeUrl($v);
} elseif ($k === "type") {
$m->$k = MimeType::parse($v);
} elseif ($k === "links") {
$m->$k = $this->makeLinks($v);
} elseif (in_array($k, ["date", "lastModified", "expires"])) {
$m->$k = new Date($v);
} elseif (in_array($k, ["age", "maxAge"])) {
@ -213,4 +217,22 @@ class AbstractParserTestCase extends \PHPUnit\Framework\TestCase {
return new Url($url);
}
}
/**
 * Builds the expected link collection for a test from its plain-data description.
 *
 * Each entry of $links is cast to an array and copied onto a new Link,
 * property by property. The "type" value is converted with
 * MimeType::parse(), the "url" and "anchor" values with makeUrl(), and
 * every other value is assigned as-is.
 *
 * @param array $links List of link descriptions keyed by Link property name
 * @return LinkCollection The collection of constructed Link objects, in input order
 */
private function makeLinks(array $links): LinkCollection {
    $out = new LinkCollection;
    foreach ($links as $link) {
        $m = new Link;
        foreach ((array) $link as $k => $v) {
            // keys are compared strictly so that a non-string key can never be mistaken for a known property
            if ($k === "type") {
                $v = MimeType::parse($v);
            } elseif ($k === "url" || $k === "anchor") {
                $v = $this->makeUrl($v);
            }
            $m->$k = $v;
        }
        $out[] = $m;
    }
    return $out;
}
}

18
tests/cases/HTTP/http.yaml

@ -119,3 +119,21 @@ Max age 5:
output:
meta:
maxAge: 'PT22S'
Links:
doc_url: 'http://example.org/blah'
input:
head:
Link: '<http://example.com/>; rel="alternate"; type="appplication/atom+xml; charset=\"UTF-8\";", </>; rel=alternate; type="application/rss+xml"'
output:
meta:
url: 'http://example.org/blah'
links:
- url: 'http://example.com/'
anchor: 'http://example.org/blah'
rel: alternate
type: 'appplication/atom+xml;charset=UTF-8'
- url: ['/', 'http://example.org/blah']
anchor: 'http://example.org/blah'
rel: alternate
type: 'application/rss+xml'

Loading…
Cancel
Save