From 312d09b58f3cca15493ec6e55cc0f5c174a4690d Mon Sep 17 00:00:00 2001 From: "J. King" Date: Fri, 17 Apr 2020 16:35:37 -0400 Subject: [PATCH] Expose structured media types Also prefer Media RSS mediums to extension-guessed types --- lib/Enclosure/Enclosure.php | 2 +- lib/{Parser => }/MimeType.php | 4 +--- lib/Parser/Construct.php | 8 +------- lib/Parser/JSON/Construct.php | 5 +++-- lib/Parser/JSON/Feed.php | 9 +++++---- lib/Parser/XML/Construct.php | 13 ++++--------- lib/Parser/XML/Entry.php | 11 +++++++---- lib/Parser/XML/Feed.php | 5 +++-- tests/cases/AbstractParserTestCase.php | 3 +++ tests/cases/JSON/JSONTest.php | 1 - tests/cases/JSON/entry.yaml | 8 ++++---- tests/cases/XML/XMLTest.php | 3 +-- tests/cases/XML/entry-other.yaml | 2 +- 13 files changed, 34 insertions(+), 40 deletions(-) rename lib/{Parser => }/MimeType.php (97%) diff --git a/lib/Enclosure/Enclosure.php b/lib/Enclosure/Enclosure.php index 158d6b5..103a839 100644 --- a/lib/Enclosure/Enclosure.php +++ b/lib/Enclosure/Enclosure.php @@ -9,7 +9,7 @@ namespace MensBeam\Lax\Enclosure; /** * @property \MensBeam\Lax\Url $url * @property \MensBeam\Lax\Text $title - * @property string $type + * @property \MensBeam\Lax\MimeType $type * @property bool $sample * @property int $height * @property int $width diff --git a/lib/Parser/MimeType.php b/lib/MimeType.php similarity index 97% rename from lib/Parser/MimeType.php rename to lib/MimeType.php index 85b6fec..ca67374 100644 --- a/lib/Parser/MimeType.php +++ b/lib/MimeType.php @@ -4,9 +4,7 @@ * See LICENSE and AUTHORS files for details */ declare(strict_types=1); -namespace MensBeam\Lax\Parser; - -use MensBeam\Lax\Url; +namespace MensBeam\Lax; /** {@inheritDoc} */ class MimeType extends \MensBeam\Mime\MimeType { diff --git a/lib/Parser/Construct.php b/lib/Parser/Construct.php index 6261bda..bb83cc7 100644 --- a/lib/Parser/Construct.php +++ b/lib/Parser/Construct.php @@ -8,6 +8,7 @@ namespace MensBeam\Lax\Parser; use MensBeam\Lax\Collection; use MensBeam\Lax\Date; +use MensBeam\Lax\MimeType; use MensBeam\Lax\Url; trait Construct { @@ -50,13 +51,6 @@ trait Construct { return $out ?: null; } - protected function parseMediaType(string $type, ?Url $url = null): ?string { - if ($normalized = MimeType::parseLoose($type, $url)) { - return $normalized->essence; - } - return null; - } - protected function empty($o, array $ignore = []): bool { return !array_filter((array) $o, function($v, $k) use ($ignore) { return !in_array($k, $ignore) && !is_null($v) && (!$v instanceof Collection || sizeof($v) > 0); diff --git a/lib/Parser/JSON/Construct.php b/lib/Parser/JSON/Construct.php index f78c653..4a9911c 100644 --- a/lib/Parser/JSON/Construct.php +++ b/lib/Parser/JSON/Construct.php @@ -7,6 +7,7 @@ declare(strict_types=1); namespace MensBeam\Lax\Parser\JSON; use MensBeam\Lax\Date; +use MensBeam\Lax\MimeType; use MensBeam\Lax\Text; use MensBeam\Lax\Person\Collection as PersonCollection; use MensBeam\Lax\Person\Person; @@ -44,9 +45,9 @@ trait Construct { } /** Returns a media type from an object member or from a URL's file name when possible */ - protected function fetchType(string $key, ?Url $url, ?\stdClass $obj = null): ?string { + protected function fetchType(string $key, ?Url $url, ?\stdClass $obj = null): ?MimeType { $type = $this->fetchMember($key, "str", $obj) ?? ""; - return $this->parseMediaType($type, $url); + return MimeType::parseLoose($type, $url); } /** Returns an object member as a parsed date */ diff --git a/lib/Parser/JSON/Feed.php b/lib/Parser/JSON/Feed.php index 4c04315..2eae316 100644 --- a/lib/Parser/JSON/Feed.php +++ b/lib/Parser/JSON/Feed.php @@ -8,13 +8,14 @@ namespace MensBeam\Lax\Parser\JSON; use MensBeam\Lax\Text; use MensBeam\Lax\Date; +use MensBeam\Lax\MimeType; +use MensBeam\Lax\Schedule; +use MensBeam\Lax\Url; use MensBeam\Lax\Feed as FeedStruct; use MensBeam\Lax\Person\Collection as PersonCollection; use MensBeam\Lax\Category\Collection as CategoryCollection; use MensBeam\Lax\Parser\Exception; use MensBeam\Lax\Parser\JSON\Entry as EntryParser; -use MensBeam\Lax\Schedule; -use MensBeam\Lax\Url; class Feed implements \MensBeam\Lax\Parser\Feed { use Construct; @@ -44,8 +45,8 @@ class Feed implements \MensBeam\Lax\Parser\Feed { /** Performs format-specific preparation and validation */ protected function init(FeedStruct $feed): FeedStruct { - $type = $this->parseMediaType($this->contentType) ?? ""; - if (strlen($type) && !in_array($type, self::MIME_TYPES)) { + $type = MimeType::parse($this->contentType); + if ($type && !in_array($type->essence, self::MIME_TYPES)) { throw new Exception("notJSONType"); } $data = @json_decode($this->data, false, 20, \JSON_BIGINT_AS_STRING | JSON_INVALID_UTF8_SUBSTITUTE); diff --git a/lib/Parser/XML/Construct.php b/lib/Parser/XML/Construct.php index 5658d0f..3bb153f 100644 --- a/lib/Parser/XML/Construct.php +++ b/lib/Parser/XML/Construct.php @@ -12,7 +12,7 @@ use MensBeam\Lax\Person\Person; use MensBeam\Lax\Person\Collection as PersonCollection; use MensBeam\Lax\Text; use MensBeam\Lax\Date; -use MensBeam\Lax\Parser\MimeType; +use MensBeam\Lax\MimeType; use MensBeam\Lax\Url; abstract class Construct { @@ -252,9 +252,9 @@ abstract class Construct { if (!strlen($t) && (!$best || $best[1] < -1)) { return [$cur, -1]; // any preferred type will rank higher than -1 } - $t = $this->parseMediaType($t); + $t = MimeType::parse($t); if ($t) { - $rank = $mediaTypes[$t] ?? -2; // even no type will rank higher than a non-preferred type + $rank = $mediaTypes[$t->essence] ?? -2; // even no type will rank higher than a non-preferred type if (!$best || $rank > $best[1]) { // if there is currently no candidate or the candidate ranks lower, use the current link return [$cur, $rank]; @@ -274,15 +274,12 @@ abstract class Construct { continue; } // get the content type; assume "text" if not provided - $type = trim($node->getAttribute("type")); - $type = $this->parseMediaType((!strlen($type)) ? "text" : $type); switch (MimeType::parseAtom(trim($node->getAttribute("type")))->essence) { case "text/plain": if (is_null($out->plain)) { $plain = $this->trimText($node->textContent); if (strlen($plain)) { $out->plain = $plain; - $populated = true; } } break; @@ -292,7 +289,6 @@ abstract class Construct { if (strlen($html)) { $out->html = $html; $out->htmlBase = strlen($node->baseURI) ? $node->baseURI : null; - $populated = true; } } break; @@ -300,12 +296,11 @@ abstract class Construct { if (is_null($out->xhtml) && ($xhtml = $this->fetchElement("xhtml:div", $node))) { $out->xhtml = $xhtml->ownerDocument->saveXML($xhtml); $out->xhtmlBase = strlen($xhtml->baseURI) ? $xhtml->baseURI : null; - $populated = true; } break; } } - return $populated ? $out : null; + return (!$this->empty($out)) ? $out : null; } /** Finds and parses Atom person-constructs, and returns a collection of Person objects */ diff --git a/lib/Parser/XML/Entry.php b/lib/Parser/XML/Entry.php index a28f01d..0464ea6 100644 --- a/lib/Parser/XML/Entry.php +++ b/lib/Parser/XML/Entry.php @@ -13,6 +13,7 @@ use MensBeam\Lax\Category\Collection as CategoryCollection; use MensBeam\Lax\Enclosure\Collection as EnclosureCollection; use MensBeam\Lax\Enclosure\Enclosure; use MensBeam\Lax\Date; +use MensBeam\Lax\MimeType; use MensBeam\Lax\Text; use MensBeam\Lax\Url; @@ -226,7 +227,7 @@ class Entry extends Construct implements \MensBeam\Lax\Parser\Entry { $title = $this->fetchString("@title", ".+", false, $el); $enc = new Enclosure; $enc->url = $this->fetchUrl("@href", $el); - $enc->type = $this->parseMediaType($this->fetchString("@type", null, false, $el) ?? "", $enc->url); + $enc->type = MimeType::parseLoose($this->fetchString("@type", null, false, $el) ?? "", $enc->url); $enc->title = isset($title) ? new Text($title) : null; $enc->size = ((int) $this->fetchString("@length", "\d+", false, $el)) ?: null; $out[] = $enc; @@ -273,7 +274,7 @@ class Entry extends Construct implements \MensBeam\Lax\Parser\Entry { $enc = new Enclosure; $enc->url = $url; // the enclosure module uses namespaced attributes, but it's conceivable documents might use attributes in the null namespace (which is more usual) - $enc->type = $this->parseMediaType($this->fetchString("@rss1file:type", ".+", false, $el) ?? $this->fetchString("@type", ".+", false, $el) ?? "", $enc->url); + $enc->type = MimeType::parseLoose($this->fetchString("@rss1file:type", ".+", false, $el) ?? $this->fetchString("@type", ".+", false, $el) ?? "", $enc->url); $enc->size = ((int) ($this->fetchString("@rss1file:length", "\d+", false, $el) ?? $this->fetchString("@length", "\d+", false, $el))) ?: null; $out[] = $enc; } @@ -288,7 +289,7 @@ class Entry extends Construct implements \MensBeam\Lax\Parser\Entry { if ($url) { $enc = new Enclosure; $enc->url = $url; - $enc->type = $this->parseMediaType($this->fetchString("@type", null, false, $el) ?? "", $enc->url); + $enc->type = MimeType::parseLoose($this->fetchString("@type", null, false, $el) ?? "", $enc->url); $enc->size = ((int) $this->fetchString("@length", "\d+", false, $el)) ?: null; $out[] = $enc; } @@ -302,7 +303,9 @@ class Entry extends Construct implements \MensBeam\Lax\Parser\Entry { if ($url) { $out = new Enclosure; $out->url = $url; - $out->type = $this->parseMediaType($this->fetchString("@type", ".+", false, $node) ?? "", $url) ?? $this->fetchString("@medium", "(?-i:image|audio|video|document|executable)", false, $node); + $out->type = MimeType::parseLoose($this->fetchString("@type", ".+", false, $node) ?? "") + ?? MimeType::parseLoose($this->fetchString("@medium", ".+", false, $node) ?? "") + ?? MimeType::parseLoose("", $url); $out->title = $this->fetchTitleMediaRss($node); foreach (self::ENCLOSURE_ATTR_INTEGERS as $prop => $query) { $value = (int) $this->fetchString($query, "\d+", false, $node); diff --git a/lib/Parser/XML/Feed.php b/lib/Parser/XML/Feed.php index cb02660..16e4e84 100644 --- a/lib/Parser/XML/Feed.php +++ b/lib/Parser/XML/Feed.php @@ -13,6 +13,7 @@ use MensBeam\Lax\Person\Collection as PersonCollection; use MensBeam\Lax\Category\Collection as CategoryCollection; use MensBeam\Lax\Feed as FeedStruct; use MensBeam\Lax\Date; +use MensBeam\Lax\MimeType; use MensBeam\Lax\Schedule; use MensBeam\Lax\Text; use MensBeam\Lax\Url; @@ -49,8 +50,8 @@ class Feed extends Construct implements \MensBeam\Lax\Parser\Feed { /** Performs initialization of the instance */ protected function init(FeedStruct $feed): FeedStruct { - $type = $this->parseMediaType($this->contentType) ?? ""; - if (strlen($type) && !in_array($type, self::MIME_TYPES)) { + $type = MimeType::parse($this->contentType) ?? ""; + if ($type && !in_array($type->essence, self::MIME_TYPES)) { throw new Exception("notXMLType"); } $this->document = new \DOMDocument(); diff --git a/tests/cases/AbstractParserTestCase.php b/tests/cases/AbstractParserTestCase.php index baf8c1a..4186e31 100644 --- a/tests/cases/AbstractParserTestCase.php +++ b/tests/cases/AbstractParserTestCase.php @@ -42,6 +42,7 @@ use MensBeam\Lax\Url; use MensBeam\Lax\Entry; use MensBeam\Lax\Metadata; use MensBeam\Lax\Schedule; +use MensBeam\Lax\MimeType; use MensBeam\Lax\Person\Person; use MensBeam\Lax\Category\Category; use MensBeam\Lax\Enclosure\Enclosure; @@ -187,6 +188,8 @@ class AbstractParserTestCase extends \PHPUnit\Framework\TestCase { $e->$k = $this->makeUrl($v); } elseif ($k === "title") { $e->$k = $this->makeText($v); + } elseif ($k === "type") { + $e->$k = MimeType::parseLoose($v); } else { $e->$k = $v; } diff --git a/tests/cases/JSON/JSONTest.php b/tests/cases/JSON/JSONTest.php index 2210130..6d1bcf9 100644 --- a/tests/cases/JSON/JSONTest.php +++ b/tests/cases/JSON/JSONTest.php @@ -7,7 +7,6 @@ declare(strict_types=1); namespace MensBeam\Lax\TestCase\JSON; /** - * @covers MensBeam\Lax\Parser\Construct * @covers MensBeam\Lax\Parser\JSON\Feed * @covers MensBeam\Lax\Parser\JSON\Entry */ diff --git a/tests/cases/JSON/entry.yaml b/tests/cases/JSON/entry.yaml index 41b844e..4e83195 100644 --- a/tests/cases/JSON/entry.yaml +++ b/tests/cases/JSON/entry.yaml @@ -452,7 +452,7 @@ Entry attachments: "attachments": [ { "url": "http://example.com/image", - "mime_type": "image/svg+xml; charset=\"urf-8\"", + "mime_type": "image/svg+xml; charset=\"utf-8\"", "title": "Logo", "size_in_bytes": 2345 }, @@ -497,7 +497,7 @@ Entry attachments: enclosures: - data: - url: 'http://example.com/image' - type: 'image/svg+xml' + type: 'image/svg+xml;charset=utf-8' title: Logo size: 2345 - url: 'http://example.com/graphic.png' @@ -527,7 +527,7 @@ Entry image and attachments: "attachments": [ { "url": "http://example.com/logo", - "mime_type": "image/svg+xml; charset=\"urf-8\"", + "mime_type": "image/svg+xml; charset=\"utf-8\"", "title": "Logo", "size_in_bytes": 2345 } @@ -546,6 +546,6 @@ Entry image and attachments: preferred: true - data: - url: 'http://example.com/logo' - type: 'image/svg+xml' + type: 'image/svg+xml;charset=utf-8' title: Logo size: 2345 diff --git a/tests/cases/XML/XMLTest.php b/tests/cases/XML/XMLTest.php index 11dc97a..5958f63 100644 --- a/tests/cases/XML/XMLTest.php +++ b/tests/cases/XML/XMLTest.php @@ -7,10 +7,9 @@ declare(strict_types=1); namespace MensBeam\Lax\TestCase\XML; /** - * @covers MensBeam\Lax\Parser\Construct * @covers MensBeam\Lax\Parser\XML\Feed * @covers MensBeam\Lax\Parser\XML\Entry - * @covers MensBeam\Lax\Parser\XML\XPath + * @covers MensBeam\Lax\Parser\XML\XPath */ class XMLTest extends \MensBeam\Lax\TestCase\AbstractParserTestCase { /** @dataProvider provideXML */ diff --git a/tests/cases/XML/entry-other.yaml b/tests/cases/XML/entry-other.yaml index 65b7ab7..5b4f7d6 100644 --- a/tests/cases/XML/entry-other.yaml +++ b/tests/cases/XML/entry-other.yaml @@ -201,7 +201,7 @@ Media RSS enclosures: - url: 'http://example.com/' type: 'image' - url: 'http://example.com/entry.m4a' - type: 'audio/mp4' + type: 'image' - enclosures: - url: 'http://example.com/' title: 'Plain title'