Browse Source

Expose structured media types

Also prefer the Media RSS "medium" attribute over media types guessed from file extensions
master
J. King 4 years ago
parent
commit
312d09b58f
  1. 2
      lib/Enclosure/Enclosure.php
  2. 4
      lib/MimeType.php
  3. 8
      lib/Parser/Construct.php
  4. 5
      lib/Parser/JSON/Construct.php
  5. 9
      lib/Parser/JSON/Feed.php
  6. 13
      lib/Parser/XML/Construct.php
  7. 11
      lib/Parser/XML/Entry.php
  8. 5
      lib/Parser/XML/Feed.php
  9. 3
      tests/cases/AbstractParserTestCase.php
  10. 1
      tests/cases/JSON/JSONTest.php
  11. 8
      tests/cases/JSON/entry.yaml
  12. 3
      tests/cases/XML/XMLTest.php
  13. 2
      tests/cases/XML/entry-other.yaml

2
lib/Enclosure/Enclosure.php

@ -9,7 +9,7 @@ namespace MensBeam\Lax\Enclosure;
/**
* @property \MensBeam\Lax\Url $url
* @property \MensBeam\Lax\Text $title
* @property string $type
* @property \MensBeam\Lax\MimeType $type
* @property bool $sample
* @property int $height
* @property int $width

4
lib/Parser/MimeType.php → lib/MimeType.php

@ -4,9 +4,7 @@
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Lax\Parser;
use MensBeam\Lax\Url;
namespace MensBeam\Lax;
/** {@inheritDoc} */
class MimeType extends \MensBeam\Mime\MimeType {

8
lib/Parser/Construct.php

@ -8,6 +8,7 @@ namespace MensBeam\Lax\Parser;
use MensBeam\Lax\Collection;
use MensBeam\Lax\Date;
use MensBeam\Lax\MimeType;
use MensBeam\Lax\Url;
trait Construct {
@ -50,13 +51,6 @@ trait Construct {
return $out ?: null;
}
protected function parseMediaType(string $type, ?Url $url = null): ?string {
if ($normalized = MimeType::parseLoose($type, $url)) {
return $normalized->essence;
}
return null;
}
protected function empty($o, array $ignore = []): bool {
return !array_filter((array) $o, function($v, $k) use ($ignore) {
return !in_array($k, $ignore) && !is_null($v) && (!$v instanceof Collection || sizeof($v) > 0);

5
lib/Parser/JSON/Construct.php

@ -7,6 +7,7 @@ declare(strict_types=1);
namespace MensBeam\Lax\Parser\JSON;
use MensBeam\Lax\Date;
use MensBeam\Lax\MimeType;
use MensBeam\Lax\Text;
use MensBeam\Lax\Person\Collection as PersonCollection;
use MensBeam\Lax\Person\Person;
@ -44,9 +45,9 @@ trait Construct {
}
/** Returns a media type from an object member or from a URL's file name when possible */
protected function fetchType(string $key, ?Url $url, ?\stdClass $obj = null): ?string {
protected function fetchType(string $key, ?Url $url, ?\stdClass $obj = null): ?MimeType {
$type = $this->fetchMember($key, "str", $obj) ?? "";
return $this->parseMediaType($type, $url);
return MimeType::parseLoose($type, $url);
}
/** Returns an object member as a parsed date */

9
lib/Parser/JSON/Feed.php

@ -8,13 +8,14 @@ namespace MensBeam\Lax\Parser\JSON;
use MensBeam\Lax\Text;
use MensBeam\Lax\Date;
use MensBeam\Lax\MimeType;
use MensBeam\Lax\Schedule;
use MensBeam\Lax\Url;
use MensBeam\Lax\Feed as FeedStruct;
use MensBeam\Lax\Person\Collection as PersonCollection;
use MensBeam\Lax\Category\Collection as CategoryCollection;
use MensBeam\Lax\Parser\Exception;
use MensBeam\Lax\Parser\JSON\Entry as EntryParser;
use MensBeam\Lax\Schedule;
use MensBeam\Lax\Url;
class Feed implements \MensBeam\Lax\Parser\Feed {
use Construct;
@ -44,8 +45,8 @@ class Feed implements \MensBeam\Lax\Parser\Feed {
/** Performs format-specific preparation and validation */
protected function init(FeedStruct $feed): FeedStruct {
$type = $this->parseMediaType($this->contentType) ?? "";
if (strlen($type) && !in_array($type, self::MIME_TYPES)) {
$type = MimeType::parse($this->contentType);
if ($type && !in_array($type->essence, self::MIME_TYPES)) {
throw new Exception("notJSONType");
}
$data = @json_decode($this->data, false, 20, \JSON_BIGINT_AS_STRING | JSON_INVALID_UTF8_SUBSTITUTE);

13
lib/Parser/XML/Construct.php

@ -12,7 +12,7 @@ use MensBeam\Lax\Person\Person;
use MensBeam\Lax\Person\Collection as PersonCollection;
use MensBeam\Lax\Text;
use MensBeam\Lax\Date;
use MensBeam\Lax\Parser\MimeType;
use MensBeam\Lax\MimeType;
use MensBeam\Lax\Url;
abstract class Construct {
@ -252,9 +252,9 @@ abstract class Construct {
if (!strlen($t) && (!$best || $best[1] < -1)) {
return [$cur, -1]; // any preferred type will rank higher than -1
}
$t = $this->parseMediaType($t);
$t = MimeType::parse($t);
if ($t) {
$rank = $mediaTypes[$t] ?? -2; // even no type will rank higher than a non-preferred type
$rank = $mediaTypes[$t->essence] ?? -2; // even no type will rank higher than a non-preferred type
if (!$best || $rank > $best[1]) {
// if there is currently no candidate or the candidate ranks lower, use the current link
return [$cur, $rank];
@ -274,15 +274,12 @@ abstract class Construct {
continue;
}
// get the content type; assume "text" if not provided
$type = trim($node->getAttribute("type"));
$type = $this->parseMediaType((!strlen($type)) ? "text" : $type);
switch (MimeType::parseAtom(trim($node->getAttribute("type")))->essence) {
case "text/plain":
if (is_null($out->plain)) {
$plain = $this->trimText($node->textContent);
if (strlen($plain)) {
$out->plain = $plain;
$populated = true;
}
}
break;
@ -292,7 +289,6 @@ abstract class Construct {
if (strlen($html)) {
$out->html = $html;
$out->htmlBase = strlen($node->baseURI) ? $node->baseURI : null;
$populated = true;
}
}
break;
@ -300,12 +296,11 @@ abstract class Construct {
if (is_null($out->xhtml) && ($xhtml = $this->fetchElement("xhtml:div", $node))) {
$out->xhtml = $xhtml->ownerDocument->saveXML($xhtml);
$out->xhtmlBase = strlen($xhtml->baseURI) ? $xhtml->baseURI : null;
$populated = true;
}
break;
}
}
return $populated ? $out : null;
return (!$this->empty($out)) ? $out : null;
}
/** Finds and parses Atom person-constructs, and returns a collection of Person objects */

11
lib/Parser/XML/Entry.php

@ -13,6 +13,7 @@ use MensBeam\Lax\Category\Collection as CategoryCollection;
use MensBeam\Lax\Enclosure\Collection as EnclosureCollection;
use MensBeam\Lax\Enclosure\Enclosure;
use MensBeam\Lax\Date;
use MensBeam\Lax\MimeType;
use MensBeam\Lax\Text;
use MensBeam\Lax\Url;
@ -226,7 +227,7 @@ class Entry extends Construct implements \MensBeam\Lax\Parser\Entry {
$title = $this->fetchString("@title", ".+", false, $el);
$enc = new Enclosure;
$enc->url = $this->fetchUrl("@href", $el);
$enc->type = $this->parseMediaType($this->fetchString("@type", null, false, $el) ?? "", $enc->url);
$enc->type = MimeType::parseLoose($this->fetchString("@type", null, false, $el) ?? "", $enc->url);
$enc->title = isset($title) ? new Text($title) : null;
$enc->size = ((int) $this->fetchString("@length", "\d+", false, $el)) ?: null;
$out[] = $enc;
@ -273,7 +274,7 @@ class Entry extends Construct implements \MensBeam\Lax\Parser\Entry {
$enc = new Enclosure;
$enc->url = $url;
// the enclosure module uses namespaced attributes, but it's conceivable documents might use attributes in the null namespace (which is more usual)
$enc->type = $this->parseMediaType($this->fetchString("@rss1file:type", ".+", false, $el) ?? $this->fetchString("@type", ".+", false, $el) ?? "", $enc->url);
$enc->type = MimeType::parseLoose($this->fetchString("@rss1file:type", ".+", false, $el) ?? $this->fetchString("@type", ".+", false, $el) ?? "", $enc->url);
$enc->size = ((int) ($this->fetchString("@rss1file:length", "\d+", false, $el) ?? $this->fetchString("@length", "\d+", false, $el))) ?: null;
$out[] = $enc;
}
@ -288,7 +289,7 @@ class Entry extends Construct implements \MensBeam\Lax\Parser\Entry {
if ($url) {
$enc = new Enclosure;
$enc->url = $url;
$enc->type = $this->parseMediaType($this->fetchString("@type", null, false, $el) ?? "", $enc->url);
$enc->type = MimeType::parseLoose($this->fetchString("@type", null, false, $el) ?? "", $enc->url);
$enc->size = ((int) $this->fetchString("@length", "\d+", false, $el)) ?: null;
$out[] = $enc;
}
@ -302,7 +303,9 @@ class Entry extends Construct implements \MensBeam\Lax\Parser\Entry {
if ($url) {
$out = new Enclosure;
$out->url = $url;
$out->type = $this->parseMediaType($this->fetchString("@type", ".+", false, $node) ?? "", $url) ?? $this->fetchString("@medium", "(?-i:image|audio|video|document|executable)", false, $node);
$out->type = MimeType::parseLoose($this->fetchString("@type", ".+", false, $node) ?? "")
?? MimeType::parseLoose($this->fetchString("@medium", ".+", false, $node) ?? "")
?? MimeType::parseLoose("", $url);
$out->title = $this->fetchTitleMediaRss($node);
foreach (self::ENCLOSURE_ATTR_INTEGERS as $prop => $query) {
$value = (int) $this->fetchString($query, "\d+", false, $node);

5
lib/Parser/XML/Feed.php

@ -13,6 +13,7 @@ use MensBeam\Lax\Person\Collection as PersonCollection;
use MensBeam\Lax\Category\Collection as CategoryCollection;
use MensBeam\Lax\Feed as FeedStruct;
use MensBeam\Lax\Date;
use MensBeam\Lax\MimeType;
use MensBeam\Lax\Schedule;
use MensBeam\Lax\Text;
use MensBeam\Lax\Url;
@ -49,8 +50,8 @@ class Feed extends Construct implements \MensBeam\Lax\Parser\Feed {
/** Performs initialization of the instance */
protected function init(FeedStruct $feed): FeedStruct {
$type = $this->parseMediaType($this->contentType) ?? "";
if (strlen($type) && !in_array($type, self::MIME_TYPES)) {
$type = MimeType::parse($this->contentType) ?? "";
if ($type && !in_array($type->essence, self::MIME_TYPES)) {
throw new Exception("notXMLType");
}
$this->document = new \DOMDocument();

3
tests/cases/AbstractParserTestCase.php

@ -42,6 +42,7 @@ use MensBeam\Lax\Url;
use MensBeam\Lax\Entry;
use MensBeam\Lax\Metadata;
use MensBeam\Lax\Schedule;
use MensBeam\Lax\MimeType;
use MensBeam\Lax\Person\Person;
use MensBeam\Lax\Category\Category;
use MensBeam\Lax\Enclosure\Enclosure;
@ -187,6 +188,8 @@ class AbstractParserTestCase extends \PHPUnit\Framework\TestCase {
$e->$k = $this->makeUrl($v);
} elseif ($k === "title") {
$e->$k = $this->makeText($v);
} elseif ($k === "type") {
$e->$k = MimeType::parseLoose($v);
} else {
$e->$k = $v;
}

1
tests/cases/JSON/JSONTest.php

@ -7,7 +7,6 @@ declare(strict_types=1);
namespace MensBeam\Lax\TestCase\JSON;
/**
* @covers MensBeam\Lax\Parser\Construct<extended>
* @covers MensBeam\Lax\Parser\JSON\Feed<extended>
* @covers MensBeam\Lax\Parser\JSON\Entry<extended>
*/

8
tests/cases/JSON/entry.yaml

@ -452,7 +452,7 @@ Entry attachments:
"attachments": [
{
"url": "http://example.com/image",
"mime_type": "image/svg+xml; charset=\"urf-8\"",
"mime_type": "image/svg+xml; charset=\"utf-8\"",
"title": "Logo",
"size_in_bytes": 2345
},
@ -497,7 +497,7 @@ Entry attachments:
enclosures:
- data:
- url: 'http://example.com/image'
type: 'image/svg+xml'
type: 'image/svg+xml;charset=utf-8'
title: Logo
size: 2345
- url: 'http://example.com/graphic.png'
@ -527,7 +527,7 @@ Entry image and attachments:
"attachments": [
{
"url": "http://example.com/logo",
"mime_type": "image/svg+xml; charset=\"urf-8\"",
"mime_type": "image/svg+xml; charset=\"utf-8\"",
"title": "Logo",
"size_in_bytes": 2345
}
@ -546,6 +546,6 @@ Entry image and attachments:
preferred: true
- data:
- url: 'http://example.com/logo'
type: 'image/svg+xml'
type: 'image/svg+xml;charset=utf-8'
title: Logo
size: 2345

3
tests/cases/XML/XMLTest.php

@ -7,10 +7,9 @@ declare(strict_types=1);
namespace MensBeam\Lax\TestCase\XML;
/**
* @covers MensBeam\Lax\Parser\Construct<extended>
* @covers MensBeam\Lax\Parser\XML\Feed<extended>
* @covers MensBeam\Lax\Parser\XML\Entry<extended>
* @covers MensBeam\Lax\Parser\XML\XPath<extended>
* @covers MensBeam\Lax\Parser\XML\XPath
*/
class XMLTest extends \MensBeam\Lax\TestCase\AbstractParserTestCase {
/** @dataProvider provideXML */

2
tests/cases/XML/entry-other.yaml

@ -201,7 +201,7 @@ Media RSS enclosures:
- url: 'http://example.com/'
type: 'image'
- url: 'http://example.com/entry.m4a'
type: 'audio/mp4'
type: 'image'
- enclosures:
- url: 'http://example.com/'
title: 'Plain title'

Loading…
Cancel
Save