Browse Source

Properly implement media type detection

master
J. King 4 years ago
parent
commit
43a99dbfcd
  1. 46
      lib/Parser/Construct.php
  2. 14
      lib/Parser/JSON/Construct.php
  3. 7
      lib/Parser/JSON/Entry.php

46
lib/Parser/Construct.php

@ -8,6 +8,7 @@ namespace JKingWeb\Lax\Parser;
use JKingWeb\Lax\Collection;
use JKingWeb\Lax\Date;
use JKingWeb\Lax\Url;
trait Construct {
/** Trims plain text and collapses whitespace */
@ -30,7 +31,7 @@ trait Construct {
/** Tests whether a string is a valid e-mail address
*
* Accepts IDN hosts and (with PHP 7.1 and above) Unicode localparts
* Accepts IDN hosts and Unicode localparts
*/
protected function validateMail(string $addr): bool {
$out = preg_match("/^(.+?)@([^@]+)$/", $addr, $match);
@ -39,13 +40,12 @@ trait Construct {
}
$local = $match[1];
$domain = $match[2];
// PHP's filter_var does not accept IDN hosts, so we have to perform an IDNA transformat first
$domain = idn_to_ascii($domain, \IDNA_NONTRANSITIONAL_TO_ASCII, \INTL_IDNA_VARIANT_UTS46); // settings for IDNA2008 algorithm (I think)
if ($domain===false) {
return false;
// PHP's filter_var does not accept IDN hosts, so we have to perform an IDNA transformation first
$domain = idn_to_ascii($domain, \IDNA_NONTRANSITIONAL_TO_ASCII | \IDNA_CHECK_BIDI | \IDNA_CHECK_CONTEXTJ, \INTL_IDNA_VARIANT_UTS46); // settings for IDNA2008 algorithm (I think)
if ($domain !== false) {$addr = "$local@$domain";
return (bool) filter_var($addr, \FILTER_VALIDATE_EMAIL, \FILTER_FLAG_EMAIL_UNICODE);
}
$addr = "$local@$domain";
return (bool) filter_var($addr, \FILTER_VALIDATE_EMAIL, \FILTER_FLAG_EMAIL_UNICODE);
return false;
}
protected function parseDate(string $date): ?Date {
@ -64,6 +64,38 @@ trait Construct {
return $out ?: null;
}
/** Determines a normalized media type from a declared type string or, failing that, from a URL's file extension
 *
 * The declared type is examined first: leading whitespace is skipped, anything following the type (such as parameters introduced by a semicolon) is discarded, and the result is lowercased
 *
 * If the declared type is unusable and a URL is supplied, the extension of the last segment of the URL's path is looked up using Mimey instead
 *
 * Returns null if no media type could be determined
 */
protected function parseMediaType(string $type, ?Url $url = null): ?string {
    if (preg_match('<^\s*([0-9a-z]+(?:/[!#$%&\'\*\+\-\.^_`|~0-9a-z]+)?)(?:\s|;|$)>i', $type, $match)) {
        /* NOTE: The pattern used here is a subset of what is
            technically allowed by RFC 7231: the "type" portion
            is supposed to be as general as the "subtype" portion,
            but in practice only alphabetic types have ever been
            registered, making a more specific pattern more
            practically useful for detecting media types.

            See:
            <https://tools.ietf.org/html/rfc7231#section-3.1.1.1>
            <https://tools.ietf.org/html/rfc7230#section-3.2.6>

            Additionally, types without subtypes are accepted as
            we foresee the general type still being useful to
            feed processors; this is why the subtype group in
            the pattern is optional.
        */
        return strtolower($match[1]);
    }
    if ($url) {
        $path = $url->getPath();
        // isolate the last path segment; when the path has no slash strrpos() yields false, which casts to 0 and keeps the whole path
        $file = substr($path, (int) strrpos($path, "/"));
        $ext = strrpos($file, ".");
        if ($ext !== false) {
            $ext = substr($file, $ext + 1);
            // a trailing dot leaves an empty extension, which is not worth looking up
            if (strlen($ext)) {
                // the Mimey instance is created on first use and kept on the object for later calls
                return ($this->mime ?? ($this->mime = new \Mimey\MimeTypes))->getMimeType($ext);
            }
        }
    }
    return null;
}
protected function empty($o): bool {
return !array_filter((array) $o, function($v) {
return !is_null($v) && (!$v instanceof Collection || sizeof($v) > 0);

14
lib/Parser/JSON/Construct.php

@ -19,7 +19,7 @@ trait Construct {
*
* Returns null otherwise
*/
protected function fetchMember(string $key, string $type, \stdClass $obj = null) {
protected function fetchMember(string $key, string $type, ?\stdClass $obj = null) {
$obj = $obj ?? $this->data;
if (!isset($obj->$key)) {
return null;
@ -34,7 +34,7 @@ trait Construct {
}
/** Returns an object member as a resolved and normalized URL */
protected function fetchUrl(string $key, \stdClass $obj = null): ?Url {
protected function fetchUrl(string $key, ?\stdClass $obj = null): ?Url {
$url = $this->fetchMember($key, "str", $obj);
try {
return (!is_null($url)) ? new Url($url, $this->url) : null;
@ -43,13 +43,19 @@ trait Construct {
}
}
/** Reads an object member as a media type, deferring to the URL's file name when the member provides no usable type */
protected function fetchType(string $key, ?Url $url, ?\stdClass $obj = null): ?string {
    $declared = $this->fetchMember($key, "str", $obj);
    if (is_null($declared)) {
        // the media type parser requires a string, so a missing member is passed as an empty one
        $declared = "";
    }
    return $this->parseMediaType($declared, $url);
}
/** Returns an object member as a parsed date */
protected function fetchDate(string $key, \stdClass $obj = null): ?Date {
protected function fetchDate(string $key, ?\stdClass $obj = null): ?Date {
return $this->parseDate($this->fetchMember($key, "str", $obj) ?? "");
}
/** Returns a plain-text string object member wrapped in a Text object */
protected function fetchText(string $key, \stdClass $obj = null): ?Text {
protected function fetchText(string $key, ?\stdClass $obj = null): ?Text {
$t = $this->fetchMember($key, "str", $obj);
if (!is_null($t)) {
return new Text($t);

7
lib/Parser/JSON/Entry.php

@ -147,15 +147,10 @@ class Entry implements \JKingWeb\Lax\Parser\Entry {
if ($url) {
$m = new Enclosure;
$m->url = $url;
$m->type = $this->fetchMember("mime_type", "str", $attachment);
$m->type = $this->fetchType("mime_type", $url, $attachment);
$m->title = $this->fetchMember("title", "str", $attachment);
$m->size = $this->fetchMember("size_in_bytes", "int", $attachment);
$m->duration = $this->fetchMember("duration_in_seconds", "int", $attachment);
// detect media type from file name if no type is provided
if (!$m->type) {
$ext = substr();
$m->type = ($this->mime ?? ($this->mime = new \Mimey\MimeTypes))->getMimeType($ext);
}
$out[] = $m;
}
}

Loading…
Cancel
Save