diff --git a/lib/Parser/Construct.php b/lib/Parser/Construct.php
index 1fdb11b..ff85946 100644
--- a/lib/Parser/Construct.php
+++ b/lib/Parser/Construct.php
@@ -8,6 +8,7 @@ namespace JKingWeb\Lax\Parser;
 
 use JKingWeb\Lax\Collection;
 use JKingWeb\Lax\Date;
+use JKingWeb\Lax\Url;
 
 trait Construct {
     /** Trims plain text and collapses whitespace */
@@ -30,7 +31,7 @@ trait Construct {
 
     /** Tests whether a string is a valid e-mail address
      *
-     * Accepts IDN hosts and (with PHP 7.1 and above) Unicode localparts
+     * Accepts IDN hosts and Unicode localparts
      */
     protected function validateMail(string $addr): bool {
         $out = preg_match("/^(.+?)@([^@]+)$/", $addr, $match);
@@ -39,13 +40,13 @@ trait Construct {
         }
         $local = $match[1];
         $domain = $match[2];
-        // PHP's filter_var does not accept IDN hosts, so we have to perform an IDNA transformat first
-        $domain = idn_to_ascii($domain, \IDNA_NONTRANSITIONAL_TO_ASCII, \INTL_IDNA_VARIANT_UTS46); // settings for IDNA2008 algorithm (I think)
-        if ($domain===false) {
-            return false;
+        // PHP's filter_var does not accept IDN hosts, so we have to perform an IDNA transformation first
+        $domain = idn_to_ascii($domain, \IDNA_NONTRANSITIONAL_TO_ASCII | \IDNA_CHECK_BIDI | \IDNA_CHECK_CONTEXTJ, \INTL_IDNA_VARIANT_UTS46); // settings for IDNA2008 algorithm (I think)
+        if ($domain !== false) {
+            $addr = "$local@$domain";
+            return (bool) filter_var($addr, \FILTER_VALIDATE_EMAIL, \FILTER_FLAG_EMAIL_UNICODE);
         }
-        $addr = "$local@$domain";
-        return (bool) filter_var($addr, \FILTER_VALIDATE_EMAIL, \FILTER_FLAG_EMAIL_UNICODE);
+        return false;
     }
 
     protected function parseDate(string $date): ?Date {
@@ -64,6 +65,48 @@ trait Construct {
         return $out ?: null;
     }
 
+    /** Normalizes a media type, falling back to a URL's file extension
+     *
+     * A type found in $type is returned in lowercase without its parameters;
+     * otherwise, if a URL is supplied, the extension of the last segment of
+     * its path is looked up instead
+     *
+     * Returns null if no type can be determined
+     */
+    protected function parseMediaType(string $type, ?Url $url = null): ?string {
+        if (preg_match('<^\s*([0-9a-z]+(?:/[!#$%&\'\*\+\-\.^_`|~0-9a-z]+)?)(?:\s|;|$)>i', $type, $match)) {
+            /* NOTE: The pattern used here is a subset of what is
+                technically allowed by RFC 7231: the "type" portion
+                is supposed to be as general as the "subtype" portion,
+                but in practice only alphabetic types have ever been
+                registered, making a more specific pattern more
+                practically useful for detecting media types.
+
+                See:
+                RFC 7231, section 3.1.1.1 (the "media-type" grammar)
+                RFC 7230, section 3.2.6 (the "token" characters)
+
+                Additionally, types without subtypes are accepted as
+                we foresee the general type still being useful to
+                feed processors.
+            */
+            return strtolower($match[1]);
+        }
+        if ($url) {
+            // take the last path segment; the cast turns strrpos()'s false into offset 0, so a path without a slash is used whole
+            $file = substr($url->getPath(), (int) strrpos($url->getPath(), "/"));
+            $ext = strrpos($file, ".");
+            if ($ext !== false) {
+                $ext = substr($file, $ext + 1);
+                if (strlen($ext)) {
+                    // the Mimey lookup object is created on first use and kept in $this->mime for later calls
+                    return ($this->mime ?? ($this->mime = new \Mimey\MimeTypes))->getMimeType($ext);
+                }
+            }
+        }
+        return null;
+    }
+
     protected function empty($o): bool {
         return !array_filter((array) $o, function($v) {
             return !is_null($v) && (!$v instanceof Collection || sizeof($v) > 0);
diff --git a/lib/Parser/JSON/Construct.php b/lib/Parser/JSON/Construct.php
index 9ee2322..93f510e 100644
--- a/lib/Parser/JSON/Construct.php
+++ b/lib/Parser/JSON/Construct.php
@@ -19,7 +19,7 @@ trait Construct {
      *
      * Returns null otherwise
      */
-    protected function fetchMember(string $key, string $type, \stdClass $obj = null) {
+    protected function fetchMember(string $key, string $type, ?\stdClass $obj = null) {
         $obj = $obj ?? $this->data;
         if (!isset($obj->$key)) {
             return null;
@@ -34,7 +34,7 @@ trait Construct {
     }
 
     /** Returns an object member as a resolved and normalized URL */
-    protected function fetchUrl(string $key, \stdClass $obj = null): ?Url {
+    protected function fetchUrl(string $key, ?\stdClass $obj = null): ?Url {
         $url = $this->fetchMember($key, "str", $obj);
         try {
             return (!is_null($url)) ? new Url($url, $this->url) : null;
@@ -43,13 +43,19 @@ trait Construct {
         }
     }
 
+    /** Returns a media type from an object member or from a URL's file name when possible */
+    protected function fetchType(string $key, ?Url $url, ?\stdClass $obj = null): ?string {
+        $type = $this->fetchMember($key, "str", $obj) ?? "";
+        return $this->parseMediaType($type, $url);
+    }
+
     /** Returns an object member as a parsed date */
-    protected function fetchDate(string $key, \stdClass $obj = null): ?Date {
+    protected function fetchDate(string $key, ?\stdClass $obj = null): ?Date {
         return $this->parseDate($this->fetchMember($key, "str", $obj) ?? "");
     }
 
     /** Returns a plain-text string object member wrapped in a Text object */
-    protected function fetchText(string $key, \stdClass $obj = null): ?Text {
+    protected function fetchText(string $key, ?\stdClass $obj = null): ?Text {
         $t = $this->fetchMember($key, "str", $obj);
         if (!is_null($t)) {
             return new Text($t);
diff --git a/lib/Parser/JSON/Entry.php b/lib/Parser/JSON/Entry.php
index dbc2e14..7a5b420 100644
--- a/lib/Parser/JSON/Entry.php
+++ b/lib/Parser/JSON/Entry.php
@@ -147,15 +147,10 @@ class Entry implements \JKingWeb\Lax\Parser\Entry {
             if ($url) {
                 $m = new Enclosure;
                 $m->url = $url;
-                $m->type = $this->fetchMember("mime_type", "str", $attachment);
+                $m->type = $this->fetchType("mime_type", $url, $attachment);
                 $m->title = $this->fetchMember("title", "str", $attachment);
                 $m->size = $this->fetchMember("size_in_bytes", "int", $attachment);
                 $m->duration = $this->fetchMember("duration_in_seconds", "int", $attachment);
-                // detect media type from file name if no type is provided
-                if (!$m->type) {
-                    $ext = substr();
-                    $m->type = ($this->mime ?? ($this->mime = new \Mimey\MimeTypes))->getMimeType($ext);
-                }
                 $out[] = $m;
             }
         }