Browse Source

Initial integration of external media type parsing

master
J. King 4 years ago
parent
commit
86bfd93531
  1. 3
      composer.json
  2. 80
      composer.lock
  3. 44
      lib/Parser/Construct.php
  4. 148
      lib/Parser/MimeType.php
  5. 45
      lib/Parser/XML/Construct.php
  6. 20
      tests/cases/Util/Mime/README
  7. 3526
      tests/cases/Util/Mime/generated-mime-types.json
  8. 383
      tests/cases/Util/Mime/mime-types.json
  9. 42
      tests/cases/Util/MimeTypeTest.php

3
composer.json

@ -18,7 +18,8 @@
"ext-dom": "*",
"ext-intl": "*",
"ralouphie/mimey": "^2.1",
"psr/http-message": "^1.0"
"psr/http-message": "^1.0",
"mensbeam/mimesniff": "^0.1.1"
},
"require-dev": {
"bamarni/composer-bin-plugin": "^1.3"

80
composer.lock

@ -4,8 +4,54 @@
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
"This file is @generated automatically"
],
"content-hash": "a49fcefdbe1073b594c29260943b2919",
"content-hash": "3c8eb2468138219eddb42a853c7c0bc6",
"packages": [
{
"name": "mensbeam/mimesniff",
"version": "0.1.1",
"source": {
"type": "git",
"url": "https://github.com/mensbeam/mime.git",
"reference": "cce75a26cba20fe9bc275237775f90a2a311abba"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/mensbeam/mime/zipball/cce75a26cba20fe9bc275237775f90a2a311abba",
"reference": "cce75a26cba20fe9bc275237775f90a2a311abba",
"shasum": ""
},
"require": {
"php": "^7.1"
},
"require-dev": {
"bamarni/composer-bin-plugin": "^1.3",
"ext-intl": "*"
},
"type": "library",
"autoload": {
"psr-4": {
"MensBeam\\Mime\\": "lib/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "J. King",
"email": "jking@jkingweb.ca",
"homepage": "https://jkingweb.ca/"
}
],
"description": "An implementation of the WHATWG MIME Sniffing specification",
"keywords": [
"WHATWG",
"mime",
"mimesniff"
],
"time": "2020-04-17T16:23:45+00:00"
},
{
"name": "psr/http-message",
"version": "1.0.1",
@ -100,31 +146,29 @@
"packages-dev": [
{
"name": "bamarni/composer-bin-plugin",
"version": "v1.3.0",
"version": "1.4.0",
"source": {
"type": "git",
"url": "https://github.com/bamarni/composer-bin-plugin.git",
"reference": "67f9d314dc7ecf7245b8637906e151ccc62b8d24"
"reference": "46cb272590cc6b7f5947655063a7fd6ea097838b"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/bamarni/composer-bin-plugin/zipball/67f9d314dc7ecf7245b8637906e151ccc62b8d24",
"reference": "67f9d314dc7ecf7245b8637906e151ccc62b8d24",
"url": "https://api.github.com/repos/bamarni/composer-bin-plugin/zipball/46cb272590cc6b7f5947655063a7fd6ea097838b",
"reference": "46cb272590cc6b7f5947655063a7fd6ea097838b",
"shasum": ""
},
"require": {
"composer-plugin-api": "^1.0"
"composer-plugin-api": "^1.0 || ^2.0",
"php": "^5.6 || ^7.0 || ^8.0"
},
"require-dev": {
"composer/composer": "dev-master",
"composer/composer": "^1.0 || ^2.0",
"symfony/console": "^2.5 || ^3.0 || ^4.0"
},
"type": "composer-plugin",
"extra": {
"class": "Bamarni\\Composer\\Bin\\Plugin",
"branch-alias": {
"dev-master": "1.1-dev"
}
"class": "Bamarni\\Composer\\Bin\\Plugin"
},
"autoload": {
"psr-4": {
@ -135,7 +179,16 @@
"license": [
"MIT"
],
"time": "2019-03-17T12:38:04+00:00"
"description": "No conflicts for your bin dependencies",
"keywords": [
"composer",
"conflict",
"dependency",
"executable",
"isolation",
"tool"
],
"time": "2020-04-17T09:33:47+00:00"
}
],
"aliases": [],
@ -152,5 +205,6 @@
"platform-dev": [],
"platform-overrides": {
"php": "7.1.33"
}
},
"plugin-api-version": "1.1.0"
}

44
lib/Parser/Construct.php

@ -16,19 +16,6 @@ trait Construct {
return trim(preg_replace("<\s{2,}>s", " ", $text));
}
/** Sanitizes a string of HTML
 *
 * Input which does not appear to contain any tags (no "<" followed by a
 * non-whitespace character) is handled directly: it is returned unchanged
 * when $outputHtml is true; otherwise its character references are decoded
 * and the result is passed through trimText() to yield plain text
 *
 * Input which does appear to contain tags is not yet handled: the
 * placeholder string "OOK!" is returned instead
 */
protected function sanitizeString(string $markup, bool $outputHtml = true): string {
    $hasMarkup = (bool) preg_match("/<\S/", $markup);
    if ($hasMarkup) {
        // actual markup sanitization is not implemented; this is a placeholder result
        return "OOK!";
    }
    // only entities can be present at this point, so most of the sanitization can be skipped
    if ($outputHtml) {
        return $markup;
    }
    $decoded = html_entity_decode($markup, \ENT_QUOTES | \ENT_HTML5, "UTF-8");
    return $this->trimText($decoded);
}
/** Tests whether a string is a valid e-mail address
*
* Accepts IDN hosts and Unicode localparts
@ -64,35 +51,8 @@ trait Construct {
}
protected function parseMediaType(string $type, ?Url $url = null): ?string {
if (preg_match('<^\s*([0-9a-z]+(?:/[!#$%&\'\*\+\-\.^_`|~0-9a-z]+)?)(?:\s|;|,|$)>i', $type, $match)) {
/* NOTE: The pattern used here is a subset of what is
technically allowed by RFC 7231: the "type" portion
is supposed to be as general as the "subtype" portion,
but in practice only alphabetic types have ever been
registered, making a more specific pattern more
practically useful for detecting media types.
See:
<https://tools.ietf.org/html/rfc7231#section-3.1.1.1>
<https://tools.ietf.org/html/rfc7230#section-3.2.6>
Additionally, types without subtypes are accepted as
we foresee the general type still being useful to
feed processors.
*/
return strtolower($match[1]);
}
if ($url && (strlen($url->getScheme()) && $url->host !== null)) {
$file = substr($url->getPath(), (int) strrpos($url->getPath(), "/"));
$ext = strrpos($file, ".");
if ($ext !== false) {
$ext = substr($file, $ext + 1);
if (strlen($ext)) {
return ($this->mime ?? ($this->mime = new \Mimey\MimeTypes))->getMimeType($ext);
}
}
} elseif ($url && $url->getScheme() === "data") {
return $this->parseMediaType($url->getPath()) ?? "text/plain";
if ($normalized = MimeType::parseLoose($type, $url)) {
return $normalized->essence;
}
return null;
}

148
lib/Parser/MimeType.php

@ -6,121 +6,59 @@
declare(strict_types=1);
namespace MensBeam\Lax\Parser;
use function PHPSTORM_META\type;
use MensBeam\Lax\Url;
/** @property-read string $essence */
class MimeType {
protected const TYPE_PATTERN = <<<'PATTERN'
<^
[\t\r\n ]* # optional leading whitespace
([^/]+) # type
/ # type/subtype delimiter
([^;]+) # subtype (possibly with trailing whitespace)
(;.*)? # optional parameters, to be parsed separately
[\t\r\n ]* # optional trailing whitespace
$>sx
PATTERN;
protected const PARAM_PATTERN = <<<'PATTERN'
<
[;\t\r\n ]* # parameter delimiter and leading whitespace, all optional
([^=;]*) # parameter name; may be empty
(?:= # parameter name/value delimiter
(
"(?:\\"|[^"])*(?:"|$)[^;]* # quoted parameter value and optional garbage
|[^;]* # unquoted parameter value (possibly with trailing whitespace)
)
)?
;? # optional trailing parameter delimiter
[\t\r\n ]* # optional trailing whitespace
>sx
PATTERN;
protected const TOKEN_PATTERN = '<^[A-Za-z0-9!#$%&\'*+\-\.\^_`|~]+$>s';
protected const BARE_VALUE_PATTERN = '<^[\t\x{20}-\x{7E}\x{80}-\x{FF}]+$>su';
protected const QUOTED_VALUE_PATTERN = '<^"((?:\\\"|[\t !\x{23}-\x{7E}\x{80}-\x{FF}])*)(?:"|$)>su';
protected const ESCAPE_PATTERN = '<\\\(.)>s';
/** {@inheritDoc} */
class MimeType extends \MensBeam\Mime\MimeType {
protected const MEDIUM_PATTERN = '<^[\t\r\n ]*(audio|video|image|text|application|document|executable)(?:$|[\t\r\n ;])>i';
protected const ATOM_TYPE_PATTERN = '<^\s*(|text|x?html)\s*$>i';
public $type = "";
public $subtype = "";
public $params = [];
private $essence;
public function __construct(string $type = "", string $subtype = "", array $params = []) {
$this->type = $type;
$this->subtype = $subtype;
$this->params = $params;
}
public function __get(string $name) {
if ($name === "essence") {
return $this->type."/".$this->subtype;
}
return $this->$name ?? null;
}
public function __toString(): string {
$out = $this->__get("essence");
if (is_array($this->params) && sizeof($this->params)) {
foreach ($this->params as $name => $value) {
$out .= ";$name=".(preg_match(self::TOKEN_PATTERN, $value) ? $value : '"'.str_replace(["\\", '"'], ["\\\\", "\\\""], $value).'"');
}
}
return $out;
}
public static function parse(string $mimeType): ?self {
if (preg_match(self::TYPE_PATTERN, $mimeType, $match)) {
[$mimeType, $type, $subtype, $params] = array_pad($match, 4, "");
if (strlen($type = static::parseHttpToken($type)) && strlen($subtype = static::parseHttpToken(rtrim($subtype, "\t\r\n ")))) {
return new static(strtolower($type), strtolower($subtype), static::parseParams($params));
}
}
return null;
}
protected static function parseParams(string $params): array {
$out = [];
if (preg_match_all(self::PARAM_PATTERN, $params, $matches, \PREG_SET_ORDER)) {
foreach ($matches as $match) {
[$param, $name, $value] = array_pad($match, 3, "");
$name = strtolower(static::parseHttpToken($name));
if (!strlen($name) || isset($out[$name])) {
continue;
} elseif (strlen($value) && $value[0] === '"') {
$value = static::parseHttpQuotedValue($value);
if (is_null($value)) {
continue;
}
} else {
$value = static::parseHttpBareValue($value);
if (!strlen($value)) {
continue;
protected static $mime;
/** Parses a MIME type, accepting types without a subtype */
public static function parseLoose(string $type, ?Url $url = null): ?self {
if ($normalized = self::parse($type)) {
return $normalized;
} elseif (preg_match(self::MEDIUM_PATTERN, $type, $match)) {
$type = strtolower($match[1]);
$type = ['document' => "text", 'executable' => "application"][$type] ?? $type;
return new self($type);
} elseif ($url && (strlen($url->getScheme()) && $url->host !== null)) {
$file = substr($url->getPath(), (int) strrpos($url->getPath(), "/"));
$ext = strrpos($file, ".");
if ($ext !== false) {
$ext = substr($file, $ext + 1);
if (strlen($ext)) {
$type = (self::$mime ?? (self::$mime = new \Mimey\MimeTypes))->getMimeType($ext);
if (!is_null($type)) {
return self::parse($type);
}
}
$out[$name] = $value;
}
} elseif ($url && $url->getScheme() === "data") {
$data = $url->getPath();
$candidate = substr($data, 0, (int) strpos($data, ","));
return self::parseLoose($candidate) ?? self::parse("text/plain");
}
return $out;
}
protected static function parseHttpToken(string $token): string {
if (preg_match(self::TOKEN_PATTERN, $token, $match)) {
return $token;
}
return "";
return null;
}
protected static function parseHttpBareValue(string $value): string {
$value = rtrim($value, "\t\r\n ");
if (preg_match(self::BARE_VALUE_PATTERN, $value, $match)) {
return $value;
/** Parses an Atom content type, which may be either a MIME type or the strings "text", "html", or "xhtml"
*
* If the supplied type is invalid, "unknown/unknown" is returned
*/
public static function parseAtom(string $type): self {
if (preg_match(self::ATOM_TYPE_PATTERN, $type, $match)) {
$type = ['' => "text/plain", 'text' => "text/plain", 'html' => "text/html", 'xhtml' => "application/xhtml+xml"][$match[1]] ?? null;
assert(!is_null($type));
}
return "";
return self::parse($type) ?? self::parse("unknown/unknown");
}
protected static function parseHttpQuotedValue(string $value): ?string {
if (preg_match(self::QUOTED_VALUE_PATTERN, $value, $match)) {
return preg_replace(self::ESCAPE_PATTERN, '$1', $match[1]);
public function __get(string $name) {
if ($name === "essence") {
return $this->type.(strlen($this->subtype ?? "") ? "/".$this->subtype : "");
}
return null;
return $this->$name ?? null;
}
}

45
lib/Parser/XML/Construct.php

@ -12,6 +12,7 @@ use MensBeam\Lax\Person\Person;
use MensBeam\Lax\Person\Collection as PersonCollection;
use MensBeam\Lax\Text;
use MensBeam\Lax\Date;
use MensBeam\Lax\Parser\MimeType;
use MensBeam\Lax\Url;
abstract class Construct {
@ -275,29 +276,33 @@ abstract class Construct {
// get the content type; assume "text" if not provided
$type = trim($node->getAttribute("type"));
$type = $this->parseMediaType((!strlen($type)) ? "text" : $type);
if ($type === "text" || $type === "text/plain") {
if (is_null($out->plain)) {
$plain = $this->trimText($node->textContent);
if (strlen($plain)) {
$out->plain = $plain;
$populated = true;
switch (MimeType::parseAtom(trim($node->getAttribute("type")))->essence) {
case "text/plain":
if (is_null($out->plain)) {
$plain = $this->trimText($node->textContent);
if (strlen($plain)) {
$out->plain = $plain;
$populated = true;
}
}
}
} elseif ($type === "html" || $type === "text/html") {
if (is_null($out->html)) {
$html = trim($node->textContent);
if (strlen($html)) {
$out->html = $html;
$out->htmlBase = strlen($node->baseURI) ? $node->baseURI : null;
break;
case "text/html":
if (is_null($out->html)) {
$html = trim($node->textContent);
if (strlen($html)) {
$out->html = $html;
$out->htmlBase = strlen($node->baseURI) ? $node->baseURI : null;
$populated = true;
}
}
break;
case "application/xhtml+xml":
if (is_null($out->xhtml) && ($xhtml = $this->fetchElement("xhtml:div", $node))) {
$out->xhtml = $xhtml->ownerDocument->saveXML($xhtml);
$out->xhtmlBase = strlen($xhtml->baseURI) ? $xhtml->baseURI : null;
$populated = true;
}
}
} elseif ($type === "xhtml" || $type === "application/xhtml+xml") {
if (is_null($out->xhtml) && ($xhtml = $this->fetchElement("xhtml:div", $node))) {
$out->xhtml = $xhtml->ownerDocument->saveXML($xhtml);
$out->xhtmlBase = strlen($xhtml->baseURI) ? $xhtml->baseURI : null;
$populated = true;
}
break;
}
}
return $populated ? $out : null;

20
tests/cases/Util/Mime/README

@ -1,20 +0,0 @@
The MimeType class used in Lax is an original work, but its test suite is
derived from an existing test corpus from the following source:
The Web Platform Test suite
<https://github.com/web-platform-tests/wpt/tree/62317fb983ca5687e4133d89f5523839fdab7f69/mimesniff/mime-types>
The license text is reproduced below, but please note that the license text
has changed since the last time the mimesniff portion of the test suite was
modified; the text included here is current as of 2019-04-25.
Copyright 2019 web-platform-tests contributors
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

3526
tests/cases/Util/Mime/generated-mime-types.json

File diff suppressed because it is too large

383
tests/cases/Util/Mime/mime-types.json

@ -1,383 +0,0 @@
[
"Basics",
{
"input": "text/html;charset=gbk",
"output": "text/html;charset=gbk",
"navigable": true,
"encoding": "GBK"
},
{
"input": "TEXT/HTML;CHARSET=GBK",
"output": "text/html;charset=GBK",
"navigable": true,
"encoding": "GBK"
},
"Legacy comment syntax",
{
"input": "text/html;charset=gbk(",
"output": "text/html;charset=\"gbk(\"",
"navigable": true,
"encoding": null
},
{
"input": "text/html;x=(;charset=gbk",
"output": "text/html;x=\"(\";charset=gbk",
"navigable": true,
"encoding": "GBK"
},
"Duplicate parameter",
{
"input": "text/html;charset=gbk;charset=windows-1255",
"output": "text/html;charset=gbk",
"navigable": true,
"encoding": "GBK"
},
{
"input": "text/html;charset=();charset=GBK",
"output": "text/html;charset=\"()\"",
"navigable": true,
"encoding": null
},
"Spaces",
{
"input": "text/html;charset =gbk",
"output": "text/html",
"navigable": true,
"encoding": null
},
{
"input": "text/html ;charset=gbk",
"output": "text/html;charset=gbk",
"navigable": true,
"encoding": "GBK"
},
{
"input": "text/html; charset=gbk",
"output": "text/html;charset=gbk",
"navigable": true,
"encoding": "GBK"
},
{
"input": "text/html;charset= gbk",
"output": "text/html;charset=\" gbk\"",
"navigable": true,
"encoding": "GBK"
},
{
"input": "text/html;charset= \"gbk\"",
"output": "text/html;charset=\" \\\"gbk\\\"\"",
"navigable": true,
"encoding": null
},
"0x0B and 0x0C",
{
"input": "text/html;charset=\u000Bgbk",
"output": "text/html",
"navigable": true,
"encoding": null
},
{
"input": "text/html;charset=\u000Cgbk",
"output": "text/html",
"navigable": true,
"encoding": null
},
{
"input": "text/html;\u000Bcharset=gbk",
"output": "text/html",
"navigable": true,
"encoding": null
},
{
"input": "text/html;\u000Ccharset=gbk",
"output": "text/html",
"navigable": true,
"encoding": null
},
"Single quotes are a token, not a delimiter",
{
"input": "text/html;charset='gbk'",
"output": "text/html;charset='gbk'",
"navigable": true,
"encoding": null
},
{
"input": "text/html;charset='gbk",
"output": "text/html;charset='gbk",
"navigable": true,
"encoding": null
},
{
"input": "text/html;charset=gbk'",
"output": "text/html;charset=gbk'",
"navigable": true,
"encoding": null
},
{
"input": "text/html;charset=';charset=GBK",
"output": "text/html;charset='",
"navigable": true,
"encoding": null
},
"Invalid parameters",
{
"input": "text/html;test;charset=gbk",
"output": "text/html;charset=gbk",
"navigable": true,
"encoding": "GBK"
},
{
"input": "text/html;test=;charset=gbk",
"output": "text/html;charset=gbk",
"navigable": true,
"encoding": "GBK"
},
{
"input": "text/html;';charset=gbk",
"output": "text/html;charset=gbk",
"navigable": true,
"encoding": "GBK"
},
{
"input": "text/html;\";charset=gbk",
"output": "text/html;charset=gbk",
"navigable": true,
"encoding": "GBK"
},
{
"input": "text/html ; ; charset=gbk",
"output": "text/html;charset=gbk",
"navigable": true,
"encoding": "GBK"
},
{
"input": "text/html;;;;charset=gbk",
"output": "text/html;charset=gbk",
"navigable": true,
"encoding": "GBK"
},
{
"input": "text/html;charset= \"\u007F;charset=GBK",
"output": "text/html;charset=GBK",
"navigable": true,
"encoding": "GBK"
},
{
"input": "text/html;charset=\"\u007F;charset=foo\";charset=GBK",
"output": "text/html;charset=GBK",
"navigable": true,
"encoding": "GBK"
},
"Double quotes",
{
"input": "text/html;charset=\"gbk\"",
"output": "text/html;charset=gbk",
"navigable": true,
"encoding": "GBK"
},
{
"input": "text/html;charset=\"gbk",
"output": "text/html;charset=gbk",
"navigable": true,
"encoding": "GBK"
},
{
"input": "text/html;charset=gbk\"",
"output": "text/html;charset=\"gbk\\\"\"",
"navigable": true,
"encoding": null
},
{
"input": "text/html;charset=\" gbk\"",
"output": "text/html;charset=\" gbk\"",
"navigable": true,
"encoding": "GBK"
},
{
"input": "text/html;charset=\"gbk \"",
"output": "text/html;charset=\"gbk \"",
"navigable": true,
"encoding": "GBK"
},
{
"input": "text/html;charset=\"\\ gbk\"",
"output": "text/html;charset=\" gbk\"",
"navigable": true,
"encoding": "GBK"
},
{
"input": "text/html;charset=\"\\g\\b\\k\"",
"output": "text/html;charset=gbk",
"navigable": true,
"encoding": "GBK"
},
{
"input": "text/html;charset=\"gbk\"x",
"output": "text/html;charset=gbk",
"navigable": true,
"encoding": "GBK"
},
{
"input": "text/html;charset=\"\";charset=GBK",
"output": "text/html;charset=\"\"",
"navigable": true,
"encoding": null
},
{
"input": "text/html;charset=\";charset=GBK",
"output": "text/html;charset=\";charset=GBK\"",
"navigable": true,
"encoding": null
},
"Unexpected code points",
{
"input": "text/html;charset={gbk}",
"output": "text/html;charset=\"{gbk}\"",
"navigable": true,
"encoding": null
},
"Parameter name longer than 127",
{
"input": "text/html;0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789=x;charset=gbk",
"output": "text/html;0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789=x;charset=gbk",
"navigable": true,
"encoding": "GBK"
},
"type/subtype longer than 127",
{
"input": "0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789/0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789",
"output": "0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789/0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789"
},
"Valid",
{
"input": "!#$%&'*+-.^_`|~0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz/!#$%&'*+-.^_`|~0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz;!#$%&'*+-.^_`|~0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz=!#$%&'*+-.^_`|~0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
"output": "!#$%&'*+-.^_`|~0123456789abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz/!#$%&'*+-.^_`|~0123456789abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz;!#$%&'*+-.^_`|~0123456789abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz=!#$%&'*+-.^_`|~0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
},
{
"input": "x/x;x=\"\t !\\\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u0080\u0081\u0082\u0083\u0084\u0085\u0086\u0087\u0088\u0089\u008A\u008B\u008C\u008D\u008E\u008F\u0090\u0091\u0092\u0093\u0094\u0095\u0096\u0097\u0098\u0099\u009A\u009B\u009C\u009D\u009E\u009F\u00A0\u00A1\u00A2\u00A3\u00A4\u00A5\u00A6\u00A7\u00A8\u00A9\u00AA\u00AB\u00AC\u00AD\u00AE\u00AF\u00B0\u00B1\u00B2\u00B3\u00B4\u00B5\u00B6\u00B7\u00B8\u00B9\u00BA\u00BB\u00BC\u00BD\u00BE\u00BF\u00C0\u00C1\u00C2\u00C3\u00C4\u00C5\u00C6\u00C7\u00C8\u00C9\u00CA\u00CB\u00CC\u00CD\u00CE\u00CF\u00D0\u00D1\u00D2\u00D3\u00D4\u00D5\u00D6\u00D7\u00D8\u00D9\u00DA\u00DB\u00DC\u00DD\u00DE\u00DF\u00E0\u00E1\u00E2\u00E3\u00E4\u00E5\u00E6\u00E7\u00E8\u00E9\u00EA\u00EB\u00EC\u00ED\u00EE\u00EF\u00F0\u00F1\u00F2\u00F3\u00F4\u00F5\u00F6\u00F7\u00F8\u00F9\u00FA\u00FB\u00FC\u00FD\u00FE\u00FF\"",
"output": "x/x;x=\"\t !\\\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u0080\u0081\u0082\u0083\u0084\u0085\u0086\u0087\u0088\u0089\u008A\u008B\u008C\u008D\u008E\u008F\u0090\u0091\u0092\u0093\u0094\u0095\u0096\u0097\u0098\u0099\u009A\u009B\u009C\u009D\u009E\u009F\u00A0\u00A1\u00A2\u00A3\u00A4\u00A5\u00A6\u00A7\u00A8\u00A9\u00AA\u00AB\u00AC\u00AD\u00AE\u00AF\u00B0\u00B1\u00B2\u00B3\u00B4\u00B5\u00B6\u00B7\u00B8\u00B9\u00BA\u00BB\u00BC\u00BD\u00BE\u00BF\u00C0\u00C1\u00C2\u00C3\u00C4\u00C5\u00C6\u00C7\u00C8\u00C9\u00CA\u00CB\u00CC\u00CD\u00CE\u00CF\u00D0\u00D1\u00D2\u00D3\u00D4\u00D5\u00D6\u00D7\u00D8\u00D9\u00DA\u00DB\u00DC\u00DD\u00DE\u00DF\u00E0\u00E1\u00E2\u00E3\u00E4\u00E5\u00E6\u00E7\u00E8\u00E9\u00EA\u00EB\u00EC\u00ED\u00EE\u00EF\u00F0\u00F1\u00F2\u00F3\u00F4\u00F5\u00F6\u00F7\u00F8\u00F9\u00FA\u00FB\u00FC\u00FD\u00FE\u00FF\""
},
"End-of-file handling",
{
"input": "x/x;test",
"output": "x/x"
},
{
"input": "x/x;test=\"\\",
"output": "x/x;test=\"\\\\\""
},
"Whitespace (not handled by generated-mime-types.json or above)",
{
"input": "x/x;x= ",
"output": "x/x"
},
{
"input": "x/x;x=\t",
"output": "x/x"
},
{
"input": "x/x\n\r\t ;x=x",
"output": "x/x;x=x"
},
{
"input": "\n\r\t x/x;x=x\n\r\t ",
"output": "x/x;x=x"
},
{
"input": "x/x;\n\r\t x=x\n\r\t ;x=y",
"output": "x/x;x=x"
},
"Latin1",
{
"input": "text/html;test=\u00FF;charset=gbk",
"output": "text/html;test=\"\u00FF\";charset=gbk",
"navigable": true,
"encoding": "GBK"
},
">Latin1",
{
"input": "x/x;test=\uFFFD;x=x",
"output": "x/x;x=x"
},
"Failure",
{
"input": "\u000Bx/x",
"output": null
},
{
"input": "\u000Cx/x",
"output": null
},
{
"input": "x/x\u000B",
"output": null
},
{
"input": "x/x\u000C",
"output": null
},
{
"input": "",
"output": null
},
{
"input": "\t",
"output": null
},
{
"input": "/",
"output": null
},
{
"input": "bogus",
"output": null
},
{
"input": "bogus/",
"output": null
},
{
"input": "bogus/ ",
"output": null
},
{
"input": "bogus/bogus/;",
"output": null
},
{
"input": "</>",
"output": null
},
{
"input": "(/)",
"output": null
},
{
"input": "ÿ/ÿ",
"output": null
},
{
"input": "text/html(;doesnot=matter",
"output": null
},
{
"input": "{/}",
"output": null
},
{
"input": "\u0100/\u0100",
"output": null
},
{
"input": "text /html",
"output": null
},
{
"input": "text/ html",
"output": null
},
{
"input": "\"text/html\"",
"output": null
}
]

42
tests/cases/Util/MimeTypeTest.php

@ -1,42 +0,0 @@
<?php
/** @license MIT
* Copyright 2018 J. King
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Lax\TestCase\Util;
use MensBeam\Lax\Parser\MimeType as Mime;
/** Runs the JSON test corpus stored in the "Mime" directory against the MIME type parser
 *
 * @covers \MensBeam\Lax\Parser\MimeType
 */
class MimeTypeTest extends \PHPUnit\Framework\TestCase {
    /** Checks a single case from the test corpus
     *
     * A null expectation means the input must fail to parse; otherwise the
     * parsed type, cast back to a string, must equal the expected output
     *
     * @dataProvider provideStandardTests
     */
    public function testStandardTestSuite(string $input, ?string $exp): void {
        if (is_null($exp)) {
            $this->assertNull(Mime::parse($input));
        } else {
            $this->assertSame($exp, (string) Mime::parse($input));
        }
    }

    /** Yields one data set per test case found in the JSON files of the "Mime" directory
     *
     * Each file is expected to hold an array whose members are either test
     * objects (with "input" and "output" members) or strings; a string is a
     * description of the test which follows it and is not itself a test
     *
     * Data sets are keyed by file name, the test's position among the tests
     * of that file (description strings are not counted), and the
     * description, if any
     *
     * @throws \UnexpectedValueException if a file does not decode to an array
     */
    public function provideStandardTests(): iterable {
        foreach (new \GlobIterator(__DIR__."/Mime/*.json", \FilesystemIterator::CURRENT_AS_PATHNAME | \FilesystemIterator::KEY_AS_FILENAME) as $file => $path) {
            $tests = json_decode(file_get_contents($path));
            if (!is_array($tests)) {
                // fail with a meaningful message rather than a warning from foreach
                throw new \UnexpectedValueException("Test file $file does not contain a JSON array: ".json_last_error_msg());
            }
            $indexOffset = 0;
            $description = "";
            foreach ($tests as $index => $test) {
                if (is_string($test)) {
                    // the array member is a description of the next member
                    // the index offset should be decremented, the description stored, and this entry skipped
                    $indexOffset--;
                    $description = $test;
                    continue;
                }
                $index += $indexOffset;
                $label = strlen($description) ? ": $description" : "";
                yield "$file #$index$label" => [$test->input, $test->output];
                // a description applies only to the test immediately following it
                $description = "";
            }
        }
    }
}
Loading…
Cancel
Save