diff --git a/composer.json b/composer.json index 6db5976..3cfe219 100644 --- a/composer.json +++ b/composer.json @@ -3,9 +3,10 @@ "description": "Parses HTML5 text into a php DOMDocument", "type": "library", "require": { - "php": "^7.0", + "php": "^7.1", "ext-dom": "*", - "mensbeam/intl": "*" + "mensbeam/intl": "*", + "mensbeam/mimesniff": "^0.2.0" }, "suggest": { "ext-ctype": "Improved performance" diff --git a/composer.lock b/composer.lock index 8a6eb1b..8ff9449 100644 --- a/composer.lock +++ b/composer.lock @@ -4,16 +4,22 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "037fcc961f6e8f68b15379be21f45854", + "content-hash": "96a5c225aba74182ddfac27fa5cee1e8", "packages": [ { "name": "mensbeam/intl", "version": "0.7.0", "source": { "type": "git", - "url": "https://code.mensbeam.com/MensBeam/intl", + "url": "https://github.com/mensbeam/intl.git", "reference": "c4a2ae17142a1846c841c5b5b9246e67d56c568e" }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/mensbeam/intl/zipball/c4a2ae17142a1846c841c5b5b9246e67d56c568e", + "reference": "c4a2ae17142a1846c841c5b5b9246e67d56c568e", + "shasum": "" + }, "require": { "php": "^7.0" }, @@ -50,36 +56,80 @@ "utf8" ], "time": "2019-12-21T01:56:59+00:00" + }, + { + "name": "mensbeam/mimesniff", + "version": "0.2.0", + "source": { + "type": "git", + "url": "https://github.com/mensbeam/mime.git", + "reference": "d3f6c031687d532c1bb32e3eb362da4a948d4750" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/mensbeam/mime/zipball/d3f6c031687d532c1bb32e3eb362da4a948d4750", + "reference": "d3f6c031687d532c1bb32e3eb362da4a948d4750", + "shasum": "" + }, + "require": { + "php": "^7.1" + }, + "require-dev": { + "bamarni/composer-bin-plugin": "^1.3", + "ext-intl": "*" + }, + "type": "library", + "autoload": { + "psr-4": { + "MensBeam\\Mime\\": "lib/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "J. King", + "email": "jking@jkingweb.ca", + "homepage": "https://jkingweb.ca/" + } + ], + "description": "An implementation of the WHATWG MIME Sniffing specification", + "keywords": [ + "WHATWG", + "mime", + "mimesniff" + ], + "time": "2020-04-19T16:07:41+00:00" } ], "packages-dev": [ { "name": "bamarni/composer-bin-plugin", - "version": "v1.3.0", + "version": "1.4.1", "source": { "type": "git", "url": "https://github.com/bamarni/composer-bin-plugin.git", - "reference": "67f9d314dc7ecf7245b8637906e151ccc62b8d24" + "reference": "9329fb0fbe29e0e1b2db8f4639a193e4f5406225" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/bamarni/composer-bin-plugin/zipball/67f9d314dc7ecf7245b8637906e151ccc62b8d24", - "reference": "67f9d314dc7ecf7245b8637906e151ccc62b8d24", + "url": "https://api.github.com/repos/bamarni/composer-bin-plugin/zipball/9329fb0fbe29e0e1b2db8f4639a193e4f5406225", + "reference": "9329fb0fbe29e0e1b2db8f4639a193e4f5406225", "shasum": "" }, "require": { - "composer-plugin-api": "^1.0" + "composer-plugin-api": "^1.0 || ^2.0", + "php": "^5.5.9 || ^7.0 || ^8.0" }, "require-dev": { - "composer/composer": "dev-master", + "composer/composer": "^1.0 || ^2.0", "symfony/console": "^2.5 || ^3.0 || ^4.0" }, "type": "composer-plugin", "extra": { - "class": "Bamarni\\Composer\\Bin\\Plugin", - "branch-alias": { - "dev-master": "1.1-dev" - } + "class": "Bamarni\\Composer\\Bin\\Plugin" }, "autoload": { "psr-4": { @@ -90,7 +140,16 @@ "license": [ "MIT" ], - "time": "2019-03-17T12:38:04+00:00" + "description": "No conflicts for your bin dependencies", + "keywords": [ + "composer", + "conflict", + "dependency", + "executable", + "isolation", + "tool" + ], + "time": "2020-05-03T08:27:20+00:00" } ], "aliases": [], @@ -99,8 +158,9 @@ "prefer-stable": false, "prefer-lowest": false, "platform": { - "php": "^7.0", + "php": "^7.1", "ext-dom": "*" }, - "platform-dev": [] + "platform-dev": [], + "plugin-api-version": "1.1.0" } diff --git a/lib/Charset.php b/lib/Charset.php index 16c40b9..b00accc 100644 --- a/lib/Charset.php +++ b/lib/Charset.php @@ -3,6 +3,7 @@ declare(strict_types=1); namespace dW\HTML5; use MensBeam\Intl\Encoding; +use MensBeam\Mime\MimeType; abstract class Charset { /** Finds a Unicode byte order mark by a byte stream @@ -37,58 +38,11 @@ abstract class Charset { * @param string $contentType The value of a Content-Type header-field */ public static function fromTransport(string $contentType): ?string { - // Try to sniff out a charset from a Content-Type header-field. - // This does cut some corners, but should be sufficient for practical use - $s = preg_replace("/\s+/", " ", strtolower($contentType)); - $pos = 0; - $end = strlen($s); - // skip the type - while ($pos < $end && @$s[$pos++] !== "/"); - // skip the subtype - while ($pos < $end && @$s[$pos++] !== ";"); - // check parameters in sequence - while ($pos < $end) { - // skip any leading whitespace - if (@$s[$pos] === " ") { - $pos++; - } - // collect characters for the parameter name - $param = ""; - while ($pos < $end && @$s[$pos] !== "=") { - $param .= @$s[$pos++]; - } - // skip the equals sign - $pos++; - if ($s[$pos] === '"') { - // Value is a quoted-string - $pos++; - $value = ""; - while (!in_array($c = @$s[$pos++], ['"', ""])) { - if ($c === "\\") { - $value .= @$s[$pos++]; - } else { - $value .= $c; - } - } - // only interpret the value if a closing quotation mark was seen - if ($c !== '"') { - $value = ""; - } - } else { - // Value is a bare token - $value = ""; - while (!in_array($c = @$s[$pos++], [';', " ", ""])) { - $value .= $c; - } - } - // if the parameter was the character set, interpret its value and return - if ($param === "charset") { - $encoding = Encoding::matchLabel($value); - if ($encoding) { - return $encoding['name']; - } else { - return null; - } + $type = MimeType::parseBytes($contentType); + if ($type && isset($type->params['charset'])) { + $encoding = Encoding::matchLabel($type->params['charset']); + if ($encoding) { + return $encoding['name']; } } return null;