Browse Source

Replace Content-Type parser with proper version

ns
J. King 4 years ago
parent
commit
ad0a8ae27a
  1. 5
      composer.json
  2. 90
      composer.lock
  3. 58
      lib/Charset.php

5
composer.json

@ -3,9 +3,10 @@
"description": "Parses HTML5 text into a php DOMDocument", "description": "Parses HTML5 text into a php DOMDocument",
"type": "library", "type": "library",
"require": { "require": {
"php": "^7.0", "php": "^7.1",
"ext-dom": "*", "ext-dom": "*",
"mensbeam/intl": "*" "mensbeam/intl": "*",
"mensbeam/mimesniff": "^0.2.0"
}, },
"suggest": { "suggest": {
"ext-ctype": "Improved performance" "ext-ctype": "Improved performance"

90
composer.lock

@ -4,16 +4,22 @@
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
"This file is @generated automatically" "This file is @generated automatically"
], ],
"content-hash": "037fcc961f6e8f68b15379be21f45854", "content-hash": "96a5c225aba74182ddfac27fa5cee1e8",
"packages": [ "packages": [
{ {
"name": "mensbeam/intl", "name": "mensbeam/intl",
"version": "0.7.0", "version": "0.7.0",
"source": { "source": {
"type": "git", "type": "git",
"url": "https://code.mensbeam.com/MensBeam/intl", "url": "https://github.com/mensbeam/intl.git",
"reference": "c4a2ae17142a1846c841c5b5b9246e67d56c568e" "reference": "c4a2ae17142a1846c841c5b5b9246e67d56c568e"
}, },
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/mensbeam/intl/zipball/c4a2ae17142a1846c841c5b5b9246e67d56c568e",
"reference": "c4a2ae17142a1846c841c5b5b9246e67d56c568e",
"shasum": ""
},
"require": { "require": {
"php": "^7.0" "php": "^7.0"
}, },
@ -50,36 +56,80 @@
"utf8" "utf8"
], ],
"time": "2019-12-21T01:56:59+00:00" "time": "2019-12-21T01:56:59+00:00"
},
{
"name": "mensbeam/mimesniff",
"version": "0.2.0",
"source": {
"type": "git",
"url": "https://github.com/mensbeam/mime.git",
"reference": "d3f6c031687d532c1bb32e3eb362da4a948d4750"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/mensbeam/mime/zipball/d3f6c031687d532c1bb32e3eb362da4a948d4750",
"reference": "d3f6c031687d532c1bb32e3eb362da4a948d4750",
"shasum": ""
},
"require": {
"php": "^7.1"
},
"require-dev": {
"bamarni/composer-bin-plugin": "^1.3",
"ext-intl": "*"
},
"type": "library",
"autoload": {
"psr-4": {
"MensBeam\\Mime\\": "lib/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "J. King",
"email": "jking@jkingweb.ca",
"homepage": "https://jkingweb.ca/"
}
],
"description": "An implementation of the WHATWG MIME Sniffing specification",
"keywords": [
"WHATWG",
"mime",
"mimesniff"
],
"time": "2020-04-19T16:07:41+00:00"
} }
], ],
"packages-dev": [ "packages-dev": [
{ {
"name": "bamarni/composer-bin-plugin", "name": "bamarni/composer-bin-plugin",
"version": "v1.3.0", "version": "1.4.1",
"source": { "source": {
"type": "git", "type": "git",
"url": "https://github.com/bamarni/composer-bin-plugin.git", "url": "https://github.com/bamarni/composer-bin-plugin.git",
"reference": "67f9d314dc7ecf7245b8637906e151ccc62b8d24" "reference": "9329fb0fbe29e0e1b2db8f4639a193e4f5406225"
}, },
"dist": { "dist": {
"type": "zip", "type": "zip",
"url": "https://api.github.com/repos/bamarni/composer-bin-plugin/zipball/67f9d314dc7ecf7245b8637906e151ccc62b8d24", "url": "https://api.github.com/repos/bamarni/composer-bin-plugin/zipball/9329fb0fbe29e0e1b2db8f4639a193e4f5406225",
"reference": "67f9d314dc7ecf7245b8637906e151ccc62b8d24", "reference": "9329fb0fbe29e0e1b2db8f4639a193e4f5406225",
"shasum": "" "shasum": ""
}, },
"require": { "require": {
"composer-plugin-api": "^1.0" "composer-plugin-api": "^1.0 || ^2.0",
"php": "^5.5.9 || ^7.0 || ^8.0"
}, },
"require-dev": { "require-dev": {
"composer/composer": "dev-master", "composer/composer": "^1.0 || ^2.0",
"symfony/console": "^2.5 || ^3.0 || ^4.0" "symfony/console": "^2.5 || ^3.0 || ^4.0"
}, },
"type": "composer-plugin", "type": "composer-plugin",
"extra": { "extra": {
"class": "Bamarni\\Composer\\Bin\\Plugin", "class": "Bamarni\\Composer\\Bin\\Plugin"
"branch-alias": {
"dev-master": "1.1-dev"
}
}, },
"autoload": { "autoload": {
"psr-4": { "psr-4": {
@ -90,7 +140,16 @@
"license": [ "license": [
"MIT" "MIT"
], ],
"time": "2019-03-17T12:38:04+00:00" "description": "No conflicts for your bin dependencies",
"keywords": [
"composer",
"conflict",
"dependency",
"executable",
"isolation",
"tool"
],
"time": "2020-05-03T08:27:20+00:00"
} }
], ],
"aliases": [], "aliases": [],
@ -99,8 +158,9 @@
"prefer-stable": false, "prefer-stable": false,
"prefer-lowest": false, "prefer-lowest": false,
"platform": { "platform": {
"php": "^7.0", "php": "^7.1",
"ext-dom": "*" "ext-dom": "*"
}, },
"platform-dev": [] "platform-dev": [],
"plugin-api-version": "1.1.0"
} }

58
lib/Charset.php

@ -3,6 +3,7 @@ declare(strict_types=1);
namespace dW\HTML5; namespace dW\HTML5;
use MensBeam\Intl\Encoding; use MensBeam\Intl\Encoding;
use MensBeam\Mime\MimeType;
abstract class Charset { abstract class Charset {
/** Finds a Unicode byte order mark in a byte stream /** Finds a Unicode byte order mark in a byte stream
@ -37,58 +38,11 @@ abstract class Charset {
* @param string $contentType The value of a Content-Type header-field * @param string $contentType The value of a Content-Type header-field
*/ */
public static function fromTransport(string $contentType): ?string { public static function fromTransport(string $contentType): ?string {
// Try to sniff out a charset from a Content-Type header-field. $type = MimeType::parseBytes($contentType);
// This does cut some corners, but should be sufficient for practical use if ($type && isset($type->params['charset'])) {
$s = preg_replace("/\s+/", " ", strtolower($contentType)); $encoding = Encoding::matchLabel($type->params['charset']);
$pos = 0; if ($encoding) {
$end = strlen($s); return $encoding['name'];
// skip the type
while ($pos < $end && @$s[$pos++] !== "/");
// skip the subtype
while ($pos < $end && @$s[$pos++] !== ";");
// check parameters in sequence
while ($pos < $end) {
// skip any leading whitespace
if (@$s[$pos] === " ") {
$pos++;
}
// collect characters for the parameter name
$param = "";
while ($pos < $end && @$s[$pos] !== "=") {
$param .= @$s[$pos++];
}
// skip the equals sign
$pos++;
if ($s[$pos] === '"') {
// Value is a quoted-string
$pos++;
$value = "";
while (!in_array($c = @$s[$pos++], ['"', ""])) {
if ($c === "\\") {
$value .= @$s[$pos++];
} else {
$value .= $c;
}
}
// only interpret the value if a closing quotation mark was seen
if ($c !== '"') {
$value = "";
}
} else {
// Value is a bare token
$value = "";
while (!in_array($c = @$s[$pos++], [';', " ", ""])) {
$value .= $c;
}
}
// if the parameter was the character set, interpret its value and return
if ($param === "charset") {
$encoding = Encoding::matchLabel($value);
if ($encoding) {
return $encoding['name'];
} else {
return null;
}
} }
} }
return null; return null;

Loading…
Cancel
Save