Browse Source

Replace Content-Type parser with proper version

ns
J. King 4 years ago
parent
commit
ad0a8ae27a
  1. 5
      composer.json
  2. 90
      composer.lock
  3. 58
      lib/Charset.php

5
composer.json

@ -3,9 +3,10 @@
"description": "Parses HTML5 text into a php DOMDocument",
"type": "library",
"require": {
"php": "^7.0",
"php": "^7.1",
"ext-dom": "*",
"mensbeam/intl": "*"
"mensbeam/intl": "*",
"mensbeam/mimesniff": "^0.2.0"
},
"suggest": {
"ext-ctype": "Improved performance"

90
composer.lock

@ -4,16 +4,22 @@
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
"This file is @generated automatically"
],
"content-hash": "037fcc961f6e8f68b15379be21f45854",
"content-hash": "96a5c225aba74182ddfac27fa5cee1e8",
"packages": [
{
"name": "mensbeam/intl",
"version": "0.7.0",
"source": {
"type": "git",
"url": "https://code.mensbeam.com/MensBeam/intl",
"url": "https://github.com/mensbeam/intl.git",
"reference": "c4a2ae17142a1846c841c5b5b9246e67d56c568e"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/mensbeam/intl/zipball/c4a2ae17142a1846c841c5b5b9246e67d56c568e",
"reference": "c4a2ae17142a1846c841c5b5b9246e67d56c568e",
"shasum": ""
},
"require": {
"php": "^7.0"
},
@ -50,36 +56,80 @@
"utf8"
],
"time": "2019-12-21T01:56:59+00:00"
},
{
"name": "mensbeam/mimesniff",
"version": "0.2.0",
"source": {
"type": "git",
"url": "https://github.com/mensbeam/mime.git",
"reference": "d3f6c031687d532c1bb32e3eb362da4a948d4750"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/mensbeam/mime/zipball/d3f6c031687d532c1bb32e3eb362da4a948d4750",
"reference": "d3f6c031687d532c1bb32e3eb362da4a948d4750",
"shasum": ""
},
"require": {
"php": "^7.1"
},
"require-dev": {
"bamarni/composer-bin-plugin": "^1.3",
"ext-intl": "*"
},
"type": "library",
"autoload": {
"psr-4": {
"MensBeam\\Mime\\": "lib/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "J. King",
"email": "jking@jkingweb.ca",
"homepage": "https://jkingweb.ca/"
}
],
"description": "An implementation of the WHATWG MIME Sniffing specification",
"keywords": [
"WHATWG",
"mime",
"mimesniff"
],
"time": "2020-04-19T16:07:41+00:00"
}
],
"packages-dev": [
{
"name": "bamarni/composer-bin-plugin",
"version": "v1.3.0",
"version": "1.4.1",
"source": {
"type": "git",
"url": "https://github.com/bamarni/composer-bin-plugin.git",
"reference": "67f9d314dc7ecf7245b8637906e151ccc62b8d24"
"reference": "9329fb0fbe29e0e1b2db8f4639a193e4f5406225"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/bamarni/composer-bin-plugin/zipball/67f9d314dc7ecf7245b8637906e151ccc62b8d24",
"reference": "67f9d314dc7ecf7245b8637906e151ccc62b8d24",
"url": "https://api.github.com/repos/bamarni/composer-bin-plugin/zipball/9329fb0fbe29e0e1b2db8f4639a193e4f5406225",
"reference": "9329fb0fbe29e0e1b2db8f4639a193e4f5406225",
"shasum": ""
},
"require": {
"composer-plugin-api": "^1.0"
"composer-plugin-api": "^1.0 || ^2.0",
"php": "^5.5.9 || ^7.0 || ^8.0"
},
"require-dev": {
"composer/composer": "dev-master",
"composer/composer": "^1.0 || ^2.0",
"symfony/console": "^2.5 || ^3.0 || ^4.0"
},
"type": "composer-plugin",
"extra": {
"class": "Bamarni\\Composer\\Bin\\Plugin",
"branch-alias": {
"dev-master": "1.1-dev"
}
"class": "Bamarni\\Composer\\Bin\\Plugin"
},
"autoload": {
"psr-4": {
@ -90,7 +140,16 @@
"license": [
"MIT"
],
"time": "2019-03-17T12:38:04+00:00"
"description": "No conflicts for your bin dependencies",
"keywords": [
"composer",
"conflict",
"dependency",
"executable",
"isolation",
"tool"
],
"time": "2020-05-03T08:27:20+00:00"
}
],
"aliases": [],
@ -99,8 +158,9 @@
"prefer-stable": false,
"prefer-lowest": false,
"platform": {
"php": "^7.0",
"php": "^7.1",
"ext-dom": "*"
},
"platform-dev": []
"platform-dev": [],
"plugin-api-version": "1.1.0"
}

58
lib/Charset.php

@ -3,6 +3,7 @@ declare(strict_types=1);
namespace dW\HTML5;
use MensBeam\Intl\Encoding;
use MensBeam\Mime\MimeType;
abstract class Charset {
/** Finds a Unicode byte order mark in a byte stream
@ -37,58 +38,11 @@ abstract class Charset {
* @param string $contentType The value of a Content-Type header-field
*/
public static function fromTransport(string $contentType): ?string {
// Try to sniff out a charset from a Content-Type header-field.
// This does cut some corners, but should be sufficient for practical use
$s = preg_replace("/\s+/", " ", strtolower($contentType));
$pos = 0;
$end = strlen($s);
// skip the type
while ($pos < $end && @$s[$pos++] !== "/");
// skip the subtype
while ($pos < $end && @$s[$pos++] !== ";");
// check parameters in sequence
while ($pos < $end) {
// skip any leading whitespace
if (@$s[$pos] === " ") {
$pos++;
}
// collect characters for the parameter name
$param = "";
while ($pos < $end && @$s[$pos] !== "=") {
$param .= @$s[$pos++];
}
// skip the equals sign
$pos++;
if ($s[$pos] === '"') {
// Value is a quoted-string
$pos++;
$value = "";
while (!in_array($c = @$s[$pos++], ['"', ""])) {
if ($c === "\\") {
$value .= @$s[$pos++];
} else {
$value .= $c;
}
}
// only interpret the value if a closing quotation mark was seen
if ($c !== '"') {
$value = "";
}
} else {
// Value is a bare token
$value = "";
while (!in_array($c = @$s[$pos++], [';', " ", ""])) {
$value .= $c;
}
}
// if the parameter was the character set, interpret its value and return
if ($param === "charset") {
$encoding = Encoding::matchLabel($value);
if ($encoding) {
return $encoding['name'];
} else {
return null;
}
$type = MimeType::parseBytes($contentType);
if ($type && isset($type->params['charset'])) {
$encoding = Encoding::matchLabel($type->params['charset']);
if ($encoding) {
return $encoding['name'];
}
}
return null;

Loading…
Cancel
Save