From f9e3d795a74e9db9dd3a6c6528634c816e2af0eb Mon Sep 17 00:00:00 2001 From: "J. King" Date: Fri, 20 Dec 2019 18:32:44 -0500 Subject: [PATCH] Add label matcher --- CHANGELOG | 6 +++--- lib/Encoding.php | 36 ++++++++++++++++++++++++++++++++++++ tools/mkindex.php | 4 ++++ tools/mklabels.php | 40 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 83 insertions(+), 3 deletions(-) create mode 100644 lib/Encoding.php create mode 100644 tools/mklabels.php diff --git a/CHANGELOG b/CHANGELOG index 93e96f8..b888cf4 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,9 +1,9 @@ -Version 0.6.0 (2019-12-18) +Version 0.7.0 (2019-12-20) ========================== New features: -- Added $allowSurrogates parameter to Encoding constructor -- Added posErr public instance property to Encoding +- Added \MensBeam\Intl\Encoding abstract class with createDecoder() and + matchLabel() static methods Version 0.5.0 (2019-12-13) ========================== diff --git a/lib/Encoding.php b/lib/Encoding.php new file mode 100644 index 0000000..b613742 --- /dev/null +++ b/lib/Encoding.php @@ -0,0 +1,36 @@ +"Big5",'big5-hkscs'=>"Big5",'cn-big5'=>"Big5",'csbig5'=>"Big5",'x-x-big5'=>"Big5",'cseuckr'=>"EUC-KR",'csksc56011987'=>"EUC-KR",'euc-kr'=>"EUC-KR",'iso-ir-149'=>"EUC-KR",'korean'=>"EUC-KR",'ks_c_5601-1987'=>"EUC-KR",'ks_c_5601-1989'=>"EUC-KR",'ksc5601'=>"EUC-KR",'ksc_5601'=>"EUC-KR",'windows-949'=>"EUC-KR",'gb18030'=>"gb18030",'chinese'=>"GBK",'csgb2312'=>"GBK",'csiso58gb231280'=>"GBK",'gb2312'=>"GBK",'gb_2312'=>"GBK",'gb_2312-80'=>"GBK",'gbk'=>"GBK",'iso-ir-58'=>"GBK",'x-gbk'=>"GBK",'866'=>"IBM866",'cp866'=>"IBM866",'csibm866'=>"IBM866",'ibm866'=>"IBM866",'csisolatin6'=>"ISO-8859-10",'iso-8859-10'=>"ISO-8859-10",'iso-ir-157'=>"ISO-8859-10",'iso8859-10'=>"ISO-8859-10",'iso885910'=>"ISO-8859-10",'l6'=>"ISO-8859-10",'latin6'=>"ISO-8859-10",'iso-8859-13'=>"ISO-8859-13",'iso8859-13'=>"ISO-8859-13",'iso885913'=>"ISO-8859-13",'iso-8859-14'=>"ISO-8859-14",'iso8859-14'=>"ISO-8859-14",'iso885914'=>"ISO-8859-14",'csisolatin9'=>"ISO-8859-15",'iso-8859-15'=>"ISO-8859-15",'iso8859-15'=>"ISO-8859-15",'iso885915'=>"ISO-8859-15",'iso_8859-15'=>"ISO-8859-15",'l9'=>"ISO-8859-15",'iso-8859-16'=>"ISO-8859-16",'csisolatin2'=>"ISO-8859-2",'iso-8859-2'=>"ISO-8859-2",'iso-ir-101'=>"ISO-8859-2",'iso8859-2'=>"ISO-8859-2",'iso88592'=>"ISO-8859-2",'iso_8859-2'=>"ISO-8859-2",'iso_8859-2:1987'=>"ISO-8859-2",'l2'=>"ISO-8859-2",'latin2'=>"ISO-8859-2",'csisolatin3'=>"ISO-8859-3",'iso-8859-3'=>"ISO-8859-3",'iso-ir-109'=>"ISO-8859-3",'iso8859-3'=>"ISO-8859-3",'iso88593'=>"ISO-8859-3",'iso_8859-3'=>"ISO-8859-3",'iso_8859-3:1988'=>"ISO-8859-3",'l3'=>"ISO-8859-3",'latin3'=>"ISO-8859-3",'csisolatin4'=>"ISO-8859-4",'iso-8859-4'=>"ISO-8859-4",'iso-ir-110'=>"ISO-8859-4",'iso8859-4'=>"ISO-8859-4",'iso88594'=>"ISO-8859-4",'iso_8859-4'=>"ISO-8859-4",'iso_8859-4:1988'=>"ISO-8859-4",'l4'=>"ISO-8859-4",'latin4'=>"ISO-8859-4",'csisolatincyrillic'=>"ISO-8859-5",'cyrillic'=>"ISO-8859-5",'iso-8859-5'=>"ISO-8859-5",'iso-ir-144'=>"ISO-8859-5",'iso8859-5'=>"ISO-8859-5",'iso88595'=>"ISO-8859-5",'iso_8859-5'=>"ISO-8859-5",'iso_8859-5:1988'=>"ISO-8859-5",'arabic'=>"ISO-8859-6",'asmo-708'=>"ISO-8859-6",'csiso88596e'=>"ISO-8859-6",'csiso88596i'=>"ISO-8859-6",'csisolatinarabic'=>"ISO-8859-6",'ecma-114'=>"ISO-8859-6",'iso-8859-6'=>"ISO-8859-6",'iso-8859-6-e'=>"ISO-8859-6",'iso-8859-6-i'=>"ISO-8859-6",'iso-ir-127'=>"ISO-8859-6",'iso8859-6'=>"ISO-8859-6",'iso88596'=>"ISO-8859-6",'iso_8859-6'=>"ISO-8859-6",'iso_8859-6:1987'=>"ISO-8859-6",'csisolatingreek'=>"ISO-8859-7",'ecma-118'=>"ISO-8859-7",'elot_928'=>"ISO-8859-7",'greek'=>"ISO-8859-7",'greek8'=>"ISO-8859-7",'iso-8859-7'=>"ISO-8859-7",'iso-ir-126'=>"ISO-8859-7",'iso8859-7'=>"ISO-8859-7",'iso88597'=>"ISO-8859-7",'iso_8859-7'=>"ISO-8859-7",'iso_8859-7:1987'=>"ISO-8859-7",'sun_eu_greek'=>"ISO-8859-7",'csiso88598e'=>"ISO-8859-8",'csisolatinhebrew'=>"ISO-8859-8",'hebrew'=>"ISO-8859-8",'iso-8859-8'=>"ISO-8859-8",'iso-8859-8-e'=>"ISO-8859-8",'iso-ir-138'=>"ISO-8859-8",'iso8859-8'=>"ISO-8859-8",'iso88598'=>"ISO-8859-8",'iso_8859-8'=>"ISO-8859-8",'iso_8859-8:1988'=>"ISO-8859-8",'visual'=>"ISO-8859-8",'csiso88598i'=>"ISO-8859-8-I",'iso-8859-8-i'=>"ISO-8859-8-I",'logical'=>"ISO-8859-8-I",'cskoi8r'=>"KOI8-R",'koi'=>"KOI8-R",'koi8'=>"KOI8-R",'koi8-r'=>"KOI8-R",'koi8_r'=>"KOI8-R",'koi8-ru'=>"KOI8-U",'koi8-u'=>"KOI8-U",'csmacintosh'=>"macintosh",'mac'=>"macintosh",'macintosh'=>"macintosh",'x-mac-roman'=>"macintosh",'utf-16be'=>"UTF-16BE",'utf-16'=>"UTF-16LE",'utf-16le'=>"UTF-16LE",'unicode-1-1-utf-8'=>"UTF-8",'utf-8'=>"UTF-8",'utf8'=>"UTF-8",'cp1250'=>"windows-1250",'windows-1250'=>"windows-1250",'x-cp1250'=>"windows-1250",'cp1251'=>"windows-1251",'windows-1251'=>"windows-1251",'x-cp1251'=>"windows-1251",'ansi_x3.4-1968'=>"windows-1252",'ascii'=>"windows-1252",'cp1252'=>"windows-1252",'cp819'=>"windows-1252",'csisolatin1'=>"windows-1252",'ibm819'=>"windows-1252",'iso-8859-1'=>"windows-1252",'iso-ir-100'=>"windows-1252",'iso8859-1'=>"windows-1252",'iso88591'=>"windows-1252",'iso_8859-1'=>"windows-1252",'iso_8859-1:1987'=>"windows-1252",'l1'=>"windows-1252",'latin1'=>"windows-1252",'us-ascii'=>"windows-1252",'windows-1252'=>"windows-1252",'x-cp1252'=>"windows-1252",'cp1253'=>"windows-1253",'windows-1253'=>"windows-1253",'x-cp1253'=>"windows-1253",'cp1254'=>"windows-1254",'csisolatin5'=>"windows-1254",'iso-8859-9'=>"windows-1254",'iso-ir-148'=>"windows-1254",'iso8859-9'=>"windows-1254",'iso88599'=>"windows-1254",'iso_8859-9'=>"windows-1254",'iso_8859-9:1989'=>"windows-1254",'l5'=>"windows-1254",'latin5'=>"windows-1254",'windows-1254'=>"windows-1254",'x-cp1254'=>"windows-1254",'cp1255'=>"windows-1255",'windows-1255'=>"windows-1255",'x-cp1255'=>"windows-1255",'cp1256'=>"windows-1256",'windows-1256'=>"windows-1256",'x-cp1256'=>"windows-1256",'cp1257'=>"windows-1257",'windows-1257'=>"windows-1257",'x-cp1257'=>"windows-1257",'cp1258'=>"windows-1258",'windows-1258'=>"windows-1258",'x-cp1258'=>"windows-1258",'dos-874'=>"windows-874",'iso-8859-11'=>"windows-874",'iso8859-11'=>"windows-874",'iso885911'=>"windows-874",'tis-620'=>"windows-874",'windows-874'=>"windows-874",'x-mac-cyrillic'=>"x-mac-cyrillic",'x-mac-ukrainian'=>"x-mac-cyrillic",'x-user-defined'=>"x-user-defined"]; + const NAME_MAP = ['Big5'=>\MensBeam\Intl\Encoding\Big5::class,'EUC-KR'=>\MensBeam\Intl\Encoding\EUCKR::class,'gb18030'=>\MensBeam\Intl\Encoding\GB18030::class,'GBK'=>\MensBeam\Intl\Encoding\GBK::class,'IBM866'=>\MensBeam\Intl\Encoding\IBM866::class,'ISO-8859-10'=>\MensBeam\Intl\Encoding\ISO885910::class,'ISO-8859-13'=>\MensBeam\Intl\Encoding\ISO885913::class,'ISO-8859-14'=>\MensBeam\Intl\Encoding\ISO885914::class,'ISO-8859-15'=>\MensBeam\Intl\Encoding\ISO885915::class,'ISO-8859-16'=>\MensBeam\Intl\Encoding\ISO885916::class,'ISO-8859-2'=>\MensBeam\Intl\Encoding\ISO88592::class,'ISO-8859-3'=>\MensBeam\Intl\Encoding\ISO88593::class,'ISO-8859-4'=>\MensBeam\Intl\Encoding\ISO88594::class,'ISO-8859-5'=>\MensBeam\Intl\Encoding\ISO88595::class,'ISO-8859-6'=>\MensBeam\Intl\Encoding\ISO88596::class,'ISO-8859-7'=>\MensBeam\Intl\Encoding\ISO88597::class,'ISO-8859-8'=>\MensBeam\Intl\Encoding\ISO88598::class,'ISO-8859-8-I'=>\MensBeam\Intl\Encoding\ISO88598I::class,'KOI8-R'=>\MensBeam\Intl\Encoding\KOI8R::class,'KOI8-U'=>\MensBeam\Intl\Encoding\KOI8U::class,'macintosh'=>\MensBeam\Intl\Encoding\Macintosh::class,'UTF-16BE'=>\MensBeam\Intl\Encoding\UTF16BE::class,'UTF-16LE'=>\MensBeam\Intl\Encoding\UTF16LE::class,'UTF-8'=>\MensBeam\Intl\Encoding\UTF8::class,'windows-1250'=>\MensBeam\Intl\Encoding\Windows1250::class,'windows-1251'=>\MensBeam\Intl\Encoding\Windows1251::class,'windows-1252'=>\MensBeam\Intl\Encoding\Windows1252::class,'windows-1253'=>\MensBeam\Intl\Encoding\Windows1253::class,'windows-1254'=>\MensBeam\Intl\Encoding\Windows1254::class,'windows-1255'=>\MensBeam\Intl\Encoding\Windows1255::class,'windows-1256'=>\MensBeam\Intl\Encoding\Windows1256::class,'windows-1257'=>\MensBeam\Intl\Encoding\Windows1257::class,'windows-1258'=>\MensBeam\Intl\Encoding\Windows1258::class,'windows-874'=>\MensBeam\Intl\Encoding\Windows874::class,'x-mac-cyrillic'=>\MensBeam\Intl\Encoding\XMacCyrillic::class,'x-user-defined'=>\MensBeam\Intl\Encoding\XUserDefined::class]; + + public static function createDecoder(string $encodingLabel, string $data, bool $fatal = false, bool $allowSurrogates = false): ?\MensBeam\Intl\Encoding\Encoding { + $encoding = self::matchLabel($encodingLabel); + if ($encoding) { + $class = $encoding['class']; + return new $class($data, $fatal, $allowSurrogates); + } else { + return null; + } + } + + public static function matchLabel(string $label): ?array { + $label = strtolower(trim($label)); + $name = self::LABEL_MAP[$label] ?? null; + if ($name) { + return [ + 'label' => $label, + 'name' => $name, + 'class' => self::NAME_MAP[$name], + ]; + } else { + return null; + } + } +} diff --git a/tools/mkindex.php b/tools/mkindex.php index 1643632..2701a21 100644 --- a/tools/mkindex.php +++ b/tools/mkindex.php @@ -1,4 +1,7 @@ "big5", //'euc-jp' => "eucjp", @@ -160,6 +163,7 @@ function make_decoder_point_array(array $entries): string { function make_decoder_char_array(array $entries): string { $out = []; + $i = 0; foreach ($entries as $match) { $index = (int) $match[1]; $code = $match[2]; diff --git a/tools/mklabels.php b/tools/mklabels.php new file mode 100644 index 0000000..b090cfe --- /dev/null +++ b/tools/mklabels.php @@ -0,0 +1,40 @@ +implementsInterface(Encoding::class) && $class->isInstantiable()) { + $name = $class->getConstant("NAME"); + $names[$name] = $className; + foreach ($class->getConstant("LABELS") as $label) { + $labels[$label] = $name; + } + } +} + +$labelList = []; +foreach ($labels as $k => $v) { + $labelList[] = "'$k'=>\"$v\""; +} +$labelList = "const LABEL_MAP = [".implode(",", $labelList)."];"; + +$nameList = []; +foreach ($names as $k => $v) { + $nameList[] = "'$k'=>$v::class"; +} +$nameList = "const NAME_MAP = [".implode(",", $nameList)."];"; + +echo "$labelList\n"; +echo "$nameList\n";