J. King
6 years ago
2 changed files with 309 additions and 0 deletions
@ -0,0 +1,272 @@ |
|||
<?php |
|||
/** @license MIT |
|||
* Copyright 2018 J. King et al. |
|||
* See LICENSE and AUTHORS files for details */ |
|||
|
|||
declare(strict_types=1); |
|||
namespace MensBeam\Intl\Encoding; |
|||
|
|||
/**
 * Resolves character-encoding labels to encoding implementations.
 *
 * All lookups are static. A label is first normalized (surrounding ASCII
 * whitespace stripped, ASCII letters lower-cased) and then looked up in
 * the LABELS table.
 */
class Label {
    /**
     * Map of normalized encoding label => short (un-namespaced) class name.
     *
     * NOTE(review): this table looks machine-generated from the WHATWG
     * encodings list; regenerate it rather than editing entries by hand.
     * The purely numeric label '866' is stored by PHP as the integer key
     * 866; string lookups with "866" still resolve to it.
     */
    const LABELS = [
        'ansi_x3.4-1968' => "Windows1252",
        'arabic' => "ISO88596",
        'ascii' => "Windows1252",
        'asmo-708' => "ISO88596",
        'big5' => "Big5",
        'big5-hkscs' => "Big5",
        'chinese' => "GBK",
        'cn-big5' => "Big5",
        'cp1250' => "Windows1250",
        'cp1251' => "Windows1251",
        'cp1252' => "Windows1252",
        'cp1253' => "Windows1253",
        'cp1254' => "Windows1254",
        'cp1255' => "Windows1255",
        'cp1256' => "Windows1256",
        'cp1257' => "Windows1257",
        'cp1258' => "Windows1258",
        'cp819' => "Windows1252",
        'cp866' => "IBM866",
        'csbig5' => "Big5",
        'cseuckr' => "EUCKR",
        'cseucpkdfmtjapanese' => "EUCJP",
        'csgb2312' => "GBK",
        'csibm866' => "IBM866",
        'csiso2022jp' => "ISO2022JP",
        'csiso2022kr' => "Replacement",
        'csiso58gb231280' => "GBK",
        'csiso88596e' => "ISO88596",
        'csiso88596i' => "ISO88596",
        'csiso88598e' => "ISO88598",
        'csiso88598i' => "ISO88598I",
        'csisolatin1' => "Windows1252",
        'csisolatin2' => "ISO88592",
        'csisolatin3' => "ISO88593",
        'csisolatin4' => "ISO88594",
        'csisolatin5' => "Windows1254",
        'csisolatin6' => "ISO885910",
        'csisolatin9' => "ISO885915",
        'csisolatinarabic' => "ISO88596",
        'csisolatincyrillic' => "ISO88595",
        'csisolatingreek' => "ISO88597",
        'csisolatinhebrew' => "ISO88598",
        'cskoi8r' => "KOI8R",
        'csksc56011987' => "EUCKR",
        'csmacintosh' => "Macintosh",
        'csshiftjis' => "ShiftJIS",
        'cyrillic' => "ISO88595",
        'dos-874' => "Windows874",
        'ecma-114' => "ISO88596",
        'ecma-118' => "ISO88597",
        'elot_928' => "ISO88597",
        'euc-jp' => "EUCJP",
        'euc-kr' => "EUCKR",
        'gb18030' => "GB18030",
        'gb2312' => "GBK",
        'gb_2312' => "GBK",
        'gb_2312-80' => "GBK",
        'gbk' => "GBK",
        'greek' => "ISO88597",
        'greek8' => "ISO88597",
        'hebrew' => "ISO88598",
        'hz-gb-2312' => "Replacement",
        'ibm819' => "Windows1252",
        'ibm866' => "IBM866",
        'iso-2022-cn' => "Replacement",
        'iso-2022-cn-ext' => "Replacement",
        'iso-2022-jp' => "ISO2022JP",
        'iso-2022-kr' => "Replacement",
        'iso-8859-1' => "Windows1252",
        'iso-8859-10' => "ISO885910",
        'iso-8859-11' => "Windows874",
        'iso-8859-13' => "ISO885913",
        'iso-8859-14' => "ISO885914",
        'iso-8859-15' => "ISO885915",
        'iso-8859-16' => "ISO885916",
        'iso-8859-2' => "ISO88592",
        'iso-8859-3' => "ISO88593",
        'iso-8859-4' => "ISO88594",
        'iso-8859-5' => "ISO88595",
        'iso-8859-6' => "ISO88596",
        'iso-8859-6-e' => "ISO88596",
        'iso-8859-6-i' => "ISO88596",
        'iso-8859-7' => "ISO88597",
        'iso-8859-8' => "ISO88598",
        'iso-8859-8-e' => "ISO88598",
        'iso-8859-8-i' => "ISO88598I",
        'iso-8859-9' => "Windows1254",
        'iso-ir-100' => "Windows1252",
        'iso-ir-101' => "ISO88592",
        'iso-ir-109' => "ISO88593",
        'iso-ir-110' => "ISO88594",
        'iso-ir-126' => "ISO88597",
        'iso-ir-127' => "ISO88596",
        'iso-ir-138' => "ISO88598",
        'iso-ir-144' => "ISO88595",
        'iso-ir-148' => "Windows1254",
        'iso-ir-149' => "EUCKR",
        'iso-ir-157' => "ISO885910",
        'iso-ir-58' => "GBK",
        'iso8859-1' => "Windows1252",
        'iso8859-10' => "ISO885910",
        'iso8859-11' => "Windows874",
        'iso8859-13' => "ISO885913",
        'iso8859-14' => "ISO885914",
        'iso8859-15' => "ISO885915",
        'iso8859-2' => "ISO88592",
        'iso8859-3' => "ISO88593",
        'iso8859-4' => "ISO88594",
        'iso8859-5' => "ISO88595",
        'iso8859-6' => "ISO88596",
        'iso8859-7' => "ISO88597",
        'iso8859-8' => "ISO88598",
        'iso8859-9' => "Windows1254",
        'iso88591' => "Windows1252",
        'iso885910' => "ISO885910",
        'iso885911' => "Windows874",
        'iso885913' => "ISO885913",
        'iso885914' => "ISO885914",
        'iso885915' => "ISO885915",
        'iso88592' => "ISO88592",
        'iso88593' => "ISO88593",
        'iso88594' => "ISO88594",
        'iso88595' => "ISO88595",
        'iso88596' => "ISO88596",
        'iso88597' => "ISO88597",
        'iso88598' => "ISO88598",
        'iso88599' => "Windows1254",
        'iso_8859-1' => "Windows1252",
        'iso_8859-15' => "ISO885915",
        'iso_8859-1:1987' => "Windows1252",
        'iso_8859-2' => "ISO88592",
        'iso_8859-2:1987' => "ISO88592",
        'iso_8859-3' => "ISO88593",
        'iso_8859-3:1988' => "ISO88593",
        'iso_8859-4' => "ISO88594",
        'iso_8859-4:1988' => "ISO88594",
        'iso_8859-5' => "ISO88595",
        'iso_8859-5:1988' => "ISO88595",
        'iso_8859-6' => "ISO88596",
        'iso_8859-6:1987' => "ISO88596",
        'iso_8859-7' => "ISO88597",
        'iso_8859-7:1987' => "ISO88597",
        'iso_8859-8' => "ISO88598",
        'iso_8859-8:1988' => "ISO88598",
        'iso_8859-9' => "Windows1254",
        'iso_8859-9:1989' => "Windows1254",
        'koi' => "KOI8R",
        'koi8' => "KOI8R",
        'koi8-r' => "KOI8R",
        'koi8-ru' => "KOI8U",
        'koi8-u' => "KOI8U",
        'koi8_r' => "KOI8R",
        'korean' => "EUCKR",
        'ks_c_5601-1987' => "EUCKR",
        'ks_c_5601-1989' => "EUCKR",
        'ksc5601' => "EUCKR",
        'ksc_5601' => "EUCKR",
        'l1' => "Windows1252",
        'l2' => "ISO88592",
        'l3' => "ISO88593",
        'l4' => "ISO88594",
        'l5' => "Windows1254",
        'l6' => "ISO885910",
        'l9' => "ISO885915",
        'latin1' => "Windows1252",
        'latin2' => "ISO88592",
        'latin3' => "ISO88593",
        'latin4' => "ISO88594",
        'latin5' => "Windows1254",
        'latin6' => "ISO885910",
        'logical' => "ISO88598I",
        'mac' => "Macintosh",
        'macintosh' => "Macintosh",
        'ms932' => "ShiftJIS",
        'ms_kanji' => "ShiftJIS",
        'replacement' => "Replacement",
        'shift-jis' => "ShiftJIS",
        'shift_jis' => "ShiftJIS",
        'sjis' => "ShiftJIS",
        'sun_eu_greek' => "ISO88597",
        'tis-620' => "Windows874",
        'unicode-1-1-utf-8' => "UTF8",
        'us-ascii' => "Windows1252",
        'utf-16' => "UTF16LE",
        'utf-16be' => "UTF16BE",
        'utf-16le' => "UTF16LE",
        'utf-8' => "UTF8",
        'utf8' => "UTF8",
        'visual' => "ISO88598",
        'windows-1250' => "Windows1250",
        'windows-1251' => "Windows1251",
        'windows-1252' => "Windows1252",
        'windows-1253' => "Windows1253",
        'windows-1254' => "Windows1254",
        'windows-1255' => "Windows1255",
        'windows-1256' => "Windows1256",
        'windows-1257' => "Windows1257",
        'windows-1258' => "Windows1258",
        'windows-31j' => "ShiftJIS",
        'windows-874' => "Windows874",
        'windows-949' => "EUCKR",
        'x-cp1250' => "Windows1250",
        'x-cp1251' => "Windows1251",
        'x-cp1252' => "Windows1252",
        'x-cp1253' => "Windows1253",
        'x-cp1254' => "Windows1254",
        'x-cp1255' => "Windows1255",
        'x-cp1256' => "Windows1256",
        'x-cp1257' => "Windows1257",
        'x-cp1258' => "Windows1258",
        'x-euc-jp' => "EUCJP",
        'x-gbk' => "GBK",
        'x-mac-cyrillic' => "XMacCyrillic",
        'x-mac-roman' => "Macintosh",
        'x-mac-ukrainian' => "XMacCyrillic",
        'x-sjis' => "ShiftJIS",
        'x-user-defined' => "XUserDefined",
        'x-x-big5' => "Big5",
        '866' => "IBM866",
    ];

    /**
     * Finds the encoding class that handles a label.
     *
     * @param string $label Any encoding label; it is normalized before lookup
     * @return string|false The namespace-qualified class name for the label,
     *                      or false when the label is not in the table
     */
    public static function match(string $label) {
        // "??" keeps unknown labels from raising an undefined-index diagnostic
        $class = self::LABELS[self::normalize($label)] ?? null;
        if ($class === null) {
            return false;
        }
        // the table stores short names only, so qualify with this namespace;
        // returning the bare namespace would make every label look identical
        return __NAMESPACE__."\\".$class;
    }

    /**
     * Puts a label into the form used for the keys of LABELS.
     *
     * Strips leading and trailing space, tab, CR, LF and form-feed
     * characters, then lower-cases the remainder.
     *
     * @param string $label The raw label
     * @return string The normalized label
     */
    public static function normalize(string $label): string {
        return strtolower(trim($label, " \t\r\n\x0C"));
    }

    /**
     * Returns the NAME constant of the class matching a label.
     *
     * Assumes the matched class is loadable and defines a NAME constant;
     * those classes are not part of this file.
     *
     * @param string $label Any encoding label
     * @return mixed The matched class's NAME constant, or false when the
     *               label is unknown
     */
    public static function name(string $label) {
        $class = self::match($label);
        if ($class) {
            return $class::NAME;
        } else {
            return false;
        }
    }

    /**
     * Returns the LABELS constant of the class matching a label.
     *
     * Assumes the matched class is loadable and defines a LABELS constant;
     * those classes are not part of this file.
     *
     * @param string $label Any encoding label
     * @return mixed The matched class's LABELS constant, or false when the
     *               label is unknown
     */
    public static function list(string $label) {
        $class = self::match($label);
        if ($class) {
            return $class::LABELS;
        } else {
            return false;
        }
    }

    /**
     * Tells whether two labels resolve to the same encoding class.
     *
     * @param string $label1 First label
     * @param string $label2 Second label
     * @return bool True only when both labels are known and map to the same
     *              class; false when either label is unknown
     */
    public static function equiv(string $label1, string $label2): bool {
        $class1 = self::match($label1);
        $class2 = self::match($label2);
        if ($class1 && $class2) {
            return $class1 === $class2;
        } else {
            return false;
        }
    }
}
@ -0,0 +1,37 @@ |
|||
<?php |
|||
// Generator for the label table: downloads the WHATWG encoding list and
// prints a PHP "const LABELS = [...]" array mapping each label to a class name.
$json = file_get_contents("https://encoding.spec.whatwg.org/encodings.json");
$groups = json_decode($json, true) or die("encoding list could not be retrieved from network.");

$nameByLabel = [];   // label => encoding name as given in the list
$classByName = [];   // encoding name => derived class name
$width = 0;          // length of the longest label, used to align the output

foreach ($groups as $group) {
    foreach ($group['encodings'] as $entry) {
        foreach ($entry['labels'] as $alias) {
            $nameByLabel[$alias] = $entry['name'];
            $width = max(strlen($alias), $width);
        }
        $name = $entry['name'];
        if ($name == "gb18030") {
            // special case: this name is simply upper-cased in full
            $studly = strtoupper($name);
        } else {
            // upper-case the first character and treat "_" like "-"
            $studly = str_replace("_", "-", strtoupper($name[0]).substr($name, 1));
            // for each hyphen past the first character: drop it and
            // upper-case the character that followed it
            $at = 0;
            for (;;) {
                $at = strpos($studly, "-", $at + 1);
                if ($at === false) {
                    break;
                }
                $head = substr($studly, 0, $at);
                $tail = substr($studly, $at + 2);
                $studly = $head.strtoupper($studly[$at + 1]).$tail;
            }
            // remove any hyphen the loop did not consume
            $studly = str_replace("-", "", $studly);
        }
        $classByName[$name] = $studly;
    }
}

ksort($nameByLabel);

$lines = [' const LABELS = ['];
foreach ($nameByLabel as $alias => $name) {
    // quoted label padded on the right so that the "=>" arrows line up
    $key = str_pad("'$alias'", $width + 2);
    $lines[] = " $key => \"{$classByName[$name]}\",";
}
$lines[] = " ];";

echo implode("\n", $lines);
Loading…
Reference in new issue