Compare commits

...

133 Commits

Author SHA1 Message Date
J. King 88dbf8398a Don't use @; fix dynamic properties 1 year ago
J. King 07d26e3f45 Add BOM handling 2 years ago
J. King de037b182c Update changelog 3 years ago
J. King 2e2ed16788 Tests for ISO-2022-JP spanning 3 years ago
J. King 143590cb53 Hopefully less incorrect spanning for ISO-2022-JP 3 years ago
J. King e5aac0b409 Improved spanning for ISO-2022-JP 3 years ago
J. King d9d92e5e77 Test all spanning other than ISO-2022-JP 3 years ago
J. King 81186973f1 Partial tests for ASCII spanning 3 years ago
J. King c64a43992b Prototype span test 3 years ago
J. King 60a5487e46 Fix spanning with single-byte encodings 3 years ago
J. King cc9c937810 Don't rely on PHP 8 signature changes 3 years ago
J. King bf81571ce4 Prototype strspn equivalent 3 years ago
J. King 7327e55a50 Update tooling 3 years ago
J. King 95d573c014 Update changeloog 3 years ago
J. King 2029cd2820 Validate for PHP 8 3 years ago
J. King 5c8116afb8 Prepare release 3 years ago
J. King 4539e56e87 Merge branch 'multi-byte' into master 3 years ago
J. King 87ec30a375 Explicit constant visibility 3 years ago
J. King 600379a4dd Fill out API documentation 3 years ago
J. King c234702cce Speed up encoding; make ISO 2022-JP more consistent 3 years ago
J. King efdac91b30 Optimize ISO 2022-JP encoder 3 years ago
J. King be2134cc71 API re-organization 3 years ago
J. King 464bc4a0a9 Specify PHP 7.1 requirement 3 years ago
J. King cde4100b8a Make correct termination of an ISO 2022-JP output string easier 3 years ago
J. King 808b4128dd Tests for replacement encoding; readme correction 3 years ago
J. King ffa3f431d6 Coverage fixes 3 years ago
J. King d580e93e52 ISO 2022-JP encoder tests and fixes 3 years ago
J. King 10328b6806 Tests for general encoder 4 years ago
J. King db738bba99 Encoder for x-user-defined 4 years ago
J. King a57dde6dbd Style fixes 4 years ago
J. King 4299bf0100 Pre-emptively update changelog 4 years ago
J. King 16f411c767 Prototype ISO 2022-JP encoder 4 years ago
J. King cdd1c0182b Corrected ISO 2022-JP decoder and seeker 4 years ago
J. King 9f7e496bf6 Plug potential memory leak 4 years ago
J. King 86c2b0d628 Fix coverage 4 years ago
J. King 2f3ad29ce6 Prototype ISO 2022-JP decoder 4 years ago
J. King 53b27d1a55 Correct buggy Shift_JIS tests 4 years ago
J. King 96846d061c Complete Shift_JIS testing 4 years ago
J. King d45e0be7c3 Typo 4 years ago
J. King 915aa7ca93 Finally fix Shift_JIS seeker 4 years ago
J. King 4b2a396c64 Prototype for replacement encoding 4 years ago
J. King ef9932ffcb Correct various ShiftJIS errors 4 years ago
J. King d9b8cd8dd1 Fixes for multi-byte index-base encoders 4 years ago
J. King 9e812ffdf8 Second stab at Shift_JIS 4 years ago
J. King b284056644 Encode correct duplicate pointers in EUC-JP 4 years ago
J. King 46b6ac3c44 Complete and correct EUC-JP implementation 4 years ago
J. King 0682e294c8 Add new labels 4 years ago
J. King 7803b8af9e Cleanup 6 years ago
J. King 1200891feb Update changelog 4 years ago
J. King f7246ccc34 Fix gb18030 seeking; tidy up 4 years ago
J. King 14d67ad49f Add fuzz test for backwards seeking 4 years ago
J. King 0eb2a8ac24 Fix bugs in gb18030 and UTF-16 4 years ago
J. King a12a2a0413 Simplify EUC-KR seeking 4 years ago
J. King be034a08e0 Move dirty EOF handling to UTF-16 4 years ago
J. King 1f007b88f1 Fix UTF-8 seeking through truncated sequences 4 years ago
J. King 220cbce9a0 Address performance regression in peeking 4 years ago
J. King 9f08fb7424 Fix backwards seeking for Big5 4 years ago
J. King 6417e8f0be Start overhauling error handling; adjust coverage annotations 4 years ago
J. King b90f194a81 Support PCOV for code coverage 4 years ago
J. King e06096c624 Ensure seekBack is defined 4 years ago
J. King 61a77086bb Make GenericEncoding trait an abstract class 4 years ago
J. King 235fdc4103 Note self-synchronizing encodings for later 4 years ago
J. King a3c16252b8 Correct documentation of StatefulEncoding 4 years ago
J. King f69cd98b4c Make posErr fully generic 4 years ago
J. King 7339176e3e Split error handlers 4 years ago
J. King befd1feb3a Apply stricter house style where possible 4 years ago
J. King b4630657cb Update changelog 4 years ago
J. King 0954518eb5 Fix gb18030 seeking; tidy up 4 years ago
J. King c82aaf8b80 Add fuzz test for backwards seeking 4 years ago
J. King 6d9eba25d3 Fix bugs in gb18030 and UTF-16 4 years ago
J. King e48220250a Simplify EUC-KR seeking 4 years ago
J. King b4c3f3c86d Move dirty EOF handling to UTF-16 4 years ago
J. King dc11f98c4c Fix UTF-8 seeking through truncated sequences 4 years ago
J. King 93cbbc24b7 Address performance regression in peeking 4 years ago
J. King f2c3488ec0 Fix backwards seeking for Big5 4 years ago
J. King 33059a2906 Start overhauling error handling; adjust coverage annotations 4 years ago
J. King ba7daf1075 Support PCOV for code coverage 4 years ago
J. King b9ea5f8b9b Ensure seekBack is defined 4 years ago
J. King 9421a3aca2 Make GenericEncoding trait an abstract class 4 years ago
J. King 7ec8f148ff Note self-synchronizing encodings for later 4 years ago
J. King 280e97e444 Correct documentation of StatefulEncoding 4 years ago
J. King 87e34b3074 Make posErr fully generic 4 years ago
J. King c115e3857a Split error handlers 4 years ago
J. King a7142284f5 Apply stricter house style where possible 4 years ago
J. King 85f06186f2 Partial Shift_JIS implementation 4 years ago
J. King f49d632642 Merge branch 'master' into multi-byte 4 years ago
J. King fc44bb1415 Generalize handling of dirty EOF 4 years ago
J. King c4a2ae1714 Tests for new features 4 years ago
J. King 19a28edebd Restore changelog 4 years ago
J. King f9e3d795a7 Add label matcher 4 years ago
J. King 200a310f72 Optionally allow surrogates 4 years ago
J. King 2e47fde774 Upgrade to PHPUnit 8 4 years ago
J. King eae901a9e2 Add new methods 4 years ago
J. King ba35252b80 Tooling update 4 years ago
J. King 106167ab39 Cleanup 6 years ago
J. King 74d8e07a65 Fully corrected WPT test data for EUC-JP 6 years ago
J. King 8dfb1ba984 Initial implementation of EUC-JP 6 years ago
J. King 58328b7524 Changelog for 0.4.0 6 years ago
J. King 2810ed9b2a Full tests for EUC-KR 6 years ago
J. King 929d55cffe Encode whitespace code points correctly in browser tests 6 years ago
J. King fb70543c0f Change gb18030 loop to be consistent with Big5 and EUC-KR 6 years ago
J. King 1121f32e96 Minor Big5 corrections 6 years ago
J. King c4cdbdd5c8 Initial implementation of EUC-KR 6 years ago
J. King c2a8b1ba52 Style fixes 6 years ago
J. King bfc6c677c5 Complete Big5 tests, with numerous fixes 6 years ago
J. King 5217a6c0bc Tidying 6 years ago
J. King 32d7fc47b0 Fix HTML test generator; clean up 6 years ago
J. King 55cbc915c3 Refactor HTML-based test generators 6 years ago
J. King 5967d148c0 Consolidate index generation into a single, better script 6 years ago
J. King 63fccc3c3a Test UTF-16 EOF handling better 6 years ago
J. King 3b8db5822a Add all available tests to platform test generator 6 years ago
J. King 4a091610e9 Initial implementation of Big5 encoding 6 years ago
J. King fdbeecdb17 Add name and label to x-user-defined 6 years ago
J. King d5327a3b83 Implement x-user-defined decoder 6 years ago
J. King dd9bed2e84 Implement UTF-16 6 years ago
J. King a0bf8a9b05 Don't check for dirty EOF on every iteration 6 years ago
J. King e683167905 Style fixes 6 years ago
J. King 1449fae908 Refactor UTF-8 seeking 6 years ago
J. King e4b6acb24a Refactor tests 6 years ago
J. King 61993bb900 Fix typo... 6 years ago
J. King 647a2a51a4 Documentation update 6 years ago
J. King 4c686aa8a1 Complete battery of tests for gb18030 6 years ago
J. King 1b9889914a Fix numerous bugs with gb18030 6 years ago
J. King 467c565e8c Implement gb18030 seeking 6 years ago
J. King 40d0054bd1 Implement gb18030 and GBK encoders 6 years ago
J. King 766643aa37 Common infrstructure for gb18030 and GBK 6 years ago
J. King d6747532cd Implement gb18030 decoder 6 years ago
J. King 3a19b93aab Move nextChar to generic class 6 years ago
J. King 58444b9545 Documentation update 6 years ago
J. King 3ee653307c Implement all other single-byte encodings 6 years ago
J. King 269ecf4a96 Style fixes 6 years ago
J. King 7de6d7a6fc Implement ISO-8859-6 single-byte encoding 6 years ago
J. King 8c97b42303 Define interfaces for encodings 6 years ago
  1. 11
      .gitattributes
  2. 1
      .gitignore
  3. 66
      .php_cs.dist
  4. 107
      CHANGELOG
  5. 2
      README.md
  6. 113
      RoboFile.php
  7. 14
      composer.json
  8. 51
      composer.lock
  9. 98
      lib/Encoding.php
  10. 250
      lib/Encoding/AbstractEncoding.php
  11. 177
      lib/Encoding/Big5.php
  12. 20
      lib/Encoding/Coder.php
  13. 105
      lib/Encoding/Decoder.php
  14. 147
      lib/Encoding/EUCJP.php
  15. 137
      lib/Encoding/EUCKR.php
  16. 322
      lib/Encoding/Encoder.php
  17. 13
      lib/Encoding/GB18030.php
  18. 211
      lib/Encoding/GBCommon.php
  19. 23
      lib/Encoding/GBK.php
  20. 21
      lib/Encoding/IBM866.php
  21. 372
      lib/Encoding/ISO2022JP.php
  22. 24
      lib/Encoding/ISO885910.php
  23. 20
      lib/Encoding/ISO885913.php
  24. 20
      lib/Encoding/ISO885914.php
  25. 23
      lib/Encoding/ISO885915.php
  26. 18
      lib/Encoding/ISO885916.php
  27. 26
      lib/Encoding/ISO88592.php
  28. 26
      lib/Encoding/ISO88593.php
  29. 26
      lib/Encoding/ISO88594.php
  30. 25
      lib/Encoding/ISO88595.php
  31. 31
      lib/Encoding/ISO88596.php
  32. 29
      lib/Encoding/ISO88597.php
  33. 28
      lib/Encoding/ISO88598.php
  34. 20
      lib/Encoding/ISO88598I.php
  35. 22
      lib/Encoding/KOI8R.php
  36. 19
      lib/Encoding/KOI8U.php
  37. 21
      lib/Encoding/Macintosh.php
  38. 17
      lib/Encoding/ModalCoder.php
  39. 133
      lib/Encoding/Replacement.php
  40. 161
      lib/Encoding/ShiftJIS.php
  41. 89
      lib/Encoding/SingleByteEncoding.php
  42. 180
      lib/Encoding/UTF16.php
  43. 16
      lib/Encoding/UTF16BE.php
  44. 21
      lib/Encoding/UTF16LE.php
  45. 280
      lib/Encoding/UTF8.php
  46. 20
      lib/Encoding/Windows1250.php
  47. 20
      lib/Encoding/Windows1251.php
  48. 34
      lib/Encoding/Windows1252.php
  49. 20
      lib/Encoding/Windows1253.php
  50. 29
      lib/Encoding/Windows1254.php
  51. 20
      lib/Encoding/Windows1255.php
  52. 20
      lib/Encoding/Windows1256.php
  53. 20
      lib/Encoding/Windows1257.php
  54. 20
      lib/Encoding/Windows1258.php
  55. 23
      lib/Encoding/Windows874.php
  56. 19
      lib/Encoding/XMacCyrillic.php
  57. 113
      lib/Encoding/XUserDefined.php
  58. 16
      robo
  59. 6
      tests/bootstrap.php
  60. 233
      tests/cases/Encoding/TestBig5.php
  61. 243
      tests/cases/Encoding/TestEUCJP.php
  62. 222
      tests/cases/Encoding/TestEUCKR.php
  63. 315
      tests/cases/Encoding/TestGB18030.php
  64. 279
      tests/cases/Encoding/TestISO2022JP.php
  65. 221
      tests/cases/Encoding/TestReplacement.php
  66. 235
      tests/cases/Encoding/TestShiftJIS.php
  67. 339
      tests/cases/Encoding/TestSingleByte.php
  68. 49
      tests/cases/Encoding/TestUTF16BE.php
  69. 170
      tests/cases/Encoding/TestUTF16LE.php
  70. 509
      tests/cases/Encoding/TestUTF8.php
  71. 202
      tests/cases/Encoding/TestXUserDefined.php
  72. 92
      tests/cases/TestEncoding.php
  73. 48
      tests/lib/CoderDecoderTest.php
  74. 404
      tests/lib/DecoderTest.php
  75. 15
      tests/phpunit.xml
  76. 256
      tools/mkindex.php
  77. 40
      tools/mklabels.php
  78. 72
      tools/mktest.php
  79. 57
      tools/test-big5.html
  80. 57
      tools/test-eucjp.html
  81. 38
      tools/test-euckr.html
  82. 77
      tools/test-gb18030.html
  83. 17
      tools/test-gbk.html
  84. 46
      tools/test-iso2022jp.html
  85. 42
      tools/test-shiftjis.html
  86. 20
      tools/test-utf16.html
  87. 70
      tools/test-utf8.html
  88. 177
      tools/test.js
  89. 1970
      vendor-bin/csfixer/composer.lock
  90. 2
      vendor-bin/phpunit/composer.json
  91. 1332
      vendor-bin/phpunit/composer.lock
  92. 2
      vendor-bin/robo/composer.json
  93. 1844
      vendor-bin/robo/composer.lock

11
.gitattributes

@ -5,3 +5,14 @@
*.bat eol=crlf
*.cmd eol=crlf
.gitignore -eol
/tests export-ignore
/perf export-ignore
/tools export-ignore
/vendor-bin export-ignore
/.gitattributes export-ignore
/.gitignore export-ignore
/.php_cs.dist export-ignore
/robo export-ignore
/robo.bat export-ignore
/RoboFile.php export-ignore

1
.gitignore

@ -1,5 +1,6 @@
vendor/
tests/coverage/
/tests/.phpunit.result.cache
perf/docs/
.php_cs.cache

66
.php_cs.dist

@ -1,24 +1,82 @@
<?php
declare(strict_types=1);
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\UTF8;
const BASE = __DIR__.DIRECTORY_SEPARATOR;
ini_set("memory_limit", "-1");
$paths = [
__FILE__,
BASE."RoboFile.php",
BASE."lib",
BASE."perf",
BASE."tests",
BASE."tools",
];
$rules = [
// house rules where PSR series is silent
'align_multiline_comment' => ['comment_type' => "phpdocs_only"],
'array_syntax' => ['syntax' => "short"],
'binary_operator_spaces' => [
//'default' => "single_space",
//'operators' => ['=>' => "align_single_space"],
],
'cast_spaces' => ['space' => "single"],
'concat_space' => ['spacing' => "none"],
//'list_syntax' => ['syntax' => "short"],
'magic_constant_casing' => true,
'magic_method_casing' => true,
'modernize_types_casting' => true,
'native_function_casing' => true,
'native_function_type_declaration_casing' => true,
'no_binary_string' => true,
'no_blank_lines_after_phpdoc' => true,
'no_empty_comment' => true,
'no_empty_phpdoc' => true,
'no_extra_blank_lines' => true, // this could probably use more configuration
'no_mixed_echo_print' => ['use' => "echo"],
'no_short_bool_cast' => true,
'no_trailing_comma_in_singleline_array' => true,
'no_unneeded_control_parentheses' => true,
'no_unneeded_curly_braces' => true,
'no_unused_imports' => true,
'no_whitespace_before_comma_in_array' => true,
'normalize_index_brace' => true,
'object_operator_without_whitespace' => true,
'pow_to_exponentiation' => true,
'set_type_to_cast' => true,
'standardize_not_equals' => true,
'trailing_comma_in_multiline_array' => true,
'unary_operator_spaces' => true,
'yoda_style' => false,
// PSR standard to apply
'@PSR2' => true,
'braces' => ['position_after_functions_and_oop_constructs' => "same"],
'function_declaration' => ['closure_function_spacing' => "none"],
];
// PSR-12 rules; php-cs-fixer does not yet support PSR-12 natively
'compact_nullable_typehint' => true,
'declare_equal_normalize' => ['space' => "none"],
'function_typehint_space' => true,
'lowercase_cast' => true,
'lowercase_static_reference' => true,
'no_alternative_syntax' => true,
'no_empty_statement' => true,
'no_leading_import_slash' => true,
'no_leading_namespace_whitespace' => true,
'no_whitespace_in_blank_line' => true,
'return_type_declaration' => ['space_before' => "none"],
'single_trait_insert_per_statement' => true,
'short_scalar_cast' => true,
//'visibility_required' => ['elements' => ["const", "property", "method"]],
// house exceptions to PSR rules
'braces' => ['position_after_functions_and_oop_constructs' => "same"],
'function_declaration' => ['closure_function_spacing' => "none"],
'new_with_braces' => false, // no option to specify absence of braces
];
$finder = \PhpCsFixer\Finder::create();
foreach ($paths as $path) {
@ -28,4 +86,4 @@ foreach ($paths as $path) {
$finder = $finder->in($path);
}
}
return \PhpCsFixer\Config::create()->setRules($rules)->setFinder($finder);
return \PhpCsFixer\Config::create()->setRiskyAllowed(true)->setRules($rules)->setFinder($finder);

107
CHANGELOG

@ -0,0 +1,107 @@
Version 0.9.2 (2023-01-25)
==========================
Bug fixes
- Define properties which were accidentally created dynamically
- Avoid use of @ operator to play nice with custom error handlers
Version 0.9.1 (2021-10-24)
==========================
Bug fixes
- Correctly skip byte order marks
- Detect byte order marks in \MensBeam\Intl\Encoding::createEncoder()
Version 0.9.0 (2021-03-25)
==========================
New features:
- Add asciiSpan() and asciiSpanNot() methods to decoders
Version 0.8.1 (2021-03-06)
==========================
Changes:
- Support PHP 8
Version 0.8.0 (2020-10-27)
==========================
New features:
- Implementation of EUC-JP encoding
- Implementation of Shift_JIS encoding
- Implementation of ISO-2022-JP encoding
- Implementation of replacement encoding
- Added missing encoder for x-user-defined encoding
- Added general-purpose \MensBeam\Intl\Encoding\Encoder class
also accessible via \MensBeam\Intl\Encoding::createEncoder()
static method
Bug fixes:
- Fixed errors in Big5, gb18030, and GBK encoders
- Plugged potential memory leak when using the rewind() method of
Big5, gb18030, GBK, and EUC-KR decoders
Changes:
- Added new labels for UTF-8 and UTF-16
- Improved performance of Big5, gb18030, GBK, and EUC-KR encoders
- Corrected requirement of PHP 7.1
Version 0.7.1 (2020-10-05)
==========================
Bug fixes:
- Fixed decoding of invalid GBK characters yielding null
- Corrected backwards seeking of UTF-8 strings with truncated sequences
- Corrected backwards seeking of Big5, gb18030, GBK, and EUC-KR strings
with invalid data
Version 0.7.0 (2019-12-20)
==========================
New features:
- Added \MensBeam\Intl\Encoding abstract class with createDecoder() and
matchLabel() static methods
Version 0.6.0 (2019-12-18)
==========================
New features:
- Added $allowSurrogates parameter to Encoding constructor
- Added posErr public instance property to Encoding
Version 0.5.0 (2019-12-13)
==========================
Breaking changes:
- Renamed Encoding::len() to Encoding::lenChar()
New features:
- Added Encoding::lenByte() method
- Added Encoding::eof() method
Version 0.4.0 (2018-09-15)
==========================
New features:
- Implementation of UTF-16 encoding
- Implementation of Big5 encoding
- Implementation of EUC-KR encoding
- Implementation of x-user-defined encoding
Version 0.3.0 (2018-08-29)
==========================
New features:
- Implementation of gb18030 and GBK encodings
Version 0.2.0 (2018-08-11)
==========================
New features:
- Implementation of all single-byte WHATWG encodings
Version 0.1.0 (2018-08-10)
==========================
Initial release

2
README.md

@ -2,7 +2,7 @@
While PHP's [internationalization extension][PHP_INTL] offers excellent and extensive functionality for dealing with human languages, character encodings, and various related things, it is not always available. Moreover, its character decoder does not yield the same results as [WHATWG's Encoding standard][ENCODING], making it unsuitable for implementing parsers for URLs or HTML. The more widely used [multi-byte string extension][PHP_MBSTRING] not only suffers the same problems, but is also very slow.
Included here is a WHATWG-compatible UTF-8 string decoder which is reasonably performant while requiring no external dependencies or PHP extensions. In time it will be extended to cover the entire suite of WHATWG character encodings, and may also provide other character-centric internationalization functionality.
Included here is a complete suite of WHATWG-compatible seekable string decoders which are reasonably performant while requiring no external dependencies or PHP extensions. Where applicable, code point encoders are also included. In time it may also provide other character-centric internationalization functionality.
[PHP_INTL]: https://php.net/manual/en/book.intl.php
[PHP_MBSTRING]: https://php.net/manual/en/book.mbstring.php

113
RoboFile.php

@ -1,13 +1,23 @@
<?php
declare(strict_types=1);
use Robo\Result;
class RoboFile extends \Robo\Tasks {
const BASE = __DIR__.\DIRECTORY_SEPARATOR;
const BASE_TEST = self::BASE."tests".\DIRECTORY_SEPARATOR;
const BASE = __DIR__.\DIRECTORY_SEPARATOR;
const BASE_TEST = BASE."tests".\DIRECTORY_SEPARATOR;
define("IS_WIN", defined("PHP_WINDOWS_VERSION_MAJOR"));
define("IS_MAC", php_uname("s") === "Darwin");
/**
* Runs the typical test suite
function norm(string $path): string {
$out = realpath($path);
if (!$out) {
$out = str_replace(["/", "\\"], \DIRECTORY_SEPARATOR, $path);
}
return $out;
}
class RoboFile extends \Robo\Tasks {
/** Runs the typical test suite
*
* Arguments passed to the task are passed on to PHPUnit. Thus one may, for
* example, run the following command and get the expected results:
@ -15,28 +25,27 @@ class RoboFile extends \Robo\Tasks {
* ./robo test --testsuite TTRSS --exclude-group slow --testdox
*
* Please see the PHPUnit documentation for available options.
*/
*/
public function test(array $args): Result {
return $this->runTests("php", "typical", $args);
return $this->runTests(escapeshellarg(\PHP_BINARY), "typical", $args);
}
/**
* Runs the full test suite
/** Runs the full test suite
*
* This includes pedantic tests which may help to identify problems.
* See help for the "test" task for more details.
*/
*/
public function testFull(array $args): Result {
return $this->runTests("php", "full", $args);
return $this->runTests(escapeshellarg(\PHP_BINARY), "full", $args);
}
/**
* Runs a quick subset of the test suite
*
* See help for the "test" task for more details.
*/
*/
public function testQuick(array $args): Result {
return $this->runTests("php", "quick", $args);
return $this->runTests(escapeshellarg(\PHP_BINARY), "quick", $args);
}
/** Produces a code coverage report
@ -48,45 +57,78 @@ class RoboFile extends \Robo\Tasks {
* Robo first tries to use phpdbg and will fall back to Xdebug if available.
* Because Xdebug slows down non-coverage tasks, however, phpdbg is highly
* recommended if debugging facilities are not otherwise needed.
*/
*/
public function coverage(array $args): Result {
// run tests with code coverage reporting enabled
$exec = $this->findCoverageEngine();
return $this->runTests($exec, "typical", array_merge(["--coverage-html", self::BASE_TEST."coverage"], $args));
return $this->runTests($exec, "coverage", array_merge(["--coverage-html", BASE_TEST."coverage"], $args));
}
/** Runs a performance evaluation.
/** Produces a code coverage report, with redundant tests
*
* The performance of the library's basic functionality is tested against
* the IntlCodePointBreakIterator class
*/
public function perf(array $args): Result {
$execpath = realpath(self::BASE."perf/perf.php");
return $this->taskExec("php")->arg($execpath)->args($args)->run();
* Depending on the environment, some tests that normally provide
* coverage may be skipped, while working alternatives are normally
* suppressed for reasons of time. This coverage report will try to
* run all tests which may cover code.
*
* See also help for the "coverage" task for more details.
*/
public function coverageFull(array $args): Result {
// run tests with code coverage reporting enabled
$exec = $this->findCoverageEngine();
return $this->runTests($exec, "typical", array_merge(["--coverage-html", BASE_TEST."coverage"], $args));
}
/** Runs the coding standards fixer */
public function clean($opts = ['demo|d' => false]): Result {
$t = $this->taskExec(realpath(self::BASE."vendor/bin/php-cs-fixer"));
$t->arg("fix");
$t = $this->taskExec(realpath(BASE."vendor/bin/php-cs-fixer"));
$t->arg("fix")->arg("--allow-risky=yes");
if ($opts['demo']) {
$t->args("--dry-run", "--diff")->option("--diff-format", "udiff");
}
return $t->run();
}
/** Runs a performance evaluation.
*
* The performance of the library's basic functionality is tested against
* the IntlCodePointBreakIterator class
*/
public function perf(array $args): Result {
$execpath = realpath(norm(BASE."perf/perf.php"));
return $this->taskExec("php")->arg($execpath)->args($args)->run();
}
protected function findCoverageEngine(): string {
$null = null;
$code = 0;
exec("phpdbg --version", $null, $code);
if (!$code) {
return "phpdbg -qrr";
$dir = rtrim(ini_get("extension_dir"), "/").\DIRECTORY_SEPARATOR;
$ext = IS_WIN ? "dll" : (IS_MAC ? "dylib" : "so");
$php = escapeshellarg(\PHP_BINARY);
$code = escapeshellarg(BASE."lib");
if (extension_loaded("pcov")) {
return "$php -d pcov.enabled=1 -d pcov.directory=$code";
} elseif (extension_loaded("xdebug")) {
return $php;
} elseif (file_exists($dir."pcov.$ext")) {
return "$php -d extension=pcov.$ext -d pcov.enabled=1 -d pcov.directory=$code";
} elseif (file_exists($dir."pcov.$ext")) {
return "$php -d zend_extension=xdebug.$ext";
} else {
return "php";
if (IS_WIN) {
$dbg = dirname(\PHP_BINARY)."\\phpdbg.exe";
$dbg = file_exists($dbg) ? $dbg : "";
} else {
$dbg = trim(`which phpdbg 2>/dev/null`);
}
if ($dbg) {
return escapeshellarg($dbg)." -qrr";
} else {
return $php;
}
}
}
protected function runTests(string $executor, string $set, array $args) : Result {
protected function runTests(string $executor, string $set, array $args): Result {
error_reporting(0);
switch ($set) {
case "typical":
$set = ["--exclude-group", "optional"];
@ -94,14 +136,17 @@ class RoboFile extends \Robo\Tasks {
case "quick":
$set = ["--exclude-group", "optional,slow"];
break;
case "coverage":
$set = ["--exclude-group", "optional,coverageOptional"];
break;
case "full":
$set = [];
break;
default:
throw new \Exception;
}
$execpath = realpath(self::BASE."vendor-bin/phpunit/vendor/phpunit/phpunit/phpunit");
$confpath = realpath(self::BASE_TEST."phpunit.xml");
return $this->taskExec($executor)->arg($execpath)->option("-c", $confpath)->args(array_merge($set, $args))->run();
$execpath = norm(BASE."vendor-bin/phpunit/vendor/phpunit/phpunit/phpunit");
$confpath = realpath(BASE_TEST."phpunit.dist.xml") ?: norm(BASE_TEST."phpunit.xml");
return $this->taskExec($executor)->option("-d", "zend.assertions=1")->arg($execpath)->option("-c", $confpath)->args(array_merge($set, $args))->run();
}
}

14
composer.json

@ -2,7 +2,7 @@
"name": "mensbeam/intl",
"type": "library",
"description": "A set of dependency-free basic internationalization tools",
"keywords": ["internationalization", "intl", "encoding", "unicode", "charset", "utf-8", "utf8"],
"keywords": ["whatwg", "internationalization", "intl", "encoding", "unicode", "charset", "utf-8", "utf8"],
"license": "MIT",
"authors": [
{
@ -13,7 +13,7 @@
],
"require": {
"php": "^7.0"
"php": ">=7.1"
},
"require-dev": {
"ext-intl": "*",
@ -27,5 +27,15 @@
"psr-4": {
"MensBeam\\Intl\\": "lib/"
}
},
"autoload-dev": {
"psr-4": {
"MensBeam\\Intl\\Test\\": "tests/lib/"
}
},
"config": {
"allow-plugins": {
"bamarni/composer-bin-plugin": true
}
}
}

51
composer.lock

@ -1,39 +1,44 @@
{
"_readme": [
"This file locks the dependencies of your project to a known state",
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file",
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
"This file is @generated automatically"
],
"content-hash": "ba27aa72527421b04188393db2c8510b",
"content-hash": "73ac4ceead4f67bdc956746ffd9e7887",
"packages": [],
"packages-dev": [
{
"name": "bamarni/composer-bin-plugin",
"version": "v1.2.0",
"version": "1.8.2",
"source": {
"type": "git",
"url": "https://github.com/bamarni/composer-bin-plugin.git",
"reference": "62fef740245a85f00665e81ea8f0aa0b72afe6e7"
"reference": "92fd7b1e6e9cdae19b0d57369d8ad31a37b6a880"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/bamarni/composer-bin-plugin/zipball/62fef740245a85f00665e81ea8f0aa0b72afe6e7",
"reference": "62fef740245a85f00665e81ea8f0aa0b72afe6e7",
"url": "https://api.github.com/repos/bamarni/composer-bin-plugin/zipball/92fd7b1e6e9cdae19b0d57369d8ad31a37b6a880",
"reference": "92fd7b1e6e9cdae19b0d57369d8ad31a37b6a880",
"shasum": ""
},
"require": {
"composer-plugin-api": "^1.0"
"composer-plugin-api": "^2.0",
"php": "^7.2.5 || ^8.0"
},
"require-dev": {
"composer/composer": "dev-master",
"symfony/console": "^2.5 || ^3.0"
"composer/composer": "^2.0",
"ext-json": "*",
"phpstan/extension-installer": "^1.1",
"phpstan/phpstan": "^1.8",
"phpstan/phpstan-phpunit": "^1.1",
"phpunit/phpunit": "^8.5 || ^9.5",
"symfony/console": "^2.8.52 || ^3.4.35 || ^4.4 || ^5.0 || ^6.0",
"symfony/finder": "^2.8.52 || ^3.4.35 || ^4.4 || ^5.0 || ^6.0",
"symfony/process": "^2.8.52 || ^3.4.35 || ^4.4 || ^5.0 || ^6.0"
},
"type": "composer-plugin",
"extra": {
"class": "Bamarni\\Composer\\Bin\\Plugin",
"branch-alias": {
"dev-master": "1.1-dev"
}
"class": "Bamarni\\Composer\\Bin\\BamarniBinPlugin"
},
"autoload": {
"psr-4": {
@ -44,7 +49,20 @@
"license": [
"MIT"
],
"time": "2017-09-11T13:13:58+00:00"
"description": "No conflicts for your bin dependencies",
"keywords": [
"composer",
"conflict",
"dependency",
"executable",
"isolation",
"tool"
],
"support": {
"issues": "https://github.com/bamarni/composer-bin-plugin/issues",
"source": "https://github.com/bamarni/composer-bin-plugin/tree/1.8.2"
},
"time": "2022-10-31T08:38:03+00:00"
}
],
"aliases": [],
@ -53,9 +71,10 @@
"prefer-stable": false,
"prefer-lowest": false,
"platform": {
"php": "^7.0"
"php": ">=7.1"
},
"platform-dev": {
"ext-intl": "*"
}
},
"plugin-api-version": "2.3.0"
}

98
lib/Encoding.php

File diff suppressed because one or more lines are too long

250
lib/Encoding/AbstractEncoding.php

@ -0,0 +1,250 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
abstract class AbstractEncoding implements Decoder {
protected const MODE_NULL = 0;
protected const MODE_REPLACE = 1;
protected const MODE_FATAL = 2;
protected const HIGH_BYTES = "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF";
/** @var string $string The string being decoded */
protected $string;
/** @var int $posByte The current byte position in the string */
protected $posByte = 0;
/** @var int $posChar The current character (code point) position in the string */
protected $posChar = 0;
/** @var int $lenByte The length of the string, in bytes */
protected $lenByte = null;
/** @var int $lenChar The length of the string in characters, if known */
protected $lenChar = null;
/** @var array $errStack A list of error data to aid in backwards seeking; the most recent error is kept off the stack */
protected $errStack = [];
/** @var int $errMark The byte position marking the most recent error. The one or more bytes previous to this position constitute an invalid character */
protected $errMark = -1;
/** @var int $errSync The byte position to which to move to skip over the most recent erroneous character */
protected $errSync = -2;
/** @var int $errMode The selected error mode (fatal or replace) */
protected $errMode = self::MODE_REPLACE;
/** @var bool $allowSurrogates Whether surrogates in encodings other than UTF-16 should be passed through */
protected $allowSurrogates = false;
/** @var bool $selfSynchronizing Whether the concrete class represents a self-synchronizing decoder. Such decoders do not use the error stack */
protected $selfSynchronizing = false;
/** @var string[] $stateProps The list of properties which constitute state which must be saved when peeking/seeking; some encodings may add to this list for their own purposes */
protected $stateProps = ["posChar", "posByte", "posErr"];
public $posErr = 0;
/** Seeks backwards through the string the specified number of characters.
* If the beginning of the string is reached before the requested number
* of characters has been skipped over, the number of remaining characters
* is returned.
*
* This is called by seek() with a positive distance and with the error
* mode temporarily set to MODE_NULL.
*
* @param int $distance The number of characters to move backwards
* @return int The number of characters which could not be skipped over
*/
abstract protected function seekBack(int $distance): int;
/** Sets up the decoder for a string
 *
 * @param string $string The string to decode; its byte length is measured once here
 * @param bool $fatal Selects the fatal error mode when true, the replacement error mode otherwise
 * @param bool $allowSurrogates Stored as-is in the $allowSurrogates property
 */
public function __construct(string $string, bool $fatal = false, bool $allowSurrogates = false) {
    if ($fatal) {
        $this->errMode = self::MODE_FATAL;
    } else {
        $this->errMode = self::MODE_REPLACE;
    }
    $this->allowSurrogates = $allowSurrogates;
    $this->lenByte = strlen($string);
    $this->string = $string;
}
/** Returns the current byte position of the decoder within the string */
public function posByte(): int {
return $this->posByte;
}
/** Returns the current character (code point) position of the decoder within the string */
public function posChar(): int {
return $this->posChar;
}
/** Moves the decoder back to the start of the string and forgets all recorded error data */
public function rewind(): void {
    // discard the error history; the mark and sync values return to their initial sentinels
    $this->errStack = [];
    $this->errSync = -2;
    $this->errMark = -1;
    // then reset both position counters
    $this->posChar = 0;
    $this->posByte = 0;
}
/** Returns the next character as a UTF-8 string, or the empty string at the end of input
 *
 * ASCII bytes are returned directly; anything else is decoded with nextCode() and re-serialized as UTF-8
 */
public function nextChar(): string {
    $byte = $this->string[$this->posByte] ?? "";
    if ($byte === "") {
        // nothing left to read; the positions are left untouched
        return "";
    }
    if (ord($byte) >= 0x80) {
        // a non-ASCII byte: decode the code point at this position and serialize it
        return UTF8::encode($this->nextCode());
    }
    // an ASCII byte is its own UTF-8 serialization and advances both counters by one
    $this->posByte++;
    $this->posChar++;
    return $byte;
}
/** Advances (or, for a negative distance, retreats) the given number of characters
 *
 * @param int $distance The number of characters to move by; negative values seek towards the start of the string
 * @return int The number of requested characters which could not be skipped because an end of the string was reached
 */
public function seek(int $distance): int {
    if ($distance > 0) {
        // consume code points until the distance is exhausted or the input runs out
        do {
            $p = $this->nextCode();
        } while ($p !== false && --$distance);
        return $distance;
    } elseif ($distance < 0) {
        $distance = abs($distance);
        if (!$this->posChar) {
            // already at the start: nothing can be skipped
            return $distance;
        }
        // errors are not reported while seeking backwards; the caller's error
        // mode is restored even if seekBack() throws, so that a failure cannot
        // leave the decoder permanently in MODE_NULL
        $mode = $this->errMode;
        $this->errMode = self::MODE_NULL;
        try {
            return $this->seekBack($distance);
        } finally {
            $this->errMode = $mode;
        }
    }
    return 0;
}
/** Returns up to $num upcoming characters as a UTF-8 string without moving the decoder
 *
 * The decoder state is saved beforehand and re-applied afterwards, even if reading throws
 *
 * @param int $num The maximum number of characters to read; values below 1 yield the empty string
 */
public function peekChar(int $num = 1): string {
    $saved = $this->stateSave();
    $chars = "";
    try {
        for ($remaining = $num; $remaining > 0; $remaining--) {
            $char = $this->nextChar();
            if ($char === "") {
                // end of input
                break;
            }
            $chars .= $char;
        }
    } finally {
        $this->stateApply($saved);
    }
    return $chars;
}
/** Returns up to $num upcoming code points without moving the decoder
 *
 * The decoder state is saved beforehand and re-applied afterwards, even if reading throws
 *
 * @param int $num The maximum number of code points to read; values below 1 yield an empty array
 */
public function peekCode(int $num = 1): array {
    $saved = $this->stateSave();
    $codes = [];
    try {
        for ($remaining = $num; $remaining > 0; $remaining--) {
            $code = $this->nextCode();
            if ($code === false) {
                // end of input
                break;
            }
            $codes[] = $code;
        }
    } finally {
        $this->stateApply($saved);
    }
    return $codes;
}
/** Returns the length of the string in bytes, as measured by the constructor */
public function lenByte(): int {
return $this->lenByte;
}
/** Returns the length of the string in characters
 *
 * The first call decodes from the current position to the end of the string
 * and caches the result; later calls return the cached value. The decoder's
 * position is unchanged afterwards, including when decoding throws.
 */
public function lenChar(): int {
    if ($this->lenChar === null) {
        $state = $this->stateSave();
        try {
            // run to the end of the string; the final character position is the length
            while ($this->nextCode() !== false);
            $this->lenChar = $this->posChar;
        } finally {
            // always return to where we started, so that an exception raised
            // while scanning does not strand the decoder mid-string
            $this->stateApply($state);
        }
    }
    return $this->lenChar;
}
/** Reports whether the byte position has reached (or passed) the end of the string */
public function eof(): bool {
    $bytesRemain = $this->posByte < $this->lenByte;
    return !$bytesRemain;
}
/** Iterates over the remaining characters, yielding each as UTF-8 keyed by its character index
 *
 * The key is the character position after reading, minus one
 */
public function chars(): \Generator {
    for ($char = $this->nextChar(); $char !== ""; $char = $this->nextChar()) {
        yield ($this->posChar - 1) => $char;
    }
}
/** Iterates over the remaining code points, yielding each keyed by its character index
 *
 * The key is the character position after reading, minus one
 */
public function codes(): \Generator {
    for ($code = $this->nextCode(); $code !== false; $code = $this->nextCode()) {
        yield ($this->posChar - 1) => $code;
    }
}
/** Consumes and returns the run of bytes at the current position which all appear in $mask
 *
 * Bytes 0x80-0xFF are removed from the mask first, so only ASCII bytes can
 * ever be matched; each consumed byte therefore counts as one character.
 *
 * @param string $mask The set of ASCII characters to match
 * @param int|null $length The maximum number of bytes to examine, or null for no limit
 * @return string The consumed characters; the empty string if nothing matched
 */
public function asciiSpan(string $mask, ?int $length = null): string {
    $mask = preg_replace('/[\x80-\xFF]/s', "", $mask);
    // null cannot be passed as strspn()'s length before PHP 8, hence the two call forms
    if ($length !== null) {
        $len = strspn($this->string, $mask, $this->posByte, $length);
    } else {
        $len = strspn($this->string, $mask, $this->posByte);
    }
    if (!$len) {
        return "";
    }
    $out = substr($this->string, $this->posByte, $len);
    $this->posByte += $len;
    $this->posChar += $len;
    return $out;
}
/** Consumes and returns the run of ASCII bytes at the current position none of which appear in $mask
 *
 * Every byte from 0x80 to 0xFF is added to the mask, so the run always stops
 * at the first non-ASCII byte; each consumed byte therefore counts as one
 * character.
 *
 * @param string $mask The set of ASCII characters which end the span
 * @param int|null $length The maximum number of bytes to examine, or null for no limit
 * @return string The consumed characters; the empty string if nothing matched
 */
public function asciiSpanNot(string $mask, ?int $length = null): string {
    $mask .= self::HIGH_BYTES;
    // null cannot be passed as strcspn()'s length before PHP 8, hence the two call forms
    if ($length !== null) {
        $len = strcspn($this->string, $mask, $this->posByte, $length);
    } else {
        $len = strcspn($this->string, $mask, $this->posByte);
    }
    if (!$len) {
        return "";
    }
    $out = substr($this->string, $this->posByte, $len);
    $this->posByte += $len;
    $this->posChar += $len;
    return $out;
}
/** Takes a snapshot of the decoder's state
 *
 * The snapshot holds the current size of the error stack under 'errCount',
 * followed by the value of every property named in $stateProps
 */
protected function stateSave(): array {
    $snapshot = [];
    $snapshot['errCount'] = count($this->errStack);
    foreach ($this->stateProps as $name) {
        $snapshot[$name] = $this->{$name};
    }
    return $snapshot;
}
/** Restores the decoder to a snapshot produced by stateSave()
 *
 * Errors recorded since the snapshot are popped off the error stack one at a
 * time, each pop restoring the previous error mark and sync position; every
 * other snapshot entry is then written back to the property of the same name
 */
protected function stateApply(array $state): void {
    $errCount = $state['errCount'];
    while (count($this->errStack) > $errCount) {
        [$this->errMark, $this->errSync] = array_pop($this->errStack);
    }
    foreach ($state as $prop => $saved) {
        if ($prop === 'errCount') {
            // bookkeeping entry, not a property
            continue;
        }
        $this->{$prop} = $saved;
    }
}
/** Handles a decoding error according to the supplied mode
 *
 * @param int $mode The error mode to apply: MODE_NULL ignores the error, MODE_FATAL throws, anything else substitutes
 * @param int $charOffset The character offset to report in the exception message
 * @param int $byteOffset The byte offset to report, also recorded as the new error sync position
 * @return int|null U+FFFD when substituting, or null when the error is ignored
 * @throws DecoderException in fatal mode
 */
protected function errDec(int $mode, int $charOffset, int $byteOffset): ?int {
    if ($mode === self::MODE_NULL) {
        return null;
    }
    // expose the error position so that callers can tell this apart from a literal replacement character
    $this->posErr = $this->posChar;
    if (!$this->selfSynchronizing) {
        // push the previous error's data and record this one, so that it can be skipped when seeking back
        $this->errStack[] = [$this->errMark, $this->errSync];
        $this->errMark = $this->posByte;
        $this->errSync = $byteOffset;
    }
    if ($mode !== self::MODE_FATAL) {
        return 0xFFFD;
    }
    throw new DecoderException("Invalid code sequence at character offset $charOffset (byte offset $byteOffset)", self::E_INVALID_BYTE);
}
/** Handles an encoding error
 *
 * @param bool $htmlMode Whether to substitute an HTML numeric character reference instead of throwing
 * @param mixed $data The code point which could not be encoded
 * @return string The character reference for $data, in HTML mode
 * @throws EncoderException when not in HTML mode; not applicable to Unicode transformation formats
 */
protected static function errEnc(bool $htmlMode, $data = null): string {
    if (!$htmlMode) {
        throw new EncoderException("Code point $data not available in target encoding", Coder::E_UNAVAILABLE_CODE_POINT);
    }
    return "&#".(string) $data.";";
}
}

177
lib/Encoding/Big5.php

File diff suppressed because one or more lines are too long

20
lib/Encoding/Coder.php

@ -0,0 +1,20 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
/** Contract for classes which can encode a single code point, along with the error codes used when encoding fails */
interface Coder {
/** Presumably the exception code for a code point outside the Unicode range (see encode() below); the code raising it is not part of this interface */
public const E_INVALID_CODE_POINT = 1;
/** Exception code for a code point which has no representation in the target encoding */
public const E_UNAVAILABLE_CODE_POINT = 3;
/** Exception code for an encoding label which has no encoder */
public const E_UNAVAILABLE_ENCODER = 4;
/** Returns the encoding of $codePoint as a byte string
*
* @param int $codePoint The Unicode code point to encode. If less than 0 or greater than 1114111, an exception is thrown
* @param bool $fatal Whether an exception will be thrown if the code point cannot be encoded into a character; if false HTML character references will be substituted
* @return string The bytes representing the code point, or its substitute
*/
public static function encode(int $codePoint, bool $fatal = true): string;
}

105
lib/Encoding/Decoder.php

@ -0,0 +1,105 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
/** Contract for string decoders which can step, seek and peek through a string one character at a time */
interface Decoder {
    /** Exception code for invalid input encountered while decoding */
    public const E_INVALID_BYTE = 2;

    /** Constructs a new decoder
     *
     * @param string $string The string to decode
     * @param bool $fatal If true, throw exceptions when encountering invalid input. If false, substitute U+FFFD REPLACEMENT CHARACTER instead
     * @param bool $allowSurrogates If true, treats surrogate characters as valid input; this only affects UTF-8 and UTF-16 encodings
     */
    public function __construct(string $string, bool $fatal = false, bool $allowSurrogates = false);

    /** Returns the current byte position of the decoder */
    public function posByte(): int;

    /** Returns the current character position of the decoder */
    public function posChar(): int;

    /** Retrieves the next character in the string, in UTF-8 encoding
     *
     * The returned character may be a replacement character, or the empty string if the end of the string has been reached
     */
    public function nextChar(): string;

    /** Decodes the next character from the string and returns its code point number
     *
     * If the end of the string has been reached, false is returned
     *
     * @return int|false
     */
    public function nextCode();

    /** Advances $distance characters through the string
     *
     * If the end (or beginning) of the string was reached before the end of the operation, the remaining number of requested characters is returned
     *
     * @param int $distance The number of characters to advance. If negative, the operation will seek back toward the beginning of the string
     */
    public function seek(int $distance): int;

    /** Seeks to the start of the string
     *
     * This is usually faster than using the seek method for the same purpose
     */
    public function rewind(): void;

    /** Retrieves the next $num characters (in UTF-8 encoding) from the string without advancing the character pointer
     *
     * @param int $num The number of characters to retrieve
     */
    public function peekChar(int $num = 1): string;

    /** Retrieves the next $num code points from the string, without advancing the character pointer
     *
     * @param int $num The number of code points to retrieve
     */
    public function peekCode(int $num = 1): array;

    /** Calculates the length of the string in bytes */
    public function lenByte(): int;

    /** Calculates the length of the string in code points
     *
     * Note that this may involve processing to the end of the string
     */
    public function lenChar(): int;

    /** Returns whether the character pointer is at the end of the string */
    public function eof(): bool;

    /** Generates an iterator which steps through each character in the string */
    public function chars(): \Generator;

    /** Generates an iterator which steps through each code point in the string */
    public function codes(): \Generator;

    /** Fast-forwards through a span of ASCII characters matching the supplied mask, returning any consumed characters
     *
     * The mask must consist only of ASCII characters.
     *
     * Note that if the empty string is returned, this does not necessarily signal the end of the string
     *
     * @param string $mask The set of ASCII characters to match
     * @param int|null $length The maximum number of characters to advance by; null means no limit
     */
    public function asciiSpan(string $mask, ?int $length = null): string;

    /** Fast-forwards through a span of ASCII characters not matching the supplied mask, returning any consumed characters
     *
     * The mask must consist only of ASCII characters.
     *
     * Note that if the empty string is returned, this does not necessarily signal the end of the string
     *
     * @param string $mask The set of ASCII characters to not match
     * @param int|null $length The maximum number of characters to advance by; null means no limit
     */
    public function asciiSpanNot(string $mask, ?int $length = null): string;
}

147
lib/Encoding/EUCJP.php

File diff suppressed because one or more lines are too long

137
lib/Encoding/EUCKR.php

File diff suppressed because one or more lines are too long

322
lib/Encoding/Encoder.php

@ -0,0 +1,322 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
use MensBeam\Intl\Encoding as Matcher;
/** Encodes Unicode code points into the encoding selected by label at construction */
class Encoder {
    /** @var string[] The encoder class for each stateless encoding, indexed by encoding name
     *
     * ISO-2022-JP is deliberately absent: its encoder takes an additional mode argument and is dispatched separately
     */
    protected const ENCODERS = [
        "UTF-8"          => UTF8::class,
        "Big5"           => Big5::class,
        "EUC-JP"         => EUCJP::class,
        "EUC-KR"         => EUCKR::class,
        "gb18030"        => GB18030::class,
        "GBK"            => GBK::class,
        "IBM866"         => IBM866::class,
        "ISO-8859-2"     => ISO88592::class,
        "ISO-8859-3"     => ISO88593::class,
        "ISO-8859-4"     => ISO88594::class,
        "ISO-8859-5"     => ISO88595::class,
        "ISO-8859-6"     => ISO88596::class,
        "ISO-8859-7"     => ISO88597::class,
        "ISO-8859-8"     => ISO88598::class,
        "ISO-8859-8-I"   => ISO88598I::class,
        "ISO-8859-10"    => ISO885910::class,
        "ISO-8859-13"    => ISO885913::class,
        "ISO-8859-14"    => ISO885914::class,
        "ISO-8859-15"    => ISO885915::class,
        "ISO-8859-16"    => ISO885916::class,
        "KOI8-R"         => KOI8R::class,
        "KOI8-U"         => KOI8U::class,
        "macintosh"      => Macintosh::class,
        "Shift_JIS"      => ShiftJIS::class,
        "windows-1250"   => Windows1250::class,
        "windows-1251"   => Windows1251::class,
        "windows-1252"   => Windows1252::class,
        "windows-1253"   => Windows1253::class,
        "windows-1254"   => Windows1254::class,
        "windows-1255"   => Windows1255::class,
        "windows-1256"   => Windows1256::class,
        "windows-1257"   => Windows1257::class,
        "windows-1258"   => Windows1258::class,
        "windows-874"    => Windows874::class,
        "x-mac-cyrillic" => XMacCyrillic::class,
        "x-user-defined" => XUserDefined::class,
    ];

    /** @var string The name of the selected encoding, as reported by the label matcher */
    protected $name;
    /** @var bool Whether unencodable code points throw (true) or are replaced by HTML character references (false) */
    protected $fatal = true;
    /** @var mixed The ISO-2022-JP encoder mode carried between encodeChar() calls; null until that encoder sets it */
    protected $mode = null;

    /** Constructs a new encoder for the specified $label
     *
     * @param string $label One of the encoding labels listed in the specification e.g. "utf-8", "Latin1", "shift_JIS"
     * @param bool $fatal If true (the default) exceptions will be thrown when a character cannot be represented in the target encoding; if false HTML character references will be substituted instead
     * @throws EncoderException if the label is unknown or its encoding has no encoder
     *
     * @see https://encoding.spec.whatwg.org#names-and-labels
     */
    public function __construct(string $label, bool $fatal = true) {
        $l = Matcher::matchLabel($label);
        if (!$l || !$l['encoder']) {
            throw new EncoderException("Label '$label' does not have an encoder", Coder::E_UNAVAILABLE_ENCODER);
        }
        $this->name = $l['name'];
        $this->fatal = $fatal;
    }

    /** Encodes a series of code point numbers into a string
     *
     * For ISO-2022-JP the output is terminated automatically; the mode used here is local to the call and independent of encodeChar()
     *
     * @param iterable $codePoints An iterable set of integers representing code points in the Unicode range
     */
    public function encode(iterable $codePoints): string {
        $out = "";
        if ($this->name === "ISO-2022-JP") {
            // start from an explicit null mode rather than an undefined variable
            $mode = null;
            foreach ($codePoints as $codePoint) {
                $out .= ISO2022JP::encode($codePoint, $this->fatal, $mode);
            }
            // a null code point asks the encoder for any terminating bytes
            $out .= ISO2022JP::encode(null, $this->fatal, $mode);
        } elseif (isset(self::ENCODERS[$this->name])) {
            $class = self::ENCODERS[$this->name];
            foreach ($codePoints as $codePoint) {
                $out .= $class::encode($codePoint, $this->fatal);
            }
        }
        return $out;
    }

    /** Encodes a single character into a string
     *
     * When using this method to encode a string, the finalize() method should be called to terminate the string
     *
     * @param int $codePoint An integer representing the Unicode code point number to encode
     */
    public function encodeChar(int $codePoint): string {
        if ($this->name === "ISO-2022-JP") {
            return ISO2022JP::encode($codePoint, $this->fatal, $this->mode);
        }
        // the constructor only accepts labels which have an encoder, so the name is expected to be present in the table
        $class = self::ENCODERS[$this->name];
        return $class::encode($codePoint, $this->fatal);
    }

    /** Finalizes a string, returning any terminal bytes to append to the output
     *
     * For the ISO-2022-JP encoding, this method must be called after the last character is encoded to correctly encode a string; for other encodings this is a no-op
     */
    public function finalize(): string {
        if ($this->name !== "ISO-2022-JP") {
            return "";
        }
        return ISO2022JP::encode(null, $this->fatal, $this->mode);
    }
}

13
lib/Encoding/GB18030.php

@ -0,0 +1,13 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
/** The gb18030 encoding; all behaviour is inherited from GBCommon, configured by the constants below */
class GB18030 extends GBCommon {
/** Marks this class as the gb18030 variant rather than GBK; presumably read by GBCommon, which is not visible here */
protected const GBK = false;
/** The name of the encoding */
public const NAME = "gb18030";
/** The labels which select this encoding */
public const LABELS = ["gb18030"];
}

211
lib/Encoding/GBCommon.php

File diff suppressed because one or more lines are too long

23
lib/Encoding/GBK.php

@ -0,0 +1,23 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
/** The GBK encoding; all behaviour is inherited from GBCommon, configured by the constants below */
class GBK extends GBCommon {
/** Marks this class as the GBK variant rather than gb18030; presumably read by GBCommon, which is not visible here */
protected const GBK = true;
/** The name of the encoding */
public const NAME = "GBK";
/** The labels which select this encoding */
public const LABELS = [
"chinese",
"csgb2312",
"csiso58gb231280",
"gb2312",
"gb_2312",
"gb_2312-80",
"gbk",
"iso-ir-58",
"x-gbk",
];
}

21
lib/Encoding/IBM866.php

@ -0,0 +1,21 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
/** The IBM866 single-byte encoding; this class only supplies the name, labels and lookup tables, with the logic presumably living in SingleByteEncoding (not visible here) */
class IBM866 extends SingleByteEncoding {
/** The name of the encoding */
public const NAME = "IBM866";
/** The labels which select this encoding */
public const LABELS = [
"866",
"cp866",
"csibm866",
"ibm866",
];
/** Decoding table: the character (as a UTF-8 string) for each byte from 0x80 through 0xFF, in byte order */
protected const TABLE_DEC_CHAR = ["\u{410}","\u{411}","\u{412}","\u{413}","\u{414}","\u{415}","\u{416}","\u{417}","\u{418}","\u{419}","\u{41a}","\u{41b}","\u{41c}","\u{41d}","\u{41e}","\u{41f}","\u{420}","\u{421}","\u{422}","\u{423}","\u{424}","\u{425}","\u{426}","\u{427}","\u{428}","\u{429}","\u{42a}","\u{42b}","\u{42c}","\u{42d}","\u{42e}","\u{42f}","\u{430}","\u{431}","\u{432}","\u{433}","\u{434}","\u{435}","\u{436}","\u{437}","\u{438}","\u{439}","\u{43a}","\u{43b}","\u{43c}","\u{43d}","\u{43e}","\u{43f}","\u{2591}","\u{2592}","\u{2593}","\u{2502}","\u{2524}","\u{2561}","\u{2562}","\u{2556}","\u{2555}","\u{2563}","\u{2551}","\u{2557}","\u{255d}","\u{255c}","\u{255b}","\u{2510}","\u{2514}","\u{2534}","\u{252c}","\u{251c}","\u{2500}","\u{253c}","\u{255e}","\u{255f}","\u{255a}","\u{2554}","\u{2569}","\u{2566}","\u{2560}","\u{2550}","\u{256c}","\u{2567}","\u{2568}","\u{2564}","\u{2565}","\u{2559}","\u{2558}","\u{2552}","\u{2553}","\u{256b}","\u{256a}","\u{2518}","\u{250c}","\u{2588}","\u{2584}","\u{258c}","\u{2590}","\u{2580}","\u{440}","\u{441}","\u{442}","\u{443}","\u{444}","\u{445}","\u{446}","\u{447}","\u{448}","\u{449}","\u{44a}","\u{44b}","\u{44c}","\u{44d}","\u{44e}","\u{44f}","\u{401}","\u{451}","\u{404}","\u{454}","\u{407}","\u{457}","\u{40e}","\u{45e}","\u{b0}","\u{2219}","\u{b7}","\u{221a}","\u{2116}","\u{a4}","\u{25a0}","\u{a0}"];
/** Decoding table: the same characters as TABLE_DEC_CHAR, as code point numbers */
protected const TABLE_DEC_CODE = [1040,1041,1042,1043,1044,1045,1046,1047,1048,1049,1050,1051,1052,1053,1054,1055,1056,1057,1058,1059,1060,1061,1062,1063,1064,1065,1066,1067,1068,1069,1070,1071,1072,1073,1074,1075,1076,1077,1078,1079,1080,1081,1082,1083,1084,1085,1086,1087,9617,9618,9619,9474,9508,9569,9570,9558,9557,9571,9553,9559,9565,9564,9563,9488,9492,9524,9516,9500,9472,9532,9566,9567,9562,9556,9577,9574,9568,9552,9580,9575,9576,9572,9573,9561,9560,9554,9555,9579,9578,9496,9484,9608,9604,9612,9616,9600,1088,1089,1090,1091,1092,1093,1094,1095,1096,1097,1098,1099,1100,1101,1102,1103,1025,1105,1028,1108,1031,1111,1038,1118,176,8729,183,8730,8470,164,9632,160];
/** Encoding table: maps a code point number to its single byte; entries without an explicit key take the next integer key */
protected const TABLE_ENC = [160=>"\xFF",164=>"\xFD",176=>"\xF8",183=>"\xFA",1025=>"\xF0",1028=>"\xF2",1031=>"\xF4",1038=>"\xF6",1040=>"\x80","\x81","\x82","\x83","\x84","\x85","\x86","\x87","\x88","\x89","\x8A","\x8B","\x8C","\x8D","\x8E","\x8F","\x90","\x91","\x92","\x93","\x94","\x95","\x96","\x97","\x98","\x99","\x9A","\x9B","\x9C","\x9D","\x9E","\x9F","\xA0","\xA1","\xA2","\xA3","\xA4","\xA5","\xA6","\xA7","\xA8","\xA9","\xAA","\xAB","\xAC","\xAD","\xAE","\xAF","\xE0","\xE1","\xE2","\xE3","\xE4","\xE5","\xE6","\xE7","\xE8","\xE9","\xEA","\xEB","\xEC","\xED","\xEE","\xEF",1105=>"\xF1",1108=>"\xF3",1111=>"\xF5",1118=>"\xF7",8470=>"\xFC",8729=>"\xF9","\xFB",9472=>"\xC4",9474=>"\xB3",9484=>"\xDA",9488=>"\xBF",9492=>"\xC0",9496=>"\xD9",9500=>"\xC3",9508=>"\xB4",9516=>"\xC2",9524=>"\xC1",9532=>"\xC5",9552=>"\xCD","\xBA","\xD5","\xD6","\xC9","\xB8","\xB7","\xBB","\xD4","\xD3","\xC8","\xBE","\xBD","\xBC","\xC6","\xC7","\xCC","\xB5","\xB6","\xB9","\xD1","\xD2","\xCB","\xCF","\xD0","\xCA","\xD8","\xD7","\xCE",9600=>"\xDF",9604=>"\xDC",9608=>"\xDB",9612=>"\xDD",9616=>"\xDE","\xB0","\xB1","\xB2",9632=>"\xFE"];
}

372
lib/Encoding/ISO2022JP.php

File diff suppressed because one or more lines are too long

24
lib/Encoding/ISO885910.php

@ -0,0 +1,24 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
/** The ISO-8859-10 single-byte encoding; this class only supplies the name, labels and lookup tables, with the logic presumably living in SingleByteEncoding (not visible here) */
class ISO885910 extends SingleByteEncoding {
/** The name of the encoding */
public const NAME = "ISO-8859-10";
/** The labels which select this encoding */
public const LABELS = [
"csisolatin6",
"iso-8859-10",
"iso-ir-157",
"iso8859-10",
"iso885910",
"l6",
"latin6",
];
/** Decoding table: the character (as a UTF-8 string) for each byte from 0x80 through 0xFF, in byte order */
protected const TABLE_DEC_CHAR = ["\u{80}","\u{81}","\u{82}","\u{83}","\u{84}","\u{85}","\u{86}","\u{87}","\u{88}","\u{89}","\u{8a}","\u{8b}","\u{8c}","\u{8d}","\u{8e}","\u{8f}","\u{90}","\u{91}","\u{92}","\u{93}","\u{94}","\u{95}","\u{96}","\u{97}","\u{98}","\u{99}","\u{9a}","\u{9b}","\u{9c}","\u{9d}","\u{9e}","\u{9f}","\u{a0}","\u{104}","\u{112}","\u{122}","\u{12a}","\u{128}","\u{136}","\u{a7}","\u{13b}","\u{110}","\u{160}","\u{166}","\u{17d}","\u{ad}","\u{16a}","\u{14a}","\u{b0}","\u{105}","\u{113}","\u{123}","\u{12b}","\u{129}","\u{137}","\u{b7}","\u{13c}","\u{111}","\u{161}","\u{167}","\u{17e}","\u{2015}","\u{16b}","\u{14b}","\u{100}","\u{c1}","\u{c2}","\u{c3}","\u{c4}","\u{c5}","\u{c6}","\u{12e}","\u{10c}","\u{c9}","\u{118}","\u{cb}","\u{116}","\u{cd}","\u{ce}","\u{cf}","\u{d0}","\u{145}","\u{14c}","\u{d3}","\u{d4}","\u{d5}","\u{d6}","\u{168}","\u{d8}","\u{172}","\u{da}","\u{db}","\u{dc}","\u{dd}","\u{de}","\u{df}","\u{101}","\u{e1}","\u{e2}","\u{e3}","\u{e4}","\u{e5}","\u{e6}","\u{12f}","\u{10d}","\u{e9}","\u{119}","\u{eb}","\u{117}","\u{ed}","\u{ee}","\u{ef}","\u{f0}","\u{146}","\u{14d}","\u{f3}","\u{f4}","\u{f5}","\u{f6}","\u{169}","\u{f8}","\u{173}","\u{fa}","\u{fb}","\u{fc}","\u{fd}","\u{fe}","\u{138}"];
/** Decoding table: the same characters as TABLE_DEC_CHAR, as code point numbers */
protected const TABLE_DEC_CODE = [128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,260,274,290,298,296,310,167,315,272,352,358,381,173,362,330,176,261,275,291,299,297,311,183,316,273,353,359,382,8213,363,331,256,193,194,195,196,197,198,302,268,201,280,203,278,205,206,207,208,325,332,211,212,213,214,360,216,370,218,219,220,221,222,223,257,225,226,227,228,229,230,303,269,233,281,235,279,237,238,239,240,326,333,243,244,245,246,361,248,371,250,251,252,253,254,312];
/** Encoding table: maps a code point number to its single byte; entries without an explicit key take the next integer key */
protected const TABLE_ENC = [128=>"\x80","\x81","\x82","\x83","\x84","\x85","\x86","\x87","\x88","\x89","\x8A","\x8B","\x8C","\x8D","\x8E","\x8F","\x90","\x91","\x92","\x93","\x94","\x95","\x96","\x97","\x98","\x99","\x9A","\x9B","\x9C","\x9D","\x9E","\x9F","\xA0",167=>"\xA7",173=>"\xAD",176=>"\xB0",183=>"\xB7",193=>"\xC1","\xC2","\xC3","\xC4","\xC5","\xC6",201=>"\xC9",203=>"\xCB",205=>"\xCD","\xCE","\xCF","\xD0",211=>"\xD3","\xD4","\xD5","\xD6",216=>"\xD8",218=>"\xDA","\xDB","\xDC","\xDD","\xDE","\xDF",225=>"\xE1","\xE2","\xE3","\xE4","\xE5","\xE6",233=>"\xE9",235=>"\xEB",237=>"\xED","\xEE","\xEF","\xF0",243=>"\xF3","\xF4","\xF5","\xF6",248=>"\xF8",250=>"\xFA","\xFB","\xFC","\xFD","\xFE",256=>"\xC0","\xE0",260=>"\xA1","\xB1",268=>"\xC8","\xE8",272=>"\xA9","\xB9","\xA2","\xB2",278=>"\xCC","\xEC","\xCA","\xEA",290=>"\xA3","\xB3",296=>"\xA5","\xB5","\xA4","\xB4",302=>"\xC7","\xE7",310=>"\xA6","\xB6","\xFF",315=>"\xA8","\xB8",325=>"\xD1","\xF1",330=>"\xAF","\xBF","\xD2","\xF2",352=>"\xAA","\xBA",358=>"\xAB","\xBB","\xD7","\xF7","\xAE","\xBE",370=>"\xD9","\xF9",381=>"\xAC","\xBC",8213=>"\xBD"];
}

20
lib/Encoding/ISO885913.php

@ -0,0 +1,20 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
/** The ISO-8859-13 single-byte encoding; this class only supplies the name, labels and lookup tables, with the logic presumably living in SingleByteEncoding (not visible here) */
class ISO885913 extends SingleByteEncoding {
/** The name of the encoding */
public const NAME = "ISO-8859-13";
/** The labels which select this encoding */
public const LABELS = [
"iso-8859-13",
"iso8859-13",
"iso885913",
];
/** Decoding table: the character (as a UTF-8 string) for each byte from 0x80 through 0xFF, in byte order */
protected const TABLE_DEC_CHAR = ["\u{80}","\u{81}","\u{82}","\u{83}","\u{84}","\u{85}","\u{86}","\u{87}","\u{88}","\u{89}","\u{8a}","\u{8b}","\u{8c}","\u{8d}","\u{8e}","\u{8f}","\u{90}","\u{91}","\u{92}","\u{93}","\u{94}","\u{95}","\u{96}","\u{97}","\u{98}","\u{99}","\u{9a}","\u{9b}","\u{9c}","\u{9d}","\u{9e}","\u{9f}","\u{a0}","\u{201d}","\u{a2}","\u{a3}","\u{a4}","\u{201e}","\u{a6}","\u{a7}","\u{d8}","\u{a9}","\u{156}","\u{ab}","\u{ac}","\u{ad}","\u{ae}","\u{c6}","\u{b0}","\u{b1}","\u{b2}","\u{b3}","\u{201c}","\u{b5}","\u{b6}","\u{b7}","\u{f8}","\u{b9}","\u{157}","\u{bb}","\u{bc}","\u{bd}","\u{be}","\u{e6}","\u{104}","\u{12e}","\u{100}","\u{106}","\u{c4}","\u{c5}","\u{118}","\u{112}","\u{10c}","\u{c9}","\u{179}","\u{116}","\u{122}","\u{136}","\u{12a}","\u{13b}","\u{160}","\u{143}","\u{145}","\u{d3}","\u{14c}","\u{d5}","\u{d6}","\u{d7}","\u{172}","\u{141}","\u{15a}","\u{16a}","\u{dc}","\u{17b}","\u{17d}","\u{df}","\u{105}","\u{12f}","\u{101}","\u{107}","\u{e4}","\u{e5}","\u{119}","\u{113}","\u{10d}","\u{e9}","\u{17a}","\u{117}","\u{123}","\u{137}","\u{12b}","\u{13c}","\u{161}","\u{144}","\u{146}","\u{f3}","\u{14d}","\u{f5}","\u{f6}","\u{f7}","\u{173}","\u{142}","\u{15b}","\u{16b}","\u{fc}","\u{17c}","\u{17e}","\u{2019}"];
/** Decoding table: the same characters as TABLE_DEC_CHAR, as code point numbers */
protected const TABLE_DEC_CODE = [128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,8221,162,163,164,8222,166,167,216,169,342,171,172,173,174,198,176,177,178,179,8220,181,182,183,248,185,343,187,188,189,190,230,260,302,256,262,196,197,280,274,268,201,377,278,290,310,298,315,352,323,325,211,332,213,214,215,370,321,346,362,220,379,381,223,261,303,257,263,228,229,281,275,269,233,378,279,291,311,299,316,353,324,326,243,333,245,246,247,371,322,347,363,252,380,382,8217];
/** Encoding table: maps a code point number to its single byte; entries without an explicit key take the next integer key */
protected const TABLE_ENC = [128=>"\x80","\x81","\x82","\x83","\x84","\x85","\x86","\x87","\x88","\x89","\x8A","\x8B","\x8C","\x8D","\x8E","\x8F","\x90","\x91","\x92","\x93","\x94","\x95","\x96","\x97","\x98","\x99","\x9A","\x9B","\x9C","\x9D","\x9E","\x9F","\xA0",162=>"\xA2","\xA3","\xA4",166=>"\xA6","\xA7",169=>"\xA9",171=>"\xAB","\xAC","\xAD","\xAE",176=>"\xB0","\xB1","\xB2","\xB3",181=>"\xB5","\xB6","\xB7",185=>"\xB9",187=>"\xBB","\xBC","\xBD","\xBE",196=>"\xC4","\xC5","\xAF",201=>"\xC9",211=>"\xD3",213=>"\xD5","\xD6","\xD7","\xA8",220=>"\xDC",223=>"\xDF",228=>"\xE4","\xE5","\xBF",233=>"\xE9",243=>"\xF3",245=>"\xF5","\xF6","\xF7","\xB8",252=>"\xFC",256=>"\xC2","\xE2",260=>"\xC0","\xE0","\xC3","\xE3",268=>"\xC8","\xE8",274=>"\xC7","\xE7",278=>"\xCB","\xEB","\xC6","\xE6",290=>"\xCC","\xEC",298=>"\xCE","\xEE",302=>"\xC1","\xE1",310=>"\xCD","\xED",315=>"\xCF","\xEF",321=>"\xD9","\xF9","\xD1","\xF1","\xD2","\xF2",332=>"\xD4","\xF4",342=>"\xAA","\xBA",346=>"\xDA","\xFA",352=>"\xD0","\xF0",362=>"\xDB","\xFB",370=>"\xD8","\xF8",377=>"\xCA","\xEA","\xDD","\xFD","\xDE","\xFE",8217=>"\xFF",8220=>"\xB4","\xA1","\xA5"];
}

20
lib/Encoding/ISO885914.php

@ -0,0 +1,20 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
/** The ISO-8859-14 single-byte encoding; this class only supplies the name, labels and lookup tables, with the logic presumably living in SingleByteEncoding (not visible here) */
class ISO885914 extends SingleByteEncoding {
/** The name of the encoding */
public const NAME = "ISO-8859-14";
/** The labels which select this encoding */
public const LABELS = [
"iso-8859-14",
"iso8859-14",
"iso885914",
];
/** Decoding table: the character (as a UTF-8 string) for each byte from 0x80 through 0xFF, in byte order */
protected const TABLE_DEC_CHAR = ["\u{80}","\u{81}","\u{82}","\u{83}","\u{84}","\u{85}","\u{86}","\u{87}","\u{88}","\u{89}","\u{8a}","\u{8b}","\u{8c}","\u{8d}","\u{8e}","\u{8f}","\u{90}","\u{91}","\u{92}","\u{93}","\u{94}","\u{95}","\u{96}","\u{97}","\u{98}","\u{99}","\u{9a}","\u{9b}","\u{9c}","\u{9d}","\u{9e}","\u{9f}","\u{a0}","\u{1e02}","\u{1e03}","\u{a3}","\u{10a}","\u{10b}","\u{1e0a}","\u{a7}","\u{1e80}","\u{a9}","\u{1e82}","\u{1e0b}","\u{1ef2}","\u{ad}","\u{ae}","\u{178}","\u{1e1e}","\u{1e1f}","\u{120}","\u{121}","\u{1e40}","\u{1e41}","\u{b6}","\u{1e56}","\u{1e81}","\u{1e57}","\u{1e83}","\u{1e60}","\u{1ef3}","\u{1e84}","\u{1e85}","\u{1e61}","\u{c0}","\u{c1}","\u{c2}","\u{c3}","\u{c4}","\u{c5}","\u{c6}","\u{c7}","\u{c8}","\u{c9}","\u{ca}","\u{cb}","\u{cc}","\u{cd}","\u{ce}","\u{cf}","\u{174}","\u{d1}","\u{d2}","\u{d3}","\u{d4}","\u{d5}","\u{d6}","\u{1e6a}","\u{d8}","\u{d9}","\u{da}","\u{db}","\u{dc}","\u{dd}","\u{176}","\u{df}","\u{e0}","\u{e1}","\u{e2}","\u{e3}","\u{e4}","\u{e5}","\u{e6}","\u{e7}","\u{e8}","\u{e9}","\u{ea}","\u{eb}","\u{ec}","\u{ed}","\u{ee}","\u{ef}","\u{175}","\u{f1}","\u{f2}","\u{f3}","\u{f4}","\u{f5}","\u{f6}","\u{1e6b}","\u{f8}","\u{f9}","\u{fa}","\u{fb}","\u{fc}","\u{fd}","\u{177}","\u{ff}"];
/** Decoding table: the same characters as TABLE_DEC_CHAR, as code point numbers */
protected const TABLE_DEC_CODE = [128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,7682,7683,163,266,267,7690,167,7808,169,7810,7691,7922,173,174,376,7710,7711,288,289,7744,7745,182,7766,7809,7767,7811,7776,7923,7812,7813,7777,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,372,209,210,211,212,213,214,7786,216,217,218,219,220,221,374,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,373,241,242,243,244,245,246,7787,248,249,250,251,252,253,375,255];
/** Encoding table: maps a code point number to its single byte; entries without an explicit key take the next integer key */
protected const TABLE_ENC = [128=>"\x80","\x81","\x82","\x83","\x84","\x85","\x86","\x87","\x88","\x89","\x8A","\x8B","\x8C","\x8D","\x8E","\x8F","\x90","\x91","\x92","\x93","\x94","\x95","\x96","\x97","\x98","\x99","\x9A","\x9B","\x9C","\x9D","\x9E","\x9F","\xA0",163=>"\xA3",167=>"\xA7",169=>"\xA9",173=>"\xAD","\xAE",182=>"\xB6",192=>"\xC0","\xC1","\xC2","\xC3","\xC4","\xC5","\xC6","\xC7","\xC8","\xC9","\xCA","\xCB","\xCC","\xCD","\xCE","\xCF",209=>"\xD1","\xD2","\xD3","\xD4","\xD5","\xD6",216=>"\xD8","\xD9","\xDA","\xDB","\xDC","\xDD",223=>"\xDF","\xE0","\xE1","\xE2","\xE3","\xE4","\xE5","\xE6","\xE7","\xE8","\xE9","\xEA","\xEB","\xEC","\xED","\xEE","\xEF",241=>"\xF1","\xF2","\xF3","\xF4","\xF5","\xF6",248=>"\xF8","\xF9","\xFA","\xFB","\xFC","\xFD",255=>"\xFF",266=>"\xA4","\xA5",288=>"\xB2","\xB3",372=>"\xD0","\xF0","\xDE","\xFE","\xAF",7682=>"\xA1","\xA2",7690=>"\xA6","\xAB",7710=>"\xB0","\xB1",7744=>"\xB4","\xB5",7766=>"\xB7","\xB9",7776=>"\xBB","\xBF",7786=>"\xD7","\xF7",7808=>"\xA8","\xB8","\xAA","\xBA","\xBD","\xBE",7922=>"\xAC","\xBC"];
}

23
lib/Encoding/ISO885915.php

@ -0,0 +1,23 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
/**
 * Data tables for the ISO-8859-15 single-byte encoding.
 *
 * This class only declares constants; any behaviour is inherited from
 * SingleByteEncoding, which is not defined in this file.
 */
class ISO885915 extends SingleByteEncoding {
/** Name of the encoding */
public const NAME = "ISO-8859-15";
/** Lower-case labels for the encoding (presumably the aliases it can be looked up by; confirm against the consumer) */
public const LABELS = [
"csisolatin9",
"iso-8859-15",
"iso8859-15",
"iso885915",
"iso_8859-15",
"l9",
];
/** Characters for the upper half of the byte range: the entry at index i is the character matching byte 0x80 + i in TABLE_ENC */
protected const TABLE_DEC_CHAR = ["\u{80}","\u{81}","\u{82}","\u{83}","\u{84}","\u{85}","\u{86}","\u{87}","\u{88}","\u{89}","\u{8a}","\u{8b}","\u{8c}","\u{8d}","\u{8e}","\u{8f}","\u{90}","\u{91}","\u{92}","\u{93}","\u{94}","\u{95}","\u{96}","\u{97}","\u{98}","\u{99}","\u{9a}","\u{9b}","\u{9c}","\u{9d}","\u{9e}","\u{9f}","\u{a0}","\u{a1}","\u{a2}","\u{a3}","\u{20ac}","\u{a5}","\u{160}","\u{a7}","\u{161}","\u{a9}","\u{aa}","\u{ab}","\u{ac}","\u{ad}","\u{ae}","\u{af}","\u{b0}","\u{b1}","\u{b2}","\u{b3}","\u{17d}","\u{b5}","\u{b6}","\u{b7}","\u{17e}","\u{b9}","\u{ba}","\u{bb}","\u{152}","\u{153}","\u{178}","\u{bf}","\u{c0}","\u{c1}","\u{c2}","\u{c3}","\u{c4}","\u{c5}","\u{c6}","\u{c7}","\u{c8}","\u{c9}","\u{ca}","\u{cb}","\u{cc}","\u{cd}","\u{ce}","\u{cf}","\u{d0}","\u{d1}","\u{d2}","\u{d3}","\u{d4}","\u{d5}","\u{d6}","\u{d7}","\u{d8}","\u{d9}","\u{da}","\u{db}","\u{dc}","\u{dd}","\u{de}","\u{df}","\u{e0}","\u{e1}","\u{e2}","\u{e3}","\u{e4}","\u{e5}","\u{e6}","\u{e7}","\u{e8}","\u{e9}","\u{ea}","\u{eb}","\u{ec}","\u{ed}","\u{ee}","\u{ef}","\u{f0}","\u{f1}","\u{f2}","\u{f3}","\u{f4}","\u{f5}","\u{f6}","\u{f7}","\u{f8}","\u{f9}","\u{fa}","\u{fb}","\u{fc}","\u{fd}","\u{fe}","\u{ff}"];
/** The same table as TABLE_DEC_CHAR, with each entry given as a decimal code point instead of a string */
protected const TABLE_DEC_CODE = [128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,8364,165,352,167,353,169,170,171,172,173,174,175,176,177,178,179,381,181,182,183,382,185,186,187,338,339,376,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255];
/** Inverse of TABLE_DEC_CODE: code point => single byte. Keys are ascending; entries without an explicit key take the previous key plus one */
protected const TABLE_ENC = [128=>"\x80","\x81","\x82","\x83","\x84","\x85","\x86","\x87","\x88","\x89","\x8A","\x8B","\x8C","\x8D","\x8E","\x8F","\x90","\x91","\x92","\x93","\x94","\x95","\x96","\x97","\x98","\x99","\x9A","\x9B","\x9C","\x9D","\x9E","\x9F","\xA0","\xA1","\xA2","\xA3",165=>"\xA5",167=>"\xA7",169=>"\xA9","\xAA","\xAB","\xAC","\xAD","\xAE","\xAF","\xB0","\xB1","\xB2","\xB3",181=>"\xB5","\xB6","\xB7",185=>"\xB9","\xBA","\xBB",191=>"\xBF","\xC0","\xC1","\xC2","\xC3","\xC4","\xC5","\xC6","\xC7","\xC8","\xC9","\xCA","\xCB","\xCC","\xCD","\xCE","\xCF","\xD0","\xD1","\xD2","\xD3","\xD4","\xD5","\xD6","\xD7","\xD8","\xD9","\xDA","\xDB","\xDC","\xDD","\xDE","\xDF","\xE0","\xE1","\xE2","\xE3","\xE4","\xE5","\xE6","\xE7","\xE8","\xE9","\xEA","\xEB","\xEC","\xED","\xEE","\xEF","\xF0","\xF1","\xF2","\xF3","\xF4","\xF5","\xF6","\xF7","\xF8","\xF9","\xFA","\xFB","\xFC","\xFD","\xFE","\xFF",338=>"\xBC","\xBD",352=>"\xA6","\xA8",376=>"\xBE",381=>"\xB4","\xB8",8364=>"\xA4"];
}

18
lib/Encoding/ISO885916.php

@ -0,0 +1,18 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
/**
 * Data tables for the ISO-8859-16 single-byte encoding.
 *
 * This class only declares constants; any behaviour is inherited from
 * SingleByteEncoding, which is not defined in this file.
 */
class ISO885916 extends SingleByteEncoding {
/** Name of the encoding */
public const NAME = "ISO-8859-16";
/** Lower-case labels for the encoding (presumably the aliases it can be looked up by; confirm against the consumer) */
public const LABELS = [
"iso-8859-16",
];
/** Characters for the upper half of the byte range: the entry at index i is the character matching byte 0x80 + i in TABLE_ENC */
protected const TABLE_DEC_CHAR = ["\u{80}","\u{81}","\u{82}","\u{83}","\u{84}","\u{85}","\u{86}","\u{87}","\u{88}","\u{89}","\u{8a}","\u{8b}","\u{8c}","\u{8d}","\u{8e}","\u{8f}","\u{90}","\u{91}","\u{92}","\u{93}","\u{94}","\u{95}","\u{96}","\u{97}","\u{98}","\u{99}","\u{9a}","\u{9b}","\u{9c}","\u{9d}","\u{9e}","\u{9f}","\u{a0}","\u{104}","\u{105}","\u{141}","\u{20ac}","\u{201e}","\u{160}","\u{a7}","\u{161}","\u{a9}","\u{218}","\u{ab}","\u{179}","\u{ad}","\u{17a}","\u{17b}","\u{b0}","\u{b1}","\u{10c}","\u{142}","\u{17d}","\u{201d}","\u{b6}","\u{b7}","\u{17e}","\u{10d}","\u{219}","\u{bb}","\u{152}","\u{153}","\u{178}","\u{17c}","\u{c0}","\u{c1}","\u{c2}","\u{102}","\u{c4}","\u{106}","\u{c6}","\u{c7}","\u{c8}","\u{c9}","\u{ca}","\u{cb}","\u{cc}","\u{cd}","\u{ce}","\u{cf}","\u{110}","\u{143}","\u{d2}","\u{d3}","\u{d4}","\u{150}","\u{d6}","\u{15a}","\u{170}","\u{d9}","\u{da}","\u{db}","\u{dc}","\u{118}","\u{21a}","\u{df}","\u{e0}","\u{e1}","\u{e2}","\u{103}","\u{e4}","\u{107}","\u{e6}","\u{e7}","\u{e8}","\u{e9}","\u{ea}","\u{eb}","\u{ec}","\u{ed}","\u{ee}","\u{ef}","\u{111}","\u{144}","\u{f2}","\u{f3}","\u{f4}","\u{151}","\u{f6}","\u{15b}","\u{171}","\u{f9}","\u{fa}","\u{fb}","\u{fc}","\u{119}","\u{21b}","\u{ff}"];
/** The same table as TABLE_DEC_CHAR, with each entry given as a decimal code point instead of a string */
protected const TABLE_DEC_CODE = [128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,260,261,321,8364,8222,352,167,353,169,536,171,377,173,378,379,176,177,268,322,381,8221,182,183,382,269,537,187,338,339,376,380,192,193,194,258,196,262,198,199,200,201,202,203,204,205,206,207,272,323,210,211,212,336,214,346,368,217,218,219,220,280,538,223,224,225,226,259,228,263,230,231,232,233,234,235,236,237,238,239,273,324,242,243,244,337,246,347,369,249,250,251,252,281,539,255];
/** Inverse of TABLE_DEC_CODE: code point => single byte. Keys are ascending; entries without an explicit key take the previous key plus one */
protected const TABLE_ENC = [128=>"\x80","\x81","\x82","\x83","\x84","\x85","\x86","\x87","\x88","\x89","\x8A","\x8B","\x8C","\x8D","\x8E","\x8F","\x90","\x91","\x92","\x93","\x94","\x95","\x96","\x97","\x98","\x99","\x9A","\x9B","\x9C","\x9D","\x9E","\x9F","\xA0",167=>"\xA7",169=>"\xA9",171=>"\xAB",173=>"\xAD",176=>"\xB0","\xB1",182=>"\xB6","\xB7",187=>"\xBB",192=>"\xC0","\xC1","\xC2",196=>"\xC4",198=>"\xC6","\xC7","\xC8","\xC9","\xCA","\xCB","\xCC","\xCD","\xCE","\xCF",210=>"\xD2","\xD3","\xD4",214=>"\xD6",217=>"\xD9","\xDA","\xDB","\xDC",223=>"\xDF","\xE0","\xE1","\xE2",228=>"\xE4",230=>"\xE6","\xE7","\xE8","\xE9","\xEA","\xEB","\xEC","\xED","\xEE","\xEF",242=>"\xF2","\xF3","\xF4",246=>"\xF6",249=>"\xF9","\xFA","\xFB","\xFC",255=>"\xFF",258=>"\xC3","\xE3","\xA1","\xA2","\xC5","\xE5",268=>"\xB2","\xB9",272=>"\xD0","\xF0",280=>"\xDD","\xFD",321=>"\xA3","\xB3","\xD1","\xF1",336=>"\xD5","\xF5","\xBC","\xBD",346=>"\xD7","\xF7",352=>"\xA6","\xA8",368=>"\xD8","\xF8",376=>"\xBE","\xAC","\xAE","\xAF","\xBF","\xB4","\xB8",536=>"\xAA","\xBA","\xDE","\xFE",8221=>"\xB5","\xA5",8364=>"\xA4"];
}

26
lib/Encoding/ISO88592.php

@ -0,0 +1,26 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
/**
 * Data tables for the ISO-8859-2 single-byte encoding.
 *
 * This class only declares constants; any behaviour is inherited from
 * SingleByteEncoding, which is not defined in this file.
 */
class ISO88592 extends SingleByteEncoding {
/** Name of the encoding */
public const NAME = "ISO-8859-2";
/** Lower-case labels for the encoding (presumably the aliases it can be looked up by; confirm against the consumer) */
public const LABELS = [
"csisolatin2",
"iso-8859-2",
"iso-ir-101",
"iso8859-2",
"iso88592",
"iso_8859-2",
"iso_8859-2:1987",
"l2",
"latin2",
];
/** Characters for the upper half of the byte range: the entry at index i is the character matching byte 0x80 + i in TABLE_ENC */
protected const TABLE_DEC_CHAR = ["\u{80}","\u{81}","\u{82}","\u{83}","\u{84}","\u{85}","\u{86}","\u{87}","\u{88}","\u{89}","\u{8a}","\u{8b}","\u{8c}","\u{8d}","\u{8e}","\u{8f}","\u{90}","\u{91}","\u{92}","\u{93}","\u{94}","\u{95}","\u{96}","\u{97}","\u{98}","\u{99}","\u{9a}","\u{9b}","\u{9c}","\u{9d}","\u{9e}","\u{9f}","\u{a0}","\u{104}","\u{2d8}","\u{141}","\u{a4}","\u{13d}","\u{15a}","\u{a7}","\u{a8}","\u{160}","\u{15e}","\u{164}","\u{179}","\u{ad}","\u{17d}","\u{17b}","\u{b0}","\u{105}","\u{2db}","\u{142}","\u{b4}","\u{13e}","\u{15b}","\u{2c7}","\u{b8}","\u{161}","\u{15f}","\u{165}","\u{17a}","\u{2dd}","\u{17e}","\u{17c}","\u{154}","\u{c1}","\u{c2}","\u{102}","\u{c4}","\u{139}","\u{106}","\u{c7}","\u{10c}","\u{c9}","\u{118}","\u{cb}","\u{11a}","\u{cd}","\u{ce}","\u{10e}","\u{110}","\u{143}","\u{147}","\u{d3}","\u{d4}","\u{150}","\u{d6}","\u{d7}","\u{158}","\u{16e}","\u{da}","\u{170}","\u{dc}","\u{dd}","\u{162}","\u{df}","\u{155}","\u{e1}","\u{e2}","\u{103}","\u{e4}","\u{13a}","\u{107}","\u{e7}","\u{10d}","\u{e9}","\u{119}","\u{eb}","\u{11b}","\u{ed}","\u{ee}","\u{10f}","\u{111}","\u{144}","\u{148}","\u{f3}","\u{f4}","\u{151}","\u{f6}","\u{f7}","\u{159}","\u{16f}","\u{fa}","\u{171}","\u{fc}","\u{fd}","\u{163}","\u{2d9}"];
/** The same table as TABLE_DEC_CHAR, with each entry given as a decimal code point instead of a string */
protected const TABLE_DEC_CODE = [128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,260,728,321,164,317,346,167,168,352,350,356,377,173,381,379,176,261,731,322,180,318,347,711,184,353,351,357,378,733,382,380,340,193,194,258,196,313,262,199,268,201,280,203,282,205,206,270,272,323,327,211,212,336,214,215,344,366,218,368,220,221,354,223,341,225,226,259,228,314,263,231,269,233,281,235,283,237,238,271,273,324,328,243,244,337,246,247,345,367,250,369,252,253,355,729];
/** Inverse of TABLE_DEC_CODE: code point => single byte. Keys are ascending; entries without an explicit key take the previous key plus one */
protected const TABLE_ENC = [128=>"\x80","\x81","\x82","\x83","\x84","\x85","\x86","\x87","\x88","\x89","\x8A","\x8B","\x8C","\x8D","\x8E","\x8F","\x90","\x91","\x92","\x93","\x94","\x95","\x96","\x97","\x98","\x99","\x9A","\x9B","\x9C","\x9D","\x9E","\x9F","\xA0",164=>"\xA4",167=>"\xA7","\xA8",173=>"\xAD",176=>"\xB0",180=>"\xB4",184=>"\xB8",193=>"\xC1","\xC2",196=>"\xC4",199=>"\xC7",201=>"\xC9",203=>"\xCB",205=>"\xCD","\xCE",211=>"\xD3","\xD4",214=>"\xD6","\xD7",218=>"\xDA",220=>"\xDC","\xDD",223=>"\xDF",225=>"\xE1","\xE2",228=>"\xE4",231=>"\xE7",233=>"\xE9",235=>"\xEB",237=>"\xED","\xEE",243=>"\xF3","\xF4",246=>"\xF6","\xF7",250=>"\xFA",252=>"\xFC","\xFD",258=>"\xC3","\xE3","\xA1","\xB1","\xC6","\xE6",268=>"\xC8","\xE8","\xCF","\xEF","\xD0","\xF0",280=>"\xCA","\xEA","\xCC","\xEC",313=>"\xC5","\xE5",317=>"\xA5","\xB5",321=>"\xA3","\xB3","\xD1","\xF1",327=>"\xD2","\xF2",336=>"\xD5","\xF5",340=>"\xC0","\xE0",344=>"\xD8","\xF8","\xA6","\xB6",350=>"\xAA","\xBA","\xA9","\xB9","\xDE","\xFE","\xAB","\xBB",366=>"\xD9","\xF9","\xDB","\xFB",377=>"\xAC","\xBC","\xAF","\xBF","\xAE","\xBE",711=>"\xB7",728=>"\xA2","\xFF",731=>"\xB2",733=>"\xBD"];
}

26
lib/Encoding/ISO88593.php

@ -0,0 +1,26 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
/**
 * Data tables for the ISO-8859-3 single-byte encoding.
 *
 * This class only declares constants; any behaviour is inherited from
 * SingleByteEncoding, which is not defined in this file.
 */
class ISO88593 extends SingleByteEncoding {
/** Name of the encoding */
public const NAME = "ISO-8859-3";
/** Lower-case labels for the encoding (presumably the aliases it can be looked up by; confirm against the consumer) */
public const LABELS = [
"csisolatin3",
"iso-8859-3",
"iso-ir-109",
"iso8859-3",
"iso88593",
"iso_8859-3",
"iso_8859-3:1988",
"l3",
"latin3",
];
/**
 * Characters for the upper half of the byte range: the entry at index i is the character matching byte 0x80 + i in TABLE_ENC.
 * The table is sparse: explicit keys (38, 47, 63, 68, 81, 100, 113) resume numbering after a skipped index, and skipped indices have no entry.
 */
protected const TABLE_DEC_CHAR = ["\u{80}","\u{81}","\u{82}","\u{83}","\u{84}","\u{85}","\u{86}","\u{87}","\u{88}","\u{89}","\u{8a}","\u{8b}","\u{8c}","\u{8d}","\u{8e}","\u{8f}","\u{90}","\u{91}","\u{92}","\u{93}","\u{94}","\u{95}","\u{96}","\u{97}","\u{98}","\u{99}","\u{9a}","\u{9b}","\u{9c}","\u{9d}","\u{9e}","\u{9f}","\u{a0}","\u{126}","\u{2d8}","\u{a3}","\u{a4}",38=>"\u{124}","\u{a7}","\u{a8}","\u{130}","\u{15e}","\u{11e}","\u{134}","\u{ad}",47=>"\u{17b}","\u{b0}","\u{127}","\u{b2}","\u{b3}","\u{b4}","\u{b5}","\u{125}","\u{b7}","\u{b8}","\u{131}","\u{15f}","\u{11f}","\u{135}","\u{bd}",63=>"\u{17c}","\u{c0}","\u{c1}","\u{c2}",68=>"\u{c4}","\u{10a}","\u{108}","\u{c7}","\u{c8}","\u{c9}","\u{ca}","\u{cb}","\u{cc}","\u{cd}","\u{ce}","\u{cf}",81=>"\u{d1}","\u{d2}","\u{d3}","\u{d4}","\u{120}","\u{d6}","\u{d7}","\u{11c}","\u{d9}","\u{da}","\u{db}","\u{dc}","\u{16c}","\u{15c}","\u{df}","\u{e0}","\u{e1}","\u{e2}",100=>"\u{e4}","\u{10b}","\u{109}","\u{e7}","\u{e8}","\u{e9}","\u{ea}","\u{eb}","\u{ec}","\u{ed}","\u{ee}","\u{ef}",113=>"\u{f1}","\u{f2}","\u{f3}","\u{f4}","\u{121}","\u{f6}","\u{f7}","\u{11d}","\u{f9}","\u{fa}","\u{fb}","\u{fc}","\u{16d}","\u{15d}","\u{2d9}"];
/** The same table as TABLE_DEC_CHAR (same keys, same gaps), with each entry given as a decimal code point instead of a string */
protected const TABLE_DEC_CODE = [128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,294,728,163,164,38=>292,167,168,304,350,286,308,173,47=>379,176,295,178,179,180,181,293,183,184,305,351,287,309,189,63=>380,192,193,194,68=>196,266,264,199,200,201,202,203,204,205,206,207,81=>209,210,211,212,288,214,215,284,217,218,219,220,364,348,223,224,225,226,100=>228,267,265,231,232,233,234,235,236,237,238,239,113=>241,242,243,244,289,246,247,285,249,250,251,252,365,349,729];
/** Inverse of TABLE_DEC_CODE: code point => single byte. Keys are ascending; entries without an explicit key take the previous key plus one */
protected const TABLE_ENC = [128=>"\x80","\x81","\x82","\x83","\x84","\x85","\x86","\x87","\x88","\x89","\x8A","\x8B","\x8C","\x8D","\x8E","\x8F","\x90","\x91","\x92","\x93","\x94","\x95","\x96","\x97","\x98","\x99","\x9A","\x9B","\x9C","\x9D","\x9E","\x9F","\xA0",163=>"\xA3","\xA4",167=>"\xA7","\xA8",173=>"\xAD",176=>"\xB0",178=>"\xB2","\xB3","\xB4","\xB5",183=>"\xB7","\xB8",189=>"\xBD",192=>"\xC0","\xC1","\xC2",196=>"\xC4",199=>"\xC7","\xC8","\xC9","\xCA","\xCB","\xCC","\xCD","\xCE","\xCF",209=>"\xD1","\xD2","\xD3","\xD4",214=>"\xD6","\xD7",217=>"\xD9","\xDA","\xDB","\xDC",223=>"\xDF","\xE0","\xE1","\xE2",228=>"\xE4",231=>"\xE7","\xE8","\xE9","\xEA","\xEB","\xEC","\xED","\xEE","\xEF",241=>"\xF1","\xF2","\xF3","\xF4",246=>"\xF6","\xF7",249=>"\xF9","\xFA","\xFB","\xFC",264=>"\xC6","\xE6","\xC5","\xE5",284=>"\xD8","\xF8","\xAB","\xBB","\xD5","\xF5",292=>"\xA6","\xB6","\xA1","\xB1",304=>"\xA9","\xB9",308=>"\xAC","\xBC",348=>"\xDE","\xFE","\xAA","\xBA",364=>"\xDD","\xFD",379=>"\xAF","\xBF",728=>"\xA2","\xFF"];
}

26
lib/Encoding/ISO88594.php

@ -0,0 +1,26 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
/**
 * Data tables for the ISO-8859-4 single-byte encoding.
 *
 * This class only declares constants; any behaviour is inherited from
 * SingleByteEncoding, which is not defined in this file.
 */
class ISO88594 extends SingleByteEncoding {
/** Name of the encoding */
public const NAME = "ISO-8859-4";
/** Lower-case labels for the encoding (presumably the aliases it can be looked up by; confirm against the consumer) */
public const LABELS = [
"csisolatin4",
"iso-8859-4",
"iso-ir-110",
"iso8859-4",
"iso88594",
"iso_8859-4",
"iso_8859-4:1988",
"l4",
"latin4",
];
/** Characters for the upper half of the byte range: the entry at index i is the character matching byte 0x80 + i in TABLE_ENC */
protected const TABLE_DEC_CHAR = ["\u{80}","\u{81}","\u{82}","\u{83}","\u{84}","\u{85}","\u{86}","\u{87}","\u{88}","\u{89}","\u{8a}","\u{8b}","\u{8c}","\u{8d}","\u{8e}","\u{8f}","\u{90}","\u{91}","\u{92}","\u{93}","\u{94}","\u{95}","\u{96}","\u{97}","\u{98}","\u{99}","\u{9a}","\u{9b}","\u{9c}","\u{9d}","\u{9e}","\u{9f}","\u{a0}","\u{104}","\u{138}","\u{156}","\u{a4}","\u{128}","\u{13b}","\u{a7}","\u{a8}","\u{160}","\u{112}","\u{122}","\u{166}","\u{ad}","\u{17d}","\u{af}","\u{b0}","\u{105}","\u{2db}","\u{157}","\u{b4}","\u{129}","\u{13c}","\u{2c7}","\u{b8}","\u{161}","\u{113}","\u{123}","\u{167}","\u{14a}","\u{17e}","\u{14b}","\u{100}","\u{c1}","\u{c2}","\u{c3}","\u{c4}","\u{c5}","\u{c6}","\u{12e}","\u{10c}","\u{c9}","\u{118}","\u{cb}","\u{116}","\u{cd}","\u{ce}","\u{12a}","\u{110}","\u{145}","\u{14c}","\u{136}","\u{d4}","\u{d5}","\u{d6}","\u{d7}","\u{d8}","\u{172}","\u{da}","\u{db}","\u{dc}","\u{168}","\u{16a}","\u{df}","\u{101}","\u{e1}","\u{e2}","\u{e3}","\u{e4}","\u{e5}","\u{e6}","\u{12f}","\u{10d}","\u{e9}","\u{119}","\u{eb}","\u{117}","\u{ed}","\u{ee}","\u{12b}","\u{111}","\u{146}","\u{14d}","\u{137}","\u{f4}","\u{f5}","\u{f6}","\u{f7}","\u{f8}","\u{173}","\u{fa}","\u{fb}","\u{fc}","\u{169}","\u{16b}","\u{2d9}"];
/** The same table as TABLE_DEC_CHAR, with each entry given as a decimal code point instead of a string */
protected const TABLE_DEC_CODE = [128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,260,312,342,164,296,315,167,168,352,274,290,358,173,381,175,176,261,731,343,180,297,316,711,184,353,275,291,359,330,382,331,256,193,194,195,196,197,198,302,268,201,280,203,278,205,206,298,272,325,332,310,212,213,214,215,216,370,218,219,220,360,362,223,257,225,226,227,228,229,230,303,269,233,281,235,279,237,238,299,273,326,333,311,244,245,246,247,248,371,250,251,252,361,363,729];
/** Inverse of TABLE_DEC_CODE: code point => single byte. Keys are ascending; entries without an explicit key take the previous key plus one */
protected const TABLE_ENC = [128=>"\x80","\x81","\x82","\x83","\x84","\x85","\x86","\x87","\x88","\x89","\x8A","\x8B","\x8C","\x8D","\x8E","\x8F","\x90","\x91","\x92","\x93","\x94","\x95","\x96","\x97","\x98","\x99","\x9A","\x9B","\x9C","\x9D","\x9E","\x9F","\xA0",164=>"\xA4",167=>"\xA7","\xA8",173=>"\xAD",175=>"\xAF","\xB0",180=>"\xB4",184=>"\xB8",193=>"\xC1","\xC2","\xC3","\xC4","\xC5","\xC6",201=>"\xC9",203=>"\xCB",205=>"\xCD","\xCE",212=>"\xD4","\xD5","\xD6","\xD7","\xD8",218=>"\xDA","\xDB","\xDC",223=>"\xDF",225=>"\xE1","\xE2","\xE3","\xE4","\xE5","\xE6",233=>"\xE9",235=>"\xEB",237=>"\xED","\xEE",244=>"\xF4","\xF5","\xF6","\xF7","\xF8",250=>"\xFA","\xFB","\xFC",256=>"\xC0","\xE0",260=>"\xA1","\xB1",268=>"\xC8","\xE8",272=>"\xD0","\xF0","\xAA","\xBA",278=>"\xCC","\xEC","\xCA","\xEA",290=>"\xAB","\xBB",296=>"\xA5","\xB5","\xCF","\xEF",302=>"\xC7","\xE7",310=>"\xD3","\xF3","\xA2",315=>"\xA6","\xB6",325=>"\xD1","\xF1",330=>"\xBD","\xBF","\xD2","\xF2",342=>"\xA3","\xB3",352=>"\xA9","\xB9",358=>"\xAC","\xBC","\xDD","\xFD","\xDE","\xFE",370=>"\xD9","\xF9",381=>"\xAE","\xBE",711=>"\xB7",729=>"\xFF",731=>"\xB2"];
}

25
lib/Encoding/ISO88595.php

@ -0,0 +1,25 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
/**
 * Data tables for the ISO-8859-5 single-byte encoding.
 *
 * This class only declares constants; any behaviour is inherited from
 * SingleByteEncoding, which is not defined in this file.
 */
class ISO88595 extends SingleByteEncoding {
/** Name of the encoding */
public const NAME = "ISO-8859-5";
/** Lower-case labels for the encoding (presumably the aliases it can be looked up by; confirm against the consumer) */
public const LABELS = [
"csisolatincyrillic",
"cyrillic",
"iso-8859-5",
"iso-ir-144",
"iso8859-5",
"iso88595",
"iso_8859-5",
"iso_8859-5:1988",
];
/** Characters for the upper half of the byte range: the entry at index i is the character matching byte 0x80 + i in TABLE_ENC */
protected const TABLE_DEC_CHAR = ["\u{80}","\u{81}","\u{82}","\u{83}","\u{84}","\u{85}","\u{86}","\u{87}","\u{88}","\u{89}","\u{8a}","\u{8b}","\u{8c}","\u{8d}","\u{8e}","\u{8f}","\u{90}","\u{91}","\u{92}","\u{93}","\u{94}","\u{95}","\u{96}","\u{97}","\u{98}","\u{99}","\u{9a}","\u{9b}","\u{9c}","\u{9d}","\u{9e}","\u{9f}","\u{a0}","\u{401}","\u{402}","\u{403}","\u{404}","\u{405}","\u{406}","\u{407}","\u{408}","\u{409}","\u{40a}","\u{40b}","\u{40c}","\u{ad}","\u{40e}","\u{40f}","\u{410}","\u{411}","\u{412}","\u{413}","\u{414}","\u{415}","\u{416}","\u{417}","\u{418}","\u{419}","\u{41a}","\u{41b}","\u{41c}","\u{41d}","\u{41e}","\u{41f}","\u{420}","\u{421}","\u{422}","\u{423}","\u{424}","\u{425}","\u{426}","\u{427}","\u{428}","\u{429}","\u{42a}","\u{42b}","\u{42c}","\u{42d}","\u{42e}","\u{42f}","\u{430}","\u{431}","\u{432}","\u{433}","\u{434}","\u{435}","\u{436}","\u{437}","\u{438}","\u{439}","\u{43a}","\u{43b}","\u{43c}","\u{43d}","\u{43e}","\u{43f}","\u{440}","\u{441}","\u{442}","\u{443}","\u{444}","\u{445}","\u{446}","\u{447}","\u{448}","\u{449}","\u{44a}","\u{44b}","\u{44c}","\u{44d}","\u{44e}","\u{44f}","\u{2116}","\u{451}","\u{452}","\u{453}","\u{454}","\u{455}","\u{456}","\u{457}","\u{458}","\u{459}","\u{45a}","\u{45b}","\u{45c}","\u{a7}","\u{45e}","\u{45f}"];
/** The same table as TABLE_DEC_CHAR, with each entry given as a decimal code point instead of a string */
protected const TABLE_DEC_CODE = [128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,1025,1026,1027,1028,1029,1030,1031,1032,1033,1034,1035,1036,173,1038,1039,1040,1041,1042,1043,1044,1045,1046,1047,1048,1049,1050,1051,1052,1053,1054,1055,1056,1057,1058,1059,1060,1061,1062,1063,1064,1065,1066,1067,1068,1069,1070,1071,1072,1073,1074,1075,1076,1077,1078,1079,1080,1081,1082,1083,1084,1085,1086,1087,1088,1089,1090,1091,1092,1093,1094,1095,1096,1097,1098,1099,1100,1101,1102,1103,8470,1105,1106,1107,1108,1109,1110,1111,1112,1113,1114,1115,1116,167,1118,1119];
/** Inverse of TABLE_DEC_CODE: code point => single byte. Keys are ascending; entries without an explicit key take the previous key plus one */
protected const TABLE_ENC = [128=>"\x80","\x81","\x82","\x83","\x84","\x85","\x86","\x87","\x88","\x89","\x8A","\x8B","\x8C","\x8D","\x8E","\x8F","\x90","\x91","\x92","\x93","\x94","\x95","\x96","\x97","\x98","\x99","\x9A","\x9B","\x9C","\x9D","\x9E","\x9F","\xA0",167=>"\xFD",173=>"\xAD",1025=>"\xA1","\xA2","\xA3","\xA4","\xA5","\xA6","\xA7","\xA8","\xA9","\xAA","\xAB","\xAC",1038=>"\xAE","\xAF","\xB0","\xB1","\xB2","\xB3","\xB4","\xB5","\xB6","\xB7","\xB8","\xB9","\xBA","\xBB","\xBC","\xBD","\xBE","\xBF","\xC0","\xC1","\xC2","\xC3","\xC4","\xC5","\xC6","\xC7","\xC8","\xC9","\xCA","\xCB","\xCC","\xCD","\xCE","\xCF","\xD0","\xD1","\xD2","\xD3","\xD4","\xD5","\xD6","\xD7","\xD8","\xD9","\xDA","\xDB","\xDC","\xDD","\xDE","\xDF","\xE0","\xE1","\xE2","\xE3","\xE4","\xE5","\xE6","\xE7","\xE8","\xE9","\xEA","\xEB","\xEC","\xED","\xEE","\xEF",1105=>"\xF1","\xF2","\xF3","\xF4","\xF5","\xF6","\xF7","\xF8","\xF9","\xFA","\xFB","\xFC",1118=>"\xFE","\xFF",8470=>"\xF0"];
}

31
lib/Encoding/ISO88596.php

@ -0,0 +1,31 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
/**
 * Data tables for the ISO-8859-6 single-byte encoding.
 *
 * This class only declares constants; any behaviour is inherited from
 * SingleByteEncoding, which is not defined in this file.
 */
class ISO88596 extends SingleByteEncoding {
/** Name of the encoding */
public const NAME = "ISO-8859-6";
/** Lower-case labels for the encoding (presumably the aliases it can be looked up by; confirm against the consumer) */
public const LABELS = [
"arabic",
"asmo-708",
"csiso88596e",
"csiso88596i",
"csisolatinarabic",
"ecma-114",
"iso-8859-6",
"iso-8859-6-e",
"iso-8859-6-i",
"iso-ir-127",
"iso8859-6",
"iso88596",
"iso_8859-6",
"iso_8859-6:1987",
];
/**
 * Characters for the upper half of the byte range: the entry at index i is the character matching byte 0x80 + i in TABLE_ENC.
 * The table is sparse: explicit keys (36, 44, 59, 63, 65, 96) resume numbering after skipped indices, which have no entry; the last entry is at index 114.
 */
protected const TABLE_DEC_CHAR = ["\u{80}","\u{81}","\u{82}","\u{83}","\u{84}","\u{85}","\u{86}","\u{87}","\u{88}","\u{89}","\u{8a}","\u{8b}","\u{8c}","\u{8d}","\u{8e}","\u{8f}","\u{90}","\u{91}","\u{92}","\u{93}","\u{94}","\u{95}","\u{96}","\u{97}","\u{98}","\u{99}","\u{9a}","\u{9b}","\u{9c}","\u{9d}","\u{9e}","\u{9f}","\u{a0}",36=>"\u{a4}",44=>"\u{60c}","\u{ad}",59=>"\u{61b}",63=>"\u{61f}",65=>"\u{621}","\u{622}","\u{623}","\u{624}","\u{625}","\u{626}","\u{627}","\u{628}","\u{629}","\u{62a}","\u{62b}","\u{62c}","\u{62d}","\u{62e}","\u{62f}","\u{630}","\u{631}","\u{632}","\u{633}","\u{634}","\u{635}","\u{636}","\u{637}","\u{638}","\u{639}","\u{63a}",96=>"\u{640}","\u{641}","\u{642}","\u{643}","\u{644}","\u{645}","\u{646}","\u{647}","\u{648}","\u{649}","\u{64a}","\u{64b}","\u{64c}","\u{64d}","\u{64e}","\u{64f}","\u{650}","\u{651}","\u{652}"];
/** The same table as TABLE_DEC_CHAR (same keys, same gaps), with each entry given as a decimal code point instead of a string */
protected const TABLE_DEC_CODE = [128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,36=>164,44=>1548,173,59=>1563,63=>1567,65=>1569,1570,1571,1572,1573,1574,1575,1576,1577,1578,1579,1580,1581,1582,1583,1584,1585,1586,1587,1588,1589,1590,1591,1592,1593,1594,96=>1600,1601,1602,1603,1604,1605,1606,1607,1608,1609,1610,1611,1612,1613,1614,1615,1616,1617,1618];
/** Inverse of TABLE_DEC_CODE: code point => single byte. Keys are ascending; entries without an explicit key take the previous key plus one */
protected const TABLE_ENC = [128=>"\x80","\x81","\x82","\x83","\x84","\x85","\x86","\x87","\x88","\x89","\x8A","\x8B","\x8C","\x8D","\x8E","\x8F","\x90","\x91","\x92","\x93","\x94","\x95","\x96","\x97","\x98","\x99","\x9A","\x9B","\x9C","\x9D","\x9E","\x9F","\xA0",164=>"\xA4",173=>"\xAD",1548=>"\xAC",1563=>"\xBB",1567=>"\xBF",1569=>"\xC1","\xC2","\xC3","\xC4","\xC5","\xC6","\xC7","\xC8","\xC9","\xCA","\xCB","\xCC","\xCD","\xCE","\xCF","\xD0","\xD1","\xD2","\xD3","\xD4","\xD5","\xD6","\xD7","\xD8","\xD9","\xDA",1600=>"\xE0","\xE1","\xE2","\xE3","\xE4","\xE5","\xE6","\xE7","\xE8","\xE9","\xEA","\xEB","\xEC","\xED","\xEE","\xEF","\xF0","\xF1","\xF2"];
}

29
lib/Encoding/ISO88597.php

@ -0,0 +1,29 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
/**
 * Data tables for the ISO-8859-7 single-byte encoding.
 *
 * This class only declares constants; any behaviour is inherited from
 * SingleByteEncoding, which is not defined in this file.
 */
class ISO88597 extends SingleByteEncoding {
/** Name of the encoding */
public const NAME = "ISO-8859-7";
/** Lower-case labels for the encoding (presumably the aliases it can be looked up by; confirm against the consumer) */
public const LABELS = [
"csisolatingreek",
"ecma-118",
"elot_928",
"greek",
"greek8",
"iso-8859-7",
"iso-ir-126",
"iso8859-7",
"iso88597",
"iso_8859-7",
"iso_8859-7:1987",
"sun_eu_greek",
];
/**
 * Characters for the upper half of the byte range: the entry at index i is the character matching byte 0x80 + i in TABLE_ENC.
 * The table is sparse: explicit keys (47, 83) resume numbering after a skipped index, which has no entry; the last entry is at index 126.
 */
protected const TABLE_DEC_CHAR = ["\u{80}","\u{81}","\u{82}","\u{83}","\u{84}","\u{85}","\u{86}","\u{87}","\u{88}","\u{89}","\u{8a}","\u{8b}","\u{8c}","\u{8d}","\u{8e}","\u{8f}","\u{90}","\u{91}","\u{92}","\u{93}","\u{94}","\u{95}","\u{96}","\u{97}","\u{98}","\u{99}","\u{9a}","\u{9b}","\u{9c}","\u{9d}","\u{9e}","\u{9f}","\u{a0}","\u{2018}","\u{2019}","\u{a3}","\u{20ac}","\u{20af}","\u{a6}","\u{a7}","\u{a8}","\u{a9}","\u{37a}","\u{ab}","\u{ac}","\u{ad}",47=>"\u{2015}","\u{b0}","\u{b1}","\u{b2}","\u{b3}","\u{384}","\u{385}","\u{386}","\u{b7}","\u{388}","\u{389}","\u{38a}","\u{bb}","\u{38c}","\u{bd}","\u{38e}","\u{38f}","\u{390}","\u{391}","\u{392}","\u{393}","\u{394}","\u{395}","\u{396}","\u{397}","\u{398}","\u{399}","\u{39a}","\u{39b}","\u{39c}","\u{39d}","\u{39e}","\u{39f}","\u{3a0}","\u{3a1}",83=>"\u{3a3}","\u{3a4}","\u{3a5}","\u{3a6}","\u{3a7}","\u{3a8}","\u{3a9}","\u{3aa}","\u{3ab}","\u{3ac}","\u{3ad}","\u{3ae}","\u{3af}","\u{3b0}","\u{3b1}","\u{3b2}","\u{3b3}","\u{3b4}","\u{3b5}","\u{3b6}","\u{3b7}","\u{3b8}","\u{3b9}","\u{3ba}","\u{3bb}","\u{3bc}","\u{3bd}","\u{3be}","\u{3bf}","\u{3c0}","\u{3c1}","\u{3c2}","\u{3c3}","\u{3c4}","\u{3c5}","\u{3c6}","\u{3c7}","\u{3c8}","\u{3c9}","\u{3ca}","\u{3cb}","\u{3cc}","\u{3cd}","\u{3ce}"];
/** The same table as TABLE_DEC_CHAR (same keys, same gaps), with each entry given as a decimal code point instead of a string */
protected const TABLE_DEC_CODE = [128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,8216,8217,163,8364,8367,166,167,168,169,890,171,172,173,47=>8213,176,177,178,179,900,901,902,183,904,905,906,187,908,189,910,911,912,913,914,915,916,917,918,919,920,921,922,923,924,925,926,927,928,929,83=>931,932,933,934,935,936,937,938,939,940,941,942,943,944,945,946,947,948,949,950,951,952,953,954,955,956,957,958,959,960,961,962,963,964,965,966,967,968,969,970,971,972,973,974];
/** Inverse of TABLE_DEC_CODE: code point => single byte. Keys are ascending; entries without an explicit key take the previous key plus one */
protected const TABLE_ENC = [128=>"\x80","\x81","\x82","\x83","\x84","\x85","\x86","\x87","\x88","\x89","\x8A","\x8B","\x8C","\x8D","\x8E","\x8F","\x90","\x91","\x92","\x93","\x94","\x95","\x96","\x97","\x98","\x99","\x9A","\x9B","\x9C","\x9D","\x9E","\x9F","\xA0",163=>"\xA3",166=>"\xA6","\xA7","\xA8","\xA9",171=>"\xAB","\xAC","\xAD",176=>"\xB0","\xB1","\xB2","\xB3",183=>"\xB7",187=>"\xBB",189=>"\xBD",890=>"\xAA",900=>"\xB4","\xB5","\xB6",904=>"\xB8","\xB9","\xBA",908=>"\xBC",910=>"\xBE","\xBF","\xC0","\xC1","\xC2","\xC3","\xC4","\xC5","\xC6","\xC7","\xC8","\xC9","\xCA","\xCB","\xCC","\xCD","\xCE","\xCF","\xD0","\xD1",931=>"\xD3","\xD4","\xD5","\xD6","\xD7","\xD8","\xD9","\xDA","\xDB","\xDC","\xDD","\xDE","\xDF","\xE0","\xE1","\xE2","\xE3","\xE4","\xE5","\xE6","\xE7","\xE8","\xE9","\xEA","\xEB","\xEC","\xED","\xEE","\xEF","\xF0","\xF1","\xF2","\xF3","\xF4","\xF5","\xF6","\xF7","\xF8","\xF9","\xFA","\xFB","\xFC","\xFD","\xFE",8213=>"\xAF",8216=>"\xA1","\xA2",8364=>"\xA4",8367=>"\xA5"];
}

28
lib/Encoding/ISO88598.php

@ -0,0 +1,28 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
/**
 * Data tables for the ISO-8859-8 single-byte encoding.
 *
 * This class only declares constants; any behaviour is inherited from
 * SingleByteEncoding, which is not defined in this file.
 */
class ISO88598 extends SingleByteEncoding {
/** Name of the encoding */
public const NAME = "ISO-8859-8";
/** Lower-case labels for the encoding (presumably the aliases it can be looked up by; confirm against the consumer) */
public const LABELS = [
"csiso88598e",
"csisolatinhebrew",
"hebrew",
"iso-8859-8",
"iso-8859-8-e",
"iso-ir-138",
"iso8859-8",
"iso88598",
"iso_8859-8",
"iso_8859-8:1988",
"visual",
];
/**
 * Characters for the upper half of the byte range: the entry at index i is the character matching byte 0x80 + i in TABLE_ENC.
 * The table is sparse: explicit keys (34, 95, 125) resume numbering after skipped indices, which have no entry; the last entry is at index 126.
 */
protected const TABLE_DEC_CHAR = ["\u{80}","\u{81}","\u{82}","\u{83}","\u{84}","\u{85}","\u{86}","\u{87}","\u{88}","\u{89}","\u{8a}","\u{8b}","\u{8c}","\u{8d}","\u{8e}","\u{8f}","\u{90}","\u{91}","\u{92}","\u{93}","\u{94}","\u{95}","\u{96}","\u{97}","\u{98}","\u{99}","\u{9a}","\u{9b}","\u{9c}","\u{9d}","\u{9e}","\u{9f}","\u{a0}",34=>"\u{a2}","\u{a3}","\u{a4}","\u{a5}","\u{a6}","\u{a7}","\u{a8}","\u{a9}","\u{d7}","\u{ab}","\u{ac}","\u{ad}","\u{ae}","\u{af}","\u{b0}","\u{b1}","\u{b2}","\u{b3}","\u{b4}","\u{b5}","\u{b6}","\u{b7}","\u{b8}","\u{b9}","\u{f7}","\u{bb}","\u{bc}","\u{bd}","\u{be}",95=>"\u{2017}","\u{5d0}","\u{5d1}","\u{5d2}","\u{5d3}","\u{5d4}","\u{5d5}","\u{5d6}","\u{5d7}","\u{5d8}","\u{5d9}","\u{5da}","\u{5db}","\u{5dc}","\u{5dd}","\u{5de}","\u{5df}","\u{5e0}","\u{5e1}","\u{5e2}","\u{5e3}","\u{5e4}","\u{5e5}","\u{5e6}","\u{5e7}","\u{5e8}","\u{5e9}","\u{5ea}",125=>"\u{200e}","\u{200f}"];
/** The same table as TABLE_DEC_CHAR (same keys, same gaps), with each entry given as a decimal code point instead of a string */
protected const TABLE_DEC_CODE = [128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,34=>162,163,164,165,166,167,168,169,215,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,247,187,188,189,190,95=>8215,1488,1489,1490,1491,1492,1493,1494,1495,1496,1497,1498,1499,1500,1501,1502,1503,1504,1505,1506,1507,1508,1509,1510,1511,1512,1513,1514,125=>8206,8207];
/** Inverse of TABLE_DEC_CODE: code point => single byte. Keys are ascending; entries without an explicit key take the previous key plus one */
protected const TABLE_ENC = [128=>"\x80","\x81","\x82","\x83","\x84","\x85","\x86","\x87","\x88","\x89","\x8A","\x8B","\x8C","\x8D","\x8E","\x8F","\x90","\x91","\x92","\x93","\x94","\x95","\x96","\x97","\x98","\x99","\x9A","\x9B","\x9C","\x9D","\x9E","\x9F","\xA0",162=>"\xA2","\xA3","\xA4","\xA5","\xA6","\xA7","\xA8","\xA9",171=>"\xAB","\xAC","\xAD","\xAE","\xAF","\xB0","\xB1","\xB2","\xB3","\xB4","\xB5","\xB6","\xB7","\xB8","\xB9",187=>"\xBB","\xBC","\xBD","\xBE",215=>"\xAA",247=>"\xBA",1488=>"\xE0","\xE1","\xE2","\xE3","\xE4","\xE5","\xE6","\xE7","\xE8","\xE9","\xEA","\xEB","\xEC","\xED","\xEE","\xEF","\xF0","\xF1","\xF2","\xF3","\xF4","\xF5","\xF6","\xF7","\xF8","\xF9","\xFA",8206=>"\xFD","\xFE",8215=>"\xDF"];
}

20
lib/Encoding/ISO88598I.php

@ -0,0 +1,20 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
/**
 * Data tables for the ISO-8859-8-I single-byte encoding.
 *
 * This class only declares constants; any behaviour is inherited from
 * SingleByteEncoding, which is not defined in this file. The three tables
 * hold the same values as those of the ISO88598 class; only NAME and LABELS
 * differ.
 */
class ISO88598I extends SingleByteEncoding {
/** Name of the encoding */
public const NAME = "ISO-8859-8-I";
/** Lower-case labels for the encoding (presumably the aliases it can be looked up by; confirm against the consumer) */
public const LABELS = [
"csiso88598i",
"iso-8859-8-i",
"logical",
];
/**
 * Characters for the upper half of the byte range: the entry at index i is the character matching byte 0x80 + i in TABLE_ENC.
 * The table is sparse: explicit keys (34, 95, 125) resume numbering after skipped indices, which have no entry; the last entry is at index 126.
 */
protected const TABLE_DEC_CHAR = ["\u{80}","\u{81}","\u{82}","\u{83}","\u{84}","\u{85}","\u{86}","\u{87}","\u{88}","\u{89}","\u{8a}","\u{8b}","\u{8c}","\u{8d}","\u{8e}","\u{8f}","\u{90}","\u{91}","\u{92}","\u{93}","\u{94}","\u{95}","\u{96}","\u{97}","\u{98}","\u{99}","\u{9a}","\u{9b}","\u{9c}","\u{9d}","\u{9e}","\u{9f}","\u{a0}",34=>"\u{a2}","\u{a3}","\u{a4}","\u{a5}","\u{a6}","\u{a7}","\u{a8}","\u{a9}","\u{d7}","\u{ab}","\u{ac}","\u{ad}","\u{ae}","\u{af}","\u{b0}","\u{b1}","\u{b2}","\u{b3}","\u{b4}","\u{b5}","\u{b6}","\u{b7}","\u{b8}","\u{b9}","\u{f7}","\u{bb}","\u{bc}","\u{bd}","\u{be}",95=>"\u{2017}","\u{5d0}","\u{5d1}","\u{5d2}","\u{5d3}","\u{5d4}","\u{5d5}","\u{5d6}","\u{5d7}","\u{5d8}","\u{5d9}","\u{5da}","\u{5db}","\u{5dc}","\u{5dd}","\u{5de}","\u{5df}","\u{5e0}","\u{5e1}","\u{5e2}","\u{5e3}","\u{5e4}","\u{5e5}","\u{5e6}","\u{5e7}","\u{5e8}","\u{5e9}","\u{5ea}",125=>"\u{200e}","\u{200f}"];
/** The same table as TABLE_DEC_CHAR (same keys, same gaps), with each entry given as a decimal code point instead of a string */
protected const TABLE_DEC_CODE = [128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,34=>162,163,164,165,166,167,168,169,215,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,247,187,188,189,190,95=>8215,1488,1489,1490,1491,1492,1493,1494,1495,1496,1497,1498,1499,1500,1501,1502,1503,1504,1505,1506,1507,1508,1509,1510,1511,1512,1513,1514,125=>8206,8207];
/** Inverse of TABLE_DEC_CODE: code point => single byte. Keys are ascending; entries without an explicit key take the previous key plus one */
protected const TABLE_ENC = [128=>"\x80","\x81","\x82","\x83","\x84","\x85","\x86","\x87","\x88","\x89","\x8A","\x8B","\x8C","\x8D","\x8E","\x8F","\x90","\x91","\x92","\x93","\x94","\x95","\x96","\x97","\x98","\x99","\x9A","\x9B","\x9C","\x9D","\x9E","\x9F","\xA0",162=>"\xA2","\xA3","\xA4","\xA5","\xA6","\xA7","\xA8","\xA9",171=>"\xAB","\xAC","\xAD","\xAE","\xAF","\xB0","\xB1","\xB2","\xB3","\xB4","\xB5","\xB6","\xB7","\xB8","\xB9",187=>"\xBB","\xBC","\xBD","\xBE",215=>"\xAA",247=>"\xBA",1488=>"\xE0","\xE1","\xE2","\xE3","\xE4","\xE5","\xE6","\xE7","\xE8","\xE9","\xEA","\xEB","\xEC","\xED","\xEE","\xEF","\xF0","\xF1","\xF2","\xF3","\xF4","\xF5","\xF6","\xF7","\xF8","\xF9","\xFA",8206=>"\xFD","\xFE",8215=>"\xDF"];
}

22
lib/Encoding/KOI8R.php

@ -0,0 +1,22 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
/**
 * Data tables for the KOI8-R single-byte encoding.
 *
 * This class only declares constants; any behaviour is inherited from
 * SingleByteEncoding, which is not defined in this file.
 */
class KOI8R extends SingleByteEncoding {
/** Name of the encoding */
public const NAME = "KOI8-R";
/** Lower-case labels for the encoding (presumably the aliases it can be looked up by; confirm against the consumer) */
public const LABELS = [
"cskoi8r",
"koi",
"koi8",
"koi8-r",
"koi8_r",
];
/** Characters for the upper half of the byte range: the entry at index i is the character matching byte 0x80 + i in TABLE_ENC */
protected const TABLE_DEC_CHAR = ["\u{2500}","\u{2502}","\u{250c}","\u{2510}","\u{2514}","\u{2518}","\u{251c}","\u{2524}","\u{252c}","\u{2534}","\u{253c}","\u{2580}","\u{2584}","\u{2588}","\u{258c}","\u{2590}","\u{2591}","\u{2592}","\u{2593}","\u{2320}","\u{25a0}","\u{2219}","\u{221a}","\u{2248}","\u{2264}","\u{2265}","\u{a0}","\u{2321}","\u{b0}","\u{b2}","\u{b7}","\u{f7}","\u{2550}","\u{2551}","\u{2552}","\u{451}","\u{2553}","\u{2554}","\u{2555}","\u{2556}","\u{2557}","\u{2558}","\u{2559}","\u{255a}","\u{255b}","\u{255c}","\u{255d}","\u{255e}","\u{255f}","\u{2560}","\u{2561}","\u{401}","\u{2562}","\u{2563}","\u{2564}","\u{2565}","\u{2566}","\u{2567}","\u{2568}","\u{2569}","\u{256a}","\u{256b}","\u{256c}","\u{a9}","\u{44e}","\u{430}","\u{431}","\u{446}","\u{434}","\u{435}","\u{444}","\u{433}","\u{445}","\u{438}","\u{439}","\u{43a}","\u{43b}","\u{43c}","\u{43d}","\u{43e}","\u{43f}","\u{44f}","\u{440}","\u{441}","\u{442}","\u{443}","\u{436}","\u{432}","\u{44c}","\u{44b}","\u{437}","\u{448}","\u{44d}","\u{449}","\u{447}","\u{44a}","\u{42e}","\u{410}","\u{411}","\u{426}","\u{414}","\u{415}","\u{424}","\u{413}","\u{425}","\u{418}","\u{419}","\u{41a}","\u{41b}","\u{41c}","\u{41d}","\u{41e}","\u{41f}","\u{42f}","\u{420}","\u{421}","\u{422}","\u{423}","\u{416}","\u{412}","\u{42c}","\u{42b}","\u{417}","\u{428}","\u{42d}","\u{429}","\u{427}","\u{42a}"];
/** The same table as TABLE_DEC_CHAR, with each entry given as a decimal code point instead of a string */
protected const TABLE_DEC_CODE = [9472,9474,9484,9488,9492,9496,9500,9508,9516,9524,9532,9600,9604,9608,9612,9616,9617,9618,9619,8992,9632,8729,8730,8776,8804,8805,160,8993,176,178,183,247,9552,9553,9554,1105,9555,9556,9557,9558,9559,9560,9561,9562,9563,9564,9565,9566,9567,9568,9569,1025,9570,9571,9572,9573,9574,9575,9576,9577,9578,9579,9580,169,1102,1072,1073,1094,1076,1077,1092,1075,1093,1080,1081,1082,1083,1084,1085,1086,1087,1103,1088,1089,1090,1091,1078,1074,1100,1099,1079,1096,1101,1097,1095,1098,1070,1040,1041,1062,1044,1045,1060,1043,1061,1048,1049,1050,1051,1052,1053,1054,1055,1071,1056,1057,1058,1059,1046,1042,1068,1067,1047,1064,1069,1065,1063,1066];
/** Inverse of TABLE_DEC_CODE: code point => single byte. Keys are ascending; entries without an explicit key take the previous key plus one */
protected const TABLE_ENC = [160=>"\x9A",169=>"\xBF",176=>"\x9C",178=>"\x9D",183=>"\x9E",247=>"\x9F",1025=>"\xB3",1040=>"\xE1","\xE2","\xF7","\xE7","\xE4","\xE5","\xF6","\xFA","\xE9","\xEA","\xEB","\xEC","\xED","\xEE","\xEF","\xF0","\xF2","\xF3","\xF4","\xF5","\xE6","\xE8","\xE3","\xFE","\xFB","\xFD","\xFF","\xF9","\xF8","\xFC","\xE0","\xF1","\xC1","\xC2","\xD7","\xC7","\xC4","\xC5","\xD6","\xDA","\xC9","\xCA","\xCB","\xCC","\xCD","\xCE","\xCF","\xD0","\xD2","\xD3","\xD4","\xD5","\xC6","\xC8","\xC3","\xDE","\xDB","\xDD","\xDF","\xD9","\xD8","\xDC","\xC0","\xD1",1105=>"\xA3",8729=>"\x95","\x96",8776=>"\x97",8804=>"\x98","\x99",8992=>"\x93","\x9B",9472=>"\x80",9474=>"\x81",9484=>"\x82",9488=>"\x83",9492=>"\x84",9496=>"\x85",9500=>"\x86",9508=>"\x87",9516=>"\x88",9524=>"\x89",9532=>"\x8A",9552=>"\xA0","\xA1","\xA2","\xA4","\xA5","\xA6","\xA7","\xA8","\xA9","\xAA","\xAB","\xAC","\xAD","\xAE","\xAF","\xB0","\xB1","\xB2","\xB4","\xB5","\xB6","\xB7","\xB8","\xB9","\xBA","\xBB","\xBC","\xBD","\xBE",9600=>"\x8B",9604=>"\x8C",9608=>"\x8D",9612=>"\x8E",9616=>"\x8F","\x90","\x91","\x92",9632=>"\x94"];
}

19
lib/Encoding/KOI8U.php

@ -0,0 +1,19 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
/** Lookup tables for the KOI8-U single-byte encoding
 *
 * This class defines only constants; it declares no methods of its own and extends SingleByteEncoding, whose methods read these tables via static::
 */
class KOI8U extends SingleByteEncoding {
/** The name of the encoding */
public const NAME = "KOI8-U";
/** Labels identifying this encoding (presumably those of the WHATWG Encoding standard; how they are matched is not visible in this class) */
public const LABELS = [
"koi8-ru",
"koi8-u",
];
/** Decoded characters as UTF-8 strings, indexed by byte value minus 128 (read by SingleByteEncoding::nextChar) */
protected const TABLE_DEC_CHAR = ["\u{2500}","\u{2502}","\u{250c}","\u{2510}","\u{2514}","\u{2518}","\u{251c}","\u{2524}","\u{252c}","\u{2534}","\u{253c}","\u{2580}","\u{2584}","\u{2588}","\u{258c}","\u{2590}","\u{2591}","\u{2592}","\u{2593}","\u{2320}","\u{25a0}","\u{2219}","\u{221a}","\u{2248}","\u{2264}","\u{2265}","\u{a0}","\u{2321}","\u{b0}","\u{b2}","\u{b7}","\u{f7}","\u{2550}","\u{2551}","\u{2552}","\u{451}","\u{454}","\u{2554}","\u{456}","\u{457}","\u{2557}","\u{2558}","\u{2559}","\u{255a}","\u{255b}","\u{491}","\u{45e}","\u{255e}","\u{255f}","\u{2560}","\u{2561}","\u{401}","\u{404}","\u{2563}","\u{406}","\u{407}","\u{2566}","\u{2567}","\u{2568}","\u{2569}","\u{256a}","\u{490}","\u{40e}","\u{a9}","\u{44e}","\u{430}","\u{431}","\u{446}","\u{434}","\u{435}","\u{444}","\u{433}","\u{445}","\u{438}","\u{439}","\u{43a}","\u{43b}","\u{43c}","\u{43d}","\u{43e}","\u{43f}","\u{44f}","\u{440}","\u{441}","\u{442}","\u{443}","\u{436}","\u{432}","\u{44c}","\u{44b}","\u{437}","\u{448}","\u{44d}","\u{449}","\u{447}","\u{44a}","\u{42e}","\u{410}","\u{411}","\u{426}","\u{414}","\u{415}","\u{424}","\u{413}","\u{425}","\u{418}","\u{419}","\u{41a}","\u{41b}","\u{41c}","\u{41d}","\u{41e}","\u{41f}","\u{42f}","\u{420}","\u{421}","\u{422}","\u{423}","\u{416}","\u{412}","\u{42c}","\u{42b}","\u{417}","\u{428}","\u{42d}","\u{429}","\u{427}","\u{42a}"];
/** Decoded code points as integers, indexed by byte value minus 128 (read by SingleByteEncoding::nextCode) */
protected const TABLE_DEC_CODE = [9472,9474,9484,9488,9492,9496,9500,9508,9516,9524,9532,9600,9604,9608,9612,9616,9617,9618,9619,8992,9632,8729,8730,8776,8804,8805,160,8993,176,178,183,247,9552,9553,9554,1105,1108,9556,1110,1111,9559,9560,9561,9562,9563,1169,1118,9566,9567,9568,9569,1025,1028,9571,1030,1031,9574,9575,9576,9577,9578,1168,1038,169,1102,1072,1073,1094,1076,1077,1092,1075,1093,1080,1081,1082,1083,1084,1085,1086,1087,1103,1088,1089,1090,1091,1078,1074,1100,1099,1079,1096,1101,1097,1095,1098,1070,1040,1041,1062,1044,1045,1060,1043,1061,1048,1049,1050,1051,1052,1053,1054,1055,1071,1056,1057,1058,1059,1046,1042,1068,1067,1047,1064,1069,1065,1063,1066];
/** Encoded bytes keyed by Unicode code point (read by SingleByteEncoding::encode, which handles code points below 128 before consulting this table) */
protected const TABLE_ENC = [160=>"\x9A",169=>"\xBF",176=>"\x9C",178=>"\x9D",183=>"\x9E",247=>"\x9F",1025=>"\xB3",1028=>"\xB4",1030=>"\xB6","\xB7",1038=>"\xBE",1040=>"\xE1","\xE2","\xF7","\xE7","\xE4","\xE5","\xF6","\xFA","\xE9","\xEA","\xEB","\xEC","\xED","\xEE","\xEF","\xF0","\xF2","\xF3","\xF4","\xF5","\xE6","\xE8","\xE3","\xFE","\xFB","\xFD","\xFF","\xF9","\xF8","\xFC","\xE0","\xF1","\xC1","\xC2","\xD7","\xC7","\xC4","\xC5","\xD6","\xDA","\xC9","\xCA","\xCB","\xCC","\xCD","\xCE","\xCF","\xD0","\xD2","\xD3","\xD4","\xD5","\xC6","\xC8","\xC3","\xDE","\xDB","\xDD","\xDF","\xD9","\xD8","\xDC","\xC0","\xD1",1105=>"\xA3",1108=>"\xA4",1110=>"\xA6","\xA7",1118=>"\xAE",1168=>"\xBD","\xAD",8729=>"\x95","\x96",8776=>"\x97",8804=>"\x98","\x99",8992=>"\x93","\x9B",9472=>"\x80",9474=>"\x81",9484=>"\x82",9488=>"\x83",9492=>"\x84",9496=>"\x85",9500=>"\x86",9508=>"\x87",9516=>"\x88",9524=>"\x89",9532=>"\x8A",9552=>"\xA0","\xA1","\xA2",9556=>"\xA5",9559=>"\xA8","\xA9","\xAA","\xAB","\xAC",9566=>"\xAF","\xB0","\xB1","\xB2",9571=>"\xB5",9574=>"\xB8","\xB9","\xBA","\xBB","\xBC",9600=>"\x8B",9604=>"\x8C",9608=>"\x8D",9612=>"\x8E",9616=>"\x8F","\x90","\x91","\x92",9632=>"\x94"];
}

21
lib/Encoding/Macintosh.php

@ -0,0 +1,21 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
/** Lookup tables for the "macintosh" single-byte encoding
 *
 * This class defines only constants; it declares no methods of its own and extends SingleByteEncoding, whose methods read these tables via static::
 */
class Macintosh extends SingleByteEncoding {
/** The name of the encoding */
public const NAME = "macintosh";
/** Labels identifying this encoding (presumably those of the WHATWG Encoding standard; how they are matched is not visible in this class) */
public const LABELS = [
"csmacintosh",
"mac",
"macintosh",
"x-mac-roman",
];
/** Decoded characters as UTF-8 strings, indexed by byte value minus 128 (read by SingleByteEncoding::nextChar) */
protected const TABLE_DEC_CHAR = ["\u{c4}","\u{c5}","\u{c7}","\u{c9}","\u{d1}","\u{d6}","\u{dc}","\u{e1}","\u{e0}","\u{e2}","\u{e4}","\u{e3}","\u{e5}","\u{e7}","\u{e9}","\u{e8}","\u{ea}","\u{eb}","\u{ed}","\u{ec}","\u{ee}","\u{ef}","\u{f1}","\u{f3}","\u{f2}","\u{f4}","\u{f6}","\u{f5}","\u{fa}","\u{f9}","\u{fb}","\u{fc}","\u{2020}","\u{b0}","\u{a2}","\u{a3}","\u{a7}","\u{2022}","\u{b6}","\u{df}","\u{ae}","\u{a9}","\u{2122}","\u{b4}","\u{a8}","\u{2260}","\u{c6}","\u{d8}","\u{221e}","\u{b1}","\u{2264}","\u{2265}","\u{a5}","\u{b5}","\u{2202}","\u{2211}","\u{220f}","\u{3c0}","\u{222b}","\u{aa}","\u{ba}","\u{3a9}","\u{e6}","\u{f8}","\u{bf}","\u{a1}","\u{ac}","\u{221a}","\u{192}","\u{2248}","\u{2206}","\u{ab}","\u{bb}","\u{2026}","\u{a0}","\u{c0}","\u{c3}","\u{d5}","\u{152}","\u{153}","\u{2013}","\u{2014}","\u{201c}","\u{201d}","\u{2018}","\u{2019}","\u{f7}","\u{25ca}","\u{ff}","\u{178}","\u{2044}","\u{20ac}","\u{2039}","\u{203a}","\u{fb01}","\u{fb02}","\u{2021}","\u{b7}","\u{201a}","\u{201e}","\u{2030}","\u{c2}","\u{ca}","\u{c1}","\u{cb}","\u{c8}","\u{cd}","\u{ce}","\u{cf}","\u{cc}","\u{d3}","\u{d4}","\u{f8ff}","\u{d2}","\u{da}","\u{db}","\u{d9}","\u{131}","\u{2c6}","\u{2dc}","\u{af}","\u{2d8}","\u{2d9}","\u{2da}","\u{b8}","\u{2dd}","\u{2db}","\u{2c7}"];
/** Decoded code points as integers, indexed by byte value minus 128 (read by SingleByteEncoding::nextCode) */
protected const TABLE_DEC_CODE = [196,197,199,201,209,214,220,225,224,226,228,227,229,231,233,232,234,235,237,236,238,239,241,243,242,244,246,245,250,249,251,252,8224,176,162,163,167,8226,182,223,174,169,8482,180,168,8800,198,216,8734,177,8804,8805,165,181,8706,8721,8719,960,8747,170,186,937,230,248,191,161,172,8730,402,8776,8710,171,187,8230,160,192,195,213,338,339,8211,8212,8220,8221,8216,8217,247,9674,255,376,8260,8364,8249,8250,64257,64258,8225,183,8218,8222,8240,194,202,193,203,200,205,206,207,204,211,212,63743,210,218,219,217,305,710,732,175,728,729,730,184,733,731,711];
/** Encoded bytes keyed by Unicode code point (read by SingleByteEncoding::encode, which handles code points below 128 before consulting this table) */
protected const TABLE_ENC = [160=>"\xCA","\xC1","\xA2","\xA3",165=>"\xB4",167=>"\xA4","\xAC","\xA9","\xBB","\xC7","\xC2",174=>"\xA8","\xF8","\xA1","\xB1",180=>"\xAB","\xB5","\xA6","\xE1","\xFC",186=>"\xBC","\xC8",191=>"\xC0","\xCB","\xE7","\xE5","\xCC","\x80","\x81","\xAE","\x82","\xE9","\x83","\xE6","\xE8","\xED","\xEA","\xEB","\xEC",209=>"\x84","\xF1","\xEE","\xEF","\xCD","\x85",216=>"\xAF","\xF4","\xF2","\xF3","\x86",223=>"\xA7","\x88","\x87","\x89","\x8B","\x8A","\x8C","\xBE","\x8D","\x8F","\x8E","\x90","\x91","\x93","\x92","\x94","\x95",241=>"\x96","\x98","\x97","\x99","\x9B","\x9A","\xD6","\xBF","\x9D","\x9C","\x9E","\x9F",255=>"\xD8",305=>"\xF5",338=>"\xCE","\xCF",376=>"\xD9",402=>"\xC4",710=>"\xF6","\xFF",728=>"\xF9","\xFA","\xFB","\xFE","\xF7","\xFD",937=>"\xBD",960=>"\xB9",8211=>"\xD0","\xD1",8216=>"\xD4","\xD5","\xE2",8220=>"\xD2","\xD3","\xE3",8224=>"\xA0","\xE0","\xA5",8230=>"\xC9",8240=>"\xE4",8249=>"\xDC","\xDD",8260=>"\xDA",8364=>"\xDB",8482=>"\xAA",8706=>"\xB6",8710=>"\xC6",8719=>"\xB8",8721=>"\xB7",8730=>"\xC3",8734=>"\xB0",8747=>"\xBA",8776=>"\xC5",8800=>"\xAD",8804=>"\xB2","\xB3",9674=>"\xD7",63743=>"\xF0",64257=>"\xDE","\xDF"];
}

17
lib/Encoding/ModalCoder.php

@ -0,0 +1,17 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
/** Contract for encoders whose output depends on a mode carried between calls
 *
 * The mode is held by the caller and passed by reference to each encode() call
 */
interface ModalCoder {
/** Returns the encoding of $codePoint as a byte string
*
* @param int|null $codePoint The Unicode code point to encode. If less than 0 or greater than 1114111, an exception is thrown; if $codePoint is null this signals end-of-file
* @param bool $fatal Whether an exception will be thrown if the code point cannot be encoded into a character; if false HTML character references will be substituted
* @param mixed &$mode A reference keeping track of the current encoder mode. An uninitialized variable should be passed on first invocation, and that variable used for further invocations.
* @return string The encoded bytes
*/
public static function encode(?int $codePoint, bool $fatal = true, &$mode = null): string;
}

133
lib/Encoding/Replacement.php

@ -0,0 +1,133 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
/** Decoder for the "replacement" encoding
 *
 * Any non-empty input decodes to a single U+FFFD replacement character (or
 * throws in fatal mode); empty input decodes to nothing. The input bytes
 * themselves are never examined: only their count is retained.
 */
class Replacement implements Decoder {
    public const NAME = "replacement";
    public const LABELS = [
        "csiso2022kr",
        "hz-gb-2312",
        "iso-2022-cn",
        "iso-2022-cn-ext",
        "iso-2022-kr",
        "replacement",
    ];

    /** @var int The length of the input string, in bytes */
    protected $len = 0;
    /** @var bool Whether the single replacement character has been consumed */
    protected $done = false;
    /** @var bool Whether decoding throws an exception instead of yielding U+FFFD */
    protected $fatal = false;
    /** @var int Set to 1 once the replacement character has been consumed (or consumption was attempted in fatal mode) */
    public $posErr = 0;

    /** Constructs a new decoder
     *
     * @param string $string The input; only its byte length is retained
     * @param bool $fatal Whether to throw a DecoderException rather than yield a replacement character
     * @param bool $allowSurrogates Accepted for signature compatibility; not used by this decoder
     */
    public function __construct(string $string, bool $fatal = false, bool $allowSurrogates = false) {
        $this->len = strlen($string);
        $this->fatal = $fatal;
    }

    /** Returns the byte position: the full input length once consumed, otherwise 0 */
    public function posByte(): int {
        return $this->done ? $this->len : 0;
    }

    /** Returns the character position: 1 once consumed, otherwise 0 */
    public function posChar(): int {
        return $this->done ? 1 : 0;
    }

    /** Consumes and returns the replacement character, or the empty string at end of input
     *
     * @throws DecoderException in fatal mode; the decoder is still marked as consumed
     */
    public function nextChar(): string {
        if (!$this->eof()) {
            try {
                return $this->peekChar();
            } finally {
                // runs whether peekChar() returned or threw, so the input is consumed either way
                $this->done = true;
                $this->posErr = 1;
            }
        }
        return "";
    }

    /** Consumes and returns the code point 0xFFFD, or false at end of input
     *
     * @return int|bool
     * @throws DecoderException in fatal mode; the decoder is still marked as consumed
     */
    public function nextCode() {
        if (!$this->eof()) {
            try {
                return $this->peekCode()[0];
            } finally {
                // runs whether peekCode() returned or threw, so the input is consumed either way
                $this->done = true;
                $this->posErr = 1;
            }
        }
        return false;
    }

    /** Moves at most one character forward or backward
     *
     * @param int $distance The number of characters to move; negative values move backward
     * @return int The part of $distance which could not be covered (keeps the sign of the request)
     */
    public function seek(int $distance): int {
        if ($distance > 0) {
            if (!$this->eof()) {
                $distance--;
                $this->nextCode();
            }
        } elseif ($distance < 0) {
            if ($this->eof()) {
                $distance++;
                $this->rewind();
            }
        }
        return $distance;
    }

    /** Returns the decoder to the start of the input */
    public function rewind(): void {
        $this->done = false;
    }

    /** Returns the replacement character without consuming it
     *
     * @param int $num The number of characters requested; any positive number yields at most one character
     * @throws DecoderException in fatal mode when a character is available
     */
    public function peekChar(int $num = 1): string {
        if (!$this->eof() && $num > 0) {
            if ($this->fatal) {
                throw new DecoderException("Unable to decode string", self::E_INVALID_BYTE);
            }
            return "\u{FFFD}";
        }
        return "";
    }

    /** Returns an array holding 0xFFFD without consuming it, or an empty array at end of input
     *
     * @param int $num The number of code points requested; any positive number yields at most one code point
     * @throws DecoderException in fatal mode when a code point is available
     */
    public function peekCode(int $num = 1): array {
        if (!$this->eof() && $num > 0) {
            if ($this->fatal) {
                throw new DecoderException("Unable to decode string", self::E_INVALID_BYTE);
            }
            return [0xFFFD];
        }
        return [];
    }

    /** Returns the length of the input in bytes */
    public function lenByte(): int {
        return $this->len;
    }

    /** Returns the length of the input in characters: 1 for any non-empty input, otherwise 0 */
    public function lenChar(): int {
        return (int) ($this->len > 0);
    }

    /** Returns whether the end of input has been reached; empty input is always at its end */
    public function eof(): bool {
        return $this->done || $this->len === 0;
    }

    /** Generates an iterator yielding the single replacement character, if it has not been consumed yet */
    public function chars(): \Generator {
        if (!$this->eof()) {
            yield 0 => $this->nextChar();
        }
    }

    /** Generates an iterator yielding the single replacement code point, if it has not been consumed yet */
    public function codes(): \Generator {
        if (!$this->eof()) {
            yield 0 => $this->nextCode();
        }
    }

    /** Always returns the empty string: this decoder never yields ASCII characters
     *
     * The $length parameter is declared explicitly nullable because implicitly
     * nullable parameter types ("int $length = null") are deprecated as of PHP 8.4
     */
    public function asciiSpan(string $mask, ?int $length = null): string {
        return "";
    }

    /** Always returns the empty string: this decoder never yields ASCII characters
     *
     * The $length parameter is declared explicitly nullable because implicitly
     * nullable parameter types ("int $length = null") are deprecated as of PHP 8.4
     */
    public function asciiSpanNot(string $mask, ?int $length = null): string {
        return "";
    }
}

161
lib/Encoding/ShiftJIS.php

File diff suppressed because one or more lines are too long

89
lib/Encoding/SingleByteEncoding.php

@ -0,0 +1,89 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
/** Shared decoder and encoder logic for single-byte encodings
 *
 * Concrete encodings supply three tables: TABLE_DEC_CHAR and TABLE_DEC_CODE
 * (both indexed by byte value minus 128) and TABLE_ENC (keyed by code point).
 * Bytes below 0x80 are passed through unchanged in both directions.
 */
abstract class SingleByteEncoding extends AbstractEncoding implements Coder, Decoder {
    protected $selfSynchronizing = true;

    /** Consumes one byte and returns the corresponding character as a UTF-8 string
     *
     * Returns the empty string at end of input. A byte with no table entry is
     * passed to errDec(), and whatever it returns is serialized with UTF8::encode()
     */
    public function nextChar(): string {
        // every character is exactly one byte, so the character position doubles as the byte offset
        $byte = $this->string[$this->posChar] ?? "";
        if ($byte === "") {
            return "";
        }
        $this->posChar++;
        $this->posByte++;
        $value = ord($byte);
        if ($value >= 0x80) {
            return static::TABLE_DEC_CHAR[$value - 128] ?? UTF8::encode($this->errDec($this->errMode, $this->posChar, $this->posChar));
        }
        // ASCII bytes decode to themselves
        return $byte;
    }

    /** Consumes one byte and returns the corresponding code point
     *
     * Returns false at end of input. A byte with no table entry yields
     * whatever errDec() returns
     *
     * @return int|bool
     */
    public function nextCode() {
        // every character is exactly one byte, so the character position doubles as the byte offset
        $byte = $this->string[$this->posChar] ?? "";
        if ($byte === "") {
            return false;
        }
        $this->posChar++;
        $this->posByte++;
        $value = ord($byte);
        if ($value >= 0x80) {
            return static::TABLE_DEC_CODE[$value - 128] ?? $this->errDec($this->errMode, $this->posChar, $this->posChar);
        }
        // ASCII bytes are their own code points
        return $value;
    }

    /** Returns the encoding of $codePoint as a byte string
     *
     * @param int $codePoint The code point to encode
     * @param bool $fatal Passed, negated, as the first argument of errEnc() when the code point has no entry in TABLE_ENC
     * @throws EncoderException if $codePoint is less than 0 or greater than 0x10FFFF
     */
    public static function encode(int $codePoint, bool $fatal = true): string {
        if ($codePoint < 0 || $codePoint > 0x10FFFF) {
            throw new EncoderException("Encountered code point outside Unicode range ($codePoint)", self::E_INVALID_CODE_POINT);
        }
        if ($codePoint < 128) {
            return chr($codePoint);
        }
        return static::TABLE_ENC[$codePoint] ?? static::errEnc(!$fatal, $codePoint);
    }

    /** Advances $distance characters through the string, or moves backward if $distance is negative
     *
     * @return int The number of requested characters which could not be covered before reaching either end of the string
     */
    public function seek(int $distance): int {
        if ($distance === 0) {
            return 0;
        }
        if ($distance > 0) {
            // forward: decode each character so that decoding errors are handled as usual
            for (; $distance > 0 && $this->posChar < $this->lenByte; $distance--) {
                $this->nextCode();
            }
            return $distance;
        }
        // backward: one character is always one byte, so simply step both positions
        $distance = abs($distance);
        while ($distance > 0 && $this->posChar > 0) {
            $this->posChar--;
            $this->posByte--;
            $distance--;
        }
        return $distance;
    }

    /** @codeCoverageIgnore */
    protected function seekBack(int $distance): int {
        // stub: seek() above handles backward movement itself
        return 0;
    }

    /** Returns the length of the string in characters, which equals its length in bytes */
    public function lenChar(): int {
        return $this->lenByte;
    }

    /** Returns whether the end of the string has been reached */
    public function eof(): bool {
        return $this->posChar >= $this->lenByte;
    }
}

180
lib/Encoding/UTF16.php

@ -0,0 +1,180 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
/** Shared decoder logic for UTF-16 in either byte order
 *
 * Subclasses select the byte order through the boolean constant BE (true for
 * big-endian). A byte order mark matching that byte order is skipped at the
 * start of the string.
 */
abstract class UTF16 extends AbstractEncoding {
    protected $selfSynchronizing = true;
    /** @var int The number of bytes (1 to 3) of a truncated character found at the end of the string, or 0 if none has been encountered */
    protected $dirtyEOF = 0;
    /** @var int The size of the string's byte order mark, if any */
    protected $BOM = 0;

    /** Constructs a new decoder, skipping a leading byte order mark if one is present */
    public function __construct(string $string, bool $fatal = false, bool $allowSurrogates = false) {
        // register dirtyEOF among the state properties before the parent constructor runs
        $this->stateProps[] = "dirtyEOF";
        parent::__construct($string, $fatal, $allowSurrogates);
        if (substr($string, 0, 2) === (static::BE ? "\xFE\xFF" : "\xFF\xFE")) {
            $this->BOM = 2;
            $this->posByte = 2;
        }
    }

    /** Seeks to the start of the string, just past any byte order mark */
    public function rewind(): void {
        parent::rewind();
        $this->posByte = $this->BOM;
    }

    /** Decodes the next character from the string and returns its code point number
     *
     * If the end of the string has been reached, false is returned. A string
     * which ends in the middle of a character is reported through errDec()
     *
     * @return int|bool
     */
    public function nextCode() {
        $lead_b = null; // first byte of the code unit being assembled
        $lead_s = null; // lead surrogate awaiting its trail surrogate
        $this->posChar++;
        while (($b = $this->string[$this->posByte++] ?? "") !== "") {
            $b = ord($b);
            if (is_null($lead_b)) {
                $lead_b = $b;
                continue;
            } else {
                if (static::BE) {
                    $code = ($lead_b << 8) + $b;
                } else {
                    $code = ($b << 8) + $lead_b;
                }
                $lead_b = null;
                if (!is_null($lead_s)) {
                    if ($code >= 0xDC00 && $code <= 0xDFFF) {
                        // valid surrogate pair
                        return 0x10000 + (($lead_s - 0xD800) << 10) + ($code - 0xDC00);
                    } elseif ($this->allowSurrogates) {
                        // unpaired lead surrogate: un-read the unit which followed it and return the surrogate as-is
                        $this->posByte -= 2;
                        return $lead_s;
                    } else {
                        // unpaired lead surrogate: un-read the unit which followed it and report an error
                        $this->posByte -= 2;
                        $this->posErr = $this->posChar;
                        return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - 2);
                    }
                } else {
                    if ($code >= 0xD800 && $code <= 0xDBFF) {
                        $lead_s = $code;
                        continue;
                    } elseif ($code >= 0xDC00 && $code <= 0xDFFF) {
                        // trail surrogate without a preceding lead surrogate
                        if ($this->allowSurrogates) {
                            return $code;
                        } else {
                            $this->posErr = $this->posChar;
                            return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - 2);
                        }
                    } else {
                        return $code;
                    }
                }
            }
        }
        // the loop condition advanced the position one past the end of the string
        $this->posByte--;
        // compare against null explicitly: a dangling byte may legitimately have the value 0
        $haveByte = $lead_b !== null;
        $haveSurrogate = $lead_s !== null;
        if (!$haveByte && !$haveSurrogate) {
            // clean EOF
            $this->posChar--;
            return false;
        } else {
            // dirty EOF; note how many bytes the last character had
            // properly synchronizing UTF-16 is possible without retaining this information, but retaining it makes the task easier
            $this->dirtyEOF = ($haveSurrogate ? 2 : 0) + ($haveByte ? 1 : 0);
            return $this->errDec($this->errMode, $this->posChar - 1, $this->posByte - $this->dirtyEOF);
        }
    }

    /** Retrieves the next character in the string, in UTF-8 encoding
     *
     * The empty string is returned if the end of the string has been reached
     */
    public function nextChar(): string {
        // get the byte at the current position
        $b = $this->string[$this->posByte] ?? "";
        if ($b === "") {
            // if the byte is end of input, simply return it
            return "";
        } else {
            // otherwise return the serialization of the code point at the current position
            return UTF8::encode($this->nextCode());
        }
    }

    /** Consumes and returns the longest run of ASCII characters at the current position which all appear in $mask
     *
     * @param string $mask The set of ASCII characters to accept
     * @param int|null $length The maximum number of characters to consume; null means no limit
     */
    public function asciiSpan(string $mask, ?int $length = null): string {
        // UTF-16 has no ASCII characters, so we must do things the hard way
        $out = "";
        $left = ($length === null) ? -1 : $length;
        while ($left) {
            $c1 = $this->string[$this->posByte] ?? "";
            $c2 = $this->string[$this->posByte + 1] ?? "";
            if ($c1 === "" || $c2 === "") {
                // end of input or truncated code unit; stopping here also keeps empty strings away from ord() and strpos()
                break;
            }
            $hi = static::BE ? $c1 : $c2;
            $lo = static::BE ? $c2 : $c1;
            if (ord($hi) !== 0 || ord($lo) >= 0x80 || strpos($mask, $lo) === false) {
                break;
            }
            $out .= $lo;
            $this->posByte += 2;
            $this->posChar++;
            $left--;
        }
        return $out;
    }

    /** Consumes and returns the longest run of ASCII characters at the current position none of which appear in $mask
     *
     * @param string $mask The set of ASCII characters to reject
     * @param int|null $length The maximum number of characters to consume; null means no limit
     */
    public function asciiSpanNot(string $mask, ?int $length = null): string {
        // this is a copy of asciiSpan above with only the strpos check reversed
        $out = "";
        $left = ($length === null) ? -1 : $length;
        while ($left) {
            $c1 = $this->string[$this->posByte] ?? "";
            $c2 = $this->string[$this->posByte + 1] ?? "";
            if ($c1 === "" || $c2 === "") {
                // end of input or truncated code unit; stopping here also keeps empty strings away from ord() and strpos()
                break;
            }
            $hi = static::BE ? $c1 : $c2;
            $lo = static::BE ? $c2 : $c1;
            if (ord($hi) !== 0 || ord($lo) >= 0x80 || strpos($mask, $lo) !== false) {
                break;
            }
            $out .= $lo;
            $this->posByte += 2;
            $this->posChar++;
            $left--;
        }
        return $out;
    }

    /** Implements backward seeking $distance characters */
    protected function seekBack(int $distance): int {
        if ($this->dirtyEOF && $distance) {
            // the truncated character at the end of the string counts as one character of known size
            $distance--;
            $this->posChar--;
            $this->posByte -= $this->dirtyEOF;
            $this->dirtyEOF = 0;
        }
        while ($distance > 0 && $this->posChar > 0) {
            $distance--;
            $this->posChar--;
            if ($this->posByte < 4) {
                // if we're less than four bytes into the string, the previous character is necessarily double-byte
                $this->posByte -= 2;
            } else {
                // otherwise go back four bytes and consume a character
                $start = $this->posByte;
                $this->posByte -= 4;
                // decrement the character position because nextCode() increments it
                $this->posChar--;
                $this->nextCode();
                if ($this->posByte == $start) {
                    // if we're back at our starting position the character was four bytes
                    $this->posByte -= 4;
                } else {
                    // otherwise we're already where we need to be
                }
            }
        }
        return $distance;
    }
}

16
lib/Encoding/UTF16BE.php

@ -0,0 +1,16 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
/** Big-endian UTF-16
 *
 * This class defines only constants; all behaviour is inherited from UTF16, which reads the BE constant via static::
 */
class UTF16BE extends UTF16 {
/** True so that the UTF16 base class treats the first byte of each pair as the high-order byte */
protected const BE = true;
/** The name of the encoding */
public const NAME = "UTF-16BE";
/** Labels identifying this encoding (presumably those of the WHATWG Encoding standard; how they are matched is not visible in this class) */
public const LABELS = [
"unicodefffe",
"utf-16be",
];
}

21
lib/Encoding/UTF16LE.php

@ -0,0 +1,21 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
/** Little-endian UTF-16
 *
 * This class defines only constants; all behaviour is inherited from UTF16, which reads the BE constant via static::
 */
class UTF16LE extends UTF16 {
/** False so that the UTF16 base class treats the second byte of each pair as the high-order byte */
protected const BE = false;
/** The name of the encoding */
public const NAME = "UTF-16LE";
/** Labels identifying this encoding (presumably those of the WHATWG Encoding standard; how they are matched is not visible in this class) */
public const LABELS = [
"csunicode",
"iso-10646-ucs-2",
"ucs-2",
"unicode",
"unicodefeff",
"utf-16",
"utf-16le",
];
}

280
lib/Encoding/UTF8.php

@ -6,77 +6,38 @@
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
class UTF8 {
const MODE_NULL = 0;
const MODE_REPLACE = 1;
const MODE_HTML = 2;
const MODE_FATAL_DEC = 3;
const MODE_FATAL_ENC = 4;
const E_INVALID_CODE_POINT = 1;
const E_INVALID_BYTE = 2;
const E_INVALID_MODE = 3;
const NAME = "UTF-8";
const LABELS = ["unicode-1-1-utf-8", "utf-8", "utf8"];
protected $string;
protected $posByte = 0;
protected $posChar = 0;
protected $lenByte = null;
protected $lenChar = null;
protected $errMode = self::MODE_REPLACE;
/** Constructs a new decoder
*
* If $fatal is true, an exception will be thrown whenever an invalid code sequence is encountered; otherwise replacement characters will be substituted
*/
public function __construct(string $string, bool $fatal = false) {
$this->string = $string;
$this->lenByte = strlen($string);
$this->errMode = $fatal ? self::MODE_FATAL_DEC : self::MODE_REPLACE;
}
/** Returns the current byte position of the decoder */
public function posByte(): int {
return $this->posByte;
}
/** Returns the current character position of the decoder */
public function posChar(): int {
return $this->posChar;
class UTF8 extends AbstractEncoding implements Coder, Decoder {
public const NAME = "UTF-8";
public const LABELS = [
"unicode-1-1-utf-8",
"unicode11utf8",
"unicode20utf8",
"utf-8",
"utf8",
"x-unicode20utf8",
];
protected $selfSynchronizing = true;
/** @var int The size of the string's byte order mark, if any */
protected $BOM = 0;
public function __construct(string $string, bool $fatal = false, bool $allowSurrogates = false) {
parent::__construct($string, $fatal, $allowSurrogates);
if (substr($string, 0, 3) === "\xEF\xBB\xBF") {
$this->BOM = 3;
$this->posByte = 3;
}
}
/** Retrieve the next character in the string, in UTF-8 encoding
*
* The returned character may be a replacement character, or the empty string if the end of the string has been reached
*/
public function nextChar(): string {
// get the byte at the current position
$b = @$this->string[$this->posByte];
if ($b === "") {
return "";
} elseif (ord($b) < 0x80) {
// if the byte is an ASCII character or end of input, simply return it
$this->posChar++;
$this->posByte++;
return $b;
} else {
// otherwise return the serialization of the code point at the current position
return UTF8::encode($this->nextCode());
}
public function rewind(): void {
parent::rewind();
$this->posByte = $this->BOM;
}
/** Decodes the next character from the string and returns its code point number
*
* If the end of the string has been reached, false is returned
*
* @return int|bool
*/
public function nextCode() {
// this function effectively implements https://encoding.spec.whatwg.org/#utf-8-decoder
// optimization for ASCII characters
$b = @$this->string[$this->posByte];
$b = $this->string[$this->posByte] ?? "";
if ($b === "") {
return false;
} elseif (($b = ord($b)) < 0x80) {
@ -91,7 +52,7 @@ class UTF8 {
$lower = 0x80;
$upper = 0xBF;
while ($seen < $needed) {
$b = ord(@$this->string[$this->posByte++]);
$b = ord($this->string[$this->posByte++] ?? "");
if (!$seen) {
if ($b >= 0xC2 && $b <= 0xDF) { // two-byte character
$needed = 2;
@ -101,7 +62,7 @@ class UTF8 {
if ($b==0xE0) {
$lower = 0xA0;
} elseif ($b==0xED) {
$upper = 0x9F;
$upper = ($this->allowSurrogates) ? 0xBF : 0x9F;
}
$point = $b & 0xF;
} elseif ($b >= 0xF0 && $b <= 0xF4) { // four-byte character
@ -113,10 +74,10 @@ class UTF8 {
}
$point = $b & 0x7;
} else { // invalid byte
return self::err($this->errMode, [$this->posChar, $this->posByte]);
return $this->errDec($this->errMode, $this->posChar, $this->posByte);
}
} elseif ($b < $lower || $b > $upper) {
return self::err($this->errMode, [$this->posChar, $this->posByte--]);
return $this->errDec($this->errMode, $this->posChar, $this->posByte--);
} else {
$lower = 0x80;
$upper = 0xBF;
@ -127,12 +88,6 @@ class UTF8 {
return $point;
}
/** Returns the encoding of $codePoint as a byte string
*
* If $codePoint is less than 0 or greater than 1114111, an exception is thrown
*
* If $fatal is true, an exception will be thrown if the code point cannot be encoded into a character; otherwise HTML character references will be substituted. When encoding to UTF-8, all Unicode characters can be encoded, so the argument is ignored
*/
public static function encode(int $codePoint, bool $fatal = true): string {
// this function implements https://encoding.spec.whatwg.org/#utf-8-encoder
if ($codePoint < 0 || $codePoint > 0x10FFFF) {
@ -157,166 +112,29 @@ class UTF8 {
return $bytes;
}
/** Advance $distance characters through the string
*
* If $distance is negative, the operation will be performed in reverse
*
* If the end (or beginning) of the string was reached before the end of the operation, the remaining number of requested characters is returned
*/
public function seek(int $distance): int {
if ($distance > 0) {
if ($this->posByte == strlen($this->string)) {
// if we're already at the end of the string, we can't go further
return $distance;
}
do {
// get the next code point; this automatically increments the character position
$p = $this->nextCode();
} while (--$distance && $p !== false); // stop after we have skipped the desired number of characters, or reached EOF
return $distance;
} elseif ($distance < 0) {
$distance = abs($distance);
if (!$this->posByte) {
// if we're already at the start of the string, we can't go further back
return $distance;
}
$mode = $this->errMode;
$this->errMode = self::MODE_NULL;
do {
$this->sync($this->posByte - 1);
// manually decrement the character position
$this->posChar--;
} while (--$distance && $this->posByte);
$this->errMode = $mode;
return $distance;
} else {
return 0;
}
}
/** Seeks to the start of the string
*
* This is usually faster than using the seek method for the same purpose
*/
public function rewind() {
$this->posByte = 0;
$this->posChar = 0;
}
/** Retrieves the next $num characters (in UTF-8 encoding) from the string without advancing the character pointer */
public function peekChar(int $num = 1): string {
$out = "";
$state = $this->stateSave();
try {
while ($num-- > 0 && ($b = $this->nextChar()) !== "") {
$out .= $b;
}
} finally {
$this->stateApply($state);
}
return $out;
}
/** Retrieves the next $num code points from the string, without advancing the character pointer */
public function peekCode(int $num = 1): array {
$out = [];
$state = $this->stateSave();
try {
while ($num-- > 0 && ($b = $this->nextCode()) !== false) {
$out[] = $b;
}
} finally {
$this->stateApply($state);
}
return $out;
}
/** Calculates the length of the string in code points
*
* Note that this may involve processing to the end of the string
*/
public function len(): int {
return $this->lenChar ?? (function() {
$state = $this->stateSave();
while ($this->nextCode() !== false);
$this->lenChar = $this->posChar;
$this->stateApply($state);
return $this->lenChar;
})();
}
/** Generates an iterator which steps through each character in the string */
public function chars(): \Generator {
while (($c = $this->nextChar()) !== "") {
yield ($this->posChar - 1) => $c;
}
}
/** Generates an iterator which steps through each code point in the string */
public function codes(): \Generator {
while (($c = $this->nextCode()) !== false) {
yield ($this->posChar - 1) => $c;
}
}
/** Synchronize to the byte offset of the start of the nearest character at or before byte offset $pos */
protected function sync(int $pos) {
$b = ord(@$this->string[$pos]);
if ($b < 0x80) {
// if the byte is an ASCII byte or the end of input, then this is already a synchronized position
$this->posByte = $pos;
} else {
$s = $pos;
while ($b >= 0x80 && $b <= 0xBF && $pos > 0 && ($s - $pos) < 3) { // go back at most three bytes, no further than the start of the string, and only as long as the byte remains a continuation byte
$b = ord(@$this->string[--$pos]);
}
$this->posByte = $pos;
// decrement the character position because nextCode() increments it
/** Implements backward seeking $distance characters */
protected function seekBack(int $distance): int {
while ($distance > 0 && $this->posChar > 0) {
$distance--;
$this->posChar--;
if (is_null($this->nextCode())) {
$this->posByte = $s;
$b = ord($this->string[$this->posByte - 1] ?? "");
if ($b < 0x80) {
// if the byte is an ASCII byte or the end of input, then this is already a synchronized position
$this->posByte--;
} else {
$this->posByte = ($this->posByte > $s) ? $pos : $s;
$s = $this->posByte;
$pos = $s - 1;
while ($b >= 0x80 && $b <= 0xBF && $pos > 0 && ($s - $pos) < 4) { // go back at most four bytes, no further than the start of the string, and only as long as the byte remains a continuation byte
$b = ord($this->string[--$pos] ?? "");
}
$this->posByte = $pos;
// decrement the character position because nextCode() increments it
$this->posChar--;
// check for overlong sequences: if the sequence is overlong consuming the character will yield an earlier position than where we started
$this->nextCode();
$this->posByte = ($this->posByte < $s) ? $s - 1 : $pos;
}
}
}
/** Returns a copy of the decoder's state to keep in memory */
protected function stateSave(): array {
return [
'posChar' => $this->posChar,
'posByte' => $this->posByte,
];
}
/** Sets the decoder's state to the values specified */
protected function stateApply(array $state) {
foreach ($state as $key => $value) {
$this->$key = $value;
}
}
/** Handles decoding and encoding errors */
protected static function err(int $mode, $data = null) {
switch ($mode) {
case self::MODE_NULL:
// used internally during backward seeking
return null;
case self::MODE_REPLACE:
// standard "replace" mode
return 0xFFFD;
case self::MODE_HTML: // @codeCoverageIgnore
// the "html" replacement mode; not applicable to Unicode transformation formats
return "&#".(string) $data.";"; // @codeCoverageIgnore
case self::MODE_FATAL_DEC:
// fatal replacement mode for decoders
throw new DecoderException("Invalid code sequence at character offset {$data[0]} (byte offset {$data[1]})", self::E_INVALID_BYTE);
case self::MODE_FATAL_ENC: // @codeCoverageIgnore
// fatal replacement mode for decoders; not applicable to Unicode transformation formats
throw new EncoderException("Code point $data not available in target encoding", self::E_INVALID_BYTE); // @codeCoverageIgnore
default:
// indicative of internal bug; should never be triggered
throw new DecoderException("Invalid replacement mode {$mode}", self::E_INVALID_MODE); // @codeCoverageIgnore
}
return $distance;
}
}

20
lib/Encoding/Windows1250.php

@ -0,0 +1,20 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
/** Lookup tables for the windows-1250 single-byte encoding
 *
 * This class defines only constants; it declares no methods of its own and extends SingleByteEncoding, whose methods read these tables via static::
 */
class Windows1250 extends SingleByteEncoding {
/** The name of the encoding */
public const NAME = "windows-1250";
/** Labels identifying this encoding (presumably those of the WHATWG Encoding standard; how they are matched is not visible in this class) */
public const LABELS = [
"cp1250",
"windows-1250",
"x-cp1250",
];
/** Decoded characters as UTF-8 strings, indexed by byte value minus 128 (read by SingleByteEncoding::nextChar) */
protected const TABLE_DEC_CHAR = ["\u{20ac}","\u{81}","\u{201a}","\u{83}","\u{201e}","\u{2026}","\u{2020}","\u{2021}","\u{88}","\u{2030}","\u{160}","\u{2039}","\u{15a}","\u{164}","\u{17d}","\u{179}","\u{90}","\u{2018}","\u{2019}","\u{201c}","\u{201d}","\u{2022}","\u{2013}","\u{2014}","\u{98}","\u{2122}","\u{161}","\u{203a}","\u{15b}","\u{165}","\u{17e}","\u{17a}","\u{a0}","\u{2c7}","\u{2d8}","\u{141}","\u{a4}","\u{104}","\u{a6}","\u{a7}","\u{a8}","\u{a9}","\u{15e}","\u{ab}","\u{ac}","\u{ad}","\u{ae}","\u{17b}","\u{b0}","\u{b1}","\u{2db}","\u{142}","\u{b4}","\u{b5}","\u{b6}","\u{b7}","\u{b8}","\u{105}","\u{15f}","\u{bb}","\u{13d}","\u{2dd}","\u{13e}","\u{17c}","\u{154}","\u{c1}","\u{c2}","\u{102}","\u{c4}","\u{139}","\u{106}","\u{c7}","\u{10c}","\u{c9}","\u{118}","\u{cb}","\u{11a}","\u{cd}","\u{ce}","\u{10e}","\u{110}","\u{143}","\u{147}","\u{d3}","\u{d4}","\u{150}","\u{d6}","\u{d7}","\u{158}","\u{16e}","\u{da}","\u{170}","\u{dc}","\u{dd}","\u{162}","\u{df}","\u{155}","\u{e1}","\u{e2}","\u{103}","\u{e4}","\u{13a}","\u{107}","\u{e7}","\u{10d}","\u{e9}","\u{119}","\u{eb}","\u{11b}","\u{ed}","\u{ee}","\u{10f}","\u{111}","\u{144}","\u{148}","\u{f3}","\u{f4}","\u{151}","\u{f6}","\u{f7}","\u{159}","\u{16f}","\u{fa}","\u{171}","\u{fc}","\u{fd}","\u{163}","\u{2d9}"];
/** Decoded code points as integers, indexed by byte value minus 128 (read by SingleByteEncoding::nextCode) */
protected const TABLE_DEC_CODE = [8364,129,8218,131,8222,8230,8224,8225,136,8240,352,8249,346,356,381,377,144,8216,8217,8220,8221,8226,8211,8212,152,8482,353,8250,347,357,382,378,160,711,728,321,164,260,166,167,168,169,350,171,172,173,174,379,176,177,731,322,180,181,182,183,184,261,351,187,317,733,318,380,340,193,194,258,196,313,262,199,268,201,280,203,282,205,206,270,272,323,327,211,212,336,214,215,344,366,218,368,220,221,354,223,341,225,226,259,228,314,263,231,269,233,281,235,283,237,238,271,273,324,328,243,244,337,246,247,345,367,250,369,252,253,355,729];
/** Encoded bytes keyed by Unicode code point (read by SingleByteEncoding::encode, which handles code points below 128 before consulting this table) */
protected const TABLE_ENC = [129=>"\x81",131=>"\x83",136=>"\x88",144=>"\x90",152=>"\x98",160=>"\xA0",164=>"\xA4",166=>"\xA6","\xA7","\xA8","\xA9",171=>"\xAB","\xAC","\xAD","\xAE",176=>"\xB0","\xB1",180=>"\xB4","\xB5","\xB6","\xB7","\xB8",187=>"\xBB",193=>"\xC1","\xC2",196=>"\xC4",199=>"\xC7",201=>"\xC9",203=>"\xCB",205=>"\xCD","\xCE",211=>"\xD3","\xD4",214=>"\xD6","\xD7",218=>"\xDA",220=>"\xDC","\xDD",223=>"\xDF",225=>"\xE1","\xE2",228=>"\xE4",231=>"\xE7",233=>"\xE9",235=>"\xEB",237=>"\xED","\xEE",243=>"\xF3","\xF4",246=>"\xF6","\xF7",250=>"\xFA",252=>"\xFC","\xFD",258=>"\xC3","\xE3","\xA5","\xB9","\xC6","\xE6",268=>"\xC8","\xE8","\xCF","\xEF","\xD0","\xF0",280=>"\xCA","\xEA","\xCC","\xEC",313=>"\xC5","\xE5",317=>"\xBC","\xBE",321=>"\xA3","\xB3","\xD1","\xF1",327=>"\xD2","\xF2",336=>"\xD5","\xF5",340=>"\xC0","\xE0",344=>"\xD8","\xF8","\x8C","\x9C",350=>"\xAA","\xBA","\x8A","\x9A","\xDE","\xFE","\x8D","\x9D",366=>"\xD9","\xF9","\xDB","\xFB",377=>"\x8F","\x9F","\xAF","\xBF","\x8E","\x9E",711=>"\xA1",728=>"\xA2","\xFF",731=>"\xB2",733=>"\xBD",8211=>"\x96","\x97",8216=>"\x91","\x92","\x82",8220=>"\x93","\x94","\x84",8224=>"\x86","\x87","\x95",8230=>"\x85",8240=>"\x89",8249=>"\x8B","\x9B",8364=>"\x80",8482=>"\x99"];
}

20
lib/Encoding/Windows1251.php

@ -0,0 +1,20 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
/**
 * Lookup tables for the "windows-1251" single-byte encoding.
 *
 * The class declares constants only; the code that reads them is presumably
 * inherited from SingleByteEncoding, which is not visible here.
 */
class Windows1251 extends SingleByteEncoding {
// Name of the encoding
public const NAME = "windows-1251";
// Label strings declared for this encoding; NAME itself is among them
public const LABELS = [
"cp1251",
"windows-1251",
"x-cp1251",
];
// Characters for the upper half of the byte range: index 0 pairs with byte 0x80 and the
// last index (127) with byte 0xFF, as can be cross-checked against TABLE_ENC below
protected const TABLE_DEC_CHAR = ["\u{402}","\u{403}","\u{201a}","\u{453}","\u{201e}","\u{2026}","\u{2020}","\u{2021}","\u{20ac}","\u{2030}","\u{409}","\u{2039}","\u{40a}","\u{40c}","\u{40b}","\u{40f}","\u{452}","\u{2018}","\u{2019}","\u{201c}","\u{201d}","\u{2022}","\u{2013}","\u{2014}","\u{98}","\u{2122}","\u{459}","\u{203a}","\u{45a}","\u{45c}","\u{45b}","\u{45f}","\u{a0}","\u{40e}","\u{45e}","\u{408}","\u{a4}","\u{490}","\u{a6}","\u{a7}","\u{401}","\u{a9}","\u{404}","\u{ab}","\u{ac}","\u{ad}","\u{ae}","\u{407}","\u{b0}","\u{b1}","\u{406}","\u{456}","\u{491}","\u{b5}","\u{b6}","\u{b7}","\u{451}","\u{2116}","\u{454}","\u{bb}","\u{458}","\u{405}","\u{455}","\u{457}","\u{410}","\u{411}","\u{412}","\u{413}","\u{414}","\u{415}","\u{416}","\u{417}","\u{418}","\u{419}","\u{41a}","\u{41b}","\u{41c}","\u{41d}","\u{41e}","\u{41f}","\u{420}","\u{421}","\u{422}","\u{423}","\u{424}","\u{425}","\u{426}","\u{427}","\u{428}","\u{429}","\u{42a}","\u{42b}","\u{42c}","\u{42d}","\u{42e}","\u{42f}","\u{430}","\u{431}","\u{432}","\u{433}","\u{434}","\u{435}","\u{436}","\u{437}","\u{438}","\u{439}","\u{43a}","\u{43b}","\u{43c}","\u{43d}","\u{43e}","\u{43f}","\u{440}","\u{441}","\u{442}","\u{443}","\u{444}","\u{445}","\u{446}","\u{447}","\u{448}","\u{449}","\u{44a}","\u{44b}","\u{44c}","\u{44d}","\u{44e}","\u{44f}"];
// Code points (as integers) of the same characters, in the same order as TABLE_DEC_CHAR
protected const TABLE_DEC_CODE = [1026,1027,8218,1107,8222,8230,8224,8225,8364,8240,1033,8249,1034,1036,1035,1039,1106,8216,8217,8220,8221,8226,8211,8212,152,8482,1113,8250,1114,1116,1115,1119,160,1038,1118,1032,164,1168,166,167,1025,169,1028,171,172,173,174,1031,176,177,1030,1110,1169,181,182,183,1105,8470,1108,187,1112,1029,1109,1111,1040,1041,1042,1043,1044,1045,1046,1047,1048,1049,1050,1051,1052,1053,1054,1055,1056,1057,1058,1059,1060,1061,1062,1063,1064,1065,1066,1067,1068,1069,1070,1071,1072,1073,1074,1075,1076,1077,1078,1079,1080,1081,1082,1083,1084,1085,1086,1087,1088,1089,1090,1091,1092,1093,1094,1095,1096,1097,1098,1099,1100,1101,1102,1103];
// Reverse map from code point to its single byte, sorted by code point; an entry written
// without an explicit key takes the previous key plus one (standard PHP array behaviour)
protected const TABLE_ENC = [152=>"\x98",160=>"\xA0",164=>"\xA4",166=>"\xA6","\xA7",169=>"\xA9",171=>"\xAB","\xAC","\xAD","\xAE",176=>"\xB0","\xB1",181=>"\xB5","\xB6","\xB7",187=>"\xBB",1025=>"\xA8","\x80","\x81","\xAA","\xBD","\xB2","\xAF","\xA3","\x8A","\x8C","\x8E","\x8D",1038=>"\xA1","\x8F","\xC0","\xC1","\xC2","\xC3","\xC4","\xC5","\xC6","\xC7","\xC8","\xC9","\xCA","\xCB","\xCC","\xCD","\xCE","\xCF","\xD0","\xD1","\xD2","\xD3","\xD4","\xD5","\xD6","\xD7","\xD8","\xD9","\xDA","\xDB","\xDC","\xDD","\xDE","\xDF","\xE0","\xE1","\xE2","\xE3","\xE4","\xE5","\xE6","\xE7","\xE8","\xE9","\xEA","\xEB","\xEC","\xED","\xEE","\xEF","\xF0","\xF1","\xF2","\xF3","\xF4","\xF5","\xF6","\xF7","\xF8","\xF9","\xFA","\xFB","\xFC","\xFD","\xFE","\xFF",1105=>"\xB8","\x90","\x83","\xBA","\xBE","\xB3","\xBF","\xBC","\x9A","\x9C","\x9E","\x9D",1118=>"\xA2","\x9F",1168=>"\xA5","\xB4",8211=>"\x96","\x97",8216=>"\x91","\x92","\x82",8220=>"\x93","\x94","\x84",8224=>"\x86","\x87","\x95",8230=>"\x85",8240=>"\x89",8249=>"\x8B","\x9B",8364=>"\x88",8470=>"\xB9",8482=>"\x99"];
}

34
lib/Encoding/Windows1252.php

@ -0,0 +1,34 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
/**
 * Lookup tables for the "windows-1252" single-byte encoding.
 *
 * The class declares constants only; the code that reads them is presumably
 * inherited from SingleByteEncoding, which is not visible here.
 */
class Windows1252 extends SingleByteEncoding {
// Name of the encoding
public const NAME = "windows-1252";
// Label strings declared for this encoding; note that the ASCII and ISO 8859-1 (Latin-1)
// labels are listed here alongside the windows-1252 ones
public const LABELS = [
"ansi_x3.4-1968",
"ascii",
"cp1252",
"cp819",
"csisolatin1",
"ibm819",
"iso-8859-1",
"iso-ir-100",
"iso8859-1",
"iso88591",
"iso_8859-1",
"iso_8859-1:1987",
"l1",
"latin1",
"us-ascii",
"windows-1252",
"x-cp1252",
];
// Characters for the upper half of the byte range: index 0 pairs with byte 0x80 and the
// last index (127) with byte 0xFF, as can be cross-checked against TABLE_ENC below
protected const TABLE_DEC_CHAR = ["\u{20ac}","\u{81}","\u{201a}","\u{192}","\u{201e}","\u{2026}","\u{2020}","\u{2021}","\u{2c6}","\u{2030}","\u{160}","\u{2039}","\u{152}","\u{8d}","\u{17d}","\u{8f}","\u{90}","\u{2018}","\u{2019}","\u{201c}","\u{201d}","\u{2022}","\u{2013}","\u{2014}","\u{2dc}","\u{2122}","\u{161}","\u{203a}","\u{153}","\u{9d}","\u{17e}","\u{178}","\u{a0}","\u{a1}","\u{a2}","\u{a3}","\u{a4}","\u{a5}","\u{a6}","\u{a7}","\u{a8}","\u{a9}","\u{aa}","\u{ab}","\u{ac}","\u{ad}","\u{ae}","\u{af}","\u{b0}","\u{b1}","\u{b2}","\u{b3}","\u{b4}","\u{b5}","\u{b6}","\u{b7}","\u{b8}","\u{b9}","\u{ba}","\u{bb}","\u{bc}","\u{bd}","\u{be}","\u{bf}","\u{c0}","\u{c1}","\u{c2}","\u{c3}","\u{c4}","\u{c5}","\u{c6}","\u{c7}","\u{c8}","\u{c9}","\u{ca}","\u{cb}","\u{cc}","\u{cd}","\u{ce}","\u{cf}","\u{d0}","\u{d1}","\u{d2}","\u{d3}","\u{d4}","\u{d5}","\u{d6}","\u{d7}","\u{d8}","\u{d9}","\u{da}","\u{db}","\u{dc}","\u{dd}","\u{de}","\u{df}","\u{e0}","\u{e1}","\u{e2}","\u{e3}","\u{e4}","\u{e5}","\u{e6}","\u{e7}","\u{e8}","\u{e9}","\u{ea}","\u{eb}","\u{ec}","\u{ed}","\u{ee}","\u{ef}","\u{f0}","\u{f1}","\u{f2}","\u{f3}","\u{f4}","\u{f5}","\u{f6}","\u{f7}","\u{f8}","\u{f9}","\u{fa}","\u{fb}","\u{fc}","\u{fd}","\u{fe}","\u{ff}"];
// Code points (as integers) of the same characters, in the same order as TABLE_DEC_CHAR
protected const TABLE_DEC_CODE = [8364,129,8218,402,8222,8230,8224,8225,710,8240,352,8249,338,141,381,143,144,8216,8217,8220,8221,8226,8211,8212,732,8482,353,8250,339,157,382,376,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255];
// Reverse map from code point to its single byte, sorted by code point; an entry written
// without an explicit key takes the previous key plus one (standard PHP array behaviour)
protected const TABLE_ENC = [129=>"\x81",141=>"\x8D",143=>"\x8F","\x90",157=>"\x9D",160=>"\xA0","\xA1","\xA2","\xA3","\xA4","\xA5","\xA6","\xA7","\xA8","\xA9","\xAA","\xAB","\xAC","\xAD","\xAE","\xAF","\xB0","\xB1","\xB2","\xB3","\xB4","\xB5","\xB6","\xB7","\xB8","\xB9","\xBA","\xBB","\xBC","\xBD","\xBE","\xBF","\xC0","\xC1","\xC2","\xC3","\xC4","\xC5","\xC6","\xC7","\xC8","\xC9","\xCA","\xCB","\xCC","\xCD","\xCE","\xCF","\xD0","\xD1","\xD2","\xD3","\xD4","\xD5","\xD6","\xD7","\xD8","\xD9","\xDA","\xDB","\xDC","\xDD","\xDE","\xDF","\xE0","\xE1","\xE2","\xE3","\xE4","\xE5","\xE6","\xE7","\xE8","\xE9","\xEA","\xEB","\xEC","\xED","\xEE","\xEF","\xF0","\xF1","\xF2","\xF3","\xF4","\xF5","\xF6","\xF7","\xF8","\xF9","\xFA","\xFB","\xFC","\xFD","\xFE","\xFF",338=>"\x8C","\x9C",352=>"\x8A","\x9A",376=>"\x9F",381=>"\x8E","\x9E",402=>"\x83",710=>"\x88",732=>"\x98",8211=>"\x96","\x97",8216=>"\x91","\x92","\x82",8220=>"\x93","\x94","\x84",8224=>"\x86","\x87","\x95",8230=>"\x85",8240=>"\x89",8249=>"\x8B","\x9B",8364=>"\x80",8482=>"\x99"];
}

20
lib/Encoding/Windows1253.php

@ -0,0 +1,20 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
/**
 * Lookup tables for the "windows-1253" single-byte encoding.
 *
 * The class declares constants only; the code that reads them is presumably
 * inherited from SingleByteEncoding, which is not visible here.
 */
class Windows1253 extends SingleByteEncoding {
// Name of the encoding
public const NAME = "windows-1253";
// Label strings declared for this encoding; NAME itself is among them
public const LABELS = [
"cp1253",
"windows-1253",
"x-cp1253",
];
// Characters for the upper half of the byte range: index 0 pairs with byte 0x80 (compare
// TABLE_ENC below). The explicit keys 43 and 83 deliberately skip indices 42 and 82, and the
// list stops at index 126, so bytes 0xAA, 0xD2 and 0xFF have no entry in this table
protected const TABLE_DEC_CHAR = ["\u{20ac}","\u{81}","\u{201a}","\u{192}","\u{201e}","\u{2026}","\u{2020}","\u{2021}","\u{88}","\u{2030}","\u{8a}","\u{2039}","\u{8c}","\u{8d}","\u{8e}","\u{8f}","\u{90}","\u{2018}","\u{2019}","\u{201c}","\u{201d}","\u{2022}","\u{2013}","\u{2014}","\u{98}","\u{2122}","\u{9a}","\u{203a}","\u{9c}","\u{9d}","\u{9e}","\u{9f}","\u{a0}","\u{385}","\u{386}","\u{a3}","\u{a4}","\u{a5}","\u{a6}","\u{a7}","\u{a8}","\u{a9}",43=>"\u{ab}","\u{ac}","\u{ad}","\u{ae}","\u{2015}","\u{b0}","\u{b1}","\u{b2}","\u{b3}","\u{384}","\u{b5}","\u{b6}","\u{b7}","\u{388}","\u{389}","\u{38a}","\u{bb}","\u{38c}","\u{bd}","\u{38e}","\u{38f}","\u{390}","\u{391}","\u{392}","\u{393}","\u{394}","\u{395}","\u{396}","\u{397}","\u{398}","\u{399}","\u{39a}","\u{39b}","\u{39c}","\u{39d}","\u{39e}","\u{39f}","\u{3a0}","\u{3a1}",83=>"\u{3a3}","\u{3a4}","\u{3a5}","\u{3a6}","\u{3a7}","\u{3a8}","\u{3a9}","\u{3aa}","\u{3ab}","\u{3ac}","\u{3ad}","\u{3ae}","\u{3af}","\u{3b0}","\u{3b1}","\u{3b2}","\u{3b3}","\u{3b4}","\u{3b5}","\u{3b6}","\u{3b7}","\u{3b8}","\u{3b9}","\u{3ba}","\u{3bb}","\u{3bc}","\u{3bd}","\u{3be}","\u{3bf}","\u{3c0}","\u{3c1}","\u{3c2}","\u{3c3}","\u{3c4}","\u{3c5}","\u{3c6}","\u{3c7}","\u{3c8}","\u{3c9}","\u{3ca}","\u{3cb}","\u{3cc}","\u{3cd}","\u{3ce}"];
// Code points (as integers) of the same characters, with the same keys and gaps as TABLE_DEC_CHAR
protected const TABLE_DEC_CODE = [8364,129,8218,402,8222,8230,8224,8225,136,8240,138,8249,140,141,142,143,144,8216,8217,8220,8221,8226,8211,8212,152,8482,154,8250,156,157,158,159,160,901,902,163,164,165,166,167,168,169,43=>171,172,173,174,8213,176,177,178,179,900,181,182,183,904,905,906,187,908,189,910,911,912,913,914,915,916,917,918,919,920,921,922,923,924,925,926,927,928,929,83=>931,932,933,934,935,936,937,938,939,940,941,942,943,944,945,946,947,948,949,950,951,952,953,954,955,956,957,958,959,960,961,962,963,964,965,966,967,968,969,970,971,972,973,974];
// Reverse map from code point to its single byte, sorted by code point; an entry written
// without an explicit key takes the previous key plus one (standard PHP array behaviour)
protected const TABLE_ENC = [129=>"\x81",136=>"\x88",138=>"\x8A",140=>"\x8C","\x8D","\x8E","\x8F","\x90",152=>"\x98",154=>"\x9A",156=>"\x9C","\x9D","\x9E","\x9F","\xA0",163=>"\xA3","\xA4","\xA5","\xA6","\xA7","\xA8","\xA9",171=>"\xAB","\xAC","\xAD","\xAE",176=>"\xB0","\xB1","\xB2","\xB3",181=>"\xB5","\xB6","\xB7",187=>"\xBB",189=>"\xBD",402=>"\x83",900=>"\xB4","\xA1","\xA2",904=>"\xB8","\xB9","\xBA",908=>"\xBC",910=>"\xBE","\xBF","\xC0","\xC1","\xC2","\xC3","\xC4","\xC5","\xC6","\xC7","\xC8","\xC9","\xCA","\xCB","\xCC","\xCD","\xCE","\xCF","\xD0","\xD1",931=>"\xD3","\xD4","\xD5","\xD6","\xD7","\xD8","\xD9","\xDA","\xDB","\xDC","\xDD","\xDE","\xDF","\xE0","\xE1","\xE2","\xE3","\xE4","\xE5","\xE6","\xE7","\xE8","\xE9","\xEA","\xEB","\xEC","\xED","\xEE","\xEF","\xF0","\xF1","\xF2","\xF3","\xF4","\xF5","\xF6","\xF7","\xF8","\xF9","\xFA","\xFB","\xFC","\xFD","\xFE",8211=>"\x96","\x97","\xAF",8216=>"\x91","\x92","\x82",8220=>"\x93","\x94","\x84",8224=>"\x86","\x87","\x95",8230=>"\x85",8240=>"\x89",8249=>"\x8B","\x9B",8364=>"\x80",8482=>"\x99"];
}

29
lib/Encoding/Windows1254.php

@ -0,0 +1,29 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
/**
 * Lookup tables for the "windows-1254" single-byte encoding.
 *
 * The class declares constants only; the code that reads them is presumably
 * inherited from SingleByteEncoding, which is not visible here.
 */
class Windows1254 extends SingleByteEncoding {
// Name of the encoding
public const NAME = "windows-1254";
// Label strings declared for this encoding; note that the ISO 8859-9 (Latin-5) labels
// are listed here alongside the windows-1254 ones
public const LABELS = [
"cp1254",
"csisolatin5",
"iso-8859-9",
"iso-ir-148",
"iso8859-9",
"iso88599",
"iso_8859-9",
"iso_8859-9:1989",
"l5",
"latin5",
"windows-1254",
"x-cp1254",
];
// Characters for the upper half of the byte range: index 0 pairs with byte 0x80 and the
// last index (127) with byte 0xFF, as can be cross-checked against TABLE_ENC below
protected const TABLE_DEC_CHAR = ["\u{20ac}","\u{81}","\u{201a}","\u{192}","\u{201e}","\u{2026}","\u{2020}","\u{2021}","\u{2c6}","\u{2030}","\u{160}","\u{2039}","\u{152}","\u{8d}","\u{8e}","\u{8f}","\u{90}","\u{2018}","\u{2019}","\u{201c}","\u{201d}","\u{2022}","\u{2013}","\u{2014}","\u{2dc}","\u{2122}","\u{161}","\u{203a}","\u{153}","\u{9d}","\u{9e}","\u{178}","\u{a0}","\u{a1}","\u{a2}","\u{a3}","\u{a4}","\u{a5}","\u{a6}","\u{a7}","\u{a8}","\u{a9}","\u{aa}","\u{ab}","\u{ac}","\u{ad}","\u{ae}","\u{af}","\u{b0}","\u{b1}","\u{b2}","\u{b3}","\u{b4}","\u{b5}","\u{b6}","\u{b7}","\u{b8}","\u{b9}","\u{ba}","\u{bb}","\u{bc}","\u{bd}","\u{be}","\u{bf}","\u{c0}","\u{c1}","\u{c2}","\u{c3}","\u{c4}","\u{c5}","\u{c6}","\u{c7}","\u{c8}","\u{c9}","\u{ca}","\u{cb}","\u{cc}","\u{cd}","\u{ce}","\u{cf}","\u{11e}","\u{d1}","\u{d2}","\u{d3}","\u{d4}","\u{d5}","\u{d6}","\u{d7}","\u{d8}","\u{d9}","\u{da}","\u{db}","\u{dc}","\u{130}","\u{15e}","\u{df}","\u{e0}","\u{e1}","\u{e2}","\u{e3}","\u{e4}","\u{e5}","\u{e6}","\u{e7}","\u{e8}","\u{e9}","\u{ea}","\u{eb}","\u{ec}","\u{ed}","\u{ee}","\u{ef}","\u{11f}","\u{f1}","\u{f2}","\u{f3}","\u{f4}","\u{f5}","\u{f6}","\u{f7}","\u{f8}","\u{f9}","\u{fa}","\u{fb}","\u{fc}","\u{131}","\u{15f}","\u{ff}"];
// Code points (as integers) of the same characters, in the same order as TABLE_DEC_CHAR
protected const TABLE_DEC_CODE = [8364,129,8218,402,8222,8230,8224,8225,710,8240,352,8249,338,141,142,143,144,8216,8217,8220,8221,8226,8211,8212,732,8482,353,8250,339,157,158,376,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,286,209,210,211,212,213,214,215,216,217,218,219,220,304,350,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,287,241,242,243,244,245,246,247,248,249,250,251,252,305,351,255];
// Reverse map from code point to its single byte, sorted by code point; an entry written
// without an explicit key takes the previous key plus one (standard PHP array behaviour)
protected const TABLE_ENC = [129=>"\x81",141=>"\x8D","\x8E","\x8F","\x90",157=>"\x9D","\x9E",160=>"\xA0","\xA1","\xA2","\xA3","\xA4","\xA5","\xA6","\xA7","\xA8","\xA9","\xAA","\xAB","\xAC","\xAD","\xAE","\xAF","\xB0","\xB1","\xB2","\xB3","\xB4","\xB5","\xB6","\xB7","\xB8","\xB9","\xBA","\xBB","\xBC","\xBD","\xBE","\xBF","\xC0","\xC1","\xC2","\xC3","\xC4","\xC5","\xC6","\xC7","\xC8","\xC9","\xCA","\xCB","\xCC","\xCD","\xCE","\xCF",209=>"\xD1","\xD2","\xD3","\xD4","\xD5","\xD6","\xD7","\xD8","\xD9","\xDA","\xDB","\xDC",223=>"\xDF","\xE0","\xE1","\xE2","\xE3","\xE4","\xE5","\xE6","\xE7","\xE8","\xE9","\xEA","\xEB","\xEC","\xED","\xEE","\xEF",241=>"\xF1","\xF2","\xF3","\xF4","\xF5","\xF6","\xF7","\xF8","\xF9","\xFA","\xFB","\xFC",255=>"\xFF",286=>"\xD0","\xF0",304=>"\xDD","\xFD",338=>"\x8C","\x9C",350=>"\xDE","\xFE","\x8A","\x9A",376=>"\x9F",402=>"\x83",710=>"\x88",732=>"\x98",8211=>"\x96","\x97",8216=>"\x91","\x92","\x82",8220=>"\x93","\x94","\x84",8224=>"\x86","\x87","\x95",8230=>"\x85",8240=>"\x89",8249=>"\x8B","\x9B",8364=>"\x80",8482=>"\x99"];
}

20
lib/Encoding/Windows1255.php

@ -0,0 +1,20 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
/**
 * Lookup tables for the "windows-1255" single-byte encoding.
 *
 * The class declares constants only; the code that reads them is presumably
 * inherited from SingleByteEncoding, which is not visible here.
 */
class Windows1255 extends SingleByteEncoding {
// Name of the encoding
public const NAME = "windows-1255";
// Label strings declared for this encoding; NAME itself is among them
public const LABELS = [
"cp1255",
"windows-1255",
"x-cp1255",
];
// Characters for the upper half of the byte range: index 0 pairs with byte 0x80 (compare
// TABLE_ENC below). The explicit keys 96 and 125 deliberately skip indices 89-95 and 123-124,
// and the list stops at index 126, so bytes 0xD9-0xDF, 0xFB-0xFC and 0xFF have no entry
protected const TABLE_DEC_CHAR = ["\u{20ac}","\u{81}","\u{201a}","\u{192}","\u{201e}","\u{2026}","\u{2020}","\u{2021}","\u{2c6}","\u{2030}","\u{8a}","\u{2039}","\u{8c}","\u{8d}","\u{8e}","\u{8f}","\u{90}","\u{2018}","\u{2019}","\u{201c}","\u{201d}","\u{2022}","\u{2013}","\u{2014}","\u{2dc}","\u{2122}","\u{9a}","\u{203a}","\u{9c}","\u{9d}","\u{9e}","\u{9f}","\u{a0}","\u{a1}","\u{a2}","\u{a3}","\u{20aa}","\u{a5}","\u{a6}","\u{a7}","\u{a8}","\u{a9}","\u{d7}","\u{ab}","\u{ac}","\u{ad}","\u{ae}","\u{af}","\u{b0}","\u{b1}","\u{b2}","\u{b3}","\u{b4}","\u{b5}","\u{b6}","\u{b7}","\u{b8}","\u{b9}","\u{f7}","\u{bb}","\u{bc}","\u{bd}","\u{be}","\u{bf}","\u{5b0}","\u{5b1}","\u{5b2}","\u{5b3}","\u{5b4}","\u{5b5}","\u{5b6}","\u{5b7}","\u{5b8}","\u{5b9}","\u{5ba}","\u{5bb}","\u{5bc}","\u{5bd}","\u{5be}","\u{5bf}","\u{5c0}","\u{5c1}","\u{5c2}","\u{5c3}","\u{5f0}","\u{5f1}","\u{5f2}","\u{5f3}","\u{5f4}",96=>"\u{5d0}","\u{5d1}","\u{5d2}","\u{5d3}","\u{5d4}","\u{5d5}","\u{5d6}","\u{5d7}","\u{5d8}","\u{5d9}","\u{5da}","\u{5db}","\u{5dc}","\u{5dd}","\u{5de}","\u{5df}","\u{5e0}","\u{5e1}","\u{5e2}","\u{5e3}","\u{5e4}","\u{5e5}","\u{5e6}","\u{5e7}","\u{5e8}","\u{5e9}","\u{5ea}",125=>"\u{200e}","\u{200f}"];
// Code points (as integers) of the same characters, with the same keys and gaps as TABLE_DEC_CHAR
protected const TABLE_DEC_CODE = [8364,129,8218,402,8222,8230,8224,8225,710,8240,138,8249,140,141,142,143,144,8216,8217,8220,8221,8226,8211,8212,732,8482,154,8250,156,157,158,159,160,161,162,163,8362,165,166,167,168,169,215,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,247,187,188,189,190,191,1456,1457,1458,1459,1460,1461,1462,1463,1464,1465,1466,1467,1468,1469,1470,1471,1472,1473,1474,1475,1520,1521,1522,1523,1524,96=>1488,1489,1490,1491,1492,1493,1494,1495,1496,1497,1498,1499,1500,1501,1502,1503,1504,1505,1506,1507,1508,1509,1510,1511,1512,1513,1514,125=>8206,8207];
// Reverse map from code point to its single byte, sorted by code point; an entry written
// without an explicit key takes the previous key plus one (standard PHP array behaviour)
protected const TABLE_ENC = [129=>"\x81",138=>"\x8A",140=>"\x8C","\x8D","\x8E","\x8F","\x90",154=>"\x9A",156=>"\x9C","\x9D","\x9E","\x9F","\xA0","\xA1","\xA2","\xA3",165=>"\xA5","\xA6","\xA7","\xA8","\xA9",171=>"\xAB","\xAC","\xAD","\xAE","\xAF","\xB0","\xB1","\xB2","\xB3","\xB4","\xB5","\xB6","\xB7","\xB8","\xB9",187=>"\xBB","\xBC","\xBD","\xBE","\xBF",215=>"\xAA",247=>"\xBA",402=>"\x83",710=>"\x88",732=>"\x98",1456=>"\xC0","\xC1","\xC2","\xC3","\xC4","\xC5","\xC6","\xC7","\xC8","\xC9","\xCA","\xCB","\xCC","\xCD","\xCE","\xCF","\xD0","\xD1","\xD2","\xD3",1488=>"\xE0","\xE1","\xE2","\xE3","\xE4","\xE5","\xE6","\xE7","\xE8","\xE9","\xEA","\xEB","\xEC","\xED","\xEE","\xEF","\xF0","\xF1","\xF2","\xF3","\xF4","\xF5","\xF6","\xF7","\xF8","\xF9","\xFA",1520=>"\xD4","\xD5","\xD6","\xD7","\xD8",8206=>"\xFD","\xFE",8211=>"\x96","\x97",8216=>"\x91","\x92","\x82",8220=>"\x93","\x94","\x84",8224=>"\x86","\x87","\x95",8230=>"\x85",8240=>"\x89",8249=>"\x8B","\x9B",8362=>"\xA4",8364=>"\x80",8482=>"\x99"];
}

20
lib/Encoding/Windows1256.php

@ -0,0 +1,20 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
/**
 * Lookup tables for the "windows-1256" single-byte encoding.
 *
 * The class declares constants only; the code that reads them is presumably
 * inherited from SingleByteEncoding, which is not visible here.
 */
class Windows1256 extends SingleByteEncoding {
// Name of the encoding
public const NAME = "windows-1256";
// Label strings declared for this encoding; NAME itself is among them
public const LABELS = [
"cp1256",
"windows-1256",
"x-cp1256",
];
// Characters for the upper half of the byte range: index 0 pairs with byte 0x80 and the
// last index (127) with byte 0xFF, as can be cross-checked against TABLE_ENC below
protected const TABLE_DEC_CHAR = ["\u{20ac}","\u{67e}","\u{201a}","\u{192}","\u{201e}","\u{2026}","\u{2020}","\u{2021}","\u{2c6}","\u{2030}","\u{679}","\u{2039}","\u{152}","\u{686}","\u{698}","\u{688}","\u{6af}","\u{2018}","\u{2019}","\u{201c}","\u{201d}","\u{2022}","\u{2013}","\u{2014}","\u{6a9}","\u{2122}","\u{691}","\u{203a}","\u{153}","\u{200c}","\u{200d}","\u{6ba}","\u{a0}","\u{60c}","\u{a2}","\u{a3}","\u{a4}","\u{a5}","\u{a6}","\u{a7}","\u{a8}","\u{a9}","\u{6be}","\u{ab}","\u{ac}","\u{ad}","\u{ae}","\u{af}","\u{b0}","\u{b1}","\u{b2}","\u{b3}","\u{b4}","\u{b5}","\u{b6}","\u{b7}","\u{b8}","\u{b9}","\u{61b}","\u{bb}","\u{bc}","\u{bd}","\u{be}","\u{61f}","\u{6c1}","\u{621}","\u{622}","\u{623}","\u{624}","\u{625}","\u{626}","\u{627}","\u{628}","\u{629}","\u{62a}","\u{62b}","\u{62c}","\u{62d}","\u{62e}","\u{62f}","\u{630}","\u{631}","\u{632}","\u{633}","\u{634}","\u{635}","\u{636}","\u{d7}","\u{637}","\u{638}","\u{639}","\u{63a}","\u{640}","\u{641}","\u{642}","\u{643}","\u{e0}","\u{644}","\u{e2}","\u{645}","\u{646}","\u{647}","\u{648}","\u{e7}","\u{e8}","\u{e9}","\u{ea}","\u{eb}","\u{649}","\u{64a}","\u{ee}","\u{ef}","\u{64b}","\u{64c}","\u{64d}","\u{64e}","\u{f4}","\u{64f}","\u{650}","\u{f7}","\u{651}","\u{f9}","\u{652}","\u{fb}","\u{fc}","\u{200e}","\u{200f}","\u{6d2}"];
// Code points (as integers) of the same characters, in the same order as TABLE_DEC_CHAR
protected const TABLE_DEC_CODE = [8364,1662,8218,402,8222,8230,8224,8225,710,8240,1657,8249,338,1670,1688,1672,1711,8216,8217,8220,8221,8226,8211,8212,1705,8482,1681,8250,339,8204,8205,1722,160,1548,162,163,164,165,166,167,168,169,1726,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,1563,187,188,189,190,1567,1729,1569,1570,1571,1572,1573,1574,1575,1576,1577,1578,1579,1580,1581,1582,1583,1584,1585,1586,1587,1588,1589,1590,215,1591,1592,1593,1594,1600,1601,1602,1603,224,1604,226,1605,1606,1607,1608,231,232,233,234,235,1609,1610,238,239,1611,1612,1613,1614,244,1615,1616,247,1617,249,1618,251,252,8206,8207,1746];
// Reverse map from code point to its single byte, sorted by code point; an entry written
// without an explicit key takes the previous key plus one (standard PHP array behaviour)
protected const TABLE_ENC = [160=>"\xA0",162=>"\xA2","\xA3","\xA4","\xA5","\xA6","\xA7","\xA8","\xA9",171=>"\xAB","\xAC","\xAD","\xAE","\xAF","\xB0","\xB1","\xB2","\xB3","\xB4","\xB5","\xB6","\xB7","\xB8","\xB9",187=>"\xBB","\xBC","\xBD","\xBE",215=>"\xD7",224=>"\xE0",226=>"\xE2",231=>"\xE7","\xE8","\xE9","\xEA","\xEB",238=>"\xEE","\xEF",244=>"\xF4",247=>"\xF7",249=>"\xF9",251=>"\xFB","\xFC",338=>"\x8C","\x9C",402=>"\x83",710=>"\x88",1548=>"\xA1",1563=>"\xBA",1567=>"\xBF",1569=>"\xC1","\xC2","\xC3","\xC4","\xC5","\xC6","\xC7","\xC8","\xC9","\xCA","\xCB","\xCC","\xCD","\xCE","\xCF","\xD0","\xD1","\xD2","\xD3","\xD4","\xD5","\xD6","\xD8","\xD9","\xDA","\xDB",1600=>"\xDC","\xDD","\xDE","\xDF","\xE1","\xE3","\xE4","\xE5","\xE6","\xEC","\xED","\xF0","\xF1","\xF2","\xF3","\xF5","\xF6","\xF8","\xFA",1657=>"\x8A",1662=>"\x81",1670=>"\x8D",1672=>"\x8F",1681=>"\x9A",1688=>"\x8E",1705=>"\x98",1711=>"\x90",1722=>"\x9F",1726=>"\xAA",1729=>"\xC0",1746=>"\xFF",8204=>"\x9D","\x9E","\xFD","\xFE",8211=>"\x96","\x97",8216=>"\x91","\x92","\x82",8220=>"\x93","\x94","\x84",8224=>"\x86","\x87","\x95",8230=>"\x85",8240=>"\x89",8249=>"\x8B","\x9B",8364=>"\x80",8482=>"\x99"];
}

20
lib/Encoding/Windows1257.php

@ -0,0 +1,20 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
/**
 * Lookup tables for the "windows-1257" single-byte encoding.
 *
 * The class declares constants only; the code that reads them is presumably
 * inherited from SingleByteEncoding, which is not visible here.
 */
class Windows1257 extends SingleByteEncoding {
// Name of the encoding
public const NAME = "windows-1257";
// Label strings declared for this encoding; NAME itself is among them
public const LABELS = [
"cp1257",
"windows-1257",
"x-cp1257",
];
// Characters for the upper half of the byte range: index 0 pairs with byte 0x80 (compare
// TABLE_ENC below). The explicit keys 34 and 38 deliberately skip indices 33 and 37,
// so bytes 0xA1 and 0xA5 have no entry in this table
protected const TABLE_DEC_CHAR = ["\u{20ac}","\u{81}","\u{201a}","\u{83}","\u{201e}","\u{2026}","\u{2020}","\u{2021}","\u{88}","\u{2030}","\u{8a}","\u{2039}","\u{8c}","\u{a8}","\u{2c7}","\u{b8}","\u{90}","\u{2018}","\u{2019}","\u{201c}","\u{201d}","\u{2022}","\u{2013}","\u{2014}","\u{98}","\u{2122}","\u{9a}","\u{203a}","\u{9c}","\u{af}","\u{2db}","\u{9f}","\u{a0}",34=>"\u{a2}","\u{a3}","\u{a4}",38=>"\u{a6}","\u{a7}","\u{d8}","\u{a9}","\u{156}","\u{ab}","\u{ac}","\u{ad}","\u{ae}","\u{c6}","\u{b0}","\u{b1}","\u{b2}","\u{b3}","\u{b4}","\u{b5}","\u{b6}","\u{b7}","\u{f8}","\u{b9}","\u{157}","\u{bb}","\u{bc}","\u{bd}","\u{be}","\u{e6}","\u{104}","\u{12e}","\u{100}","\u{106}","\u{c4}","\u{c5}","\u{118}","\u{112}","\u{10c}","\u{c9}","\u{179}","\u{116}","\u{122}","\u{136}","\u{12a}","\u{13b}","\u{160}","\u{143}","\u{145}","\u{d3}","\u{14c}","\u{d5}","\u{d6}","\u{d7}","\u{172}","\u{141}","\u{15a}","\u{16a}","\u{dc}","\u{17b}","\u{17d}","\u{df}","\u{105}","\u{12f}","\u{101}","\u{107}","\u{e4}","\u{e5}","\u{119}","\u{113}","\u{10d}","\u{e9}","\u{17a}","\u{117}","\u{123}","\u{137}","\u{12b}","\u{13c}","\u{161}","\u{144}","\u{146}","\u{f3}","\u{14d}","\u{f5}","\u{f6}","\u{f7}","\u{173}","\u{142}","\u{15b}","\u{16b}","\u{fc}","\u{17c}","\u{17e}","\u{2d9}"];
// Code points (as integers) of the same characters, with the same keys and gaps as TABLE_DEC_CHAR
protected const TABLE_DEC_CODE = [8364,129,8218,131,8222,8230,8224,8225,136,8240,138,8249,140,168,711,184,144,8216,8217,8220,8221,8226,8211,8212,152,8482,154,8250,156,175,731,159,160,34=>162,163,164,38=>166,167,216,169,342,171,172,173,174,198,176,177,178,179,180,181,182,183,248,185,343,187,188,189,190,230,260,302,256,262,196,197,280,274,268,201,377,278,290,310,298,315,352,323,325,211,332,213,214,215,370,321,346,362,220,379,381,223,261,303,257,263,228,229,281,275,269,233,378,279,291,311,299,316,353,324,326,243,333,245,246,247,371,322,347,363,252,380,382,729];
// Reverse map from code point to its single byte, sorted by code point; an entry written
// without an explicit key takes the previous key plus one (standard PHP array behaviour)
protected const TABLE_ENC = [129=>"\x81",131=>"\x83",136=>"\x88",138=>"\x8A",140=>"\x8C",144=>"\x90",152=>"\x98",154=>"\x9A",156=>"\x9C",159=>"\x9F","\xA0",162=>"\xA2","\xA3","\xA4",166=>"\xA6","\xA7","\x8D","\xA9",171=>"\xAB","\xAC","\xAD","\xAE","\x9D","\xB0","\xB1","\xB2","\xB3","\xB4","\xB5","\xB6","\xB7","\x8F","\xB9",187=>"\xBB","\xBC","\xBD","\xBE",196=>"\xC4","\xC5","\xAF",201=>"\xC9",211=>"\xD3",213=>"\xD5","\xD6","\xD7","\xA8",220=>"\xDC",223=>"\xDF",228=>"\xE4","\xE5","\xBF",233=>"\xE9",243=>"\xF3",245=>"\xF5","\xF6","\xF7","\xB8",252=>"\xFC",256=>"\xC2","\xE2",260=>"\xC0","\xE0","\xC3","\xE3",268=>"\xC8","\xE8",274=>"\xC7","\xE7",278=>"\xCB","\xEB","\xC6","\xE6",290=>"\xCC","\xEC",298=>"\xCE","\xEE",302=>"\xC1","\xE1",310=>"\xCD","\xED",315=>"\xCF","\xEF",321=>"\xD9","\xF9","\xD1","\xF1","\xD2","\xF2",332=>"\xD4","\xF4",342=>"\xAA","\xBA",346=>"\xDA","\xFA",352=>"\xD0","\xF0",362=>"\xDB","\xFB",370=>"\xD8","\xF8",377=>"\xCA","\xEA","\xDD","\xFD","\xDE","\xFE",711=>"\x8E",729=>"\xFF",731=>"\x9E",8211=>"\x96","\x97",8216=>"\x91","\x92","\x82",8220=>"\x93","\x94","\x84",8224=>"\x86","\x87","\x95",8230=>"\x85",8240=>"\x89",8249=>"\x8B","\x9B",8364=>"\x80",8482=>"\x99"];
}

20
lib/Encoding/Windows1258.php

@ -0,0 +1,20 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
/**
 * Lookup tables for the "windows-1258" single-byte encoding.
 *
 * The class declares constants only; the code that reads them is presumably
 * inherited from SingleByteEncoding, which is not visible here.
 */
class Windows1258 extends SingleByteEncoding {
// Name of the encoding
public const NAME = "windows-1258";
// Label strings declared for this encoding; NAME itself is among them
public const LABELS = [
"cp1258",
"windows-1258",
"x-cp1258",
];
// Characters for the upper half of the byte range: index 0 pairs with byte 0x80 and the
// last index (127) with byte 0xFF, as can be cross-checked against TABLE_ENC below
protected const TABLE_DEC_CHAR = ["\u{20ac}","\u{81}","\u{201a}","\u{192}","\u{201e}","\u{2026}","\u{2020}","\u{2021}","\u{2c6}","\u{2030}","\u{8a}","\u{2039}","\u{152}","\u{8d}","\u{8e}","\u{8f}","\u{90}","\u{2018}","\u{2019}","\u{201c}","\u{201d}","\u{2022}","\u{2013}","\u{2014}","\u{2dc}","\u{2122}","\u{9a}","\u{203a}","\u{153}","\u{9d}","\u{9e}","\u{178}","\u{a0}","\u{a1}","\u{a2}","\u{a3}","\u{a4}","\u{a5}","\u{a6}","\u{a7}","\u{a8}","\u{a9}","\u{aa}","\u{ab}","\u{ac}","\u{ad}","\u{ae}","\u{af}","\u{b0}","\u{b1}","\u{b2}","\u{b3}","\u{b4}","\u{b5}","\u{b6}","\u{b7}","\u{b8}","\u{b9}","\u{ba}","\u{bb}","\u{bc}","\u{bd}","\u{be}","\u{bf}","\u{c0}","\u{c1}","\u{c2}","\u{102}","\u{c4}","\u{c5}","\u{c6}","\u{c7}","\u{c8}","\u{c9}","\u{ca}","\u{cb}","\u{300}","\u{cd}","\u{ce}","\u{cf}","\u{110}","\u{d1}","\u{309}","\u{d3}","\u{d4}","\u{1a0}","\u{d6}","\u{d7}","\u{d8}","\u{d9}","\u{da}","\u{db}","\u{dc}","\u{1af}","\u{303}","\u{df}","\u{e0}","\u{e1}","\u{e2}","\u{103}","\u{e4}","\u{e5}","\u{e6}","\u{e7}","\u{e8}","\u{e9}","\u{ea}","\u{eb}","\u{301}","\u{ed}","\u{ee}","\u{ef}","\u{111}","\u{f1}","\u{323}","\u{f3}","\u{f4}","\u{1a1}","\u{f6}","\u{f7}","\u{f8}","\u{f9}","\u{fa}","\u{fb}","\u{fc}","\u{1b0}","\u{20ab}","\u{ff}"];
// Code points (as integers) of the same characters, in the same order as TABLE_DEC_CHAR
protected const TABLE_DEC_CODE = [8364,129,8218,402,8222,8230,8224,8225,710,8240,138,8249,338,141,142,143,144,8216,8217,8220,8221,8226,8211,8212,732,8482,154,8250,339,157,158,376,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,258,196,197,198,199,200,201,202,203,768,205,206,207,272,209,777,211,212,416,214,215,216,217,218,219,220,431,771,223,224,225,226,259,228,229,230,231,232,233,234,235,769,237,238,239,273,241,803,243,244,417,246,247,248,249,250,251,252,432,8363,255];
// Reverse map from code point to its single byte, sorted by code point; an entry written
// without an explicit key takes the previous key plus one (standard PHP array behaviour)
protected const TABLE_ENC = [129=>"\x81",138=>"\x8A",141=>"\x8D","\x8E","\x8F","\x90",154=>"\x9A",157=>"\x9D","\x9E",160=>"\xA0","\xA1","\xA2","\xA3","\xA4","\xA5","\xA6","\xA7","\xA8","\xA9","\xAA","\xAB","\xAC","\xAD","\xAE","\xAF","\xB0","\xB1","\xB2","\xB3","\xB4","\xB5","\xB6","\xB7","\xB8","\xB9","\xBA","\xBB","\xBC","\xBD","\xBE","\xBF","\xC0","\xC1","\xC2",196=>"\xC4","\xC5","\xC6","\xC7","\xC8","\xC9","\xCA","\xCB",205=>"\xCD","\xCE","\xCF",209=>"\xD1",211=>"\xD3","\xD4",214=>"\xD6","\xD7","\xD8","\xD9","\xDA","\xDB","\xDC",223=>"\xDF","\xE0","\xE1","\xE2",228=>"\xE4","\xE5","\xE6","\xE7","\xE8","\xE9","\xEA","\xEB",237=>"\xED","\xEE","\xEF",241=>"\xF1",243=>"\xF3","\xF4",246=>"\xF6","\xF7","\xF8","\xF9","\xFA","\xFB","\xFC",255=>"\xFF",258=>"\xC3","\xE3",272=>"\xD0","\xF0",338=>"\x8C","\x9C",376=>"\x9F",402=>"\x83",416=>"\xD5","\xF5",431=>"\xDD","\xFD",710=>"\x88",732=>"\x98",768=>"\xCC","\xEC",771=>"\xDE",777=>"\xD2",803=>"\xF2",8211=>"\x96","\x97",8216=>"\x91","\x92","\x82",8220=>"\x93","\x94","\x84",8224=>"\x86","\x87","\x95",8230=>"\x85",8240=>"\x89",8249=>"\x8B","\x9B",8363=>"\xFE","\x80",8482=>"\x99"];
}

23
lib/Encoding/Windows874.php

@ -0,0 +1,23 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
/**
 * Lookup tables for the "windows-874" single-byte encoding.
 *
 * The class declares constants only; the code that reads them is presumably
 * inherited from SingleByteEncoding, which is not visible here.
 */
class Windows874 extends SingleByteEncoding {
// Name of the encoding
public const NAME = "windows-874";
// Label strings declared for this encoding; note that the ISO 8859-11 and TIS-620
// labels are listed here alongside the windows-874 one
public const LABELS = [
"dos-874",
"iso-8859-11",
"iso8859-11",
"iso885911",
"tis-620",
"windows-874",
];
// Characters for the upper half of the byte range: index 0 pairs with byte 0x80 (compare
// TABLE_ENC below). The explicit key 95 deliberately skips indices 91-94, and the list
// stops at index 123, so bytes 0xDB-0xDE and 0xFC-0xFF have no entry in this table
protected const TABLE_DEC_CHAR = ["\u{20ac}","\u{81}","\u{82}","\u{83}","\u{84}","\u{2026}","\u{86}","\u{87}","\u{88}","\u{89}","\u{8a}","\u{8b}","\u{8c}","\u{8d}","\u{8e}","\u{8f}","\u{90}","\u{2018}","\u{2019}","\u{201c}","\u{201d}","\u{2022}","\u{2013}","\u{2014}","\u{98}","\u{99}","\u{9a}","\u{9b}","\u{9c}","\u{9d}","\u{9e}","\u{9f}","\u{a0}","\u{e01}","\u{e02}","\u{e03}","\u{e04}","\u{e05}","\u{e06}","\u{e07}","\u{e08}","\u{e09}","\u{e0a}","\u{e0b}","\u{e0c}","\u{e0d}","\u{e0e}","\u{e0f}","\u{e10}","\u{e11}","\u{e12}","\u{e13}","\u{e14}","\u{e15}","\u{e16}","\u{e17}","\u{e18}","\u{e19}","\u{e1a}","\u{e1b}","\u{e1c}","\u{e1d}","\u{e1e}","\u{e1f}","\u{e20}","\u{e21}","\u{e22}","\u{e23}","\u{e24}","\u{e25}","\u{e26}","\u{e27}","\u{e28}","\u{e29}","\u{e2a}","\u{e2b}","\u{e2c}","\u{e2d}","\u{e2e}","\u{e2f}","\u{e30}","\u{e31}","\u{e32}","\u{e33}","\u{e34}","\u{e35}","\u{e36}","\u{e37}","\u{e38}","\u{e39}","\u{e3a}",95=>"\u{e3f}","\u{e40}","\u{e41}","\u{e42}","\u{e43}","\u{e44}","\u{e45}","\u{e46}","\u{e47}","\u{e48}","\u{e49}","\u{e4a}","\u{e4b}","\u{e4c}","\u{e4d}","\u{e4e}","\u{e4f}","\u{e50}","\u{e51}","\u{e52}","\u{e53}","\u{e54}","\u{e55}","\u{e56}","\u{e57}","\u{e58}","\u{e59}","\u{e5a}","\u{e5b}"];
// Code points (as integers) of the same characters, with the same keys and gaps as TABLE_DEC_CHAR
protected const TABLE_DEC_CODE = [8364,129,130,131,132,8230,134,135,136,137,138,139,140,141,142,143,144,8216,8217,8220,8221,8226,8211,8212,152,153,154,155,156,157,158,159,160,3585,3586,3587,3588,3589,3590,3591,3592,3593,3594,3595,3596,3597,3598,3599,3600,3601,3602,3603,3604,3605,3606,3607,3608,3609,3610,3611,3612,3613,3614,3615,3616,3617,3618,3619,3620,3621,3622,3623,3624,3625,3626,3627,3628,3629,3630,3631,3632,3633,3634,3635,3636,3637,3638,3639,3640,3641,3642,95=>3647,3648,3649,3650,3651,3652,3653,3654,3655,3656,3657,3658,3659,3660,3661,3662,3663,3664,3665,3666,3667,3668,3669,3670,3671,3672,3673,3674,3675];
// Reverse map from code point to its single byte, sorted by code point; an entry written
// without an explicit key takes the previous key plus one (standard PHP array behaviour)
protected const TABLE_ENC = [129=>"\x81","\x82","\x83","\x84",134=>"\x86","\x87","\x88","\x89","\x8A","\x8B","\x8C","\x8D","\x8E","\x8F","\x90",152=>"\x98","\x99","\x9A","\x9B","\x9C","\x9D","\x9E","\x9F","\xA0",3585=>"\xA1","\xA2","\xA3","\xA4","\xA5","\xA6","\xA7","\xA8","\xA9","\xAA","\xAB","\xAC","\xAD","\xAE","\xAF","\xB0","\xB1","\xB2","\xB3","\xB4","\xB5","\xB6","\xB7","\xB8","\xB9","\xBA","\xBB","\xBC","\xBD","\xBE","\xBF","\xC0","\xC1","\xC2","\xC3","\xC4","\xC5","\xC6","\xC7","\xC8","\xC9","\xCA","\xCB","\xCC","\xCD","\xCE","\xCF","\xD0","\xD1","\xD2","\xD3","\xD4","\xD5","\xD6","\xD7","\xD8","\xD9","\xDA",3647=>"\xDF","\xE0","\xE1","\xE2","\xE3","\xE4","\xE5","\xE6","\xE7","\xE8","\xE9","\xEA","\xEB","\xEC","\xED","\xEE","\xEF","\xF0","\xF1","\xF2","\xF3","\xF4","\xF5","\xF6","\xF7","\xF8","\xF9","\xFA","\xFB",8211=>"\x96","\x97",8216=>"\x91","\x92",8220=>"\x93","\x94",8226=>"\x95",8230=>"\x85",8364=>"\x80"];
}

19
lib/Encoding/XMacCyrillic.php

@ -0,0 +1,19 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
/**
 * Lookup tables for the "x-mac-cyrillic" single-byte encoding.
 *
 * The class declares constants only; the code that reads them is presumably
 * inherited from SingleByteEncoding, which is not visible here.
 */
class XMacCyrillic extends SingleByteEncoding {
// Name of the encoding
public const NAME = "x-mac-cyrillic";
// Label strings declared for this encoding; "x-mac-ukrainian" is listed alongside NAME
public const LABELS = [
"x-mac-cyrillic",
"x-mac-ukrainian",
];
// Characters for the upper half of the byte range: index 0 pairs with byte 0x80 and the
// last index (127) with byte 0xFF, as can be cross-checked against TABLE_ENC below
protected const TABLE_DEC_CHAR = ["\u{410}","\u{411}","\u{412}","\u{413}","\u{414}","\u{415}","\u{416}","\u{417}","\u{418}","\u{419}","\u{41a}","\u{41b}","\u{41c}","\u{41d}","\u{41e}","\u{41f}","\u{420}","\u{421}","\u{422}","\u{423}","\u{424}","\u{425}","\u{426}","\u{427}","\u{428}","\u{429}","\u{42a}","\u{42b}","\u{42c}","\u{42d}","\u{42e}","\u{42f}","\u{2020}","\u{b0}","\u{490}","\u{a3}","\u{a7}","\u{2022}","\u{b6}","\u{406}","\u{ae}","\u{a9}","\u{2122}","\u{402}","\u{452}","\u{2260}","\u{403}","\u{453}","\u{221e}","\u{b1}","\u{2264}","\u{2265}","\u{456}","\u{b5}","\u{491}","\u{408}","\u{404}","\u{454}","\u{407}","\u{457}","\u{409}","\u{459}","\u{40a}","\u{45a}","\u{458}","\u{405}","\u{ac}","\u{221a}","\u{192}","\u{2248}","\u{2206}","\u{ab}","\u{bb}","\u{2026}","\u{a0}","\u{40b}","\u{45b}","\u{40c}","\u{45c}","\u{455}","\u{2013}","\u{2014}","\u{201c}","\u{201d}","\u{2018}","\u{2019}","\u{f7}","\u{201e}","\u{40e}","\u{45e}","\u{40f}","\u{45f}","\u{2116}","\u{401}","\u{451}","\u{44f}","\u{430}","\u{431}","\u{432}","\u{433}","\u{434}","\u{435}","\u{436}","\u{437}","\u{438}","\u{439}","\u{43a}","\u{43b}","\u{43c}","\u{43d}","\u{43e}","\u{43f}","\u{440}","\u{441}","\u{442}","\u{443}","\u{444}","\u{445}","\u{446}","\u{447}","\u{448}","\u{449}","\u{44a}","\u{44b}","\u{44c}","\u{44d}","\u{44e}","\u{20ac}"];
// Code points (as integers) of the same characters, in the same order as TABLE_DEC_CHAR
protected const TABLE_DEC_CODE = [1040,1041,1042,1043,1044,1045,1046,1047,1048,1049,1050,1051,1052,1053,1054,1055,1056,1057,1058,1059,1060,1061,1062,1063,1064,1065,1066,1067,1068,1069,1070,1071,8224,176,1168,163,167,8226,182,1030,174,169,8482,1026,1106,8800,1027,1107,8734,177,8804,8805,1110,181,1169,1032,1028,1108,1031,1111,1033,1113,1034,1114,1112,1029,172,8730,402,8776,8710,171,187,8230,160,1035,1115,1036,1116,1109,8211,8212,8220,8221,8216,8217,247,8222,1038,1118,1039,1119,8470,1025,1105,1103,1072,1073,1074,1075,1076,1077,1078,1079,1080,1081,1082,1083,1084,1085,1086,1087,1088,1089,1090,1091,1092,1093,1094,1095,1096,1097,1098,1099,1100,1101,1102,8364];
// Reverse map from code point to its single byte, sorted by code point; an entry written
// without an explicit key takes the previous key plus one (standard PHP array behaviour)
protected const TABLE_ENC = [160=>"\xCA",163=>"\xA3",167=>"\xA4",169=>"\xA9",171=>"\xC7","\xC2",174=>"\xA8",176=>"\xA1","\xB1",181=>"\xB5","\xA6",187=>"\xC8",247=>"\xD6",402=>"\xC4",1025=>"\xDD","\xAB","\xAE","\xB8","\xC1","\xA7","\xBA","\xB7","\xBC","\xBE","\xCB","\xCD",1038=>"\xD8","\xDA","\x80","\x81","\x82","\x83","\x84","\x85","\x86","\x87","\x88","\x89","\x8A","\x8B","\x8C","\x8D","\x8E","\x8F","\x90","\x91","\x92","\x93","\x94","\x95","\x96","\x97","\x98","\x99","\x9A","\x9B","\x9C","\x9D","\x9E","\x9F","\xE0","\xE1","\xE2","\xE3","\xE4","\xE5","\xE6","\xE7","\xE8","\xE9","\xEA","\xEB","\xEC","\xED","\xEE","\xEF","\xF0","\xF1","\xF2","\xF3","\xF4","\xF5","\xF6","\xF7","\xF8","\xF9","\xFA","\xFB","\xFC","\xFD","\xFE","\xDF",1105=>"\xDE","\xAC","\xAF","\xB9","\xCF","\xB4","\xBB","\xC0","\xBD","\xBF","\xCC","\xCE",1118=>"\xD9","\xDB",1168=>"\xA2","\xB6",8211=>"\xD0","\xD1",8216=>"\xD4","\xD5",8220=>"\xD2","\xD3","\xD7",8224=>"\xA0",8226=>"\xA5",8230=>"\xC9",8364=>"\xFF",8470=>"\xDC",8482=>"\xAA",8710=>"\xC6",8730=>"\xC3",8734=>"\xB0",8776=>"\xC5",8800=>"\xAD",8804=>"\xB2","\xB3"];
}

113
lib/Encoding/XUserDefined.php

@ -0,0 +1,113 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\Encoding;
/** Coder and decoder for the "x-user-defined" encoding
 *
 * Every byte is exactly one character: bytes below 0x80 stand for themselves, while bytes 0x80 through 0xFF correspond to the code points U+F780 through U+F7FF
 */
class XUserDefined extends AbstractEncoding implements Coder, Decoder {
    public const NAME = "x-user-defined";
    public const LABELS = ["x-user-defined"];

    /** Retrieve the next character in the string, in UTF-8 encoding
     *
     * The empty string is returned if the end of the string has been reached; otherwise the pointer is advanced by one byte
     */
    public function nextChar(): string {
        $byte = $this->string[$this->posChar] ?? "";
        if ($byte !== "") {
            $this->posChar++;
            $this->posByte++;
            $value = ord($byte);
            // ASCII bytes are returned as-is; anything else is shifted up by 0xF700 and encoded as UTF-8
            return ($value < 0x80) ? $byte : UTF8::encode(0xF700 + $value);
        }
        // there is no byte at the current position: the string is exhausted
        return "";
    }

    /** Decodes the next character from the string and returns its code point number
     *
     * If the end of the string has been reached, false is returned; otherwise the pointer is advanced by one byte
     *
     * @return int|bool
     */
    public function nextCode() {
        $byte = $this->string[$this->posChar] ?? "";
        if ($byte !== "") {
            $this->posChar++;
            $this->posByte++;
            $value = ord($byte);
            // ASCII bytes map to themselves; anything else is shifted up by 0xF700
            return ($value < 0x80) ? $value : 0xF700 + $value;
        }
        // there is no byte at the current position: the string is exhausted
        return false;
    }

    /** Advance $distance characters through the string
     *
     * If $distance is negative, the operation will be performed in reverse
     *
     * The return value is the number of requested characters which could not be skipped because the end (or beginning) of the string was reached first
     */
    public function seek(int $distance): int {
        if ($distance === 0) {
            return 0;
        }
        if ($distance > 0) {
            // consume characters one at a time until either the request or the string is exhausted
            for (; $distance > 0 && $this->posChar < $this->lenByte; $distance--) {
                $this->nextCode();
            }
            return $distance;
        }
        // moving backward: each character is one byte, so both pointers step back together
        $remaining = abs($distance);
        while ($remaining > 0 && $this->posChar > 0) {
            $this->posChar--;
            $this->posByte--;
            $remaining--;
        }
        return $remaining;
    }

    /** Encodes a single code point as an x-user-defined byte
     *
     * Code points below 0x80 and those from 0xF780 to 0xF7FF are encoded as one byte; any other code point in the Unicode range is passed to errEnc() along with the negation of $fatal
     *
     * @param int $codePoint The Unicode code point to encode
     * @param bool $fatal Its negation is forwarded to errEnc() for code points which cannot be encoded
     * @throws EncoderException if $codePoint is negative or greater than 0x10FFFF
     */
    public static function encode(int $codePoint, bool $fatal = true): string {
        if ($codePoint < 0 || $codePoint > 0x10FFFF) {
            throw new EncoderException("Encountered code point outside Unicode range ($codePoint)", self::E_INVALID_CODE_POINT);
        }
        if ($codePoint < 0x80) {
            return chr($codePoint);
        }
        if ($codePoint < 0xF780 || $codePoint > 0xF7FF) {
            // not representable in this encoding
            return self::errEnc(!$fatal, $codePoint);
        }
        // 0xF780..0xF7FF map back onto the bytes 0x80..0xFF
        return chr($codePoint - 0xF700);
    }

    /** @codeCoverageIgnore */
    protected function seekBack(int $distance): int {
        // stub: backward seeking is handled directly in seek(), so this always reports zero
        return 0;
    }

    /** Calculates the length of the string in code points
     *
     * This is simply the stored byte length of the string
     */
    public function lenChar(): int {
        return $this->lenByte;
    }

    /** Returns whether the character pointer is at the end of the string */
    public function eof(): bool {
        return !($this->posChar < $this->lenByte);
    }
}

16
robo

@ -1,10 +1,14 @@
#! /bin/sh
# Wrapper which runs the robo binary found in vendor/bin next to this script.
# NOTE(review): this text comes from a diff hunk displayed without +/- markers, so statements
# from the old and the new revision of the script appear to be interleaved below; confirm
# against the real file before relying on the control flow as shown.
# Directory containing this script
base=`dirname "$0"`
# The first argument is handed to robo as its command
roboCommand="$1"
shift
if [ "$1" == "clean" ]; then
"$base/vendor/bin/robo" "$roboCommand" $*
# When no positional arguments remain, robo is invoked with no arguments at all
if [ $# -eq 0 ]; then
"$base/vendor/bin/robo"
else
"$base/vendor/bin/robo" "$roboCommand" -- $*
fi
shift
# Set the open file descriptor limit to 2048 before running robo
ulimit -n 2048
# "clean" receives the remaining arguments directly; in every other case a "--" is inserted before them
if [ "$1" = "clean" ]; then
"$base/vendor/bin/robo" "$roboCommand" "$@"
else
"$base/vendor/bin/robo" "$roboCommand" -- "$@"
fi
fi

6
tests/bootstrap.php

@ -4,10 +4,14 @@
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
// NOTE(review): two namespace declarations follow one another here; this looks like the removed and the added line of a diff hunk shown together -- confirm which one the real file keeps
namespace MensBeam\UTF8;
namespace MensBeam\Intl;
// Prefix consisting of the current namespace followed by a backslash
const NS_BASE = __NAMESPACE__."\\";
// Define the namespaced constant BASE as the parent of this file's directory, with a trailing directory separator
define(NS_BASE."BASE", dirname(__DIR__).DIRECTORY_SEPARATOR);
// Lift the memory limit and report every class of error
ini_set("memory_limit", "-1");
error_reporting(\E_ALL);
// Load the autoloader from the vendor directory under BASE
require_once BASE."vendor".DIRECTORY_SEPARATOR."autoload.php";
// When Xdebug's filter function is available, restrict code coverage to the lib/ directory
if (function_exists("xdebug_set_filter")) {
xdebug_set_filter(\XDEBUG_FILTER_CODE_COVERAGE, \XDEBUG_PATH_WHITELIST, [BASE."lib/"]);
}

233
tests/cases/Encoding/TestBig5.php

File diff suppressed because one or more lines are too long

243
tests/cases/Encoding/TestEUCJP.php

File diff suppressed because one or more lines are too long

222
tests/cases/Encoding/TestEUCKR.php

File diff suppressed because one or more lines are too long

315
tests/cases/Encoding/TestGB18030.php

File diff suppressed because one or more lines are too long

279
tests/cases/Encoding/TestISO2022JP.php

@ -0,0 +1,279 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\TestCase\Encoding;
use MensBeam\Intl\Encoding\ISO2022JP;
use MensBeam\Intl\Encoding\Coder;
use MensBeam\Intl\Encoding\EncoderException;
/** Test case for the ISO2022JP encoding class
 *
 * Most test methods simply delegate to the parent CoderDecoderTest implementation; they are redeclared here so that each can carry its own data provider and coverage annotations
 */
class TestISO2022JP extends \MensBeam\Intl\Test\CoderDecoderTest {
    /** The class under test */
    protected $testedClass = ISO2022JP::class;
    /*
        Char 0 U+007A (1 byte) Offset 0
        Esc: Katakana (3 bytes) Offset 1
        Char 1 U+FF9C (1 byte) Offset 4
        Char 2 U+FF9F (1 byte) Offset 5
        Esc: Double-byte (3 bytes) Offset 6
        Char 3 U+79FB (2 bytes) Offset 9
        Char 4 U+67B8 (2 bytes) Offset 11
        Char 5 U+9B91 (2 bytes) Offset 13
        Esc: ASCII (3 bytes) Offset 15
        Char 6 U+007E (1 byte) Offset 18
        Esc: Roman (3 bytes) Offset 19
        End of string at char 7, offset 22
    */
    protected $seekString = "7A 1B2849 5C 5F 1B2440 305C 5B4E 723A 1B2842 7E 1B284A";
    protected $seekCodes = [0x7A, 0xFF9C, 0xFF9F, 0x79FB, 0x67B8, 0x9B91, 0x7E];
    protected $seekOffsets = [0, 1, 5, 6, 11, 13, 15, 19];
    /* This string contains an invalid character sequence sandwiched between two null characters */
    protected $brokenChar = "00 FF 00";
    /* This string contains the ASCII characters "A" and "Z" followed by two arbitrary non-ASCII characters, followed by the two ASCII characters "0" and "9" */
    protected $spanString = "1B284A 41 5A 1B2849 5C 5F 1B2842 30 39";

    /** Supplies the encoder test cases
     *
     * Each case is a list of: the fatal-mode flag, the code points to encode, and either the expected output written as hexadecimal bytes or the exception expected to be thrown
     */
    public function provideCodePoints() {
        return [
            'U+0020 (HTML)'             => [false, [0x20], "20"],
            'U+0020 (fatal)'            => [true,  [0x20], "20"],
            'U+005C (HTML)'             => [false, [0x5C], "5C"],
            'U+005C (fatal)'            => [true,  [0x5C], "5C"],
            'U+007E (HTML)'             => [false, [0x7E], "7E"],
            'U+007E (fatal)'            => [true,  [0x7E], "7E"],
            'U+00A5 (HTML)'             => [false, [0xA5], "1B 28 4A 5C 1B 28 42"],
            'U+00A5 (fatal)'            => [true,  [0xA5], "1B 28 4A 5C 1B 28 42"],
            'U+203E (HTML)'             => [false, [0x203E], "1B 28 4A 7E 1B 28 42"],
            'U+203E (fatal)'            => [true,  [0x203E], "1B 28 4A 7E 1B 28 42"],
            'U+FF61 (HTML)'             => [false, [0xFF61], "1B 24 42 21 23 1B 28 42"],
            'U+FF61 (fatal)'            => [true,  [0xFF61], "1B 24 42 21 23 1B 28 42"],
            'U+FF9F (HTML)'             => [false, [0xFF9F], "1B 24 42 21 2C 1B 28 42"],
            'U+FF9F (fatal)'            => [true,  [0xFF9F], "1B 24 42 21 2C 1B 28 42"],
            'U+2212 (HTML)'             => [false, [0x2212], "1B 24 42 21 5D 1B 28 42"],
            'U+2212 (fatal)'            => [true,  [0x2212], "1B 24 42 21 5D 1B 28 42"],
            'U+2116 (HTML)'             => [false, [0x2116], "1B 24 42 2D 62 1B 28 42"],
            'U+2116 (fatal)'            => [true,  [0x2116], "1B 24 42 2D 62 1B 28 42"],
            'U+FFE2 (HTML)'             => [false, [0xFFE2], "1B 24 42 22 4C 1B 28 42"],
            'U+FFE2 (fatal)'            => [true,  [0xFFE2], "1B 24 42 22 4C 1B 28 42"],
            'U+00C6 (HTML)'             => [false, [0xC6], "26 23 31 39 38 3B"],
            'U+00C6 (fatal)'            => [true,  [0xC6], new EncoderException("", Coder::E_UNAVAILABLE_CODE_POINT)],
            'U+FFFD (HTML)'             => [false, [0xFFFD], "26 23 36 35 35 33 33 3B"],
            'U+FFFD (fatal)'            => [true,  [0xFFFD], new EncoderException("", Coder::E_UNAVAILABLE_CODE_POINT)],
            'Roman (HTML)'              => [false, [0xA5, 0x20, 0x203E], "1B 28 4A 5C 20 7E 1B 28 42"],
            'Roman (fatal)'             => [true,  [0xA5, 0x20, 0x203E], "1B 28 4A 5C 20 7E 1B 28 42"],
            'Roman to ASCII (HTML)'     => [false, [0xA5, 0x5C], "1B 28 4A 5C 1B 28 42 5C"],
            'Roman to ASCII (fatal)'    => [true,  [0xA5, 0x5C], "1B 28 4A 5C 1B 28 42 5C"],
            'Roman to error (HTML)'     => [false, [0xA5, 0x80], "1B 28 4A 5C 26 23 31 32 38 3B 1B 28 42"],
            'Roman to error (fatal)'    => [true,  [0xA5, 0x80], new EncoderException("", Coder::E_UNAVAILABLE_CODE_POINT)],
            'JIS (HTML)'                => [false, [0x2116, 0xFFE2, 0x2212], "1B 24 42 2D 62 22 4C 21 5D 1B 28 42"],
            'JIS (fatal)'               => [true,  [0x2116, 0xFFE2, 0x2212], "1B 24 42 2D 62 22 4C 21 5D 1B 28 42"],
            'JIS to Roman (HTML)'       => [false, [0x2116, 0xA5], "1B 24 42 2D 62 1B 28 4A 5C 1B 28 42"],
            'JIS to Roman (fatal)'      => [true,  [0x2116, 0xA5], "1B 24 42 2D 62 1B 28 4A 5C 1B 28 42"],
            'JIS to ASCII 1 (HTML)'     => [false, [0x2116, 0x20], "1B 24 42 2D 62 1B 28 42 20"],
            'JIS to ASCII 1 (fatal)'    => [true,  [0x2116, 0x20], "1B 24 42 2D 62 1B 28 42 20"],
            'JIS to ASCII 2 (HTML)'     => [false, [0x2116, 0x5C], "1B 24 42 2D 62 1B 28 42 5C"],
            'JIS to ASCII 2 (fatal)'    => [true,  [0x2116, 0x5C], "1B 24 42 2D 62 1B 28 42 5C"],
            'JIS to error 1 (HTML)'     => [false, [0x2116, 0x80], "1B 24 42 2D 62 1B 28 42 26 23 31 32 38 3B"],
            'JIS to error 1 (fatal)'    => [true,  [0x2116, 0x80], new EncoderException("", Coder::E_UNAVAILABLE_CODE_POINT)],
            'JIS to error 2 (HTML)'     => [false, [0x2116, 0x1B], "1B 24 42 2D 62 1B 28 42 26 23 36 35 35 33 33 3B"],
            'JIS to error 2 (fatal)'    => [true,  [0x2116, 0x1B], new EncoderException("", Coder::E_UNAVAILABLE_CODE_POINT)],
            'Escape characters (HTML)'  => [false, [0x1B, 0xE, 0xF], "26 23 36 35 35 33 33 3B 26 23 36 35 35 33 33 3B 26 23 36 35 35 33 33 3B"],
            'Escape characters (fatal)' => [true,  [0x1B, 0xE, 0xF], new EncoderException("", Coder::E_UNAVAILABLE_CODE_POINT)],
            '-1 (HTML)'                 => [false, [-1], new EncoderException("", Coder::E_INVALID_CODE_POINT)],
            '-1 (fatal)'                => [true,  [-1], new EncoderException("", Coder::E_INVALID_CODE_POINT)],
            '0x110000 (HTML)'           => [false, [0x110000], new EncoderException("", Coder::E_INVALID_CODE_POINT)],
            '0x110000 (fatal)'          => [true,  [0x110000], new EncoderException("", Coder::E_INVALID_CODE_POINT)],
        ];
    }

    /** Supplies the decoder test cases
     *
     * Each case is a list of: the input written as hexadecimal bytes, and the code points expected from decoding it (65533 being U+FFFD)
     */
    public function provideStrings() {
        return [
            'empty string'        => ["", []],
            'Implied ASCII mode'  => ["00 30 5C 7E 21 5F", [0, 48, 92, 126, 33, 95]],
            'Explicit ASCII mode' => ["1B2842 00 30 5C 7E 21 5F", [0, 48, 92, 126, 33, 95]],
            'Roman mode'          => ["1B284A 00 30 5C 7E 21 5F", [0, 48, 165, 8254, 33, 95]],
            'Katakana mode'       => ["1B2849 00 30 5C 7E 21 5F", [65533, 65392, 65436, 65533, 65377, 65439]],
            'Double-byte mode 1'  => ["1B2440 00 305C 7E21 5F", [65533, 31227, 65533, 65533]],
            'Double-byte mode 2'  => ["1B2442 00 305C 7E21 5F", [65533, 31227, 65533, 65533]],
            'Multiple modes'      => ["5C 1B2849 21 1B2440 305C 1B284A 5C 1B2842 5C", [92, 65377, 31227, 165, 92]],
            'Double escape'       => ["1B2849 1B2842 5C", [65533, 92]],
            'Triple escape'       => ["1B2849 1B2842 1B284A 5C", [65533, 65533, 165]],
            'Trailing escape'     => ["20 1B284A 30 33 1B2849", [32, 48, 51]],
            'Truncated escape 1'  => ["1B", [65533]],
            'Truncated escape 2'  => ["1B28", [65533, 40]],
            'Truncated escape 3'  => ["1B2820", [65533, 40, 32]],
            'Truncated escape 4'  => ["1B2020", [65533, 32, 32]],
            'Invalid escape 1'    => ["1B2840", [65533, 40, 64]],
            'Invalid escape 2'    => ["1B244A", [65533, 36, 74]],
            'Invalid bytes'       => ["80 FF 1B2849 00 20 7F 1B2442 00 2100 FF FF", [65533, 65533, 65533, 65533, 65533, 65533, 65533, 65533, 65533]],
        ];
    }

    /**
     * @dataProvider provideCodePoints
     * @covers MensBeam\Intl\Encoding\Encoder
     */
    public function testEncodeCodePoints(bool $fatal, $input, $exp) {
        return parent::testEncodeCodePoints($fatal, $input, $exp);
    }

    /**
     * Encodes each code point through the static ISO2022JP::encode() method, sharing one $mode variable between calls, then makes a final call with a null code point before comparing the accumulated bytes (as hexadecimal) with the expectation
     *
     * @dataProvider provideCodePoints
     * @covers MensBeam\Intl\Encoding\ISO2022JP::encode
     */
    public function testEncodeCodePointsStatically(bool $fatal, $input, $exp) {
        $out = "";
        if ($exp instanceof \Throwable) {
            $this->expectException(get_class($exp));
            $this->expectExceptionCode($exp->getCode());
        } else {
            // normalize the expectation to the lowercase, unspaced form produced by bin2hex()
            $exp = strtolower(str_replace(" ", "", $exp));
        }
        foreach ($input as $char) {
            $out .= ISO2022JP::encode($char, $fatal, $mode);
        }
        $out .= ISO2022JP::encode(null, $fatal, $mode);
        $this->assertSame($exp, bin2hex($out));
    }

    /**
     * @dataProvider provideStrings
     * @covers MensBeam\Intl\Encoding\ISO2022JP::__construct
     * @covers MensBeam\Intl\Encoding\ISO2022JP::nextCode
     * @covers MensBeam\Intl\Encoding\ISO2022JP::modeSet
     */
    public function testDecodeMultipleCharactersAsCodePoints(string $input, array $exp) {
        return parent::testDecodeMultipleCharactersAsCodePoints($input, $exp);
    }

    /**
     * @dataProvider provideStrings
     * @covers MensBeam\Intl\Encoding\ISO2022JP::__construct
     * @covers MensBeam\Intl\Encoding\ISO2022JP::nextChar
     * @covers MensBeam\Intl\Encoding\ISO2022JP::modeSet
     */
    public function testDecodeMultipleCharactersAsStrings(string $input, array $exp) {
        return parent::testDecodeMultipleCharactersAsStrings($input, $exp);
    }

    /**
     * @dataProvider provideStrings
     * @covers MensBeam\Intl\Encoding\ISO2022JP::seekBack
     */
    public function testSTepBackThroughAString(string $input, array $exp) {
        return parent::testSTepBackThroughAString($input, $exp);
    }

    /**
     * @covers MensBeam\Intl\Encoding\ISO2022JP::seek
     * @covers MensBeam\Intl\Encoding\ISO2022JP::posChar
     * @covers MensBeam\Intl\Encoding\ISO2022JP::posByte
     * @covers MensBeam\Intl\Encoding\ISO2022JP::rewind
     */
    public function testSeekThroughAString() {
        return parent::testSeekThroughAString();
    }

    /**
     * @covers MensBeam\Intl\Encoding\ISO2022JP::posChar
     * @covers MensBeam\Intl\Encoding\ISO2022JP::posByte
     * @covers MensBeam\Intl\Encoding\ISO2022JP::eof
     */
    public function testTraversePastTheEndOfAString() {
        return parent::testTraversePastTheEndOfAString();
    }

    /**
     * @covers MensBeam\Intl\Encoding\ISO2022JP::peekChar
     * @covers MensBeam\Intl\Encoding\ISO2022JP::stateSave
     * @covers MensBeam\Intl\Encoding\ISO2022JP::stateApply
     */
    public function testPeekAtCharacters() {
        return parent::testPeekAtCharacters();
    }

    /**
     * @covers MensBeam\Intl\Encoding\ISO2022JP::peekCode
     * @covers MensBeam\Intl\Encoding\ISO2022JP::stateSave
     * @covers MensBeam\Intl\Encoding\ISO2022JP::stateApply
     */
    public function testPeekAtCodePoints() {
        return parent::testPeekAtCodePoints();
    }

    /**
     * @dataProvider provideStrings
     * @covers MensBeam\Intl\Encoding\ISO2022JP::lenChar
     * @covers MensBeam\Intl\Encoding\ISO2022JP::lenByte
     * @covers MensBeam\Intl\Encoding\ISO2022JP::stateSave
     * @covers MensBeam\Intl\Encoding\ISO2022JP::stateApply
     */
    public function testGetStringLength(string $input, array $points) {
        return parent::testGetStringLength($input, $points);
    }

    /**
     * @covers MensBeam\Intl\Encoding\ISO2022JP::errDec
     */
    public function testReplacementModes() {
        return parent::testReplacementModes();
    }

    /**
     * @dataProvider provideStrings
     * @covers MensBeam\Intl\Encoding\ISO2022JP::rewind
     * @covers MensBeam\Intl\Encoding\ISO2022JP::chars
     * @covers MensBeam\Intl\Encoding\ISO2022JP::codes
     */
    public function testIterateThroughAString(string $input, array $exp) {
        return parent::testIterateThroughAString($input, $exp);
    }

    /**
     * The third parameter is declared explicitly nullable: relying on a null default to make a typed parameter nullable is deprecated as of PHP 8.4, and the explicit form accepts exactly the same values
     *
     * @dataProvider provideStrings
     * @coversNothing
     */
    public function testIterateThroughAStringAllowingSurrogates(string $input, array $strictExp, ?array $relaxedExp = null) {
        return parent::testIterateThroughAStringAllowingSurrogates($input, $strictExp, $relaxedExp);
    }

    /**
     * @covers MensBeam\Intl\Encoding\ISO2022JP::seekBack
     */
    public function testSeekBackOverRandomData() {
        return parent::testSeekBackOverRandomData();
    }

    /**
     * @covers MensBeam\Intl\Encoding\ISO2022JP::asciiSpan
     */
    public function testExtractAsciiSpans() {
        parent::testExtractAsciiSpans();
    }

    /**
     * @covers MensBeam\Intl\Encoding\ISO2022JP::asciiSpanNot
     */
    public function testExtractNegativeAsciiSpans() {
        parent::testExtractNegativeAsciiSpans();
    }

    /**
     * Decodes every sequence of each series on its own, expecting exactly one code point followed by the end of the string
     *
     * The list of series is currently empty, so no assertions are performed
     *
     * @group optional
     */
    public function testPedanticallyDecodeSingleCharactersAsCodePoint() {
        $series = [
        ];
        foreach ($series as $test) {
            foreach ($test[0] as $a => $input) {
                $class = $this->testedClass;
                $char = hex2bin($input);
                $exp = $test[1][$a];
                $s = new $class($char);
                $this->assertSame($exp, $s->nextCode(), "Sequence $input did not decode to $exp.");
                $this->assertFalse($s->nextCode(), "Sequence $input did not end after one character");
            }
        }
    }
}

221
tests/cases/Encoding/TestReplacement.php

@ -0,0 +1,221 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\TestCase\Encoding;
use MensBeam\Intl\Encoding\Replacement;
use MensBeam\Intl\Encoding\DecoderException;
/** Test case for the Replacement decoder
 *
 * The expectations below show any non-empty input decoding to a single U+FFFD, so several of the generic parent tests are replaced with versions written specifically for this class
 */
class TestReplacement extends \MensBeam\Intl\Test\DecoderTest {
    /** The class under test */
    protected $testedClass = Replacement::class;

    /** Supplies the decoder test cases
     *
     * Each case is a list of: the input written as hexadecimal bytes, and the code points expected from decoding it
     */
    public function provideStrings() {
        return [
            // control samples
            'empty string'       => ["", []],
            'Arbitrary string 1' => ["20", [0xFFFD]],
            'Arbitrary string 2' => ["64 8B 20 00 FF A5", [0xFFFD]],
        ];
    }

    /**
     * @dataProvider provideStrings
     * @covers MensBeam\Intl\Encoding\Replacement::__construct
     * @covers MensBeam\Intl\Encoding\Replacement::nextCode
     */
    public function testDecodeMultipleCharactersAsCodePoints(string $input, array $exp) {
        return parent::testDecodeMultipleCharactersAsCodePoints($input, $exp);
    }

    /**
     * @dataProvider provideStrings
     * @covers MensBeam\Intl\Encoding\Replacement::__construct
     * @covers MensBeam\Intl\Encoding\Replacement::nextChar
     */
    public function testDecodeMultipleCharactersAsStrings(string $input, array $exp) {
        return parent::testDecodeMultipleCharactersAsStrings($input, $exp);
    }

    /**
     * @dataProvider provideStrings
     * @covers MensBeam\Intl\Encoding\Replacement::seek
     */
    public function testSTepBackThroughAString(string $input, array $exp) {
        return parent::testSTepBackThroughAString($input, $exp);
    }

    /**
     * The generic seek test is replaced by a trivially passing assertion for this class
     *
     * @coversNothing
     */
    public function testSeekThroughAString() {
        $this->assertTrue(true);
    }

    /**
     * Seeking forward once reaches the end of the string; seeking again leaves the positions unchanged
     *
     * @covers MensBeam\Intl\Encoding\Replacement::posChar
     * @covers MensBeam\Intl\Encoding\Replacement::posByte
     * @covers MensBeam\Intl\Encoding\Replacement::seek
     * @covers MensBeam\Intl\Encoding\Replacement::eof
     */
    public function testTraversePastTheEndOfAString() {
        $d = new Replacement("a");
        $this->assertFalse($d->eof());
        $this->assertSame(0, $d->posChar());
        $this->assertSame(0, $d->posByte());
        $d->seek(1);
        $this->assertTrue($d->eof());
        $this->assertSame(1, $d->posChar());
        $this->assertSame(1, $d->posByte());
        $d->seek(1);
        $this->assertTrue($d->eof());
        $this->assertSame(1, $d->posChar());
        $this->assertSame(1, $d->posByte());
    }

    /**
     * Peeking must yield at most one replacement character and must not move the pointer
     *
     * @covers MensBeam\Intl\Encoding\Replacement::peekChar
     * @covers MensBeam\Intl\Encoding\Replacement::posChar
     * @covers MensBeam\Intl\Encoding\Replacement::posByte
     */
    public function testPeekAtCharacters() {
        $d = new Replacement("A");
        $this->assertSame(0, $d->posChar());
        $this->assertSame(0, $d->posByte());
        $this->assertSame("\u{FFFD}", $d->peekChar(2112));
        $this->assertSame(0, $d->posChar());
        $this->assertSame(0, $d->posByte());
        $this->assertSame("", $d->peekChar(0));
        $this->assertSame("", $d->peekChar(-2112));
    }

    /**
     * Peeking must yield at most one replacement code point and must not move the pointer
     *
     * @covers MensBeam\Intl\Encoding\Replacement::peekCode
     * @covers MensBeam\Intl\Encoding\Replacement::posChar
     * @covers MensBeam\Intl\Encoding\Replacement::posByte
     */
    public function testPeekAtCodePoints() {
        $d = new Replacement("A");
        $this->assertSame(0, $d->posChar());
        $this->assertSame(0, $d->posByte());
        $this->assertSame([0xFFFD], $d->peekCode(2112));
        $this->assertSame(0, $d->posChar());
        $this->assertSame(0, $d->posByte());
        $this->assertSame([], $d->peekCode(0));
        $this->assertSame([], $d->peekCode(-2112));
    }

    /**
     * @dataProvider provideStrings
     * @covers MensBeam\Intl\Encoding\Replacement::lenChar
     * @covers MensBeam\Intl\Encoding\Replacement::lenByte
     */
    public function testGetStringLength(string $input, array $points) {
        return parent::testGetStringLength($input, $points);
    }

    /**
     * With the second constructor argument set to true, every read is expected to throw a DecoderException; peeking must leave the positions untouched, while reading moves to character 1 at byte 6
     *
     * Each try block stores either the returned value or the caught exception in $p so that the assertion in the finally block fails if nothing was thrown
     *
     * @covers MensBeam\Intl\Encoding\Replacement::nextChar
     * @covers MensBeam\Intl\Encoding\Replacement::nextCode
     * @covers MensBeam\Intl\Encoding\Replacement::peekChar
     * @covers MensBeam\Intl\Encoding\Replacement::peekCode
     * @covers MensBeam\Intl\Encoding\Replacement::rewind
     * @covers MensBeam\Intl\Encoding\Replacement::posChar
     * @covers MensBeam\Intl\Encoding\Replacement::posByte
     */
    public function testReplacementModes() {
        $d = new Replacement("VVVVVV", true);
        $this->assertSame(0, $d->posChar());
        $this->assertSame(0, $d->posByte());
        try {
            $p = $d->peekCode();
        } catch (\Exception $e) {
            $p = $e;
        } finally {
            $this->assertInstanceOf(DecoderException::class, $p);
        }
        $this->assertSame(0, $d->posErr);
        $this->assertSame(0, $d->posChar());
        $this->assertSame(0, $d->posByte());
        try {
            $p = $d->nextCode();
        } catch (\Exception $e) {
            $p = $e;
        } finally {
            $this->assertInstanceOf(DecoderException::class, $p);
        }
        $this->assertSame(1, $d->posErr);
        $this->assertSame(1, $d->posChar());
        $this->assertSame(6, $d->posByte());
        $d->rewind();
        $this->assertSame(0, $d->posChar());
        $this->assertSame(0, $d->posByte());
        try {
            $p = $d->peekChar();
        } catch (\Exception $e) {
            $p = $e;
        } finally {
            $this->assertInstanceOf(DecoderException::class, $p);
        }
        $this->assertSame(1, $d->posErr);
        $this->assertSame(0, $d->posChar());
        $this->assertSame(0, $d->posByte());
        try {
            $p = $d->nextChar();
        } catch (\Exception $e) {
            $p = $e;
        } finally {
            $this->assertInstanceOf(DecoderException::class, $p);
        }
        $this->assertSame(1, $d->posErr);
        $this->assertSame(1, $d->posChar());
        $this->assertSame(6, $d->posByte());
    }

    /**
     * @dataProvider provideStrings
     * @covers MensBeam\Intl\Encoding\Replacement::rewind
     * @covers MensBeam\Intl\Encoding\Replacement::chars
     * @covers MensBeam\Intl\Encoding\Replacement::codes
     */
    public function testIterateThroughAString(string $input, array $exp) {
        return parent::testIterateThroughAString($input, $exp);
    }

    /**
     * The third parameter is declared explicitly nullable: relying on a null default to make a typed parameter nullable is deprecated as of PHP 8.4, and the explicit form accepts exactly the same values
     *
     * @dataProvider provideStrings
     * @covers MensBeam\Intl\Encoding\Replacement::nextCode
     */
    public function testIterateThroughAStringAllowingSurrogates(string $input, array $strictExp, ?array $relaxedExp = null) {
        return parent::testIterateThroughAStringAllowingSurrogates($input, $strictExp, $relaxedExp);
    }

    /**
     * @coversNothing
     */
    public function testSeekBackOverRandomData() {
        return parent::testSeekBackOverRandomData();
    }

    /**
     * An ASCII span must come back empty, after which a single read reaches the end of the string
     *
     * @covers MensBeam\Intl\Encoding\Replacement::asciiSpan
     */
    public function testExtractAsciiSpans() {
        $d = new Replacement("VVVVVV");
        $this->assertSame("", $d->asciiSpan($this->allBytes()));
        $d->nextChar();
        $this->assertTrue($d->eof());
    }

    /**
     * A negative ASCII span must come back empty, after which a single read reaches the end of the string
     *
     * @covers MensBeam\Intl\Encoding\Replacement::asciiSpanNot
     */
    public function testExtractNegativeAsciiSpans() {
        $d = new Replacement("VVVVVV");
        $this->assertSame("", $d->asciiSpanNot(""));
        $d->nextChar();
        $this->assertTrue($d->eof());
    }
}

235
tests/cases/Encoding/TestShiftJIS.php

File diff suppressed because one or more lines are too long

339
tests/cases/Encoding/TestSingleByte.php

@ -0,0 +1,339 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\TestCase\Encoding;
use MensBeam\Intl\Encoding\SingleByteEncoding;
use MensBeam\Intl\Encoding\EncoderException;
use MensBeam\Intl\Encoding\Encoder;
class TestSingleByte extends \MensBeam\Intl\Test\CoderDecoderTest {
// maps taken from https://github.com/web-platform-tests/wpt/blob/d6c29bef8d4bcdfe4f689defca73360b07647d71/encoding/single-byte-decoder.html
// ISO-8859-8 was duplicated for ISO-8859-8-I
protected static $maps = [
"IBM866" => [1040,1041,1042,1043,1044,1045,1046,1047,1048,1049,1050,1051,1052,1053,1054,1055,1056,1057,1058,1059,1060,1061,1062,1063,1064,1065,1066,1067,1068,1069,1070,1071,1072,1073,1074,1075,1076,1077,1078,1079,1080,1081,1082,1083,1084,1085,1086,1087,9617,9618,9619,9474,9508,9569,9570,9558,9557,9571,9553,9559,9565,9564,9563,9488,9492,9524,9516,9500,9472,9532,9566,9567,9562,9556,9577,9574,9568,9552,9580,9575,9576,9572,9573,9561,9560,9554,9555,9579,9578,9496,9484,9608,9604,9612,9616,9600,1088,1089,1090,1091,1092,1093,1094,1095,1096,1097,1098,1099,1100,1101,1102,1103,1025,1105,1028,1108,1031,1111,1038,1118,176,8729,183,8730,8470,164,9632,160],
"ISO-8859-2" => [128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,260,728,321,164,317,346,167,168,352,350,356,377,173,381,379,176,261,731,322,180,318,347,711,184,353,351,357,378,733,382,380,340,193,194,258,196,313,262,199,268,201,280,203,282,205,206,270,272,323,327,211,212,336,214,215,344,366,218,368,220,221,354,223,341,225,226,259,228,314,263,231,269,233,281,235,283,237,238,271,273,324,328,243,244,337,246,247,345,367,250,369,252,253,355,729],
"ISO-8859-3" => [128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,294,728,163,164,null,292,167,168,304,350,286,308,173,null,379,176,295,178,179,180,181,293,183,184,305,351,287,309,189,null,380,192,193,194,null,196,266,264,199,200,201,202,203,204,205,206,207,null,209,210,211,212,288,214,215,284,217,218,219,220,364,348,223,224,225,226,null,228,267,265,231,232,233,234,235,236,237,238,239,null,241,242,243,244,289,246,247,285,249,250,251,252,365,349,729],
"ISO-8859-4" => [128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,260,312,342,164,296,315,167,168,352,274,290,358,173,381,175,176,261,731,343,180,297,316,711,184,353,275,291,359,330,382,331,256,193,194,195,196,197,198,302,268,201,280,203,278,205,206,298,272,325,332,310,212,213,214,215,216,370,218,219,220,360,362,223,257,225,226,227,228,229,230,303,269,233,281,235,279,237,238,299,273,326,333,311,244,245,246,247,248,371,250,251,252,361,363,729],
"ISO-8859-5" => [128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,1025,1026,1027,1028,1029,1030,1031,1032,1033,1034,1035,1036,173,1038,1039,1040,1041,1042,1043,1044,1045,1046,1047,1048,1049,1050,1051,1052,1053,1054,1055,1056,1057,1058,1059,1060,1061,1062,1063,1064,1065,1066,1067,1068,1069,1070,1071,1072,1073,1074,1075,1076,1077,1078,1079,1080,1081,1082,1083,1084,1085,1086,1087,1088,1089,1090,1091,1092,1093,1094,1095,1096,1097,1098,1099,1100,1101,1102,1103,8470,1105,1106,1107,1108,1109,1110,1111,1112,1113,1114,1115,1116,167,1118,1119],
"ISO-8859-6" => [128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,null,null,null,164,null,null,null,null,null,null,null,1548,173,null,null,null,null,null,null,null,null,null,null,null,null,null,1563,null,null,null,1567,null,1569,1570,1571,1572,1573,1574,1575,1576,1577,1578,1579,1580,1581,1582,1583,1584,1585,1586,1587,1588,1589,1590,1591,1592,1593,1594,null,null,null,null,null,1600,1601,1602,1603,1604,1605,1606,1607,1608,1609,1610,1611,1612,1613,1614,1615,1616,1617,1618,null,null,null,null,null,null,null,null,null,null,null,null,null],
"ISO-8859-7" => [128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,8216,8217,163,8364,8367,166,167,168,169,890,171,172,173,null,8213,176,177,178,179,900,901,902,183,904,905,906,187,908,189,910,911,912,913,914,915,916,917,918,919,920,921,922,923,924,925,926,927,928,929,null,931,932,933,934,935,936,937,938,939,940,941,942,943,944,945,946,947,948,949,950,951,952,953,954,955,956,957,958,959,960,961,962,963,964,965,966,967,968,969,970,971,972,973,974,null],
"ISO-8859-8" => [128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,null,162,163,164,165,166,167,168,169,215,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,247,187,188,189,190,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,8215,1488,1489,1490,1491,1492,1493,1494,1495,1496,1497,1498,1499,1500,1501,1502,1503,1504,1505,1506,1507,1508,1509,1510,1511,1512,1513,1514,null,null,8206,8207,null],
"ISO-8859-8-I" => [128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,null,162,163,164,165,166,167,168,169,215,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,247,187,188,189,190,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,8215,1488,1489,1490,1491,1492,1493,1494,1495,1496,1497,1498,1499,1500,1501,1502,1503,1504,1505,1506,1507,1508,1509,1510,1511,1512,1513,1514,null,null,8206,8207,null],
"ISO-8859-10" => [128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,260,274,290,298,296,310,167,315,272,352,358,381,173,362,330,176,261,275,291,299,297,311,183,316,273,353,359,382,8213,363,331,256,193,194,195,196,197,198,302,268,201,280,203,278,205,206,207,208,325,332,211,212,213,214,360,216,370,218,219,220,221,222,223,257,225,226,227,228,229,230,303,269,233,281,235,279,237,238,239,240,326,333,243,244,245,246,361,248,371,250,251,252,253,254,312],
"ISO-8859-13" => [128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,8221,162,163,164,8222,166,167,216,169,342,171,172,173,174,198,176,177,178,179,8220,181,182,183,248,185,343,187,188,189,190,230,260,302,256,262,196,197,280,274,268,201,377,278,290,310,298,315,352,323,325,211,332,213,214,215,370,321,346,362,220,379,381,223,261,303,257,263,228,229,281,275,269,233,378,279,291,311,299,316,353,324,326,243,333,245,246,247,371,322,347,363,252,380,382,8217],
"ISO-8859-14" => [128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,7682,7683,163,266,267,7690,167,7808,169,7810,7691,7922,173,174,376,7710,7711,288,289,7744,7745,182,7766,7809,7767,7811,7776,7923,7812,7813,7777,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,372,209,210,211,212,213,214,7786,216,217,218,219,220,221,374,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,373,241,242,243,244,245,246,7787,248,249,250,251,252,253,375,255],
"ISO-8859-15" => [128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,8364,165,352,167,353,169,170,171,172,173,174,175,176,177,178,179,381,181,182,183,382,185,186,187,338,339,376,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255],
"ISO-8859-16" => [128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,260,261,321,8364,8222,352,167,353,169,536,171,377,173,378,379,176,177,268,322,381,8221,182,183,382,269,537,187,338,339,376,380,192,193,194,258,196,262,198,199,200,201,202,203,204,205,206,207,272,323,210,211,212,336,214,346,368,217,218,219,220,280,538,223,224,225,226,259,228,263,230,231,232,233,234,235,236,237,238,239,273,324,242,243,244,337,246,347,369,249,250,251,252,281,539,255],
"KOI8-R" => [9472,9474,9484,9488,9492,9496,9500,9508,9516,9524,9532,9600,9604,9608,9612,9616,9617,9618,9619,8992,9632,8729,8730,8776,8804,8805,160,8993,176,178,183,247,9552,9553,9554,1105,9555,9556,9557,9558,9559,9560,9561,9562,9563,9564,9565,9566,9567,9568,9569,1025,9570,9571,9572,9573,9574,9575,9576,9577,9578,9579,9580,169,1102,1072,1073,1094,1076,1077,1092,1075,1093,1080,1081,1082,1083,1084,1085,1086,1087,1103,1088,1089,1090,1091,1078,1074,1100,1099,1079,1096,1101,1097,1095,1098,1070,1040,1041,1062,1044,1045,1060,1043,1061,1048,1049,1050,1051,1052,1053,1054,1055,1071,1056,1057,1058,1059,1046,1042,1068,1067,1047,1064,1069,1065,1063,1066],
"KOI8-U" => [9472,9474,9484,9488,9492,9496,9500,9508,9516,9524,9532,9600,9604,9608,9612,9616,9617,9618,9619,8992,9632,8729,8730,8776,8804,8805,160,8993,176,178,183,247,9552,9553,9554,1105,1108,9556,1110,1111,9559,9560,9561,9562,9563,1169,1118,9566,9567,9568,9569,1025,1028,9571,1030,1031,9574,9575,9576,9577,9578,1168,1038,169,1102,1072,1073,1094,1076,1077,1092,1075,1093,1080,1081,1082,1083,1084,1085,1086,1087,1103,1088,1089,1090,1091,1078,1074,1100,1099,1079,1096,1101,1097,1095,1098,1070,1040,1041,1062,1044,1045,1060,1043,1061,1048,1049,1050,1051,1052,1053,1054,1055,1071,1056,1057,1058,1059,1046,1042,1068,1067,1047,1064,1069,1065,1063,1066],
"macintosh" => [196,197,199,201,209,214,220,225,224,226,228,227,229,231,233,232,234,235,237,236,238,239,241,243,242,244,246,245,250,249,251,252,8224,176,162,163,167,8226,182,223,174,169,8482,180,168,8800,198,216,8734,177,8804,8805,165,181,8706,8721,8719,960,8747,170,186,937,230,248,191,161,172,8730,402,8776,8710,171,187,8230,160,192,195,213,338,339,8211,8212,8220,8221,8216,8217,247,9674,255,376,8260,8364,8249,8250,64257,64258,8225,183,8218,8222,8240,194,202,193,203,200,205,206,207,204,211,212,63743,210,218,219,217,305,710,732,175,728,729,730,184,733,731,711],
"windows-874" => [8364,129,130,131,132,8230,134,135,136,137,138,139,140,141,142,143,144,8216,8217,8220,8221,8226,8211,8212,152,153,154,155,156,157,158,159,160,3585,3586,3587,3588,3589,3590,3591,3592,3593,3594,3595,3596,3597,3598,3599,3600,3601,3602,3603,3604,3605,3606,3607,3608,3609,3610,3611,3612,3613,3614,3615,3616,3617,3618,3619,3620,3621,3622,3623,3624,3625,3626,3627,3628,3629,3630,3631,3632,3633,3634,3635,3636,3637,3638,3639,3640,3641,3642,null,null,null,null,3647,3648,3649,3650,3651,3652,3653,3654,3655,3656,3657,3658,3659,3660,3661,3662,3663,3664,3665,3666,3667,3668,3669,3670,3671,3672,3673,3674,3675,null,null,null,null],
"windows-1250" => [8364,129,8218,131,8222,8230,8224,8225,136,8240,352,8249,346,356,381,377,144,8216,8217,8220,8221,8226,8211,8212,152,8482,353,8250,347,357,382,378,160,711,728,321,164,260,166,167,168,169,350,171,172,173,174,379,176,177,731,322,180,181,182,183,184,261,351,187,317,733,318,380,340,193,194,258,196,313,262,199,268,201,280,203,282,205,206,270,272,323,327,211,212,336,214,215,344,366,218,368,220,221,354,223,341,225,226,259,228,314,263,231,269,233,281,235,283,237,238,271,273,324,328,243,244,337,246,247,345,367,250,369,252,253,355,729],
"windows-1251" => [1026,1027,8218,1107,8222,8230,8224,8225,8364,8240,1033,8249,1034,1036,1035,1039,1106,8216,8217,8220,8221,8226,8211,8212,152,8482,1113,8250,1114,1116,1115,1119,160,1038,1118,1032,164,1168,166,167,1025,169,1028,171,172,173,174,1031,176,177,1030,1110,1169,181,182,183,1105,8470,1108,187,1112,1029,1109,1111,1040,1041,1042,1043,1044,1045,1046,1047,1048,1049,1050,1051,1052,1053,1054,1055,1056,1057,1058,1059,1060,1061,1062,1063,1064,1065,1066,1067,1068,1069,1070,1071,1072,1073,1074,1075,1076,1077,1078,1079,1080,1081,1082,1083,1084,1085,1086,1087,1088,1089,1090,1091,1092,1093,1094,1095,1096,1097,1098,1099,1100,1101,1102,1103],
"windows-1252" => [8364,129,8218,402,8222,8230,8224,8225,710,8240,352,8249,338,141,381,143,144,8216,8217,8220,8221,8226,8211,8212,732,8482,353,8250,339,157,382,376,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255],
"windows-1253" => [8364,129,8218,402,8222,8230,8224,8225,136,8240,138,8249,140,141,142,143,144,8216,8217,8220,8221,8226,8211,8212,152,8482,154,8250,156,157,158,159,160,901,902,163,164,165,166,167,168,169,null,171,172,173,174,8213,176,177,178,179,900,181,182,183,904,905,906,187,908,189,910,911,912,913,914,915,916,917,918,919,920,921,922,923,924,925,926,927,928,929,null,931,932,933,934,935,936,937,938,939,940,941,942,943,944,945,946,947,948,949,950,951,952,953,954,955,956,957,958,959,960,961,962,963,964,965,966,967,968,969,970,971,972,973,974,null],
"windows-1254" => [8364,129,8218,402,8222,8230,8224,8225,710,8240,352,8249,338,141,142,143,144,8216,8217,8220,8221,8226,8211,8212,732,8482,353,8250,339,157,158,376,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,286,209,210,211,212,213,214,215,216,217,218,219,220,304,350,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,287,241,242,243,244,245,246,247,248,249,250,251,252,305,351,255],
"windows-1255" => [8364,129,8218,402,8222,8230,8224,8225,710,8240,138,8249,140,141,142,143,144,8216,8217,8220,8221,8226,8211,8212,732,8482,154,8250,156,157,158,159,160,161,162,163,8362,165,166,167,168,169,215,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,247,187,188,189,190,191,1456,1457,1458,1459,1460,1461,1462,1463,1464,1465,1466,1467,1468,1469,1470,1471,1472,1473,1474,1475,1520,1521,1522,1523,1524,null,null,null,null,null,null,null,1488,1489,1490,1491,1492,1493,1494,1495,1496,1497,1498,1499,1500,1501,1502,1503,1504,1505,1506,1507,1508,1509,1510,1511,1512,1513,1514,null,null,8206,8207,null],
"windows-1256" => [8364,1662,8218,402,8222,8230,8224,8225,710,8240,1657,8249,338,1670,1688,1672,1711,8216,8217,8220,8221,8226,8211,8212,1705,8482,1681,8250,339,8204,8205,1722,160,1548,162,163,164,165,166,167,168,169,1726,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,1563,187,188,189,190,1567,1729,1569,1570,1571,1572,1573,1574,1575,1576,1577,1578,1579,1580,1581,1582,1583,1584,1585,1586,1587,1588,1589,1590,215,1591,1592,1593,1594,1600,1601,1602,1603,224,1604,226,1605,1606,1607,1608,231,232,233,234,235,1609,1610,238,239,1611,1612,1613,1614,244,1615,1616,247,1617,249,1618,251,252,8206,8207,1746],
"windows-1257" => [8364,129,8218,131,8222,8230,8224,8225,136,8240,138,8249,140,168,711,184,144,8216,8217,8220,8221,8226,8211,8212,152,8482,154,8250,156,175,731,159,160,null,162,163,164,null,166,167,216,169,342,171,172,173,174,198,176,177,178,179,180,181,182,183,248,185,343,187,188,189,190,230,260,302,256,262,196,197,280,274,268,201,377,278,290,310,298,315,352,323,325,211,332,213,214,215,370,321,346,362,220,379,381,223,261,303,257,263,228,229,281,275,269,233,378,279,291,311,299,316,353,324,326,243,333,245,246,247,371,322,347,363,252,380,382,729],
"windows-1258" => [8364,129,8218,402,8222,8230,8224,8225,710,8240,138,8249,338,141,142,143,144,8216,8217,8220,8221,8226,8211,8212,732,8482,154,8250,339,157,158,376,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,258,196,197,198,199,200,201,202,203,768,205,206,207,272,209,777,211,212,416,214,215,216,217,218,219,220,431,771,223,224,225,226,259,228,229,230,231,232,233,234,235,769,237,238,239,273,241,803,243,244,417,246,247,248,249,250,251,252,432,8363,255],
"x-mac-cyrillic" => [1040,1041,1042,1043,1044,1045,1046,1047,1048,1049,1050,1051,1052,1053,1054,1055,1056,1057,1058,1059,1060,1061,1062,1063,1064,1065,1066,1067,1068,1069,1070,1071,8224,176,1168,163,167,8226,182,1030,174,169,8482,1026,1106,8800,1027,1107,8734,177,8804,8805,1110,181,1169,1032,1028,1108,1031,1111,1033,1113,1034,1114,1112,1029,172,8730,402,8776,8710,171,187,8230,160,1035,1115,1036,1116,1109,8211,8212,8220,8221,8216,8217,247,8222,1038,1118,1039,1119,8470,1025,1105,1103,1072,1073,1074,1075,1076,1077,1078,1079,1080,1081,1082,1083,1084,1085,1086,1087,1088,1089,1090,1091,1092,1093,1094,1095,1096,1097,1098,1099,1100,1101,1102,8364],
];
/* Encoding label => fully-qualified class name; the labels are the same keys used by $maps, and the data providers below iterate over this list */
protected static $classes = [
'IBM866' => \MensBeam\Intl\Encoding\IBM866::class,
'ISO-8859-2' => \MensBeam\Intl\Encoding\ISO88592::class,
'ISO-8859-3' => \MensBeam\Intl\Encoding\ISO88593::class,
'ISO-8859-4' => \MensBeam\Intl\Encoding\ISO88594::class,
'ISO-8859-5' => \MensBeam\Intl\Encoding\ISO88595::class,
'ISO-8859-6' => \MensBeam\Intl\Encoding\ISO88596::class,
'ISO-8859-7' => \MensBeam\Intl\Encoding\ISO88597::class,
'ISO-8859-8' => \MensBeam\Intl\Encoding\ISO88598::class,
'ISO-8859-8-I' => \MensBeam\Intl\Encoding\ISO88598I::class,
'ISO-8859-10' => \MensBeam\Intl\Encoding\ISO885910::class,
'ISO-8859-13' => \MensBeam\Intl\Encoding\ISO885913::class,
'ISO-8859-14' => \MensBeam\Intl\Encoding\ISO885914::class,
'ISO-8859-15' => \MensBeam\Intl\Encoding\ISO885915::class,
'ISO-8859-16' => \MensBeam\Intl\Encoding\ISO885916::class,
'KOI8-R' => \MensBeam\Intl\Encoding\KOI8R::class,
'KOI8-U' => \MensBeam\Intl\Encoding\KOI8U::class,
'macintosh' => \MensBeam\Intl\Encoding\Macintosh::class,
'windows-874' => \MensBeam\Intl\Encoding\Windows874::class,
'windows-1250' => \MensBeam\Intl\Encoding\Windows1250::class,
'windows-1251' => \MensBeam\Intl\Encoding\Windows1251::class,
'windows-1252' => \MensBeam\Intl\Encoding\Windows1252::class,
'windows-1253' => \MensBeam\Intl\Encoding\Windows1253::class,
'windows-1254' => \MensBeam\Intl\Encoding\Windows1254::class,
'windows-1255' => \MensBeam\Intl\Encoding\Windows1255::class,
'windows-1256' => \MensBeam\Intl\Encoding\Windows1256::class,
'windows-1257' => \MensBeam\Intl\Encoding\Windows1257::class,
'windows-1258' => \MensBeam\Intl\Encoding\Windows1258::class,
'x-mac-cyrillic' => \MensBeam\Intl\Encoding\XMacCyrillic::class,
];
/* Default class under test; the test methods of this class overwrite it with the class supplied by their data provider */
protected $testedClass = SingleByteEncoding::class;
/* Single-byte encodings don't have complex seeking, so this string is generic */
protected $seekString = "30 31 32 33 34 35 36";
protected $seekCodes = [0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36];
protected $seekOffsets = [0, 1, 2, 3, 4, 5, 6, 7];
/* This string is supposed to contain an invalid character sequence sandwiched between two null characters; this is different for each single-byte encoding (and many do not have invalid characters), so testReplacementModes() fills it in from provideBrokenStrings() */
protected $brokenChar = "";
/* This string contains the ASCII characters "A" and "Z" followed by two arbitrary non-ASCII characters, followed by the two ASCII characters "0" and "9" */
protected $spanString = "41 5A 80 FF 30 39";
/**
* Runs the inherited encoder test against the class supplied by the data provider.
*
* The expected byte string is converted to hexadecimal before it is handed
* to the inherited test.
*
* @dataProvider provideCodePoints
* @covers MensBeam\Intl\Encoding\Encoder
* @covers MensBeam\Intl\Encoding\SingleByteEncoding::encode
*
* @param bool $fatal Forwarded unchanged to the inherited test
* @param mixed $input Code points of the data set, forwarded unchanged
* @param mixed $exp Expected raw bytes; passed through bin2hex() before forwarding
* @param string $class Class to test; stored in $testedClass before delegating
*/
public function testEncodeCodePoints(bool $fatal, $input, $exp, string $class = SingleByteEncoding::class) {
$this->testedClass = $class;
return parent::testEncodeCodePoints($fatal, $input, bin2hex($exp));
}
/**
 * Encodes each code point of the data set through the static encode()
 * method of the supplied class and compares the concatenated output with
 * the expected bytes; both sides are compared in hexadecimal form.
 *
 * @dataProvider provideCodePoints
 * @covers MensBeam\Intl\Encoding\SingleByteEncoding::encode
 *
 * @param bool $fatal Second argument passed to encode()
 * @param mixed $input Iterable of code points to encode, in order
 * @param mixed $exp Expected raw byte string
 * @param string $class Class whose static encode() method is called
 */
public function testEncodeCodePointsStatically(bool $fatal, $input, $exp, string $class = SingleByteEncoding::class) {
    $encoded = "";
    foreach ($input as $codePoint) {
        $piece = $class::encode($codePoint, $fatal);
        $encoded .= $piece;
    }
    $expectedHex = bin2hex($exp);
    $actualHex = bin2hex($encoded);
    $this->assertSame($expectedHex, $actualHex);
}
/**
* Runs the inherited code-point decoding test against the class supplied by the data provider.
*
* @dataProvider provideStrings
* @covers MensBeam\Intl\Encoding\SingleByteEncoding::__construct
* @covers MensBeam\Intl\Encoding\SingleByteEncoding::nextCode
*
* @param string $input Input string of the data set, forwarded unchanged
* @param array $exp Expectation of the data set, forwarded unchanged
* @param string $class Class to test; stored in $testedClass before delegating
*/
public function testDecodeMultipleCharactersAsCodePoints(string $input, array $exp, string $class = SingleByteEncoding::class) {
$this->testedClass = $class;
return parent::testDecodeMultipleCharactersAsCodePoints($input, $exp);
}
/**
* Runs the inherited character decoding test against the class supplied by the data provider.
*
* @dataProvider provideStrings
* @covers MensBeam\Intl\Encoding\SingleByteEncoding::__construct
* @covers MensBeam\Intl\Encoding\SingleByteEncoding::nextChar
*
* @param string $input Input string of the data set, forwarded unchanged
* @param array $exp Expectation of the data set, forwarded unchanged
* @param string $class Class to test; stored in $testedClass before delegating
*/
public function testDecodeMultipleCharactersAsStrings(string $input, array $exp, string $class = SingleByteEncoding::class) {
$this->testedClass = $class;
return parent::testDecodeMultipleCharactersAsStrings($input, $exp);
}
/**
* Runs the inherited step-back test against the class supplied by the data provider.
*
* No coverage is attributed to this test.
*
* @dataProvider provideStrings
* @coversNothing
*
* @param string $input Input string of the data set, forwarded unchanged
* @param array $exp Expectation of the data set, forwarded unchanged
* @param string $class Class to test; stored in $testedClass before delegating
*/
public function testSTepBackThroughAString(string $input, array $exp, string $class = SingleByteEncoding::class) {
// this test has no meaning for single-byte encodings
$this->testedClass = $class;
return parent::testSTepBackThroughAString($input, $exp);
}
/**
* Runs the inherited seek test against the class supplied by the data provider.
*
* @dataProvider provideClasses
* @covers MensBeam\Intl\Encoding\SingleByteEncoding::seek
* @covers MensBeam\Intl\Encoding\SingleByteEncoding::posChar
* @covers MensBeam\Intl\Encoding\SingleByteEncoding::posByte
* @covers MensBeam\Intl\Encoding\SingleByteEncoding::rewind
*
* @param string $class Class to test; stored in $testedClass before delegating
*/
public function testSeekThroughAString(string $class = SingleByteEncoding::class) {
$this->testedClass = $class;
return parent::testSeekThroughAString();
}
/**
* Runs the inherited end-of-string traversal test against the class supplied by the data provider.
*
* @dataProvider provideClasses
* @covers MensBeam\Intl\Encoding\SingleByteEncoding::posChar
* @covers MensBeam\Intl\Encoding\SingleByteEncoding::posByte
* @covers MensBeam\Intl\Encoding\SingleByteEncoding::eof
*
* @param string $class Class to test; stored in $testedClass before delegating
*/
public function testTraversePastTheEndOfAString(string $class = SingleByteEncoding::class) {
$this->testedClass = $class;
return parent::testTraversePastTheEndOfAString();
}
/**
* Runs the inherited character-peeking test against the class supplied by the data provider.
*
* @dataProvider provideClasses
* @covers MensBeam\Intl\Encoding\SingleByteEncoding::peekChar
* @covers MensBeam\Intl\Encoding\SingleByteEncoding::posChar
* @covers MensBeam\Intl\Encoding\SingleByteEncoding::posByte
* @covers MensBeam\Intl\Encoding\SingleByteEncoding::stateSave
* @covers MensBeam\Intl\Encoding\SingleByteEncoding::stateApply
*
* @param string $class Class to test; stored in $testedClass before delegating
*/
public function testPeekAtCharacters(string $class = SingleByteEncoding::class) {
$this->testedClass = $class;
return parent::testPeekAtCharacters();
}
/**
* Runs the inherited code-point-peeking test against the class supplied by the data provider.
*
* @dataProvider provideClasses
* @covers MensBeam\Intl\Encoding\SingleByteEncoding::peekCode
* @covers MensBeam\Intl\Encoding\SingleByteEncoding::posChar
* @covers MensBeam\Intl\Encoding\SingleByteEncoding::posByte
* @covers MensBeam\Intl\Encoding\SingleByteEncoding::stateSave
* @covers MensBeam\Intl\Encoding\SingleByteEncoding::stateApply
*
* @param string $class Class to test; stored in $testedClass before delegating
*/
public function testPeekAtCodePoints(string $class = SingleByteEncoding::class) {
$this->testedClass = $class;
return parent::testPeekAtCodePoints();
}
/**
* Runs the inherited string-length test against the class supplied by the data provider.
*
* @dataProvider provideStrings
* @covers MensBeam\Intl\Encoding\SingleByteEncoding::lenChar
* @covers MensBeam\Intl\Encoding\SingleByteEncoding::lenByte
* @covers MensBeam\Intl\Encoding\SingleByteEncoding::stateSave
* @covers MensBeam\Intl\Encoding\SingleByteEncoding::stateApply
*
* @param string $input Input string of the data set, forwarded unchanged
* @param array $points Code points of the data set, forwarded unchanged
* @param string $class Class to test; stored in $testedClass before delegating
*/
public function testGetStringLength(string $input, array $points, string $class = SingleByteEncoding::class) {
$this->testedClass = $class;
return parent::testGetStringLength($input, $points);
}
/**
* Runs the inherited replacement-mode test against the class supplied by the data provider.
*
* The broken-character fixture differs per encoding, so it is taken from the
* data set and stored in $brokenChar before delegating.
*
* @dataProvider provideBrokenStrings
* @covers MensBeam\Intl\Encoding\SingleByteEncoding::errDec
*
* @param string $input Value assigned to $brokenChar; provideBrokenStrings() supplies an empty string when the encoding has no unmapped byte
* @param string $class Class to test; stored in $testedClass before delegating
*/
public function testReplacementModes(string $input = "", string $class = SingleByteEncoding::class) {
$this->testedClass = $class;
$this->brokenChar = $input;
return parent::testReplacementModes();
}
/**
* Runs the inherited string-iteration test against the class supplied by the data provider.
*
* @dataProvider provideStrings
* @covers MensBeam\Intl\Encoding\SingleByteEncoding::rewind
* @covers MensBeam\Intl\Encoding\SingleByteEncoding::chars
* @covers MensBeam\Intl\Encoding\SingleByteEncoding::codes
*
* @param string $input Input string of the data set, forwarded unchanged
* @param array $exp Expectation of the data set, forwarded unchanged
* @param string $class Class to test; stored in $testedClass before delegating
*/
public function testIterateThroughAString(string $input, array $exp, string $class = SingleByteEncoding::class) {
$this->testedClass = $class;
return parent::testIterateThroughAString($input, $exp);
}
/**
* Runs the inherited surrogate-tolerant iteration test against the class supplied by the data provider.
*
* The same expectation is passed for both the second and the third argument
* of the inherited test. No coverage is attributed to this test.
*
* @dataProvider provideStrings
* @coversNothing
*
* @param string $input Input string of the data set, forwarded unchanged
* @param array $exp Expectation of the data set, forwarded twice
* @param string|null $class Class to test; stored in $testedClass before delegating
*/
public function testIterateThroughAStringAllowingSurrogates(string $input, array $exp, $class = null) {
$this->testedClass = $class;
return parent::testIterateThroughAStringAllowingSurrogates($input, $exp, $exp);
}
/**
* Runs the inherited random-data seek test against the class supplied by the data provider.
*
* No coverage is attributed to this test.
*
* @dataProvider provideClasses
* @coversNothing
*
* @param string|null $class Class to test; stored in $testedClass before delegating
*/
public function testSeekBackOverRandomData($class = null) {
$this->testedClass = $class;
return parent::testSeekBackOverRandomData();
}
/**
* Runs the inherited ASCII-span test against the class supplied by the data provider.
*
* @dataProvider provideClasses
* @covers MensBeam\Intl\Encoding\SingleByteEncoding::asciiSpan
*
* @param string|null $class Class to test; stored in $testedClass before delegating
*/
public function testExtractAsciiSpans($class = null) {
$this->testedClass = $class;
parent::testExtractAsciiSpans();
}
/**
 * Runs the inherited negative ASCII-span test against the class supplied by
 * the data provider.
 *
 * Coverage is attributed to asciiSpanNot(), matching the equivalent tests of
 * the other decoder test classes; it was previously attributed to
 * asciiSpan(), which is covered by testExtractAsciiSpans() instead.
 *
 * @dataProvider provideClasses
 * @covers MensBeam\Intl\Encoding\SingleByteEncoding::asciiSpanNot
 *
 * @param string|null $class Class to test; stored in $testedClass before delegating
 */
public function testExtractNegativeAsciiSpans($class = null) {
    $this->testedClass = $class;
    parent::testExtractNegativeAsciiSpans();
}
/**
 * Data provider: yields one data set per entry of $classes, keyed by the
 * encoding label and containing only the class name.
 *
 * @return \Generator
 */
public function provideClasses() {
    $labels = array_keys(self::$classes);
    foreach ($labels as $label) {
        $className = self::$classes[$label];
        yield $label => [$className];
    }
}
/**
 * Data provider for testEncodeInvalidCodePoints().
 *
 * For every class three code points are yielded in fatal mode and then in
 * HTML mode: -1, 0x110000 and 0xFFFD. The first two always expect the
 * "invalid code point" exception; 0xFFFD expects the "unavailable code
 * point" exception in fatal mode and a numeric character reference in HTML
 * mode.
 *
 * @return \Generator Data sets of the form [class, fatal flag, code point, expectation]
 */
public function provideInvalids() {
    $invalid = new EncoderException("", SingleByteEncoding::E_INVALID_CODE_POINT);
    $unavailable = new EncoderException("", SingleByteEncoding::E_UNAVAILABLE_CODE_POINT);
    // mode label => [fatal flag, expectation for a valid but unavailable code point]
    $modes = [
        "fatal mode" => [true, $unavailable],
        "HTML mode" => [false, "&#65533;"],
    ];
    foreach (self::$classes as $name => $class) {
        foreach ($modes as $mode => [$fatal, $unavailableResult]) {
            yield "$name point < 0 ($mode)" => [$class, $fatal, -1, $invalid];
            yield "$name point > 0x10FFFF ($mode)" => [$class, $fatal, 0x110000, $invalid];
            yield "$name point unavailable ($mode)" => [$class, $fatal, 0xFFFD, $unavailableResult];
        }
    }
}
/**
 * Data provider for the encoder tests.
 *
 * For every class the data set pairs a list of code points with the byte
 * string they are expected to encode to: bytes 0x00-0x7F map to themselves,
 * and each byte 0x80-0xFF is included only when $maps has a non-null entry
 * for it. Each encoding is yielded once with the fatal flag set and once
 * without.
 *
 * @return \Generator Data sets of the form [fatal flag, code points, bytes, class]
 */
public function provideCodePoints() {
    // the lower half is identical for every encoding, so build it once
    $asciiCodes = range(0, 127);
    $asciiBytes = implode("", array_map("chr", $asciiCodes));
    foreach (self::$classes as $name => $class) {
        $bytes = $asciiBytes;
        $codes = $asciiCodes;
        $upperHalf = self::$maps[$name];
        for ($offset = 0; $offset < 128; $offset++) {
            $code = $upperHalf[$offset];
            if ($code !== null) {
                // only mapped bytes can be produced by the encoder
                $bytes .= chr($offset + 128);
                $codes[] = $code;
            }
        }
        yield "$name (fatal)" => [true, $codes, $bytes, $class];
        yield "$name (HTML)" => [false, $codes, $bytes, $class];
    }
}
/**
 * Data provider for the decoder tests.
 *
 * The input is the same for every class: all 256 byte values in ascending
 * order, written as hexadecimal digits. The expected code points are 0-127
 * followed by the entries of $maps for that encoding, with null entries
 * replaced by 0xFFFD.
 *
 * @return \Generator Data sets of the form [hex input, code points, class]
 */
public function provideStrings() {
    $everyByte = implode("", array_map("chr", range(0, 255)));
    $bytes = bin2hex($everyByte);
    foreach (self::$classes as $name => $class) {
        $codes = range(0, 127);
        foreach (self::$maps[$name] as $mapped) {
            // unmapped bytes are expected to decode to U+FFFD
            $codes[] = $mapped ?? 0xFFFD;
        }
        yield $name => [$bytes, $codes, $class];
    }
}
/**
 * Data provider for testReplacementModes().
 *
 * For every encoding the first byte whose expected code point is 0xFFFD is
 * located and wrapped between two null bytes, in upper-case hexadecimal. An
 * encoding without such a byte yields an empty string instead.
 *
 * @return \Generator Data sets of the form [hex input, class]
 */
public function provideBrokenStrings() {
    foreach ($this->provideStrings() as $name => $test) {
        [, $codes, $class] = $test;
        $bump = array_search(0xFFFD, $codes, true);
        if ($bump !== false) {
            $byte = sprintf("%02X", $bump);
            yield $name => ["00 $byte 00", $class];
        } else {
            // if the encoding uses all 128 high byte values, this test is non-operative
            yield $name => ["", $class];
        }
    }
}
/**
 * Encodes a single code point through the static encode() method of the
 * supplied class and checks the outcome.
 *
 * When the expectation is a Throwable, an exception of the same class and
 * code is expected; otherwise the returned value must be identical to the
 * expectation.
 *
 * @dataProvider provideInvalids
 * @covers MensBeam\Intl\Encoding\SingleByteEncoding::encode
 * @covers MensBeam\Intl\Encoding\SingleByteEncoding::errEnc
 *
 * @param string $class Class whose static encode() method is called
 * @param bool $mode Second argument passed to encode()
 * @param int $input Code point to encode
 * @param mixed $exp Expected return value, or a Throwable describing the expected exception
 */
public function testEncodeInvalidCodePoints(string $class, bool $mode, int $input, $exp) {
    $expectsFailure = $exp instanceof \Throwable;
    if ($expectsFailure) {
        $this->expectException(get_class($exp));
        $this->expectExceptionCode($exp->getCode());
    }
    $actual = $class::encode($input, $mode);
    $this->assertSame($exp, $actual);
}
}

49
tests/cases/Encoding/TestUTF16BE.php

@ -0,0 +1,49 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\TestCase\Encoding;
use MensBeam\Intl\Encoding\UTF16BE;
class TestUTF16BE extends TestUTF16LE {
    protected $testedClass = UTF16BE::class;
    /*
        Byte Order Mark        (2 bytes) Offset 0
        Char 0 U+007A          (2 bytes) Offset 2
        Char 1 U+00A2          (2 bytes) Offset 4
        Char 2 U+6C34          (2 bytes) Offset 6
        Char 3 U+1D11E         (4 bytes) Offset 8
        Char 4 U+F8FF          (2 bytes) Offset 12
        Char 5 U+10FFFD        (4 bytes) Offset 14
        Char 6 U+FFFE          (2 bytes) Offset 18
        End of string at char 7, offset 20
    */
    protected $seekString = "FEFF 007A 00A2 6C34 D834DD1E F8FF DBFFDFFD FFFE";
    protected $seekCodes = [0x007A, 0x00A2, 0x6C34, 0x1D11E, 0xF8FF, 0x10FFFD, 0xFFFE];
    protected $seekOffsets = [2, 4, 6, 8, 12, 14, 18, 20];
    /* This string contains an invalid character sequence sandwiched between two null characters */
    protected $brokenChar = "0000 DC00 0000";
    /* This string contains the ASCII characters "A" and "Z" followed by two arbitrary non-ASCII characters, followed by the two ASCII characters "0" and "9" */
    protected $spanString = "0041 005A 6C34 D834DD1E 0030 0039";
    protected $lowerA = "\x00a";

    /**
     * Data provider: re-uses the data sets of the parent class with the two
     * bytes of every four-digit hexadecimal word swapped.
     *
     * Words of any other length are left as they are, and every data set is
     * padded to three elements so that the alternative expectation is always
     * present (null when the parent did not supply one).
     *
     * @return \Generator Data sets of the form [hex input, code points, alternative code points or null]
     */
    public function provideStrings() {
        $swapBytes = function($word) {
            if (strlen($word) === 4) {
                return $word[2].$word[3].$word[0].$word[1];
            }
            return $word;
        };
        foreach (parent::provideStrings() as $name => $test) {
            if (count($test) === 2) {
                $test[] = null;
            }
            [$string, $codes, $altCodes] = $test;
            $swapped = array_map($swapBytes, explode(" ", $string));
            yield $name => [implode(" ", $swapped), $codes, $altCodes];
        }
    }
}

170
tests/cases/Encoding/TestUTF16LE.php

@ -0,0 +1,170 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\TestCase\Encoding;
use MensBeam\Intl\Encoding\UTF16LE;
class TestUTF16LE extends \MensBeam\Intl\Test\DecoderTest {
protected $testedClass = UTF16LE::class;
/*
Byte Order Mark (2 bytes) Offset 0
Char 0 U+007A (2 bytes) Offset 2
Char 1 U+00A2 (2 bytes) Offset 4
Char 2 U+6C34 (2 bytes) Offset 6
Char 3 U+1D11E (4 bytes) Offset 8
Char 4 U+F8FF (2 bytes) Offset 12
Char 5 U+10FFFD (4 bytes) Offset 14
Char 6 U+FFFE (2 bytes) Offset 18
End of string at char 7, offset 20
*/
protected $seekString = "FFFE 7A00 A200 346C 34D81EDD FFF8 FFDBFDDF FEFF";
protected $seekCodes = [0x007A, 0x00A2, 0x6C34, 0x1D11E, 0xF8FF, 0x10FFFD, 0xFFFE];
protected $seekOffsets = [2, 4, 6, 8, 12, 14, 18, 20];
/* This string contains an invalid character sequence sandwiched between two null characters */
protected $brokenChar = "0000 00DC 0000";
/* This string conatins the ASCII characters "A" and "Z" followed by two arbitrary non-ASCII characters, followed by the two ASCII characters "0" and "9" */
protected $spanString = "4100 5A00 346C 34D81EDD 3000 3900";
protected $lowerA = "a\x00";
/**
* @dataProvider provideStrings
* @covers MensBeam\Intl\Encoding\UTF16::__construct
* @covers MensBeam\Intl\Encoding\UTF16::nextCode
*/
public function testDecodeMultipleCharactersAsCodePoints(string $input, array $exp) {
return parent::testDecodeMultipleCharactersAsCodePoints($input, $exp);
}
/**
* @dataProvider provideStrings
* @covers MensBeam\Intl\Encoding\UTF16::__construct
* @covers MensBeam\Intl\Encoding\UTF16::nextChar
*/
public function testDecodeMultipleCharactersAsStrings(string $input, array $exp) {
return parent::testDecodeMultipleCharactersAsStrings($input, $exp);
}
/**
* @dataProvider provideStrings
* @covers MensBeam\Intl\Encoding\UTF16::seekBack
*/
public function testSTepBackThroughAString(string $input, array $exp) {
return parent::testSTepBackThroughAString($input, $exp);
}
/**
* @covers MensBeam\Intl\Encoding\UTF16::seek
* @covers MensBeam\Intl\Encoding\UTF16::posChar
* @covers MensBeam\Intl\Encoding\UTF16::posByte
* @covers MensBeam\Intl\Encoding\UTF16::rewind
*/
public function testSeekThroughAString() {
return parent::testSeekThroughAString();
}
/**
* @covers MensBeam\Intl\Encoding\UTF16::posChar
* @covers MensBeam\Intl\Encoding\UTF16::posByte
* @covers MensBeam\Intl\Encoding\UTF16::eof
*/
public function testTraversePastTheEndOfAString() {
return parent::testTraversePastTheEndOfAString();
}
/**
* @covers MensBeam\Intl\Encoding\UTF16::peekChar
* @covers MensBeam\Intl\Encoding\UTF16::stateSave
* @covers MensBeam\Intl\Encoding\UTF16::stateApply
*/
public function testPeekAtCharacters() {
return parent::testPeekAtCharacters();
}
/**
* @covers MensBeam\Intl\Encoding\UTF16::peekCode
* @covers MensBeam\Intl\Encoding\UTF16::stateSave
* @covers MensBeam\Intl\Encoding\UTF16::stateApply
*/
public function testPeekAtCodePoints() {
return parent::testPeekAtCodePoints();
}
/**
* @dataProvider provideStrings
* @covers MensBeam\Intl\Encoding\UTF16::lenChar
* @covers MensBeam\Intl\Encoding\UTF16::lenByte
* @covers MensBeam\Intl\Encoding\UTF16::stateSave
* @covers MensBeam\Intl\Encoding\UTF16::stateApply
*/
public function testGetStringLength(string $input, array $points) {
return parent::testGetStringLength($input, $points);
}
/**
* Runs the parent implementation of this test; it takes no arguments from a data provider.
*
* @covers MensBeam\Intl\Encoding\UTF16::errDec
*/
public function testReplacementModes() {
return parent::testReplacementModes();
}
/**
* Forwards each provided sample and its expected values unchanged to the parent implementation of this test.
*
* @dataProvider provideStrings
* @covers MensBeam\Intl\Encoding\UTF16::rewind
* @covers MensBeam\Intl\Encoding\UTF16::chars
* @covers MensBeam\Intl\Encoding\UTF16::codes
*/
public function testIterateThroughAString(string $input, array $exp) {
return parent::testIterateThroughAString($input, $exp);
}
/**
* Forwards each provided sample to the parent implementation of this test.
*
* The third argument is optional because only some provider rows carry a third element.
* It is declared with an explicit nullable type: a null default on a plain "array" type
* relies on implicit nullability, which PHP 8.4 deprecates.
*
* @dataProvider provideStrings
* @covers MensBeam\Intl\Encoding\UTF16::nextCode
*/
public function testIterateThroughAStringAllowingSurrogates(string $input, array $strictExp, ?array $relaxedExp = null) {
    return parent::testIterateThroughAStringAllowingSurrogates($input, $strictExp, $relaxedExp);
}
/**
* Runs the parent implementation of this test; it takes no arguments from a data provider.
*
* @covers MensBeam\Intl\Encoding\UTF16::seekBack
*/
public function testSeekBackOverRandomData() {
return parent::testSeekBackOverRandomData();
}
/**
* Runs the parent implementation of this test; unlike most overrides here it does not return the parent's result.
*
* @covers MensBeam\Intl\Encoding\UTF16::asciiSpan
*/
public function testExtractAsciiSpans() {
parent::testExtractAsciiSpans();
}
/**
* Runs the parent implementation of this test; unlike most overrides here it does not return the parent's result.
*
* @covers MensBeam\Intl\Encoding\UTF16::asciiSpanNot
*/
public function testExtractNegativeAsciiSpans() {
parent::testExtractNegativeAsciiSpans();
}
/**
* Supplies the decoding samples used by the data-driven tests of this class.
*
* Each row is keyed by a description and holds an input written as hexadecimal digits
* (presumably converted to bytes by the parent test; confirm there) followed by the list
* of expected code points, where 65533 is U+FFFD. The surrogate rows carry a third list,
* which testIterateThroughAStringAllowingSurrogates receives as its $relaxedExp argument.
*/
public function provideStrings() {
return [
// control samples
'empty string' => ["", []],
'sanity check' => ["6100 6200 6300 3100 3200 3300", [97, 98, 99, 49, 50, 51]],
'mixed sample' => ["7A00 A200 346C 34D8 1EDD FFF8 FFDB FDDF FEFF", [122, 162, 27700, 119070, 63743, 1114109, 65534]],
// unexpected EOF
'EOF in BMP character' => ["0000 FF", [0, 65533]],
'EOF after lead surrogate' => ["0000 34D8", [0, 65533]],
'EOF in trail surrogate' => ["0000 34D8 1E", [0, 65533]],
// invalid UTF-16 surrogates
// the third list in each row is the expectation passed as $relaxedExp
'lead surrogate without trail' => ["34D8 0000", [65533, 0], [0xD834, 0]],
'trail surrogate without lead' => ["1EDD 0000", [65533, 0], [0xDD1E, 0]],
'double lead surrogate' => ["34D8 34D8 1EDD", [65533, 119070], [0xD834, 119070]],
'double trail surrogate' => ["34D8 1EDD 1EDD", [119070, 65533], [119070, 0xDD1E]],
];
}
}

509
tests/cases/Encoding/TestUTF8.php

@ -1,409 +1,244 @@
<?php
/** @license MIT
* Copyright 2017 J. King, Dustin Wilson et al.
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\TestCase\Encoding;
use MensBeam\Intl\Encoding\UTF8;
use MensBeam\Intl\Encoding\Coder;
use MensBeam\Intl\Encoding\EncoderException;
use MensBeam\Intl\Encoding\DecoderException;
class TestUTF8 extends \PHPUnit\Framework\TestCase {
class TestUTF8 extends \MensBeam\Intl\Test\CoderDecoderTest {
protected $testedClass = UTF8::class;
/*
Byte Order Mark (3 bytes) Offset 0
Char 0 U+007A (1 byte) Offset 3
Char 1 U+00A2 (2 bytes) Offset 4
Char 2 U+6C34 (3 bytes) Offset 6
Char 3 U+1D11E (4 bytes) Offset 9
Char 4 U+F8FF (3 bytes) Offset 13
Char 5 U+10FFFD (4 bytes) Offset 16
Char 6 U+FFFE (3 bytes) Offset 20
End of string at char 7, offset 23
*/
protected $seekString = "EFBBBF 7A C2A2 E6B0B4 F09D849E EFA3BF F48FBFBD EFBFBE";
protected $seekCodes = [0x007A, 0x00A2, 0x6C34, 0x1D11E, 0xF8FF, 0x10FFFD, 0xFFFE];
protected $seekOffsets = [3, 4, 6, 9, 13, 16, 20, 23];
/* This string contains an invalid character sequence sandwiched between two null characters */
protected $brokenChar = "00 FF 00";
/* This string contains the ASCII characters "A" and "Z" followed by two arbitrary non-ASCII characters, followed by the two ASCII characters "0" and "9" */
protected $spanString = "41 5A E6B0B4 F09D849E 30 39";
/**
* Supplies the encoding samples for testEncodeCodePoints and testEncodeCodePointsStatically.
*
* Each row is keyed by a description and holds, in order, the values those tests receive as
* $fatal, $input and $exp: a boolean, a code point, and either the expected output written
* as hexadecimal digits or an EncoderException carrying the expected error code.
*/
public function provideCodePoints() {
return [
'U+007A (HTML)' => [false, 0x7A, "7A"],
'U+007A (fatal)' => [true, 0x7A, "7A"],
'U+00A2 (HTML)' => [false, 0xA2, "C2 A2"],
'U+00A2 (fatal)' => [true, 0xA2, "C2 A2"],
'U+6C34 (HTML)' => [false, 0x6C34, "E6 B0 B4"],
'U+6C34 (fatal)' => [true, 0x6C34, "E6 B0 B4"],
'U+1D11E (HTML)' => [false, 0x1D11E, "F0 9D 84 9E"],
'U+1D11E (fatal)' => [true, 0x1D11E, "F0 9D 84 9E"],
'U+F8FF (HTML)' => [false, 0xF8FF, "EF A3 BF"],
'U+F8FF (fatal)' => [true, 0xF8FF, "EF A3 BF"],
'U+10FFFD (HTML)' => [false, 0x10FFFD, "F4 8F BF BD"],
'U+10FFFD (fatal)' => [true, 0x10FFFD, "F4 8F BF BD"],
'U+FFFE (HTML)' => [false, 0xFFFE, "EF BF BE"],
'U+FFFE (fatal)' => [true, 0xFFFE, "EF BF BE"],
// values outside the Unicode range are expected to fail in both modes
'-1 (HTML)' => [false, -1, new EncoderException("", Coder::E_INVALID_CODE_POINT)],
'-1 (fatal)' => [true, -1, new EncoderException("", Coder::E_INVALID_CODE_POINT)],
'0x110000 (HTML)' => [false, 0x110000, new EncoderException("", Coder::E_INVALID_CODE_POINT)],
'0x110000 (fatal)' => [true, 0x110000, new EncoderException("", Coder::E_INVALID_CODE_POINT)],
];
}
/**
* Supplies the decoding samples used by the data-driven tests of this class.
*
* Each row is keyed by a description and holds an input written as hexadecimal digits
* followed by the list of expected code points, where 65533 is U+FFFD. The surrogate rows
* carry a third list, which testIterateThroughAStringAllowingSurrogates receives as its
* $relaxedExp argument.
*/
public function provideStrings() {
return [
// control samples
'empty string' => ["", []],
'sanity check' => ["61 62 63 31 32 33", [97, 98, 99, 49, 50, 51]],
'multibyte control' => ["E5 8F A4 E6 B1 A0 E3 82 84 E8 9B 99 E9 A3 9B E3 81 B3 E8 BE BC E3 82 80 E6 B0 B4 E3 81 AE E9 9F B3", [21476, 27744, 12420, 34521, 39131, 12403, 36796, 12416, 27700, 12398, 38899]],
'mixed sample' => ["7A C2 A2 E6 B0 B4 F0 9D 84 9E EF A3 BF F4 8F BF BD EF BF BE", [122, 162, 27700, 119070, 63743, 1114109, 65534]],
// various invalid sequences
'invalid code' => ["FF", [65533]],
'ends early' => ["C0", [65533]],
'ends early 2' => ["E0", [65533]],
'invalid trail' => ["C0 00", [65533, 0]],
'invalid trail 2' => ["C0 C0", [65533, 65533]],
'invalid trail 3' => ["E0 00", [65533, 0]],
'invalid trail 4' => ["E0 C0", [65533, 65533]],
'invalid trail 5' => ["E0 80 00", [65533, 65533, 0]],
'invalid trail 6' => ["E0 80 C0", [65533, 65533, 65533]],
// NOTE(review): the next row uses the same input bytes as 'overlong U+0000 - 6 bytes' below
'> 0x10FFFF' => ["FC 80 80 80 80 80", [65533, 65533, 65533, 65533, 65533, 65533]],
'obsolete lead byte' => ["FE 80 80 80 80 80", [65533, 65533, 65533, 65533, 65533, 65533]],
'overlong U+0000 - 2 bytes' => ["C0 80", [65533, 65533]],
'overlong U+0000 - 3 bytes' => ["E0 80 80", [65533, 65533, 65533]],
'overlong U+0000 - 4 bytes' => ["F0 80 80 80", [65533, 65533, 65533, 65533]],
'overlong U+0000 - 5 bytes' => ["F8 80 80 80 80", [65533, 65533, 65533, 65533, 65533]],
'overlong U+0000 - 6 bytes' => ["FC 80 80 80 80 80", [65533, 65533, 65533, 65533, 65533, 65533]],
'overlong U+007F - 2 bytes' => ["C1 BF", [65533, 65533]],
'overlong U+007F - 3 bytes' => ["E0 81 BF", [65533, 65533, 65533]],
'overlong U+007F - 4 bytes' => ["F0 80 81 BF", [65533, 65533, 65533, 65533]],
'overlong U+007F - 5 bytes' => ["F8 80 80 81 BF", [65533, 65533, 65533, 65533, 65533]],
'overlong U+007F - 6 bytes' => ["FC 80 80 80 81 BF", [65533, 65533, 65533, 65533, 65533, 65533]],
'overlong U+07FF - 3 bytes' => ["E0 9F BF", [65533, 65533, 65533]],
'overlong U+07FF - 4 bytes' => ["F0 80 9F BF", [65533, 65533, 65533, 65533]],
'overlong U+07FF - 5 bytes' => ["F8 80 80 9F BF", [65533, 65533, 65533, 65533, 65533]],
'overlong U+07FF - 6 bytes' => ["FC 80 80 80 9F BF", [65533, 65533, 65533, 65533, 65533, 65533]],
'overlong U+FFFF - 4 bytes' => ["F0 8F BF BF", [65533, 65533, 65533, 65533]],
'overlong U+FFFF - 5 bytes' => ["F8 80 8F BF BF", [65533, 65533, 65533, 65533, 65533]],
'overlong U+FFFF - 6 bytes' => ["FC 80 80 8F BF BF", [65533, 65533, 65533, 65533, 65533, 65533]],
'overlong U+10FFFF - 5 bytes' => ["F8 84 8F BF BF", [65533, 65533, 65533, 65533, 65533]],
'overlong U+10FFFF - 6 bytes' => ["FC 80 84 8F BF BF", [65533, 65533, 65533, 65533, 65533, 65533]],
// UTF-16 surrogates
// surrogates have alternate outputs for when surrogates are being allowed
'lead surrogate' => ["ED A0 80", [65533, 65533, 65533], [0xD800]],
'trail surrogate' => ["ED B0 80", [65533, 65533, 65533], [0xDC00]],
'surrogate pair' => ["ED A0 80 ED B0 80", [65533, 65533, 65533, 65533, 65533, 65533], [0xD800, 0xDC00]],
// self-sync edge cases
'trailing continuation' => ["0A 80 80", [10, 65533, 65533]],
'trailing continuation 2' => ["E5 8F A4 80", [21476, 65533]],
];
}
/**
* @dataProvider provideCodePoints
* @covers MensBeam\Intl\Encoding\Encoder
* @covers MensBeam\Intl\Encoding\UTF8::encode
*/
public function testEncodeCodePoints(int $input, $exp) {
if ($exp instanceof \Throwable) {
$this->expectException(get_class($exp));
$this->expectExceptionCode($exp->getCode());
}
$out = UTF8::encode($input);
$this->assertSame(bin2hex($exp), bin2hex($out));
* @covers MensBeam\Intl\Encoding\UTF8::errEnc
*/
public function testEncodeCodePoints(bool $fatal, $input, $exp) {
// forwards the provided arguments unchanged to the parent implementation of this test
return parent::testEncodeCodePoints($fatal, $input, $exp);
}
/**
* Forwards each provided row unchanged to the parent implementation of this test.
*
* @dataProvider provideCodePoints
* @covers MensBeam\Intl\Encoding\UTF8::encode
* @covers MensBeam\Intl\Encoding\UTF8::errEnc
*/
public function testEncodeCodePointsStatically(bool $fatal, $input, $exp) {
return parent::testEncodeCodePointsStatically($fatal, $input, $exp);
}
/**
* @dataProvider provideStrings
* @covers MensBeam\Intl\Encoding\UTF8::__construct
* @covers MensBeam\Intl\Encoding\UTF8::nextCode
*/
*/
public function testDecodeMultipleCharactersAsCodePoints(string $input, array $exp) {
// NOTE(review): this span looks like a rendered diff in which the inline decoding loop and
// the trailing parent call may belong to different revisions; confirm which lines survive
$s = new UTF8($input);
$out = [];
// collect code points until nextCode() returns false
while (($p = $s->nextCode()) !== false) {
$out[] = $p;
}
$this->assertEquals($exp, $out);
return parent::testDecodeMultipleCharactersAsCodePoints($input, $exp);
}
/**
* @dataProvider provideStrings
* @covers MensBeam\Intl\Encoding\UTF8::__construct
* @covers MensBeam\Intl\Encoding\UTF8::nextChar
*/
*/
public function testDecodeMultipleCharactersAsStrings(string $input, array $exp) {
    // turn each expected code point into the character it denotes, keeping the array keys
    $expected = [];
    foreach ($exp as $key => $codePoint) {
        $expected[$key] = \IntlChar::chr($codePoint);
    }
    // read characters until the decoder hands back an empty string
    $decoder = new UTF8($input);
    $actual = [];
    for ($char = $decoder->nextChar(); $char !== ""; $char = $decoder->nextChar()) {
        $actual[] = $char;
    }
    $this->assertEquals($expected, $actual);
}
/**
* @dataProvider provideStrings
* @covers MensBeam\Intl\Encoding\UTF8::rewind
* @covers MensBeam\Intl\Encoding\UTF8::chars
* @covers MensBeam\Intl\Encoding\UTF8::codes
*/
public function testIterateThroughAString(string $input, array $exp) {
// NOTE(review): this span looks like a rendered diff; the final parent call names a different
// test than this method and may belong to another revision, so confirm which lines survive
// $out is assigned here and not read again within this method
$out = [];
$s = new UTF8($input);
$a = 0;
$this->assertTrue(true); // prevent risky test of empty string
// first pass over codes(): keys must count up from zero and values must match $exp
foreach ($s->codes() as $index => $p) {
$this->assertSame($a, $index, "Character key at index $a reported incorrectly");
$this->assertSame($exp[$a], $p, "Character at index $a decoded incorrectly");
$a++;
}
// a second pass without rewinding is asserted to yield nothing
$a = 0;
foreach ($s->codes() as $p) {
$a++;
}
$this->assertSame(0, $a);
// after rewind() the pass is asserted to yield sizeof($exp) items
$s->rewind();
foreach ($s->codes() as $p) {
$a++;
}
$this->assertSame(sizeof($exp), $a);
// from here on $exp holds characters rather than code points
$exp = array_map(function($v) {
return \IntlChar::chr($v);
}, $exp);
// neither $a nor the decoder is reset before this loop
foreach ($s->chars() as $index => $p) {
$this->assertSame($a, $index, "Character key at index $a reported incorrectly");
$this->assertSame(bin2hex($exp[$a]), bin2hex($p), "Character at index $a decoded incorrectly");
$a++;
}
$a = 0;
foreach ($s->chars() as $p) {
$a++;
}
$this->assertSame(0, $a);
$s->rewind();
foreach ($s->chars() as $p) {
$a++;
}
$this->assertSame(sizeof($exp), $a);
return parent::testDecodeMultipleCharactersAsStrings($input, $exp);
}
/**
* @dataProvider provideStrings
* @covers MensBeam\Intl\Encoding\UTF8::sync
*/
public function testSTepBackThroughAString(string $input, array $points) {
$s = new UTF8($input);
$a = 0;
$this->assertTrue(true); // prevent risky test of empty string
while (($p1 = $s->nextCode()) !== false) {
$this->assertSame(0, $s->seek(-1));
$p2 = $s->nextCode();
$this->assertSame($p1, $p2, "Mismatch at character position $a");
$this->assertSame(++$a, $s->posChar(), "Character position should be $a");
}
* @covers MensBeam\Intl\Encoding\UTF8::seekBack
*/
public function testSTepBackThroughAString(string $input, array $exp) {
// forwards the provided arguments unchanged to the parent implementation of this test
return parent::testSTepBackThroughAString($input, $exp);
}
/**
* @covers MensBeam\Intl\Encoding\UTF8::seek
* @covers MensBeam\Intl\Encoding\UTF8::posChar
* @covers MensBeam\Intl\Encoding\UTF8::posByte
*/
* @covers MensBeam\Intl\Encoding\UTF8::rewind
*/
public function testSeekThroughAString() {
// NOTE(review): this span looks like a rendered diff in which the inline assertions and the
// trailing parent call may belong to different revisions; confirm which lines survive
/*
Char 0 U+007A (1 byte) Offset 0
Char 1 U+00A2 (2 bytes) Offset 1
Char 2 U+6C34 (3 bytes) Offset 3
Char 3 U+1D11E (4 bytes) Offset 6
Char 4 U+F8FF (3 bytes) Offset 10
Char 5 U+10FFFD (4 bytes) Offset 13
Char 6 U+FFFE (3 bytes) Offset 17
End of string at char 7, offset 20
*/
$input = "\x7A\xC2\xA2\xE6\xB0\xB4\xF0\x9D\x84\x9E\xEF\xA3\xBF\xF4\x8F\xBF\xBD\xEF\xBF\xBE";
$s = new UTF8($input);
// each group below checks the value returned by seek() and then both reported positions
$this->assertSame(0, $s->posChar());
$this->assertSame(0, $s->posByte());
$this->assertSame(0, $s->seek(0));
$this->assertSame(0, $s->posChar());
$this->assertSame(0, $s->posByte());
// seeking back from the start is asserted to return 1 and leave the position at zero
$this->assertSame(1, $s->seek(-1));
$this->assertSame(0, $s->posChar());
$this->assertSame(0, $s->posByte());
$this->assertSame(0, $s->seek(1));
$this->assertSame(1, $s->posChar());
$this->assertSame(1, $s->posByte());
$this->assertSame(0, $s->seek(2));
$this->assertSame(3, $s->posChar());
$this->assertSame(6, $s->posByte());
$this->assertSame(0, $s->seek(4));
$this->assertSame(7, $s->posChar());
$this->assertSame(20, $s->posByte());
// seeking forward from the end is asserted to return 1 and leave the position unchanged
$this->assertSame(1, $s->seek(1));
$this->assertSame(7, $s->posChar());
$this->assertSame(20, $s->posByte());
$this->assertSame(0, $s->seek(-3));
$this->assertSame(4, $s->posChar());
$this->assertSame(10, $s->posByte());
// seeking back ten from character 4 is asserted to return 6 and stop at the start
$this->assertSame(6, $s->seek(-10));
$this->assertSame(0, $s->posChar());
$this->assertSame(0, $s->posByte());
return parent::testSeekThroughAString();
}
/**
* @covers MensBeam\Intl\Encoding\UTF8::posChar
* @covers MensBeam\Intl\Encoding\UTF8::posByte
*/
* @covers MensBeam\Intl\Encoding\UTF8::eof
*/
public function testTraversePastTheEndOfAString() {
// NOTE(review): this span looks like a rendered diff in which the inline assertions and the
// trailing parent call may belong to different revisions; confirm which lines survive
// character reads: a read at the end is asserted to return "" without moving the position
$s = new UTF8("a");
$this->assertSame(0, $s->posChar());
$this->assertSame(0, $s->posByte());
$this->assertSame("a", $s->nextChar());
$this->assertSame(1, $s->posChar());
$this->assertSame(1, $s->posByte());
$this->assertSame("", $s->nextChar());
$this->assertSame(1, $s->posChar());
$this->assertSame(1, $s->posByte());
// code point reads: a read at the end is asserted to return false without moving the position
$s = new UTF8("a");
$this->assertSame(0, $s->posChar());
$this->assertSame(0, $s->posByte());
$this->assertSame(ord("a"), $s->nextCode());
$this->assertSame(1, $s->posChar());
$this->assertSame(1, $s->posByte());
$this->assertSame(false, $s->nextCode());
$this->assertSame(1, $s->posChar());
$this->assertSame(1, $s->posByte());
return parent::testTraversePastTheEndOfAString();
}
/**
* @covers MensBeam\Intl\Encoding\UTF8::peekChar
*/
* @covers MensBeam\Intl\Encoding\UTF8::stateSave
* @covers MensBeam\Intl\Encoding\UTF8::stateApply
*/
public function testPeekAtCharacters() {
// NOTE(review): this span looks like a rendered diff in which the inline assertions and the
// trailing parent call may belong to different revisions; confirm which lines survive
/*
Char 0 U+007A (1 byte) Offset 0
Char 1 U+00A2 (2 bytes) Offset 1
Char 2 U+6C34 (3 bytes) Offset 3
Char 3 U+1D11E (4 bytes) Offset 6
Char 4 U+F8FF (3 bytes) Offset 10
Char 5 U+10FFFD (4 bytes) Offset 13
Char 6 U+FFFE (3 bytes) Offset 17
End of string at char 7, offset 20
*/
$input = "\x7A\xC2\xA2\xE6\xB0\xB4\xF0\x9D\x84\x9E\xEF\xA3\xBF\xF4\x8F\xBF\xBD\xEF\xBF\xBE";
$s = new UTF8($input);
$s->seek(2);
$this->assertSame(2, $s->posChar());
$this->assertSame(3, $s->posByte());
// after every peekChar() call both positions are asserted to be unchanged
$this->assertSame(bin2hex("\u{6C34}"), bin2hex($s->peekChar()));
$this->assertSame(2, $s->posChar());
$this->assertSame(3, $s->posByte());
$this->assertSame(bin2hex("\u{6C34}\u{1D11E}"), bin2hex($s->peekChar(2)));
$this->assertSame(2, $s->posChar());
$this->assertSame(3, $s->posByte());
$s->seek(3);
$this->assertSame(5, $s->posChar());
$this->assertSame(13, $s->posByte());
// asking for three characters with two left is asserted to return just those two
$this->assertSame(bin2hex("\u{10FFFD}\u{FFFE}"), bin2hex($s->peekChar(3)));
$this->assertSame(5, $s->posChar());
$this->assertSame(13, $s->posByte());
// a negative count is asserted to return an empty string
$this->assertSame("", $s->peekChar(-5));
$this->assertSame(5, $s->posChar());
$this->assertSame(13, $s->posByte());
return parent::testPeekAtCharacters();
}
/**
* @covers MensBeam\Intl\Encoding\UTF8::peekCode
*/
* @covers MensBeam\Intl\Encoding\UTF8::stateSave
* @covers MensBeam\Intl\Encoding\UTF8::stateApply
*/
public function testPeekAtCodePoints() {
// NOTE(review): this span looks like a rendered diff in which the inline assertions and the
// trailing parent call may belong to different revisions; confirm which lines survive
/*
Char 0 U+007A (1 byte) Offset 0
Char 1 U+00A2 (2 bytes) Offset 1
Char 2 U+6C34 (3 bytes) Offset 3
Char 3 U+1D11E (4 bytes) Offset 6
Char 4 U+F8FF (3 bytes) Offset 10
Char 5 U+10FFFD (4 bytes) Offset 13
Char 6 U+FFFE (3 bytes) Offset 17
End of string at char 7, offset 20
*/
$input = "\x7A\xC2\xA2\xE6\xB0\xB4\xF0\x9D\x84\x9E\xEF\xA3\xBF\xF4\x8F\xBF\xBD\xEF\xBF\xBE";
$s = new UTF8($input);
$s->seek(2);
$this->assertSame(2, $s->posChar());
$this->assertSame(3, $s->posByte());
// after every peekCode() call both positions are asserted to be unchanged
$this->assertSame([0x6C34], $s->peekCode());
$this->assertSame(2, $s->posChar());
$this->assertSame(3, $s->posByte());
$this->assertSame([0x6C34, 0x1D11E], $s->peekCode(2));
$this->assertSame(2, $s->posChar());
$this->assertSame(3, $s->posByte());
$s->seek(3);
$this->assertSame(5, $s->posChar());
$this->assertSame(13, $s->posByte());
// asking for three code points with two left is asserted to return just those two
$this->assertSame([0x10FFFD, 0xFFFE], $s->peekCode(3));
$this->assertSame(5, $s->posChar());
$this->assertSame(13, $s->posByte());
// a negative count is asserted to return an empty array
$this->assertSame([], $s->peekCode(-5));
$this->assertSame(5, $s->posChar());
$this->assertSame(13, $s->posByte());
return parent::testPeekAtCodePoints();
}
/**
* @dataProvider provideStrings
* @covers MensBeam\Intl\Encoding\UTF8::len
* @covers MensBeam\Intl\Encoding\UTF8::lenChar
* @covers MensBeam\Intl\Encoding\UTF8::lenByte
* @covers MensBeam\Intl\Encoding\UTF8::stateSave
* @covers MensBeam\Intl\Encoding\UTF8::stateApply
*/
*/
public function testGetStringLength(string $input, array $points) {
// NOTE(review): this span looks like a rendered diff in which the inline assertions and the
// trailing parent call may belong to different revisions; confirm which lines survive
$s = new UTF8($input);
$s->seek(1);
// remember the position so it can be compared after len() is called
$posChar = $s->posChar();
$posByte = $s->posByte();
$this->assertSame(sizeof($points), $s->len());
$this->assertSame($posChar, $s->posChar());
$this->assertSame($posByte, $s->posByte());
return parent::testGetStringLength($input, $points);
}
/**
* @covers MensBeam\Intl\Encoding\UTF8::err
*/
* @covers MensBeam\Intl\Encoding\UTF8::errDec
*/
public function testReplacementModes() {
// NOTE(review): this span looks like a rendered diff in which the inline assertions and the
// trailing parent call may belong to different revisions; confirm which lines survive
$input = "\x30\xFF\x30";
// officially test replacement characters and null replacement (already effectively tested by other tests)
$s = new UTF8($input, false);
$s->seek(1);
$this->assertSame(0xFFFD, $s->nextCode());
$s->seek(-2);
// test fatal mode
$s = new UTF8($input, true);
$s->seek(1);
// if nextCode() does not throw, $p holds its return value and the assertion in finally fails
try {
$p = $s->nextCode();
} catch (DecoderException $e) {
$p = $e;
} finally {
$this->assertInstanceOf(DecoderException::class, $p);
}
// the position is asserted to have moved past the invalid byte despite the exception
$this->assertSame(2, $s->posChar());
$this->assertSame(0x30, $s->nextCode());
$s->seek(-2);
$this->assertSame(1, $s->posChar());
// both peek variants are asserted to throw and to leave the position at 1
try {
$p = $s->peekCode();
} catch (DecoderException $e) {
$p = $e;
} finally {
$this->assertInstanceOf(DecoderException::class, $p);
}
$this->assertSame(1, $s->posChar());
try {
$p = $s->peekChar();
} catch (DecoderException $e) {
$p = $e;
} finally {
$this->assertInstanceOf(DecoderException::class, $p);
}
$this->assertSame(1, $s->posChar());
return parent::testReplacementModes();
}
public function provideCodePoints() {
return [
"122" => [122, "\x7A"],
"162" => [162, "\xC2\xA2"],
"27700" => [27700, "\xE6\xB0\xB4"],
"119070" => [119070, "\xF0\x9D\x84\x9E"],
"63743" => [63743, "\xEF\xA3\xBF"],
"1114109" => [1114109, "\xF4\x8F\xBF\xBD"],
"65534" => [65534, "\xEF\xBF\xBE"],
"-1" => [-1, new EncoderException("", UTF8::E_INVALID_CODE_POINT)],
"1114112" => [1114112, new EncoderException("", UTF8::E_INVALID_CODE_POINT)],
];
/**
* @dataProvider provideStrings
* @covers MensBeam\Intl\Encoding\UTF8::rewind
* @covers MensBeam\Intl\Encoding\UTF8::chars
* @covers MensBeam\Intl\Encoding\UTF8::codes
*/
public function testIterateThroughAString(string $input, array $exp) {
// forwards the provided arguments unchanged to the parent implementation of this test
return parent::testIterateThroughAString($input, $exp);
}
public function provideStrings() {
return [
// control samples
'empty string' => ["", []],
'sanity check' => ["\x61\x62\x63\x31\x32\x33", [97, 98, 99, 49, 50, 51]],
'multibyte control' => ["\xE5\x8F\xA4\xE6\xB1\xA0\xE3\x82\x84\xE8\x9B\x99\xE9\xA3\x9B\xE3\x81\xB3\xE8\xBE\xBC\xE3\x82\x80\xE6\xB0\xB4\xE3\x81\xAE\xE9\x9F\xB3", [21476, 27744, 12420, 34521, 39131, 12403, 36796, 12416, 27700, 12398, 38899]],
'mixed sample' => ["\x7A\xC2\xA2\xE6\xB0\xB4\xF0\x9D\x84\x9E\xEF\xA3\xBF\xF4\x8F\xBF\xBD\xEF\xBF\xBE", [122, 162, 27700, 119070, 63743, 1114109, 65534]],
// various invalid sequences
'invalid code' => ["\xFF", [65533]],
'ends early' => ["\xC0", [65533]],
'ends early 2' => ["\xE0", [65533]],
'invalid trail' => ["\xC0\x00", [65533, 0]],
'invalid trail 2' => ["\xC0\xC0", [65533, 65533]],
'invalid trail 3' => ["\xE0\x00", [65533, 0]],
'invalid trail 4' => ["\xE0\xC0", [65533, 65533]],
'invalid trail 5' => ["\xE0\x80\x00", [65533, 65533, 0]],
'invalid trail 6' => ["\xE0\x80\xC0", [65533, 65533, 65533]],
'> 0x10FFFF' => ["\xFC\x80\x80\x80\x80\x80", [65533, 65533, 65533, 65533, 65533, 65533]],
'obsolete lead byte' => ["\xFE\x80\x80\x80\x80\x80", [65533, 65533, 65533, 65533, 65533, 65533]],
'overlong U+0000 - 2 bytes' => ["\xC0\x80", [65533, 65533]],
'overlong U+0000 - 3 bytes' => ["\xE0\x80\x80", [65533, 65533, 65533]],
'overlong U+0000 - 4 bytes' => ["\xF0\x80\x80\x80", [65533, 65533, 65533, 65533]],
'overlong U+0000 - 5 bytes' => ["\xF8\x80\x80\x80\x80", [65533, 65533, 65533, 65533, 65533]],
'overlong U+0000 - 6 bytes' => ["\xFC\x80\x80\x80\x80\x80", [65533, 65533, 65533, 65533, 65533, 65533]],
'overlong U+007F - 2 bytes' => ["\xC1\xBF", [65533, 65533]],
'overlong U+007F - 3 bytes' => ["\xE0\x81\xBF", [65533, 65533, 65533]],
'overlong U+007F - 4 bytes' => ["\xF0\x80\x81\xBF", [65533, 65533, 65533, 65533]],
'overlong U+007F - 5 bytes' => ["\xF8\x80\x80\x81\xBF", [65533, 65533, 65533, 65533, 65533]],
'overlong U+007F - 6 bytes' => ["\xFC\x80\x80\x80\x81\xBF", [65533, 65533, 65533, 65533, 65533, 65533]],
'overlong U+07FF - 3 bytes' => ["\xE0\x9F\xBF", [65533, 65533, 65533]],
'overlong U+07FF - 4 bytes' => ["\xF0\x80\x9F\xBF", [65533, 65533, 65533, 65533]],
'overlong U+07FF - 5 bytes' => ["\xF8\x80\x80\x9F\xBF", [65533, 65533, 65533, 65533, 65533]],
'overlong U+07FF - 6 bytes' => ["\xFC\x80\x80\x80\x9F\xBF", [65533, 65533, 65533, 65533, 65533, 65533]],
'overlong U+FFFF - 4 bytes' => ["\xF0\x8F\xBF\xBF", [65533, 65533, 65533, 65533]],
'overlong U+FFFF - 5 bytes' => ["\xF8\x80\x8F\xBF\xBF", [65533, 65533, 65533, 65533, 65533]],
'overlong U+FFFF - 6 bytes' => ["\xFC\x80\x80\x8F\xBF\xBF", [65533, 65533, 65533, 65533, 65533, 65533]],
'overlong U+10FFFF - 5 bytes' => ["\xF8\x84\x8F\xBF\xBF", [65533, 65533, 65533, 65533, 65533]],
'overlong U+10FFFF - 6 bytes' => ["\xFC\x80\x84\x8F\xBF\xBF", [65533, 65533, 65533, 65533, 65533, 65533]],
// UTF-16 surrogates
'lead surrogate' => ["\xED\xA0\x80", [65533, 65533, 65533]],
'trail surrogate' => ["\xED\xB0\x80", [65533, 65533, 65533]],
'surrogate pair' => ["\xED\xA0\x80\xED\xB0\x80", [65533, 65533, 65533, 65533, 65533, 65533]],
// self-sync edge cases
'trailing continuation' => ["\x0A\x80\x80", [10, 65533, 65533]],
'trailing continuation 2' => ["\xE5\x8F\xA4\x80", [21476, 65533]],
];
/**
* @dataProvider provideStrings
* @covers MensBeam\Intl\Encoding\UTF8::nextCode
*/
public function testIterateThroughAStringAllowingSurrogates(string $input, array $strictExp, ?array $relaxedExp = null) {
    // The third argument is declared explicitly nullable: a null default on a plain "array"
    // type relies on implicit nullability, which PHP 8.4 deprecates. All arguments are
    // forwarded unchanged to the parent implementation of this test.
    return parent::testIterateThroughAStringAllowingSurrogates($input, $strictExp, $relaxedExp);
}
/**
* Runs the parent implementation of this test; it takes no arguments from a data provider.
*
* @covers MensBeam\Intl\Encoding\UTF8::seekBack
*/
public function testSeekBackOverRandomData() {
return parent::testSeekBackOverRandomData();
}
/**
* Runs the parent implementation of this test; unlike most overrides here it does not return the parent's result.
*
* @covers MensBeam\Intl\Encoding\UTF8::asciiSpan
*/
public function testExtractAsciiSpans() {
parent::testExtractAsciiSpans();
}
/**
* Runs the parent implementation of this test; unlike most overrides here it does not return the parent's result.
*
* @covers MensBeam\Intl\Encoding\UTF8::asciiSpanNot
*/
public function testExtractNegativeAsciiSpans() {
parent::testExtractNegativeAsciiSpans();
}
}

202
tests/cases/Encoding/TestXUserDefined.php

@ -0,0 +1,202 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\TestCase\Encoding;
use MensBeam\Intl\Encoding\XUserDefined;
use MensBeam\Intl\Encoding\Coder;
use MensBeam\Intl\Encoding\EncoderException;
/**
 * Test case for the x-user-defined encoding.
 *
 * The test methods below forward to the parent class; their docblocks carry the
 * data-provider and coverage annotations that apply to XUserDefined. The properties
 * and the two provider methods supply the encoding-specific sample data.
 */
class TestXUserDefined extends \MensBeam\Intl\Test\CoderDecoderTest {
    protected $testedClass = XUserDefined::class;
    /* X-user-defined doesn't have complex seeking, so this string is generic */
    protected $seekString = "30 31 32 33 34 35 36";
    protected $seekCodes = [0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36];
    protected $seekOffsets = [0, 1, 2, 3, 4, 5, 6, 7];
    /* This string is supposed to contain an invalid character sequence sandwiched between two null characters, but x-user-defined has no invalid characters */
    protected $brokenChar = "";
    /* This string contains the ASCII characters "A" and "Z" followed by two arbitrary non-ASCII characters, followed by the two ASCII characters "0" and "9" */
    protected $spanString = "41 5A 80 FF 30 39";

    /**
     * Supplies the encoding samples for testEncodeCodePoints and testEncodeCodePointsStatically.
     *
     * Each row holds, in order, the values those tests receive as $fatal, $input and $exp:
     * a boolean, a code point, and either the expected output written as hexadecimal digits
     * or an EncoderException carrying the expected error code.
     */
    public function provideCodePoints() {
        return [
            'U+0064 (HTML)'    => [false, 0x64, "64"],
            'U+0064 (fatal)'   => [true, 0x64, "64"],
            'U+F780 (HTML)'    => [false, 0xF780, "80"],
            'U+F780 (fatal)'   => [true, 0xF780, "80"],
            'U+F7FF (HTML)'    => [false, 0xF7FF, "FF"],
            'U+F7FF (fatal)'   => [true, 0xF7FF, "FF"],
            // U+00CA is expected to come out as a numeric character reference when $fatal is
            // false and to raise an exception when it is true
            'U+00CA (HTML)'    => [false, 0xCA, bin2hex("&#202;")],
            'U+00CA (fatal)'   => [true, 0xCA, new EncoderException("", Coder::E_UNAVAILABLE_CODE_POINT)],
            '-1 (HTML)'        => [false, -1, new EncoderException("", Coder::E_INVALID_CODE_POINT)],
            '-1 (fatal)'       => [true, -1, new EncoderException("", Coder::E_INVALID_CODE_POINT)],
            '0x110000 (HTML)'  => [false, 0x110000, new EncoderException("", Coder::E_INVALID_CODE_POINT)],
            '0x110000 (fatal)' => [true, 0x110000, new EncoderException("", Coder::E_INVALID_CODE_POINT)],
        ];
    }

    /**
     * Supplies the decoding samples used by the data-driven tests of this class.
     *
     * Builds two samples programmatically: bytes 0x00-0x7F paired with the code points
     * 0x00-0x7F, and bytes 0x80-0xFF paired with the code points U+F780-U+F7FF. Inputs are
     * written as space-separated upper-case hexadecimal byte values.
     */
    public function provideStrings() {
        $a_bytes = [];
        $a_codes = [];
        for ($a = 0; $a < 0x80; $a++) {
            $a_bytes[] = strtoupper(bin2hex(chr($a)));
            $a_codes[] = $a;
        }
        $p_bytes = [];
        $p_codes = [];
        for ($a = 0; $a < 0x80; $a++) {
            $p_bytes[] = strtoupper(bin2hex(chr(0x80 + $a)));
            $p_codes[] = 0xF780 + $a;
        }
        $a_bytes = implode(" ", $a_bytes);
        $p_bytes = implode(" ", $p_bytes);
        return [
            'empty string'      => ["", []],
            'ASCI bytes'        => [$a_bytes, $a_codes],
            'private-use bytes' => [$p_bytes, $p_codes],
        ];
    }

    /**
     * @dataProvider provideCodePoints
     * @covers MensBeam\Intl\Encoding\Encoder
     * @covers MensBeam\Intl\Encoding\XUserDefined::encode
     * @covers MensBeam\Intl\Encoding\XUserDefined::errEnc
     */
    public function testEncodeCodePoints(bool $fatal, $input, $exp) {
        return parent::testEncodeCodePoints($fatal, $input, $exp);
    }

    /**
     * @dataProvider provideCodePoints
     * @covers MensBeam\Intl\Encoding\XUserDefined::encode
     * @covers MensBeam\Intl\Encoding\XUserDefined::errEnc
     */
    public function testEncodeCodePointsStatically(bool $fatal, $input, $exp) {
        return parent::testEncodeCodePointsStatically($fatal, $input, $exp);
    }

    /**
     * @dataProvider provideStrings
     * @covers MensBeam\Intl\Encoding\XUserDefined::__construct
     * @covers MensBeam\Intl\Encoding\XUserDefined::nextCode
     */
    public function testDecodeMultipleCharactersAsCodePoints(string $input, array $exp) {
        return parent::testDecodeMultipleCharactersAsCodePoints($input, $exp);
    }

    /**
     * @dataProvider provideStrings
     * @covers MensBeam\Intl\Encoding\XUserDefined::__construct
     * @covers MensBeam\Intl\Encoding\XUserDefined::nextChar
     */
    public function testDecodeMultipleCharactersAsStrings(string $input, array $exp) {
        return parent::testDecodeMultipleCharactersAsStrings($input, $exp);
    }

    /**
     * @dataProvider provideStrings
     * @coversNothing
     */
    public function testSTepBackThroughAString(string $input, array $exp) {
        // this test has no meaning for x-user-defined
        return parent::testSTepBackThroughAString($input, $exp);
    }

    /**
     * @covers MensBeam\Intl\Encoding\XUserDefined::seek
     * @covers MensBeam\Intl\Encoding\XUserDefined::posChar
     * @covers MensBeam\Intl\Encoding\XUserDefined::posByte
     * @covers MensBeam\Intl\Encoding\XUserDefined::rewind
     */
    public function testSeekThroughAString() {
        return parent::testSeekThroughAString();
    }

    /**
     * @covers MensBeam\Intl\Encoding\XUserDefined::posChar
     * @covers MensBeam\Intl\Encoding\XUserDefined::posByte
     * @covers MensBeam\Intl\Encoding\XUserDefined::eof
     */
    public function testTraversePastTheEndOfAString() {
        return parent::testTraversePastTheEndOfAString();
    }

    /**
     * @covers MensBeam\Intl\Encoding\XUserDefined::peekChar
     * @covers MensBeam\Intl\Encoding\XUserDefined::stateSave
     * @covers MensBeam\Intl\Encoding\XUserDefined::stateApply
     */
    public function testPeekAtCharacters() {
        return parent::testPeekAtCharacters();
    }

    /**
     * @covers MensBeam\Intl\Encoding\XUserDefined::peekCode
     * @covers MensBeam\Intl\Encoding\XUserDefined::stateSave
     * @covers MensBeam\Intl\Encoding\XUserDefined::stateApply
     */
    public function testPeekAtCodePoints() {
        return parent::testPeekAtCodePoints();
    }

    /**
     * @dataProvider provideStrings
     * @covers MensBeam\Intl\Encoding\XUserDefined::lenChar
     * @covers MensBeam\Intl\Encoding\XUserDefined::lenByte
     * @covers MensBeam\Intl\Encoding\XUserDefined::stateSave
     * @covers MensBeam\Intl\Encoding\XUserDefined::stateApply
     */
    public function testGetStringLength(string $input, array $points) {
        return parent::testGetStringLength($input, $points);
    }

    /**
     * @covers MensBeam\Intl\Encoding\XUserDefined::errDec
     */
    public function testReplacementModes() {
        return parent::testReplacementModes();
    }

    /**
     * @dataProvider provideStrings
     * @covers MensBeam\Intl\Encoding\XUserDefined::rewind
     * @covers MensBeam\Intl\Encoding\XUserDefined::chars
     * @covers MensBeam\Intl\Encoding\XUserDefined::codes
     */
    public function testIterateThroughAString(string $input, array $exp) {
        return parent::testIterateThroughAString($input, $exp);
    }

    /**
     * The third argument is declared explicitly nullable: a null default on a plain "array"
     * type relies on implicit nullability, which PHP 8.4 deprecates.
     *
     * @dataProvider provideStrings
     * @coversNothing
     */
    public function testIterateThroughAStringAllowingSurrogates(string $input, array $strictExp, ?array $relaxedExp = null) {
        return parent::testIterateThroughAStringAllowingSurrogates($input, $strictExp, $relaxedExp);
    }

    /**
     * @coversNothing
     */
    public function testSeekBackOverRandomData() {
        return parent::testSeekBackOverRandomData();
    }

    /**
     * @covers MensBeam\Intl\Encoding\XUserDefined::asciiSpan
     */
    public function testExtractAsciiSpans() {
        parent::testExtractAsciiSpans();
    }

    /**
     * @covers MensBeam\Intl\Encoding\XUserDefined::asciiSpanNot
     */
    public function testExtractNegativeAsciiSpans() {
        parent::testExtractNegativeAsciiSpans();
    }
}

92
tests/cases/TestEncoding.php

@ -0,0 +1,92 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\TestCase;
use MensBeam\Intl\Encoding;
use MensBeam\Intl\Encoding\Encoder;
use MensBeam\Intl\Encoding\UTF16BE;
use MensBeam\Intl\Encoding\UTF16LE;
use MensBeam\Intl\Encoding\UTF8;
/**
 * Tests for the static label-matching and factory methods of the Encoding class.
 */
class TestEncoding extends \PHPUnit\Framework\TestCase {
    /**
     * A label must match its expected data as given, upper-cased, and surrounded by whitespace.
     *
     * @dataProvider provideLabelData
     */
    public function testMatchALabelToAnEncoding(string $label, array $exp) {
        $this->assertSame($exp, Encoding::matchLabel($label));
        $this->assertSame($exp, Encoding::matchLabel(strtoupper($label)));
        $this->assertSame($exp, Encoding::matchLabel(" $label\n\n\r\t"));
    }

    /** An unknown label must yield null. */
    public function testFailToMatchALabelToAnEncoding() {
        $this->assertNull(Encoding::matchLabel("Not a label"));
    }

    /**
     * A decoder created from a label (in the same three spellings) must be an instance of the expected class.
     *
     * @dataProvider provideLabelData
     */
    public function testCreateADecoderFromALabel(string $label, array $data) {
        $this->assertInstanceOf($data['class'], Encoding::createDecoder($label, ""));
        $this->assertInstanceOf($data['class'], Encoding::createDecoder(strtoupper($label), ""));
        $this->assertInstanceOf($data['class'], Encoding::createDecoder(" $label\n\n\r\t", ""));
    }

    /**
     * The decoder class returned for a label and input string must be the one listed by the provider.
     *
     * @dataProvider provideBOMSniffings
     */
    public function testCreateADecoderWhileSniffingBOM(string $label, string $string, string $class) {
        $this->assertInstanceOf($class, Encoding::createDecoder($label, $string));
    }

    /** An unknown label must yield null instead of a decoder. */
    public function testFailToCreateADecoderFromALabel() {
        $this->assertNull(Encoding::createDecoder("Not a label", ""));
    }

    /**
     * Labels flagged as having an encoder must produce an Encoder; all others must produce null.
     *
     * @dataProvider provideLabelData
     */
    public function testCreateAnEncoderFromALabel(string $label, array $data) {
        if ($data['encoder']) {
            $this->assertInstanceOf(Encoder::class, Encoding::createEncoder($label));
            $this->assertInstanceOf(Encoder::class, Encoding::createEncoder(strtoupper($label)));
            $this->assertInstanceOf(Encoder::class, Encoding::createEncoder(" $label\n\n\r\t"));
        } else {
            $this->assertNull(Encoding::createEncoder($label));
            $this->assertNull(Encoding::createEncoder(strtoupper($label)));
            $this->assertNull(Encoding::createEncoder(" $label\n\n\r\t"));
        }
    }

    /** An unknown label must yield null instead of an encoder. */
    public function testFailToCreateAnEncoderFromALabel() {
        $this->assertNull(Encoding::createEncoder("Not a label"));
    }

    /**
     * Yields one row per label declared by the decoder classes found under lib/Encoding.
     *
     * Every PHP file in that directory is reflected; classes which implement the Decoder
     * interface and are instantiable contribute their NAME constant and each entry of their
     * LABELS constant. Each row is the label followed by an array with the keys 'label',
     * 'name', 'class' and 'encoder'; 'encoder' is false for UTF-16LE, UTF-16BE and replacement.
     */
    public function provideLabelData() {
        $ns = "MensBeam\\Intl\\Encoding\\";
        $labels = [];
        $names = [];
        foreach (new \GlobIterator(\MensBeam\Intl\BASE."/lib/Encoding/*.php", \FilesystemIterator::CURRENT_AS_PATHNAME) as $file) {
            $file = basename($file, ".php");
            $className = $ns.$file;
            $class = new \ReflectionClass($className);
            if ($class->implementsInterface(\MensBeam\Intl\Encoding\Decoder::class) && $class->isInstantiable()) {
                $name = $class->getConstant("NAME");
                $names[$name] = $className;
                foreach ($class->getConstant("LABELS") as $label) {
                    $labels[$label] = $name;
                }
            }
        }
        foreach ($labels as $label => $name) {
            $class = $names[$name];
            // strict comparison: only an exact string match excludes an encoding
            $encoder = !in_array($name, ["UTF-16LE", "UTF-16BE", "replacement"], true);
            // PHP turns integer-like string keys into integers, hence the cast back to string
            yield [(string) $label, ['label' => (string) $label, 'name' => $name, 'class' => $class, 'encoder' => $encoder]];
        }
    }

    /**
     * Supplies rows of label, input string and the decoder class expected for that pair.
     *
     * The rows with a UTF-8 or UTF-16 byte order mark expect the matching decoder whatever
     * the label; the remaining rows expect the decoder named by the label.
     */
    public function provideBOMSniffings() {
        return [
            'No BOM'       => ["UTF-8", "Hello world!", UTF8::class],
            'UTF-8 BOM'    => ["Shift_JIS", "\xEF\xBB\xBFA", UTF8::class],
            'UTF-16BE BOM' => ["UTF-8", "\xFE\xFF\x00A", UTF16BE::class],
            'UTF-16LE BOM' => ["UTF-8", "\xFF\xFEA\x00", UTF16LE::class],
            'GB18030 BOM'  => ["UTF-8", "\x84\x31\x95\x33A", UTF8::class],
        ];
    }
}

48
tests/lib/CoderDecoderTest.php

@ -0,0 +1,48 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\Test;
use \MensBeam\Intl\Encoding\Encoder;
/**
 * Shared encoder tests for encodings which can be both decoded and encoded.
 *
 * Expected output is given either as a hexadecimal string (spaces and letter
 * case are ignored) or as a Throwable whose class and code must be matched.
 */
abstract class CoderDecoderTest extends DecoderTest {
    public function testEncodeCodePoints(bool $fatal, $input, $exp) {
        $testedClass = $this->testedClass;
        $encoder = new Encoder($testedClass::NAME, $fatal);
        $input = (array) $input;
        if (!($exp instanceof \Throwable)) {
            $exp = strtolower(str_replace(" ", "", $exp));
        } else {
            $this->expectException(get_class($exp));
            $this->expectExceptionCode($exp->getCode());
        }
        // encode the whole input in one call
        $this->assertSame($exp, bin2hex($encoder->encode($input)));
        // encode one code point at a time, then finalize
        $pieces = "";
        foreach ($input as $codePoint) {
            $pieces .= $encoder->encodeChar($codePoint);
        }
        $pieces .= $encoder->finalize();
        $this->assertSame($exp, bin2hex($pieces));
    }

    public function testEncodeCodePointsStatically(bool $fatal, $input, $exp) {
        $testedClass = $this->testedClass;
        if (!method_exists($testedClass, "encode")) {
            // nothing to test for classes without a static encoder
            $this->assertTrue(true);
            return;
        }
        if (!($exp instanceof \Throwable)) {
            $exp = strtolower(str_replace(" ", "", $exp));
        } else {
            $this->expectException(get_class($exp));
            $this->expectExceptionCode($exp->getCode());
        }
        $encoded = $testedClass::encode($input, $fatal);
        $this->assertSame($exp, bin2hex($encoded));
    }
}

404
tests/lib/DecoderTest.php

@ -0,0 +1,404 @@
<?php
/** @license MIT
* Copyright 2018 J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\Intl\Test;
use MensBeam\Intl\Encoding\DecoderException;
use MensBeam\Intl\Encoding\ISO2022JP;
use MensBeam\Intl\Encoding\UTF16BE;
use MensBeam\Intl\Encoding\UTF16LE;
use MensBeam\Intl\Encoding\UTF8;
abstract class DecoderTest extends \PHPUnit\Framework\TestCase {
protected $random = "L51yGwEFuatjbZi7wgNC80qYncvauVm1Lh8vCSK/KJs6QxoynMU8TCamx5TNhbjeh5VpWqQ0Q1j/W6u4O/InxBDxk8g83azJFQHzU+L7Npk0bkdofFv2AHDI2SUlXotYeEOnkKa/c6eQiDk8NapS0LGnb64ypKASacAMp6s2wSUU03l6iVVapHsNBgYs0cD++vnG8ckgbGsV3KkE3Lh601u6jviDyeRwbTxLZcUfSS2uIzrvvGWFfw6D4/FOa3uTR1k2Ya6jT+T/F+OdMgWlUPouuAVgLuvFxj9v9ZBnI+FAFc0kX4aT/JoTuBGMm8YS4xPVvczdrPXCUijML5TZrU201uFqeB9LDDWULp1Ai9d41fcD/8GBFrzlpXPIV+hsSJ4HvWswXdDeVKWgSMrQ78pf+zwvD66TA4FjMiEsLLpf9bb+mPiS2Aa3BP0JpjPwi0gdBu8QipLXNGFUUGW/15jGlj3eNynELRAtvyYZnoYIYShsN1TIU+buw8hHOp9iKsKT+fqPaEuuLLtlJ/cqhcxaZhbaWRB6vCQW9mO7f8whl7cpbBOO+NwDDCJZCsULh7rINF2omkexfOZzQSt/LC3yw+Pzqrf5Pmp5YgpMvoNgHcY1FkpsHc48IHMsJ+gex2zltIG51TQBAhy/fWF0KIqd+IPT+qngVGYIw/WuXj0LaK7XIVp33tc6fzuXNv+GUzYwpv4k9ry8R/DW8EX572FXFA49HHxbytSIJLD/+KpE2CE1WOr3ONwOXm6WduUBmFi4bwlRrCKnHqnFtLztVdLwMOauFa8N822XoAnWvHs+8R1DLHtgUyZas3ktp/qjMp5oVsb2PO+VpPFHIighHySgljrPl+sKaPULh7P/rAHXOuS9p9zTZKHrQ4nccl8SnYZlHKdioWo1NK5LRZB0PXYH8Ytu8aWVBmb4lAlpAFbSTqtOhydUJ/lyM29STG5mTV3rbG6tWMsUXBpaX4PrGCnhj40RVdz0BzsgvzLu4PNI+s3TJ6ZKV4hGS5on040xMDC2423DpKHPNa7mbl7J036dFt0JcYeGu07maGxssJnwLbebg5cm36Ecea7cTBWEGFMqiFjLoBEu0Y2CfF/GEbwqOf55/p1ewaZMrunFKd/Mj89qyYU5bp6mwmXSwj10psAA+qtXYm3XzRrLHKfCuiukyPEtvI+RdjbQDtMP1vF5qkmjlQLHXvEDpviJMaqvIPkjGrZkvAej1JX5yka50z0od9LLz8TIernjLLoVZ+cWtpd3kchO6w+zTpIOups4HdD66zaiPJrXIrJwi5bIgwTOWLhVs3ufZ0loFjlWWUh5FlTW+oWl1AD4h/yPBHWglqfMaTTqH75B4XEriy+Bw9k=";
protected $lowerA = "a";
/**
 * Decodes the whole input with nextCode() and checks the code points
 * produced as well as the character and byte positions reported.
 */
public function testDecodeMultipleCharactersAsCodePoints(string $input, array $exp) {
    $decoderClass = $this->testedClass;
    $bytes = $this->prepString($input);
    $decoder = new $decoderClass($bytes);
    $decoded = [];
    $count = 0;
    $this->assertSame($count, $decoder->posChar());
    while (($code = $decoder->nextCode()) !== false) {
        // every code point read must advance the character position by one
        $count++;
        $this->assertSame($count, $decoder->posChar());
        $decoded[] = $code;
    }
    $this->assertSame($exp, $decoded);
    $this->assertSame(strlen($bytes), $decoder->posByte());
}
/**
 * Decodes the whole input with nextChar() and checks the characters
 * produced against the expected code points converted by IntlChar::chr().
 */
public function testDecodeMultipleCharactersAsStrings(string $input, array $exp) {
    $decoderClass = $this->testedClass;
    $expectedChars = [];
    foreach ($exp as $key => $codePoint) {
        $expectedChars[$key] = \IntlChar::chr($codePoint);
    }
    $bytes = $this->prepString($input);
    $decoder = new $decoderClass($bytes);
    $decoded = [];
    // an empty string signals the end of input
    while (($char = $decoder->nextChar()) !== "") {
        $decoded[] = $char;
    }
    $this->assertSame($expectedChars, $decoded);
    $this->assertSame(strlen($bytes), $decoder->posByte());
}
/**
 * Reads to the end of the input, then steps backwards one character at a
 * time, checking that the code points are seen again in reverse order.
 */
public function testSTepBackThroughAString(string $input, array $exp) {
    $decoderClass = $this->testedClass;
    $decoder = new $decoderClass($this->prepString($input));
    $expected = array_reverse($exp);
    $seen = [];
    $position = 0;
    // consume the whole string, tracking the character position
    while ($decoder->nextCode() !== false) {
        $position++;
        $this->assertSame($position, $decoder->posChar());
    }
    $this->assertSame(count($expected), $position);
    // walk back to the start, re-reading each character along the way
    while ($decoder->posChar()) {
        $this->assertSame(0, $decoder->seek(-1), "Error stepping back to position ".($position - 1));
        $position--;
        $this->assertSame($position, $decoder->posChar());
        $seen[] = $decoder->nextCode();
        // undo the read so that the next iteration steps back further
        $decoder->seek(-1);
    }
    $this->assertEquals($expected, $seen);
}
/**
 * Seeks forwards and backwards through the subclass-provided seek string,
 * checking the return value of seek() and the character and byte positions
 * after every step. Skipped when the subclass provides no seek string.
 */
public function testSeekThroughAString() {
$class = $this->testedClass;
if (!$this->seekString) {
$this->markTestSkipped();
return;
}
$input = $this->prepString($this->seekString);
$off = $this->seekOffsets;
$s = new $class($input);
// expected byte position at character zero: 3 for UTF-8, 2 for UTF-16, otherwise 0
// NOTE(review): presumably the length of a byte order mark in the seek string; confirm against the subclasses
$bom = [
UTF8::class => 3,
UTF16BE::class => 2,
UTF16LE::class => 2,
][$this->testedClass] ?? 0;
// initial state
$this->assertSame(0, $s->posChar());
$this->assertSame($bom, $s->posByte());
// seeking by zero must not move
$this->assertSame(0, $s->seek(0));
$this->assertSame(0, $s->posChar());
$this->assertSame($bom, $s->posByte());
// seeking back from the start must not move; a return value of 1 is expected
$this->assertSame(1, $s->seek(-1));
$this->assertSame(0, $s->posChar());
$this->assertSame($bom, $s->posByte());
// forward by one, two, then four characters
$this->assertSame(0, $s->seek(1));
$this->assertSame(1, $s->posChar());
$this->assertSame($off[1], $s->posByte());
$this->assertSame(0, $s->seek(2));
$this->assertSame(3, $s->posChar());
$this->assertSame($off[3], $s->posByte());
$this->assertSame(0, $s->seek(4));
$this->assertSame(7, $s->posChar());
$this->assertSame($off[7], $s->posByte());
// seeking forward from character 7 must not advance the character position; a return value of 1 is expected
$this->assertSame(1, $s->seek(1));
$this->assertSame(7, $s->posChar());
// for ISO-2022-JP the byte position is expected to advance by three here even though the character position does not
if ($this->testedClass !== ISO2022JP::class) {
$this->assertSame($off[7], $s->posByte());
} else {
$this->assertSame($off[7] + 3, $s->posByte());
}
// back by three characters
$this->assertSame(0, $s->seek(-3));
$this->assertSame(4, $s->posChar());
$this->assertSame($off[4], $s->posByte());
// back by ten from character 4 stops at the start; a return value of 6 is expected
$this->assertSame(6, $s->seek(-10));
$this->assertSame(0, $s->posChar());
$this->assertSame($bom, $s->posByte());
// forward by five, then rewind to the initial state
$this->assertSame(0, $s->seek(5));
$this->assertSame(5, $s->posChar());
$this->assertSame($off[5], $s->posByte());
$s->rewind();
$this->assertSame(0, $s->posChar());
$this->assertSame($bom, $s->posByte());
}
/**
 * Reads the single-character string in $this->lowerA to its end and then
 * once more, first with nextChar() and then with nextCode(), checking the
 * positions and eof() after each read.
 */
public function testTraversePastTheEndOfAString() {
$class = $this->testedClass;
$s = new $class($this->lowerA);
$l = strlen($this->lowerA);
// read as characters
$this->assertSame(0, $s->posChar());
$this->assertSame(0, $s->posByte());
$this->assertFalse($s->eof());
$this->assertSame("a", $s->nextChar());
$this->assertSame(1, $s->posChar());
$this->assertSame($l, $s->posByte());
$this->assertTrue($s->eof());
// reading past the end yields an empty string and leaves the positions unchanged
$this->assertSame("", $s->nextChar());
$this->assertSame(1, $s->posChar());
$this->assertSame($l, $s->posByte());
$this->assertTrue($s->eof());
// read as code points, using a fresh decoder
$s = new $class($this->lowerA);
$this->assertSame(0, $s->posChar());
$this->assertSame(0, $s->posByte());
$this->assertFalse($s->eof());
$this->assertSame(ord("a"), $s->nextCode());
$this->assertSame(1, $s->posChar());
$this->assertSame($l, $s->posByte());
$this->assertTrue($s->eof());
// reading past the end yields false and leaves the positions unchanged
$this->assertSame(false, $s->nextCode());
$this->assertSame(1, $s->posChar());
$this->assertSame($l, $s->posByte());
$this->assertTrue($s->eof());
}
/**
 * Checks that peekChar() returns the upcoming characters without moving
 * the decoder. Skipped when the subclass provides no seek string.
 */
public function testPeekAtCharacters() {
    $decoderClass = $this->testedClass;
    if (!$this->seekString) {
        $this->markTestSkipped();
        return;
    }
    $bytes = $this->prepString($this->seekString);
    $offsets = $this->seekOffsets;
    $points = $this->seekCodes;
    $decoder = new $decoderClass($bytes);
    // asserts that the decoder sits at the given character and its byte offset
    $assertAt = function(int $char) use ($decoder, $offsets) {
        $this->assertSame($char, $decoder->posChar());
        $this->assertSame($offsets[$char], $decoder->posByte());
    };
    $decoder->seek(2);
    $assertAt(2);
    $oneChar = \IntlChar::chr($points[2]);
    $this->assertSame(bin2hex($oneChar), bin2hex($decoder->peekChar()));
    $assertAt(2);
    $twoChars = \IntlChar::chr($points[2]).\IntlChar::chr($points[3]);
    $this->assertSame(bin2hex($twoChars), bin2hex($decoder->peekChar(2)));
    $assertAt(2);
    $decoder->seek(3);
    $assertAt(5);
    // three characters are requested, but only two are expected back
    $lastChars = \IntlChar::chr($points[5]).\IntlChar::chr($points[6]);
    $this->assertSame(bin2hex($lastChars), bin2hex($decoder->peekChar(3)));
    $assertAt(5);
    // a negative count yields nothing
    $this->assertSame("", $decoder->peekChar(-5));
    $assertAt(5);
}
/**
 * Checks that peekCode() returns the upcoming code points without moving
 * the decoder. Skipped when the subclass provides no seek string.
 */
public function testPeekAtCodePoints() {
    $decoderClass = $this->testedClass;
    if (!$this->seekString) {
        $this->markTestSkipped();
        return;
    }
    $bytes = $this->prepString($this->seekString);
    $offsets = $this->seekOffsets;
    $points = $this->seekCodes;
    $decoder = new $decoderClass($bytes);
    // asserts that the decoder sits at the given character and its byte offset
    $assertAt = function(int $char) use ($decoder, $offsets) {
        $this->assertSame($char, $decoder->posChar());
        $this->assertSame($offsets[$char], $decoder->posByte());
    };
    $decoder->seek(2);
    $assertAt(2);
    $this->assertSame([$points[2]], $decoder->peekCode());
    $assertAt(2);
    $this->assertSame([$points[2], $points[3]], $decoder->peekCode(2));
    $assertAt(2);
    $decoder->seek(3);
    $assertAt(5);
    // three code points are requested, but only two are expected back
    $this->assertSame([$points[5], $points[6]], $decoder->peekCode(3));
    $assertAt(5);
    // a negative count yields nothing
    $this->assertSame([], $decoder->peekCode(-5));
    $assertAt(5);
}
/**
 * Checks lenChar() and lenByte() against the expected values, and that
 * neither call moves the decoder from its current position.
 */
public function testGetStringLength(string $input, array $points) {
    $decoderClass = $this->testedClass;
    $bytes = $this->prepString($input);
    $decoder = new $decoderClass($bytes);
    // move off the start so that an unintended rewind would be noticed
    $decoder->seek(1);
    $charBefore = $decoder->posChar();
    $byteBefore = $decoder->posByte();
    $this->assertSame(count($points), $decoder->lenChar());
    $this->assertSame($charBefore, $decoder->posChar());
    $this->assertSame($byteBefore, $decoder->posByte());
    $this->assertSame(strlen($bytes), $decoder->lenByte());
    $this->assertSame($charBefore, $decoder->posChar());
    $this->assertSame($byteBefore, $decoder->posByte());
}
/**
 * Checks how invalid input is handled in both modes: with the second
 * constructor argument false a replacement character (U+FFFD) is expected,
 * and with it true a DecoderException is expected from nextCode(),
 * peekCode() and peekChar(). The input comes from the subclass-provided
 * $brokenChar; nothing is tested when that is empty.
 */
public function testReplacementModes() {
if (!$this->brokenChar) {
// decoder for this encoding never produces errors
$this->assertTrue(true);
return;
}
$class = $this->testedClass;
$input = $this->prepString($this->brokenChar);
// officially test replacement characters (already effectively tested by other tests)
$s = new $class($input, false);
$s->seek(1);
$this->assertSame(0xFFFD, $s->nextCode());
// the result of this backward seek is not checked
$s->seek(-2);
// test fatal mode
$s = new $class($input, true);
$s->seek(1);
// capture either the decoded value or the exception, so that the assertion fails if nothing was thrown
try {
$p = $s->nextCode();
} catch (DecoderException $e) {
$p = $e;
} finally {
$this->assertInstanceOf(DecoderException::class, $p);
}
// the character position is expected to have advanced past the invalid character despite the exception
$this->assertSame(2, $s->posChar());
$this->assertSame(0x00, $s->nextCode());
$this->assertSame(3, $s->posChar());
// seeking back over the invalid character is expected to succeed
$this->assertSame(0, $s->seek(-2));
$this->assertSame(1, $s->posChar());
// peeking must also throw, and must leave the position unchanged
try {
$p = $s->peekCode();
} catch (DecoderException $e) {
$p = $e;
} finally {
$this->assertInstanceOf(DecoderException::class, $p);
}
$this->assertSame(1, $s->posChar());
try {
$p = $s->peekChar();
} catch (DecoderException $e) {
$p = $e;
} finally {
$this->assertInstanceOf(DecoderException::class, $p);
}
$this->assertSame(1, $s->posChar());
}
/** Runs the shared iteration checks with surrogates disallowed */
public function testIterateThroughAString(string $input, array $exp) {
    $allowSurrogates = false;
    $this->iterateThroughAString($input, $exp, $allowSurrogates);
}
/**
 * Runs the shared iteration checks with surrogates allowed.
 *
 * @param string $input The input bytes as a hexadecimal string
 * @param array $strictExp The code points expected when surrogates are disallowed
 * @param array|null $relaxedExp The code points expected when surrogates are allowed; when null, $strictExp is used instead
 */
public function testIterateThroughAStringAllowingSurrogates(string $input, array $strictExp, ?array $relaxedExp = null) {
    // the nullable type is spelled out: implicitly nullable parameters are deprecated as of PHP 8.4
    $exp = $relaxedExp ?? $strictExp;
    $this->iterateThroughAString($input, $exp, true);
}
/**
 * Decodes the random bytes in $this->random forwards, recording the byte
 * position and code point of each character, then steps backwards checking
 * that the same positions and code points are seen again.
 */
public function testSeekBackOverRandomData() {
    $decoderClass = $this->testedClass;
    $bytes = base64_decode($this->random);
    $decoder = new $decoderClass($bytes);
    $length = strlen($bytes);
    $forward = [];
    do {
        // the position is taken before the character is read
        $forward[] = [$decoder->posByte(), $decoder->nextCode()];
    } while ($decoder->posByte() < $length);
    foreach (array_reverse($forward) as [$expPos, $expCode]) {
        $this->assertSame(0, $decoder->seek(-1), "Start of string reached prematureley");
        $this->assertSame($expPos, $decoder->posByte(), "Position desynchronized");
        $this->assertSame($expCode, $decoder->peekCode(1)[0], "Incorrect character decoded at byte position $expPos");
    }
}
/**
 * Checks the codes() and chars() iterators of a decoder: the keys and
 * values they produce, that an exhausted decoder yields nothing, and that
 * rewinding makes the whole string available again.
 *
 * @param string $input The input bytes as a hexadecimal string
 * @param array $exp The expected code points
 * @param bool $allowSurrogates Passed as the third constructor argument of the decoder
 */
protected function iterateThroughAString(string $input, array $exp, bool $allowSurrogates) {
    $class = $this->testedClass;
    $input = $this->prepString($input);
    $s = new $class($input, false, $allowSurrogates);
    $a = 0;
    $this->assertTrue(true); // prevent risky test of empty string
    foreach ($s->codes() as $index => $p) {
        $this->assertSame($a, $index, "Character key at index $a reported incorrectly");
        $this->assertSame($exp[$a], $p, "Character at index $a decoded incorrectly");
        $a++;
    }
    // an exhausted decoder must yield nothing
    $a = 0;
    foreach ($s->codes() as $p) {
        $a++;
    }
    $this->assertSame(0, $a);
    // after rewinding, every code point must be yielded again
    $s->rewind();
    foreach ($s->codes() as $p) {
        $a++;
    }
    $this->assertSame(sizeof($exp), $a);
    $exp = array_map(function($v) {
        return \IntlChar::chr($v);
    }, $exp);
    // The decoder is exhausted and the counter is at the string length at
    // this point; without rewinding and resetting, the loop below would
    // never execute and its assertions would silently be skipped
    $s->rewind();
    $a = 0;
    foreach ($s->chars() as $index => $p) {
        $this->assertSame($a, $index, "Character key at index $a reported incorrectly");
        $this->assertSame(bin2hex($exp[$a]), bin2hex($p), "Character at index $a decoded incorrectly");
        $a++;
    }
    // an exhausted decoder must yield nothing
    $a = 0;
    foreach ($s->chars() as $p) {
        $a++;
    }
    $this->assertSame(0, $a);
    // after rewinding, every character must be yielded again
    $s->rewind();
    foreach ($s->chars() as $p) {
        $a++;
    }
    $this->assertSame(sizeof($exp), $a);
}
/**
 * Checks asciiSpan() against the subclass-provided span string, with and
 * without a length limit, and at successive decoder positions.
 */
public function testExtractAsciiSpans() {
    $everyByte = $this->allBytes();
    $decoderClass = $this->testedClass;
    $decoder = new $decoderClass($this->prepString($this->spanString));
    // a mask which does not contain the next character yields nothing
    $this->assertSame("", $decoder->asciiSpan("az"));
    // the second argument limits the length of the span
    $this->assertSame("A", $decoder->asciiSpan("AZ", 1));
    $this->assertSame("Z", $decoder->asciiSpan("AZ"));
    // even with every byte in the mask, nothing is expected for the next two characters
    $this->assertSame("", $decoder->asciiSpan($everyByte));
    $decoder->nextChar();
    $this->assertSame("", $decoder->asciiSpan($everyByte));
    $decoder->nextChar();
    $this->assertSame("09", $decoder->asciiSpan($everyByte));
}
/**
 * Checks asciiSpanNot() against the subclass-provided span string, with
 * and without a length limit, and at successive decoder positions.
 */
public function testExtractNegativeAsciiSpans() {
    $decoderClass = $this->testedClass;
    $decoder = new $decoderClass($this->prepString($this->spanString));
    // a mask which contains the next character yields nothing
    $this->assertSame("", $decoder->asciiSpanNot("AZ"));
    // the second argument limits the length of the span
    $this->assertSame("A", $decoder->asciiSpanNot("az", 1));
    $this->assertSame("Z", $decoder->asciiSpanNot("az"));
    // even with an empty mask, nothing is expected for the next two characters
    $this->assertSame("", $decoder->asciiSpanNot(""));
    $decoder->nextChar();
    $this->assertSame("", $decoder->asciiSpanNot(""));
    $decoder->nextChar();
    $this->assertSame("09", $decoder->asciiSpanNot(""));
}
/** Converts a string of hexadecimal digits, optionally separated by spaces, into the bytes it denotes */
protected function prepString(string $str): string {
    $hex = str_replace(" ", "", $str);
    return hex2bin($hex);
}
/** Returns a 256-byte string containing every byte value from 0x00 to 0xFF in ascending order */
protected function allBytes(): string {
    return implode("", array_map("chr", range(0x00, 0xFF)));
}
}

15
tests/phpunit.xml

@ -7,7 +7,6 @@
convertWarningsToExceptions="false"
beStrictAboutTestsThatDoNotTestAnything="true"
beStrictAboutOutputDuringTests="true"
beStrictAboutTestSize="true"
stopOnError="true">
<filter>
@ -17,8 +16,20 @@
</filter>
<testsuites>
<testsuite name="UTF-8">
<testsuite name="Encoding">
<file>cases/TestEncoding.php</file>
<file>cases/Encoding/TestUTF8.php</file>
<file>cases/Encoding/TestUTF16LE.php</file>
<file>cases/Encoding/TestUTF16BE.php</file>
<file>cases/Encoding/TestSingleByte.php</file>
<file>cases/Encoding/TestXUserDefined.php</file>
<file>cases/Encoding/TestEUCJP.php</file>
<file>cases/Encoding/TestGB18030.php</file>
<file>cases/Encoding/TestBig5.php</file>
<file>cases/Encoding/TestEUCKR.php</file>
<file>cases/Encoding/TestShiftJIS.php</file>
<file>cases/Encoding/TestISO2022JP.php</file>
<file>cases/Encoding/TestReplacement.php</file>
</testsuite>
</testsuites>
</phpunit>

256
tools/mkindex.php

@ -0,0 +1,256 @@
<?php
declare(strict_types=1);
// This script produces the index lookup tables
// for a given encoding from the source data at WHATWG
// Maps each supported label (as given on the command line) to the name of
// the generator function below which prints the tables for that encoding
$labels = [
'big5' => "big5",
'euc-jp' => "eucjp",
'euc-kr' => "euckr",
'gb18030' => "gb18030",
'ibm866' => "single_byte",
'iso-2022-jp' => "iso2022jp",
'iso-8859-10' => "single_byte",
'iso-8859-13' => "single_byte",
'iso-8859-14' => "single_byte",
'iso-8859-15' => "single_byte",
'iso-8859-16' => "single_byte",
'iso-8859-2' => "single_byte",
'iso-8859-3' => "single_byte",
'iso-8859-4' => "single_byte",
'iso-8859-5' => "single_byte",
'iso-8859-6' => "single_byte",
'iso-8859-7' => "single_byte",
'iso-8859-8' => "single_byte",
'koi8-r' => "single_byte",
'koi8-u' => "single_byte",
'macintosh' => "single_byte",
'shift-jis' => "shiftjis",
'windows-1250' => "single_byte",
'windows-1251' => "single_byte",
'windows-1252' => "single_byte",
'windows-1253' => "single_byte",
'windows-1254' => "single_byte",
'windows-1255' => "single_byte",
'windows-1256' => "single_byte",
'windows-1257' => "single_byte",
'windows-1258' => "single_byte",
'windows-874' => "single_byte",
'x-mac-cyrillic' => "single_byte",
];
// the label is the first command-line argument, matched case-insensitively
$label = trim(strtolower($argv[1] ?? ""));
if (!isset($labels[$label])) {
    die("Invalid label specified. Must be one of: ".json_encode(array_keys($labels))."\n");
}
// call the generator function registered for the label
$generator = $labels[$label];
$generator($label);
// encoding-specific output generators
/** Prints the character, code point, and encoder tables of a single-byte encoding */
function single_byte(string $label) {
    $index = read_index($label, "https://encoding.spec.whatwg.org/index-$label.txt");
    $chars = serialize_char_array($index);
    $codes = serialize_point_array($index);
    $bytes = serialize_single_byte_array($index);
    echo "const TABLE_DEC_CHAR = $chars;\n"
        ."const TABLE_DEC_CODE = $codes;\n"
        ."const TABLE_ENC = $bytes;\n";
}
/** Prints the code point, pointer override, range, and offset tables of gb18030 */
function gb18030(string $label) {
    $index = read_index($label, "https://encoding.spec.whatwg.org/index-$label.txt");
    $codes = serialize_point_array($index);
    $pointers = serialize_point_array(make_override_array($index));
    $ranges = read_index($label, "https://encoding.spec.whatwg.org/index-$label-ranges.txt");
    // the range starts go in one list (they will actually be used as range ends),
    // and the starting code points go in another
    $rangeEnds = array_keys($ranges);
    $offsets = array_values($ranges);
    // fudge the top of the ranges
    // see https://encoding.spec.whatwg.org/#index-gb18030-ranges-code-point Step 1
    // we also add 0x110000 (one beyond the top of the Unicode range) to the offsets for encoding
    $lastStart = array_pop($rangeEnds);
    array_push($rangeEnds, 39420, $lastStart, 1237576);
    array_splice($offsets, -1, 0, "null");
    $offsets[] = 0x110000;
    $rangeList = "[".implode(",", $rangeEnds)."]";
    $offsetList = "[".implode(",", $offsets)."]";
    echo "const TABLE_CODES = $codes;\n";
    echo "const TABLE_POINTERS = $pointers;\n";
    echo "const TABLE_RANGES = $rangeList;\n";
    echo "const TABLE_OFFSETS = $offsetList;\n";
}
/**
 * Prints the tables for Big5: the four pointers which decode to two code
 * points, the decoder tables for the Hong Kong Supplement and for the rest
 * of Big5, and the encoder overrides for duplicated code points.
 */
function big5(string $label) {
// Big5 has unusually complex encoding requirements
// see https://encoding.spec.whatwg.org/#index-big5-pointer for particulars
$table = read_index($label, "https://encoding.spec.whatwg.org/index-$label.txt");
// these pointers each yield two code points; the literal is printed verbatim
$specials = <<<ARRAY_LITERAL
[
1133 => [0x00CA, 0x0304],
1135 => [0x00CA, 0x030C],
1164 => [0x00EA, 0x0304],
1166 => [0x00EA, 0x030C],
]
ARRAY_LITERAL;
// split Hong Kong Supplement code points from the rest of Big5
$stop = (0xA1 - 0x81) * 157;
$hk = [];
$nhk = [];
foreach ($table as $pointer => $code) {
if ($pointer < $stop) {
$hk[$pointer] = $code;
} else {
$nhk[$pointer] = $code;
}
}
// search the Big5 rump for duplicates
$dupes = make_override_array($nhk);
// remove those duplicates which should use the last code point
foreach ([0x2550, 0x255E, 0x2561, 0x256A, 0x5341, 0x5345] as $code) {
unset($dupes[$code]);
}
// serialize and print; Hong Kong characters are kept separate as they are not used in encoding
$codes_tw = serialize_point_array($nhk);
$codes_hk = serialize_point_array($hk);
$enc = serialize_point_array($dupes);
echo "const TABLE_DOUBLES = $specials;\n";
echo "const TABLE_CODES_TW = $codes_tw;\n";
echo "const TABLE_CODES_HK = $codes_hk;\n";
echo "const TABLE_POINTERS = $enc;\n";
}
/** Prints the code point table of EUC-KR */
function euckr(string $label) {
    $index = read_index($label, "https://encoding.spec.whatwg.org/index-$label.txt");
    $serialized = serialize_point_array($index);
    echo "const TABLE_CODES = $serialized;\n";
}
/** Prints the JIS X 0208 and JIS X 0212 tables of EUC-JP, along with the encoder overrides for duplicated code points */
function eucjp(string $label) {
    $index0212 = read_index("jis0212", "https://encoding.spec.whatwg.org/index-jis0212.txt");
    $tableJis0212 = serialize_point_array($index0212);
    $index0208 = read_index("jis0208", "https://encoding.spec.whatwg.org/index-jis0208.txt");
    $overrides = serialize_point_array(make_override_array($index0208));
    $tableJis0208 = serialize_point_array($index0208);
    echo "const TABLE_JIS0208 = $tableJis0208;\n"
        ."const TABLE_JIS0212 = $tableJis0212;\n"
        ."const TABLE_POINTERS = $overrides;\n";
}
/**
 * Prints the JIS X 0208 and katakana tables of ISO-2022-JP, along with the
 * encoder overrides for duplicated JIS X 0208 code points.
 *
 * @param string $label The label of the encoding (unused; the index files are fixed)
 */
function iso2022jp(string $label) {
    // the label passed to read_index() only appears in its error message, so
    // it should name the index actually being fetched
    $kana = serialize_point_array(read_index("iso-2022-jp-katakana", "https://encoding.spec.whatwg.org/index-iso-2022-jp-katakana.txt"));
    $table = read_index("jis0208", "https://encoding.spec.whatwg.org/index-jis0208.txt");
    $dupes = serialize_point_array(make_override_array($table));
    $jis0208 = serialize_point_array($table);
    echo "const TABLE_JIS0208 = $jis0208;\n";
    echo "const TABLE_KATAKANA = $kana;\n";
    echo "const TABLE_POINTERS = $dupes;\n";
}
/** Prints the shared and decode-only code point tables of Shift_JIS, along with the encoder overrides for duplicated code points */
function shiftjis(string $label) {
    $index = read_index($label, "https://encoding.spec.whatwg.org/index-jis0208.txt");
    // pointers 8272 through 8835 are excluded from encoding consideration
    $decodeOnly = [];
    $shared = [];
    foreach ($index as $pointer => $code) {
        if ($pointer >= 8272 && $pointer <= 8835) {
            $decodeOnly[$pointer] = $code;
        } else {
            $shared[$pointer] = $code;
        }
    }
    // the shared set is used for both encoding and decoding, so only it is searched for duplicates
    $overrides = make_override_array($shared);
    echo "const TABLE_CODES = ".serialize_point_array($shared).";\n"
        ."const TABLE_CODES_EXTRA = ".serialize_point_array($decodeOnly).";\n"
        ."const TABLE_POINTERS = ".serialize_point_array($overrides).";\n";
}
// generic helper functions
/**
 * Retrieves an index file and parses it into an array mapping pointers to
 * code points, both as integers. The script is terminated if nothing could
 * be retrieved.
 *
 * @param string $label A name for the index, used only in the error message
 * @param string $url The location of the index file
 */
function read_index(string $label, string $url): array {
    $data = file_get_contents($url);
    if (!$data) {
        die("index file for '$label' could not be retrieved from network.");
    }
    // data lines consist of a decimal pointer followed by a hexadecimal code point
    preg_match_all("/^\s*(\d+)\s+0x([0-9A-Z]+)/m", $data, $matches, \PREG_SET_ORDER);
    $index = [];
    foreach ($matches as $match) {
        $pointer = (int) $match[1];
        $index[$pointer] = (int) hexdec($match[2]);
    }
    return $index;
}
/**
 * Serializes a table of integers as a PHP array literal, printing keys
 * only where they do not follow on from the previous key.
 */
function serialize_point_array(array $table): string {
    $entries = [];
    $expected = 0;
    foreach ($table as $pointer => $value) {
        // sequential keys are implied by position and can be omitted
        $entries[] = ($pointer === $expected) ? "$value" : "$pointer=>$value";
        $expected = $pointer;
        $expected++;
    }
    return "[".implode(",", $entries)."]";
}
/**
 * Serializes a table of code points as a PHP array literal of
 * double-quoted strings using Unicode escapes, printing keys only where
 * they do not follow on from the previous key.
 *
 * @param array $table An array mapping pointers to integer code points
 * @return string PHP source for the array, e.g. ["\u{80}",5=>"\u{20ac}"]
 */
function serialize_char_array(array $table): string {
    $out = [];
    $i = 0;
    foreach ($table as $index => $code) {
        // non-sequential indices must be printed, but others can be omitted
        if ($index === $i) {
            $key = "";
        } else {
            $key = "$index=>";
            $i = $index;
        }
        // code points are integers, but \u{...} escapes are read as
        // hexadecimal, so the number must be converted before printing
        $out[] = $key."\"\\u{".dechex($code)."}\"";
        $i++;
    }
    return "[".implode(",", $out)."]";
}
// this is only used for single-byte encoders; other encoders instead flip their decoder arrays with overrides for duplicates or special cases
/**
 * Serializes the encoder table of a single-byte encoding as a PHP array
 * literal mapping code points to one-byte strings. The byte for a pointer
 * is the pointer plus 128; entries are sorted by code point, and keys are
 * printed only where they do not follow on from the previous key.
 */
function serialize_single_byte_array(array $table): string {
    // flip the table, replacing each pointer with the escape for its byte
    $bytes = [];
    foreach ($table as $pointer => $code) {
        $bytes[$code] = sprintf('"\\x%02X"', $pointer + 128);
    }
    ksort($bytes);
    $entries = [];
    $expected = 0;
    foreach ($bytes as $code => $escape) {
        $entries[] = ($code == $expected) ? "$escape" : "$code=>$escape";
        $expected = $code;
        $expected++;
    }
    return "[".implode(",", $entries)."]";
}
// indexes with duplicate code points by default need to match the lowest pointer when encoding
// PHP's array_flip() function retains the last duplicate rather than the first, so we have to find duplicates
/**
 * Finds the code points which occur more than once in a table and returns
 * an array mapping each of them to the first pointer at which it occurs,
 * sorted by code point.
 */
function make_override_array(array $table): array {
    $firstPointers = [];
    $overrides = [];
    foreach ($table as $pointer => $codePoint) {
        if (array_key_exists($codePoint, $firstPointers)) {
            // a repeat: record the pointer of the first occurrence
            $overrides[$codePoint] = $firstPointers[$codePoint];
        } else {
            $firstPointers[$codePoint] = $pointer;
        }
    }
    ksort($overrides);
    return $overrides;
}

40
tools/mklabels.php

@ -0,0 +1,40 @@
<?php
// this script reads the names and labels from each concrete
// class in the Encoding set and generates tables mapping labels
// to names and names to classes
use MensBeam\Intl\Encoding\Decoder;
define("BASE", dirname(__DIR__).DIRECTORY_SEPARATOR);
require_once BASE."vendor".DIRECTORY_SEPARATOR."autoload.php";

$ns = "\\MensBeam\\Intl\\Encoding\\";
$labels = [];
$names = [];
// inspect every class file in the Encoding directory, keeping only instantiable decoders
$sources = new \GlobIterator(BASE."/lib/Encoding/*.php", \FilesystemIterator::CURRENT_AS_PATHNAME);
foreach ($sources as $path) {
    $className = $ns.basename($path, ".php");
    $reflection = new \ReflectionClass($className);
    if (!$reflection->implementsInterface(Decoder::class) || !$reflection->isInstantiable()) {
        continue;
    }
    $name = $reflection->getConstant("NAME");
    $names[$name] = $className;
    foreach ($reflection->getConstant("LABELS") as $label) {
        $labels[$label] = $name;
    }
}

// labels map to quoted encoding names
$labelEntries = [];
foreach ($labels as $label => $name) {
    $labelEntries[] = "'$label'=>\"$name\"";
}
// names map to class-name constants
$nameEntries = [];
foreach ($names as $name => $className) {
    $nameEntries[] = "'$name'=>$className::class";
}
echo "const LABEL_MAP = [".implode(",", $labelEntries)."];\n";
echo "const NAME_MAP = [".implode(",", $nameEntries)."];\n";

72
tools/mktest.php

@ -0,0 +1,72 @@
<?php
declare(strict_types=1);
// this script generates a test series from the Web Platform test suite which exercises the index tables of multi-byte encodings with single characters
// they are pedantic sets of tests, and so the test suite itself only uses this series in optional tests
// Maps each supported label to the test files to retrieve for it; each
// entry maps the name printed for the resulting test series to the URL of
// its source file
$tests = [
'gb18030' => [
// the Web Platform test suite does not have tests for gb18030, but a pull request was made in 2016 with a set of tests
'two-byte GBK' => "https://raw.githubusercontent.com/web-platform-tests/wpt/5847108cb16dc0047331da3f746652f35b3e9c90/encoding/legacy-mb-schinese/gb18030/gb18030_chars.html",
'four-byte Han' => "https://raw.githubusercontent.com/web-platform-tests/wpt/5847108cb16dc0047331da3f746652f35b3e9c90/encoding/legacy-mb-schinese/gb18030/gb18030_extra_han_chars.html",
'four-byte Hangul' => "https://raw.githubusercontent.com/web-platform-tests/wpt/5847108cb16dc0047331da3f746652f35b3e9c90/encoding/legacy-mb-schinese/gb18030/gb18030_extra_hangul_chars.html",
'four-byte miscellaneous' => "https://raw.githubusercontent.com/web-platform-tests/wpt/5847108cb16dc0047331da3f746652f35b3e9c90/encoding/legacy-mb-schinese/gb18030/gb18030_extra_misc_chars.html",
'four-byte private use' => "https://raw.githubusercontent.com/web-platform-tests/wpt/5847108cb16dc0047331da3f746652f35b3e9c90/encoding/legacy-mb-schinese/gb18030/gb18030_extra_pua_chars.html",
],
'big5' => [
'standard characters' => "https://raw.githubusercontent.com/web-platform-tests/wpt/master/encoding/legacy-mb-tchinese/big5/big5_chars.html",
'extended characters' => "https://raw.githubusercontent.com/web-platform-tests/wpt/master/encoding/legacy-mb-tchinese/big5/big5_chars_extra.html",
],
'euc-kr' => [
'characters' => "https://raw.githubusercontent.com/web-platform-tests/wpt/master/encoding/legacy-mb-korean/euc-kr/euckr_chars.html",
],
'euc-jp' => [
'characters' => "https://raw.githubusercontent.com/web-platform-tests/wpt/master/encoding/legacy-mb-japanese/euc-jp/eucjp_chars.html",
],
'iso-2022-jp' => [
'characters' => "https://raw.githubusercontent.com/web-platform-tests/wpt/master/encoding/legacy-mb-japanese/iso-2022-jp/iso2022jp_chars.html",
],
'shiftjis' => [
'characters' => "https://raw.githubusercontent.com/web-platform-tests/wpt/master/encoding/legacy-mb-japanese/shift_jis/sjis_chars.html",
],
];
// the label is the first command-line argument, matched case-insensitively
$label = trim(strtolower($argv[1] ?? ""));
if (!isset($tests[$label])) {
    die("Invalid label specified. Must be one of: ".json_encode(array_keys($tests)));
}
// print one test series per source file: the input byte strings, then the expected code points
foreach ($tests[$label] as $name => $url) {
    [$in, $out] = make_test($label, $url);
    echo "'$name' => [[$in], [$out]],\n";
}
/**
 * Retrieves a test file and extracts its byte sequences and code points.
 * The script is terminated if nothing could be retrieved.
 *
 * @param string $label The label of the encoding, which selects the corrections to apply
 * @param string $url The location of the test file
 * @return array Two comma-separated lists: the byte sequences as quoted hexadecimal strings, and the code points as decimal numbers
 */
function make_test(string $label, string $url): array {
    $html = file_get_contents($url);
    if (!$html) {
        die("Could not retrieve $label test $url");
    }
    // each test character is a span carrying its code point and its bytes as hexadecimal
    preg_match_all('/<span data-cp="([^"]+)" data-bytes="([^"]+)">/s', $html, $spans, \PREG_SET_ORDER);
    $inputs = [];
    $codePoints = [];
    foreach ($spans as $span) {
        $bytes = str_replace(" ", "", $span[2]);
        $code = hexdec($span[1]);
        switch ($label) {
            case "gb18030":
                // this test is incorrect or out of date; both Vivaldi and Firefox yield code point 7743
                if ($bytes=="A8BC") {
                    $code = 7743;
                }
                break;
            case "euc-jp":
                // three tests are out of date
                $code = ["5C" => 92, "7E" => 126, "A1DD" => 65293][$bytes] ?? $code;
                break;
            case "shiftjis":
                // three tests are incorrect
                $code = ["5C" => 92, "7E" => 126, "817C" => 0xFF0D][$bytes] ?? $code;
                break;
        }
        // the code point is printed in decimal, and the bytes as a quoted hexadecimal string
        $codePoints[] = $code;
        $inputs[] = '"'.$bytes.'"';
    }
    return [implode(",", $inputs), implode(",", $codePoints)];
}

57
tools/test-big5.html

@ -0,0 +1,57 @@
<!DOCTYPE html>
<meta charset=big5>
<script>
// Sample byte sequences, keyed by a description of what each exercises;
// bytes are written as hexadecimal, optionally separated by spaces
// NOTE(review): presumably consumed by test.js, which is loaded after this script -- confirm there
var sampleStrings = {
'empty string': "",
// valid single characters
'sanity check': "40",
'two-byte character': "D7 D7",
// invalid sequences
'EOF after first byte': "D7",
'low byte after first byte': "D7 39",
'0x80 as first byte': "80 D7 00",
'0xFF as first byte': "FF D7 00",
'invalid high byte as first byte': "81 D7 00",
'0x7F after first byte': "D7 7F",
'0xFF after first byte': "D7 FF",
'invalid high byte after first byte': "D7 81",
'broken string': "00 FF 00",
// double sequences
'double-characters low': "88 62 88 64",
'double-characters high': "88 A3 88 A5",
// mixed string
'mixed string': "7A D7 AA A4 F4 88 62 88 A5",
'mixed string 2': "62 D7 D7 D7 D7 62",
};
// Sample code points, keyed by a label; the last two entries lie outside
// the Unicode range (below zero and above 0x10FFFF)
var sampleCharacters = {
'U+0064': 0x64,
'U+00CA': 0xCA,
'U+3007': 0x3007,
'U+5341': 0x5341,
'U+2561': 0x2561,
'U+256D': 0x256D,
'-1': -1,
'0x110000': 0x110000,
};
// Code points of the string used for seek tests; the comment inside the
// array lists each character with its byte length and byte offset
var seekCodePoints = [
/*
Char 0 U+007A (1 byte) Offset 0
Char 1 U+86CC (2 bytes) Offset 1
Char 2 U+6C34 (2 bytes) Offset 3
Char 3 U+00CA (0 bytes) Offset 5
Char 4 U+0304 (2 bytes) Offset 5
Char 5 U+00EA (0 bytes) Offset 7
Char 6 U+030C (2 bytes) Offset 7
End of string at char 7, offset 9
*/
0x007A,
0x86CC,
0x6C34,
// these four should be replaced with bytes 8862 88A5, which together produce four characters
0x00CA,
0x0304,
0x00EA,
0x030C,
];
</script>
<script src="test.js"></script>

57
tools/test-eucjp.html

@ -0,0 +1,57 @@
<!DOCTYPE html>
<meta charset=euc-jp>
<!-- Chromium does NOT produce correct results as of this writing; use Firefox to generate test data -->
<script>
var sampleStrings = {
'empty string': "",
// sanity checks
'sanity check': "40",
'former ASCII deviations': "5C 7E",
'changed multibyte index': "A1DD",
// JIS X 0201
'JIS X 0201 range': "8EA1 8EDF",
'JIS X 0201 bogus range': "8EA0 8EE0",
'JIS X 0201 truncated character 1': "8E",
'JIS X 0201 truncated character 2': "8E 20",
'JIS X 0201 truncated character 3': "8E FF",
// JIS X 0212
'JIS X 0212 assigned range': "8FA2AF 8FEDE3",
'JIS X 0212 total range': "8FA1A1 8FFEFE",
'JIS X 0212 bogus range 1': "8FA0A1 8FFFFE",
'JIS X 0212 bogus range 2': "8FA1A0 8FFEFF",
'JIS X 0212 truncated character 1': "8FA2",
'JIS X 0212 truncated character 2': "8FA2 20",
'JIS X 0212 truncated character 3': "8FA2 FF",
// JIS X 0208
'JIS X 0208 assigned range': "A1A1 FCFE",
'JIS X 0208 total range': "A1A1 FEFE",
'JIS X 0208 bogus range': "A1A0 A0FE",
'JIS X 0208 truncated character 1': "A1",
'JIS X 0208 truncated character 2': "A1 20",
'JIS X 0208 truncated character 3': "A1 FF",
};
var sampleCharacters = {
'U+0064': 0x64,
'U+00A5': 0xA5,
'U+203E': 0x203E,
'U+3088': 0x3088,
'U+FF96': 0xFF96,
'U+2212': 0x2212,
'U+00E6': 0xE6,
'U+FFE2': 0xFFE2,
'U+2116': 0x2116,
'-1': -1,
'0x110000': 0x110000,
};
// Code points that test.js encodes one at a time (in fatal mode) to generate
// the seek-test byte string, the code point list and the table of byte offsets.
var seekCodePoints = [
0x007A,
0xFF96,
0x3088,
0xFF0D,
0x005C,
0xFF9B,
/* This code point is not encodable and must be manually entered as 8FB0EF */
// (test.js emits the placeholder "()" with an assumed length of 1 for it)
0x4F58,
];
</script>
<script src="test.js"></script>

38
tools/test-euckr.html

@ -0,0 +1,38 @@
<!DOCTYPE html>
<meta charset=euc-kr>
<script>
var sampleStrings = {
'empty string': "",
// valid single characters
'sanity check': "40",
'two-byte character': "D7 D7",
// invalid sequences
'EOF after first byte': "D7",
'low byte after first byte': "D7 39",
'0x80 as first byte': "80 D7 00",
'0xFF as first byte': "FF D7 00",
'0x7F after first byte': "D7 7F",
'0xFF after first byte': "D7 FF",
'non-character': "A5 DC",
// mixed string
'mixed string': "7A D7 AA A4 F4 88 62 88 A5",
'mixed string 2': "62 D7 D7 D7 D7 62",
};
var sampleCharacters = {
'U+0064': 0x64,
'U+00CA': 0x00CA,
'U+ACF2': 0xACF2,
'-1': -1,
'0x110000': 0x110000,
};
var seekCodePoints = [
0x007A,
0xACF2,
0x0020,
0x6C34,
0x0391,
0x03C9,
0x002A,
];
</script>
<script src="test.js"></script>

77
tools/test-gb18030.html

@ -0,0 +1,77 @@
<!DOCTYPE html>
<meta charset=gb18030>
<script>
var sampleStrings = {
'empty string': "",
// valid single characters
'sanity check': "40",
'special case for 0x80': "80",
'four-byte special case': "81 35 F4 37",
'two-byte character': "A8 4E",
'four-byte character': "82 31 A2 37",
// cut sequences
'EOF after first byte': "82",
'EOF after second byte': "82 30",
'EOF after third byte': "82 30 81",
// invalid sequences
'bad first byte': "FF 35 F4 37",
'bad second byte': "81 FF F4 37",
'bad third byte': "81 35 FF 37",
'bad fourth byte': "81 35 F4 FF",
'control first byte': "00 35 F4 37",
'control second byte': "81 00 F4 37",
'control third byte': "81 35 00 37",
'control fourth byte': "81 35 F4 00",
// invalid sequences with clean EOF
'bad first byte (padded)': "FF 35 F4 37 00 00 00 00",
'bad second byte (padded)': "81 FF F4 37 00 00 00 00",
'bad third byte (padded)': "81 35 FF 37 00 00 00 00",
'bad fourth byte (padded)': "81 35 F4 FF 00 00 00 00",
'control first byte (padded)': "00 35 F4 37 00 00 00 00",
'control second byte (padded)': "81 00 F4 37 00 00 00 00",
'control third byte (padded)': "81 35 00 37 00 00 00 00",
'control fourth byte (padded)': "81 35 F4 00 00 00 00 00",
// out-of-range sequences
'void sequence': "84 32 A4 39",
'void sequence 2': "FE 39 FE 39",
// backward seeking tests
'seek test 1': "81 81 81 30",
'seek test 2': "81 81 80",
'seek test 3': "81 81 00",
'seek test 4': "81 81 81 00",
'seek test 5': "81 30 30 30",
'seek test 6': "81 30 81 81",
'seek test 7': "30 30 81 81",
'seek test 8': "F8 83 FE 80",
'seek test 1 (padded)': "00 00 00 00 81 81 81 30 00 00 00 00",
'seek test 2 (padded)': "00 00 00 00 81 81 80 00 00 00 00",
'seek test 3 (padded)': "00 00 00 00 81 81 00 00 00 00 00",
'seek test 4 (padded)': "00 00 00 00 81 81 81 00 00 00 00 00",
'seek test 5 (padded)': "00 00 00 00 81 30 30 30 00 00 00 00",
'seek test 6 (padded)': "00 00 00 00 81 30 81 81 00 00 00 00",
'seek test 7 (padded)': "00 00 00 00 30 30 81 81 00 00 00 00",
'seek test 8 (padded)': "00 00 00 00 F8 83 FE 80 00 00 00 00",
};
var sampleCharacters = {
'U+0064': 0x64,
'U+20AC': 0x20AC,
'U+2164': 0x2164,
'U+3A74': 0x3A74,
'U+E7C7': 0xE7C7,
'U+1D11E': 0x1D11E,
'U+E5E5': 0xE5E5,
'U+3000': 0x3000,
'-1': -1,
'0x110000': 0x110000,
};
var seekCodePoints = [
0x007A,
0x00A2,
0x6C34,
0x1D11E,
0xF8FF,
0x10FFFD,
0xFFFE,
];
</script>
<script src="test.js"></script>

17
tools/test-gbk.html

@ -0,0 +1,17 @@
<!DOCTYPE html>
<meta charset=gbk>
<script>
// Code points that test.js encodes twice each: once in non-fatal ("HTML") mode
// and once in fatal mode. This page defines neither sampleStrings nor
// seekCodePoints, so test.js generates encoder cases only.
var sampleCharacters = {
'U+0064': 0x64,
'U+20AC': 0x20AC,
'U+2164': 0x2164,
'U+3A74': 0x3A74,
'U+E7C7': 0xE7C7,
'U+1D11E': 0x1D11E,
'U+E5E5': 0xE5E5,
'U+3000': 0x3000,
// out-of-range values: test.js reports these as E_INVALID_CODE_POINT without trying to encode them
'-1': -1,
'0x110000': 0x110000,
};
</script>
<script src="test.js"></script>

46
tools/test-iso2022jp.html

@ -0,0 +1,46 @@
<!DOCTYPE html>
<meta charset=iso-2022-jp>
<!-- Chromium does NOT produce correct results as of this writing; use Firefox to generate test data -->
<script>
var sampleStrings = {
'empty string': "",
'Implied ASCII mode': "00 30 5C 7E 21 5F",
'Explicit ASCII mode': "1B2842 00 30 5C 7E 21 5F",
'Roman mode': "1B284A 00 30 5C 7E 21 5F",
'Katakana mode': "1B2849 00 30 5C 7E 21 5F",
'Double-byte mode 1': "1B2440 00 30 5C 7E 21 5F",
'Double-byte mode 2': "1B2442 00 30 5C 7E 21 5F",
'Multiple modes': "5C 1B2849 21 1B2440 305C 1B284A 5C 1B2842 5C",
'Double escape': "1B2849 1B2842 5C",
'Triple escape': "1B2849 1B2842 1B284A 5C",
'Trailing escape': "20 1B284A 30 33 1B2849",
'Invalid bytes': "80 FF 1B2849 00 20 7F 1B2442 00 2100 FF FF",
};
var sampleCharacters = {
'U+0020': [0x20],
'U+005C': [0x5C],
'U+007E': [0x7E],
'U+00A5': [0xA5],
'U+203E': [0x203E],
'U+FF61': [0xFF61],
'U+FF9F': [0xFF9F],
'U+2212': [0x2212],
'U+2116': [0x2116],
'U+FFE2': [0xFFE2],
'U+00C6': [0xC6],
'U+FFFD': [0xFFFD],
'Roman': [0xA5, 0x20, 0x203E],
'Roman to ASCII': [0xA5, 0x5C],
'Roman to error': [0xA5, 0x80],
'JIS': [0x2116, 0xFFE2, 0x2212],
'JIS to Roman': [0x2116, 0xA5],
'JIS to ASCII 1': [0x2116, 0x20],
'JIS to ASCII 2': [0x2116, 0x5C],
'JIS to error 1': [0x2116, 0x80],
'JIS to error 2': [0x2116, 0x1B], // Even Firefox is wrong here; see https://github.com/web-platform-tests/wpt/pull/26158
'Escape characters': [0x1B, 0xE, 0xF], // Even Firefox is wrong here; see https://github.com/web-platform-tests/wpt/pull/26158
'-1': [-1],
'0x110000': [0x110000],
};
</script>
<script src="test.js"></script>

42
tools/test-shiftjis.html

@ -0,0 +1,42 @@
<!DOCTYPE html>
<meta charset=shift_jis>
<!-- Chromium does NOT produce correct results as of this writing; use Firefox to generate test data -->
<script>
var sampleStrings = {
'empty string': "",
'sanity check': "40",
'former ASCII deviations': "5C 7E",
'JIS X 0201 range': "A1 DF",
'EUDC range': "F040 F9FC",
'JIS X 0208 assigned range': "8140 FC4B",
'JIS X 0208 total range': "8140 FCFC",
'JIS X 0208 truncated character 1': "81",
'JIS X 0208 truncated character 2': "81 20",
'JIS X 0208 truncated character 3': "81 FF",
};
var sampleCharacters = {
'U+0064': 0x64,
'U+00A5': 0xA5,
'U+203E': 0x203E,
'U+3088': 0x3088,
'U+FF96': 0xFF96,
'U+2212': 0x2212,
'U+00E6': 0xE6,
'U+FFE2': 0xFFE2,
'U+2116': 0x2116,
'U+E000': 0xE000,
'-1': -1,
'0x110000': 0x110000,
};
// Code points that test.js encodes one at a time (in fatal mode) to generate
// the seek-test byte string, the code point list and the table of byte offsets.
var seekCodePoints = [
0x007A,
0xFF96,
0x3088,
0xFF0D,
0x005C,
0xFF9B,
/* This code point is not encodable and must be manually entered as F040 */
// (test.js emits the placeholder "()" with an assumed length of 1 for it)
0xE000,
];
</script>
<script src="test.js"></script>

20
tools/test-utf16.html

@ -0,0 +1,20 @@
<!DOCTYPE html>
<meta charset=utf-16>
<script>
// Hex-encoded byte sequences that test.js decodes with a TextDecoder created
// from this page's charset label; whitespace only groups the digits.
// Byte pairs are written in little-endian order (e.g. 6100 is U+0061): the
// "utf-16" label selects UTF-16LE in the WHATWG Encoding Standard.
var sampleStrings = {
// control samples
'empty string': "",
'sanity check': "6100 6200 6300 3100 3200 3300",
'mixed sample': "7A00 A200 346C 34D8 1EDD FFF8 FFDB FDDF FEFF",
// unexpected EOF
'EOF in BMP character': "0000 FF",
'EOF after lead surrogate': "0000 34D8",
'EOF in trail surrogate': "0000 34D8 1E",
// invalid UTF-16 surrogates
'lead surrogate without trail': "34D8 0000",
'trail surrogate without lead': "1EDD 0000",
'double lead surrogate': "34D8 34D8 1EDD",
'double trail surrogate': "34D8 1EDD 1EDD",
};
</script>
<script src="test.js"></script>

70
tools/test-utf8.html

@ -0,0 +1,70 @@
<!DOCTYPE html>
<meta charset=utf-8>
<script>
var sampleStrings = {
// control samples
'empty string': "",
'sanity check': "61 62 63 31 32 33",
'multibyte control': "E5 8F A4 E6 B1 A0 E3 82 84 E8 9B 99 E9 A3 9B E3 81 B3 E8 BE BC E3 82 80 E6 B0 B4 E3 81 AE E9 9F B3",
'mixed sample': "7A C2 A2 E6 B0 B4 F0 9D 84 9E EF A3 BF F4 8F BF BD EF BF BE",
// various invalid sequences
'invalid code': "FF",
'ends early': "C0",
'ends early 2': "E0",
'invalid trail': "C0 00",
'invalid trail 2': "C0 C0",
'invalid trail 3': "E0 00",
'invalid trail 4': "E0 C0",
'invalid trail 5': "E0 80 00",
'invalid trail 6': "E0 80 C0",
'> 0x10FFFF': "FC 80 80 80 80 80",
'obsolete lead byte': "FE 80 80 80 80 80",
'overlong U+0000 - 2 bytes': "C0 80",
'overlong U+0000 - 3 bytes': "E0 80 80",
'overlong U+0000 - 4 bytes': "F0 80 80 80",
'overlong U+0000 - 5 bytes': "F8 80 80 80 80",
'overlong U+0000 - 6 bytes': "FC 80 80 80 80 80",
'overlong U+007F - 2 bytes': "C1 BF",
'overlong U+007F - 3 bytes': "E0 81 BF",
'overlong U+007F - 4 bytes': "F0 80 81 BF",
'overlong U+007F - 5 bytes': "F8 80 80 81 BF",
'overlong U+007F - 6 bytes': "FC 80 80 80 81 BF",
'overlong U+07FF - 3 bytes': "E0 9F BF",
'overlong U+07FF - 4 bytes': "F0 80 9F BF",
'overlong U+07FF - 5 bytes': "F8 80 80 9F BF",
'overlong U+07FF - 6 bytes': "FC 80 80 80 9F BF",
'overlong U+FFFF - 4 bytes': "F0 8F BF BF",
'overlong U+FFFF - 5 bytes': "F8 80 8F BF BF",
'overlong U+FFFF - 6 bytes': "FC 80 80 8F BF BF",
'overlong U+10FFFF - 5 bytes': "F8 84 8F BF BF",
'overlong U+10FFFF - 6 bytes': "FC 80 84 8F BF BF",
// UTF-16 surrogates
'lead surrogate': "ED A0 80",
'trail surrogate': "ED B0 80",
'surrogate pair': "ED A0 80 ED B0 80",
// self-sync edge cases
'trailing continuation': "0A 80 80",
'trailing continuation 2': "E5 8F A4 80",
};
var sampleCharacters = {
'U+007A': 0x007A,
'U+00A2': 0x00A2,
'U+6C34': 0x6C34,
'U+1D11E': 0x1D11E,
'U+F8FF': 0xF8FF,
'U+10FFFD': 0x10FFFD,
'U+FFFE': 0xFFFE,
'-1': -1,
'0x110000': 0x110000,
};
var seekCodePoints = [
0x007A,
0x00A2,
0x6C34,
0x1D11E,
0xF8FF,
0x10FFFD,
0xFFFE,
];
</script>
<script src="test.js"></script>

177
tools/test.js

@ -0,0 +1,177 @@
"use strict";
// Create the <pre> element that collects all generated test data, publish it
// as the global `out`, and attach it to the document's root element
window.out = document.createElement("pre");
document.documentElement.appendChild(window.out);
// The charset attribute of the page's first <meta> element names the encoding under test
var encoding = document.getElementsByTagName("meta").item(0).getAttribute("charset");
/**
 * Encodes a single code point in the document's encoding by letting the
 * browser serialize it into the query component of a URL, then reading the
 * resulting bytes back out of the (percent-encoded) query string.
 *
 * @param {number} code - The code point to encode
 * @param {boolean} fatal - Whether an unencodable code point yields an exception expression rather than its "&#NNNN;" fallback
 * @returns {string[]|string} An array of two-digit hexadecimal byte strings on
 *   success; otherwise a string: either the PHP expression constructing the
 *   expected EncoderException, or (non-fatal mode only) the decoded query text
 */
function encodeCodePoint(code, fatal) {
    if (code < 0 || code > 0x10FFFF) {
        return 'new EncoderException("", Coder::E_INVALID_CODE_POINT)';
    }
    const link = document.createElement("a");
    link.href = "http://example.com/?" + String.fromCodePoint(code) + "#";
    // drop the leading "?" of the serialized query
    const query = link.search.slice(1);
    const bytes = [];
    let pos = 0;
    while (pos < query.length) {
        const current = query.charAt(pos);
        // an (escaped or literal) ampersand starts a numeric character reference:
        // the character cannot be encoded
        if (query.startsWith("%26%23", pos) || current === "&") {
            return fatal
                ? 'new EncoderException("", Coder::E_UNAVAILABLE_CODE_POINT)'
                : decodeURIComponent(query);
        }
        if (current === "%") {
            // a percent escape already spells the byte out in hexadecimal
            bytes.push(query.slice(pos + 1, pos + 3));
            pos += 3;
        } else {
            // any other character stands for its own byte value
            bytes.push(current.charCodeAt(0).toString(16).padStart(2, "0"));
            pos += 1;
        }
    }
    return bytes;
}
/**
 * Encodes a sequence of code points as one string in the document's encoding
 * by letting the browser serialize it into the query component of a URL.
 *
 * The query is read back byte by byte: a "%XX" escape contributes the byte XX
 * and any other character contributes its own character code. Escapes are
 * deliberately NOT passed through decodeURIComponent(), because that function
 * interprets them as UTF-8: it throws URIError on byte sequences produced by
 * legacy encodings and would merge multi-byte UTF-8 escapes into one unit.
 *
 * @param {number[]} codes - The code points to encode, in order
 * @param {boolean} fatal - Whether an unencodable code point yields an exception expression instead of "&#NNNN;" fallback bytes
 * @returns {string[]|string} An array of upper-case two-digit hexadecimal byte
 *   strings on success (in non-fatal mode this includes the bytes of any
 *   "&#NNNN;" fallback), or the PHP expression constructing the expected
 *   EncoderException
 */
function encodeCodePoints(codes, fatal) {
    for (const code of codes) {
        if (code < 0 || code > 0x10FFFF) {
            return 'new EncoderException("", Coder::E_INVALID_CODE_POINT)';
        }
    }
    const link = document.createElement("a");
    link.href = "http://example.com/?" + String.fromCodePoint(...codes) + "#";
    // drop the leading "?" of the serialized query
    const query = link.search.slice(1);
    const hexPair = /^[0-9A-Fa-f]{2}$/;
    const bytes = [];
    // the output with one character per byte, used to look for "&#"
    let raw = "";
    for (let pos = 0; pos < query.length; pos++) {
        const digits = query.slice(pos + 1, pos + 3);
        let value;
        if (query.charAt(pos) === "%" && hexPair.test(digits)) {
            value = parseInt(digits, 16);
            // skip the two hexadecimal digits just consumed
            pos += 2;
        } else {
            value = query.charCodeAt(pos);
        }
        raw += String.fromCharCode(value);
        bytes.push(value.toString(16).padStart(2, "0").toUpperCase());
    }
    // a numeric character reference in the output marks an unencodable character
    if (fatal && raw.includes("&#")) {
        return 'new EncoderException("", Coder::E_UNAVAILABLE_CODE_POINT)';
    }
    return bytes;
}
/**
 * Encodes a code point (or an array of code points) and renders the result as
 * PHP source text for the generated test data.
 *
 * @param {number|number[]} code - A single code point, or a sequence of code points
 * @param {boolean} fatal - Passed through to the encoder
 * @returns {string} A double-quoted, upper-cased, space-separated list of hex
 *   bytes when encoding succeeded; a bin2hex("...") call when the encoder
 *   returned text starting with "&"; otherwise the encoder's string verbatim
 */
function wrapCodePoint(code, fatal) {
    // a bare number is a single character; anything else is treated as a sequence
    const encoded = (typeof code === "number")
        ? encodeCodePoint(code, fatal)
        : encodeCodePoints(code, fatal);
    if (Array.isArray(encoded)) {
        return `"${encoded.join(" ")}"`.toUpperCase();
    }
    if (encoded.charAt(0) == "&") {
        return `bin2hex("${encoded}")`;
    }
    return encoded;
}
if (typeof sampleStrings != 'undefined') {
    // Decode every hex-encoded sample with the encoding under test and print
    // one PHP array entry per sample:
    //   'name' => ["original hex text", [decoded code points]],
    var decoder = new TextDecoder(encoding);
    for (const label in sampleStrings) {
        const hexText = sampleStrings[label];
        // whitespace only groups the digits; every pair of digits is one byte
        const pairs = hexText.replace(/\s/g, "").match(/.{1,2}/g) || [];
        const octets = Uint8Array.from(pairs, (pair) => parseInt(pair, 16));
        const decoded = decoder.decode(octets);
        // iterating a string yields whole code points, so a non-BMP character
        // is listed once; surrogate values themselves are never listed
        const points = [...decoded]
            .map((character) => character.codePointAt(0))
            .filter((point) => point < 55296 || point > 57343);
        const line = `'${label}' => ["${hexText}", [${points.join(", ")}]],\n`;
        out.appendChild(document.createTextNode(line));
    }
    out.appendChild(document.createTextNode("\n\n"));
}
if (typeof sampleCharacters != 'undefined') {
    // Print two PHP array entries per sample, one for non-fatal ("HTML") mode
    // and one for fatal mode:
    //   'name (HTML)' => [false, <code point(s)>, <expected output>],
    //   'name (fatal)' => [true, <code point(s)>, <expected output>],

    // Non-negative integers are displayed in hexadecimal; anything else is left as it is
    const formatCode = (value) => (
        (value > -1 && value % 1 == 0) ? "0x" + value.toString(16).toUpperCase() : value
    );
    // The loop variable must be declared: a bare `name` refers to window.name
    // in a browser (overwriting it) and is a ReferenceError in strict mode
    // wherever no such global property exists
    for (let name in sampleCharacters) {
        let code = sampleCharacters[name];
        let displayCode;
        if (typeof code == "number") {
            displayCode = formatCode(code);
        } else {
            // a sequence of code points is displayed as a PHP array literal
            displayCode = "[" + [...code].map((value) => formatCode(value)).join(", ") + "]";
        }
        let line1 = "'" + name + " (HTML)' => [false, " + displayCode + ", " + wrapCodePoint(code, false) + "],\n";
        let line2 = "'" + name + " (fatal)' => [true, " + displayCode + ", " + wrapCodePoint(code, true) + "],\n";
        out.appendChild(document.createTextNode(line1));
        out.appendChild(document.createTextNode(line2));
    }
    out.appendChild(document.createTextNode("\n\n"));
}
if (typeof seekCodePoints != 'undefined') {
    // Encode each code point on its own (fatal mode) and record where it lands
    // in the combined byte string
    var stats = [];
    var offset = 0;
    for (const code of seekCodePoints) {
        const encoded = encodeCodePoint(code, true);
        const encodable = Array.isArray(encoded);
        // an unencodable code point gets the placeholder "()" and is counted as one byte
        const size = encodable ? encoded.length : 1;
        stats.push({
            'code': code,
            'offset': offset,
            'length': size,
            'bytes': encodable ? encoded.join("").toUpperCase() : "()",
        });
        offset += size;
    }
    var a = stats.length;
    // character index and byte offset of the end of the string
    var end = [a, offset];

    // Summarize the statistics in a comment, one line per character
    var comment = "/*\n";
    stats.forEach((entry, index) => {
        const size = (entry.length == 1) ? "(1 byte) " : `(${entry.length} bytes)`;
        const hex = entry.code.toString(16).padStart(4, "0").padEnd(6, " ").toUpperCase();
        comment += ` Char ${index} U+${hex} ${size} Offset ${entry.offset}\n`;
    });
    comment += ` End of string at char ${end[0]}, offset ${end[1]}\n`;
    comment += "*/\n";

    // PHP property holding the encoded byte string, one group of hex digits per character
    var bytes = 'protected $seekString = "' + stats.map((entry) => entry.bytes).join(" ") + '";' + "\n";
    // PHP property holding the code points
    var codes = 'protected $seekCodes = [' + stats.map((entry) => "0x" + entry.code.toString(16).toUpperCase()).join(", ") + "];\n";
    // PHP property holding each character's byte offset, followed by the end offset
    var offs = 'protected $seekOffsets = [' + stats.map((entry) => entry.offset).concat(end[1]).join(", ") + "];\n";

    // Output the results
    for (const text of [comment, bytes, codes, offs]) {
        out.appendChild(document.createTextNode(text));
    }
}

1970
vendor-bin/csfixer/composer.lock

File diff suppressed because it is too large

2
vendor-bin/phpunit/composer.json

@ -1,5 +1,5 @@
{
"require": {
"phpunit/phpunit": "^6.5"
"phpunit/phpunit": "^8.5 | ^9.0"
}
}

1332
vendor-bin/phpunit/composer.lock

File diff suppressed because it is too large

2
vendor-bin/robo/composer.json

@ -1,5 +1,5 @@
{
"require": {
"consolidation/robo": "^1.1"
"consolidation/robo": "^4.0"
}
}

1844
vendor-bin/robo/composer.lock

File diff suppressed because it is too large
Loading…
Cancel
Save