[122, "7A"], "162" => [162, "C2 A2"], "27700" => [27700, "E6 B0 B4"], "119070" => [119070, "F0 9D 84 9E"], "63743" => [63743, "EF A3 BF"], "1114109" => [1114109, "F4 8F BF BD"], "65534" => [65534, "EF BF BE"], "-1" => [-1, new EncoderException("", UTF8::E_INVALID_CODE_POINT)], "1114112" => [1114112, new EncoderException("", UTF8::E_INVALID_CODE_POINT)], ]; foreach ($series as $name => $test) { yield "$name (fatal)" => array_merge([true], $test); yield "$name (HTML)" => array_merge([false], $test); } }

    /**
     * Data provider pairing UTF-8 byte strings with the code points they
     * are expected to decode to.
     *
     * Each case is keyed by a short description and holds two elements:
     * a string of space-separated hexadecimal byte values, and the list of
     * expected code points as integers. In every ill-formed case listed
     * here, each invalid byte corresponds to exactly one U+FFFD (65533) in
     * the expected output.
     *
     * NOTE(review): the consuming test methods are outside this view;
     * presumably they convert the hex string to raw bytes before decoding.
     *
     * @return array
     */
    public function provideStrings() {
        // U+FFFD REPLACEMENT CHARACTER, i.e. 65533
        $fffd = 0xFFFD;
        // Builds a list of $count replacement characters, for inputs in
        // which every single byte is expected to be rejected
        $allInvalid = function ($count) use ($fffd) {
            return array_fill(0, $count, $fffd);
        };

        // control samples
        $controlSamples = [
            'empty string'      => ["", []],
            'sanity check'      => ["61 62 63 31 32 33", [97, 98, 99, 49, 50, 51]],
            'multibyte control' => ["E5 8F A4 E6 B1 A0 E3 82 84 E8 9B 99 E9 A3 9B E3 81 B3 E8 BE BC E3 82 80 E6 B0 B4 E3 81 AE E9 9F B3", [21476, 27744, 12420, 34521, 39131, 12403, 36796, 12416, 27700, 12398, 38899]],
            'mixed sample'      => ["7A C2 A2 E6 B0 B4 F0 9D 84 9E EF A3 BF F4 8F BF BD EF BF BE", [122, 162, 27700, 119070, 63743, 1114109, 65534]],
        ];

        // various invalid sequences
        $invalidSequences = [
            'invalid code'       => ["FF", $allInvalid(1)],
            'ends early'         => ["C0", $allInvalid(1)],
            'ends early 2'       => ["E0", $allInvalid(1)],
            'invalid trail'      => ["C0 00", [$fffd, 0]],
            'invalid trail 2'    => ["C0 C0", $allInvalid(2)],
            'invalid trail 3'    => ["E0 00", [$fffd, 0]],
            'invalid trail 4'    => ["E0 C0", $allInvalid(2)],
            'invalid trail 5'    => ["E0 80 00", [$fffd, $fffd, 0]],
            'invalid trail 6'    => ["E0 80 C0", $allInvalid(3)],
            '> 0x10FFFF'         => ["FC 80 80 80 80 80", $allInvalid(6)],
            'obsolete lead byte' => ["FE 80 80 80 80 80", $allInvalid(6)],
        ];

        // overlong encodings of otherwise valid code points
        $overlongForms = [
            'overlong U+0000 - 2 bytes'   => ["C0 80", $allInvalid(2)],
            'overlong U+0000 - 3 bytes'   => ["E0 80 80", $allInvalid(3)],
            'overlong U+0000 - 4 bytes'   => ["F0 80 80 80", $allInvalid(4)],
            'overlong U+0000 - 5 bytes'   => ["F8 80 80 80 80", $allInvalid(5)],
            'overlong U+0000 - 6 bytes'   => ["FC 80 80 80 80 80", $allInvalid(6)],
            'overlong U+007F - 2 bytes'   => ["C1 BF", $allInvalid(2)],
            'overlong U+007F - 3 bytes'   => ["E0 81 BF", $allInvalid(3)],
            'overlong U+007F - 4 bytes'   => ["F0 80 81 BF", $allInvalid(4)],
            'overlong U+007F - 5 bytes'   => ["F8 80 80 81 BF", $allInvalid(5)],
            'overlong U+007F - 6 bytes'   => ["FC 80 80 80 81 BF", $allInvalid(6)],
            'overlong U+07FF - 3 bytes'   => ["E0 9F BF", $allInvalid(3)],
            'overlong U+07FF - 4 bytes'   => ["F0 80 9F BF", $allInvalid(4)],
            'overlong U+07FF - 5 bytes'   => ["F8 80 80 9F BF", $allInvalid(5)],
            'overlong U+07FF - 6 bytes'   => ["FC 80 80 80 9F BF", $allInvalid(6)],
            'overlong U+FFFF - 4 bytes'   => ["F0 8F BF BF", $allInvalid(4)],
            'overlong U+FFFF - 5 bytes'   => ["F8 80 8F BF BF", $allInvalid(5)],
            'overlong U+FFFF - 6 bytes'   => ["FC 80 80 8F BF BF", $allInvalid(6)],
            'overlong U+10FFFF - 5 bytes' => ["F8 84 8F BF BF", $allInvalid(5)],
            'overlong U+10FFFF - 6 bytes' => ["FC 80 84 8F BF BF", $allInvalid(6)],
        ];

        // UTF-16 surrogates
        $surrogates = [
            'lead surrogate'  => ["ED A0 80", $allInvalid(3)],
            'trail surrogate' => ["ED B0 80", $allInvalid(3)],
            'surrogate pair'  => ["ED A0 80 ED B0 80", $allInvalid(6)],
        ];

        // self-sync edge cases
        $selfSync = [
            'trailing continuation'   => ["0A 80 80", [10, $fffd, $fffd]],
            'trailing continuation 2' => ["E5 8F A4 80", [21476, $fffd]],
        ];

        // All keys are distinct non-numeric strings, so array_merge keeps
        // every key and the original case order intact
        return array_merge($controlSamples, $invalidSequences, $overlongForms, $surrogates, $selfSync);
    }
}