Browse Source

Four more states

master
J. King 6 years ago
parent
commit
7d13a6c3b7
  1. 158
      lib/URI.php

158
lib/URI.php

@ -359,6 +359,19 @@ class URI {
const CHR_ASCII_ALPHA = 3;
const CHR_ASCII_ALPHANUM = 4;
// percent-encoding character set identifiers
const PE_CONTROL = 1;
const PE_FRAGMENT = 2;
const PE_PATH = 3;
const PE_USERINFO = 4;
const PE_SET = [
self::PE_CONTROL => [],
self::PE_FRAGMENT => [" ", '"', '`', "<", ">"],
self::PE_PATH => [" ", '"', '`', "<", ">", "#", "?", "{", "}"],
self::PE_USERINFO => [" ", '"', '`', "<", ">", "#", "?", "{", "}", "/", ":", ";", "=", "@", "[", "]", "\\"],
];
// error condition identifiers
const ERR_LEADING_OR_TRAILING_WS = 1;
const ERR_EMBEDDED_NEWLINE_OR_TAB = 2;
@ -367,6 +380,9 @@ class URI {
const ERR_RELATIVE_URL = 5;
const ERR_SCHEME_EXPECTING_SLASH = 6;
const ERR_BACKSLASH_FORBIDDEN = 7;
const ERR_UNEXPECTED_SLASH = 8;
const ERR_UNEXPECTED_AT = 9;
const ERR_AUTHORITY_WITHOUT_HOST = 10;
// parser state identifiers
const ST_SCHEME_START = 1;
@ -383,6 +399,9 @@ class URI {
const ST_AUTHORITY = 12;
const ST_PATH = 13;
const ST_RELATIVE_SLASH = 14;
const ST_HOST = 15;
const ST_HOSTNAME = 16;
const ST_FILE_HOST = 17;
public static $confUseAllSchemePorts = false;
@ -683,6 +702,130 @@ class URI {
}
}
break;
# relative slash state
case self::ST_RELATIVE_SLASH:
if ($this->isSpecial($url) && ($c=="/" || $c=="\\")) {
# If url is special and c is U+002F (/) or U+005C (\), then:
# If c is U+005C (\), validation error.
if ($c=="\\") {
$url->err[] = [$pointer, $pos, self::ERR_BACKSLASH_FORBIDDEN];
}
# Set state to special authority ignore slashes state.
$state = self::ST_SPECIAL_AUTHORITY_IGNORE_SLASHES;
} elseif ($c="/") {
# Otherwise, if c is U+002F (/), then set state to authority state.
$state = self::ST_AUTHORITY;
} else {
# Otherwise,
$this->map($url, $base, [
# set url’s username to base’s username,
"username",
# url’s password to base’s password,
"password",
# url’s host to base’s host,
"host",
# url’s port to base’s port,
"port",
]);
# state to path state, and then, decrease pointer by one.
$state = self::ST_PATH;
goto processChar;
}
break;
# special authority slashes state
case self::ST_SPECIAL_AUTHORITY_SLASHES:
if ($c=="/" && substr($input, $posNext, 1)=="/") {
# If c is U+002F (/) and remaining starts with U+002F (/), then set state to special authority ignore slashes state and increase pointer by one.
$state = self::ST_SPECIAL_AUTHORITY_IGNORE_SLASHES;
// this has the effect of increasing the pointer by one
$pos = $posNext;
$c = UTF8::get($input, $pos, $posNext);
} else {
# Otherwise, validation error, set state to special authority ignore slashes state, and decrease pointer by one.
$url->err[] = [$pointer, $pos, self::ERR_SCHEME_EXPECTING_SLASH];
$state = self::ST_SPECIAL_AUTHORITY_IGNORE_SLASHES;
goto processChar;
}
break;
# special authority ignore slashes state
case self::ST_SPECIAL_AUTHORITY_IGNORE_SLASHES:
if ($c != "/" && $c != "\\") {
# If c is neither U+002F (/) nor U+005C (\), then set state to authority state and decrease pointer by one.
$state = self::ST_AUTHORITY;
goto processChar;
} else {
# Otherwise, validation error.
$url->err[] = [$pointer, $pos, self::ERR_UNEXPECTED_SLASH];
}
break;
# authority state
case self::ST_AUTHORITY:
if($c=="@") {
# If c is U+0040 (@), then:
# Validation error.
$url->err[] = [$pointer, $pos, self::ERR_UNEXPECTED_AT];
# If the @ flag is set, prepend "%40" to buffer.
if ($flagAtSign) {
$buffer = "%40".$buffer;
}
# Set the @ flag.
$flagAtSign = true;
# For each codePoint in buffer:
$bPos = 0;
$bEof = strlen($buffer);
while ($bPos < $bEof) {
$codePoint = UTF8::get($buffer, $bPos, $bPosNext);
# If codePoint is U+003A (:) and passwordTokenSeenFlag is unset, then set passwordTokenSeenFlag and continue.
if ($codePoint==":" && !$flagPasswordTokenSeen) {
$flagPasswordTokenSeen = true;
// "continue" in the specification means going to the next character
$bPos = $bPosNext;
continue;
}
# Let encodedCodePoints be the result of running UTF-8 percent encode codePoint using the userinfo percent-encode set.
$encodedCodePoints = $this->percentEncode($codePoint, self::PE_USERINFO);
if ($flagPasswordTokenSeen) {
# If passwordTokenSeenFlag is set, then append encodedCodePoints to url’s password.
$url->password .= $encodedCodePoints;
} else {
# Otherwise, append encodedCodePoints to url’s username.
$url->username .= $encodedCodePoints;
}
}
# Set buffer to the empty string.
$buffer = "";
} elseif (
# Otherwise, if one of the following is true
in_array($c, ["/", "?", "#", ""]) || # c is the EOF code point, U+002F (/), U+003F (?), or U+0023 (#)
($this->isSpecial($url) && $c=="\\") # url is special and c is U+005C (\)
) {
# then:
# If @ flag is set and buffer is the empty string, validation error, return failure.
if ($flagAtSign && $buffer = "") {
$url->err[] = [$pointer, $pos, self::ERR_AUTHORITY_WITHOUT_HOST];
$url->failure = true;
return $url;
}
# Decrease pointer by the number of code points in buffer plus one,
// DEVIATION: as with decreasing the pointer by one to reprocess characters, we'll ignore the "plus one" here for self-consitency
// we first count the number of characters in the buffer
$c = UTF8::len($buffer);
// then decrease the advisorty character pointer by that amount
$pointer -= $c;
// then seek back the same number of characters
$pos = UTF8::seek($input, -$c, $pos);
// and finally consume that character to get the position of the next character to continue the loop correctly
$c = UTF8::get($input, $pos, $posNext);
# set buffer to the empty string, and set state to host state.
$buffer = "";
$state = self::ST_HOST;
// and reprocess the first character in the erstwhile buffer
goto processChar;
} else {
# Otherwise, append c to buffer.
$buffer .= $c;
}
break;
// invalid or unimplemented state
default:
// FIXME: this should be an error, but until the whole state machine is implemented, we stop processing instead
@ -732,4 +875,19 @@ class URI {
$test = ($est instanceof URI) ? $test->scheme : $test;
return array_key_exists($test, self::SCHEME_SPECIAL);
}
protected function percentEncode(string $bytes, int $set): string {
if (!isset(self::PE_SET[$set])) {
throw new \Exception;
}
$buffer = "";
foreach ($bytes as $b) {
if ($b < "\x20" || $b > "\x7E" || in_array($b, self::PE_SET[$set])) {
$buffer .= "%".strtoupper(bin2hex($b));
} else {
$buffer .= $b;
}
}
return $buffer;
}
}