From fd8ef1b21f0b7b1d9e66855a8bd5c93e95706c10 Mon Sep 17 00:00:00 2001 From: "J. King" Date: Sun, 19 Apr 2020 22:10:10 -0400 Subject: [PATCH] Move sniffing to separate class --- lib/MimeType.php | 48 ------------------ lib/Sniffing.php | 128 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 128 insertions(+), 48 deletions(-) create mode 100644 lib/Sniffing.php diff --git a/lib/MimeType.php b/lib/MimeType.php index 5788395..97d7a92 100644 --- a/lib/MimeType.php +++ b/lib/MimeType.php @@ -304,52 +304,4 @@ PATTERN; } return null; } - - public static function interpretHttpMessage(\Psr\Http\Message\MessageInterface $msg, bool $sniff = true): ?self { - $checkForApacheBug = false; - // Use the last Content-Type header-field - $type = array_pop($msg->getHeader("Content-Type")); - if (!is_null($type)) { - if ($msg instanceof \Psr\Http\Message\ResponseInterface) { - $checkForApacheBug = (bool) preg_match("<^text/plain(?:; charset=(?:UTF-8|(?:ISO|iso)-8859-1))?$>", $type); - } - $type = static::decode($type); - } - # stub - return null; - } - - public static function sniffImage(string $resource): ?self { - foreach (self::SNIFF_PATTERNS_IMAGE as $pattern => $type) { - if (preg_match($pattern, $resource)) { - return static::parse($type); - } - } - return null; - } - - public static function sniffAudioVideo(string $resource): ?self { - foreach (self::SNIFF_PATTERNS_AUDIOVIDEO as $pattern => $type) { - if (preg_match($pattern, $resource)) { - return static::parse($type); - } - } - return static::sniffMp4($resource) ?? static::sniffWebm($resource) ?? 
static::sniffMp3($resource); - } - - protected static function sniffMp4(string $d): ?self { - if (strlen($d) < 12) { - return null; - } - $boxSize = hexdec(bin2hex(substr($d, 0, 4))); - return null; - } - - protected static function sniffWebm(string $d): ?self { - return null; - } - - protected static function sniffMp3(string $d): ?self { - return null; - } } diff --git a/lib/Sniffing.php b/lib/Sniffing.php new file mode 100644 index 0000000..f4095af --- /dev/null +++ b/lib/Sniffing.php @@ -0,0 +1,128 @@ + "image/x-icon", + '/^BM/s' => "image/bmp", + '/^GIF8[79]a/s' => "image/gif", + '/^RIFF.{4}WEBPVP/s' => "image/webp", + '/^\x{89}PNG\r\n\x{1A}\n/s' => "image/png", + '/^\x{FF}\x{D8}\x{FF}/s' => "image/jpeg", + ]; + protected const SNIFF_PATTERNS_AUDIOVIDEO = [ + '/^\.snd/s' => "audio/basic", + '/^FORM.{4}AIFF/s' => "audio/aiff", + '/^ID3/s' => "audio/mpeg", + '/^OggS\x{00}/s' => "application/ogg", + '/^MThd\x{00}{3}\x{06}/s' => "audio/midi", + '/^RIFF.{4}AVI /s' => "video/avi", + '/^RIFF.{4}WAVE/s' => "audio/wave", + ]; + protected const SNIFF_PATTERNS_FONT = [ + '/^.{34}LP/s' => "application/vnd.ms-fontobject", + '/^\x{00}\x{01}\x{00}{2}/s' => "font/ttf", + '/^OTTO/s' => "font/otf", + '/^ttcf/s' => "font/collection", + '/^wOFF/s' => "font/woff", + '/^wOF2/s' => "font/woff2", + ]; + protected const SNIFF_PATTERNS_ARCHIVE = [ + '/^\x{1F}\x{8B}\x{08}/s' => "application/x-gzip", + '/^PK\x{03}\x{04}/s' => "application/zip", + '/^Rar \x{1A}\x{07}\x{00}/s' => "application/x-rar-compressed", + ]; + protected const SNIFF_PATTERNS_UNKNWON_SCRIPTABLE = [ + '/^\s*<(?:!DOCTYPE HTML|HTML|HEAD|SCRIPT|IFRAME|H1|DIV|FONT|TABLE|A|B|STYLE|TITLE|BODY|BR|P|!--)[ >]/si' => "text/html", + '/^\s*<\?xml/s' => "text/xml", + '/^%PDF-/s' => "application/pdf", + ]; + protected const SNIFF_PATTERN_UNKNWON_SAFE = [ + '/^%!PS-Adobe-/s' => "application/postscript", + '/^(?:(?:\x{FE}\x{FF}|\x{FF}\x{FE})..|\x{EF}\x{BB}\x{BF}.)/s' => "text/plain", + ]; + + public static function 
interpretHttpMessage(\Psr\Http\Message\MessageInterface $msg, bool $sniff = true): ?MimeType { + $checkForApacheBug = false; + // Use the last Content-Type header-field; array_pop() takes its argument by reference, so it must not be fed a call result + $type = array_slice($msg->getHeader("Content-Type"), -1)[0] ?? null; + if (!is_null($type)) { + if ($msg instanceof \Psr\Http\Message\ResponseInterface) { + $checkForApacheBug = (bool) preg_match("<^text/plain(?:; charset=(?:UTF-8|(?:ISO|iso)-8859-1))?$>", $type); + } + $type = MimeType::decode($type); + } + # stub + return null; + } + + public static function sniffImage(string $resource): ?MimeType { + foreach (self::SNIFF_PATTERNS_IMAGE as $pattern => $type) { + if (preg_match($pattern, $resource)) { + return MimeType::parse($type); + } + } + return null; + } + + public static function sniffAudioVideo(string $resource): ?MimeType { + foreach (self::SNIFF_PATTERNS_AUDIOVIDEO as $pattern => $type) { + if (preg_match($pattern, $resource)) { + return MimeType::parse($type); + } + } + return static::sniffMp4($resource) ?? static::sniffWebm($resource) ?? 
static::sniffMp3($resource); + } + + protected static function sniffMp4(string $d): ?MimeType { + if (strlen($d) < 12) { + return null; + } + $boxSize = hexdec(bin2hex(substr($d, 0, 4))); + if (strlen($d) < $boxSize || $boxSize % 4 > 0 || substr($d, 4, 4) !== "ftyp") { + return null; + } + if (substr($d, 8, 3) === "mp4") { + return MimeType::parse("video/mp4"); + } + $bytesRead = 16; + while ($bytesRead < $boxSize) { + if (substr($d, $bytesRead, 3) === "mp4") { + return MimeType::parse("video/mp4"); + } + $bytesRead += 4; + } + return null; + } + + protected static function sniffWebm(string $d): ?MimeType { + $length = strlen($d); + if ($length < 4 || substr($d, 0, 4) !== "\x1A\x45\xDF\xA3") { + return null; + } + $iter = 4; + for (; $iter < 38 && $iter < $length; $iter++) { // step one byte per pass so a non-matching position cannot stall the scan + if (substr($d, $iter, 2) === "\x42\x82") { + $iter += 2; + if ($iter >= $length) { + return null; + } + $iter += static::parseVint($d, $iter); + } + } + return null; + } + + protected static function parseVint(string $d, int $iter): int { + return 1; + } + + protected static function sniffMp3(string $d): ?MimeType { + return null; + } +}