Browse Source

Tests for ad hoc content sniffing

master
J. King 4 years ago
parent
commit
251249b11e
  1. 8
      lib/Parser/Parser.php
  2. 18
      tests/cases/Parser/ParserTest.php

8
lib/Parser/Parser.php

@ -35,21 +35,21 @@ abstract class Parser {
return "application/xml";
} elseif (preg_match('/^\s*</s', $data)) {
// distinguish between XML feeds and HTML; first skip any comments before the root element
$offset = preg_match('/^\s*(?:<!--(?:[^\-]|-(?!->)*-->\s*)*/s', $data, $match) ? strlen($match[0]) : 0;
$offset = preg_match('/^\s*(<!(?=--)([^-]|-(?!->))*-->\s*)*/s', $data, $match) ? strlen($match[0]) : 0;
$prefix = substr($data, $offset, 100);
if (preg_match('/^<(?:!DOCTYPE\s+html|html|body|head|table|div|title|p|link|meta)[\s>]/si', $prefix)) {
return "text/html";
} elseif (preg_match('/^<rss[\s>\/]/', $prefix)) {
return "application/rss+xml";
} elseif (preg_match('/^<(?:[A-Za-z0-9\-\._]+:)?(feed|RDF)\s/', $prefix)) {
} elseif (preg_match('/^<(?:[A-Za-z0-9\-\._]+:)?(feed|RDF)\s/', $prefix, $match)) {
if ($match[1] === "feed") {
return "application/atom+xml";
} else {
return "applicatiojn/rdf+xml";
return "application/rdf+xml";
}
} else {
// FIXME: Is there a better fallback that could be used here?
"application/xml";
return "application/xml";
}
} else {
return "application/octet-stream";

18
tests/cases/Parser/ParserTest.php

@ -46,8 +46,22 @@ class ParserTest extends \PHPUnit\Framework\TestCase {
/**
 * Supplies sample inputs for content sniffing together with the media type
 * each sample is expected to be identified as.
 *
 * Every entry is a two-element list: the raw string to sniff, followed by
 * the expected media type string.
 *
 * NOTE(review): the first two JSON entries appear twice below; this looks
 * like the before/after lines of a diff rendering rather than intentional
 * duplication -- confirm against the real file.
 *
 * @return iterable list of [input, expected media type] pairs
 */
public function provideDetectableContent(): iterable {
return [
// an opening brace, optionally preceded by whitespace, is expected to yield JSON
['{""}', "application/json"],
[" \n {\"v", "application/json"],
['{""}', "application/json"],
[" \n {\"v", "application/json"],
// an XML declaration, optionally preceded by whitespace
["<?xml", "application/xml"],
[" \n <?xml", "application/xml"],
// an HTML doctype followed by ">" or whitespace
["<!DOCTYPE html>", "text/html"],
["<!DOCTYPE html ", "text/html"],
[" \n <!DOCTYPE html>", "text/html"],
[" \n <!DOCTYPE html\n", "text/html"],
// comments ahead of the root element should not affect the result
[" <!-- --> <!-- oops -->\n <rss>", "application/rss+xml"],
[" <!--> <!-- oops -->\n <rss>", "application/rss+xml"],
// feed and RDF root elements, with and without a namespace prefix
["<feed ", "application/atom+xml"],
["<atom:feed ", "application/atom+xml"],
["<RDF ", "application/rdf+xml"],
["<rdf:RDF ", "application/rdf+xml"],
// any other root element is expected to yield generic XML
["<opml>", "application/xml"],
// input that does not start with markup or a brace
["plain text", "application/octet-stream"],
];
}
}
Loading…
Cancel
Save