Browse Source

Fixed more Feed bugs

- Two items with differing IDs but identical hashes were being matched by the deduplicator; they should not match
- Such items were also being matched by the database matcher, and should not be
- The second-pass database matcher was too aggressive in marking items as new
microsub
J. King 7 years ago
parent
commit
6771e8916a
  1. 9
      lib/Feed.php
  2. 3
      tests/Feed/TestFeed.php
  3. 37
      tests/docroot/Feed/Deduplication/IdenticalHashes.php

9
lib/Feed.php

@ -139,6 +139,8 @@ class Feed {
$out = [];
foreach($items as $item) {
foreach($out as $index => $check) {
// if the two items both have IDs and they differ, they do not match, regardless of hashes
if($item->id && $check->id && $item->id != $check->id) continue;
// if the two items have the same ID or any one hash matches, they are two versions of the same item
if(
($item->id && $check->id && $item->id == $check->id) ||
@ -185,6 +187,8 @@ class Feed {
foreach($items as $index => $i) {
$found = false;
foreach($articles as $a) {
// if the item has an ID and it doesn't match the article ID, the two don't match, regardless of hashes
if($i->id && $i->id !== $a['guid']) continue;
if(
// the item matches if the GUID matches...
($i->id && $i->id === $a['guid']) ||
@ -228,6 +232,8 @@ class Feed {
$i = $items[$index];
$found = false;
foreach($articles as $a) {
// if the item has an ID and it doesn't match the article ID, the two don't match, regardless of hashes
if($i->id && $i->id !== $a['guid']) continue;
if(
// the item matches if the GUID matches...
($i->id && $i->id === $a['guid']) ||
@ -252,9 +258,6 @@ class Feed {
$found = true;
break;
}
} else {
// if we don't have a match, add the item to the definite new list
$new[] = $index;
}
}
if(!$found) $new[] = $index;

3
tests/Feed/TestFeed.php

@ -27,6 +27,9 @@ class TestFeed extends \PHPUnit\Framework\TestCase {
$f = new Feed(null, $this->base."Deduplication/ID-Dates");
$this->assertCount(2, $f->newItems);
$this->assertTime($t, $f->newItems[0]->updatedDate);
$f = new Feed(null, $this->base."Deduplication/IdenticalHashes");
$this->assertCount(2, $f->newItems);
$this->assertTime($t, $f->newItems[0]->updatedDate);
}
function testHandleCacheHeadersOn304() {

37
tests/docroot/Feed/Deduplication/IdenticalHashes.php

@ -0,0 +1,37 @@
<?php return [
// Test fixture: returns an array with the MIME type and body of a canned
// RSS 2.0 feed.
//
// The body contains four <item> elements. The first three share <guid> 1 and
// the fourth has <guid> 2. All four carry the same description, pubDate and
// atom:updated values, so the items differ only by their GUID.
// NOTE(review): presumably this makes any content-derived hashes identical
// across all four items; the hashing itself is not visible here -- confirm.
//
// The heredoc body contains no "$" or backslash sequences, so nothing in it
// is interpolated or unescaped.
'mime' => "application/rss+xml",
'content' => <<<MESSAGE_BODY
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
<channel>
<title>Test feed</title>
<link>http://example.com/</link>
<description>A basic feed for testing</description>
<item>
<guid>1</guid>
<description>Sample article 2</description>
<pubDate>Sun, 19 May 2002 15:21:36 GMT</pubDate> <!-- This is the correct item and date -->
<atom:updated>2002-04-19T15:21:36Z</atom:updated>
</item>
<item>
<guid>1</guid>
<description>Sample article 2</description>
<pubDate>Sun, 19 May 2002 15:21:36 GMT</pubDate>
<atom:updated>2002-04-19T15:21:36Z</atom:updated>
</item>
<item>
<guid>1</guid>
<description>Sample article 2</description>
<pubDate>Sun, 19 May 2002 15:21:36 GMT</pubDate>
<atom:updated>2002-04-19T15:21:36Z</atom:updated>
</item>
<item>
<guid>2</guid>
<description>Sample article 2</description>
<pubDate>Sun, 19 May 2002 15:21:36 GMT</pubDate>
<atom:updated>2002-04-19T15:21:36Z</atom:updated>
</item>
</channel>
</rss>
MESSAGE_BODY
];
Loading…
Cancel
Save