A lax Web news feed parser
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

304 lines
12 KiB

# For the purposes of testing, Dublin Core metadata is considered a part of RSS 1.0
Empty entry:
input: >
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/">
<channel>
<item/>
</channel>
</rdf:RDF>
output:
format: rdf
version: '1.0'
RDF identifier:
input: >
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/" xmlns:dc="http://purl.org/dc/elements/1.1/">
<channel>
<item rdf:about="blah"/>
</channel>
</rdf:RDF>
output:
format: rdf
version: '1.0'
entries:
- id: blah
Dublin Core identifier:
input: >
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:term="http://purl.org/dc/terms/">
<channel>
<item rdf:about="bloo">
<dc:identifier>blah</dc:identifier>
</item>
<item rdf:about="bloo">
<term:identifier>bloo</term:identifier>
</item>
</channel>
</rdf:RDF>
output:
format: rdf
version: '1.0'
entries:
- id: blah
- id: bloo
Dublin Core language:
input: >
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:term="http://purl.org/dc/terms/">
<item rdf:about="blah">
<dc:language>fr</dc:language>
</item>
<item rdf:about="bloo">
<term:language>de</term:language>
</item>
</rdf:RDF>
output:
format: rdf
version: '1.0'
entries:
- id: blah
lang: fr
- id: bloo
lang: de
Entry link:
input: >
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/">
<item>
<link>http://example.com/</link>
</item>
</rdf:RDF>
output:
format: rdf
version: '1.0'
entries:
- link: 'http://example.com/'
Dublin Core related link:
input: >
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:term="http://purl.org/dc/terms/">
<item><dc:references>http://example.com/</dc:references></item>
<item><dc:relation>http://example.com/</dc:relation></item>
<item><term:references>http://example.com/</term:references></item>
<item><term:relation>http://example.com/</term:relation></item>
<item>
<term:references>http://example.net/</term:references>
<term:relation>http://example.com/</term:relation>
</item>
</rdf:RDF>
output:
format: rdf
version: '1.0'
entries:
- relatedLink: 'http://example.com/'
- relatedLink: 'http://example.com/'
- relatedLink: 'http://example.com/'
- relatedLink: 'http://example.com/'
- relatedLink: 'http://example.com/' # 'relation' is preferred over 'references' because the former's semantics are more likely to be a URL
Dublin Core dates:
input: >
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/" xmlns:dc="http://purl.org/dc/elements/1.1/">
<item>
<dc:date>2020-03-03T00:00:00Z</dc:date>
</item>
<item>
<dc:date>2020-03-03T00:00:00Z</dc:date>
<dc:date>2020-01-01T00:00:00Z</dc:date>
</item>
<item>
<dc:date>2020-03-03T01:00:00+01:00</dc:date>
<dc:date>2020-03-03T00:00:00Z</dc:date>
</item>
</rdf:RDF>
output:
format: rdf
version: '1.0'
entries:
- dateModified: '2020-03-03T00:00:00Z'
- dateCreated: '2020-01-01T00:00:00Z'
dateModified: '2020-03-03T00:00:00Z'
- dateModified: '2020-03-03T01:00:00+01:00'
Dublin Core terms dates:
input: >
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:term="http://purl.org/dc/terms/">
<item><dc:available>2020-03-03T00:00:00Z</dc:available></item>
<item><dc:created>2020-03-03T00:00:00Z</dc:created></item>
<item><dc:date>2020-03-03T00:00:00Z</dc:date></item>
<item><dc:dateAccepted>2020-03-03T00:00:00Z</dc:dateAccepted></item>
<item><dc:dateCopyrighted>2020-03-03T00:00:00Z</dc:dateCopyrighted></item>
<item><dc:dateSubmitted>2020-03-03T00:00:00Z</dc:dateSubmitted></item>
<item><dc:issued>2020-03-03T00:00:00Z</dc:issued></item>
<item><dc:modified>2020-03-03T00:00:00Z</dc:modified></item>
<item><dc:valid>2020-03-03T00:00:00Z</dc:valid></item>
<item><term:available>2020-03-03T00:00:00Z</term:available></item>
<item><term:created>2020-03-03T00:00:00Z</term:created></item>
<item><term:date>2020-03-03T00:00:00Z</term:date></item>
<item><term:dateAccepted>2020-03-03T00:00:00Z</term:dateAccepted></item>
<item><term:dateCopyrighted>2020-03-03T00:00:00Z</term:dateCopyrighted></item>
<item><term:dateSubmitted>2020-03-03T00:00:00Z</term:dateSubmitted></item>
<item><term:issued>2020-03-03T00:00:00Z</term:issued></item>
<item><term:modified>2020-03-03T00:00:00Z</term:modified></item>
<item><term:valid>2020-03-03T00:00:00Z</term:valid></item>
</rdf:RDF>
output:
format: rdf
version: '1.0'
entries:
- dateModified: '2020-03-03T00:00:00Z'
- dateCreated: '2020-03-03T00:00:00Z'
- dateModified: '2020-03-03T00:00:00Z'
- dateModified: '2020-03-03T00:00:00Z'
- dateModified: '2020-03-03T00:00:00Z'
- dateModified: '2020-03-03T00:00:00Z'
- dateModified: '2020-03-03T00:00:00Z'
- dateModified: '2020-03-03T00:00:00Z'
- dateModified: '2020-03-03T00:00:00Z'
- dateModified: '2020-03-03T00:00:00Z'
- dateCreated: '2020-03-03T00:00:00Z'
- dateModified: '2020-03-03T00:00:00Z'
- dateModified: '2020-03-03T00:00:00Z'
- dateModified: '2020-03-03T00:00:00Z'
- dateModified: '2020-03-03T00:00:00Z'
- dateModified: '2020-03-03T00:00:00Z'
- dateModified: '2020-03-03T00:00:00Z'
- dateModified: '2020-03-03T00:00:00Z'
Entry and Dublin Core titles:
input: >
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:term="http://purl.org/dc/terms/">
<item><title>Loose title</title></item>
<item><dc:title>Plain title</dc:title></item>
<item><term:title>Plain title</term:title></item>
<item>
<dc:title>Plain title</dc:title>
<title>Loose title</title>
</item>
</rdf:RDF>
output:
format: rdf
version: '1.0'
entries:
- title: {loose: 'Loose title'}
- title: {plain: 'Plain title'}
- title: {plain: 'Plain title'}
- title: {loose: 'Loose title'} # Native titles take precedence over DC titles
Entry content:
input: >
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/">
<item>
<description>Loose content</description>
</item>
</rdf:RDF>
output:
format: rdf
version: '1.0'
entries:
- content: {loose: 'Loose content'}
Explicitly encoded content:
input: >
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/" xmlns:content="http://purl.org/rss/1.0/modules/content/">
<item>
<description>Loose content</description>
<content:encoded>HTML content</content:encoded>
</item>
</rdf:RDF>
output:
format: rdf
version: '1.0'
entries:
- content: {html: 'HTML content'}
Dublin Core summary:
input: >
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:term="http://purl.org/dc/terms/">
<item><dc:description>Plain summary</dc:description></item>
<item><dc:abstract>Plain summary</dc:abstract></item>
<item><term:description>Plain summary</term:description></item>
<item><term:abstract>Plain summary</term:abstract></item>
<item>
<term:description>Description</term:description>
<term:abstract>Abstract</term:abstract>
</item>
</rdf:RDF>
output:
format: rdf
version: '1.0'
entries:
- summary: 'Plain summary'
- summary: 'Plain summary'
- summary: 'Plain summary'
- summary: 'Plain summary'
- summary: 'Abstract'
Dublin Core creators and contributors:
input: >
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:term="http://purl.org/dc/terms/">
<item>
<dc:creator>john.doe@example.com (John Doe)</dc:creator>
<term:creator>jane.doe@example.com (Jane Doe)</term:creator>
<dc:contributor>Larry</dc:contributor>
<term:contributor>Curly</term:contributor>
</item>
</rdf:RDF>
output:
format: rdf
version: '1.0'
entries:
- people:
- name: 'John Doe'
mail: 'john.doe@example.com'
role: author
- name: 'Jane Doe'
mail: 'jane.doe@example.com'
role: author
- name: Larry
role: contributor
- name: Curly
role: contributor
Categories by way of Dublin Core subjects:
input: >
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:term="http://purl.org/dc/terms/">
<item>
<dc:subject>Category, sort of</dc:subject>
<term:subject>blah</term:subject>
</item>
</rdf:RDF>
output:
format: rdf
version: '1.0'
entries:
- categories:
- name: 'Category, sort of'
- name: 'blah'
Enclosures:
input: >
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/" xmlns:enc="http://purl.oclc.org/net/rss_2.0/enc#">
<item>
<enc:enclosure rdf:resource="http://example.com/" enc:type="image/svg+xml" enc:length="2112"/>
<enc:enclosure enc:url="http://example.net/" enc:type="image"/>
<enc:enclosure rdf:resource="http://example.net/enclosure.png" enc:length="bogus"/>
<enc:enclosure rdf:resource="http://[/" url="http://example.org/" type="image/svg+xml" enc:length="bogus" length="002112"/>
</item>
</rdf:RDF>
output:
format: rdf
version: '1.0'
entries:
- enclosures:
- url: 'http://example.com/'
type: 'image/svg+xml'
size: 2112
- url: 'http://example.net/'
type: 'image'
- url: 'http://example.net/enclosure.png'
type: 'image/png'
- url: 'http://example.org/'
type: 'image/svg+xml'
size: 2112