A lax Web news feed parser
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

159 lines
4.5 KiB

# Date, Expires and Last-Modified given in the preferred fixed-length
# HTTP-date form; each is expected under the matching meta key as an
# ISO 8601 UTC timestamp.
Dates in preferred format:
  input:
    head:
      Date: 'Thu, 05 Mar 2020 23:08:55 GMT'
      Expires: 'Wed, 11 Mar 2020 09:22:00 GMT'
      Last-Modified: 'Tue, 03 Mar 2020 06:14:00 GMT'
  output:
    meta:
      date: '2020-03-05T23:08:55Z'
      expires: '2020-03-11T09:22:00Z'
      lastModified: '2020-03-03T06:14:00Z'
# Same three instants as the preferred-format case, written in the
# obsolete RFC 850 form (full weekday name, dashes, two-digit year).
# The expected timestamps are identical.
Dates in RFC 850 format:
  input:
    head:
      Date: 'Thursday, 05-Mar-20 23:08:55 GMT'
      Expires: 'Wednesday, 11-Mar-20 09:22:00 GMT'
      Last-Modified: 'Tuesday, 03-Mar-20 06:14:00 GMT'
  output:
    meta:
      date: '2020-03-05T23:08:55Z'
      expires: '2020-03-11T09:22:00Z'
      lastModified: '2020-03-03T06:14:00Z'
# Same three instants written in the asctime() form, which carries no
# zone designator; the expected timestamps are UTC.
#
# asctime() pads a single-digit day of month with an extra space
# ('Mar  5', two spaces). The padding is significant: the "Invalid dates"
# case lists the unpadded 'Tue Mar 3 06:14:00 2020' as a value that must
# be rejected, so the valid inputs here must keep the double space.
Dates in asctime() format:
  input:
    head:
      Date: 'Thu Mar  5 23:08:55 2020'
      Expires: 'Wed Mar 11 09:22:00 2020'
      Last-Modified: 'Tue Mar  3 06:14:00 2020'
  output:
    meta:
      date: '2020-03-05T23:08:55Z'
      expires: '2020-03-11T09:22:00Z'
      lastModified: '2020-03-03T06:14:00Z'
# Several Date values, each malformed in a different way (see the inline
# notes). None is expected to yield a date: the expected meta is empty.
Invalid dates:
  input:
    head:
      Date:
        - 'Thu, 05 Mar 2020 25:08:55 GMT'  # impossible time
        - 'Fri, 06 Mar 2020 09:22:00 EST'  # bad timezone
        - 'Wed, 03 Mar 2020 06:14:00 GMT'  # wrong weekday
        - 'Tue Mar 3 06:14:00 2020'  # missing padding before single-digit day of month
        - 'thu, 05 Mar 2020 23:08:55 GMT'  # bad casing
  output:
    meta: {}
# A well-formed Content-Type value is expected verbatim as meta.type.
Content type:
  input:
    head:
      Content-Type: 'application/atom+xml'
  output:
    meta:
      type: 'application/atom+xml'
# The value contains two slashes, so it is not a valid type/subtype pair;
# no type is expected in meta.
Invalid content type:
  input:
    head:
      Content-Type: 'application/atom/xml'
  output:
    meta: {}
# The entity tag is expected unchanged, including its surrounding
# double quotes.
ETag:
  input:
    head:
      ETag: '"jwhwh"'
  output:
    meta:
      etag: '"jwhwh"'
# A digits-only Age with a leading zero; the expected value is a duration
# in seconds ('PT…S') with the leading zero dropped. The input is quoted
# so that YAML keeps it as a string.
Age:
  input:
    head:
      Age: '012345'
  output:
    meta:
      age: 'PT12345S'
# Plain max-age token value; expected as a seconds duration.
Max age 1:
  input:
    head:
      Cache-Control: 'max-age=123'
  output:
    meta:
      maxAge: 'PT123S'
# max-age given as a double-quoted string instead of a bare token;
# the expected result is the same as for the unquoted form.
Max age 2:
  input:
    head:
      Cache-Control: 'max-age="123"'
  output:
    meta:
      maxAge: 'PT123S'
# Quoted max-age with leading zeros, a backslash escape inside the quotes
# and trailing text after the closing quote; the expected value is still
# 123 seconds. Single-quoted YAML keeps the backslash literal.
Max age 3:
  input:
    head:
      Cache-Control: 'max-age="001\23"blah'
  output:
    meta:
      maxAge: 'PT123S'
# s-maxage appears twice in one header value; the expected maxAge matches
# the first occurrence (123), not the second (22).
Max age 4:
  input:
    head:
      Cache-Control: 's-maxage=123, s-maxage=22'
  output:
    meta:
      maxAge: 'PT123S'
# Directives spread over three Cache-Control values. The expected maxAge
# matches s-maxage=22 from the last value rather than max-age=123 from
# the first.
Max age 5:
  input:
    head:
      Cache-Control:
        - 'no-store, max-age=123'
        - 'must-revalidate'
        - 's-maxage=22'
  output:
    meta:
      maxAge: 'PT22S'
# Link header values checked against the expected meta.links list.
# What the fixture shows:
#   - doc_url reappears as meta.url and as the second element of every
#     two-element [reference, base] pair;
#   - one header value may carry several comma-separated links;
#   - a multi-token rel ("shortcut icon") is expected as one entry per token;
#   - a link with both rel and rev is expected as two entries, the rev one
#     flagged with `rev: true`;
#   - title* and ext are expected under attr, not as first-class keys;
#   - the last two input values (unparseable URL, empty rel) have no
#     counterpart in the expected output.
Links:
  doc_url: 'http://example.org/blah'
  input:
    head:
      Link:
        - '<http://example.com/>; rel="alternate"; type="appplication/atom+xml; charset=\"UTF-8\";", </>; rel=alternate; type="application/rss+xml"'
        - '</favicon.ico>; rel="shortcut icon"'
        - '<http://example.net/>;rev=help;rel=up'
        - '<http://exampple.biz/>; rel="http://example.com/help" ; anchor="#ook" ; hreflang=en ; media=print ; title="Title!" ; title*="multilingual titles" ; ext="Extended Attribute"'
        - '<http://[/>; rel="invalid URL"'
        - '<>; rel= ; x="no relation"'
  output:
    meta:
      url: 'http://example.org/blah'
      links:
        - url: 'http://example.com/'
          rel: alternate
          type: 'appplication/atom+xml;charset=UTF-8'
        - url: ['/', 'http://example.org/blah']
          rel: alternate
          type: 'application/rss+xml'
        - url: ['/favicon.ico', 'http://example.org/blah']
          rel: shortcut
        - url: ['/favicon.ico', 'http://example.org/blah']
          rel: icon
        - url: 'http://example.net/'
          rel: up
        - url: 'http://example.net/'
          rel: help
          rev: true
        - url: 'http://exampple.biz/'
          rel: 'http://example.com/help'
          anchor: ['#ook', 'http://example.org/blah']
          lang: en
          media: print
          title: 'Title!'
          attr:
            'title*': 'multilingual titles'
            ext: 'Extended Attribute'