Browse Source

Add public API

master
J. King 11 months ago
parent
commit
bd9d5511f5
  1. 2
      composer.json
  2. 130
      lib/Microformats.php
  3. 5
      lib/Microformats/Parser.php
  4. 0
      lib/Microformats/Url.php

2
composer.json

@ -38,7 +38,7 @@
},
"autoload": {
"psr-4": {
"MensBeam\\Microformats\\": "lib/"
"MensBeam\\": "lib/"
}
},
"autoload-dev": {

130
lib/Microformats.php

@ -0,0 +1,130 @@
<?php
/** @license MIT
* Copyright 2023 J. King
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam;
use MensBeam\HTML\Parser as HTMLParser;
use MensBeam\Microformats\Parser as MfParser;
/** A generic parser for microformats
*
* It implements Microformats v2 as well as backwards-compatible processing of
* so-called "classic" or "backcompat" Microformats. Some of its functionality
* is optional. Where an $options array is a possible parameter, the following
* keys are understood:
*
* - `impliedTz` (bool) Whether to allow an implied datetime value to supply an
* implied timezone to datetimes without a timezone
* - `lang` (bool) Whether to include language information in microformat and
* rich-text structures
* - `simpleTrim` (bool) Whether to use the traditional "simple" whitespace
* trimming algorithm rather than the default, more aggressive trimming algorithm
*
* Currently all input is assumed to be HTML, but processing of generic XML
* data may be supported in future.
*/
class Microformats implements \ArrayAccess, \JsonSerializable {
protected $data;
/** Parses a file for microformats
*
* If reading the file fails `null` is returned.
*
* While fopen wrappers can be used to open remote resources over HTTP, no
* effort is made to support this specially by reading the `Content-Type`
* header or deducing the URL. Using a proper HTTP client such as Guzzle
* is highly recommended instead.
*
* @param string $file The file to read and parse
* @param string $contentType The HTTP Content-Type of the file if known, optionally with parameters
* @param string $url The effective URL (after redirections) of the file if known
* @param array $options Options for the parser; please see the class documentetation for details
*/
public static function fromFile(string $file, string $contentType, string $url, array $options = []): ?self {
$string = file_get_contents($file);
if ($string === false) {
return null;
}
return static::fromString($string, $contentType, $url, $options);
}
/** Parses a string for microformats
*
* @param string $input The string to parse
* @param string $contentType The HTTP Content-Type of the string if known, optionally with parameters
* @param string $url The effective URL (after redirections) of the string if known
* @param array $options Options for the parser; please see the class documentetation for details
*/
public static function fromString(string $input, string $contentType, string $url, array $options = []): self {
$parsed = HTMLParser::parse($input, $contentType);
return static::fromHTMLElement($parsed->document->documentElement, $url, $options);
}
/** Parses an HTML element for microformats
*
* @param \DOMElement $input The element to examine. Siblings and ancestors of this element will be ignored
* @param string $url The effective URL (after redirections) of the document if known
* @param array $options Options for the parser; please see the class documentetation for details
*/
public static function fromHTMLElement(\DOMElement $input, string $url, array $options = []): self {
return new static((new MfParser)->parseHTMLElement($input, $url, $options));
}
/** Imports a plain array into this wrapper class
*
* This is mainly useful for proper JSON serialization.
*
* @param array $data The complete Microformats associative array
*/
public function __construct(array $data) {
$this->data = $data;
}
public function offsetExists(mixed $offset): bool {
return isset($this->data[$offset]);
}
public function &offsetGet(mixed $offset): mixed {
return $this->data[$offset];
}
public function offsetSet(mixed $offset, mixed $value): void {
$this->data[$offset] = $value;
}
public function offsetUnset(mixed $offset): void {
unset($this->data[$offset]);
}
public function jsonSerialize(): mixed {
// In order for a Microformats structure to serialize to JSON correctly
// we must ensure empty hash tables serialize to objects rather than
// arrays as they otherwise would. This cannot cover all possible
// cases of manipulation, but does cover cases which normally occur
// with data in the wild.
$data = $this->data;
$walk = function(&$arr) {
foreach ($arr as $k => &$v) {
if (is_array($v)) {
if ($k === "properties" && !$v) {
$v = new \stdClass;
} else {
__FUNCTION__($v);
}
}
}
};
if (!$data['rels']) {
$data['rels'] = new \stdClass;
}
if (!$data['rel-urls']) {
$data['rel-urls'] = new \stdClass;
}
$walk($data['items']);
return $data;
}
}

5
lib/Parser.php → lib/Microformats/Parser.php

@ -16,11 +16,12 @@ use MensBeam\HTML\Parser\Serializer;
* is optional. Where an $options array is a possible parameter, the following
* keys are understood:
*
* - `simpleTrim` (bool) Whether to use the traditional "simple" whitespace trimming algorithm rather than the default, more aggressive trimming algorithm
* - `impliedTz` (bool) Whether to allow an implied datetime value to supply an implied timezone to datetimes without a timezone
* - `lang` (bool) Whether to include language information in microformat and rich-text structures
* - `simpleTrim` (bool) Whether to use the traditional "simple" whitespace trimming algorithm rather than the default, more aggressive trimming algorithm
*
* @internal */
* @internal
*/
class Parser {
/** @var array A ranking of prefixes (with 1 being least preferred) to break ties when multiple properties of the same name exist on one element */
protected const PREFIX_RANK = [

0
lib/Url.php → lib/Microformats/Url.php

Loading…
Cancel
Save