lax/lib/Parser/Construct.php

<?php
/** @license MIT
 * Copyright 2018 J. King et al.
 * See LICENSE and AUTHORS files for details */

declare(strict_types=1);
namespace JKingWeb\Lax\Parser;

use JKingWeb\Lax\Collection;
use JKingWeb\Lax\Date;
use JKingWeb\Lax\Url;

trait Construct {
    /** Trims plain text and collapses whitespace */
    protected function trimText(string $text): string {
        return trim(preg_replace("<\s{2,}>s", " ", $text));
    }

    /** Takes an HTML string as input and returns a sanitized version of that string
     * 
     * The $outputHtml parameter, when false, outputs only the plain-text content of the sanitized HTML
     */
    protected function sanitizeString(string $markup, bool $outputHtml = true): string {
        if (!preg_match("/<\S/", $markup)) {
            // if the string does not appear to actually contain markup besides entities, we can skip most of the sanitization
            return $outputHtml ? $markup : $this->trimText(html_entity_decode($markup, \ENT_QUOTES | \ENT_HTML5, "UTF-8"));
        } else {
            return "OOK!";
        }
    }

    /** Tests whether a string is a valid e-mail address
     * 
     * Accepts IDN hosts and Unicode localparts
     */
    protected function validateMail(string $addr): bool {
        $out = preg_match("/^(.+?)@([^@]+)$/", $addr, $match);
        if (!$out) {
            return false;
        }
        $local = $match[1];
        $domain = $match[2];
        // PHP's filter_var does not accept IDN hosts, so we have to perform an IDNA transformation first
        $domain = idn_to_ascii($domain, \IDNA_NONTRANSITIONAL_TO_ASCII | \IDNA_CHECK_BIDI | \IDNA_CHECK_CONTEXTJ, \INTL_IDNA_VARIANT_UTS46); // settings for IDNA2008 algorithm (I think)
        if ($domain !== false) {$addr = "$local@$domain";
            return (bool) filter_var($addr, \FILTER_VALIDATE_EMAIL, \FILTER_FLAG_EMAIL_UNICODE);
        }
        return false;
    }

    protected function parseDate(string $date): ?Date {
        $out = null;
        $date = $this->trimText($date);
        if (!strlen($date)) {
            return $out;
        }
        $tz = new \DateTimeZone("UTC");
        foreach (Date::$supportedFormats as $format) {
            $out = Date::createFromFormat($format, $date, $tz);
            if ($out) {
                break;
            }
        }
        return $out ?: null;
    }

    protected function parseMediaType(string $type, ?Url $url = null): ?string {
        if (preg_match('<^\s*([0-9a-z]+(?:/[!#$%&\'\*\+\-\.^_`|~0-9a-z]+))(?:\s|;|$)>i', $type, $match)) {
            /* NOTE: The pattern used here is a subset of what is 
                technically allowed by RFC 7231: the "type" portion
                is supposed to be as general as the "subtype" portion,
                but in practice only alphabetic types have ever been
                registered, making a more specific pattern more
                practically useful for detecting media types.

                See:
                <https://tools.ietf.org/html/rfc7231#section-3.1.1.1>
                <https://tools.ietf.org/html/rfc7230#section-3.2.6>

                Additionally, types without subtypes are accepted as
                we foresee the general type still being useful to
                feed processors.
            */
            return strtolower($match[1]);
        }
        if ($url) {
            $file = substr($url->getPath(), (int) strrpos($url->getPath(), "/"));
            $ext = strrpos($file, ".");
            if ($ext !== false) {
                $ext = substr($file, $ext + 1);
                if (strlen($ext)) {
                    return ($this->mime ?? ($this->mime = new \Mimey\MimeTypes))->getMimeType($ext);
                }
            }
        }
        return null;
    }

    protected function empty($o): bool {
        return !array_filter((array) $o, function($v) {
            return !is_null($v) && (!$v instanceof Collection || sizeof($v) > 0);
        });
    }
}
Implement JSON Feed 2018-02-24 23:34:32 -05:00			`<?php`
			`/** @license MIT`
			`* Copyright 2018 J. King et al.`
			`* See LICENSE and AUTHORS files for details */`

			`declare(strict_types=1);`
Initial API redesign Lots of things are probably (still) broken 2020-02-18 18:50:02 -05:00			`namespace JKingWeb\Lax\Parser;`
Implement JSON Feed 2018-02-24 23:34:32 -05:00
Various changes - Preliminary work on enclosures - Feed and entry language - Better API docs - Class constant visibility 2020-03-03 11:25:38 -05:00			`use JKingWeb\Lax\Collection;`
Make collections serialize correctly to JSON; add helper Date class The new Date class, which extends DateTimeImmutable, has both __toString() and jsonSerialize() methods which return unambiguously formatted strings. 2018-02-26 10:45:25 -05:00			`use JKingWeb\Lax\Date;`
Properly implement media type detection 2020-03-04 11:49:36 -05:00			`use JKingWeb\Lax\Url;`
Implement JSON Feed 2018-02-24 23:34:32 -05:00
			`trait Construct {`
			`/** Trims plain text and collapses whitespace */`
			`protected function trimText(string $text): string {`
			`return trim(preg_replace("<\s{2,}>s", " ", $text));`
			`}`

			`/** Takes an HTML string as input and returns a sanitized version of that string`
			`*`
			`* The $outputHtml parameter, when false, outputs only the plain-text content of the sanitized HTML`
			`*/`
			`protected function sanitizeString(string $markup, bool $outputHtml = true): string {`
			`if (!preg_match("/<\S/", $markup)) {`
			`// if the string does not appear to actually contain markup besides entities, we can skip most of the sanitization`
			`return $outputHtml ? $markup : $this->trimText(html_entity_decode($markup, \ENT_QUOTES \| \ENT_HTML5, "UTF-8"));`
			`} else {`
			`return "OOK!";`
			`}`
			`}`

			`/** Tests whether a string is a valid e-mail address`
			`*`
Properly implement media type detection 2020-03-04 11:49:36 -05:00			`* Accepts IDN hosts and Unicode localparts`
Implement JSON Feed 2018-02-24 23:34:32 -05:00			`*/`
			`protected function validateMail(string $addr): bool {`
			`$out = preg_match("/^(.+?)@([^@]+)$/", $addr, $match);`
			`if (!$out) {`
			`return false;`
			`}`
			`$local = $match[1];`
			`$domain = $match[2];`
Properly implement media type detection 2020-03-04 11:49:36 -05:00			`// PHP's filter_var does not accept IDN hosts, so we have to perform an IDNA transformation first`
			`$domain = idn_to_ascii($domain, \IDNA_NONTRANSITIONAL_TO_ASCII \| \IDNA_CHECK_BIDI \| \IDNA_CHECK_CONTEXTJ, \INTL_IDNA_VARIANT_UTS46); // settings for IDNA2008 algorithm (I think)`
			`if ($domain !== false) {$addr = "$local@$domain";`
			`return (bool) filter_var($addr, \FILTER_VALIDATE_EMAIL, \FILTER_FLAG_EMAIL_UNICODE);`
Implement JSON Feed 2018-02-24 23:34:32 -05:00			`}`
Properly implement media type detection 2020-03-04 11:49:36 -05:00			`return false;`
Implement JSON Feed 2018-02-24 23:34:32 -05:00			`}`
Add feed modification dates 2018-02-25 22:53:02 -05:00
More JSON Feed functionality 2020-02-25 15:23:38 -05:00			`protected function parseDate(string $date): ?Date {`
Add feed modification dates 2018-02-25 22:53:02 -05:00			`$out = null;`
			`$date = $this->trimText($date);`
			`if (!strlen($date)) {`
			`return $out;`
			`}`
			`$tz = new \DateTimeZone("UTC");`
Make input date formats user-editable, and implicitly never default to current-time date-parts (especially microseconds in PHP 7.1). 2018-02-26 11:10:37 -05:00			`foreach (Date::$supportedFormats as $format) {`
Make collections serialize correctly to JSON; add helper Date class The new Date class, which extends DateTimeImmutable, has both __toString() and jsonSerialize() methods which return unambiguously formatted strings. 2018-02-26 10:45:25 -05:00			`$out = Date::createFromFormat($format, $date, $tz);`
Add feed modification dates 2018-02-25 22:53:02 -05:00			`if ($out) {`
			`break;`
			`}`
			`}`
			`return $out ?: null;`
			`}`
Various changes - Preliminary work on enclosures - Feed and entry language - Better API docs - Class constant visibility 2020-03-03 11:25:38 -05:00
Properly implement media type detection 2020-03-04 11:49:36 -05:00			`protected function parseMediaType(string $type, ?Url $url = null): ?string {`
			if (preg_match('<^\s([0-9a-z]+(?:/[!#$%&\'\\+\-\.^_`\|~0-9a-z]+))(?:\s\|;\|$)>i', $type, $match)) {
			`/* NOTE: The pattern used here is a subset of what is`
			`technically allowed by RFC 7231: the "type" portion`
			`is supposed to be as general as the "subtype" portion,`
			`but in practice only alphabetic types have ever been`
			`registered, making a more specific pattern more`
			`practically useful for detecting media types.`

			`See:`
			`<https://tools.ietf.org/html/rfc7231#section-3.1.1.1>`
			`<https://tools.ietf.org/html/rfc7230#section-3.2.6>`

			`Additionally, types without subtypes are accepted as`
			`we foresee the general type still being useful to`
			`feed processors.`
			`*/`
			`return strtolower($match[1]);`
			`}`
			`if ($url) {`
			`$file = substr($url->getPath(), (int) strrpos($url->getPath(), "/"));`
			`$ext = strrpos($file, ".");`
			`if ($ext !== false) {`
			`$ext = substr($file, $ext + 1);`
			`if (strlen($ext)) {`
			`return ($this->mime ?? ($this->mime = new \Mimey\MimeTypes))->getMimeType($ext);`
			`}`
			`}`
			`}`
			`return null;`
			`}`

Various changes - Preliminary work on enclosures - Feed and entry language - Better API docs - Class constant visibility 2020-03-03 11:25:38 -05:00			`protected function empty($o): bool {`
			`return !array_filter((array) $o, function($v) {`
			`return !is_null($v) && (!$v instanceof Collection \|\| sizeof($v) > 0);`
			`});`
			`}`
Implement JSON Feed 2018-02-24 23:34:32 -05:00			`}`