BossBey File Manager
PHP:
8.2.30
OS:
Linux
User:
imagivibe
Root
/
.
/
plugins
/
wordpress-importer
/
php-toolkit
/
DataLiberation
/
BlockMarkup
📤 Upload
📝 New File
📁 New Folder
Close
Editing: class-blockmarkupurlprocessor.php
<?php

namespace WordPress\DataLiberation\BlockMarkup;

use Rowbot\URL\URL;
use WordPress\DataLiberation\URL\URLInTextProcessor;
use WordPress\DataLiberation\URL\CSSURLProcessor;
use WordPress\DataLiberation\URL\WPURL;

/**
 * Reports all the URLs in the imported post and enables rewriting them.
 *
 * URLs are looked up in three places, depending on the type of the current token:
 *
 * - `#tag`:           URL-carrying HTML attributes and `url()` declarations in `style`.
 * - `#block-comment`: string-valued, top-level block attributes.
 * - `#text`:          URLs found in the text node by URLInTextProcessor.
 */
class BlockMarkupUrlProcessor extends BlockMarkupProcessor {

	/**
	 * The currently matched URL exactly as it appears in the markup,
	 * or null when no URL is matched.
	 */
	private $raw_url;

	/**
	 * @var URL
	 */
	private $parsed_url;

	/**
	 * The base URL string given to the constructor. Passed to the URL parsers
	 * whenever a relative URL may legitimately occur.
	 */
	private $base_url_string;

	/**
	 * The parsed form of $base_url_string, or null when no base URL was given.
	 * Used as the default "old" base URL in replace_base_url().
	 */
	private $base_url_object;

	/**
	 * Lazily created processor for the current `#text` token. Reset on every next_token().
	 */
	private $url_in_text_processor;

	/**
	 * Whether set_url() changed a URL in the current text node and the change
	 * still needs to be written back in get_updated_html().
	 */
	private $url_in_text_node_updated;

	/**
	 * Processor for the `style` attribute of the current tag, or null when
	 * no `style` attribute is being inspected.
	 */
	private $css_url_processor;

	/**
	 * Whether set_url() changed a CSS URL and the `style` attribute still needs
	 * to be written back in get_updated_html().
	 */
	private $css_url_processor_updated;

	/**
	 * The list of names of URL-related HTML attributes that may be available on
	 * the current token. They will be inspected by next_url_attribute().
	 *
	 * Possible values:
	 *
	 * - null:        We haven't inspected any attribute yet.
	 * - array:       The last element is the currently inspected attribute
	 *                and the rest of the list are elements yet to be inspected on
	 *                the upcoming next_url_attribute() calls.
	 * - empty array: We've already inspected all the URL-related attributes.
	 *
	 * @var array<string>|null
	 */
	private $inspecting_html_attributes;

	/**
	 * @param mixed       $html            Markup handed over to the parent processor as-is.
	 * @param string|null $base_url_string Optional base URL used to resolve relative URLs.
	 *                                     A falsy value means "no base URL".
	 */
	public function __construct( $html, ?string $base_url_string = null ) {
		parent::__construct( $html );
		$this->base_url_string = $base_url_string;
		$this->base_url_object = $base_url_string ? WPURL::parse( $base_url_string ) : null;
	}

	/**
	 * Writes any pending text-node and `style` attribute changes back to the
	 * current token and returns the parent's updated HTML.
	 *
	 * @return string
	 */
	public function get_updated_html(): string {
		if ( $this->url_in_text_node_updated ) {
			$this->set_modifiable_text( $this->url_in_text_processor->get_updated_text() );
			$this->url_in_text_node_updated = false;
		}

		if ( $this->css_url_processor_updated ) {
			if ( null !== $this->css_url_processor ) {
				$updated_css = $this->css_url_processor->get_updated_css();
				$this->set_attribute( 'style', $updated_css );
			}
			$this->css_url_processor_updated = false;
		}

		return parent::get_updated_html();
	}

	/**
	 * @return string|null The currently matched URL as written in the markup, or null.
	 */
	public function get_raw_url() {
		return $this->raw_url;
	}

	/**
	 * @return URL|null The parsed form of the currently matched URL, or null.
	 */
	public function get_parsed_url() {
		return $this->parsed_url;
	}

	/**
	 * Flushes pending updates, clears all per-token URL state and advances
	 * the parent processor to the next token.
	 *
	 * @return bool The result of parent::next_token().
	 */
	public function next_token(): bool {
		// Flush before the sub-processors are dropped below – they hold the pending changes.
		$this->get_updated_html();

		$this->raw_url                    = null;
		$this->parsed_url                 = null;
		$this->inspecting_html_attributes = null;
		$this->url_in_text_processor      = null;
		$this->css_url_processor          = null;

		/*
		 * Do not reset url_in_text_node_updated or css_url_processor_updated – they're reset
		 * in get_updated_html() which is called in parent::next_token().
		 */
		return parent::next_token();
	}

	/**
	 * Advances to the next URL in the document, moving across tokens as needed.
	 *
	 * @return bool Whether a URL was found.
	 */
	public function next_url() {
		do {
			if ( $this->next_url_in_current_token() ) {
				return true;
			}
		} while ( false !== $this->next_token() );

		return false;
	}

	/**
	 * Advances to the next URL within the current token only.
	 *
	 * @return bool Whether another URL was found in the current token.
	 */
	public function next_url_in_current_token() {
		/*
		 * Clear both halves of the match together so that a `false` result never
		 * leaves the previous match's parsed URL visible via get_parsed_url().
		 */
		$this->raw_url    = null;
		$this->parsed_url = null;

		switch ( parent::get_token_type() ) {
			case '#tag':
				return $this->next_url_attribute();
			case '#block-comment':
				return $this->next_url_block_attribute();
			case '#text':
				return $this->next_url_in_text_node();
			default:
				return false;
		}
	}

	/**
	 * Advances to the next URL in the current text node.
	 *
	 * @return bool Whether a URL was found.
	 */
	private function next_url_in_text_node() {
		if ( '#text' !== $this->get_token_type() ) {
			return false;
		}

		if ( null === $this->url_in_text_processor ) {
			/*
			 * Use the base URL for URLs matched in text nodes. This is the only
			 * way to recognize a substring "WordPress.org" as a URL. We might
			 * get some false positives this way, e.g. in this string:
			 *
			 * > And that's how you build a theme. Now let's take a look at..."
			 *
			 * `theme.Now` would be recognized as a URL. It's up to the API consumer
			 * to filter out such false positives e.g. by checking the domain against
			 * a list of accepted domains, or the TLD against a list of public suffixes.
			 */
			$this->url_in_text_processor = new URLInTextProcessor( $this->get_modifiable_text(), $this->base_url_string );
		}

		if ( ! $this->url_in_text_processor->next_url() ) {
			return false;
		}

		$this->raw_url    = $this->url_in_text_processor->get_raw_url();
		$this->parsed_url = $this->url_in_text_processor->get_parsed_url();

		return true;
	}

	/**
	 * Advances to the next CSS URL in the `style` attribute of the current tag token.
	 *
	 * The matched URL is only committed to this processor once it has been
	 * parsed successfully, so a `false` result never exposes an unparseable
	 * raw URL or a `false` parsed URL.
	 *
	 * @return bool Whether a CSS URL was found.
	 */
	private function next_url_in_css() {
		if ( '#tag' !== $this->get_token_type() ) {
			return false;
		}

		if ( null === $this->css_url_processor ) {
			$css_value = $this->get_attribute( 'style' );
			if ( ! is_string( $css_value ) ) {
				return false;
			}
			$this->css_url_processor = new CSSURLProcessor( $css_value );
		}

		while ( $this->css_url_processor->next_url() ) {
			/**
			 * Skip data URIs. They may be really large and they don't
			 * have a hostname to migrate.
			 */
			if ( $this->css_url_processor->is_data_uri() ) {
				continue;
			}

			$raw_url    = $this->css_url_processor->get_raw_url();
			$parsed_url = WPURL::parse( $raw_url, $this->base_url_string );
			if ( false === $parsed_url ) {
				continue;
			}

			$this->raw_url    = $raw_url;
			$this->parsed_url = $parsed_url;

			return true;
		}

		return false;
	}

	/**
	 * Advances to the next URL found in the attributes of the current tag.
	 *
	 * Attributes are taken from the end of $inspecting_html_attributes. The
	 * `style` attribute is appended to that list, so it is inspected before
	 * the tag-specific URL attributes.
	 *
	 * @return bool Whether a URL was found.
	 */
	private function next_url_attribute() {
		$tag = $this->get_tag();

		// Check if we have a style attribute with CSS URLs to process.
		if ( null !== $this->css_url_processor ) {
			if ( $this->next_url_in_css() ) {
				return true;
			}
			// Done with CSS URLs in this attribute, apply any pending updates and move on.
			$this->get_updated_html();
			$this->css_url_processor = null;
		}

		if ( null === $this->inspecting_html_attributes ) {
			if ( array_key_exists( $tag, self::HTML_ATTRIBUTES_TO_ACCEPT_RELATIVE_URLS_FROM ) ) {
				/**
				 * Initialize the list on the first call to next_url_attribute()
				 * for the current token. The last element is the attribute we'll
				 * inspect in the while() loop below.
				 */
				$this->inspecting_html_attributes = self::HTML_ATTRIBUTES_TO_ACCEPT_RELATIVE_URLS_FROM[ $tag ];

				// Add style attribute to the list if it exists.
				if ( null !== $this->get_attribute( 'style' ) ) {
					$this->inspecting_html_attributes[] = 'style';
				}
			} elseif ( null !== $this->get_attribute( 'style' ) ) {
				$this->inspecting_html_attributes = array( 'style' );
			} else {
				return false;
			}
		} else {
			/**
			 * Forget the attribute we've inspected on the previous call to
			 * next_url_attribute().
			 */
			array_pop( $this->inspecting_html_attributes );
		}

		while ( count( $this->inspecting_html_attributes ) > 0 ) {
			$attr      = $this->inspecting_html_attributes[ count( $this->inspecting_html_attributes ) - 1 ];
			$url_maybe = $this->get_attribute( $attr );
			if ( ! is_string( $url_maybe ) ) {
				array_pop( $this->inspecting_html_attributes );
				continue;
			}

			// Rewrite any CSS `url()` declarations in the `style` attribute.
			if ( 'style' === $attr ) {
				$this->css_url_processor = new CSSURLProcessor( $url_maybe );
				if ( $this->next_url_in_css() ) {
					return true;
				}
				// No CSS URLs found, move to next attribute.
				$this->css_url_processor = null;
				array_pop( $this->inspecting_html_attributes );
				continue;
			}

			/*
			 * Use base URL to resolve known URI attributes as we are certain we're
			 * dealing with URI values.
			 * With a base URL, the string "plugins.php" in <a href="plugins.php"> will
			 * be correctly recognized as a URL.
			 * Without a base URL, this Processor would incorrectly skip it.
			 */
			$parsed_url = WPURL::parse( $url_maybe, $this->base_url_string );
			if ( false === $parsed_url ) {
				array_pop( $this->inspecting_html_attributes );
				continue;
			}

			$this->raw_url    = $url_maybe;
			$this->parsed_url = $parsed_url;

			return true;
		}

		return false;
	}

	/**
	 * Advances to the next URL found in the attributes of the current block comment.
	 *
	 * Only string values of top-level block attributes are considered.
	 *
	 * @return bool Whether a URL was found.
	 */
	private function next_url_block_attribute() {
		while ( $this->next_block_attribute() ) {
			$url_maybe = $this->get_block_attribute_value();
			if ( ! is_string( $url_maybe ) || count( $this->get_block_attribute_path() ) > 1 ) {
				// @TODO: support arrays, objects, and other non-string data structures.
				continue;
			}

			/**
			 * Decide whether the current block attribute holds a URL.
			 *
			 * Known URL attributes can be assumed to hold a URL and be
			 * parsed with the base URL. For example, a "/about-us" value
			 * in a wp:navigation-link block's `url` attribute is a
			 * relative URL to the `/about-us` page.
			 *
			 * Other attributes may or may not contain URLs, but we cannot assume
			 * they do. A value `/about-us` could be a relative URL or a class name.
			 * In those cases, we'll let go of relative URLs and only detect
			 * absolute URLs to avoid treating every string as a URL. This requires
			 * parsing without a base URL.
			 */
			$is_relative_url_block_attribute = (
				isset( self::BLOCK_ATTRIBUTES_TO_ACCEPT_RELATIVE_URLS_FROM[ $this->get_block_name() ] ) &&
				in_array( $this->get_block_attribute_key(), self::BLOCK_ATTRIBUTES_TO_ACCEPT_RELATIVE_URLS_FROM[ $this->get_block_name() ], true )
			);

			/**
			 * Filters whether a block attribute is known to contain a relative URL.
			 *
			 * This filter allows extending the list of block attributes that are
			 * recognized as containing URLs. When a block attribute is marked as
			 * a known URL attribute, it will be parsed with the base URL, allowing
			 * relative URLs to be properly resolved.
			 *
			 * @since 6.8.0
			 *
			 * @param bool  $is_relative_url_block_attribute Whether the block attribute is known to contain a relative URL.
			 * @param array $context {
			 *     Context information about the block attribute.
			 *
			 *     @type string $block_name     The name of the block (e.g., 'wp:image', 'wp:button').
			 *     @type string $attribute_name The name of the attribute (e.g., 'url', 'href').
			 * }
			 */
			$is_relative_url_block_attribute = apply_filters(
				'url_processor_is_relative_url_block_attribute',
				$is_relative_url_block_attribute,
				array(
					'block_name'     => $this->get_block_name(),
					'attribute_name' => $this->get_block_attribute_key(),
				)
			);

			$parsed_url = false;
			if ( $is_relative_url_block_attribute ) {
				// Known relative URL attribute – let's parse with the base URL.
				$parsed_url = WPURL::parse( $url_maybe, $this->base_url_string );
			} else {
				// Other attributes – let's parse without a base URL (and only detect absolute URLs).
				$parsed_url = WPURL::parse( $url_maybe );
			}

			if ( false === $parsed_url ) {
				continue;
			}

			$this->raw_url    = $url_maybe;
			$this->parsed_url = $parsed_url;

			return true;
		}

		return false;
	}

	/**
	 * Replaces the currently matched URL with a new one.
	 *
	 * @param string $raw_url    The raw URL.
	 * @param URL    $parsed_url The parsed version of the raw URL. It is required
	 *                           as $raw_url might be a relative URL pointing to a different
	 *                           host than this processor's base URL.
	 *
	 * @return bool True if the URL was set, false otherwise.
	 */
	public function set_url( $raw_url, $parsed_url ) {
		if ( null === $this->raw_url ) {
			return false;
		}

		$this->raw_url    = $raw_url;
		$this->parsed_url = $parsed_url;

		switch ( parent::get_token_type() ) {
			case '#tag':
				// Check if we're processing a CSS URL.
				if ( null !== $this->css_url_processor ) {
					$this->css_url_processor_updated = true;

					return $this->css_url_processor->set_raw_url( $raw_url );
				}

				$attr = $this->get_inspected_attribute_name();
				if ( false === $attr ) {
					return false;
				}
				$this->set_attribute( $attr, $raw_url );

				return true;

			case '#block-comment':
				return $this->set_block_attribute_value( $raw_url );

			case '#text':
				if ( null === $this->url_in_text_processor ) {
					return false;
				}
				$this->url_in_text_node_updated = true;

				return $this->url_in_text_processor->set_raw_url( $raw_url );

			default:
				// No other token type can carry a URL, so there is nothing to update.
				return false;
		}
	}

	/**
	 * Rewrites the components of the currently matched URL from ones
	 * provided in $base_url to ones specified in $to_url.
	 *
	 * It preserves the relative nature of the matched URL.
	 *
	 * @TODO: Should this method live in this class? It's specific to the import process
	 *        and the URL rewriting logic and has knowledge about the quirks of detecting
	 *        relative URLs in text nodes. On the other hand, the detection is performed
	 *        by the URLInTextProcessor class so maybe the two do go hand in hand?
	 *
	 * @param URL      $to_url   The new base URL.
	 * @param URL|null $base_url The old base URL. Defaults to the base URL given to the constructor.
	 *
	 * @return bool Whether the URL was rewritten. False when no old base URL is
	 *              available or WPURL::replace_base_url() reports a failure.
	 */
	public function replace_base_url( URL $to_url, ?URL $base_url = null ) {
		$base_url = $base_url ?? $this->base_url_object;
		if ( ! $base_url ) {
			return false;
		}

		/*
		 * In text nodes, the only detected URLs are absolute. The tricky part
		 * is they may start without a protocol, e.g. `wordpress.org`. Therefore,
		 * we need to tell WPURL::replace_base_url what's our intention regarding
		 * the URL's relativity. It cannot just infer it from the URL itself.
		 */
		$is_relative = (
			'#text' !== $this->get_token_type() &&
			! WPURL::can_parse( $this->get_raw_url() )
		);

		$result = WPURL::replace_base_url(
			$this->get_parsed_url(),
			array(
				'old_base_url' => $base_url,
				'new_base_url' => $to_url,
				'raw_url'      => $this->get_raw_url(),
				'is_relative'  => $is_relative,
			)
		);
		if ( false === $result ) {
			return false;
		}

		// The string form of $result is the new raw URL; ->new_url is its parsed counterpart.
		$this->set_url( (string) $result, $result->new_url );

		return true;
	}

	/**
	 * Returns true if the currently matched URL is absolute.
	 *
	 * @return bool Whether the currently matched URL is absolute.
	 */
	public function is_url_absolute() {
		return WPURL::can_parse( $this->get_raw_url() );
	}

	/**
	 * Returns the name of the HTML attribute currently being inspected.
	 *
	 * @return string|false The attribute name, or false when the current token is
	 *                      not a tag or no attribute is being inspected.
	 */
	public function get_inspected_attribute_name() {
		if ( '#tag' !== $this->get_token_type() ) {
			return false;
		}

		// empty() covers both "not inspected yet" (null) and "all inspected" (empty array).
		if ( empty( $this->inspecting_html_attributes ) ) {
			return false;
		}

		return $this->inspecting_html_attributes[ count( $this->inspecting_html_attributes ) - 1 ];
	}

	/**
	 * A list of block attributes that are known to contain URLs.
	 *
	 * It covers WordPress core blocks as of WordPress version 6.9. It can be
	 * extended by plugins and themes via the "url_processor_is_relative_url_block_attribute"
	 * filter.
	 *
	 * @var array
	 */
	public const BLOCK_ATTRIBUTES_TO_ACCEPT_RELATIVE_URLS_FROM = array(
		'wp:button'             => array( 'url', 'linkTarget' ),
		'wp:cover'              => array( 'url' ),
		'wp:embed'              => array( 'url' ),
		'wp:gallery'            => array( 'url', 'fullUrl' ),
		'wp:image'              => array( 'url', 'src', 'href' ),
		'wp:media-text'         => array( 'mediaUrl', 'href' ),
		'wp:navigation-link'    => array( 'url' ),
		'wp:navigation-submenu' => array( 'url' ),
		'wp:rss'                => array( 'feedURL' ),
	);

	/**
	 * A list of HTML attributes meant to contain URLs, as defined in the HTML specification.
	 * It includes some deprecated attributes like `lowsrc` and `highsrc` for the `IMG` element.
	 *
	 * See https://html.spec.whatwg.org/multipage/indices.html#attributes-1.
	 * See https://stackoverflow.com/questions/2725156/complete-list-of-html-tag-attributes-which-have-a-url-value.
	 */
	public const HTML_ATTRIBUTES_TO_ACCEPT_RELATIVE_URLS_FROM = array(
		'A'          => array( 'href' ),
		'APPLET'     => array( 'codebase', 'archive' ),
		'AREA'       => array( 'href' ),
		'AUDIO'      => array( 'src' ),
		'BASE'       => array( 'href' ),
		'BLOCKQUOTE' => array( 'cite' ),
		'BODY'       => array( 'background' ),
		'BUTTON'     => array( 'formaction' ),
		'COMMAND'    => array( 'icon' ),
		'DEL'        => array( 'cite' ),
		'EMBED'      => array( 'src' ),
		'FORM'       => array( 'action' ),
		'FRAME'      => array( 'longdesc', 'src' ),
		'HEAD'       => array( 'profile' ),
		'HTML'       => array( 'manifest' ),
		'IFRAME'     => array( 'longdesc', 'src' ),
		// SVG <image> element.
		'IMAGE'      => array( 'href' ),
		'IMG'        => array( 'longdesc', 'src', 'usemap', 'lowsrc', 'highsrc' ),
		'INPUT'      => array( 'src', 'usemap', 'formaction' ),
		'INS'        => array( 'cite' ),
		'LINK'       => array( 'href' ),
		'OBJECT'     => array( 'classid', 'codebase', 'data', 'usemap' ),
		'Q'          => array( 'cite' ),
		'SCRIPT'     => array( 'src' ),
		'SOURCE'     => array( 'src' ),
		'TRACK'      => array( 'src' ),
		'VIDEO'      => array( 'poster', 'src' ),
	);

	/**
	 * @TODO: Either explicitly support these attributes, or explicitly drop support for
	 *        handling their subsyntax. A generic URL matcher might be good enough.
	 */
	public const HTML_ATTRIBUTES_WITH_SUBSYNTAX_TO_ACCEPT_RELATIVE_URLS_FROM = array(
		'*'      => array( 'style' ), // background(), background-image().
		'APPLET' => array( 'archive' ),
		'IMG'    => array( 'srcset' ),
		'META'   => array( 'content' ),
		'SOURCE' => array( 'srcset' ),
		'OBJECT' => array( 'archive' ),
	);

	/**
	 * Also <style> and <script> tag content can contain URLs.
	 * <style> has specific syntax rules we can use for matching, but perhaps a generic matcher would be good enough?
	 *
	 * <style>
	 * #domID { background:url(https://mysite.com/wp-content/uploads/image.png) }
	 * </style>
	 *
	 * @TODO: Either explicitly support these tags, or explicitly drop support for
	 *        handling their subsyntax. A generic URL matcher might be good enough.
	 */
	public const HTML_TAGS_WITH_SUBSYNTAX_TO_ACCEPT_RELATIVE_URLS_FROM = array(
		'STYLE',
		'SCRIPT',
	);
}
Save
Cancel