.0
	 *
	 * @return array[]|null {
	 *     Array of block structures.
	 *
	 *     @type array ...$0 {
	 *         An associative array of a single parsed block object. See WP_Block_Parser_Block.
	 *
	 *         @type string|null $blockName    Name of block.
	 *         @type array       $attrs        Attributes from block comment delimiters.
	 *         @type array[]     $innerBlocks  List of inner blocks. An array of arrays that
	 *                                         have the same structure as this one.
	 *         @type string      $innerHTML    HTML from inside block comment delimiters.
	 *         @type array       $innerContent List of string fragments and null markers where
	 *                                         inner blocks were found.
	 *     }
	 * }
	 */
	public function extract_full_block_and_advance(): ?array {
		// An HTML span is reported as a nameless block whose only content is the span itself.
		if ( $this->is_html() ) {
			$chunk = $this->get_html_content();

			return array(
				'blockName'    => null,
				'attrs'        => array(),
				'innerBlocks'  => array(),
				'innerHTML'    => $chunk,
				'innerContent' => array( $chunk ),
			);
		}

		$block = array(
			'blockName'    => $this->get_block_type(),
			'attrs'        => $this->allocate_and_return_parsed_attributes() ?? array(),
			'innerBlocks'  => array(),
			'innerHTML'    => '',
			'innerContent' => array(),
		);

		// Keep consuming tokens for as long as they sit deeper than the starting delimiter.
		$depth = $this->get_depth();
		while ( $this->next_token() && $this->get_depth() > $depth ) {
			if ( $this->is_html() ) {
				$chunk                   = $this->get_html_content();
				$block['innerHTML']     .= $chunk;
				$block['innerContent'][] = $chunk;
				continue;
			}

			/**
			 * Inner blocks.
			 *
			 * @todo This is a decent place to call {@link \render_block()}
			 * @todo Use iteration instead of recursion, or at least refactor to tail-call form.
			 */
			if ( $this->opens_block() ) {
				$inner_block             = $this->extract_full_block_and_advance();
				$block['innerBlocks'][]  = $inner_block;
				$block['innerContent'][] = null;
			}

			/*
			 * Because the parser has advanced past the closing block token, it
			 * may be matched on an HTML span. This needs to be processed before
			 * moving on to the next token at the start of the next loop iteration.
			 */
			if ( $this->is_html() ) {
				$chunk                   = $this->get_html_content();
				$block['innerHTML']     .= $chunk;
				$block['innerContent'][] = $chunk;
			}
		}

		return $block;
	}

	/**
	 * Returns the byte-offset after the ending character of an HTML comment,
	 * assuming the proper starting byte offset.
	 *
	 * A comment ends at the first `-->` or `--!>`. Comments whose opening run
	 * of dashes is immediately followed by `>`, such as `<!----->`, end at
	 * that `>`.
	 *
	 * @since 6.9.0
	 *
	 * @param int $comment_starting_at Where the HTML comment started, the leading `<`.
	 * @param int $search_end          Last offset in which to search, for limiting search span.
	 * @return int Offset after the current HTML comment ends, or `$search_end` if no end was found.
	 */
	private function find_html_comment_end( int $comment_starting_at, int $search_end ): int {
		$text = $this->source_text;

		// Find span-of-dashes comments which look like `<!----->`.
		$dashes_start_at = $comment_starting_at + 2;
		$closer_at       = $dashes_start_at + strspn( $text, '-', $dashes_start_at );
		if ( $closer_at < $search_end && '>' === $text[ $closer_at ] ) {
			// The `>` directly follows the dashes: the comment ends one byte past it.
			return $closer_at + 1;
		}

		// Otherwise, there are other characters inside the comment, find the first `-->` or `--!>`.
		$now_at = $comment_starting_at + 4;
		while ( $now_at < $search_end ) {
			$dashes_at = strpos( $text, '--', $now_at );
			if ( false === $dashes_at ) {
				return $search_end;
			}

			// Every dash in this run shares one candidate closer position: just past the run.
			$closer_must_be_at = $dashes_at + 2 + strspn( $text, '-', $dashes_at + 2 );
			if ( $closer_must_be_at < $search_end && '!' === $text[ $closer_must_be_at ] ) {
				++$closer_must_be_at;
			}

			if ( $closer_must_be_at < $search_end && '>' === $text[ $closer_must_be_at ] ) {
				return $closer_must_be_at + 1;
			}

			/*
			 * This run of dashes does not close the comment, and no other `--` can
			 * start before the rejected closer position. Resuming from there finds
			 * the same next candidate as stepping one byte at a time would, without
			 * rescanning the text leading up to this run on every step.
			 */
			$now_at = $closer_must_be_at;
		}

		return $search_end;
	}

	/**
	 * Returns the error recorded by the last attempt to parse a block comment
	 * delimiter, or `null` if that attempt succeeded.
	 *
	 * @since 6.9.0
	 *
	 * @return string|null Error from last attempt at parsing next block delimiter,
	 *                     or `null` if last attempt succeeded.
	 */
	public function get_last_error(): ?string {
		return $this->last_error;
	}

	/**
	 * Indicates if the last attempt to parse a block’s JSON attributes failed.
*
	 * @see \json_last_error()
	 *
	 * @since 6.9.0
	 *
	 * @return int JSON_ERROR_ code from last attempt to parse block JSON attributes.
	 */
	public function get_last_json_error(): int {
		return $this->last_json_error;
	}

	/**
	 * Reports what kind of block comment delimiter the processor is matched on.
	 *
	 * The result is one of:
	 *
	 *  - {@see self::OPENER}
	 *  - {@see self::CLOSER}
	 *  - {@see self::VOID}
	 *  - `null`
	 *
	 * HTML spans are always reported as {@see self::VOID}. When the processor
	 * is matched on neither a delimiter nor an HTML span, `null` is returned.
	 *
	 * @since 6.9.0
	 *
	 * @return string|null Type of the block comment delimiter, if currently matched.
	 */
	public function get_delimiter_type(): ?string {
		// HTML spans carry no delimiter of their own and are reported as void.
		if ( self::HTML_SPAN === $this->state ) {
			return self::VOID;
		}

		return self::MATCHED === $this->state ? $this->type : null;
	}

	/**
	 * Reports whether the closing flag was recorded for the delimiter.
	 *
	 * Prefer {@see self::get_delimiter_type()} for normal use. This accessor
	 * exists for custom error-handling, such as dealing with block closers
	 * which also contain the void flag.
	 *
	 * @since 6.9.0
	 *
	 * @return bool Whether the currently-matched block delimiter contains the closing flag.
	 */
	public function has_closing_flag(): bool {
		return $this->has_closing_flag;
	}

	/**
	 * Indicates if the block delimiter represents a block of the given type.
	 *
	 * Since the “core” namespace may be implicit, it’s allowable to pass
	 * either the fully-qualified block type with namespace and block name
	 * as well as the shorthand version only containing the block name, if
	 * the desired block is in the “core” namespace.
	 *
	 * Since freeform HTML content is non-block content, it has no block type.
	 * Passing the wildcard “*” will, however, return true for all block types,
	 * even the implicit freeform content, though not for spans of inner HTML.
	 *
	 * Example:
	 *
	 *     $is_core_paragraph = $processor->is_block_type( 'paragraph' );
	 *     $is_core_paragraph = $processor->is_block_type( 'core/paragraph' );
	 *     $is_formula        = $processor->is_block_type( 'math-block/formula' );
	 *
	 * @param string $block_type Block type name for the desired block.
	 *                           E.g. "paragraph", "core/paragraph", "math-blocks/formula".
* @return bool Whether this delimiter represents a block of the given type.
	 */
	public function is_block_type( string $block_type ): bool {
		/*
		 * NOTE(review): the wildcard is accepted before the HTML check below, so it
		 * also returns true while matched on a span of inner HTML. The method docs
		 * say it should not — confirm which of the two is intended.
		 */
		if ( '*' === $block_type ) {
			return true;
		}

		if ( ! $this->is_html() ) {
			// The block type spans from the start of the namespace through the end of the name.
			$type_length = $this->name_at - $this->namespace_at + $this->name_length;

			return $this->are_equal_block_types(
				$this->source_text,
				$this->namespace_at,
				$type_length,
				$block_type,
				0,
				strlen( $block_type )
			);
		}

		// Otherwise this is innerHTML and not a block.
		if ( 0 !== ( $this->open_blocks_length[0] ?? null ) ) {
			return false;
		}

		// This is a core/freeform text block, it’s special.
		return in_array( $block_type, array( 'core/freeform', 'freeform' ), true );
	}

	/**
	 * Given two spans of text, indicate if they represent identical block types.
	 *
	 * This function normalizes block types to account for implicit core namespacing.
	 *
	 * Note! This function only returns valid results when the complete block types are
	 * represented in the span offsets and lengths. This means that the full optional
	 * namespace and block name must be represented in the input arguments.
	 *
	 * Example:
	 *
	 *     //       0    5   10   15   20
	 *     $text = 'block core/block my/block';
	 *
	 *     true  === WP_Block_Processor::are_equal_block_types( $text, 0, 5, $text, 6, 10 );
	 *     false === WP_Block_Processor::are_equal_block_types( $text, 0, 5, 'my/block', 0, 8 );
	 *
	 * @since 6.9.0
	 *
	 * @param string $a_text   Text in which first block type appears.
	 * @param int    $a_at     Byte offset into text in which first block type starts.
	 * @param int    $a_length Byte length of first block type.
	 * @param string $b_text   Text in which second block type appears (may be the same as the first text).
	 * @param int    $b_at     Byte offset into text in which second block type starts.
	 * @param int    $b_length Byte length of second block type.
	 * @return bool Whether the spans of text represent identical block types, normalized for namespacing.
*/
	public static function are_equal_block_types( string $a_text, int $a_at, int $a_length, string $b_text, int $b_at, int $b_length ): bool {
		// A span is namespaced when a `/` appears somewhere before the span ends.
		$a_is_namespaced = strcspn( $a_text, '/', $a_at, $a_length ) !== $a_length;
		$b_is_namespaced = strcspn( $b_text, '/', $b_at, $b_length ) !== $b_length;

		// Both or neither contain a namespace: the spans must match byte-for-byte.
		if ( $a_is_namespaced === $b_is_namespaced ) {
			return (
				$a_length === $b_length &&
				0 === substr_compare( $b_text, substr( $a_text, $a_at, $a_length ), $b_at, $b_length )
			);
		}

		// Only one is namespaced: prefix the other with the implicit “core” namespace first.
		if ( $a_is_namespaced ) {
			$qualified_b = 'core/' . substr( $b_text, $b_at, $b_length );

			return (
				strlen( $qualified_b ) === $a_length &&
				0 === substr_compare( $a_text, $qualified_b, $a_at, $a_length )
			);
		}

		$qualified_a = 'core/' . substr( $a_text, $a_at, $a_length );

		return (
			strlen( $qualified_a ) === $b_length &&
			0 === substr_compare( $b_text, $qualified_a, $b_at, $b_length )
		);
	}

	/**
	 * Indicates if the matched delimiter is an opening or void delimiter of the given type,
	 * if a type is provided, otherwise if it opens any block or implicit freeform HTML content.
	 *
	 * This is a helper method to ease handling of code inspecting where blocks start, and for
	 * checking if the blocks are of a given type. The function is variadic to allow for
	 * checking if the delimiter opens one of many possible block types.
	 *
	 * To advance to the start of a block {@see self::next_block()}.
*
	 * Example:
	 *
	 *     $processor = new WP_Block_Processor( $html );
	 *     while ( $processor->next_delimiter() ) {
	 *         if ( $processor->opens_block( 'core/code', 'syntaxhighlighter/code' ) ) {
	 *             echo "Found code!";
	 *             continue;
	 *         }
	 *
	 *         if ( $processor->opens_block( 'core/image' ) ) {
	 *             echo "Found an image!";
	 *             continue;
	 *         }
	 *
	 *         if ( $processor->opens_block() ) {
	 *             echo "Found a new block!";
	 *         }
	 *     }
	 *
	 * @since 6.9.0
	 *
	 * @see self::is_block_type()
	 *
	 * @param string[] $block_type Optional. Is the matched block type one of these?
	 *                             If none are provided, will not test block type.
	 * @return bool Whether the matched block delimiter opens a block, and whether it
	 *              opens a block of one of the given block types, if provided.
	 */
	public function opens_block( string ...$block_type ): bool {
		// HTML spans only open implicit freeform content at the top level.
		$is_nested_html = self::HTML_SPAN === $this->state && 1 !== count( $this->open_blocks_at );
		if ( $is_nested_html ) {
			return false;
		}

		/*
		 * Because HTML spans are discovered after the next delimiter is found,
		 * the delimiter type when visiting HTML spans refers to the type of the
		 * following delimiter. Therefore the HTML case is handled by checking
		 * the state and depth of the stack of open block.
		 */
		if ( ! $this->is_html() && self::CLOSER === $this->type ) {
			return false;
		}

		// With no types to test against, any opening is a match.
		if ( array() === $block_type ) {
			return true;
		}

		foreach ( $block_type as $candidate ) {
			if ( $this->is_block_type( $candidate ) ) {
				return true;
			}
		}

		return false;
	}

	/**
	 * Reports whether the processor is currently matched on an HTML span
	 * rather than on a block comment delimiter.
	 *
	 * @since 6.9.0
	 *
	 * @see self::is_non_whitespace_html()
	 *
	 * @return bool Whether the processor is matched on an HTML span.
	 */
	public function is_html(): bool {
		return self::HTML_SPAN === $this->state;
	}

	/**
	 * Indicates if the matched delimiter is an HTML span and comprises more
	 * than whitespace characters, i.e. contains real content.
*
	 * Many block serializers introduce newlines between block delimiters,
	 * so the presence of top-level non-block content does not imply that
	 * there are “real” freeform HTML blocks. Checking if there is content
	 * beyond whitespace is a more certain check, such as for determining
	 * whether to load CSS for the freeform or fallback block type.
	 *
	 * @since 6.9.0
	 *
	 * @see self::is_html()
	 *
	 * @return bool Whether the currently-matched delimiter is an HTML
	 *              span containing non-whitespace text.
	 */
	public function is_non_whitespace_html(): bool {
		if ( ! $this->is_html() ) {
			return false;
		}

		// The HTML span runs from the end of the previous delimiter up to the matched one.
		$span_at     = $this->after_previous_delimiter;
		$span_length = $this->matched_delimiter_at - $span_at;

		// There is real content when the leading run of whitespace stops short of the span’s end.
		return strspn( $this->source_text, " \t\f\r\n", $span_at, $span_length ) !== $span_length;
	}

	/**
	 * Extracts the raw text of the HTML span the processor is matched on.
	 *
	 * The span covers everything between the end of the previous delimiter
	 * and the start of the currently-matched one.
	 *
	 * @since 6.9.0
	 *
	 * @return string|null Raw HTML content, or `null` if not currently matched on HTML.
	 */
	public function get_html_content(): ?string {
		if ( $this->is_html() ) {
			$span_at = $this->after_previous_delimiter;

			return substr( $this->source_text, $span_at, $this->matched_delimiter_at - $span_at );
		}

		return null;
	}

	/**
	 * Allocates a substring for the block type and returns the fully-qualified
	 * name, including the namespace, if matched on a delimiter, otherwise `null`.
	 *
	 * This function is like {@see self::get_printable_block_type()} but when
	 * paused on a freeform HTML block, will return `null` instead of “core/freeform”.
	 * The `null` behavior matches what {@see \parse_blocks()} returns but may not
	 * be as useful as having a string value.
	 *
	 * This function allocates a substring for the given block type. This
	 * allocation will be small and likely fine in most cases, but it's
	 * preferable to call {@see self::is_block_type()} if only needing
	 * to know whether the delimiter is for a given block type, as that
	 * function is more efficient for this purpose and avoids the allocation.
*
	 * Example:
	 *
	 *     // Avoid.
	 *     'core/paragraph' === $processor->get_block_type();
	 *
	 *     // Prefer.
	 *     $processor->is_block_type( 'core/paragraph' );
	 *     $processor->is_block_type( 'paragraph' );
	 *     $processor->is_block_type( 'core/freeform' );
	 *
	 *     // Freeform HTML content has no block type.
	 *     $processor = new WP_Block_Processor( 'non-block content' );
	 *     $processor->next_token();
	 *     null === $processor->get_block_type();
	 *
	 * @since 6.9.0
	 *
	 * @see self::are_equal_block_types()
	 *
	 * @return string|null Fully-qualified block namespace and type, e.g. "core/paragraph",
	 *                     if matched on an explicit delimiter, otherwise `null`.
	 */
	public function get_block_type(): ?string {
		// In these states there is no matched delimiter to read a block type from.
		$is_unmatched = in_array(
			$this->state,
			array( self::READY, self::COMPLETE, self::INCOMPLETE_INPUT ),
			true
		);

		// HTML spans are core/freeform or inner HTML; neither has an explicit block type.
		if ( $is_unmatched || $this->is_html() ) {
			return null;
		}

		// The block type spans from the start of the namespace through the end of the name.
		$type_length = $this->name_at - $this->namespace_at + $this->name_length;

		return self::normalize_block_type( substr( $this->source_text, $this->namespace_at, $type_length ) );
	}

	/**
	 * Allocates a printable substring for the block type and returns the fully-qualified
	 * name, including the namespace, if matched on a delimiter or freeform block, otherwise `null`.
	 *
	 * This function is like {@see self::get_block_type()} but when paused on a freeform
	 * HTML block, will return “core/freeform” instead of `null`. The `null` behavior matches
	 * what {@see \parse_blocks()} returns but may not be as useful as having a string value.
	 *
	 * This function allocates a substring for the given block type. This
	 * allocation will be small and likely fine in most cases, but it's
	 * preferable to call {@see self::is_block_type()} if only needing
	 * to know whether the delimiter is for a given block type, as that
	 * function is more efficient for this purpose and avoids the allocation.
	 *
	 * Example:
	 *
	 *     // Avoid.
	 *     'core/paragraph' === $processor->get_printable_block_type();
	 *
	 *     // Prefer.
*     $processor->is_block_type( 'core/paragraph' );
	 *     $processor->is_block_type( 'paragraph' );
	 *     $processor->is_block_type( 'core/freeform' );
	 *
	 *     // Freeform HTML content is given an implicit type.
	 *     $processor = new WP_Block_Processor( 'non-block content' );
	 *     $processor->next_token();
	 *     'core/freeform' === $processor->get_printable_block_type();
	 *
	 * @since 6.9.0
	 *
	 * @see self::are_equal_block_types()
	 *
	 * @return string|null Fully-qualified block namespace and type, e.g. "core/paragraph",
	 *                     if matched on an explicit delimiter or freeform block; the
	 *                     placeholder "#innerHTML" if matched on any other HTML span;
	 *                     otherwise `null`.
	 */
	public function get_printable_block_type(): ?string {
		// In these states there is nothing matched to describe.
		$is_unmatched = in_array(
			$this->state,
			array( self::READY, self::COMPLETE, self::INCOMPLETE_INPUT ),
			true
		);
		if ( $is_unmatched ) {
			return null;
		}

		if ( ! $this->is_html() ) {
			// The block type spans from the start of the namespace through the end of the name.
			$type_length = $this->name_at - $this->namespace_at + $this->name_length;

			return self::normalize_block_type( substr( $this->source_text, $this->namespace_at, $type_length ) );
		}

		// This is a core/freeform text block, it’s special.
		if ( 1 === count( $this->open_blocks_at ) ) {
			return 'core/freeform';
		}

		return '#innerHTML';
	}

	/**
	 * Prefixes a block name with the implicit “core” namespace when it has none.
	 *
	 * Names which already contain a `/` are returned untouched.
	 *
	 * Example:
	 *
	 *     'core/paragraph' === WP_Block_Processor::normalize_block_type( 'paragraph' );
	 *     'core/paragraph' === WP_Block_Processor::normalize_block_type( 'core/paragraph' );
	 *     'my/paragraph'   === WP_Block_Processor::normalize_block_type( 'my/paragraph' );
	 *
	 * @since 6.9.0
	 *
	 * @param string $block_type Valid block name, potentially without a namespace.
	 * @return string Fully-qualified block type including namespace.
	 */
	public static function normalize_block_type( string $block_type ): string {
		if ( false !== strpos( $block_type, '/' ) ) {
			return $block_type;
		}

		return "core/{$block_type}";
	}

	/**
	 * Returns a lazy wrapper around the block attributes, which can be used
	 * for efficiently interacting with the JSON attributes.
*
	 * This stub hints that there should be a lazy interface for parsing
	 * block attributes but doesn’t define it. It serves both as a placeholder
	 * for one to come as well as a guard against implementing an eager
	 * function in its place.
	 *
	 * @throws Exception This function is a stub for subclasses to implement
	 *                   when providing streaming attribute parsing.
	 *
	 * @since 6.9.0
	 *
	 * @see self::allocate_and_return_parsed_attributes()
	 *
	 * @return never
	 */
	public function get_attributes() {
		// Always fails: there is no lazy implementation in this class.
		$message = 'Lazy attribute parsing not yet supported';

		throw new Exception( $message );
	}

	/**
	 * Attempts to parse and return the entire JSON attributes from the delimiter,
	 * allocating memory and processing the JSON span in the process.
	 *
	 * This does not return any parsed attributes for a closing block delimiter
	 * even if there is a span of JSON content; this JSON is a parsing error.
	 *
	 * Consider calling {@see static::get_attributes()} instead if it's not
	 * necessary to read all the attributes at the same time, as that provides
	 * a more efficient mechanism for typical use cases.
	 *
	 * Since the JSON span inside the comment delimiter may not be valid JSON,
	 * this function will return `null` if it cannot parse the span and set the
	 * {@see static::get_last_json_error()} to the appropriate JSON_ERROR_ constant.
	 *
	 * If the delimiter contains no JSON span, it will also return `null`,
	 * but the last error will be set to {@see \JSON_ERROR_NONE}.
*
	 * Example:
	 *
	 *     $processor = new WP_Block_Processor( '<!-- wp:image {"url":"https://wordpress.org/favicon.ico"} -->' );
	 *     $processor->next_delimiter();
	 *     $memory_hungry_and_slow_attributes = $processor->allocate_and_return_parsed_attributes();
	 *     $memory_hungry_and_slow_attributes === array( 'url' => 'https://wordpress.org/favicon.ico' );
	 *
	 *     // JSON on a closing delimiter is never parsed.
	 *     $processor = new WP_Block_Processor( '<!-- /wp:image {"url":"https://wordpress.org/favicon.ico"} -->' );
	 *     $processor->next_delimiter();
	 *     null            === $processor->allocate_and_return_parsed_attributes();
	 *     JSON_ERROR_NONE === $processor->get_last_json_error();
	 *
	 *     $processor = new WP_Block_Processor( '<!-- wp:separator {} /-->' );
	 *     $processor->next_delimiter();
	 *     array() === $processor->allocate_and_return_parsed_attributes();
	 *
	 *     // No JSON span at all.
	 *     $processor = new WP_Block_Processor( '<!-- wp:separator /-->' );
	 *     $processor->next_delimiter();
	 *     null === $processor->allocate_and_return_parsed_attributes();
	 *
	 *     // A JSON span which fails to decode, here because of an unterminated string.
	 *     $processor = new WP_Block_Processor( '<!-- wp:image {"url} -->' );
	 *     $processor->next_delimiter();
	 *     null                 === $processor->allocate_and_return_parsed_attributes();
	 *     JSON_ERROR_CTRL_CHAR === $processor->get_last_json_error();
	 *
	 * @since 6.9.0
	 *
	 * @return array|null Parsed JSON attributes, if present and valid, otherwise `null`.
	 */
	public function allocate_and_return_parsed_attributes(): ?array {
		// Start from a clean slate so that the early exits below report no JSON error.
		$this->last_json_error = JSON_ERROR_NONE;

		$has_nothing_to_parse = (
			self::CLOSER === $this->type ||
			$this->is_html() ||
			0 === $this->json_length
		);
		if ( $has_nothing_to_parse ) {
			return null;
		}

		$decoded = json_decode(
			substr( $this->source_text, $this->json_at, $this->json_length ),
			null,
			512,
			JSON_OBJECT_AS_ARRAY | JSON_INVALID_UTF8_SUBSTITUTE
		);

		$decode_error          = json_last_error();
		$this->last_json_error = $decode_error;

		// JSON which decodes to something other than an array is not returned either.
		if ( JSON_ERROR_NONE !== $decode_error || ! is_array( $decoded ) ) {
			return null;
		}

		return $decoded;
	}

	/**
	 * Returns the span representing the currently-matched delimiter, if matched, otherwise `null`.
	 *
	 * Example:
	 *
	 *     $processor = new WP_Block_Processor( '<!-- wp:void /-->' );
	 *     null === $processor->get_span();
	 *
	 *     $processor->next_delimiter();
	 *     WP_HTML_Span( 0, 17 ) === $processor->get_span();
	 *
	 * @since 6.9.0
	 *
	 * @return WP_HTML_Span|null Span of text in source text spanning matched delimiter.
*/
	public function get_span(): ?WP_HTML_Span {
		// An HTML span covers the text between the previous delimiter and the matched one.
		if ( self::HTML_SPAN === $this->state ) {
			$span_at = $this->after_previous_delimiter;

			return new WP_HTML_Span( $span_at, $this->matched_delimiter_at - $span_at );
		}

		if ( self::MATCHED === $this->state ) {
			return new WP_HTML_Span( $this->matched_delimiter_at, $this->matched_delimiter_length );
		}

		return null;
	}

	//
	// Constant declarations that would otherwise pollute the top of the class.
	//

	/**
	 * Delimiter type: the block comment delimiter closes an open block.
	 *
	 * @see self::$type
	 *
	 * @since 6.9.0
	 */
	const CLOSER = 'closer';

	/**
	 * Delimiter type: the block comment delimiter opens a block.
	 *
	 * @see self::$type
	 *
	 * @since 6.9.0
	 */
	const OPENER = 'opener';

	/**
	 * Delimiter type: the block comment delimiter represents a void block
	 * with no inner content of any kind.
	 *
	 * @see self::$type
	 *
	 * @since 6.9.0
	 */
	const VOID = 'void';

	/**
	 * Processor state: ready to start parsing but hasn’t yet begun.
	 *
	 * @see self::$state
	 *
	 * @since 6.9.0
	 */
	const READY = 'processor-ready';

	/**
	 * Processor state: matched on an explicit block delimiter.
	 *
	 * @see self::$state
	 *
	 * @since 6.9.0
	 */
	const MATCHED = 'processor-matched';

	/**
	 * Processor state: matched on the opening of an implicit freeform delimiter.
	 *
	 * @see self::$state
	 *
	 * @since 6.9.0
	 */
	const HTML_SPAN = 'processor-html-span';

	/**
	 * Processor state: the parser started parsing a block comment delimiter, but
	 * the input document ended before it could finish. The document was likely truncated.
	 *
	 * @see self::$state
	 *
	 * @since 6.9.0
	 */
	const INCOMPLETE_INPUT = 'incomplete-input';

	/**
	 * Processor state: parsing has finished and there is nothing left to scan.
	 *
	 * @see self::$state
	 *
	 * @since 6.9.0
	 */
	const COMPLETE = 'processor-complete';
}