.
 * @param string $vless        Whether the attribute is valueless. Use 'y' or 'n'.
 * @param string $element      The name of the element to which this attribute belongs.
 * @param array  $allowed_html The full list of allowed elements and attributes.
 * @return bool Whether or not the attribute is allowed.
 */
function wp_kses_attr_check( &$name, &$value, &$whole, $vless, $element, $allowed_html ) {
	$name_low    = strtolower( $name );
	$element_low = strtolower( $element );

	// The element itself is not allowed: blank the by-reference outputs and reject.
	if ( ! isset( $allowed_html[ $element_low ] ) ) {
		$name  = '';
		$value = '';
		$whole = '';
		return false;
	}

	$allowed_attr = $allowed_html[ $element_low ];

	// An entry set to an empty string is treated the same as a missing entry.
	if ( ! isset( $allowed_attr[ $name_low ] ) || '' === $allowed_attr[ $name_low ] ) {
		/*
		 * Allow `data-*` attributes.
		 *
		 * When specifying `$allowed_html`, the attribute name should be set as
		 * `data-*` (not to be mixed with the HTML 4.0 `data` attribute, see
		 * https://www.w3.org/TR/html40/struct/objects.html#adef-data).
		 *
		 * Note: the attribute name should only contain `A-Za-z0-9_-` chars,
		 * double hyphens `--` are not accepted by WordPress.
		 */
		if ( strpos( $name_low, 'data-' ) === 0 && ! empty( $allowed_attr['data-*'] )
			&& preg_match( '/^data(?:-[a-z0-9_]+)+$/', $name_low, $match )
		) {
			/*
			 * Add the whole attribute name to the allowed attributes and set any restrictions
			 * for the `data-*` attribute values for the current element.
			 */
			$allowed_attr[ $match[0] ] = $allowed_attr['data-*'];
		} else {
			$name  = '';
			$value = '';
			$whole = '';
			return false;
		}
	}

	if ( 'style' === $name_low ) {
		// Inline CSS is filtered separately; an empty() result rejects the attribute.
		$new_value = safecss_filter_attr( $value );

		if ( empty( $new_value ) ) {
			$name  = '';
			$value = '';
			$whole = '';
			return false;
		}

		// Keep `$whole` in sync with the filtered value.
		$whole = str_replace( $value, $new_value, $whole );
		$value = $new_value;
	}

	if ( is_array( $allowed_attr[ $name_low ] ) ) {
		// There are some checks.
		foreach ( $allowed_attr[ $name_low ] as $currkey => $currval ) {
			if ( ! wp_kses_check_attr_val( $value, $vless, $currkey, $currval ) ) {
				$name  = '';
				$value = '';
				$whole = '';
				return false;
			}
		}
	}

	return true;
}

/**
 * Builds an attribute list from string containing attributes.
 *
 * This function does a lot of work. It parses an attribute list into an array
 * with attribute data, and tries to do the right thing even if it gets weird
 * input. It will add quotes around attribute values that don't have any quotes
 * or apostrophes around them, to make it easier to produce HTML code that will
 * conform to W3C's HTML specification. It will also remove bad URL protocols
 * from attribute values. It also reduces duplicate attributes by using the
 * attribute defined first (`foo='bar' foo='baz'` will result in `foo='bar'`).
 *
 * The parser is a small state machine driven by `$mode`:
 *  - 0: expecting an attribute name,
 *  - 1: expecting `=` or whitespace (whitespace means the attribute is valueless),
 *  - 2: expecting an attribute value (double-quoted, single-quoted or unquoted).
 *
 * Each entry of the returned array is keyed by the attribute name exactly as it
 * was written and holds the keys `name`, `value`, `whole` and `vless`
 * (`'y'` for valueless attributes, `'n'` otherwise).
 *
 * @since 1.0.0
 *
 * @param string   $attr              Attribute list from HTML element to closing HTML element tag.
 * @param string[] $allowed_protocols Array of allowed URL protocols.
 * @return array[] Array of attribute information after parsing.
 */
function wp_kses_hair( $attr, $allowed_protocols ) {
	$attrarr  = array();
	$mode     = 0;
	$attrname = '';
	$uris     = wp_kses_uri_attributes();

	// Loop through the whole attribute list.

	while ( strlen( $attr ) != 0 ) {
		$working = 0; // Was the last operation successful?

		switch ( $mode ) {
			case 0:
				// Attribute name.
				if ( preg_match( '/^([_a-zA-Z][-_a-zA-Z0-9:.]*)/', $attr, $match ) ) {
					$attrname = $match[1];
					$working  = 1;
					$mode     = 1;
					$attr     = preg_replace( '/^[_a-zA-Z][-_a-zA-Z0-9:.]*/', '', $attr );
				}

				break;

			case 1:
				if ( preg_match( '/^\s*=\s*/', $attr ) ) { // Equals sign.
					$working = 1;
					$mode    = 2;
					$attr    = preg_replace( '/^\s*=\s*/', '', $attr );
					break;
				}

				if ( preg_match( '/^\s+/', $attr ) ) { // Valueless.
					$working = 1;
					$mode    = 0;

					// Only the first occurrence of an attribute is kept.
					if ( false === array_key_exists( $attrname, $attrarr ) ) {
						$attrarr[ $attrname ] = array(
							'name'  => $attrname,
							'value' => '',
							'whole' => $attrname,
							'vless' => 'y',
						);
					}

					$attr = preg_replace( '/^\s+/', '', $attr );
				}

				break;

			case 2:
				if ( preg_match( '%^"([^"]*)"(\s+|/?$)%', $attr, $match ) ) {
					// "value"
					$thisval = $match[1];
					if ( in_array( strtolower( $attrname ), $uris, true ) ) {
						$thisval = wp_kses_bad_protocol( $thisval, $allowed_protocols );
					}

					if ( false === array_key_exists( $attrname, $attrarr ) ) {
						$attrarr[ $attrname ] = array(
							'name'  => $attrname,
							'value' => $thisval,
							'whole' => "$attrname=\"$thisval\"",
							'vless' => 'n',
						);
					}

					$working = 1;
					$mode    = 0;

					/*
					 * Note: unlike the match above, the removal pattern does not accept a
					 * trailing `/` before the end of the string. In that case `$attr` is left
					 * untouched here, the next iteration fails to find an attribute name, and
					 * the remainder is handed to wp_kses_html_error().
					 */
					$attr = preg_replace( '/^"[^"]*"(\s+|$)/', '', $attr );
					break;
				}

				if ( preg_match( "%^'([^']*)'(\s+|/?$)%", $attr, $match ) ) {
					// 'value'
					$thisval = $match[1];
					if ( in_array( strtolower( $attrname ), $uris, true ) ) {
						$thisval = wp_kses_bad_protocol( $thisval, $allowed_protocols );
					}

					if ( false === array_key_exists( $attrname, $attrarr ) ) {
						$attrarr[ $attrname ] = array(
							'name'  => $attrname,
							'value' => $thisval,
							'whole' => "$attrname='$thisval'",
							'vless' => 'n',
						);
					}

					$working = 1;
					$mode    = 0;
					$attr    = preg_replace( "/^'[^']*'(\s+|$)/", '', $attr );
					break;
				}

				if ( preg_match( "%^([^\s\"']+)(\s+|/?$)%", $attr, $match ) ) {
					// value
					$thisval = $match[1];
					if ( in_array( strtolower( $attrname ), $uris, true ) ) {
						$thisval = wp_kses_bad_protocol( $thisval, $allowed_protocols );
					}

					if ( false === array_key_exists( $attrname, $attrarr ) ) {
						$attrarr[ $attrname ] = array(
							'name'  => $attrname,
							'value' => $thisval,
							'whole' => "$attrname=\"$thisval\"",
							'vless' => 'n',
						);
					}
					// We add quotes to conform to W3C's HTML spec.

					$working = 1;
					$mode    = 0;
					$attr    = preg_replace( "%^[^\s\"']+(\s+|$)%", '', $attr );
				}

				break;
		} // End switch.

		if ( 0 == $working ) { // Not well-formed, remove and try again.
			$attr = wp_kses_html_error( $attr );
			$mode = 0;
		}
	} // End while.

	if ( 1 == $mode && false === array_key_exists( $attrname, $attrarr ) ) {
		// Special case, for when the attribute list ends with a valueless
		// attribute like "selected".
		$attrarr[ $attrname ] = array(
			'name'  => $attrname,
			'value' => '',
			'whole' => $attrname,
			'vless' => 'y',
		);
	}

	return $attrarr;
}

/**
 * Finds all attributes of an HTML element.
 *
 * Does not modify input. May return "evil" output.
 *
 * Based on `wp_kses_split2()` and `wp_kses_attr()`.
 *
 * On success the first array item is the opening matter (`<` plus the element
 * name and any whitespace following it), the last item is the closing matter
 * (optional XHTML slash plus `>`), and the items in between are the attributes
 * as returned by `wp_kses_hair_parse()`.
 *
 * @since 4.2.3
 *
 * @param string $element HTML element.
 * @return array|false List of attributes found in the element. Returns false on failure.
 */
function wp_kses_attr_parse( $element ) {
	$valid = preg_match( '%^(<\s*)(/\s*)?([a-zA-Z0-9]+\s*)([^>]*)(>?)$%', $element, $matches );
	if ( 1 !== $valid ) {
		return false;
	}

	$begin  = $matches[1];
	$slash  = $matches[2];
	$elname = $matches[3];
	$attr   = $matches[4];
	$end    = $matches[5];

	if ( '' !== $slash ) {
		// Closing elements do not get parsed.
		return false;
	}

	// Is there a closing XHTML slash at the end of the attributes?
	if ( 1 === preg_match( '%\s*/\s*$%', $attr, $matches ) ) {
		$xhtml_slash = $matches[0];
		$attr        = substr( $attr, 0, -strlen( $xhtml_slash ) );
	} else {
		$xhtml_slash = '';
	}

	// Split it.
	$attrarr = wp_kses_hair_parse( $attr );
	if ( false === $attrarr ) {
		return false;
	}

	// Make sure all input is returned by adding front and back matter.
	array_unshift( $attrarr, $begin . $slash . $elname );
	array_push( $attrarr, $xhtml_slash . $end );

	return $attrarr;
}

/**
 * Builds an attribute list from string containing attributes.
 *
 * Does not modify input. May return "evil" output.
 * In case of unexpected input, returns false instead of stripping things.
 *
 * Based on `wp_kses_hair()` but does not return a multi-dimensional array.
 *
 * @since 4.2.3
 *
 * @param string $attr Attribute list from HTML element to closing HTML element tag.
 * @return array|false List of attributes found in $attr. Returns false on failure.
*/
function wp_kses_hair_parse( $attr ) {
	// Nothing to parse: an empty attribute string yields an empty list.
	if ( '' === $attr ) {
		return array();
	}

	// phpcs:disable Squiz.Strings.ConcatenationSpacing.PaddingFound -- don't remove regex indentation
	$regex =
		'(?:'
		.     '[_a-zA-Z][-_a-zA-Z0-9:.]*' // Attribute name.
		. '|'
		.     '\[\[?[^\[\]]+\]\]?'        // Shortcode in the name position implies unfiltered_html.
		. ')'
		. '(?:'                           // Attribute value.
		.     '\s*=\s*'                   // All values begin with '='.
		.     '(?:'
		.         '"[^"]*"'               // Double-quoted.
		.     '|'
		.         "'[^']*'"               // Single-quoted.
		.     '|'
		.         '[^\s"\']+'             // Non-quoted.
		.         '(?:\s|$)'              // Must have a space.
		.     ')'
		. '|'
		.     '(?:\s|$)'                  // If attribute has no value, space is required.
		. ')'
		. '\s*';                          // Trailing space is optional except as mentioned above.
	// phpcs:enable

	// Although it is possible to reduce this procedure to a single regexp,
	// we must run that regexp twice to get exactly the expected result.

	// First pass: the whole string must be a sequence of well-formed attributes.
	$validation = "%^($regex)+$%";
	// Second pass: pull out each individual attribute.
	$extraction = "%$regex%";

	if ( 1 === preg_match( $validation, $attr ) ) {
		preg_match_all( $extraction, $attr, $attrarr );
		return $attrarr[0];
	} else {
		return false;
	}
}

/**
 * Performs different checks for attribute values.
 *
 * The currently implemented checks are "maxlen", "minlen", "maxval", "minval",
 * and "valueless".
 *
 * @since 1.0.0
 *
 * @param string $value      Attribute value.
 * @param string $vless      Whether the attribute is valueless. Use 'y' or 'n'.
 * @param string $checkname  What $checkvalue is checking for.
 * @param mixed  $checkvalue What constraint the value should pass.
 * @return bool Whether check passes.
 */
function wp_kses_check_attr_val( $value, $vless, $checkname, $checkvalue ) {
	$ok = true;

	switch ( strtolower( $checkname ) ) {
		case 'maxlen':
			/*
			 * The maxlen check makes sure that the attribute value has a length not
			 * greater than the given value. This can be used to avoid Buffer Overflows
			 * in WWW clients and various Internet servers.
			 */

			if ( strlen( $value ) > $checkvalue ) {
				$ok = false;
			}
			break;

		case 'minlen':
			/*
			 * The minlen check makes sure that the attribute value has a length not
			 * smaller than the given value.
			 */

			if ( strlen( $value ) < $checkvalue ) {
				$ok = false;
			}
			break;

		case 'maxval':
			/*
			 * The maxval check does two things: it checks that the attribute value is
			 * an integer from 0 and up, without an excessive amount of zeroes or
			 * whitespace (to avoid Buffer Overflows). It also checks that the attribute
			 * value is not greater than the given value.
			 * This check can be used to avoid Denial of Service attacks.
			 */

			if ( ! preg_match( '/^\s{0,6}[0-9]{1,6}\s{0,6}$/', $value ) ) {
				$ok = false;
			}
			if ( $value > $checkvalue ) {
				$ok = false;
			}
			break;

		case 'minval':
			/*
			 * The minval check makes sure that the attribute value is a positive integer,
			 * and that it is not smaller than the given value.
			 */

			if ( ! preg_match( '/^\s{0,6}[0-9]{1,6}\s{0,6}$/', $value ) ) {
				$ok = false;
			}
			if ( $value < $checkvalue ) {
				$ok = false;
			}
			break;

		case 'valueless':
			/*
			 * The valueless check makes sure if the attribute has a value
			 * (like ``) or not (`