WordPressを読む 25-2 /blog/wp-includes/formatting.php 2
2014/12/09
目次
/blog/wp-includes/formatting.php 2
関数 wpautop()
/**
 * Replaces double line-breaks with paragraph elements.
 *
 * Groups of text separated by blank lines are wrapped in <p>...</p>, and
 * (optionally) remaining single newlines are turned into <br /> tags.
 * Content inside <pre> elements is lifted out before processing and put
 * back untouched at the end.
 *
 * @param string $pee The text which has to be formatted.
 * @param bool   $br  Optional. If true, convert remaining line-breaks into
 *                    <br /> after paragraphing; line-breaks inside <script>
 *                    and <style> elements are preserved. Default true.
 * @return string Text which has been converted into correct paragraph tags.
 */
function wpautop($pee, $br = true) {
	$pre_tags = array();

	if ( trim($pee) === '' ) {
		return '';
	}

	$pee = $pee . "\n"; // just to make things a little easier, pad the end

	// Swap every <pre>...</pre> block for a unique placeholder so none of the
	// paragraph/line-break rewriting below can touch its contents.
	if ( strpos($pee, '<pre') !== false ) {
		$pee_parts = explode( '</pre>', $pee );
		$last_pee  = array_pop($pee_parts);
		$pee       = '';
		$i         = 0;

		foreach ( $pee_parts as $pee_part ) {
			$start = strpos($pee_part, '<pre');

			// Malformed html? A closing tag without an opener: pass it through.
			if ( $start === false ) {
				$pee .= $pee_part;
				continue;
			}

			$name            = "<pre wp-pre-tag-$i></pre>";
			$pre_tags[$name] = substr( $pee_part, $start ) . '</pre>';

			$pee .= substr( $pee_part, 0, $start ) . $name;
			$i++;
		}

		$pee .= $last_pee;
	}

	// Two consecutive <br /> tags are treated as a paragraph break.
	$pee = preg_replace('|<br />\s*<br />|', "\n\n", $pee);

	// Space things out a little
	$allblocks = '(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|form|map|area|blockquote|address|math|style|p|h[1-6]|hr|fieldset|legend|section|article|aside|hgroup|header|footer|nav|figure|details|menu|summary)';
	$pee = preg_replace('!(<' . $allblocks . '[^>]*>)!', "\n$1", $pee);
	$pee = preg_replace('!(</' . $allblocks . '>)!', "$1\n\n", $pee);
	$pee = str_replace(array("\r\n", "\r"), "\n", $pee); // cross-platform newlines

	if ( strpos( $pee, '<option' ) !== false ) {
		// no P/BR around option
		$pee = preg_replace( '|\s*<option|', '<option', $pee );
		$pee = preg_replace( '|</option>\s*|', '</option>', $pee );
	}

	if ( strpos( $pee, '</object>' ) !== false ) {
		// no P/BR around param and embed
		$pee = preg_replace( '|(<object[^>]*>)\s*|', '$1', $pee );
		$pee = preg_replace( '|\s*</object>|', '</object>', $pee );
		$pee = preg_replace( '%\s*(</?(?:param|embed)[^>]*>)\s*%', '$1', $pee );
	}

	if ( strpos( $pee, '<source' ) !== false || strpos( $pee, '<track' ) !== false ) {
		// no P/BR around source and track
		$pee = preg_replace( '%([<\[](?:audio|video)[^>\]]*[>\]])\s*%', '$1', $pee );
		$pee = preg_replace( '%\s*([<\[]/(?:audio|video)[>\]])%', '$1', $pee );
		$pee = preg_replace( '%\s*(<(?:source|track)[^>]*>)\s*%', '$1', $pee );
	}

	$pee = preg_replace("/\n\n+/", "\n\n", $pee); // take care of duplicates

	// make paragraphs, including one at the end
	$pees = preg_split('/\n\s*\n/', $pee, -1, PREG_SPLIT_NO_EMPTY);
	$pee  = '';
	foreach ( $pees as $paragraph ) {
		$pee .= '<p>' . trim($paragraph, "\n") . "</p>\n";
	}

	// under certain strange conditions it could create a P of entirely whitespace
	$pee = preg_replace('|<p>\s*</p>|', '', $pee);
	// Close a paragraph that would otherwise run into a closing div/address/form.
	$pee = preg_replace('!<p>([^<]+)</(div|address|form)>!', "<p>$1</p></$2>", $pee);
	// don't wrap a lone block-level tag in a paragraph
	$pee = preg_replace('!<p>\s*(</?' . $allblocks . '[^>]*>)\s*</p>!', "$1", $pee);
	// problem with nested lists
	$pee = preg_replace("|<p>(<li.+?)</p>|", "$1", $pee);
	// Move the paragraph inside the blockquote instead of around it.
	$pee = preg_replace('|<p><blockquote([^>]*)>|i', "<blockquote$1><p>", $pee);
	$pee = str_replace('</blockquote></p>', '</p></blockquote>', $pee);
	// Drop a <p> directly before, or a </p> directly after, a block-level tag.
	$pee = preg_replace('!<p>\s*(</?' . $allblocks . '[^>]*>)!', "$1", $pee);
	$pee = preg_replace('!(</?' . $allblocks . '[^>]*>)\s*</p>!', "$1", $pee);

	if ( $br ) {
		// Hide newlines inside <script>/<style> behind a marker so they do not become <br />.
		$pee = preg_replace_callback('/<(script|style).*?<\/\\1>/s', '_autop_newline_preservation_helper', $pee);
		$pee = preg_replace('|(?<!<br />)\s*\n|', "<br />\n", $pee); // optionally make line breaks
		$pee = str_replace('<WPPreserveNewline />', "\n", $pee);
	}

	// Remove <br /> that ended up right after, or right before, a block-level tag.
	$pee = preg_replace('!(</?' . $allblocks . '[^>]*>)\s*<br />!', "$1", $pee);
	$pee = preg_replace('!<br />(\s*</?(?:p|li|div|dl|dd|dt|th|pre|td|ul|ol)[^>]*>)!', '$1', $pee);
	$pee = preg_replace( "|\n</p>$|", '</p>', $pee );

	// Put the original <pre> blocks back in place of their placeholders.
	if ( !empty($pre_tags) ) {
		$pee = str_replace(array_keys($pre_tags), array_values($pre_tags), $pee);
	}

	return $pee;
}
関数 _autop_newline_preservation_helper()
/**
 * Regex callback that masks newlines inside a matched element.
 *
 * Every newline in the full match is swapped for the marker string
 * <WPPreserveNewline /> so a later pass can tell them apart from
 * ordinary line breaks.
 *
 * @param array $matches preg_replace_callback() matches array; index 0 is the full match.
 * @return string The full match with each newline replaced by the marker.
 */
function _autop_newline_preservation_helper( $matches ) {
	$whole_match = $matches[0];
	$newline_map = array( "\n" => "<WPPreserveNewline />" );

	return strtr( $whole_match, $newline_map );
}
関数 shortcode_unautop()
/**
 * Strips the <p>...</p> wrapper from paragraphs that contain only a shortcode.
 *
 * A paragraph is unwrapped when its entire content (ignoring surrounding
 * whitespace) is a single registered shortcode: either self-closing, a bare
 * opening tag, or an opening tag with its matching closing tag.
 *
 * @param string $pee The content.
 * @return string The content with standalone shortcodes unwrapped, or the
 *                input unchanged when no shortcodes are registered.
 */
function shortcode_unautop( $pee ) {
	global $shortcode_tags;

	// Nothing registered (or the registry is not an array): nothing to unwrap.
	if ( empty( $shortcode_tags ) || !is_array( $shortcode_tags ) ) {
		return $pee;
	}

	$quoted_names = array_map( 'preg_quote', array_keys( $shortcode_tags ) );
	$tagregexp    = implode( '|', $quoted_names );
	$spaces       = wp_spaces_regexp();

	// The pattern is listed piece by piece; the pieces are concatenated in order.
	$fragments = array(
		'/',
		'<p>',                    // Opening paragraph
		'(?:' . $spaces . ')*+',  // Optional leading whitespace
		'(',                      // 1: The shortcode
		'\\[',                    // Opening bracket
		"($tagregexp)",           // 2: Shortcode name
		'(?![\\w-])',             // Not followed by word character or hyphen
		// Unrolled loop: inside the opening shortcode tag
		'[^\\]\\/]*',             // Not a closing bracket or forward slash
		'(?:',
		'\\/(?!\\])',             // A forward slash not followed by a closing bracket
		'[^\\]\\/]*',             // Not a closing bracket or forward slash
		')*?',
		'(?:',
		'\\/\\]',                 // Self closing tag and closing bracket
		'|',
		'\\]',                    // Closing bracket
		'(?:',                    // Unrolled loop: optionally, anything between the opening and closing tags
		'[^\\[]*+',               // Not an opening bracket
		'(?:',
		'\\[(?!\\/\\2\\])',       // An opening bracket not followed by the closing shortcode tag
		'[^\\[]*+',               // Not an opening bracket
		')*+',
		'\\[\\/\\2\\]',           // Closing shortcode tag
		')?',
		')',
		')',
		'(?:' . $spaces . ')*+',  // Optional trailing whitespace
		'<\\/p>',                 // Closing paragraph
		'/s',
	);
	$pattern = implode( '', $fragments );

	return preg_replace( $pattern, '$1', $pee );
}
関数 seems_utf8()
/**
 * Checks whether a string looks like it is UTF-8 encoded.
 *
 * Walks the bytes of the string: each lead byte must match one of the
 * 1- to 6-byte lead patterns, and must be followed by the corresponding
 * number of continuation bytes of the form 10bbbbbb.
 *
 * @param string $str The string to be checked.
 * @return bool True if every byte sequence fits one of the patterns, false otherwise.
 */
function seems_utf8($str) {
	mbstring_binary_safe_encoding();
	$length = strlen($str);
	reset_mbstring_encoding();

	// Each row: bit mask, expected value under that mask, number of continuation bytes.
	$lead_patterns = array(
		array( 0x80, 0x00, 0 ), // 0bbbbbbb
		array( 0xE0, 0xC0, 1 ), // 110bbbbb
		array( 0xF0, 0xE0, 2 ), // 1110bbbb
		array( 0xF8, 0xF0, 3 ), // 11110bbb
		array( 0xFC, 0xF8, 4 ), // 111110bb
		array( 0xFE, 0xFC, 5 ), // 1111110b
	);

	$pos = 0;
	while ( $pos < $length ) {
		$lead     = ord( $str[ $pos ] );
		$trailing = -1;

		foreach ( $lead_patterns as $row ) {
			if ( ( $lead & $row[0] ) === $row[1] ) {
				$trailing = $row[2];
				break;
			}
		}

		// Does not match any model.
		if ( $trailing < 0 ) {
			return false;
		}

		// The announced number of 10bbbbbb bytes must follow.
		for ( $remaining = $trailing; $remaining > 0; $remaining-- ) {
			$pos++;
			if ( $pos === $length ) {
				return false;
			}
			if ( ( ord( $str[ $pos ] ) & 0xC0 ) !== 0x80 ) {
				return false;
			}
		}

		$pos++;
	}

	return true;
}
関数 _wp_specialchars()
/**
 * Converts a number of special characters into their HTML entities.
 *
 * Specifically deals with: &, <, >, ", and '.
 *
 * $quote_style can be set to ENT_COMPAT to encode " to &quot;, or ENT_QUOTES
 * to do both. 'double' is treated like ENT_COMPAT. 'single' leaves double
 * quotes alone and turns every ' into &#039; after encoding.
 *
 * @param string      $string        The text which is to be encoded.
 * @param int|string  $quote_style   Optional. ENT_NOQUOTES (default), ENT_COMPAT, ENT_QUOTES,
 *                                   'single' or 'double'. Any other non-empty value is
 *                                   treated as ENT_QUOTES.
 * @param string|bool $charset       Optional. Character encoding passed to htmlspecialchars().
 *                                   When falsy, the 'blog_charset' option is used. Default false.
 * @param bool        $double_encode Optional. Whether to encode existing html entities. Default false.
 * @return string The encoded text with HTML entities.
 */
function _wp_specialchars( $string, $quote_style = ENT_NOQUOTES, $charset = false, $double_encode = false ) {
	$string = (string) $string;

	if ( 0 === strlen( $string ) ) {
		return '';
	}

	// Don't bother if there are no specialchars - saves some processing
	if ( ! preg_match( '/[&<>"\']/', $string ) ) {
		return $string;
	}

	// Account for the previous behaviour of the function when the $quote_style is not an accepted value
	if ( empty( $quote_style ) ) {
		$quote_style = ENT_NOQUOTES;
	} elseif ( ! in_array( $quote_style, array( 0, 2, 3, 'single', 'double' ), true ) ) {
		$quote_style = ENT_QUOTES;
	}

	// Store the site charset as a static to avoid multiple calls to wp_load_alloptions()
	if ( ! $charset ) {
		static $_charset;
		if ( ! isset( $_charset ) ) {
			$alloptions = wp_load_alloptions();
			$_charset   = isset( $alloptions['blog_charset'] ) ? $alloptions['blog_charset'] : '';
		}
		$charset = $_charset;
	}

	// Normalise the common spellings to the name htmlspecialchars() expects.
	if ( in_array( $charset, array( 'utf8', 'utf-8', 'UTF8' ) ) ) {
		$charset = 'UTF-8';
	}

	// $_quote_style remembers the caller's choice; $quote_style is what gets
	// handed to htmlspecialchars().
	$_quote_style = $quote_style;

	if ( $quote_style === 'double' ) {
		$quote_style  = ENT_COMPAT;
		$_quote_style = ENT_COMPAT;
	} elseif ( $quote_style === 'single' ) {
		$quote_style = ENT_NOQUOTES;
	}

	// Handle double encoding ourselves
	if ( $double_encode ) {
		$string = @htmlspecialchars( $string, $quote_style, $charset );
	} else {
		// Decode &amp; into &
		$string = wp_specialchars_decode( $string, $_quote_style );

		// Guarantee every &entity; is valid or re-encode the &
		$string = wp_kses_normalize_entities( $string );

		// Now re-encode everything except &entity;. With the delimiter captured,
		// even indexes hold plain text and odd indexes hold the entities.
		$parts      = preg_split( '/(&#?x?[0-9a-z]+;)/i', $string, -1, PREG_SPLIT_DELIM_CAPTURE );
		$part_count = count( $parts );

		for ( $i = 0; $i < $part_count; $i += 2 ) {
			$parts[$i] = @htmlspecialchars( $parts[$i], $quote_style, $charset );
		}

		$string = implode( '', $parts );
	}

	// Backwards compatibility: 'single' encodes apostrophes as a numeric entity.
	if ( 'single' === $_quote_style ) {
		$string = str_replace( "'", '&#039;', $string );
	}

	return $string;
}
関数 wp_specialchars_decode()
/**
 * Converts a number of HTML entities into their special characters.
 *
 * Specifically deals with: &, <, >, ", and ', in their named, decimal and
 * hexadecimal entity forms. Zero-padded numeric entities (e.g. &#0038;) are
 * normalised first so that they are decoded too.
 *
 * @param string     $string      The text which is to be decoded.
 * @param int|string $quote_style Optional. ENT_NOQUOTES (default) leaves quote entities alone,
 *                                ENT_COMPAT or 'double' also decodes double quotes, 'single'
 *                                also decodes single quotes, ENT_QUOTES decodes both. Any other
 *                                non-empty value is treated as ENT_QUOTES.
 * @return string The decoded text without HTML entities.
 */
function wp_specialchars_decode( $string, $quote_style = ENT_NOQUOTES ) {
	$string = (string) $string;

	if ( 0 === strlen( $string ) ) {
		return '';
	}

	// Don't bother if there are no entities - saves a lot of processing
	if ( strpos( $string, '&' ) === false ) {
		return $string;
	}

	// Match the previous behaviour of _wp_specialchars() when the $quote_style is not an accepted value
	if ( empty( $quote_style ) ) {
		$quote_style = ENT_NOQUOTES;
	} elseif ( !in_array( $quote_style, array( 0, 2, 3, 'single', 'double' ), true ) ) {
		$quote_style = ENT_QUOTES;
	}

	// More complete than get_html_translation_table( HTML_SPECIALCHARS ).
	// Each *_preg table maps "any zero padding" to the canonical spelling that
	// the matching translation table knows how to decode.
	$single      = array( '&#039;' => '\'', '&#x27;' => '\'' );
	$single_preg = array( '/&#0*39;/' => '&#039;', '/&#x0*27;/i' => '&#x27;' );
	$double      = array( '&quot;' => '"', '&#034;' => '"', '&#x22;' => '"' );
	$double_preg = array( '/&#0*34;/' => '&#034;', '/&#x0*22;/i' => '&#x22;' );
	$others      = array( '&lt;' => '<', '&#060;' => '<', '&gt;' => '>', '&#062;' => '>', '&amp;' => '&', '&#038;' => '&', '&#x26;' => '&' );
	$others_preg = array( '/&#0*60;/' => '&#060;', '/&#0*62;/' => '&#062;', '/&#0*38;/' => '&#038;', '/&#x0*26;/i' => '&#x26;' );

	if ( $quote_style === ENT_QUOTES ) {
		$translation      = array_merge( $single, $double, $others );
		$translation_preg = array_merge( $single_preg, $double_preg, $others_preg );
	} elseif ( $quote_style === ENT_COMPAT || $quote_style === 'double' ) {
		$translation      = array_merge( $double, $others );
		$translation_preg = array_merge( $double_preg, $others_preg );
	} elseif ( $quote_style === 'single' ) {
		$translation      = array_merge( $single, $others );
		$translation_preg = array_merge( $single_preg, $others_preg );
	} elseif ( $quote_style === ENT_NOQUOTES ) {
		$translation      = $others;
		$translation_preg = $others_preg;
	}

	// Remove zero padding on numeric entities
	$string = preg_replace( array_keys( $translation_preg ), array_values( $translation_preg ), $string );

	// Replace characters according to translation table
	return strtr( $string, $translation );
}
関数 wp_check_invalid_utf8()
/**
 * Checks a string for invalid UTF-8 when the site charset is UTF-8.
 *
 * The string is returned unchanged when the 'blog_charset' option is not a
 * UTF-8 spelling, when PCRE lacks UTF-8 support, or when the string passes
 * the UTF-8 check. Otherwise the result of iconv() is returned if $strip is
 * set and iconv() exists, and an empty string in all other cases.
 *
 * @param string $string The text which is to be checked.
 * @param bool   $strip  Optional. Whether to attempt to strip out invalid UTF-8
 *                       (not recommended). Default false.
 * @return string The checked text.
 */
function wp_check_invalid_utf8( $string, $strip = false ) {
	// Both answers are cached for the lifetime of the request.
	static $site_uses_utf8     = null;
	static $pcre_supports_utf8 = null;

	$string = (string) $string;
	if ( strlen( $string ) === 0 ) {
		return '';
	}

	// Look up the site charset once to avoid repeated get_option() calls.
	if ( null === $site_uses_utf8 ) {
		$utf8_spellings = array( 'utf8', 'utf-8', 'UTF8', 'UTF-8' );
		$site_uses_utf8 = in_array( get_option( 'blog_charset' ), $utf8_spellings );
	}

	// Probe the installed PCRE library for the /u modifier once, and only
	// when the site charset makes the answer relevant.
	if ( $site_uses_utf8 && null === $pcre_supports_utf8 ) {
		$pcre_supports_utf8 = @preg_match( '/^./u', 'a' );
	}

	// We can't demand utf8 from the site or from PCRE, so hand the string back as-is.
	$can_validate = $site_uses_utf8 && $pcre_supports_utf8;
	if ( ! $can_validate ) {
		return $string;
	}

	// preg_match fails when it encounters invalid UTF8 in $string
	$looks_valid = ( 1 === @preg_match( '/^./us', $string ) );
	if ( $looks_valid ) {
		return $string;
	}

	// Attempt to strip the bad chars if requested (not recommended)
	$may_strip = $strip && function_exists( 'iconv' );

	return $may_strip ? iconv( 'utf-8', 'utf-8', $string ) : '';
}
関数 utf8_uri_encode()
/**
 * Percent-encodes the multi-byte characters of a UTF-8 string for use in a URI.
 *
 * Bytes below 128 are copied through unchanged. Every byte of a multi-byte
 * sequence (2, 3 or 4 octets, chosen from the lead byte) is written as
 * %xx using lowercase hex. When $length is non-zero, encoding stops before
 * the output would grow past $length characters, so an encoded character
 * is never cut in half.
 *
 * @param string $utf8_string The UTF-8 string to encode.
 * @param int    $length      Optional. Max length of the encoded output;
 *                            0 means no limit. Default 0.
 * @return string String with the multi-byte characters percent-encoded.
 */
function utf8_uri_encode( $utf8_string, $length = 0 ) {
	$unicode        = '';
	$values         = array(); // Octets collected so far for the current character.
	$num_octets     = 1;
	$unicode_length = 0;

	// Measure in bytes even when mbstring overloads strlen().
	mbstring_binary_safe_encoding();
	$string_length = strlen( $utf8_string );
	reset_mbstring_encoding();

	for ( $i = 0; $i < $string_length; $i++ ) {
		$value = ord( $utf8_string[ $i ] );

		if ( $value < 128 ) {
			if ( $length && ( $unicode_length >= $length ) ) {
				break;
			}
			$unicode .= chr( $value );
			$unicode_length++;
			continue;
		}

		// First octet of a new character: the lead byte tells how many octets it spans.
		if ( count( $values ) == 0 ) {
			if ( $value < 224 ) {
				$num_octets = 2;
			} elseif ( $value < 240 ) {
				$num_octets = 3;
			} else {
				$num_octets = 4;
			}
		}

		$values[] = $value;

		// Each octet becomes three output characters (%xx); stop if the whole
		// character would not fit.
		if ( $length && ( $unicode_length + ( $num_octets * 3 ) ) > $length ) {
			break;
		}

		if ( count( $values ) == $num_octets ) {
			foreach ( $values as $octet ) {
				$unicode .= '%' . dechex( $octet );
			}
			$unicode_length += $num_octets * 3;

			$values     = array();
			$num_octets = 1;
		}
	}

	return $unicode;
}