WordPressを読む 83 /blog/wp-content/plugins/crayon-syntax-highlighter/crayon_parser.class.php
/blog/wp-content/plugins/crayon-syntax-highlighter/crayon_parser.class.php
<?php
require_once ('global.php');
require_once (CRAYON_LANGS_PHP);

/* Manages parsing the syntax for any given language, constructing the regex, and validating the
 elements. */
class CrayonParser {
    // Properties and Constants ===============================================
    const CASE_INSENSITIVE = 'CASE_INSENSITIVE';
    const MULTI_LINE = 'MULTI_LINE';
    const SINGLE_LINE = 'SINGLE_LINE';
    const ALLOW_MIXED = 'ALLOW_MIXED';
    //const NO_END_TAG = '(?![^<]*>)'; // No longer used
    const HTML_CHAR = 'HTML_CHAR';
    const HTML_CHAR_REGEX = '<|>|(&([\w-]+);?)|[ \t]+';
    const CRAYON_ELEMENT = 'CRAYON_ELEMENT';
    const CRAYON_ELEMENT_REGEX = '\{\{crayon-internal:[^\}]*\}\}';
    const CRAYON_ELEMENT_REGEX_CAPTURE = '\{\{crayon-internal:([^\}]*)\}\}';

    // Names of the mode directives recognised in a language file (the keys are what matters;
    // they are joined into the mode-matching regex in parse()).
    private static $modes = array(
        self::CASE_INSENSITIVE => TRUE,
        self::MULTI_LINE => TRUE,
        self::SINGLE_LINE => TRUE,
        self::ALLOW_MIXED => TRUE
    );

    // Methods ================================================================

    // Static-only class: instantiation is disabled.
    private function __construct() {}

    /**
     * Parse all languages stored in CrayonLangs.
     * Avoid using this unless you must list the details in language files for all languages.
     * @return array|bool Array of all loaded CrayonLangs, or FALSE when none are loaded.
     */
    public static function parse_all() {
        $langs = CrayonResources::langs()->get();
        if (empty($langs)) {
            return FALSE;
        }
        foreach ($langs as $lang) {
            self::parse($lang->id());
        }
        return $langs;
    }

    /**
     * Read a syntax file and parse the regex rules within it, this may require several other files
     * containing lists of keywords and such to be read. Updates the parsed elements and regex in the
     * CrayonLang with the given $id.
     *
     * The language state is set to PARSED_ERRORS if any element line was rejected and to
     * PARSED_SUCCESS otherwise. When the file contains no element lines at all the state is left
     * untouched.
     *
     * @param string $id Id of a language known to CrayonResources::langs().
     * @return bool|null FALSE if the language is not loaded or its file could not be read;
     *                   NULL otherwise (including when the language was already parsed).
     */
    public static function parse($id) {
        // Verify the language is loaded and has not been parsed before
        if ( !($lang = CrayonResources::langs()->get($id)) ) {
            CrayonLog::syslog("The language with id '$id' was not loaded and could not be parsed.");
            return FALSE;
        } else if ($lang->is_parsed()) {
            return;
        }

        // Read language file
        // NOTE(review): the 'wcs' flags are interpreted by CrayonUtil::lines(); the result is used
        // below as a single string - confirm against CrayonUtil.
        $path = CrayonResources::langs()->path($id);
        CrayonLog::debug('Parsing language ' . $path);
        if ( ($file = CrayonUtil::lines($path, 'wcs')) === FALSE ) {
            CrayonLog::debug('Parsing failed ' . $path);
            return FALSE;
        }

        // Extract the language name, then strip the directive so it is not read as an element
        $name_pattern = '#^[ \t]*name[ \t]+([^\r\n]+)[ \t]*#mi';
        preg_match($name_pattern, $file, $name_match);
        if (count($name_match) > 1) {
            $lang->name($name_match[1]);
            $file = preg_replace($name_pattern, '', $file);
        }

        // Extract the language version
        $version_pattern = '#^[ \t]*version[ \t]+([^\r\n]+)[ \t]*#mi';
        preg_match($version_pattern, $file, $version_match);
        if (count($version_match) > 1) {
            $lang->version($version_match[1]);
            $file = preg_replace($version_pattern, '', $file);
        }

        // Extract the modes
        $mode_pattern = '#^[ \t]*(' . implode('|', array_keys(self::$modes)) . ')[ \t]+(?:=[ \t]*)?([^\r\n]+)[ \t]*#mi';
        preg_match_all($mode_pattern, $file, $mode_matches);
        if (count($mode_matches) == 3) {
            foreach ($mode_matches[1] as $i => $mode_name) {
                $lang->mode($mode_name, $mode_matches[2][$i]);
            }
            $file = preg_replace($mode_pattern, '', $file);
        }

        /* Add reserved Crayon element. This is used by Crayon internally. */
        $crayon_element = new CrayonElement(self::CRAYON_ELEMENT, $path, self::CRAYON_ELEMENT_REGEX);
        $lang->element(self::CRAYON_ELEMENT, $crayon_element);

        // Extract elements, classes and regex
        // Each line has the form: NAME[:FALLBACK] [optional css classes] regex
        $pattern = '#^[ \t]*([\w:]+)[ \t]+(?:\[([\w\t ]*)\][ \t]+)?([^\r\n]+)[ \t]*#m';
        preg_match_all($pattern, $file, $matches);
        $total = count($matches[0]);
        if ($total > 0) {
            $elements = $matches[1];
            $classes = $matches[2];
            $regexes = $matches[3];
        } else {
            CrayonLog::syslog("No regex patterns and/or elements were parsed from language file at '$path'.");
        }

        // Remember state in case we encounter catchable exceptions
        $error = FALSE;
        for ($i = 0; $i < $total; $i++) {
            $element_name = trim(strtoupper($elements[$i]));
            $class = $classes[$i];
            $regex = $regexes[$i];

            // Ensure both the element and regex are valid
            if (empty($element_name) || empty($regex)) {
                CrayonLog::syslog("Element(s) and/or regex(es) are missing in '$path'.");
                $error = TRUE;
                continue;
            }

            // Look for fallback element
            $pieces = explode(':', $element_name);
            if (count($pieces) == 2) {
                $element_name = $pieces[0];
                $fallback = $pieces[1];
            } else if (count($pieces) == 1) {
                $element_name = $pieces[0];
                $fallback = '';
            } else {
                CrayonLog::syslog("Too many colons found in element name '$element_name' in '$path'");
                $error = TRUE;
                continue;
            }

            // Create a new CrayonElement
            $element = new CrayonElement($element_name, $path);
            $element->fallback($fallback);
            if (!empty($class)) {
                // Avoid setting known css to blank
                $element->css($class);
            }
            if ($element->regex($regex) === FALSE) {
                $error = TRUE;
                continue;
            }

            // Add the regex to the element
            $lang->element($element_name, $element);
        }

        // Record the outcome once, after every element line has been seen. Doing this inside the
        // loop (after the continue statements) would miss failures on the trailing lines.
        if ($total > 0) {
            $lang->state($error ? CrayonLang::PARSED_ERRORS : CrayonLang::PARSED_SUCCESS);
        }

        /* Prevents < > and other html entities from being printed as is, which could lead to actual html tags
         * from the printed code appearing on the page - not good. This can also act to color any HTML entities
         * that are not picked up by previously defined elements. */
        $html = new CrayonElement(self::HTML_CHAR, $path, self::HTML_CHAR_REGEX);
        $lang->element(self::HTML_CHAR, $html);
    }

    /**
     * Validates regex and accesses data stored in a CrayonElement.
     *
     * Expands the (?alt:file), (?default[:element]) and (?html:...) tags, passes the result through
     * CrayonUtil::esc_atomic() and CrayonUtil::esc_hash(), and checks that it compiles.
     *
     * @param string $regex Raw regex as written in the language file.
     * @param object $element The element the regex belongs to; its class must be CRAYON_ELEMENT_CLASS.
     * @return string|bool The processed regex, FALSE if a tag could not be resolved or the regex
     *                     does not compile, or '' if the arguments are of the wrong type.
     */
    public static function validate_regex($regex, $element) {
        // is_object() must be checked first: get_class() on a non-object throws a TypeError on
        // PHP 8, which the @ operator cannot suppress.
        if (!is_string($regex) || !is_object($element) || get_class($element) !== CRAYON_ELEMENT_CLASS) {
            return '';
        }

        // If the (?alt) tag has been used, insert the file into the regex
        $file = self::regex_match('#\(\?alt:(.+?)\)#', $regex);
        if ( count($file) == 2 ) {
            // Element 0 has full match, 1 has captured groups
            foreach ($file[1] as $i => $alt_name) {
                // NOTE(review): the 'rcwh' flags are interpreted by CrayonUtil::lines(); the result
                // is used here as an array of lines - confirm against CrayonUtil.
                $file_lines = CrayonUtil::lines(dirname($element->path()) . crayon_s() . $alt_name, 'rcwh');
                if ($file_lines === FALSE) {
                    CrayonLog::syslog("Parsing of '{$element->path()}' failed, an (?alt) tag failed for the element '{$element->name()}'" );
                    return FALSE;
                }
                $file_lines = implode('|', $file_lines);
                // If any spaces exist, treat them as whitespace
                $file_lines = preg_replace('#[ \t]+#msi', '\s+', $file_lines);
                $regex = str_replace($file[0][$i], "(?:$file_lines)", $regex);
            }
        }

        // If the (?default:element) function is used, replace the regex with the default, if exists
        $def = self::regex_match('#\(\?default(?:\:(\w+))?\)#', $regex);
        if ( count($def) == 2 ) {
            // Load default language
            $default = CrayonResources::langs()->get(CrayonLangs::DEFAULT_LANG);
            // If default has not been loaded, we can't use it, skip the element
            if (!$default) {
                CrayonLog::syslog(
                        "Could not use default regex in the element '{$element->name()}' in '{$element->path()}'");
                return FALSE;
            }
            foreach ($def[1] as $i => $requested) {
                // If an element has been provided use it, otherwise use this element's own name
                $element_name = !empty($requested) ? $requested : $element->name();
                $default_element = $default->element($element_name);
                if (!$default_element) {
                    CrayonLog::syslog("The language at '{$element->path()}' referred to the Default Language regex for element '{$element->name()}', which did not exist.");
                    if (CRAYON_DEBUG) {
                        CrayonLog::syslog("Default language URL: " . CrayonResources::langs()->url(CrayonLangs::DEFAULT_LANG));
                        CrayonLog::syslog("Default language Path: " . CrayonResources::langs()->path(CrayonLangs::DEFAULT_LANG));
                    }
                    return FALSE;
                }
                $regex = str_replace($def[0][$i], '(?:' . $default_element->regex() .')', $regex);
            }
        }

        // If the (?html) tag is used, escape characters in html (<, > and &)
        $html = self::regex_match('#\(\?html:(.+?)\)#', $regex);
        if ( count($html) == 2 ) {
            foreach ($html[1] as $i => $raw) {
                $regex = str_replace($html[0][$i], htmlentities($raw), $regex);
            }
        }

        // Ensure all parenthesis are atomic to avoid conflicting with element matches
        $regex = CrayonUtil::esc_atomic($regex);

        // Escape #, this is our delimiter
        $regex = CrayonUtil::esc_hash($regex);

        // Test if regex is valid; the compile warning is deliberately silenced because the FALSE
        // return value is the signal being tested.
        if (@preg_match("#$regex#", '') === FALSE) {
            CrayonLog::syslog("The regex for the element '{$element->name()}' in '{$element->path()}' is not valid.");
            return FALSE;
        }

        return $regex;
    }

    /**
     * Normalises a string of CSS class names.
     *
     * @param string $css Space-separated class names, optionally written with dots.
     * @return string Lowercase, dot-free class names joined by single spaces, or '' if $css is
     *                not a string.
     */
    public static function validate_css($css) {
        if (!is_string($css)) {
            return '';
        }
        // Remove dots in CSS class and convert to lowercase
        $css = strtolower(str_replace('.', '', $css));
        // array_filter() without a callback drops the same tokens empty() rejects
        $classes = array_filter(explode(' ', $css));
        return trim(implode(' ', $classes));
    }

    /**
     * Runs preg_match_all() and returns its matches.
     *
     * @param string $pattern Delimited regex.
     * @param string $subject Text to search.
     * @return array The preg_match_all() match array, or an empty array when nothing matched.
     */
    public static function regex_match($pattern, $subject) {
        if (preg_match_all($pattern, $subject, $matches)) {
            return $matches;
        }
        return array();
    }

    /**
     * @return array The supported modes, keyed by mode name.
     */
    public static function modes() {
        return self::$modes;
    }

    /**
     * @param mixed $name Candidate mode name.
     * @return bool TRUE if $name is a string naming one of the supported modes.
     */
    public static function is_mode($name) {
        return is_string($name) && array_key_exists($name, self::$modes);
    }
}
?>