[ Index ] |
PHP Cross Reference of DokuWiki |
[Summary view] [Print] [Text view]
<?php
/**
 * Lexer adapted from Simple Test: http://sourceforge.net/projects/simpletest/
 * For an intro to the Lexer see:
 * https://web.archive.org/web/20120125041816/http://www.phppatterns.com/docs/develop/simple_test_lexer_notes
 *
 * @author Marcus Baker http://www.lastcraft.com
 */

namespace dokuwiki\Parsing\Lexer;

/**
 * Compounded regular expression.
 *
 * Every added pattern becomes one capturing alternative of a single big
 * regex. Any of the contained patterns could match and when one does its
 * label is returned.
 */
class ParallelRegex
{
    /** @var string[] raw patterns as passed to addPattern(), never modified */
    protected $patterns;
    /** @var array labels for above patterns, same indexes */
    protected $labels;
    /** @var string|null cached compound regex matching all patterns, null when it must be rebuilt */
    protected $regex;
    /** @var bool case sensitive matching? */
    protected $case;

    /**
     * Constructor. Starts with no patterns.
     *
     * @param boolean $case True for case sensitive, false
     *                      for insensitive.
     */
    public function __construct($case)
    {
        $this->case = $case;
        $this->patterns = array();
        $this->labels = array();
        $this->regex = null;
    }

    /**
     * Adds a pattern with an optional label.
     *
     * Patterns may be added at any time; the cached compound regex is
     * invalidated and rebuilt from the raw patterns on the next match.
     *
     * @param mixed $pattern       Perl style regex. Must be UTF-8
     *                             encoded. If its a string, the (, )
     *                             lose their meaning unless they
     *                             form part of a lookahead or
     *                             lookbehind assertion.
     * @param bool|string $label   Label of regex to be returned
     *                             on a match. Label must be ASCII
     */
    public function addPattern($pattern, $label = true)
    {
        $count = count($this->patterns);
        $this->patterns[$count] = $pattern;
        $this->labels[$count] = $label;
        $this->regex = null;
    }

    /**
     * Attempts to match all patterns at once against a string.
     *
     * @param string $subject      String to match against.
     * @param string $match        Receives the first matched portion of
     *                             subject, or "" when nothing matched.
     * @return bool|string False if no match found, label if label exists, true if not
     */
    public function apply($subject, &$match)
    {
        $match = "";
        if (count($this->patterns) == 0) {
            return false;
        }
        if (! preg_match($this->getCompoundedRegex(), $subject, $matches)) {
            return false;
        }

        $match = $matches[0];
        $size = count($matches);
        for ($i = 1; $i < $size; $i++) {
            // compare against '' explicitly: a plain truthiness test would
            // treat a matched "0" as "this alternative did not match"
            if ($matches[$i] !== '' && isset($this->labels[$i - 1])) {
                return $this->labels[$i - 1];
            }
        }
        return true;
    }

    /**
     * Attempts to split the string against all patterns at once
     *
     * @param string $subject      String to match against.
     * @param array $split         The split result: array containing, pre-match, match & post-match strings.
     *                             When nothing matched it is array($subject, "", "").
     * @return bool|string False if no match found, label of the matching pattern if
     *                     it has one, true otherwise
     *
     * @author Christopher Smith <chris@jalakai.co.uk>
     */
    public function split($subject, &$split)
    {
        $split = array($subject, "", "");
        if (count($this->patterns) == 0) {
            return false;
        }

        if (! preg_match($this->getCompoundedRegex(), $subject, $matches, PREG_OFFSET_CAPTURE)) {
            // tell the user why matching failed if PCRE gave up
            switch (preg_last_error()) {
                case PREG_BACKTRACK_LIMIT_ERROR:
                    msg('A PCRE backtrack error occured. Try to increase the pcre.backtrack_limit in php.ini', -1);
                    break;
                case PREG_RECURSION_LIMIT_ERROR:
                    msg('A PCRE recursion error occured. Try to increase the pcre.recursion_limit in php.ini', -1);
                    break;
                case PREG_BAD_UTF8_ERROR:
                    msg('A PCRE UTF-8 error occured. This might be caused by a faulty plugin', -1);
                    break;
                case PREG_INTERNAL_ERROR:
                    msg('A PCRE internal error occured. This might be caused by a faulty plugin', -1);
                    break;
            }

            return false;
        }

        // preg_match drops trailing unmatched groups, so the last group
        // present belongs to the alternative that actually matched
        $idx = count($matches) - 2;

        // cut the subject around the match using its byte offset instead of
        // running the single pattern a second time through preg_split
        list($text, $offset) = $matches[0];
        $split = array(
            (string) substr($subject, 0, $offset),
            $text,
            (string) substr($subject, $offset + strlen($text)), // cast: old PHP returns false at end of string
        );

        return isset($this->labels[$idx]) ? $this->labels[$idx] : true;
    }

    /**
     * Compounds the patterns into a single
     * regular expression separated with the
     * "or" operator. Caches the regex.
     * Will automatically escape (, ) and / tokens.
     *
     * The raw patterns are left untouched so the regex can be rebuilt
     * correctly after further addPattern() calls.
     *
     * @return null|string
     */
    protected function getCompoundedRegex()
    {
        if ($this->regex === null) {
            $alternatives = array();
            foreach ($this->patterns as $pattern) {
                // each pattern becomes exactly one capturing group
                $alternatives[] = '(' . $this->escapePattern($pattern) . ')';
            }
            $this->regex = "/" . implode("|", $alternatives) . "/" . $this->getPerlMatchingFlags();
        }
        return $this->regex;
    }

    /**
     * Escapes a single raw pattern for use inside the compound regex.
     *
     * Plain "(" and ")" are turned into literals, "(?...)" groups keep their
     * meaning and "/" is escaped because it is the compound regex delimiter.
     *
     * @param string $pattern raw pattern as passed to addPattern()
     * @return string the escaped pattern, without surrounding parentheses
     */
    private function escapePattern($pattern)
    {
        /*
         * decompose the input pattern into "(", "(?", ")",
         * "[...]", "[]..]", "[^]..]", "[...[:...:]..]", "\x"...
         * elements.
         */
        preg_match_all(
            '/\\\\.|' .
            '\(\?|' .
            '[()]|' .
            '\[\^?\]?(?:\\\\.|\[:[^]]*:\]|[^]\\\\])*\]|' .
            '[^[()\\\\]+/',
            $pattern,
            $elts
        );

        $escaped = "";
        // one entry per currently open parenthesis:
        // true for a "(?" group, false for a literal "("
        $open = array();

        foreach ($elts[0] as $elt) {
            if ($elt === '(') {
                $open[] = false;
                $escaped .= '\(';
            } elseif ($elt === '(?') {
                $open[] = true;
                $escaped .= '(?';
            } elseif ($elt === ')') {
                // a ")" keeps its meaning only when it closes a "(?" group;
                // unbalanced ")" (empty stack) are escaped as well
                $escaped .= array_pop($open) ? ')' : '\)';
            } elseif ($elt[0] === '\\') {
                // already escaped sequence, keep as is
                $escaped .= $elt;
            } else {
                $escaped .= str_replace('/', '\/', $elt);
            }
        }

        return $escaped;
    }

    /**
     * Accessor for perl regex mode flags to use.
     * @return string       Perl regex flags.
     */
    protected function getPerlMatchingFlags()
    {
        return ($this->case ? "msS" : "msSi");
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body