case = $case;
    }

    /**
     * Adds a pattern with an optional label.
     *
     * The cached compound regex is discarded, so the next call to apply()
     * or split() rebuilds it with the new pattern included.
     *
     * @param mixed       $pattern Perl style regex. Must be UTF-8 encoded.
     *                             If it is a string, "(" and ")" lose their
     *                             meaning (they are escaped) unless they
     *                             belong to a "(?" construct such as a
     *                             lookahead or lookbehind assertion.
     * @param bool|string $label   Label of regex to be returned on a match.
     *                             Label must be ASCII.
     */
    public function addPattern($pattern, $label = true)
    {
        $count = count($this->patterns);
        $this->patterns[$count] = $pattern;
        $this->labels[$count] = $label;
        // invalidate the cached compound regex
        $this->regex = null;
    }

    /**
     * Attempts to match all patterns at once against a string.
     *
     * @param string $subject String to match against.
     * @param string $match   First matched portion of subject
     *                        (set to "" when nothing matched).
     * @return bool|string False if no match found, label if label exists, true if not
     */
    public function apply($subject, &$match)
    {
        if (count($this->patterns) == 0) {
            return false;
        }
        if (! preg_match($this->getCompoundedRegex(), $subject, $matches)) {
            $match = "";
            return false;
        }

        $match = $matches[0];
        $size = count($matches);
        // FIXME this could be made faster by storing the labels as keys in a hashmap
        for ($i = 1; $i < $size; $i++) {
            // Compare against '' explicitly: a group that captured the text "0"
            // is falsy in PHP but is still the group that matched.
            if ($matches[$i] !== '' && isset($this->labels[$i - 1])) {
                return $this->labels[$i - 1];
            }
        }
        return true;
    }

    /**
     * Attempts to split the string against all patterns at once
     *
     * On a failed match a PCRE error, if any, is reported through msg() and
     * $split is set to [$subject, "", ""].
     *
     * @param string $subject String to match against.
     * @param array  $split   The split result: array containing, pre-match, match & post-match strings
     * @return boolean|string False if nothing matched (or no patterns exist),
     *                        otherwise the label of the matching pattern, or
     *                        true if that pattern has no label.
     *
     * @author Christopher Smith
     */
    public function split($subject, &$split)
    {
        if (count($this->patterns) == 0) {
            return false;
        }

        if (! preg_match($this->getCompoundedRegex(), $subject, $matches, PREG_OFFSET_CAPTURE)) {
            // The second message keeps the line break that is embedded in its text.
            $messages = [
                PREG_BACKTRACK_LIMIT_ERROR =>
                    'A PCRE backtrack error occured. Try to increase the pcre.backtrack_limit in php.ini',
                PREG_RECURSION_LIMIT_ERROR =>
                    "A PCRE recursion error occured. \nTry to increase the pcre.recursion_limit in php.ini",
                PREG_BAD_UTF8_ERROR =>
                    'A PCRE UTF-8 error occured. This might be caused by a faulty plugin',
                PREG_INTERNAL_ERROR =>
                    'A PCRE internal error occured. This might be caused by a faulty plugin',
            ];
            $err = preg_last_error();
            if (isset($messages[$err])) {
                msg($messages[$err], -1);
            }

            $split = [$subject, "", ""];
            return false;
        }

        // preg_match() drops trailing groups that did not take part in the
        // match, so the last entry belongs to the pattern that matched:
        // entry 0 is the whole match, entry k+1 is pattern k.
        $idx = count($matches) - 2;

        // Cut the subject at the reported byte offset instead of running the
        // single pattern a second time through preg_split().
        [$matched, $offset] = $matches[0];
        $split = [
            substr($subject, 0, $offset),
            $matched,
            substr($subject, $offset + strlen($matched)),
        ];

        return $this->labels[$idx] ?? true;
    }

    /**
     * Compounds the patterns into a single
     * regular expression separated with the
     * "or" operator. Caches the regex.
     * Will automatically escape (, ) and / tokens.
     *
     * The stored patterns are left untouched: the escaped forms are built in
     * a local array, so rebuilding the regex after a later addPattern() call
     * does not escape previously compiled patterns a second time.
     *
     * @return null|string
     */
    protected function getCompoundedRegex()
    {
        // Loose comparison kept on purpose: the initial value of the cache is
        // set outside this part of the file; addPattern() resets it to null.
        if ($this->regex == null) {
            $compiled = [];
            $cnt = count($this->patterns);
            for ($i = 0; $i < $cnt; $i++) {
                // One capturing group per pattern; the group number is what
                // apply() and split() use to find the label.
                $compiled[] = '(' . $this->compilePattern($this->patterns[$i]) . ')';
            }
            $this->regex = "/" . implode("|", $compiled) . "/" . $this->getPerlMatchingFlags();
        }
        return $this->regex;
    }

    /**
     * Escapes a single input pattern so it can be embedded in the compound regex.
     *
     * Plain "(" and ")" are escaped, "(?" constructs are passed through, and
     * "/" (the compound regex delimiter) is escaped everywhere except inside
     * an already backslash-escaped element.
     *
     * @param mixed $pattern Pattern as passed to addPattern().
     * @return string The escaped pattern, without the surrounding group.
     */
    private function compilePattern($pattern)
    {
        // Decompose the input pattern into "(", "(?", ")", "[...]", "[]..]",
        // "[^]..]", "[...[:...:]..]", "\x" and plain text elements.
        preg_match_all(
            '/\\\\.|' .
            '\(\?|' .
            '[()]|' .
            '\[\^?\]?(?:\\\\.|\[:[^]]*:\]|[^]\\\\])*\]|' .
            '[^[()\\\\]+/',
            $pattern,
            $elts
        );

        $escaped = "";
        $level = 0; // number of "(?" constructs still waiting for their ")"
        foreach ($elts[0] as $elt) {
            if ($elt === '(') {
                // a plain "(" is always treated as a literal
                $escaped .= '\(';
            } elseif ($elt === ')') {
                if ($level > 0) {
                    // taken as the closer of the innermost open "(?" construct
                    $level--;
                } else {
                    $escaped .= '\\';
                }
                $escaped .= ')';
            } elseif ($elt === '(?') {
                $level++;
                $escaped .= '(?';
            } elseif (str_starts_with($elt, '\\')) {
                // already an escape sequence, keep as is
                $escaped .= $elt;
            } else {
                $escaped .= str_replace('/', '\/', $elt);
            }
        }
        return $escaped;
    }

    /**
     * Accessor for perl regex mode flags to use.
     *
     * @return string Perl regex flags; the "i" flag is appended when the
     *                case setting is falsy.
     */
    protected function getPerlMatchingFlags()
    {
        return ($this->case ? "msS" : "msSi");
    }
}