[ Index ]

PHP Cross Reference of DokuWiki

title

Body

[close]

/inc/Parsing/Lexer/ -> ParallelRegex.php (source)

   1  <?php
   2  
   3  /**
   4   * Lexer adapted from Simple Test: http://sourceforge.net/projects/simpletest/
   5   * For an intro to the Lexer see:
   6   * https://web.archive.org/web/20120125041816/http://www.phppatterns.com/docs/develop/simple_test_lexer_notes
   7   *
   8   * @author Marcus Baker http://www.lastcraft.com
   9   */
  10  
  11  namespace dokuwiki\Parsing\Lexer;
  12  
  13  /**
  14   * Compounded regular expression.
  15   *
  16   * Any of the contained patterns could match and when one does it's label is returned.
  17   */
  18  class ParallelRegex
  19  {
  20      /** @var string[] patterns to match */
  21      protected $patterns = [];
  22      /** @var string[] labels for above patterns */
  23      protected $labels = [];
  24      /** @var string the compound regex matching all patterns */
  25      protected $regex;
  26      /** @var bool case sensitive matching? */
  27      protected $case;
  28  
  29      /**
  30       * Constructor. Starts with no patterns.
  31       *
  32       * @param boolean $case    True for case sensitive, false
  33       *                         for insensitive.
  34       */
  35      public function __construct($case)
  36      {
  37          $this->case = $case;
  38      }
  39  
  40      /**
  41       * Adds a pattern with an optional label.
  42       *
  43       * @param mixed       $pattern Perl style regex. Must be UTF-8
  44       *                             encoded. If its a string, the (, )
  45       *                             lose their meaning unless they
  46       *                             form part of a lookahead or
  47       *                             lookbehind assertation.
  48       * @param bool|string $label   Label of regex to be returned
  49       *                             on a match. Label must be ASCII
  50       */
  51      public function addPattern($pattern, $label = true)
  52      {
  53          $count = count($this->patterns);
  54          $this->patterns[$count] = $pattern;
  55          $this->labels[$count] = $label;
  56          $this->regex = null;
  57      }
  58  
  59      /**
  60       * Attempts to match all patterns at once against a string.
  61       *
  62       * @param string $subject      String to match against.
  63       * @param string $match        First matched portion of
  64       *                             subject.
  65       * @return bool|string         False if no match found, label if label exists, true if not
  66       */
  67      public function apply($subject, &$match)
  68      {
  69          if (count($this->patterns) == 0) {
  70              return false;
  71          }
  72          if (! preg_match($this->getCompoundedRegex(), $subject, $matches)) {
  73              $match = "";
  74              return false;
  75          }
  76  
  77          $match = $matches[0];
  78          $size = count($matches);
  79          // FIXME this could be made faster by storing the labels as keys in a hashmap
  80          for ($i = 1; $i < $size; $i++) {
  81              if ($matches[$i] && isset($this->labels[$i - 1])) {
  82                  return $this->labels[$i - 1];
  83              }
  84          }
  85          return true;
  86      }
  87  
  88      /**
  89       * Attempts to split the string against all patterns at once
  90       *
  91       * @param string $subject      String to match against.
  92       * @param array $split         The split result: array containing, pre-match, match & post-match strings
  93       * @return boolean             True on success.
  94       *
  95       * @author Christopher Smith <chris@jalakai.co.uk>
  96       */
  97      public function split($subject, &$split)
  98      {
  99          if (count($this->patterns) == 0) {
 100              return false;
 101          }
 102  
 103          if (! preg_match($this->getCompoundedRegex(), $subject, $matches)) {
 104              if (function_exists('preg_last_error')) {
 105                  $err = preg_last_error();
 106                  switch ($err) {
 107                      case PREG_BACKTRACK_LIMIT_ERROR:
 108                          msg('A PCRE backtrack error occured. Try to increase the pcre.backtrack_limit in php.ini', -1);
 109                          break;
 110                      case PREG_RECURSION_LIMIT_ERROR:
 111                          msg('A PCRE recursion error occured. Try to increase the pcre.recursion_limit in php.ini', -1);
 112                          break;
 113                      case PREG_BAD_UTF8_ERROR:
 114                          msg('A PCRE UTF-8 error occured. This might be caused by a faulty plugin', -1);
 115                          break;
 116                      case PREG_INTERNAL_ERROR:
 117                          msg('A PCRE internal error occured. This might be caused by a faulty plugin', -1);
 118                          break;
 119                  }
 120              }
 121  
 122              $split = [$subject, "", ""];
 123              return false;
 124          }
 125  
 126          $idx = count($matches) - 2;
 127          [$pre, $post] = preg_split($this->patterns[$idx] . $this->getPerlMatchingFlags(), $subject, 2);
 128          $split = [$pre, $matches[0], $post];
 129  
 130          return $this->labels[$idx] ?? true;
 131      }
 132  
 133      /**
 134       * Compounds the patterns into a single
 135       * regular expression separated with the
 136       * "or" operator. Caches the regex.
 137       * Will automatically escape (, ) and / tokens.
 138       *
 139       * @return null|string
 140       */
 141      protected function getCompoundedRegex()
 142      {
 143          if ($this->regex == null) {
 144              $cnt = count($this->patterns);
 145              for ($i = 0; $i < $cnt; $i++) {
 146                  /*
 147                   * decompose the input pattern into "(", "(?", ")",
 148                   * "[...]", "[]..]", "[^]..]", "[...[:...:]..]", "\x"...
 149                   * elements.
 150                   */
 151                  preg_match_all('/\\\\.|' .
 152                                 '\(\?|' .
 153                                 '[()]|' .
 154                                 '\[\^?\]?(?:\\\\.|\[:[^]]*:\]|[^]\\\\])*\]|' .
 155                                 '[^[()\\\\]+/', $this->patterns[$i], $elts);
 156  
 157                  $pattern = "";
 158                  $level = 0;
 159  
 160                  foreach ($elts[0] as $elt) {
 161                      /*
 162                       * for "(", ")" remember the nesting level, add "\"
 163                       * only to the non-"(?" ones.
 164                       */
 165  
 166                      switch ($elt) {
 167                          case '(':
 168                              $pattern .= '\(';
 169                              break;
 170                          case ')':
 171                              if ($level > 0)
 172                                  $level--; /* closing (? */
 173                              else $pattern .= '\\';
 174                              $pattern .= ')';
 175                              break;
 176                          case '(?':
 177                              $level++;
 178                              $pattern .= '(?';
 179                              break;
 180                          default:
 181                              if (str_starts_with($elt, '\\'))
 182                                  $pattern .= $elt;
 183                              else $pattern .= str_replace('/', '\/', $elt);
 184                      }
 185                  }
 186                  $this->patterns[$i] = "($pattern)";
 187              }
 188              $this->regex = "/" . implode("|", $this->patterns) . "/" . $this->getPerlMatchingFlags();
 189          }
 190          return $this->regex;
 191      }
 192  
 193      /**
 194       * Accessor for perl regex mode flags to use.
 195       * @return string       Perl regex flags.
 196       */
 197      protected function getPerlMatchingFlags()
 198      {
 199          return ($this->case ? "msS" : "msSi");
 200      }
 201  }