[ Index ]

PHP Cross Reference of DokuWiki

title

Body

[close]

/inc/Utf8/ -> PhpString.php (source)

   1  <?php
   2  
   3  namespace dokuwiki\Utf8;
   4  
   5  /**
   6   * UTF-8 aware equivalents to PHP's string functions
   7   */
   8  class PhpString
   9  {
  10  
  11      /**
  12       * A locale independent basename() implementation
  13       *
  14       * works around a bug in PHP's basename() implementation
  15       *
  16       * @param string $path A path
  17       * @param string $suffix If the name component ends in suffix this will also be cut off
  18       * @return string
  19       * @link   https://bugs.php.net/bug.php?id=37738
  20       *
  21       * @see basename()
  22       */
  23      public static function basename($path, $suffix = '')
  24      {
  25          $path = trim($path, '\\/');
  26          $rpos = max(strrpos($path, '/'), strrpos($path, '\\'));
  27          if ($rpos) {
  28              $path = substr($path, $rpos + 1);
  29          }
  30  
  31          $suflen = strlen($suffix);
  32          if ($suflen && (substr($path, -$suflen) === $suffix)) {
  33              $path = substr($path, 0, -$suflen);
  34          }
  35  
  36          return $path;
  37      }
  38  
  39      /**
  40       * Unicode aware replacement for strlen()
  41       *
  42       * utf8_decode() converts characters that are not in ISO-8859-1
  43       * to '?', which, for the purpose of counting, is alright
  44       *
  45       * @param string $string
  46       * @return int
  47       * @see    utf8_decode()
  48       *
  49       * @author <chernyshevsky at hotmail dot com>
  50       * @see    strlen()
  51       */
  52      public static function strlen($string)
  53      {
  54          if (UTF8_MBSTRING) {
  55              return mb_strlen($string, 'UTF-8');
  56          }
  57  
  58          if (function_exists('iconv_strlen')) {
  59              return iconv_strlen($string, 'UTF-8');
  60          }
  61  
  62          // utf8_decode is deprecated
  63          if (function_exists('utf8_decode')) {
  64              return strlen(utf8_decode($string));
  65          }
  66  
  67          return strlen($string);
  68      }
  69  
  70      /**
  71       * UTF-8 aware alternative to substr
  72       *
  73       * Return part of a string given character offset (and optionally length)
  74       *
  75       * @param string $str
  76       * @param int $offset number of UTF-8 characters offset (from left)
  77       * @param int $length (optional) length in UTF-8 characters from offset
  78       * @return string
  79       * @author Harry Fuecks <hfuecks@gmail.com>
  80       * @author Chris Smith <chris@jalakai.co.uk>
  81       *
  82       */
  83      public static function substr($str, $offset, $length = null)
  84      {
  85          if (UTF8_MBSTRING) {
  86              if ($length === null) {
  87                  return mb_substr($str, $offset);
  88              }
  89  
  90              return mb_substr($str, $offset, $length);
  91          }
  92  
  93          /*
  94           * Notes:
  95           *
  96           * no mb string support, so we'll use pcre regex's with 'u' flag
  97           * pcre only supports repetitions of less than 65536, in order to accept up to MAXINT values for
  98           * offset and length, we'll repeat a group of 65535 characters when needed (ok, up to MAXINT-65536)
  99           *
 100           * substr documentation states false can be returned in some cases (e.g. offset > string length)
 101           * mb_substr never returns false, it will return an empty string instead.
 102           *
 103           * calculating the number of characters in the string is a relatively expensive operation, so
 104           * we only carry it out when necessary. It isn't necessary for +ve offsets and no specified length
 105           */
 106  
 107          // cast parameters to appropriate types to avoid multiple notices/warnings
 108          $str = (string)$str;                          // generates E_NOTICE for PHP4 objects, but not PHP5 objects
 109          $offset = (int)$offset;
 110          if ($length !== null) $length = (int)$length;
 111  
 112          // handle trivial cases
 113          if ($length === 0) return '';
 114          if ($offset < 0 && $length < 0 && $length < $offset) return '';
 115  
 116          $offset_pattern = '';
 117          $length_pattern = '';
 118  
 119          // normalise -ve offsets (we could use a tail anchored pattern, but they are horribly slow!)
 120          if ($offset < 0) {
 121              $strlen = self::strlen($str);        // see notes
 122              $offset = $strlen + $offset;
 123              if ($offset < 0) $offset = 0;
 124          }
 125  
 126          // establish a pattern for offset, a non-captured group equal in length to offset
 127          if ($offset > 0) {
 128              $Ox = (int)($offset / 65535);
 129              $Oy = $offset % 65535;
 130  
 131              if ($Ox) $offset_pattern = '(?:.{65535}){' . $Ox . '}';
 132              $offset_pattern = '^(?:' . $offset_pattern . '.{' . $Oy . '})';
 133          } else {
 134              $offset_pattern = '^';                      // offset == 0; just anchor the pattern
 135          }
 136  
 137          // establish a pattern for length
 138          if ($length === null) {
 139              $length_pattern = '(.*)$';                  // the rest of the string
 140          } else {
 141  
 142              if (!isset($strlen)) $strlen = self::strlen($str);    // see notes
 143              if ($offset > $strlen) return '';           // another trivial case
 144  
 145              if ($length > 0) {
 146  
 147                  // reduce any length that would go past the end of the string
 148                  $length = min($strlen - $offset, $length);
 149  
 150                  $Lx = (int)($length / 65535);
 151                  $Ly = $length % 65535;
 152  
 153                  // +ve length requires ... a captured group of length characters
 154                  if ($Lx) $length_pattern = '(?:.{65535}){' . $Lx . '}';
 155                  $length_pattern = '(' . $length_pattern . '.{' . $Ly . '})';
 156  
 157              } else if ($length < 0) {
 158  
 159                  if ($length < ($offset - $strlen)) return '';
 160  
 161                  $Lx = (int)((-$length) / 65535);
 162                  $Ly = (-$length) % 65535;
 163  
 164                  // -ve length requires ... capture everything except a group of -length characters
 165                  //                         anchored at the tail-end of the string
 166                  if ($Lx) $length_pattern = '(?:.{65535}){' . $Lx . '}';
 167                  $length_pattern = '(.*)(?:' . $length_pattern . '.{' . $Ly . '})$';
 168              }
 169          }
 170  
 171          if (!preg_match('#' . $offset_pattern . $length_pattern . '#us', $str, $match)) return '';
 172          return $match[1];
 173      }
 174  
 175      // phpcs:disable PSR1.Methods.CamelCapsMethodName.NotCamelCaps
 176      /**
 177       * Unicode aware replacement for substr_replace()
 178       *
 179       * @param string $string input string
 180       * @param string $replacement the replacement
 181       * @param int $start the replacing will begin at the start'th offset into string.
 182       * @param int $length If given and is positive, it represents the length of the portion of string which is
 183       *                            to be replaced. If length is zero then this function will have the effect of inserting
 184       *                            replacement into string at the given start offset.
 185       * @return string
 186       * @see    substr_replace()
 187       *
 188       * @author Andreas Gohr <andi@splitbrain.org>
 189       */
 190      public static function substr_replace($string, $replacement, $start, $length = 0)
 191      {
 192          $ret = '';
 193          if ($start > 0) $ret .= self::substr($string, 0, $start);
 194          $ret .= $replacement;
 195          $ret .= self::substr($string, $start + $length);
 196          return $ret;
 197      }
 198      // phpcs:enable PSR1.Methods.CamelCapsMethodName.NotCamelCaps
 199  
 200      /**
 201       * Unicode aware replacement for ltrim()
 202       *
 203       * @param string $str
 204       * @param string $charlist
 205       * @return string
 206       * @see    ltrim()
 207       *
 208       * @author Andreas Gohr <andi@splitbrain.org>
 209       */
 210      public static function ltrim($str, $charlist = '')
 211      {
 212          if ($charlist === '') return ltrim($str);
 213  
 214          //quote charlist for use in a characterclass
 215          $charlist = preg_replace('!([\\\\\\-\\]\\[/])!', '\\\$1}', $charlist);
 216  
 217          return preg_replace('/^[' . $charlist . ']+/u', '', $str);
 218      }
 219  
 220      /**
 221       * Unicode aware replacement for rtrim()
 222       *
 223       * @param string $str
 224       * @param string $charlist
 225       * @return string
 226       * @see    rtrim()
 227       *
 228       * @author Andreas Gohr <andi@splitbrain.org>
 229       */
 230      public static function rtrim($str, $charlist = '')
 231      {
 232          if ($charlist === '') return rtrim($str);
 233  
 234          //quote charlist for use in a characterclass
 235          $charlist = preg_replace('!([\\\\\\-\\]\\[/])!', '\\\$1}', $charlist);
 236  
 237          return preg_replace('/[' . $charlist . ']+$/u', '', $str);
 238      }
 239  
 240      /**
 241       * Unicode aware replacement for trim()
 242       *
 243       * @param string $str
 244       * @param string $charlist
 245       * @return string
 246       * @see    trim()
 247       *
 248       * @author Andreas Gohr <andi@splitbrain.org>
 249       */
 250      public static function trim($str, $charlist = '')
 251      {
 252          if ($charlist === '') return trim($str);
 253  
 254          return self::ltrim(self::rtrim($str, $charlist), $charlist);
 255      }
 256  
 257      /**
 258       * This is a unicode aware replacement for strtolower()
 259       *
 260       * Uses mb_string extension if available
 261       *
 262       * @param string $string
 263       * @return string
 264       * @see    \dokuwiki\Utf8\PhpString::strtoupper()
 265       *
 266       * @author Leo Feyer <leo@typolight.org>
 267       * @see    strtolower()
 268       */
 269      public static function strtolower($string)
 270      {
 271          if($string === null) return ''; // pre-8.1 behaviour
 272          if (UTF8_MBSTRING) {
 273              if (class_exists('Normalizer', $autoload = false)) {
 274                  return \Normalizer::normalize(mb_strtolower($string, 'utf-8'));
 275              }
 276              return (mb_strtolower($string, 'utf-8'));
 277          }
 278          return strtr($string, Table::upperCaseToLowerCase());
 279      }
 280  
 281      /**
 282       * This is a unicode aware replacement for strtoupper()
 283       *
 284       * Uses mb_string extension if available
 285       *
 286       * @param string $string
 287       * @return string
 288       * @see    \dokuwiki\Utf8\PhpString::strtoupper()
 289       *
 290       * @author Leo Feyer <leo@typolight.org>
 291       * @see    strtoupper()
 292       */
 293      public static function strtoupper($string)
 294      {
 295          if (UTF8_MBSTRING) return mb_strtoupper($string, 'utf-8');
 296  
 297          return strtr($string, Table::lowerCaseToUpperCase());
 298      }
 299  
 300  
 301      /**
 302       * UTF-8 aware alternative to ucfirst
 303       * Make a string's first character uppercase
 304       *
 305       * @param string $str
 306       * @return string with first character as upper case (if applicable)
 307       * @author Harry Fuecks
 308       *
 309       */
 310      public static function ucfirst($str)
 311      {
 312          switch (self::strlen($str)) {
 313              case 0:
 314                  return '';
 315              case 1:
 316                  return self::strtoupper($str);
 317              default:
 318                  preg_match('/^(.{1})(.*)$/us', $str, $matches);
 319                  return self::strtoupper($matches[1]) . $matches[2];
 320          }
 321      }
 322  
 323      /**
 324       * UTF-8 aware alternative to ucwords
 325       * Uppercase the first character of each word in a string
 326       *
 327       * @param string $str
 328       * @return string with first char of each word uppercase
 329       * @author Harry Fuecks
 330       * @see http://php.net/ucwords
 331       *
 332       */
 333      public static function ucwords($str)
 334      {
 335          // Note: [\x0c\x09\x0b\x0a\x0d\x20] matches;
 336          // form feeds, horizontal tabs, vertical tabs, linefeeds and carriage returns
 337          // This corresponds to the definition of a "word" defined at http://php.net/ucwords
 338          $pattern = '/(^|([\x0c\x09\x0b\x0a\x0d\x20]+))([^\x0c\x09\x0b\x0a\x0d\x20]{1})[^\x0c\x09\x0b\x0a\x0d\x20]*/u';
 339  
 340          return preg_replace_callback(
 341              $pattern,
 342              function ($matches) {
 343                  $leadingws = $matches[2];
 344                  $ucfirst = self::strtoupper($matches[3]);
 345                  $ucword = self::substr_replace(ltrim($matches[0]), $ucfirst, 0, 1);
 346                  return $leadingws . $ucword;
 347              },
 348              $str
 349          );
 350      }
 351  
 352      /**
 353       * This is an Unicode aware replacement for strpos
 354       *
 355       * @param string $haystack
 356       * @param string $needle
 357       * @param integer $offset
 358       * @return integer
 359       * @author Leo Feyer <leo@typolight.org>
 360       * @see    strpos()
 361       *
 362       */
 363      public static function strpos($haystack, $needle, $offset = 0)
 364      {
 365          $comp = 0;
 366          $length = null;
 367  
 368          while ($length === null || $length < $offset) {
 369              $pos = strpos($haystack, $needle, $offset + $comp);
 370  
 371              if ($pos === false)
 372                  return false;
 373  
 374              $length = self::strlen(substr($haystack, 0, $pos));
 375  
 376              if ($length < $offset)
 377                  $comp = $pos - $length;
 378          }
 379  
 380          return $length;
 381      }
 382  
 383  
 384  }