[ Index ]

PHP Cross Reference of DokuWiki

title

Body

[close]

/inc/Utf8/ -> PhpString.php (source)

   1  <?php
   2  
   3  namespace dokuwiki\Utf8;
   4  
   5  /**
   6   * UTF-8 aware equivalents to PHP's string functions
   7   */
   8  class PhpString
   9  {
  10      /**
  11       * A locale independent basename() implementation
  12       *
  13       * works around a bug in PHP's basename() implementation
  14       *
  15       * @param string $path A path
  16       * @param string $suffix If the name component ends in suffix this will also be cut off
  17       * @return string
  18       * @link   https://bugs.php.net/bug.php?id=37738
  19       *
  20       * @see basename()
  21       */
  22      public static function basename($path, $suffix = '')
  23      {
  24          $path = trim($path, '\\/');
  25          $rpos = max(strrpos($path, '/'), strrpos($path, '\\'));
  26          if ($rpos) {
  27              $path = substr($path, $rpos + 1);
  28          }
  29  
  30          $suflen = strlen($suffix);
  31          if ($suflen && str_ends_with($path, $suffix)) {
  32              $path = substr($path, 0, -$suflen);
  33          }
  34  
  35          return $path;
  36      }
  37  
  38      /**
  39       * Unicode aware replacement for strlen()
  40       *
  41       * utf8_decode() converts characters that are not in ISO-8859-1
  42       * to '?', which, for the purpose of counting, is alright
  43       *
  44       * @param string $string
  45       * @return int
  46       * @see    utf8_decode()
  47       *
  48       * @author <chernyshevsky at hotmail dot com>
  49       * @see    strlen()
  50       */
  51      public static function strlen($string)
  52      {
  53          if (UTF8_MBSTRING) {
  54              return mb_strlen($string, 'UTF-8');
  55          }
  56  
  57          if (function_exists('iconv_strlen')) {
  58              return iconv_strlen($string, 'UTF-8');
  59          }
  60  
  61          // utf8_decode is deprecated
  62          if (function_exists('utf8_decode')) {
  63              return strlen(utf8_decode($string));
  64          }
  65  
  66          return strlen($string);
  67      }
  68  
  69      /**
  70       * UTF-8 aware alternative to substr
  71       *
  72       * Return part of a string given character offset (and optionally length)
  73       *
  74       * @param string $str
  75       * @param int $offset number of UTF-8 characters offset (from left)
  76       * @param int $length (optional) length in UTF-8 characters from offset
  77       * @return string
  78       * @author Harry Fuecks <hfuecks@gmail.com>
  79       * @author Chris Smith <chris@jalakai.co.uk>
  80       *
  81       */
  82      public static function substr($str, $offset, $length = null)
  83      {
  84          if (UTF8_MBSTRING) {
  85              if ($length === null) {
  86                  return mb_substr($str, $offset);
  87              }
  88  
  89              return mb_substr($str, $offset, $length);
  90          }
  91  
  92          /*
  93           * Notes:
  94           *
  95           * no mb string support, so we'll use pcre regex's with 'u' flag
  96           * pcre only supports repetitions of less than 65536, in order to accept up to MAXINT values for
  97           * offset and length, we'll repeat a group of 65535 characters when needed (ok, up to MAXINT-65536)
  98           *
  99           * substr documentation states false can be returned in some cases (e.g. offset > string length)
 100           * mb_substr never returns false, it will return an empty string instead.
 101           *
 102           * calculating the number of characters in the string is a relatively expensive operation, so
 103           * we only carry it out when necessary. It isn't necessary for +ve offsets and no specified length
 104           */
 105  
 106          // cast parameters to appropriate types to avoid multiple notices/warnings
 107          $str = (string)$str;                          // generates E_NOTICE for PHP4 objects, but not PHP5 objects
 108          $offset = (int)$offset;
 109          if ($length !== null) $length = (int)$length;
 110  
 111          // handle trivial cases
 112          if ($length === 0) return '';
 113          if ($offset < 0 && $length < 0 && $length < $offset) return '';
 114  
 115          $offset_pattern = '';
 116          $length_pattern = '';
 117  
 118          // normalise -ve offsets (we could use a tail anchored pattern, but they are horribly slow!)
 119          if ($offset < 0) {
 120              $strlen = self::strlen($str);        // see notes
 121              $offset = $strlen + $offset;
 122              if ($offset < 0) $offset = 0;
 123          }
 124  
 125          // establish a pattern for offset, a non-captured group equal in length to offset
 126          if ($offset > 0) {
 127              $Ox = (int)($offset / 65535);
 128              $Oy = $offset % 65535;
 129  
 130              if ($Ox) $offset_pattern = '(?:.{65535}){' . $Ox . '}';
 131              $offset_pattern = '^(?:' . $offset_pattern . '.{' . $Oy . '})';
 132          } else {
 133              $offset_pattern = '^';                      // offset == 0; just anchor the pattern
 134          }
 135  
 136          // establish a pattern for length
 137          if ($length === null) {
 138              $length_pattern = '(.*)$';                  // the rest of the string
 139          } else {
 140              if (!isset($strlen)) $strlen = self::strlen($str);    // see notes
 141              if ($offset > $strlen) return '';           // another trivial case
 142  
 143              if ($length > 0) {
 144                  // reduce any length that would go past the end of the string
 145                  $length = min($strlen - $offset, $length);
 146                  $Lx = (int)($length / 65535);
 147                  $Ly = $length % 65535;
 148                  // +ve length requires ... a captured group of length characters
 149                  if ($Lx) $length_pattern = '(?:.{65535}){' . $Lx . '}';
 150                  $length_pattern = '(' . $length_pattern . '.{' . $Ly . '})';
 151              } elseif ($length < 0) {
 152                  if ($length < ($offset - $strlen)) return '';
 153                  $Lx = (int)((-$length) / 65535);
 154                  $Ly = (-$length) % 65535;
 155                  // -ve length requires ... capture everything except a group of -length characters
 156                  //                         anchored at the tail-end of the string
 157                  if ($Lx) $length_pattern = '(?:.{65535}){' . $Lx . '}';
 158                  $length_pattern = '(.*)(?:' . $length_pattern . '.{' . $Ly . '})$';
 159              }
 160          }
 161  
 162          if (!preg_match('#' . $offset_pattern . $length_pattern . '#us', $str, $match)) return '';
 163          return $match[1];
 164      }
 165  
 166      // phpcs:disable PSR1.Methods.CamelCapsMethodName.NotCamelCaps
 167      /**
 168       * Unicode aware replacement for substr_replace()
 169       *
 170       * @param string $string input string
 171       * @param string $replacement the replacement
 172       * @param int $start the replacing will begin at the start'th offset into string.
 173       * @param int $length If given and is positive, it represents the length of the portion of string which is
 174       *                            to be replaced. If length is zero then this function will have the effect of inserting
 175       *                            replacement into string at the given start offset.
 176       * @return string
 177       * @see    substr_replace()
 178       *
 179       * @author Andreas Gohr <andi@splitbrain.org>
 180       */
 181      public static function substr_replace($string, $replacement, $start, $length = 0)
 182      {
 183          $ret = '';
 184          if ($start > 0) $ret .= self::substr($string, 0, $start);
 185          $ret .= $replacement;
 186          $ret .= self::substr($string, $start + $length);
 187          return $ret;
 188      }
 189      // phpcs:enable PSR1.Methods.CamelCapsMethodName.NotCamelCaps
 190  
 191      /**
 192       * Unicode aware replacement for ltrim()
 193       *
 194       * @param string $str
 195       * @param string $charlist
 196       * @return string
 197       * @see    ltrim()
 198       *
 199       * @author Andreas Gohr <andi@splitbrain.org>
 200       */
 201      public static function ltrim($str, $charlist = '')
 202      {
 203          if ($charlist === '') return ltrim($str);
 204  
 205          //quote charlist for use in a characterclass
 206          $charlist = preg_replace('!([\\\\\\-\\]\\[/])!', '\\\$1}', $charlist);
 207  
 208          return preg_replace('/^[' . $charlist . ']+/u', '', $str);
 209      }
 210  
 211      /**
 212       * Unicode aware replacement for rtrim()
 213       *
 214       * @param string $str
 215       * @param string $charlist
 216       * @return string
 217       * @see    rtrim()
 218       *
 219       * @author Andreas Gohr <andi@splitbrain.org>
 220       */
 221      public static function rtrim($str, $charlist = '')
 222      {
 223          if ($charlist === '') return rtrim($str);
 224  
 225          //quote charlist for use in a characterclass
 226          $charlist = preg_replace('!([\\\\\\-\\]\\[/])!', '\\\$1}', $charlist);
 227  
 228          return preg_replace('/[' . $charlist . ']+$/u', '', $str);
 229      }
 230  
 231      /**
 232       * Unicode aware replacement for trim()
 233       *
 234       * @param string $str
 235       * @param string $charlist
 236       * @return string
 237       * @see    trim()
 238       *
 239       * @author Andreas Gohr <andi@splitbrain.org>
 240       */
 241      public static function trim($str, $charlist = '')
 242      {
 243          if ($charlist === '') return trim($str);
 244  
 245          return self::ltrim(self::rtrim($str, $charlist), $charlist);
 246      }
 247  
 248      /**
 249       * This is a unicode aware replacement for strtolower()
 250       *
 251       * Uses mb_string extension if available
 252       *
 253       * @param string $string
 254       * @return string
 255       * @see    \dokuwiki\Utf8\PhpString::strtoupper()
 256       *
 257       * @author Leo Feyer <leo@typolight.org>
 258       * @see    strtolower()
 259       */
 260      public static function strtolower($string)
 261      {
 262          if ($string === null) return ''; // pre-8.1 behaviour
 263          if (UTF8_MBSTRING) {
 264              if (class_exists('Normalizer', $autoload = false)) {
 265                  return \Normalizer::normalize(mb_strtolower($string, 'utf-8'));
 266              }
 267              return (mb_strtolower($string, 'utf-8'));
 268          }
 269          return strtr($string, Table::upperCaseToLowerCase());
 270      }
 271  
 272      /**
 273       * This is a unicode aware replacement for strtoupper()
 274       *
 275       * Uses mb_string extension if available
 276       *
 277       * @param string $string
 278       * @return string
 279       * @see    \dokuwiki\Utf8\PhpString::strtoupper()
 280       *
 281       * @author Leo Feyer <leo@typolight.org>
 282       * @see    strtoupper()
 283       */
 284      public static function strtoupper($string)
 285      {
 286          if (UTF8_MBSTRING) return mb_strtoupper($string, 'utf-8');
 287  
 288          return strtr($string, Table::lowerCaseToUpperCase());
 289      }
 290  
 291  
 292      /**
 293       * UTF-8 aware alternative to ucfirst
 294       * Make a string's first character uppercase
 295       *
 296       * @param string $str
 297       * @return string with first character as upper case (if applicable)
 298       * @author Harry Fuecks
 299       *
 300       */
 301      public static function ucfirst($str)
 302      {
 303          switch (self::strlen($str)) {
 304              case 0:
 305                  return '';
 306              case 1:
 307                  return self::strtoupper($str);
 308              default:
 309                  preg_match('/^(.{1})(.*)$/us', $str, $matches);
 310                  return self::strtoupper($matches[1]) . $matches[2];
 311          }
 312      }
 313  
 314      /**
 315       * UTF-8 aware alternative to ucwords
 316       * Uppercase the first character of each word in a string
 317       *
 318       * @param string $str
 319       * @return string with first char of each word uppercase
 320       * @author Harry Fuecks
 321       * @see http://php.net/ucwords
 322       *
 323       */
 324      public static function ucwords($str)
 325      {
 326          // Note: [\x0c\x09\x0b\x0a\x0d\x20] matches;
 327          // form feeds, horizontal tabs, vertical tabs, linefeeds and carriage returns
 328          // This corresponds to the definition of a "word" defined at http://php.net/ucwords
 329          $pattern = '/(^|([\x0c\x09\x0b\x0a\x0d\x20]+))([^\x0c\x09\x0b\x0a\x0d\x20]{1})[^\x0c\x09\x0b\x0a\x0d\x20]*/u';
 330  
 331          return preg_replace_callback(
 332              $pattern,
 333              function ($matches) {
 334                  $leadingws = $matches[2];
 335                  $ucfirst = self::strtoupper($matches[3]);
 336                  $ucword = self::substr_replace(ltrim($matches[0]), $ucfirst, 0, 1);
 337                  return $leadingws . $ucword;
 338              },
 339              $str
 340          );
 341      }
 342  
 343      /**
 344       * This is an Unicode aware replacement for strpos
 345       *
 346       * @param string $haystack
 347       * @param string $needle
 348       * @param integer $offset
 349       * @return integer
 350       * @author Leo Feyer <leo@typolight.org>
 351       * @see    strpos()
 352       *
 353       */
 354      public static function strpos($haystack, $needle, $offset = 0)
 355      {
 356          $comp = 0;
 357          $length = null;
 358  
 359          while ($length === null || $length < $offset) {
 360              $pos = strpos($haystack, $needle, $offset + $comp);
 361  
 362              if ($pos === false)
 363                  return false;
 364  
 365              $length = self::strlen(substr($haystack, 0, $pos));
 366  
 367              if ($length < $offset)
 368                  $comp = $pos - $length;
 369          }
 370  
 371          return $length;
 372      }
 373  }