[ Index ] |
PHP Cross Reference of DokuWiki |
[Summary view] [Print] [Text view]
<?php

namespace dokuwiki\Utf8;

/**
 * UTF-8 aware equivalents to PHP's string functions
 *
 * All methods are static. Most of them use the mbstring extension when the
 * UTF8_MBSTRING constant (defined outside this file) is truthy and fall back
 * to PCRE or lookup-table based implementations otherwise.
 */
class PhpString
{
    /**
     * A locale independent basename() implementation
     *
     * works around a bug in PHP's basename() implementation
     *
     * Both '/' and '\' are treated as directory separators.
     *
     * @param string $path A path
     * @param string $suffix If the name component ends in suffix this will also be cut off
     * @return string
     * @link https://bugs.php.net/bug.php?id=37738
     *
     * @see basename()
     */
    public static function basename($path, $suffix = '')
    {
        // strip leading and trailing separators so the last separator is never at position 0
        $path = trim($path, '\\/');
        $rpos = max(strrpos($path, '/'), strrpos($path, '\\'));
        if ($rpos) {
            $path = substr($path, $rpos + 1);
        }

        $suflen = strlen($suffix);
        if ($suflen && (substr($path, -$suflen) === $suffix)) {
            $path = substr($path, 0, -$suflen);
        }

        return $path;
    }

    /**
     * Unicode aware replacement for strlen()
     *
     * Tries mb_strlen(), then iconv_strlen(), then utf8_decode() and finally
     * falls back to the byte length.
     *
     * utf8_decode() converts characters that are not in ISO-8859-1
     * to '?', which, for the purpose of counting, is alright
     *
     * @param string $string
     * @return int
     * @see utf8_decode()
     *
     * @author <chernyshevsky at hotmail dot com>
     * @see strlen()
     */
    public static function strlen($string)
    {
        if (UTF8_MBSTRING) {
            return mb_strlen($string, 'UTF-8');
        }

        if (function_exists('iconv_strlen')) {
            return iconv_strlen($string, 'UTF-8');
        }

        // utf8_decode is deprecated
        if (function_exists('utf8_decode')) {
            return strlen(utf8_decode($string));
        }

        // last resort: byte count
        return strlen($string);
    }

    /**
     * UTF-8 aware alternative to substr
     *
     * Return part of a string given character offset (and optionally length)
     *
     * @param string $str
     * @param int $offset number of UTF-8 characters offset (from left)
     * @param int $length (optional) length in UTF-8 characters from offset
     * @return string
     * @author Harry Fuecks <hfuecks@gmail.com>
     * @author Chris Smith <chris@jalakai.co.uk>
     *
     */
    public static function substr($str, $offset, $length = null)
    {
        if (UTF8_MBSTRING) {
            // A null length means "up to the end of the string". The encoding is passed
            // explicitly (as in strlen() and strtolower()) so the result does not depend
            // on the configured mb_internal_encoding().
            return mb_substr($str, $offset, $length, 'UTF-8');
        }

        /*
         * Notes:
         *
         * no mb string support, so we'll use pcre regex's with 'u' flag
         * pcre only supports repetitions of less than 65536, in order to accept up to MAXINT values for
         * offset and length, we'll repeat a group of 65535 characters when needed (ok, up to MAXINT-65536)
         *
         * substr documentation states false can be returned in some cases (e.g. offset > string length)
         * mb_substr never returns false, it will return an empty string instead.
         *
         * calculating the number of characters in the string is a relatively expensive operation, so
         * we only carry it out when necessary. It isn't necessary for +ve offsets and no specified length
         */

        // cast parameters to appropriate types to avoid multiple notices/warnings
        $str = (string)$str; // generates E_NOTICE for PHP4 objects, but not PHP5 objects
        $offset = (int)$offset;
        if ($length !== null) $length = (int)$length;

        // handle trivial cases
        if ($length === 0) return '';
        if ($offset < 0 && $length < 0 && $length < $offset) return '';

        $offset_pattern = '';
        $length_pattern = '';

        // normalise -ve offsets (we could use a tail anchored pattern, but they are horribly slow!)
        if ($offset < 0) {
            $strlen = self::strlen($str); // see notes
            $offset = $strlen + $offset;
            if ($offset < 0) $offset = 0;
        }

        // establish a pattern for offset, a non-captured group equal in length to offset
        if ($offset > 0) {
            $Ox = (int)($offset / 65535);
            $Oy = $offset % 65535;

            if ($Ox) $offset_pattern = '(?:.{65535}){' . $Ox . '}';
            $offset_pattern = '^(?:' . $offset_pattern . '.{' . $Oy . '})';
        } else {
            $offset_pattern = '^'; // offset == 0; just anchor the pattern
        }

        // establish a pattern for length
        if ($length === null) {
            $length_pattern = '(.*)$'; // the rest of the string
        } else {
            if (!isset($strlen)) $strlen = self::strlen($str); // see notes
            if ($offset > $strlen) return ''; // another trivial case

            if ($length > 0) {
                // reduce any length that would go past the end of the string
                $length = min($strlen - $offset, $length);

                $Lx = (int)($length / 65535);
                $Ly = $length % 65535;

                // +ve length requires ... a captured group of length characters
                if ($Lx) $length_pattern = '(?:.{65535}){' . $Lx . '}';
                $length_pattern = '(' . $length_pattern . '.{' . $Ly . '})';
            } elseif ($length < 0) {
                if ($length < ($offset - $strlen)) return '';

                $Lx = (int)((-$length) / 65535);
                $Ly = (-$length) % 65535;

                // -ve length requires ... capture everything except a group of -length characters
                //                         anchored at the tail-end of the string
                if ($Lx) $length_pattern = '(?:.{65535}){' . $Lx . '}';
                $length_pattern = '(.*)(?:' . $length_pattern . '.{' . $Ly . '})$';
            }
        }

        if (!preg_match('#' . $offset_pattern . $length_pattern . '#us', $str, $match)) return '';
        return $match[1];
    }

    // phpcs:disable PSR1.Methods.CamelCapsMethodName.NotCamelCaps
    /**
     * Unicode aware replacement for substr_replace()
     *
     * The result is built as: the first $start characters (only when $start is
     * positive), followed by the replacement, followed by everything from
     * character offset $start + $length onwards. Negative $start values are not
     * normalised against the string length.
     *
     * @param string $string input string
     * @param string $replacement the replacement
     * @param int $start the replacing will begin at the start'th offset into string.
     * @param int $length If given and is positive, it represents the length of the portion of string which is
     *                    to be replaced. If length is zero then this function will have the effect of inserting
     *                    replacement into string at the given start offset.
     * @return string
     * @see substr_replace()
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     */
    public static function substr_replace($string, $replacement, $start, $length = 0)
    {
        $ret = '';
        if ($start > 0) $ret .= self::substr($string, 0, $start);
        $ret .= $replacement;
        $ret .= self::substr($string, $start + $length);
        return $ret;
    }
    // phpcs:enable PSR1.Methods.CamelCapsMethodName.NotCamelCaps

    /**
     * Escape a list of characters for literal use inside a PCRE character class
     *
     * preg_quote() escapes every character that has a special meaning in a
     * pattern (including '^', '-', ']', '[' and '\') plus the given '/'
     * delimiter, so each character of the list is matched literally.
     *
     * @param string $charlist the characters to be trimmed
     * @return string the escaped list, ready to be placed between '[' and ']'
     */
    private static function quoteCharlist($charlist)
    {
        return preg_quote($charlist, '/');
    }

    /**
     * Unicode aware replacement for ltrim()
     *
     * With an empty $charlist this delegates to PHP's ltrim(). Otherwise every
     * character of $charlist is taken literally (no '..' range support).
     *
     * @param string $str
     * @param string $charlist
     * @return string
     * @see ltrim()
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     */
    public static function ltrim($str, $charlist = '')
    {
        if ($charlist === '') return ltrim($str);

        //quote charlist for use in a characterclass
        $charlist = self::quoteCharlist($charlist);

        return preg_replace('/^[' . $charlist . ']+/u', '', $str);
    }

    /**
     * Unicode aware replacement for rtrim()
     *
     * With an empty $charlist this delegates to PHP's rtrim(). Otherwise every
     * character of $charlist is taken literally (no '..' range support).
     *
     * @param string $str
     * @param string $charlist
     * @return string
     * @see rtrim()
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     */
    public static function rtrim($str, $charlist = '')
    {
        if ($charlist === '') return rtrim($str);

        //quote charlist for use in a characterclass
        $charlist = self::quoteCharlist($charlist);

        return preg_replace('/[' . $charlist . ']+$/u', '', $str);
    }

    /**
     * Unicode aware replacement for trim()
     *
     * @param string $str
     * @param string $charlist
     * @return string
     * @see trim()
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     */
    public static function trim($str, $charlist = '')
    {
        if ($charlist === '') return trim($str);

        return self::ltrim(self::rtrim($str, $charlist), $charlist);
    }

    /**
     * This is a unicode aware replacement for strtolower()
     *
     * Uses mb_string extension if available; the result is additionally passed
     * through Normalizer::normalize() when the Normalizer class is already loaded.
     *
     * @param string $string
     * @return string
     * @see \dokuwiki\Utf8\PhpString::strtoupper()
     *
     * @author Leo Feyer <leo@typolight.org>
     * @see strtolower()
     */
    public static function strtolower($string)
    {
        if ($string === null) return ''; // pre-8.1 behaviour
        if (UTF8_MBSTRING) {
            // second argument false: do not trigger autoloading for the check
            if (class_exists('Normalizer', false)) {
                return \Normalizer::normalize(mb_strtolower($string, 'utf-8'));
            }
            return (mb_strtolower($string, 'utf-8'));
        }
        return strtr($string, Table::upperCaseToLowerCase());
    }

    /**
     * This is a unicode aware replacement for strtoupper()
     *
     * Uses mb_string extension if available
     *
     * @param string $string
     * @return string
     * @see \dokuwiki\Utf8\PhpString::strtolower()
     *
     * @author Leo Feyer <leo@typolight.org>
     * @see strtoupper()
     */
    public static function strtoupper($string)
    {
        if ($string === null) return ''; // pre-8.1 behaviour, mirrors strtolower()
        if (UTF8_MBSTRING) return mb_strtoupper($string, 'utf-8');

        return strtr($string, Table::lowerCaseToUpperCase());
    }


    /**
     * UTF-8 aware alternative to ucfirst
     * Make a string's first character uppercase
     *
     * @param string $str
     * @return string with first character as upper case (if applicable)
     * @author Harry Fuecks
     *
     */
    public static function ucfirst($str)
    {
        switch (self::strlen($str)) {
            case 0:
                return '';
            case 1:
                return self::strtoupper($str);
            default:
                // split into the first character and the remainder
                preg_match('/^(.{1})(.*)$/us', $str, $matches);
                return self::strtoupper($matches[1]) . $matches[2];
        }
    }

    /**
     * UTF-8 aware alternative to ucwords
     * Uppercase the first character of each word in a string
     *
     * @param string $str
     * @return string with first char of each word uppercase
     * @author Harry Fuecks
     * @see http://php.net/ucwords
     *
     */
    public static function ucwords($str)
    {
        // Note: [\x0c\x09\x0b\x0a\x0d\x20] matches;
        // form feeds, horizontal tabs, vertical tabs, linefeeds and carriage returns
        // This corresponds to the definition of a "word" defined at http://php.net/ucwords
        $pattern = '/(^|([\x0c\x09\x0b\x0a\x0d\x20]+))([^\x0c\x09\x0b\x0a\x0d\x20]{1})[^\x0c\x09\x0b\x0a\x0d\x20]*/u';

        return preg_replace_callback(
            $pattern,
            function ($matches) {
                // $matches[2]: leading whitespace, $matches[3]: first character of the word
                $leadingws = $matches[2];
                $ucfirst = self::strtoupper($matches[3]);
                $ucword = self::substr_replace(ltrim($matches[0]), $ucfirst, 0, 1);
                return $leadingws . $ucword;
            },
            $str
        );
    }

    /**
     * This is an Unicode aware replacement for strpos
     *
     * The needle is located with the byte based strpos() and the byte position
     * is converted to a character position. The search is repeated with a
     * compensated byte offset until the match lies at or behind the requested
     * character offset.
     *
     * @param string $haystack
     * @param string $needle
     * @param integer $offset offset in characters
     * @return int|false character position of the needle, false if it was not found
     * @author Leo Feyer <leo@typolight.org>
     * @see strpos()
     *
     */
    public static function strpos($haystack, $needle, $offset = 0)
    {
        $comp = 0;
        $length = null;

        while ($length === null || $length < $offset) {
            $pos = strpos($haystack, $needle, $offset + $comp);

            if ($pos === false) {
                return false;
            }

            // number of characters in front of the match
            $length = self::strlen(substr($haystack, 0, $pos));

            if ($length < $offset) {
                // bytes consumed by multibyte characters so far
                $comp = $pos - $length;
            }
        }

        return $length;
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body