[ Index ] |
PHP Cross Reference of DokuWiki |
[Summary view] [Print] [Text view]
<?php

namespace dokuwiki\Utf8;

/**
 * Methods to convert from and to UTF-8 strings
 */
class Conversion
{
    /**
     * Encodes UTF-8 characters to HTML entities
     *
     * Code points below 0x80 are emitted as plain characters unless $all is
     * set. Code points below 0x100 that are not emitted literally become
     * decimal references (&#NN;), everything else becomes a hexadecimal
     * reference (&#xHH;).
     *
     * @author Tom N Harris <tnharris@whoopdedo.org>
     * @author <vpribish at shopping dot com>
     * @link   http://php.net/manual/en/function.utf8-decode.php
     *
     * @param string $str UTF-8 encoded string
     * @param bool $all Also encode code points below 0x80 as numeric references
     * @return string
     */
    public static function toHtml($str, $all = false)
    {
        $ret = '';
        foreach (Unicode::fromUtf8($str) as $cp) {
            if ($cp < 0x80 && !$all) {
                $ret .= chr($cp);
            } elseif ($cp < 0x100) {
                $ret .= "&#$cp;";
            } else {
                $ret .= '&#x' . dechex($cp) . ';';
            }
        }
        return $ret;
    }

    /**
     * Decodes HTML entities to UTF-8 characters
     *
     * By default only numeric character references (&#NN; and &#xHH;) are
     * decoded. When $entities is truthy, named entities such as &amp; or &lt;
     * are decoded in the same pass.
     *
     * Because everything is handled in a single pass, text produced by
     * decoding one entity is never scanned again: with $entities enabled the
     * input "&amp;#38;" yields the literal text "&#38;", not "&".
     *
     * Malformed numeric references and unknown named entities are left in
     * the output unchanged.
     *
     * @author Tom N Harris <tnharris@whoopdedo.org>
     *
     * @param string $str UTF-8 encoded string
     * @param boolean $entities decode named entities in addition to numeric ones
     * @return string UTF-8 encoded string with numeric (and named) entities replaced.
     */
    public static function fromHtml($str, $entities = false)
    {
        if (!$entities) {
            return preg_replace_callback(
                '/(&#([Xx])?([0-9A-Za-z]+);)/m',
                [__CLASS__, 'decodeNumericEntity'],
                $str
            );
        }

        return preg_replace_callback(
            '/&(#)?([Xx])?([0-9A-Za-z]+);/m',
            [__CLASS__, 'decodeAnyEntity'],
            $str
        );
    }

    /**
     * Decodes any HTML entity to its correct UTF-8 char equivalent
     *
     * Callback for preg_replace_callback(). Expected match layout:
     * [0] whole entity, [1] '#' for numeric references (empty otherwise),
     * [2] optional 'x'/'X' hex marker, [3] digits or entity name.
     *
     * @param string[] $ent Match array of a single entity
     * @return string|false the decoded character, or the entity unchanged when it is unknown
     */
    protected static function decodeAnyEntity($ent)
    {
        // Named entity lookup table ('&name;' => UTF-8 character), built once.
        static $table = null;
        if ($table === null) {
            // Ask for the table in UTF-8 explicitly: the flipped values are then
            // already correctly encoded characters. Passing them through ord()
            // would only see the first byte of each multi-byte character and
            // map every non-ASCII entity to the wrong code point.
            $table = array_flip(
                get_html_translation_table(HTML_ENTITIES, ENT_QUOTES | ENT_HTML401, 'UTF-8')
            );
        }

        if ($ent[1] === '#') {
            return self::decodeNumericEntity($ent);
        }

        if (isset($table[$ent[0]])) {
            return $table[$ent[0]];
        }

        // not a known named entity, keep the text as it was
        return $ent[0];
    }

    /**
     * Decodes numeric HTML entities to their correct UTF-8 characters
     *
     * Callback for preg_replace_callback(). Uses $ent[2] (optional 'x'/'X'
     * hex marker) and $ent[3] (the digits). The surrounding patterns allow any
     * alphanumeric characters in $ent[3], so the digits are validated here and
     * anything that is not a well-formed number is returned untouched instead
     * of being turned into a NUL character.
     *
     * @param string[] $ent Match array of a numeric entity
     * @return string|false
     */
    protected static function decodeNumericEntity($ent)
    {
        $isHex = ($ent[2] === 'X' || $ent[2] === 'x');
        $digits = $isHex ? '/^[0-9A-Fa-f]+$/' : '/^[0-9]+$/';

        if (!preg_match($digits, $ent[3])) {
            return $ent[0];
        }

        $cp = $isHex ? hexdec($ent[3]) : intval($ent[3]);
        return Unicode::toUtf8([$cp]);
    }

    /**
     * UTF-8 to UTF-16BE conversion.
     *
     * Uses mb_convert_encoding() when UTF8_MBSTRING is set. The fallback packs
     * the code points returned by Unicode::fromUtf8() itself and writes code
     * points above U+FFFF as surrogate pairs, so both branches produce UTF-16.
     *
     * @param string $str UTF-8 encoded string
     * @param bool $bom Prepend the big endian byte order mark (FE FF)
     * @return string
     */
    public static function toUtf16be($str, $bom = false)
    {
        $out = $bom ? "\xFE\xFF" : '';
        if (UTF8_MBSTRING) {
            return $out . mb_convert_encoding($str, 'UTF-16BE', 'UTF-8');
        }

        foreach (Unicode::fromUtf8($str) as $cp) {
            if ($cp > 0xFFFF && $cp <= 0x10FFFF) {
                // a single 16 bit unit cannot hold this, split into high/low surrogate
                $cp -= 0x10000;
                $out .= pack('nn', 0xD800 | ($cp >> 10), 0xDC00 | ($cp & 0x3FF));
            } else {
                $out .= pack('n', $cp);
            }
        }
        return $out;
    }

    /**
     * UTF-16BE to UTF-8 conversion.
     *
     * Reads the input as big endian 16 bit units, joins surrogate pairs into
     * their supplementary code point and passes the result to
     * Unicode::toUtf8(). Unpaired surrogates are handed on unchanged.
     *
     * @param string $str UTF-16BE encoded string (without byte order mark)
     * @return false|string
     */
    public static function fromUtf16be($str)
    {
        $units = unpack('n*', $str);
        if ($units === false) {
            return false;
        }

        $codepoints = [];
        $high = null; // high surrogate still waiting for its low partner
        foreach ($units as $unit) {
            if ($high !== null) {
                if ($unit >= 0xDC00 && $unit <= 0xDFFF) {
                    $codepoints[] = 0x10000 + (($high - 0xD800) << 10) + ($unit - 0xDC00);
                    $high = null;
                    continue;
                }
                // previous unit was not followed by a low surrogate
                $codepoints[] = $high;
                $high = null;
            }

            if ($unit >= 0xD800 && $unit <= 0xDBFF) {
                $high = $unit;
            } else {
                $codepoints[] = $unit;
            }
        }
        if ($high !== null) {
            $codepoints[] = $high;
        }

        return Unicode::toUtf8($codepoints);
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body