[ Index ]

PHP Cross Reference of DokuWiki

title

Body

[close]

/_test/tests/inc/ -> sort_with_collator.test.php (source)

   1  <?php
   2  
   3  use dokuwiki\Utf8\Sort;
   4  
   5  /**
   6   * @author Moisés Braga Ribeiro <moisesbr@gmail.com>
   7   * @author Andreas Gohr <andi@splitbrain.org>
   8   */
   9  class sort_with_collator_test extends DokuWikiTest
  10  {
  11      /*
  12       * Dependency for tests that need "intl" extension.
  13       */
  14      public function testIntlExtensionAvailability()
  15      {
  16          if (!class_exists('\Collator')) {
  17              $this->markTestSkipped('Skipping all sort tests with collator, as they need "intl" extension');
  18          }
  19          $this->assertTrue(true); // avoid being marked as risky for having no assertion
  20      }
  21  
  22      /**
  23       * Provide real word pairs of the languages being tested (when possible).
  24       * Everything which is beyond the usual A-Z order should be checked,
  25       * including every character with an accent (diacritic) used in the language.
  26       *
  27       * CHECKING NON-EQUIVALENT CHARACTERS (X < Y)
  28       *
  29       * In this case, the words are always sorted according to the character pair.
  30       * Craft word pairs to double-check the collator, such that sort by the next
  31       * character yields the opposite result.
  32       *
  33       *   Esperanto example: ĉ < d
  34       *   ĉokolado, dento ==> ĉ < d ==> ĉokolado < dento
  35       *   (if ĉ < d would fail, o < e would also fail ==> collator failure)
  36       *
  37       * CHECKING EQUIVALENT CHARACTERS (X = Y)
  38       *
  39       * If the sole difference between the words is the character pair, the sort
  40       * will be as if X < Y. Otherwise the characters will be treated as the same.
  41       * Craft two word pairs to test both conditions.
  42       *
  43       *   German example: a = ä
  44       *   Sole diff.: Apfel, Äpfel ==> a < ä        ==> Apfel < Äpfel
  45       *   Otherwise:  Ämter, Arzt  ==> a = ä, m < r ==> Ämter < Arzt
  46       *
  47       * CHECKING MULTIPLE EQUIVALENT CHARACTERS (X = Y = Z = ...)
  48       *
  49       * An extension of the above case. If the sole difference between the words is
  50       * a character pair from the given set, the sort will be as if X < Y < Z < ...
  51       * Otherwise the characters will be treated as the same.
  52       * Craft at least one word pair to test the first case and as many as possible
  53       * to test the other case.
  54       *
  55       *   Portuguese example: e = é = ê
  56       *   Sole diff.: de, dê         ==> e < ê                  ==> de < dê
  57       *   Otherwise:  pé, pedra      ==> é = e, end of word < d ==> pé < pedra
  58       *               pêssego, peste ==> ê = e, s = s, s < t    ==> pêssego < peste
  59       *
  60       * @return Generator|array
  61       * @see testStrcmp
  62       */
  63      public function provideWordPairs()
  64      {
  65          static $pairs = [
  66              // Esperanto
  67              'eo' => [
  68                  // c < ĉ < d
  69                  ['celo', 'ĉapo'], ['ĉokolado', 'dento'],
  70                  // g < ĝ < h < ĥ < i
  71                  ['glacio', 'ĝirafo'], ['ĝojo', 'haro'], ['horo', 'ĥameleono'], ['ĥoro', 'iam'],
  72                  // j < ĵ < k
  73                  ['jes', 'ĵaŭdo'], ['ĵurnalo', 'kapo'],
  74                  // s < ŝ < t
  75                  ['seka', 'ŝako'], ['ŝuo', 'tablo'],
  76                  // u < ŭ < v
  77                  ['urso', 'ŭaŭ'], ['ŭo', 'vino'],
  78                  // natural sort
  79                  ['paĝo 2', 'paĝo 10'], ['paĝo 51', 'paĝo 100']
  80              ],
  81  
  82              // German
  83              'de' => [
  84                  // a = ä
  85                  ['Apfel', 'Äpfel'], ['Ämter', 'Arzt'],
  86                  // o = ö
  87                  ['Tochter', 'Töchter'], ['Öl', 'Orange'],
  88                  // u = ü
  89                  ['Mutter', 'Mütter'], ['Übersetzung', 'Uhrzeit'],
  90                  // ß = ss
  91                  ['weiss', 'weiß'], ['Fuchs', 'Fuß'], ['Fraß', 'Frau'],
  92                  // natural sort
  93                  ['Seite 2', 'Seite 10'], ['Seite 51', 'Seite 100']
  94              ],
  95  
  96              // Portuguese
  97              'pt' => [
  98                  // a = á = à = â = ã
  99                  ['a', 'à'], ['água', 'amor'], ['às', 'ato'], ['âmbar', 'arte'], ['lã', 'lata'],
 100                  // e = é = ê
 101                  ['de', 'dê'], ['pé', 'pedra'], ['pêssego', 'peste'],
 102                  // i = í
 103                  ['liquido', 'líquido'], ['índio', 'indireto'],
 104                  // o = ó = ô = õ
 105                  ['avó', 'avô'], ['ótimo', 'ovo'], ['ônibus', 'osso'], ['limões', 'limonada'],
 106                  // u = ú = ü (ü appears in old texts)
 107                  ['numero', 'número'], ['último', 'um'], ['tranqüila', 'tranquilamente'],
 108                  // c = ç
 109                  ['faca', 'faça'], ['taça', 'taco'],
 110                  // natural sort
 111                  ['página 2', 'página 10'], ['página 51', 'página 100']
 112              ],
 113  
 114              // Spanish
 115              'es' => [
 116                  // n < ñ < o
 117                  ['nube', 'ñoño'], ['ñu', 'ojo'],
 118                  // a = á
 119                  ['mas', 'más'], ['ácido', 'agua'],
 120                  // e = é
 121                  ['de', 'dé'], ['él', 'elefante'],
 122                  // i = í
 123                  ['mi', 'mí'], ['íntimo', 'isla'],
 124                  // o = ó
 125                  ['como', 'cómo'], ['óptimo', 'oreja'],
 126                  // u = ú
 127                  ['tu', 'tú'], ['último', 'uno'],
 128                  // natural sort
 129                  ['página 2', 'página 10'], ['página 51', 'página 100']
 130              ],
 131          ];
 132  
 133          foreach ($pairs as $lang => $list) {
 134              foreach ($list as $pair) {
 135                  yield [$lang, $pair[0], $pair[1]];
 136              }
 137          }
 138      }
 139  
 140      /**
 141       * Provide the sorted sequences of all characters used in the languages being tested.
 142       * Everything which is beyond the usual A-Z order should be checked.
 143       *
 144       * CHECKING NON-EQUIVALENT CHARACTERS (X < Y)
 145       *
 146       * Add a 2nd character to double-check the collator, such that sort by the 2nd
 147       * character yields the opposite result.
 148       *
 149       *   Esperanto example: ĉ < d
 150       *   2nd character: ĉe, da ==> ĉ < d ==> ĉe < da
 151       *   (if ĉ < d would fail, e < a would also fail ==> collator failure)
 152       *
 153       * CHECKING EQUIVALENT CHARACTERS (X = Y = Z)
 154       *
 155       * Don't add a 2nd character, because it would break the test. The lone characters
 156       * will be sorted as words with a sole difference, that is, as if X < Y < Z.
 157       *
 158       *   German example: a = ä
 159       *   Sole difference: a, ä ==> a < ä
 160       *
 161       * @return Generator|array
 162       * @see testSort
 163       * @see testKSort
 164       * @see testASort
 165       * @see testASortFnUrl
 166       * @see testASortFnSafe
 167       * @see testASortFnUtf8
 168       */
 169      public function provideSortedCharList()
 170      {
 171          static $lists = [
 172              // Esperanto
 173              // c < ĉ < d
 174              // g < ĝ < h < ĥ < i
 175              // j < ĵ < k
 176              // s < ŝ < t
 177              // u < ŭ < v
 178              'eo' => 'a b ci ĉe da e f gu ĝo hi ĥe ia ju ĵo ke l m n o p r so ŝi te us ŭo ve z',
 179  
 180              // German
 181              // a = ä
 182              // o = ö
 183              // u = ü
 184              // ß = ss
 185              'de' => 'a ä b c d e f g h i j k l m n o ö p q r s ss ß st t u ü v w x y z',
 186  
 187              // Portuguese
 188              // a = á = à = â = ã
 189              // e = é = ê
 190              // i = í
 191              // o = ó = ô = õ
 192              // u = ú = ü (ü appears in old texts)
 193              // c = ç
 194              'pt' => 'a á à â ã b c ç d e é ê f g h i í j k l m n o ó ô õ p q r s t u ú ü v w x y z',
 195  
 196              // Spanish
 197              // n < ñ < o
 198              // a = á
 199              // e = é
 200              // i = í
 201              // o = ó
 202              // u = ú
 203              'es' => 'a á b c d e é f g h i í j k l m nu ño oh óh p q r s t u ú v w x y z',
 204          ];
 205  
 206          foreach ($lists as $lang => $list) {
 207              yield [$lang, $list];
 208          }
 209      }
 210  
 211      /**
 212       * @depends      testIntlExtensionAvailability
 213       * @dataProvider provideWordPairs
 214       * @param string $lang
 215       * @param string $word1
 216       * @param string $word2
 217       */
 218      public function testStrcmp($lang, $word1, $word2)
 219      {
 220          global $conf;
 221          $conf['lang'] = $lang;
 222  
 223          $this->assertLessThan(0, Sort::strcmp($word1, $word2));
 224      }
 225  
 226      /**
 227       * @dataProvider provideSortedCharList
 228       * @depends      testIntlExtensionAvailability
 229       * @param string $lang
 230       * @param string $list
 231       */
 232      public function testSort($lang, $list)
 233      {
 234          global $conf;
 235          $conf['lang'] = $lang;
 236  
 237          $sorted = explode(' ', $list);
 238          $random = explode(' ', $list);
 239          shuffle($random);
 240          Sort::sort($random);
 241          $this->assertEquals(array_values($random), array_values($sorted));
 242      }
 243  
 244      /**
 245       * @dataProvider provideSortedCharList
 246       * @depends      testIntlExtensionAvailability
 247       * @param string $lang
 248       * @param string $list
 249       */
 250      public function testKSort($lang, $list)
 251      {
 252          global $conf;
 253          $conf['lang'] = $lang;
 254  
 255          $sorted = array_flip(explode(' ', $list));
 256          $random = explode(' ', $list);
 257          shuffle($random);
 258          $random = array_flip($random);
 259          Sort::ksort($random);
 260          $this->assertEquals(array_keys($random), array_keys($sorted));
 261      }
 262  
 263      /**
 264       * @dataProvider provideSortedCharList
 265       * @depends      testIntlExtensionAvailability
 266       * @param string $lang
 267       * @param string $list
 268       */
 269      public function testASort($lang, $list)
 270      {
 271          global $conf;
 272          $conf['lang'] = $lang;
 273  
 274          $sorted = explode(' ', $list);
 275          $keys = array_keys($sorted);
 276          shuffle($keys);
 277          foreach ($keys as $key) {
 278              $random[$key] = $sorted[$key];
 279          }
 280          Sort::asort($random);
 281          $this->assertEquals(array_values($random), array_values($sorted));
 282          $this->assertEquals(array_keys($random), array_keys($sorted));
 283      }
 284  
 285      /**
 286       * @dataProvider provideSortedCharList
 287       * @depends      testIntlExtensionAvailability
 288       * @param string $lang
 289       * @param string $list
 290       */
 291      public function testASortFnUrl($lang, $list)
 292      {
 293          global $conf;
 294          $conf['fnencode'] = 'url';
 295          $conf['lang'] = $lang;
 296  
 297          $sorted = explode('+', urlencode($list));
 298          $keys = array_keys($sorted);
 299          shuffle($keys);
 300          foreach ($keys as $key) {
 301              $random[$key] = $sorted[$key];
 302          }
 303          Sort::asortFN($random);
 304          $this->assertEquals(array_values($random), array_values($sorted));
 305          $this->assertEquals(array_keys($random), array_keys($sorted));
 306      }
 307  
 308      /**
 309       * @dataProvider provideSortedCharList
 310       * @depends      testIntlExtensionAvailability
 311       * @param string $lang
 312       * @param string $list
 313       */
 314      public function testASortFnSafe($lang, $list)
 315      {
 316          global $conf;
 317          $conf['fnencode'] = 'safe';
 318          $conf['lang'] = $lang;
 319  
 320          $sorted = explode(' ', $list);
 321          foreach (array_keys($sorted) as $key) {
 322              $sorted[$key] = SafeFN::encode($sorted[$key]);
 323          }
 324          $keys = array_keys($sorted);
 325          shuffle($keys);
 326          foreach ($keys as $key) {
 327              $random[$key] = $sorted[$key];
 328          }
 329          Sort::asortFN($random);
 330          $this->assertEquals(array_values($random), array_values($sorted));
 331          $this->assertEquals(array_keys($random), array_keys($sorted));
 332      }
 333  
 334      /**
 335       * @dataProvider provideSortedCharList
 336       * @depends      testIntlExtensionAvailability
 337       * @param string $lang
 338       * @param string $list
 339       */
 340      public function testASortFnUtf8($lang, $list)
 341      {
 342          global $conf;
 343          $conf['fnencode'] = 'utf-8';
 344          $conf['lang'] = $lang;
 345  
 346          $sorted = explode(' ', $list);
 347          $keys = array_keys($sorted);
 348          shuffle($keys);
 349          foreach ($keys as $key) {
 350              $random[$key] = $sorted[$key];
 351          }
 352          Sort::asortFN($random);
 353          $this->assertEquals(array_values($random), array_values($sorted));
 354          $this->assertEquals(array_keys($random), array_keys($sorted));
 355      }
 356  }