[ Index ]

PHP Cross Reference of DokuWiki

title

Body

[close]

/inc/ -> search.php (source)

   1  <?php
   2  
   3  /**
   4   * DokuWiki search functions
   5   *
   6   * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
   7   * @author     Andreas Gohr <andi@splitbrain.org>
   8   */
   9  
  10  use dokuwiki\Utf8\PhpString;
  11  use dokuwiki\File\MediaFile;
  12  use dokuwiki\Utf8\Sort;
  13  
  14  /**
  15   * Recurse directory
  16   *
  17   * This function recurses into a given base directory
  18   * and calls the supplied function for each file and directory
  19   *
  20   * @param   array    &$data The results of the search are stored here
  21   * @param   string    $base Where to start the search
  22   * @param   callback  $func Callback (function name or array with object,method)
  23   * @param   array     $opts option array will be given to the Callback
  24   * @param   string    $dir  Current directory beyond $base
  25   * @param   int       $lvl  Recursion Level
  26   * @param   mixed     $sort 'natural' to use natural order sorting (default);
  27   *                          'date' to sort by filemtime; leave empty to skip sorting.
  28   * @author  Andreas Gohr <andi@splitbrain.org>
  29   */
  30  function search(&$data, $base, $func, $opts, $dir = '', $lvl = 1, $sort = 'natural')
  31  {
  32      $dirs   = [];
  33      $files  = [];
  34      $filepaths = [];
  35  
  36      // safeguard against runaways #1452
  37      if ($base == '' || $base == '/') {
  38          throw new RuntimeException('No valid $base passed to search() - possible misconfiguration or bug');
  39      }
  40  
  41      //read in directories and files
  42      $dh = @opendir($base . '/' . $dir);
  43      if (!$dh) return;
  44      while (($file = readdir($dh)) !== false) {
  45          if (preg_match('/^[\._]/', $file)) continue; //skip hidden files and upper dirs
  46          if (is_dir($base . '/' . $dir . '/' . $file)) {
  47              $dirs[] = $dir . '/' . $file;
  48              continue;
  49          }
  50          $files[] = $dir . '/' . $file;
  51          $filepaths[] = $base . '/' . $dir . '/' . $file;
  52      }
  53      closedir($dh);
  54      if (!empty($sort)) {
  55          if ($sort == 'date') {
  56              @array_multisort(array_map('filemtime', $filepaths), SORT_NUMERIC, SORT_DESC, $files);
  57          } else /* natural */ {
  58              Sort::asortFN($files);
  59          }
  60          Sort::asortFN($dirs);
  61      }
  62  
  63      //give directories to userfunction then recurse
  64      foreach ($dirs as $dir) {
  65          if (call_user_func_array($func, [&$data, $base, $dir, 'd', $lvl, $opts])) {
  66              search($data, $base, $func, $opts, $dir, $lvl + 1, $sort);
  67          }
  68      }
  69      //now handle the files
  70      foreach ($files as $file) {
  71          call_user_func_array($func, [&$data, $base, $file, 'f', $lvl, $opts]);
  72      }
  73  }
  74  
  75  /**
  76   * The following functions are userfunctions to use with the search
  77   * function above. This function is called for every found file or
  78   * directory. When a directory is given to the function it has to
  79   * decide if this directory should be traversed (true) or not (false)
  80   * The function has to accept the following parameters:
  81   *
  82   * array &$data  - Reference to the result data structure
  83   * string $base  - Base usually $conf['datadir']
  84   * string $file  - current file or directory relative to $base
  85   * string $type  - Type either 'd' for directory or 'f' for file
  86   * int    $lvl   - Current recursion depth
  87   * array  $opts  - option array as given to search()
  88   *
  89   * return values for files are ignored
  90   *
  91   * All functions should check the ACL for document READ rights
  92   * namespaces (directories) are NOT checked (when sneaky_index is 0) as this
  93   * would break the recursion (You can have a nonreadable dir over a readable
  94   * one deeper nested) also make sure to check the file type (for example
  95   * in case of lockfiles).
  96   */
  97  
  98  /**
  99   * Searches for pages beginning with the given query
 100   *
 101   * @author Andreas Gohr <andi@splitbrain.org>
 102   *
 103   * @param array $data
 104   * @param string $base
 105   * @param string $file
 106   * @param string $type
 107   * @param integer $lvl
 108   * @param array $opts
 109   *
 110   * @return bool
 111   */
 112  function search_qsearch(&$data, $base, $file, $type, $lvl, $opts)
 113  {
 114      $opts = [
 115          'idmatch'   => '(^|:)' . preg_quote($opts['query'], '/') . '/',
 116          'listfiles' => true,
 117          'pagesonly' => true
 118      ];
 119      return search_universal($data, $base, $file, $type, $lvl, $opts);
 120  }
 121  
 122  /**
 123   * Build the browsable index of pages
 124   *
 125   * $opts['ns'] is the currently viewed namespace
 126   *
 127   * @author  Andreas Gohr <andi@splitbrain.org>
 128   *
 129   * @param array $data
 130   * @param string $base
 131   * @param string $file
 132   * @param string $type
 133   * @param integer $lvl
 134   * @param array $opts
 135   *
 136   * @return bool
 137   */
 138  function search_index(&$data, $base, $file, $type, $lvl, $opts)
 139  {
 140      global $conf;
 141      $ns = $opts['ns'] ?? '';
 142      $opts = [
 143          'pagesonly' => true,
 144          'listdirs' => true,
 145          'listfiles' => empty($opts['nofiles']),
 146          'sneakyacl' => $conf['sneaky_index'],
 147          // Hacky, should rather use recmatch
 148          'depth' => preg_match('#^' . preg_quote($file, '#') . '(/|$)#', '/' . $ns) ? 0 : -1,
 149      ];
 150  
 151      return search_universal($data, $base, $file, $type, $lvl, $opts);
 152  }
 153  
 154  /**
 155   * List all namespaces
 156   *
 157   * @author  Andreas Gohr <andi@splitbrain.org>
 158   *
 159   * @param array $data
 160   * @param string $base
 161   * @param string $file
 162   * @param string $type
 163   * @param integer $lvl
 164   * @param array $opts
 165   *
 166   * @return bool
 167   */
 168  function search_namespaces(&$data, $base, $file, $type, $lvl, $opts)
 169  {
 170      $opts = ['listdirs' => true];
 171      return search_universal($data, $base, $file, $type, $lvl, $opts);
 172  }
 173  
 174  /**
 175   * List all mediafiles in a namespace
 176   *   $opts['depth']     recursion level, 0 for all
 177   *   $opts['showmsg']   shows message if invalid media id is used
 178   *   $opts['skipacl']   skip acl checking
 179   *   $opts['pattern']   check given pattern
 180   *   $opts['hash']      add hashes to result list
 181   *
 182   * @author  Andreas Gohr <andi@splitbrain.org>
 183   *
 184   * @param array $data
 185   * @param string $base
 186   * @param string $file
 187   * @param string $type
 188   * @param integer $lvl
 189   * @param array $opts
 190   *
 191   * @return bool
 192   */
 193  function search_media(&$data, $base, $file, $type, $lvl, $opts)
 194  {
 195  
 196      //we do nothing with directories
 197      if ($type == 'd') {
 198          if (empty($opts['depth'])) return true; // recurse forever
 199          $depth = substr_count($file, '/');
 200          if ($depth >= $opts['depth']) return false; // depth reached
 201          return true;
 202      }
 203  
 204      $info         = [];
 205      $info['id']   = pathID($file, true);
 206      if ($info['id'] !== cleanID($info['id'])) {
 207          if (!empty($opts['showmsg']))
 208              msg(hsc($info['id']) . ' is not a valid file name for DokuWiki - skipped', -1);
 209          return false; // skip non-valid files
 210      }
 211  
 212      //check ACL for namespace (we have no ACL for mediafiles)
 213      $info['perm'] = auth_quickaclcheck(getNS($info['id']) . ':*');
 214      if (empty($opts['skipacl']) && $info['perm'] < AUTH_READ) {
 215          return false;
 216      }
 217  
 218      //check pattern filter
 219      if (!empty($opts['pattern']) && !@preg_match($opts['pattern'], $info['id'])) {
 220          return false;
 221      }
 222  
 223      $info['file']     = PhpString::basename($file);
 224      $info['size']     = filesize($base . '/' . $file);
 225      $info['mtime']    = filemtime($base . '/' . $file);
 226      $info['writable'] = is_writable($base . '/' . $file);
 227      if (preg_match("/\.(jpe?g|gif|png)$/", $file)) {
 228          $info['isimg'] = true;
 229          $info['meta']  = new JpegMeta($base . '/' . $file);
 230      } else {
 231          $info['isimg'] = false;
 232      }
 233      if (!empty($opts['hash'])) {
 234          $info['hash'] = md5(io_readFile(mediaFN($info['id']), false));
 235      }
 236  
 237      $data[] = $info;
 238  
 239      return false;
 240  }
 241  
 242  /**
 243   * List all mediafiles in a namespace
 244   *   $opts['depth']     recursion level, 0 for all
 245   *   $opts['showmsg']   shows message if invalid media id is used
 246   *   $opts['skipacl']   skip acl checking
 247   *   $opts['pattern']   check given pattern
 248   *   $opts['hash']      add hashes to result list
 249   *
 250   * @todo This is a temporary copy of search_media returning a list of MediaFile intances
 251   *
 252   * @param array $data
 253   * @param string $base
 254   * @param string $file
 255   * @param string $type
 256   * @param integer $lvl
 257   * @param array $opts
 258   *
 259   * @return bool
 260   */
 261  function search_mediafiles(&$data, $base, $file, $type, $lvl, $opts)
 262  {
 263  
 264      //we do nothing with directories
 265      if ($type == 'd') {
 266          if (empty($opts['depth'])) return true; // recurse forever
 267          $depth = substr_count($file, '/');
 268          if ($depth >= $opts['depth']) return false; // depth reached
 269          return true;
 270      }
 271  
 272      $id   = pathID($file, true);
 273      if ($id != cleanID($id)) {
 274          if ($opts['showmsg'])
 275              msg(hsc($id) . ' is not a valid file name for DokuWiki - skipped', -1);
 276          return false; // skip non-valid files
 277      }
 278  
 279      //check ACL for namespace (we have no ACL for mediafiles)
 280      $info['perm'] = auth_quickaclcheck(getNS($id) . ':*');
 281      if (empty($opts['skipacl']) && $info['perm'] < AUTH_READ) {
 282          return false;
 283      }
 284  
 285      //check pattern filter
 286      if (!empty($opts['pattern']) && !@preg_match($opts['pattern'], $id)) {
 287          return false;
 288      }
 289  
 290      $data[] = new MediaFile($id);
 291      return false;
 292  }
 293  
 294  
 295  /**
 296   * This function just lists documents (for RSS namespace export)
 297   *
 298   * @author  Andreas Gohr <andi@splitbrain.org>
 299   *
 300   * @param array $data
 301   * @param string $base
 302   * @param string $file
 303   * @param string $type
 304   * @param integer $lvl
 305   * @param array $opts
 306   *
 307   * @return bool
 308   */
 309  function search_list(&$data, $base, $file, $type, $lvl, $opts)
 310  {
 311      //we do nothing with directories
 312      if ($type == 'd') return false;
 313      //only search txt files
 314      if (str_ends_with($file, '.txt')) {
 315          //check ACL
 316          $id = pathID($file);
 317          if (auth_quickaclcheck($id) < AUTH_READ) {
 318              return false;
 319          }
 320          $data[]['id'] = $id;
 321      }
 322      return false;
 323  }
 324  
 325  /**
 326   * Quicksearch for searching matching pagenames
 327   *
 328   * $opts['query'] is the search query
 329   *
 330   * @author  Andreas Gohr <andi@splitbrain.org>
 331   *
 332   * @param array $data
 333   * @param string $base
 334   * @param string $file
 335   * @param string $type
 336   * @param integer $lvl
 337   * @param array $opts
 338   *
 339   * @return bool
 340   */
 341  function search_pagename(&$data, $base, $file, $type, $lvl, $opts)
 342  {
 343      //we do nothing with directories
 344      if ($type == 'd') return true;
 345      //only search txt files
 346      if (!str_ends_with($file, '.txt')) return true;
 347  
 348      //simple stringmatching
 349      if (!empty($opts['query'])) {
 350          if (strpos($file, (string) $opts['query']) !== false) {
 351              //check ACL
 352              $id = pathID($file);
 353              if (auth_quickaclcheck($id) < AUTH_READ) {
 354                  return false;
 355              }
 356              $data[]['id'] = $id;
 357          }
 358      }
 359      return true;
 360  }
 361  
 362  /**
 363   * Just lists all documents
 364   *
 365   * $opts['depth']   recursion level, 0 for all
 366   * $opts['hash']    do md5 sum of content?
 367   * $opts['skipacl'] list everything regardless of ACL
 368   *
 369   * @author  Andreas Gohr <andi@splitbrain.org>
 370   *
 371   * @param array $data
 372   * @param string $base
 373   * @param string $file
 374   * @param string $type
 375   * @param integer $lvl
 376   * @param array $opts
 377   *
 378   * @return bool
 379   */
 380  function search_allpages(&$data, $base, $file, $type, $lvl, $opts)
 381  {
 382      if (($opts['depth'] ?? 0) > 0) {
 383          $parts = explode('/', ltrim($file, '/'));
 384          if (
 385              ($type == 'd' && count($parts) >= $opts['depth'])
 386              || ($type != 'd' && count($parts) > $opts['depth'])
 387          ) {
 388              return false; // depth reached
 389          }
 390      }
 391  
 392      //we do nothing with directories
 393      if ($type == 'd') {
 394          return true;
 395      }
 396  
 397      //only search txt files
 398      if (!str_ends_with($file, '.txt')) return true;
 399  
 400      $item = [];
 401      $item['id']   = pathID($file);
 402      if (empty($opts['skipacl']) && auth_quickaclcheck($item['id']) < AUTH_READ) {
 403          return false;
 404      }
 405  
 406      $item['rev']   = filemtime($base . '/' . $file);
 407      $item['mtime'] = $item['rev'];
 408      $item['size']  = filesize($base . '/' . $file);
 409      if (!empty($opts['hash'])) {
 410          $item['hash'] = md5(trim(rawWiki($item['id'])));
 411      }
 412  
 413      $data[] = $item;
 414      return true;
 415  }
 416  
 417  /* ------------- helper functions below -------------- */
 418  
 419  /**
 420   * fulltext sort
 421   *
 422   * Callback sort function for use with usort to sort the data
 423   * structure created by search_fulltext. Sorts descending by count
 424   *
 425   * @author  Andreas Gohr <andi@splitbrain.org>
 426   *
 427   * @param array $a
 428   * @param array $b
 429   *
 430   * @return int
 431   */
 432  function sort_search_fulltext($a, $b)
 433  {
 434      if ($a['count'] > $b['count']) {
 435          return -1;
 436      } elseif ($a['count'] < $b['count']) {
 437          return 1;
 438      } else {
 439          return Sort::strcmp($a['id'], $b['id']);
 440      }
 441  }
 442  
 443  /**
 444   * translates a document path to an ID
 445   *
 446   * @author  Andreas Gohr <andi@splitbrain.org>
 447   * @todo    move to pageutils
 448   *
 449   * @param string $path
 450   * @param bool $keeptxt
 451   *
 452   * @return string
 453   */
 454  function pathID($path, $keeptxt = false)
 455  {
 456      $id = utf8_decodeFN($path);
 457      $id = str_replace('/', ':', $id);
 458      if (!$keeptxt) $id = preg_replace('#\.txt$#', '', $id);
 459      $id = trim($id, ':');
 460      return $id;
 461  }
 462  
 463  
 464  /**
 465   * This is a very universal callback for the search() function, replacing
 466   * many of the former individual functions at the cost of a more complex
 467   * setup.
 468   *
 469   * How the function behaves, depends on the options passed in the $opts
 470   * array, where the following settings can be used.
 471   *
 472   * depth      int     recursion depth. 0 for unlimited                       (default: 0)
 473   * keeptxt    bool    keep .txt extension for IDs                            (default: false)
 474   * listfiles  bool    include files in listing                               (default: false)
 475   * listdirs   bool    include namespaces in listing                          (default: false)
 476   * pagesonly  bool    restrict files to pages                                (default: false)
 477   * skipacl    bool    do not check for READ permission                       (default: false)
 478   * sneakyacl  bool    don't recurse into nonreadable dirs                    (default: false)
 479   * hash       bool    create MD5 hash for files                              (default: false)
 480   * meta       bool    return file metadata                                   (default: false)
 481   * filematch  string  match files against this regexp                        (default: '', so accept everything)
 482   * idmatch    string  match full ID against this regexp                      (default: '', so accept everything)
 483   * dirmatch   string  match directory against this regexp when adding        (default: '', so accept everything)
 484   * nsmatch    string  match namespace against this regexp when adding        (default: '', so accept everything)
 485   * recmatch   string  match directory against this regexp when recursing     (default: '', so accept everything)
 486   * showmsg    bool    warn about non-ID files                                (default: false)
 487   * showhidden bool    show hidden files(e.g. by hidepages config) too        (default: false)
 488   * firsthead  bool    return first heading for pages                         (default: false)
 489   *
 490   * @param array &$data  - Reference to the result data structure
 491   * @param string $base  - Base usually $conf['datadir']
 492   * @param string $file  - current file or directory relative to $base
 493   * @param string $type  - Type either 'd' for directory or 'f' for file
 494   * @param int    $lvl   - Current recursion depht
 495   * @param array  $opts  - option array as given to search()
 496   * @return bool if this directory should be traversed (true) or not (false)
 497   *              return value is ignored for files
 498   *
 499   * @author Andreas Gohr <gohr@cosmocode.de>
 500   */
 501  function search_universal(&$data, $base, $file, $type, $lvl, $opts)
 502  {
 503      $item   = [];
 504      $return = true;
 505  
 506      // get ID and check if it is a valid one
 507      $item['id'] = pathID($file, ($type == 'd' || !empty($opts['keeptxt'])));
 508      if ($item['id'] !== cleanID($item['id'])) {
 509          if (!empty($opts['showmsg'])) {
 510              msg(hsc($item['id']) . ' is not a valid file name for DokuWiki - skipped', -1);
 511          }
 512          return false; // skip non-valid files
 513      }
 514      $item['ns']  = getNS($item['id']);
 515  
 516      if ($type == 'd') {
 517          // decide if to recursion into this directory is wanted
 518          if (empty($opts['depth'])) {
 519              $return = true; // recurse forever
 520          } else {
 521              $depth = substr_count($file, '/');
 522              if ($depth >= $opts['depth']) {
 523                  $return = false; // depth reached
 524              } else {
 525                  $return = true;
 526              }
 527          }
 528  
 529          if ($return) {
 530              $match = empty($opts['recmatch']) || preg_match('/' . $opts['recmatch'] . '/', $file);
 531              if (!$match) {
 532                  return false; // doesn't match
 533              }
 534          }
 535      }
 536  
 537      // check ACL
 538      if (empty($opts['skipacl'])) {
 539          if ($type == 'd') {
 540              $item['perm'] = auth_quickaclcheck($item['id'] . ':*');
 541          } else {
 542              $item['perm'] = auth_quickaclcheck($item['id']); //FIXME check namespace for media files
 543          }
 544      } else {
 545          $item['perm'] = AUTH_DELETE;
 546      }
 547  
 548      // are we done here maybe?
 549      if ($type == 'd') {
 550          if (empty($opts['listdirs'])) return $return;
 551          //neither list nor recurse forbidden items:
 552          if (empty($opts['skipacl']) && !empty($opts['sneakyacl']) && $item['perm'] < AUTH_READ) return false;
 553          if (!empty($opts['dirmatch']) && !preg_match('/' . $opts['dirmatch'] . '/', $file)) return $return;
 554          if (!empty($opts['nsmatch']) && !preg_match('/' . $opts['nsmatch'] . '/', $item['ns'])) return $return;
 555      } else {
 556          if (empty($opts['listfiles'])) return $return;
 557          if (empty($opts['skipacl']) && $item['perm'] < AUTH_READ) return $return;
 558          if (!empty($opts['pagesonly']) && !str_ends_with($file, '.txt')) return $return;
 559          if (empty($opts['showhidden']) && isHiddenPage($item['id'])) return $return;
 560          if (!empty($opts['filematch']) && !preg_match('/' . $opts['filematch'] . '/', $file)) return $return;
 561          if (!empty($opts['idmatch']) && !preg_match('/' . $opts['idmatch'] . '/', $item['id'])) return $return;
 562      }
 563  
 564      // still here? prepare the item
 565      $item['type']  = $type;
 566      $item['level'] = $lvl;
 567      $item['open']  = $return;
 568  
 569      if (!empty($opts['meta'])) {
 570          $item['file']       = PhpString::basename($file);
 571          $item['size']       = filesize($base . '/' . $file);
 572          $item['mtime']      = filemtime($base . '/' . $file);
 573          $item['rev']        = $item['mtime'];
 574          $item['writable']   = is_writable($base . '/' . $file);
 575          $item['executable'] = is_executable($base . '/' . $file);
 576      }
 577  
 578      if ($type == 'f') {
 579          if (!empty($opts['hash'])) $item['hash'] = md5(io_readFile($base . '/' . $file, false));
 580          if (!empty($opts['firsthead'])) $item['title'] = p_get_first_heading($item['id'], METADATA_DONT_RENDER);
 581      }
 582  
 583      // finally add the item
 584      $data[] = $item;
 585      return $return;
 586  }
 587  
 588  //Setup VIM: ex: et ts=4 :