[ Index ]

PHP Cross Reference of DokuWiki

title

Body

[close]

/inc/parser/ -> metadata.php (source)

   1  <?php
   2  
   3  use dokuwiki\File\MediaResolver;
   4  use dokuwiki\File\PageResolver;
   5  use dokuwiki\Utf8\PhpString;
   6  
   7  /**
   8   * The MetaData Renderer
   9   *
  10   * Metadata is additional information about a DokuWiki page that gets extracted mainly from the page's content
  11   * but also its own filesystem data (like the creation time). All metadata is stored in the fields $meta and
  12   * $persistent.
  13   *
  14   * Some simplified rendering to $doc is done to gather the page's (text-only) abstract.
  15   *
  16   * @author Esther Brunner <wikidesign@gmail.com>
  17   */
  18  class Doku_Renderer_metadata extends Doku_Renderer
  19  {
  20      /** the approximate byte lenght to capture for the abstract */
  21      public const ABSTRACT_LEN = 250;
  22  
  23      /** the maximum UTF8 character length for the abstract */
  24      public const ABSTRACT_MAX = 500;
  25  
  26      /** @var array transient meta data, will be reset on each rendering */
  27      public $meta = [];
  28  
  29      /** @var array persistent meta data, will be kept until explicitly deleted */
  30      public $persistent = [];
  31  
  32      /** @var array the list of headers used to create unique link ids */
  33      protected $headers = [];
  34  
  35      /** @var string temporary $doc store */
  36      protected $store = '';
  37  
  38      /** @var string keeps the first image reference */
  39      protected $firstimage = '';
  40  
  41      /** @var bool whether or not data is being captured for the abstract, public to be accessible by plugins */
  42      public $capturing = true;
  43  
  44      /** @var bool determines if enough data for the abstract was collected, yet */
  45      public $capture = true;
  46  
  47      /** @var int number of bytes captured for abstract */
  48      protected $captured = 0;
  49  
  50      /**
  51       * Returns the format produced by this renderer.
  52       *
  53       * @return string always 'metadata'
  54       */
  55      public function getFormat()
  56      {
  57          return 'metadata';
  58      }
  59  
  60      /**
  61       * Initialize the document
  62       *
  63       * Sets up some of the persistent info about the page if it doesn't exist, yet.
  64       */
  65      public function document_start()
  66      {
  67          global $ID;
  68  
  69          $this->headers = [];
  70  
  71          // external pages are missing create date
  72          if (!isset($this->persistent['date']['created']) || !$this->persistent['date']['created']) {
  73              $this->persistent['date']['created'] = filectime(wikiFN($ID));
  74          }
  75          if (!isset($this->persistent['user'])) {
  76              $this->persistent['user'] = '';
  77          }
  78          if (!isset($this->persistent['creator'])) {
  79              $this->persistent['creator'] = '';
  80          }
  81          // reset metadata to persistent values
  82          $this->meta = $this->persistent;
  83      }
  84  
  85      /**
  86       * Finalize the document
  87       *
  88       * Stores collected data in the metadata
  89       */
  90      public function document_end()
  91      {
  92          global $ID;
  93  
  94          // store internal info in metadata (notoc,nocache)
  95          $this->meta['internal'] = $this->info;
  96  
  97          if (!isset($this->meta['description']['abstract'])) {
  98              // cut off too long abstracts
  99              $this->doc = trim($this->doc);
 100              if (strlen($this->doc) > self::ABSTRACT_MAX) {
 101                  $this->doc = PhpString::substr($this->doc, 0, self::ABSTRACT_MAX) . '…';
 102              }
 103              $this->meta['description']['abstract'] = $this->doc;
 104          }
 105  
 106          $this->meta['relation']['firstimage'] = $this->firstimage;
 107  
 108          if (!isset($this->meta['date']['modified'])) {
 109              $this->meta['date']['modified'] = filemtime(wikiFN($ID));
 110          }
 111      }
 112  
 113      /**
 114       * Render plain text data
 115       *
 116       * This function takes care of the amount captured data and will stop capturing when
 117       * enough abstract data is available
 118       *
 119       * @param $text
 120       */
 121      public function cdata($text)
 122      {
 123          if (!$this->capture || !$this->capturing) {
 124              return;
 125          }
 126  
 127          $this->doc .= $text;
 128  
 129          $this->captured += strlen($text);
 130          if ($this->captured > self::ABSTRACT_LEN) {
 131              $this->capture = false;
 132          }
 133      }
 134  
 135      /**
 136       * Add an item to the TOC
 137       *
 138       * @param string $id the hash link
 139       * @param string $text the text to display
 140       * @param int $level the nesting level
 141       */
 142      public function toc_additem($id, $text, $level)
 143      {
 144          global $conf;
 145  
 146          //only add items within configured levels
 147          if ($level >= $conf['toptoclevel'] && $level <= $conf['maxtoclevel']) {
 148              // the TOC is one of our standard ul list arrays ;-)
 149              $this->meta['description']['tableofcontents'][] = [
 150                  'hid' => $id,
 151                  'title' => $text,
 152                  'type' => 'ul',
 153                  'level' => $level - $conf['toptoclevel'] + 1
 154              ];
 155          }
 156      }
 157  
 158      /**
 159       * Render a heading
 160       *
 161       * @param string $text the text to display
 162       * @param int $level header level
 163       * @param int $pos byte position in the original source
 164       */
 165      public function header($text, $level, $pos)
 166      {
 167          if (!isset($this->meta['title'])) {
 168              $this->meta['title'] = $text;
 169          }
 170  
 171          // add the header to the TOC
 172          $hid = $this->_headerToLink($text, true);
 173          $this->toc_additem($hid, $text, $level);
 174  
 175          // add to summary
 176          $this->cdata(DOKU_LF . $text . DOKU_LF);
 177      }
 178  
 179      /**
 180       * Open a paragraph
 181       */
 182      public function p_open()
 183      {
 184          $this->cdata(DOKU_LF);
 185      }
 186  
 187      /**
 188       * Close a paragraph
 189       */
 190      public function p_close()
 191      {
 192          $this->cdata(DOKU_LF);
 193      }
 194  
 195      /**
 196       * Create a line break
 197       */
 198      public function linebreak()
 199      {
 200          $this->cdata(DOKU_LF);
 201      }
 202  
 203      /**
 204       * Create a horizontal line
 205       */
 206      public function hr()
 207      {
 208          $this->cdata(DOKU_LF . '----------' . DOKU_LF);
 209      }
 210  
 211      /**
 212       * Callback for footnote start syntax
 213       *
 214       * All following content will go to the footnote instead of
 215       * the document. To achieve this the previous rendered content
 216       * is moved to $store and $doc is cleared
 217       *
 218       * @author Andreas Gohr <andi@splitbrain.org>
 219       */
 220      public function footnote_open()
 221      {
 222          if ($this->capture) {
 223              // move current content to store
 224              // this is required to ensure safe behaviour of plugins accessed within footnotes
 225              $this->store = $this->doc;
 226              $this->doc = '';
 227  
 228              // disable capturing
 229              $this->capturing = false;
 230          }
 231      }
 232  
 233      /**
 234       * Callback for footnote end syntax
 235       *
 236       * All content rendered whilst within footnote syntax mode is discarded,
 237       * the previously rendered content is restored and capturing is re-enabled.
 238       *
 239       * @author Andreas Gohr
 240       */
 241      public function footnote_close()
 242      {
 243          if ($this->capture) {
 244              // re-enable capturing
 245              $this->capturing = true;
 246              // restore previously rendered content
 247              $this->doc = $this->store;
 248              $this->store = '';
 249          }
 250      }
 251  
 252      /**
 253       * Open an unordered list
 254       */
 255      public function listu_open()
 256      {
 257          $this->cdata(DOKU_LF);
 258      }
 259  
 260      /**
 261       * Open an ordered list
 262       */
 263      public function listo_open()
 264      {
 265          $this->cdata(DOKU_LF);
 266      }
 267  
 268      /**
 269       * Open a list item
 270       *
 271       * @param int $level the nesting level
 272       * @param bool $node true when a node; false when a leaf
 273       */
 274      public function listitem_open($level, $node = false)
 275      {
 276          $this->cdata(str_repeat(DOKU_TAB, $level) . '* ');
 277      }
 278  
 279      /**
 280       * Close a list item
 281       */
 282      public function listitem_close()
 283      {
 284          $this->cdata(DOKU_LF);
 285      }
 286  
 287      /**
 288       * Output preformatted text
 289       *
 290       * @param string $text
 291       */
 292      public function preformatted($text)
 293      {
 294          $this->cdata($text);
 295      }
 296  
 297      /**
 298       * Start a block quote
 299       */
 300      public function quote_open()
 301      {
 302          $this->cdata(DOKU_LF . DOKU_TAB . '"');
 303      }
 304  
 305      /**
 306       * Stop a block quote
 307       */
 308      public function quote_close()
 309      {
 310          $this->cdata('"' . DOKU_LF);
 311      }
 312  
 313      /**
 314       * Display text as file content, optionally syntax highlighted
 315       *
 316       * @param string $text text to show
 317       * @param string $lang programming language to use for syntax highlighting
 318       * @param string $file file path label
 319       */
 320      public function file($text, $lang = null, $file = null)
 321      {
 322          $this->cdata(DOKU_LF . $text . DOKU_LF);
 323      }
 324  
 325      /**
 326       * Display text as code content, optionally syntax highlighted
 327       *
 328       * @param string $text text to show
 329       * @param string $language programming language to use for syntax highlighting
 330       * @param string $file file path label
 331       */
 332      public function code($text, $language = null, $file = null)
 333      {
 334          $this->cdata(DOKU_LF . $text . DOKU_LF);
 335      }
 336  
 337      /**
 338       * Format an acronym
 339       *
 340       * Uses $this->acronyms
 341       *
 342       * @param string $acronym
 343       */
 344      public function acronym($acronym)
 345      {
 346          $this->cdata($acronym);
 347      }
 348  
 349      /**
 350       * Format a smiley
 351       *
 352       * Uses $this->smiley
 353       *
 354       * @param string $smiley
 355       */
 356      public function smiley($smiley)
 357      {
 358          $this->cdata($smiley);
 359      }
 360  
 361      /**
 362       * Format an entity
 363       *
 364       * Entities are basically small text replacements
 365       *
 366       * Uses $this->entities
 367       *
 368       * @param string $entity
 369       */
 370      public function entity($entity)
 371      {
 372          $this->cdata($entity);
 373      }
 374  
 375      /**
 376       * Typographically format a multiply sign
 377       *
 378       * Example: ($x=640, $y=480) should result in "640×480"
 379       *
 380       * @param string|int $x first value
 381       * @param string|int $y second value
 382       */
 383      public function multiplyentity($x, $y)
 384      {
 385          $this->cdata($x . '×' . $y);
 386      }
 387  
 388      /**
 389       * Render an opening single quote char (language specific)
 390       */
 391      public function singlequoteopening()
 392      {
 393          global $lang;
 394          $this->cdata($lang['singlequoteopening']);
 395      }
 396  
 397      /**
 398       * Render a closing single quote char (language specific)
 399       */
 400      public function singlequoteclosing()
 401      {
 402          global $lang;
 403          $this->cdata($lang['singlequoteclosing']);
 404      }
 405  
 406      /**
 407       * Render an apostrophe char (language specific)
 408       */
 409      public function apostrophe()
 410      {
 411          global $lang;
 412          $this->cdata($lang['apostrophe']);
 413      }
 414  
 415      /**
 416       * Render an opening double quote char (language specific)
 417       */
 418      public function doublequoteopening()
 419      {
 420          global $lang;
 421          $this->cdata($lang['doublequoteopening']);
 422      }
 423  
 424      /**
 425       * Render an closinging double quote char (language specific)
 426       */
 427      public function doublequoteclosing()
 428      {
 429          global $lang;
 430          $this->cdata($lang['doublequoteclosing']);
 431      }
 432  
 433      /**
 434       * Render a CamelCase link
 435       *
 436       * @param string $link The link name
 437       * @see http://en.wikipedia.org/wiki/CamelCase
 438       */
 439      public function camelcaselink($link)
 440      {
 441          $this->internallink($link, $link);
 442      }
 443  
 444      /**
 445       * Render a page local link
 446       *
 447       * @param string $hash hash link identifier
 448       * @param string $name name for the link
 449       */
 450      public function locallink($hash, $name = null)
 451      {
 452          if (is_array($name)) {
 453              $this->_firstimage($name['src']);
 454              if ($name['type'] == 'internalmedia') {
 455                  $this->_recordMediaUsage($name['src']);
 456              }
 457          }
 458      }
 459  
 460      /**
 461       * keep track of internal links in $this->meta['relation']['references']
 462       *
 463       * @param string $id page ID to link to. eg. 'wiki:syntax'
 464       * @param string|array|null $name name for the link, array for media file
 465       */
 466      public function internallink($id, $name = null)
 467      {
 468          global $ID;
 469  
 470          if (is_array($name)) {
 471              $this->_firstimage($name['src']);
 472              if ($name['type'] == 'internalmedia') {
 473                  $this->_recordMediaUsage($name['src']);
 474              }
 475          }
 476  
 477          $parts = explode('?', $id, 2);
 478          if (count($parts) === 2) {
 479              $id = $parts[0];
 480          }
 481  
 482          $default = $this->_simpleTitle($id);
 483  
 484          // first resolve and clean up the $id
 485          $resolver = new PageResolver($ID);
 486          $id = $resolver->resolveId($id);
 487          [$page] = sexplode('#', $id, 2);
 488  
 489          // set metadata
 490          $this->meta['relation']['references'][$page] = page_exists($page);
 491          // $data = array('relation' => array('isreferencedby' => array($ID => true)));
 492          // p_set_metadata($id, $data);
 493  
 494          // add link title to summary
 495          if ($this->capture) {
 496              $name = $this->_getLinkTitle($name, $default, $id);
 497              $this->doc .= $name;
 498          }
 499      }
 500  
 501      /**
 502       * Render an external link
 503       *
 504       * @param string $url full URL with scheme
 505       * @param string|array|null $name name for the link, array for media file
 506       */
 507      public function externallink($url, $name = null)
 508      {
 509          if (is_array($name)) {
 510              $this->_firstimage($name['src']);
 511              if ($name['type'] == 'internalmedia') {
 512                  $this->_recordMediaUsage($name['src']);
 513              }
 514          }
 515  
 516          if ($this->capture) {
 517              $this->doc .= $this->_getLinkTitle($name, '<' . $url . '>');
 518          }
 519      }
 520  
 521      /**
 522       * Render an interwiki link
 523       *
 524       * You may want to use $this->_resolveInterWiki() here
 525       *
 526       * @param string $match original link - probably not much use
 527       * @param string|array $name name for the link, array for media file
 528       * @param string $wikiName indentifier (shortcut) for the remote wiki
 529       * @param string $wikiUri the fragment parsed from the original link
 530       */
 531      public function interwikilink($match, $name, $wikiName, $wikiUri)
 532      {
 533          if (is_array($name)) {
 534              $this->_firstimage($name['src']);
 535              if ($name['type'] == 'internalmedia') {
 536                  $this->_recordMediaUsage($name['src']);
 537              }
 538          }
 539  
 540          if ($this->capture) {
 541              [$wikiUri] = explode('#', $wikiUri, 2);
 542              $name = $this->_getLinkTitle($name, $wikiUri);
 543              $this->doc .= $name;
 544          }
 545      }
 546  
 547      /**
 548       * Link to windows share
 549       *
 550       * @param string $url the link
 551       * @param string|array $name name for the link, array for media file
 552       */
 553      public function windowssharelink($url, $name = null)
 554      {
 555          if (is_array($name)) {
 556              $this->_firstimage($name['src']);
 557              if ($name['type'] == 'internalmedia') {
 558                  $this->_recordMediaUsage($name['src']);
 559              }
 560          }
 561  
 562          if ($this->capture) {
 563              if ($name) {
 564                  $this->doc .= $name;
 565              } else {
 566                  $this->doc .= '<' . $url . '>';
 567              }
 568          }
 569      }
 570  
 571      /**
 572       * Render a linked E-Mail Address
 573       *
 574       * Should honor $conf['mailguard'] setting
 575       *
 576       * @param string $address Email-Address
 577       * @param string|array $name name for the link, array for media file
 578       */
 579      public function emaillink($address, $name = null)
 580      {
 581          if (is_array($name)) {
 582              $this->_firstimage($name['src']);
 583              if ($name['type'] == 'internalmedia') {
 584                  $this->_recordMediaUsage($name['src']);
 585              }
 586          }
 587  
 588          if ($this->capture) {
 589              if ($name) {
 590                  $this->doc .= $name;
 591              } else {
 592                  $this->doc .= '<' . $address . '>';
 593              }
 594          }
 595      }
 596  
 597      /**
 598       * Render an internal media file
 599       *
 600       * @param string $src media ID
 601       * @param string $title descriptive text
 602       * @param string $align left|center|right
 603       * @param int $width width of media in pixel
 604       * @param int $height height of media in pixel
 605       * @param string $cache cache|recache|nocache
 606       * @param string $linking linkonly|detail|nolink
 607       */
 608      public function internalmedia(
 609          $src,
 610          $title = null,
 611          $align = null,
 612          $width = null,
 613          $height = null,
 614          $cache = null,
 615          $linking = null
 616      ) {
 617          if ($this->capture && $title) {
 618              $this->doc .= '[' . $title . ']';
 619          }
 620          $this->_firstimage($src);
 621          $this->_recordMediaUsage($src);
 622      }
 623  
 624      /**
 625       * Render an external media file
 626       *
 627       * @param string $src full media URL
 628       * @param string $title descriptive text
 629       * @param string $align left|center|right
 630       * @param int $width width of media in pixel
 631       * @param int $height height of media in pixel
 632       * @param string $cache cache|recache|nocache
 633       * @param string $linking linkonly|detail|nolink
 634       */
 635      public function externalmedia(
 636          $src,
 637          $title = null,
 638          $align = null,
 639          $width = null,
 640          $height = null,
 641          $cache = null,
 642          $linking = null
 643      ) {
 644          if ($this->capture && $title) {
 645              $this->doc .= '[' . $title . ']';
 646          }
 647          $this->_firstimage($src);
 648      }
 649  
 650      /**
 651       * Render the output of an RSS feed
 652       *
 653       * @param string $url URL of the feed
 654       * @param array $params Finetuning of the output
 655       */
 656      public function rss($url, $params)
 657      {
 658          $this->meta['relation']['haspart'][$url] = true;
 659  
 660          $this->meta['date']['valid']['age'] =
 661              isset($this->meta['date']['valid']['age']) ?
 662                  min($this->meta['date']['valid']['age'], $params['refresh']) :
 663                  $params['refresh'];
 664      }
 665  
 666      #region Utils
 667  
 668      /**
 669       * Removes any Namespace from the given name but keeps
 670       * casing and special chars
 671       *
 672       * @param string $name
 673       *
 674       * @return mixed|string
 675       * @author Andreas Gohr <andi@splitbrain.org>
 676       *
 677       */
 678      public function _simpleTitle($name)
 679      {
 680          global $conf;
 681  
 682          if (is_array($name)) {
 683              return '';
 684          }
 685  
 686          if ($conf['useslash']) {
 687              $nssep = '[:;/]';
 688          } else {
 689              $nssep = '[:;]';
 690          }
 691          $name = preg_replace('!.*' . $nssep . '!', '', $name);
 692          //if there is a hash we use the anchor name only
 693          $name = preg_replace('!.*#!', '', $name);
 694          return $name;
 695      }
 696  
 697      /**
 698       * Construct a title and handle images in titles
 699       *
 700       * @param string|array|null $title either string title or media array
 701       * @param string $default default title if nothing else is found
 702       * @param null|string $id linked page id (used to extract title from first heading)
 703       * @return string title text
 704       * @author Harry Fuecks <hfuecks@gmail.com>
 705       */
 706      public function _getLinkTitle($title, $default, $id = null)
 707      {
 708          if (is_array($title)) {
 709              if ($title['title']) {
 710                  return '[' . $title['title'] . ']';
 711              } else {
 712                  return $default;
 713              }
 714          } elseif (is_null($title) || trim($title) == '') {
 715              if (useHeading('content') && $id) {
 716                  $heading = p_get_first_heading($id, METADATA_DONT_RENDER);
 717                  if ($heading) {
 718                      return $heading;
 719                  }
 720              }
 721              return $default;
 722          } else {
 723              return $title;
 724          }
 725      }
 726  
 727      /**
 728       * Remember first image
 729       *
 730       * @param string $src image URL or ID
 731       */
 732      protected function _firstimage($src)
 733      {
 734          global $ID;
 735  
 736          if ($this->firstimage) {
 737              return;
 738          }
 739  
 740          [$src] = explode('#', $src, 2);
 741          if (!media_isexternal($src)) {
 742              $src = (new MediaResolver($ID))->resolveId($src);
 743          }
 744          if (preg_match('/.(jpe?g|gif|png|webp|svg)$/i', $src)) {
 745              $this->firstimage = $src;
 746          }
 747      }
 748  
 749      /**
 750       * Store list of used media files in metadata
 751       *
 752       * @param string $src media ID
 753       */
 754      protected function _recordMediaUsage($src)
 755      {
 756          global $ID;
 757  
 758          [$src] = explode('#', $src, 2);
 759          if (media_isexternal($src)) {
 760              return;
 761          }
 762          $src = (new MediaResolver($ID))->resolveId($src);
 763          $file = mediaFN($src);
 764          $this->meta['relation']['media'][$src] = file_exists($file);
 765      }
 766  
 767      #endregion
 768  }
 769  
 770  //Setup VIM: ex: et ts=4 :