[ Index ]

PHP Cross Reference of DokuWiki

title

Body

[close]

/vendor/simplepie/simplepie/library/SimplePie/ -> Sanitize.php (source)

   1  <?php
   2  /**
   3   * SimplePie
   4   *
   5   * A PHP-Based RSS and Atom Feed Framework.
   6   * Takes the hard work out of managing a complete RSS/Atom solution.
   7   *
   8   * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
   9   * All rights reserved.
  10   *
  11   * Redistribution and use in source and binary forms, with or without modification, are
  12   * permitted provided that the following conditions are met:
  13   *
  14   *     * Redistributions of source code must retain the above copyright notice, this list of
  15   *       conditions and the following disclaimer.
  16   *
  17   *     * Redistributions in binary form must reproduce the above copyright notice, this list
  18   *       of conditions and the following disclaimer in the documentation and/or other materials
  19   *       provided with the distribution.
  20   *
  21   *     * Neither the name of the SimplePie Team nor the names of its contributors may be used
  22   *       to endorse or promote products derived from this software without specific prior
  23   *       written permission.
  24   *
  25   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
  26   * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
  27   * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
  28   * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  29   * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  30   * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  31   * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  32   * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  33   * POSSIBILITY OF SUCH DAMAGE.
  34   *
  35   * @package SimplePie
  36   * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue
  37   * @author Ryan Parman
  38   * @author Geoffrey Sneddon
  39   * @author Ryan McCue
  40   * @link http://simplepie.org/ SimplePie
  41   * @license http://www.opensource.org/licenses/bsd-license.php BSD License
  42   */
  43  
  44  /**
  45   * Used for data cleanup and post-processing
  46   *
  47   *
  48   * This class can be overloaded with {@see SimplePie::set_sanitize_class()}
  49   *
  50   * @package SimplePie
  51   * @todo Move to using an actual HTML parser (this will allow tags to be properly stripped, and to switch between HTML and XHTML), this will also make it easier to shorten a string while preserving HTML tags
  52   */
  53  class SimplePie_Sanitize
  54  {
  55      // Private vars
  56      var $base;
  57  
  58      // Options
  59      var $remove_div = true;
  60      var $image_handler = '';
  61      var $strip_htmltags = array('base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style');
  62      var $encode_instead_of_strip = false;
  63      var $strip_attributes = array('bgsound', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc');
  64      var $add_attributes = array('audio' => array('preload' => 'none'), 'iframe' => array('sandbox' => 'allow-scripts allow-same-origin'), 'video' => array('preload' => 'none'));
  65      var $strip_comments = false;
  66      var $output_encoding = 'UTF-8';
  67      var $enable_cache = true;
  68      var $cache_location = './cache';
  69      var $cache_name_function = 'md5';
  70      var $timeout = 10;
  71      var $useragent = '';
  72      var $force_fsockopen = false;
  73      var $replace_url_attributes = null;
  74  
  75  	public function __construct()
  76      {
  77          // Set defaults
  78          $this->set_url_replacements(null);
  79      }
  80  
  81  	public function remove_div($enable = true)
  82      {
  83          $this->remove_div = (bool) $enable;
  84      }
  85  
  86  	public function set_image_handler($page = false)
  87      {
  88          if ($page)
  89          {
  90              $this->image_handler = (string) $page;
  91          }
  92          else
  93          {
  94              $this->image_handler = false;
  95          }
  96      }
  97  
  98  	public function set_registry(SimplePie_Registry $registry)
  99      {
 100          $this->registry = $registry;
 101      }
 102  
 103  	public function pass_cache_data($enable_cache = true, $cache_location = './cache', $cache_name_function = 'md5', $cache_class = 'SimplePie_Cache')
 104      {
 105          if (isset($enable_cache))
 106          {
 107              $this->enable_cache = (bool) $enable_cache;
 108          }
 109  
 110          if ($cache_location)
 111          {
 112              $this->cache_location = (string) $cache_location;
 113          }
 114  
 115          if ($cache_name_function)
 116          {
 117              $this->cache_name_function = (string) $cache_name_function;
 118          }
 119      }
 120  
 121  	public function pass_file_data($file_class = 'SimplePie_File', $timeout = 10, $useragent = '', $force_fsockopen = false)
 122      {
 123          if ($timeout)
 124          {
 125              $this->timeout = (string) $timeout;
 126          }
 127  
 128          if ($useragent)
 129          {
 130              $this->useragent = (string) $useragent;
 131          }
 132  
 133          if ($force_fsockopen)
 134          {
 135              $this->force_fsockopen = (string) $force_fsockopen;
 136          }
 137      }
 138  
 139  	public function strip_htmltags($tags = array('base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style'))
 140      {
 141          if ($tags)
 142          {
 143              if (is_array($tags))
 144              {
 145                  $this->strip_htmltags = $tags;
 146              }
 147              else
 148              {
 149                  $this->strip_htmltags = explode(',', $tags);
 150              }
 151          }
 152          else
 153          {
 154              $this->strip_htmltags = false;
 155          }
 156      }
 157  
 158  	public function encode_instead_of_strip($encode = false)
 159      {
 160          $this->encode_instead_of_strip = (bool) $encode;
 161      }
 162  
 163  	public function strip_attributes($attribs = array('bgsound', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc'))
 164      {
 165          if ($attribs)
 166          {
 167              if (is_array($attribs))
 168              {
 169                  $this->strip_attributes = $attribs;
 170              }
 171              else
 172              {
 173                  $this->strip_attributes = explode(',', $attribs);
 174              }
 175          }
 176          else
 177          {
 178              $this->strip_attributes = false;
 179          }
 180      }
 181  
 182  	public function add_attributes($attribs = array('audio' => array('preload' => 'none'), 'iframe' => array('sandbox' => 'allow-scripts allow-same-origin'), 'video' => array('preload' => 'none')))
 183      {
 184          if ($attribs)
 185          {
 186              if (is_array($attribs))
 187              {
 188                  $this->add_attributes = $attribs;
 189              }
 190              else
 191              {
 192                  $this->add_attributes = explode(',', $attribs);
 193              }
 194          }
 195          else
 196          {
 197              $this->add_attributes = false;
 198          }
 199      }
 200  
 201  	public function strip_comments($strip = false)
 202      {
 203          $this->strip_comments = (bool) $strip;
 204      }
 205  
 206  	public function set_output_encoding($encoding = 'UTF-8')
 207      {
 208          $this->output_encoding = (string) $encoding;
 209      }
 210  
 211      /**
 212       * Set element/attribute key/value pairs of HTML attributes
 213       * containing URLs that need to be resolved relative to the feed
 214       *
 215       * Defaults to |a|@href, |area|@href, |blockquote|@cite, |del|@cite,
 216       * |form|@action, |img|@longdesc, |img|@src, |input|@src, |ins|@cite,
 217       * |q|@cite
 218       *
 219       * @since 1.0
 220       * @param array|null $element_attribute Element/attribute key/value pairs, null for default
 221       */
 222  	public function set_url_replacements($element_attribute = null)
 223      {
 224          if ($element_attribute === null)
 225          {
 226              $element_attribute = array(
 227                  'a' => 'href',
 228                  'area' => 'href',
 229                  'blockquote' => 'cite',
 230                  'del' => 'cite',
 231                  'form' => 'action',
 232                  'img' => array(
 233                      'longdesc',
 234                      'src'
 235                  ),
 236                  'input' => 'src',
 237                  'ins' => 'cite',
 238                  'q' => 'cite'
 239              );
 240          }
 241          $this->replace_url_attributes = (array) $element_attribute;
 242      }
 243  
 244  	public function sanitize($data, $type, $base = '')
 245      {
 246          $data = trim($data);
 247          if ($data !== '' || $type & SIMPLEPIE_CONSTRUCT_IRI)
 248          {
 249              if ($type & SIMPLEPIE_CONSTRUCT_MAYBE_HTML)
 250              {
 251                  if (preg_match('/(&(#(x[0-9a-fA-F]+|[0-9]+)|[a-zA-Z0-9]+)|<\/[A-Za-z][^\x09\x0A\x0B\x0C\x0D\x20\x2F\x3E]*' . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . '>)/', $data))
 252                  {
 253                      $type |= SIMPLEPIE_CONSTRUCT_HTML;
 254                  }
 255                  else
 256                  {
 257                      $type |= SIMPLEPIE_CONSTRUCT_TEXT;
 258                  }
 259              }
 260  
 261              if ($type & SIMPLEPIE_CONSTRUCT_BASE64)
 262              {
 263                  $data = base64_decode($data);
 264              }
 265  
 266              if ($type & (SIMPLEPIE_CONSTRUCT_HTML | SIMPLEPIE_CONSTRUCT_XHTML))
 267              {
 268  
 269                  if (!class_exists('DOMDocument'))
 270                  {
 271                      throw new SimplePie_Exception('DOMDocument not found, unable to use sanitizer');
 272                  }
 273                  $document = new DOMDocument();
 274                  $document->encoding = 'UTF-8';
 275  
 276                  $data = $this->preprocess($data, $type);
 277  
 278                  set_error_handler(array('SimplePie_Misc', 'silence_errors'));
 279                  $document->loadHTML($data);
 280                  restore_error_handler();
 281  
 282                  $xpath = new DOMXPath($document);
 283  
 284                  // Strip comments
 285                  if ($this->strip_comments)
 286                  {
 287                      $comments = $xpath->query('//comment()');
 288  
 289                      foreach ($comments as $comment)
 290                      {
 291                          $comment->parentNode->removeChild($comment);
 292                      }
 293                  }
 294  
 295                  // Strip out HTML tags and attributes that might cause various security problems.
 296                  // Based on recommendations by Mark Pilgrim at:
 297                  // http://diveintomark.org/archives/2003/06/12/how_to_consume_rss_safely
 298                  if ($this->strip_htmltags)
 299                  {
 300                      foreach ($this->strip_htmltags as $tag)
 301                      {
 302                          $this->strip_tag($tag, $document, $xpath, $type);
 303                      }
 304                  }
 305  
 306                  if ($this->strip_attributes)
 307                  {
 308                      foreach ($this->strip_attributes as $attrib)
 309                      {
 310                          $this->strip_attr($attrib, $xpath);
 311                      }
 312                  }
 313  
 314                  if ($this->add_attributes)
 315                  {
 316                      foreach ($this->add_attributes as $tag => $valuePairs)
 317                      {
 318                          $this->add_attr($tag, $valuePairs, $document);
 319                      }
 320                  }
 321  
 322                  // Replace relative URLs
 323                  $this->base = $base;
 324                  foreach ($this->replace_url_attributes as $element => $attributes)
 325                  {
 326                      $this->replace_urls($document, $element, $attributes);
 327                  }
 328  
 329                  // If image handling (caching, etc.) is enabled, cache and rewrite all the image tags.
 330                  if (isset($this->image_handler) && ((string) $this->image_handler) !== '' && $this->enable_cache)
 331                  {
 332                      $images = $document->getElementsByTagName('img');
 333                      foreach ($images as $img)
 334                      {
 335                          if ($img->hasAttribute('src'))
 336                          {
 337                              $image_url = call_user_func($this->cache_name_function, $img->getAttribute('src'));
 338                              $cache = $this->registry->call('Cache', 'get_handler', array($this->cache_location, $image_url, 'spi'));
 339  
 340                              if ($cache->load())
 341                              {
 342                                  $img->setAttribute('src', $this->image_handler . $image_url);
 343                              }
 344                              else
 345                              {
 346                                  $file = $this->registry->create('File', array($img->getAttribute('src'), $this->timeout, 5, array('X-FORWARDED-FOR' => $_SERVER['REMOTE_ADDR']), $this->useragent, $this->force_fsockopen));
 347                                  $headers = $file->headers;
 348  
 349                                  if ($file->success && ($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($file->status_code === 200 || $file->status_code > 206 && $file->status_code < 300)))
 350                                  {
 351                                      if ($cache->save(array('headers' => $file->headers, 'body' => $file->body)))
 352                                      {
 353                                          $img->setAttribute('src', $this->image_handler . $image_url);
 354                                      }
 355                                      else
 356                                      {
 357                                          trigger_error("$this->cache_location is not writable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING);
 358                                      }
 359                                  }
 360                              }
 361                          }
 362                      }
 363                  }
 364  
 365                  // Get content node
 366                  $div = $document->getElementsByTagName('body')->item(0)->firstChild;
 367                  // Finally, convert to a HTML string
 368                  if (version_compare(PHP_VERSION, '5.3.6', '>='))
 369                  {
 370                      $data = trim($document->saveHTML($div));
 371                  }
 372                  else
 373                  {
 374                      $data = trim($document->saveXML($div));
 375                  }
 376  
 377                  if ($this->remove_div)
 378                  {
 379                      $data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '', $data);
 380                      $data = preg_replace('/<\/div>$/', '', $data);
 381                  }
 382                  else
 383                  {
 384                      $data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '<div>', $data);
 385                  }
 386              }
 387  
 388              if ($type & SIMPLEPIE_CONSTRUCT_IRI)
 389              {
 390                  $absolute = $this->registry->call('Misc', 'absolutize_url', array($data, $base));
 391                  if ($absolute !== false)
 392                  {
 393                      $data = $absolute;
 394                  }
 395              }
 396  
 397              if ($type & (SIMPLEPIE_CONSTRUCT_TEXT | SIMPLEPIE_CONSTRUCT_IRI))
 398              {
 399                  $data = htmlspecialchars($data, ENT_COMPAT, 'UTF-8');
 400              }
 401  
 402              if ($this->output_encoding !== 'UTF-8')
 403              {
 404                  $data = $this->registry->call('Misc', 'change_encoding', array($data, 'UTF-8', $this->output_encoding));
 405              }
 406          }
 407          return $data;
 408      }
 409  
 410  	protected function preprocess($html, $type)
 411      {
 412          $ret = '';
 413          $html = preg_replace('%</?(?:html|body)[^>]*?'.'>%is', '', $html);
 414          if ($type & ~SIMPLEPIE_CONSTRUCT_XHTML)
 415          {
 416              // Atom XHTML constructs are wrapped with a div by default
 417              // Note: No protection if $html contains a stray </div>!
 418              $html = '<div>' . $html . '</div>';
 419              $ret .= '<!DOCTYPE html>';
 420              $content_type = 'text/html';
 421          }
 422          else
 423          {
 424              $ret .= '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">';
 425              $content_type = 'application/xhtml+xml';
 426          }
 427  
 428          $ret .= '<html><head>';
 429          $ret .= '<meta http-equiv="Content-Type" content="' . $content_type . '; charset=utf-8" />';
 430          $ret .= '</head><body>' . $html . '</body></html>';
 431          return $ret;
 432      }
 433  
 434  	public function replace_urls($document, $tag, $attributes)
 435      {
 436          if (!is_array($attributes))
 437          {
 438              $attributes = array($attributes);
 439          }
 440  
 441          if (!is_array($this->strip_htmltags) || !in_array($tag, $this->strip_htmltags))
 442          {
 443              $elements = $document->getElementsByTagName($tag);
 444              foreach ($elements as $element)
 445              {
 446                  foreach ($attributes as $attribute)
 447                  {
 448                      if ($element->hasAttribute($attribute))
 449                      {
 450                          $value = $this->registry->call('Misc', 'absolutize_url', array($element->getAttribute($attribute), $this->base));
 451                          if ($value !== false)
 452                          {
 453                              $element->setAttribute($attribute, $value);
 454                          }
 455                      }
 456                  }
 457              }
 458          }
 459      }
 460  
 461  	public function do_strip_htmltags($match)
 462      {
 463          if ($this->encode_instead_of_strip)
 464          {
 465              if (isset($match[4]) && !in_array(strtolower($match[1]), array('script', 'style')))
 466              {
 467                  $match[1] = htmlspecialchars($match[1], ENT_COMPAT, 'UTF-8');
 468                  $match[2] = htmlspecialchars($match[2], ENT_COMPAT, 'UTF-8');
 469                  return "&lt;$match[1]$match[2]&gt;$match[3]&lt;/$match[1]&gt;";
 470              }
 471              else
 472              {
 473                  return htmlspecialchars($match[0], ENT_COMPAT, 'UTF-8');
 474              }
 475          }
 476          elseif (isset($match[4]) && !in_array(strtolower($match[1]), array('script', 'style')))
 477          {
 478              return $match[4];
 479          }
 480          else
 481          {
 482              return '';
 483          }
 484      }
 485  
 486  	protected function strip_tag($tag, $document, $xpath, $type)
 487      {
 488          $elements = $xpath->query('body//' . $tag);
 489          if ($this->encode_instead_of_strip)
 490          {
 491              foreach ($elements as $element)
 492              {
 493                  $fragment = $document->createDocumentFragment();
 494  
 495                  // For elements which aren't script or style, include the tag itself
 496                  if (!in_array($tag, array('script', 'style')))
 497                  {
 498                      $text = '<' . $tag;
 499                      if ($element->hasAttributes())
 500                      {
 501                          $attrs = array();
 502                          foreach ($element->attributes as $name => $attr)
 503                          {
 504                              $value = $attr->value;
 505  
 506                              // In XHTML, empty values should never exist, so we repeat the value
 507                              if (empty($value) && ($type & SIMPLEPIE_CONSTRUCT_XHTML))
 508                              {
 509                                  $value = $name;
 510                              }
 511                              // For HTML, empty is fine
 512                              elseif (empty($value) && ($type & SIMPLEPIE_CONSTRUCT_HTML))
 513                              {
 514                                  $attrs[] = $name;
 515                                  continue;
 516                              }
 517  
 518                              // Standard attribute text
 519                              $attrs[] = $name . '="' . $attr->value . '"';
 520                          }
 521                          $text .= ' ' . implode(' ', $attrs);
 522                      }
 523                      $text .= '>';
 524                      $fragment->appendChild(new DOMText($text));
 525                  }
 526  
 527                  $number = $element->childNodes->length;
 528                  for ($i = $number; $i > 0; $i--)
 529                  {
 530                      $child = $element->childNodes->item(0);
 531                      $fragment->appendChild($child);
 532                  }
 533  
 534                  if (!in_array($tag, array('script', 'style')))
 535                  {
 536                      $fragment->appendChild(new DOMText('</' . $tag . '>'));
 537                  }
 538  
 539                  $element->parentNode->replaceChild($fragment, $element);
 540              }
 541  
 542              return;
 543          }
 544          elseif (in_array($tag, array('script', 'style')))
 545          {
 546              foreach ($elements as $element)
 547              {
 548                  $element->parentNode->removeChild($element);
 549              }
 550  
 551              return;
 552          }
 553          else
 554          {
 555              foreach ($elements as $element)
 556              {
 557                  $fragment = $document->createDocumentFragment();
 558                  $number = $element->childNodes->length;
 559                  for ($i = $number; $i > 0; $i--)
 560                  {
 561                      $child = $element->childNodes->item(0);
 562                      $fragment->appendChild($child);
 563                  }
 564  
 565                  $element->parentNode->replaceChild($fragment, $element);
 566              }
 567          }
 568      }
 569  
 570  	protected function strip_attr($attrib, $xpath)
 571      {
 572          $elements = $xpath->query('//*[@' . $attrib . ']');
 573  
 574          foreach ($elements as $element)
 575          {
 576              $element->removeAttribute($attrib);
 577          }
 578      }
 579  
 580      protected function add_attr($tag, $valuePairs, $document)
 581      {
 582          $elements = $document->getElementsByTagName($tag);
 583          foreach ($elements as $element)
 584          {
 585              foreach ($valuePairs as $attrib => $value)
 586              {
 587                  $element->setAttribute($attrib, $value);
 588              }
 589          }
 590      }
 591  }