[ Index ]

PHP Cross Reference of DokuWiki

title

Body

[close]

/vendor/simplepie/simplepie/src/HTTP/ -> Parser.php (source)

   1  <?php
   2  
   3  declare(strict_types=1);
   4  /**
   5   * SimplePie
   6   *
   7   * A PHP-Based RSS and Atom Feed Framework.
   8   * Takes the hard work out of managing a complete RSS/Atom solution.
   9   *
  10   * Copyright (c) 2004-2022, Ryan Parman, Sam Sneddon, Ryan McCue, and contributors
  11   * All rights reserved.
  12   *
  13   * Redistribution and use in source and binary forms, with or without modification, are
  14   * permitted provided that the following conditions are met:
  15   *
  16   *     * Redistributions of source code must retain the above copyright notice, this list of
  17   *       conditions and the following disclaimer.
  18   *
  19   *     * Redistributions in binary form must reproduce the above copyright notice, this list
  20   *       of conditions and the following disclaimer in the documentation and/or other materials
  21   *       provided with the distribution.
  22   *
  23   *     * Neither the name of the SimplePie Team nor the names of its contributors may be used
  24   *       to endorse or promote products derived from this software without specific prior
  25   *       written permission.
  26   *
  27   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
  28   * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
  29   * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
  30   * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  31   * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  32   * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  33   * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  34   * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  35   * POSSIBILITY OF SUCH DAMAGE.
  36   *
  37   * @package SimplePie
  38   * @copyright 2004-2016 Ryan Parman, Sam Sneddon, Ryan McCue
  39   * @author Ryan Parman
  40   * @author Sam Sneddon
  41   * @author Ryan McCue
  42   * @link http://simplepie.org/ SimplePie
  43   * @license http://www.opensource.org/licenses/bsd-license.php BSD License
  44   */
  45  
  46  namespace SimplePie\HTTP;
  47  
  48  /**
  49   * HTTP Response Parser
  50   *
  51   * @package SimplePie
  52   * @subpackage HTTP
  53   */
  class Parser
  {
      /**
       * HTTP Version
       *
       * @var float
       */
      public $http_version = 0.0;

      /**
       * Status code
       *
       * @var int
       */
      public $status_code = 0;

      /**
       * Reason phrase
       *
       * @var string
       */
      public $reason = '';

      /**
       * Key/value pairs of the headers (names are stored lower-cased)
       *
       * @var array
       */
      public $headers = [];

      /**
       * Body of the response
       *
       * @var string
       */
      public $body = '';

      // Each state value (except STATE_EMIT and STATE_ERROR) is also the name
      // of the method that parse() invokes while the machine is in that state.
      private const STATE_HTTP_VERSION = 'http_version';

      private const STATE_STATUS = 'status';

      private const STATE_REASON = 'reason';

      private const STATE_NEW_LINE = 'new_line';

      private const STATE_BODY = 'body';

      private const STATE_NAME = 'name';

      private const STATE_VALUE = 'value';

      private const STATE_VALUE_CHAR = 'value_char';

      private const STATE_QUOTE = 'quote';

      private const STATE_QUOTE_ESCAPED = 'quote_escaped';

      private const STATE_QUOTE_CHAR = 'quote_char';

      private const STATE_CHUNKED = 'chunked';

      private const STATE_EMIT = 'emit';

      private const STATE_ERROR = false;

      /**
       * Current state of the state machine
       *
       * @var self::STATE_*
       */
      protected $state = self::STATE_HTTP_VERSION;

      /**
       * Input data
       *
       * @var string
       */
      protected $data = '';

      /**
       * Input data length (to avoid calling strlen() every time this is needed)
       *
       * @var int
       */
      protected $data_length = 0;

      /**
       * Current position of the pointer
       *
       * @var int
       */
      protected $position = 0;

      /**
       * Name of the header currently being parsed
       *
       * @var string
       */
      protected $name = '';

      /**
       * Value of the header currently being parsed
       *
       * @var string
       */
      protected $value = '';

      /**
       * Create an instance of the class with the input data
       *
       * @param string $data Input data
       */
      public function __construct($data)
      {
          $this->data = $data;
          $this->data_length = strlen($this->data);
      }

      /**
       * Parse the input data
       *
       * Runs the state machine until it reaches STATE_EMIT, enters the error
       * state, or runs out of input. On failure every public result property
       * is reset to its initial value.
       *
       * @return bool true on success, false on failure
       */
      public function parse()
      {
          while ($this->state && $this->state !== self::STATE_EMIT && $this->has_data()) {
              // The state value doubles as the name of the handler method.
              $state = $this->state;
              $this->$state();
          }
          $this->data = '';

          // STATE_BODY counts as success too: the input ended right after the
          // blank line, so body() never ran and the body stays empty.
          if ($this->state === self::STATE_EMIT || $this->state === self::STATE_BODY) {
              return true;
          }

          // Reset to the documented float default rather than an empty string.
          $this->http_version = 0.0;
          $this->status_code = 0;
          $this->reason = '';
          $this->headers = [];
          $this->body = '';
          return false;
      }

      /**
       * Check whether there is data beyond the pointer
       *
       * @return bool true if there is further data, false if not
       */
      protected function has_data()
      {
          return $this->position < $this->data_length;
      }

      /**
       * See if the next character is LWS
       *
       * A tab or space always counts; a line feed counts only when it is
       * followed by a tab or space (a folded header line). The caller must
       * make sure the pointer is inside the data.
       *
       * @return bool true if the next character is LWS, false if not
       */
      protected function is_linear_whitespace()
      {
          return (bool) ($this->data[$this->position] === "\x09"
              || $this->data[$this->position] === "\x20"
              || ($this->data[$this->position] === "\x0A"
                  && isset($this->data[$this->position + 1])
                  && ($this->data[$this->position + 1] === "\x09" || $this->data[$this->position + 1] === "\x20")));
      }

      /**
       * Parse the HTTP version
       *
       * Requires the data to contain a line feed and to start with "HTTP/"
       * (case-insensitive); a version with more than one dot is an error.
       */
      protected function http_version()
      {
          if (strpos($this->data, "\x0A") !== false && strtoupper(substr($this->data, 0, 5)) === 'HTTP/') {
              $len = strspn($this->data, '0123456789.', 5);
              $this->http_version = substr($this->data, 5, $len);
              $this->position += 5 + $len;
              if (substr_count($this->http_version, '.') <= 1) {
                  $this->http_version = (float) $this->http_version;
                  // Skip the whitespace separating version and status code.
                  $this->position += strspn($this->data, "\x09\x20", $this->position);
                  $this->state = self::STATE_STATUS;
              } else {
                  $this->state = self::STATE_ERROR;
              }
          } else {
              $this->state = self::STATE_ERROR;
          }
      }

      /**
       * Parse the status code
       *
       * At least one digit is required, otherwise the parser errors out.
       */
      protected function status()
      {
          if ($len = strspn($this->data, '0123456789', $this->position)) {
              $this->status_code = (int) substr($this->data, $this->position, $len);
              $this->position += $len;
              $this->state = self::STATE_REASON;
          } else {
              $this->state = self::STATE_ERROR;
          }
      }

      /**
       * Parse the reason phrase
       *
       * Takes everything up to the next line feed and strips surrounding
       * tabs, carriage returns and spaces.
       */
      protected function reason()
      {
          $len = strcspn($this->data, "\x0A", $this->position);
          $this->reason = trim(substr($this->data, $this->position, $len), "\x09\x0D\x20");
          // +1 steps over the line feed itself.
          $this->position += $len + 1;
          $this->state = self::STATE_NEW_LINE;
      }

      /**
       * Deal with a new line, shifting data around as needed
       *
       * Stores the header collected so far (if any) and then decides whether
       * the next line is the blank line that starts the body or another header.
       */
      protected function new_line()
      {
          $this->value = trim($this->value, "\x0D\x20");
          if ($this->name !== '' && $this->value !== '') {
              $this->name = strtolower($this->name);
              // We should only use the last Content-Type header. c.f. issue #1
              if (isset($this->headers[$this->name]) && $this->name !== 'content-type') {
                  $this->headers[$this->name] .= ', ' . $this->value;
              } else {
                  $this->headers[$this->name] = $this->value;
              }
          }
          $this->name = '';
          $this->value = '';

          // Look at the two bytes at the pointer. Taking substr() of the single
          // character at the pointer could never equal "\r\n".
          if (substr($this->data, $this->position, 2) === "\x0D\x0A") {
              $this->position += 2;
              $this->state = self::STATE_BODY;
          } elseif ($this->data[$this->position] === "\x0A") {
              $this->position++;
              $this->state = self::STATE_BODY;
          } else {
              $this->state = self::STATE_NAME;
          }
      }

      /**
       * Parse a header name
       *
       * A line without a colon is skipped; input that ends before a colon or
       * line feed is found is an error.
       */
      protected function name()
      {
          $len = strcspn($this->data, "\x0A:", $this->position);
          if (isset($this->data[$this->position + $len])) {
              if ($this->data[$this->position + $len] === "\x0A") {
                  $this->position += $len;
                  $this->state = self::STATE_NEW_LINE;
              } else {
                  $this->name = substr($this->data, $this->position, $len);
                  // +1 steps over the colon.
                  $this->position += $len + 1;
                  $this->state = self::STATE_VALUE;
              }
          } else {
              $this->state = self::STATE_ERROR;
          }
      }

      /**
       * Parse LWS, replacing consecutive LWS characters with a single space
       */
      protected function linear_whitespace()
      {
          do {
              if (substr($this->data, $this->position, 2) === "\x0D\x0A") {
                  $this->position += 2;
              } elseif ($this->data[$this->position] === "\x0A") {
                  $this->position++;
              }
              $this->position += strspn($this->data, "\x09\x20", $this->position);
          } while ($this->has_data() && $this->is_linear_whitespace());
          $this->value .= "\x20";
      }

      /**
       * See what state to move to while within non-quoted header values
       */
      protected function value()
      {
          if ($this->is_linear_whitespace()) {
              $this->linear_whitespace();
          } else {
              switch ($this->data[$this->position]) {
                  case '"':
                      // Workaround for ETags: we have to include the quotes as
                      // part of the tag.
                      if (strtolower($this->name) === 'etag') {
                          $this->value .= '"';
                          $this->position++;
                          $this->state = self::STATE_VALUE_CHAR;
                          break;
                      }
                      $this->position++;
                      $this->state = self::STATE_QUOTE;
                      break;

                  case "\x0A":
                      $this->position++;
                      $this->state = self::STATE_NEW_LINE;
                      break;

                  default:
                      $this->state = self::STATE_VALUE_CHAR;
                      break;
              }
          }
      }

      /**
       * Parse a header value while outside quotes
       *
       * Consumes everything up to the next tab, space, line feed or quote.
       */
      protected function value_char()
      {
          $len = strcspn($this->data, "\x09\x20\x0A\"", $this->position);
          $this->value .= substr($this->data, $this->position, $len);
          $this->position += $len;
          $this->state = self::STATE_VALUE;
      }

      /**
       * See what state to move to while within quoted header values
       */
      protected function quote()
      {
          if ($this->is_linear_whitespace()) {
              $this->linear_whitespace();
          } else {
              switch ($this->data[$this->position]) {
                  case '"':
                      // Closing quote: dropped from the value.
                      $this->position++;
                      $this->state = self::STATE_VALUE;
                      break;

                  case "\x0A":
                      $this->position++;
                      $this->state = self::STATE_NEW_LINE;
                      break;

                  case '\\':
                      $this->position++;
                      $this->state = self::STATE_QUOTE_ESCAPED;
                      break;

                  default:
                      $this->state = self::STATE_QUOTE_CHAR;
                      break;
              }
          }
      }

      /**
       * Parse a header value while within quotes
       *
       * Consumes everything up to the next tab, space, line feed, quote or
       * backslash.
       */
      protected function quote_char()
      {
          $len = strcspn($this->data, "\x09\x20\x0A\"\\", $this->position);
          $this->value .= substr($this->data, $this->position, $len);
          $this->position += $len;
          // Stay inside the quoted string. Returning to STATE_VALUE here would
          // make a following backslash escape or closing quote be handled as
          // if it were outside the quotes.
          $this->state = self::STATE_QUOTE;
      }

      /**
       * Parse an escaped character within quotes
       *
       * The character after the backslash is taken literally.
       */
      protected function quote_escaped()
      {
          $this->value .= $this->data[$this->position];
          $this->position++;
          $this->state = self::STATE_QUOTE;
      }

      /**
       * Parse the body
       *
       * Any non-empty Transfer-Encoding header is removed and the body is
       * handed to chunked() for decoding.
       */
      protected function body()
      {
          $this->body = substr($this->data, $this->position);
          if (!empty($this->headers['transfer-encoding'])) {
              unset($this->headers['transfer-encoding']);
              $this->state = self::STATE_CHUNKED;
          } else {
              $this->state = self::STATE_EMIT;
          }
      }

      /**
       * Parse a "Transfer-Encoding: chunked" body
       *
       * If the body does not look chunked, or a chunk header is malformed,
       * the body is left untouched. Trailer headers are ignored.
       */
      protected function chunked()
      {
          if (!preg_match('/^([0-9a-f]+)[^\r\n]*\r\n/i', trim($this->body))) {
              $this->state = self::STATE_EMIT;
              return;
          }

          $decoded = '';
          $encoded = $this->body;

          while (true) {
              $is_chunked = (bool) preg_match('/^([0-9a-f]+)[^\r\n]*\r\n/i', $encoded, $matches);
              if (!$is_chunked) {
                  // Looks like it's not chunked after all
                  $this->state = self::STATE_EMIT;
                  return;
              }

              $length = hexdec(trim($matches[1]));
              if (!is_int($length)) {
                  // hexdec() returns a float once the value exceeds PHP_INT_MAX.
                  // No real chunk can be that large, and passing the float on
                  // to substr() would raise a TypeError, so treat the body as
                  // not chunked.
                  $this->state = self::STATE_EMIT;
                  return;
              }
              if ($length === 0) {
                  // Ignore trailer headers
                  $this->state = self::STATE_EMIT;
                  $this->body = $decoded;
                  return;
              }

              $chunk_length = strlen($matches[0]);
              $decoded .= substr($encoded, $chunk_length, $length);
              // +2 steps over the CRLF that terminates the chunk data.
              $encoded = substr($encoded, $chunk_length + $length + 2);

              // BC for PHP < 8.0: substr() can return bool instead of string
              $encoded = ($encoded === false) ? '' : $encoded;

              if (trim($encoded) === '0' || empty($encoded)) {
                  $this->state = self::STATE_EMIT;
                  $this->body = $decoded;
                  return;
              }
          }
      }

      /**
       * Prepare headers (take care of proxies headers)
       *
       * Keeps only the last of up to $count header blocks separated by a
       * blank line, then strips a leading "200 Connection established"
       * block (HTTP/1.0 and HTTP/1.1) if one is present.
       *
       * @param string  $headers Raw headers
       * @param integer $count   Redirection count. Default to 1.
       *
       * @return string
       */
      public static function prepareHeaders($headers, $count = 1)
      {
          $data = explode("\r\n\r\n", $headers, $count);
          $data = array_pop($data);
          foreach (['HTTP/1.0', 'HTTP/1.1'] as $version) {
              if (false !== stripos($data, $version . " 200 Connection established\r\n")) {
                  $exploded = explode("\r\n\r\n", $data, 2);
                  $data = end($exploded);
              }
          }
          return $data;
      }
  }
 508  
 509  class_alias('SimplePie\HTTP\Parser', 'SimplePie_HTTP_Parser'); // Registers SimplePie_HTTP_Parser as an alias of this class (presumably so code using the old un-namespaced name keeps working).