[ Index ]

PHP Cross Reference of DokuWiki

title

Body

[close]

/vendor/geshi/geshi/src/ -> geshi.php (source)

   1  <?php
   2  /**
   3   * GeSHi - Generic Syntax Highlighter
   4   *
   5   * The GeSHi class for Generic Syntax Highlighting. Please refer to the
   6   * documentation at http://qbnz.com/highlighter/documentation.php for more
   7   * information about how to use this class.
   8   *
   9   * For changes, release notes, TODOs etc, see the relevant files in the docs/
  10   * directory.
  11   *
  12   *   This file is part of GeSHi.
  13   *
  14   *  GeSHi is free software; you can redistribute it and/or modify
  15   *  it under the terms of the GNU General Public License as published by
  16   *  the Free Software Foundation; either version 2 of the License, or
  17   *  (at your option) any later version.
  18   *
  19   *  GeSHi is distributed in the hope that it will be useful,
  20   *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  21   *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  22   *  GNU General Public License for more details.
  23   *
  24   *  You should have received a copy of the GNU General Public License
  25   *  along with GeSHi; if not, write to the Free Software
  26   *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  27   *
  28   * @package    geshi
  29   * @subpackage core
  30   * @author     Nigel McNie <nigel@geshi.org>, Benny Baumann <BenBE@omorphia.de>
  31   * @copyright  (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2014 Benny Baumann
  32   * @license    http://gnu.org/copyleft/gpl.html GNU GPL
  33   */
  34  
  35  //
  36  // GeSHi Constants
  37  // You should use these constant names in your programs instead of
  38  // their values - you never know when a value may change in a future
  39  // version
  40  //
  41  
  42  /** The version of this GeSHi file */
  43  define('GESHI_VERSION', '1.0.9.1');
  44  
  45  // Define the root directory for the GeSHi code tree
  46  if (!defined('GESHI_ROOT')) {
  47      /** The root directory for GeSHi */
  48      define('GESHI_ROOT', dirname(__FILE__) . DIRECTORY_SEPARATOR);
  49  }
  50  /** The language file directory for GeSHi
  51      @access private */
  52  define('GESHI_LANG_ROOT', GESHI_ROOT . 'geshi' . DIRECTORY_SEPARATOR);
  53  
  54  // Define if GeSHi should be paranoid about security
  55  if (!defined('GESHI_SECURITY_PARANOID')) {
  56      /** Tells GeSHi to be paranoid about security settings */
  57      define('GESHI_SECURITY_PARANOID', false);
  58  }
  59  
  60  // Line numbers - use with enable_line_numbers()
  61  /** Use no line numbers when building the result */
  62  define('GESHI_NO_LINE_NUMBERS', 0);
  63  /** Use normal line numbers when building the result */
  64  define('GESHI_NORMAL_LINE_NUMBERS', 1);
  65  /** Use fancy line numbers when building the result */
  66  define('GESHI_FANCY_LINE_NUMBERS', 2);
  67  
  68  // Container HTML type
  69  /** Use nothing to surround the source */
  70  define('GESHI_HEADER_NONE', 0);
  71  /** Use a "div" to surround the source */
  72  define('GESHI_HEADER_DIV', 1);
  73  /** Use a "pre" to surround the source */
  74  define('GESHI_HEADER_PRE', 2);
  75  /** Use a pre to wrap lines when line numbers are enabled or to wrap the whole code. */
  76  define('GESHI_HEADER_PRE_VALID', 3);
  77  /**
  78   * Use a "table" to surround the source:
  79   *
  80   *  <table>
  81   *    <thead><tr><td colspan="2">$header</td></tr></thead>
  82   *    <tbody><tr><td><pre>$linenumbers</pre></td><td><pre>$code</pre></td></tr></tbody>
  83   *    <tfoot><tr><td colspan="2">$footer</td></tr></tfoot>
  84   *  </table>
  85   *
  86   * this is essentially only a workaround for Firefox, see sf#1651996 or take a look at
  87   * https://bugzilla.mozilla.org/show_bug.cgi?id=365805
  88   * @note when linenumbers are disabled this is essentially the same as GESHI_HEADER_PRE
  89   */
  90  define('GESHI_HEADER_PRE_TABLE', 4);
  91  
  92  // Capitalisation constants
  93  /** Leave keywords found as the case that they are */
  94  define('GESHI_CAPS_NO_CHANGE', 0);
  95  /** Uppercase keywords found */
  96  define('GESHI_CAPS_UPPER', 1);
  97  /** Lowercase keywords found */
  98  define('GESHI_CAPS_LOWER', 2);
  99  
 100  // Link style constants
 101  /** Links in the source in the :link state */
 102  define('GESHI_LINK', 0);
 103  /** Links in the source in the :hover state */
 104  define('GESHI_HOVER', 1);
 105  /** Links in the source in the :active state */
 106  define('GESHI_ACTIVE', 2);
 107  /** Links in the source in the :visited state */
 108  define('GESHI_VISITED', 3);
 109  
 110  // Important string starter/finisher
 111  // Note that if you change these, they should be as-is: i.e., don't
 112  // write them as if they had been run through htmlentities()
 113  /** The starter for important parts of the source */
 114  define('GESHI_START_IMPORTANT', '<BEGIN GeSHi>');
 115  /** The ender for important parts of the source */
 116  define('GESHI_END_IMPORTANT', '<END GeSHi>');
 117  
 118  /**#@+
 119   *  @access private
 120   */
 121  // When strict mode applies for a language
 122  /** Strict mode never applies (this is the most common) */
 123  define('GESHI_NEVER', 0);
 124  /** Strict mode *might* apply, and can be enabled or
 125      disabled by {@link GeSHi->enable_strict_mode()} */
 126  define('GESHI_MAYBE', 1);
 127  /** Strict mode always applies */
 128  define('GESHI_ALWAYS', 2);
 129  
 130  // Advanced regexp handling constants, used in language files
 131  /** The key of the regex array defining what to search for */
 132  define('GESHI_SEARCH', 0);
 133  /** The key of the regex array defining what bracket group in a
 134      matched search to use as a replacement */
 135  define('GESHI_REPLACE', 1);
 136  /** The key of the regex array defining any modifiers to the regular expression */
 137  define('GESHI_MODIFIERS', 2);
 138  /** The key of the regex array defining what bracket group in a
 139      matched search to put before the replacement */
 140  define('GESHI_BEFORE', 3);
 141  /** The key of the regex array defining what bracket group in a
 142      matched search to put after the replacement */
 143  define('GESHI_AFTER', 4);
 144  /** The key of the regex array defining a custom keyword to use
 145      for this regexp's html tag class */
 146  define('GESHI_CLASS', 5);
 147  
 148  /** Used in language files to mark comments */
 149  define('GESHI_COMMENTS', 0);
 150  
 151  /** some old PHP / PCRE subpatterns only support up to xxx subpatterns in
 152      regular expressions. Set this to false if your PCRE lib is up to date
 153      @see GeSHi->optimize_regexp_list()
 154      **/
 155  define('GESHI_MAX_PCRE_SUBPATTERNS', 500);
 156  /** it's also important not to generate too long regular expressions
 157      be generous here... but keep in mind, that when reaching this limit we
 158      still have to close open patterns. 12k should do just fine on a 16k limit.
 159      @see GeSHi->optimize_regexp_list()
 160      **/
 161  define('GESHI_MAX_PCRE_LENGTH', 12288);
 162  
 163  //Number format specification
 164  /** Basic number format for integers */
 165  define('GESHI_NUMBER_INT_BASIC', 1);        //Default integers \d+
 166  /** Enhanced number format for integers like seen in C */
 167  define('GESHI_NUMBER_INT_CSTYLE', 2);       //Default C-Style \d+[lL]?
 168  /** Number format to highlight binary numbers with a suffix "b" */
 169  define('GESHI_NUMBER_BIN_SUFFIX', 16);           //[01]+[bB]
 170  /** Number format to highlight binary numbers with a prefix % */
 171  define('GESHI_NUMBER_BIN_PREFIX_PERCENT', 32);   //%[01]+
 172  /** Number format to highlight binary numbers with a prefix 0b (C) */
 173  define('GESHI_NUMBER_BIN_PREFIX_0B', 64);        //0b[01]+
 174  /** Number format to highlight octal numbers with a leading zero */
 175  define('GESHI_NUMBER_OCT_PREFIX', 256);           //0[0-7]+
 176  /** Number format to highlight octal numbers with a prefix 0o (logtalk) */
 177  define('GESHI_NUMBER_OCT_PREFIX_0O', 512);           //0o[0-7]+
 178  /** Number format to highlight octal numbers with a leading @ (Used in HiSofts Devpac series). */
 179  define('GESHI_NUMBER_OCT_PREFIX_AT', 1024);           //@[0-7]+
 180  /** Number format to highlight octal numbers with a suffix of o */
 181  define('GESHI_NUMBER_OCT_SUFFIX', 2048);           //[0-7]+[oO]
 182  /** Number format to highlight hex numbers with a prefix 0x */
 183  define('GESHI_NUMBER_HEX_PREFIX', 4096);           //0x[0-9a-fA-F]+
 184  /** Number format to highlight hex numbers with a prefix $ */
 185  define('GESHI_NUMBER_HEX_PREFIX_DOLLAR', 8192);           //$[0-9a-fA-F]+
 186  /** Number format to highlight hex numbers with a suffix of h */
 187  define('GESHI_NUMBER_HEX_SUFFIX', 16384);           //[0-9][0-9a-fA-F]*h
 188  /** Number format to highlight floating-point numbers without support for scientific notation */
 189  define('GESHI_NUMBER_FLT_NONSCI', 65536);          //\d+\.\d+
 190  /** Number format to highlight floating-point numbers with a trailing "f" suffix, without support for scientific notation */
 191  define('GESHI_NUMBER_FLT_NONSCI_F', 131072);       //\d+(\.\d+)?f
 192  /** Number format to highlight floating-point numbers with support for scientific notation (E) and optional leading zero */
 193  define('GESHI_NUMBER_FLT_SCI_SHORT', 262144);      //\.\d+e\d+
 194  /** Number format to highlight floating-point numbers with support for scientific notation (E) and required leading digit */
 195  define('GESHI_NUMBER_FLT_SCI_ZERO', 524288);       //\d+(\.\d+)?e\d+
 196  //Custom formats are passed by RX array
 197  
 198  // Error detection - use these to analyse faults
 199  /** No sourcecode to highlight was specified
 200   * @deprecated
 201   */
 202  define('GESHI_ERROR_NO_INPUT', 1);
 203  /** The language specified does not exist */
 204  define('GESHI_ERROR_NO_SUCH_LANG', 2);
 205  /** GeSHi could not open a file for reading (generally a language file) */
 206  define('GESHI_ERROR_FILE_NOT_READABLE', 3);
 207  /** The header type passed to {@link GeSHi->set_header_type()} was invalid */
 208  define('GESHI_ERROR_INVALID_HEADER_TYPE', 4);
 209  /** The line number type passed to {@link GeSHi->enable_line_numbers()} was invalid */
 210  define('GESHI_ERROR_INVALID_LINE_NUMBER_TYPE', 5);
 211  /**#@-*/
 212  
 213  
 214  /**
 215   * The GeSHi Class.
 216   *
 217   * Please refer to the documentation for GeSHi 1.0.X that is available
 218   * at http://qbnz.com/highlighter/documentation.php for more information
 219   * about how to use this class.
 220   *
 221   * @package   geshi
 222   * @author    Nigel McNie <nigel@geshi.org>
 223   * @author    Benny Baumann <BenBE@omorphia.de>
 224   * @copyright (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2014 Benny Baumann
 225   */
 226  class GeSHi {
 227  
 228      /**
 229       * The source code to highlight
 230       * @var string
 231       */
 232      protected $source = '';
 233  
 234      /**
 235       * The language to use when highlighting
 236       * @var string
 237       */
 238      protected $language = '';
 239  
 240      /**
 241       * The data for the language used
 242       * @var array
 243       */
 244      protected $language_data = array();
 245  
 246      /**
 247       * The path to the language files
 248       * @var string
 249       */
 250      protected $language_path = GESHI_LANG_ROOT;
 251  
 252      /**
 253       * The error message associated with an error
 254       * @var int|string|false
 255       * @todo check err reporting works
 256       */
 257      protected $error = false;
 258  
 259      /**
 260       * Possible error messages
 261       * @var array
 262       */
 263      protected $error_messages = array(
 264          GESHI_ERROR_NO_SUCH_LANG => 'GeSHi could not find the language {LANGUAGE} (using path {PATH})',
 265          GESHI_ERROR_FILE_NOT_READABLE => 'The file specified for load_from_file was not readable',
 266          GESHI_ERROR_INVALID_HEADER_TYPE => 'The header type specified is invalid',
 267          GESHI_ERROR_INVALID_LINE_NUMBER_TYPE => 'The line number type specified is invalid'
 268      );
 269  
 270      /**
 271       * Whether highlighting is strict or not
 272       * @var boolean
 273       */
 274      protected $strict_mode = false;
 275  
 276      /**
 277       * Whether to use CSS classes in output
 278       * @var boolean
 279       */
 280      protected $use_classes = false;
 281  
 282      /**
 283       * The type of header to use. Can be one of the following
 284       * values:
 285       *
 286       * - GESHI_HEADER_PRE: Source is outputted in a "pre" HTML element.
 287       * - GESHI_HEADER_DIV: Source is outputted in a "div" HTML element.
 288       * - GESHI_HEADER_NONE: No header is outputted.
 289       *
 290       * @var int
 291       */
 292      protected $header_type = GESHI_HEADER_PRE;
 293  
 294      /**
 295       * Array of permissions for which lexics should be highlighted
 296       * @var array
 297       */
 298      protected $lexic_permissions = array(
 299          'KEYWORDS' =>    array(),
 300          'COMMENTS' =>    array('MULTI' => true),
 301          'REGEXPS' =>     array(),
 302          'ESCAPE_CHAR' => true,
 303          'BRACKETS' =>    true,
 304          'SYMBOLS' =>     false,
 305          'STRINGS' =>     true,
 306          'NUMBERS' =>     true,
 307          'METHODS' =>     true,
 308          'SCRIPT' =>      true
 309      );
 310  
 311      /**
 312       * The time it took to parse the code
 313       * @var double
 314       */
 315      protected $time = 0;
 316  
 317      /**
 318       * The content of the header block
 319       * @var string
 320       */
 321      protected $header_content = '';
 322  
 323      /**
 324       * The content of the footer block
 325       * @var string
 326       */
 327      protected $footer_content = '';
 328  
 329      /**
 330       * The style of the header block
 331       * @var string
 332       */
 333      protected $header_content_style = '';
 334  
 335      /**
 336       * The style of the footer block
 337       * @var string
 338       */
 339      protected $footer_content_style = '';
 340  
 341      /**
 342       * Tells if a block around the highlighted source should be forced
 343       * if not using line numbering
 344       * @var boolean
 345       */
 346      protected $force_code_block = false;
 347  
 348      /**
 349       * The styles for hyperlinks in the code
 350       * @var array
 351       */
 352      protected $link_styles = array();
 353  
 354      /**
 355       * Whether important blocks should be recognised or not
 356       * @var boolean
 357       * @deprecated
 358       * @todo REMOVE THIS FUNCTIONALITY!
 359       */
 360      protected $enable_important_blocks = false;
 361  
 362      /**
 363       * Styles for important parts of the code
 364       * @var string
 365       * @deprecated
 366       * @todo As above - rethink the whole idea of important blocks as it is buggy and
 367       * will be hard to implement in 1.2
 368       */
 369      protected $important_styles = 'font-weight: bold; color: red;'; // Styles for important parts of the code
 370  
 371      /**
 372       * Whether CSS IDs should be added to the code
 373       * @var boolean
 374       */
 375      protected $add_ids = false;
 376  
 377      /**
 378       * Lines that should be highlighted extra
 379       * @var array
 380       */
 381      protected $highlight_extra_lines = array();
 382  
 383      /**
 384       * Styles of lines that should be highlighted extra
 385       * @var array
 386       */
 387      protected $highlight_extra_lines_styles = array();
 388  
 389      /**
 390       * Styles of extra-highlighted lines
 391       * @var string
 392       */
 393      protected $highlight_extra_lines_style = 'background-color: #ffc;';
 394  
 395      /**
 396       * The line ending
 397       * If null, nl2br() will be used on the result string.
 398       * Otherwise, all instances of \n will be replaced with $line_ending
 399       * @var string
 400       */
 401      protected $line_ending = null;
 402  
 403      /**
 404       * Number at which line numbers should start at
 405       * @var int
 406       */
 407      protected $line_numbers_start = 1;
 408  
 409      /**
 410       * The overall style for this code block
 411       * @var string
 412       */
 413      protected $overall_style = 'font-family:monospace;';
 414  
 415      /**
 416       *  The style for the actual code
 417       * @var string
 418       */
 419      protected $code_style = 'font: normal normal 1em/1.2em monospace; margin:0; padding:0; background:none; vertical-align:top;';
 420  
 421      /**
 422       * The overall class for this code block
 423       * @var string
 424       */
 425      protected $overall_class = '';
 426  
 427      /**
 428       * The overall ID for this code block
 429       * @var string
 430       */
 431      protected $overall_id = '';
 432  
 433      /**
 434       * Line number styles
 435       * @var string
 436       */
 437      protected $line_style1 = 'font-weight: normal; vertical-align:top;';
 438  
 439      /**
 440       * Line number styles for fancy lines
 441       * @var string
 442       */
 443      protected $line_style2 = 'font-weight: bold; vertical-align:top;';
 444  
 445      /**
 446       * Style for line numbers when GESHI_HEADER_PRE_TABLE is chosen
 447       * @var string
 448       */
 449      protected $table_linenumber_style = 'width:1px;text-align:right;margin:0;padding:0 2px;vertical-align:top;';
 450  
 451      /**
 452       * Flag for how line numbers are displayed
 453       * @var int
 454       */
 455      protected $line_numbers = GESHI_NO_LINE_NUMBERS;
 456  
 457      /**
 458       * Flag to decide if multi line spans are allowed. Set it to false to make sure
 459       * each tag is closed before and reopened after each linefeed.
 460       * @var boolean
 461       */
 462      protected $allow_multiline_span = true;
 463  
 464      /**
 465       * The "nth" value for fancy line highlighting
 466       * @var int
 467       */
 468      protected $line_nth_row = 0;
 469  
 470      /**
 471       * The size of tab stops
 472       * @var int
 473       */
 474      protected $tab_width = 8;
 475  
 476      /**
 477       * Should we use language-defined tab stop widths?
 478       * @var boolean
 479       */
 480      protected $use_language_tab_width = false;
 481  
 482      /**
 483       * Default target for keyword links
 484       * @var string
 485       */
 486      protected $link_target = '';
 487  
 488      /**
 489       * The encoding to use for entity encoding
 490       * NOTE: Used with Escape Char Sequences to fix UTF-8 handling (cf. SF#2037598)
 491       * @var string
 492       */
 493      protected $encoding = 'utf-8';
 494  
 495      /**
 496       * Should keywords be linked?
 497       * @var boolean
 498       */
 499      protected $keyword_links = true;
 500  
 501      /**
 502       * Currently loaded language file
 503       * @var    string
 504       * @since 1.0.7.22
 505       */
 506      protected $loaded_language = '';
 507  
 508      /**
 509       * Whether the caches needed for parsing are built or not
 510       *
 511       * @var   bool
 512       * @since 1.0.8
 513       */
 514      protected $parse_cache_built = false;
 515  
 516      /**
 517       * Work around for Suhosin Patch with disabled /e modifier
 518       *
 519       * Note from suhosins author in config file:
 520       * <blockquote>
 521       *   The /e modifier inside <code>preg_replace()</code> allows code execution.
 522       *   Often it is the cause for remote code execution exploits. It is wise to
 523       *   deactivate this feature and test where in the application it is used.
 524       *   The developer using the /e modifier should be made aware that he should
 525       *   use <code>preg_replace_callback()</code> instead
 526       * </blockquote>
 527       *
 528       * @var   array
 529       * @since 1.0.8
 530       */
 531      protected $_kw_replace_group = 0;
 532      protected $_rx_key = 0;
 533  
 534      /**
 535       * some "callback parameters" for handle_multiline_regexps
 536       *
 537       * @since  1.0.8
 538       * @access private
 539       * @var    string
 540       */
 541      protected $_hmr_before = '';
 542      protected $_hmr_replace = '';
 543      protected $_hmr_after = '';
 544      protected $_hmr_key = 0;
 545  
 546      /**
 547       * Creates a new GeSHi object, with source and language
 548       *
 549       * @param string $source   The source code to highlight
 550       * @param string $language The language to highlight the source with
 551       * @param string $path     The path to the language file directory. <b>This
 552       *               is deprecated!</b> I've backported the auto path
 553       *               detection from the 1.1.X dev branch, so now it
 554       *               should be automatically set correctly. If you have
 555       *               renamed the language directory however, you will
 556       *               still need to set the path using this parameter or
 557       *               {@link GeSHi->set_language_path()}
 558       * @since 1.0.0
 559       */
 560      public function __construct($source = '', $language = '', $path = '') {
 561          if ( is_string($source) && ($source !== '') ) {
 562              $this->set_source($source);
 563          }
 564          if ( is_string($language) && ($language !== '') ) {
 565              $this->set_language($language);
 566          }
 567          $this->set_language_path($path);
 568      }
 569  
 570      /**
 571       * Returns the version of GeSHi
 572       *
 573       * @return string
 574       * @since  1.0.8.11
 575       */
 576      public function get_version()
 577      {
 578          return GESHI_VERSION;
 579      }
 580  
 581      /**
 582       * Returns an error message associated with the last GeSHi operation,
 583       * or false if no error has occurred
 584       *
 585       * @return string|false An error message if there has been an error, else false
 586       * @since  1.0.0
 587       */
 588      public function error() {
 589          if ($this->error) {
 590              //Put some template variables for debugging here ...
 591              $debug_tpl_vars = array(
 592                  '{LANGUAGE}' => $this->language,
 593                  '{PATH}' => $this->language_path
 594              );
 595              $msg = str_replace(
 596                  array_keys($debug_tpl_vars),
 597                  array_values($debug_tpl_vars),
 598                  $this->error_messages[$this->error]);
 599  
 600              return "<br /><strong>GeSHi Error:</strong> $msg (code {$this->error})<br />";
 601          }
 602          return false;
 603      }
 604  
 605      /**
 606       * Gets a human-readable language name (thanks to Simon Patterson
 607       * for the idea :))
 608       *
 609       * @return string The name for the current language
 610       * @since  1.0.2
 611       */
 612      public function get_language_name() {
 613          if (GESHI_ERROR_NO_SUCH_LANG == $this->error) {
 614              return $this->language_data['LANG_NAME'] . ' (Unknown Language)';
 615          }
 616          return $this->language_data['LANG_NAME'];
 617      }
 618  
 619      /**
 620       * Sets the source code for this object
 621       *
 622       * @param string $source The source code to highlight
 623       * @since 1.0.0
 624       */
 625      public function set_source($source) {
 626          $this->source = $source;
 627          $this->highlight_extra_lines = array();
 628      }
 629  
 630      /**
 631       * Clean up the language name to prevent malicious code injection
 632       *
 633       * @param string $language The name of the language to strip
 634       * @since 1.0.9.1
 635       */
 636      public function strip_language_name($language) {
 637          $language = preg_replace('#[^a-zA-Z0-9\-_]#', '', $language);
 638          $language = strtolower($language);
 639  
 640          return $language;
 641      }
 642  
 643      /**
 644       * Sets the language for this object
 645       *
 646       * @note since 1.0.8 this function won't reset language-settings by default anymore!
 647       *       if you need this set $force_reset = true
 648       *
 649       * @param string $language    The name of the language to use
 650       * @param bool   $force_reset
 651       * @since 1.0.0
 652       */
 653      public function set_language($language, $force_reset = false) {
 654          $this->error = false;
 655          $this->strict_mode = GESHI_NEVER;
 656  
 657          if ($force_reset) {
 658              $this->loaded_language = false;
 659          }
 660  
 661          //Clean up the language name to prevent malicious code injection
 662          $language = $this->strip_language_name($language);
 663  
 664          //Retreive the full filename
 665          $file_name = $this->language_path . $language . '.php';
 666          if ($file_name == $this->loaded_language) {
 667              // this language is already loaded!
 668              return;
 669          }
 670  
 671          $this->language = $language;
 672  
 673          //Check if we can read the desired file
 674          if (!is_readable($file_name)) {
 675              $this->error = GESHI_ERROR_NO_SUCH_LANG;
 676              return;
 677          }
 678  
 679          // Load the language for parsing
 680          $this->load_language($file_name);
 681      }
 682  
 683      /**
 684       * Sets the path to the directory containing the language files. Note
 685       * that this path is relative to the directory of the script that included
 686       * geshi.php, NOT geshi.php itself.
 687       *
 688       * @param string $path The path to the language directory
 689       * @since 1.0.0
 690       * @deprecated The path to the language files should now be automatically
 691       *             detected, so this method should no longer be needed. The
 692       *             1.1.X branch handles manual setting of the path differently
 693       *             so this method will disappear in 1.2.0.
 694       */
 695      public function set_language_path($path) {
 696          if(strpos($path,':')) {
 697              //Security Fix to prevent external directories using fopen wrappers.
 698              if(DIRECTORY_SEPARATOR == "\\") {
 699                  if(!preg_match('#^[a-zA-Z]:#', $path) || false !== strpos($path, ':', 2)) {
 700                      return;
 701                  }
 702              } else {
 703                  return;
 704              }
 705          }
 706          if(preg_match('#[^/a-zA-Z0-9_\.\-\\\s:]#', $path)) {
 707              //Security Fix to prevent external directories using fopen wrappers.
 708              return;
 709          }
 710          if(GESHI_SECURITY_PARANOID && false !== strpos($path, '/.')) {
 711              //Security Fix to prevent external directories using fopen wrappers.
 712              return;
 713          }
 714          if(GESHI_SECURITY_PARANOID && false !== strpos($path, '..')) {
 715              //Security Fix to prevent external directories using fopen wrappers.
 716              return;
 717          }
 718          if ($path) {
 719              $this->language_path = ('/' == $path[strlen($path) - 1]) ? $path : $path . '/';
 720              $this->set_language($this->language); // otherwise set_language_path has no effect
 721          }
 722      }
 723  
 724      /**
 725       * Get supported langs or an associative array lang=>full_name.
 726       * @param boolean $full_names
 727       * @return array
 728       */
 729      public function get_supported_languages($full_names=false)
 730      {
 731          // return array
 732          $back = array();
 733  
 734          // we walk the lang root
 735          $dir = dir($this->language_path);
 736  
 737          // foreach entry
 738          while (false !== ($entry = $dir->read()))
 739          {
 740              $full_path = $this->language_path.$entry;
 741  
 742              // Skip all dirs
 743              if (is_dir($full_path)) {
 744                  continue;
 745              }
 746  
 747              // we only want lang.php files
 748              if (!preg_match('/^([^.]+)\.php$/', $entry, $matches)) {
 749                  continue;
 750              }
 751  
 752              // Raw lang name is here
 753              $langname = $matches[1];
 754  
 755              // We want the fullname too?
 756              if ($full_names === true)
 757              {
 758                  if (false !== ($fullname = $this->get_language_fullname($langname)))
 759                  {
 760                      $back[$langname] = $fullname; // we go associative
 761                  }
 762              }
 763              else
 764              {
 765                  // just store raw langname
 766                  $back[] = $langname;
 767              }
 768          }
 769  
 770          $dir->close();
 771  
 772          return $back;
 773      }
 774  
 775      /**
 776       * Get full_name for a lang or false.
 777       * @param string $language short langname (html4strict for example)
 778       * @return mixed
 779       */
 780      public function get_language_fullname($language)
 781      {
 782          //Clean up the language name to prevent malicious code injection
 783          $language = preg_replace('#[^a-zA-Z0-9\-_]#', '', $language);
 784  
 785          $language = strtolower($language);
 786  
 787          // get fullpath-filename for a langname
 788          $fullpath = $this->language_path.$language.'.php';
 789  
 790          // we need to get contents :S
 791          if (false === ($data = file_get_contents($fullpath))) {
 792              $this->error = sprintf('Geshi::get_lang_fullname() Unknown Language: %s', $language);
 793              return false;
 794          }
 795  
 796          // match the langname
 797          if (!preg_match('/\'LANG_NAME\'\s*=>\s*\'((?:[^\']|\\\')+?)\'/', $data, $matches)) {
 798              $this->error = sprintf('Geshi::get_lang_fullname(%s): Regex can not detect language', $language);
 799              return false;
 800          }
 801  
 802          // return fullname for langname
 803          return stripcslashes($matches[1]);
 804      }
 805  
 806      /**
 807       * Sets the type of header to be used.
 808       *
 809       * If GESHI_HEADER_DIV is used, the code is surrounded in a "div".This
 810       * means more source code but more control over tab width and line-wrapping.
 811       * GESHI_HEADER_PRE means that a "pre" is used - less source, but less
 812       * control. Default is GESHI_HEADER_PRE.
 813       *
 814       * From 1.0.7.2, you can use GESHI_HEADER_NONE to specify that no header code
 815       * should be outputted.
 816       *
 817       * @param int $type The type of header to be used
 818       * @since 1.0.0
 819       */
 820      public function set_header_type($type) {
 821          //Check if we got a valid header type
 822          if (!in_array($type, array(GESHI_HEADER_NONE, GESHI_HEADER_DIV,
 823              GESHI_HEADER_PRE, GESHI_HEADER_PRE_VALID, GESHI_HEADER_PRE_TABLE))) {
 824              $this->error = GESHI_ERROR_INVALID_HEADER_TYPE;
 825              return;
 826          }
 827  
 828          //Set that new header type
 829          $this->header_type = $type;
 830      }
 831  
 832      /**
 833       * Sets the styles for the code that will be outputted
 834       * when this object is parsed. The style should be a
 835       * string of valid stylesheet declarations
 836       *
 837       * @param string  $style             The overall style for the outputted code block
 838       * @param boolean $preserve_defaults Whether to merge the styles with the current styles or not
 839       * @since 1.0.0
 840       */
 841      public function set_overall_style($style, $preserve_defaults = false) {
 842          if (!$preserve_defaults) {
 843              $this->overall_style = $style;
 844          } else {
 845              $this->overall_style .= $style;
 846          }
 847      }
 848  
 849      /**
 850       * Sets the overall classname for this block of code. This
 851       * class can then be used in a stylesheet to style this object's
 852       * output
 853       *
 854       * @param string $class The class name to use for this block of code
 855       * @since 1.0.0
 856       */
 857      public function set_overall_class($class) {
 858          $this->overall_class = $class;
 859      }
 860  
 861      /**
 862       * Sets the overall id for this block of code. This id can then
 863       * be used in a stylesheet to style this object's output
 864       *
 865       * @param string $id The ID to use for this block of code
 866       * @since 1.0.0
 867       */
 868      public function set_overall_id($id) {
 869          $this->overall_id = $id;
 870      }
 871  
 872      /**
 873       * Sets whether CSS classes should be used to highlight the source. Default
 874       * is off, calling this method with no arguments will turn it on
 875       *
 876       * @param boolean $flag Whether to turn classes on or not
 877       * @since 1.0.0
 878       */
 879      public function enable_classes($flag = true) {
 880          $this->use_classes = ($flag) ? true : false;
 881      }
 882  
 883      /**
 884       * Sets the style for the actual code. This should be a string
 885       * containing valid stylesheet declarations. If $preserve_defaults is
 886       * true, then styles are merged with the default styles, with the
 887       * user defined styles having priority
 888       *
 889       * Note: Use this method to override any style changes you made to
 890       * the line numbers if you are using line numbers, else the line of
 891       * code will have the same style as the line number! Consult the
 892       * GeSHi documentation for more information about this.
 893       *
 894       * @param string  $style             The style to use for actual code
 895       * @param boolean $preserve_defaults Whether to merge the current styles with the new styles
 896       * @since 1.0.2
 897       */
 898      public function set_code_style($style, $preserve_defaults = false) {
 899          if (!$preserve_defaults) {
 900              $this->code_style = $style;
 901          } else {
 902              $this->code_style .= $style;
 903          }
 904      }
 905  
 906      /**
 907       * Sets the styles for the line numbers.
 908       *
 909       * @param string         $style1 The style for the line numbers that are "normal"
 910       * @param string|boolean $style2 If a string, this is the style of the line
 911       *        numbers that are "fancy", otherwise if boolean then this
 912       *        defines whether the normal styles should be merged with the
 913       *        new normal styles or not
 914       * @param boolean        $preserve_defaults If set, is the flag for whether to merge the "fancy"
 915       *        styles with the current styles or not
 916       * @since 1.0.2
 917       */
 918      public function set_line_style($style1, $style2 = '', $preserve_defaults = false) {
 919          //Check if we got 2 or three parameters
 920          if (is_bool($style2)) {
 921              $preserve_defaults = $style2;
 922              $style2 = '';
 923          }
 924  
 925          //Actually set the new styles
 926          if (!$preserve_defaults) {
 927              $this->line_style1 = $style1;
 928              $this->line_style2 = $style2;
 929          } else {
 930              $this->line_style1 .= $style1;
 931              $this->line_style2 .= $style2;
 932          }
 933      }
 934  
 935      /**
 936       * Sets whether line numbers should be displayed.
 937       *
 938       * Valid values for the first parameter are:
 939       *
 940       *  - GESHI_NO_LINE_NUMBERS: Line numbers will not be displayed
 941       *  - GESHI_NORMAL_LINE_NUMBERS: Line numbers will be displayed
 942       *  - GESHI_FANCY_LINE_NUMBERS: Fancy line numbers will be displayed
 943       *
 944       * For fancy line numbers, the second parameter is used to signal which lines
 945       * are to be fancy. For example, if the value of this parameter is 5 then every
 946       * 5th line will be fancy.
 947       *
 948       * @param int $flag    How line numbers should be displayed
 949       * @param int $nth_row Defines which lines are fancy
 950       * @since 1.0.0
 951       */
 952      public function enable_line_numbers($flag, $nth_row = 5) {
 953          if (GESHI_NO_LINE_NUMBERS != $flag && GESHI_NORMAL_LINE_NUMBERS != $flag
 954              && GESHI_FANCY_LINE_NUMBERS != $flag) {
 955              $this->error = GESHI_ERROR_INVALID_LINE_NUMBER_TYPE;
 956          }
 957          $this->line_numbers = $flag;
 958          $this->line_nth_row = $nth_row;
 959      }
 960  
 961      /**
 962       * Sets whether spans and other HTML markup generated by GeSHi can
 963       * span over multiple lines or not. Defaults to true to reduce overhead.
 964       * Set it to false if you want to manipulate the output or manually display
 965       * the code in an ordered list.
 966       *
 967       * @param boolean $flag Whether multiline spans are allowed or not
 968       * @since 1.0.7.22
 969       */
 970      public function enable_multiline_span($flag) {
 971          $this->allow_multiline_span = (bool) $flag;
 972      }
 973  
 974      /**
 975       * Get current setting for multiline spans, see GeSHi->enable_multiline_span().
 976       *
 977       * @see enable_multiline_span
 978       * @return bool
 979       */
 980      public function get_multiline_span() {
 981          return $this->allow_multiline_span;
 982      }
 983  
 984      /**
 985       * Sets the style for a keyword group. If $preserve_defaults is
 986       * true, then styles are merged with the default styles, with the
 987       * user defined styles having priority
 988       *
 989       * @param int     $key               The key of the keyword group to change the styles of
 990       * @param string  $style             The style to make the keywords
 991       * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
 992       *                to overwrite them
 993       * @since 1.0.0
 994       */
 995      public function set_keyword_group_style($key, $style, $preserve_defaults = false) {
 996          //Set the style for this keyword group
 997          if('*' == $key) {
 998              foreach($this->language_data['STYLES']['KEYWORDS'] as $_key => $_value) {
 999                  if (!$preserve_defaults) {
1000                      $this->language_data['STYLES']['KEYWORDS'][$_key] = $style;
1001                  } else {
1002                      $this->language_data['STYLES']['KEYWORDS'][$_key] .= $style;
1003                  }
1004              }
1005          } else {
1006              if (!$preserve_defaults) {
1007                  $this->language_data['STYLES']['KEYWORDS'][$key] = $style;
1008              } else {
1009                  $this->language_data['STYLES']['KEYWORDS'][$key] .= $style;
1010              }
1011          }
1012  
1013          //Update the lexic permissions
1014          if (!isset($this->lexic_permissions['KEYWORDS'][$key])) {
1015              $this->lexic_permissions['KEYWORDS'][$key] = true;
1016          }
1017      }
1018  
1019      /**
1020       * Turns highlighting on/off for a keyword group
1021       *
1022       * @param int     $key  The key of the keyword group to turn on or off
1023       * @param boolean $flag Whether to turn highlighting for that group on or off
1024       * @since 1.0.0
1025       */
1026      public function set_keyword_group_highlighting($key, $flag = true) {
1027          $this->lexic_permissions['KEYWORDS'][$key] = ($flag) ? true : false;
1028      }
1029  
1030      /**
1031       * Sets the styles for comment groups.  If $preserve_defaults is
1032       * true, then styles are merged with the default styles, with the
1033       * user defined styles having priority
1034       *
1035       * @param int     $key               The key of the comment group to change the styles of
1036       * @param string  $style             The style to make the comments
1037       * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
1038       *                to overwrite them
1039       * @since 1.0.0
1040       */
1041      public function set_comments_style($key, $style, $preserve_defaults = false) {
1042          if('*' == $key) {
1043              foreach($this->language_data['STYLES']['COMMENTS'] as $_key => $_value) {
1044                  if (!$preserve_defaults) {
1045                      $this->language_data['STYLES']['COMMENTS'][$_key] = $style;
1046                  } else {
1047                      $this->language_data['STYLES']['COMMENTS'][$_key] .= $style;
1048                  }
1049              }
1050          } else {
1051              if (!$preserve_defaults) {
1052                  $this->language_data['STYLES']['COMMENTS'][$key] = $style;
1053              } else {
1054                  $this->language_data['STYLES']['COMMENTS'][$key] .= $style;
1055              }
1056          }
1057      }
1058  
1059      /**
1060       * Turns highlighting on/off for comment groups
1061       *
1062       * @param int     $key  The key of the comment group to turn on or off
1063       * @param boolean $flag Whether to turn highlighting for that group on or off
1064       * @since 1.0.0
1065       */
1066      public function set_comments_highlighting($key, $flag = true) {
1067          $this->lexic_permissions['COMMENTS'][$key] = ($flag) ? true : false;
1068      }
1069  
1070      /**
1071       * Sets the styles for escaped characters. If $preserve_defaults is
1072       * true, then styles are merged with the default styles, with the
1073       * user defined styles having priority
1074       *
1075       * @param string  $style             The style to make the escape characters
1076       * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
1077       *                                   to overwrite them
1078       * @param int     $group             Tells the group of symbols for which style should be set.
1079       * @since 1.0.0
1080       */
1081      public function set_escape_characters_style($style, $preserve_defaults = false, $group = 0) {
1082          if (!$preserve_defaults) {
1083              $this->language_data['STYLES']['ESCAPE_CHAR'][$group] = $style;
1084          } else {
1085              $this->language_data['STYLES']['ESCAPE_CHAR'][$group] .= $style;
1086          }
1087      }
1088  
1089      /**
1090       * Turns highlighting on/off for escaped characters
1091       *
1092       * @param boolean $flag Whether to turn highlighting for escape characters on or off
1093       * @since 1.0.0
1094       */
1095      public function set_escape_characters_highlighting($flag = true) {
1096          $this->lexic_permissions['ESCAPE_CHAR'] = ($flag) ? true : false;
1097      }
1098  
1099      /**
1100       * Sets the styles for brackets. If $preserve_defaults is
1101       * true, then styles are merged with the default styles, with the
1102       * user defined styles having priority
1103       *
1104       * This method is DEPRECATED: use set_symbols_style instead.
1105       * This method will be removed in 1.2.X
1106       *
1107       * @param string  $style             The style to make the brackets
1108       * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
1109       *                to overwrite them
1110       * @since 1.0.0
1111       * @deprecated In favour of set_symbols_style
1112       */
1113      public function set_brackets_style($style, $preserve_defaults = false) {
1114          if (!$preserve_defaults) {
1115              $this->language_data['STYLES']['BRACKETS'][0] = $style;
1116          } else {
1117              $this->language_data['STYLES']['BRACKETS'][0] .= $style;
1118          }
1119      }
1120  
1121      /**
1122       * Turns highlighting on/off for brackets
1123       *
1124       * This method is DEPRECATED: use set_symbols_highlighting instead.
1125       * This method will be removed in 1.2.X
1126       *
1127       * @param boolean $flag Whether to turn highlighting for brackets on or off
1128       * @since 1.0.0
1129       * @deprecated In favour of set_symbols_highlighting
1130       */
1131      public function set_brackets_highlighting($flag) {
1132          $this->lexic_permissions['BRACKETS'] = ($flag) ? true : false;
1133      }
1134  
1135      /**
1136       * Sets the styles for symbols. If $preserve_defaults is
1137       * true, then styles are merged with the default styles, with the
1138       * user defined styles having priority
1139       *
1140       * @param string  $style             The style to make the symbols
1141       * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
1142       *                                   to overwrite them
1143       * @param int     $group             Tells the group of symbols for which style should be set.
1144       * @since 1.0.1
1145       */
1146      public function set_symbols_style($style, $preserve_defaults = false, $group = 0) {
1147          // Update the style of symbols
1148          if (!$preserve_defaults) {
1149              $this->language_data['STYLES']['SYMBOLS'][$group] = $style;
1150          } else {
1151              $this->language_data['STYLES']['SYMBOLS'][$group] .= $style;
1152          }
1153  
1154          // For backward compatibility
1155          if (0 == $group) {
1156              $this->set_brackets_style ($style, $preserve_defaults);
1157          }
1158      }
1159  
1160      /**
1161       * Turns highlighting on/off for symbols
1162       *
1163       * @param boolean $flag Whether to turn highlighting for symbols on or off
1164       * @since 1.0.0
1165       */
1166      public function set_symbols_highlighting($flag) {
1167          // Update lexic permissions for this symbol group
1168          $this->lexic_permissions['SYMBOLS'] = ($flag) ? true : false;
1169  
1170          // For backward compatibility
1171          $this->set_brackets_highlighting ($flag);
1172      }
1173  
1174      /**
1175       * Sets the styles for strings. If $preserve_defaults is
1176       * true, then styles are merged with the default styles, with the
1177       * user defined styles having priority
1178       *
1179       * @param string  $style             The style to make the strings
1180       * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
1181       *                                   to overwrite them
1182       * @param int     $group             Tells the group of strings for which style should be set.
1183       * @since 1.0.0
1184       */
1185      public function set_strings_style($style, $preserve_defaults = false, $group = 0) {
1186          if (!$preserve_defaults) {
1187              $this->language_data['STYLES']['STRINGS'][$group] = $style;
1188          } else {
1189              $this->language_data['STYLES']['STRINGS'][$group] .= $style;
1190          }
1191      }
1192  
1193      /**
1194       * Turns highlighting on/off for strings
1195       *
1196       * @param boolean $flag Whether to turn highlighting for strings on or off
1197       * @since 1.0.0
1198       */
1199      public function set_strings_highlighting($flag) {
1200          $this->lexic_permissions['STRINGS'] = ($flag) ? true : false;
1201      }
1202  
1203      /**
1204       * Sets the styles for strict code blocks. If $preserve_defaults is
1205       * true, then styles are merged with the default styles, with the
1206       * user defined styles having priority
1207       *
1208       * @param string  $style             The style to make the script blocks
1209       * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
1210       *                                   to overwrite them
1211       * @param int     $group             Tells the group of script blocks for which style should be set.
1212       * @since 1.0.8.4
1213       */
1214      public function set_script_style($style, $preserve_defaults = false, $group = 0) {
1215          // Update the style of symbols
1216          if (!$preserve_defaults) {
1217              $this->language_data['STYLES']['SCRIPT'][$group] = $style;
1218          } else {
1219              $this->language_data['STYLES']['SCRIPT'][$group] .= $style;
1220          }
1221      }
1222  
1223      /**
1224       * Sets the styles for numbers. If $preserve_defaults is
1225       * true, then styles are merged with the default styles, with the
1226       * user defined styles having priority
1227       *
1228       * @param string  $style             The style to make the numbers
1229       * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
1230       *                                   to overwrite them
1231       * @param int     $group             Tells the group of numbers for which style should be set.
1232       * @since 1.0.0
1233       */
1234      public function set_numbers_style($style, $preserve_defaults = false, $group = 0) {
1235          if (!$preserve_defaults) {
1236              $this->language_data['STYLES']['NUMBERS'][$group] = $style;
1237          } else {
1238              $this->language_data['STYLES']['NUMBERS'][$group] .= $style;
1239          }
1240      }
1241  
1242      /**
1243       * Turns highlighting on/off for numbers
1244       *
1245       * @param boolean $flag Whether to turn highlighting for numbers on or off
1246       * @since 1.0.0
1247       */
1248      public function set_numbers_highlighting($flag) {
1249          $this->lexic_permissions['NUMBERS'] = ($flag) ? true : false;
1250      }
1251  
1252      /**
1253       * Sets the styles for methods. $key is a number that references the
1254       * appropriate "object splitter" - see the language file for the language
1255       * you are highlighting to get this number. If $preserve_defaults is
1256       * true, then styles are merged with the default styles, with the
1257       * user defined styles having priority
1258       *
1259       * @param int     $key               The key of the object splitter to change the styles of
1260       * @param string  $style             The style to make the methods
1261       * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
1262       *                                   to overwrite them
1263       * @since 1.0.0
1264       */
1265      public function set_methods_style($key, $style, $preserve_defaults = false) {
1266          if (!$preserve_defaults) {
1267              $this->language_data['STYLES']['METHODS'][$key] = $style;
1268          } else {
1269              $this->language_data['STYLES']['METHODS'][$key] .= $style;
1270          }
1271      }
1272  
1273      /**
1274       * Turns highlighting on/off for methods
1275       *
1276       * @param boolean $flag Whether to turn highlighting for methods on or off
1277       * @since 1.0.0
1278       */
1279      public function set_methods_highlighting($flag) {
1280          $this->lexic_permissions['METHODS'] = ($flag) ? true : false;
1281      }
1282  
1283      /**
1284       * Sets the styles for regexps. If $preserve_defaults is
1285       * true, then styles are merged with the default styles, with the
1286       * user defined styles having priority
1287       *
1288       * @param int     $key               The key of the regular expression group to change
1289       *                                   the styles of
1290       * @param string  $style             The style to make the regular expression matches
1291       * @param bool    $preserve_defaults Whether to merge the new styles with the old or just
1292       *                                   to overwrite them
1293       * @since 1.0.0
1294       */
1295      public function set_regexps_style($key, $style, $preserve_defaults = false) {
1296          if (!$preserve_defaults) {
1297              $this->language_data['STYLES']['REGEXPS'][$key] = $style;
1298          } else {
1299              $this->language_data['STYLES']['REGEXPS'][$key] .= $style;
1300          }
1301      }
1302  
1303      /**
1304       * Turns highlighting on/off for regexps
1305       *
1306       * @param int     $key  The key of the regular expression group to turn on or off
1307       * @param boolean $flag Whether to turn highlighting for the regular expression group on or off
1308       * @since 1.0.0
1309       */
1310      public function set_regexps_highlighting($key, $flag) {
1311          $this->lexic_permissions['REGEXPS'][$key] = ($flag) ? true : false;
1312      }
1313  
1314      /**
1315       * Sets whether a set of keywords are checked for in a case sensitive manner
1316       *
1317       * @param int     $key  The key of the keyword group to change the case sensitivity of
1318       * @param boolean $case Whether to check in a case sensitive manner or not
1319       * @since 1.0.0
1320       */
1321      public function set_case_sensitivity($key, $case) {
1322          $this->language_data['CASE_SENSITIVE'][$key] = ($case) ? true : false;
1323      }
1324  
1325      /**
1326       * Sets the case that keywords should use when found. Use the constants:
1327       *
1328       *  - GESHI_CAPS_NO_CHANGE: leave keywords as-is
1329       *  - GESHI_CAPS_UPPER: convert all keywords to uppercase where found
1330       *  - GESHI_CAPS_LOWER: convert all keywords to lowercase where found
1331       *
1332       * @param int $case A constant specifying what to do with matched keywords
1333       * @since 1.0.1
1334       */
1335      public function set_case_keywords($case) {
1336          if (in_array($case, array(
1337              GESHI_CAPS_NO_CHANGE, GESHI_CAPS_UPPER, GESHI_CAPS_LOWER))) {
1338              $this->language_data['CASE_KEYWORDS'] = $case;
1339          }
1340      }
1341  
1342      /**
1343       * Sets how many spaces a tab is substituted for
1344       *
1345       * Widths below one are replaced by the default width of 8
1346       *
1347       * @param int $width The tab width
1348       * @since 1.0.0
1349       */
1350      public function set_tab_width($width) {
1351          $this->tab_width = intval($width);
1352  
1353          //Check if it fit's the constraints:
1354          if ($this->tab_width < 1) {
1355              //Return it to the default
1356              $this->tab_width = 8;
1357          }
1358      }
1359  
1360      /**
1361       * Sets whether or not to use tab-stop width specified by language
1362       *
1363       * @param boolean $use Whether to use language-specific tab-stop widths
1364       * @since 1.0.7.20
1365       */
1366      public function set_use_language_tab_width($use) {
1367          $this->use_language_tab_width = (bool) $use;
1368      }
1369  
1370      /**
1371       * Returns the tab width to use, based on the current language and user
1372       * preference
1373       *
1374       * @return int Tab width
1375       * @since 1.0.7.20
1376       */
1377      public function get_real_tab_width() {
1378          if (!$this->use_language_tab_width ||
1379              !isset($this->language_data['TAB_WIDTH'])) {
1380              return $this->tab_width;
1381          } else {
1382              return $this->language_data['TAB_WIDTH'];
1383          }
1384      }
1385  
1386      /**
1387       * Enables/disables strict highlighting. Default is off, calling this
1388       * method without parameters will turn it on. See documentation
1389       * for more details on strict mode and where to use it.
1390       *
1391       * @param boolean $mode Whether to enable strict mode or not
1392       * @since 1.0.0
1393       */
1394      public function enable_strict_mode($mode = true) {
1395          if (GESHI_MAYBE == $this->language_data['STRICT_MODE_APPLIES']) {
1396              $this->strict_mode = ($mode) ? GESHI_ALWAYS : GESHI_NEVER;
1397          }
1398      }
1399  
1400      /**
1401       * Disables all highlighting
1402       *
1403       * @since 1.0.0
1404       * @todo  Rewrite with array traversal
1405       * @deprecated In favour of enable_highlighting
1406       */
1407      public function disable_highlighting() {
1408          $this->enable_highlighting(false);
1409      }
1410  
1411      /**
1412       * Enables all highlighting
1413       *
1414       * The optional flag parameter was added in version 1.0.7.21 and can be used
1415       * to enable (true) or disable (false) all highlighting.
1416       *
1417       * @since 1.0.0
1418       * @param boolean $flag A flag specifying whether to enable or disable all highlighting
1419       * @todo  Rewrite with array traversal
1420       */
1421      public function enable_highlighting($flag = true) {
1422          $flag = $flag ? true : false;
1423          foreach ($this->lexic_permissions as $key => $value) {
1424              if (is_array($value)) {
1425                  foreach ($value as $k => $v) {
1426                      $this->lexic_permissions[$key][$k] = $flag;
1427                  }
1428              } else {
1429                  $this->lexic_permissions[$key] = $flag;
1430              }
1431          }
1432  
1433          // Context blocks
1434          $this->enable_important_blocks = $flag;
1435      }
1436  
1437      /**
1438       * Given a file extension, this method returns either a valid geshi language
1439       * name, or 'text' if it couldn't be found
1440       *
1441       * @param string $extension The extension to get a language name for
1442       * @param array  $lookup    A lookup array to use instead of the default one
1443       * @since 1.0.5
1444       * @todo Re-think about how this method works (maybe make it private and/or make it
1445       *       a extension->lang lookup?)
1446       * @return int|string
1447       */
1448      public static function get_language_name_from_extension( $extension, $lookup = array() ) {
1449          $extension = strtolower($extension);
1450  
1451          if ( !is_array($lookup) || empty($lookup)) {
1452              $lookup = array(
1453                  '6502acme' => array( 'a', 's', 'asm', 'inc' ),
1454                  '6502tasm' => array( 'a', 's', 'asm', 'inc' ),
1455                  '6502kickass' => array( 'a', 's', 'asm', 'inc' ),
1456                  '68000devpac' => array( 'a', 's', 'asm', 'inc' ),
1457                  'abap' => array('abap'),
1458                  'actionscript' => array('as'),
1459                  'ada' => array('a', 'ada', 'adb', 'ads'),
1460                  'apache' => array('conf'),
1461                  'asm' => array('ash', 'asm', 'inc'),
1462                  'asp' => array('asp'),
1463                  'bash' => array('sh'),
1464                  'bf' => array('bf'),
1465                  'c' => array('c', 'h'),
1466                  'c_mac' => array('c', 'h'),
1467                  'caddcl' => array(),
1468                  'cadlisp' => array(),
1469                  'cdfg' => array('cdfg'),
1470                  'cobol' => array('cbl'),
1471                  'cpp' => array('cpp', 'hpp', 'C', 'H', 'CPP', 'HPP'),
1472                  'csharp' => array('cs'),
1473                  'css' => array('css'),
1474                  'd' => array('d'),
1475                  'delphi' => array('dpk', 'dpr', 'pp', 'pas'),
1476                  'diff' => array('diff', 'patch'),
1477                  'dos' => array('bat', 'cmd'),
1478                  'gdb' => array('kcrash', 'crash', 'bt'),
1479                  'gettext' => array('po', 'pot'),
1480                  'gml' => array('gml'),
1481                  'gnuplot' => array('plt'),
1482                  'groovy' => array('groovy'),
1483                  'haskell' => array('hs'),
1484                  'haxe' => array('hx'),
1485                  'html4strict' => array('html', 'htm'),
1486                  'ini' => array('ini', 'desktop', 'vbp'),
1487                  'java' => array('java'),
1488                  'javascript' => array('js'),
1489                  'klonec' => array('kl1'),
1490                  'klonecpp' => array('klx'),
1491                  'latex' => array('tex'),
1492                  'lisp' => array('lisp'),
1493                  'lua' => array('lua'),
1494                  'matlab' => array('m'),
1495                  'mpasm' => array(),
1496                  'mysql' => array('sql'),
1497                  'nsis' => array(),
1498                  'objc' => array(),
1499                  'oobas' => array(),
1500                  'oracle8' => array(),
1501                  'oracle10' => array(),
1502                  'pascal' => array('pas'),
1503                  'perl' => array('pl', 'pm'),
1504                  'php' => array('php', 'php5', 'phtml', 'phps'),
1505                  'povray' => array('pov'),
1506                  'providex' => array('pvc', 'pvx'),
1507                  'prolog' => array('pl'),
1508                  'python' => array('py'),
1509                  'qbasic' => array('bi'),
1510                  'reg' => array('reg'),
1511                  'ruby' => array('rb'),
1512                  'sas' => array('sas'),
1513                  'scala' => array('scala'),
1514                  'scheme' => array('scm'),
1515                  'scilab' => array('sci'),
1516                  'smalltalk' => array('st'),
1517                  'smarty' => array(),
1518                  'tcl' => array('tcl'),
1519                  'text' => array('txt'),
1520                  'vb' => array('bas', 'ctl', 'frm'),
1521                  'vbnet' => array('vb', 'sln'),
1522                  'visualfoxpro' => array(),
1523                  'whitespace' => array('ws'),
1524                  'xml' => array('xml', 'svg', 'xrc', 'vbproj', 'csproj', 'userprefs', 'resx', 'stetic', 'settings', 'manifest', 'myapp'),
1525                  'z80' => array('z80', 'asm', 'inc')
1526              );
1527          }
1528  
1529          foreach ($lookup as $lang => $extensions) {
1530              if (in_array($extension, $extensions)) {
1531                  return $lang;
1532              }
1533          }
1534  
1535          return 'text';
1536      }
1537  
1538      /**
1539       * Given a file name, this method loads its contents in, and attempts
1540       * to set the language automatically. An optional lookup table can be
1541       * passed for looking up the language name. If not specified a default
1542       * table is used
1543       *
1544       * The language table is in the form
1545       * <pre>array(
1546       *   'lang_name' => array('extension', 'extension', ...),
1547       *   'lang_name' ...
1548       * );</pre>
1549       *
1550       * @param string $file_name The filename to load the source from
1551       * @param array  $lookup    A lookup array to use instead of the default one
1552       * @todo Complete rethink of this and above method
1553       * @since 1.0.5
1554       */
1555      public function load_from_file($file_name, $lookup = array()) {
1556          if (is_readable($file_name)) {
1557              $this->set_source(file_get_contents($file_name));
1558              $this->set_language(self::get_language_name_from_extension(substr(strrchr($file_name, '.'), 1), $lookup));
1559          } else {
1560              $this->error = GESHI_ERROR_FILE_NOT_READABLE;
1561          }
1562      }
1563  
1564      /**
1565       * Adds a keyword to a keyword group for highlighting
1566       *
1567       * @param int    $key  The key of the keyword group to add the keyword to
1568       * @param string $word The word to add to the keyword group
1569       * @since 1.0.0
1570       */
1571      public function add_keyword($key, $word) {
1572          if (!is_array($this->language_data['KEYWORDS'][$key])) {
1573              $this->language_data['KEYWORDS'][$key] = array();
1574          }
1575          if (!in_array($word, $this->language_data['KEYWORDS'][$key])) {
1576              $this->language_data['KEYWORDS'][$key][] = $word;
1577  
1578              //NEW in 1.0.8 don't recompile the whole optimized regexp, simply append it
1579              if ($this->parse_cache_built) {
1580                  $subkey = count($this->language_data['CACHED_KEYWORD_LISTS'][$key]) - 1;
1581                  $this->language_data['CACHED_KEYWORD_LISTS'][$key][$subkey] .= '|' . preg_quote($word, '/');
1582              }
1583          }
1584      }
1585  
1586      /**
1587       * Removes a keyword from a keyword group
1588       *
1589       * @param int    $key       The key of the keyword group to remove the keyword from
1590       * @param string $word      The word to remove from the keyword group
1591       * @param bool   $recompile Whether to automatically recompile the optimized regexp list or not.
1592       *               Note: if you set this to false and @see GeSHi->parse_code() was already called once,
1593       *               for the current language, you have to manually call @see GeSHi->optimize_keyword_group()
1594       *               or the removed keyword will stay in cache and still be highlighted! On the other hand
1595       *               it might be too expensive to recompile the regexp list for every removal if you want to
1596       *               remove a lot of keywords.
1597       * @since 1.0.0
1598       */
1599      public function remove_keyword($key, $word, $recompile = true) {
1600          $key_to_remove = array_search($word, $this->language_data['KEYWORDS'][$key]);
1601          if ($key_to_remove !== false) {
1602              unset($this->language_data['KEYWORDS'][$key][$key_to_remove]);
1603  
1604              //NEW in 1.0.8, optionally recompile keyword group
1605              if ($recompile && $this->parse_cache_built) {
1606                  $this->optimize_keyword_group($key);
1607              }
1608          }
1609      }
1610  
1611      /**
1612       * Creates a new keyword group
1613       *
1614       * @param int     $key            The key of the keyword group to create
1615       * @param string  $styles         The styles for the keyword group
1616       * @param boolean $case_sensitive Whether the keyword group is case sensitive or not
1617       * @param array   $words          The words to use for the keyword group
1618       * @since 1.0.0
1619       * @return bool
1620       */
1621      public function add_keyword_group($key, $styles, $case_sensitive = true, $words = array()) {
1622          $words = (array) $words;
1623          if  (empty($words)) {
1624              // empty word lists mess up highlighting
1625              return false;
1626          }
1627  
1628          //Add the new keyword group internally
1629          $this->language_data['KEYWORDS'][$key] = $words;
1630          $this->lexic_permissions['KEYWORDS'][$key] = true;
1631          $this->language_data['CASE_SENSITIVE'][$key] = $case_sensitive;
1632          $this->language_data['STYLES']['KEYWORDS'][$key] = $styles;
1633  
1634          //NEW in 1.0.8, cache keyword regexp
1635          if ($this->parse_cache_built) {
1636              $this->optimize_keyword_group($key);
1637          }
1638          return true;
1639      }
1640  
1641      /**
1642       * Removes a keyword group
1643       *
1644       * @param int $key The key of the keyword group to remove
1645       * @since 1.0.0
1646       */
1647      public function remove_keyword_group ($key) {
1648          //Remove the keyword group internally
1649          unset($this->language_data['KEYWORDS'][$key]);
1650          unset($this->lexic_permissions['KEYWORDS'][$key]);
1651          unset($this->language_data['CASE_SENSITIVE'][$key]);
1652          unset($this->language_data['STYLES']['KEYWORDS'][$key]);
1653  
1654          //NEW in 1.0.8
1655          unset($this->language_data['CACHED_KEYWORD_LISTS'][$key]);
1656      }
1657  
1658      /**
1659       * compile optimized regexp list for keyword group
1660       *
1661       * @param int $key The key of the keyword group to compile & optimize
1662       * @since 1.0.8
1663       */
1664      public function optimize_keyword_group($key) {
1665          $this->language_data['CACHED_KEYWORD_LISTS'][$key] =
1666              $this->optimize_regexp_list($this->language_data['KEYWORDS'][$key]);
1667          $space_as_whitespace = false;
1668          if(isset($this->language_data['PARSER_CONTROL'])) {
1669              if(isset($this->language_data['PARSER_CONTROL']['KEYWORDS'])) {
1670                  if(isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'])) {
1671                      $space_as_whitespace = $this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'];
1672                  }
1673                  if(isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'])) {
1674                      if(isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'])) {
1675                          $space_as_whitespace = $this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'];
1676                      }
1677                  }
1678              }
1679          }
1680          if($space_as_whitespace) {
1681              foreach($this->language_data['CACHED_KEYWORD_LISTS'][$key] as $rxk => $rxv) {
1682                  $this->language_data['CACHED_KEYWORD_LISTS'][$key][$rxk] =
1683                      str_replace(" ", "\\s+", $rxv);
1684              }
1685          }
1686      }
1687  
1688      /**
1689       * Sets the content of the header block
1690       *
1691       * @param string $content The content of the header block
1692       * @since 1.0.2
1693       */
1694      public function set_header_content($content) {
1695          $this->header_content = $content;
1696      }
1697  
1698      /**
1699       * Sets the content of the footer block
1700       *
1701       * @param string $content The content of the footer block
1702       * @since 1.0.2
1703       */
1704      public function set_footer_content($content) {
1705          $this->footer_content = $content;
1706      }
1707  
1708      /**
1709       * Sets the style for the header content
1710       *
1711       * @param string $style The style for the header content
1712       * @since 1.0.2
1713       */
1714      public function set_header_content_style($style) {
1715          $this->header_content_style = $style;
1716      }
1717  
1718      /**
1719       * Sets the style for the footer content
1720       *
1721       * @param string $style The style for the footer content
1722       * @since 1.0.2
1723       */
1724      public function set_footer_content_style($style) {
1725          $this->footer_content_style = $style;
1726      }
1727  
1728      /**
1729       * Sets whether to force a surrounding block around
1730       * the highlighted code or not
1731       *
1732       * @param boolean $flag Tells whether to enable or disable this feature
1733       * @since 1.0.7.20
1734       */
1735      public function enable_inner_code_block($flag) {
1736          $this->force_code_block = (bool)$flag;
1737      }
1738  
1739      /**
1740       * Sets the base URL to be used for keywords
1741       *
1742       * @param int    $group The key of the keyword group to set the URL for
1743       * @param string $url   The URL to set for the group. If {FNAME} is in
1744       *                      the url somewhere, it is replaced by the keyword
1745       *                      that the URL is being made for
1746       * @since 1.0.2
1747       */
1748      public function set_url_for_keyword_group($group, $url) {
1749          $this->language_data['URLS'][$group] = $url;
1750      }
1751  
1752      /**
1753       * Sets styles for links in code
1754       *
1755       * @param int    $type   A constant that specifies what state the style is being
1756       *                       set for - e.g. :hover or :visited
1757       * @param string $styles The styles to use for that state
1758       * @since 1.0.2
1759       */
1760      public function set_link_styles($type, $styles) {
1761          $this->link_styles[$type] = $styles;
1762      }
1763  
1764      /**
1765       * Sets the target for links in code
1766       *
1767       * @param string $target The target for links in the code, e.g. _blank
1768       * @since 1.0.3
1769       */
1770      public function set_link_target($target) {
1771          if (!$target) {
1772              $this->link_target = '';
1773          } else {
1774              $this->link_target = ' target="' . $target . '"';
1775          }
1776      }
1777  
1778      /**
1779       * Sets styles for important parts of the code
1780       *
1781       * @param string $styles The styles to use on important parts of the code
1782       * @since 1.0.2
1783       */
1784      public function set_important_styles($styles) {
1785          $this->important_styles = $styles;
1786      }
1787  
1788      /**
1789       * Sets whether context-important blocks are highlighted
1790       *
1791       * @param boolean $flag Tells whether to enable or disable highlighting of important blocks
1792       * @todo REMOVE THIS SHIZ FROM GESHI!
1793       * @deprecated
1794       * @since 1.0.2
1795       */
1796      public function enable_important_blocks($flag) {
1797          $this->enable_important_blocks = ( $flag ) ? true : false;
1798      }
1799  
1800      /**
1801       * Whether CSS IDs should be added to each line
1802       *
1803       * @param boolean $flag If true, IDs will be added to each line.
1804       * @since 1.0.2
1805       */
1806      public function enable_ids($flag = true) {
1807          $this->add_ids = ($flag) ? true : false;
1808      }
1809  
1810      /**
1811       * Specifies which lines to highlight extra
1812       *
1813       * The extra style parameter was added in 1.0.7.21.
1814       *
1815       * @param mixed  $lines An array of line numbers to highlight, or just a line
1816       *                      number on its own.
1817       * @param string $style A string specifying the style to use for this line.
1818       *                      If null is specified, the default style is used.
1819       *                      If false is specified, the line will be removed from
1820       *                      special highlighting
1821       * @since 1.0.2
1822       * @todo  Some data replication here that could be cut down on
1823       */
1824      public function highlight_lines_extra($lines, $style = null) {
1825          if (is_array($lines)) {
1826              //Split up the job using single lines at a time
1827              foreach ($lines as $line) {
1828                  $this->highlight_lines_extra($line, $style);
1829              }
1830          } else {
1831              //Mark the line as being highlighted specially
1832              $lines = intval($lines);
1833              $this->highlight_extra_lines[$lines] = $lines;
1834  
1835              //Decide on which style to use
1836              if ($style === null) { //Check if we should use default style
1837                  unset($this->highlight_extra_lines_styles[$lines]);
1838              } elseif ($style === false) { //Check if to remove this line
1839                  unset($this->highlight_extra_lines[$lines]);
1840                  unset($this->highlight_extra_lines_styles[$lines]);
1841              } else {
1842                  $this->highlight_extra_lines_styles[$lines] = $style;
1843              }
1844          }
1845      }
1846  
1847      /**
1848       * Sets the style for extra-highlighted lines
1849       *
1850       * @param string $styles The style for extra-highlighted lines
1851       * @since 1.0.2
1852       */
1853      public function set_highlight_lines_extra_style($styles) {
1854          $this->highlight_extra_lines_style = $styles;
1855      }
1856  
1857      /**
1858       * Sets the line-ending
1859       *
1860       * @param string $line_ending The new line-ending
1861       * @since 1.0.2
1862       */
1863      public function set_line_ending($line_ending) {
1864          $this->line_ending = (string)$line_ending;
1865      }
1866  
1867      /**
1868       * Sets what number line numbers should start at. Should
1869       * be a positive integer, and will be converted to one.
1870       *
1871       * <b>Warning:</b> Using this method will add the "start"
1872       * attribute to the &lt;ol&gt; that is used for line numbering.
1873       * This is <b>not</b> valid XHTML strict, so if that's what you
1874       * care about then don't use this method. Firefox is getting
1875       * support for the CSS method of doing this in 1.1 and Opera
1876       * has support for the CSS method, but (of course) IE doesn't
1877       * so it's not worth doing it the CSS way yet.
1878       *
1879       * @param int $number The number to start line numbers at
1880       * @since 1.0.2
1881       */
1882      public function start_line_numbers_at($number) {
1883          $this->line_numbers_start = abs(intval($number));
1884      }
1885  
1886      /**
1887       * Sets the encoding used for htmlspecialchars(), for international
1888       * support.
1889       *
1890       * NOTE: This is not needed for now because htmlspecialchars() is not
1891       * being used (it has a security hole in PHP4 that has not been patched).
1892       * Maybe in a future version it may make a return for speed reasons, but
1893       * I doubt it.
1894       *
1895       * @param string $encoding The encoding to use for the source
1896       * @since 1.0.3
1897       */
1898      public function set_encoding($encoding) {
1899          if ($encoding) {
1900            $this->encoding = strtolower($encoding);
1901          }
1902      }
1903  
1904      /**
1905       * Turns linking of keywords on or off.
1906       *
1907       * @param boolean $enable If true, links will be added to keywords
1908       * @since 1.0.2
1909       */
1910      public function enable_keyword_links($enable = true) {
1911          $this->keyword_links = (bool) $enable;
1912      }
1913  
1914      /**
1915       * Setup caches needed for styling. This is automatically called in
1916       * parse_code() and get_stylesheet() when appropriate. This function helps
1917       * stylesheet generators as they rely on some style information being
1918       * preprocessed
1919       *
1920       * @since 1.0.8
1921       */
1922      protected function build_style_cache() {
1923          //Build the style cache needed to highlight numbers appropriate
1924          if($this->lexic_permissions['NUMBERS']) {
1925              //First check what way highlighting information for numbers are given
1926              if(!isset($this->language_data['NUMBERS'])) {
1927                  $this->language_data['NUMBERS'] = 0;
1928              }
1929  
1930              if(is_array($this->language_data['NUMBERS'])) {
1931                  $this->language_data['NUMBERS_CACHE'] = $this->language_data['NUMBERS'];
1932              } else {
1933                  $this->language_data['NUMBERS_CACHE'] = array();
1934                  if(!$this->language_data['NUMBERS']) {
1935                      $this->language_data['NUMBERS'] =
1936                          GESHI_NUMBER_INT_BASIC |
1937                          GESHI_NUMBER_FLT_NONSCI;
1938                  }
1939  
1940                  for($i = 0, $j = $this->language_data['NUMBERS']; $j > 0; ++$i, $j>>=1) {
1941                      //Rearrange style indices if required ...
1942                      if(isset($this->language_data['STYLES']['NUMBERS'][1<<$i])) {
1943                          $this->language_data['STYLES']['NUMBERS'][$i] =
1944                              $this->language_data['STYLES']['NUMBERS'][1<<$i];
1945                          unset($this->language_data['STYLES']['NUMBERS'][1<<$i]);
1946                      }
1947  
1948                      //Check if this bit is set for highlighting
1949                      if($j&1) {
1950                          //So this bit is set ...
1951                          //Check if it belongs to group 0 or the actual stylegroup
1952                          if(isset($this->language_data['STYLES']['NUMBERS'][$i])) {
1953                              $this->language_data['NUMBERS_CACHE'][$i] = 1 << $i;
1954                          } else {
1955                              if(!isset($this->language_data['NUMBERS_CACHE'][0])) {
1956                                  $this->language_data['NUMBERS_CACHE'][0] = 0;
1957                              }
1958                              $this->language_data['NUMBERS_CACHE'][0] |= 1 << $i;
1959                          }
1960                      }
1961                  }
1962              }
1963          }
1964      }
1965  
1966      /**
1967       * Setup caches needed for parsing. This is automatically called in parse_code() when appropriate.
1968       * This function makes stylesheet generators much faster as they do not need these caches.
1969       *
1970       * @since 1.0.8
1971       */
1972      protected function build_parse_cache() {
1973          // check whether language_data is available
1974          if (empty($this->language_data)) {
1975              return false;
1976          }
1977  
1978          // cache symbol regexp
1979          //As this is a costy operation, we avoid doing it for multiple groups ...
1980          //Instead we perform it for all symbols at once.
1981          //
1982          //For this to work, we need to reorganize the data arrays.
1983          if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
1984              $this->language_data['MULTIPLE_SYMBOL_GROUPS'] = count($this->language_data['STYLES']['SYMBOLS']) > 1;
1985  
1986              $this->language_data['SYMBOL_DATA'] = array();
1987              $symbol_preg_multi = array(); // multi char symbols
1988              $symbol_preg_single = array(); // single char symbols
1989              foreach ($this->language_data['SYMBOLS'] as $key => $symbols) {
1990                  if (is_array($symbols)) {
1991                      foreach ($symbols as $sym) {
1992                          $sym = $this->hsc($sym);
1993                          if (!isset($this->language_data['SYMBOL_DATA'][$sym])) {
1994                              $this->language_data['SYMBOL_DATA'][$sym] = $key;
1995                              if (isset($sym[1])) { // multiple chars
1996                                  $symbol_preg_multi[] = preg_quote($sym, '/');
1997                              } else { // single char
1998                                  if ($sym == '-') {
1999                                      // don't trigger range out of order error
2000                                      $symbol_preg_single[] = '\-';
2001                                  } else {
2002                                      $symbol_preg_single[] = preg_quote($sym, '/');
2003                                  }
2004                              }
2005                          }
2006                      }
2007                  } else {
2008                      $symbols = $this->hsc($symbols);
2009                      if (!isset($this->language_data['SYMBOL_DATA'][$symbols])) {
2010                          $this->language_data['SYMBOL_DATA'][$symbols] = 0;
2011                          if (isset($symbols[1])) { // multiple chars
2012                              $symbol_preg_multi[] = preg_quote($symbols, '/');
2013                          } elseif ($symbols == '-') {
2014                              // don't trigger range out of order error
2015                              $symbol_preg_single[] = '\-';
2016                          } else { // single char
2017                              $symbol_preg_single[] = preg_quote($symbols, '/');
2018                          }
2019                      }
2020                  }
2021              }
2022  
2023              //Now we have an array with each possible symbol as the key and the style as the actual data.
2024              //This way we can set the correct style just the moment we highlight ...
2025              //
2026              //Now we need to rewrite our array to get a search string that
2027              $symbol_preg = array();
2028              if (!empty($symbol_preg_multi)) {
2029                  rsort($symbol_preg_multi);
2030                  $symbol_preg[] = implode('|', $symbol_preg_multi);
2031              }
2032              if (!empty($symbol_preg_single)) {
2033                  rsort($symbol_preg_single);
2034                  $symbol_preg[] = '[' . implode('', $symbol_preg_single) . ']';
2035              }
2036              $this->language_data['SYMBOL_SEARCH'] = implode("|", $symbol_preg);
2037          }
2038  
2039          // cache optimized regexp for keyword matching
2040          // remove old cache
2041          $this->language_data['CACHED_KEYWORD_LISTS'] = array();
2042          foreach (array_keys($this->language_data['KEYWORDS']) as $key) {
2043              if (!isset($this->lexic_permissions['KEYWORDS'][$key]) ||
2044                      $this->lexic_permissions['KEYWORDS'][$key]) {
2045                  $this->optimize_keyword_group($key);
2046              }
2047          }
2048  
2049          // brackets
2050          if ($this->lexic_permissions['BRACKETS']) {
2051              $this->language_data['CACHE_BRACKET_MATCH'] = array('[', ']', '(', ')', '{', '}');
2052              if (!$this->use_classes && isset($this->language_data['STYLES']['BRACKETS'][0])) {
2053                  $this->language_data['CACHE_BRACKET_REPLACE'] = array(
2054                      '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#91;|>',
2055                      '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#93;|>',
2056                      '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#40;|>',
2057                      '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#41;|>',
2058                      '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#123;|>',
2059                      '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#125;|>',
2060                  );
2061              }
2062              else {
2063                  $this->language_data['CACHE_BRACKET_REPLACE'] = array(
2064                      '<| class="br0">&#91;|>',
2065                      '<| class="br0">&#93;|>',
2066                      '<| class="br0">&#40;|>',
2067                      '<| class="br0">&#41;|>',
2068                      '<| class="br0">&#123;|>',
2069                      '<| class="br0">&#125;|>',
2070                  );
2071              }
2072          }
2073  
2074          //Build the parse cache needed to highlight numbers appropriate
2075          if($this->lexic_permissions['NUMBERS']) {
2076              //Check if the style rearrangements have been processed ...
2077              //This also does some preprocessing to check which style groups are useable ...
2078              if(!isset($this->language_data['NUMBERS_CACHE'])) {
2079                  $this->build_style_cache();
2080              }
2081  
2082              //Number format specification
2083              //All this formats are matched case-insensitively!
2084              static $numbers_format = array(
2085                  GESHI_NUMBER_INT_BASIC =>
2086                      '(?:(?<![0-9a-z_\.%$@])|(?<=\.\.))(?<![\d\.]e[+\-])([1-9]\d*?|0)(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2087                  GESHI_NUMBER_INT_CSTYLE =>
2088                      '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])([1-9]\d*?|0)l(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2089                  GESHI_NUMBER_BIN_SUFFIX =>
2090                      '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[01]+?[bB](?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2091                  GESHI_NUMBER_BIN_PREFIX_PERCENT =>
2092                      '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])%[01]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2093                  GESHI_NUMBER_BIN_PREFIX_0B =>
2094                      '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])0b[01]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2095                  GESHI_NUMBER_OCT_PREFIX =>
2096                      '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2097                  GESHI_NUMBER_OCT_PREFIX_0O =>
2098                      '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])0o[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2099                  GESHI_NUMBER_OCT_PREFIX_AT =>
2100                      '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])\@[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2101                  GESHI_NUMBER_OCT_SUFFIX =>
2102                      '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[0-7]+?o(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2103                  GESHI_NUMBER_HEX_PREFIX =>
2104                      '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0x[0-9a-fA-F]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2105                  GESHI_NUMBER_HEX_PREFIX_DOLLAR =>
2106                      '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\$[0-9a-fA-F]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2107                  GESHI_NUMBER_HEX_SUFFIX =>
2108                      '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d[0-9a-fA-F]*?[hH](?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2109                  GESHI_NUMBER_FLT_NONSCI =>
2110                      '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d+?\.\d+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2111                  GESHI_NUMBER_FLT_NONSCI_F =>
2112                      '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)f(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2113                  GESHI_NUMBER_FLT_SCI_SHORT =>
2114                      '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\.\d+?(?:e[+\-]?\d+?)?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2115                  GESHI_NUMBER_FLT_SCI_ZERO =>
2116                      '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)(?:e[+\-]?\d+?)?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)'
2117                  );
2118  
2119              //At this step we have an associative array with flag groups for a
2120              //specific style or an string denoting a regexp given its index.
2121              $this->language_data['NUMBERS_RXCACHE'] = array();
2122              foreach($this->language_data['NUMBERS_CACHE'] as $key => $rxdata) {
2123                  if(is_string($rxdata)) {
2124                      $regexp = $rxdata;
2125                  } else {
2126                      //This is a bitfield of number flags to highlight:
2127                      //Build an array, implode them together and make this the actual RX
2128                      $rxuse = array();
2129                      for($i = 1; $i <= $rxdata; $i<<=1) {
2130                          if($rxdata & $i) {
2131                              $rxuse[] = $numbers_format[$i];
2132                          }
2133                      }
2134                      $regexp = implode("|", $rxuse);
2135                  }
2136  
2137                  $this->language_data['NUMBERS_RXCACHE'][$key] =
2138                      "/(?<!<\|\/)(?<!<\|!REG3XP)(?<!<\|\/NUM!)(?<!\d\/>)($regexp)(?!(?:<DOT>|(?>[^\<]))+>)(?![^<]*>)(?!\|>)(?!\/>)/i";
2139              }
2140  
2141              if(!isset($this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'])) {
2142                  $this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'] = '#\d#';
2143              }
2144          }
2145  
2146          $this->parse_cache_built = true;
2147      }
2148  
2149      /**
2150       * Returns the code in $this->source, highlighted and surrounded by the
2151       * necessary HTML.
2152       *
2153       * This should only be called ONCE, cos it's SLOW! If you want to highlight
2154       * the same source multiple times, you're better off doing a whole lot of
2155       * str_replaces to replace the &lt;span&gt;s
2156       *
2157       * @since 1.0.0
2158       */
2159      public function parse_code() {
2160          // Start the timer
2161          $start_time = microtime();
2162  
2163          // Replace all newlines to a common form.
2164          $code = str_replace("\r\n", "\n", $this->source);
2165          $code = str_replace("\r", "\n", $code);
2166  
2167          // check whether language_data is available
2168          if (empty($this->language_data)) {
2169              $this->error = GESHI_ERROR_NO_SUCH_LANG;
2170          }
2171  
2172          // Firstly, if there is an error, we won't highlight
2173          if ($this->error) {
2174              //Escape the source for output
2175              $result = $this->hsc($this->source);
2176  
2177              //This fix is related to SF#1923020, but has to be applied regardless of
2178              //actually highlighting symbols.
2179              $result = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $result);
2180  
2181              // Timing is irrelevant
2182              $this->set_time($start_time, $start_time);
2183              $this->finalise($result);
2184              return $result;
2185          }
2186  
2187          // make sure the parse cache is up2date
2188          if (!$this->parse_cache_built) {
2189              $this->build_parse_cache();
2190          }
2191  
2192          // Initialise various stuff
2193          $length           = strlen($code);
2194          $COMMENT_MATCHED  = false;
2195          $stuff_to_parse   = '';
2196          $endresult        = '';
2197  
2198          // "Important" selections are handled like multiline comments
2199          // @todo GET RID OF THIS SHIZ
2200          if ($this->enable_important_blocks) {
2201              $this->language_data['COMMENT_MULTI'][GESHI_START_IMPORTANT] = GESHI_END_IMPORTANT;
2202          }
2203  
2204          if ($this->strict_mode) {
2205              // Break the source into bits. Each bit will be a portion of the code
2206              // within script delimiters - for example, HTML between < and >
2207              $k = 0;
2208              $parts = array();
2209              $matches = array();
2210              $next_match_pointer = null;
2211              // we use a copy to unset delimiters on demand (when they are not found)
2212              $delim_copy = $this->language_data['SCRIPT_DELIMITERS'];
2213              $i = 0;
2214              while ($i < $length) {
2215                  $next_match_pos = $length + 1; // never true
2216                  foreach ($delim_copy as $dk => $delimiters) {
2217                      if(is_array($delimiters)) {
2218                          foreach ($delimiters as $open => $close) {
2219                              // make sure the cache is setup properly
2220                              if (!isset($matches[$dk][$open])) {
2221                                  $matches[$dk][$open] = array(
2222                                      'next_match' => -1,
2223                                      'dk' => $dk,
2224  
2225                                      'open' => $open, // needed for grouping of adjacent code blocks (see below)
2226                                      'open_strlen' => strlen($open),
2227  
2228                                      'close' => $close,
2229                                      'close_strlen' => strlen($close),
2230                                  );
2231                              }
2232                              // Get the next little bit for this opening string
2233                              if ($matches[$dk][$open]['next_match'] < $i) {
2234                                  // only find the next pos if it was not already cached
2235                                  $open_pos = strpos($code, $open, $i);
2236                                  if ($open_pos === false) {
2237                                      // no match for this delimiter ever
2238                                      unset($delim_copy[$dk][$open]);
2239                                      continue;
2240                                  }
2241                                  $matches[$dk][$open]['next_match'] = $open_pos;
2242                              }
2243                              if ($matches[$dk][$open]['next_match'] < $next_match_pos) {
2244                                  //So we got a new match, update the close_pos
2245                                  $matches[$dk][$open]['close_pos'] =
2246                                      strpos($code, $close, $matches[$dk][$open]['next_match']+1);
2247  
2248                                  $next_match_pointer =& $matches[$dk][$open];
2249                                  $next_match_pos = $matches[$dk][$open]['next_match'];
2250                              }
2251                          }
2252                      } else {
2253                          //So we should match a RegExp as Strict Block ...
2254                          /**
2255                           * The value in $delimiters is expected to be a RegExp
2256                           * containing exactly 2 matching groups:
2257                           *  - Group 1 (or the named group 'start') is the opener
2258                           *  - Group 2 (or the named group 'end') is the closer
2259                           */
2260                          if(preg_match($delimiters, $code, $matches_rx, PREG_OFFSET_CAPTURE, $i)) {
2261                              //We got a match ...
2262                              if(isset($matches_rx['start']) && isset($matches_rx['end']))
2263                              {
2264                                  $matches[$dk] = array(
2265                                      'next_match' => $matches_rx['start'][1],
2266                                      'dk' => $dk,
2267  
2268                                      'close_strlen' => strlen($matches_rx['end'][0]),
2269                                      'close_pos' => $matches_rx['end'][1],
2270                                      );
2271                              } else {
2272                                  $matches[$dk] = array(
2273                                      'next_match' => $matches_rx[1][1],
2274                                      'dk' => $dk,
2275  
2276                                      'close_strlen' => strlen($matches_rx[2][0]),
2277                                      'close_pos' => $matches_rx[2][1],
2278                                      );
2279                              }
2280                          } else {
2281                              // no match for this delimiter ever
2282                              unset($delim_copy[$dk]);
2283                              continue;
2284                          }
2285  
2286                          if ($matches[$dk]['next_match'] <= $next_match_pos) {
2287                              $next_match_pointer =& $matches[$dk];
2288                              $next_match_pos = $matches[$dk]['next_match'];
2289                          }
2290                      }
2291                  }
2292  
2293                  // non-highlightable text
2294                  $parts[$k] = array(
2295                      1 => substr($code, $i, $next_match_pos - $i)
2296                  );
2297                  ++$k;
2298  
2299                  if ($next_match_pos > $length) {
2300                      // out of bounds means no next match was found
2301                      break;
2302                  }
2303  
2304                  // highlightable code
2305                  $parts[$k][0] = $next_match_pointer['dk'];
2306  
2307                  //Only combine for non-rx script blocks
2308                  if(is_array($delim_copy[$next_match_pointer['dk']])) {
2309                      // group adjacent script blocks, e.g. <foobar><asdf> should be one block, not three!
2310                      $i = $next_match_pos + $next_match_pointer['open_strlen'];
2311                      while (true) {
2312                          $close_pos = strpos($code, $next_match_pointer['close'], $i);
2313                          if ($close_pos == false) {
2314                              break;
2315                          }
2316                          $i = $close_pos + $next_match_pointer['close_strlen'];
2317                          if ($i == $length) {
2318                              break;
2319                          }
2320                          if ($code[$i] == $next_match_pointer['open'][0] && ($next_match_pointer['open_strlen'] == 1 ||
2321                              substr($code, $i, $next_match_pointer['open_strlen']) == $next_match_pointer['open'])) {
2322                              // merge adjacent but make sure we don't merge things like <tag><!-- comment -->
2323                              foreach ($matches as $submatches) {
2324                                  foreach ($submatches as $match) {
2325                                      if ($match['next_match'] == $i) {
2326                                          // a different block already matches here!
2327                                          break 3;
2328                                      }
2329                                  }
2330                              }
2331                          } else {
2332                              break;
2333                          }
2334                      }
2335                  } else {
2336                      $close_pos = $next_match_pointer['close_pos'] + $next_match_pointer['close_strlen'];
2337                      $i = $close_pos;
2338                  }
2339  
2340                  if ($close_pos === false) {
2341                      // no closing delimiter found!
2342                      $parts[$k][1] = substr($code, $next_match_pos);
2343                      ++$k;
2344                      break;
2345                  } else {
2346                      $parts[$k][1] = substr($code, $next_match_pos, $i - $next_match_pos);
2347                      ++$k;
2348                  }
2349              }
2350              unset($delim_copy, $next_match_pointer, $next_match_pos, $matches);
2351              $num_parts = $k;
2352  
2353              if ($num_parts == 1 && $this->strict_mode == GESHI_MAYBE) {
2354                  // when we have only one part, we don't have anything to highlight at all.
2355                  // if we have a "maybe" strict language, this should be handled as highlightable code
2356                  $parts = array(
2357                      0 => array(
2358                          0 => '',
2359                          1 => ''
2360                      ),
2361                      1 => array(
2362                          0 => null,
2363                          1 => $parts[0][1]
2364                      )
2365                  );
2366                  $num_parts = 2;
2367              }
2368  
2369          } else {
2370              // Not strict mode - simply dump the source into
2371              // the array at index 1 (the first highlightable block)
2372              $parts = array(
2373                  0 => array(
2374                      0 => '',
2375                      1 => ''
2376                  ),
2377                  1 => array(
2378                      0 => null,
2379                      1 => $code
2380                  )
2381              );
2382              $num_parts = 2;
2383          }
2384  
2385          //Unset variables we won't need any longer
2386          unset($code);
2387  
2388          //Preload some repeatedly used values regarding hardquotes ...
2389          $hq = isset($this->language_data['HARDQUOTE']) ? $this->language_data['HARDQUOTE'][0] : false;
2390          $hq_strlen = strlen($hq);
2391  
2392          //Preload if line numbers are to be generated afterwards
2393          //Added a check if line breaks should be forced even without line numbers, fixes SF#1727398
2394          $check_linenumbers = $this->line_numbers != GESHI_NO_LINE_NUMBERS ||
2395              !empty($this->highlight_extra_lines) || !$this->allow_multiline_span;
2396  
2397          //preload the escape char for faster checking ...
2398          $escaped_escape_char = $this->hsc($this->language_data['ESCAPE_CHAR']);
2399  
2400          // this is used for single-line comments
2401          $sc_disallowed_before = "";
2402          $sc_disallowed_after = "";
2403  
2404          if (isset($this->language_data['PARSER_CONTROL'])) {
2405              if (isset($this->language_data['PARSER_CONTROL']['COMMENTS'])) {
2406                  if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'])) {
2407                      $sc_disallowed_before = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'];
2408                  }
2409                  if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'])) {
2410                      $sc_disallowed_after = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'];
2411                  }
2412              }
2413          }
2414  
2415          //Fix for SF#1932083: Multichar Quotemarks unsupported
2416          $is_string_starter = array();
2417          if ($this->lexic_permissions['STRINGS']) {
2418              foreach ($this->language_data['QUOTEMARKS'] as $quotemark) {
2419                  if (!isset($is_string_starter[$quotemark[0]])) {
2420                      $is_string_starter[$quotemark[0]] = (string)$quotemark;
2421                  } elseif (is_string($is_string_starter[$quotemark[0]])) {
2422                      $is_string_starter[$quotemark[0]] = array(
2423                          $is_string_starter[$quotemark[0]],
2424                          $quotemark);
2425                  } else {
2426                      $is_string_starter[$quotemark[0]][] = $quotemark;
2427                  }
2428              }
2429          }
2430  
2431          // Now we go through each part. We know that even-indexed parts are
2432          // code that shouldn't be highlighted, and odd-indexed parts should
2433          // be highlighted
2434          for ($key = 0; $key < $num_parts; ++$key) {
2435              $STRICTATTRS = '';
2436  
2437              // Even-indexed parts are not highlighted: escape them, append them and move on
2438              if (!($key & 1)) {
2439                  // Not a block to highlight
2440                  $endresult .= $this->hsc($parts[$key][1]);
2441                  unset($parts[$key]);
2442                  continue;
2443              }
2444  
2445              $result = '';
2446              $part = $parts[$key][1];
2447  
2448              $highlight_part = true;
2449              if ($this->strict_mode && !is_null($parts[$key][0])) {
2450                  // get the class key for this block of code
2451                  $script_key = $parts[$key][0];
2452                  $highlight_part = $this->language_data['HIGHLIGHT_STRICT_BLOCK'][$script_key];
2453                  if ($this->language_data['STYLES']['SCRIPT'][$script_key] != '' &&
2454                      $this->lexic_permissions['SCRIPT']) {
2455                      // Add a span element around the source to
2456                      // highlight the overall source block
2457                      if (!$this->use_classes &&
2458                          $this->language_data['STYLES']['SCRIPT'][$script_key] != '') {
2459                          $attributes = ' style="' . $this->language_data['STYLES']['SCRIPT'][$script_key] . '"';
2460                      } else {
2461                          $attributes = ' class="sc' . $script_key . '"';
2462                      }
2463                      $result .= "<span$attributes>";
2464                      $STRICTATTRS = $attributes;
2465                  }
2466              }
2467  
2468              if ($highlight_part) {
2469                  // Now, highlight the code in this block. This code
2470                  // is really the engine of GeSHi (along with the method
2471                  // parse_non_string_part).
2472  
2473                  // cache comment regexps incrementally
2474                  $next_comment_regexp_key = '';
2475                  $next_comment_regexp_pos = -1;
2476                  $next_comment_multi_pos = -1;
2477                  $next_comment_single_pos = -1;
2478                  $comment_regexp_cache_per_key = array();
2479                  $comment_multi_cache_per_key = array();
2480                  $comment_single_cache_per_key = array();
2481                  $next_open_comment_multi = '';
2482                  $next_comment_single_key = '';
2483                  $escape_regexp_cache_per_key = array();
2484                  $next_escape_regexp_key = '';
2485                  $next_escape_regexp_pos = -1;
2486  
2487                  $length = strlen($part);
2488                  for ($i = 0; $i < $length; ++$i) {
2489                      // Get the next char
2490                      $char = $part[$i];
2491                      $char_len = 1;
2492  
2493                      // update regexp comment cache if needed
2494                      if (isset($this->language_data['COMMENT_REGEXP']) && $next_comment_regexp_pos < $i) {
2495                          $next_comment_regexp_pos = $length;
2496                          foreach ($this->language_data['COMMENT_REGEXP'] as $comment_key => $regexp) {
2497                              $match_i = false;
2498                              if (isset($comment_regexp_cache_per_key[$comment_key]) &&
2499                                  ($comment_regexp_cache_per_key[$comment_key]['pos'] >= $i ||
2500                                   $comment_regexp_cache_per_key[$comment_key]['pos'] === false)) {
2501                                  // we have already matched something
2502                                  if ($comment_regexp_cache_per_key[$comment_key]['pos'] === false) {
2503                                      // this comment is never matched
2504                                      continue;
2505                                  }
2506                                  $match_i = $comment_regexp_cache_per_key[$comment_key]['pos'];
2507                              } elseif (preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $i)) {
2508                                  $match_i = $match[0][1];
2509  
2510                                  $comment_regexp_cache_per_key[$comment_key] = array(
2511                                      'key' => $comment_key,
2512                                      'length' => strlen($match[0][0]),
2513                                      'pos' => $match_i
2514                                  );
2515                              } else {
2516                                  $comment_regexp_cache_per_key[$comment_key]['pos'] = false;
2517                                  continue;
2518                              }
2519  
2520                              if ($match_i !== false && $match_i < $next_comment_regexp_pos) {
2521                                  $next_comment_regexp_pos = $match_i;
2522                                  $next_comment_regexp_key = $comment_key;
2523                                  if ($match_i === $i) {
2524                                      break;
2525                                  }
2526                              }
2527                          }
2528                      }
2529  
2530                      $string_started = false;
2531  
2532                      if (isset($is_string_starter[$char])) {
2533                          // Possibly the start of a new string ...
2534  
2535                          //Check which starter it was ...
2536                          //Fix for SF#1932083: Multichar Quotemarks unsupported
2537                          if (is_array($is_string_starter[$char])) {
2538                              $char_new = '';
2539                              foreach ($is_string_starter[$char] as $testchar) {
2540                                  if ($testchar === substr($part, $i, strlen($testchar)) &&
2541                                      strlen($testchar) > strlen($char_new)) {
2542                                      $char_new = $testchar;
2543                                      $string_started = true;
2544                                  }
2545                              }
2546                              if ($string_started) {
2547                                  $char = $char_new;
2548                              }
2549                          } else {
2550                              $testchar = $is_string_starter[$char];
2551                              if ($testchar === substr($part, $i, strlen($testchar))) {
2552                                  $char = $testchar;
2553                                  $string_started = true;
2554                              }
2555                          }
2556                          $char_len = strlen($char);
2557                      }
2558  
2559                      if ($string_started && ($i != $next_comment_regexp_pos)) {
2560                          // Hand out the correct style information for this string
2561                          $string_key = array_search($char, $this->language_data['QUOTEMARKS']);
2562                          if (!isset($this->language_data['STYLES']['STRINGS'][$string_key]) ||
2563                              !isset($this->language_data['STYLES']['ESCAPE_CHAR'][$string_key])) {
2564                              $string_key = 0;
2565                          }
2566  
2567                          // parse the stuff before this
2568                          $result .= $this->parse_non_string_part($stuff_to_parse);
2569                          $stuff_to_parse = '';
2570  
2571                          if (!$this->use_classes) {
2572                              $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS'][$string_key] . '"';
2573                          } else {
2574                              $string_attributes = ' class="st'.$string_key.'"';
2575                          }
2576  
2577                          // now handle the string
2578                          $string = "<span$string_attributes>" . GeSHi::hsc($char);
2579                          $start = $i + $char_len;
2580                          $string_open = true;
2581  
2582                          if(empty($this->language_data['ESCAPE_REGEXP'])) {
2583                              $next_escape_regexp_pos = $length;
2584                          }
2585  
2586                          do {
2587                              //Get the regular ending pos ...
2588                              $close_pos = strpos($part, $char, $start);
2589                              if(false === $close_pos) {
2590                                  $close_pos = $length;
2591                              }
2592  
2593                              if($this->lexic_permissions['ESCAPE_CHAR']) {
2594                                  // update escape regexp cache if needed
2595                                  if (isset($this->language_data['ESCAPE_REGEXP']) && $next_escape_regexp_pos < $start) {
2596                                      $next_escape_regexp_pos = $length;
2597                                      foreach ($this->language_data['ESCAPE_REGEXP'] as $escape_key => $regexp) {
2598                                          $match_i = false;
2599                                          if (isset($escape_regexp_cache_per_key[$escape_key]) &&
2600                                              ($escape_regexp_cache_per_key[$escape_key]['pos'] >= $start ||
2601                                               $escape_regexp_cache_per_key[$escape_key]['pos'] === false)) {
2602                                              // we have already matched something
2603                                              if ($escape_regexp_cache_per_key[$escape_key]['pos'] === false) {
2604                                                  // this escape regexp is never matched
2605                                                  continue;
2606                                              }
2607                                              $match_i = $escape_regexp_cache_per_key[$escape_key]['pos'];
2608                                          } elseif (preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $start)) {
2609                                              $match_i = $match[0][1];
2610  
2611                                              $escape_regexp_cache_per_key[$escape_key] = array(
2612                                                  'key' => $escape_key,
2613                                                  'length' => strlen($match[0][0]),
2614                                                  'pos' => $match_i
2615                                              );
2616                                          } else {
2617                                              $escape_regexp_cache_per_key[$escape_key]['pos'] = false;
2618                                              continue;
2619                                          }
2620  
2621                                          if ($match_i !== false && $match_i < $next_escape_regexp_pos) {
2622                                              $next_escape_regexp_pos = $match_i;
2623                                              $next_escape_regexp_key = $escape_key;
2624                                              if ($match_i === $start) {
2625                                                  break;
2626                                              }
2627                                          }
2628                                      }
2629                                  }
2630  
2631                                  //Find the next simple escape position
2632                                  if('' != $this->language_data['ESCAPE_CHAR']) {
2633                                      $simple_escape = strpos($part, $this->language_data['ESCAPE_CHAR'], $start);
2634                                      if(false === $simple_escape) {
2635                                          $simple_escape = $length;
2636                                      }
2637                                  } else {
2638                                      $simple_escape = $length;
2639                                  }
2640                              } else {
2641                                  $next_escape_regexp_pos = $length;
2642                                  $simple_escape = $length;
2643                              }
2644  
2645                              if($simple_escape < $next_escape_regexp_pos &&
2646                                  $simple_escape < $length &&
2647                                  $simple_escape < $close_pos) {
2648                                  //The next escape sequence is a simple one ...
2649                                  $es_pos = $simple_escape;
2650  
2651                                  //Add the stuff not in the string yet ...
2652                                  $string .= $this->hsc(substr($part, $start, $es_pos - $start));
2653  
2654                                  //Get the style for this escaped char ...
2655                                  if (!$this->use_classes) {
2656                                      $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][0] . '"';
2657                                  } else {
2658                                      $escape_char_attributes = ' class="es0"';
2659                                  }
2660  
2661                                  //Add the style for the escape char ...
2662                                  $string .= "<span$escape_char_attributes>" .
2663                                      GeSHi::hsc($this->language_data['ESCAPE_CHAR']);
2664  
2665                                  //Get the byte AFTER the ESCAPE_CHAR we just found
2666                                  $es_char = $part[$es_pos + 1];
2667                                  if ($es_char == "\n") {
2668                                      // don't put a newline around newlines
2669                                      $string .= "</span>\n";
2670                                      $start = $es_pos + 2;
2671                                  } elseif (ord($es_char) >= 128) {
2672                                      //This is a non-ASCII char (UTF8 or single byte)
2673                                      //This code tries to work around SF#2037598 ...
2674                                      if(function_exists('mb_substr')) {
2675                                          $es_char_m = mb_substr(substr($part, $es_pos+1, 16), 0, 1, $this->encoding);
2676                                          $string .= $es_char_m . '</span>';
2677                                      } elseif ('utf-8' == $this->encoding) {
2678                                          if(preg_match("/[\xC2-\xDF][\x80-\xBF]".
2679                                              "|\xE0[\xA0-\xBF][\x80-\xBF]".
2680                                              "|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}".
2681                                              "|\xED[\x80-\x9F][\x80-\xBF]".
2682                                              "|\xF0[\x90-\xBF][\x80-\xBF]{2}".
2683                                              "|[\xF1-\xF3][\x80-\xBF]{3}".
2684                                              "|\xF4[\x80-\x8F][\x80-\xBF]{2}/s",
2685                                              $part, $es_char_m, null, $es_pos + 1)) {
2686                                              $es_char_m = $es_char_m[0];
2687                                          } else {
2688                                              $es_char_m = $es_char;
2689                                          }
2690                                          $string .= $this->hsc($es_char_m) . '</span>';
2691                                      } else {
2692                                          $es_char_m = $this->hsc($es_char);
2693                                      }
2694                                      $start = $es_pos + strlen($es_char_m) + 1;
2695                                  } else {
2696                                      $string .= $this->hsc($es_char) . '</span>';
2697                                      $start = $es_pos + 2;
2698                                  }
2699                              } elseif ($next_escape_regexp_pos < $length &&
2700                                  $next_escape_regexp_pos < $close_pos) {
2701                                  $es_pos = $next_escape_regexp_pos;
2702                                  //Add the stuff not in the string yet ...
2703                                  $string .= $this->hsc(substr($part, $start, $es_pos - $start));
2704  
2705                                  //Get the key and length of this match ...
2706                                  $escape = $escape_regexp_cache_per_key[$next_escape_regexp_key];
2707                                  $escape_str = substr($part, $es_pos, $escape['length']);
2708                                  $escape_key = $escape['key'];
2709  
2710                                  //Get the style for this escaped char ...
2711                                  if (!$this->use_classes) {
2712                                      $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][$escape_key] . '"';
2713                                  } else {
2714                                      $escape_char_attributes = ' class="es' . $escape_key . '"';
2715                                  }
2716  
2717                                  //Add the style for the escape char ...
2718                                  $string .= "<span$escape_char_attributes>" .
2719                                      $this->hsc($escape_str) . '</span>';
2720  
2721                                  $start = $es_pos + $escape['length'];
2722                              } else {
2723                                  //Copy the remainder of the string ...
2724                                  $string .= $this->hsc(substr($part, $start, $close_pos - $start + $char_len)) . '</span>';
2725                                  $start = $close_pos + $char_len;
2726                                  $string_open = false;
2727                              }
2728                          } while($string_open);
2729  
2730                          if ($check_linenumbers) {
2731                              // Are line numbers used? If so, we should end the string before
2732                              // the newline and begin it again (so when <li>s are put in the source
2733                              // remains XHTML compliant)
2734                              // note to self: This opens up possibility of config files specifying
2735                              // that languages can/cannot have multiline strings???
2736                              $string = str_replace("\n", "</span>\n<span$string_attributes>", $string);
2737                          }
2738  
2739                          $result .= $string;
2740                          $string = '';
2741                          $i = $start - 1;
2742                          continue;
2743                      } elseif ($this->lexic_permissions['STRINGS'] && $hq && $hq[0] == $char &&
2744                          substr($part, $i, $hq_strlen) == $hq && ($i != $next_comment_regexp_pos)) {
2745                          // The start of a hard quoted string
2746                          if (!$this->use_classes) {
2747                              $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS']['HARD'] . '"';
2748                              $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR']['HARD'] . '"';
2749                          } else {
2750                              $string_attributes = ' class="st_h"';
2751                              $escape_char_attributes = ' class="es_h"';
2752                          }
2753                          // parse the stuff before this
2754                          $result .= $this->parse_non_string_part($stuff_to_parse);
2755                          $stuff_to_parse = '';
2756  
2757                          // now handle the string
2758                          $string = '';
2759  
2760                          // look for closing quote
2761                          $start = $i + $hq_strlen;
2762                          while ($close_pos = strpos($part, $this->language_data['HARDQUOTE'][1], $start)) {
2763                              $start = $close_pos + 1;
2764                              if ($this->lexic_permissions['ESCAPE_CHAR'] && $part[$close_pos - 1] == $this->language_data['HARDCHAR'] &&
2765                                  (($i + $hq_strlen) != ($close_pos))) { //Support empty string for HQ escapes if Starter = Escape
2766                                  // make sure this quote is not escaped
2767                                  foreach ($this->language_data['HARDESCAPE'] as $hardescape) {
2768                                      if (substr($part, $close_pos - 1, strlen($hardescape)) == $hardescape) {
2769                                          // check wether this quote is escaped or if it is something like '\\'
2770                                          $escape_char_pos = $close_pos - 1;
2771                                          while ($escape_char_pos > 0
2772                                                  && $part[$escape_char_pos - 1] == $this->language_data['HARDCHAR']) {
2773                                              --$escape_char_pos;
2774                                          }
2775                                          if (($close_pos - $escape_char_pos) & 1) {
2776                                              // uneven number of escape chars => this quote is escaped
2777                                              continue 2;
2778                                          }
2779                                      }
2780                                  }
2781                              }
2782  
2783                              // found closing quote
2784                              break;
2785                          }
2786  
2787                          //Found the closing delimiter?
2788                          if (!$close_pos) {
2789                              // span till the end of this $part when no closing delimiter is found
2790                              $close_pos = $length;
2791                          }
2792  
2793                          //Get the actual string
2794                          $string = substr($part, $i, $close_pos - $i + 1);
2795                          $i = $close_pos;
2796  
2797                          // handle escape chars and encode html chars
2798                          // (special because when we have escape chars within our string they may not be escaped)
2799                          if ($this->lexic_permissions['ESCAPE_CHAR'] && $this->language_data['ESCAPE_CHAR']) {
2800                              $start = 0;
2801                              $new_string = '';
2802                              while ($es_pos = strpos($string, $this->language_data['ESCAPE_CHAR'], $start)) {
2803                                  // hmtl escape stuff before
2804                                  $new_string .= $this->hsc(substr($string, $start, $es_pos - $start));
2805                                  // check if this is a hard escape
2806                                  foreach ($this->language_data['HARDESCAPE'] as $hardescape) {
2807                                      if (substr($string, $es_pos, strlen($hardescape)) == $hardescape) {
2808                                          // indeed, this is a hardescape
2809                                          $new_string .= "<span$escape_char_attributes>" .
2810                                              $this->hsc($hardescape) . '</span>';
2811                                          $start = $es_pos + strlen($hardescape);
2812                                          continue 2;
2813                                      }
2814                                  }
2815                                  // not a hard escape, but a normal escape
2816                                  // they come in pairs of two
2817                                  $c = 0;
2818                                  while (isset($string[$es_pos + $c]) && isset($string[$es_pos + $c + 1])
2819                                      && $string[$es_pos + $c] == $this->language_data['ESCAPE_CHAR']
2820                                      && $string[$es_pos + $c + 1] == $this->language_data['ESCAPE_CHAR']) {
2821                                      $c += 2;
2822                                  }
2823                                  if ($c) {
2824                                      $new_string .= "<span$escape_char_attributes>" .
2825                                          str_repeat($escaped_escape_char, $c) .
2826                                          '</span>';
2827                                      $start = $es_pos + $c;
2828                                  } else {
2829                                      // this is just a single lonely escape char...
2830                                      $new_string .= $escaped_escape_char;
2831                                      $start = $es_pos + 1;
2832                                  }
2833                              }
2834                              $string = $new_string . $this->hsc(substr($string, $start));
2835                          } else {
2836                              $string = $this->hsc($string);
2837                          }
2838  
2839                          if ($check_linenumbers) {
2840                              // Are line numbers used? If, we should end the string before
2841                              // the newline and begin it again (so when <li>s are put in the source
2842                              // remains XHTML compliant)
2843                              // note to self: This opens up possibility of config files specifying
2844                              // that languages can/cannot have multiline strings???
2845                              $string = str_replace("\n", "</span>\n<span$string_attributes>", $string);
2846                          }
2847  
2848                          $result .= "<span$string_attributes>" . $string . '</span>';
2849                          $string = '';
2850                          continue;
2851                      } else {
2852                          //Have a look for regexp comments
2853                          if ($i == $next_comment_regexp_pos) {
2854                              $COMMENT_MATCHED = true;
2855                              $comment = $comment_regexp_cache_per_key[$next_comment_regexp_key];
2856                              $test_str = $this->hsc(substr($part, $i, $comment['length']));
2857  
2858                              //@todo If remove important do remove here
2859                              if ($this->lexic_permissions['COMMENTS']['MULTI']) {
2860                                  if (!$this->use_classes) {
2861                                      $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment['key']] . '"';
2862                                  } else {
2863                                      $attributes = ' class="co' . $comment['key'] . '"';
2864                                  }
2865  
2866                                  $test_str = "<span$attributes>" . $test_str . "</span>";
2867  
2868                                  // Short-cut through all the multiline code
2869                                  if ($check_linenumbers) {
2870                                      // strreplace to put close span and open span around multiline newlines
2871                                      $test_str = str_replace(
2872                                          "\n", "</span>\n<span$attributes>",
2873                                          str_replace("\n ", "\n&nbsp;", $test_str)
2874                                      );
2875                                  }
2876                              }
2877  
2878                              $i += $comment['length'] - 1;
2879  
2880                              // parse the rest
2881                              $result .= $this->parse_non_string_part($stuff_to_parse);
2882                              $stuff_to_parse = '';
2883                          }
2884  
2885                          // If we haven't matched a regexp comment, try multi-line comments
2886                          if (!$COMMENT_MATCHED) {
2887                              // Is this a multiline comment?
2888                              if (!empty($this->language_data['COMMENT_MULTI']) && $next_comment_multi_pos < $i) {
2889                                  $next_comment_multi_pos = $length;
2890                                  foreach ($this->language_data['COMMENT_MULTI'] as $open => $close) {
2891                                      $match_i = false;
2892                                      if (isset($comment_multi_cache_per_key[$open]) &&
2893                                          ($comment_multi_cache_per_key[$open] >= $i ||
2894                                           $comment_multi_cache_per_key[$open] === false)) {
2895                                          // we have already matched something
2896                                          if ($comment_multi_cache_per_key[$open] === false) {
2897                                              // this comment is never matched
2898                                              continue;
2899                                          }
2900                                          $match_i = $comment_multi_cache_per_key[$open];
2901                                      } elseif (($match_i = stripos($part, $open, $i)) !== false) {
2902                                          $comment_multi_cache_per_key[$open] = $match_i;
2903                                      } else {
2904                                          $comment_multi_cache_per_key[$open] = false;
2905                                          continue;
2906                                      }
2907                                      if ($match_i !== false && $match_i < $next_comment_multi_pos) {
2908                                          $next_comment_multi_pos = $match_i;
2909                                          $next_open_comment_multi = $open;
2910                                          if ($match_i === $i) {
2911                                              break;
2912                                          }
2913                                      }
2914                                  }
2915                              }
2916                              if ($i == $next_comment_multi_pos) {
2917                                  $open = $next_open_comment_multi;
2918                                  $close = $this->language_data['COMMENT_MULTI'][$open];
2919                                  $open_strlen = strlen($open);
2920                                  $close_strlen = strlen($close);
2921                                  $COMMENT_MATCHED = true;
2922                                  $test_str_match = $open;
2923                                  //@todo If remove important do remove here
2924                                  if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
2925                                      $open == GESHI_START_IMPORTANT) {
2926                                      if ($open != GESHI_START_IMPORTANT) {
2927                                          if (!$this->use_classes) {
2928                                              $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS']['MULTI'] . '"';
2929                                          } else {
2930                                              $attributes = ' class="coMULTI"';
2931                                          }
2932                                          $test_str = "<span$attributes>" . $this->hsc($open);
2933                                      } else {
2934                                          if (!$this->use_classes) {
2935                                              $attributes = ' style="' . $this->important_styles . '"';
2936                                          } else {
2937                                              $attributes = ' class="imp"';
2938                                          }
2939  
2940                                          // We don't include the start of the comment if it's an
2941                                          // "important" part
2942                                          $test_str = "<span$attributes>";
2943                                      }
2944                                  } else {
2945                                      $test_str = $this->hsc($open);
2946                                  }
2947  
2948                                  $close_pos = strpos( $part, $close, $i + $open_strlen );
2949  
2950                                  if ($close_pos === false) {
2951                                      $close_pos = $length;
2952                                  }
2953  
2954                                  // Short-cut through all the multiline code
2955                                  $rest_of_comment = $this->hsc(substr($part, $i + $open_strlen, $close_pos - $i - $open_strlen + $close_strlen));
2956                                  if (($this->lexic_permissions['COMMENTS']['MULTI'] ||
2957                                      $test_str_match == GESHI_START_IMPORTANT) &&
2958                                      $check_linenumbers) {
2959  
2960                                      // strreplace to put close span and open span around multiline newlines
2961                                      $test_str .= str_replace(
2962                                          "\n", "</span>\n<span$attributes>",
2963                                          str_replace("\n ", "\n&nbsp;", $rest_of_comment)
2964                                      );
2965                                  } else {
2966                                      $test_str .= $rest_of_comment;
2967                                  }
2968  
2969                                  if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
2970                                      $test_str_match == GESHI_START_IMPORTANT) {
2971                                      $test_str .= '</span>';
2972                                  }
2973  
2974                                  $i = $close_pos + $close_strlen - 1;
2975  
2976                                  // parse the rest
2977                                  $result .= $this->parse_non_string_part($stuff_to_parse);
2978                                  $stuff_to_parse = '';
2979                              }
2980                          }
2981  
2982                          // If we haven't matched a multiline comment, try single-line comments
2983                          if (!$COMMENT_MATCHED) {
2984                              // cache potential single line comment occurances
2985                              if (!empty($this->language_data['COMMENT_SINGLE']) && $next_comment_single_pos < $i) {
2986                                  $next_comment_single_pos = $length;
2987                                  foreach ($this->language_data['COMMENT_SINGLE'] as $comment_key => $comment_mark) {
2988                                      $match_i = false;
2989                                      if (isset($comment_single_cache_per_key[$comment_key]) &&
2990                                          ($comment_single_cache_per_key[$comment_key] >= $i ||
2991                                           $comment_single_cache_per_key[$comment_key] === false)) {
2992                                          // we have already matched something
2993                                          if ($comment_single_cache_per_key[$comment_key] === false) {
2994                                              // this comment is never matched
2995                                              continue;
2996                                          }
2997                                          $match_i = $comment_single_cache_per_key[$comment_key];
2998                                      } elseif (
2999                                          // case sensitive comments
3000                                          ($this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS] &&
3001                                          ($match_i = stripos($part, $comment_mark, $i)) !== false) ||
3002                                          // non case sensitive
3003                                          (!$this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS] &&
3004                                            (($match_i = strpos($part, $comment_mark, $i)) !== false))) {
3005                                          $comment_single_cache_per_key[$comment_key] = $match_i;
3006                                      } else {
3007                                          $comment_single_cache_per_key[$comment_key] = false;
3008                                          continue;
3009                                      }
3010                                      if ($match_i !== false && $match_i < $next_comment_single_pos) {
3011                                          $next_comment_single_pos = $match_i;
3012                                          $next_comment_single_key = $comment_key;
3013                                          if ($match_i === $i) {
3014                                              break;
3015                                          }
3016                                      }
3017                                  }
3018                              }
3019                              if ($next_comment_single_pos == $i) {
3020                                  $comment_key = $next_comment_single_key;
3021                                  $comment_mark = $this->language_data['COMMENT_SINGLE'][$comment_key];
3022                                  $com_len = strlen($comment_mark);
3023  
3024                                  // This check will find special variables like $# in bash
3025                                  // or compiler directives of Delphi beginning {$
3026                                  if ((empty($sc_disallowed_before) || ($i == 0) ||
3027                                      (false === strpos($sc_disallowed_before, $part[$i-1]))) &&
3028                                      (empty($sc_disallowed_after) || ($length <= $i + $com_len) ||
3029                                      (false === strpos($sc_disallowed_after, $part[$i + $com_len]))))
3030                                  {
3031                                      // this is a valid comment
3032                                      $COMMENT_MATCHED = true;
3033                                      if ($this->lexic_permissions['COMMENTS'][$comment_key]) {
3034                                          if (!$this->use_classes) {
3035                                              $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment_key] . '"';
3036                                          } else {
3037                                              $attributes = ' class="co' . $comment_key . '"';
3038                                          }
3039                                          $test_str = "<span$attributes>" . $this->hsc($this->change_case($comment_mark));
3040                                      } else {
3041                                          $test_str = $this->hsc($comment_mark);
3042                                      }
3043  
3044                                      //Check if this comment is the last in the source
3045                                      $close_pos = strpos($part, "\n", $i);
3046                                      $oops = false;
3047                                      if ($close_pos === false) {
3048                                          $close_pos = $length;
3049                                          $oops = true;
3050                                      }
3051                                      $test_str .= $this->hsc(substr($part, $i + $com_len, $close_pos - $i - $com_len));
3052                                      if ($this->lexic_permissions['COMMENTS'][$comment_key]) {
3053                                          $test_str .= "</span>";
3054                                      }
3055  
3056                                      // Take into account that the comment might be the last in the source
3057                                      if (!$oops) {
3058                                        $test_str .= "\n";
3059                                      }
3060  
3061                                      $i = $close_pos;
3062  
3063                                      // parse the rest
3064                                      $result .= $this->parse_non_string_part($stuff_to_parse);
3065                                      $stuff_to_parse = '';
3066                                  }
3067                              }
3068                          }
3069                      }
3070  
3071                      // Where are we adding this char?
3072                      if (!$COMMENT_MATCHED) {
3073                          $stuff_to_parse .= $char;
3074                      } else {
3075                          $result .= $test_str;
3076                          unset($test_str);
3077                          $COMMENT_MATCHED = false;
3078                      }
3079                  }
3080                  // Parse the last bit
3081                  $result .= $this->parse_non_string_part($stuff_to_parse);
3082                  $stuff_to_parse = '';
3083              } else {
3084                  $result .= $this->hsc($part);
3085              }
3086              // Close the <span> that surrounds the block
3087              if ($STRICTATTRS != '') {
3088                  $result = str_replace("\n", "</span>\n<span$STRICTATTRS>", $result);
3089                  $result .= '</span>';
3090              }
3091  
3092              $endresult .= $result;
3093              unset($part, $parts[$key], $result);
3094          }
3095  
3096          //This fix is related to SF#1923020, but has to be applied regardless of
3097          //actually highlighting symbols.
3098          /** NOTE: memorypeak #3 */
3099          $endresult = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $endresult);
3100  
3101  //        // Parse the last stuff (redundant?)
3102  //        $result .= $this->parse_non_string_part($stuff_to_parse);
3103  
3104          // Lop off the very first and last spaces
3105  //        $result = substr($result, 1, -1);
3106  
3107          // We're finished: stop timing
3108          $this->set_time($start_time, microtime());
3109  
3110          $this->finalise($endresult);
3111          return $endresult;
3112      }
3113  
3114      /**
3115       * Swaps out spaces and tabs for HTML indentation. Not needed if
3116       * the code is in a pre block...
3117       *
3118       * @param  string $result The source to indent (reference!)
3119       * @since  1.0.0
3120       */
3121      protected function indent(&$result) {
3122          /// Replace tabs with the correct number of spaces
3123          if (false !== strpos($result, "\t")) {
3124              $lines = explode("\n", $result);
3125              $result = null;//Save memory while we process the lines individually
3126              $tab_width = $this->get_real_tab_width();
3127              $tab_string = '&nbsp;' . str_repeat(' ', $tab_width);
3128  
3129              for ($key = 0, $n = count($lines); $key < $n; $key++) {
3130                  $line = $lines[$key];
3131                  if (false === strpos($line, "\t")) {
3132                      continue;
3133                  }
3134  
3135                  $pos = 0;
3136                  $length = strlen($line);
3137                  $lines[$key] = ''; // reduce memory
3138  
3139                  $IN_TAG = false;
3140                  for ($i = 0; $i < $length; ++$i) {
3141                      $char = $line[$i];
3142                      // Simple engine to work out whether we're in a tag.
3143                      // If we are we modify $pos. This is so we ignore HTML
3144                      // in the line and only workout the tab replacement
3145                      // via the actual content of the string
3146                      // This test could be improved to include strings in the
3147                      // html so that < or > would be allowed in user's styles
3148                      // (e.g. quotes: '<' '>'; or similar)
3149                      if ($IN_TAG) {
3150                          if ('>' == $char) {
3151                              $IN_TAG = false;
3152                          }
3153                          $lines[$key] .= $char;
3154                      } elseif ('<' == $char) {
3155                          $IN_TAG = true;
3156                          $lines[$key] .= '<';
3157                      } elseif ('&' == $char) {
3158                          $substr = substr($line, $i + 3, 5);
3159                          $posi = strpos($substr, ';');
3160                          if (false === $posi) {
3161                              ++$pos;
3162                          } else {
3163                              $pos -= $posi+2;
3164                          }
3165                          $lines[$key] .= $char;
3166                      } elseif ("\t" == $char) {
3167                          $str = '';
3168                          // OPTIMISE - move $strs out. Make an array:
3169                          // $tabs = array(
3170                          //  1 => '&nbsp;',
3171                          //  2 => '&nbsp; ',
3172                          //  3 => '&nbsp; &nbsp;' etc etc
3173                          // to use instead of building a string every time
3174                          $tab_end_width = $tab_width - ($pos % $tab_width); //Moved out of the look as it doesn't change within the loop
3175                          if (($pos & 1) || 1 == $tab_end_width) {
3176                              $str .= substr($tab_string, 6, $tab_end_width);
3177                          } else {
3178                              $str .= substr($tab_string, 0, $tab_end_width+5);
3179                          }
3180                          $lines[$key] .= $str;
3181                          $pos += $tab_end_width;
3182  
3183                          if (false === strpos($line, "\t", $i + 1)) {
3184                              $lines[$key] .= substr($line, $i + 1);
3185                              break;
3186                          }
3187                      } elseif (0 == $pos && ' ' == $char) {
3188                          $lines[$key] .= '&nbsp;';
3189                          ++$pos;
3190                      } else {
3191                          $lines[$key] .= $char;
3192                          ++$pos;
3193                      }
3194                  }
3195              }
3196              $result = implode("\n", $lines);
3197              unset($lines);//We don't need the lines separated beyond this --- free them!
3198          }
3199          // Other whitespace
3200          // BenBE: Fix to reduce the number of replacements to be done
3201          $result = preg_replace('/^ /m', '&nbsp;', $result);
3202          $result = str_replace('  ', ' &nbsp;', $result);
3203  
3204          if ($this->line_numbers == GESHI_NO_LINE_NUMBERS && $this->header_type != GESHI_HEADER_PRE_TABLE) {
3205              if ($this->line_ending === null) {
3206                  $result = nl2br($result);
3207              } else {
3208                  $result = str_replace("\n", $this->line_ending, $result);
3209              }
3210          }
3211      }
3212  
3213      /**
3214       * Changes the case of a keyword for those languages where a change is asked for
3215       *
3216       * @param  string $instr The keyword to change the case of
3217       * @return string The keyword with its case changed
3218       * @since  1.0.0
3219       */
3220      protected function change_case($instr) {
3221          switch ($this->language_data['CASE_KEYWORDS']) {
3222              case GESHI_CAPS_UPPER:
3223                  return strtoupper($instr);
3224              case GESHI_CAPS_LOWER:
3225                  return strtolower($instr);
3226              default:
3227                  return $instr;
3228          }
3229      }
3230  
3231      /**
3232       * Handles replacements of keywords to include markup and links if requested
3233       *
3234       * @param  string $match The keyword to add the Markup to
3235       * @return string The HTML for the match found
3236       * @since  1.0.8
3237       *
3238       * @todo   Get rid of ender in keyword links
3239       */
3240      protected function handle_keyword_replace($match) {
3241          $k = $this->_kw_replace_group;
3242          $keyword = $match[0];
3243          $keyword_match = $match[1];
3244  
3245          $before = '';
3246          $after = '';
3247  
3248          if ($this->keyword_links) {
3249              // Keyword links have been ebabled
3250  
3251              if (isset($this->language_data['URLS'][$k]) &&
3252                  $this->language_data['URLS'][$k] != '') {
3253                  // There is a base group for this keyword
3254  
3255                  // Old system: strtolower
3256                  //$keyword = ( $this->language_data['CASE_SENSITIVE'][$group] ) ? $keyword : strtolower($keyword);
3257                  // New system: get keyword from language file to get correct case
3258                  if (!$this->language_data['CASE_SENSITIVE'][$k] &&
3259                      strpos($this->language_data['URLS'][$k], '{FNAME}') !== false) {
3260                      foreach ($this->language_data['KEYWORDS'][$k] as $word) {
3261                          if (strcasecmp($word, $keyword_match) == 0) {
3262                              break;
3263                          }
3264                      }
3265                  } else {
3266                      $word = $keyword_match;
3267                  }
3268  
3269                  $before = '<|UR1|"' .
3270                      str_replace(
3271                          array(
3272                              '{FNAME}',
3273                              '{FNAMEL}',
3274                              '{FNAMEU}',
3275                              '{FNAMEUF}',
3276                              '.'),
3277                          array(
3278                              str_replace('+', '%20', urlencode($this->hsc($word))),
3279                              str_replace('+', '%20', urlencode($this->hsc(strtolower($word)))),
3280                              str_replace('+', '%20', urlencode($this->hsc(strtoupper($word)))),
3281                              str_replace('+', '%20', urlencode($this->hsc(ucfirst($word)))),
3282                              '<DOT>'),
3283                          $this->language_data['URLS'][$k]
3284                      ) . '">';
3285                  $after = '</a>';
3286              }
3287          }
3288  
3289          return $before . '<|/'. $k .'/>' . $this->change_case($keyword) . '|>' . $after;
3290      }
3291  
3292      /**
3293       * handles regular expressions highlighting-definitions with callback functions
3294       *
3295       * @note this is a callback, don't use it directly
3296       *
3297       * @param array $matches the matches array
3298       * @return string The highlighted string
3299       * @since 1.0.8
3300       */
3301      protected function handle_regexps_callback($matches) {
3302          // before: "' style=\"' . call_user_func(\"$func\", '\\1') . '\"\\1|>'",
3303          return  ' style="' . call_user_func($this->language_data['STYLES']['REGEXPS'][$this->_rx_key], $matches[1]) . '"'. $matches[1] . '|>';
3304      }
3305  
3306      /**
3307       * handles newlines in REGEXPS matches. Set the _hmr_* vars before calling this
3308       *
3309       * @note this is a callback, don't use it directly
3310       *
3311       * @param array $matches the matches array
3312       * @return string
3313       * @since 1.0.8
3314       */
3315      protected function handle_multiline_regexps($matches) {
3316          $before = $this->_hmr_before;
3317          $after = $this->_hmr_after;
3318          if ($this->_hmr_replace) {
3319              $replace = $this->_hmr_replace;
3320              $search = array();
3321  
3322              foreach (array_keys($matches) as $k) {
3323                  $search[] = '\\' . $k;
3324              }
3325  
3326              $before = str_replace($search, $matches, $before);
3327              $after = str_replace($search, $matches, $after);
3328              $replace = str_replace($search, $matches, $replace);
3329          } else {
3330              $replace = $matches[0];
3331          }
3332          return $before
3333                      . '<|!REG3XP' . $this->_hmr_key .'!>'
3334                          . str_replace("\n", "|>\n<|!REG3XP" . $this->_hmr_key . '!>', $replace)
3335                      . '|>'
3336                . $after;
3337      }
3338  
    /**
     * Takes a string that has no strings or comments in it, and highlights
     * stuff like keywords, numbers and methods.
     *
     * The input is HTML-escaped with hsc() and then run through a series of
     * passes: keywords, REGEXPS, numbers, object members, brackets and symbols.
     * Each pass marks its matches with temporary "<|...>" / "|>" placeholders.
     * The style/class attributes are substituted afterwards and only the last
     * statements turn the placeholders into real <span> and <a> tags, so that
     * markup emitted by one pass is not highlighted again by a later one.
     *
     * @param string $stuff_to_parse The string to parse for keyword, numbers etc.
     * @since 1.0.0
     * @todo BUGGY! Why? Why not build string and return?
     * @return string The highlighted fragment as HTML
     */
    protected function parse_non_string_part($stuff_to_parse) {
        // Escape the input and prepend one space; that space is removed again
        // by the substr() in the return statement.
        // NOTE(review): presumably the space gives the look-behind assertions
        // something to inspect at the very start of the fragment - confirm.
        $stuff_to_parse = ' ' . $this->hsc($stuff_to_parse);

        // Highlight keywords
        // Default look-behind / look-ahead assertions listing the characters
        // that must not directly precede / follow a keyword
        $disallowed_before = "(?<![a-zA-Z0-9\$_\|\#|^&";
        $disallowed_after = "(?![a-zA-Z0-9_\|%\\-&;";
        if ($this->lexic_permissions['STRINGS']) {
            // With string highlighting enabled, quotemarks are disallowed
            // around keywords as well
            $quotemarks = preg_quote(implode($this->language_data['QUOTEMARKS']), '/');
            $disallowed_before .= $quotemarks;
            $disallowed_after .= $quotemarks;
        }
        $disallowed_before .= "])";
        $disallowed_after .= "])";

        // A language file may replace the default assertions globally via
        // PARSER_CONTROL/KEYWORDS. Any further entries in that array (besides
        // the two global overrides counted in $x) switch on per-group handling.
        $parser_control_pergroup = false;
        if (isset($this->language_data['PARSER_CONTROL'])) {
            if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'])) {
                $x = 0; // check whether per-keyword-group parser_control is enabled
                if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'])) {
                    $disallowed_before = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'];
                    ++$x;
                }
                if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'])) {
                    $disallowed_after = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'];
                    ++$x;
                }
                $parser_control_pergroup = (count($this->language_data['PARSER_CONTROL']['KEYWORDS']) - $x) > 0;
            }
        }

        foreach (array_keys($this->language_data['KEYWORDS']) as $k) {
            // A group without an explicit permission entry is highlighted too
            if (!isset($this->lexic_permissions['KEYWORDS'][$k]) ||
                $this->lexic_permissions['KEYWORDS'][$k]) {

                // Case-insensitive groups get the PCRE "i" modifier
                $case_sensitive = $this->language_data['CASE_SENSITIVE'][$k];
                $modifiers = $case_sensitive ? '' : 'i';

                // NEW in 1.0.8 - per-keyword-group parser control
                $disallowed_before_local = $disallowed_before;
                $disallowed_after_local = $disallowed_after;
                if ($parser_control_pergroup && isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k])) {
                    if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'])) {
                        $disallowed_before_local =
                            $this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'];
                    }

                    if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'])) {
                        $disallowed_after_local =
                            $this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'];
                    }
                }

                // Tell the handle_keyword_replace callback which group is being processed
                $this->_kw_replace_group = $k;

                //NEW in 1.0.8, the cached regexp list
                // since we don't want PHP / PCRE to crash due to too large patterns we split them into smaller chunks
                for ($set = 0, $set_length = count($this->language_data['CACHED_KEYWORD_LISTS'][$k]); $set <  $set_length; ++$set) {
                    $keywordset =& $this->language_data['CACHED_KEYWORD_LISTS'][$k][$set];
                    // Might make a more unique string for putting the number in soon
                    // Basically, we don't put the styles in yet because then the styles themselves will
                    // get highlighted if the language has a CSS keyword in it (like CSS, for example ;))
                    $stuff_to_parse = preg_replace_callback(
                        "/$disallowed_before_local({$keywordset})(?!\<DOT\>(?:htm|php|aspx?))$disallowed_after_local/$modifiers",
                        array($this, 'handle_keyword_replace'),
                        $stuff_to_parse
                        );
                }
            }
        }

        // Regular expressions
        // A REGEXPS entry is either an array (indexed by GESHI_SEARCH,
        // GESHI_REPLACE, GESHI_MODIFIERS, GESHI_BEFORE and GESHI_AFTER) or a
        // plain pattern string. Matches are tagged with a "!REG3XP{key}!"
        // placeholder that is resolved to a style/class further down.
        foreach ($this->language_data['REGEXPS'] as $key => $regexp) {
            if ($this->lexic_permissions['REGEXPS'][$key]) {
                if (is_array($regexp)) {
                    if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
                        // produce valid HTML when we match multiple lines
                        // The _hmr_* properties carry the settings into the
                        // callback and are reset right after the call
                        $this->_hmr_replace = $regexp[GESHI_REPLACE];
                        $this->_hmr_before = $regexp[GESHI_BEFORE];
                        $this->_hmr_key = $key;
                        $this->_hmr_after = $regexp[GESHI_AFTER];
                        $stuff_to_parse = preg_replace_callback(
                            "/" . $regexp[GESHI_SEARCH] . "/{$regexp[GESHI_MODIFIERS]}",
                            array($this, 'handle_multiline_regexps'),
                            $stuff_to_parse);
                        $this->_hmr_replace = false;
                        $this->_hmr_before = '';
                        $this->_hmr_after = '';
                    } else {
                        $stuff_to_parse = preg_replace(
                            '/' . $regexp[GESHI_SEARCH] . '/' . $regexp[GESHI_MODIFIERS],
                            $regexp[GESHI_BEFORE] . '<|!REG3XP'. $key .'!>' . $regexp[GESHI_REPLACE] . '|>' . $regexp[GESHI_AFTER],
                            $stuff_to_parse);
                    }
                } else {
                    if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
                        // produce valid HTML when we match multiple lines
                        $this->_hmr_key = $key;
                        $stuff_to_parse = preg_replace_callback( "/(" . $regexp . ")/",
                                              array($this, 'handle_multiline_regexps'), $stuff_to_parse);
                        $this->_hmr_key = '';
                    } else {
                        $stuff_to_parse = preg_replace( "/(" . $regexp . ")/", "<|!REG3XP$key!>\\1|>", $stuff_to_parse);
                    }
                }
            }
        }

        // Highlight numbers. As of 1.0.8 we support different types of numbers
        $numbers_found = false;

        // The PRECHECK_RX test decides whether the per-format patterns are run at all
        if ($this->lexic_permissions['NUMBERS'] && preg_match($this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'], $stuff_to_parse )) {
            $numbers_found = true;

            //For each of the formats ...
            foreach($this->language_data['NUMBERS_RXCACHE'] as $id => $regexp) {
                //Check if it should be highlighted ...
                $stuff_to_parse = preg_replace($regexp, "<|/NUM!$id/>\\1|>", $stuff_to_parse);
            }
        }

        //
        // Now that's all done, replace /[number]/ with the correct styles
        //
        foreach (array_keys($this->language_data['KEYWORDS']) as $k) {
            if (!$this->use_classes) {
                // Inline style; empty when the language defines none for this group
                $attributes = ' style="' .
                    (isset($this->language_data['STYLES']['KEYWORDS'][$k]) ?
                    $this->language_data['STYLES']['KEYWORDS'][$k] : "") . '"';
            } else {
                $attributes = ' class="kw' . $k . '"';
            }
            $stuff_to_parse = str_replace("<|/$k/>", "<|$attributes>", $stuff_to_parse);
        }

        if ($numbers_found) {
            // Put number styles in
            foreach($this->language_data['NUMBERS_RXCACHE'] as $id => $regexp) {
                //Commented out for now, as this needs some review ...
                //                if ($numbers_permissions & $id) {
                //Get the appropriate style ...
                //Checking for unset styles is done by the style cache builder ...
                if (!$this->use_classes) {
                    $attributes = ' style="' . $this->language_data['STYLES']['NUMBERS'][$id] . '"';
                } else {
                    $attributes = ' class="nu'.$id.'"';
                }

                //Set in the correct styles ...
                $stuff_to_parse = str_replace("/NUM!$id/", $attributes, $stuff_to_parse);
                //                }
            }
        }

        // Highlight methods and fields in objects
        if ($this->lexic_permissions['METHODS'] && $this->language_data['OOLANG']) {
            // Default sub-patterns; each can be overridden through PARSER_CONTROL/OOLANG
            $oolang_spaces = "[\s]*";
            $oolang_before = "";
            $oolang_after = "[a-zA-Z][a-zA-Z0-9_]*";
            if (isset($this->language_data['PARSER_CONTROL'])) {
                if (isset($this->language_data['PARSER_CONTROL']['OOLANG'])) {
                    if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'])) {
                        $oolang_before = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'];
                    }
                    if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'])) {
                        $oolang_after = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'];
                    }
                    if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'])) {
                        $oolang_spaces = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'];
                    }
                }
            }

            foreach ($this->language_data['OBJECT_SPLITTERS'] as $key => $splitter) {
                // Only run the regexp when the splitter occurs in the text at all
                if (false !== strpos($stuff_to_parse, $splitter)) {
                    if (!$this->use_classes) {
                        $attributes = ' style="' . $this->language_data['STYLES']['METHODS'][$key] . '"';
                    } else {
                        $attributes = ' class="me' . $key . '"';
                    }
                    // Only the part matched by $oolang_after (group 4) is wrapped;
                    // prefix, splitter and spaces are emitted unchanged
                    $stuff_to_parse = preg_replace("/($oolang_before)(" . preg_quote($this->language_data['OBJECT_SPLITTERS'][$key], '/') . ")($oolang_spaces)($oolang_after)/", "\\1\\2\\3<|$attributes>\\4|>", $stuff_to_parse);
                }
            }
        }

        //
        // Highlight brackets. Yes, I've tried adding a semi-colon to this list.
        // You try it, and see what happens ;)
        // TODO: Fix lexic permissions not converting entities if shouldn't
        // be highlighting regardless
        //
        if ($this->lexic_permissions['BRACKETS']) {
            $stuff_to_parse = str_replace( $this->language_data['CACHE_BRACKET_MATCH'],
                              $this->language_data['CACHE_BRACKET_REPLACE'], $stuff_to_parse );
        }


        //FIX for symbol highlighting ...
        if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
            //Get all matches and throw away those within a block that is already highlighted... (i.e. matched by a regexp)
            $n_symbols = preg_match_all("/<\|(?:<DOT>|[^>])+>(?:(?!\|>).*?)\|>|<\/a>|(?:" . $this->language_data['SYMBOL_SEARCH'] . ")+(?![^<]+?>)/", $stuff_to_parse, $pot_symbols, PREG_OFFSET_CAPTURE | PREG_SET_ORDER);
            // Running difference between the offsets captured above and the
            // current positions in the (already partly rewritten) string
            $global_offset = 0;
            for ($s_id = 0; $s_id < $n_symbols; ++$s_id) {
                $symbol_match = $pot_symbols[$s_id][0][0];
                if (strpos($symbol_match, '<') !== false || strpos($symbol_match, '>') !== false) {
                    // already highlighted blocks _must_ include either < or >
                    // so if this conditional applies, we have to skip this match
                    // BenBE: UNLESS the block contains <SEMI> or <PIPE>
                    if(strpos($symbol_match, '<SEMI>') === false &&
                        strpos($symbol_match, '<PIPE>') === false) {
                        continue;
                    }
                }

                // if we reach this point, we have a valid match which needs to be highlighted

                $symbol_length = strlen($symbol_match);
                $symbol_offset = $pot_symbols[$s_id][0][1];
                unset($pot_symbols[$s_id]);
                $symbol_hl = "";

                // if we have multiple styles, we have to handle them properly
                if ($this->language_data['MULTIPLE_SYMBOL_GROUPS']) {
                    // Group of the previously emitted symbol; -1 means no span is open yet
                    $old_sym = -1;
                    // Split the current stuff to replace into its atomic symbols ...
                    preg_match_all("/" . $this->language_data['SYMBOL_SEARCH'] . "/", $symbol_match, $sym_match_syms, PREG_PATTERN_ORDER);
                    foreach ($sym_match_syms[0] as $sym_ms) {
                        //Check if consecutive symbols belong to the same group to save output ...
                        if (isset($this->language_data['SYMBOL_DATA'][$sym_ms])
                            && ($this->language_data['SYMBOL_DATA'][$sym_ms] != $old_sym)) {
                            // Group changed: close the open placeholder (if any) and open a new one
                            if (-1 != $old_sym) {
                                $symbol_hl .= "|>";
                            }
                            $old_sym = $this->language_data['SYMBOL_DATA'][$sym_ms];
                            if (!$this->use_classes) {
                                $symbol_hl .= '<| style="' . $this->language_data['STYLES']['SYMBOLS'][$old_sym] . '">';
                            } else {
                                $symbol_hl .= '<| class="sy' . $old_sym . '">';
                            }
                        }
                        $symbol_hl .= $sym_ms;
                    }
                    unset($sym_match_syms);

                    //Close remaining tags and insert the replacement at the right position ...
                    //Take caution if symbol_hl is empty to avoid doubled closing spans.
                    if (-1 != $old_sym) {
                        $symbol_hl .= "|>";
                    }
                } else {
                    // Single symbol group: the whole match gets style/class 0
                    if (!$this->use_classes) {
                        $symbol_hl = '<| style="' . $this->language_data['STYLES']['SYMBOLS'][0] . '">';
                    } else {
                        $symbol_hl = '<| class="sy0">';
                    }
                    $symbol_hl .= $symbol_match . '|>';
                }

                $stuff_to_parse = substr_replace($stuff_to_parse, $symbol_hl, $symbol_offset + $global_offset, $symbol_length);

                // since we replace old text with something of different size,
                // we'll have to keep track of the differences
                $global_offset += strlen($symbol_hl) - $symbol_length;
            }
        }
        //FIX for symbol highlighting ...

        // Add class/style for regexps
        foreach (array_keys($this->language_data['REGEXPS']) as $key) {
            if ($this->lexic_permissions['REGEXPS'][$key]) {
                if (is_callable($this->language_data['STYLES']['REGEXPS'][$key])) {
                    // Callable style: handle_regexps_callback computes the
                    // style per match and reads the key from $this->_rx_key
                    $this->_rx_key = $key;
                    $stuff_to_parse = preg_replace_callback("/!REG3XP$key!(.*)\|>/U",
                        array($this, 'handle_regexps_callback'),
                        $stuff_to_parse);
                } else {
                    if (!$this->use_classes) {
                        $attributes = ' style="' . $this->language_data['STYLES']['REGEXPS'][$key] . '"';
                    } else {
                        // A REGEXPS entry may name its own CSS class via
                        // GESHI_CLASS; otherwise "re{key}" is used
                        if (is_array($this->language_data['REGEXPS'][$key]) &&
                            array_key_exists(GESHI_CLASS, $this->language_data['REGEXPS'][$key])) {
                            $attributes = ' class="' .
                                $this->language_data['REGEXPS'][$key][GESHI_CLASS] . '"';
                        } else {
                           $attributes = ' class="re' . $key . '"';
                        }
                    }
                    $stuff_to_parse = str_replace("!REG3XP$key!", "$attributes", $stuff_to_parse);
                }
            }
        }

        // Replace <DOT> with . for urls
        $stuff_to_parse = str_replace('<DOT>', '.', $stuff_to_parse);
        // Replace <|UR1| with <a href= for urls also
        // (an inline link style is only added when one is set and classes are not in use)
        if (isset($this->link_styles[GESHI_LINK])) {
            if ($this->use_classes) {
                $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' href=', $stuff_to_parse);
            } else {
                $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' style="' . $this->link_styles[GESHI_LINK] . '" href=', $stuff_to_parse);
            }
        } else {
            $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' href=', $stuff_to_parse);
        }

        //
        // NOW we add the span thingy ;)
        //

        $stuff_to_parse = str_replace('<|', '<span', $stuff_to_parse);
        $stuff_to_parse = str_replace ( '|>', '</span>', $stuff_to_parse );
        // Drop the space that was prepended at the top of this method
        return substr($stuff_to_parse, 1);
    }
3660  
3661      /**
3662       * Sets the time taken to parse the code
3663       *
3664       * @param string $start_time The time when parsing started as returned by @see microtime()
3665       * @param string $end_time   The time when parsing ended as returned by @see microtime()
3666       * @since 1.0.2
3667       */
3668      protected function set_time($start_time, $end_time) {
3669          $start = explode(' ', $start_time);
3670          $end = explode(' ', $end_time);
3671          $this->time = $end[0] + $end[1] - $start[0] - $start[1];
3672      }
3673  
3674      /**
3675       * Gets the time taken to parse the code
3676       *
3677       * @return double The time taken to parse the code
3678       * @since  1.0.2
3679       */
3680      public function get_time() {
3681          return $this->time;
3682      }
3683  
3684      /**
3685       * Merges arrays recursively, overwriting values of the first array with values of later arrays
3686       *
3687       * @since 1.0.8
3688       */
3689      protected function merge_arrays() {
3690          $arrays = func_get_args();
3691          $narrays = count($arrays);
3692  
3693          // check arguments
3694          // comment out if more performance is necessary (in this case the foreach loop will trigger a warning if the argument is not an array)
3695          for ($i = 0; $i < $narrays; $i ++) {
3696              if (!is_array($arrays[$i])) {
3697                  // also array_merge_recursive returns nothing in this case
3698                  trigger_error('Argument #' . ($i+1) . ' is not an array - trying to merge array with scalar! Returning false!', E_USER_WARNING);
3699                  return false;
3700              }
3701          }
3702  
3703          // the first array is in the output set in every case
3704          $ret = $arrays[0];
3705  
3706          // merege $ret with the remaining arrays
3707          for ($i = 1; $i < $narrays; $i ++) {
3708              foreach ($arrays[$i] as $key => $value) {
3709                  if (is_array($value) && isset($ret[$key])) {
3710                      // if $ret[$key] is not an array you try to merge an scalar value with an array - the result is not defined (incompatible arrays)
3711                      // in this case the call will trigger an E_USER_WARNING and the $ret[$key] will be false.
3712                      $ret[$key] = $this->merge_arrays($ret[$key], $value);
3713                  } else {
3714                      $ret[$key] = $value;
3715                  }
3716              }
3717          }
3718  
3719          return $ret;
3720      }
3721  
3722      /**
3723       * Gets language information and stores it for later use
3724       *
3725       * @param string $file_name The filename of the language file you want to load
3726       * @since 1.0.0
3727       * @todo Needs to load keys for lexic permissions for keywords, regexps etc
3728       */
3729      protected function load_language($file_name) {
3730          if ($file_name == $this->loaded_language) {
3731              // this file is already loaded!
3732              return;
3733          }
3734  
3735          //Prepare some stuff before actually loading the language file
3736          $this->loaded_language = $file_name;
3737          $this->parse_cache_built = false;
3738          $this->enable_highlighting();
3739          $language_data = array();
3740  
3741          //Load the language file
3742          require $file_name;
3743  
3744          // Perhaps some checking might be added here later to check that
3745          // $language data is a valid thing but maybe not
3746          $this->language_data = $language_data;
3747  
3748          // Set strict mode if should be set
3749          $this->strict_mode = $this->language_data['STRICT_MODE_APPLIES'];
3750  
3751          // Set permissions for all lexics to true
3752          // so they'll be highlighted by default
3753          foreach (array_keys($this->language_data['KEYWORDS']) as $key) {
3754              if (!empty($this->language_data['KEYWORDS'][$key])) {
3755                  $this->lexic_permissions['KEYWORDS'][$key] = true;
3756              } else {
3757                  $this->lexic_permissions['KEYWORDS'][$key] = false;
3758              }
3759          }
3760  
3761          foreach (array_keys($this->language_data['COMMENT_SINGLE']) as $key) {
3762              $this->lexic_permissions['COMMENTS'][$key] = true;
3763          }
3764          foreach (array_keys($this->language_data['REGEXPS']) as $key) {
3765              $this->lexic_permissions['REGEXPS'][$key] = true;
3766          }
3767  
3768          // for BenBE and future code reviews:
3769          // we can use empty here since we only check for existance and emptiness of an array
3770          // if it is not an array at all but rather false or null this will work as intended as well
3771          // even if $this->language_data['PARSER_CONTROL'] is undefined this won't trigger a notice
3772          if (!empty($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'])) {
3773              foreach ($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'] as $flag => $value) {
3774                  // it's either true or false and maybe is true as well
3775                  $perm = $value !== GESHI_NEVER;
3776                  if ($flag == 'ALL') {
3777                      $this->enable_highlighting($perm);
3778                      continue;
3779                  }
3780                  if (!isset($this->lexic_permissions[$flag])) {
3781                      // unknown lexic permission
3782                      continue;
3783                  }
3784                  if (is_array($this->lexic_permissions[$flag])) {
3785                      foreach ($this->lexic_permissions[$flag] as $key => $val) {
3786                          $this->lexic_permissions[$flag][$key] = $perm;
3787                      }
3788                  } else {
3789                      $this->lexic_permissions[$flag] = $perm;
3790                  }
3791              }
3792              unset($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS']);
3793          }
3794  
3795          //Fix: Problem where hardescapes weren't handled if no ESCAPE_CHAR was given
3796          //You need to set one for HARDESCAPES only in this case.
3797          if(!isset($this->language_data['HARDCHAR'])) {
3798              $this->language_data['HARDCHAR'] = $this->language_data['ESCAPE_CHAR'];
3799          }
3800  
3801          //NEW in 1.0.8: Allow styles to be loaded from a separate file to override defaults
3802          $style_filename = substr($file_name, 0, -4) . '.style.php';
3803          if (is_readable($style_filename)) {
3804              //Clear any style_data that could have been set before ...
3805              if (isset($style_data)) {
3806                  unset($style_data);
3807              }
3808  
3809              //Read the Style Information from the style file
3810              include $style_filename;
3811  
3812              //Apply the new styles to our current language styles
3813              if (isset($style_data) && is_array($style_data)) {
3814                  $this->language_data['STYLES'] =
3815                      $this->merge_arrays($this->language_data['STYLES'], $style_data);
3816              }
3817          }
3818      }
3819  
3820      /**
3821       * Takes the parsed code and various options, and creates the HTML
3822       * surrounding it to make it look nice.
3823       *
3824       * @param  string $parsed_code The code already parsed (reference!)
3825       * @since  1.0.0
3826       */
3827      protected function finalise(&$parsed_code) {
3828          // Remove end parts of important declarations
3829          // This is BUGGY!! My fault for bad code: fix coming in 1.2
3830          // @todo Remove this crap
3831          if ($this->enable_important_blocks &&
3832              (strpos($parsed_code, $this->hsc(GESHI_START_IMPORTANT)) === false)) {
3833              $parsed_code = str_replace($this->hsc(GESHI_END_IMPORTANT), '', $parsed_code);
3834          }
3835  
3836          // Add HTML whitespace stuff if we're using the <div> header
3837          if ($this->header_type != GESHI_HEADER_PRE && $this->header_type != GESHI_HEADER_PRE_VALID) {
3838              $this->indent($parsed_code);
3839          }
3840  
3841          // purge some unnecessary stuff
3842          /** NOTE: memorypeak #1 */
3843          $parsed_code = preg_replace('#<span[^>]+>(\s*)</span>#', '\\1', $parsed_code);
3844  
3845          // If we are using IDs for line numbers, there needs to be an overall
3846          // ID set to prevent collisions.
3847          if ($this->add_ids && !$this->overall_id) {
3848              $this->overall_id = 'geshi-' . substr(md5(microtime()), 0, 4);
3849          }
3850  
3851          // Get code into lines
3852          /** NOTE: memorypeak #2 */
3853          $code = explode("\n", $parsed_code);
3854          $parsed_code = $this->header();
3855  
3856          // If we're using line numbers, we insert <li>s and appropriate
3857          // markup to style them (otherwise we don't need to do anything)
3858          if ($this->line_numbers != GESHI_NO_LINE_NUMBERS && $this->header_type != GESHI_HEADER_PRE_TABLE) {
3859              // If we're using the <pre> header, we shouldn't add newlines because
3860              // the <pre> will line-break them (and the <li>s already do this for us)
3861              $ls = ($this->header_type != GESHI_HEADER_PRE && $this->header_type != GESHI_HEADER_PRE_VALID) ? "\n" : '';
3862  
3863              // Foreach line...
3864              for ($i = 0, $n = count($code); $i < $n;) {
3865                  //Reset the attributes for a new line ...
3866                  $attrs = array();
3867  
3868                  // Make lines have at least one space in them if they're empty
3869                  // BenBE: Checking emptiness using trim instead of relying on blanks
3870                  if ('' == trim($code[$i])) {
3871                      $code[$i] = '&nbsp;';
3872                  }
3873  
3874                  // If this is a "special line"...
3875                  if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
3876                      $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
3877                      // Set the attributes to style the line
3878                      if ($this->use_classes) {
3879                          //$attr = ' class="li2"';
3880                          $attrs['class'][] = 'li2';
3881                          $def_attr = ' class="de2"';
3882                      } else {
3883                          //$attr = ' style="' . $this->line_style2 . '"';
3884                          $attrs['style'][] = $this->line_style2;
3885                          // This style "covers up" the special styles set for special lines
3886                          // so that styles applied to special lines don't apply to the actual
3887                          // code on that line
3888                          $def_attr = ' style="' . $this->code_style . '"';
3889                      }
3890                  } else {
3891                      if ($this->use_classes) {
3892                          //$attr = ' class="li1"';
3893                          $attrs['class'][] = 'li1';
3894                          $def_attr = ' class="de1"';
3895                      } else {
3896                          //$attr = ' style="' . $this->line_style1 . '"';
3897                          $attrs['style'][] = $this->line_style1;
3898                          $def_attr = ' style="' . $this->code_style . '"';
3899                      }
3900                  }
3901  
3902                  //Check which type of tag to insert for this line
3903                  if ($this->header_type == GESHI_HEADER_PRE_VALID) {
3904                      $start = "<pre$def_attr>";
3905                      $end = '</pre>';
3906                  } else {
3907                      // Span or div?
3908                      $start = "<div$def_attr>";
3909                      $end = '</div>';
3910                  }
3911  
3912                  ++$i;
3913  
3914                  // Are we supposed to use ids? If so, add them
3915                  if ($this->add_ids) {
3916                      $attrs['id'][] = "$this->overall_id-$i";
3917                  }
3918  
3919                  //Is this some line with extra styles???
3920                  if (in_array($i, $this->highlight_extra_lines)) {
3921                      if ($this->use_classes) {
3922                          if (isset($this->highlight_extra_lines_styles[$i])) {
3923                              $attrs['class'][] = "lx$i";
3924                          } else {
3925                              $attrs['class'][] = "ln-xtra";
3926                          }
3927                      } else {
3928                          array_push($attrs['style'], $this->get_line_style($i));
3929                      }
3930                  }
3931  
3932                  // Add in the line surrounded by appropriate list HTML
3933                  $attr_string = '';
3934                  foreach ($attrs as $key => $attr) {
3935                      $attr_string .= ' ' . $key . '="' . implode(' ', $attr) . '"';
3936                  }
3937  
3938                  $parsed_code .= "<li$attr_string>$start{$code[$i-1]}$end</li>$ls";
3939                  unset($code[$i - 1]);
3940              }
3941          } else {
3942              $n = count($code);
3943              if ($this->use_classes) {
3944                  $attributes = ' class="de1"';
3945              } else {
3946                  $attributes = ' style="'. $this->code_style .'"';
3947              }
3948              if ($this->header_type == GESHI_HEADER_PRE_VALID) {
3949                  $parsed_code .= '<pre'. $attributes .'>';
3950              } elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
3951                  if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
3952                      if ($this->use_classes) {
3953                          $attrs = ' class="ln"';
3954                      } else {
3955                          $attrs = ' style="'. $this->table_linenumber_style .'"';
3956                      }
3957                      $parsed_code .= '<td'.$attrs.'><pre'.$attributes.'>';
3958                      // get linenumbers
3959                      // we don't merge it with the for below, since it should be better for
3960                      // memory consumption this way
3961                      // @todo: but... actually it would still be somewhat nice to merge the two loops
3962                      //        the mem peaks are at different positions
3963                      for ($i = 0; $i < $n; ++$i) {
3964                          $close = 0;
3965                          // fancy lines
3966                          if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
3967                              $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
3968                              // Set the attributes to style the line
3969                              if ($this->use_classes) {
3970                                  $parsed_code .= '<span class="xtra li2"><span class="de2">';
3971                              } else {
3972                                  // This style "covers up" the special styles set for special lines
3973                                  // so that styles applied to special lines don't apply to the actual
3974                                  // code on that line
3975                                  $parsed_code .= '<span style="display:block;' . $this->line_style2 . '">'
3976                                                    .'<span style="' . $this->code_style .'">';
3977                              }
3978                              $close += 2;
3979                          }
3980                          //Is this some line with extra styles???
3981                          if (in_array($i + 1, $this->highlight_extra_lines)) {
3982                              if ($this->use_classes) {
3983                                  if (isset($this->highlight_extra_lines_styles[$i])) {
3984                                      $parsed_code .= "<span class=\"xtra lx$i\">";
3985                                  } else {
3986                                      $parsed_code .= "<span class=\"xtra ln-xtra\">";
3987                                  }
3988                              } else {
3989                                  $parsed_code .= "<span style=\"display:block;" . $this->get_line_style($i) . "\">";
3990                              }
3991                              ++$close;
3992                          }
3993                          $parsed_code .= $this->line_numbers_start + $i;
3994                          if ($close) {
3995                              $parsed_code .= str_repeat('</span>', $close);
3996                          } elseif ($i != $n) {
3997                              $parsed_code .= "\n";
3998                          }
3999                      }
4000                      $parsed_code .= '</pre></td><td'.$attributes.'>';
4001                  }
4002                  $parsed_code .= '<pre'. $attributes .'>';
4003              }
4004              // No line numbers, but still need to handle highlighting lines extra.
4005              // Have to use divs so the full width of the code is highlighted
4006              $close = 0;
4007              for ($i = 0; $i < $n; ++$i) {
4008                  // Make lines have at least one space in them if they're empty
4009                  // BenBE: Checking emptiness using trim instead of relying on blanks
4010                  if ('' == trim($code[$i])) {
4011                      $code[$i] = '&nbsp;';
4012                  }
4013                  // fancy lines
4014                  if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
4015                      $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
4016                      // Set the attributes to style the line
4017                      if ($this->use_classes) {
4018                          $parsed_code .= '<span class="xtra li2"><span class="de2">';
4019                      } else {
4020                          // This style "covers up" the special styles set for special lines
4021                          // so that styles applied to special lines don't apply to the actual
4022                          // code on that line
4023                          $parsed_code .= '<span style="display:block;' . $this->line_style2 . '">'
4024                                            .'<span style="' . $this->code_style .'">';
4025                      }
4026                      $close += 2;
4027                  }
4028                  //Is this some line with extra styles???
4029                  if (in_array($i + 1, $this->highlight_extra_lines)) {
4030                      if ($this->use_classes) {
4031                          if (isset($this->highlight_extra_lines_styles[$i])) {
4032                              $parsed_code .= "<span class=\"xtra lx$i\">";
4033                          } else {
4034                              $parsed_code .= "<span class=\"xtra ln-xtra\">";
4035                          }
4036                      } else {
4037                          $parsed_code .= "<span style=\"display:block;" . $this->get_line_style($i) . "\">";
4038                      }
4039                      ++$close;
4040                  }
4041  
4042                  $parsed_code .= $code[$i];
4043  
4044                  if ($close) {
4045                    $parsed_code .= str_repeat('</span>', $close);
4046                    $close = 0;
4047                  }
4048                  if ($i + 1 < $n) {
4049                      $parsed_code .= "\n";
4050                  }
4051                  unset($code[$i]);
4052              }
4053  
4054              if ($this->header_type == GESHI_HEADER_PRE_VALID || $this->header_type == GESHI_HEADER_PRE_TABLE) {
4055                  $parsed_code .= '</pre>';
4056              }
4057              if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4058                  $parsed_code .= '</td>';
4059              }
4060          }
4061  
4062          $parsed_code .= $this->footer();
4063      }
4064  
4065      /**
4066       * Creates the header for the code block (with correct attributes)
4067       *
4068       * @return string The header for the code block
4069       * @since  1.0.0
4070       */
4071      protected function header() {
4072          // Get attributes needed
4073          /**
4074           * @todo   Document behaviour change - class is outputted regardless of whether
4075           *         we're using classes or not. Same with style
4076           */
4077          $attributes = ' class="' . $this->_genCSSName($this->language);
4078          if ($this->overall_class != '') {
4079              $attributes .= " ".$this->_genCSSName($this->overall_class);
4080          }
4081          $attributes .= '"';
4082  
4083          if ($this->overall_id != '') {
4084              $attributes .= " id=\"{$this->overall_id}\"";
4085          }
4086          if ($this->overall_style != '' && !$this->use_classes) {
4087              $attributes .= ' style="' . $this->overall_style . '"';
4088          }
4089  
4090          $ol_attributes = '';
4091  
4092          if ($this->line_numbers_start != 1) {
4093              $ol_attributes .= ' start="' . $this->line_numbers_start . '"';
4094          }
4095  
4096          // Get the header HTML
4097          $header = $this->header_content;
4098          if ($header) {
4099              if ($this->header_type == GESHI_HEADER_PRE || $this->header_type == GESHI_HEADER_PRE_VALID) {
4100                  $header = str_replace("\n", '', $header);
4101              }
4102              $header = $this->replace_keywords($header);
4103  
4104              if ($this->use_classes) {
4105                  $attr = ' class="head"';
4106              } else {
4107                  $attr = " style=\"{$this->header_content_style}\"";
4108              }
4109              if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4110                  $header = "<thead><tr><td colspan=\"2\" $attr>$header</td></tr></thead>";
4111              } else {
4112                  $header = "<div$attr>$header</div>";
4113              }
4114          }
4115  
4116          if (GESHI_HEADER_NONE == $this->header_type) {
4117              if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4118                  return "$header<ol$attributes$ol_attributes>";
4119              }
4120              return $header . ($this->force_code_block ? '<div>' : '');
4121          }
4122  
4123          // Work out what to return and do it
4124          if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4125              if ($this->header_type == GESHI_HEADER_PRE) {
4126                  return "<pre$attributes>$header<ol$ol_attributes>";
4127              } elseif ($this->header_type == GESHI_HEADER_DIV ||
4128                  $this->header_type == GESHI_HEADER_PRE_VALID) {
4129                  return "<div$attributes>$header<ol$ol_attributes>";
4130              } elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
4131                  return "<table$attributes>$header<tbody><tr class=\"li1\">";
4132              }
4133          } else {
4134              if ($this->header_type == GESHI_HEADER_PRE) {
4135                  return "<pre$attributes>$header"  .
4136                      ($this->force_code_block ? '<div>' : '');
4137              } else {
4138                  return "<div$attributes>$header" .
4139                      ($this->force_code_block ? '<div>' : '');
4140              }
4141          }
4142      }
4143  
4144      /**
4145       * Returns the footer for the code block.
4146       *
4147       * @return string The footer for the code block
4148       * @since  1.0.0
4149       */
4150      protected function footer() {
4151          $footer = $this->footer_content;
4152          if ($footer) {
4153              if ($this->header_type == GESHI_HEADER_PRE) {
4154                  $footer = str_replace("\n", '', $footer);;
4155              }
4156              $footer = $this->replace_keywords($footer);
4157  
4158              if ($this->use_classes) {
4159                  $attr = ' class="foot"';
4160              } else {
4161                  $attr = " style=\"{$this->footer_content_style}\"";
4162              }
4163              if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4164                  $footer = "<tfoot><tr><td colspan=\"2\">$footer</td></tr></tfoot>";
4165              } else {
4166                  $footer = "<div$attr>$footer</div>";
4167              }
4168          }
4169  
4170          if (GESHI_HEADER_NONE == $this->header_type) {
4171              return ($this->line_numbers != GESHI_NO_LINE_NUMBERS) ? '</ol>' . $footer : $footer;
4172          }
4173  
4174          if ($this->header_type == GESHI_HEADER_DIV || $this->header_type == GESHI_HEADER_PRE_VALID) {
4175              if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4176                  return "</ol>$footer</div>";
4177              }
4178              return ($this->force_code_block ? '</div>' : '') .
4179                  "$footer</div>";
4180          }
4181          elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
4182              if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4183                  return "</tr></tbody>$footer</table>";
4184              }
4185              return ($this->force_code_block ? '</div>' : '') .
4186                  "$footer</div>";
4187          }
4188          else {
4189              if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4190                  return "</ol>$footer</pre>";
4191              }
4192              return ($this->force_code_block ? '</div>' : '') .
4193                  "$footer</pre>";
4194          }
4195      }
4196  
4197      /**
4198       * Replaces certain keywords in the header and footer with
4199       * certain configuration values
4200       *
4201       * @param  string $instr The header or footer content to do replacement on
4202       * @return string The header or footer with replaced keywords
4203       * @since  1.0.2
4204       */
4205      protected function replace_keywords($instr) {
4206          $keywords = $replacements = array();
4207  
4208          $keywords[] = '<TIME>';
4209          $keywords[] = '{TIME}';
4210          $replacements[] = $replacements[] = number_format($time = $this->get_time(), 3);
4211  
4212          $keywords[] = '<LANGUAGE>';
4213          $keywords[] = '{LANGUAGE}';
4214          $replacements[] = $replacements[] = $this->language_data['LANG_NAME'];
4215  
4216          $keywords[] = '<VERSION>';
4217          $keywords[] = '{VERSION}';
4218          $replacements[] = $replacements[] = GESHI_VERSION;
4219  
4220          $keywords[] = '<SPEED>';
4221          $keywords[] = '{SPEED}';
4222          if ($time <= 0) {
4223              $speed = 'N/A';
4224          } else {
4225              $speed = strlen($this->source) / $time;
4226              if ($speed >= 1024) {
4227                  $speed = sprintf("%.2f KB/s", $speed / 1024.0);
4228              } else {
4229                  $speed = sprintf("%.0f B/s", $speed);
4230              }
4231          }
4232          $replacements[] = $replacements[] = $speed;
4233  
4234          return str_replace($keywords, $replacements, $instr);
4235      }
4236  
4237      /**
4238       * Secure replacement for PHP built-in function htmlspecialchars().
4239       *
4240       * See ticket #427 (http://wush.net/trac/wikka/ticket/427) for the rationale
4241       * for this replacement function.
4242       *
4243       * The INTERFACE for this function is almost the same as that for
4244       * htmlspecialchars(), with the same default for quote style; however, there
4245       * is no 'charset' parameter. The reason for this is as follows:
4246       *
4247       * The PHP docs say:
4248       *      "The third argument charset defines character set used in conversion."
4249       *
4250       * I suspect PHP's htmlspecialchars() is working at the byte-value level and
4251       * thus _needs_ to know (or assume) a character set because the special
4252       * characters to be replaced could exist at different code points in
4253       * different character sets. (If indeed htmlspecialchars() works at
4254       * byte-value level that goes some  way towards explaining why the
4255       * vulnerability would exist in this function, too, and not only in
4256       * htmlentities() which certainly is working at byte-value level.)
4257       *
4258       * This replacement function however works at character level and should
4259       * therefore be "immune" to character set differences - so no charset
4260       * parameter is needed or provided. If a third parameter is passed, it will
4261       * be silently ignored.
4262       *
4263       * In the OUTPUT there is a minor difference in that we use '&#39;' instead
4264       * of PHP's '&#039;' for a single quote: this provides compatibility with
4265       *      get_html_translation_table(HTML_SPECIALCHARS, ENT_QUOTES)
4266       * (see comment by mikiwoz at yahoo dot co dot uk on
4267       * http://php.net/htmlspecialchars); it also matches the entity definition
4268       * for XML 1.0
4269       * (http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Special_characters).
4270       * Like PHP we use a numeric character reference instead of '&apos;' for the
4271       * single quote. For the other special characters we use the named entity
4272       * references, as PHP is doing.
4273       *
4274       * @author      {@link http://wikkawiki.org/JavaWoman Marjolein Katsma}
4275       *
4276       * @license     http://www.gnu.org/copyleft/lgpl.html
4277       *              GNU Lesser General Public License
4278       * @copyright   Copyright 2007, {@link http://wikkawiki.org/CreditsPage
4279       *              Wikka Development Team}
4280       *
4281       * @param       string  $string string to be converted
4282       * @param       integer $quote_style
4283       *                      - ENT_COMPAT:   escapes &, <, > and double quote (default)
4284       *                      - ENT_NOQUOTES: escapes only &, < and >
4285       *                      - ENT_QUOTES:   escapes &, <, >, double and single quotes
4286       * @return      string  converted string
4287       * @since       1.0.7.18
4288       */
4289      protected function hsc($string, $quote_style = ENT_COMPAT) {
4290          // init
4291          static $aTransSpecchar = array(
4292              '&' => '&amp;',
4293              '"' => '&quot;',
4294              '<' => '&lt;',
4295              '>' => '&gt;',
4296  
4297              //This fix is related to SF#1923020, but has to be applied
4298              //regardless of actually highlighting symbols.
4299  
4300              //Circumvent a bug with symbol highlighting
4301              //This is required as ; would produce undesirable side-effects if it
4302              //was not to be processed as an entity.
4303              ';' => '<SEMI>', // Force ; to be processed as entity
4304              '|' => '<PIPE>' // Force | to be processed as entity
4305              );                      // ENT_COMPAT set
4306  
4307          switch ($quote_style) {
4308              case ENT_NOQUOTES: // don't convert double quotes
4309                  unset($aTransSpecchar['"']);
4310                  break;
4311              case ENT_QUOTES: // convert single quotes as well
4312                  $aTransSpecchar["'"] = '&#39;'; // (apos) htmlspecialchars() uses '&#039;'
4313                  break;
4314          }
4315  
4316          // return translated string
4317          return strtr($string, $aTransSpecchar);
4318      }
4319  
4320      /**
4321       * Generate a CSS class name from a given string.
4322       * Prevents invalid CSS classes.
4323       *
4324       * @param string $name Proposed class name
4325       *
4326       * @return string Safe CSS class name
4327       */
4328      protected function _genCSSName($name) {
4329          return (is_numeric($name[0]) ? '_' : '') . $name;
4330      }
4331  
4332      /**
4333       * Returns a stylesheet for the highlighted code. If $economy mode
4334       * is true, we only return the stylesheet declarations that matter for
4335       * this code block instead of the whole thing
4336       *
4337       * @param  boolean $economy_mode Whether to use economy mode or not
4338       * @return string A stylesheet built on the data for the current language
4339       * @since  1.0.0
4340       */
4341      public function get_stylesheet($economy_mode = true) {
4342          // If there's an error, chances are that the language file
4343          // won't have populated the language data file, so we can't
4344          // risk getting a stylesheet...
4345          if ($this->error) {
4346              return '';
4347          }
4348  
4349          //Check if the style rearrangements have been processed ...
4350          //This also does some preprocessing to check which style groups are useable ...
4351          if(!isset($this->language_data['NUMBERS_CACHE'])) {
4352              $this->build_style_cache();
4353          }
4354  
4355          // First, work out what the selector should be. If there's an ID,
4356          // that should be used, the same for a class. Otherwise, a selector
4357          // of '' means that these styles will be applied anywhere
4358          if ($this->overall_id) {
4359              $selector = '#' . $this->_genCSSName($this->overall_id);
4360          } else {
4361              $selector = '.' . $this->_genCSSName($this->language);
4362              if ($this->overall_class) {
4363                  $selector .= '.' . $this->_genCSSName($this->overall_class);
4364              }
4365          }
4366          $selector .= ' ';
4367  
4368          // Header of the stylesheet
4369          if (!$economy_mode) {
4370              $stylesheet = "/**\n".
4371                  " * GeSHi Dynamically Generated Stylesheet\n".
4372                  " * --------------------------------------\n".
4373                  " * Dynamically generated stylesheet for {$this->language}\n".
4374                  " * CSS class: {$this->overall_class}, CSS id: {$this->overall_id}\n".
4375                  " * GeSHi (C) 2004 - 2007 Nigel McNie, 2007 - 2014 Benny Baumann\n" .
4376                  " * (http://qbnz.com/highlighter/ and http://geshi.org/)\n".
4377                  " * --------------------------------------\n".
4378                  " */\n";
4379          } else {
4380              $stylesheet = "/**\n".
4381                  " * GeSHi (C) 2004 - 2007 Nigel McNie, 2007 - 2014 Benny Baumann\n" .
4382                  " * (http://qbnz.com/highlighter/ and http://geshi.org/)\n".
4383                  " */\n";
4384          }
4385  
4386          // Set the <ol> to have no effect at all if there are line numbers
4387          // (<ol>s have margins that should be destroyed so all layout is
4388          // controlled by the set_overall_style method, which works on the
4389          // <pre> or <div> container). Additionally, set default styles for lines
4390          if (!$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4391              //$stylesheet .= "$selector, {$selector}ol, {$selector}ol li {margin: 0;}\n";
4392              $stylesheet .= "$selector.de1, $selector.de2 {{$this->code_style}}\n";
4393          }
4394  
4395          // Add overall styles
4396          // note: neglect economy_mode, empty styles are meaningless
4397          if ($this->overall_style != '') {
4398              $stylesheet .= "$selector {{$this->overall_style}}\n";
4399          }
4400  
4401          // Add styles for links
4402          // note: economy mode does not make _any_ sense here
4403          //       either the style is empty and thus no selector is needed
4404          //       or the appropriate key is given.
4405          foreach ($this->link_styles as $key => $style) {
4406              if ($style != '') {
4407                  switch ($key) {
4408                      case GESHI_LINK:
4409                          $stylesheet .= "{$selector}a:link {{$style}}\n";
4410                          break;
4411                      case GESHI_HOVER:
4412                          $stylesheet .= "{$selector}a:hover {{$style}}\n";
4413                          break;
4414                      case GESHI_ACTIVE:
4415                          $stylesheet .= "{$selector}a:active {{$style}}\n";
4416                          break;
4417                      case GESHI_VISITED:
4418                          $stylesheet .= "{$selector}a:visited {{$style}}\n";
4419                          break;
4420                  }
4421              }
4422          }
4423  
4424          // Header and footer
4425          // note: neglect economy_mode, empty styles are meaningless
4426          if ($this->header_content_style != '') {
4427              $stylesheet .= "$selector.head {{$this->header_content_style}}\n";
4428          }
4429          if ($this->footer_content_style != '') {
4430              $stylesheet .= "$selector.foot {{$this->footer_content_style}}\n";
4431          }
4432  
4433          // Styles for important stuff
4434          // note: neglect economy_mode, empty styles are meaningless
4435          if ($this->important_styles != '') {
4436              $stylesheet .= "$selector.imp {{$this->important_styles}}\n";
4437          }
4438  
4439          // Simple line number styles
4440          if ((!$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS) && $this->line_style1 != '') {
4441              $stylesheet .= "{$selector}li, {$selector}.li1 {{$this->line_style1}}\n";
4442          }
4443          if ((!$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS) && $this->table_linenumber_style != '') {
4444              $stylesheet .= "{$selector}.ln {{$this->table_linenumber_style}}\n";
4445          }
4446          // If there is a style set for fancy line numbers, echo it out
4447          if ((!$economy_mode || $this->line_numbers == GESHI_FANCY_LINE_NUMBERS) && $this->line_style2 != '') {
4448              $stylesheet .= "{$selector}.li2 {{$this->line_style2}}\n";
4449          }
4450  
4451          // note: empty styles are meaningless
4452          foreach ($this->language_data['STYLES']['KEYWORDS'] as $group => $styles) {
4453              if ($styles != '' && (!$economy_mode ||
4454                  (isset($this->lexic_permissions['KEYWORDS'][$group]) &&
4455                  $this->lexic_permissions['KEYWORDS'][$group]))) {
4456                  $stylesheet .= "$selector.kw$group {{$styles}}\n";
4457              }
4458          }
4459          foreach ($this->language_data['STYLES']['COMMENTS'] as $group => $styles) {
4460              if ($styles != '' && (!$economy_mode ||
4461                  (isset($this->lexic_permissions['COMMENTS'][$group]) &&
4462                  $this->lexic_permissions['COMMENTS'][$group]) ||
4463                  (!empty($this->language_data['COMMENT_REGEXP']) &&
4464                  !empty($this->language_data['COMMENT_REGEXP'][$group])))) {
4465                  $stylesheet .= "$selector.co$group {{$styles}}\n";
4466              }
4467          }
4468          foreach ($this->language_data['STYLES']['ESCAPE_CHAR'] as $group => $styles) {
4469              if ($styles != '' && (!$economy_mode || $this->lexic_permissions['ESCAPE_CHAR'])) {
4470                  // NEW: since 1.0.8 we have to handle hardescapes
4471                  if ($group === 'HARD') {
4472                      $group = '_h';
4473                  }
4474                  $stylesheet .= "$selector.es$group {{$styles}}\n";
4475              }
4476          }
4477          foreach ($this->language_data['STYLES']['BRACKETS'] as $group => $styles) {
4478              if ($styles != '' && (!$economy_mode || $this->lexic_permissions['BRACKETS'])) {
4479                  $stylesheet .= "$selector.br$group {{$styles}}\n";
4480              }
4481          }
4482          foreach ($this->language_data['STYLES']['SYMBOLS'] as $group => $styles) {
4483              if ($styles != '' && (!$economy_mode || $this->lexic_permissions['SYMBOLS'])) {
4484                  $stylesheet .= "$selector.sy$group {{$styles}}\n";
4485              }
4486          }
4487          foreach ($this->language_data['STYLES']['STRINGS'] as $group => $styles) {
4488              if ($styles != '' && (!$economy_mode || $this->lexic_permissions['STRINGS'])) {
4489                  // NEW: since 1.0.8 we have to handle hardquotes
4490                  if ($group === 'HARD') {
4491                      $group = '_h';
4492                  }
4493                  $stylesheet .= "$selector.st$group {{$styles}}\n";
4494              }
4495          }
4496          foreach ($this->language_data['STYLES']['NUMBERS'] as $group => $styles) {
4497              if ($styles != '' && (!$economy_mode || $this->lexic_permissions['NUMBERS'])) {
4498                  $stylesheet .= "$selector.nu$group {{$styles}}\n";
4499              }
4500          }
4501          foreach ($this->language_data['STYLES']['METHODS'] as $group => $styles) {
4502              if ($styles != '' && (!$economy_mode || $this->lexic_permissions['METHODS'])) {
4503                  $stylesheet .= "$selector.me$group {{$styles}}\n";
4504              }
4505          }
4506          // note: neglect economy_mode, empty styles are meaningless
4507          foreach ($this->language_data['STYLES']['SCRIPT'] as $group => $styles) {
4508              if ($styles != '') {
4509                  $stylesheet .= "$selector.sc$group {{$styles}}\n";
4510              }
4511          }
4512          foreach ($this->language_data['STYLES']['REGEXPS'] as $group => $styles) {
4513              if ($styles != '' && (!$economy_mode ||
4514                  (isset($this->lexic_permissions['REGEXPS'][$group]) &&
4515                  $this->lexic_permissions['REGEXPS'][$group]))) {
4516                  if (is_array($this->language_data['REGEXPS'][$group]) &&
4517                      array_key_exists(GESHI_CLASS, $this->language_data['REGEXPS'][$group])) {
4518                      $stylesheet .= "$selector.";
4519                      $stylesheet .= $this->language_data['REGEXPS'][$group][GESHI_CLASS];
4520                      $stylesheet .= " {{$styles}}\n";
4521                  } else {
4522                      $stylesheet .= "$selector.re$group {{$styles}}\n";
4523                  }
4524              }
4525          }
4526          // Styles for lines being highlighted extra
4527          if (!$economy_mode || (count($this->highlight_extra_lines)!=count($this->highlight_extra_lines_styles))) {
4528              $stylesheet .= "{$selector}.ln-xtra, {$selector}li.ln-xtra, {$selector}div.ln-xtra {{$this->highlight_extra_lines_style}}\n";
4529          }
4530          $stylesheet .= "{$selector}span.xtra { display:block; }\n";
4531          foreach ($this->highlight_extra_lines_styles as $lineid => $linestyle) {
4532              $stylesheet .= "{$selector}.lx$lineid, {$selector}li.lx$lineid, {$selector}div.lx$lineid {{$linestyle}}\n";
4533          }
4534  
4535          return $stylesheet;
4536      }
4537  
4538      /**
4539       * Gets the style that is used for the specified line
4540       *
4541       * @param int $line The line number information is requested for
4542       * @since 1.0.7.21
4543       */
4544      protected function get_line_style($line) {
4545          $style = null;
4546          if (isset($this->highlight_extra_lines_styles[$line])) {
4547              $style = $this->highlight_extra_lines_styles[$line];
4548          } else { // if no "extra" style assigned
4549              $style = $this->highlight_extra_lines_style;
4550          }
4551  
4552          return $style;
4553      }
4554  
/**
 * Creates an optimized list of regular expressions from an array of strings
 * by folding common prefixes into shared non-capturing groups.
 *
 * Example:
 * <code>$list = array('faa', 'foo', 'foobar');
 *          => array('f(?:aa|oo(?:bar)?)')</code>
 *
 * The entries are sorted, quoted with preg_quote() and merged into a prefix
 * tree that is serialized by _optimize_regexp_list_tokens_to_string().
 * A new expression is started whenever the accumulated token length exceeds
 * GESHI_MAX_PCRE_LENGTH, or when GESHI_MAX_PCRE_SUBPATTERNS is non-zero and
 * appending would exceed that number of "(?:" groups.
 *
 * @param array  $list             array of (unquoted) strings
 * @param string $regexp_delimiter your regular expression delimiter, @see preg_quote()
 * @return array list of regular expression strings (alternations, without delimiters)
 * @author Milian Wolff <mail@milianw.de>
 * @since 1.0.8
 */
protected function optimize_regexp_list($list, $regexp_delimiter = '/') {
    $regex_chars = array('.', '\\', '+', '-', '*', '?', '[', '^', ']', '$',
        '(', ')', '{', '}', '=', '!', '<', '>', '|', ':', $regexp_delimiter);
    sort($list);
    $regexp_list = array('');
    $num_subpatterns = 0;
    $list_key = 0;

    // the tokens which we will use to generate the regexp list
    $tokens = array();
    // the chain of keys (one per nesting level) that leads to the previous entry
    $prev_keys = array();
    // go through all entries of the list and generate the token list
    $cur_len = 0;
    for ($i = 0, $i_max = count($list); $i < $i_max; ++$i) {
        if ($cur_len > GESHI_MAX_PCRE_LENGTH) {
            // seems like the length of this pcre is growing exorbitantly:
            // flush the tokens into an expression of their own, reusing the
            // current slot while it is still empty so that no empty regexp
            // is left behind in the result
            if ($regexp_list[$list_key] !== '') {
                ++$list_key;
            }
            $regexp_list[$list_key] = $this->_optimize_regexp_list_tokens_to_string($tokens);
            $num_subpatterns = substr_count($regexp_list[$list_key], '(?:');
            $tokens = array();
            // the key chain points into the tree that was just thrown away;
            // keeping it would make the next entry descend into missing nodes
            $prev_keys = array();
            $cur_len = 0;
        }
        $level = 0;
        $entry = preg_quote((string) $list[$i], $regexp_delimiter);
        $pointer = &$tokens;
        // properly assign the new entry to the correct position in the token array
        // possibly generate smaller common denominator keys
        while (true) {
            // get the common denominator
            if (isset($prev_keys[$level])) {
                // strict comparison: with == numeric-looking strings such as
                // '1' and '01' would be treated as duplicates and dropped
                if ($prev_keys[$level] === $entry) {
                    // this is a duplicate entry, skip it
                    continue 2;
                }
                $char = 0;
                while (isset($entry[$char]) && isset($prev_keys[$level][$char])
                        && $entry[$char] === $prev_keys[$level][$char]) {
                    ++$char;
                }
                if ($char > 0) {
                    // this entry has at least some chars in common with the current key
                    if ($char == strlen($prev_keys[$level])) {
                        // current key is totally matched, i.e. this entry has just some bits appended
                        $pointer = &$pointer[$prev_keys[$level]];
                    } else {
                        // only part of the keys match
                        $new_key_part1 = substr($prev_keys[$level], 0, $char);
                        $new_key_part2 = substr($prev_keys[$level], $char);

                        // an odd number of trailing backslashes means the common
                        // part ends in the middle of an escape sequence produced
                        // by preg_quote() (e.g. "\#" or "\000"); splitting there
                        // would detach the backslash from the char it escapes
                        $trailing_backslashes = strlen($new_key_part1) - strlen(rtrim($new_key_part1, '\\'));
                        $splits_escape = ($trailing_backslashes % 2 === 1);

                        if ($splits_escape
                            || in_array($new_key_part1[0], $regex_chars)
                            || in_array($new_key_part2[0], $regex_chars)) {
                            // this is bad, a regex char as first character:
                            // store the entry unsplit; the next iteration sees it
                            // as the previous key and leaves via the duplicate check
                            $pointer[$entry] = array('' => true);
                            array_splice($prev_keys, $level, count($prev_keys), $entry);
                            $cur_len += strlen($entry);
                            continue;
                        } else {
                            // relocate previous tokens
                            $pointer[$new_key_part1] = array($new_key_part2 => $pointer[$prev_keys[$level]]);
                            unset($pointer[$prev_keys[$level]]);
                            $pointer = &$pointer[$new_key_part1];
                            // recreate key index
                            array_splice($prev_keys, $level, count($prev_keys), array($new_key_part1, $new_key_part2));
                            $cur_len += strlen($new_key_part2);
                        }
                    }
                    ++$level;
                    // cast: substr() returned false instead of '' before PHP 8.0
                    // in some edge cases, which must not end up as an array key
                    $entry = (string) substr($entry, $char);
                    continue;
                }
                // else: fall through, i.e. no common denominator was found
            }
            if ($level == 0 && !empty($tokens)) {
                // we can dump current tokens into the string and throw them away afterwards
                $new_entry = $this->_optimize_regexp_list_tokens_to_string($tokens);
                $new_subpatterns = substr_count($new_entry, '(?:');
                if (GESHI_MAX_PCRE_SUBPATTERNS && $num_subpatterns + $new_subpatterns > GESHI_MAX_PCRE_SUBPATTERNS) {
                    $regexp_list[++$list_key] = $new_entry;
                    $num_subpatterns = $new_subpatterns;
                } else {
                    if (!empty($regexp_list[$list_key])) {
                        $new_entry = '|' . $new_entry;
                    }
                    $regexp_list[$list_key] .= $new_entry;
                    $num_subpatterns += $new_subpatterns;
                }
                $tokens = array();
                $cur_len = 0;
            }
            // no further common denominator found
            $pointer[$entry] = array('' => true);
            array_splice($prev_keys, $level, count($prev_keys), $entry);

            $cur_len += strlen($entry);
            break;
        }
        unset($list[$i]);
    }
    // make sure the last tokens get converted as well
    $new_entry = $this->_optimize_regexp_list_tokens_to_string($tokens);
    if (GESHI_MAX_PCRE_SUBPATTERNS && $num_subpatterns + substr_count($new_entry, '(?:') > GESHI_MAX_PCRE_SUBPATTERNS) {
        if (!empty($regexp_list[$list_key])) {
            ++$list_key;
        }
        $regexp_list[$list_key] = $new_entry;
    } else {
        if (!empty($regexp_list[$list_key])) {
            $new_entry = '|' . $new_entry;
        }
        $regexp_list[$list_key] .= $new_entry;
    }
    return $regexp_list;
}
4681  
/**
 * Creates the regexp string for a token array built by optimize_regexp_list().
 * You should not call this function directly, @see $this->optimize_regexp_list().
 *
 * Every key of $tokens is a (quoted) piece of text, its value the array of
 * pieces that may follow it. An empty-string key inside that array marks that
 * the text read so far is itself a complete entry, which makes the following
 * group optional.
 *
 * @param array $tokens   array of tokens
 * @param bool  $recursed to know whether we recursed or not
 * @return string the alternation for $tokens, without a trailing pipe
 * @author Milian Wolff <mail@milianw.de>
 * @since 1.0.8
 */
protected function _optimize_regexp_list_tokens_to_string(&$tokens, $recursed = false) {
    $list = '';
    foreach ($tokens as $token => $sub_tokens) {
        $list .= $token;
        $close_entry = isset($sub_tokens['']);
        unset($sub_tokens['']);
        if (!empty($sub_tokens)) {
            $list .= '(?:' . $this->_optimize_regexp_list_tokens_to_string($sub_tokens, true) . ')';
            if ($close_entry) {
                // make sub_tokens optional
                $list .= '?';
            }
        }
        $list .= '|';
    }
    if (!$recursed) {
        // do some optimizations, only once on the completely assembled string
        // (a former "common trailing strings" optimization was buggy and is
        // intentionally not applied)

        // (?:p)? => p?
        $list = preg_replace('#\(\?\:(.)\)\?#', '\1?', $list);
        // (?:a|b|c|d|...) => [abcd...]
        // TODO: a|bb|c => [ac]|bb
        static $callback_2;
        if (!isset($callback_2)) {
            $callback_2 = function($matches) {
                return "[" . str_replace("|", "", $matches[1]) . "]";
            };
        }
        // Each alternative must be one plain character: a backslash would be
        // the start of an escape sequence (e.g. "\|"), and "|", "(" or ")"
        // are structural. Matching those, as a bare "." did, could swallow
        // the closing parenthesis of the group and yield an invalid pattern.
        $list = preg_replace_callback('#\(\?\:((?:[^\\\\|()\n]\|)+[^\\\\|()\n])\)#', $callback_2, $list);
    }
    // return $list without trailing pipe
    return substr($list, 0, -1);
}
4728  } // End Class GeSHi
4729  
4730  
if (!function_exists('geshi_highlight')) {
    /**
     * Convenience wrapper to highlight a piece of code in one call, modelled
     * after highlight_string().
     *
     * The code is parsed with the header type set to GESHI_HEADER_NONE and the
     * result is wrapped in a <code> element.
     *
     * @param string $string   The code to highlight
     * @param string $language The language to highlight the code in
     * @param string $path     The path to the language files. Can be left blank;
     *                         as from version 1.0.7 the path should be automatically detected
     * @param boolean $return  Whether to return the result or to echo
     * @return string|bool The highlighted code if $return is true; otherwise the
     *                     code is echoed and the result is false when the
     *                     highlighter reports an error, true if not
     * @since 1.0.2
     */
    function geshi_highlight($string, $language, $path = null, $return = false) {
        $highlighter = new GeSHi($string, $language, $path);
        $highlighter->set_header_type(GESHI_HEADER_NONE);

        $markup = '<code>' . $highlighter->parse_code() . '</code>';
        if ($return) {
            return $markup;
        }

        echo $markup;

        // report success as a boolean once the output has been sent
        return !$highlighter->error();
    }
}