[ Index ]

PHP Cross Reference of DokuWiki

title

Body

[close]

/vendor/easybook/geshi/ -> geshi.php (source)

   1  <?php
   2  /**
   3   * GeSHi - Generic Syntax Highlighter
   4   *
   5   * The GeSHi class for Generic Syntax Highlighting. Please refer to the
   6   * documentation at http://qbnz.com/highlighter/documentation.php for more
   7   * information about how to use this class.
   8   *
   9   * For changes, release notes, TODOs etc, see the relevant files in the docs/
  10   * directory.
  11   *
  12   *   This file is part of GeSHi.
  13   *
  14   *  GeSHi is free software; you can redistribute it and/or modify
  15   *  it under the terms of the GNU General Public License as published by
  16   *  the Free Software Foundation; either version 2 of the License, or
  17   *  (at your option) any later version.
  18   *
  19   *  GeSHi is distributed in the hope that it will be useful,
  20   *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  21   *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  22   *  GNU General Public License for more details.
  23   *
  24   *  You should have received a copy of the GNU General Public License
  25   *  along with GeSHi; if not, write to the Free Software
  26   *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  27   *
  28   * @package    geshi
  29   * @subpackage core
  30   * @author     Nigel McNie <nigel@geshi.org>, Benny Baumann <BenBE@omorphia.de>
  31   * @copyright  (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2008 Benny Baumann
  32   * @license    http://gnu.org/copyleft/gpl.html GNU GPL
  33   *
  34   */
  35  
  36  //
  37  // GeSHi Constants
  38  // You should use these constant names in your programs instead of
  39  // their values - you never know when a value may change in a future
  40  // version
  41  //
  42  
  43  /** The version of this GeSHi file */
  44  define('GESHI_VERSION', '1.0.8.12');
  45  
  46  // Define the root directory for the GeSHi code tree
  47  if (!defined('GESHI_ROOT')) {
  48      /** The root directory for GeSHi */
  49      define('GESHI_ROOT', dirname(__FILE__) . DIRECTORY_SEPARATOR);
  50  }
  51  /** The language file directory for GeSHi
  52      @access private */
  53  define('GESHI_LANG_ROOT', GESHI_ROOT . 'geshi' . DIRECTORY_SEPARATOR);
  54  
  55  // Define if GeSHi should be paranoid about security
  56  if (!defined('GESHI_SECURITY_PARANOID')) {
  57      /** Tells GeSHi to be paranoid about security settings */
  58      define('GESHI_SECURITY_PARANOID', false);
  59  }
  60  
// Line numbers - use with enable_line_numbers()
/** Use no line numbers when building the result */
define('GESHI_NO_LINE_NUMBERS', 0);
/** Use normal line numbers when building the result */
define('GESHI_NORMAL_LINE_NUMBERS', 1);
/** Use fancy line numbers when building the result */
define('GESHI_FANCY_LINE_NUMBERS', 2);

// Container HTML type - use with set_header_type()
/** Use nothing to surround the source */
define('GESHI_HEADER_NONE', 0);
/** Use a "div" to surround the source */
define('GESHI_HEADER_DIV', 1);
/** Use a "pre" to surround the source */
define('GESHI_HEADER_PRE', 2);
/** Use a "pre" to wrap lines when line numbers are enabled or to wrap the whole code. */
define('GESHI_HEADER_PRE_VALID', 3);
/**
 * Use a "table" to surround the source:
 *
 *  <table>
 *    <thead><tr><td colspan="2">$header</td></tr></thead>
 *    <tbody><tr><td><pre>$linenumbers</pre></td><td><pre>$code</pre></td></tr></tbody>
 *    <tfoot><tr><td colspan="2">$footer</td></tr></tfoot>
 *  </table>
 *
 * this is essentially only a workaround for Firefox, see sf#1651996 or take a look at
 * https://bugzilla.mozilla.org/show_bug.cgi?id=365805
 * @note when linenumbers are disabled this is essentially the same as GESHI_HEADER_PRE
 */
define('GESHI_HEADER_PRE_TABLE', 4);

// Capitalisation constants
/** Leave keywords found as the case that they are */
define('GESHI_CAPS_NO_CHANGE', 0);
/** Uppercase keywords found */
define('GESHI_CAPS_UPPER', 1);
/** Lowercase keywords found */
define('GESHI_CAPS_LOWER', 2);

// Link style constants
/** Links in the source in the :link state */
define('GESHI_LINK', 0);
/** Links in the source in the :hover state */
define('GESHI_HOVER', 1);
/** Links in the source in the :active state */
define('GESHI_ACTIVE', 2);
/** Links in the source in the :visited state */
define('GESHI_VISITED', 3);

// Important string starter/finisher
// Note that if you change these, they should be written as-is, i.e. do not
// write them as if they had been run through htmlentities()
/** The starter for important parts of the source */
define('GESHI_START_IMPORTANT', '<BEGIN GeSHi>');
/** The ender for important parts of the source */
define('GESHI_END_IMPORTANT', '<END GeSHi>');
 118  
/**#@+
 *  @access private
 */
// When strict mode applies for a language
/** Strict mode never applies (this is the most common) */
define('GESHI_NEVER', 0);
/** Strict mode *might* apply, and can be enabled or
    disabled by {@link GeSHi->enable_strict_mode()} */
define('GESHI_MAYBE', 1);
/** Strict mode always applies */
define('GESHI_ALWAYS', 2);

// Advanced regexp handling constants, used in language files.
// Each value is an index into the array that describes one regexp.
/** The key of the regex array defining what to search for */
define('GESHI_SEARCH', 0);
/** The key of the regex array defining what bracket group in a
    matched search to use as a replacement */
define('GESHI_REPLACE', 1);
/** The key of the regex array defining any modifiers to the regular expression */
define('GESHI_MODIFIERS', 2);
/** The key of the regex array defining what bracket group in a
    matched search to put before the replacement */
define('GESHI_BEFORE', 3);
/** The key of the regex array defining what bracket group in a
    matched search to put after the replacement */
define('GESHI_AFTER', 4);
/** The key of the regex array defining a custom keyword to use
    for this regexp's html tag class */
define('GESHI_CLASS', 5);

/** Used in language files to mark comments */
define('GESHI_COMMENTS', 0);

/** Used to work around missing PHP features.
    True whenever PHP_VERSION is not greater than 4.3.3, so despite the
    name the flag is also set when running on 4.3.3 itself. **/
define('GESHI_PHP_PRE_433', !(version_compare(PHP_VERSION, '4.3.3') === 1));
 154  
 155  /** make sure we can call stripos **/
 156  if (!function_exists('stripos')) {
 157      // the offset param of preg_match is not supported below PHP 4.3.3
 158      if (GESHI_PHP_PRE_433) {
 159          /**
 160           * @ignore
 161           */
 162          function stripos($haystack, $needle, $offset = null) {
 163              if (!is_null($offset)) {
 164                  $haystack = substr($haystack, $offset);
 165              }
 166              if (preg_match('/'. preg_quote($needle, '/') . '/', $haystack, $match, PREG_OFFSET_CAPTURE)) {
 167                  return $match[0][1];
 168              }
 169              return false;
 170          }
 171      }
 172      else {
 173          /**
 174           * @ignore
 175           */
 176          function stripos($haystack, $needle, $offset = null) {
 177              if (preg_match('/'. preg_quote($needle, '/') . '/', $haystack, $match, PREG_OFFSET_CAPTURE, $offset)) {
 178                  return $match[0][1];
 179              }
 180              return false;
 181          }
 182      }
 183  }
 184  
/** Some old PHP / PCRE versions only support a limited number of
    subpatterns in regular expressions. Set this to false if your PCRE lib
    is up to date
    @see GeSHi->optimize_regexp_list()
    **/
define('GESHI_MAX_PCRE_SUBPATTERNS', 500);
/** it's also important not to generate too long regular expressions
    be generous here... but keep in mind, that when reaching this limit we
    still have to close open patterns. 12k should do just fine on a 16k limit.
    @see GeSHi->optimize_regexp_list()
    **/
define('GESHI_MAX_PCRE_LENGTH', 12288);

//Number format specification (every value is a distinct bit, so formats can be combined)
/** Basic number format for integers */
define('GESHI_NUMBER_INT_BASIC', 1);        //Default integers \d+
/** Enhanced number format for integers like seen in C */
define('GESHI_NUMBER_INT_CSTYLE', 2);       //Default C-Style \d+[lL]?
/** Number format to highlight binary numbers with a suffix "b" */
define('GESHI_NUMBER_BIN_SUFFIX', 16);           //[01]+[bB]
/** Number format to highlight binary numbers with a prefix % */
define('GESHI_NUMBER_BIN_PREFIX_PERCENT', 32);   //%[01]+
/** Number format to highlight binary numbers with a prefix 0b (C) */
define('GESHI_NUMBER_BIN_PREFIX_0B', 64);        //0b[01]+
/** Number format to highlight octal numbers with a leading zero */
define('GESHI_NUMBER_OCT_PREFIX', 256);           //0[0-7]+
/** Number format to highlight octal numbers with a prefix 0o (logtalk) */
define('GESHI_NUMBER_OCT_PREFIX_0O', 512);           //0o[0-7]+
/** Number format to highlight octal numbers with a leading @ (Used in HiSofts Devpac series). */
define('GESHI_NUMBER_OCT_PREFIX_AT', 1024);           //@[0-7]+
/** Number format to highlight octal numbers with a suffix of o */
define('GESHI_NUMBER_OCT_SUFFIX', 2048);           //[0-7]+[oO]
/** Number format to highlight hex numbers with a prefix 0x */
define('GESHI_NUMBER_HEX_PREFIX', 4096);           //0x[0-9a-fA-F]+
/** Number format to highlight hex numbers with a prefix $ */
define('GESHI_NUMBER_HEX_PREFIX_DOLLAR', 8192);           //$[0-9a-fA-F]+
/** Number format to highlight hex numbers with a suffix of h */
define('GESHI_NUMBER_HEX_SUFFIX', 16384);           //[0-9][0-9a-fA-F]*h
/** Number format to highlight floating-point numbers without support for scientific notation */
define('GESHI_NUMBER_FLT_NONSCI', 65536);          //\d+\.\d+
/** Number format to highlight floating-point numbers with a trailing "f", without support for scientific notation */
define('GESHI_NUMBER_FLT_NONSCI_F', 131072);       //\d+(\.\d+)?f
/** Number format to highlight floating-point numbers with support for scientific notation (E) and optional leading zero */
define('GESHI_NUMBER_FLT_SCI_SHORT', 262144);      //\.\d+e\d+
/** Number format to highlight floating-point numbers with support for scientific notation (E) and required leading digit */
define('GESHI_NUMBER_FLT_SCI_ZERO', 524288);       //\d+(\.\d+)?e\d+
//Custom formats are passed by RX array

// Error detection - use these to analyse faults
/** No sourcecode to highlight was specified
 * @deprecated
 */
define('GESHI_ERROR_NO_INPUT', 1);
/** The language specified does not exist */
define('GESHI_ERROR_NO_SUCH_LANG', 2);
/** GeSHi could not open a file for reading (generally a language file) */
define('GESHI_ERROR_FILE_NOT_READABLE', 3);
/** The header type passed to {@link GeSHi->set_header_type()} was invalid */
define('GESHI_ERROR_INVALID_HEADER_TYPE', 4);
/** The line number type passed to {@link GeSHi->enable_line_numbers()} was invalid */
define('GESHI_ERROR_INVALID_LINE_NUMBER_TYPE', 5);
/**#@-*/
 246  
 247  
 248  /**
 249   * The GeSHi Class.
 250   *
 251   * Please refer to the documentation for GeSHi 1.0.X that is available
 252   * at http://qbnz.com/highlighter/documentation.php for more information
 253   * about how to use this class.
 254   *
 255   * @package   geshi
 256   * @author    Nigel McNie <nigel@geshi.org>, Benny Baumann <BenBE@omorphia.de>
 257   * @copyright (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2008 Benny Baumann
 258   */
 259  class GeSHi {
    /**#@+
     * @access private
     */
    /**
     * The source code to highlight
     * @var string
     */
    var $source = '';

    /**
     * The language to use when highlighting
     * @var string
     */
    var $language = '';

    /**
     * The data for the language used
     * @var array
     */
    var $language_data = array();

    /**
     * The path to the language files
     * @var string
     */
    var $language_path = GESHI_LANG_ROOT;

    /**
     * The current error state.
     *
     * false while no error is pending, otherwise one of the GESHI_ERROR_*
     * codes. Note that get_language_fullname() stores a free-form message
     * string here instead of a code.
     *
     * @var int|string|false
     * @todo check err reporting works
     */
    var $error = false;

    /**
     * Possible error messages, keyed by GESHI_ERROR_* code. The
     * {LANGUAGE} and {PATH} placeholders are filled in by error().
     * @var array
     */
    var $error_messages = array(
        GESHI_ERROR_NO_SUCH_LANG => 'GeSHi could not find the language {LANGUAGE} (using path {PATH})',
        GESHI_ERROR_FILE_NOT_READABLE => 'The file specified for load_from_file was not readable',
        GESHI_ERROR_INVALID_HEADER_TYPE => 'The header type specified is invalid',
        GESHI_ERROR_INVALID_LINE_NUMBER_TYPE => 'The line number type specified is invalid'
    );

    /**
     * Whether highlighting is strict or not.
     * set_language() resets this to GESHI_NEVER.
     * @var boolean
     */
    var $strict_mode = false;

    /**
     * Whether to use CSS classes in output
     * @var boolean
     */
    var $use_classes = false;

    /**
     * The type of header to use. Can be one of the following
     * values (the ones accepted by set_header_type()):
     *
     * - GESHI_HEADER_PRE: Source is outputted in a "pre" HTML element.
     * - GESHI_HEADER_DIV: Source is outputted in a "div" HTML element.
     * - GESHI_HEADER_NONE: No header is outputted.
     * - GESHI_HEADER_PRE_VALID: see the constant's description.
     * - GESHI_HEADER_PRE_TABLE: see the constant's description.
     *
     * @var int
     */
    var $header_type = GESHI_HEADER_PRE;

    /**
     * Array of permissions for which lexics should be highlighted
     * @var array
     */
    var $lexic_permissions = array(
        'KEYWORDS' =>    array(),
        'COMMENTS' =>    array('MULTI' => true),
        'REGEXPS' =>     array(),
        'ESCAPE_CHAR' => true,
        'BRACKETS' =>    true,
        'SYMBOLS' =>     false,
        'STRINGS' =>     true,
        'NUMBERS' =>     true,
        'METHODS' =>     true,
        'SCRIPT' =>      true
    );
 345  
    /**
     * The time it took to parse the code
     * @var double
     */
    var $time = 0;

    /**
     * The content of the header block
     * @var string
     */
    var $header_content = '';

    /**
     * The content of the footer block
     * @var string
     */
    var $footer_content = '';

    /**
     * The style of the header block
     * @var string
     */
    var $header_content_style = '';

    /**
     * The style of the footer block
     * @var string
     */
    var $footer_content_style = '';

    /**
     * Tells if a block around the highlighted source should be forced
     * if not using line numbering
     * @var boolean
     */
    var $force_code_block = false;

    /**
     * The styles for hyperlinks in the code
     * @var array
     */
    var $link_styles = array();

    /**
     * Whether important blocks should be recognised or not
     * @var boolean
     * @deprecated
     * @todo REMOVE THIS FUNCTIONALITY!
     */
    var $enable_important_blocks = false;

    /**
     * Styles for important parts of the code
     * @var string
     * @deprecated
     * @todo As above - rethink the whole idea of important blocks as it is buggy and
     * will be hard to implement in 1.2
     */
    var $important_styles = 'font-weight: bold; color: red;'; // Styles for important parts of the code

    /**
     * Whether CSS IDs should be added to the code
     * @var boolean
     */
    var $add_ids = false;

    /**
     * Lines that should be highlighted extra.
     * Emptied again by set_source().
     * @var array
     */
    var $highlight_extra_lines = array();

    /**
     * Styles of lines that should be highlighted extra
     * @var array
     */
    var $highlight_extra_lines_styles = array();

    /**
     * Default style of extra-highlighted lines
     * @var string
     */
    var $highlight_extra_lines_style = 'background-color: #ffc;';

    /**
     * The line ending
     * If null, nl2br() will be used on the result string.
     * Otherwise, all instances of \n will be replaced with $line_ending
     * @var string|null
     */
    var $line_ending = null;
 437  
    /**
     * Number at which line numbers should start at
     * @var int
     */
    var $line_numbers_start = 1;

    /**
     * The overall style for this code block
     * @var string
     */
    var $overall_style = 'font-family:monospace;';

    /**
     * The style for the actual code
     * @var string
     */
    var $code_style = 'font: normal normal 1em/1.2em monospace; margin:0; padding:0; background:none; vertical-align:top;';

    /**
     * The overall class for this code block
     * @var string
     */
    var $overall_class = '';

    /**
     * The overall ID for this code block
     * @var string
     */
    var $overall_id = '';

    /**
     * Line number styles
     * @var string
     */
    var $line_style1 = 'font-weight: normal; vertical-align:top;';

    /**
     * Line number styles for fancy lines
     * @var string
     */
    var $line_style2 = 'font-weight: bold; vertical-align:top;';

    /**
     * Style for line numbers when GESHI_HEADER_PRE_TABLE is chosen
     * @var string
     */
    var $table_linenumber_style = 'width:1px;text-align:right;margin:0;padding:0 2px;vertical-align:top;';

    /**
     * Flag for how line numbers are displayed; holds one of the
     * GESHI_*_LINE_NUMBERS constants
     * @var int
     */
    var $line_numbers = GESHI_NO_LINE_NUMBERS;

    /**
     * Flag to decide if multi line spans are allowed. Set it to false to make sure
     * each tag is closed before and reopened after each linefeed.
     * @var boolean
     */
    var $allow_multiline_span = true;

    /**
     * The "nth" value for fancy line highlighting
     * @var int
     */
    var $line_nth_row = 0;

    /**
     * The size of tab stops
     * @var int
     */
    var $tab_width = 8;

    /**
     * Should we use language-defined tab stop widths?
     * @var boolean
     */
    var $use_language_tab_width = false;

    /**
     * Default target for keyword links
     * @var string
     */
    var $link_target = '';

    /**
     * The encoding to use for entity encoding
     * NOTE: Used with Escape Char Sequences to fix UTF-8 handling (cf. SF#2037598)
     * @var string
     */
    var $encoding = 'utf-8';

    /**
     * Should keywords be linked?
     * @var boolean
     */
    var $keyword_links = true;
 535  
    /**
     * Currently loaded language file.
     * set_language() compares the full file name against this value to skip
     * reloading, and sets it to false when a reset is forced.
     * @var string
     * @since 1.0.7.22
     */
    var $loaded_language = '';

    /**
     * Whether the caches needed for parsing are built or not
     *
     * @var bool
     * @since 1.0.8
     */
    var $parse_cache_built = false;

    /**
     * Work around for Suhosin Patch with disabled /e modifier
     *
     * Note from suhosins author in config file:
     * <blockquote>
     *   The /e modifier inside <code>preg_replace()</code> allows code execution.
     *   Often it is the cause for remote code execution exploits. It is wise to
     *   deactivate this feature and test where in the application it is used.
     *   The developer using the /e modifier should be made aware that he should
     *   use <code>preg_replace_callback()</code> instead
     * </blockquote>
     *
     * @var int
     * @since 1.0.8
     */
    var $_kw_replace_group = 0;
    var $_rx_key = 0;

    /**
     * some "callback parameters" for handle_multiline_regexps
     * ($_hmr_key starts out as the integer 0, the others as empty strings)
     *
     * @since 1.0.8
     * @access private
     * @var string
     */
    var $_hmr_before = '';
    var $_hmr_replace = '';
    var $_hmr_after = '';
    var $_hmr_key = 0;

    /**#@-*/
 582  
 583      /**
 584       * Creates a new GeSHi object, with source and language
 585       *
 586       * @param string The source code to highlight
 587       * @param string The language to highlight the source with
 588       * @param string The path to the language file directory. <b>This
 589       *               is deprecated!</b> I've backported the auto path
 590       *               detection from the 1.1.X dev branch, so now it
 591       *               should be automatically set correctly. If you have
 592       *               renamed the language directory however, you will
 593       *               still need to set the path using this parameter or
 594       *               {@link GeSHi->set_language_path()}
 595       * @since 1.0.0
 596       */
 597      function __construct($source = '', $language = '', $path = '') {
 598          if (!empty($source)) {
 599              $this->set_source($source);
 600          }
 601          if (!empty($language)) {
 602              $this->set_language($language);
 603          }
 604          $this->set_language_path($path);
 605      }
 606  
 607      /**
 608       * Returns the version of GeSHi
 609       *
 610       * @return string
 611       * @since 1 0.8.11
 612       */
 613      function get_version()
 614      {
 615          return GESHI_VERSION;
 616      }
 617  
 618      /**
 619       * Returns an error message associated with the last GeSHi operation,
 620       * or false if no error has occured
 621       *
 622       * @return string|false An error message if there has been an error, else false
 623       * @since  1.0.0
 624       */
 625      function error() {
 626          if ($this->error) {
 627              //Put some template variables for debugging here ...
 628              $debug_tpl_vars = array(
 629                  '{LANGUAGE}' => $this->language,
 630                  '{PATH}' => $this->language_path
 631              );
 632              $msg = str_replace(
 633                  array_keys($debug_tpl_vars),
 634                  array_values($debug_tpl_vars),
 635                  $this->error_messages[$this->error]);
 636  
 637              return "<br /><strong>GeSHi Error:</strong> $msg (code {$this->error})<br />";
 638          }
 639          return false;
 640      }
 641  
 642      /**
 643       * Gets a human-readable language name (thanks to Simon Patterson
 644       * for the idea :))
 645       *
 646       * @return string The name for the current language
 647       * @since  1.0.2
 648       */
 649      function get_language_name() {
 650          if (GESHI_ERROR_NO_SUCH_LANG == $this->error) {
 651              return $this->language_data['LANG_NAME'] . ' (Unknown Language)';
 652          }
 653          return $this->language_data['LANG_NAME'];
 654      }
 655  
 656      /**
 657       * Sets the source code for this object
 658       *
 659       * @param string The source code to highlight
 660       * @since 1.0.0
 661       */
 662      function set_source($source) {
 663          $this->source = $source;
 664          $this->highlight_extra_lines = array();
 665      }
 666  
 667      /**
 668       * Sets the language for this object
 669       *
 670       * @note since 1.0.8 this function won't reset language-settings by default anymore!
 671       *       if you need this set $force_reset = true
 672       *
 673       * @param string The name of the language to use
 674       * @since 1.0.0
 675       */
 676      function set_language($language, $force_reset = false) {
 677          if ($force_reset) {
 678              $this->loaded_language = false;
 679          }
 680  
 681          //Clean up the language name to prevent malicious code injection
 682          $language = preg_replace('#[^a-zA-Z0-9\-_]#', '', $language);
 683  
 684          $language = strtolower($language);
 685  
 686          //Retreive the full filename
 687          $file_name = $this->language_path . $language . '.php';
 688          if ($file_name == $this->loaded_language) {
 689              // this language is already loaded!
 690              return;
 691          }
 692  
 693          $this->language = $language;
 694  
 695          $this->error = false;
 696          $this->strict_mode = GESHI_NEVER;
 697  
 698          //Check if we can read the desired file
 699          if (!is_readable($file_name)) {
 700              $this->error = GESHI_ERROR_NO_SUCH_LANG;
 701              return;
 702          }
 703  
 704          // Load the language for parsing
 705          $this->load_language($file_name);
 706      }
 707  
 708      /**
 709       * Sets the path to the directory containing the language files. Note
 710       * that this path is relative to the directory of the script that included
 711       * geshi.php, NOT geshi.php itself.
 712       *
 713       * @param string The path to the language directory
 714       * @since 1.0.0
 715       * @deprecated The path to the language files should now be automatically
 716       *             detected, so this method should no longer be needed. The
 717       *             1.1.X branch handles manual setting of the path differently
 718       *             so this method will disappear in 1.2.0.
 719       */
 720      function set_language_path($path) {
 721          if(strpos($path,':')) {
 722              //Security Fix to prevent external directories using fopen wrappers.
 723              if(DIRECTORY_SEPARATOR == "\\") {
 724                  if(!preg_match('#^[a-zA-Z]:#', $path) || false !== strpos($path, ':', 2)) {
 725                      return;
 726                  }
 727              } else {
 728                  return;
 729              }
 730          }
 731          if(preg_match('#[^/a-zA-Z0-9_\.\-\\\s:]#', $path)) {
 732              //Security Fix to prevent external directories using fopen wrappers.
 733              return;
 734          }
 735          if(GESHI_SECURITY_PARANOID && false !== strpos($path, '/.')) {
 736              //Security Fix to prevent external directories using fopen wrappers.
 737              return;
 738          }
 739          if(GESHI_SECURITY_PARANOID && false !== strpos($path, '..')) {
 740              //Security Fix to prevent external directories using fopen wrappers.
 741              return;
 742          }
 743          if ($path) {
 744              $this->language_path = ('/' == $path[strlen($path) - 1]) ? $path : $path . '/';
 745              $this->set_language($this->language); // otherwise set_language_path has no effect
 746          }
 747      }
 748  
 749      /**
 750       * Get supported langs or an associative array lang=>full_name.
 751       * @param boolean $longnames
 752       * @return array
 753       */
 754      function get_supported_languages($full_names=false)
 755      {
 756          // return array
 757          $back = array();
 758  
 759          // we walk the lang root
 760          $dir = dir($this->language_path);
 761  
 762          // foreach entry
 763          while (false !== ($entry = $dir->read()))
 764          {
 765              $full_path = $this->language_path.$entry;
 766  
 767              // Skip all dirs
 768              if (is_dir($full_path)) {
 769                  continue;
 770              }
 771  
 772              // we only want lang.php files
 773              if (!preg_match('/^([^.]+)\.php$/', $entry, $matches)) {
 774                  continue;
 775              }
 776  
 777              // Raw lang name is here
 778              $langname = $matches[1];
 779  
 780              // We want the fullname too?
 781              if ($full_names === true)
 782              {
 783                  if (false !== ($fullname = $this->get_language_fullname($langname)))
 784                  {
 785                      $back[$langname] = $fullname; // we go associative
 786                  }
 787              }
 788              else
 789              {
 790                  // just store raw langname
 791                  $back[] = $langname;
 792              }
 793          }
 794  
 795          $dir->close();
 796  
 797          return $back;
 798      }
 799  
 800      /**
 801       * Get full_name for a lang or false.
 802       * @param string $language short langname (html4strict for example)
 803       * @return mixed
 804       */
 805      function get_language_fullname($language)
 806      {
 807          //Clean up the language name to prevent malicious code injection
 808          $language = preg_replace('#[^a-zA-Z0-9\-_]#', '', $language);
 809  
 810          $language = strtolower($language);
 811  
 812          // get fullpath-filename for a langname
 813          $fullpath = $this->language_path.$language.'.php';
 814  
 815          // we need to get contents :S
 816          if (false === ($data = file_get_contents($fullpath))) {
 817              $this->error = sprintf('Geshi::get_lang_fullname() Unknown Language: %s', $language);
 818              return false;
 819          }
 820  
 821          // match the langname
 822          if (!preg_match('/\'LANG_NAME\'\s*=>\s*\'((?:[^\']|\\\')+?)\'/', $data, $matches)) {
 823              $this->error = sprintf('Geshi::get_lang_fullname(%s): Regex can not detect language', $language);
 824              return false;
 825          }
 826  
 827          // return fullname for langname
 828          return stripcslashes($matches[1]);
 829      }
 830  
 831      /**
 832       * Sets the type of header to be used.
 833       *
 834       * If GESHI_HEADER_DIV is used, the code is surrounded in a "div".This
 835       * means more source code but more control over tab width and line-wrapping.
 836       * GESHI_HEADER_PRE means that a "pre" is used - less source, but less
 837       * control. Default is GESHI_HEADER_PRE.
 838       *
 839       * From 1.0.7.2, you can use GESHI_HEADER_NONE to specify that no header code
 840       * should be outputted.
 841       *
 842       * @param int The type of header to be used
 843       * @since 1.0.0
 844       */
 845      function set_header_type($type) {
 846          //Check if we got a valid header type
 847          if (!in_array($type, array(GESHI_HEADER_NONE, GESHI_HEADER_DIV,
 848              GESHI_HEADER_PRE, GESHI_HEADER_PRE_VALID, GESHI_HEADER_PRE_TABLE))) {
 849              $this->error = GESHI_ERROR_INVALID_HEADER_TYPE;
 850              return;
 851          }
 852  
 853          //Set that new header type
 854          $this->header_type = $type;
 855      }
 856  
 857      /**
 858       * Sets the styles for the code that will be outputted
 859       * when this object is parsed. The style should be a
 860       * string of valid stylesheet declarations
 861       *
 862       * @param string  The overall style for the outputted code block
 863       * @param boolean Whether to merge the styles with the current styles or not
 864       * @since 1.0.0
 865       */
 866      function set_overall_style($style, $preserve_defaults = false) {
 867          if (!$preserve_defaults) {
 868              $this->overall_style = $style;
 869          } else {
 870              $this->overall_style .= $style;
 871          }
 872      }
 873  
 874      /**
 875       * Sets the overall classname for this block of code. This
 876       * class can then be used in a stylesheet to style this object's
 877       * output
 878       *
 879       * @param string The class name to use for this block of code
 880       * @since 1.0.0
 881       */
 882      function set_overall_class($class) {
 883          $this->overall_class = $class;
 884      }
 885  
 886      /**
 887       * Sets the overall id for this block of code. This id can then
 888       * be used in a stylesheet to style this object's output
 889       *
 890       * @param string The ID to use for this block of code
 891       * @since 1.0.0
 892       */
 893      function set_overall_id($id) {
 894          $this->overall_id = $id;
 895      }
 896  
 897      /**
 898       * Sets whether CSS classes should be used to highlight the source. Default
 899       * is off, calling this method with no arguments will turn it on
 900       *
 901       * @param boolean Whether to turn classes on or not
 902       * @since 1.0.0
 903       */
 904      function enable_classes($flag = true) {
 905          $this->use_classes = ($flag) ? true : false;
 906      }
 907  
 908      /**
 909       * Sets the style for the actual code. This should be a string
 910       * containing valid stylesheet declarations. If $preserve_defaults is
 911       * true, then styles are merged with the default styles, with the
 912       * user defined styles having priority
 913       *
 914       * Note: Use this method to override any style changes you made to
 915       * the line numbers if you are using line numbers, else the line of
 916       * code will have the same style as the line number! Consult the
 917       * GeSHi documentation for more information about this.
 918       *
 919       * @param string  The style to use for actual code
 920       * @param boolean Whether to merge the current styles with the new styles
 921       * @since 1.0.2
 922       */
 923      function set_code_style($style, $preserve_defaults = false) {
 924          if (!$preserve_defaults) {
 925              $this->code_style = $style;
 926          } else {
 927              $this->code_style .= $style;
 928          }
 929      }
 930  
 931      /**
 932       * Sets the styles for the line numbers.
 933       *
 934       * @param string The style for the line numbers that are "normal"
 935       * @param string|boolean If a string, this is the style of the line
 936       *        numbers that are "fancy", otherwise if boolean then this
 937       *        defines whether the normal styles should be merged with the
 938       *        new normal styles or not
 939       * @param boolean If set, is the flag for whether to merge the "fancy"
 940       *        styles with the current styles or not
 941       * @since 1.0.2
 942       */
 943      function set_line_style($style1, $style2 = '', $preserve_defaults = false) {
 944          //Check if we got 2 or three parameters
 945          if (is_bool($style2)) {
 946              $preserve_defaults = $style2;
 947              $style2 = '';
 948          }
 949  
 950          //Actually set the new styles
 951          if (!$preserve_defaults) {
 952              $this->line_style1 = $style1;
 953              $this->line_style2 = $style2;
 954          } else {
 955              $this->line_style1 .= $style1;
 956              $this->line_style2 .= $style2;
 957          }
 958      }
 959  
 960      /**
 961       * Sets whether line numbers should be displayed.
 962       *
 963       * Valid values for the first parameter are:
 964       *
 965       *  - GESHI_NO_LINE_NUMBERS: Line numbers will not be displayed
 966       *  - GESHI_NORMAL_LINE_NUMBERS: Line numbers will be displayed
 967       *  - GESHI_FANCY_LINE_NUMBERS: Fancy line numbers will be displayed
 968       *
 969       * For fancy line numbers, the second parameter is used to signal which lines
 970       * are to be fancy. For example, if the value of this parameter is 5 then every
 971       * 5th line will be fancy.
 972       *
 973       * @param int How line numbers should be displayed
 974       * @param int Defines which lines are fancy
 975       * @since 1.0.0
 976       */
 977      function enable_line_numbers($flag, $nth_row = 5) {
 978          if (GESHI_NO_LINE_NUMBERS != $flag && GESHI_NORMAL_LINE_NUMBERS != $flag
 979              && GESHI_FANCY_LINE_NUMBERS != $flag) {
 980              $this->error = GESHI_ERROR_INVALID_LINE_NUMBER_TYPE;
 981          }
 982          $this->line_numbers = $flag;
 983          $this->line_nth_row = $nth_row;
 984      }
 985  
 986      /**
 987       * Sets wether spans and other HTML markup generated by GeSHi can
 988       * span over multiple lines or not. Defaults to true to reduce overhead.
 989       * Set it to false if you want to manipulate the output or manually display
 990       * the code in an ordered list.
 991       *
 992       * @param boolean Wether multiline spans are allowed or not
 993       * @since 1.0.7.22
 994       */
 995      function enable_multiline_span($flag) {
 996          $this->allow_multiline_span = (bool) $flag;
 997      }
 998  
 999      /**
1000       * Get current setting for multiline spans, see GeSHi->enable_multiline_span().
1001       *
1002       * @see enable_multiline_span
1003       * @return bool
1004       */
1005      function get_multiline_span() {
1006          return $this->allow_multiline_span;
1007      }
1008  
1009      /**
1010       * Sets the style for a keyword group. If $preserve_defaults is
1011       * true, then styles are merged with the default styles, with the
1012       * user defined styles having priority
1013       *
1014       * @param int     The key of the keyword group to change the styles of
1015       * @param string  The style to make the keywords
1016       * @param boolean Whether to merge the new styles with the old or just
1017       *                to overwrite them
1018       * @since 1.0.0
1019       */
1020      function set_keyword_group_style($key, $style, $preserve_defaults = false) {
1021          //Set the style for this keyword group
1022          if (!$preserve_defaults) {
1023              $this->language_data['STYLES']['KEYWORDS'][$key] = $style;
1024          } else {
1025              $this->language_data['STYLES']['KEYWORDS'][$key] .= $style;
1026          }
1027  
1028          //Update the lexic permissions
1029          if (!isset($this->lexic_permissions['KEYWORDS'][$key])) {
1030              $this->lexic_permissions['KEYWORDS'][$key] = true;
1031          }
1032      }
1033  
1034      /**
1035       * Turns highlighting on/off for a keyword group
1036       *
1037       * @param int     The key of the keyword group to turn on or off
1038       * @param boolean Whether to turn highlighting for that group on or off
1039       * @since 1.0.0
1040       */
1041      function set_keyword_group_highlighting($key, $flag = true) {
1042          $this->lexic_permissions['KEYWORDS'][$key] = ($flag) ? true : false;
1043      }
1044  
1045      /**
1046       * Sets the styles for comment groups.  If $preserve_defaults is
1047       * true, then styles are merged with the default styles, with the
1048       * user defined styles having priority
1049       *
1050       * @param int     The key of the comment group to change the styles of
1051       * @param string  The style to make the comments
1052       * @param boolean Whether to merge the new styles with the old or just
1053       *                to overwrite them
1054       * @since 1.0.0
1055       */
1056      function set_comments_style($key, $style, $preserve_defaults = false) {
1057          if (!$preserve_defaults) {
1058              $this->language_data['STYLES']['COMMENTS'][$key] = $style;
1059          } else {
1060              $this->language_data['STYLES']['COMMENTS'][$key] .= $style;
1061          }
1062      }
1063  
1064      /**
1065       * Turns highlighting on/off for comment groups
1066       *
1067       * @param int     The key of the comment group to turn on or off
1068       * @param boolean Whether to turn highlighting for that group on or off
1069       * @since 1.0.0
1070       */
1071      function set_comments_highlighting($key, $flag = true) {
1072          $this->lexic_permissions['COMMENTS'][$key] = ($flag) ? true : false;
1073      }
1074  
1075      /**
1076       * Sets the styles for escaped characters. If $preserve_defaults is
1077       * true, then styles are merged with the default styles, with the
1078       * user defined styles having priority
1079       *
1080       * @param string  The style to make the escape characters
1081       * @param boolean Whether to merge the new styles with the old or just
1082       *                to overwrite them
1083       * @since 1.0.0
1084       */
1085      function set_escape_characters_style($style, $preserve_defaults = false, $group = 0) {
1086          if (!$preserve_defaults) {
1087              $this->language_data['STYLES']['ESCAPE_CHAR'][$group] = $style;
1088          } else {
1089              $this->language_data['STYLES']['ESCAPE_CHAR'][$group] .= $style;
1090          }
1091      }
1092  
1093      /**
1094       * Turns highlighting on/off for escaped characters
1095       *
1096       * @param boolean Whether to turn highlighting for escape characters on or off
1097       * @since 1.0.0
1098       */
1099      function set_escape_characters_highlighting($flag = true) {
1100          $this->lexic_permissions['ESCAPE_CHAR'] = ($flag) ? true : false;
1101      }
1102  
1103      /**
1104       * Sets the styles for brackets. If $preserve_defaults is
1105       * true, then styles are merged with the default styles, with the
1106       * user defined styles having priority
1107       *
1108       * This method is DEPRECATED: use set_symbols_style instead.
1109       * This method will be removed in 1.2.X
1110       *
1111       * @param string  The style to make the brackets
1112       * @param boolean Whether to merge the new styles with the old or just
1113       *                to overwrite them
1114       * @since 1.0.0
1115       * @deprecated In favour of set_symbols_style
1116       */
1117      function set_brackets_style($style, $preserve_defaults = false) {
1118          if (!$preserve_defaults) {
1119              $this->language_data['STYLES']['BRACKETS'][0] = $style;
1120          } else {
1121              $this->language_data['STYLES']['BRACKETS'][0] .= $style;
1122          }
1123      }
1124  
1125      /**
1126       * Turns highlighting on/off for brackets
1127       *
1128       * This method is DEPRECATED: use set_symbols_highlighting instead.
1129       * This method will be remove in 1.2.X
1130       *
1131       * @param boolean Whether to turn highlighting for brackets on or off
1132       * @since 1.0.0
1133       * @deprecated In favour of set_symbols_highlighting
1134       */
1135      function set_brackets_highlighting($flag) {
1136          $this->lexic_permissions['BRACKETS'] = ($flag) ? true : false;
1137      }
1138  
1139      /**
1140       * Sets the styles for symbols. If $preserve_defaults is
1141       * true, then styles are merged with the default styles, with the
1142       * user defined styles having priority
1143       *
1144       * @param string  The style to make the symbols
1145       * @param boolean Whether to merge the new styles with the old or just
1146       *                to overwrite them
1147       * @param int     Tells the group of symbols for which style should be set.
1148       * @since 1.0.1
1149       */
1150      function set_symbols_style($style, $preserve_defaults = false, $group = 0) {
1151          // Update the style of symbols
1152          if (!$preserve_defaults) {
1153              $this->language_data['STYLES']['SYMBOLS'][$group] = $style;
1154          } else {
1155              $this->language_data['STYLES']['SYMBOLS'][$group] .= $style;
1156          }
1157  
1158          // For backward compatibility
1159          if (0 == $group) {
1160              $this->set_brackets_style ($style, $preserve_defaults);
1161          }
1162      }
1163  
1164      /**
1165       * Turns highlighting on/off for symbols
1166       *
1167       * @param boolean Whether to turn highlighting for symbols on or off
1168       * @since 1.0.0
1169       */
1170      function set_symbols_highlighting($flag) {
1171          // Update lexic permissions for this symbol group
1172          $this->lexic_permissions['SYMBOLS'] = ($flag) ? true : false;
1173  
1174          // For backward compatibility
1175          $this->set_brackets_highlighting ($flag);
1176      }
1177  
1178      /**
1179       * Sets the styles for strings. If $preserve_defaults is
1180       * true, then styles are merged with the default styles, with the
1181       * user defined styles having priority
1182       *
1183       * @param string  The style to make the escape characters
1184       * @param boolean Whether to merge the new styles with the old or just
1185       *                to overwrite them
1186       * @param int     Tells the group of strings for which style should be set.
1187       * @since 1.0.0
1188       */
1189      function set_strings_style($style, $preserve_defaults = false, $group = 0) {
1190          if (!$preserve_defaults) {
1191              $this->language_data['STYLES']['STRINGS'][$group] = $style;
1192          } else {
1193              $this->language_data['STYLES']['STRINGS'][$group] .= $style;
1194          }
1195      }
1196  
1197      /**
1198       * Turns highlighting on/off for strings
1199       *
1200       * @param boolean Whether to turn highlighting for strings on or off
1201       * @since 1.0.0
1202       */
1203      function set_strings_highlighting($flag) {
1204          $this->lexic_permissions['STRINGS'] = ($flag) ? true : false;
1205      }
1206  
1207      /**
1208       * Sets the styles for strict code blocks. If $preserve_defaults is
1209       * true, then styles are merged with the default styles, with the
1210       * user defined styles having priority
1211       *
1212       * @param string  The style to make the script blocks
1213       * @param boolean Whether to merge the new styles with the old or just
1214       *                to overwrite them
1215       * @param int     Tells the group of script blocks for which style should be set.
1216       * @since 1.0.8.4
1217       */
1218      function set_script_style($style, $preserve_defaults = false, $group = 0) {
1219          // Update the style of symbols
1220          if (!$preserve_defaults) {
1221              $this->language_data['STYLES']['SCRIPT'][$group] = $style;
1222          } else {
1223              $this->language_data['STYLES']['SCRIPT'][$group] .= $style;
1224          }
1225      }
1226  
1227      /**
1228       * Sets the styles for numbers. If $preserve_defaults is
1229       * true, then styles are merged with the default styles, with the
1230       * user defined styles having priority
1231       *
1232       * @param string  The style to make the numbers
1233       * @param boolean Whether to merge the new styles with the old or just
1234       *                to overwrite them
1235       * @param int     Tells the group of numbers for which style should be set.
1236       * @since 1.0.0
1237       */
1238      function set_numbers_style($style, $preserve_defaults = false, $group = 0) {
1239          if (!$preserve_defaults) {
1240              $this->language_data['STYLES']['NUMBERS'][$group] = $style;
1241          } else {
1242              $this->language_data['STYLES']['NUMBERS'][$group] .= $style;
1243          }
1244      }
1245  
1246      /**
1247       * Turns highlighting on/off for numbers
1248       *
1249       * @param boolean Whether to turn highlighting for numbers on or off
1250       * @since 1.0.0
1251       */
1252      function set_numbers_highlighting($flag) {
1253          $this->lexic_permissions['NUMBERS'] = ($flag) ? true : false;
1254      }
1255  
1256      /**
1257       * Sets the styles for methods. $key is a number that references the
1258       * appropriate "object splitter" - see the language file for the language
1259       * you are highlighting to get this number. If $preserve_defaults is
1260       * true, then styles are merged with the default styles, with the
1261       * user defined styles having priority
1262       *
1263       * @param int     The key of the object splitter to change the styles of
1264       * @param string  The style to make the methods
1265       * @param boolean Whether to merge the new styles with the old or just
1266       *                to overwrite them
1267       * @since 1.0.0
1268       */
1269      function set_methods_style($key, $style, $preserve_defaults = false) {
1270          if (!$preserve_defaults) {
1271              $this->language_data['STYLES']['METHODS'][$key] = $style;
1272          } else {
1273              $this->language_data['STYLES']['METHODS'][$key] .= $style;
1274          }
1275      }
1276  
1277      /**
1278       * Turns highlighting on/off for methods
1279       *
1280       * @param boolean Whether to turn highlighting for methods on or off
1281       * @since 1.0.0
1282       */
1283      function set_methods_highlighting($flag) {
1284          $this->lexic_permissions['METHODS'] = ($flag) ? true : false;
1285      }
1286  
1287      /**
1288       * Sets the styles for regexps. If $preserve_defaults is
1289       * true, then styles are merged with the default styles, with the
1290       * user defined styles having priority
1291       *
1292       * @param string  The style to make the regular expression matches
1293       * @param boolean Whether to merge the new styles with the old or just
1294       *                to overwrite them
1295       * @since 1.0.0
1296       */
1297      function set_regexps_style($key, $style, $preserve_defaults = false) {
1298          if (!$preserve_defaults) {
1299              $this->language_data['STYLES']['REGEXPS'][$key] = $style;
1300          } else {
1301              $this->language_data['STYLES']['REGEXPS'][$key] .= $style;
1302          }
1303      }
1304  
1305      /**
1306       * Turns highlighting on/off for regexps
1307       *
1308       * @param int     The key of the regular expression group to turn on or off
1309       * @param boolean Whether to turn highlighting for the regular expression group on or off
1310       * @since 1.0.0
1311       */
1312      function set_regexps_highlighting($key, $flag) {
1313          $this->lexic_permissions['REGEXPS'][$key] = ($flag) ? true : false;
1314      }
1315  
1316      /**
1317       * Sets whether a set of keywords are checked for in a case sensitive manner
1318       *
1319       * @param int The key of the keyword group to change the case sensitivity of
1320       * @param boolean Whether to check in a case sensitive manner or not
1321       * @since 1.0.0
1322       */
1323      function set_case_sensitivity($key, $case) {
1324          $this->language_data['CASE_SENSITIVE'][$key] = ($case) ? true : false;
1325      }
1326  
1327      /**
1328       * Sets the case that keywords should use when found. Use the constants:
1329       *
1330       *  - GESHI_CAPS_NO_CHANGE: leave keywords as-is
1331       *  - GESHI_CAPS_UPPER: convert all keywords to uppercase where found
1332       *  - GESHI_CAPS_LOWER: convert all keywords to lowercase where found
1333       *
1334       * @param int A constant specifying what to do with matched keywords
1335       * @since 1.0.1
1336       */
1337      function set_case_keywords($case) {
1338          if (in_array($case, array(
1339              GESHI_CAPS_NO_CHANGE, GESHI_CAPS_UPPER, GESHI_CAPS_LOWER))) {
1340              $this->language_data['CASE_KEYWORDS'] = $case;
1341          }
1342      }
1343  
1344      /**
1345       * Sets how many spaces a tab is substituted for
1346       *
1347       * Widths below zero are ignored
1348       *
1349       * @param int The tab width
1350       * @since 1.0.0
1351       */
1352      function set_tab_width($width) {
1353          $this->tab_width = intval($width);
1354  
1355          //Check if it fit's the constraints:
1356          if ($this->tab_width < 1) {
1357              //Return it to the default
1358              $this->tab_width = 8;
1359          }
1360      }
1361  
1362      /**
1363       * Sets whether or not to use tab-stop width specifed by language
1364       *
1365       * @param boolean Whether to use language-specific tab-stop widths
1366       * @since 1.0.7.20
1367       */
1368      function set_use_language_tab_width($use) {
1369          $this->use_language_tab_width = (bool) $use;
1370      }
1371  
1372      /**
1373       * Returns the tab width to use, based on the current language and user
1374       * preference
1375       *
1376       * @return int Tab width
1377       * @since 1.0.7.20
1378       */
1379      function get_real_tab_width() {
1380          if (!$this->use_language_tab_width ||
1381              !isset($this->language_data['TAB_WIDTH'])) {
1382              return $this->tab_width;
1383          } else {
1384              return $this->language_data['TAB_WIDTH'];
1385          }
1386      }
1387  
1388      /**
1389       * Enables/disables strict highlighting. Default is off, calling this
1390       * method without parameters will turn it on. See documentation
1391       * for more details on strict mode and where to use it.
1392       *
1393       * @param boolean Whether to enable strict mode or not
1394       * @since 1.0.0
1395       */
1396      function enable_strict_mode($mode = true) {
1397          if (GESHI_MAYBE == $this->language_data['STRICT_MODE_APPLIES']) {
1398              $this->strict_mode = ($mode) ? GESHI_ALWAYS : GESHI_NEVER;
1399          }
1400      }
1401  
1402      /**
1403       * Disables all highlighting
1404       *
1405       * @since 1.0.0
1406       * @todo  Rewrite with array traversal
1407       * @deprecated In favour of enable_highlighting
1408       */
1409      function disable_highlighting() {
1410          $this->enable_highlighting(false);
1411      }
1412  
1413      /**
1414       * Enables all highlighting
1415       *
1416       * The optional flag parameter was added in version 1.0.7.21 and can be used
1417       * to enable (true) or disable (false) all highlighting.
1418       *
1419       * @since 1.0.0
1420       * @param boolean A flag specifying whether to enable or disable all highlighting
1421       * @todo  Rewrite with array traversal
1422       */
1423      function enable_highlighting($flag = true) {
1424          $flag = $flag ? true : false;
1425          foreach ($this->lexic_permissions as $key => $value) {
1426              if (is_array($value)) {
1427                  foreach ($value as $k => $v) {
1428                      $this->lexic_permissions[$key][$k] = $flag;
1429                  }
1430              } else {
1431                  $this->lexic_permissions[$key] = $flag;
1432              }
1433          }
1434  
1435          // Context blocks
1436          $this->enable_important_blocks = $flag;
1437      }
1438  
1439      /**
1440       * Given a file extension, this method returns either a valid geshi language
1441       * name, or the empty string if it couldn't be found
1442       *
1443       * @param string The extension to get a language name for
1444       * @param array  A lookup array to use instead of the default one
1445       * @since 1.0.5
1446       * @todo Re-think about how this method works (maybe make it private and/or make it
1447       *       a extension->lang lookup?)
1448       * @todo static?
1449       */
1450      function get_language_name_from_extension( $extension, $lookup = array() ) {
1451          $extension = strtolower($extension);
1452  
1453          if ( !is_array($lookup) || empty($lookup)) {
1454              $lookup = array(
1455                  '6502acme' => array( 'a', 's', 'asm', 'inc' ),
1456                  '6502tasm' => array( 'a', 's', 'asm', 'inc' ),
1457                  '6502kickass' => array( 'a', 's', 'asm', 'inc' ),
1458                  '68000devpac' => array( 'a', 's', 'asm', 'inc' ),
1459                  'abap' => array('abap'),
1460                  'actionscript' => array('as'),
1461                  'ada' => array('a', 'ada', 'adb', 'ads'),
1462                  'apache' => array('conf'),
1463                  'asm' => array('ash', 'asm', 'inc'),
1464                  'asp' => array('asp'),
1465                  'bash' => array('sh'),
1466                  'bf' => array('bf'),
1467                  'c' => array('c', 'h'),
1468                  'c_mac' => array('c', 'h'),
1469                  'caddcl' => array(),
1470                  'cadlisp' => array(),
1471                  'cdfg' => array('cdfg'),
1472                  'cobol' => array('cbl'),
1473                  'cpp' => array('cpp', 'hpp', 'C', 'H', 'CPP', 'HPP'),
1474                  'csharp' => array('cs'),
1475                  'css' => array('css'),
1476                  'd' => array('d'),
1477                  'delphi' => array('dpk', 'dpr', 'pp', 'pas'),
1478                  'diff' => array('diff', 'patch'),
1479                  'dos' => array('bat', 'cmd'),
1480                  'gdb' => array('kcrash', 'crash', 'bt'),
1481                  'gettext' => array('po', 'pot'),
1482                  'gml' => array('gml'),
1483                  'gnuplot' => array('plt'),
1484                  'groovy' => array('groovy'),
1485                  'haskell' => array('hs'),
1486                  'haxe' => array('hx'),
1487                  'html4strict' => array('html', 'htm'),
1488                  'ini' => array('ini', 'desktop'),
1489                  'java' => array('java'),
1490                  'javascript' => array('js'),
1491                  'klonec' => array('kl1'),
1492                  'klonecpp' => array('klx'),
1493                  'latex' => array('tex'),
1494                  'lisp' => array('lisp'),
1495                  'lua' => array('lua'),
1496                  'matlab' => array('m'),
1497                  'mpasm' => array(),
1498                  'mysql' => array('sql'),
1499                  'nsis' => array(),
1500                  'objc' => array(),
1501                  'oobas' => array(),
1502                  'oracle8' => array(),
1503                  'oracle10' => array(),
1504                  'pascal' => array('pas'),
1505                  'perl' => array('pl', 'pm'),
1506                  'php' => array('php', 'php5', 'phtml', 'phps'),
1507                  'povray' => array('pov'),
1508                  'providex' => array('pvc', 'pvx'),
1509                  'prolog' => array('pl'),
1510                  'python' => array('py'),
1511                  'qbasic' => array('bi'),
1512                  'reg' => array('reg'),
1513                  'ruby' => array('rb'),
1514                  'sas' => array('sas'),
1515                  'scala' => array('scala'),
1516                  'scheme' => array('scm'),
1517                  'scilab' => array('sci'),
1518                  'smalltalk' => array('st'),
1519                  'smarty' => array(),
1520                  'tcl' => array('tcl'),
1521                  'text' => array('txt'),
1522                  'vb' => array('bas'),
1523                  'vbnet' => array(),
1524                  'visualfoxpro' => array(),
1525                  'whitespace' => array('ws'),
1526                  'xml' => array('xml', 'svg', 'xrc'),
1527                  'z80' => array('z80', 'asm', 'inc')
1528              );
1529          }
1530  
1531          foreach ($lookup as $lang => $extensions) {
1532              if (in_array($extension, $extensions)) {
1533                  return $lang;
1534              }
1535          }
1536  
1537          return 'text';
1538      }
1539  
1540      /**
1541       * Given a file name, this method loads its contents in, and attempts
1542       * to set the language automatically. An optional lookup table can be
1543       * passed for looking up the language name. If not specified a default
1544       * table is used
1545       *
1546       * The language table is in the form
1547       * <pre>array(
1548       *   'lang_name' => array('extension', 'extension', ...),
1549       *   'lang_name' ...
1550       * );</pre>
1551       *
1552       * @param string The filename to load the source from
1553       * @param array  A lookup array to use instead of the default one
1554       * @todo Complete rethink of this and above method
1555       * @since 1.0.5
1556       */
1557      function load_from_file($file_name, $lookup = array()) {
1558          if (is_readable($file_name)) {
1559              $this->set_source(file_get_contents($file_name));
1560              $this->set_language($this->get_language_name_from_extension(substr(strrchr($file_name, '.'), 1), $lookup));
1561          } else {
1562              $this->error = GESHI_ERROR_FILE_NOT_READABLE;
1563          }
1564      }
1565  
1566      /**
1567       * Adds a keyword to a keyword group for highlighting
1568       *
1569       * @param int    The key of the keyword group to add the keyword to
1570       * @param string The word to add to the keyword group
1571       * @since 1.0.0
1572       */
1573      function add_keyword($key, $word) {
1574          if (!is_array($this->language_data['KEYWORDS'][$key])) {
1575              $this->language_data['KEYWORDS'][$key] = array();
1576          }
1577          if (!in_array($word, $this->language_data['KEYWORDS'][$key])) {
1578              $this->language_data['KEYWORDS'][$key][] = $word;
1579  
1580              //NEW in 1.0.8 don't recompile the whole optimized regexp, simply append it
1581              if ($this->parse_cache_built) {
1582                  $subkey = count($this->language_data['CACHED_KEYWORD_LISTS'][$key]) - 1;
1583                  $this->language_data['CACHED_KEYWORD_LISTS'][$key][$subkey] .= '|' . preg_quote($word, '/');
1584              }
1585          }
1586      }
1587  
/**
 * Removes a keyword from a keyword group.
 *
 * Nothing happens if the group does not exist or does not contain the word.
 *
 * @param int    The key of the keyword group to remove the keyword from
 * @param string The word to remove from the keyword group
 * @param bool   Whether to automatically recompile the optimized regexp list or not.
 *               Note: if you set this to false and GeSHi->parse_code() was already called once
 *               for the current language, you have to manually call GeSHi->optimize_keyword_group()
 *               or the removed keyword will stay in cache and still be highlighted! On the other hand
 *               it might be too expensive to recompile the regexp list for every removal if you want to
 *               remove a lot of keywords.
 * @since 1.0.0
 */
function remove_keyword($key, $word, $recompile = true) {
    // Guard against unknown groups: array_search() must not be fed an undefined index
    if (!isset($this->language_data['KEYWORDS'][$key]) ||
            !is_array($this->language_data['KEYWORDS'][$key])) {
        return;
    }

    $key_to_remove = array_search($word, $this->language_data['KEYWORDS'][$key]);
    if ($key_to_remove === false) {
        // the word is not part of this group
        return;
    }
    unset($this->language_data['KEYWORDS'][$key][$key_to_remove]);

    //NEW in 1.0.8, optionally recompile keyword group
    if ($recompile && $this->parse_cache_built) {
        $this->optimize_keyword_group($key);
    }
}
1612  
/**
 * Creates a new keyword group.
 *
 * An existing group stored under the same key is overwritten. If the parse
 * cache has been built already, the regexp list for the new group is
 * compiled right away.
 *
 * @param int     The key of the keyword group to create
 * @param string  The styles for the keyword group
 * @param boolean Whether the keyword group is case sensitive or not
 * @param array   The words to use for the keyword group
 * @return boolean false if no words were given (the group is not created
 *                 then), true once the group has been added
 * @since 1.0.0
 */
function add_keyword_group($key, $styles, $case_sensitive = true, $words = array()) {
    $words = (array) $words;
    if (empty($words)) {
        // empty word lists mess up highlighting
        return false;
    }

    //Add the new keyword group internally
    $this->language_data['KEYWORDS'][$key] = $words;
    $this->lexic_permissions['KEYWORDS'][$key] = true;
    $this->language_data['CASE_SENSITIVE'][$key] = $case_sensitive;
    $this->language_data['STYLES']['KEYWORDS'][$key] = $styles;

    //NEW in 1.0.8, cache keyword regexp
    if ($this->parse_cache_built) {
        $this->optimize_keyword_group($key);
    }

    // Report success explicitly so the result is symmetric to the failure case
    return true;
}
1640  
/**
 * Removes a keyword group.
 *
 * Drops the word list, the lexic permission, the case sensitivity flag, the
 * style and the cached regexp list stored for the group.
 *
 * @param int    The key of the keyword group to remove
 * @since 1.0.0
 */
function remove_keyword_group ($key) {
    // Forget everything that is stored per keyword group, including the
    // regexp cache introduced in 1.0.8
    unset(
        $this->language_data['KEYWORDS'][$key],
        $this->lexic_permissions['KEYWORDS'][$key],
        $this->language_data['CASE_SENSITIVE'][$key],
        $this->language_data['STYLES']['KEYWORDS'][$key],
        $this->language_data['CACHED_KEYWORD_LISTS'][$key]
    );
}
1657  
/**
 * Compiles the optimized regexp list for a keyword group and caches it.
 *
 * The list is produced by optimize_regexp_list(). When the language enables
 * SPACE_AS_WHITESPACE in its PARSER_CONTROL keyword settings (globally or for
 * this group, the group setting taking precedence), every space in the
 * cached regexps is replaced by "\s+".
 *
 * @param int   The key of the keyword group to compile & optimize
 * @since 1.0.8
 */
function optimize_keyword_group($key) {
    $this->language_data['CACHED_KEYWORD_LISTS'][$key] =
        $this->optimize_regexp_list($this->language_data['KEYWORDS'][$key]);

    // Start with the language wide setting ...
    $whitespace_mode = false;
    if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'])) {
        $whitespace_mode = $this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'];
    }
    // ... and let a setting made for this particular group override it
    if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'])) {
        $whitespace_mode = $this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'];
    }

    if (!$whitespace_mode) {
        return;
    }

    foreach ($this->language_data['CACHED_KEYWORD_LISTS'][$key] as $index => $regexp) {
        $this->language_data['CACHED_KEYWORD_LISTS'][$key][$index] = str_replace(" ", "\\s+", $regexp);
    }
}
1687  
/**
 * Stores the content to be used for the header block.
 *
 * The value is kept as given; no processing is done here.
 *
 * @param string The content of the header block
 * @since 1.0.2
 */
function set_header_content($content) {
    $this->header_content = $content;
}
1697  
/**
 * Stores the content to be used for the footer block.
 *
 * The value is kept as given; no processing is done here.
 *
 * @param string The content of the footer block
 * @since 1.0.2
 */
function set_footer_content($content) {
    $this->footer_content = $content;
}
1707  
/**
 * Stores the style to be applied to the header content.
 *
 * @param string The style for the header content
 * @since 1.0.2
 */
function set_header_content_style($style) {
    $this->header_content_style = $style;
}
1717  
/**
 * Stores the style to be applied to the footer content.
 *
 * @param string The style for the footer content
 * @since 1.0.2
 */
function set_footer_content_style($style) {
    $this->footer_content_style = $style;
}
1727  
/**
 * Switches the forced surrounding block around the highlighted code
 * on or off.
 *
 * The flag is stored as a real boolean, whatever type is passed in.
 *
 * @param boolean Tells whether to enable or disable this feature
 * @since 1.0.7.20
 */
function enable_inner_code_block($flag) {
    $this->force_code_block = $flag ? true : false;
}
1738  
/**
 * Registers the base URL that is used for the keywords of one group.
 *
 * @param int    The key of the keyword group to set the URL for
 * @param string The URL to set for the group. If {FNAME} is in
 *               the url somewhere, it is replaced by the keyword
 *               that the URL is being made for
 * @since 1.0.2
 */
function set_url_for_keyword_group($group, $url) {
    $this->language_data['URLS'][$group] = $url;
}
1751  
/**
 * Stores the styles used for links in the code, one entry per link state.
 *
 * @param int    A constant that specifies what state the style is being
 *               set for - e.g. :hover or :visited
 * @param string The styles to use for that state
 * @since 1.0.2
 */
function set_link_styles($type, $styles) {
    $this->link_styles[$type] = $styles;
}
1763  
/**
 * Sets the target for links in code.
 *
 * The target is stored as a ready-to-use attribute string of the form
 * ' target="..."'. An empty target results in an empty attribute string.
 * The characters & " < > are turned into their HTML entities so that the
 * target cannot break out of the attribute it is embedded in.
 *
 * @param string The target for links in the code, e.g. _blank
 * @since 1.0.3
 */
function set_link_target($target) {
    if (!$target) {
        $this->link_target = '';
        return;
    }

    // '&' has to be first in the list, otherwise the entities created for
    // the other characters would get escaped a second time
    $escaped = str_replace(
        array('&', '"', '<', '>'),
        array('&amp;', '&quot;', '&lt;', '&gt;'),
        (string) $target
    );
    $this->link_target = ' target="' . $escaped . '"';
}
1777  
/**
 * Stores the styles used for important parts of the code.
 *
 * @param string The styles to use on important parts of the code
 * @since 1.0.2
 */
function set_important_styles($styles) {
    $this->important_styles = $styles;
}
1787  
/**
 * Switches highlighting of context-important blocks on or off.
 *
 * The flag is stored as a real boolean, whatever type is passed in.
 *
 * @param boolean Tells whether to enable or disable highlighting of important blocks
 * @todo Remove this feature from GeSHi
 * @deprecated
 * @since 1.0.2
 */
function enable_important_blocks($flag) {
    $this->enable_important_blocks = (bool) $flag;
}
1799  
/**
 * Decides whether CSS IDs are added to each line.
 *
 * The flag is stored as a real boolean, whatever type is passed in.
 *
 * @param boolean If true, IDs will be added to each line.
 * @since 1.0.2
 */
function enable_ids($flag = true) {
    $this->add_ids = (bool) $flag;
}
1809  
/**
 * Marks lines for extra highlighting, or removes that marking again.
 *
 * An array of line numbers is processed entry by entry with the same style.
 * Line numbers are converted to integers before they are used.
 *
 * The extra style parameter was added in 1.0.7.21.
 *
 * @param mixed  An array of line numbers to highlight, or just a line
 *               number on its own.
 * @param string A string specifying the style to use for this line.
 *               If null is specified, the default style is used.
 *               If false is specified, the line will be removed from
 *               special highlighting
 * @since 1.0.2
 * @todo  Some data replication here that could be cut down on
 */
function highlight_lines_extra($lines, $style = null) {
    if (is_array($lines)) {
        //Handle the lines one at a time
        foreach ($lines as $single_line) {
            $this->highlight_lines_extra($single_line, $style);
        }
        return;
    }

    //Register the line as specially highlighted
    $line_number = intval($lines);
    $this->highlight_extra_lines[$line_number] = $line_number;

    if ($style === false) {
        //false: the line is not to be highlighted specially any longer
        unset(
            $this->highlight_extra_lines[$line_number],
            $this->highlight_extra_lines_styles[$line_number]
        );
        return;
    }

    if ($style === null) {
        //null: fall back to the default style by dropping the individual one
        unset($this->highlight_extra_lines_styles[$line_number]);
        return;
    }

    $this->highlight_extra_lines_styles[$line_number] = $style;
}
1846  
/**
 * Stores the style used for extra-highlighted lines.
 *
 * @param string The style for extra-highlighted lines
 * @since 1.0.2
 */
function set_highlight_lines_extra_style($styles) {
    $this->highlight_extra_lines_style = $styles;
}
1856  
/**
 * Sets the line-ending.
 *
 * The value is converted to a string before it is stored.
 *
 * @param string The new line-ending
 * @since 1.0.2
 */
function set_line_ending($line_ending) {
    $this->line_ending = strval($line_ending);
}
1866  
/**
 * Sets the number at which line numbering starts. The value is
 * converted to an integer and its absolute value is stored.
 *
 * <b>Warning:</b> Using this method will add the "start"
 * attribute to the &lt;ol&gt; that is used for line numbering.
 * This is <b>not</b> valid XHTML strict, so if that's what you
 * care about then don't use this method. Firefox is getting
 * support for the CSS method of doing this in 1.1 and Opera
 * has support for the CSS method, but (of course) IE doesn't
 * so it's not worth doing it the CSS way yet.
 *
 * @param int The number to start line numbers at
 * @since 1.0.2
 */
function start_line_numbers_at($number) {
    $this->line_numbers_start = abs((int) $number);
}
1885  
/**
 * Sets the encoding used for htmlspecialchars(), for international
 * support. The encoding name is stored in lower case; an empty value
 * leaves the current encoding untouched.
 *
 * NOTE: This is not needed for now because htmlspecialchars() is not
 * being used (it has a security hole in PHP4 that has not been patched).
 * Maybe in a future version it may make a return for speed reasons, but
 * I doubt it.
 *
 * @param string The encoding to use for the source
 * @since 1.0.3
 */
function set_encoding($encoding) {
    if (!$encoding) {
        return;
    }
    $this->encoding = strtolower($encoding);
}
1903  
/**
 * Turns linking of keywords on or off.
 *
 * The flag is stored as a real boolean, whatever type is passed in.
 *
 * @param boolean If true, links will be added to keywords
 * @since 1.0.2
 */
function enable_keyword_links($enable = true) {
    $this->keyword_links = $enable ? true : false;
}
1913  
/**
 * Sets up the caches needed for styling. This is automatically called in
 * parse_code() and get_stylesheet() when appropriate. Stylesheet generators
 * rely on the style information preprocessed here.
 *
 * Only number highlighting is handled: the NUMBERS setting of the language
 * is turned into NUMBERS_CACHE. An array is copied as is. A bitfield is
 * walked bit by bit: number styles indexed by flag value (1 << n) are moved
 * to index n, and every enabled flag is assigned either to its own style
 * group or - if no style exists for it - to group 0.
 *
 * @since 1.0.8
 * @access private
 */
function build_style_cache() {
    //Nothing to prepare if numbers are not highlighted at all
    if (!$this->lexic_permissions['NUMBERS']) {
        return;
    }

    //A language that does not say anything about numbers counts as "0"
    if (!isset($this->language_data['NUMBERS'])) {
        $this->language_data['NUMBERS'] = 0;
    }

    //Arrays are taken over unchanged
    if (is_array($this->language_data['NUMBERS'])) {
        $this->language_data['NUMBERS_CACHE'] = $this->language_data['NUMBERS'];
        return;
    }

    $this->language_data['NUMBERS_CACHE'] = array();

    //No flags given: fall back to basic integers and non-scientific floats
    if (!$this->language_data['NUMBERS']) {
        $this->language_data['NUMBERS'] = GESHI_NUMBER_INT_BASIC | GESHI_NUMBER_FLT_NONSCI;
    }

    $bit = 0;
    $remaining = $this->language_data['NUMBERS'];
    while ($remaining > 0) {
        $flag = 1 << $bit;

        //Rearrange style indices if required: flag value -> bit position
        if (isset($this->language_data['STYLES']['NUMBERS'][$flag])) {
            $this->language_data['STYLES']['NUMBERS'][$bit] =
                $this->language_data['STYLES']['NUMBERS'][$flag];
            unset($this->language_data['STYLES']['NUMBERS'][$flag]);
        }

        //Only flags that are actually set take part in highlighting
        if ($remaining & 1) {
            if (isset($this->language_data['STYLES']['NUMBERS'][$bit])) {
                //The flag has a style group of its own
                $this->language_data['NUMBERS_CACHE'][$bit] = $flag;
            } else {
                //No own style: collect the flag in group 0
                if (!isset($this->language_data['NUMBERS_CACHE'][0])) {
                    $this->language_data['NUMBERS_CACHE'][0] = 0;
                }
                $this->language_data['NUMBERS_CACHE'][0] |= $flag;
            }
        }

        ++$bit;
        $remaining >>= 1;
    }
}
1966  
/**
 * Sets up the caches needed for parsing. This is automatically called in
 * parse_code() when appropriate. Stylesheet generators do not need these
 * caches, so keeping them separate makes those generators much faster.
 *
 * Four caches are prepared, each only if the matching lexic permission
 * allows it: the symbol lookup table and search regexp, the optimized
 * keyword regexp lists, the bracket replacement table and the regexps for
 * number highlighting. Afterwards the parse cache is flagged as built.
 *
 * @since 1.0.8
 * @access private
 */
function build_parse_cache() {
    // cache symbol regexp
    //Building the regexp is costly, so it is done once for all symbols
    //instead of once per symbol group. This requires reorganizing the
    //data arrays first.
    if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
        $this->language_data['MULTIPLE_SYMBOL_GROUPS'] = count($this->language_data['STYLES']['SYMBOLS']) > 1;

        $this->language_data['SYMBOL_DATA'] = array();
        $multi_char_rx = array(); // multi char symbols
        $single_char_rx = array(); // single char symbols
        foreach ($this->language_data['SYMBOLS'] as $key => $symbols) {
            //A group of symbols is filed under its own key,
            //a symbol given on its own always goes to group 0
            if (is_array($symbols)) {
                $group = $key;
                $symbol_list = $symbols;
            } else {
                $group = 0;
                $symbol_list = array($symbols);
            }

            foreach ($symbol_list as $sym) {
                $sym = $this->hsc($sym);
                if (isset($this->language_data['SYMBOL_DATA'][$sym])) {
                    // the first definition of a symbol wins
                    continue;
                }
                $this->language_data['SYMBOL_DATA'][$sym] = $group;

                if (isset($sym[1])) { // multiple chars
                    $multi_char_rx[] = preg_quote($sym, '/');
                } elseif ($sym == '-') {
                    // don't trigger range out of order error
                    $single_char_rx[] = '\-';
                } else { // single char
                    $single_char_rx[] = preg_quote($sym, '/');
                }
            }
        }

        //SYMBOL_DATA now maps every possible symbol to its style group, so
        //the correct style can be looked up the moment a symbol is highlighted.
        //
        //What is left to do is to build the search string out of the
        //collected parts: an alternation for the multi char symbols,
        //followed by one character class for the single char symbols.
        $search_parts = array();
        if (!empty($multi_char_rx)) {
            rsort($multi_char_rx);
            $search_parts[] = implode('|', $multi_char_rx);
        }
        if (!empty($single_char_rx)) {
            rsort($single_char_rx);
            $search_parts[] = '[' . implode('', $single_char_rx) . ']';
        }
        $this->language_data['SYMBOL_SEARCH'] = implode("|", $search_parts);
    }

    // cache optimized regexp for keyword matching
    // remove old cache
    $this->language_data['CACHED_KEYWORD_LISTS'] = array();
    foreach (array_keys($this->language_data['KEYWORDS']) as $key) {
        // skip groups that have been switched off explicitly
        if (isset($this->lexic_permissions['KEYWORDS'][$key]) &&
                !$this->lexic_permissions['KEYWORDS'][$key]) {
            continue;
        }
        $this->optimize_keyword_group($key);
    }

    // brackets
    if ($this->lexic_permissions['BRACKETS']) {
        $this->language_data['CACHE_BRACKET_MATCH'] = array('[', ']', '(', ')', '{', '}');

        // inline style if classes are not in use and a style is known, the class otherwise
        if (!$this->use_classes && isset($this->language_data['STYLES']['BRACKETS'][0])) {
            $attribute = ' style="' . $this->language_data['STYLES']['BRACKETS'][0] . '"';
        } else {
            $attribute = ' class="br0"';
        }

        // the entities are listed in the same order as the brackets in CACHE_BRACKET_MATCH
        $replacements = array();
        foreach (array('&#91;', '&#93;', '&#40;', '&#41;', '&#123;', '&#125;') as $entity) {
            $replacements[] = '<|' . $attribute . '>' . $entity . '|>';
        }
        $this->language_data['CACHE_BRACKET_REPLACE'] = $replacements;
    }

    //Build the parse cache needed to highlight numbers appropriately
    if ($this->lexic_permissions['NUMBERS']) {
        //Make sure the style rearrangements have been processed ...
        //This also does some preprocessing to check which style groups are usable ...
        if (!isset($this->language_data['NUMBERS_CACHE'])) {
            $this->build_style_cache();
        }

        //Number format specification
        //All these formats are matched case-insensitively!
        static $numbers_format = array(
            GESHI_NUMBER_INT_BASIC =>
                '(?:(?<![0-9a-z_\.%$@])|(?<=\.\.))(?<![\d\.]e[+\-])([1-9]\d*?|0)(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_INT_CSTYLE =>
                '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])([1-9]\d*?|0)l(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_BIN_SUFFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[01]+?[bB](?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_BIN_PREFIX_PERCENT =>
                '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])%[01]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_BIN_PREFIX_0B =>
                '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])0b[01]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_OCT_PREFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_OCT_PREFIX_0O =>
                '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])0o[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_OCT_PREFIX_AT =>
                '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])\@[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_OCT_SUFFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[0-7]+?o(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_HEX_PREFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0x[0-9a-fA-F]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_HEX_PREFIX_DOLLAR =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\$[0-9a-fA-F]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_HEX_SUFFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d[0-9a-fA-F]*?[hH](?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_FLT_NONSCI =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d+?\.\d+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_FLT_NONSCI_F =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)f(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_FLT_SCI_SHORT =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\.\d+?(?:e[+\-]?\d+?)?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_FLT_SCI_ZERO =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)(?:e[+\-]?\d+?)?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)'
            );

        //NUMBERS_CACHE holds, per style group, either a bitfield of number
        //flags or a string that already is a regexp.
        $this->language_data['NUMBERS_RXCACHE'] = array();
        foreach ($this->language_data['NUMBERS_CACHE'] as $group => $rxdata) {
            if (is_string($rxdata)) {
                $regexp = $rxdata;
            } else {
                //This is a bitfield of number flags to highlight:
                //collect the format of every flag that is set and join them
                //into one alternation, which becomes the actual regexp
                $formats = array();
                $flag = 1;
                while ($flag <= $rxdata) {
                    if ($rxdata & $flag) {
                        $formats[] = $numbers_format[$flag];
                    }
                    $flag <<= 1;
                }
                $regexp = implode("|", $formats);
            }

            $this->language_data['NUMBERS_RXCACHE'][$group] =
                "/(?<!<\|\/)(?<!<\|!REG3XP)(?<!<\|\/NUM!)(?<!\d\/>)($regexp)(?!(?:<DOT>|(?>[^\<]))+>)(?![^<]*>)(?!\|>)(?!\/>)/i"; //
        }

        // default precheck expression, unless the language defines its own
        if (!isset($this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'])) {
            $this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'] = '#\d#';
        }
    }

    $this->parse_cache_built = true;
}
2145  
2146      /**
2147       * Returns the code in $this->source, highlighted and surrounded by the
2148       * necessary HTML.
2149       *
2150       * This should only be called ONCE, cos it's SLOW! If you want to highlight
2151       * the same source multiple times, you're better off doing a whole lot of
2152       * str_replaces to replace the &lt;span&gt;s
2153       *
2154       * @since 1.0.0
2155       */
2156      function parse_code () {
2157          // Start the timer
2158          $start_time = microtime();
2159  
2160          // Replace all newlines to a common form.
2161          $code = str_replace("\r\n", "\n", $this->source);
2162          $code = str_replace("\r", "\n", $code);
2163  
2164          // Firstly, if there is an error, we won't highlight
2165          if ($this->error) {
2166              //Escape the source for output
2167              $result = $this->hsc($this->source);
2168  
2169              //This fix is related to SF#1923020, but has to be applied regardless of
2170              //actually highlighting symbols.
2171              $result = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $result);
2172  
2173              // Timing is irrelevant
2174              $this->set_time($start_time, $start_time);
2175              $this->finalise($result);
2176              return $result;
2177          }
2178  
2179          // make sure the parse cache is up2date
2180          if (!$this->parse_cache_built) {
2181              $this->build_parse_cache();
2182          }
2183  
2184          // Initialise various stuff
2185          $length           = strlen($code);
2186          $COMMENT_MATCHED  = false;
2187          $stuff_to_parse   = '';
2188          $endresult        = '';
2189  
2190          // "Important" selections are handled like multiline comments
2191          // @todo GET RID OF THIS SHIZ
2192          if ($this->enable_important_blocks) {
2193              $this->language_data['COMMENT_MULTI'][GESHI_START_IMPORTANT] = GESHI_END_IMPORTANT;
2194          }
2195  
2196          if ($this->strict_mode) {
2197              // Break the source into bits. Each bit will be a portion of the code
2198              // within script delimiters - for example, HTML between < and >
2199              $k = 0;
2200              $parts = array();
2201              $matches = array();
2202              $next_match_pointer = null;
2203              // we use a copy to unset delimiters on demand (when they are not found)
2204              $delim_copy = $this->language_data['SCRIPT_DELIMITERS'];
2205              $i = 0;
2206              while ($i < $length) {
2207                  $next_match_pos = $length + 1; // never true
2208                  foreach ($delim_copy as $dk => $delimiters) {
2209                      if(is_array($delimiters)) {
2210                          foreach ($delimiters as $open => $close) {
2211                              // make sure the cache is setup properly
2212                              if (!isset($matches[$dk][$open])) {
2213                                  $matches[$dk][$open] = array(
2214                                      'next_match' => -1,
2215                                      'dk' => $dk,
2216  
2217                                      'open' => $open, // needed for grouping of adjacent code blocks (see below)
2218                                      'open_strlen' => strlen($open),
2219  
2220                                      'close' => $close,
2221                                      'close_strlen' => strlen($close),
2222                                  );
2223                              }
2224                              // Get the next little bit for this opening string
2225                              if ($matches[$dk][$open]['next_match'] < $i) {
2226                                  // only find the next pos if it was not already cached
2227                                  $open_pos = strpos($code, $open, $i);
2228                                  if ($open_pos === false) {
2229                                      // no match for this delimiter ever
2230                                      unset($delim_copy[$dk][$open]);
2231                                      continue;
2232                                  }
2233                                  $matches[$dk][$open]['next_match'] = $open_pos;
2234                              }
2235                              if ($matches[$dk][$open]['next_match'] < $next_match_pos) {
2236                                  //So we got a new match, update the close_pos
2237                                  $matches[$dk][$open]['close_pos'] =
2238                                      strpos($code, $close, $matches[$dk][$open]['next_match']+1);
2239  
2240                                  $next_match_pointer =& $matches[$dk][$open];
2241                                  $next_match_pos = $matches[$dk][$open]['next_match'];
2242                              }
2243                          }
2244                      } else {
2245                          //So we should match an RegExp as Strict Block ...
2246                          /**
2247                           * The value in $delimiters is expected to be an RegExp
2248                           * containing exactly 2 matching groups:
2249                           *  - Group 1 is the opener
2250                           *  - Group 2 is the closer
2251                           */
2252                          if(!GESHI_PHP_PRE_433 && //Needs proper rewrite to work with PHP >=4.3.0; 4.3.3 is guaranteed to work.
2253                              preg_match($delimiters, $code, $matches_rx, PREG_OFFSET_CAPTURE, $i)) {
2254                              //We got a match ...
2255                              if(isset($matches_rx['start']) && isset($matches_rx['end']))
2256                              {
2257                                  $matches[$dk] = array(
2258                                      'next_match' => $matches_rx['start'][1],
2259                                      'dk' => $dk,
2260  
2261                                      'close_strlen' => strlen($matches_rx['end'][0]),
2262                                      'close_pos' => $matches_rx['end'][1],
2263                                      );
2264                              } else {
2265                                  $matches[$dk] = array(
2266                                      'next_match' => $matches_rx[1][1],
2267                                      'dk' => $dk,
2268  
2269                                      'close_strlen' => strlen($matches_rx[2][0]),
2270                                      'close_pos' => $matches_rx[2][1],
2271                                      );
2272                              }
2273                          } else {
2274                              // no match for this delimiter ever
2275                              unset($delim_copy[$dk]);
2276                              continue;
2277                          }
2278  
2279                          if ($matches[$dk]['next_match'] <= $next_match_pos) {
2280                              $next_match_pointer =& $matches[$dk];
2281                              $next_match_pos = $matches[$dk]['next_match'];
2282                          }
2283                      }
2284                  }
2285  
2286                  // non-highlightable text
2287                  $parts[$k] = array(
2288                      1 => substr($code, $i, $next_match_pos - $i)
2289                  );
2290                  ++$k;
2291  
2292                  if ($next_match_pos > $length) {
2293                      // out of bounds means no next match was found
2294                      break;
2295                  }
2296  
2297                  // highlightable code
2298                  $parts[$k][0] = $next_match_pointer['dk'];
2299  
2300                  //Only combine for non-rx script blocks
2301                  if(is_array($delim_copy[$next_match_pointer['dk']])) {
2302                      // group adjacent script blocks, e.g. <foobar><asdf> should be one block, not three!
2303                      $i = $next_match_pos + $next_match_pointer['open_strlen'];
2304                      while (true) {
2305                          $close_pos = strpos($code, $next_match_pointer['close'], $i);
2306                          if ($close_pos == false) {
2307                              break;
2308                          }
2309                          $i = $close_pos + $next_match_pointer['close_strlen'];
2310                          if ($i == $length) {
2311                              break;
2312                          }
2313                          if ($code[$i] == $next_match_pointer['open'][0] && ($next_match_pointer['open_strlen'] == 1 ||
2314                              substr($code, $i, $next_match_pointer['open_strlen']) == $next_match_pointer['open'])) {
2315                              // merge adjacent but make sure we don't merge things like <tag><!-- comment -->
2316                              foreach ($matches as $submatches) {
2317                                  foreach ($submatches as $match) {
2318                                      if ($match['next_match'] == $i) {
2319                                          // a different block already matches here!
2320                                          break 3;
2321                                      }
2322                                  }
2323                              }
2324                          } else {
2325                              break;
2326                          }
2327                      }
2328                  } else {
2329                      $close_pos = $next_match_pointer['close_pos'] + $next_match_pointer['close_strlen'];
2330                      $i = $close_pos;
2331                  }
2332  
2333                  if ($close_pos === false) {
2334                      // no closing delimiter found!
2335                      $parts[$k][1] = substr($code, $next_match_pos);
2336                      ++$k;
2337                      break;
2338                  } else {
2339                      $parts[$k][1] = substr($code, $next_match_pos, $i - $next_match_pos);
2340                      ++$k;
2341                  }
2342              }
2343              unset($delim_copy, $next_match_pointer, $next_match_pos, $matches);
2344              $num_parts = $k;
2345  
2346              if ($num_parts == 1 && $this->strict_mode == GESHI_MAYBE) {
2347                  // when we have only one part, we don't have anything to highlight at all.
2348                  // if we have a "maybe" strict language, this should be handled as highlightable code
2349                  $parts = array(
2350                      0 => array(
2351                          0 => '',
2352                          1 => ''
2353                      ),
2354                      1 => array(
2355                          0 => null,
2356                          1 => $parts[0][1]
2357                      )
2358                  );
2359                  $num_parts = 2;
2360              }
2361  
2362          } else {
2363              // Not strict mode - simply dump the source into
2364              // the array at index 1 (the first highlightable block)
2365              $parts = array(
2366                  0 => array(
2367                      0 => '',
2368                      1 => ''
2369                  ),
2370                  1 => array(
2371                      0 => null,
2372                      1 => $code
2373                  )
2374              );
2375              $num_parts = 2;
2376          }
2377  
2378          //Unset variables we won't need any longer
2379          unset($code);
2380  
2381          //Preload some repeatedly used values regarding hardquotes ...
2382          $hq = isset($this->language_data['HARDQUOTE']) ? $this->language_data['HARDQUOTE'][0] : false;
2383          $hq_strlen = strlen($hq);
2384  
2385          //Preload if line numbers are to be generated afterwards
2386          //Added a check if line breaks should be forced even without line numbers, fixes SF#1727398
2387          $check_linenumbers = $this->line_numbers != GESHI_NO_LINE_NUMBERS ||
2388              !empty($this->highlight_extra_lines) || !$this->allow_multiline_span;
2389  
2390          //preload the escape char for faster checking ...
2391          $escaped_escape_char = $this->hsc($this->language_data['ESCAPE_CHAR']);
2392  
2393          // this is used for single-line comments
2394          $sc_disallowed_before = "";
2395          $sc_disallowed_after = "";
2396  
2397          if (isset($this->language_data['PARSER_CONTROL'])) {
2398              if (isset($this->language_data['PARSER_CONTROL']['COMMENTS'])) {
2399                  if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'])) {
2400                      $sc_disallowed_before = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'];
2401                  }
2402                  if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'])) {
2403                      $sc_disallowed_after = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'];
2404                  }
2405              }
2406          }
2407  
2408          //Fix for SF#1932083: Multichar Quotemarks unsupported
2409          $is_string_starter = array();
2410          if ($this->lexic_permissions['STRINGS']) {
2411              foreach ($this->language_data['QUOTEMARKS'] as $quotemark) {
2412                  if (!isset($is_string_starter[$quotemark[0]])) {
2413                      $is_string_starter[$quotemark[0]] = (string)$quotemark;
2414                  } elseif (is_string($is_string_starter[$quotemark[0]])) {
2415                      $is_string_starter[$quotemark[0]] = array(
2416                          $is_string_starter[$quotemark[0]],
2417                          $quotemark);
2418                  } else {
2419                      $is_string_starter[$quotemark[0]][] = $quotemark;
2420                  }
2421              }
2422          }
2423  
2424          // Now we go through each part. We know that even-indexed parts are
2425          // code that shouldn't be highlighted, and odd-indexed parts should
2426          // be highlighted
2427          for ($key = 0; $key < $num_parts; ++$key) {
2428              $STRICTATTRS = '';
2429  
2430              // If this block should be highlighted...
2431              if (!($key & 1)) {
2432                  // Else not a block to highlight
2433                  $endresult .= $this->hsc($parts[$key][1]);
2434                  unset($parts[$key]);
2435                  continue;
2436              }
2437  
2438              $result = '';
2439              $part = $parts[$key][1];
2440  
2441              $highlight_part = true;
2442              if ($this->strict_mode && !is_null($parts[$key][0])) {
2443                  // get the class key for this block of code
2444                  $script_key = $parts[$key][0];
2445                  $highlight_part = $this->language_data['HIGHLIGHT_STRICT_BLOCK'][$script_key];
2446                  if ($this->language_data['STYLES']['SCRIPT'][$script_key] != '' &&
2447                      $this->lexic_permissions['SCRIPT']) {
2448                      // Add a span element around the source to
2449                      // highlight the overall source block
2450                      if (!$this->use_classes &&
2451                          $this->language_data['STYLES']['SCRIPT'][$script_key] != '') {
2452                          $attributes = ' style="' . $this->language_data['STYLES']['SCRIPT'][$script_key] . '"';
2453                      } else {
2454                          $attributes = ' class="sc' . $script_key . '"';
2455                      }
2456                      $result .= "<span$attributes>";
2457                      $STRICTATTRS = $attributes;
2458                  }
2459              }
2460  
2461              if ($highlight_part) {
2462                  // Now, highlight the code in this block. This code
2463                  // is really the engine of GeSHi (along with the method
2464                  // parse_non_string_part).
2465  
2466                  // cache comment regexps incrementally
2467                  $next_comment_regexp_key = '';
2468                  $next_comment_regexp_pos = -1;
2469                  $next_comment_multi_pos = -1;
2470                  $next_comment_single_pos = -1;
2471                  $comment_regexp_cache_per_key = array();
2472                  $comment_multi_cache_per_key = array();
2473                  $comment_single_cache_per_key = array();
2474                  $next_open_comment_multi = '';
2475                  $next_comment_single_key = '';
2476                  $escape_regexp_cache_per_key = array();
2477                  $next_escape_regexp_key = '';
2478                  $next_escape_regexp_pos = -1;
2479  
2480                  $length = strlen($part);
2481                  for ($i = 0; $i < $length; ++$i) {
2482                      // Get the next char
2483                      $char = $part[$i];
2484                      $char_len = 1;
2485  
2486                      // update regexp comment cache if needed
2487                      if (isset($this->language_data['COMMENT_REGEXP']) && $next_comment_regexp_pos < $i) {
2488                          $next_comment_regexp_pos = $length;
2489                          foreach ($this->language_data['COMMENT_REGEXP'] as $comment_key => $regexp) {
2490                              $match_i = false;
2491                              if (isset($comment_regexp_cache_per_key[$comment_key]) &&
2492                                  ($comment_regexp_cache_per_key[$comment_key]['pos'] >= $i ||
2493                                   $comment_regexp_cache_per_key[$comment_key]['pos'] === false)) {
2494                                  // we have already matched something
2495                                  if ($comment_regexp_cache_per_key[$comment_key]['pos'] === false) {
2496                                      // this comment is never matched
2497                                      continue;
2498                                  }
2499                                  $match_i = $comment_regexp_cache_per_key[$comment_key]['pos'];
2500                              } elseif (
2501                                  //This is to allow use of the offset parameter in preg_match and stay as compatible with older PHP versions as possible
2502                                  (GESHI_PHP_PRE_433 && preg_match($regexp, substr($part, $i), $match, PREG_OFFSET_CAPTURE)) ||
2503                                  (!GESHI_PHP_PRE_433 && preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $i))
2504                                  ) {
2505                                  $match_i = $match[0][1];
2506                                  if (GESHI_PHP_PRE_433) {
2507                                      $match_i += $i;
2508                                  }
2509  
2510                                  $comment_regexp_cache_per_key[$comment_key] = array(
2511                                      'key' => $comment_key,
2512                                      'length' => strlen($match[0][0]),
2513                                      'pos' => $match_i
2514                                  );
2515                              } else {
2516                                  $comment_regexp_cache_per_key[$comment_key]['pos'] = false;
2517                                  continue;
2518                              }
2519  
2520                              if ($match_i !== false && $match_i < $next_comment_regexp_pos) {
2521                                  $next_comment_regexp_pos = $match_i;
2522                                  $next_comment_regexp_key = $comment_key;
2523                                  if ($match_i === $i) {
2524                                      break;
2525                                  }
2526                              }
2527                          }
2528                      }
2529  
2530                      $string_started = false;
2531  
2532                      if (isset($is_string_starter[$char])) {
2533                          // Possibly the start of a new string ...
2534  
2535                          //Check which starter it was ...
2536                          //Fix for SF#1932083: Multichar Quotemarks unsupported
2537                          if (is_array($is_string_starter[$char])) {
2538                              $char_new = '';
2539                              foreach ($is_string_starter[$char] as $testchar) {
2540                                  if ($testchar === substr($part, $i, strlen($testchar)) &&
2541                                      strlen($testchar) > strlen($char_new)) {
2542                                      $char_new = $testchar;
2543                                      $string_started = true;
2544                                  }
2545                              }
2546                              if ($string_started) {
2547                                  $char = $char_new;
2548                              }
2549                          } else {
2550                              $testchar = $is_string_starter[$char];
2551                              if ($testchar === substr($part, $i, strlen($testchar))) {
2552                                  $char = $testchar;
2553                                  $string_started = true;
2554                              }
2555                          }
2556                          $char_len = strlen($char);
2557                      }
2558  
2559                      if ($string_started && ($i != $next_comment_regexp_pos)) {
2560                          // Hand out the correct style information for this string
2561                          $string_key = array_search($char, $this->language_data['QUOTEMARKS']);
2562                          if (!isset($this->language_data['STYLES']['STRINGS'][$string_key]) ||
2563                              !isset($this->language_data['STYLES']['ESCAPE_CHAR'][$string_key])) {
2564                              $string_key = 0;
2565                          }
2566  
2567                          // parse the stuff before this
2568                          $result .= $this->parse_non_string_part($stuff_to_parse);
2569                          $stuff_to_parse = '';
2570  
2571                          if (!$this->use_classes) {
2572                              $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS'][$string_key] . '"';
2573                          } else {
2574                              $string_attributes = ' class="st'.$string_key.'"';
2575                          }
2576  
2577                          // now handle the string
2578                          $string = "<span$string_attributes>" . GeSHi::hsc($char);
2579                          $start = $i + $char_len;
2580                          $string_open = true;
2581  
2582                          if(empty($this->language_data['ESCAPE_REGEXP'])) {
2583                              $next_escape_regexp_pos = $length;
2584                          }
2585  
2586                          do {
2587                              //Get the regular ending pos ...
2588                              $close_pos = strpos($part, $char, $start);
2589                              if(false === $close_pos) {
2590                                  $close_pos = $length;
2591                              }
2592  
2593                              if($this->lexic_permissions['ESCAPE_CHAR']) {
2594                                  // update escape regexp cache if needed
2595                                  if (isset($this->language_data['ESCAPE_REGEXP']) && $next_escape_regexp_pos < $start) {
2596                                      $next_escape_regexp_pos = $length;
2597                                      foreach ($this->language_data['ESCAPE_REGEXP'] as $escape_key => $regexp) {
2598                                          $match_i = false;
2599                                          if (isset($escape_regexp_cache_per_key[$escape_key]) &&
2600                                              ($escape_regexp_cache_per_key[$escape_key]['pos'] >= $start ||
2601                                               $escape_regexp_cache_per_key[$escape_key]['pos'] === false)) {
2602                                              // we have already matched something
2603                                              if ($escape_regexp_cache_per_key[$escape_key]['pos'] === false) {
2604                                                  // this escape regexp is never matched
2605                                                  continue;
2606                                              }
2607                                              $match_i = $escape_regexp_cache_per_key[$escape_key]['pos'];
2608                                          } elseif (
2609                                              //This is to allow use of the offset parameter in preg_match and stay as compatible with older PHP versions as possible
2610                                              (GESHI_PHP_PRE_433 && preg_match($regexp, substr($part, $start), $match, PREG_OFFSET_CAPTURE)) ||
2611                                              (!GESHI_PHP_PRE_433 && preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $start))
2612                                              ) {
2613                                              $match_i = $match[0][1];
2614                                              if (GESHI_PHP_PRE_433) {
2615                                                  $match_i += $start;
2616                                              }
2617  
2618                                              $escape_regexp_cache_per_key[$escape_key] = array(
2619                                                  'key' => $escape_key,
2620                                                  'length' => strlen($match[0][0]),
2621                                                  'pos' => $match_i
2622                                              );
2623                                          } else {
2624                                              $escape_regexp_cache_per_key[$escape_key]['pos'] = false;
2625                                              continue;
2626                                          }
2627  
2628                                          if ($match_i !== false && $match_i < $next_escape_regexp_pos) {
2629                                              $next_escape_regexp_pos = $match_i;
2630                                              $next_escape_regexp_key = $escape_key;
2631                                              if ($match_i === $start) {
2632                                                  break;
2633                                              }
2634                                          }
2635                                      }
2636                                  }
2637  
2638                                  //Find the next simple escape position
2639                                  if('' != $this->language_data['ESCAPE_CHAR']) {
2640                                      $simple_escape = strpos($part, $this->language_data['ESCAPE_CHAR'], $start);
2641                                      if(false === $simple_escape) {
2642                                          $simple_escape = $length;
2643                                      }
2644                                  } else {
2645                                      $simple_escape = $length;
2646                                  }
2647                              } else {
2648                                  $next_escape_regexp_pos = $length;
2649                                  $simple_escape = $length;
2650                              }
2651  
2652                              if($simple_escape < $next_escape_regexp_pos &&
2653                                  $simple_escape < $length &&
2654                                  $simple_escape < $close_pos) {
2655                                  //The next escape sequence is a simple one ...
2656                                  $es_pos = $simple_escape;
2657  
2658                                  //Add the stuff not in the string yet ...
2659                                  $string .= $this->hsc(substr($part, $start, $es_pos - $start));
2660  
2661                                  //Get the style for this escaped char ...
2662                                  if (!$this->use_classes) {
2663                                      $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][0] . '"';
2664                                  } else {
2665                                      $escape_char_attributes = ' class="es0"';
2666                                  }
2667  
2668                                  //Add the style for the escape char ...
2669                                  $string .= "<span$escape_char_attributes>" .
2670                                      GeSHi::hsc($this->language_data['ESCAPE_CHAR']);
2671  
2672                                  //Get the byte AFTER the ESCAPE_CHAR we just found
2673                                  $es_char = $part[$es_pos + 1];
2674                                  if ($es_char == "\n") {
2675                                      // don't put a newline around newlines
2676                                      $string .= "</span>\n";
2677                                      $start = $es_pos + 2;
2678                                  } elseif (ord($es_char) >= 128) {
2679                                      //This is a non-ASCII char (UTF8 or single byte)
2680                                      //This code tries to work around SF#2037598 ...
2681                                      if(function_exists('mb_substr')) {
2682                                          $es_char_m = mb_substr(substr($part, $es_pos+1, 16), 0, 1, $this->encoding);
2683                                          $string .= $es_char_m . '</span>';
2684                                      } elseif (!GESHI_PHP_PRE_433 && 'utf-8' == $this->encoding) {
2685                                          if(preg_match("/[\xC2-\xDF][\x80-\xBF]".
2686                                              "|\xE0[\xA0-\xBF][\x80-\xBF]".
2687                                              "|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}".
2688                                              "|\xED[\x80-\x9F][\x80-\xBF]".
2689                                              "|\xF0[\x90-\xBF][\x80-\xBF]{2}".
2690                                              "|[\xF1-\xF3][\x80-\xBF]{3}".
2691                                              "|\xF4[\x80-\x8F][\x80-\xBF]{2}/s",
2692                                              $part, $es_char_m, null, $es_pos + 1)) {
2693                                              $es_char_m = $es_char_m[0];
2694                                          } else {
2695                                              $es_char_m = $es_char;
2696                                          }
2697                                          $string .= $this->hsc($es_char_m) . '</span>';
2698                                      } else {
2699                                          $es_char_m = $this->hsc($es_char);
2700                                      }
2701                                      $start = $es_pos + strlen($es_char_m) + 1;
2702                                  } else {
2703                                      $string .= $this->hsc($es_char) . '</span>';
2704                                      $start = $es_pos + 2;
2705                                  }
2706                              } elseif ($next_escape_regexp_pos < $length &&
2707                                  $next_escape_regexp_pos < $close_pos) {
2708                                  $es_pos = $next_escape_regexp_pos;
2709                                  //Add the stuff not in the string yet ...
2710                                  $string .= $this->hsc(substr($part, $start, $es_pos - $start));
2711  
2712                                  //Get the key and length of this match ...
2713                                  $escape = $escape_regexp_cache_per_key[$next_escape_regexp_key];
2714                                  $escape_str = substr($part, $es_pos, $escape['length']);
2715                                  $escape_key = $escape['key'];
2716  
2717                                  //Get the style for this escaped char ...
2718                                  if (!$this->use_classes) {
2719                                      $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][$escape_key] . '"';
2720                                  } else {
2721                                      $escape_char_attributes = ' class="es' . $escape_key . '"';
2722                                  }
2723  
2724                                  //Add the style for the escape char ...
2725                                  $string .= "<span$escape_char_attributes>" .
2726                                      $this->hsc($escape_str) . '</span>';
2727  
2728                                  $start = $es_pos + $escape['length'];
2729                              } else {
2730                                  //Copy the remainder of the string ...
2731                                  $string .= $this->hsc(substr($part, $start, $close_pos - $start + $char_len)) . '</span>';
2732                                  $start = $close_pos + $char_len;
2733                                  $string_open = false;
2734                              }
2735                          } while($string_open);
2736  
2737                          if ($check_linenumbers) {
2738                              // Are line numbers used? If so, we should end the string before
2739                              // the newline and begin it again (so when <li>s are put in the source
2740                              // remains XHTML compliant)
2741                              // note to self: This opens up possibility of config files specifying
2742                              // that languages can/cannot have multiline strings???
2743                              $string = str_replace("\n", "</span>\n<span$string_attributes>", $string);
2744                          }
2745  
2746                          $result .= $string;
2747                          $string = '';
2748                          $i = $start - 1;
2749                          continue;
2750                      } elseif ($this->lexic_permissions['STRINGS'] && $hq && $hq[0] == $char &&
2751                          substr($part, $i, $hq_strlen) == $hq && ($i != $next_comment_regexp_pos)) {
2752                          // The start of a hard quoted string
2753                          if (!$this->use_classes) {
2754                              $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS']['HARD'] . '"';
2755                              $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR']['HARD'] . '"';
2756                          } else {
2757                              $string_attributes = ' class="st_h"';
2758                              $escape_char_attributes = ' class="es_h"';
2759                          }
2760                          // parse the stuff before this
2761                          $result .= $this->parse_non_string_part($stuff_to_parse);
2762                          $stuff_to_parse = '';
2763  
2764                          // now handle the string
2765                          $string = '';
2766  
2767                          // look for closing quote
2768                          $start = $i + $hq_strlen;
2769                          while ($close_pos = strpos($part, $this->language_data['HARDQUOTE'][1], $start)) {
2770                              $start = $close_pos + 1;
2771                              if ($this->lexic_permissions['ESCAPE_CHAR'] && $part[$close_pos - 1] == $this->language_data['HARDCHAR'] &&
2772                                  (($i + $hq_strlen) != ($close_pos))) { //Support empty string for HQ escapes if Starter = Escape
2773                                  // make sure this quote is not escaped
2774                                  foreach ($this->language_data['HARDESCAPE'] as $hardescape) {
2775                                      if (substr($part, $close_pos - 1, strlen($hardescape)) == $hardescape) {
2776                                          // check whether this quote is escaped or if it is something like '\\'
2777                                          $escape_char_pos = $close_pos - 1;
2778                                          while ($escape_char_pos > 0
2779                                                  && $part[$escape_char_pos - 1] == $this->language_data['HARDCHAR']) {
2780                                              --$escape_char_pos;
2781                                          }
2782                                          if (($close_pos - $escape_char_pos) & 1) {
2783                                              // uneven number of escape chars => this quote is escaped
2784                                              continue 2;
2785                                          }
2786                                      }
2787                                  }
2788                              }
2789  
2790                              // found closing quote
2791                              break;
2792                          }
2793  
2794                          //Found the closing delimiter?
2795                          if (!$close_pos) {
2796                              // span till the end of this $part when no closing delimiter is found
2797                              $close_pos = $length;
2798                          }
2799  
2800                          //Get the actual string
2801                          $string = substr($part, $i, $close_pos - $i + 1);
2802                          $i = $close_pos;
2803  
2804                          // handle escape chars and encode html chars
2805                          // (special because when we have escape chars within our string they may not be escaped)
2806                          if ($this->lexic_permissions['ESCAPE_CHAR'] && $this->language_data['ESCAPE_CHAR']) {
2807                              $start = 0;
2808                              $new_string = '';
2809                              while ($es_pos = strpos($string, $this->language_data['ESCAPE_CHAR'], $start)) {
2810                                  // hmtl escape stuff before
2811                                  $new_string .= $this->hsc(substr($string, $start, $es_pos - $start));
2812                                  // check if this is a hard escape
2813                                  foreach ($this->language_data['HARDESCAPE'] as $hardescape) {
2814                                      if (substr($string, $es_pos, strlen($hardescape)) == $hardescape) {
2815                                          // indeed, this is a hardescape
2816                                          $new_string .= "<span$escape_char_attributes>" .
2817                                              $this->hsc($hardescape) . '</span>';
2818                                          $start = $es_pos + strlen($hardescape);
2819                                          continue 2;
2820                                      }
2821                                  }
2822                                  // not a hard escape, but a normal escape
2823                                  // they come in pairs of two
2824                                  $c = 0;
2825                                  while (isset($string[$es_pos + $c]) && isset($string[$es_pos + $c + 1])
2826                                      && $string[$es_pos + $c] == $this->language_data['ESCAPE_CHAR']
2827                                      && $string[$es_pos + $c + 1] == $this->language_data['ESCAPE_CHAR']) {
2828                                      $c += 2;
2829                                  }
2830                                  if ($c) {
2831                                      $new_string .= "<span$escape_char_attributes>" .
2832                                          str_repeat($escaped_escape_char, $c) .
2833                                          '</span>';
2834                                      $start = $es_pos + $c;
2835                                  } else {
2836                                      // this is just a single lonely escape char...
2837                                      $new_string .= $escaped_escape_char;
2838                                      $start = $es_pos + 1;
2839                                  }
2840                              }
2841                              $string = $new_string . $this->hsc(substr($string, $start));
2842                          } else {
2843                              $string = $this->hsc($string);
2844                          }
2845  
2846                          if ($check_linenumbers) {
2847                              // Are line numbers used? If, we should end the string before
2848                              // the newline and begin it again (so when <li>s are put in the source
2849                              // remains XHTML compliant)
2850                              // note to self: This opens up possibility of config files specifying
2851                              // that languages can/cannot have multiline strings???
2852                              $string = str_replace("\n", "</span>\n<span$string_attributes>", $string);
2853                          }
2854  
2855                          $result .= "<span$string_attributes>" . $string . '</span>';
2856                          $string = '';
2857                          continue;
2858                      } else {
2859                          //Have a look for regexp comments
2860                          if ($i == $next_comment_regexp_pos) {
2861                              $COMMENT_MATCHED = true;
2862                              $comment = $comment_regexp_cache_per_key[$next_comment_regexp_key];
2863                              $test_str = $this->hsc(substr($part, $i, $comment['length']));
2864  
2865                              //@todo If remove important do remove here
2866                              if ($this->lexic_permissions['COMMENTS']['MULTI']) {
2867                                  if (!$this->use_classes) {
2868                                      $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment['key']] . '"';
2869                                  } else {
2870                                      $attributes = ' class="co' . $comment['key'] . '"';
2871                                  }
2872  
2873                                  $test_str = "<span$attributes>" . $test_str . "</span>";
2874  
2875                                  // Short-cut through all the multiline code
2876                                  if ($check_linenumbers) {
2877                                      // strreplace to put close span and open span around multiline newlines
2878                                      $test_str = str_replace(
2879                                          "\n", "</span>\n<span$attributes>",
2880                                          str_replace("\n ", "\n&nbsp;", $test_str)
2881                                      );
2882                                  }
2883                              }
2884  
2885                              $i += $comment['length'] - 1;
2886  
2887                              // parse the rest
2888                              $result .= $this->parse_non_string_part($stuff_to_parse);
2889                              $stuff_to_parse = '';
2890                          }
2891  
2892                          // If we haven't matched a regexp comment, try multi-line comments
2893                          if (!$COMMENT_MATCHED) {
2894                              // Is this a multiline comment?
2895                              if (!empty($this->language_data['COMMENT_MULTI']) && $next_comment_multi_pos < $i) {
2896                                  $next_comment_multi_pos = $length;
2897                                  foreach ($this->language_data['COMMENT_MULTI'] as $open => $close) {
2898                                      $match_i = false;
2899                                      if (isset($comment_multi_cache_per_key[$open]) &&
2900                                          ($comment_multi_cache_per_key[$open] >= $i ||
2901                                           $comment_multi_cache_per_key[$open] === false)) {
2902                                          // we have already matched something
2903                                          if ($comment_multi_cache_per_key[$open] === false) {
2904                                              // this comment is never matched
2905                                              continue;
2906                                          }
2907                                          $match_i = $comment_multi_cache_per_key[$open];
2908                                      } elseif (($match_i = stripos($part, $open, $i)) !== false) {
2909                                          $comment_multi_cache_per_key[$open] = $match_i;
2910                                      } else {
2911                                          $comment_multi_cache_per_key[$open] = false;
2912                                          continue;
2913                                      }
2914                                      if ($match_i !== false && $match_i < $next_comment_multi_pos) {
2915                                          $next_comment_multi_pos = $match_i;
2916                                          $next_open_comment_multi = $open;
2917                                          if ($match_i === $i) {
2918                                              break;
2919                                          }
2920                                      }
2921                                  }
2922                              }
2923                              if ($i == $next_comment_multi_pos) {
2924                                  $open = $next_open_comment_multi;
2925                                  $close = $this->language_data['COMMENT_MULTI'][$open];
2926                                  $open_strlen = strlen($open);
2927                                  $close_strlen = strlen($close);
2928                                  $COMMENT_MATCHED = true;
2929                                  $test_str_match = $open;
2930                                  //@todo If remove important do remove here
2931                                  if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
2932                                      $open == GESHI_START_IMPORTANT) {
2933                                      if ($open != GESHI_START_IMPORTANT) {
2934                                          if (!$this->use_classes) {
2935                                              $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS']['MULTI'] . '"';
2936                                          } else {
2937                                              $attributes = ' class="coMULTI"';
2938                                          }
2939                                          $test_str = "<span$attributes>" . $this->hsc($open);
2940                                      } else {
2941                                          if (!$this->use_classes) {
2942                                              $attributes = ' style="' . $this->important_styles . '"';
2943                                          } else {
2944                                              $attributes = ' class="imp"';
2945                                          }
2946  
2947                                          // We don't include the start of the comment if it's an
2948                                          // "important" part
2949                                          $test_str = "<span$attributes>";
2950                                      }
2951                                  } else {
2952                                      $test_str = $this->hsc($open);
2953                                  }
2954  
2955                                  $close_pos = strpos( $part, $close, $i + $open_strlen );
2956  
2957                                  if ($close_pos === false) {
2958                                      $close_pos = $length;
2959                                  }
2960  
2961                                  // Short-cut through all the multiline code
2962                                  $rest_of_comment = $this->hsc(substr($part, $i + $open_strlen, $close_pos - $i - $open_strlen + $close_strlen));
2963                                  if (($this->lexic_permissions['COMMENTS']['MULTI'] ||
2964                                      $test_str_match == GESHI_START_IMPORTANT) &&
2965                                      $check_linenumbers) {
2966  
2967                                      // strreplace to put close span and open span around multiline newlines
2968                                      $test_str .= str_replace(
2969                                          "\n", "</span>\n<span$attributes>",
2970                                          str_replace("\n ", "\n&nbsp;", $rest_of_comment)
2971                                      );
2972                                  } else {
2973                                      $test_str .= $rest_of_comment;
2974                                  }
2975  
2976                                  if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
2977                                      $test_str_match == GESHI_START_IMPORTANT) {
2978                                      $test_str .= '</span>';
2979                                  }
2980  
2981                                  $i = $close_pos + $close_strlen - 1;
2982  
2983                                  // parse the rest
2984                                  $result .= $this->parse_non_string_part($stuff_to_parse);
2985                                  $stuff_to_parse = '';
2986                              }
2987                          }
2988  
2989                          // If we haven't matched a multiline comment, try single-line comments
2990                          if (!$COMMENT_MATCHED) {
2991                              // cache potential single line comment occurances
2992                              if (!empty($this->language_data['COMMENT_SINGLE']) && $next_comment_single_pos < $i) {
2993                                  $next_comment_single_pos = $length;
2994                                  foreach ($this->language_data['COMMENT_SINGLE'] as $comment_key => $comment_mark) {
2995                                      $match_i = false;
2996                                      if (isset($comment_single_cache_per_key[$comment_key]) &&
2997                                          ($comment_single_cache_per_key[$comment_key] >= $i ||
2998                                           $comment_single_cache_per_key[$comment_key] === false)) {
2999                                          // we have already matched something
3000                                          if ($comment_single_cache_per_key[$comment_key] === false) {
3001                                              // this comment is never matched
3002                                              continue;
3003                                          }
3004                                          $match_i = $comment_single_cache_per_key[$comment_key];
3005                                      } elseif (
3006                                          // case sensitive comments
3007                                          ($this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS] &&
3008                                          ($match_i = stripos($part, $comment_mark, $i)) !== false) ||
3009                                          // non case sensitive
3010                                          (!$this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS] &&
3011                                            (($match_i = strpos($part, $comment_mark, $i)) !== false))) {
3012                                          $comment_single_cache_per_key[$comment_key] = $match_i;
3013                                      } else {
3014                                          $comment_single_cache_per_key[$comment_key] = false;
3015                                          continue;
3016                                      }
3017                                      if ($match_i !== false && $match_i < $next_comment_single_pos) {
3018                                          $next_comment_single_pos = $match_i;
3019                                          $next_comment_single_key = $comment_key;
3020                                          if ($match_i === $i) {
3021                                              break;
3022                                          }
3023                                      }
3024                                  }
3025                              }
3026                              if ($next_comment_single_pos == $i) {
3027                                  $comment_key = $next_comment_single_key;
3028                                  $comment_mark = $this->language_data['COMMENT_SINGLE'][$comment_key];
3029                                  $com_len = strlen($comment_mark);
3030  
3031                                  // This check will find special variables like $# in bash
3032                                  // or compiler directives of Delphi beginning {$
3033                                  if ((empty($sc_disallowed_before) || ($i == 0) ||
3034                                      (false === strpos($sc_disallowed_before, $part[$i-1]))) &&
3035                                      (empty($sc_disallowed_after) || ($length <= $i + $com_len) ||
3036                                      (false === strpos($sc_disallowed_after, $part[$i + $com_len]))))
3037                                  {
3038                                      // this is a valid comment
3039                                      $COMMENT_MATCHED = true;
3040                                      if ($this->lexic_permissions['COMMENTS'][$comment_key]) {
3041                                          if (!$this->use_classes) {
3042                                              $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment_key] . '"';
3043                                          } else {
3044                                              $attributes = ' class="co' . $comment_key . '"';
3045                                          }
3046                                          $test_str = "<span$attributes>" . $this->hsc($this->change_case($comment_mark));
3047                                      } else {
3048                                          $test_str = $this->hsc($comment_mark);
3049                                      }
3050  
3051                                      //Check if this comment is the last in the source
3052                                      $close_pos = strpos($part, "\n", $i);
3053                                      $oops = false;
3054                                      if ($close_pos === false) {
3055                                          $close_pos = $length;
3056                                          $oops = true;
3057                                      }
3058                                      $test_str .= $this->hsc(substr($part, $i + $com_len, $close_pos - $i - $com_len));
3059                                      if ($this->lexic_permissions['COMMENTS'][$comment_key]) {
3060                                          $test_str .= "</span>";
3061                                      }
3062  
3063                                      // Take into account that the comment might be the last in the source
3064                                      if (!$oops) {
3065                                        $test_str .= "\n";
3066                                      }
3067  
3068                                      $i = $close_pos;
3069  
3070                                      // parse the rest
3071                                      $result .= $this->parse_non_string_part($stuff_to_parse);
3072                                      $stuff_to_parse = '';
3073                                  }
3074                              }
3075                          }
3076                      }
3077  
3078                      // Where are we adding this char?
3079                      if (!$COMMENT_MATCHED) {
3080                          $stuff_to_parse .= $char;
3081                      } else {
3082                          $result .= $test_str;
3083                          unset($test_str);
3084                          $COMMENT_MATCHED = false;
3085                      }
3086                  }
3087                  // Parse the last bit
3088                  $result .= $this->parse_non_string_part($stuff_to_parse);
3089                  $stuff_to_parse = '';
3090              } else {
3091                  $result .= $this->hsc($part);
3092              }
3093              // Close the <span> that surrounds the block
3094              if ($STRICTATTRS != '') {
3095                  $result = str_replace("\n", "</span>\n<span$STRICTATTRS>", $result);
3096                  $result .= '</span>';
3097              }
3098  
3099              $endresult .= $result;
3100              unset($part, $parts[$key], $result);
3101          }
3102  
3103          //This fix is related to SF#1923020, but has to be applied regardless of
3104          //actually highlighting symbols.
3105          /** NOTE: memorypeak #3 */
3106          $endresult = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $endresult);
3107  
3108  //        // Parse the last stuff (redundant?)
3109  //        $result .= $this->parse_non_string_part($stuff_to_parse);
3110  
3111          // Lop off the very first and last spaces
3112  //        $result = substr($result, 1, -1);
3113  
3114          // We're finished: stop timing
3115          $this->set_time($start_time, microtime());
3116  
3117          $this->finalise($endresult);
3118          return $endresult;
3119      }
3120  
3121      /**
3122       * Swaps out spaces and tabs for HTML indentation. Not needed if
3123       * the code is in a pre block...
3124       *
3125       * @param  string The source to indent (reference!)
3126       * @since  1.0.0
3127       * @access private
3128       */
3129      function indent(&$result) {
3130          /// Replace tabs with the correct number of spaces
3131          if (false !== strpos($result, "\t")) {
3132              $lines = explode("\n", $result);
3133              $result = null;//Save memory while we process the lines individually
3134              $tab_width = $this->get_real_tab_width();
3135              $tab_string = '&nbsp;' . str_repeat(' ', $tab_width);
3136  
3137              for ($key = 0, $n = count($lines); $key < $n; $key++) {
3138                  $line = $lines[$key];
3139                  if (false === strpos($line, "\t")) {
3140                      continue;
3141                  }
3142  
3143                  $pos = 0;
3144                  $length = strlen($line);
3145                  $lines[$key] = ''; // reduce memory
3146  
3147                  $IN_TAG = false;
3148                  for ($i = 0; $i < $length; ++$i) {
3149                      $char = $line[$i];
3150                      // Simple engine to work out whether we're in a tag.
3151                      // If we are we modify $pos. This is so we ignore HTML
3152                      // in the line and only workout the tab replacement
3153                      // via the actual content of the string
3154                      // This test could be improved to include strings in the
3155                      // html so that < or > would be allowed in user's styles
3156                      // (e.g. quotes: '<' '>'; or similar)
3157                      if ($IN_TAG) {
3158                          if ('>' == $char) {
3159                              $IN_TAG = false;
3160                          }
3161                          $lines[$key] .= $char;
3162                      } elseif ('<' == $char) {
3163                          $IN_TAG = true;
3164                          $lines[$key] .= '<';
3165                      } elseif ('&' == $char) {
3166                          $substr = substr($line, $i + 3, 5);
3167                          $posi = strpos($substr, ';');
3168                          if (false === $posi) {
3169                              ++$pos;
3170                          } else {
3171                              $pos -= $posi+2;
3172                          }
3173                          $lines[$key] .= $char;
3174                      } elseif ("\t" == $char) {
3175                          $str = '';
3176                          // OPTIMISE - move $strs out. Make an array:
3177                          // $tabs = array(
3178                          //  1 => '&nbsp;',
3179                          //  2 => '&nbsp; ',
3180                          //  3 => '&nbsp; &nbsp;' etc etc
3181                          // to use instead of building a string every time
3182                          $tab_end_width = $tab_width - ($pos % $tab_width); //Moved out of the look as it doesn't change within the loop
3183                          if (($pos & 1) || 1 == $tab_end_width) {
3184                              $str .= substr($tab_string, 6, $tab_end_width);
3185                          } else {
3186                              $str .= substr($tab_string, 0, $tab_end_width+5);
3187                          }
3188                          $lines[$key] .= $str;
3189                          $pos += $tab_end_width;
3190  
3191                          if (false === strpos($line, "\t", $i + 1)) {
3192                              $lines[$key] .= substr($line, $i + 1);
3193                              break;
3194                          }
3195                      } elseif (0 == $pos && ' ' == $char) {
3196                          $lines[$key] .= '&nbsp;';
3197                          ++$pos;
3198                      } else {
3199                          $lines[$key] .= $char;
3200                          ++$pos;
3201                      }
3202                  }
3203              }
3204              $result = implode("\n", $lines);
3205              unset($lines);//We don't need the lines separated beyond this --- free them!
3206          }
3207          // Other whitespace
3208          // BenBE: Fix to reduce the number of replacements to be done
3209          $result = preg_replace('/^ /m', '&nbsp;', $result);
3210          $result = str_replace('  ', ' &nbsp;', $result);
3211  
3212          if ($this->line_numbers == GESHI_NO_LINE_NUMBERS && $this->header_type != GESHI_HEADER_PRE_TABLE) {
3213              if ($this->line_ending === null) {
3214                  $result = nl2br($result);
3215              } else {
3216                  $result = str_replace("\n", $this->line_ending, $result);
3217              }
3218          }
3219      }
3220  
3221      /**
3222       * Changes the case of a keyword for those languages where a change is asked for
3223       *
3224       * @param  string The keyword to change the case of
3225       * @return string The keyword with its case changed
3226       * @since  1.0.0
3227       * @access private
3228       */
3229      function change_case($instr) {
3230          switch ($this->language_data['CASE_KEYWORDS']) {
3231              case GESHI_CAPS_UPPER:
3232                  return strtoupper($instr);
3233              case GESHI_CAPS_LOWER:
3234                  return strtolower($instr);
3235              default:
3236                  return $instr;
3237          }
3238      }
3239  
3240      /**
3241       * Handles replacements of keywords to include markup and links if requested
3242       *
3243       * @param  string The keyword to add the Markup to
3244       * @return The HTML for the match found
3245       * @since  1.0.8
3246       * @access private
3247       *
3248       * @todo   Get rid of ender in keyword links
3249       */
3250      function handle_keyword_replace($match) {
3251          $k = $this->_kw_replace_group;
3252          $keyword = $match[0];
3253          $keyword_match = $match[1];
3254  
3255          $before = '';
3256          $after = '';
3257  
3258          if ($this->keyword_links) {
3259              // Keyword links have been ebabled
3260  
3261              if (isset($this->language_data['URLS'][$k]) &&
3262                  $this->language_data['URLS'][$k] != '') {
3263                  // There is a base group for this keyword
3264  
3265                  // Old system: strtolower
3266                  //$keyword = ( $this->language_data['CASE_SENSITIVE'][$group] ) ? $keyword : strtolower($keyword);
3267                  // New system: get keyword from language file to get correct case
3268                  if (!$this->language_data['CASE_SENSITIVE'][$k] &&
3269                      strpos($this->language_data['URLS'][$k], '{FNAME}') !== false) {
3270                      foreach ($this->language_data['KEYWORDS'][$k] as $word) {
3271                          if (strcasecmp($word, $keyword_match) == 0) {
3272                              break;
3273                          }
3274                      }
3275                  } else {
3276                      $word = $keyword_match;
3277                  }
3278  
3279                  $before = '<|UR1|"' .
3280                      str_replace(
3281                          array(
3282                              '{FNAME}',
3283                              '{FNAMEL}',
3284                              '{FNAMEU}',
3285                              '.'),
3286                          array(
3287                              str_replace('+', '%20', urlencode($this->hsc($word))),
3288                              str_replace('+', '%20', urlencode($this->hsc(strtolower($word)))),
3289                              str_replace('+', '%20', urlencode($this->hsc(strtoupper($word)))),
3290                              '<DOT>'),
3291                          $this->language_data['URLS'][$k]
3292                      ) . '">';
3293                  $after = '</a>';
3294              }
3295          }
3296  
3297          return $before . '<|/'. $k .'/>' . $this->change_case($keyword) . '|>' . $after;
3298      }
3299  
3300      /**
3301       * handles regular expressions highlighting-definitions with callback functions
3302       *
3303       * @note this is a callback, don't use it directly
3304       *
3305       * @param array the matches array
3306       * @return The highlighted string
3307       * @since 1.0.8
3308       * @access private
3309       */
3310      function handle_regexps_callback($matches) {
3311          // before: "' style=\"' . call_user_func(\"$func\", '\\1') . '\"\\1|>'",
3312          return  ' style="' . call_user_func($this->language_data['STYLES']['REGEXPS'][$this->_rx_key], $matches[1]) . '"'. $matches[1] . '|>';
3313      }
3314  
3315      /**
3316       * handles newlines in REGEXPS matches. Set the _hmr_* vars before calling this
3317       *
3318       * @note this is a callback, don't use it directly
3319       *
3320       * @param array the matches array
3321       * @return string
3322       * @since 1.0.8
3323       * @access private
3324       */
3325      function handle_multiline_regexps($matches) {
3326          $before = $this->_hmr_before;
3327          $after = $this->_hmr_after;
3328          if ($this->_hmr_replace) {
3329              $replace = $this->_hmr_replace;
3330              $search = array();
3331  
3332              foreach (array_keys($matches) as $k) {
3333                  $search[] = '\\' . $k;
3334              }
3335  
3336              $before = str_replace($search, $matches, $before);
3337              $after = str_replace($search, $matches, $after);
3338              $replace = str_replace($search, $matches, $replace);
3339          } else {
3340              $replace = $matches[0];
3341          }
3342          return $before
3343                      . '<|!REG3XP' . $this->_hmr_key .'!>'
3344                          . str_replace("\n", "|>\n<|!REG3XP" . $this->_hmr_key . '!>', $replace)
3345                      . '|>'
3346                . $after;
3347      }
3348  
3349      /**
3350       * Takes a string that has no strings or comments in it, and highlights
3351       * stuff like keywords, numbers and methods.
3352       *
3353       * @param string The string to parse for keyword, numbers etc.
3354       * @since 1.0.0
3355       * @access private
3356       * @todo BUGGY! Why? Why not build string and return?
3357       */
3358      function parse_non_string_part($stuff_to_parse) {
3359          $stuff_to_parse = ' ' . $this->hsc($stuff_to_parse);
3360  
3361          // Highlight keywords
3362          $disallowed_before = "(?<![a-zA-Z0-9\$_\|\#|^&";
3363          $disallowed_after = "(?![a-zA-Z0-9_\|%\\-&;";
3364          if ($this->lexic_permissions['STRINGS']) {
3365              $quotemarks = preg_quote(implode($this->language_data['QUOTEMARKS']), '/');
3366              $disallowed_before .= $quotemarks;
3367              $disallowed_after .= $quotemarks;
3368          }
3369          $disallowed_before .= "])";
3370          $disallowed_after .= "])";
3371  
3372          $parser_control_pergroup = false;
3373          if (isset($this->language_data['PARSER_CONTROL'])) {
3374              if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'])) {
3375                  $x = 0; // check wether per-keyword-group parser_control is enabled
3376                  if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'])) {
3377                      $disallowed_before = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'];
3378                      ++$x;
3379                  }
3380                  if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'])) {
3381                      $disallowed_after = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'];
3382                      ++$x;
3383                  }
3384                  $parser_control_pergroup = (count($this->language_data['PARSER_CONTROL']['KEYWORDS']) - $x) > 0;
3385              }
3386          }
3387  
3388          foreach (array_keys($this->language_data['KEYWORDS']) as $k) {
3389              if (!isset($this->lexic_permissions['KEYWORDS'][$k]) ||
3390                  $this->lexic_permissions['KEYWORDS'][$k]) {
3391  
3392                  $case_sensitive = $this->language_data['CASE_SENSITIVE'][$k];
3393                  $modifiers = $case_sensitive ? '' : 'i';
3394  
3395                  // NEW in 1.0.8 - per-keyword-group parser control
3396                  $disallowed_before_local = $disallowed_before;
3397                  $disallowed_after_local = $disallowed_after;
3398                  if ($parser_control_pergroup && isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k])) {
3399                      if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'])) {
3400                          $disallowed_before_local =
3401                              $this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'];
3402                      }
3403  
3404                      if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'])) {
3405                          $disallowed_after_local =
3406                              $this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'];
3407                      }
3408                  }
3409  
3410                  $this->_kw_replace_group = $k;
3411  
3412                  //NEW in 1.0.8, the cached regexp list
3413                  // since we don't want PHP / PCRE to crash due to too large patterns we split them into smaller chunks
3414                  for ($set = 0, $set_length = count($this->language_data['CACHED_KEYWORD_LISTS'][$k]); $set <  $set_length; ++$set) {
3415                      $keywordset =& $this->language_data['CACHED_KEYWORD_LISTS'][$k][$set];
3416                      // Might make a more unique string for putting the number in soon
3417                      // Basically, we don't put the styles in yet because then the styles themselves will
3418                      // get highlighted if the language has a CSS keyword in it (like CSS, for example ;))
3419                      $stuff_to_parse = preg_replace_callback(
3420                          "/$disallowed_before_local({$keywordset})(?!\<DOT\>(?:htm|php|aspx?))$disallowed_after_local/$modifiers",
3421                          array($this, 'handle_keyword_replace'),
3422                          $stuff_to_parse
3423                          );
3424                  }
3425              }
3426          }
3427  
3428          // Regular expressions
3429          foreach ($this->language_data['REGEXPS'] as $key => $regexp) {
3430              if ($this->lexic_permissions['REGEXPS'][$key]) {
3431                  if (is_array($regexp)) {
3432                      if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
3433                          // produce valid HTML when we match multiple lines
3434                          $this->_hmr_replace = $regexp[GESHI_REPLACE];
3435                          $this->_hmr_before = $regexp[GESHI_BEFORE];
3436                          $this->_hmr_key = $key;
3437                          $this->_hmr_after = $regexp[GESHI_AFTER];
3438                          $stuff_to_parse = preg_replace_callback(
3439                              "/" . $regexp[GESHI_SEARCH] . "/{$regexp[GESHI_MODIFIERS]}",
3440                              array($this, 'handle_multiline_regexps'),
3441                              $stuff_to_parse);
3442                          $this->_hmr_replace = false;
3443                          $this->_hmr_before = '';
3444                          $this->_hmr_after = '';
3445                      } else {
3446                          $stuff_to_parse = preg_replace(
3447                              '/' . $regexp[GESHI_SEARCH] . '/' . $regexp[GESHI_MODIFIERS],
3448                              $regexp[GESHI_BEFORE] . '<|!REG3XP'. $key .'!>' . $regexp[GESHI_REPLACE] . '|>' . $regexp[GESHI_AFTER],
3449                              $stuff_to_parse);
3450                      }
3451                  } else {
3452                      if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
3453                          // produce valid HTML when we match multiple lines
3454                          $this->_hmr_key = $key;
3455                          $stuff_to_parse = preg_replace_callback( "/(" . $regexp . ")/",
3456                                                array($this, 'handle_multiline_regexps'), $stuff_to_parse);
3457                          $this->_hmr_key = '';
3458                      } else {
3459                          $stuff_to_parse = preg_replace( "/(" . $regexp . ")/", "<|!REG3XP$key!>\\1|>", $stuff_to_parse);
3460                      }
3461                  }
3462              }
3463          }
3464  
3465          // Highlight numbers. As of 1.0.8 we support different types of numbers
3466          $numbers_found = false;
3467  
3468          if ($this->lexic_permissions['NUMBERS'] && preg_match($this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'], $stuff_to_parse )) {
3469              $numbers_found = true;
3470  
3471              //For each of the formats ...
3472              foreach($this->language_data['NUMBERS_RXCACHE'] as $id => $regexp) {
3473                  //Check if it should be highlighted ...
3474                  $stuff_to_parse = preg_replace($regexp, "<|/NUM!$id/>\\1|>", $stuff_to_parse);
3475              }
3476          }
3477  
3478          //
3479          // Now that's all done, replace /[number]/ with the correct styles
3480          //
3481          foreach (array_keys($this->language_data['KEYWORDS']) as $k) {
3482              if (!$this->use_classes) {
3483                  $attributes = ' style="' .
3484                      (isset($this->language_data['STYLES']['KEYWORDS'][$k]) ?
3485                      $this->language_data['STYLES']['KEYWORDS'][$k] : "") . '"';
3486              } else {
3487                  $attributes = ' class="kw' . $k . '"';
3488              }
3489              $stuff_to_parse = str_replace("<|/$k/>", "<|$attributes>", $stuff_to_parse);
3490          }
3491  
3492          if ($numbers_found) {
3493              // Put number styles in
3494              foreach($this->language_data['NUMBERS_RXCACHE'] as $id => $regexp) {
3495                  //Commented out for now, as this needs some review ...
3496                  //                if ($numbers_permissions & $id) {
3497                  //Get the appropriate style ...
3498                  //Checking for unset styles is done by the style cache builder ...
3499                  if (!$this->use_classes) {
3500                      $attributes = ' style="' . $this->language_data['STYLES']['NUMBERS'][$id] . '"';
3501                  } else {
3502                      $attributes = ' class="nu'.$id.'"';
3503                  }
3504  
3505                  //Set in the correct styles ...
3506                  $stuff_to_parse = str_replace("/NUM!$id/", $attributes, $stuff_to_parse);
3507                  //                }
3508              }
3509          }
3510  
3511          // Highlight methods and fields in objects
3512          if ($this->lexic_permissions['METHODS'] && $this->language_data['OOLANG']) {
3513              $oolang_spaces = "[\s]*";
3514              $oolang_before = "";
3515              $oolang_after = "[a-zA-Z][a-zA-Z0-9_]*";
3516              if (isset($this->language_data['PARSER_CONTROL'])) {
3517                  if (isset($this->language_data['PARSER_CONTROL']['OOLANG'])) {
3518                      if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'])) {
3519                          $oolang_before = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'];
3520                      }
3521                      if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'])) {
3522                          $oolang_after = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'];
3523                      }
3524                      if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'])) {
3525                          $oolang_spaces = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'];
3526                      }
3527                  }
3528              }
3529  
3530              foreach ($this->language_data['OBJECT_SPLITTERS'] as $key => $splitter) {
3531                  if (false !== strpos($stuff_to_parse, $splitter)) {
3532                      if (!$this->use_classes) {
3533                          $attributes = ' style="' . $this->language_data['STYLES']['METHODS'][$key] . '"';
3534                      } else {
3535                          $attributes = ' class="me' . $key . '"';
3536                      }
3537                      $stuff_to_parse = preg_replace("/($oolang_before)(" . preg_quote($this->language_data['OBJECT_SPLITTERS'][$key], '/') . ")($oolang_spaces)($oolang_after)/", "\\1\\2\\3<|$attributes>\\4|>", $stuff_to_parse);
3538                  }
3539              }
3540          }
3541  
3542          //
3543          // Highlight brackets. Yes, I've tried adding a semi-colon to this list.
3544          // You try it, and see what happens ;)
3545          // TODO: Fix lexic permissions not converting entities if shouldn't
3546          // be highlighting regardless
3547          //
3548          if ($this->lexic_permissions['BRACKETS']) {
3549              $stuff_to_parse = str_replace( $this->language_data['CACHE_BRACKET_MATCH'],
3550                                $this->language_data['CACHE_BRACKET_REPLACE'], $stuff_to_parse );
3551          }
3552  
3553  
3554          //FIX for symbol highlighting ...
3555          if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
3556              //Get all matches and throw away those witin a block that is already highlighted... (i.e. matched by a regexp)
3557              $n_symbols = preg_match_all("/<\|(?:<DOT>|[^>])+>(?:(?!\|>).*?)\|>|<\/a>|(?:" . $this->language_data['SYMBOL_SEARCH'] . ")+(?![^<]+?>)/", $stuff_to_parse, $pot_symbols, PREG_OFFSET_CAPTURE | PREG_SET_ORDER);
3558              $global_offset = 0;
3559              for ($s_id = 0; $s_id < $n_symbols; ++$s_id) {
3560                  $symbol_match = $pot_symbols[$s_id][0][0];
3561                  if (strpos($symbol_match, '<') !== false || strpos($symbol_match, '>') !== false) {
3562                      // already highlighted blocks _must_ include either < or >
3563                      // so if this conditional applies, we have to skip this match
3564                      // BenBE: UNLESS the block contains <SEMI> or <PIPE>
3565                      if(strpos($symbol_match, '<SEMI>') === false &&
3566                          strpos($symbol_match, '<PIPE>') === false) {
3567                          continue;
3568                      }
3569                  }
3570  
3571                  // if we reach this point, we have a valid match which needs to be highlighted
3572  
3573                  $symbol_length = strlen($symbol_match);
3574                  $symbol_offset = $pot_symbols[$s_id][0][1];
3575                  unset($pot_symbols[$s_id]);
3576                  $symbol_end = $symbol_length + $symbol_offset;
3577                  $symbol_hl = "";
3578  
3579                  // if we have multiple styles, we have to handle them properly
3580                  if ($this->language_data['MULTIPLE_SYMBOL_GROUPS']) {
3581                      $old_sym = -1;
3582                      // Split the current stuff to replace into its atomic symbols ...
3583                      preg_match_all("/" . $this->language_data['SYMBOL_SEARCH'] . "/", $symbol_match, $sym_match_syms, PREG_PATTERN_ORDER);
3584                      foreach ($sym_match_syms[0] as $sym_ms) {
3585                          //Check if consequtive symbols belong to the same group to save output ...
3586                          if (isset($this->language_data['SYMBOL_DATA'][$sym_ms])
3587                              && ($this->language_data['SYMBOL_DATA'][$sym_ms] != $old_sym)) {
3588                              if (-1 != $old_sym) {
3589                                  $symbol_hl .= "|>";
3590                              }
3591                              $old_sym = $this->language_data['SYMBOL_DATA'][$sym_ms];
3592                              if (!$this->use_classes) {
3593                                  $symbol_hl .= '<| style="' . $this->language_data['STYLES']['SYMBOLS'][$old_sym] . '">';
3594                              } else {
3595                                  $symbol_hl .= '<| class="sy' . $old_sym . '">';
3596                              }
3597                          }
3598                          $symbol_hl .= $sym_ms;
3599                      }
3600                      unset($sym_match_syms);
3601  
3602                      //Close remaining tags and insert the replacement at the right position ...
3603                      //Take caution if symbol_hl is empty to avoid doubled closing spans.
3604                      if (-1 != $old_sym) {
3605                          $symbol_hl .= "|>";
3606                      }
3607                  } else {
3608                      if (!$this->use_classes) {
3609                          $symbol_hl = '<| style="' . $this->language_data['STYLES']['SYMBOLS'][0] . '">';
3610                      } else {
3611                          $symbol_hl = '<| class="sy0">';
3612                      }
3613                      $symbol_hl .= $symbol_match . '|>';
3614                  }
3615  
3616                  $stuff_to_parse = substr_replace($stuff_to_parse, $symbol_hl, $symbol_offset + $global_offset, $symbol_length);
3617  
3618                  // since we replace old text with something of different size,
3619                  // we'll have to keep track of the differences
3620                  $global_offset += strlen($symbol_hl) - $symbol_length;
3621              }
3622          }
3623          //FIX for symbol highlighting ...
3624  
3625          // Add class/style for regexps
3626          foreach (array_keys($this->language_data['REGEXPS']) as $key) {
3627              if ($this->lexic_permissions['REGEXPS'][$key]) {
3628                  if (is_callable($this->language_data['STYLES']['REGEXPS'][$key])) {
3629                      $this->_rx_key = $key;
3630                      $stuff_to_parse = preg_replace_callback("/!REG3XP$key!(.*)\|>/U",
3631                          array($this, 'handle_regexps_callback'),
3632                          $stuff_to_parse);
3633                  } else {
3634                      if (!$this->use_classes) {
3635                          $attributes = ' style="' . $this->language_data['STYLES']['REGEXPS'][$key] . '"';
3636                      } else {
3637                          if (is_array($this->language_data['REGEXPS'][$key]) &&
3638                              array_key_exists(GESHI_CLASS, $this->language_data['REGEXPS'][$key])) {
3639                              $attributes = ' class="' .
3640                                  $this->language_data['REGEXPS'][$key][GESHI_CLASS] . '"';
3641                          } else {
3642                             $attributes = ' class="re' . $key . '"';
3643                          }
3644                      }
3645                      $stuff_to_parse = str_replace("!REG3XP$key!", "$attributes", $stuff_to_parse);
3646                  }
3647              }
3648          }
3649  
3650          // Replace <DOT> with . for urls
3651          $stuff_to_parse = str_replace('<DOT>', '.', $stuff_to_parse);
3652          // Replace <|UR1| with <a href= for urls also
3653          if (isset($this->link_styles[GESHI_LINK])) {
3654              if ($this->use_classes) {
3655                  $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' href=', $stuff_to_parse);
3656              } else {
3657                  $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' style="' . $this->link_styles[GESHI_LINK] . '" href=', $stuff_to_parse);
3658              }
3659          } else {
3660              $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' href=', $stuff_to_parse);
3661          }
3662  
3663          //
3664          // NOW we add the span thingy ;)
3665          //
3666  
3667          $stuff_to_parse = str_replace('<|', '<span', $stuff_to_parse);
3668          $stuff_to_parse = str_replace ( '|>', '</span>', $stuff_to_parse );
3669          return substr($stuff_to_parse, 1);
3670      }
3671  
3672      /**
3673       * Sets the time taken to parse the code
3674       *
3675       * @param microtime The time when parsing started
3676       * @param microtime The time when parsing ended
3677       * @since 1.0.2
3678       * @access private
3679       */
3680      function set_time($start_time, $end_time) {
3681          $start = explode(' ', $start_time);
3682          $end = explode(' ', $end_time);
3683          $this->time = $end[0] + $end[1] - $start[0] - $start[1];
3684      }
3685  
3686      /**
3687       * Gets the time taken to parse the code
3688       *
3689       * @return double The time taken to parse the code
3690       * @since  1.0.2
3691       */
3692      function get_time() {
3693          return $this->time;
3694      }
3695  
3696      /**
3697       * Merges arrays recursively, overwriting values of the first array with values of later arrays
3698       *
3699       * @since 1.0.8
3700       * @access private
3701       */
3702      function merge_arrays() {
3703          $arrays = func_get_args();
3704          $narrays = count($arrays);
3705  
3706          // check arguments
3707          // comment out if more performance is necessary (in this case the foreach loop will trigger a warning if the argument is not an array)
3708          for ($i = 0; $i < $narrays; $i ++) {
3709              if (!is_array($arrays[$i])) {
3710                  // also array_merge_recursive returns nothing in this case
3711                  trigger_error('Argument #' . ($i+1) . ' is not an array - trying to merge array with scalar! Returning false!', E_USER_WARNING);
3712                  return false;
3713              }
3714          }
3715  
3716          // the first array is in the output set in every case
3717          $ret = $arrays[0];
3718  
3719          // merege $ret with the remaining arrays
3720          for ($i = 1; $i < $narrays; $i ++) {
3721              foreach ($arrays[$i] as $key => $value) {
3722                  if (is_array($value) && isset($ret[$key])) {
3723                      // if $ret[$key] is not an array you try to merge an scalar value with an array - the result is not defined (incompatible arrays)
3724                      // in this case the call will trigger an E_USER_WARNING and the $ret[$key] will be false.
3725                      $ret[$key] = $this->merge_arrays($ret[$key], $value);
3726                  } else {
3727                      $ret[$key] = $value;
3728                  }
3729              }
3730          }
3731  
3732          return $ret;
3733      }
3734  
3735      /**
3736       * Gets language information and stores it for later use
3737       *
3738       * @param string The filename of the language file you want to load
3739       * @since 1.0.0
3740       * @access private
3741       * @todo Needs to load keys for lexic permissions for keywords, regexps etc
3742       */
3743      function load_language($file_name) {
3744          if ($file_name == $this->loaded_language) {
3745              // this file is already loaded!
3746              return;
3747          }
3748  
3749          //Prepare some stuff before actually loading the language file
3750          $this->loaded_language = $file_name;
3751          $this->parse_cache_built = false;
3752          $this->enable_highlighting();
3753          $language_data = array();
3754  
3755          //Load the language file
3756          require $file_name;
3757  
3758          // Perhaps some checking might be added here later to check that
3759          // $language data is a valid thing but maybe not
3760          $this->language_data = $language_data;
3761  
3762          // Set strict mode if should be set
3763          $this->strict_mode = $this->language_data['STRICT_MODE_APPLIES'];
3764  
3765          // Set permissions for all lexics to true
3766          // so they'll be highlighted by default
3767          foreach (array_keys($this->language_data['KEYWORDS']) as $key) {
3768              if (!empty($this->language_data['KEYWORDS'][$key])) {
3769                  $this->lexic_permissions['KEYWORDS'][$key] = true;
3770              } else {
3771                  $this->lexic_permissions['KEYWORDS'][$key] = false;
3772              }
3773          }
3774  
3775          foreach (array_keys($this->language_data['COMMENT_SINGLE']) as $key) {
3776              $this->lexic_permissions['COMMENTS'][$key] = true;
3777          }
3778          foreach (array_keys($this->language_data['REGEXPS']) as $key) {
3779              $this->lexic_permissions['REGEXPS'][$key] = true;
3780          }
3781  
3782          // for BenBE and future code reviews:
3783          // we can use empty here since we only check for existance and emptiness of an array
3784          // if it is not an array at all but rather false or null this will work as intended as well
3785          // even if $this->language_data['PARSER_CONTROL'] is undefined this won't trigger a notice
3786          if (!empty($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'])) {
3787              foreach ($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'] as $flag => $value) {
3788                  // it's either true or false and maybe is true as well
3789                  $perm = $value !== GESHI_NEVER;
3790                  if ($flag == 'ALL') {
3791                      $this->enable_highlighting($perm);
3792                      continue;
3793                  }
3794                  if (!isset($this->lexic_permissions[$flag])) {
3795                      // unknown lexic permission
3796                      continue;
3797                  }
3798                  if (is_array($this->lexic_permissions[$flag])) {
3799                      foreach ($this->lexic_permissions[$flag] as $key => $val) {
3800                          $this->lexic_permissions[$flag][$key] = $perm;
3801                      }
3802                  } else {
3803                      $this->lexic_permissions[$flag] = $perm;
3804                  }
3805              }
3806              unset($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS']);
3807          }
3808  
3809          //Fix: Problem where hardescapes weren't handled if no ESCAPE_CHAR was given
3810          //You need to set one for HARDESCAPES only in this case.
3811          if(!isset($this->language_data['HARDCHAR'])) {
3812              $this->language_data['HARDCHAR'] = $this->language_data['ESCAPE_CHAR'];
3813          }
3814  
3815          //NEW in 1.0.8: Allow styles to be loaded from a separate file to override defaults
3816          $style_filename = substr($file_name, 0, -4) . '.style.php';
3817          if (is_readable($style_filename)) {
3818              //Clear any style_data that could have been set before ...
3819              if (isset($style_data)) {
3820                  unset($style_data);
3821              }
3822  
3823              //Read the Style Information from the style file
3824              include $style_filename;
3825  
3826              //Apply the new styles to our current language styles
3827              if (isset($style_data) && is_array($style_data)) {
3828                  $this->language_data['STYLES'] =
3829                      $this->merge_arrays($this->language_data['STYLES'], $style_data);
3830              }
3831          }
3832      }
3833  
3834      /**
3835       * Takes the parsed code and various options, and creates the HTML
3836       * surrounding it to make it look nice.
3837       *
3838       * @param  string The code already parsed (reference!)
3839       * @since  1.0.0
3840       * @access private
3841       */
3842      function finalise(&$parsed_code) {
3843          // Remove end parts of important declarations
3844          // This is BUGGY!! My fault for bad code: fix coming in 1.2
3845          // @todo Remove this crap
3846          if ($this->enable_important_blocks &&
3847              (strpos($parsed_code, $this->hsc(GESHI_START_IMPORTANT)) === false)) {
3848              $parsed_code = str_replace($this->hsc(GESHI_END_IMPORTANT), '', $parsed_code);
3849          }
3850  
3851          // Add HTML whitespace stuff if we're using the <div> header
3852          if ($this->header_type != GESHI_HEADER_PRE && $this->header_type != GESHI_HEADER_PRE_VALID) {
3853              $this->indent($parsed_code);
3854          }
3855  
3856          // purge some unnecessary stuff
3857          /** NOTE: memorypeak #1 */
3858          $parsed_code = preg_replace('#<span[^>]+>(\s*)</span>#', '\\1', $parsed_code);
3859  
3860          // If we are using IDs for line numbers, there needs to be an overall
3861          // ID set to prevent collisions.
3862          if ($this->add_ids && !$this->overall_id) {
3863              $this->overall_id = 'geshi-' . substr(md5(microtime()), 0, 4);
3864          }
3865  
3866          // Get code into lines
3867          /** NOTE: memorypeak #2 */
3868          $code = explode("\n", $parsed_code);
3869          $parsed_code = $this->header();
3870  
3871          // If we're using line numbers, we insert <li>s and appropriate
3872          // markup to style them (otherwise we don't need to do anything)
3873          if ($this->line_numbers != GESHI_NO_LINE_NUMBERS && $this->header_type != GESHI_HEADER_PRE_TABLE) {
3874              // If we're using the <pre> header, we shouldn't add newlines because
3875              // the <pre> will line-break them (and the <li>s already do this for us)
3876              $ls = ($this->header_type != GESHI_HEADER_PRE && $this->header_type != GESHI_HEADER_PRE_VALID) ? "\n" : '';
3877  
3878              // Set vars to defaults for following loop
3879              $i = 0;
3880  
3881              // Foreach line...
3882              for ($i = 0, $n = count($code); $i < $n;) {
3883                  //Reset the attributes for a new line ...
3884                  $attrs = array();
3885  
3886                  // Make lines have at least one space in them if they're empty
3887                  // BenBE: Checking emptiness using trim instead of relying on blanks
3888                  if ('' == trim($code[$i])) {
3889                      $code[$i] = '&nbsp;';
3890                  }
3891  
3892                  // If this is a "special line"...
3893                  if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
3894                      $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
3895                      // Set the attributes to style the line
3896                      if ($this->use_classes) {
3897                          //$attr = ' class="li2"';
3898                          $attrs['class'][] = 'li2';
3899                          $def_attr = ' class="de2"';
3900                      } else {
3901                          //$attr = ' style="' . $this->line_style2 . '"';
3902                          $attrs['style'][] = $this->line_style2;
3903                          // This style "covers up" the special styles set for special lines
3904                          // so that styles applied to special lines don't apply to the actual
3905                          // code on that line
3906                          $def_attr = ' style="' . $this->code_style . '"';
3907                      }
3908                  } else {
3909                      if ($this->use_classes) {
3910                          //$attr = ' class="li1"';
3911                          $attrs['class'][] = 'li1';
3912                          $def_attr = ' class="de1"';
3913                      } else {
3914                          //$attr = ' style="' . $this->line_style1 . '"';
3915                          $attrs['style'][] = $this->line_style1;
3916                          $def_attr = ' style="' . $this->code_style . '"';
3917                      }
3918                  }
3919  
3920                  //Check which type of tag to insert for this line
3921                  if ($this->header_type == GESHI_HEADER_PRE_VALID) {
3922                      $start = "<pre$def_attr>";
3923                      $end = '</pre>';
3924                  } else {
3925                      // Span or div?
3926                      $start = "<div$def_attr>";
3927                      $end = '</div>';
3928                  }
3929  
3930                  ++$i;
3931  
3932                  // Are we supposed to use ids? If so, add them
3933                  if ($this->add_ids) {
3934                      $attrs['id'][] = "$this->overall_id-$i";
3935                  }
3936  
3937                  //Is this some line with extra styles???
3938                  if (in_array($i, $this->highlight_extra_lines)) {
3939                      if ($this->use_classes) {
3940                          if (isset($this->highlight_extra_lines_styles[$i])) {
3941                              $attrs['class'][] = "lx$i";
3942                          } else {
3943                              $attrs['class'][] = "ln-xtra";
3944                          }
3945                      } else {
3946                          array_push($attrs['style'], $this->get_line_style($i));
3947                      }
3948                  }
3949  
3950                  // Add in the line surrounded by appropriate list HTML
3951                  $attr_string = '';
3952                  foreach ($attrs as $key => $attr) {
3953                      $attr_string .= ' ' . $key . '="' . implode(' ', $attr) . '"';
3954                  }
3955  
3956                  $parsed_code .= "<li$attr_string>$start{$code[$i-1]}$end</li>$ls";
3957                  unset($code[$i - 1]);
3958              }
3959          } else {
3960              $n = count($code);
3961              if ($this->use_classes) {
3962                  $attributes = ' class="de1"';
3963              } else {
3964                  $attributes = ' style="'. $this->code_style .'"';
3965              }
3966              if ($this->header_type == GESHI_HEADER_PRE_VALID) {
3967                  $parsed_code .= '<pre'. $attributes .'>';
3968              } elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
3969                  if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
3970                      if ($this->use_classes) {
3971                          $attrs = ' class="ln"';
3972                      } else {
3973                          $attrs = ' style="'. $this->table_linenumber_style .'"';
3974                      }
3975                      $parsed_code .= '<td'.$attrs.'><pre'.$attributes.'>';
3976                      // get linenumbers
3977                      // we don't merge it with the for below, since it should be better for
3978                      // memory consumption this way
3979                      // @todo: but... actually it would still be somewhat nice to merge the two loops
3980                      //        the mem peaks are at different positions
3981                      for ($i = 0; $i < $n; ++$i) {
3982                          $close = 0;
3983                          // fancy lines
3984                          if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
3985                              $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
3986                              // Set the attributes to style the line
3987                              if ($this->use_classes) {
3988                                  $parsed_code .= '<span class="xtra li2"><span class="de2">';
3989                              } else {
3990                                  // This style "covers up" the special styles set for special lines
3991                                  // so that styles applied to special lines don't apply to the actual
3992                                  // code on that line
3993                                  $parsed_code .= '<span style="display:block;' . $this->line_style2 . '">'
3994                                                    .'<span style="' . $this->code_style .'">';
3995                              }
3996                              $close += 2;
3997                          }
3998                          //Is this some line with extra styles???
3999                          if (in_array($i + 1, $this->highlight_extra_lines)) {
4000                              if ($this->use_classes) {
4001                                  if (isset($this->highlight_extra_lines_styles[$i])) {
4002                                      $parsed_code .= "<span class=\"xtra lx$i\">";
4003                                  } else {
4004                                      $parsed_code .= "<span class=\"xtra ln-xtra\">";
4005                                  }
4006                              } else {
4007                                  $parsed_code .= "<span style=\"display:block;" . $this->get_line_style($i) . "\">";
4008                              }
4009                              ++$close;
4010                          }
4011                          $parsed_code .= $this->line_numbers_start + $i;
4012                          if ($close) {
4013                              $parsed_code .= str_repeat('</span>', $close);
4014                          } elseif ($i != $n) {
4015                              $parsed_code .= "\n";
4016                          }
4017                      }
4018                      $parsed_code .= '</pre></td><td'.$attributes.'>';
4019                  }
4020                  $parsed_code .= '<pre'. $attributes .'>';
4021              }
4022              // No line numbers, but still need to handle highlighting lines extra.
4023              // Have to use divs so the full width of the code is highlighted
4024              $close = 0;
4025              for ($i = 0; $i < $n; ++$i) {
4026                  // Make lines have at least one space in them if they're empty
4027                  // BenBE: Checking emptiness using trim instead of relying on blanks
4028                  if ('' == trim($code[$i])) {
4029                      $code[$i] = '&nbsp;';
4030                  }
4031                  // fancy lines
4032                  if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
4033                      $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
4034                      // Set the attributes to style the line
4035                      if ($this->use_classes) {
4036                          $parsed_code .= '<span class="xtra li2"><span class="de2">';
4037                      } else {
4038                          // This style "covers up" the special styles set for special lines
4039                          // so that styles applied to special lines don't apply to the actual
4040                          // code on that line
4041                          $parsed_code .= '<span style="display:block;' . $this->line_style2 . '">'
4042                                            .'<span style="' . $this->code_style .'">';
4043                      }
4044                      $close += 2;
4045                  }
4046                  //Is this some line with extra styles???
4047                  if (in_array($i + 1, $this->highlight_extra_lines)) {
4048                      if ($this->use_classes) {
4049                          if (isset($this->highlight_extra_lines_styles[$i])) {
4050                              $parsed_code .= "<span class=\"xtra lx$i\">";
4051                          } else {
4052                              $parsed_code .= "<span class=\"xtra ln-xtra\">";
4053                          }
4054                      } else {
4055                          $parsed_code .= "<span style=\"display:block;" . $this->get_line_style($i) . "\">";
4056                      }
4057                      ++$close;
4058                  }
4059  
4060                  $parsed_code .= $code[$i];
4061  
4062                  if ($close) {
4063                    $parsed_code .= str_repeat('</span>', $close);
4064                    $close = 0;
4065                  }
4066                  elseif ($i + 1 < $n) {
4067                      $parsed_code .= "\n";
4068                  }
4069                  unset($code[$i]);
4070              }
4071  
4072              if ($this->header_type == GESHI_HEADER_PRE_VALID || $this->header_type == GESHI_HEADER_PRE_TABLE) {
4073                  $parsed_code .= '</pre>';
4074              }
4075              if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4076                  $parsed_code .= '</td>';
4077              }
4078          }
4079  
4080          $parsed_code .= $this->footer();
4081      }
4082  
4083      /**
4084       * Creates the header for the code block (with correct attributes)
4085       *
4086       * @return string The header for the code block
4087       * @since  1.0.0
4088       * @access private
4089       */
4090      function header() {
4091          // Get attributes needed
4092          /**
4093           * @todo   Document behaviour change - class is outputted regardless of whether
4094           *         we're using classes or not. Same with style
4095           */
4096          $attributes = ' class="' . $this->_genCSSName($this->language);
4097          if ($this->overall_class != '') {
4098              $attributes .= " ".$this->_genCSSName($this->overall_class);
4099          }
4100          $attributes .= '"';
4101  
4102          if ($this->overall_id != '') {
4103              $attributes .= " id=\"{$this->overall_id}\"";
4104          }
4105          if ($this->overall_style != '' && !$this->use_classes) {
4106              $attributes .= ' style="' . $this->overall_style . '"';
4107          }
4108  
4109          $ol_attributes = '';
4110  
4111          if ($this->line_numbers_start != 1) {
4112              $ol_attributes .= ' start="' . $this->line_numbers_start . '"';
4113          }
4114  
4115          // Get the header HTML
4116          $header = $this->header_content;
4117          if ($header) {
4118              if ($this->header_type == GESHI_HEADER_PRE || $this->header_type == GESHI_HEADER_PRE_VALID) {
4119                  $header = str_replace("\n", '', $header);
4120              }
4121              $header = $this->replace_keywords($header);
4122  
4123              if ($this->use_classes) {
4124                  $attr = ' class="head"';
4125              } else {
4126                  $attr = " style=\"{$this->header_content_style}\"";
4127              }
4128              if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4129                  $header = "<thead><tr><td colspan=\"2\" $attr>$header</td></tr></thead>";
4130              } else {
4131                  $header = "<div$attr>$header</div>";
4132              }
4133          }
4134  
4135          if (GESHI_HEADER_NONE == $this->header_type) {
4136              if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4137                  return "$header<ol$attributes$ol_attributes>";
4138              }
4139              return $header . ($this->force_code_block ? '<div>' : '');
4140          }
4141  
4142          // Work out what to return and do it
4143          if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4144              if ($this->header_type == GESHI_HEADER_PRE) {
4145                  return "<pre$attributes>$header<ol$ol_attributes>";
4146              } elseif ($this->header_type == GESHI_HEADER_DIV ||
4147                  $this->header_type == GESHI_HEADER_PRE_VALID) {
4148                  return "<div$attributes>$header<ol$ol_attributes>";
4149              } elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
4150                  return "<table$attributes>$header<tbody><tr class=\"li1\">";
4151              }
4152          } else {
4153              if ($this->header_type == GESHI_HEADER_PRE) {
4154                  return "<pre$attributes>$header"  .
4155                      ($this->force_code_block ? '<div>' : '');
4156              } else {
4157                  return "<div$attributes>$header" .
4158                      ($this->force_code_block ? '<div>' : '');
4159              }
4160          }
4161      }
4162  
4163      /**
4164       * Returns the footer for the code block.
4165       *
4166       * @return string The footer for the code block
4167       * @since  1.0.0
4168       * @access private
4169       */
4170      function footer() {
4171          $footer = $this->footer_content;
4172          if ($footer) {
4173              if ($this->header_type == GESHI_HEADER_PRE) {
4174                  $footer = str_replace("\n", '', $footer);;
4175              }
4176              $footer = $this->replace_keywords($footer);
4177  
4178              if ($this->use_classes) {
4179                  $attr = ' class="foot"';
4180              } else {
4181                  $attr = " style=\"{$this->footer_content_style}\"";
4182              }
4183              if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4184                  $footer = "<tfoot><tr><td colspan=\"2\">$footer</td></tr></tfoot>";
4185              } else {
4186                  $footer = "<div$attr>$footer</div>";
4187              }
4188          }
4189  
4190          if (GESHI_HEADER_NONE == $this->header_type) {
4191              return ($this->line_numbers != GESHI_NO_LINE_NUMBERS) ? '</ol>' . $footer : $footer;
4192          }
4193  
4194          if ($this->header_type == GESHI_HEADER_DIV || $this->header_type == GESHI_HEADER_PRE_VALID) {
4195              if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4196                  return "</ol>$footer</div>";
4197              }
4198              return ($this->force_code_block ? '</div>' : '') .
4199                  "$footer</div>";
4200          }
4201          elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
4202              if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4203                  return "</tr></tbody>$footer</table>";
4204              }
4205              return ($this->force_code_block ? '</div>' : '') .
4206                  "$footer</div>";
4207          }
4208          else {
4209              if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4210                  return "</ol>$footer</pre>";
4211              }
4212              return ($this->force_code_block ? '</div>' : '') .
4213                  "$footer</pre>";
4214          }
4215      }
4216  
4217      /**
4218       * Replaces certain keywords in the header and footer with
4219       * certain configuration values
4220       *
4221       * @param  string The header or footer content to do replacement on
4222       * @return string The header or footer with replaced keywords
4223       * @since  1.0.2
4224       * @access private
4225       */
4226      function replace_keywords($instr) {
4227          $keywords = $replacements = array();
4228  
4229          $keywords[] = '<TIME>';
4230          $keywords[] = '{TIME}';
4231          $replacements[] = $replacements[] = number_format($time = $this->get_time(), 3);
4232  
4233          $keywords[] = '<LANGUAGE>';
4234          $keywords[] = '{LANGUAGE}';
4235          $replacements[] = $replacements[] = $this->language_data['LANG_NAME'];
4236  
4237          $keywords[] = '<VERSION>';
4238          $keywords[] = '{VERSION}';
4239          $replacements[] = $replacements[] = GESHI_VERSION;
4240  
4241          $keywords[] = '<SPEED>';
4242          $keywords[] = '{SPEED}';
4243          if ($time <= 0) {
4244              $speed = 'N/A';
4245          } else {
4246              $speed = strlen($this->source) / $time;
4247              if ($speed >= 1024) {
4248                  $speed = sprintf("%.2f KB/s", $speed / 1024.0);
4249              } else {
4250                  $speed = sprintf("%.0f B/s", $speed);
4251              }
4252          }
4253          $replacements[] = $replacements[] = $speed;
4254  
4255          return str_replace($keywords, $replacements, $instr);
4256      }
4257  
4258      /**
4259       * Secure replacement for PHP built-in function htmlspecialchars().
4260       *
4261       * See ticket #427 (http://wush.net/trac/wikka/ticket/427) for the rationale
4262       * for this replacement function.
4263       *
4264       * The INTERFACE for this function is almost the same as that for
4265       * htmlspecialchars(), with the same default for quote style; however, there
4266       * is no 'charset' parameter. The reason for this is as follows:
4267       *
4268       * The PHP docs say:
4269       *      "The third argument charset defines character set used in conversion."
4270       *
4271       * I suspect PHP's htmlspecialchars() is working at the byte-value level and
4272       * thus _needs_ to know (or assume) a character set because the special
4273       * characters to be replaced could exist at different code points in
4274       * different character sets. (If indeed htmlspecialchars() works at
4275       * byte-value level that goes some  way towards explaining why the
4276       * vulnerability would exist in this function, too, and not only in
4277       * htmlentities() which certainly is working at byte-value level.)
4278       *
4279       * This replacement function however works at character level and should
4280       * therefore be "immune" to character set differences - so no charset
4281       * parameter is needed or provided. If a third parameter is passed, it will
4282       * be silently ignored.
4283       *
4284       * In the OUTPUT there is a minor difference in that we use '&#39;' instead
4285       * of PHP's '&#039;' for a single quote: this provides compatibility with
4286       *      get_html_translation_table(HTML_SPECIALCHARS, ENT_QUOTES)
4287       * (see comment by mikiwoz at yahoo dot co dot uk on
4288       * http://php.net/htmlspecialchars); it also matches the entity definition
4289       * for XML 1.0
4290       * (http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Special_characters).
4291       * Like PHP we use a numeric character reference instead of '&apos;' for the
4292       * single quote. For the other special characters we use the named entity
4293       * references, as PHP is doing.
4294       *
4295       * @author      {@link http://wikkawiki.org/JavaWoman Marjolein Katsma}
4296       *
4297       * @license     http://www.gnu.org/copyleft/lgpl.html
4298       *              GNU Lesser General Public License
4299       * @copyright   Copyright 2007, {@link http://wikkawiki.org/CreditsPage
4300       *              Wikka Development Team}
4301       *
4302       * @access      private
4303       * @param       string  $string string to be converted
4304       * @param       integer $quote_style
4305       *                      - ENT_COMPAT:   escapes &, <, > and double quote (default)
4306       *                      - ENT_NOQUOTES: escapes only &, < and >
4307       *                      - ENT_QUOTES:   escapes &, <, >, double and single quotes
4308       * @return      string  converted string
4309       * @since       1.0.7.18
4310       */
4311      function hsc($string, $quote_style = ENT_COMPAT) {
4312          // init
4313          static $aTransSpecchar = array(
4314              '&' => '&amp;',
4315              '"' => '&quot;',
4316              '<' => '&lt;',
4317              '>' => '&gt;',
4318  
4319              //This fix is related to SF#1923020, but has to be applied
4320              //regardless of actually highlighting symbols.
4321  
4322              //Circumvent a bug with symbol highlighting
4323              //This is required as ; would produce undesirable side-effects if it
4324              //was not to be processed as an entity.
4325              ';' => '<SEMI>', // Force ; to be processed as entity
4326              '|' => '<PIPE>' // Force | to be processed as entity
4327              );                      // ENT_COMPAT set
4328  
4329          switch ($quote_style) {
4330              case ENT_NOQUOTES: // don't convert double quotes
4331                  unset($aTransSpecchar['"']);
4332                  break;
4333              case ENT_QUOTES: // convert single quotes as well
4334                  $aTransSpecchar["'"] = '&#39;'; // (apos) htmlspecialchars() uses '&#039;'
4335                  break;
4336          }
4337  
4338          // return translated string
4339          return strtr($string, $aTransSpecchar);
4340      }
4341  
4342      function _genCSSName($name){
4343          return (is_numeric($name[0]) ? '_' : '') . $name;
4344      }
4345  
4346      /**
4347       * Returns a stylesheet for the highlighted code. If $economy mode
4348       * is true, we only return the stylesheet declarations that matter for
4349       * this code block instead of the whole thing
4350       *
4351       * @param  boolean Whether to use economy mode or not
4352       * @return string A stylesheet built on the data for the current language
4353       * @since  1.0.0
4354       */
4355      function get_stylesheet($economy_mode = true) {
4356          // If there's an error, chances are that the language file
4357          // won't have populated the language data file, so we can't
4358          // risk getting a stylesheet...
4359          if ($this->error) {
4360              return '';
4361          }
4362  
4363          //Check if the style rearrangements have been processed ...
4364          //This also does some preprocessing to check which style groups are useable ...
4365          if(!isset($this->language_data['NUMBERS_CACHE'])) {
4366              $this->build_style_cache();
4367          }
4368  
4369          // First, work out what the selector should be. If there's an ID,
4370          // that should be used, the same for a class. Otherwise, a selector
4371          // of '' means that these styles will be applied anywhere
4372          if ($this->overall_id) {
4373              $selector = '#' . $this->_genCSSName($this->overall_id);
4374          } else {
4375              $selector = '.' . $this->_genCSSName($this->language);
4376              if ($this->overall_class) {
4377                  $selector .= '.' . $this->_genCSSName($this->overall_class);
4378              }
4379          }
4380          $selector .= ' ';
4381  
4382          // Header of the stylesheet
4383          if (!$economy_mode) {
4384              $stylesheet = "/**\n".
4385                  " * GeSHi Dynamically Generated Stylesheet\n".
4386                  " * --------------------------------------\n".
4387                  " * Dynamically generated stylesheet for {$this->language}\n".
4388                  " * CSS class: {$this->overall_class}, CSS id: {$this->overall_id}\n".
4389                  " * GeSHi (C) 2004 - 2007 Nigel McNie, 2007 - 2008 Benny Baumann\n" .
4390                  " * (http://qbnz.com/highlighter/ and http://geshi.org/)\n".
4391                  " * --------------------------------------\n".
4392                  " */\n";
4393          } else {
4394              $stylesheet = "/**\n".
4395                  " * GeSHi (C) 2004 - 2007 Nigel McNie, 2007 - 2008 Benny Baumann\n" .
4396                  " * (http://qbnz.com/highlighter/ and http://geshi.org/)\n".
4397                  " */\n";
4398          }
4399  
4400          // Set the <ol> to have no effect at all if there are line numbers
4401          // (<ol>s have margins that should be destroyed so all layout is
4402          // controlled by the set_overall_style method, which works on the
4403          // <pre> or <div> container). Additionally, set default styles for lines
4404          if (!$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4405              //$stylesheet .= "$selector, {$selector}ol, {$selector}ol li {margin: 0;}\n";
4406              $stylesheet .= "$selector.de1, $selector.de2 {{$this->code_style}}\n";
4407          }
4408  
4409          // Add overall styles
4410          // note: neglect economy_mode, empty styles are meaningless
4411          if ($this->overall_style != '') {
4412              $stylesheet .= "$selector {{$this->overall_style}}\n";
4413          }
4414  
4415          // Add styles for links
4416          // note: economy mode does not make _any_ sense here
4417          //       either the style is empty and thus no selector is needed
4418          //       or the appropriate key is given.
4419          foreach ($this->link_styles as $key => $style) {
4420              if ($style != '') {
4421                  switch ($key) {
4422                      case GESHI_LINK:
4423                          $stylesheet .= "{$selector}a:link {{$style}}\n";
4424                          break;
4425                      case GESHI_HOVER:
4426                          $stylesheet .= "{$selector}a:hover {{$style}}\n";
4427                          break;
4428                      case GESHI_ACTIVE:
4429                          $stylesheet .= "{$selector}a:active {{$style}}\n";
4430                          break;
4431                      case GESHI_VISITED:
4432                          $stylesheet .= "{$selector}a:visited {{$style}}\n";
4433                          break;
4434                  }
4435              }
4436          }
4437  
4438          // Header and footer
4439          // note: neglect economy_mode, empty styles are meaningless
4440          if ($this->header_content_style != '') {
4441              $stylesheet .= "$selector.head {{$this->header_content_style}}\n";
4442          }
4443          if ($this->footer_content_style != '') {
4444              $stylesheet .= "$selector.foot {{$this->footer_content_style}}\n";
4445          }
4446  
4447          // Styles for important stuff
4448          // note: neglect economy_mode, empty styles are meaningless
4449          if ($this->important_styles != '') {
4450              $stylesheet .= "$selector.imp {{$this->important_styles}}\n";
4451          }
4452  
4453          // Simple line number styles
4454          if ((!$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS) && $this->line_style1 != '') {
4455              $stylesheet .= "{$selector}li, {$selector}.li1 {{$this->line_style1}}\n";
4456          }
4457          if ((!$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS) && $this->table_linenumber_style != '') {
4458              $stylesheet .= "{$selector}.ln {{$this->table_linenumber_style}}\n";
4459          }
4460          // If there is a style set for fancy line numbers, echo it out
4461          if ((!$economy_mode || $this->line_numbers == GESHI_FANCY_LINE_NUMBERS) && $this->line_style2 != '') {
4462              $stylesheet .= "{$selector}.li2 {{$this->line_style2}}\n";
4463          }
4464  
4465          // note: empty styles are meaningless
4466          foreach ($this->language_data['STYLES']['KEYWORDS'] as $group => $styles) {
4467              if ($styles != '' && (!$economy_mode ||
4468                  (isset($this->lexic_permissions['KEYWORDS'][$group]) &&
4469                  $this->lexic_permissions['KEYWORDS'][$group]))) {
4470                  $stylesheet .= "$selector.kw$group {{$styles}}\n";
4471              }
4472          }
4473          foreach ($this->language_data['STYLES']['COMMENTS'] as $group => $styles) {
4474              if ($styles != '' && (!$economy_mode ||
4475                  (isset($this->lexic_permissions['COMMENTS'][$group]) &&
4476                  $this->lexic_permissions['COMMENTS'][$group]) ||
4477                  (!empty($this->language_data['COMMENT_REGEXP']) &&
4478                  !empty($this->language_data['COMMENT_REGEXP'][$group])))) {
4479                  $stylesheet .= "$selector.co$group {{$styles}}\n";
4480              }
4481          }
4482          foreach ($this->language_data['STYLES']['ESCAPE_CHAR'] as $group => $styles) {
4483              if ($styles != '' && (!$economy_mode || $this->lexic_permissions['ESCAPE_CHAR'])) {
4484                  // NEW: since 1.0.8 we have to handle hardescapes
4485                  if ($group === 'HARD') {
4486                      $group = '_h';
4487                  }
4488                  $stylesheet .= "$selector.es$group {{$styles}}\n";
4489              }
4490          }
4491          foreach ($this->language_data['STYLES']['BRACKETS'] as $group => $styles) {
4492              if ($styles != '' && (!$economy_mode || $this->lexic_permissions['BRACKETS'])) {
4493                  $stylesheet .= "$selector.br$group {{$styles}}\n";
4494              }
4495          }
4496          foreach ($this->language_data['STYLES']['SYMBOLS'] as $group => $styles) {
4497              if ($styles != '' && (!$economy_mode || $this->lexic_permissions['SYMBOLS'])) {
4498                  $stylesheet .= "$selector.sy$group {{$styles}}\n";
4499              }
4500          }
4501          foreach ($this->language_data['STYLES']['STRINGS'] as $group => $styles) {
4502              if ($styles != '' && (!$economy_mode || $this->lexic_permissions['STRINGS'])) {
4503                  // NEW: since 1.0.8 we have to handle hardquotes
4504                  if ($group === 'HARD') {
4505                      $group = '_h';
4506                  }
4507                  $stylesheet .= "$selector.st$group {{$styles}}\n";
4508              }
4509          }
4510          foreach ($this->language_data['STYLES']['NUMBERS'] as $group => $styles) {
4511              if ($styles != '' && (!$economy_mode || $this->lexic_permissions['NUMBERS'])) {
4512                  $stylesheet .= "$selector.nu$group {{$styles}}\n";
4513              }
4514          }
4515          foreach ($this->language_data['STYLES']['METHODS'] as $group => $styles) {
4516              if ($styles != '' && (!$economy_mode || $this->lexic_permissions['METHODS'])) {
4517                  $stylesheet .= "$selector.me$group {{$styles}}\n";
4518              }
4519          }
4520          // note: neglect economy_mode, empty styles are meaningless
4521          foreach ($this->language_data['STYLES']['SCRIPT'] as $group => $styles) {
4522              if ($styles != '') {
4523                  $stylesheet .= "$selector.sc$group {{$styles}}\n";
4524              }
4525          }
4526          foreach ($this->language_data['STYLES']['REGEXPS'] as $group => $styles) {
4527              if ($styles != '' && (!$economy_mode ||
4528                  (isset($this->lexic_permissions['REGEXPS'][$group]) &&
4529                  $this->lexic_permissions['REGEXPS'][$group]))) {
4530                  if (is_array($this->language_data['REGEXPS'][$group]) &&
4531                      array_key_exists(GESHI_CLASS, $this->language_data['REGEXPS'][$group])) {
4532                      $stylesheet .= "$selector.";
4533                      $stylesheet .= $this->language_data['REGEXPS'][$group][GESHI_CLASS];
4534                      $stylesheet .= " {{$styles}}\n";
4535                  } else {
4536                      $stylesheet .= "$selector.re$group {{$styles}}\n";
4537                  }
4538              }
4539          }
4540          // Styles for lines being highlighted extra
4541          if (!$economy_mode || (count($this->highlight_extra_lines)!=count($this->highlight_extra_lines_styles))) {
4542              $stylesheet .= "{$selector}.ln-xtra, {$selector}li.ln-xtra, {$selector}div.ln-xtra {{$this->highlight_extra_lines_style}}\n";
4543          }
4544          $stylesheet .= "{$selector}span.xtra { display:block; }\n";
4545          foreach ($this->highlight_extra_lines_styles as $lineid => $linestyle) {
4546              $stylesheet .= "{$selector}.lx$lineid, {$selector}li.lx$lineid, {$selector}div.lx$lineid {{$linestyle}}\n";
4547          }
4548  
4549          return $stylesheet;
4550      }
4551  
4552      /**
4553       * Get's the style that is used for the specified line
4554       *
4555       * @param int The line number information is requested for
4556       * @access private
4557       * @since 1.0.7.21
4558       */
4559      function get_line_style($line) {
4560          //$style = null;
4561          $style = null;
4562          if (isset($this->highlight_extra_lines_styles[$line])) {
4563              $style = $this->highlight_extra_lines_styles[$line];
4564          } else { // if no "extra" style assigned
4565              $style = $this->highlight_extra_lines_style;
4566          }
4567  
4568          return $style;
4569      }
4570  
4571      /**
4572      * this functions creates an optimized regular expression list
4573      * of an array of strings.
4574      *
4575      * Example:
4576      * <code>$list = array('faa', 'foo', 'foobar');
4577      *          => string 'f(aa|oo(bar)?)'</code>
4578      *
4579      * @param $list array of (unquoted) strings
4580      * @param $regexp_delimiter your regular expression delimiter, @see preg_quote()
4581      * @return string for regular expression
4582      * @author Milian Wolff <mail@milianw.de>
4583      * @since 1.0.8
4584      * @access private
4585      */
4586      function optimize_regexp_list($list, $regexp_delimiter = '/') {
4587          $regex_chars = array('.', '\\', '+', '-', '*', '?', '[', '^', ']', '$',
4588              '(', ')', '{', '}', '=', '!', '<', '>', '|', ':', $regexp_delimiter);
4589          sort($list);
4590          $regexp_list = array('');
4591          $num_subpatterns = 0;
4592          $list_key = 0;
4593  
4594          // the tokens which we will use to generate the regexp list
4595          $tokens = array();
4596          $prev_keys = array();
4597          // go through all entries of the list and generate the token list
4598          $cur_len = 0;
4599          for ($i = 0, $i_max = count($list); $i < $i_max; ++$i) {
4600              if ($cur_len > GESHI_MAX_PCRE_LENGTH) {
4601                  // seems like the length of this pcre is growing exorbitantly
4602                  $regexp_list[++$list_key] = $this->_optimize_regexp_list_tokens_to_string($tokens);
4603                  $num_subpatterns = substr_count($regexp_list[$list_key], '(?:');
4604                  $tokens = array();
4605                  $cur_len = 0;
4606              }
4607              $level = 0;
4608              $entry = preg_quote((string) $list[$i], $regexp_delimiter);
4609              $pointer = &$tokens;
4610              // properly assign the new entry to the correct position in the token array
4611              // possibly generate smaller common denominator keys
4612              while (true) {
4613                  // get the common denominator
4614                  if (isset($prev_keys[$level])) {
4615                      if ($prev_keys[$level] == $entry) {
4616                          // this is a duplicate entry, skip it
4617                          continue 2;
4618                      }
4619                      $char = 0;
4620                      while (isset($entry[$char]) && isset($prev_keys[$level][$char])
4621                              && $entry[$char] == $prev_keys[$level][$char]) {
4622                          ++$char;
4623                      }
4624                      if ($char > 0) {
4625                          // this entry has at least some chars in common with the current key
4626                          if ($char == strlen($prev_keys[$level])) {
4627                              // current key is totally matched, i.e. this entry has just some bits appended
4628                              $pointer = &$pointer[$prev_keys[$level]];
4629                          } else {
4630                              // only part of the keys match
4631                              $new_key_part1 = substr($prev_keys[$level], 0, $char);
4632                              $new_key_part2 = substr($prev_keys[$level], $char);
4633  
4634                              if (in_array($new_key_part1[0], $regex_chars)
4635                                  || in_array($new_key_part2[0], $regex_chars)) {
4636                                  // this is bad, a regex char as first character
4637                                  $pointer[$entry] = array('' => true);
4638                                  array_splice($prev_keys, $level, count($prev_keys), $entry);
4639                                  $cur_len += strlen($entry);
4640                                  continue;
4641                              } else {
4642                                  // relocate previous tokens
4643                                  $pointer[$new_key_part1] = array($new_key_part2 => $pointer[$prev_keys[$level]]);
4644                                  unset($pointer[$prev_keys[$level]]);
4645                                  $pointer = &$pointer[$new_key_part1];
4646                                  // recreate key index
4647                                  array_splice($prev_keys, $level, count($prev_keys), array($new_key_part1, $new_key_part2));
4648                                  $cur_len += strlen($new_key_part2);
4649                              }
4650                          }
4651                          ++$level;
4652                          $entry = substr($entry, $char);
4653                          continue;
4654                      }
4655                      // else: fall trough, i.e. no common denominator was found
4656                  }
4657                  if ($level == 0 && !empty($tokens)) {
4658                      // we can dump current tokens into the string and throw them away afterwards
4659                      $new_entry = $this->_optimize_regexp_list_tokens_to_string($tokens);
4660                      $new_subpatterns = substr_count($new_entry, '(?:');
4661                      if (GESHI_MAX_PCRE_SUBPATTERNS && $num_subpatterns + $new_subpatterns > GESHI_MAX_PCRE_SUBPATTERNS) {
4662                          $regexp_list[++$list_key] = $new_entry;
4663                          $num_subpatterns = $new_subpatterns;
4664                      } else {
4665                          if (!empty($regexp_list[$list_key])) {
4666                              $new_entry = '|' . $new_entry;
4667                          }
4668                          $regexp_list[$list_key] .= $new_entry;
4669                          $num_subpatterns += $new_subpatterns;
4670                      }
4671                      $tokens = array();
4672                      $cur_len = 0;
4673                  }
4674                  // no further common denominator found
4675                  $pointer[$entry] = array('' => true);
4676                  array_splice($prev_keys, $level, count($prev_keys), $entry);
4677  
4678                  $cur_len += strlen($entry);
4679                  break;
4680              }
4681              unset($list[$i]);
4682          }
4683          // make sure the last tokens get converted as well
4684          $new_entry = $this->_optimize_regexp_list_tokens_to_string($tokens);
4685          if (GESHI_MAX_PCRE_SUBPATTERNS && $num_subpatterns + substr_count($new_entry, '(?:') > GESHI_MAX_PCRE_SUBPATTERNS) {
4686              if ( !empty($regexp_list[$list_key]) ) {
4687                ++$list_key;
4688              }
4689              $regexp_list[$list_key] = $new_entry;
4690          } else {
4691              if (!empty($regexp_list[$list_key])) {
4692                  $new_entry = '|' . $new_entry;
4693              }
4694              $regexp_list[$list_key] .= $new_entry;
4695          }
4696          return $regexp_list;
4697      }
4698      /**
4699      * this function creates the appropriate regexp string of an token array
4700      * you should not call this function directly, @see $this->optimize_regexp_list().
4701      *
4702      * @param &$tokens array of tokens
4703      * @param $recursed bool to know wether we recursed or not
4704      * @return string
4705      * @author Milian Wolff <mail@milianw.de>
4706      * @since 1.0.8
4707      * @access private
4708      */
4709      function _optimize_regexp_list_tokens_to_string(&$tokens, $recursed = false) {
4710          $list = '';
4711          foreach ($tokens as $token => $sub_tokens) {
4712              $list .= $token;
4713              $close_entry = isset($sub_tokens['']);
4714              unset($sub_tokens['']);
4715              if (!empty($sub_tokens)) {
4716                  $list .= '(?:' . $this->_optimize_regexp_list_tokens_to_string($sub_tokens, true) . ')';
4717                  if ($close_entry) {
4718                      // make sub_tokens optional
4719                      $list .= '?';
4720                  }
4721              }
4722              $list .= '|';
4723          }
4724          if (!$recursed) {
4725              // do some optimizations
4726              // common trailing strings
4727              // BUGGY!
4728              //$list = preg_replace_callback('#(?<=^|\:|\|)\w+?(\w+)(?:\|.+\1)+(?=\|)#', create_function(
4729              //    '$matches', 'return "(?:" . preg_replace("#" . preg_quote($matches[1], "#") . "(?=\||$)#", "", $matches[0]) . ")" . $matches[1];'), $list);
4730              // (?:p)? => p?
4731              $list = preg_replace('#\(\?\:(.)\)\?#', '\1?', $list);
4732              // (?:a|b|c|d|...)? => [abcd...]?
4733              // TODO: a|bb|c => [ac]|bb
4734              static $callback_2;
4735              if (!isset($callback_2)) {
4736                  $callback_2 = create_function('$matches', 'return "[" . str_replace("|", "", $matches[1]) . "]";');
4737              }
4738              $list = preg_replace_callback('#\(\?\:((?:.\|)+.)\)#', $callback_2, $list);
4739          }
4740          // return $list without trailing pipe
4741          return substr($list, 0, -1);
4742      }
4743  } // End Class GeSHi
4744  
4745  
if (!function_exists('geshi_highlight')) {
    /**
     * Convenience wrapper for one-off highlighting. Behaves just like highlight_string
     *
     * Builds a GeSHi object with the header type set to GESHI_HEADER_NONE and
     * wraps the parsed code in a <code> element.
     *
     * @param string The code to highlight
     * @param string The language to highlight the code in
     * @param string The path to the language files. You can leave this blank if you need
     *               as from version 1.0.7 the path should be automatically detected
     * @param boolean Whether to return the result or to echo
     * @return string|boolean The highlighted code if $return is true; otherwise the
     *               code is echoed and the result is false when the GeSHi object
     *               reports an error, true if it does not
     * @since 1.0.2
     */
    function geshi_highlight($string, $language, $path = null, $return = false) {
        $geshi = new GeSHi($string, $language, $path);
        $geshi->set_header_type(GESHI_HEADER_NONE);

        // The markup is the same for both modes, so build it once up front.
        $highlighted = '<code>' . $geshi->parse_code() . '</code>';
        if ($return) {
            return $highlighted;
        }

        echo $highlighted;

        // In echo mode the return value only signals whether an error occurred.
        return !$geshi->error();
    }
}
4774