[ Index ]

PHP Cross Reference of DokuWiki

title

Body

[close]

/inc/ -> Sitemapper.php (source)

   1  <?php
   2  /**
   3   * Sitemap handling functions
   4   *
   5   * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
   6   * @author     Michael Hamann <michael@content-space.de>
   7   */
   8  
   9  if(!defined('DOKU_INC')) die('meh.');
  10  
  11  /**
  12   * A class for building sitemaps and pinging search engines with the sitemap URL.
  13   *
  14   * @author Michael Hamann
  15   */
  16  class Sitemapper {
  17      /**
  18       * Builds a Google Sitemap of all public pages known to the indexer
  19       *
  20       * The map is placed in the cache directory named sitemap.xml.gz - This
  21       * file needs to be writable!
  22       *
  23       * @author Michael Hamann
  24       * @author Andreas Gohr
  25       * @link   https://www.google.com/webmasters/sitemaps/docs/en/about.html
  26       * @link   http://www.sitemaps.org/
  27       *
  28       * @return bool
  29       */
  30      public static function generate(){
  31          global $conf;
  32          if($conf['sitemap'] < 1 || !is_numeric($conf['sitemap'])) return false;
  33  
  34          $sitemap = Sitemapper::getFilePath();
  35  
  36          if(file_exists($sitemap)){
  37              if(!is_writable($sitemap)) return false;
  38          }else{
  39              if(!is_writable(dirname($sitemap))) return false;
  40          }
  41  
  42          if(@filesize($sitemap) &&
  43             @filemtime($sitemap) > (time()-($conf['sitemap']*86400))){ // 60*60*24=86400
  44              dbglog('Sitemapper::generate(): Sitemap up to date');
  45              return false;
  46          }
  47  
  48          dbglog("Sitemapper::generate(): using $sitemap");
  49  
  50          $pages = idx_get_indexer()->getPages();
  51          dbglog('Sitemapper::generate(): creating sitemap using '.count($pages).' pages');
  52          $items = array();
  53  
  54          // build the sitemap items
  55          foreach($pages as $id){
  56              //skip hidden, non existing and restricted files
  57              if(isHiddenPage($id)) continue;
  58              if(auth_aclcheck($id,'',array()) < AUTH_READ) continue;
  59              $item = SitemapItem::createFromID($id);
  60              if ($item !== null)
  61                  $items[] = $item;
  62          }
  63  
  64          $eventData = array('items' => &$items, 'sitemap' => &$sitemap);
  65          $event = new Doku_Event('SITEMAP_GENERATE', $eventData);
  66          if ($event->advise_before(true)) {
  67              //save the new sitemap
  68              $event->result = io_saveFile($sitemap, Sitemapper::getXML($items));
  69          }
  70          $event->advise_after();
  71  
  72          return $event->result;
  73      }
  74  
  75      /**
  76       * Builds the sitemap XML string from the given array auf SitemapItems.
  77       *
  78       * @param $items array The SitemapItems that shall be included in the sitemap.
  79       * @return string The sitemap XML.
  80       *
  81       * @author Michael Hamann
  82       */
  83      private static function getXML($items) {
  84          ob_start();
  85          echo '<?xml version="1.0" encoding="UTF-8"?>'.NL;
  86          echo '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'.NL;
  87          foreach ($items as $item) {
  88              /** @var SitemapItem $item */
  89              echo $item->toXML();
  90          }
  91          echo '</urlset>'.NL;
  92          $result = ob_get_contents();
  93          ob_end_clean();
  94          return $result;
  95      }
  96  
  97      /**
  98       * Helper function for getting the path to the sitemap file.
  99       *
 100       * @return string The path to the sitemap file.
 101       *
 102       * @author Michael Hamann
 103       */
 104      public static function getFilePath() {
 105          global $conf;
 106  
 107          $sitemap = $conf['cachedir'].'/sitemap.xml';
 108          if (self::sitemapIsCompressed()) {
 109              $sitemap .= '.gz';
 110          }
 111  
 112          return $sitemap;
 113      }
 114  
 115      /**
 116       * Helper function for checking if the sitemap is compressed
 117       *
 118       * @return bool If the sitemap file is compressed
 119       */
 120      public static function sitemapIsCompressed() {
 121          global $conf;
 122          return $conf['compression'] === 'bz2' || $conf['compression'] === 'gz';
 123      }
 124  
 125      /**
 126       * Pings search engines with the sitemap url. Plugins can add or remove
 127       * urls to ping using the SITEMAP_PING event.
 128       *
 129       * @author Michael Hamann
 130       *
 131       * @return bool
 132       */
 133      public static function pingSearchEngines() {
 134          //ping search engines...
 135          $http = new DokuHTTPClient();
 136          $http->timeout = 8;
 137  
 138          $encoded_sitemap_url = urlencode(wl('', array('do' => 'sitemap'), true, '&'));
 139          $ping_urls = array(
 140              'google'    => 'http://www.google.com/webmasters/sitemaps/ping?sitemap='.$encoded_sitemap_url,
 141              'microsoft' => 'http://www.bing.com/webmaster/ping.aspx?siteMap='.$encoded_sitemap_url,
 142              'yandex'    => 'http://blogs.yandex.ru/pings/?status=success&url='.$encoded_sitemap_url
 143          );
 144  
 145          $data = array('ping_urls' => $ping_urls,
 146                              'encoded_sitemap_url' => $encoded_sitemap_url
 147          );
 148          $event = new Doku_Event('SITEMAP_PING', $data);
 149          if ($event->advise_before(true)) {
 150              foreach ($data['ping_urls'] as $name => $url) {
 151                  dbglog("Sitemapper::PingSearchEngines(): pinging $name");
 152                  $resp = $http->get($url);
 153                  if($http->error) dbglog("Sitemapper:pingSearchengines(): $http->error");
 154                  dbglog('Sitemapper:pingSearchengines(): '.preg_replace('/[\n\r]/',' ',strip_tags($resp)));
 155              }
 156          }
 157          $event->advise_after();
 158  
 159          return true;
 160      }
 161  }
 162  
 163  /**
 164   * An item of a sitemap.
 165   *
 166   * @author Michael Hamann
 167   */
 168  class SitemapItem {
 169      public $url;
 170      public $lastmod;
 171      public $changefreq;
 172      public $priority;
 173  
 174      /**
 175       * Create a new item.
 176       *
 177       * @param string $url        The url of the item
 178       * @param int    $lastmod    Timestamp of the last modification
 179       * @param string $changefreq How frequently the item is likely to change. Valid values: always, hourly, daily, weekly, monthly, yearly, never.
 180       * @param $priority float|string The priority of the item relative to other URLs on your site. Valid values range from 0.0 to 1.0.
 181       */
 182      public function __construct($url, $lastmod, $changefreq = null, $priority = null) {
 183          $this->url = $url;
 184          $this->lastmod = $lastmod;
 185          $this->changefreq = $changefreq;
 186          $this->priority = $priority;
 187      }
 188  
 189      /**
 190       * Helper function for creating an item for a wikipage id.
 191       *
 192       * @param string       $id         A wikipage id.
 193       * @param string       $changefreq How frequently the item is likely to change. Valid values: always, hourly, daily, weekly, monthly, yearly, never.
 194       * @param float|string $priority   The priority of the item relative to other URLs on your site. Valid values     range from 0.0 to 1.0.
 195       * @return SitemapItem The sitemap item.
 196       */
 197      public static function createFromID($id, $changefreq = null, $priority = null) {
 198          $id = trim($id);
 199          $date = @filemtime(wikiFN($id));
 200          if(!$date) return null;
 201          return new SitemapItem(wl($id, '', true), $date, $changefreq, $priority);
 202      }
 203  
 204      /**
 205       * Get the XML representation of the sitemap item.
 206       *
 207       * @return string The XML representation.
 208       */
 209      public function toXML() {
 210          $result = '  <url>'.NL
 211                   .'    <loc>'.hsc($this->url).'</loc>'.NL
 212                   .'    <lastmod>'.date_iso8601($this->lastmod).'</lastmod>'.NL;
 213          if ($this->changefreq !== null)
 214              $result .= '    <changefreq>'.hsc($this->changefreq).'</changefreq>'.NL;
 215          if ($this->priority !== null)
 216              $result .= '    <priority>'.hsc($this->priority).'</priority>'.NL;
 217          $result .= '  </url>'.NL;
 218          return $result;
 219      }
 220  }