Subversion Repositories web.active

Rev

Rev 22 | Blame | Compare with Previous | Last modification | View Log | Download

<?php namespace ProcessWire;

/**
 * ProcessWire Markup RSS Module
 *
 * Given a PageArray of pages, this module will render an RSS feed from them. 
 * This is intended to be used directly from a template file. See usage below.
 *
 * USAGE
 * ~~~~~~
 * $rss = $modules->get('MarkupRSS'); 
 * $rss->setArray([ // specify RSS feed settings
 *   'title' => 'Latest updates',
 *   'description' => 'The most recent pages updated on my site',
 *   'itemTitleField' => 'title',
 *   'itemDateField' => 'created', // date field or 'created', 'published' or 'modified'
 *   'itemDescriptionField' => 'summary', 
 *   'itemDescriptionLength' => 1000, // truncate descriptions to this max length or 0 to allow HTML
 *   'itemContentField' => 'body', // optional HTML full-content, or omit to exclude
 *   'itemAuthorField' => 'author', // optional text or Page field containing author(s)
 * ]); 
 * $items = $pages->find('limit=10, sort=-modified'); // or any pages you want
 * $rss->render($items); 
 * exit; // exit now, or if you don’t then at least stop sending further output
 * ~~~~~~
 *
 * See also the $defaultConfigData below (first thing in the class) to see what
 * options you can change at runtime. 
 *
 * 
 * ProcessWire 3.x, Copyright 2023 by Ryan Cramer
 * https://processwire.com
 * 
 * @property string $title
 * @property string $url
 * @property string $description
 * @property string $xsl
 * @property string $css
 * @property string $copyright
 * @property int $ttl
 * @property string $itemTitleField
 * @property string $itemContentField
 * @property string $itemDateField
 * @property string $itemDescriptionField Field to use for item description.
 * @property int $itemDescriptionLength Max length for item description or 0 to allow HTML markup with any length (default=1024)
 * @property string $itemAuthorField 
 * @property string $itemAuthorElement
 * @property string $header
 * @property array|PageArray $feedPages
 * @property bool $stripTags Strip tags from item description? Applies only if `itemDescriptionLength>0`. (default=true)
 *
 *
 */

class MarkupRSS extends WireData implements Module, ConfigurableModule {
  
  /**
   * Describe this module to ProcessWire
   *
   * @return array Module info containing the 'title', 'version', 'summary' and 'icon' keys
   *
   */
  public static function getModuleInfo() {
    $info = array();
    $info['title'] = 'Markup RSS Feed';
    $info['version'] = 105;
    $info['summary'] = 'Renders an RSS feed. Given a PageArray, renders an RSS feed of them.';
    $info['icon'] = 'rss-square';
    return $info;
  }

  /**
   * Default values for every runtime/configurable feed setting
   *
   * The constructor copies these onto the module instance, and the module
   * config screen uses them to fill in any setting missing from saved data.
   *
   * @var array
   *
   */
  protected static $defaultConfigData = array(
    'title' => 'Untitled RSS Feed',
    'url' => '', 
    'description' => '', 
    'xsl' => '', 
    'css' => '',
    'copyright' => '',
    'ttl' => 60,
    'stripTags' => true, 
    'itemTitleField' => 'title',
    'itemContentField' => '', // for <content:encoded>
    'itemDescriptionField' => 'summary',
    'itemDescriptionLength' => 1024, 
    'itemDateField' => 'created',
    'itemAuthorField' => '', // i.e. createdUser.title or leave blank to not use
    'itemAuthorElement' => 'dc:creator', // may be 'dc:creator' or 'author' (author if email address)
    'header' => 'Content-Type: application/xml; charset=utf-8;',
    'feedPages' => array(), 
  ); 

  /**
   * Construct the module and populate it with the default feed settings
   *
   */
  public function __construct() {
    parent::__construct();
    $defaults = self::$defaultConfigData;
    $this->setArray($defaults);
  }

  /**
   * Module initialization hook; this module has nothing to set up here
   *
   */
  public function init() {
    // intentionally empty
  }

  /**
   * Entity-encode a string that may already contain entities
   *
   * When the string contains an ampersand it is first passed through the
   * sanitizer's unentities() method, and the result is then encoded by ent().
   *
   * @param string $str
   * @return string
   *
   */
  protected function ent1($str) {
    $hasAmpersand = strpos($str, '&') !== false;
    if($hasAmpersand) {
      $sanitizer = $this->wire()->sanitizer;
      $str = $sanitizer->unentities($str, true);
    }
    return $this->ent($str);
  }

  /**
   * Entity-encode a string for use in XML, using hexadecimal character references
   *
   * The five XML special characters are first encoded by htmlspecialchars() and
   * the resulting named entities are then swapped for hexadecimal references.
   *
   * ENT_SUBSTITUTE is used so that an invalid UTF-8 sequence is replaced with
   * U+FFFD rather than making htmlspecialchars() return an empty string, which
   * would silently blank the entire value.
   *
   * @param string $str
   * @return string
   *
   */
  protected function ent($str) {
    $str = htmlspecialchars((string) $str, ENT_XML1 | ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    $str = strtr($str, array(
      // https://validator.w3.org/feed/
      // recommends using hexadecimal entities here
      '&gt;' => '&#x0003E;',
      '&lt;' => '&#x0003C;',
      '&amp;' => '&#x00026;',
      '&quot;' => '&#x00022;',
      '&apos;' => '&#x00027;',
      '&#39;' => '&#x00027;',
    ));
    return $str;
  }

  /**
   * Render the opening portion of the feed
   *
   * Produces the XML declaration, optional stylesheet processing instructions,
   * the opening rss and channel tags and the channel-level elements. When no
   * feed URL has been set, the current page's httpUrl is stored as the URL.
   *
   * @return string
   *
   */
  protected function renderHeader() {

    // fall back to the URL of the current page when none was provided
    if(!$this->url) $this->url = $this->page->httpUrl;

    $feedTitle = $this->ent1($this->title);
    $feedUrl = $this->ent1($this->url);
    $feedDescription = $this->ent1($this->description);
    $feedCopyright = $this->ent1($this->copyright);
    $xslHref = $this->ent1($this->xsl);
    $cssHref = $this->ent1($this->css);
    $minutes = (int) $this->ttl;
    $now = date(\DATE_RSS);

    // namespaces: the content namespace is only declared when full content is included
    $namespaces = 'xmlns:atom="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/"';
    if($this->itemContentField) {
      $namespaces .= ' xmlns:content="http://purl.org/rss/1.0/modules/content/"';
    }

    $lines = array('<?xml version="1.0" encoding="utf-8" ?>');
    if($xslHref) $lines[] = "<?xml-stylesheet type='text/xsl' href='$xslHref' ?>";
    if($cssHref) $lines[] = "<?xml-stylesheet type='text/css' href='$cssHref' ?>";

    $lines[] = "<rss version=\"2.0\" $namespaces>";
    $lines[] = "<channel>";
    $lines[] = "\t<title>$feedTitle</title>";
    $lines[] = "\t<link>$feedUrl</link>";
    $lines[] = "\t<atom:link href=\"$feedUrl\" rel=\"self\" type=\"application/rss+xml\" />";
    $lines[] = "\t<description>$feedDescription</description>";
    $lines[] = "\t<pubDate>$now</pubDate>";

    if($feedCopyright) $lines[] = "\t<copyright>$feedCopyright</copyright>";
    if($minutes) $lines[] = "\t<ttl>$minutes</ttl>";

    return implode("\n", $lines) . "\n";
  }

  /**
   * Render individual RSS item
   *
   * Builds one item element from the given page using the configured item
   * fields. Returns an empty string when the page has no title (after tags are
   * stripped), so that the page is left out of the feed.
   *
   * @param Page $page
   * @return string Markup for the item, or blank string if the page has no title
   *
   */
  protected function renderItem(Page $page) {

    $sanitizer = $this->wire()->sanitizer;

    // cast before strip_tags: the title field may be absent (null), and passing
    // null to strip_tags() is deprecated as of PHP 8.1
    $title = strip_tags((string) $page->get($this->itemTitleField));

    // compare against '' rather than using empty(), so that a page whose
    // title is the string "0" is not dropped from the feed
    if($title === '') return '';

    $author = '';
    $description = '';
    $content = '';
    $pubDate = '';
    $title = $this->ent1($title);

    if($this->itemDateField && ($ts = $page->getUnformatted($this->itemDateField))) {
      // date
      $pubDate = "\t\t<pubDate>" . date(DATE_RSS, $ts) . "</pubDate>\n";
    }

    if($this->itemAuthorField) {
      // author: may be plain text, a single Page or a PageArray of several authors
      $author = $page->get($this->itemAuthorField);
      if($author instanceof Page) {
        $author = $author->get('title|name');
      } else if($author instanceof PageArray) {
        $author = $author->implode(', ', 'title');
      }
      $author = (string) $author;
      if(strlen($author)) {
        $author = $this->ent1($author);
        $author = "\t\t<$this->itemAuthorElement>$author</$this->itemAuthorElement>\n";
      } else {
        $author = '';
      }
    }

    if($this->itemDescriptionField) {
      // description summary
      $description = $page->get($this->itemDescriptionField);
      if($description !== null) {
        if($this->itemDescriptionLength == 0) {
          // direct markup allowed in item description
          $description = $this->relativeToAbsoluteHtml($description, $page);
        } else {
          // text-only description: decode, truncate, then encode again
          $description = $sanitizer->unentities($description, true);
          $description = $this->truncateDescription($description);
          $description = $this->ent($description);
        }
        $description = '<![CDATA[' . $description . ']]>';
      } else {
        $description = '';
      }
    }

    if($this->itemContentField) {
      // full HTML content, like that from CKEditor
      $content = (string) $page->get($this->itemContentField);
      $content = $this->relativeToAbsoluteHtml($content, $page);
      $content = "\t\t<content:encoded><![CDATA[" . $content . "]]></content:encoded>\n";
    }

    $out =
      "\t<item>\n" .
      "\t\t<title>$title</title>\n" .
      "\t\t<description>$description</description>\n" .
      $pubDate .
      $author .
      $content .
      "\t\t<link>$page->httpUrl</link>\n" .
      "\t\t<guid>$page->httpUrl</guid>\n" .
      "\t</item>\n";

    return $out;
  }

  /**
   * Build the complete feed markup and return it as a string
   *
   * When pages are given they replace the module's current feedPages setting.
   * Pages that are not viewable are left out of the feed.
   *
   * @param PageArray|null $feedPages Pages to render, or omit to use the feedPages setting
   * @return string
   *
   */
  public function renderFeed(PageArray $feedPages = null) {

    if($feedPages !== null) {
      $this->feedPages = $feedPages;
    }

    $xml = $this->renderHeader();

    foreach($this->feedPages as $item) {
      if($item->viewable()) {
        $xml .= $this->renderItem($item);
      }
    }

    return $xml . "</channel>\n</rss>\n";
  }

  /**
   * Send the configured HTTP header and output the feed
   *
   * @param PageArray|null $feedPages Pages to render, or omit to use the feedPages setting
   * @return bool Always true
   *
   */
  public function render(PageArray $feedPages = null) {
    $httpHeader = $this->header;
    header($httpHeader);
    $feed = $this->renderFeed($feedPages);
    echo $feed;
    return true;
  }

  /**
   * Truncate the description to the configured max length without splitting words
   *
   * The length (itemDescriptionLength) is measured in bytes. A description that
   * already fits is returned as-is. Otherwise it is cut at the max length and
   * then shortened further to end at the last punctuation boundary, or at the
   * last space when the last punctuation is too far from the end.
   *
   * The cut never leaves a partial multibyte UTF-8 character at the end of the
   * string, since an invalid sequence would cause the entity encoding applied
   * afterwards to fail.
   *
   * @param string $str
   * @return string
   *
   */
  protected function truncateDescription($str) {

    $str = trim((string) $str);
    $maxlen = (int) $this->itemDescriptionLength;

    // no (or nonsensical) max length: nothing to truncate
    if($maxlen < 1) return $str;

    if($this->stripTags) $str = strip_tags($str);

    // already fits; a description of exactly $maxlen bytes needs no truncation
    if(strlen($str) <= $maxlen) return $str;

    $str = substr($str, 0, $maxlen);

    // The byte-wise cut may have split a multibyte UTF-8 character. Look back
    // for the lead byte of the last character and drop that character when
    // fewer bytes remain than its lead byte calls for.
    $len = strlen($str);
    for($back = 1; $back <= 4 && $back <= $len; $back++) {
      $byte = ord($str[$len - $back]);
      if(($byte & 0xC0) === 0x80) continue; // continuation byte, lead byte is further back
      if($byte >= 0xF0) {
        $need = 4;
      } else if($byte >= 0xE0) {
        $need = 3;
      } else if($byte >= 0xC0) {
        $need = 2;
      } else {
        $need = 1;
      }
      if($need > $back) $str = substr($str, 0, $len - $back);
      break;
    }

    $str = trim($str);

    // boundaries that we can end the summary with
    $boundaries = array('. ', '? ', '! ', ', ', '; ', '-');
    $bestPos = 0;

    foreach($boundaries as $boundary) {
      $pos = strrpos($str, $boundary);
      // find the boundary that is furthest in string
      if($pos !== false && $pos > $bestPos) $bestPos = $pos;
    }

    // determine if we should truncate to last punctuation or last space.
    // if the last punctuation is further away than 1/4th the total length, then we'll
    // truncate to the last space. Otherwise, we'll truncate to the last punctuation.
    $spacePos = strrpos($str, ' ');
    if($spacePos !== false && ($spacePos - ($maxlen / 4)) > $bestPos) $bestPos = $spacePos;

    // no usable boundary: keep the string as cut at the max length
    if(!$bestPos) return $str;

    return trim(substr($str, 0, $bestPos + 1));
  }
  
  /**
   * Update links and other references in HTML content to be suitable for RSS
   *
   * - Root-relative href and src attributes (starting with a single "/") are
   *   prefixed with the site's httpRoot URL.
   * - Fragment-only href attributes (starting with "#") are prefixed with the
   *   page's httpUrl.
   * - CDATA open/close sequences are encoded, so that the content can itself be
   *   placed inside a CDATA section.
   *
   * Protocol-relative URLs (starting with "//") already point at a host and
   * are left unchanged.
   *
   * @param string $content
   * @param Page $page
   * @return string
   *
   */
  protected function relativeToAbsoluteHtml($content, Page $page) {

    $content = (string) $content;
    $rootUrl = $this->wire()->config->urls->httpRoot;
    $pageUrl = $page->httpUrl();

    // The (?!/) lookahead skips protocol-relative URLs like href="//cdn.example.com/x",
    // which a plain string replacement of ' href="/' would turn into a broken URL.
    // A callback is used so that the root URL is never read as a replacement pattern.
    $absolute = preg_replace_callback(
      '~( (?:href|src)=["\'])/(?!/)~',
      function($matches) use($rootUrl) {
        return $matches[1] . $rootUrl;
      },
      $content
    );

    // preg_replace_callback() returns null on failure; keep the original content then
    if($absolute !== null) $content = $absolute;

    $a = array(
      ' href="#' => ' href="' . $pageUrl . '#',
      " href='#" => " href='" . $pageUrl . '#',
      '<![CDATA[' => '&lt;![CDATA[',
      ']]>' => ']]&gt;'
    );

    return str_replace(array_keys($a), array_values($a), $content);
  }

  /**
   * Build the inputs shown on this module's configuration screen
   *
   * All inputs are placed in a single "RSS feed defaults" fieldset. Settings
   * missing from $data are filled in from the default config data. The field
   * selects are populated from the site's text, textarea and datetime fields.
   *
   * @param array $data Module config data
   * @return InputfieldWrapper
   *
   */
  public function getModuleConfigInputfields(array $data) {

    /** @var Modules $modules */
    $modules = $this->wire('modules');

    /** @var InputfieldWrapper $form */
    $form = $this->wire(new InputfieldWrapper());

    /** @var InputfieldFieldset $fieldset */
    $fieldset = $modules->get('InputfieldFieldset');
    $fieldset->attr('name', '_defaults');
    $fieldset->label = 'RSS feed defaults';
    $fieldset->icon = 'rss';
    $fieldset->description =
      "Select the default options for any given feed. Each of these may be overridden in the API, " .
      "so the options you select below should be considered defaults, unless you only have 1 feed. " .
      "If you only need to support 1 feed, then you will not need to override any of these in the API.";
    $form->add($fieldset);

    // fill in any setting that is not present in the given data
    foreach(self::$defaultConfigData as $name => $default) {
      if(isset($data[$name])) continue;
      $data[$name] = $default;
    }

    // plain text and URL inputs: module name, setting name, label, description
    $simpleInputs = array(
      array('InputfieldText', 'title', "Feed title", "The primary title of the RSS feed."),
      array('InputfieldURL', 'url', "Feed URL", "Optional URL on your site that serves as a feed index."),
      array('InputfieldText', 'description', "Feed description", "Optional default description for a feed."),
      array('InputfieldURL', 'xsl', "Link to XSL stylesheet", "Optional URL/link to an XSL stylesheet."),
      array('InputfieldURL', 'css', "Link to CSS stylesheet", "Optional URL/link to a CSS stylesheet."),
      array('InputfieldText', 'copyright', "Feed copyright", "Optional default copyright statement for a feed."),
    );

    foreach($simpleInputs as $definition) {
      list($moduleName, $name, $label, $description) = $definition;
      /** @var Inputfield $input */
      $input = $modules->get($moduleName);
      $input->attr('name', $name);
      $input->attr('value', $data[$name]);
      $input->label = $label;
      $input->description = $description;
      $input->columnWidth = 50;
      $fieldset->add($input);
    }

    /** @var InputfieldSelect $dateSelect */
    $dateSelect = $modules->get('InputfieldSelect');
    $dateSelect->attr('name', 'itemDateField');
    $dateSelect->attr('value', $data['itemDateField']);
    $dateSelect->label = "Feed item date field";
    $dateSelect->description = "The default field to use as an individual feed item's date.";
    foreach(array('created', 'modified', 'published') as $nativeDate) {
      $dateSelect->addOption($nativeDate);
    }
    $dateSelect->columnWidth = 50;

    /** @var InputfieldSelect $titleSelect */
    $titleSelect = $modules->get('InputfieldSelect');
    $titleSelect->attr('name', 'itemTitleField');
    $titleSelect->attr('value', $data['itemTitleField']);
    $titleSelect->label = "Feed item title field";
    $titleSelect->description = "The default field to use as an individual feed item's title.";
    $titleSelect->columnWidth = 50;

    /** @var InputfieldSelect $descriptionSelect */
    $descriptionSelect = $modules->get('InputfieldSelect');
    $descriptionSelect->attr('name', 'itemDescriptionField');
    $descriptionSelect->attr('value', $data['itemDescriptionField']);
    $descriptionSelect->label = "Feed item description field";
    $descriptionSelect->columnWidth = 50;
    $descriptionSelect->description = "The default field to use as an individual feed item's description (typically a summary or body field). Note that HTML will be stripped out.";

    /** @var InputfieldInteger $lengthInput */
    $lengthInput = $modules->get('InputfieldInteger');
    $lengthInput->attr('name', 'itemDescriptionLength');
    $lengthInput->attr('value', (int) $data['itemDescriptionLength']);
    $lengthInput->label = "Maximum characters for item description field";
    $lengthInput->columnWidth = 50;
    $lengthInput->description = "The item description will be truncated to be no longer than the max length. When greater than 0, HTML tags will be removed or encoded.";
    $lengthInput->notes = "Specify `0` for no max length AND to allow HTML in the description.";

    /** @var InputfieldSelect $contentSelect */
    $contentSelect = $modules->get('InputfieldSelect');
    $contentSelect->attr('name', 'itemContentField');
    $contentSelect->attr('value', $data['itemContentField']);
    $contentSelect->label = "HTML content/body field";
    $contentSelect->description = "Optional field that contains the entire article/bodycopy in HTML. Select only if you intend to include the entire content in the RSS feed, otherwise use just the description field.";
    $contentSelect->columnWidth = 50;

    /** @var InputfieldInteger $ttlInput */
    $ttlInput = $modules->get('InputfieldInteger');
    $ttlInput->attr('name', 'ttl');
    $ttlInput->attr('value', (int) $data['ttl']);
    $ttlInput->label = "Feed TTL";
    $ttlInput->description = "TTL stands for \"time to live\" in minutes. It indicates how long a channel can be cached before refreshing from the source. Default is 60.";
    $ttlInput->columnWidth = 50;

    // offer each site field in the selects that suit its type; the textarea
    // check comes before the text check, as in a chain of else-if conditions
    foreach($this->wire()->fields as $field) {
      $type = $field->type;

      if($type instanceof FieldtypeTextarea) {
        $descriptionSelect->addOption($field->name);
        $contentSelect->addOption($field->name);
        continue;
      }

      if($type instanceof FieldtypeText) {
        $titleSelect->addOption($field->name);
        $descriptionSelect->addOption($field->name);
        continue;
      }

      if($type instanceof FieldtypeDatetime) {
        $dateSelect->addOption($field->name);
      }
    }

    $ordered = array($titleSelect, $dateSelect, $descriptionSelect, $lengthInput, $contentSelect, $ttlInput);
    foreach($ordered as $input) {
      $fieldset->add($input);
    }

    return $form;

  }

}