<?php namespace ProcessWire;

/**
 * ProcessWire Language Parser
 * 
 * Parses a PHP file to locate all function calls containing translatable text and their optional comments. 
 *
 * Return the results by calling $parser->getUntranslated() and $parser->getComments();
 *
 * ProcessWire 3.x, Copyright 2020 by Ryan Cramer
 * https://processwire.com
 *
 *
 */

class LanguageParser extends Wire {

	/**
	 * Instance of LanguageTranslator
	 *
	 */
	protected $translator; 

	/**
	 * Textdomain for $file provided to this instance
	 *
	 */
	protected $textdomain = '';

	/**
	 * Array of found comments, indexed by hash of text they go with
	 *
	 */
	protected $comments = array();

	/**
	 * Array of found phrases (in English) indexed by hash
	 *
	 */
	protected $untranslated = array();

	/**
	 * Array of phrase alternates, indexed by source phrase
	 * 
	 * @var array
	 * 
	 */
	protected $alternates = array();

	/**
	 * Total number of phrases found
	 *
	 */
	protected $numFound = 0;

	/**
	 * Construct the Language Parser
	 *
	 * @param LanguageTranslator $translator
	 * @param string $file PHP filename to parse
	 *
	 */
	public function __construct(LanguageTranslator $translator, $file) {
		$this->translator = $translator; 
		$this->textdomain = $this->translator->filenameToTextdomain($file); 
		$this->translator->loadTextdomain($this->textdomain); 
		$this->execute($file); 
	}

	/**
	 * Get phrase alternates
	 * 
	 * @param string $hash Specify phrase hash to get alternates or omit to get all alternates
	 * @return array
	 * 
	 */
	public function getAlternates($hash = '') {
		if(empty($hash)) return $this->alternates;
		return isset($this->alternates[$hash]) ? $this->alternates[$hash] : array();
	}

	/**
	 * Return all found comments, indexed by hash
	 * 
	 * @return array
	 *
	 */
	public function getComments() { return $this->comments; }

	/**
	 * Return all found phrases (in untranslated form), indexed by hash
	 * 
	 * @return array
	 *
	 */
	public function getUntranslated() { return $this->untranslated; }

	/**
	 * Return number of phrases found total
	 * 
	 * @return int
	 *
	 */
	public function getNumFound() { return $this->numFound; }

	/**
 	 * Given a hash, return the untranslated text associated with it
	 * 
	 * @param string $hash
	 * @return string|bool Returns untranslated text (string) on success or boolean false if not available
	 *
	 */
	public function getTextFromHash($hash) { 
		return isset($this->untranslated[$hash]) ? $this->untranslated[$hash] : false; 
	}

	/**
	 * Begin parsing given file
	 * 
	 * @param string $file
	 *
	 */
	protected function execute($file) {

		$matches = $this->parseFile($file);

		foreach($matches as $m) {
			// $m[3] is always the text
			if(empty($m)) continue; 
			foreach($m[3] as $key => $text) { 
				$match = $this->buildMatch($m, $key, $text); 
				$this->processMatch($match); 
				if($match['plural']) {
					$match['text'] = $match['plural'];
					$this->processMatch($match); 
				}
			}
		}
	}

	/**
	 * Find text array values and place in alternates
	 * 
	 * This method also converts the __(['a','b','c']) array calls to single value calls like __('a')
	 * as a pre-parser for all parsers that follow it, so they do not need to be * aware of array values 
	 * for translation calls. 
	 * 
	 * @param string $data
	 * 
	 */
	protected function findArrayTranslations(&$data) {
		
		if(!strpos($data, '_([')) return;
		
		$regex = 
			'/((?:->_|\b__|\b_n|\b_x)\(\[\s*)' . // "->_([" or "__([" or "_n([" or "_x(["
			'([\'"])(.+?)(?<!\\\\)\\2' . // 'text1'
			'([^\]]*?\])\s*' . // , 'text2', 'text3' ]"
			'([^)]*\))/m'; // and the remainder of the function call

		$funcTypes = array('->_(' => '>', '__(' => '_', '_n(' => 'n', '_x(' => 'x');
		
		if(!preg_match_all($regex, $data, $m)) return;
			
		foreach($m[0] as $key => $find) {
			
			$func = trim(str_replace('[', '', $m[1][$key])); // "->_([" or "__([" or "_n([" or "_x(["
			$funcType = isset($funcTypes[$func]) ? $funcTypes[$func] : '_';
			$quote = $m[2][$key]; // single quote or double quote ['"]
			$text1 = $m[3][$key]; // first text in array
			$textArrayStr = trim($m[4][$key], ' ,[]'); // the other text phrases in the array (CSV and quoted)
			$theRest = $m[5][$key]; // remainder of function call, i.e. ", __FILE__)" or ", 'context-str'"
			$context = '';

			$trimRest = ltrim($theRest, ', '); 
			if($funcType === 'x' && (strpos($trimRest, '"') === 0 || strpos($trimRest, "'") === 0)) {
				if(preg_match('/^([\'"])(.+?)(?<!\\\\)\\1/', $trimRest, $matches)) {
					$context = $matches[2];
				}
			}
			
			// Convert from: "__(['a', 'b', 'c'])" to "__('a')" and remember 'b' and 'c' alternates
			$replace = $func . $quote . $text1 . $quote . $theRest;
			$data = str_replace($find, $replace, $data);
			$text1 = $this->unescapeText($text1);
			
			// Given string "'b', 'c'" convert to array and place in alternates
			if(preg_match_all('/(^|,\s*)([\'"])(.+?)(?<!\\\\)\\2/', $textArrayStr, $matches)) {
				$hash1 = $this->getTextHash($text1, $context); 
				if(!isset($this->alternates[$hash1])) $this->alternates[$hash1] = array();
				foreach($matches[3] as $text) {
					$text2 = $this->unescapeText($text);
					$hash2 = $this->getTextHash($text, $context); 
					$this->alternates[$hash1][$hash2] = $text2;
				}
			}
		}
	}

	/**
	 * Run regexes on file contents to locate all translation functions
	 * 
	 * @param string $file
	 * @return array
	 *
	 */
	protected function parseFile($file) { 

		$matches = array(
			1 => array(), 	// $this->_('text'); 
			2 => array(),	// __('text', [textdomain]);
			3 => array(),	// _x('text', 'context', [textdomain]) or $this->_x('text', 'context'); 
			4 => array(),	// _n('singular', 'plural', $cnt, [textdomain]) or $this->_n(...); 
		);

		if(!is_file($file)) return $matches; 

		$data = file_get_contents($file); 
		$this->findArrayTranslations($data);

		// Find $this->_('text') style matches
		preg_match_all(	
			'/(>_)\(\s*' . // $this->_( 
			'([\'"])(.+?)(?<!\\\\)\\2' . // "text"
			'\s*\)+(.*)$/m', // and everything else
			$data, $matches[1]
		); 

		// Find __('text', textdomain) style matches
		preg_match_all(	
			'/([\s.=(\\\\]__|^__)\(\s*' . // __(
			'([\'"])(.+?)(?<!\\\\)\\2\s*' . // "text"
			'(?:,\s*[^)]+)?\)+(.*)$/m', // , textdomain (optional) and everything else
			$data, $matches[2]
		); 

		// Find _x('text', 'context', textdomain) or $this->_x('text', 'context') style matches
		preg_match_all(	
			'/([\s.=>(\\\\]_x|^_x)\(\s*' . // _x( or $this->_x(
			'([\'"])(.+?)(?<!\\\\)\\2\s*,\s*' . // "text", 
			'([\'"])(.+?)(?<!\\\\)\\4\s*' . // "context"
			'[^)]*\)+(.*)$/m', // , textdomain (optional) and everything else 
			$data, $matches[3]
		); 

		// Find _n('singular text', 'plural text', $cnt, textdomain) or $this->_n(...) style matches
		preg_match_all(	
			'/([\s.=>(\\\\]_n|^_n)\(\s*' . // _n( or $this->_n(
			'([\'"])(.+?)(?<!\\\\)\\2\s*,\s*' . // "singular", 
			'([\'"])(.+?)(?<!\\\\)\\4\s*,\s*' . // "plural", 
			'.+?\)+(.*)$/m', // $count, optional textdomain, closing function parenthesis ) and rest of line
			$data, $matches[4]
		); 

		return $matches; 
	}

	/**
	 * Build the match abstracted away from the preg_match result
	 * 
	 * @param array $m
	 * @param int $key
	 * @param string $text
	 * @return array
	 *	
	 */
	protected function buildMatch(array $m, $key, $text) {
	
		// $match is where we store the results generated by this function
		$match = array('text' => $text, 'context' => '', 'plural' => '', 'tail' => '');

		// determine the function type
		$funcType = substr($m[1][$key], 0, 1); // '>' OR '_' , for '$this->_()' OR '__()'
		$funcType2 = substr($m[1][$key], -1); // 'x' OR 'n' OR '_'
		if($funcType2 == 'x' || $funcType2 == 'n') $funcType = $funcType2; 

		// tail, plural and context vary in position according to function type
		if($funcType == 'x') {
			// context function _x()
			$match['tail'] = $m[6][$key]; 
			$match['context'] = $m[5][$key];

		} else if($funcType == 'n') { 
			// plural function _n()
			$match['tail'] = $m[6][$key]; 
			$match['plural'] = $m[5][$key];

		} else {
			// tail containing optional label comment
			$match['tail'] = $m[4][$key]; 
		}

		return $match;
	}

	/**
	 * Process the match and populate $this->untranslated and $this->comments
	 * 
	 * @param array $match
	 *
	 */
	protected function processMatch(array $match) { 

		$text = $this->unescapeText($match['text']);
		$tail = $match['tail'];
		$context = $match['context'];
		$plural = $match['plural'];	
		$comments = '';

		// get the translation for $text in $context
		$translation = $this->translator->getTranslation($this->textdomain, $text, $context);

		// if translation == $text then that means no translation was found, make $translation blank
		if($translation == $text) $translation = ''; 

		// set a pending translation to get the hash
		$hash = $this->translator->setTranslation($this->textdomain, $text, $translation, $context); 
		if(!$hash) return;

		// store the untranslated (English) version of $hash
		$this->untranslated[$hash] = $text; 
		$this->numFound++;

		// check if there are comments in the $tail and record them if so
		if(($pos = strpos($tail, '//')) !== false) {
			if(preg_match('![^:"\']//(.+)$!', $tail, $matches)) {
				$comments = $matches[1];
			}
		}

		// check if a plural was found and set an automatic comment to indicate which is which
		if($plural) {
			$note = $plural == $text ? "Plural" : "Singular";
			// force note saying Plural or Singular
			$comments = ($comments ? $comments : $text) . " // $note Version"; 

		} else if($context) { 
			$comments = ($comments ? $comments : $text) . " // Context: $context";
		}

		// save the comments indexed to the hash
		if($comments) $this->comments[$hash] = $comments; 
	}

	/**
	 * Replace any escaped characters with non-escaped versions
	 * 
	 * @param string $text
	 * @return string
	 * 
	 */
	protected function unescapeText($text) {
		if(strpos($text, '\\') !== false) {
			$text = str_replace(array('\\"', '\\\'', '\\$', '\\'), array('"', "'", '$', '\\'), $text);
		}
		return $text;
	}

	/**
	 * Get hash for given text + context
	 * 
	 * @param string $text
	 * @param string $context
	 * @return string
	 * 
	 */
	protected function getTextHash($text, $context) {
		$translation = $this->translator->getTranslation($this->textdomain, $text, $context); // get the translation for $text in $context
		if($translation == $text) $translation = ''; // if translation == $text then that means no translation was found, make $translation blank
		$hash = $this->translator->setTranslation($this->textdomain, $text, $translation, $context);
		if(!$hash) $hash = $text;
		return $hash;
	}

}
