<?php
/**
 * This file is part of the transcoding library. It contains the
 * definition of the {@link TranscodingActionLineartables} class.
 * 
 * @author Sylvain Lequeux
 * @author Francois Daoust <fd@w3.org>
 * @package TransPythia
 * @version $Revision: 1.24 $
 * @license http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231.html W3C Software Notice and License
 * @copyright Copyright (c) 2009, W3C (MIT, ERCIM, Keio)
 */

/**
 * Include the {@link TranscodingAction} base class definition.
 */
require_once(dirname(__FILE__) . '/transcodingaction.php');

/**
 * Table type identifiers returned by the table type detection and used
 * to select the linearization that applies to a table.
 */
// Data table: at least one header cell was found in the table.
define('TRANSPYTHIA_TABLE_DATA', 1);
// Layout table: no header cell was found in the table.
define('TRANSPYTHIA_TABLE_LAYOUT', 2);


/**
 * Transcoding action that linearizes tables found in HTML content when the
 * requesting device does not support tables, and that removes nested tables
 * and tables used for layout purpose when the requesting device is identified
 * as mobile.
 * 
 * XHTML Basic 1.1 forbids nested tables. Thus this action linearizes nested
 * tables when the requesting device is identified as mobile, even when it
 * supports tables.
 * 
 * XHTML Basic 1.1 only supports basic tables (no "thead", "tbody", "tfoot"
 * elements for instance). Thus this action converts all tables to basic
 * tables when the requesting device is identified as mobile.
 * 
 * The action recognizes 2 types of tables:
 * - tables that define some header cells (th). They are considered to
 * contain actual tabular data. This use of tables is good and this action
 * leaves these tables untouched (once they have been converted to basic tables,
 * that is). These tables only get linearized when the device does not support
 * tables at all, or when they would create a nested table in the adapted
 * content.
 * 
 * - tables without header cells (th). They are considered to be used to
 * control the layout of the page. This is a mis-use of tables that should be
 * avoided when possible. This action linearizes these tables when the
 * requesting device is identified as mobile.  
 * 
 * The transcoding action requires tables to be well-formed, i.e. tables will
 * be parsed as XML content.
 * 
 * Linearization of layout tables is easy, cells are just written one after the
 * other. Proper linearization of data tables is not trivial and has not yet
 * been implemented: this action applies the same algorithm to both types of
 * tables, which may lead to a very poor user experience.
 * 
 * @author Sylvain Lequeux
 * @author Francois Daoust <fd@w3.org>
 * @package TransPythia
 * @version $Revision: 1.24 $
 * @license http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231.html W3C Software Notice and License
 * @copyright Copyright (c) 2009, W3C (MIT, ERCIM, Keio)
 */
class TranscodingActionLineartables extends TranscodingAction {
	/**
	 * @var bool The requesting device supports tables.
	 */
	private $tablesSupported = false;
	/**
	 * @var array<array<string>> The data tables that are kept in the adapted
	 *   content, stored as array(placeholder, table markup) pairs. A kept
	 *   table is swapped for its placeholder during the recursion so that its
	 *   cells are not picked up when an enclosing table gets linearized, and
	 *   is re-inserted once the recursion is over.
	 */
	private $savedTables = array();
	
	/**
	 * Linearizes tables when the requesting device does not support them or
	 * is identified as a mobile device.
	 * 
	 * The content is returned untouched when the device supports tables (or
	 * when table support is unknown) and is not identified as a mobile device.
	 * 
	 * @param string $content The HTML content to transcode.
	 * @param Evidence $evidence The evidence that identifies the requesting device.
	 * @return string The transcoded content.
	 * @exception SystemException The evidence is not valid.
	 */
	public function apply($content, $evidence) {
		$this->initPropertyValues($evidence);
		
		$this->initProperty('mobile_device',
			TranscodingAction::$WURFL_MOBILE_DEVICE,
			TranscodingAction::$WURFL_VOCABULARY,
			TranscodingAction::$WURFL_DEFAULT_ASPECT);
		$this->initProperty('table_support',
			'xhtml_table_support',
			TranscodingAction::$WURFL_VOCABULARY,
			TranscodingAction::$WURFL_DEFAULT_ASPECT);
		
		$property = $this->getOption('mobile_device');
		$isMobileDevice = $this->getPropertyValuePr($property);
		
		$property = $this->getOption('table_support');
		$tableSupport = $this->getPropertyValuePr($property);
		
		// Tables are considered to be supported when the property is unknown.
		$this->tablesSupported = !isset($tableSupport) || $tableSupport->getBoolean();
		
		// Tables are linearized when:
		// 1. the device does not support them, or
		// 2. the device is identified as mobile. If the mobile device supports
		// tables, simple data tables are kept, layout and nested tables are
		// linearized.
		// Viewed from the opposite angle, tables are left untouched when the
		// device supports tables and is not identified as a mobile device.
		if ($this->tablesSupported
		&& (!isset($isMobileDevice) || !$isMobileDevice->getBoolean())) {
			return $content;
		}
		
		return $this->linearizeTables($content);
	}
	
	/**
	 * Linearizes tables defined in the HTML content.
	 * 
	 * Entry point of the linearization: runs the recursive algorithm and then
	 * re-inserts the tables that were kept in place of their placeholders.
	 * 
	 * @param string $content The HTML content to transcode.
	 * @return string The transcoded content.
	 */
	private function linearizeTables($content) {
		// Linearizes tables
		// (this may generate a few placeholders and saved tables)
		$this->savedTables = array();
		
		$adaptedContent = $this->recLinearizeTables($content, false);
		
		// Re-insert the tables that were kept but put aside not
		// to interfere with regular expressions in the recursive
		// algorithm
		while (($savedTable = array_pop($this->savedTables)) !== null) {
			$adaptedContent = str_replace($savedTable[0], $savedTable[1], $adaptedContent);
		}
		
		return $adaptedContent;
	}
	
	/**
	 * Linearizes tables defined in the HTML content, recursive part.
	 * 
	 * Walks through the top-level tables of the given content, processes
	 * the tables they contain first, and then either linearizes each table
	 * or converts it to a basic table and replaces it with a placeholder.
	 * 
	 * @param string $content The HTML content to transcode.
	 * @param bool $wouldBeNestedTable Tables found in the HTML content would appear
	 *   as nested tables if we kept them intact (in other words, tables found must
	 *   be linearized)
	 * @return string The transcoded content.
	 */
	private function recLinearizeTables($content, $wouldBeNestedTable) {
		$adaptedContent = '';
		$index = 0;
		
		while (true) {
			// Note the structure of the block info:
			// 0: The extracted block
			// 1: The matched tag name
			// 2: The block's starting offset in the string
			// 3: The block's starting tag
			// 4: The block's ending offset in the string
			// 5: The block's ending tag (may be an empty string if empty tag
			// or if ending tag was not found)
			$subblockInfo = $this->getNextBlock('table', $content, $index);
			if (!$subblockInfo) {
				break;
			}
			
			// We now have two blocks:
			// - the first one between position $index and the start of the
			// identified table at $subblockInfo[2]. This first block does not
			// contain any table and is copied as is.
			// - the second one between $subblockInfo[2] and $subblockInfo[4].
			// This second block is the table, and may contain nested tables.
			$adaptedContent .= substr($content, $index, $subblockInfo[2] - $index);
			
			// Note the starting and closing "table" tags are removed for the
			// time being to prevent infinite loops.
			$openingTag = $subblockInfo[3];
			$closingTag = $subblockInfo[5];
			$table = trim(substr(
				$subblockInfo[0],
				strlen($openingTag),
				strlen($subblockInfo[0]) - strlen($openingTag) - strlen($closingTag)));
			$tableType = $this->detectTableType($table);
			
			// Decision to linearize the table depends on the type of table:
			// - a layout table is always linearized.
			// - a data table is linearized if the device does not support
			// tables or if it would appear as a nested table in the adapted
			// content (i.e. because some ancestor data table was kept)
			$tableShouldBeLinearized = !$this->tablesSupported
				|| ($tableType == TRANSPYTHIA_TABLE_LAYOUT)
				|| $wouldBeNestedTable;
			
			// Linearize nested tables. They would be nested in the adapted
			// content as soon as the current table or one of its ancestors
			// is kept.
			$subtableWouldBeNestedTable = $wouldBeNestedTable || !$tableShouldBeLinearized;
			$table = $this->recLinearizeTables($table, $subtableWouldBeNestedTable);
			
			// Linearize current table if needed
			// or re-insert starting and closing tag and convert the table to
			// a basic table if not
			if ($tableShouldBeLinearized) {
				$table = $this->linearizeTable($table);
			}
			else {
				$table = $this->convertTableToBasic($openingTag . $table . $closingTag);
				// Save the table for now as we might be in the middle
				// of a deep recursion and don't want the cells of that
				// table to affect the algorithm at the upper level.
				$placeholder = '__TABLE_' . count($this->savedTables) . '__';
				array_push($this->savedTables, array($placeholder, $table));
				$table = $placeholder;
			}
			
			// Add result to adapted content
			$adaptedContent .= $table;
			
			// Continue with the rest of the block
			$index = $subblockInfo[4];
		}
		
		// Add final block
		$adaptedContent .= substr($content, $index, strlen($content) - $index);
		
		return $adaptedContent;
	}
	
	
	/**
	 * Linearizes the current table based on its type.
	 * 
	 * @param string $content The contents of the table to linearize, without
	 *   the enclosing "table" tags.
	 * @return string The linearized table.
	 */
	private function linearizeTable($content) {
		$adaptedContent = $this->convertTableToBasic($content);
		
		// Strip the "caption" tags, keeping the contents of the caption.
		// NB: the linearization below only outputs the contents of cells,
		// so text that is not in a cell does not make it to the result.
		$adaptedContent = $this->pregReplaceOrKeep(
			'/\<caption.*>(.*)<\/caption>/Usi',
			'\1',
			$adaptedContent);
		
		$tableType = $this->detectTableType($adaptedContent);
		
		switch ($tableType) {
			case TRANSPYTHIA_TABLE_LAYOUT:
				$adaptedContent = $this->linearizeLayoutTable($adaptedContent);
				break;
			case TRANSPYTHIA_TABLE_DATA:
				// TODO: replace by appropriate call when implemented
				// (linearizeHorizontalTable or linearizeVerticalTable)
				$adaptedContent = $this->linearizeLayoutTable($adaptedContent);
				break;
		}
		
		return $adaptedContent;
	}
	
	
	/**
	 * Removes elements of the given table that are not defined in the Basic
	 * Tables module:
	 * http://www.w3.org/TR/xhtml-modularization/abstract_modules.html#s_simpletablemodule
	 * 
	 * In particular, the function removes the "thead", "tbody" and "tfoot"
	 * tags (their contents are kept), and removes "colgroup" elements
	 * altogether.
	 *  
	 * @param string $content The table to convert, must not contain any nested table
	 * @return string The converted table
	 */
	private function convertTableToBasic($content) {
		$adaptedContent = $this->pregReplaceOrKeep(
			'/\<(thead|tbody|tfoot).*>(.*)<\/(thead|tbody|tfoot)>/Usi',
			'\2',
			$content);
		$adaptedContent = $this->pregReplaceOrKeep(
			'/\<colgroup.*>(.*)<\/colgroup.*>/Usi',
			'',
			$adaptedContent);
		return $adaptedContent;
	}
	
	/**
	 * Runs preg_replace and falls back to the unmodified subject when the
	 * regular expression engine reports an error.
	 * 
	 * preg_replace returns null on error, which would otherwise silently
	 * erase the content being transcoded.
	 * 
	 * @param string $pattern The regular expression to search for.
	 * @param string $replacement The replacement string.
	 * @param string $subject The string to search and replace in.
	 * @return string The result of the replacement, or the subject as is if
	 *   the replacement failed.
	 */
	private function pregReplaceOrKeep($pattern, $replacement, $subject) {
		$result = preg_replace($pattern, $replacement, $subject);
		return ($result === null) ? $subject : $result;
	}
	
	/**
	 * Determines the type of the given table that will be used to linearize
	 * the table appropriately.
	 * 
	 * Header cells that belong to nested tables are ignored. Header cells
	 * defined in a "thead" element are taken into account.
	 * 
	 * @param string $content The XHTML table to parse
	 * @return int The type of table, i.e. one of TRANSPYTHIA_TABLE_LAYOUT
	 *   or TRANSPYTHIA_TABLE_DATA
	 */
	private function detectTableType($content) {
		// Remove nested tables to ease cell identification
		$index = 0;
		$contentNoTables = '';
		while (true) {
			$subblockInfo = $this->getNextBlock('table', $content, $index);
			if (!$subblockInfo) {
				break;
			}
			$contentNoTables .= substr($content, $index, $subblockInfo[2] - $index);
			$index = $subblockInfo[4];
		}
		$contentNoTables .= substr($content, $index, strlen($content) - $index);
		
		// No header cell (th) means layout table.
		// The tag name must be followed by a space or by the end of the tag
		// so that "thead" is not mistaken for "th". The "thead" element is
		// not removed beforehand as it may well contain the header cells.
		if (!preg_match('|<th(?:\s[^>]*)?>.*?</th>|si', $contentNoTables)) {
			return TRANSPYTHIA_TABLE_LAYOUT;
		}
		else {
			return TRANSPYTHIA_TABLE_DATA;
		}
	}
	

	/**
	 * Linearizes the given layout table.
	 * 
	 * NB: detection of a table used to control the layout of a page is based
	 * on the absence of table headers. Linearization may corrupt the meaning
	 * of the content when tables are used extensively to control the layout
	 * of text.
	 * 
	 * Cells are displayed one after the other in <div> blocks. Content that
	 * is not in a cell is dropped.
	 * 
	 * @param string $content The layout table to linearize
	 * @return string the linearized table, an empty string if the table
	 *   does not contain any cell.
	 */
	private function linearizeLayoutTable($content) {
		$adaptedContent = '';
		
		$found = preg_match_all(
			'/<(td|th).*>(.*)<\/(td|th)/Usi',
			$content,
			$cells);
		if (!$found) {
			// No cell found (or regular expression error)
			return $adaptedContent;
		}
		
		foreach ($cells[2] as $cell) {
			$adaptedContent .= '<div>'
				. trim($cell)
				. '</div>';
		}
		
		return $adaptedContent;
	}

	/**
	 * Linearizes the table when first row contains the headers.
	 * 
	 * Not implemented yet! For the time being, the cells of each row are
	 * written one after the other in <div> blocks, as done for layout
	 * tables.
	 * 
	 * The first column determines the item name. The remaining columns contain
	 * the item's description.
	 * 
	 * @param string $content The vertical table to linearize.
	 * @return string The linearized content, or the content as is when it
	 *   does not contain any row.
	 */
	private function linearizeVerticalTable($content){
		if (!preg_match_all('|<tr.*>(.*)</tr>|Usi', $content, $rows)) {
			// No rows, nothing to linearize
			return $content;
		}
		
		$adaptedContent = '';
		foreach ($rows[1] as $row) {
			$adaptedContent .= $this->linearizeLayoutTable($row);
		}
		
		return $adaptedContent;
	}

	/**
	 * Linearizes the given horizontal table.
	 * 
	 * Not implemented yet! The DOM document is returned as is.
	 * 
	 * The first line determines the item name. The remaining lines contain
	 * the item description.
	 * 
	 * @param DOMDocument $doc The DOM document that contains the table.
	 * @param DOMElement $table The table to linearize in the DOM document.
	 * @return DOMDocument The updated DOM document.
	 */
	private function linearizeHorizontalTable(&$doc, $table){
		return $doc;
	}
}

?>