<?php
/**
 * This file is part of the WURFL implementation of the DDR Simple API and
 * contains a regular Web page with a form and the implementation of an
 * internal class used to prepare the WURFL database to suit the needs of
 * the DDR Simple API.
 * 
 * @author Sylvain Lequeux
 * @author Francois Daoust <fd@w3.org>
 * @package AskPythia
 * @subpackage Implementation
 * @version $Revision: 1.22 $
 * @license http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231.html W3C Software Notice and License
 * @copyright Copyright (c) 2009, W3C (MIT, ERCIM, Keio)
 */

/**
 * Prepares the WURFL XML database for use in AskPythia.
 * 
 * The class is part of the WURFL implementation of the DDR Simple API.
 * 
 * @author Sylvain Lequeux
 * @author Francois Daoust <fd@w3.org>
 * @package AskPythia
 * @subpackage Implementation
 * @link http://www.w3.org/TR/DDR-Simple-API/ Device Description Repository Simple API
 * @link http://wurfl.sourceforge.net/ WURFL
 * @version $Revision: 1.22 $
 * @license http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231.html W3C Software Notice and License
 * @copyright Copyright (c) 2009, W3C (MIT, ERCIM, Keio)
 */
class WURFLPrepareDatabase {
	/**
	 * The path of the folder in which the WURFL resources are located.
	 * @var string
	 * @static
	 * @access private
	 */
	static private $WURFL_home;
	
	/**
	 * The path of the folder in which the result will be placed.
	 * @var string
	 * @static
	 * @access private
	 */
	static private $WURFL_home_src;
	
	/**
	 * The path of the WURFL main resource file.
	 * @var string
	 * @access private
	 */
	private $WURFL_resource;
	
	/**
	 * The path of the WURFL config file.
	 * NOTE(review): not referenced anywhere else in this class; kept so
	 * that nothing relying on its declaration breaks.
	 * @var string
	 * @access private
	 */
	private $WURFL_config;
	
	/**
	 * The path of the optional WURFL patch file, assigned by the
	 * constructor and read by load_files().
	 * It is declared explicitly because creating properties on the fly is
	 * deprecated as of PHP 8.2.
	 * @var string
	 * @access private
	 */
	private $WURFL_patch;
	
	/**
	 * Contains the names in the DDR corresponding to the WURFL capabilities
	 * @var array
	 * @access private
	 * 
	 * TODO: wurflMapping should replace usable_properties and some of the
	 * following variables.
	 */
	//private $usable_properties;
	private $wurflMapping;
	
	/**
	 * Contains the format of the value (E for Enumeration, V for Value).
	 * @var array
	 * @access private
	 */
	//private $ddr_properties_format;
	
	/**
	 * Translation table for value when it's an enumeration.
	 * @var array
	 * @access private
	 */
	//private $ddr_names;
	
	/**
	 * The list of patches to apply
	 * @var array
	 */
	private $patches;
	
	/**
	 * It represents the content of all the resource files. 
	 * The key is the ID of the device. The value is an array with :
	 * the id, the user agent, the fall back and the capabilities as an array.
	 * @var array
	 * @access private
	 */
	private $result;
	
	/**
	 * It represents the content to put in the token file.
	 * @var array 
	 * @access private
	 */
	private $tokens;
	
	/**
	 * An error message to display when there is something wrong.
	 * @var string
	 * @access private
	 */
	private $error;
	
	/**
	 * @var string the version id of the wurfl resource file.
	 */
	private $version;
	
	/**
	 * Initialises the working state, the file paths and the table that
	 * maps WURFL capabilities to DDR properties.
	 * @param string $WURFL_home The path of the folder that contains the
	 * 			WURFL resource files. File names are appended to it as is.
	 * @access public
	 */
	public function __construct($WURFL_home){
		$this->version = '';
		$this->error = '';
		$this->result = array();
		
		// Generated files are written to the folder the sources are read from.
		self::$WURFL_home = $WURFL_home;
		self::$WURFL_home_src = $WURFL_home;
		$this->WURFL_resource = $WURFL_home . 'wurfl.xml';
		$this->WURFL_patch = $WURFL_home . 'wurfl_patch.xml';
		
		// Each entry tells how a WURFL capability is read and converted to a
		// property of the DDR Core Vocabulary. A few properties are taken
		// straight from the WURFL vocabulary; this only works because the
		// local names of both vocabularies do not overlap (so far).
		//
		// More properties may be listed here as needed, but
		// WURFLService::$WURFL_PROPERTIES must then be updated consequently.
		$device = 'device';
		$browser = 'webBrowser';
		$noAspect = '__NULL';
		$this->wurflMapping = array(
			'brand_name' => self::describe_capability('vendor', $device, 'string'),
			'mobile_browser' => self::describe_capability('vendor', $browser, 'string'),
			'model_name' => self::describe_capability('model', $device, 'string'),
			'resolution_width' => self::describe_capability('displayWidth', $device, 'int'),
			'resolution_height' => self::describe_capability('displayHeight', $device, 'int'),
			'colors' => self::describe_capability('displayColorDepth', $device, 'int'),
			'html_wi_w3_xhtmlbasic' => self::describe_capability(
				'markupSupport', $browser, 'enumeration', array('true' => 'xhtmlBasic11')),
			'html_wi_oma_xhtmlmp_1_0' => self::describe_capability(
				'markupSupport', $browser, 'enumeration', array('true' => 'xhtmlMP10')),
			'gif' => self::describe_capability(
				'imageFormatSupport', $browser, 'enumeration', array('true' => 'gif')),
			'jpg' => self::describe_capability(
				'imageFormatSupport', $browser, 'enumeration', array('true' => 'jpg')),
			'png' => self::describe_capability(
				'imageFormatSupport', $browser, 'enumeration', array('true' => 'png')),
			'accept_third_party_cookie' => self::describe_capability('cookieSupport', $browser, 'bool'),
			'ajax_support_javascript' => self::describe_capability(
				'scriptSupport', $browser, 'enumeration', array('true' => 'ecmascript-MP')),
			
			// Properties that stay within the WURFL vocabulary
			'xhtml_table_support' => self::describe_capability('xhtml_table_support', $noAspect, 'bool'),
			'is_wireless_device' => self::describe_capability('is_wireless_device', $noAspect, 'bool'),
			);
	}
	
	/**
	 * Builds one entry of the WURFL mapping table.
	 * @param string $property The name of the target property.
	 * @param string $aspect The aspect the property applies to.
	 * @param string $type The type of the property value.
	 * @param array|null $valueMapping For enumerations, the translation of
	 * 			WURFL values; the key is left out of the entry when null.
	 * @return array The mapping entry.
	 * @access private
	 */
	private static function describe_capability($property, $aspect, $type, $valueMapping = null){
		$entry = array(
			'property' => $property,
			'aspect' => $aspect,
			'type' => $type);
		if($valueMapping !== null){
			$entry['valueMapping'] = $valueMapping;
		}
		return $entry;
	}
	
	/**
	 * Manages the preparation of the resources files.
	 * 
	 * The five steps (loading, computing device properties, writing the
	 * prepared file, computing the devices index, computing the families
	 * index) are run in sequence and their status is echoed as an HTML list.
	 * 
	 * @return boolean true when all the steps have been run.
	 * @throws Exception when the files cannot be loaded or when the
	 * 				attributes cannot be determined. Exceptions thrown by
	 * 				the individual steps are not caught here.
	 * @access public
	 */
	public function prepare_files(){
		echo '<ul>';
		
		echo '<li>Loading files... ';
		flush();
		//We load the files in the $this->result variable
		if(!$this->load_files()){
			echo '<strong>failed</strong></li></ul>';
			throw new Exception('Fail to load files');
		}
		echo '<strong>OK</strong>';
		echo '</li>';
		
		echo '<li>Computing device properties... ';
		flush();
		//We determine all the capabilities recursively
		if(!$this->determine_all_attribute()){
			echo '<strong>failed</strong></li></ul>';
			throw new Exception('Failed to determine attributes');
		}
		$this->report_step_done();
		
		echo '<li>Writing prepared file... ';
		flush();
		//We generate the main resource file
		$patched_file = $this->generate_source_file();
		$this->report_step_done(count($this->result) . ' devices found');
		
		echo '<li>Computing devices index... ';
		flush();
		//We generate the token file
		$token_file = $this->createDevicesFile($patched_file);
		$this->report_step_done();
		
		echo '<li>Computing families index... ';
		flush();
		//We generate the name file
		$this->createFamiliesFile($token_file);
		$this->report_step_done();
		
		echo '</ul>';
		return true;
	}
	
	/**
	 * Echoes the "OK" status that closes the list item of a step.
	 * 
	 * A pending message in $this->error takes precedence over the given
	 * detail and is cleared once reported. The text is HTML-escaped because
	 * it may contain file paths that derive from user input.
	 * 
	 * @param string $detail Text to display between parentheses when no
	 * 				message is pending.
	 * @access private
	 */
	private function report_step_done($detail = ''){
		if(trim($this->error) != ''){
			$detail = $this->error;
			$this->error = '';
		}
		echo '<strong>OK</strong>';
		if($detail != ''){
			echo ' (' . htmlspecialchars($detail) . ')';
		}
		echo '</li>';
	}
	
	/**
	 * Loads the files in memory.
	 * The main WURFL file is loaded first, then the patch file if there
	 * is one. It sets the $this->result variable.
	 * 
	 * @return boolean true once the files have been loaded.
	 * @throws Exception when the main WURFL file does not exist.
	 * @access private
	 */
	private function load_files(){
		if(!file_exists($this->WURFL_resource)){
			throw new Exception('WURFL file not found: '.$this->WURFL_resource);
		}
		$this->load_file($this->WURFL_resource);
		
		// The patch is optional and goes on top of the main file
		if(file_exists($this->WURFL_patch)){
			$this->load_file($this->WURFL_patch);
		}
		
		return true;
	}
	
	/**
	 * It loads a file in memory.
	 * It uses the SimpleXML library to parse the file.
	 * It converts the names and values of the properties
	 * (called capabilities in the xml file) using $this->wurflMapping.
	 * Capabilities that are not listed in the mapping are ignored.
	 * 
	 * @param string $path The path of the file to load.
	 * @throws Exception when the file cannot be parsed.
	 * @access private
	 */
	private function load_file($path){
		$xml = simplexml_load_file($path);
		if($xml === false){
			// Without this check, the code below would fail on a boolean
			throw new Exception('Unable to parse WURFL file: ' . $path);
		}
		
		if($path == $this->WURFL_resource && $this->version == ''){
			//We have to get the version
			$versionNode = $xml->version->children();
			$this->version = (string) $versionNode->ver;
		}
		
		foreach($xml->devices->children() as $device){
			$newDevice = array();
			$id = (string) $device['id'];
			$newDevice['id'] = $id;
			$newDevice['user_agent'] = (string) $device['user_agent'];
			$newDevice['fall_back'] = (string) $device['fall_back'];
			
			// A device that was already loaded (e.g. from the main file when
			// the patch is processed) keeps its capabilities.
			// NOTE(review): a capability defined again is appended after
			// '//' below, not replaced - confirm this is what patches expect.
			if(array_key_exists($id, $this->result)){
				$capas = $this->result[$id]['capabilities'];
			}
			else{
				$capas = array();
			}
			
			foreach($device->children() as $group){
				foreach($group->children() as $capa){
					$name = (string) $capa['name'];
					$value = (string) $capa['value'];
					
					if(!array_key_exists($name, $this->wurflMapping)){
						continue;
					}
					$prop = $this->wurflMapping[$name];
					$real_name = $prop['property'];
					$aspect = $prop['aspect'];
					
					// Enumerations are translated, a value without
					// translation becomes the empty string
					if($prop['type'] == 'enumeration'){
						if(array_key_exists($value, $prop['valueMapping'])){
							$real_value = $prop['valueMapping'][$value];
						}
						else{
							$real_value = '';
						}
					}
					else{
						$real_value = $value;
					}
					
					if(array_key_exists($real_name, $capas)
						&& array_key_exists($aspect, $capas[$real_name])){
						// Several WURFL capabilities may feed the same
						// property, values are then separated by '//'
						$capas[$real_name][$aspect] .= '//' . $real_value;
						$capas[$real_name][$aspect] = trim($capas[$real_name][$aspect]);
					}
					else{
						if(!array_key_exists($real_name, $capas)){
							$capas[$real_name] = array();
						}
						$capas[$real_name][$aspect] = $real_value;
					}
				}
			}
			$newDevice['capabilities'] = $capas;
			$this->result[$id] = $newDevice;
		}
	}
	
	/**
	 * Calculates the value of a capability for a certain device.
	 * 
	 * When a device does not define a non-empty value for the given
	 * property, the value is searched in its fall back device, and so on
	 * until a value is found or the root device (id=generic) is reached.
	 * 
	 * The chain is followed iteratively. If it leads to an unknown device
	 * or loops back on a device that was already visited, the search stops
	 * and the value of the root device is used, so that a broken fall back
	 * chain cannot make the script run forever.
	 * 
	 * $this->error is incremented each time the root device has no
	 * non-empty value to offer.
	 * 
	 * @param string $id The id of the device to search in.
	 * @param string $capaName The name of the capability we look for.
	 * @param string $aspect The aspect of the capability we look for.
	 * 
	 * @return string The value of the capability. It may be an empty string.
	 */
	private function determine_attribute($id, $capaName, $aspect){
		$visited = array();
		while($id != 'generic'
			&& array_key_exists($id, $this->result)
			&& !array_key_exists($id, $visited)){
			$visited[$id] = true;
			$capas = $this->result[$id]['capabilities'];
			if(array_key_exists($capaName, $capas)
				&& array_key_exists($aspect, $capas[$capaName])
				&& trim($capas[$capaName][$aspect]) != ''){
				return $capas[$capaName][$aspect];
			}
			$id = $this->result[$id]['fall_back'];
		}
		
		// Root device reached (or broken chain): use the root value, which
		// defaults to the empty string when the root does not define it
		$rootValue = '';
		if(array_key_exists('generic', $this->result)
			&& array_key_exists($capaName, $this->result['generic']['capabilities'])
			&& array_key_exists($aspect, $this->result['generic']['capabilities'][$capaName])){
			$rootValue = $this->result['generic']['capabilities'][$capaName][$aspect];
		}
		if(trim($rootValue) == ''){
			$this->error ++;
		}
		return $rootValue;
	}
	
	/**
	 * The aim of this function is to determine all the capabilities for all 
	 * the devices. In fact, when a device doesn't declare a property, we have
	 * to look for it in its fall back device.
	 * 
	 * To speed up the search of property values, each device of
	 * $this->result ends up declaring all the properties (and aspects)
	 * that the root device (id=generic) declares.
	 * 
	 * @return boolean true if there was no problem during the execution,
	 * 				false if the root device or its capabilities are missing.
	 */
	private function determine_all_attribute(){
		//We consider that the root device (id=generic) defines 
		//all the capabilities to transmit to its children
		if(!array_key_exists('generic', $this->result) 
			|| !array_key_exists('capabilities', $this->result['generic'])){
			return false;
		}
		
		//We start by loading the name of all the capabilities, with the
		//list of aspects declared for each of them
		$capabilitiesName = array();
		foreach($this->result['generic']['capabilities'] as $name=>$props){
			$capabilitiesName[$name] = array_keys($props);
		}
		
		//determine_attribute() uses $this->error as a counter
		$this->error = 0;
		//Then we look at each device.
		foreach($this->result as $id=>$device){
			if($id == 'generic'){
				continue;
			}
			//For each device, we examine each capability
			foreach($capabilitiesName as $name=>$aspects){
				foreach($aspects as $aspect){
					$nameExists = array_key_exists($name, $device['capabilities']);
					if(!$nameExists){
						$this->result[$id]['capabilities'][$name] = array();
					}
					
					$alreadyExists = $nameExists
						&& array_key_exists($aspect, $device['capabilities'][$name])
						&& trim($device['capabilities'][$name][$aspect]) != '';
					
					if(!$alreadyExists){
						$this->result[$id]['capabilities'][$name][$aspect]
							= $this->determine_attribute($id, $name, $aspect);
					}
				}
			}
		}
		
		//The number of capabilities that could only be set to the value of
		//the root device is deliberately not reported: the counter is
		//turned back into an empty message.
		$this->error = '';
		return true;
	}
	
	/**
	 * Generates the main resource file (wurfl_patched.xml).
	 * It uses the $this->result variable.
	 * It needs to have the writing rights in the self::$WURFL_home_src folder.
	 * 
	 * Attribute values are escaped with htmlspecialchars() rather than
	 * htmlentities(): the latter produces HTML named entities (such as
	 * &eacute;) that are not defined in XML.
	 * 
	 * $this->error is set to a notice when an existing file is replaced.
	 * 
	 * @return string The name of the generated file
	 * @throws Exception when the file cannot be written.
	 * @access private
	 */
	private function generate_source_file(){
		$file = self::$WURFL_home_src . 'wurfl_patched.xml';
		if(file_exists($file)){
			$this->error = $file . " overrided";
		}
		
		// Lines end with a carriage return and are indented with tabs
		$eol = chr(13);
		$tab = chr(9);
		
		//Determine the content to place in the file.
		$string = '<?xml version="1.0" encoding="utf-8"?>' . $eol
			. '<devices version="' . $this->version . '">' . $eol;
		foreach($this->result as $device){
			$string .= $tab . '<device id="'
				. htmlspecialchars($device['id'], ENT_COMPAT, 'UTF-8')
				. '" user_agent="'
				. htmlspecialchars($device['user_agent'], ENT_COMPAT, 'UTF-8')
				. '" fall_back="'
				. htmlspecialchars($device['fall_back'], ENT_COMPAT, 'UTF-8')
				. '" >' . $eol;
			foreach($device['capabilities'] as $name=>$props){
				foreach($props as $aspect=>$value){
					$string .= $tab . $tab . '<capability name="'
						. htmlspecialchars((string) $name, ENT_COMPAT, 'UTF-8')
						. '" value="'
						. htmlspecialchars((string) $value, ENT_COMPAT, 'UTF-8')
						. '" aspect="'
						. htmlspecialchars((string) $aspect, ENT_COMPAT, 'UTF-8')
						. '" />' . $eol;
				}
			}
			$string .= $tab . '</device>' . $eol;
		}
		$string .= '</devices>' . $eol;
		
		//Writing the file
		if(file_put_contents($file, $string) === false){
			throw new Exception('Unable to write file: ' . $file);
		}
		
		return $file;
	}
	
	/**
	 * Generates the devices index file (wurfl_devices.xml).
	 * This file contains a list of token. Each token has
	 * a name which is the first part of a device's user agent.
	 * So, devices are grouped by their first user agent's token.
	 * Each device is only represented by its full user 
	 * agent and the offset (and length) to search
	 * in the source file to find the device's properties.
	 * It sets the $this->tokens variable.
	 * 
	 * Devices are located one after the other in the source file, in the
	 * order of $this->result, which is the order generate_source_file()
	 * writes them in. Searching for the id itself is avoided on purpose:
	 * an id that is the prefix of another id could match the wrong device.
	 * 
	 * $this->error is set to a notice when an existing file is replaced.
	 * 
	 * @see generate_source_file()
	 * @param string $patched_file The name of the main resource file
	 * 			generated by the generate_source_file() function of this class.
	 * @return string The name of generated token file. 
	 * @throws Exception when the source file cannot be read, when a device
	 * 			cannot be located in it or when the file cannot be written.
	 * @access private
	 */
	private function createDevicesFile($patched_file){
		$token_file = self::$WURFL_home_src . 'wurfl_devices.xml';
		
		//read the content of the source file
		$contents = file_get_contents($patched_file);
		if($contents === false){
			throw new Exception('Unable to read file: ' . $patched_file);
		}
		
		//Set the $this->tokens variable
		$this->tokens = array();
		$openingTag = '<device id="';
		$closingTag = '</device>';
		$searchFrom = 0;
		foreach($this->result as $device){
			$user_agent = $device['user_agent'];
			$ua_split = explode(' ', $user_agent);
			$firstToken = strtolower($ua_split[0]);
			if(!array_key_exists($firstToken, $this->tokens)){
				$this->tokens[$firstToken] = array();
			}
			
			$offset = strpos($contents, $openingTag, $searchFrom);
			$endOffset = ($offset === false)
				? false
				: strpos($contents, $closingTag, $offset);
			if($offset === false || $endOffset === false){
				throw new Exception('Device "' . $device['id']
					. '" not found in ' . $patched_file);
			}
			$length = $endOffset + strlen($closingTag) - $offset;
			$this->tokens[$firstToken][$user_agent] = array($offset, $length);
			
			// The next device starts after the end of this one
			$searchFrom = $endOffset + strlen($closingTag);
		}
		
		if(file_exists($token_file)){
			$this->error = $token_file . ' overrided';
		}
		
		// Lines end with a carriage return and are indented with tabs
		$eol = chr(13);
		$tab = chr(9);
		
		//Determine the content to put in the file.
		$string = '<?xml version="1.0" encoding="utf-8"?>' . $eol
			. '<tokens>' . $eol;
		foreach($this->tokens as $name=>$token){
			$string .= $tab . '<token name="'
				. htmlspecialchars((string) $name, ENT_COMPAT, 'UTF-8')
				. '" >' . $eol;
			foreach($token as $ua=>$location){
				// The written length is one more than the length of the
				// element, presumably to include the line ending
				$string .= $tab . $tab . '<device user_agent="'
					. htmlspecialchars((string) $ua, ENT_COMPAT, 'UTF-8')
					. '" offset="' . $location[0]
					. '" length="' . ($location[1] + 1)
					. '" />' . $eol;
			}
			$string .= $tab . '</token>' . $eol;
		}
		$string .= '</tokens>' . $eol;
		
		//Writing the file
		if(file_put_contents($token_file, $string) === false){
			throw new Exception('Unable to write file: ' . $token_file);
		}
		
		return $token_file;
	}
	
	/**
	 * Generates the families index file (wurfl_families.xml).
	 * This file contains a list of token. Each token has
	 * a name which is the first part of a device's user agent.
	 * Each token has an offset and a length too, which tell where
	 * to search in the token file to find more details.
	 * 
	 * Tokens are located one after the other in the token file, in the
	 * order of $this->tokens, which is the order createDevicesFile()
	 * writes them in. Searching for the name itself from the start of the
	 * file is avoided on purpose: a name that is the prefix of another
	 * name could match the wrong token.
	 * 
	 * $this->error is set to a notice when an existing file is replaced
	 * and to an error message when the token file does not exist.
	 * 
	 * @see createDevicesFile()
	 * @param string $token_file The name of the token file generated by
	 * 			the createDevicesFile() function of this class
	 * @return string The name of the generated name file, or an empty
	 * 			string when the token file does not exist.
	 * @throws Exception when the token file cannot be read, when a token
	 * 			cannot be located in it or when the file cannot be written.
	 * @access private
	 */
	private function createFamiliesFile($token_file){
		$name_file = self::$WURFL_home_src . 'wurfl_families.xml';
		if(file_exists($name_file)){
			$this->error = $name_file . ' overrided';
		}
		
		if(!file_exists($token_file)){
			$this->error = 'No source file ('.$token_file.'). Can\'t create tokens file';
			return '';
		}
		
		//Read the token file
		$contents = file_get_contents($token_file);
		if($contents === false){
			throw new Exception('Unable to read file: ' . $token_file);
		}
		
		// Lines end with a carriage return and are indented with tabs
		$eol = chr(13);
		$tab = chr(9);
		$openingTag = '<token name="';
		$closingTag = '</token>';
		
		//Determine the content to put in the file
		$string = '<?xml version="1.0" encoding="utf-8"?>' . $eol
			. '<tokens>' . $eol;
		$searchFrom = 0;
		foreach($this->tokens as $name=>$token){
			$offset = strpos($contents, $openingTag, $searchFrom);
			$endOffset = ($offset === false)
				? false
				: strpos($contents, $closingTag, $offset);
			if($offset === false || $endOffset === false){
				throw new Exception('Token "' . $name
					. '" not found in ' . $token_file);
			}
			// One more than the length of the element, presumably to
			// include the line ending
			$length = $endOffset + strlen($closingTag) - $offset + 1;
			$string .= $tab . '<token name="'
				. htmlspecialchars(strtolower((string) $name), ENT_COMPAT, 'UTF-8')
				. '" offset="' . $offset
				. '" length="' . $length
				. '" />' . $eol;
			
			// The next token starts after the end of this one
			$searchFrom = $endOffset + strlen($closingTag);
		}
		$string .= '</tokens>' . $eol;
		
		//Writing the file
		if(file_put_contents($name_file, $string) === false){
			throw new Exception('Unable to write file: ' . $name_file);
		}
		
		return $name_file;
	}
	
	/**
	 * Outputs the beginning of the XHTML page: the XML declaration, the
	 * doctype, the head (title and inline style sheet) and the opening of
	 * the body with the main heading.
	 * 
	 * The html and body elements are left open, writeHtmlFooter() closes
	 * them. The XML declaration is echoed from PHP code, presumably so
	 * that its "<?" is not taken for a PHP short open tag.
	 * 
	 * @access public
	 */
	public function writeHtmlHeader() {
		echo '<?xml version="1.0" encoding="utf-8"?>';
		?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
	<head>
		<title>WURFL preparation for use in AskPythia</title>
		<style type="text/css">
			html,body {
			  color: black;
			  background: white;
			  font-family: "Gill Sans", arial, helvetica, sans-serif;
			  margin: 0;
			  margin-left: 1em;
			  padding: 0;
			}
			p { text-align: justify; }
			h2 { border-bottom: 1px #95BDFB solid; font-size: 1.2em; padding-bottom: 4px; margin-top: 1em; }
			
		</style>
	</head>
	<body>
		<h1>WURFL preparation for use in AskPythia</h1>
		<?php
	}
	
	/**
	 * Outputs the end of the XHTML page: it closes the body and html
	 * elements opened by writeHtmlHeader().
	 * 
	 * @access public
	 */
	public function writeHtmlFooter() {
		?>
		</body></html>
		<?php
	} 
}

/*
 * Main part of the script.
 * 
 * When the form has been submitted with a non-empty "src" field, the WURFL
 * files of that folder are processed and the outcome is displayed.
 * Otherwise, the instructions and the form used to select the path of
 * the wurfl files are displayed.
 */
if(array_key_exists('src', $_POST) && is_string($_POST['src']) && $_POST['src'] !== ''){
	$path = $_POST['src'];
	// File names are appended to the path as is: it must end with a slash
	if(substr($path, -1) !== '/'){
		$path .= '/';
	}
	$prepare = new WURFLPrepareDatabase($path);
	
	$prepare->writeHtmlHeader();
	
	echo '<h2>Step 4: Processing WURFL file</h2>';
	
	// prepare_files() reports failures through exceptions: they are caught
	// here so that the page is completed with a readable error message.
	// The message is escaped as it may contain the submitted path.
	$success = false;
	try {
		$success = $prepare->prepare_files();
	}
	catch(Exception $e){
		echo '<p><strong>Error:</strong> ' . htmlspecialchars($e->getMessage()) . '</p>';
	}
	
	if($success){
		echo '<p>Processing done, check above for eventual error messages!</p>';
	}
	else {
		echo '<p>An error occurred while processing the WURFL file, please check above!</p>';
	}
	
	?>
	<h2>Step 5: Clean-up</h2>
	<p>Check above messages for errors. If files could be properly generated, the folder that contained the <code>wurfl.xml</code> should now contain 3 additional files:</p>
	<ul>
		<li><code>wurfl_patched.xml</code></li>
		<li><code>wurfl_devices.xml</code></li>
		<li><code>wurfl_families.xml</code></li>
	</ul>
	<p>Write access rights on the folder are no longer needed and should be removed.</p>
	<p>The initial WURFL file (and patch) are no longer needed and may be deleted.</p>
	
	<p><a href="WURFLPrepareDatabase.php">Prepare another WURFL file</a>.</p>
	<?php
	$prepare->writeHtmlFooter();
}
else {
	$prepare = new WURFLPrepareDatabase("");
	$prepare->writeHtmlHeader();
	?>
	<h2>Step 1: Download and extract WURFL</h2>
	<p>The latest version of the WURFL file is available at:<br/>
	<a href="http://sourceforge.net/projects/wurfl/files/WURFL/">http://sourceforge.net/projects/wurfl/files/WURFL/</a></p>
	<p>The zipped WURFL file must be downloaded and extracted to a <code>wurfl.xml</code> file in a folder that is accessible by the Web server that runs this page.</p>
	<p>You may include a patch file in the folder as needed. This script expects the patch file to be named <code>wurfl_patch.xml</code> and to be saved in the same folder as <code>wurfl.xml</code>.<br/>
	See <a href="http://wurfl.sourceforge.net/patchfile.php">WURFL Patch File</a> for a detailed description of patch files in WURFL.
	</p>

	<h2>Step 2: Ensure settings are correct</h2>
	<p>This script requires:</p>
	<ul>
		<li><strong>read/write access</strong> to the folder that contains the <code>wurfl.xml</code> file.</li>
		<li><strong>256MB</strong> of memory available for PHP as this script processes the file in memory. To check and update the amount of memory that is available for PHP, refer to the <code>php.ini</code> file of your PHP installation and search for the <code>memory_limit</code> property. It should be set to <code>memory_limit = 256M</code> for this script to run properly (this setting may be reset once the script has run).</li>
	</ul>
	
	<p><strong>NB:</strong> This script will be improved in a future version not to require a change in the memory settings of PHP.</p>
	
	<h2>Step 3: Enter the path to the WURFL file</h2>
	<form action="" method="post">
		<p>Please enter the local (where <em>local</em> refers to the Web server that runs this PHP page) path to the folder that contains the <code>wurfl.xml</code> file in the field below (e.g. <code>file:///home/user/webroot/wurfl/</code>):<br/>
		<input type="text" name="src" size="80" /></p>
		
		<p><input type="submit" name="confirm" id="confirm" value="Prepare WURFL for use in AskPythia" /></p>
	</form>
	<?php
	$prepare->writeHtmlFooter();
}
?>