#!/usr/bin/perl -w

# Script to convert SVG 1.1 to SVG 1.2T tests for SVG 1.2T testsuite, Andreas Neumann, 2007-05

#instructions:
#install the perl module XML::DOM::Lite
#search for the file 'Serializer.pm' and replace it with the patched 'Serializer.pm' available at http://www.w3.org/Graphics/SVG/Group/repository/testsuite/1.2T/script/Serializer.pm
#make sure that the file http://www.w3.org/Graphics/SVG/Group/repository/testsuite/1.2T/script/translation_table.txt is up-to-date
#cd to the testsuite/1.2T/script/ directory and run the script with the command 'perl trans_11_to_12T_tests.pl'

#open issues:
# * provide an option to translate individual tests
# * grab the translation table directly from the latest CVS HTML file ('oneone-to-onetwo.html')
# * make entity code more stable (but already seems to handle current tests)

#required perl modules
use strict;
use XML::DOM::Lite qw(Parser Serializer NodeIterator :constants);

#variable definitions
#all of these are file-level lexicals; the per-test scalars are reused (overwritten) on every pass of the main loop

#parallel lists built from the translation table: element n of each list belongs to the same test
my @infiles;
my @outfiles;
my @testnames;
#scratch list for the fields of a split line
my @temp;
#entity declarations found in the DOCTYPE of the test currently processed (reset for every test)
my @entities;
#report lists printed at the end of the run
my @filesWithDifferentVBs; #input files whose root viewBox differs from "0 0 480 360"
my @filesWithDifferentPARs; #input files whose root element carries a preserveAspectRatio attribute
my @filesWithJS; #input files that contain at least one script element
#number of tests taken from the translation table
my $file_counter = 0;
my $attribLength;
#$i is the index of the current test, $j and $k are inner loop counters
#NOTE(review): $line is not referenced anywhere in the visible part of the script
my ($owner, $reviewer, $desc, $status, $testname, $version, $childLength, $i, $j, $k, $id, $elementAllowed, $line, $pEl, $pContent);
#NOTE(review): $docTypeNode is not referenced anywhere in the visible part of the script
my ($docTypeNode);
#font-family value => number of text elements using it, accumulated over all tests
my %fontFamilies;

# first read in the translation table
# every usable line carries at least two whitespace-separated fields:
#   <file name of the SVG 1.1 test> <file name of the SVG 1.2T test>
# lines whose second field is the word "Not" are skipped
# (presumably the start of a remark such as "Not ..." for tests without a 1.2T counterpart - confirm against the table)
my $transTablePath = "translation_table.txt";
open(my $transTableFh, '<', $transTablePath)
	or die "Can't open translation table file '$transTablePath': $!\n";

while (my $tableLine = <$transTableFh>) {
	# split(' ', ...) discards leading whitespace, so an indented line does not
	# shift the fields by producing an empty first element
	my @fields = split(' ', $tableLine);
	# blank or incomplete lines carry no mapping; without this check they would
	# be registered as a test with an empty file name
	next if @fields < 2;
	next if $fields[1] eq "Not";
	push(@infiles, "../../1.1/svg/".$fields[0]);
	push(@testnames, $fields[1]);
	push(@outfiles, "../svg/".$fields[1]);
	$file_counter++;
}

close($transTableFh);

#processing all files
#$file_counter is incremented once per accepted translation table entry, so it already is the number of files
print "processing ".$file_counter." files\n";

#main loop: one pass per accepted translation table entry; $i indexes @infiles, @outfiles and @testnames in parallel
for ($i=0;$i<$file_counter;$i++) { #comment out this line and the closing '}' marked 'end of for loop ($i)' to translate a single test
#$i = 75; #remove this comment for translating a single specific test (find out the number with the translation table)

#progress message; no newline yet, the metadata found for this test is appended to the same line further down
print "working on test $outfiles[$i]";

#set up both parsers and the serializer, then load a fresh copy of the 1.2T template for this test
my $parser = Parser->new(whitespace => 'normalize');
my $whiteStripParser = Parser->new(whitespace => 'strip');
my $serializer = Serializer->new;
my $template = $parser->parseFile("../status/template12-t_for_script.svg");

#handles to the template nodes that are filled in further down
my $SVGTestCaseTemplate = $template->getElementsByTagName("SVGTestCase")->item(0);
my $testcontentTemplate = $template->getElementById("test-body-content");
my $titleElementTemplate = $template->getElementById("test-title");
my $OperatorScriptTemplate = $template->getElementsByTagName("OperatorScript")->item(0);
my $rootElementTemplate = $template->documentElement;

#throw away the children the template ships with: first those of the test body, then those of the test description
foreach my $templateContainer ($testcontentTemplate, $OperatorScriptTemplate) {
	$childLength = $templateContainer->childNodes->length;
	#one removal of the current first child per child counted above
	foreach my $removalPass (1 .. $childLength) {
		$templateContainer->removeChild($templateContainer->firstChild);
	}
}

#$doc is the document currently processed
#the source test is parsed twice: $doc (whitespace 'normalize') is the tree that is examined and harvested below,
#$docWS (whitespace 'strip') is only used as a donor of CDATA nodes for the script elements
my $doc = $parser->parseFile($infiles[$i]);
my $docWS = $whiteStripParser->parseFile($infiles[$i]);
my $testcontentFile = $doc->getElementById("test-body-content");
my $SVGTestCaseFile = $doc->getElementsByTagName("SVGTestCase")->item(0);
my $OperatorScriptFile = $doc->getElementsByTagName("OperatorScript")->item(0);
my $rootElementFile = $doc->documentElement;

#this is a hack: according to the original author the parser that does not run in whitespace 'strip' mode
#does not handle CDATA sections correctly (it collapses all newlines), so the children of every script element
#in $doc are replaced by the CDATA nodes of the corresponding script element in $docWS
#NOTE(review): the two lists are paired by index, which assumes both parses yield the same script elements in the same order
my $scriptElements = $doc->getElementsByTagName("script");
my $scriptElementsWS = $docWS->getElementsByTagName("script");
$childLength = $scriptElementsWS->length;
for ($j=0;$j<$childLength;$j++) {
	#remove all children of the script element in $doc, walking from the last child to the first
	my $sElChildNum = $scriptElements->item($j)->childNodes->length;
	for ($k=$sElChildNum-1;$k>-1;$k--) {
		$scriptElements->item($j)->removeChild($scriptElements->item($j)->childNodes->item($k));
	}
	#take over only the CDATA children of the whitespace-stripped twin; other node types are dropped
	#NOTE(review): if appendChild detaches the node from its old parent, the donor list shrinks while $k advances - confirm this is safe for script elements with several children
	$sElChildNum = $scriptElementsWS->item($j)->childNodes->length;
	for ($k=0;$k<$sElChildNum;$k++) {
		if ($scriptElementsWS->item($j)->childNodes->item($k)->nodeType == CDATA_SECTION_NODE) {
			$scriptElements->item($j)->appendChild($scriptElementsWS->item($j)->childNodes->item($k));
		}
	}
}
#remember files that contain script elements for the report at the end of the run
if ($scriptElements->length > 0) {
	push(@filesWithJS,$infiles[$i]);
}

#collect the entity declarations of the DOCTYPE, if the source file has any
#afterwards each element of @entities has the form: name "value   (the closing '">' is cut off)
@entities = ();
foreach my $topLevelNode (@{$doc->childNodes}) {
	next unless $topLevelNode->nodeType == DOCUMENT_TYPE_NODE;
	my $DTContent = $topLevelNode->nodeValue."\n";
	next unless $DTContent =~ /<!ENTITY/;
	#cut off everything up to and including the first '<!ENTITY'
	$DTContent =~ s/.+dtd"\s+\[\s*<!ENTITY\s+//;
	#cut off the end of the last declaration together with the closing ']'
	$DTContent =~ s/">\s*\]$//;
	#what separates two declarations is the end of one and the '<!ENTITY' of the next
	@entities = split(/">\s*<!ENTITY\s+/,$DTContent);
}

#compare the viewBox of the source root element with the default "0 0 480 360"
#any other value is recorded and mirrored on the template root: a non-empty value is copied,
#an absent or empty one removes the viewBox attribute from the template
my $viewBoxValueFile = $rootElementFile->getAttribute("viewBox");
if ($viewBoxValueFile ne "0 0 480 360") {
	push(@filesWithDifferentVBs,$infiles[$i]);
	if (!$viewBoxValueFile) {
		$attribLength = $rootElementTemplate->attributes->length;
		foreach my $attribIndex (0 .. $attribLength-1) {
			my $rootAttrib = $rootElementTemplate->attributes->item($attribIndex);
			#guard kept from the original: an index may yield nothing (presumably once an attribute has been removed)
			next unless $rootAttrib;
			next unless $rootAttrib->nodeName eq "viewBox";
			$rootElementTemplate->attributes->removeNode($rootAttrib);
		}
	}
	else {
		$rootElementTemplate->setAttribute("viewBox",$viewBoxValueFile);
	}
	print "\nFile '$infiles[$i]' contains different viewBox: '$viewBoxValueFile'\n";
}

#a preserveAspectRatio attribute on the source root is recorded for the report and copied to the template root
my $preserveAspectRatio = $rootElementFile->getAttribute("preserveAspectRatio");
if ($preserveAspectRatio) {
	push(@filesWithDifferentPARs,$infiles[$i]);
	$rootElementTemplate->setAttribute("preserveAspectRatio",$preserveAspectRatio);
}

#every <defs/> section found anywhere in the source document is appended to the 'test-body-content' of the template
my $defsSections = $doc->getElementsByTagName("defs");
$childLength = $defsSections->length;
foreach my $defsIndex (0 .. $childLength-1) {
	$testcontentTemplate->appendChild($defsSections->item($defsIndex));
}

#copy the onload event handler and the zoomAndPan setting of the source root element, in that order, if they are set
foreach my $rootAttribName ("onload", "zoomAndPan") {
	my $rootAttribValue = $doc->documentElement->getAttribute($rootAttribName);
	if ($rootAttribValue) {
		$template->documentElement->setAttribute($rootAttribName,$rootAttribValue);
	}
}

#determine the owner: the 'owner' attribute of SVGTestCase wins, otherwise a top-level comment mentioning 'Author' is used
$owner = $SVGTestCaseFile->getAttribute("owner");
unless ($owner) {
	foreach my $topLevelNode (@{$doc->childNodes}) {
		next unless $topLevelNode->nodeType == COMMENT_NODE;
		next unless $topLevelNode->nodeValue =~ /Author/;
		#no early exit: if several comments match, the last one is kept
		$owner = $topLevelNode->nodeValue;
		#drop the 'Author:' label
		$owner =~ s/\s*Author\s*:\s*//;
		#drop everything from the first digit on
		$owner =~ s/\s*\d.*$//;
	}
}
if ($owner) {
	#remove trailing white space before storing the value
	$owner =~ s/\s*$//;
	$SVGTestCaseTemplate->setAttribute("owner",$owner);
	print ", owner=$owner";
}

#the reviewer is only taken from the 'reviewer' attribute of SVGTestCase
$reviewer = $SVGTestCaseFile->getAttribute("reviewer");
if ($reviewer) {
	$SVGTestCaseTemplate->setAttribute("reviewer",$reviewer);
	print ", reviewer=$reviewer";
}

#see if we can determine the desc
#sources, in order of preference:
# 1. the 'desc' attribute of SVGTestCase
# 2. the first child of the element with id 'test-desc'
# 3. a top-level comment that starts with 'Test' (case-insensitive)
$desc = $SVGTestCaseFile->getAttribute("desc");
if (!$desc) {
	my $testDesc = $doc->getElementById("test-desc");
	#an empty 'test-desc' element has no first child; calling nodeValue on it would abort the whole run
	if ($testDesc && $testDesc->firstChild) {
		$desc = $testDesc->firstChild->nodeValue;
	}
}
if (!$desc) {
	#try to figure the desc out of header comments
	foreach my $topLevelNode (@{$doc->childNodes}) {
		next unless $topLevelNode->nodeType == COMMENT_NODE;
		#no early exit: if several comments match, the last one is kept
		if ($topLevelNode->nodeValue =~ /^\s*Test/i) {
			$desc = $topLevelNode->nodeValue;
		}
	}
}
if ($desc) {
	#trim leading spaces
	$desc =~ s/^\s*//;
	#trim trailing spaces
	$desc =~ s/\s*$//;
	#replace double quotes with single quotes
	$desc =~ s/"/'/g;
	$SVGTestCaseTemplate->setAttribute("desc",$desc);
	print ", desc=$desc";
}

#copy the 'status' attribute of SVGTestCase if it is set
$status = $SVGTestCaseFile->getAttribute("status");
if ($status) {
	$SVGTestCaseTemplate->setAttribute("status",$status);
	print ", status=$status";
}

#the version comes from SVGTestCase, with the 'version' attribute of OperatorScript as fallback
$version = $SVGTestCaseFile->getAttribute("version");
$version = $OperatorScriptFile->getAttribute("version") unless $version;
if ($version) {
	$SVGTestCaseTemplate->setAttribute("version",$version);
	#the element with id 'revision' in the template displays the version as text
	my $revisionNode = $template->getElementById("revision");
	#this parser seems to have a problem with '$' signs in nodeValue, so the old text node is
	#removed and a newly created one is appended instead of changing the value in place
	$revisionNode->removeChild($revisionNode->firstChild);
	$revisionNode->appendChild($template->createTextNode($version));
	print ", version=$version";
}

#build the RCS keyword string for this test and store it as 'testname' attribute and as title text
#the leading '$' is kept apart from 'RCSfile' in the source code
#(presumably so that the version control system does not expand the keyword inside this script - confirm)
$testname = join("", "\$", "RCSfile: ", $testnames[$i], ",v \$");
$SVGTestCaseTemplate->setAttribute("testname",$testname);
#'$' sign workaround: replace the text node of the title element instead of changing its value
$titleElementTemplate->removeChild($titleElementTemplate->firstChild);
$titleElementTemplate->appendChild($template->createTextNode($testname));
#finish the progress line started at the top of the loop
print "\n";

#count the font-family values used on text elements and strip the attribute where it is 'Arial' or 'Verdana'
my $text_nodesFile = $doc->getElementsByTagName("text");
$childLength = $text_nodesFile->length;
foreach my $textIndex (0 .. $childLength-1) {
	my $textElement = $text_nodesFile->item($textIndex);
	my $font_family = $textElement->getAttribute("font-family");
	next unless $font_family;
	#usage statistics over all tests; a missing entry starts counting at 1
	$fontFamilies{$font_family}++;
	#only these two families are removed
	next unless ($font_family eq "Arial" || $font_family eq "Verdana");
	$attribLength = $textElement->attributes->length;
	foreach my $attribIndex (0 .. $attribLength-1) {
		my $textAttrib = $textElement->attributes->item($attribIndex);
		#guard kept from the original: an index may yield nothing (presumably once an attribute has been removed)
		next unless $textAttrib;
		if ($textAttrib->nodeName eq "font-family") {
			$textElement->attributes->removeNode($textAttrib);
			print "removing font-family attribute ($font_family)\n";
		}
	}
}

#now add the test content of old file to new test body
#only element and comment children of the source 'test-body-content' are taken over, other node types are left behind
#elements whose id is 'revision' or 'test-frame' are not copied
#NOTE(review): if appendChild detaches a node from $testcontentFile, the child list shrinks while $j keeps advancing;
#the existence check on item($j) covers indices past the end, but a child directly following a moved one would be
#skipped, and the second nodeType test below would then look at a different (or missing) node - confirm against XML::DOM::Lite
$childLength = $testcontentFile->childNodes->length;
for ($j=0;$j<$childLength;$j++) {
	if ($testcontentFile->childNodes->item($j)) {
		if ($testcontentFile->childNodes->item($j)->nodeType == ELEMENT_NODE) {
			#elements are copied unless their id is on the filter list
			$elementAllowed = 1;
			$id = $testcontentFile->childNodes->item($j)->getAttribute("id");
			if ($id) {
				#filtering out revision text and test-frame
				if ($id eq "revision" || $id eq "test-frame") {
					$elementAllowed = 0;
				}
			}
			if ($elementAllowed == 1) {
				$testcontentTemplate->appendChild($testcontentFile->childNodes->item($j));
			}
		}
		#comments are always copied
		if ($testcontentFile->childNodes->item($j)->nodeType == COMMENT_NODE) {
			$testcontentTemplate->appendChild($testcontentFile->childNodes->item($j));
		}
	}
}

#process the test descriptions
#first remove old and unnecessary attributes, also to get rid of the evil '$' signs in the attributes ...
#the attributes are removed from the last one to the first
$attribLength = $OperatorScriptFile->attributes->length;
for ($j=$attribLength-1;$j>-1;$j--) {
	$OperatorScriptFile->attributes->removeNode($OperatorScriptFile->attributes->item($j));
}
#add content from old testfile
#only element children are taken over: a 'Paragraph' element becomes a new 'p' element in the template,
#any other element is appended as it is
#NOTE(review): appending $_ in the else branch may detach it from the list that is being iterated - confirm that no child is skipped
foreach (@{$OperatorScriptFile->childNodes}) {
	if ($_) {
		if ($_->nodeType == ELEMENT_NODE) {
			if ($_->nodeName eq "Paragraph") {
				#only the value of the first child is used as paragraph text
				#NOTE(review): a 'Paragraph' without children would make this line fail - confirm that none of the source tests has one
				my $temp_text = $_->firstChild->nodeValue;
				$temp_text =~ s/^\s+//; #trim leading spaces
				$temp_text =~ s/\s+$//; #trim trailing spaces
				$pContent = $template->createTextNode($temp_text);
				$pEl = $template->createElement("p");
				$pEl->appendChild($pContent);
				$OperatorScriptTemplate->appendChild($pEl);
			}
			else {
				$OperatorScriptTemplate->appendChild($_);
			}
		}
	}
}

#now serialize the modified content to a new file
#the XML declaration is written without a line break, directly followed by the serialized root element of the template
open(my $rawOutFh, '>', $outfiles[$i]) or die "Can't open outfile '".$outfiles[$i]."' for writing: $!\n";
print {$rawOutFh} "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
print {$rawOutFh} $serializer->serializeToString($template->documentElement)."\n";
#buffered write errors only show up when the handle is closed, so the result is checked
close($rawOutFh) or die "Can't close outfile '".$outfiles[$i]."' after writing: $!\n";

#postprocessing because of strange serializer/parser, get rid of blank lines
#@cleanLines receives every line of the file just written that contains more than white space, without its line ending
open(my $rawInFh, '<', $outfiles[$i]) or die "Can't open outfile '".$outfiles[$i]."' for reading: $!\n";
my @cleanLines;
while (my $serializedLine = <$rawInFh>) {
	#a line read from the handle ends in at most one newline, so a single chomp is sufficient
	chomp($serializedLine);
	next if $serializedLine =~ /^\s*$/;
	push(@cleanLines,$serializedLine);
}
close($rawInFh);

#now write out cleaned lines to file again
#layout of the final file: the first cleaned line, an internal DOCTYPE subset with the entities
#collected from the source file (only if there are any), then all remaining cleaned lines
open(my $cleanOutFh, '>', $outfiles[$i]) or die "Can't open outfile '".$outfiles[$i]."' for writing: $!\n";
print {$cleanOutFh} $cleanLines[0]."\n";
#process entities
if (@entities) {
	print "found entities ...\n";
	print {$cleanOutFh} "<!DOCTYPE svg [\n";
	foreach my $entityDecl (@entities) {
		#each entry is expected to look like: name "value
		my ($entityName, $entityValue) = split(/\s+"\s*?/,$entityDecl);
		#a fragment without a name cannot be written as a declaration
		next unless defined($entityName) && length($entityName);
		#a declaration without a value is written with an empty one
		$entityValue = "" unless defined($entityValue);
		#trim off leading and trailing whitespaces
		$entityValue =~ s/^\s*//;
		$entityValue =~ s/\s*$//;
		print {$cleanOutFh} "\t<!ENTITY ".$entityName." \"";
		#see if content of entity is xml
		if (($entityValue =~ /^</) && ($entityValue =~ />$/)) {
			#wrap the markup in a temporary element so that all of its top-level nodes can be
			#parsed and serialized one after the other
			my $tempNode = $whiteStripParser->parse("<temp>".$entityValue."</temp>");
			my $contentTempNode = $tempNode->firstChild;
			for (my $entityChildIndex=0;$entityChildIndex<$contentTempNode->childNodes->length;$entityChildIndex++) {
				my $serializedNode = $serializer->serializeToString($contentTempNode->childNodes->item($entityChildIndex));
				#the entity value itself is delimited by double quotes
				$serializedNode =~ s/"/'/g;
				#only the first node loses its leading white space
				if ($entityChildIndex == 0) {
					$serializedNode =~ s/^\s*//;
				}
				print {$cleanOutFh} $serializedNode;
			}
		}
		else {
			print {$cleanOutFh} $entityValue;
		}
		print {$cleanOutFh} "\">\n";
	}
	print {$cleanOutFh} "]>\n";
}
foreach my $cleanLineIndex (1 .. $#cleanLines) {
	#replace id's with xml:id's
	#NOTE(review): this is a textual replacement on whole lines, so ' id=' inside text or script content is rewritten as well - confirm that no test is affected
	$cleanLines[$cleanLineIndex] =~ s/\s+id=/ xml:id=/g;
	print {$cleanOutFh} $cleanLines[$cleanLineIndex]."\n";
}
#buffered write errors only show up when the handle is closed, so the result is checked
close($cleanOutFh) or die "Can't close outfile '".$outfiles[$i]."' after writing: $!\n";

} #end of for loop ($i)

#final report: three headed lists with one file name per line, in this order:
#files with a non-default viewBox, files with a preserveAspectRatio attribute, files with script elements
my @summaryLists = (
	["\n\nFiles with different viewBoxes:", \@filesWithDifferentVBs],
	["\n\nFiles with different preserveAspectRatio values:", \@filesWithDifferentPARs],
	["\n\nFiles with script elements:", \@filesWithJS],
);
foreach my $summaryList (@summaryLists) {
	my ($summaryHeading, $summaryFiles) = @{$summaryList};
	print $summaryHeading;
	print "\n".join("\n",@{$summaryFiles})."\n";
}

#now output all fonts found:
#my @uniqueFonts = keys(%fontFamilies);
#print "\n\nList of Unique fonts:\n\n";
#foreach (@uniqueFonts) {
#	print $_.", used $fontFamilies{$_} times\n";
#}
