#!/usr/bin/perl

# scanTests -- scan DAWG tests for what grammar productions they hit
# $Id: scanTests,v 1.6 2007/01/16 18:58:33 eric Exp $

#my $PARSER_INVOCATION = 'perl -M/Users/eric/perl/modules/W3C/Grammar/bin/uploads/SPARQL/SPARQL.pm -e test';
my $PARSER_INVOCATION; # = "perl -I$path -MSPARQL -e test";
my $REPORT_MISSING = 1;
my $SHOW_MAPPINGS = 0;
my $WATCH_PRODUCTION = '999_O_QVar_E_Plus_Or_QGT_TIMES_E_C';

use strict;
use utf8;
use constant facet => 0;
use constant type => 1;
use constant example => 2;
use constant prodName => 3;
use constant prodURI => 4;
use constant yaccProd => 5;
use constant rule => 6;
use constant regexp => 7;
use constant yaccProdStr => 8;
use constant ruleStr => 9;

use W3C::Util::Exception;
use W3C::Util::Filter;
use W3C::Grammar::YackerTraceParser;
use W3C::Grammar::YaccParser;
use SPARQL;


my $facets = {};
my $SEP = '│'; # chr(9474);
my $FacetsNS = 'http://www.w3.org/2001/sw/DataAccess/tests/facets#';
my $TypesNS = 'http://www.w3.org/2001/sw/DataAccess/tests/#';
my $UseType = 'types:DAWGfacet';
my $IgnoreType = 'types:xDAWGfacet';

sub main {
    my ($argv) = @_;
    local($SIG{"__DIE__"}) = \&DieHandler;
    eval {
	&readFacets(shift @$argv, $facets);
	foreach my $file (@$argv) {
	    print "$file\n";
	    &testQuery($file, $facets, "$file.dump");
	    print "--------------------\n";
	}
	if ($REPORT_MISSING) {
	    foreach my $productionName (sort keys %$facets) {
		next if ($productionName =~ m/^_Q(.*?)_E$/ && $facets->{$1});
		next if ($productionName =~ m/^_O(.*?)_C$/ && $facets->{$1});
		if (!$facets->{$productionName}[0]) {
		    foreach my $facet (@{$facets->{$productionName}[1]}) {
			print "!$facet->[facet] $facet->[example]\n";
		    }
		}
	    }
	}
    }; if ($@) {
	if (my $ex = &catch('W3C::Util::Exception')) {
	    die $ex->toString();
	} else {
	    die $@;
	}
    }
}

sub readFacets {
    my ($facetsFile, $facets) = @_;
    my $facetsText = &readFile($facetsFile, 'query file');
    open(F, "<:utf8", $facetsFile) || die "could not open facets \"$facetsFile\": $!";
    foreach my $facet (<F>) {
	if ($facet =~ m/^$SEP *<(.*?)> *$SEP *<(.*?)> *$SEP *"(.*?)" *$SEP *"(.*?)" *$SEP *<(.*?)> *$SEP *(?:"(.*?)"|NULL) *$SEP *(?:"(.*?)"|NULL) *$SEP *(?:"(.*?)"|NULL) *$SEP$/) {
	    my ($facetName, $type, $example, $prodName, $prodURI, $yaccProd, $rule, $regexp) = ($1, $2, $3, $4, $5, $6, $7, $8);
	    $facetName =~ s{$FacetsNS}{test:};
	    $type =~ s{$TypesNS}{types:};
	    next if ($type ne $UseType);
	    my ($ruleStr, $yaccProdStr) = ($rule, $yaccProd);
	    my $lookFors;
	    if (defined $rule) {
		$ruleStr = $rule;
		$rule = &getProductionName($rule, $facetsFile);
		if ($SHOW_MAPPINGS && $rule ne $ruleStr) {print "rule: $facetName $ruleStr -> $rule\n";}
	    }
	    if (defined $yaccProd) {
		$yaccProdStr = $yaccProd;
		$yaccProd = &getProductionName($yaccProd, $facetsFile);
		if ($SHOW_MAPPINGS && $yaccProd ne $yaccProdStr) {print "yaccProd: $facetName $yaccProdStr -> $yaccProd\n";}
		push (@$lookFors, $yaccProd);
		push (@$lookFors, "_Q${yaccProd}_E");
		push (@$lookFors, "_O${yaccProd}_C");
	    } else {
		push (@$lookFors, $prodName);
	    }
	    foreach my $lookFor (@$lookFors) {
		if (exists $facets->{$lookFor}) {
		    push (@{$facets->{$lookFor}[1]}, [$facetName, $type, $example, $prodName, $prodURI, $yaccProd, $rule, $regexp, $yaccProdStr, $ruleStr]);
		} else {
		    $facets->{$lookFor} = [0, [[$facetName, $type, $example, $prodName, $prodURI, $yaccProd, $rule, $regexp, $yaccProdStr, $ruleStr]]];
		}
	    }
	}
    }
    close(F);
}

sub getProductionName {
    my ($rule, $filename) = @_;
    my $p = new W3C::Grammar::YaccParser(1); # no integrity check
    $p->setFilename($filename);
    $p->fake('bodyTransition');
    my ($g, $errs) = $p->Parse("__foo: $rule", 0x00);
    my $gen = new W3C::Grammar::GenSpec::Perl();
    $g->toString(Stubs => $gen, GenSpec => $gen, EndProductionString => '');
    return $g->getProductionByName($g->getStartProductionName())->getProductionName();
}

sub testQuery {
    my ($queryFile, $facets, $dumpTrace) = @_;

    my $queryText = &readFile($queryFile, 'query file');

    # Run parser and get production trace.
    my $path = 'uploads/SPARQL';
    my ($results, $error, $trace);
    if (defined $PARSER_INVOCATION) {
	my $env = {TRACE_FD => 3};
	W3C::Util::FilterN->new([$queryText, \$results, \$error, \$trace], 
				[\*STDIN, \*STDOUT, \*STDERR], 2)->
				    execute($PARSER_INVOCATION, $env, 65536);
    } else {
	eval {
	    &utf8::decode($queryText);
	    my $parser = new SPARQL();
	    my $root = $parser->parse($queryText)->[0];
	    $trace = $parser->getTrace();
	    my $text = $root->toXML('');
	    &utf8::encode($text);
	};
	$error = $@;
    }
    if ($error) {
	print "error parsing $queryFile: $error\n";
	return;
    }
    if ($dumpTrace) {
	if (open(T, ">:utf8", $dumpTrace)) {
	    print T $trace;
	    close T;
	    print sprintf("created $dumpTrace: \n", length $trace);
	} else {
	    warn "unable to open trace file \"$dumpTrace\": $!\n";
	}
    }
    my $parser = new W3C::Grammar::YackerTraceParser(-noErrors => 1);
    $parser->parse($trace);

    # Check each production to see if it matches a facet.
    my $c = @{$parser->getProductions()};
    foreach my $production (@{$parser->getProductions()}) {
	my $productionName = $production->getProduction();
	if ($productionName eq $WATCH_PRODUCTION) {
	    print "$productionName refd $facets->{$productionName}[0] times\n";
	    my $count = @{$production->getRules()};
	    print "$count rules\n";
	}
	foreach my $rule (@{$production->getRules()}) {
	    my $match = join(' ', map {$_->getMatch()} @{$rule->getMatched()});
	    if ($facets->{$productionName}) {
		if ($productionName eq $WATCH_PRODUCTION) {
		    print "RULE:".$rule->getRule()." $match\n";
		}
		my $possibleFacets = $facets->{$productionName}[1];
		my $remainingFacets = [];
		foreach my $possibleFacet (@$possibleFacets) {
		    if ($productionName eq $WATCH_PRODUCTION) {
			print $possibleFacet->[rule].' vs '.$rule->getRule()."\n";
			my $match999 = (!defined $possibleFacet->[rule] || $possibleFacet->[rule] eq $rule->getRule()) ? 'true' : 'false';
			print "rule: $match999\n";
		    }
		    my $ruleStr = $rule->getRule();
		    if ((!defined $possibleFacet->[rule] || ($possibleFacet->[rule] eq $ruleStr || 
							     $possibleFacet->[rule] eq "_Q${ruleStr}_E" || 
							     $possibleFacet->[rule] eq "_O${ruleStr}_C")) &&
			(!defined $possibleFacet->[regexp] || $match =~ m/$possibleFacet->[regexp]/)) {
			my $prodStr = defined $possibleFacet->[yaccProd] ? "$possibleFacet->[yaccProdStr]" : "$possibleFacet->[prodName]";
			my $ruleStr = defined $possibleFacet->[rule] ? " rule: $possibleFacet->[ruleStr]" : '';
			my $regexpStr = defined $possibleFacet->[regexp] ? " regexp: $possibleFacet->[regexp]" : '';
			my $whyStr = "$prodStr$ruleStr$regexpStr";
			push (@$remainingFacets, [$possibleFacet, $ruleStr, $match, $whyStr]);
		    }
		}
		if (!@$remainingFacets) {
		} elsif (@$remainingFacets == 1) {
		    print "$remainingFacets->[0][0][facet] matched $remainingFacets->[0][3] \"$remainingFacets->[0][2]\"\n";
		    if ($productionName =~ m/^_Q(.*?)_E$/) {
			$facets->{$1}[0]++;
		    } elsif ($productionName =~ m/^_O(.*?)_C$/) {
			$facets->{$1}[0]++;
		    } else {
			$facets->{$productionName}[0]++;
		    }
		} else { # @$remainingFacets > 1
		    print "find \"$match\" in\n";
		    foreach my $remainingFacet (@$remainingFacets) {
			print "  $remainingFacet->[0][facet]:$remainingFacet->[3]: $remainingFacet->[example]\n";
		    }
		}
	    }
	}
    }
}


sub readFile {
    my ($file, $type) = @_;
    open(F, "<:utf8", $file) || die "could not open $type \"$file\": $!";
    local $/ = undef;
    my $text = <F>;
    close F;
    return $text;
}

&main(\@ARGV);

# $Log: scanTests,v $
# Revision 1.6  2007/01/16 18:58:33  eric
# - give up on warnings (two parsers and regexp range exclusion)
# + conrol behavoir with constants at the top
# ~ facets uses grammar strings instead of generated names
# + exception support
# + stupid _Q.*?_E hacks
# + invoke SPARQL module directly
# + $WATCH_PRODUCTION
# ~ improve feedback
#
# Revision 1.5  2007/01/10 03:28:04  eric
# + scan multiple files
# + comments
# + improve ambiguous feature selection feedback
# + handle errors
#
# Revision 1.4  2007/01/09 12:43:38  eric
# + split out YackerTraceParser
# + store and show match info
#
# Revision 1.3  2007/01/03 21:16:12  eric
# + changed field names
# + type
# ~ new ambiguity presenter
#
# Revision 1.2  2007/01/02 11:19:02  eric
# snapshot
#
# Revision 1.1  2007/01/01 13:55:26  eric
# created
#
# 
