#!/usr/bin/perl
# -*- Mode: cperl; coding: utf-8; cperl-indent-level: 4 -*-

# See copyright info at the bottom.
# CVS Version: $Id: yacker,v 1.86 2015-03-09 15:03:43 gerald Exp $

# Quickee install example:
#    unagi:/home/eric$ mkdir /tmp/yack
#    unagi:/home/eric$ cd /tmp/yack
#    unagi:/tmp/yack$ cvs -Q -d eric@homer.w3.org:/sources/public co perl/modules/W3C/{Grammar,Util/{Exception,YappDriver}.pm}
#    unagi:/tmp/yack$ wget -O sparqlTest.bnf 'http://www.w3.org/2005/01/yacker/uploads/sparqlTest/bnf?lang=c'
#    unagi:/tmp/yack$ PERL5LIB=perl/modules perl/modules/W3C/Grammar/bin/yacker --lang=perl sparqlTest.bnf > sparqlTest.yp

use strict;

my $REVISION = '$Id: yacker,v 1.86 2015-03-09 15:03:43 gerald Exp $';

use W3C::Util::Exception qw(&throw &catch &DieHandler);
use W3C::Grammar::YaccParser;
use W3C::Grammar::Presenter;
use Parse::Yapp::Driver;
use W3C::Grammar::YaccCompileTree qw($Name2Symbol);

my $LANGS = {'c' => new W3C::Grammar::GenSpec::C_(), 
	     'cpp' => new W3C::Grammar::GenSpec::CPP(), 
	     'perl' => new W3C::Grammar::GenSpec::Perl(), 
	     'n3' => new W3C::Grammar::GenSpec::N3(), 
	     'python' => new W3C::Grammar::GenSpec::Python(), 
	     'java' => new W3C::Grammar::GenSpec::Java()};

my ($GenSpec, $name, $code, $stubs, $mimick, $suppressLineDirectives, $generateNames, $processed) = ($LANGS->{'perl'}, 'a', undef, 0, 0, 0, 0, 0);
my $LegalFileName = '[a-zA-Z_][a-zA-Z_0-9\.-]*';
my $LanguageParmNames = [['perl', 'Perl'], ['c', 'C'], ['cpp', 'C++'], ['python', 'Python']];

my $man = 0;
my $help = 0;

if (exists $ENV{QUERY_STRING}) {
    &cgiMode($ARGV[0] eq 'DEBUG' ? defined $ARGV[1] ? $ARGV[1] : -1 : undef);
} else {
    &cliMode();
}

sub cgiMode {
    my ($debugSession) = @_;
    # Run in CGI mode.

    require W3C::Util::W3CDebugCGI;
    require W3C::Util::Filter;
    require Digest::MD5;

    my $query;
    eval {
	local($SIG{"__DIE__"}) = \&DieHandler;
	$W3C::Util::W3CDebugCGI::DEBUG_SESSION = $ARGV[1]; # use a session id like 957296047.909868 or -2;
	$query = new W3C::Util::W3CDebugCGI($0, defined $debugSession, 
					    {-dieNoOpen => 1, 
					     -logExt => '.log', 
					     -storeIn => '/usr/local/yacker/logs', 
					     -rerun => 'rerun'});
	my $presenter = new W3C::Grammar::XHTMLPresenter($REVISION, $LegalFileName);
	my $action = $query->param('action');

	# Some constantsKinda like globals in use:
	my $CreateParser = 'create parser';
	my $GenerateText = 'generate text';
	my $UGenerateText = CGI::escape($GenerateText);
	$UGenerateText =~ s/\%20/\+/g;
	my $ValidateText = 'validate text';
	my $AddTest = 'add test';
	my $ValidateTextOptions = [$ValidateText]; # , $AddTest];
	my $ListGrammars = 'list grammars';
	my $path = $ENV{PATH_INFO};
	if ($path eq '/') {
	    $path = undef;
	}

	# name
	my $nameWarning = undef;
	$name = $query->param('name');
	if (!$name) {
	    $nameWarning = "No name supplied";
	} elsif ($name !~ m/^$LegalFileName$/) {
	    $presenter->error("invalid name \"$name\". must match ^$LegalFileName\$");
	}

	my $uploads = 'uploads';

	# path parameter required for some actions, optional for others.
	my $pathWarning = undef;
	my $showFile;
	if ($path =~ m/^\/$uploads\/($LegalFileName)\/?$/) {
	    $name = $1;
	} elsif ($path =~ m/^\/$uploads\/($LegalFileName)\/($LegalFileName)$/) {
	    $name = $1;
	    $showFile = $2;
	} else {
	    $pathWarning = "invalid name \"$name\"";
	}

	# lang parameter required for some actions, optional for others.
	my $langWarning = undef;
	my $langStr = $query->param('lang');
	if ($langStr) {
	} elsif (open (COMPILE, '<', "$uploads/$name/compilationResults")) {
	    $langStr = <COMPILE>; # ignore lang string
	    chomp $langStr;
	    close (COMPILE);
	} else {
	    if (!$query->param('markup')) {
		$presenter->warning("no grammar language supplied - defaulting to perl");
	    }
	    $langStr = 'perl';
	}
	if ($GenSpec = $LANGS->{$langStr}) {
	} else {
	    my $opts = join(', ', keys %$LANGS);
	    $langWarning = "unknown language \"$langStr\". must be one of $opts";
	}

	# Switch on form action parameter.
	if (!$path && !$action) {

	    # Step 0: create
	    my $defaultBNF;
	    my $defaultFile = $name ? "$uploads/$name/bnf" : 'BNFTemplate';
	    if (open(DEF, '<', $defaultFile)) {
		local $/ = undef;
		$defaultBNF = <DEF>;
		close(DEF);
	    } else {
		$presenter->error($!);
	    }
	    $presenter->promptCreateGrammar('.', $CreateParser, $LanguageParmNames, $name, scalar $query->param('replace'), $GenSpec->getName(), $defaultBNF);
	} else {

	    # Subsequent actions need a language.
	    if ($langWarning && !$query->param('markup') && 
		$action ne $ListGrammars) {
		$presenter->warning($langWarning);
	    }

	    if ($action eq $ListGrammars) {
		$presenter->listGrammars($uploads);
	    } elsif ($action eq $CreateParser) {

		# Step 1: create grammar and show results

		# The Create action needs the name to come from a cgi parameter.
		if ($nameWarning) {
		    $presenter->error($nameWarning);
		}

		# Also needs a bnf parameter.
		my $bnf = $query->param('bnf');
		if (!$bnf) {
		    $presenter->error("no grammar supplied");
		}

		if (-d "$uploads/$name" && !$query->param('replace')) {
		    $presenter->warning("name <a href=\"yacker/$uploads/$name/bnf?markup=html\">$name</a> already exists");
		    $presenter->promptCreateGrammar('.', $CreateParser, $LanguageParmNames, $name, 0, $GenSpec, $bnf);
		} else {
		    # Parse and transform the grammar to yacc syntax.
		    &utf8::decode($bnf);
		    my ($g, $errs) = &_parseGrammar($bnf);
		    if (@$errs) {
			my $errsStr = &W3C::Grammar::YaccParser::getMessages($errs);
			&throw(new W3C::Util::Exception(-message => "rejected because of the following errors:\n$errsStr"));
		    }
		    my $html = &_getHtmlGrammar($g, 'html', $name);

		    $stubs = 1;
		    $generateNames = $query->param('generate') eq 'short';

		    # Create a Makefile to yacc the created yacc syntax.
		    my $new = 0;
		    if (! -d "$uploads/$name") {
			mkdir("$uploads/$name") || &throw(new W3C::Util::FileCreationException(-filename => $name));
			$new = 1;
		    }
		    eval {
			open(my $MAKE, '>', "$uploads/$name/Makefile") || die "unable to create Makefile";
			$GenSpec->makeMakefile($MAKE, '.', $name, $g);
			close($MAKE);
			&createGrammar($g, $presenter, "$uploads/$name", 
				       $suppressLineDirectives);
		    }; if ($@) { # would be nicer to detect generation errors before needing the dir.
			rmdir("$uploads/$name");
			die $@;
		    }

		    # Keep the BNF around for reference.
		    my $bnfFile = "$uploads/$name/bnf";
		    &utf8::encode($bnf);

		    # Compare the saved BNF to the one we're considering writing.
		    my $savedBnfSum = undef;
		    if (-e $bnfFile && open(BNF, '<', $bnfFile)) {
			local $/ = undef;
			$savedBnfSum = &Digest::MD5::md5(<BNF>);
			close(BNF);
		    }
		    # Write only if they are different. (Handy for local editing mode.)
		    if ($savedBnfSum ne &Digest::MD5::md5($bnf)) {
			open(BNF, '>', $bnfFile) || die "unable to create BNF file";
			print BNF $bnf;
			close(BNF);
		    }

		    # Build the grammar.
		    my $target = $GenSpec->getTarget($name);
		    $ENV{HAVE_BOOST} = 1; # tell the build env that we have libboost
		    my ($results) = 
		      W3C::Util::SyncFilter->new("make -C $uploads/$name $target", undef)->execute(4096, 1);
		    my $errors = $results->getByFlavor('stderr');
		    if ($errors && $name =~ m/\-/) {
			$presenter->warning("some versions of bison yack ungracefully on names with '-'s in them.\n");
		    }
		    my $resultsMarkup;
		    $presenter->creationResults($name, $new, $uploads, $name, $GenSpec->getName(), $results, \$resultsMarkup, $html);

		    # Keep the HTML markup for the results for redisplay.
		    my $compilationResults = "$uploads/$name/compilationResults";
		    open(COMPILE, '>', $compilationResults) || die "unable to create COMPILE file \"$compilationResults\"";
		    print COMPILE "$langStr\n";
		    print COMPILE $resultsMarkup;
		    close(COMPILE);

		}
	    } else {
		# Subsequent actions need a name derived from the path.
		if ($pathWarning && !$query->param('markup') && 
		    $action ne $ListGrammars) {
		    $presenter->warning($pathWarning);
		}

		my $html = $query->param('markup') ? 
		  &_getHtmlGrammar(&_loadGrammar("$uploads/$name/bnf"), 'html', $name) : 
		    '';

		open (COMPILE, '<', "$uploads/$name/compilationResults");
		<COMPILE>; # ignore lang string
		local $/ = undef;
		my $resultsMarkup = <COMPILE>;
		close (COMPILE);

		if (!$action) {

		    my $path = "$uploads/$name/$showFile";

		    # Step 2: show creation results
		    if ($showFile) {

			if ($showFile eq "$name.output") {
			    open (OUTPUT, '<', $path) || die;;
			    my @markup;
			    local $/ = "\n";
			    my %ruleNames;
			    my $lineNo = 1;
			    while (my $line = <OUTPUT>) {
				if (($line eq "Warnings:\n") && (my $line2 = <OUTPUT>) =~ m/^\-+$/) {
				    push (@markup, $line);
				    push (@markup, $line2);
				    my $errorLine = <OUTPUT>;
				    if ($errorLine eq "Useless terminals:\n") {
					push (@markup, $errorLine);
					push (@markup, scalar <OUTPUT>); # blank line
					while ((my $terminalLine = <OUTPUT>) ne "\n") {
					    push (@markup, CGI::escapeHTML($terminalLine));
					}
					push (@markup, "\n");
					$errorLine = <OUTPUT>;
				    }
				    if ($errorLine eq "Useless non-terminals:\n") {
					push (@markup, $errorLine);
					push (@markup, scalar <OUTPUT>); # blank line
					while ((my $terminalLine = <OUTPUT>) ne "\n") {
					    push (@markup, CGI::escapeHTML($terminalLine));
					}
					push (@markup, "\n");
					$errorLine = <OUTPUT>;
				    }
				    if ($errorLine eq "Useless rules:\n") {
					push (@markup, $errorLine);
					push (@markup, scalar <OUTPUT>); # blank line
					while ((my $ruleLine = <OUTPUT>) ne "\n") {
					    push (@markup, CGI::escapeHTML($ruleLine));
					}
					push (@markup, "\n");
					$errorLine = <OUTPUT>;
				    }
				    push (@markup, CGI::escapeHTML($errorLine)); # 2 shift/reduce conflicts and 24 reduce/reduce conflicts
				    push (@markup, CGI::escapeHTML(scalar <OUTPUT>)); #
				} elsif (($line eq "Conflicts:\n") && (my $line2 = <OUTPUT>) =~ m/^\-+$/) {
				    push (@markup, $line);
				    push (@markup, $line2);
				    while ((my $conflictLine = <OUTPUT>) ne "\n") {
					$conflictLine =~ m/^State (\d+) (contains .*?)$/ || die;
					my ($state, $contains) = ($1, CGI::escapeHTML($2));
					push (@markup, "<a href=\"#s_$state\">State $state</a> $contains\n");
				    }
				    push (@markup, "\n");
				} elsif (($line eq "Rules:\n") && (my $line2 = <OUTPUT>) =~ m/^\-+$/) {
				    push (@markup, $line);
				    push (@markup, $line2);
				    my @rulez;
				    while ((my $ruleLine = <OUTPUT>) ne "\n") {
					$ruleLine =~ m/^(\d+):(\s+)([^ ]+) -> (?:(\/\* .*? \*\/)|(.*))$/ || (chomp $ruleLine, die "can't parse \"$ruleLine\"");
					my ($ruleNo, $space, $name, $comment, $ruleText) = ($1, $2, $3, $4, $5);
					if (!$ruleNames{$name} && $name !~ m/\$/) {
					    $ruleNames{$name} = scalar @rulez;
                                            $name = CGI::escapeHTML($name);
					    $name = "<a name=\"r_$name\">$name</a>";
					}
					push (@rulez, [$ruleNo, $space, $name, $comment, $ruleText]);
				    }
				    foreach my $rule (@rulez) {
					my ($ruleNo, $space, $name, $comment, $ruleText) = @$rule;
					if ($comment) {
                                            $comment = CGI::escapeHTML($comment);
					    $comment = "<span class=\"comment\">$comment</span>";
					}
					$ruleText =~ s/(\S+)/$ruleNames{$1} ? "<a href=\"#r_$1\">$1<\/a>" : $1/gex;
					push (@markup, "<a name=\"r_$ruleNo\">$ruleNo</a>$space$name -> $ruleText$comment\n");
				    }
				    push (@markup, "\n");
				} elsif (($line eq "States:\n") && (my $line2 = <OUTPUT>) =~ m/^\-+$/) {
				    push (@markup, $line);
				    push (@markup, $line2);
				    my $stateLine = <OUTPUT>;
				  StateLine:
				    while ($stateLine =~ m/^State (\d+):$/) {
					my $stateNo = $1;
					push (@markup, "State <a name=\"s_$stateNo\">$stateNo</a>:\n");
					while ($stateLine = <OUTPUT>) {
					    if ($stateLine =~ m/^State (\d+):$/) {
						next StateLine;
					    }
					    if ($stateLine eq "Summary:/n") {
						last StateLine;
					    }
                                            $stateLine = CGI::escapeHTML($stateLine);
					    $stateLine =~ s/(\S+)/$ruleNames{$1} ? "<a href=\"#r_$1\">$1<\/a>" : $1/gex;
					    $stateLine =~ s/(Rule|rule) (\d+)/<a href="#r_$2">$1 $2<\/a>/g;
					    $stateLine =~ s/state (\d+)/<a href="#s_$1">state $1<\/a>/g;
					    $stateLine =~ s/reduce/<a onclick="history.back()">reduce<\/a>/g;
					    $stateLine =~ s/\[([^\]]*)\]/qq(<span class="error">[$1]<\/span>)/gex;
					    push (@markup, "$stateLine");
					}
				    }
				    push (@markup, "\n");
				    if (($stateLine eq "Summary:\n") && (my $line2 = <OUTPUT>) =~ m/^\-+$/) {
					push (@markup, $stateLine);
					push (@markup, $line2);
                                        foreach (0..3) {
                                            push (@markup, CGI::escapeHTML(scalar <OUTPUT>));
                                        }
				    }
				} else {
				    die "could not parse line $lineNo in output: $line";
				}
				$lineNo++;
			    }
			    $presenter->renderMarkup($name, $GenSpec->getName(), 'output', join('', '<pre>', @markup, '</pre>'));
			} else {

			    # Show a generated file.
			    $presenter->showFile($path);
			}
		    } else {

			# Show the list and prompt for validation.
			$presenter->promptValidateText('.', $ValidateTextOptions, 
						       $UGenerateText, $uploads, 
						       $name, $GenSpec->getName(), undef, 
						       undef, undef, undef, $html, 
						       $resultsMarkup, 0, 50, 90);
		    }
		} elsif ($action eq $GenerateText) {

		    my $g = &_loadGrammar("$uploads/$name/bnf");
		    # $g->yaccify(undef);

		    my $random = $query->param('random');
		    my $seed = defined $query->param('seed') ? $query->param('seed') : 0;
		    my $limit = defined $query->param('limit') ? $query->param('limit') : 0;
		    my $asciiWeight = defined $query->param('asciiWeight') ? $query->param('asciiWeight') : 90;
		    my $reference = $query->param('reference');
		    my $solution = &generateText($g, $random, $seed, $limit, $asciiWeight, $reference);
		    my $text = $solution->getHtml(' ');
		    # Show the list and prompt again for validation.
		    $presenter->promptValidateText('.', $ValidateTextOptions, 
						   $UGenerateText, $uploads, 
						   $name, $GenSpec->getName(), $text, 
						   undef, undef, undef, $html, 
						   $resultsMarkup, $seed, $limit, $asciiWeight);
		} elsif ($action eq $ValidateText) {
		    require W3C::Grammar::YackerSymbolParser;
		    require W3C::Grammar::YackerTraceParser;

		    # Step 3: validate text.
		    my $text = $query->param('text');
		    &utf8::decode($text);
		    if (!$text) {
			$presenter->error("no text supplied");
		    }

		    # Use directory timestamp to keep track of last use.
		    my $time = time;
		    utime($time, $time, "$uploads/$name");

		    # Run the parser.
		    my $argv0 = $GenSpec->getExecString("$uploads/$name", $name);
		    #my $argv1 = $html ? 
		    #  "#term-$name-" : 
		    #	"$name/bnf?markup=html#term-$name-";
		    my $processStdin = $text;
		    &utf8::encode($processStdin);
		    my ($results, $error, $traceHtml);

		    # If the markup param is set, ask the executable for a trace.
		    my $env = {TRACE_FD => 3}; # $query->param('markup') ? {TRACE_FD => 3} : {};
		    W3C::Util::FilterN->new([$processStdin, \$results, \$error, \$traceHtml], 
					    [\*STDIN, \*STDOUT, \*STDERR], 2)->
			execute($argv0, $env, 1048576);
		    &utf8::decode($results);
		    &utf8::decode($error);
		    my $html = '';
		    if ($query->param('markup')) {
			my $grammar = &_loadGrammar("$uploads/$name/bnf");
			$html = &_getHtmlGrammar($grammar, 'html', $name);
		    }
		    if ($traceHtml) {
			my @dl = ();
			my $events = new W3C::Grammar::YackerTraceParser(-noErrors => 1)->parse($traceHtml);
			foreach my $event (@$events) {
			    # Unescape the production names to correspond to the EBNF.
			    if ($event->isa('W3C::Grammar::YackerTraceConsume')) {
				my ($token, $value) = ($event->getToken(), $event->getValue());
				$token =~ s/GT_([a-z]+)/&lt;$Name2Symbol->{$1}&gt;/ig;
				$token =~ s/IT_([a-z]+)/"$1"/ig;
				push (@dl, sprintf("<dt><span class=\"token\">%s</span> <code class=\"result\">%s</code></dt>",
                                                   map { CGI::escapeHTML($_) } ($token, $value)));
			    } elsif ($event->isa('W3C::Grammar::YackerTraceStateChange')) {
				my ($production, $rules) = ($event->getProduction(), $event->getRules());
				my $productionMarkup = &prettyProduction($production, $query->param('markup') ? $name : undef);
				my $ruleStr = '';
				my $line2 = '';
				my $rowSpan = '';
				if (@$rules) {
				    $line2 = "\n<tr>";
				    $rowSpan = ' rowspan="2"';
				    my @ruleStrs;
				    foreach my $rule (@$rules) {
					my $matched = $rule->getMatched();
					foreach my $match (@$matched) {
					    if ($match->isa('W3C::Grammar::YackerTraceMatch')) {
						my $text = $match->getMatch();
						$text =~ s/ +/ /g;
                                                $text = CGI::escapeHTML($text);
						$line2 .= "<td class=\"result\">$text</td>";
					    } else {
						my $error = CGI::escapeHTML($match->getError());
						$line2 .= "<td class=\"error\">$error</td>";
					    }
					}
					my $prettyRule = &prettyProduction($rule->getRule(), $query->param('markup') ? $name : undef);
					my $matchCount = scalar @$matched;
					if ($matchCount == 0) {
					    $line2 .= "<td></td>";
					    $matchCount = 1;
					} elsif ($matchCount > 1) {
					    $prettyRule .= "($matchCount)";
					}
					push (@ruleStrs, "<th colspan=\"$matchCount\"><span class=\"term\">$prettyRule</span></th>");
				    }
				    $ruleStr = join('', @ruleStrs);
				    $line2 .= "</tr>";
				}
				push (@dl, "<dd><table border=\"1\" style=\"border-style: none; border-collapse: collapse; \"><tr><th$rowSpan style=\"vertical-align: top\"><span class=\"production\">$productionMarkup</span>:</th>$ruleStr</tr>$line2</table></dd>\n");
			    } else {
				my $error = CGI::escapeHTML($event->getError());
				push (@dl, "<dd class=\"error\">$error</dd>");
			    }
			}
			$traceHtml = join("\n", @dl);
		    }
		    $presenter->promptValidateText('.', $ValidateTextOptions, 
						   $UGenerateText, $uploads, 
						   $name, $GenSpec->getName(), $text, 
						   $results, $error, $traceHtml, $html, 
						   $resultsMarkup, 0, 50, 90);

		} elsif ($action eq $AddTest) {
		} else {
		    $presenter->error("unknown action \"$action\"");
		}
	    }
	}
	print $presenter->flush(200);
    }; if ($@) {
	# Some exception handling pasted here -- @@@ needs work.
	my $sessionId = $query ? $query->getSessionId : undef;
	if (my $ex = &catch('W3C::Http::HttpMessageException')) {
	    my $message = $ex->getHttpMessage();
	    $message->addHeader('Session-Id', $sessionId) if (defined $sessionId);
	    print $message->toString;
	} elsif (my $ex = &catch('W3C::Rdf::CGIApp::AppException')) {
	    print $ex->toString($sessionId);
	} elsif (my $ex = &catch('W3C::Util::Exception')) {
	    print "Status: 500\n";
	    print "Content-Type: text/html\n\n";
	    my $title = $ex->getMessage;
	    print "<html><head><title>Yacker Error</title></head><body>\n";
	    printf("<pre>%s</pre>\n", CGI::escapeHTML($ex->toString));
	    if ($sessionId) {
		print "<p>Session-id: $sessionId</p>\n";
	    }
	    print "</body></html>\n";
	} else {
	    print "Status: 500\n\n";
	    print "died with $@";
	    if ($sessionId) {
		print "<p>Session-id: $sessionId</p>\n";
	    }
	}
    }
}

sub prettyProduction {
    my ($production, $language) = @_;
    my $count = undef;
    if ($production =~ s/\((\d+)\)$//) {
      $count = $1;
    }
    my $parser = new W3C::Grammar::YackerSymbolParser();
    my ($ret, $error) = (undef, undef);
    eval {
	my $root = $parser->parse($production);
	$ret = $root->toString(Markup => 'html', LanguageName => $language);
	if (defined $count) {
	    $ret = "$ret($count)";
	}
    }; if ($@) {
	my $ex = &catch('W3C::Util::Exception');
	$error = $ex->getMessage;
    }
    if (defined $ret) {
	return $ret;
    }
    $production =~ s/GT_([A-Z]+)/<$Name2Symbol->{$1}>/ig;
    $production =~ s/IT_([A-Z]+)/"$1"/ig;
    $production =~ s/_Opt/?/g;
    $production =~ s/_Plus/+/g;
    $production =~ s/_Star/*/g;
    $production =~ s/_Or_/ | /g;
    $production =~ s/_O/( /g;
    $production =~ s/_C/ )/g;
    $production =~ s/_Q//g;
    $production =~ s/_E//g;
    $production =~ s/_S/ /g;
    $production =~ s/__/_/g;

    # Escape HTML.
    $production =~ s/</&<<;/g;
    $production =~ s/>/&>>;/g;

    # Add markup.
    if ($language) {
	$production =~ s/([\w\d]+)/<a href="#prod-$language-$1">$1<\/a>/gm;
    }
    $production =~ s/&<<;/&lt;/g;
    $production =~ s/&>>;/&gt;/g;
    if (defined $count) {
	$production = "$production($count)";
    }
    return $production; # "$production <span class=\"error\">$error</span>";
}

sub cliMode {

    # Run as a command line tool.

    require Getopt::Long;
    require Pod::Usage;

    my $presenter = new W3C::Grammar::TextPresenter($REVISION, $LegalFileName);
    my $solve = 0;
    my $seed = 0;
    my $limit = 50;
    my $asciiWeight = 90;
    &Getopt::Long::GetOptions(
      '<>' => sub {
	  eval {
	      my ($file) = @_;
	      my $bnf;
	      open (G, '<', $file) || die "can't open file \"$file\"; $!\n";
	      {
		  local $/=undef;
		  $bnf=<G>;
	      }
	      local($SIG{"__DIE__"}) = \&DieHandler;
	      my ($g, $errs) = &_parseGrammar($bnf);
		    if (@$errs) {
			my $errsStr = &W3C::Grammar::YaccParser::getMessages($errs);
			&throw(new W3C::Util::Exception(-message => "rejected because of the following errors:\n$errsStr"));
		    }
	      if ($solve) {
		  my ($random, $reference) = (0, 0);
		  my $solution = &generateText($g, $random, $seed, $limit, $asciiWeight, $reference);
		  my $t = $solution->getHtml(' ');
		  &utf8::encode($t);
		  print "$t\n";
	      } else {
		  &createGrammar($g, $presenter, '.', $suppressLineDirectives);
		  if (my @missingActions = $GenSpec->getMissingActions()) {
		      my $count = @missingActions;
		      my $list = join("\n    ", "$count missing actions:", map {
			  $_ =~ s/([^ ]+)([ ]?)/$1:$2/; $_;
		      } @missingActions);
		      print STDOUT "$list\n";
		  }
		  if (my @unusedActions = $GenSpec->getUnusedActions()) {
		      my $count = @unusedActions;
		      my $list = join("\n    ", "$count unused actions:", map {
			  my $lineNo = $GenSpec->getAction($_)->getLineNo();
			  $_ =~ s/([^ ]+)([ ]?)/$1:$2/; "$_ at $lineNo"
		      } @unusedActions);
		      print STDOUT "$list\n";
		  }

		  my $prodNo = 1;
	      }
	      ##print $g->toString(Stubs => $GenSpec, ProdNo => \ $prodNo, LanguageName => "${name}-", ExpandTerminals => $expandTerminals);
	      #print $presenter->print($g->toString(Markup => $GenSpec, ProdNo => \ $prodNo, LanguageName => "${name}-"));
	      #print $presenter->print($g->toString(Markup => undef, ProdNo => \ $prodNo, LanguageName => "${name}-"));
	      $processed++;
	  }; if ($@) {if (my $ex = &catch('W3C::Util::Exception')) {
	      die $ex->toString();
	  } else {
	      die $@;
	  }
		  }
      }, 
      'lang|l=s' => sub {
	  if (defined $code) {
	      die "-lang must be used before -code\n";
	  }
	  if (!($GenSpec = $LANGS->{$_[1]})) {
	      my $opts = join(', ', keys %$LANGS);
	      if ($_[1] eq 'help') {
		  print "$opts\n";
		  exit (0);
	      } else {
		  die "unknown language \"$_[1]\". must be one of $opts\n";
	      }
	  }
      }, 
      'output|o=s' => \$name, 
      'nolines|n!' => \$suppressLineDirectives, 
      'stubs|s!' => \$stubs, 
      'debug=s' => sub {
	  my (undef, $debugSession) = @_;
	  &cgiMode($debugSession);
	  exit(0);
      }, 
      'code|c=s' => sub {
	  my (undef, $filename) = @_;
	  $code = $filename;
	  eval {
	      if (!open (C, '<', $filename)) {
		  &throw(new W3C::Util::FileOperationException(-filename => $filename, 
							       -operation => 'open for reading'));
	      }
	      local $/=undef;
	      my $code = <C>;
	      close (C);
	      my $p = new W3C::Grammar::YaccParser(1); # no integrity check
	      $p->setFilename($filename);
	      my ($g, $errs) = $p->Parse($code, 0x00);
		    if (@$errs) {
			my $errsStr = &W3C::Grammar::YaccParser::getMessages($errs);
			&throw(new W3C::Util::Exception(-message => "code file rejected because of the following errors:\n$errsStr"));
		    }
	      $g->yaccify(sub {&throw();}, $GenSpec);
	      $GenSpec->read($g, $filename);
	      $stubs = 1;	# No reason to read code unless you are generating stubs.
	  }; if ($@) {if (my $ex = &catch('W3C::Util::Exception')) {
	      die $ex->toString();
	  } else {
	      die $@;
	  }}
      }, 
      'short!' => \$generateNames, 
      'expandTerminals!' => sub {$GenSpec->toggleExpandTerminals()}, 
      'solve!' => \$solve, 
      'seed=i' => \$seed, 
      'limit=i' => \$limit, 
      'ascii=i' => \$asciiWeight, 
      'help|?' => \$help, 
      'man' => \$man) || &Pod::Usage::pod2usage(2);

    &Pod::Usage::pod2usage(1) if $help;
    &Pod::Usage::pod2usage(-exitstatus => 0, -verbose => 2) if $man;
    if (!$processed) {
	&Pod::Usage::pod2usage(2);
    }
}

sub generateText { # static
    my ($g, $random, $seed, $limit, $asciiWeight, $reference) = @_;
    my $creativePolicy = $random ? 
      new W3C::Grammar::YaccCompileTree::RandomPolicy($seed, $limit, $asciiWeight) : 
	new W3C::Grammar::YaccCompileTree::RepresentativePolicy($seed, $limit, $asciiWeight);
    my $relativePolicy = $reference ? 
      new W3C::Grammar::YaccCompileTree::ReferencePolicy($reference, $creativePolicy) : 
	$creativePolicy;

    return $g->solve($relativePolicy, $g, 1);
}

sub W3C::Grammar::_YaccParser::IsSpacePerl {
    return &utf8::IsSpacePerl;
#    my $argsStr = join(' | ', @_, $a);
#    my $wa = wantarray;
#    return qr(\s\r\n);
#    die "$wa W3C::Grammar::_YaccParser::IsSpacePerl($argsStr)\n";
}

sub _parseGrammar {
    my ($bnf) = @_;
    my $p = new W3C::Grammar::YaccParser();
    # @@ Hack -- to add the default @pass directive.
    my $text = $bnf =~ m/^\s*(?:\[[0-9][0-9a-zA-Z]*\]\s*)?\@pass\s*:/m ? 
      $bnf : 
	"$bnf\n\@pass: [ \\t\\r\\n]+\n";
    if ($text !~ m{^\%\%}) {
	$p->fake('bodyTransition');
    }
    return $p->Parse($text, 0x00);
}

sub _loadGrammar {
    my ($path) = @_;
    # Grab the BNF.
    my $bnf;
    {
	local $/ = undef;
	open(BNF, '<', $path) || die "unable to open BNF file \"$path\"";
	$bnf = <BNF>;
	close(BNF);
	&utf8::decode($bnf);
    }
    my ($g, $errs) = &_parseGrammar($bnf);
		    if (@$errs) {
			my $errsStr = &W3C::Grammar::YaccParser::getMessages($errs);
    # Assume no errors on re-load
			&throw(new W3C::Util::Exception(-message => "inexplicable errors:\n$errsStr"));
		    }
    return $g;
}

sub _getHtmlGrammar {
    my ($g, $lang, $name) = @_;
    my $prodNo = 1;
    return $g->toString(Markup => $lang, ProdNo => \ $prodNo, LanguageName => "${name}-", LexFormat => 'XMLSpec');
}

sub createGrammar {
    my ($g, $presenter, $uploads, $suppressLineDirectives) = @_;
    eval {
	my $symNumber = 0;
	my $genNamesFunc = $generateNames ? sub {
	    my ($grammar, $derivedName) = @_;
	    return 'gen'.$symNumber++;
	} : undef;
	my %flags = (LineNumbers => 0, 
		     Types => 0, 
		     OrigCode => 0, 
		     Comments => ($ENV{'QUERY_STRING'} ? 0 : 1), # per <http://www.w3.org/mid/52B0C45A.90105@w3.org>
		     Yacc => 1, 
		     GenSpec => $GenSpec, 
		     Stubs => $stubs ? $GenSpec : undef, 
		     SuppressLineDirectives => $suppressLineDirectives, 
		     Class => $name);

	if ($stubs) {
	    my $templateDir = ($0 =~ m/^(.*?)yacker/)[0];

	    # We're called upon to create a yacc grammar with
	    # semantic actions that parse the specified language.
	    $GenSpec->createGrammar($g, $genNamesFunc, $uploads, $name, $templateDir, %flags);
	} else {

	    # Simply reformat the BNF and present it.

	    $presenter->print($g->toString(%flags));
	}

    }; if ($@) {
	if (my $ex = &catch('W3C::Util::Exception')) {
	    die $ex->toString();
	} else {
	    die $@;
	}
    }
}

__END__

=head1 NAME

yacker - grammar manipulation tool

=head1 SYNOPSIS

yacker [options] grammar

    -man for more details

=head1 OPTIONS

=over 8

=item B<-lang|s> name

Language for output. -lang help will give a list of supported languages.

B<-lang> must be used before B<-code>.

default: perl

=item B<-output|o> name

Name of the language to generate a parser for.

default: a

=item B<-short>

Flag to generate short names for generated productions.

=item B<-expandTerminals>

Flag to embed the text of one terminal in another, rather than including it by reference. This generally defaults to off, but python and n3 require it so it is forced on regardless of expandTerminals directives.

=item B<-nolines|n>

Suppress line directives (in languages where they are possible).

=item B<-stubs|s>

Generate code stubs for each production. This is useful when starting a parser. See the B<-code> option.

B<-lang> must be used before B<-stubs>.

=item B<-code|c> file

Read code for each production from file. The file can be the output of an earlier B<-stubs> call, potentially updated by your skilled hand. This is useful when updating a parser.

B<-code> file implies B<-stubs>.

B<-lang> must be used before B<-code>.

=item B<-solve>

Flag to generate a piece of text that parses in the new language.

=over 4

=item B<-seed> int

Random seed for parts of the solution that are randomly generated. Using the same seed will yield the same solution.

=item B<-limit> int

How many productions to expand before always taking the shortest solution, i.e.
  - expand * to 0 entries
  - expand ? to 0 entries
  - expand + to 1 entry

=item B<-ascii> int

The weight to give ASCII ranges within larger ranges. This is useful when testing on primative terminals.

=back

=item B<-debug> sessionId

Runs the CGI interface, reading the input for the given session from /usr/local/yacker/logs/yacker.log .
This is exploited by running $(QUERY_STRING=1 ./yacker DEBUG).

=item B<-help>

Print a brief help message and exits.

=item B<-man>

Prints the manual page and exits.

=back

=head1 DESCRIPTION

B<yacker> parses BNF or yack grammars and generates.

The typical maintaince cycle of language will start with the generation of stubs:

    yacker -stubs -lang perl -o foo foo.bnf

This will produce some code, for example foo.yp for the perl B<-lang>. The maintainer will use the output of the above command as a starting point for generating the parser, modifying foo.yp as necessary. If the lanuage (foo.bnf) changes, the maintainer can update the parser, using the previous code to generate the new code:

    cp foo.yp foo.yp.last
    yacker -lang perl -code foo.yp.last -o foo foo.bnf

This will generate a list of changes from one version of the grammar to the next.

=over 4

1 missing actions:
    BaseDecl: IT_BASE Q_IRI_REF

1 unused actions:
    BaseDecl: IT_BASE QuotedIRIref at 212

=back

I (ericP) usually start with the most repeated unused action and look for it something analogous to it in the missing actions. Frequently, a few renamed productions can reduce the mismatch between the old and new grammar.

=head2 missing actions

Each entry is a production name that is present in (or implied by) the new grammar, but for which there is no code in the grammar linked by the B<-code> argument. e.g.

=over 4

    BaseDecl: IT_BASE Q_IRI_REF

=back


=head2 unused actions

Each entry is a production name that was present in the grammar linked by the B<-code> argument, but is not needed (or implied) by the new grammar. The line number is where the code was declared in the code source. e.g.

=over 4

    BaseDecl: IT_BASE QuotedIRIref at 212

=back

=head1 ENVIRONMENT

B<QUERY_STRING> controls whether yacker runs as a CGI script or a command like tool.
E.g. to re-run the last web query: QUERY_STRING=1 ./yacker DEBUG

=head1 AUTHOR

Eric Prud'hommeaux <eric@w3.org>

=head1 COPYRIGHT

Copyright Massachusetts Institute of technology, 2004.

THIS SOFTWARE AND DOCUMENTATION IS PROVIDED "AS IS," AND COPYRIGHT HOLDERS MAKE NO REPRESENTATIONS
OR WARRANTIES, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY OR
FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE OR DOCUMENTATION WILL NOT INFRINGE
ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS. 

COPYRIGHT HOLDERS WILL NOT BE LIABLE FOR ANY DIRECT, INDIRECT, SPECIAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF ANY USE OF THE SOFTWARE OR DOCUMENTATION. 

The name and trademarks of copyright holders may NOT be used in advertising or publicity pertaining 
to the software without specific, written prior permission. Title to copyright in this software and 
any associated documentation will at all times remain with copyright holders. 

=cut

