#!/usr/bin/perl -w

# SVG specification publication script
# Copyright (C) 2008 Cameron McCormack <cam@mcc.id.au>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

# ----------------------------------------------------------------------------

use strict;
use encoding 'UTF-8';
use File::Basename;
use File::Spec;
use POSIX;
use XML::LibXML;

# NOTE(review): presumably keeps the DOCTYPE out of serialized documents;
# confirm against the XML::LibXML documentation.
local $XML::LibXML::skipDTD = 1;

# Forward declarations.  The prototype given here has to agree with the one
# on the definition further down, otherwise perl -w reports a "Prototype
# mismatch" warning every time the script starts.
sub set;
sub readconf($);
sub syntax;
sub makeMake();
sub publish();
sub deps($);
sub snippets();
sub clean();

# Month names indexed by month number; index 0 is only a placeholder.
my @MONTHS = qw(. January February March April May June July August September October November December);
# Variable names that may appear in the configuration file.
my %KNOWN_VARS = set(qw(OUTPUT_DIR PUBLICATION_DATE MATURITY THIS_VERSION PREVIOUS_VERSION LATEST_VERSION LATEST_REC STYLESHEETS MULTI_CHAPTER SINGLE_CHAPTER_VERSION CHAPTERS APPENDICES SPEC_FILE TOC_FILE DIRS_TO_COPY GENERATE_HEADERS HEADER_TITLE CHAPTERS_WITH_RUBY GENERATE_DOM GENERATE_IDL IDL_FILE IDD_FILE DOM_HEADER IDL_HEADER DOM_OUTPUT IDL_OUTPUT GENERATE_BINDINGS BINDINGS BINDING_OUTPUTS GENERATE_TABLES RELAXNG_DIR RELAXNG_DRIVER DATATYPE_URL ATTRIBUTE_TABLE_TEMPLATE ELEMENT_TABLE_TEMPLATE ATTRIBUTE_TABLE_OUTPUT ELEMENT_TABLE_OUTPUT GENERATE_RNG_SNIPPETS RNG_SNIPPETS_OUTPUT_DIR STATIC_RNG_SNIPPETS));
# Binding languages that may be listed in BINDINGS.
my %KNOWN_BINDINGS = set(qw(ecmascript perl python java));
# Namespace of the <edit:*> processing elements found in the sources.
my $EDIT_NS = 'http://xmlns.grorg.org/SVGT12NG/';
# Namespace used for every element this script creates.
my $HTML_NS = 'http://www.w3.org/1999/xhtml';

my $action;   # one of clean, makemake, deps, snippets, publish
my $target;   # first positional argument (the chapter), if any
my $arg1;     # second positional argument; only accepted by --deps

# Parse command line arguments.

while (@ARGV) {
  my $arg = shift(@ARGV);
  if ($arg =~ /^--(clean|makemake|deps|snippets|publish)$/) {
    # Only one action may be given.
    syntax() if defined $action;
    $action = $1;
  } elsif ($arg =~ /^-/) {
    syntax();
  } else {
    # $action is still undefined when a positional argument precedes the
    # action switch, so it must be tested before it is compared.
    if (defined $target && defined $action && $action eq 'deps') {
      $arg1 = $arg;
    } elsif (defined $target) {
      syntax();
    } else {
      $target = $arg;
    }
  }
}
syntax() unless defined $action;
# clean, makemake and snippets take no chapter; deps and publish need one.
syntax() if defined $target && ($action eq 'clean' || $action eq 'makemake' || $action eq 'snippets');
syntax() if !defined $target && ($action eq 'deps' || $action eq 'publish');

# Look for the configuration file.

my $conffile = 'publish.xml';
die "Configuration file not found ($conffile)" unless -f $conffile;

# Read in the configuration file.  Every known variable the file does not
# mention defaults to the empty string, so the checks below can compare
# against '' without running into undef.

my %conf = readconf($conffile);
for my $var (keys(%KNOWN_VARS)) {
  $conf{$var} = '' unless exists $conf{$var};
}

chdir(dirname($conffile));  # All pathnames in the configuration file are
                            # relative to the directory it lives in.

# Parse the configuration file.

my $outputDir = $conf{OUTPUT_DIR};
$outputDir = '../publish' if $outputDir eq '';

# Publication date as YYYYMMDD; defaults to today.
my $publicationDate = $conf{PUBLICATION_DATE};
$publicationDate = strftime('%Y%m%d', localtime(time)) if $publicationDate eq '';
die "Invalid PUBLICATION_DATE ($publicationDate)" unless $publicationDate =~ /^\d{8}/;

# The same date in "DD Month YYYY" form.
my $longdate = sprintf('%02d %s %d', substr($publicationDate, 6, 2), $MONTHS[substr($publicationDate, 4, 2)], substr($publicationDate, 0, 4));

my $maturity = $conf{MATURITY};
die "Invalid MATURITY ($maturity)" unless $maturity =~ /^(WD|FPWD|LCWD|FPWDLC|CR|PR|PER|REC|WG-NOTE|ED)$/;

my $thisVersion = $conf{THIS_VERSION};
my $previousVersion = $conf{PREVIOUS_VERSION};
my $latestVersion = $conf{LATEST_VERSION};
my $latestRec = $conf{LATEST_REC};

# The W3C status stylesheet goes last; every working-draft flavour shares
# the W3C-WD stylesheet.
my @stylesheets = split(/\s+/, $conf{STYLESHEETS});
if ($maturity =~ /^(FPWD|LCWD|FPWDLC)$/) {
  push @stylesheets, "http://www.w3.org/StyleSheets/TR/W3C-WD";
} else {
  push @stylesheets, "http://www.w3.org/StyleSheets/TR/W3C-$maturity";
}

my $multiChapter = $conf{MULTI_CHAPTER} eq 'true';

my $singleChapterVersion = $conf{SINGLE_CHAPTER_VERSION};
$singleChapterVersion = '' unless defined $singleChapterVersion;
die "SINGLE_CHAPTER_VERSION specified but MULTI_CHAPTER is not true" if !$multiChapter && $singleChapterVersion ne '';

# File names of chapters are given without their .html extension.
my $specFile = $conf{SPEC_FILE};
die "SPEC_FILE not specified" unless $specFile ne '';
die "SPEC_FILE ($specFile) does not refer to a file" unless -f "$specFile.html";

my $tocFile = $conf{TOC_FILE};
die "TOC_FILE does not refer to a file" if $multiChapter && $tocFile ne '' && !-f "$tocFile.html";

# @allChapters stays empty unless MULTI_CHAPTER is true.
my @chapters = ();
my @appendices = ();
my @allChapters = ();
if ($multiChapter) {
  @chapters = split(/\s+/, $conf{CHAPTERS});
  @appendices = split(/\s+/, $conf{APPENDICES});
  @allChapters = ($specFile, ($tocFile ne '' ? $tocFile : ()), @chapters, @appendices);
}

my @dirsToCopy = split(/\s+/, $conf{DIRS_TO_COPY});
-d $_ or die "Entry in DIRS_TO_COPY ($_) does not refer to a directory" for @dirsToCopy;

my $generateHeaders = $conf{GENERATE_HEADERS} eq 'true';

my $headerTitle = $conf{HEADER_TITLE};

my @chaptersWithRuby = split(/\s+/, $conf{CHAPTERS_WITH_RUBY});
-f "$_.html" or die "Entry in CHAPTERS_WITH_RUBY ($_) does not refer to a file" for @chaptersWithRuby;

# Split the chapters into those listed in CHAPTERS_WITH_RUBY and the rest.
my (@htmlChapters, @xhtmlChapters);
for my $c (@allChapters) {
  if (grep { $_ eq $c } @chaptersWithRuby) {
    push(@xhtmlChapters, $c);
  } else {
    push(@htmlChapters, $c);
  }
}

my $generateDOM = $conf{GENERATE_DOM} eq 'true';

my $generateIDL = $conf{GENERATE_IDL} eq 'true';

my $idlFile = $conf{IDL_FILE};
die "IDL_FILE does not refer to a file" if ($generateDOM || $generateIDL) && !-f $idlFile;

my $iddFile = $conf{IDD_FILE};
die "IDD_FILE does not refer to a file" if $generateDOM && !-f $iddFile;

my $domHeader = $conf{DOM_HEADER};
die "DOM_HEADER does not refer to a file" if $multiChapter && $generateDOM && !-f $domHeader;

my $domOutput = $conf{DOM_OUTPUT};
die "DOM_OUTPUT not specified" if $generateDOM && $domOutput eq '';

my $idlHeader = $conf{IDL_HEADER};
die "IDL_HEADER does not refer to a file" if $multiChapter && $generateIDL && !-f $idlHeader;

my $idlOutput = $conf{IDL_OUTPUT};
die "IDL_OUTPUT not specified" if $generateIDL && $idlOutput eq '';

my $generateBindings = $conf{GENERATE_BINDINGS} eq 'true';

# BINDINGS and BINDING_OUTPUTS are parallel lists.
my @bindings = split(/\s+/, $conf{BINDINGS});
$KNOWN_BINDINGS{$_} or die "Unknown entry in BINDINGS ($_)" for @bindings;

my @bindingOutputs = split(/\s+/, $conf{BINDING_OUTPUTS});
die "Mismatched number of entries in BINDINGS and BINDING_OUTPUTS" if @bindings != @bindingOutputs;

my $generateTables = $conf{GENERATE_TABLES} eq 'true';

my $relaxNGDir = $conf{RELAXNG_DIR};
die "RELAXNG_DIR does not refer to a directory" if $generateTables && !-d $relaxNGDir;

# The driver is a file inside RELAXNG_DIR.
my $relaxNGDriver = $conf{RELAXNG_DRIVER};
die "RELAXNG_DRIVER does not refer to a file" if $generateTables && !-f "$relaxNGDir/$relaxNGDriver";

my $datatypeURL = $conf{DATATYPE_URL};

my $attributeTableTemplate = $conf{ATTRIBUTE_TABLE_TEMPLATE};
die "ATTRIBUTE_TABLE_TEMPLATE does not refer to a file" if $generateTables && !-f $attributeTableTemplate;

my $elementTableTemplate = $conf{ELEMENT_TABLE_TEMPLATE};
die "ELEMENT_TABLE_TEMPLATE does not refer to a file" if $generateTables && !-f $elementTableTemplate;

my $attributeTableOutput = $conf{ATTRIBUTE_TABLE_OUTPUT};
die "ATTRIBUTE_TABLE_OUTPUT not specified" if $generateTables && $attributeTableOutput eq '';

my $elementTableOutput = $conf{ELEMENT_TABLE_OUTPUT};
die "ELEMENT_TABLE_OUTPUT not specified" if $generateTables && $elementTableOutput eq '';

my $generateRNGSnippets = $conf{GENERATE_RNG_SNIPPETS} eq 'true';

# NOTE(review): unlike the other checks this one is not conditional on its
# GENERATE_* switch, so the directory is required even when
# GENERATE_RNG_SNIPPETS is off -- confirm that this is intended.
my $rngSnippetsOutputDir = $conf{RNG_SNIPPETS_OUTPUT_DIR};
die "RNG_SNIPPETS_OUTPUT_DIR must refer to a directory" unless -d $rngSnippetsOutputDir;

my @staticRNGSnippets = split(/\s+/, $conf{STATIC_RNG_SNIPPETS});

# Run the requested action.  %dispatch maps each action name to the code that
# carries it out; an action without an entry does nothing.
{
  my %dispatch = (
    clean    => sub { clean() },
    makemake => sub { makeMake() },
    deps     => sub {
      # --deps needs the output directory as its second argument.
      syntax() unless defined $arg1;
      deps($arg1);
    },
    snippets => sub { snippets() },
    publish  => sub { publish() },
  );
  my $run = $dispatch{$action};
  $run->() if $run;
}


# -- functions --

# Reads the configuration file named by the argument and returns its
# settings as a flat (NAME => value, ...) list.
#
# Blank lines and lines holding only a '#' comment are skipped.  Every other
# line must have the form NAME=value; anything else is a fatal error, as is
# a file that cannot be opened.  Unknown or repeated names only produce a
# warning; for a repeated name the last value wins.
sub readconf($) {
  my $conffile = shift;
  my %conf;
  open(my $fh, '<', $conffile) or die "Cannot read configuration file ($conffile): $!";
  # A named line variable keeps the caller's $_ untouched.
  while (my $line = <$fh>) {
    next if $line =~ /^\s*(#.*)?\s*$/;
    $line =~ s/\r$//;   # tolerate CRLF line endings
    $line =~ /^([^=]+)=(.*)/ or die 'Syntax error in configuration file';
    my ($name, $value) = ($1, $2);
    warn "Unknown configuration variable $name" unless exists $KNOWN_VARS{$name};
    warn "Redefinition of configuration variable $name" if exists $conf{$name};
    $conf{$name} = $value;
  }
  close($fh);
  return %conf;
}

# Turns a list of names into a (name, 1, name, 1, ...) list, suitable for
# initialising a hash that is used as a set.
sub set {
  my @pairs;
  for my $member (@_) {
    push @pairs, ($member, 1);
  }
  return @pairs;
}

# Prints the usage summary to standard output and terminates the script
# with exit status 1.
sub syntax {
  my $usage = <<EOF;
Usage: publish.pl COMMAND

Available commands:
  --clean                 removes all intermediate and output files
  --makemake              generates spec.mk
  --deps CHAPTER DIR      writes dependency rules for the given chapter to a
                          file named CHAPTER.d in DIR
  --snippets              generates all RelaxNG snippets from the RelaxNG
                          schema
  --publish CHAPTER       publishes the given chapter
EOF
  print $usage;
  exit 1;
}

# Looks the given chapter name up in @chaptersWithRuby.  In list context the
# matching entries are returned, in scalar context their number, so the
# result is true exactly when the chapter is listed.
sub hasRuby($) {
  my ($name) = @_;
  my @hits = grep { $_ eq $name } @chaptersWithRuby;
  return @hits;
}

# Writes spec.mk into the current directory.  The generated makefile
# fragment contains:
#   - the CHAPTERS and CHAPTERS_NO_INDEX variables,
#   - the all-spec, clean-spec and sync targets,
#   - a rule for the single page version (when one is configured),
#   - rules for the RelaxNG snippets, the DOM/IDL pages, the element and
#     attribute tables and the language bindings (each only when enabled),
#   - per chapter, a rule for its dependency file and one that publishes it.
# Dies if spec.mk cannot be written.
sub makeMake() {
  my $chapters = join(' ', @htmlChapters, @xhtmlChapters);
  my $chaptersNoIndex = join(' ', @chapters, @appendices);
  if ($tocFile ne '') {
    $chaptersNoIndex = $tocFile . ' ' . $chaptersNoIndex;
  }
  my $htmlTargets = join(' ', map { "$outputDir/$_.html" } @htmlChapters);
  # Chapters with ruby are published both as .html and as .xhtml.
  my $xhtmlTargets = join(' ', map { "$outputDir/$_.html $outputDir/$_.xhtml" } @xhtmlChapters);

  # Generated pages that clean-spec has to remove.  They are collected in a
  # list and each one gets its own leading space, so that no name can end up
  # glued to the "*.d" pattern or to the name before it.
  my @generated;
  push @generated, "$domOutput.html" if $generateDOM;
  push @generated, "$idlOutput.html" if $generateIDL;
  push @generated, "$attributeTableOutput.html", "$elementTableOutput.html" if $generateTables;
  push @generated, map { "$_.html" } @bindingOutputs;
  my $toClean = join('', map { " $_" } @generated);

  my $single = '';
  if ($singleChapterVersion) {
    $single = "$outputDir/$singleChapterVersion.html";
  }
  my $toCopy = join(' ', @dirsToCopy);

  open(my $mk, '>', 'spec.mk') or die "Cannot write spec.mk: $!";
  # Recipe lines inside the here-documents start with a literal tab.
  print {$mk} <<EOF;
# WARNING: This file is automatically generated by tools/publish.pl, which is
#          invoked from tools/common.mk.

CHAPTERS=$chapters
CHAPTERS_NO_INDEX=$chaptersNoIndex

all-spec : $htmlTargets $xhtmlTargets $single sync

clean-spec :
	perl \$(TOOLS)/publish.pl --clean
	rm -f \$(DEPDIR)/*.d$toClean
	rmdir \$(DEPDIR) 2>/dev/null || true

sync :
	rsync -auC $toCopy $outputDir/
.PHONY : sync

EOF
  if ($singleChapterVersion) {
    # The stylesheet is handed the absolute path of the output directory.
    my $od = File::Spec->rel2abs($outputDir);
    print {$mk} <<EOF;
$outputDir/$singleChapterVersion.html : \$(patsubst %,$outputDir/%.html,\$(CHAPTERS_NO_INDEX)) $outputDir/$specFile.html
	xsltproc --novalid --stringparam publish '$od' --stringparam chapters '\$(CHAPTERS_NO_INDEX)' \$(TOOLS)/single-page.xsl $outputDir/$specFile.html >$outputDir/$singleChapterVersion.html

EOF
  }
  if ($generateRNGSnippets) {
    # Static snippets get an empty rule so the pattern rule below does not
    # apply to them.
    print {$mk} "$rngSnippetsOutputDir/$_ :\t;\n\n" for @staticRNGSnippets;
    print {$mk} <<EOF;
$rngSnippetsOutputDir/% : $relaxNGDir/*.rng
	perl \$(TOOLS)/publish.pl --snippets

EOF
  }
  for my $c (@allChapters) {
    my $lhs = "$outputDir/$c.html";
    $lhs .= " $outputDir/$c.xhtml" if hasRuby($c);
    print {$mk} <<EOF;
\$(DEPDIR)/$c.d : $c.html
	perl \$(TOOLS)/publish.pl --deps $c \$(DEPDIR)

$lhs : $c.html
	perl \$(TOOLS)/publish.pl --publish $c

EOF
  }
  if ($generateDOM) {
    print {$mk} <<EOF;
$domOutput.html : $idlFile $iddFile $domHeader
	java -jar \$(TOOLS)/idl/UdomBuilder.jar -notoc $idlFile $iddFile -htmlDOM $domHeader $domOutput.html

EOF
  }
  if ($generateIDL) {
    print {$mk} <<EOF;
$idlOutput.html : $idlFile $idlHeader
	java -jar \$(TOOLS)/idl/UdomBuilder.jar -notoc $idlFile $iddFile -htmlIDL $idlHeader $idlOutput.html

EOF
  }
  if ($generateTables) {
    print {$mk} <<EOF;
$attributeTableOutput.html $elementTableOutput.html : $attributeTableTemplate $elementTableTemplate $relaxNGDir/*.rng
	perl \$(TOOLS)/tables/makeTables.pl $relaxNGDir $relaxNGDriver '$datatypeURL' properties.txt elements.txt $attributeTableTemplate $elementTableTemplate $attributeTableOutput.html $elementTableOutput.html

EOF
  }
  if (@bindings) {
    print {$mk} <<EOF;
idl.xml : $idlFile
	perl \$(TOOLS)/bindings/idl2xml.pl $idlFile idl.xml

EOF
  }
  # @bindings and @bindingOutputs are parallel lists.
  for (my $i = 0; $i < @bindings; $i++) {
    print {$mk} <<EOF;
$bindingOutputs[$i].html : $idlFile idl.xml \$(TOOLS)/bindings/idlml2html-base.xslt \$(TOOLS)/bindings/idlml2html-$bindings[$i].xslt
	xsltproc \$(TOOLS)/bindings/idlml2html-$bindings[$i].xslt idl.xml >$bindingOutputs[$i].html

EOF
  }
  print {$mk} <<EOF;
.PHONY : all-spec clean-spec

ifneq (\$(MAKECMDGOALS),clean)
-include \$(CHAPTERS:%=\$(DEPDIR)/%.d)
endif
EOF
  close($mk) or die "Cannot write spec.mk: $!";
}

# Writes the make dependency rule for the global $target to the file
# "$target.d" inside the directory given as argument (created if missing).
#
# $target must be a chapter, the attribute or element table output, or one
# of the binding outputs; anything else is a fatal error.  For chapters and
# tables the corresponding source is scanned for <edit:example>,
# <edit:schema>, <edit:toc> and <edit:fulltoc> elements to find the files
# the output depends on.  Dies if a source cannot be read or the rule file
# cannot be written.
sub deps($) {
  my $out = shift;
  my %deps = ();
  my $type;
  for (@allChapters) {
    if ($target eq $_) {
      $type = 'chapter';
      last;
    }
  }
  if ($target eq $specFile || $target eq $tocFile) {
    $type = 'chapter';
  } elsif ($target eq $attributeTableOutput) {
    $type = 'attr';
  } elsif ($target eq $elementTableOutput) {
    $type = 'elt';
  } else {
    for (my $i = 0; $i < @bindingOutputs; $i++) {
      if ($target eq $bindingOutputs[$i]) {
        $type = 'binding ' . $bindings[$i];
        last;
      }
    }
  }
  die "Unknown output file ($target)" unless defined $type;
  # First we search the HTML source file(s) for <edit:*> elements.
  # (Binding outputs have no source to scan, so @files stays empty.)
  my @files;
  if ($type eq 'chapter') {
    @files = "$target.html";
  } elsif ($type eq 'attr') {
    @files = $attributeTableTemplate;
  } elsif ($type eq 'elt') {
    @files = $elementTableTemplate;
  }
  for my $file (@files) {
    # XXX Parsing XML with regular expressions is terrible of course.
    #     This won't work if a different namespace prefix is used, or if the
    #     matched text is actually inside a comment or CDATA section.
    open(my $in, '<', $file) or die "Cannot read $file: $!";
    my $html = join('', <$in>);
    close($in);
    # <edit:example> will include the example document's text inline, so we
    # depend on that.
    while ($html =~ /<edit:example[^>]*href=(["'])(.*?)\1[^>]*>/g) {
      $deps{"examples/$2"} = 1;
    }
    # Similarly for <edit:schema>.
    while ($html =~ /<edit:schema[^>]*element=(["'])(.*?)\1[^>]*>/g) {
      $deps{"$rngSnippetsOutputDir/$2"} = 1;
    }
    # An <edit:toc> or <edit:fulltoc> depends on all chapters, since a change in
    # <h1>, <h2> or <h3> element content will cause the ToC to change.
    if ($html =~ /<edit:(toc|fulltoc)[^>]*>/) {
      $deps{"$_.html"} = 1 for @allChapters;
    }
  }
  my @deps = sort keys %deps;
  my $lhs = "$outputDir/$target.html";
  $lhs .= " $outputDir/$target.xhtml" if hasRuby($target);
  mkdir $out unless -d $out;
  open(my $rule, '>', "$out/$target.d") or die "Cannot write $out/$target.d: $!";
  # NOTE(review): the rule names \$(DEPS) while spec.mk uses \$(DEPDIR) for
  # the same directory -- confirm which make variable is intended.
  print {$rule} "$lhs \$(DEPS)/$target.d : ", join(' ', @deps), "\n";
  close($rule) or die "Cannot write $out/$target.d: $!";
}

# Extracts the RelaxNG snippets from every *.rng file in $relaxNGDir.
#
# The content of each <div xml:id="NAME">...</div> is written to a file
# called NAME in $rngSnippetsOutputDir, and the names of all generated files
# are listed (sorted) in that directory's .cvsignore.  Dies if a directory
# or file cannot be read or written.
sub snippets() {
  my @ignore = ();
  opendir(my $dir, $relaxNGDir) or die "Cannot read directory $relaxNGDir: $!";
  my @schemas = grep { /\.rng$/ } readdir $dir;
  closedir($dir);
  for my $file (@schemas) {
    open(my $in, '<', "$relaxNGDir/$file") or die "Cannot read $relaxNGDir/$file: $!";
    my $rng = join('', <$in>);
    close($in);
    # XXX Regular expression matching again: this relies on xml:id being the
    #     first attribute of the <div> and on the <div>s not being nested.
    while ($rng =~ /<div\s+xml:id=(["'])(.*?)\1[^>]*>[ \t]*\n?(.*?)[ \t]*<\/div>/gs) {
      # Copy the captures before doing anything else with them.
      my ($id, $snippet) = ($2, $3);
      open(my $snip, '>', "$rngSnippetsOutputDir/$id") or die "Cannot write $rngSnippetsOutputDir/$id: $!";
      print {$snip} $snippet;
      close($snip) or die "Cannot write $rngSnippetsOutputDir/$id: $!";
      push @ignore, $id;
    }
  }
  open(my $cvs, '>', "$rngSnippetsOutputDir/.cvsignore") or die "Cannot write $rngSnippetsOutputDir/.cvsignore: $!";
  print {$cvs} map { "$_\n" } sort @ignore;
  close($cvs) or die "Cannot write $rngSnippetsOutputDir/.cvsignore: $!";
}

# The document that E() and T() create their nodes in.
our $doc;

# Creates an element in the XHTML namespace.
#   E(NAME, [ATTRIBUTES], CHILD...)
# NAME is the element name.  If the next argument is a hash reference, its
# entries become the element's attributes.  All remaining arguments are
# appended as children, in order.  Returns the new element.
sub E {
  my ($name, @children) = @_;
  my $element = $doc->createElementNS($HTML_NS, $name);
  if (@children && ref($children[0]) eq 'HASH') {
    my $attributes = shift @children;
    for my $attr (keys %$attributes) {
      $element->setAttribute($attr, $attributes->{$attr});
    }
  }
  for my $child (@children) {
    $element->appendChild($child);
  }
  return $element;
}

# Creates a text node holding the given string in the current document.
sub T {
  my ($text) = @_;
  return $doc->createTextNode($text);
}

# %secs maps the id of an <h2>/<h3> to its section number string.
# @secs holds the running section counter for each heading level.
# $chapterNumber is the number (or appendix letter) of the current chapter.
our %secs = ();
our @secs = ();
our $chapterNumber;

# Walks the element tree below the given node and records, in %secs, the
# section number of every XHTML <h2> and <h3>, keyed by the heading's id.
# Dies if such a heading has no id attribute.
sub findSections {
  my $node = shift;
  return unless $node->nodeType == 1;

  my $uri = $node->namespaceURI;
  my $name = $node->localName;
  if ($uri && $uri eq $HTML_NS && $name =~ /^h([2-3])$/) {
    my $level = $1;
    die "Element <$name> has no ID ($target.html, line " . $node->line_number . ')' unless $node->hasAttribute('id');
    my $id = $node->getAttribute('id');
    # Count this heading and restart the numbering of all deeper levels.
    $secs[$level]++;
    for my $deeper ($level + 1 .. 6) {
      $secs[$deeper] = 0;
    }
    # Levels whose counter is zero do not appear in the number.
    $secs{$id} = join('.', $chapterNumber, grep { $_ } @secs);
  }

  for my $child ($node->childNodes) {
    findSections($child);
  }
}

# State shared between generateFullToc() and its callers, which set it up
# with local():
#   $tocUL        the <ul> that the next table of contents entry goes into
#   $shortnumber  chapter number or appendix letter prefixed to each number
our $tocUL;
our $shortnumber;

#   $chapterlink  what the '#id' fragment of each entry's href is appended to
#   $chapter      chapter name used in the "has no ID" error message
our $chapterlink;
our $chapter;

sub generateFullToc($);

# Walks the element tree below the given node and appends one <li> to the
# table of contents for every XHTML <h2> and <h3> it finds.  <h3> entries go
# into a nested <ul> inside the <li> of the preceding <h2>.  @secs holds the
# running section counters.  Dies if a heading has no id attribute.
sub generateFullToc($) {
  my $n = shift;
  if ($n->nodeType == 1) {
    if ($n->namespaceURI && $n->namespaceURI eq $HTML_NS && $n->localName =~ /^h([2-3])/) {
      my $hlevel = $1;
      die "Element <" . $n->localName . "> has no ID ($chapter.html, line " . $n->line_number . ')' unless $n->hasAttribute('id');
      my $id = $n->getAttribute('id');
      $secs[$hlevel]++;
      # Close every deeper level that is still open: write the indentation
      # that precedes its closing tag, step back up to the enclosing <ul>
      # (the parent of the <li> holding the nested <ul>) and reset its
      # counter.
      for (my $i = 6; $i >= $hlevel + 1;  $i--) {
        if ($secs[$i] != 0) {
          $tocUL->appendChild(T('    ' x ($i - 2)));
          $tocUL = $tocUL->parentNode->parentNode;
          $secs[$i] = 0;
        }
      }
      # First heading at a nested level: open a new <ul> inside the most
      # recent <li>.  The last child of $tocUL is the newline text node
      # written after that <li>, hence lastChild->previousSibling.
      if ($secs[$hlevel] == 1 && $hlevel != 2) {
        my $ul2 = E('ul', { class => 'toc' },
                    T("\n"));
        $tocUL->lastChild->previousSibling->appendChild(T("\n" . ('    ' x ($hlevel - 2))));
        $tocUL->lastChild->previousSibling->appendChild($ul2);
        $tocUL = $ul2;
      }
      # Levels whose counter is zero do not appear in the number.
      my $num = join('.', $shortnumber, grep { $_ } @secs);
      # Trim the heading text and collapse runs of whitespace.
      my $secTitle = $n->textContent;
      $secTitle =~ s/^\s+//;
      $secTitle =~ s/\s+$//;
      $secTitle =~ s/\s\s+/ /g;
      my $link = E('a', { href => "$chapterlink#$id" }, T($secTitle));
      # The text nodes around the <li> only indent the serialized markup.
      $tocUL->appendChild(T('  ' . ('    ' x ($hlevel - 2))));
      $tocUL->appendChild(E('li', T($num . ' '), $link));
      $tocUL->appendChild(T("\n"));
    }
    # Recurse into the children.
    $n = $n->firstChild;
    while ($n) {
      generateFullToc($n);
      $n = $n->nextSibling;
    }
  }
}

# Returns the neighbours of the given chapter in the sequence of chapters
# followed by appendices, as the list (PREVIOUS, NEXT).  PREVIOUS is undef
# for the first entry, NEXT is absent for the last one, and the list is
# empty when the name is not in the sequence at all.
sub findPrevNext($) {
  my ($wanted) = @_;
  my @sequence = (@chapters, @appendices);
  my @neighbours = ();
  for my $idx (0 .. $#sequence) {
    next unless $sequence[$idx] eq $wanted;
    $neighbours[0] = $sequence[$idx - 1] if $idx != 0;
    $neighbours[1] = $sequence[$idx + 1] if $idx < $#sequence;
    last;
  }
  return @neighbours;
}

# Returns the href used to link to the given chapter: the bare chapter name
# for chapters listed in @chaptersWithRuby, the name plus '.html' otherwise.
sub chapterLink($) {
  my ($name) = @_;
  my $withRuby = grep { $_ eq $name } @chaptersWithRuby;
  return $withRuby ? $name : $name . '.html';
}

# Builds the navigation block put at the top and bottom of a chapter: a
# <div> whose class is the optional argument (default 'header'), holding
# the header title and publication date followed by a list of links (Top,
# Contents, Previous, Next, Elements, Attributes -- each only where it
# applies).  The text nodes between the entries only indent the markup.
sub makeHeader {
  my $class = shift || 'header';
  my ($prev, $next) = findPrevNext($target);

  # One <li><a href="HREF">LABEL</a></li> navigation entry.
  my $entry = sub {
    my ($href, $label) = @_;
    return E('li', E('a', { href => $href }, T($label)));
  };

  my @items = (T("\n    "), $entry->("$specFile.html", 'Top'), T("\n"));
  if ($tocFile ne '') {
    push @items, T('    '), $entry->("$tocFile.html", 'Contents'), T("\n");
  }
  if (defined $prev) {
    push @items, T('    '), $entry->(chapterLink($prev), 'Previous'), T("\n");
  }
  if (defined $next) {
    push @items, T('    '), $entry->(chapterLink($next), 'Next'), T("\n");
  }
  if ($generateTables) {
    push @items, T('    '), $entry->("$elementTableOutput.html", 'Elements'),
                 T("\n    "), $entry->("$attributeTableOutput.html", 'Attributes'),
                 T("\n");
  }
  push @items, T('  ');

  my $heading;
  if ($headerTitle eq '') {
    $heading = "\n  $publicationDate\n  ";
  } else {
    $heading = "\n  $headerTitle – $publicationDate\n  ";
  }
  return E('div', { class => $class }, T($heading), E('ul', @items), T("\n"));
}

# Returns the whole content of the named file as one string.
#
# A file that starts with a UTF-16 byte order mark is decoded as UTF-16.
# Any other file is read through the :crlf layer, so CRLF line endings come
# back as plain newlines.  Dies if the file cannot be opened.
sub fileContents($) {
  my $fn = shift;

  # Look at the first two bytes for a byte order mark.  This is done by
  # hand rather than by opening the file as UTF-16 and waiting for the
  # decoder to die, since whether the decoder dies on input without a byte
  # order mark depends on the Encode version.
  open(my $probe, '<:raw', $fn) or die "Cannot read $fn: $!";
  my $start = '';
  read($probe, $start, 2);
  close($probe);
  # unpack() and numeric constants are used so that the comparison does not
  # depend on how string literals are interpreted in the calling file.
  my $mark = length($start) == 2 ? unpack('n', $start) : 0;
  my $isUTF16 = $mark == 0xFEFF || $mark == 0xFFFE;

  my $mode = $isUTF16 ? '<:encoding(UTF-16)' : '<:crlf';
  open(my $fh, $mode, $fn) or die "Cannot read $fn: $!";
  my $contents = join('', <$fh>);
  close($fh);
  return $contents;
}

# The XML::LibXML parser used to read chapters and markup fragments.
our $parser;

# Processes the given node and, recursively, the children it had before it
# was processed:
#   - <edit:*> elements are replaced by the content they stand for (version
#     links, dates, examples, schema snippets, tables of contents, ...); an
#     unknown <edit:*> element is a fatal error;
#   - <head> has its <link> elements replaced by the configured stylesheets
#     plus contents/prev/next links;
#   - in numbered chapters <body> gets the navigation header and footer,
#     <h1> gets the chapter number and is followed by the chapter's table of
#     contents, and <h2>/<h3> get their section numbers.
sub publishNode {
  my $n = shift;
  if ($n->nodeType == 1) {
    # Remember the first child now: $n may be replaced or given new children
    # below, and those new nodes must not be processed again.
    my $firstChild = $n->firstChild;
    my $ns = $n->namespaceURI;
    $ns = '' unless defined $ns;   # elements without a namespace
    my $ln = $n->localName;
    if ($ns eq $EDIT_NS) {
      if ($ln eq 'thisversion') {
        my $a = E('a', { href => $thisVersion },
                  T($thisVersion));
        $n->parentNode->replaceChild($a, $n);
      } elsif ($ln eq 'date') {
        $n->parentNode->replaceChild(T($longdate), $n);
      } elsif ($ln eq 'example') {
        my $title = $n->getAttribute('title');
        my $href = $n->getAttribute('href');
        $title = $href unless defined $title;
        my $link = $n->hasAttribute('link') && lc($n->getAttribute('link')) eq 'yes';
        my $image = $n->hasAttribute('image') && lc($n->getAttribute('image')) eq 'yes';
        # The image has the name of the example with its last four
        # characters (the extension) replaced by '.png'.
        my $div = E('div', { class => 'example' },
                    E('div', { class => 'exampleheader' },
                      E('strong', T('Example:')),
                      T(' '),
                      ($link ? E('a', { href => "examples/$href" },
                                 T($title))
                             : T($title))),
                    E('div', { class => 'examplesource' },
                      E('pre', T(fileContents("examples/$href")))),
                    ($image ? E('div', { class => 'exampleimage' },
                                E('img', { src => 'examples/' . substr($href, 0, -4) . '.png', alt => "Rendering of $href" }))
                            : ()));
        $n->parentNode->replaceChild($div, $n);
      } elsif ($ln eq 'schema') {
        my $element = $n->getAttribute('element');
        # The snippet is read from the directory that snippets() writes to
        # and that deps() records as the dependency.
        # NOTE(review): unlike in the example markup above, the source <div>
        # is nested inside the header <div> here -- confirm that the
        # stylesheets expect this.
        my $div = E('div', { class => 'schema' },
                    E('div', { class => 'schemaheader' },
                      E('strong', T('Schema:')),
                      T(" $element"),
                    E('div', { class => 'schemasource' },
                      E('pre', T(fileContents("$rngSnippetsOutputDir/$element"))))));
        $n->parentNode->replaceChild($div, $n);
      } elsif ($ln eq 'previousversion') {
        my $a = E('a', { href => $previousVersion },
                  T($previousVersion));
        $n->parentNode->replaceChild($a, $n);
      } elsif ($ln eq 'latestversion') {
        my $a = E('a', { href => $latestVersion },
                  T($latestVersion));
        $n->parentNode->replaceChild($a, $n);
      } elsif ($ln eq 'latestrec') {
        my $a = E('a', { href => $latestRec },
                  T($latestRec));
        $n->parentNode->replaceChild($a, $n);
      } elsif ($ln eq 'maturity') {
        my $m = $maturity;
        # Every working-draft flavour is presented as a plain Working Draft.
        # The alternatives are grouped so that the anchors apply to each.
        if ($m =~ /^(?:WD|FPWD|LCWD|FPWDLC)$/) {
          $m = 'WD';
        }
        if ($m eq 'ED') {
          $m = "Editor’s Draft";
        } elsif ($m eq 'WD') {
          $m = 'Working Draft';
        } elsif ($m eq 'CR') {
          $m = 'Candidate Recommendation';
        } elsif ($m eq 'PR') {
          $m = 'Proposed Recommendation';
        } elsif ($m eq 'PER') {
          $m = 'Proposed Edited Recommendation';
        } elsif ($m eq 'REC') {
          $m = 'Recommendation';
        } elsif ($m eq 'WG-NOTE') {
          $m = 'Working Group Note';
        }
        $n->parentNode->replaceChild(T($m), $n);
      } elsif ($ln eq 'singlemulti' && $singleChapterVersion) {
        # NOTE(review): without a single page version <edit:singlemulti>
        # falls through to the "Unknown element" error below -- confirm that
        # this is intended.
        my $frag = $parser->parse_balanced_chunk("<p id='single-multi'>A non-normative <a href='$singleChapterVersion.html'>single page version</a> of this document is also available.</p>");
        $n->parentNode->replaceChild($frag, $n);
      } elsif ($ln eq 'copyright') {
        my $frag = $parser->parse_balanced_chunk('<p class="copyright"><a href="http://www.w3.org/Consortium/Legal/ipr-notice#Copyright">Copyright</a> © ' . substr($publicationDate, 0, 4) . ' <a href="http://www.w3.org/"><acronym title="World Wide Web Consortium">W3C</acronym></a><sup>®</sup> (<a href="http://www.csail.mit.edu/"><acronym title="Massachusetts Institute of Technology">MIT</acronym></a>, <a href="http://www.ercim.org/"><acronym title="European Research Consortium for Informatics and Mathematics">ERCIM</acronym></a>, <a href="http://www.keio.ac.jp/">Keio</a>), All Rights Reserved. W3C <a href="http://www.w3.org/Consortium/Legal/ipr-notice#Legal_Disclaimer">liability</a>, <a href="http://www.w3.org/Consortium/Legal/ipr-notice#W3C_Trademarks">trademark</a> and <a href="http://www.w3.org/Consortium/Legal/copyright-documents">document use</a> rules apply.</p>');
        $n->parentNode->replaceChild($frag, $n);
      } elsif ($ln eq 'toc') {
        # Short table of contents: one entry per chapter and appendix, with
        # the title taken from the first <h1> of the chapter's source.
        my $ul;
        my $div = E('div', { class => 'toc' },
                    E('h2', { id => 'minitoc' },
                      T('Table of Contents')),
                    $ul = E('ul', { class => 'toc' }));
        for (my $i = 0; $i < @chapters + @appendices; $i++) {
          # Chapters are numbered 1, 2, ...; appendices are lettered A, B, ...
          my $number = $i >= @chapters ? 'Appendix ' . chr(65 + $i - @chapters) : $i + 1;
          my $chapter = $i >= @chapters ? $appendices[$i - @chapters] : $chapters[$i];
          my $doc2 = $parser->parse_file($chapter . '.html');
          my $xc = XML::LibXML::XPathContext->new($doc2);
          $xc->registerNs('h', $HTML_NS);
          my ($h1) = $xc->findnodes('//h:h1[1]');
          die "Could not find <h1> in $chapter.html" unless $h1;
          my $li = E('li',
                     T("$number "),
                     E('a', { href => chapterLink($chapter) },
                       T($h1->textContent)));
          $ul->appendChild($li);
        }
        $n->parentNode->replaceChild($div, $n);
      } elsif ($ln eq 'fulltoc') {
        # Full table of contents: as above, plus the sections of every
        # chapter as collected by generateFullToc().
        my $ul = E('ul', { class => 'toc' });
        for (my $i = 0; $i < @chapters + @appendices; $i++) {
          my $number;
          local $shortnumber;
          local $chapter;
          if ($i < @chapters) {
            $shortnumber = $i + 1;
            $number = $shortnumber;
            $chapter = $chapters[$i];
          } else {
            $shortnumber = chr(65 + $i - @chapters);
            $number = 'Appendix ' . $shortnumber;
            $chapter = $appendices[$i - @chapters];
          }
          local $chapterlink = chapterLink($chapter);
          my $doc2 = $parser->parse_file($chapter . '.html');
          my $xc = XML::LibXML::XPathContext->new($doc2);
          $xc->registerNs('h', $HTML_NS);
          my ($h1) = $xc->findnodes('//h:h1[1]');
          die "Could not find <h1> in $chapter.html" unless $h1;
          my $li = E('li',
                     T("$number "),
                     E('a', { href => $chapterlink },
                       T($h1->textContent)));
          my $ul2 = E('ul', { class => 'toc' },
                      T("\n"));
          local @secs = (0, 0, 0, 0, 0, 0, 0);
          local $tocUL = $ul2;
          generateFullToc($doc2->documentElement);
          # Indent the closing tag of each list level that is still open.
          for (my $i = 6; $i >= 2;  $i--) {
            if ($secs[$i] != 0) {
              $tocUL->appendChild(T('    ' x ($i - 2)));
            }
          }
          if ($ul2->firstChild) {
            $li->appendChild(T("\n"));
            $li->appendChild($ul2);
          }
          $ul->appendChild(T("\n"));
          $ul->appendChild($li);
        }
        $ul->appendChild(T("\n"));
        $n->parentNode->insertBefore($ul, $n);
        $n->parentNode->removeChild($n);
      } else {
        die "Unknown element <" . $n->nodeName . ">";
      }
    } elsif ($ns eq $HTML_NS) {
      if ($ln eq 'head') {
        # Drop every existing <link>, together with the text node in front
        # of it.
        my $m = $n->firstChild;
        while ($m) {
          if ($m->namespaceURI && $m->namespaceURI eq $HTML_NS && $m->localName eq 'link') {
            my $theLink = $m;
            $m = $m->nextSibling;
            if ($theLink->previousSibling && $theLink->previousSibling->nodeType == 3) {
              $n->removeChild($theLink->previousSibling);
            }
            $n->removeChild($theLink);
          } else {
            $m = $m->nextSibling;
          }
        }
        if ($n->lastChild && $n->lastChild->nodeType == 3) {
          $n->removeChild($n->lastChild);
        }
        $n->appendChild(T("\n"));
        for my $stylesheet (@stylesheets) {
          my $link = E('link', { rel => 'stylesheet', type => 'text/css', href => $stylesheet });
          $n->appendChild(T('  '));
          $n->appendChild($link);
          $n->appendChild(T("\n"));
        }
        my ($prev, $next) = findPrevNext($target);
        if ($tocFile) {
          my $link = E('link', { rel => 'contents', href => "$tocFile.html" });
          $n->appendChild(T('  '));
          $n->appendChild($link);
          $n->appendChild(T("\n"));
        }
        if ($prev) {
          my $link = E('link', { rel => 'prev', href => chapterLink($prev) });
          $n->appendChild(T('  '));
          $n->appendChild($link);
          $n->appendChild(T("\n"));
        }
        if ($next) {
          my $link = E('link', { rel => 'next', href => chapterLink($next) });
          $n->appendChild(T('  '));
          $n->appendChild($link);
          $n->appendChild(T("\n"));
        }
      } elsif ($ln eq 'body' && defined($chapterNumber) && $generateHeaders) {
        $n->insertBefore(makeHeader(), $n->firstChild);
        $n->insertBefore(T("\n\n"), $n->firstChild);
        $n->appendChild(T("\n"));
        $n->appendChild(makeHeader('footer'));
        $n->appendChild(T("\n\n"));
      } elsif ($ln eq 'h1' && defined $chapterNumber) {
        # Links in the chapter's own table of contents are bare '#id'
        # fragments.
        local $chapterlink = '';
        local $shortnumber = undef;
        for (my $i = 0; $i < @chapters; $i++) {
          if ($chapters[$i] eq $target) {
            $shortnumber = $i + 1;
            last;
          }
        }
        unless (defined $shortnumber) {
          for (my $i = 0; $i < @appendices; $i++) {
            if ($appendices[$i] eq $target) {
              $shortnumber = chr(65 + $i);
              last;
            }
          }
        }
        if ($n->firstChild) {
          $n->insertBefore(T("$shortnumber "), $n->firstChild);
        } else {
          $n->appendChild(T("$shortnumber "));
        }
        local @secs = (0, 0, 0, 0, 0, 0, 0);
        my $ul2 = E('ul', { class => 'toc' },
                    T("\n"));
        local $tocUL = $ul2;
        # generateFullToc() appends '.html' itself when it reports a heading
        # without an id.
        local $chapter = $target;
        generateFullToc($doc->documentElement);
        for (my $i = 6; $i >= 2;  $i--) {
          if ($secs[$i] != 0) {
            $tocUL->appendChild(T('    ' x ($i - 2)));
          }
        }
        # Only add the table of contents if it has entries; the first child
        # of $ul2 is always the newline it was created with.
        if ($ul2->firstChild->nextSibling) {
          my $h2 = E('h2', { id => 'toc' },
                     T('Contents'));
          if ($n->nextSibling) {
            # Each node is inserted directly after the <h1>, so they are
            # added in reverse order.
            $n->parentNode->insertBefore($ul2, $n->nextSibling);
            $n->parentNode->insertBefore(T("\n"), $n->nextSibling);
            $n->parentNode->insertBefore($h2, $n->nextSibling);
            $n->parentNode->insertBefore(T("\n\n"), $n->nextSibling);
          } else {
            $n->parentNode->appendChild(T("\n\n"));
            $n->parentNode->appendChild($h2);
            $n->parentNode->appendChild(T("\n"));
            $n->parentNode->appendChild($ul2);
          }
        }
      } elsif ($ln =~ /^h[2-3]$/ && defined $chapterNumber) {
        # Prefix the heading with the section number that findSections()
        # recorded for its id.  The tests look at the heading's first child,
        # not at the heading element itself.
        my $number = $secs{$n->getAttribute('id')};
        my $first = $n->firstChild;
        if ($first && $first->nodeType == 3) {
          $first->setData($number . ' ' . $first->data);
        } elsif (!$first) {
          $n->appendChild(T($number));
        } else {
          $n->insertBefore(T($number . ' '), $first);
        }
      }
    }

    # Process the original children; the next sibling is fetched before each
    # call because the call may replace the child.
    $n = $firstChild;
    while ($n) {
      my $nextSibling = $n->nextSibling;
      publishNode($n);
      $n = $nextSibling;
    }
  }
}

# Recursively replaces ruby markup with plain <span> elements.
#
# $n is an XML::LibXML node.  Any element in the $HTML_NS namespace whose
# local name is ruby, rb, rp or rt is renamed to 'span', and its original
# local name is recorded in the class attribute (appended after a space when
# a class attribute is already present).  All child nodes of an element are
# then processed the same way; non-element nodes are left untouched.
sub removeRuby {
  my $n = shift;
  if ($n->nodeType == 1) {    # 1 == element node
    if ($n->namespaceURI && $n->namespaceURI eq $HTML_NS) {
      my $ln = $n->localName;
      if ($ln =~ /^r(uby|[bpt])$/) {
        $n->setNodeName('span');
        # Keep the old element name as a class so the span can still be
        # distinguished after renaming.
        if ($n->hasAttribute('class')) {
          $n->setAttribute('class', $n->getAttribute('class') . " $ln");
        } else {
          $n->setAttribute('class', $ln);
        }
      }
    }
    $n = $n->firstChild;
    while ($n) {
      removeRuby($n);
      $n = $n->nextSibling;
    }
  }
}

# Publishes the chapter named by the global $target.
#
# Parses "$target.html", determines the chapter number (the 1-based position
# of $target in @chapters, or a letter starting at 'A' for its position in
# @appendices; undefined when it is in neither list), calls findSections when
# a chapter number exists, and then runs publishNode over the document.
#
# The serialized document is written to "$outputDir/$target.html" with an
# XHTML 1.0 Transitional doctype.  When hasRuby($target) is true, a copy with
# an XHTML 1.1 doctype is first written to "$outputDir/$target.xhtml", after
# which removeRuby is applied to the tree used for the .html output.
#
# $parser, $chapterNumber, $doc, @secs and %secs are localized here so that
# the subs called from this one see the values for the duration of the call.
#
# Dies if an output file cannot be opened or closed successfully.
sub publish() {
  local $parser = XML::LibXML->new;
  $parser->line_numbers(1);
  $parser->no_network(1);
  local $chapterNumber;
  for (my $i = 0; $i < @chapters; $i++) {
    if ($target eq $chapters[$i]) {
      $chapterNumber = $i + 1;
      last;
    }
  }
  unless (defined $chapterNumber) {
    for (my $i = 0; $i < @appendices; $i++) {
      if ($target eq $appendices[$i]) {
        $chapterNumber = chr(65 + $i);    # 65 == ord('A')
        last;
      }
    }
  }
  local $doc = $parser->parse_file("$target.html");
  local @secs = (0, 0, 0, 0, 0, 0, 0);
  local %secs = ();
  findSections($doc->documentElement) if defined $chapterNumber;
  publishNode($doc->documentElement);
  $doc->documentElement->setNamespaceDeclURI('edit', undef);
  if (hasRuby($target)) {
    # Drop the source DTD node; a doctype is written by hand below.
    for (my $n = $doc->firstChild; $n; $n = $n->nextSibling) {
      if ($n->nodeType == 14) { # XXX Why are DTD nodes type 14?
        $doc->removeChild($n);
        last;
      }
    }
    my $xml = $doc->toString(1, 'UTF-8');
    # The XML declaration is re-emitted explicitly when writing the file.
    $xml =~ s/^<\?xml[^>]*>\s*//;
    # Some Appendix C stuff.
    $xml =~ s/(<a\s[^>]*)\/>/$1><\/a>/g;
    $xml =~ s/<([bh]r)\/>/<$1 \/>/g;
    $xml =~ s/(<img\s[^>]*)\/>/$1 \/>/g;
    $xml =~ s{<div xmlns="http://www\.w3\.org/1999/xhtml" (class="(?:example|schema|toc)")>}{<div $1>}g;
    $xml =~ s{ xmlns:idl="http://berjon\.com/ns/svg-idl/"}{}g;
    $xml =~ s{ xmlns:c="http://berjon\.com/ns/xslt-config/"}{}g;
    $xml =~ s{<a xmlns:xsl="http://www\.w3\.org/1999/XSL/Transform"}{<a}g;
    # And a big hack to remove dodgy empty lists in ToCs!
    $xml =~ s{<ul class="toc">\s*<\/ul>}{}g;
    my $xhtmlPath = "$outputDir/$target.xhtml";
    open my $fh, '>', $xhtmlPath
      or die "Cannot open $xhtmlPath for writing: $!";
    binmode($fh, ":utf8");
    print {$fh} <<EOF;
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.1//EN' 'http://www.w3.org/TR/2001/REC-xhtml11-20010531/DTD/xhtml11.dtd'>
EOF
    print {$fh} $xml;
    # Buffered write errors only surface at close, so check it.
    close $fh or die "Cannot write $xhtmlPath: $!";
    removeRuby($doc->documentElement);
  }
  # Drop the source DTD node (already gone if the ruby branch above ran).
  for (my $n = $doc->firstChild; $n; $n = $n->nextSibling) {
    if ($n->nodeType == 14) { # XXX Why are DTD nodes type 14?
      $doc->removeChild($n);
      last;
    }
  }
  my $xml = $doc->toString(1, 'UTF-8');
  $xml =~ s/^<\?xml[^>]*>\s*//;
  # Some Appendix C stuff.
  $xml =~ s/(<a\s[^>]*)\/>/$1><\/a>/g;
  $xml =~ s/<([bh]r)\/>/<$1 \/>/g;
  $xml =~ s/(<img\s[^>]*)\/>/$1 \/>/g;
  # Unlike the .xhtml output, CDATA sections are replaced by escaped text.
  $xml =~ s/<!\[CDATA\[(.*?)\]\]>/escape($1)/ges;
  $xml =~ s{<div xmlns="http://www\.w3\.org/1999/xhtml" (class="(?:example|schema|toc)")>}{<div $1>}g;
  # exclude-result-prefixes help me!
  $xml =~ s{ xmlns:idl="http://berjon\.com/ns/svg-idl/"}{}g;
  $xml =~ s{ xmlns:c="http://berjon\.com/ns/xslt-config/"}{}g;
  $xml =~ s{<a xmlns:xsl="http://www\.w3\.org/1999/XSL/Transform"}{<a}g;
  # And a big hack to remove dodgy empty lists in ToCs!
  $xml =~ s{<ul class="toc">\s*<\/ul>}{}g;
  my $htmlPath = "$outputDir/$target.html";
  open my $fh, '>', $htmlPath
    or die "Cannot open $htmlPath for writing: $!";
  binmode($fh, ":utf8");
  print {$fh} <<EOF;
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.0 Transitional//EN' 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'>
EOF
  print {$fh} $xml;
  close $fh or die "Cannot write $htmlPath: $!";
}

# Deletes the files this script generates:
#   - "$outputDir/<chapter>.html" for every entry of @allChapters, plus the
#     matching ".xhtml" file for chapters where hasRuby() is true;
#   - "$outputDir/$singleChapterVersion.html" when $singleChapterVersion is
#     not the empty string;
#   - one file in $rngSnippetsOutputDir per name from getSnippetNames();
#   - 'idl.xml' in the current directory when @bindings is non-empty.
# Failures to delete (e.g. the file does not exist) are ignored.
sub clean() {
  foreach my $chapter (@allChapters) {
    my $base = "$outputDir/$chapter";
    unlink "$base.xhtml" if hasRuby($chapter);
    unlink "$base.html";
  }
  unless ($singleChapterVersion eq '') {
    unlink "$outputDir/$singleChapterVersion.html";
  }
  foreach my $snippet (getSnippetNames()) {
    unlink "$rngSnippetsOutputDir/$snippet";
  }
  unlink 'idl.xml' if @bindings;
}

# Returns the list of snippet names found in the RELAX NG files.
#
# Every file in $relaxNGDir whose name ends in ".rng" is read, and the value
# of each xml:id attribute that directly follows "<div" is collected, in
# the order encountered.
#
# Returns an empty list when $relaxNGDir cannot be opened, so callers such as
# clean() keep working when there is no schema directory.  A file that cannot
# be read is skipped with a warning.
sub getSnippetNames() {
  my @names;
  opendir(my $dir, $relaxNGDir) or return @names;
  my @rngFiles = grep { /\.rng$/ } readdir $dir;
  closedir $dir;
  for my $file (@rngFiles) {
    my $path = "$relaxNGDir/$file";
    my $in;
    unless (open $in, '<', $path) {
      warn "Cannot read $path: $!\n";
      next;
    }
    my $rng = join('', <$in>);
    close $in;
    # $1 is the quote character, so either quoting style is accepted.
    while ($rng =~ /<div\s+xml:id=(["'])(.*?)\1[^>]*>/g) {
      push @names, $2;
    }
  }
  return @names;
}

# Returns a copy of the given string with the characters '&', '<' and '>'
# replaced by the entity references '&amp;', '&lt;' and '&gt;'.  Ampersands
# that are already part of an entity reference are escaped again.
sub escape($) {
  my ($text) = @_;
  my %entityFor = (
    '&' => '&amp;',
    '<' => '&lt;',
    '>' => '&gt;',
  );
  $text =~ s/([&<>])/$entityFor{$1}/g;
  return $text;
}

