/*

Various rules to pattern match against Wiki pages and give us the
content we want.  Mostly focussed on the DataPage, the main control
page for a document.


http://www.w3.org/2005/rules/wg/wiki/UCR?action=raw&rev=21

  myDoc tr:abstract
  *     a hierdoc:Section
        hierdoc:header ....

        tree:parent
	tree:lastChild   [[ should just be provied by tree-closure ]]
	tree:firstChild  [[   ''    ]]
	tree:laterSibling
	tree:earlierSibling   [[ ''  ]]

*/


:- ensure_loaded(library('http/http_open')).
:- ensure_loaded(library(memfile)).
:- ensure_loaded(library(debug)).
:- ensure_loaded(library(sgml)).
:- ensure_loaded(library('semweb/rdf_db')).


:- rdf_register_ns(tr, 'http://www.w3.org/2006/10/trlike#').
:- rdf_register_ns(tree, 'http://www.w3.org/2006/10/tree#').
:- rdf_register_ns(hierdoc, 'http://www.w3.org/2006/10/hierdoc#').
:- rdf_register_ns(ht, 'http://www.w3.org/2006/10/ht#').
:- rdf_register_ns(scrape, 'http://www.w3.org/2006/10/scrape#').


file_search_path(semwalker, '/home/sandro/semwalker').
:- ensure_loaded(semwalker(qtest)).

scrape_data_page(Doc, DataPage, _Date) :-
	wiki_content(DataPage, TopContent),
	Doc=doc(DataPage, TopContent).

scrape_to_rdf(Id, DataPage, Date) :-
	scrape_data_page(Doc, DataPage, Date),
	Source=DataPage,	% should be mangled in some way!!
	%doc(Doc, thisVersion, Id),
	% doc(Doc, pubDate, Date),
	% rdf_assert(Id, tr:foo, Date).
	namify(Id),
	scrape_abstract(Doc, Id),
	scrape_toc(Doc, Id),
	forall(  (   rdf(Section, rdf:type, hierdoc:'Section'),
		     rdf(Section, scrape:source, Source)
		 ),
		 true
	      ).
		 
		 

stech(Id) :-
	rdf_retractall(_,_,_),
	scrape_to_rdf(Id, 'http://www.w3.org/2005/rules/wg/wiki/CORE', _).
	
doc(doc(_, XML), title, Value) :-
	content_match(XML, element(h1, _, Value)).


doc(doc(_, XML), editors, Value) :-
	must((
	findall(XX,
		(match_dtdd(_, XML, 'Editor', X),
		once(html:sgml_compact(X, XX)))
/*		(
		content_match(XML, [element(dt, _, ['Editor']),
				    element(dd, _,
					    [ element(p, _, X) ]
					   )
				   ]),
		  debug(x, 'XML: ~q', XML),

		) */
		,
		ValueUnsorted),
        sort(ValueUnsorted, Value),	      
	Value = [_|_]
	     )).


doc(doc(_, XML), toc, Value) :-
	must(glean_toc(XML, Value)).

doc(doc(URI, _), uri_prefix, Value) :-
	atom_concat(URI, '/', Value).

doc(doc(_, XML), abstract, Value) :-
	once((
	Key='Abstract',
	once((content_match(XML, [element(h2, _, [Key]) | Rest]),
	      append(Text, [element(h2,_,_)|_], Rest))),
	internalize_text(x, Text, Value))).

/*
doc(doc(_, XML), status, Value) :-
	(Key='Status of this Document' ; Key='Status Of This Document'),
	content_match(TopContent, [element(h3, _, [Key]) | Rest]),
	append(Text, [element(h3,_,_)|_], Rest),
	internalize_text(Doc, Text, TextH),
*/

doc(Doc, Property, Value) :-
	Doc = doc(_, XML),
	property_label(PropertyLabel, Property),
	match_dtdd(Doc, XML, PropertyLabel, V1),
	internalize_text(x, V1, Value).

doc(Doc, className, Value) :-
	doc(Doc, classAbbr, A),
	document_class_name(A, Value).

doc(Doc, classAbbr, Value) :-
	doc(Doc, classAbbrText, T),
	T = [Value].

doc(_, P, _) :-
        format(user_error, 'WARNING: missing ~q~n', [P]),
	fail.

scrape_abstract(Doc, Id) :-
	Key='Abstract',
	Doc=doc(_, XML),
	once(
	     (
	       content_match(XML, [element(h2, _, [Key]) | Rest]),
	       append(Text, [element(h2,_,_)|_], Rest)
	     )
	    ),
	clean_text(Doc, Text, Value),
	debug(scrape, 'abstract text: ~q', [Text]),
	debug(scrape, 'abstract text internalized: ~q', [Value]),
	% rdf_assert(Id, tr:abstract, literal(type(rdf:'XMLLiteral', Value))).
	describe(Value, ValObj),
	rdf_assert(Id, tr:abstract, ValObj).



document_class_name('ED', 'Editor\'s Draft').
document_class_name('WD', 'Working Draft').
document_class_name('LC', 'Last Call Working Draft').
document_class_name('CR', 'Candidate Recommendation').
document_class_name('PR', 'Proposed Recommendation').
document_class_name('REC', 'Recommendation').

property_label('Publication Date', pubDate).
property_label('Comments Due By', commentDate).
property_label('Request For Comments', callForComments).
property_label('Document Class Code', classAbbrText).
property_label('Status Notes', otherStatus).
property_label('Editor', editor).
property_label('This Version', thisVersion).
property_label('Latest Version', latestVersion).
property_label('Previous Version', previousVersion).

%  match_dl(XML, DT, DD)
match_dtdd(_Doc, XML, DT, Value) :-
	content_match(XML, [element(dt, _, [DT]), 
		   	    element(dd, _, Noisy)
			   | _ ]),
	once(clean_content(Noisy, Value)).

qtest :-
	clean_content([element(p, [class=line886], ['WD ', element(span, [class=anchor, id='line-10'], [])])], X),
	X = ['WD'].

qtest :-
	clean_content([element(p, [class=line879], ['The Working Group is interested in public feedback on these Use Cases.   If there appear to be significant mistakes or omissions, please let us know, bearing in mind that these use cases need to be representative of a much larger set.  Please send to ', element(a, [class=mailto, href='mailto:public-rif-comments@w3.org'], ['public-rif-comments@w3.org']), ' (', element(a, [class=http, href='http://lists.w3.org/Archives/Public/public-rif-comments/'], ['public archive']), ').  If possible, please offer specific changes to the text which will address your concern. ', element(span, [class=anchor, id='line-8'], [])])], X),
	X = ['The Working Group is interested in public feedback on these Use Cases.   If there appear to be significant mistakes or omissions, please let us know, bearing in mind that these use cases need to be representative of a much larger set.  Please send to ', element(a, [class=mailto, href='mailto:public-rif-comments@w3.org'], ['public-rif-comments@w3.org']), ' (', element(a, [class=http, href='http://lists.w3.org/Archives/Public/public-rif-comments/'], ['public archive']), ').  If possible, please offer specific changes to the text which will address your concern. '].



% remove trailing junk elements
clean_content(List, Cleaner) :-
	append(Good, Junk, List),
	junk(Junk),
	clean_content(Good, Cleaner).
% remove leading junk elements.
clean_content(List, Cleaner) :-
	append(Junk, Good, List),
	junk(Junk),
	clean_content(Good, Cleaner).
% turn just-link into URI  [should we type it in some way?]
clean_content([element(a, Attrs, [Link])], Link) :-
	member(href=Link, Attrs).
% zoom in through wrappers, ignoring attributes
clean_content([element(E, _, Text)], CleanText) :-
	member(E, [p, div, span]),
	clean_content(Text, CleanText).
clean_content([Atom], [CleanedAtom]) :-
	atom(Atom),
	atom_concat(CleanerAtom, ' ', Atom),
	clean_content([CleanerAtom], [CleanedAtom]).
clean_content(X, X).

% @@ how are we losing images
% @@ give back section(....) instead of p(...) stuff
%%%   @@@parse_page

junk([element(span, _, [])]).
junk([' ']).
junk(['\n']).

qtest :-
	clean_content(['foo    '], ['foo']).

qtest :-
	junk([element(span, [class=anchor, id='line-8'], [])]).

%clean_content([element(p, [class=line903], [element(a, [class=http, href='http://www.w3.org/2005/rules/wg/ucr/draft-20060323'], ['http://www.w3.org/2005/rules/wg/ucr/draft-20060323']), ' ', element(span, [class=anchor, id='line-11'], [])]), element(span, [class=anchor, id='line-12'], [])]


xmatch_dtdd(XML, DT, Value) :-
	debug(dtdd, 'match_dtdd for ~q:', [DT]),
	once(
	  (content_match(XML, [element(dt, _, [DT]), 
		   	    element(dd, _,
				    [ element(p, _, [element(a, [href=X], _)|_]) | _]
)]),
	     debug(dtdd, '~q :: got link to ~q', [DT, X])
	  )
	    ;
	     (
	       content_match(XML, [element(dt, _, [DT]), 
			    element(dd, _,
				    [ element(p, _, X) ]
				   )
			   ]),
	     debug(dtdd, '~q :: got ~q', [DT, X])
	     )
	    ;

	     (
	       content_match(XML, [element(dt, _, [DT]), 
			    element(dd, _,
				    [ X ]
				   )
			   ]),
	     debug(dtdd, '~q :: got rough ~q', [DT, X])
	     )

	),
	%    [_G2532]=['ED ', element(span, [class=anchor, id='line-11'], [])] ? skip

	% fall back to returning only the first item in the list?  for
	% document-class.   should have more general white-space stuff!!!!
	(   [Y] = X,
	    debug(dtdd, 'Not Junking...', [])
	;   [Y | Rest] = X,
	    debug(dtdd, 'Junking ~q~n', [Rest])
	),
	(   atom(Y),
	    atom_concat(Trimmed, ' ', Y)
	->  Value = [Trimmed]
	;   Value = X
	),
	debug(dtdd, 'final value:  ~q~n', [Value]).


%%
%%  glean_toc(+XML, -Table)
%%
%%  pull out the Table of Contents section and turn it into
%%      [ section(tag, title, [subsections]), ... ]
%%

glean_toc(TopContent, TOC) :-
	debug(toc, ' looking for toc... ~n ', []),
	content_match(TopContent, [element(h2, _, ['Table Of Contents']) | Rest]),
	(  append(Text, [element(h2,_,_)|_], Rest)
	;  Text = Rest ),
	!,
	whiteless(Text, CleanText),
	clean_content(CleanText, X2),
	once(mapxml(clean_attrs(href_only), X2, X3)),
	debug(toc, 'got: ~q', [X3]),
	content_list(X3, TOC1),
	debug(toc, ' good content list: ~q~n ', [TOC1]),
	!,
	fill_out_toc(TOC1, TOC).


	     

scrape_toc(Doc, Id) :-
	Doc = doc(_, XML),
	must(glean_toc(XML, Value)),      % returning _   !!!
	must(assert_sections(prefix('http://example.com#'), Id, Value)).

prefix(Prefix, Tag, Full) :-
	atom_concat(Prefix, Tag, Full).

%  convert "section" tree into RDF links

%% assert_sections(+TagToURI, +Parent, +SectionList)

assert_sections(_ToURI, _Parent, []).
assert_sections(ToURI, Parent,[section(Tag, Title, Subsections) | Sections]) :-
	call(ToURI, Tag, This),
	rdf_assert(This, rdf:type, hierdoc:'Section'),
	rdf_assert(This, hierdoc:heading, Title),
	% permalink?    baseContent?    subject?
	(   Subsections = [ section(FirstChildTag, _, _) | _ ]
	->  assert_sections(ToURI, This, Subsections),
	    call(ToURI,FirstChildTag, FirstChild),
	    rdf_assert(This, tree:firstChild, FirstChild)
	;   true
	),
	rdf_assert(This, tree:parent, Parent),
	(   Sections = [ section(NextSectionTag, _, _) | _]
	->  call(ToURI, NextSectionTag, NextSection),
	    rdf_assert(This, tree:laterSibling, NextSection),
	    rdf_assert(NextSection, tree:earlierSibling, This),
	    assert_sections(ToURI, Parent, Sections)
	;   rdf_assert(Parent, tree:lastChild, This)
	).



% test (but it has side-effects) :  ?- assert_sections(foo, me,[section('A._RIF_Condition_Language', 'A. RIF Condition Language', [section('A.1_Basis%3A_Positive_Conditions', 'A.1 Basis: Positive Conditions', [])]), section('B._Extension%3A_RIF_Rule_Language', 'B. Extension: RIF Rule Language', [section('B.1_Horn_Rules', 'B.1 Horn Rules', [])])]).

%%
%%  walk the TOC, load any listed pages, and get THEIR TOC
%%  info....?

fill_out_toc(In, In).
	

% remove the annoying noise in XML!
whiteless(Dirty, Clean) :-
	(   Dirty = [ Head | Tail ]
	->  (   white(Head)
	    ->  whiteless(Tail, Clean)
	    ;   whiteless(Head, CleanHead),
		whiteless(Tail, CleanTail),
		Clean = [CleanHead | CleanTail]
	    )
	;   (   Dirty = element(Tag, Attrs, List)
	    ->  whiteless(List, CleanList),
		Clean = element(Tag, Attrs, CleanList)
	    ;   Clean = Dirty
	    )
	).

white(element(span, _, [])).   % remove stupid wiki cruft
white(' ').
white('\n').
white('\n\n').

/*
  whiteless([' ', q(3), '\n', [ '\n\n', x, [ ' ', p=p ], ' ', y ] ], X).
  */

empty_paragraphs([]).
empty_paragraphs([ element(p, [], []) | Rest ]) :-
	empty_paragraphs(Rest).

content_list([], []).
content_list([element(ol, [], LIs) | EmptyParagraphs], Entries) :-
	empty_paragraphs(EmptyParagraphs),
	!, 
	(  li_entry(LIs, Entries)
	-> true
	;   debug(toc, 'failed: ~q', [LIs]),
	    throw(li_failed2(LIs))
	).
content_list(X, _) :-
	%debug(toc, ' bad content list: ~q~n ', [X]).
	throw(bad_content_list(X)).

%

li_entry([], []).
li_entry([H|T], [section(Label, Name, SubsS)|TT]) :-
	(   H = element(li, [], [element(p, _,
					 [element(a, Attrs, [Text]) | _Junk]
					)
				| Subs])
	->  %atom_concat('/', Name, Text),
	    Name=Text,
	    member(href=URI, Attrs),
	    concat_atom(Parts, '/', URI),
	    last(Parts, Label)
	;   format(user_error, ' bad li: ~q~n ', [H]),
	    format(user_error, ' T: ~q~n ', [T]),
	    gensym('x', Label),
	    Name=skipped,
	    Subs=[]
	),
	!,
	%format('  got li: ~q, subs: ~q~n ', [Name, Subs]),
	content_list(Subs, SubsS),
	!,
	%format('  got li2: ~q, subs: ~q~n ', [Name, Subs]),
	li_entry(T, TT).
	%!,
	%format('  got li3: ~q, subs: ~q~n ', [Name, Subs]).
%li_entry(Failed, _) :-
%	format(' li failed: ~q~n ', [Failed]).
	






content_match(Tree, Tree).
content_match(element(_Tag, _Attrs, Kids), SubTree) :-
        content_match(Kids, SubTree).
content_match(Tree, SubTree) :-
	append(SubTree, _, Tree).
content_match([H|T], SubTree) :-
        (   content_match(H, SubTree)
        ;   content_match(T, SubTree)
        ).

content_match(Tree, Tree, 0).
%content_match(element(li, _Attrs, Kids), SubTree) :-
%        content_match(Kids, SubTree).
content_match(element(_Tag, _Attrs, Kids), SubTree, N) :-
        content_match(Kids, SubTree, N1),
	N is N1 + 1.
content_match([H|T], SubTree, N) :-
        (   content_match(H, SubTree, N)
        ;   content_match(T, SubTree, N)
        ).


%uri(doc('http://www.w3.org/2005/rules/wg/wiki/CORE',_) Tag, URI) :-
%	atom_concat('http://www.w3.org/2005/rules/wg/wiki/',Tag,URI).
uri(Doc, Tag, URI) :-
	doc(Doc, uri_prefix, Prefix),
	debug(uri, 'Prefix: ~q', [Prefix]),
	(   Prefix = 'OBSOLETE-http://www.w3.org/2005/rules/wg/wiki/CORE/'
	->  concat_atom(['http://www.w3.org/2005/rules/wg/wiki/', Tag], URI)
        ;   concat_atom([Prefix, Tag], URI)
	).

page_text(Doc, Tag, Shift, URI, H) :-
	format(user_error, 'retreiving ~q...~n', [Tag]),
	uri(Doc, Tag, URI),
	% format(user_error, 'retreiving 2 ~q...~n', [Tag]),
	wiki_content(URI, SgmlText),
	% format(user_error, 'retreiving 3 ~q...~n', [Tag]),
	mapxml(shift_heading_rank(Shift), SgmlText, SgmlText2),
	internalize_text(Doc, SgmlText2, H),
	format(user_error, 'Done.~n', []).


% this is really NOT something we want.  Ah well....
shift_heading_rank(Amt, T, T1) :-
	(   T = element(Tag, Attrs, Kids),
	    atom_concat('h', NA, Tag),
	    debug(atom_number, 'Atom NA: ~q, T: ', [NA, T]),
	    member(NA, ['1', '2', '3', '4', '5', '6']),
	    atom_number(NA, N)
	->  New is N + Amt,
	    (   New < 1
	    ->  N1 = 1
	    ;   N1 = New
	    ),
	    (   N1 > 6
	    ->  N2 = 6
	    ;   N2 = N1
	    ),
	    debug(atom_number, 'Atom NA2: ~q', [NA2]),
	    atom_number(NA2, N2),
	    atom_concat('h', NA2, Tag1),
	    T1 = element(Tag1, Attrs, Kids)
	;   T1 = T
	).

mapxml(F, T, T1) :-
	(   T = [Head | Tail]
	->  mapxml(F, Head, Head1),
	    mapxml(F, Tail, Tail1),
	    T1 = [Head1 | Tail1]
	;   (   T = element(Tag, Attrs, Kids)
            ->  mapxml(F, Kids, KidsX),
		TX = element(Tag, Attrs, KidsX),
		call(F, TX, T1)
	    ;   call(F, T, T1)
	    )
	).


qtest :-
	mapxml(=,
	       [a,b,c,element(x,y,[a,b])],
	       [a,b,c,element(x,y,[a,b])]
	      ).

qtest :-
	mapxml(shift_heading_rank(3),
	       [element(h1, [], was_h1),
		element(h2, [], was_h2),
		element(h3, [], was_h3),
		element(h4, [], was_h4),
		element(h5, [], was_h5),
		element(h6, [], was_h6)],
	       X),
	X = [element(h4, [], was_h1),
	     element(h5, [], was_h2),
	     element(h6, [], was_h3),
	     element(h6, [], was_h4),
	     element(h6, [], was_h5),
	     element(h6, [], was_h6)]
	.


clean_attrs(Pred, T, T1) :-
	(   T = element(Tag, Attrs, Kids)
	->  sublist(Pred, Attrs, Attrs1),
	    (   Attrs1 = [],
		Tag = a
	    ->  T1 = element(span, [], [])
	    ;   T1 = element(Tag, Attrs1, Kids)
	    )
	;   T = T1
	).

id_okay(A) :-
	(   A = (id=ID),
	    bad_id(ID)
	->  fail
	;   true
	).
href_only(href=_).

bad_id(content).
bad_id(top).
bad_id(bottom).
bad_id(X) :-
	atom_prefix(X, 'line-').

qtest :-
	mapxml(clean_attrs(id_okay),
	       [element(div, [a=a,b=b,id=content,id=bottom,c],
			[ element(div, [a=a,b=x,id=content,id=bottom,c],
				  [ element(div, [a,b=p,id=content,id=bottom,c],
					    [ hello ])
				  ])
			])],
	       X),
	X = [element(div, [a=a, b=b, c], [element(div, [a=a, b=x, c], [element(div, [a, b=p, c], [hello])])])].
	

%%  internalize_text(?Doc, +In, -Out)
%%
%%  Transform some page XML (already narrowed to the chunk we want) into
%%  our internal HTML structure, while doing some re-writing of things like
%%  "id" attributes, and making proper links to References.
%%


clean_text(_Doc, In, Out) :-
	%   same as internalize, but without the sgml_compact
	% option(refs(Refs), Doc, _),
	once(mapxml(clean_attrs(id_okay), In, In2)),
	once(mapxml(fix_img_src, In2, In3)),
	must(once(mapxml(fix_links_within_doc, In3, In4))),
	In4 = Out.

internalize_text(_Doc, In, Out) :-
	% option(refs(Refs), Doc, _),
	once(mapxml(clean_attrs(id_okay), In, In2)),
	once(mapxml(fix_img_src, In2, In3)),
	must(once(mapxml(fix_links_within_doc, In3, In4))),
	html:sgml_compact(In4, Out).

fix_img_src(In, Out) :-
	(   In = element(img, Attrs, Content),
	    delete(Attrs, src=Old, OtherAttrs)
	->  atom_concat('http://www.w3.org', Old, New),
	    debug(uri, 'localizing ~q to ~q', [Old, New]),
	    Out = element(img, [src=New, OtherAttrs], Content)
	;   Out = In
	).

fix_links_within_doc(In, Out) :-
	(   In = element(a, Attrs, Content),
	    delete(Attrs, href=Old, OtherAttrs),
	    atom_prefix(Old, '/2005/rules/wg/wiki/UCR'), % @@@@@ HACK!!!!
	    debug(uri, 'internal: ~q', [Old]),
	    concat_atom([_Main, Frag], '#', Old),
	    debug(uri, 'split, frag: ~q', [Frag])
	->  concat_atom(['', Frag], '#', New),
	    debug(uri, 'localizing ~q to ~q', [Old, New]),
	    Out = element(a, [href=New, OtherAttrs], Content)
	;   Out = In
	). 

wiki_content(Page, Out) :-
	atom_concat(Page, '?action=content', URI),
	(   aserved(URI, stream(Stream), _Seconds, _Bytes, [])
	->  dtd(html, DTD),
	    load_structure(stream(Stream),
		       Structure,
			   [dtd(DTD),
			    shorttag(false),
			    dialect(sgml),
			    max_errors(1000) %,
			   % entity(nbsp, ' '),
			   % entity(rarr, '\u8594')
			   ]),
	    Out=[element(div, _, Structure)]
	;   Out = [element(p, [style='color:red; font-size: 200%;'], ['Content Retrieval Failed on ', URI])],
	    format(user_error, 'Failed on ~q~n', [URI])
	).


		   
aserved(Address, Content, Seconds, Bytes, Options) :-
        get_time(Start),
        http_open(Address, Stream, Options),
        (   Content = stream(Stream)
        ->  true
        ;   new_memory_file(F),
            open_memory_file(F, write, MemStream),
            copy_stream_data(Stream, MemStream),
            close(Stream),
            close(MemStream),
            size_memory_file(F, Bytes),
            memory_file_convert(F, Content)
        ),
        get_time(Stop),
        Seconds is Stop - Start.

memory_file_convert(M, atom(A)) :-
        memory_file_to_atom(M, A),
        free_memory_file(M).
memory_file_convert(M, codes(C)) :-
        memory_file_to_codes(M, C),
        free_memory_file(M).
memory_file_convert(M, memory_file(M)).   % you free it, of course.


qtest :-
	doc(doc('http://www.w3.org/2005/rules/wg/wiki/UCR', [element(div, _G2076, [element(div, [dir=ltr, id=content, lang=en], [element(span, [class=anchor, id=top], []), '\n', element(span, [class=anchor, id='line-1'], []), element(p, [class=line903], [element(em, [], ['This is the root for the RIF ', element(strong, [], ['Use Cases and Requrements']), ' document.   The official document is extracted from the Wiki, starting at this page, by a program called "wikitr". ']), ' ', element(span, [class=anchor, id='line-2'], [])]), element(span, [class=anchor, id='line-3'], []), '\n', element(h1, [id='head-b571364636a8dc3c8318010609b5b233bb374888'], ['RIF Use Cases and Requirements']), '\n', element(span, [class=anchor, id='line-4'], []), element(dl, [], [element(dt, [], ['Editor']), element(dd, [], [element(p, [class=line879], ['Allen Ginsberg ', element(tt, [], ['<aginsberg@mitre.org>']), ' (Mitre) ', element(span, [class=anchor, id='line-5'], [])])]), element(dt, [], ['Editor']), element(dd, [], [element(p, [class=line879], ['David Hirtle ', element(tt, [], ['<David.Hirtle@nrc-cnrc.gc.ca.ca>']), ' (NRC) ', element(span, [class=anchor, id='line-6'], [])])]), element(dt, [], ['Publication Date']), element(dd, [], [element(p, [class=line886], ['23 March 2006 ', element(span, [class=anchor, id='line-7'], [])])]), element(dt, [], ['Request For Comments']), element(dd, [], [element(p, [class=line879], ['The Working Group is interested in public feedback on these Use Cases.   If there appear to be significant mistakes or omissions, please let us know, bearing in mind that these use cases need to be representative of a much larger set.  Please send to ', element(a, [class=mailto, href='mailto:public-rif-comments@w3.org'], ['public-rif-comments@w3.org']), ' (', element(a, [class=http, href='http://lists.w3.org/Archives/Public/public-rif-comments/'], ['public archive']), ').  If possible, please offer specific changes to the text which will address your concern. ', element(span, [class=anchor, id='line-8'], [])])]), element(dt, [], ['Comments Due By']), element(dd, [], [element(p, [class=line886], ['21 April 2006 ', element(span, [class=anchor, id='line-9'], [])])]), element(dt, [], ['Document Class Code']), element(dd, [], [element(p, [class=line886], ['WD ', element(span, [class=anchor, id='line-10'], [])])]), element(dt, [], ['This Version']), element(dd, [], [element(p, [class=line903], [element(a, [class=http, href='http://www.w3.org/2005/rules/wg/ucr/draft-20060323'], ['http://www.w3.org/2005/rules/wg/ucr/draft-20060323']), ' ', element(span, [class=anchor, id='line-11'], [])]), element(span, [class=anchor, id='line-12'], [])])]), '\n', element(h2, [id='head-dd242aa60adb677287ae406dea056789a7b90ea8'], ['Abstract']), '\n', element(span, [class=anchor, id='line-13'], []), element(p, [class=line886], ['This document specifies use cases and requirements for a Rule Interchange Format (RIF) that allows rules to be translated between rule languages and thus transferred between rule systems. ', element(span, [class=anchor, id='line-14'], [])]), element(span, [class=anchor, id='line-15'], []), element(p, [class=line879], ['The ', element(a, [class=http, href='http://www.w3.org/2005/rules/wg/charter#phase_1'], ['Phase 1']), ' version of this document presents use cases and requirements for the RIF in general.  The Phase 1 deliverables will provide an extensible base with which the use cases can be addressed, but it wont be until ', element(a, [class=http, href='http://www.w3.org/2005/rules/wg/charter#phase_2'], ['Phase 2']), ' that most of these use cases are directly addressed by the Working Group. ', element(span, [class=anchor, id='line-16'], [])]), element(span, [class=anchor, id='line-17'], []), '\n', element(h2, [id='head-aed31c6389e10e407fa1487e2b238af28c538004'], ['Table Of Contents']), '\n', element(span, [class=anchor, id='line-18'], []), element(ol, [type='1'], [element(li, [], [element(p, [class=line903], [element(a, [href='/2005/rules/wg/wiki/UCR/Use_Cases'], ['/Use Cases']), ' ', element(span, [class=anchor, id='line-19'], [])]), element(ol, [type='1'], [element(li, [], [element(p, [class=line903], [element(a, [href='/2005/rules/wg/wiki/UCR/Negotiating_eBusiness_Contracts_Across_Rule_Platforms'], ['/Negotiating eBusiness Contracts Across Rule Platforms']), ' ', element(span, [class=anchor, id='line-20'], [])])]), element(li, [], [element(p, [class=line903], [element(a, [href='/2005/rules/wg/wiki/UCR/Negotiating_eCommerce_Transactions_Through_Disclosure_of_Buyer_and_Seller_Policies_and_Preferences'], ['/Negotiating eCommerce Transactions Through Disclosure of Buyer and Seller Policies and Preferences']), ' ', element(span, [class=anchor, id='line-21'], [])])]), element(li, [], [element(p, [class=line903], [element(a, [href='/2005/rules/wg/wiki/UCR/Collaborative_Policy_Development_for_Dynamic_Spectrum_Access'], ['/Collaborative Policy Development for Dynamic Spectrum Access']), ' ', element(span, [class=anchor, id='line-22'], [])])]), element(li, [], [element(p, [class=line903], [element(a, [href='/2005/rules/wg/wiki/UCR/Access_to_Business_Rules_of_Supply_Chain_Partners'], ['/Access to Business Rules of Supply Chain Partners']), ' ', element(span, [class=anchor, id='line-23'], [])])]), element(li, [], [element(p, [class=line903], [element(a, [href='/2005/rules/wg/wiki/UCR/Managing_Inter-Organizational_Business_Policies_and_Practices'], ['/Managing Inter-Organizational Business Policies and Practices']), ' ', element(span, [class=anchor, id='line-24'], [])])]), element(li, [], [element(p, [class=line903], [element(a, [class='nonexistent nonexistent', href='/2005/rules/wg/wiki/UCR/Ruleset_Integration__for_Medical_Decision_Support'], ['/Ruleset Integration  for Medical Decision Support']), ' ', element(span, [class=anchor, id='line-25'], [])])]), element(li, [], [element(p, [class=line903], [element(a, [href='/2005/rules/wg/wiki/UCR/Interchanging_Rule_Extensions_to_OWL'], ['/Interchanging Rule Extensions to OWL']), ' ', element(span, [class=anchor, id='line-26'], [])])]), element(li, [], [element(p, [class=line903], [element(a, [href='/2005/rules/wg/wiki/UCR/Vocabularly_Mapping_for_Data_Integration'], ['/Vocabularly Mapping for Data Integration']), ' ', element(span, [class=anchor, id='line-27'], [])])])])]), element(li, [], [element(p, [class=line903], [element(a, [href='/2005/rules/wg/wiki/UCR/Requirements_%28Not_in_This_Version%29'], ['/Requirements (Not in This Version)']), ' ', element(span, [class=anchor, id='line-28'], [])]), element(span, [class=anchor, id='line-29'], [])])]), '\n', element(h2, [id='head-47ce8fc24831db75f5943481e56e029c80ba0c73'], ['Under Development']), '\n', element(span, [class=anchor, id='line-30'], []), element(p, [class=line886], ['These sections are expected to appear in later drafts of the document, but are separated out for now to clarify that they are not targetting for inclusion in the First Public Working Draft. ', element(span, [class=anchor, id='line-31'], [])]), element(span, [class=anchor, id='line-32'], []), element(ol, [type='1'], [element(li, [], [element(p, [class=line903], [element(a, [href='/2005/rules/wg/wiki/UCR/Introduction'], ['/Introduction']), ' ', element(span, [class=anchor, id='line-33'], [])]), element(ol, [type='1'], [element(li, [], [element(p, [class=line903], [element(a, [href='/2005/rules/wg/wiki/UCR/What_is_a_Rule_Interchange_Format_And_Why_Create_One'], ['/What is a Rule Interchange Format And Why Create One']), ' ', element(span, [class=anchor, id='line-34'], [])]), element(span, [class=anchor, id='line-35'], [])])])]), element(li, [class=gap], [element(p, [class=line903], [element(a, [href='/2005/rules/wg/wiki/UCR/Candidate_Use_Cases_for_2nd_Draft'], ['/Candidate Use Cases for 2nd Draft']), ' ', element(span, [class=anchor, id='line-36'], [])])]), element(li, [], [element(p, [class=line903], [element(a, [href='/2005/rules/wg/wiki/UCR/Design_Goals'], ['/Design Goals']), ' ', element(span, [class=anchor, id='line-37'], [])])]), element(li, [], [element(p, [class=line903], [element(a, [href='/2005/rules/wg/wiki/UCR/Requirements'], ['/Requirements']), ' ', element(span, [class=anchor, id='line-38'], [])])]), element(li, [], [element(p, [class=line903], [element(a, [class='nonexistent nonexistent', href='/2005/rules/wg/wiki/UCR/Acknowledgements'], ['/Acknowledgements']), ' ', element(span, [class=anchor, id='line-39'], [])])])]), element(span, [class=anchor, id=bottom], [])])])]), classAbbrText, L),
	L=['WD'].





qtest :-
	doc(doc(x, [element(dl, [], [element(dt, [], ['This Version']), element(dd, [], [element(p, [class=line903], [element(a, [class=http, href='http://www.w3.org/2005/rules/wg/ucr/draft-20060323'], ['http://www.w3.org/2005/rules/wg/ucr/draft-20060323']), ' ', element(span, [class=anchor, id='line-11'], [])]), element(span, [class=anchor, id='line-12'], [])])]), '\n']), thisVersion, V),
	V = 'http://www.w3.org/2005/rules/wg/ucr/draft-20060323'.

%	V = [p(class=line903, a(class=http, href='http://www.w3.org/2005/rules/wg/ucr/draft-20060323', 'http://www.w3.org/2005/rules/wg/ucr/draft-20060323'), ' ', span(class=anchor)), span(class=anchor)].


qtest :-
	clean_content([element(p, [class=line903], [element(a, [class=http, href='http://www.w3.org/2005/rules/wg/ucr/draft-20060323'], ['http://www.w3.org/2005/rules/wg/ucr/draft-20060323']), ' ', element(span, [class=anchor, id='line-11'], [])]), element(span, [class=anchor, id='line-12'], [])], _X).


/*

bad_content_list(
		 [element(ol,
			  [type=1],
			  [
			   element(li,
				   [],
				   [element(p,
					    [class=line903],
					    [element(a,
						     [href= /2005/rules/wg/wiki/A._RIF_Condition_Language],
						     [A. RIF Condition Language])]),
				    element(ol,
					    [type=1],
					    [element(li,
						     [],
						     [element(p,
							      [class=line903],
							      [element(a, [
									   href= /2005/rules/wg/wiki/A.1_Basis %3A_Positive_Conditions],
									  [A.1 Basis: Positive Conditions])])])])]))
			  element(li, [],
				  [
				   element(p, [class=line903], [element(a, [href= /2005/rules/wg/wiki/B._Extension %3A_RIF_Rule_Language], [B. Extension: RIF Rule Language])]),
									   element(ol, [type=1], [element(li, [], [element(p, [class=line903], [element(a, [href= /2005/rules/wg/wiki/B.1_Horn_Rules], [B.1 Horn Rules])])])])])
							       ]),
				   element(p, [class=line903], [])]) 
*/


x.

/*
[element(ol,
	 [],
	 [
	  element(li, [], [
			   element(p, [], [
					   element(a, [href='/2005/rules/wg/wiki/A._RIF_Condition_Language'], ['A. RIF Condition Language'])]),
			   element(ol, [], [
					    element(li, [], [
							     element(p, [], [
									     element(a, [href='/2005/rules/wg/wiki/A.1_Basis%3A_Positive_Conditions'], ['A.1 Basis: Positive Conditions'])])])])]),
	  element(li, [], [element(p, [], [element(a, [href='/2005/rules/wg/wiki/B._Extension%3A_RIF_Rule_Language'], ['B. Extension: RIF Rule Language'])]),
			   element(ol, [], [element(li, [], [element(p, [], [element(a, [href='/2005/rules/wg/wiki/B.1_Horn_Rules'], ['B.1 Horn Rules'])])])])])]),
 element(p, [], [])]

*/



%%   tree_close(+Root)
%%
%%   Add tree:firstChild, tree:lastChild, tree:earlierSibling,
%%   tree:nextSibling, and tree:previousSibling links to an
%%   RDF DB, based on tree:parent & tree:laterSibling links.
%%
%%   Of course, this assumes no new nodes will appear.

tree_close(Parent) :-
	(  link_earliest_unused_child(Parent),
	   repeat
	);
	true.

child(Parent, Child) :-
	rdf(Child, tree:parent, Parent).

forward_linked_child(Parent, Child) :-
	rdf(Parent, tree:firstChild, Child).
forward_linked_child(Parent, Child) :-
	(   rdf(Parent, tree:firstChild, FirstChild)
	->  tran_next_sibling(FirstChild, Child)
	;   fail
	).

trans_next_sibling(Earlier, Later) :-
	rdf(Earlier, tree:nextChild, Later).
trans_next_sibling(Earlier, Later) :-
	rdf(Earlier, tree:nextChild, Mid),
	trans_next_sibling(Mid, Later).

link_earliest_unused_child(_Parent) :-
	throw(not_implemented).

% why the heck am I using Prolog for something like this.   So goofy.


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

namify(Obj) :-
	(   var(Obj)
	->  rdf_bnode(Obj)
	;   true
	).

%%    describe_xml(X, Obj)

%%     atoms as strings or not...???

describe( Atom, Obj ) :-
	atom(Atom),
	Atom \= [],
	Obj = literal(Atom).
describe( [], Obj ) :-
	rdf_global_id(rdf:nil, Obj).
describe( [H|T], Obj ) :-
	namify(Obj),
	describe(H, HObj),
	rdf_assert(Obj, rdf:first, HObj),
	describe(T, TObj),
	rdf_assert(Obj, rdf:rest, TObj).
describe( element(p, XAttrs, Content), Obj ) :-
	namify(Obj),
	rdf_assert(Obj, rdf:type, ht:'Paragraph'),
	describe(Content, CObj),
	rdf_assert(Obj, ht:content, CObj),
	apply_attrs(XAttrs, Obj).
describe( element(span, XAttrs, Content), Obj ) :-
	namify(Obj),
	rdf_assert(Obj, rdf:type, ht:'Span'),
	describe(Content, CObj),
	rdf_assert(Obj, ht:content, CObj),
	apply_attrs(XAttrs, Obj).
	
describe( Other, Obj ) :-
	namify(Obj),
        format(user_error, 'WARNING: unable to describe ~q~n', [Other]).


apply_attrs([], _).
apply_attrs([P=V | Rest], Obj) :-
	format(user_error, 'WARNING: unable to us attribute ~q=~q~n', [P,V]),
	apply_attrs(Rest, Obj).
	
%%  would be nice to be able to run backwards --- recognize

%%  would be nice to match & re-use existing objects, where possible.

