Annotation of libwww/Library/src/HTRDF.c, revision 2.13
2.1 frystyk 1: /*
2: ** RDF PARSER
3: **
2.13 ! vbancrof 4: ** @(#) $Id: HTRDF.c,v 2.12 2000/08/30 13:01:57 barstow Exp $
2.1 frystyk 5: **
6: ** Copyright © 1995-1998 World Wide Web Consortium, (Massachusetts
7: ** Institute of Technology, Institut National de Recherche en
8: ** Informatique et en Automatique, Keio University). All Rights
9: ** Reserved. This program is distributed under the W3C's Software
10: ** Intellectual Property License. This program is distributed in the hope
11: ** that it will be useful, but WITHOUT ANY WARRANTY; without even the
12: ** implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
13: ** PURPOSE. See W3C License http://www.w3.org/Consortium/Legal/ for more
14: ** details.
15: **
16: ** This module requires expat
17: **
18: ** AUTHOR:
19: ** John Punin <puninj@cs.rpi.edu>,
20: ** Henrik
21: */
22:
23: #include "WWWLib.h"
24: #include "WWWInit.h"
25: #include "WWWXML.h"
26: #include "HTRDF.h"
27:
/*
** Stream object for this module. The PUTC/PUTS macros in this file
** forward output to 'target' through its 'isa' method table.
*/
struct _HTStream {
    const HTStreamClass *	isa;		/* Method table of this stream */
    int				state;
    HTRequest *			request;
    HTStream *			target;		/* Downstream stream used by PUTC/PUTS */
    HTRDF *			rdfparser;
};
35:
/* Forward one character (PUTC) or a string (PUTS) to the target of stream t */
#define PUTC(t,c)	(*(t)->target->isa->put_character)((t)->target, (c))
#define PUTS(t,s)	(*(t)->target->isa->put_string)((t)->target, (s))
38:
/*
** One (predicate, subject, object) statement. HTTriple_new() stores
** private copies of all three strings; HTTriple_delete() frees them.
*/
struct _HTTriple {
    char * 			m_sPredicate;
    char * 			m_sSubject;
    char * 			m_sObject;
};
44:
/*
** One node of the document tree built by the expat handlers. A node is
** either a markup element (created by HTElement_new) or a character data
** node (created by HTElement_new2, recognised by a non-NULL m_sContent).
*/
struct _HTElement {
    char * 			m_sName;	/* Element name, or "[DATA: ...]" for data nodes */
    HTAssocList *		m_attributes;	/* NULL for data nodes */
    HTList *			m_children;	/* Child elements */
    HTList *			m_lastChild;	/* List node returned by the most recent
						   HTElement_addChild(); starts out
						   equal to m_children */
    char * 			m_sID;
    char * 			m_sBagID;
    HTList *			m_vTargets;
    BOOL			m_bDone;	/* Initialised to FALSE by the constructors */
    char * 			m_sPrefix;
    char * 			m_sContent;	/* Character data; non-NULL marks a data node */
};
57:
/*
** State of one RDF parser instance (see HTRDF_new for initialisation).
*/
struct _HTRDFParser {
    HTList *			m_namespaceStack;	/* One HTAssocList of xmlns declarations
							   per open element */
    HTList *			m_elementStack;		/* Currently open elements */
    HTList *			m_literalStack;		/* Elements nested inside a
							   parseType="Literal" section */
    HTElement *			m_root;			/* Element most recently closed by
							   XML_endElement */
    HTList *			m_triples;		/* HTTriple objects, freed in HTRDF_delete */
    char * 			m_sSource;
    HTList *			m_vAllNameSpaces;	/* Heap-allocated namespace strings */

    BOOL			m_bCreateBags;
    BOOL			m_bFetchSchemas;

    HTList *			m_parseTypeStack;	/* parseType attribute values in effect */
    HTList *			m_parseElementStack;	/* Elements that carried those parseTypes */
    char * 			m_sLiteral;		/* Markup collected while inside
							   parseType="Literal" */

    HTList *			m_vResources;
    HTList *			m_vResolveQueue;
    HTHashtable *		m_hIDtable;
    int				m_iReificationCounter;

    HTStream *			ostream;

    HTTripleCallback_new *	newTripleInstance;
    void *			tripleContext;
};
84:
/* @@@ Should not be global but controlled by name spaces @@@ */
/*
** Optional callback that rdf_newInstance() invokes for every new RDF
** stream, together with the opaque context pointer handed to it.
*/
PRIVATE HTRDFCallback_new *	RDFInstance = NULL;
PRIVATE void *			RDFInstanceContext = NULL;
88:
/* Forward declarations of file-local routines defined further down */
PRIVATE char * HTRDF_processContainer (HTRDF *me, HTElement *e);
PRIVATE char * HTRDF_processPredicate (HTRDF *me, HTElement *predicate,
				       HTElement *description,
				       char * sTarget,
				       BOOL reificate);
PRIVATE void HTRDF_processListItem (HTRDF *me,char * sID, HTElement *listitem,
				    int iCounter);
PRIVATE void HTRDF_checkAttributes (HTRDF *me,HTElement *Element);
PRIVATE BOOL HTRDF_expandAttributes (HTRDF *me, HTElement *parent, HTElement *ele);
PRIVATE char * HTRDF_reificate (HTRDF *me, char * sPredicate, char * sSubject,
				char * sObject, char * sNodeID);
2.4 barstow 100: /* ------------------------------------------------------------------------- */
101:
102: /*
103: ** Append the markup for the given element and its attribute to the
104: ** parser's "Literal" buffer. This buffer is filled in when parseType="Literal".
105: */
106: PRIVATE void addMarkupStart (HTRDF *rdfp, const char *name, const char **atts)
107: {
108: int i=0;
109:
110: if (!rdfp || !name) return;
111:
2.7 barstow 112: StrAllocMCat(&rdfp->m_sLiteral, "<", name, NULL);
2.4 barstow 113:
114: while (atts[i]) {
2.7 barstow 115: StrAllocMCat(&rdfp->m_sLiteral, " ", atts[i], "=\"", atts[i+1], "\"", NULL);
2.4 barstow 116: i+=2;
117: }
118:
119: StrAllocCat(rdfp->m_sLiteral, ">");
120: }
121:
122: /*
123: ** Terminate this element's "Literal" buffer. This buffer is filled in when
124: ** parseType="Literal".
125: */
126: PRIVATE void addMarkupEnd (HTRDF *rdfp, const char *name)
127: {
128: if (!rdfp || !name) return;
129:
2.7 barstow 130: StrAllocMCat(&rdfp->m_sLiteral, "</", name, ">", NULL);
2.4 barstow 131: }
2.1 frystyk 132:
133: /* ------------------------------------------------------------------------- */
134:
135: /*
136: ** Searches a whole list of Strings and returns true if the String is found.
137: */
2.3 frystyk 138: PRIVATE BOOL HTList_contains (HTList *list, char * s)
2.1 frystyk 139: {
140: HTList *cur = list;
2.3 frystyk 141: char * cs = NULL;
142: while ((cs = (char *) HTList_nextObject(cur))) {
2.1 frystyk 143: if (!strcmp(cs, s)) return YES;
144: }
145: return NO;
146: }
147:
148: /*
149: ** Useful function that Trims a string
150: ** @@@ Should use HTStrip() @@@
151: */
152: PRIVATE char * trim (char *s)
153: {
154: char *p = NULL, *t = NULL;
155: int len = s ? strlen(s) : -1;
156: if (s && len > 0) {
157: StrAllocCopy(t, s);
158: p = &(s[len-1]);
159: while(p!=s) {
160: if (!isspace((int)(*p)))
161: break;
162: p--;
163: }
164: t[(int)(p-s)+1] = '\0';
165: if (isspace((int) t[(int)(p-s)]))
166: t[(int)(p-s)] = '\0';
167: }
168: return t;
169: }
170:
171: /* ------------------------------------------------------------------------- */
172: /* TRIPLE of RDF */
173: /* ------------------------------------------------------------------------- */
174:
2.3 frystyk 175: PUBLIC HTTriple * HTTriple_new (char * p, char * s, char * o)
2.1 frystyk 176: {
177: HTTriple * me = NULL;
178: if (p && s && o) {
179: if ((me = (HTTriple *) HT_CALLOC(1, sizeof(HTTriple))) == NULL)
180: HT_OUTOFMEM("HTTriple_new");
181: StrAllocCopy(me->m_sPredicate, p);
182: StrAllocCopy(me->m_sSubject, s);
183: StrAllocCopy(me->m_sObject, o);
184: }
185: return me;
186: }
187:
188: PUBLIC BOOL HTTriple_delete (HTTriple * me)
189: {
190: if (me) {
191: HT_FREE(me->m_sPredicate);
192: HT_FREE(me->m_sSubject);
193: HT_FREE(me->m_sObject);
194: HT_FREE(me);
195: return YES;
196: }
197: return NO;
198: }
199:
200: PUBLIC void HTTriple_print (HTTriple * me)
201: {
202: if (me)
203: HTPrint("TRIPLE(%s,%s,%s)\n", me->m_sPredicate, me->m_sSubject,
204: me->m_sObject);
205: }
206:
2.3 frystyk 207: PUBLIC char * HTTriple_subject (HTTriple * me)
2.1 frystyk 208: {
209: return me ? me->m_sSubject : NULL;
210: }
211:
2.3 frystyk 212: PUBLIC char * HTTriple_predicate (HTTriple * me)
2.1 frystyk 213: {
214: return me ? me->m_sPredicate : NULL;
215: }
216:
2.3 frystyk 217: PUBLIC char * HTTriple_object (HTTriple * me)
2.1 frystyk 218: {
219: return me ? me->m_sObject : NULL;
220: }
221:
222: /* ------------------------------------------------------------------------- */
223: /* ELEMENT of RDF */
224: /* ------------------------------------------------------------------------- */
225:
2.3 frystyk 226: PUBLIC HTElement * HTElement_new (char * sName, HTAssocList * al)
2.1 frystyk 227: {
228: HTElement * me = NULL;
229: if (sName) {
230: if ((me = (HTElement *) HT_CALLOC(1, sizeof(HTElement))) == NULL)
231: HT_OUTOFMEM("HTElement_new");
232: StrAllocCopy(me->m_sName, sName);
233: me->m_attributes = al ? al : HTAssocList_new();
234: me->m_children = HTList_new();
2.13 ! vbancrof 235: me->m_lastChild = me->m_children;
2.1 frystyk 236: /*me->m_nodes = HTAssocList_new();*/ /* Sirpac does not use nodes list */
237: me->m_vTargets = HTList_new();
238: me->m_bDone = FALSE;
239: }
240: return me;
241: }
242:
243: /*
244: ** Creates a Data Element and saves the data in the Content field.
245: ** Data Element does not have attributes
246: */
2.3 frystyk 247: PUBLIC HTElement * HTElement_new2 (char * sContent)
2.1 frystyk 248: {
249: HTElement * me = NULL;
250: if (sContent) {
251: if ((me = (HTElement *) HT_CALLOC(1, sizeof(HTElement))) == NULL)
252: HT_OUTOFMEM("HTElement_new2");
253: StrAllocMCopy(&me->m_sName, "[DATA: ", sContent, "]", NULL);
254: me->m_attributes = NULL;
255: me->m_children = HTList_new();
2.13 ! vbancrof 256: me->m_lastChild = me->m_children;
2.1 frystyk 257: /*me->m_nodes = HTAssocList_new();*/
258: me->m_vTargets = HTList_new();
259: me->m_bDone = FALSE;
260: StrAllocCopy(me->m_sContent, sContent);
261: }
262: return me;
263: }
264:
2.3 frystyk 265: PUBLIC BOOL HTElement_addData (HTElement *me, char * sContent)
2.1 frystyk 266: {
267: if (me && sContent) {
268: int l = strlen(me->m_sName);
269: StrAllocCat(me->m_sContent, sContent);
270: me->m_sName[l-1]='\0';
271: StrAllocMCat(&me->m_sName, sContent, "]", NULL);
272: return YES;
273: }
274: return NO;
275: }
276:
277: PUBLIC BOOL HTElement_delete (HTElement * me)
278: {
279: if (me) {
280: HT_FREE(me->m_sName);
281: if (me->m_attributes) HTAssocList_delete(me->m_attributes);
282: if (me->m_children) HTList_delete(me->m_children);
283: HT_FREE(me->m_sID);
284: HT_FREE(me->m_sBagID);
285: if (me->m_vTargets) HTList_delete(me->m_vTargets);
286: HT_FREE(me->m_sPrefix);
287: HT_FREE(me->m_sContent);
288: HT_FREE(me);
289: return YES;
290: }
291: return NO;
292: }
293:
294: PUBLIC BOOL HTElement_addChild (HTElement * me, HTElement * element)
295: {
2.13 ! vbancrof 296: if (me && element) {
! 297: HTList *lastChild;
! 298: if ((lastChild = HTList_addList(me->m_lastChild, element))) {
! 299: me->m_lastChild = lastChild;
! 300: return YES;
! 301: }
! 302: }
! 303: return NO;
2.1 frystyk 304: }
305:
2.3 frystyk 306: PUBLIC BOOL HTElement_addAttribute (HTElement * me, char * sName, char * sValue)
2.1 frystyk 307: {
308: return (me && sName && sValue) ?
309: HTAssocList_addObject(me->m_attributes, sName, sValue) : NO;
310: }
311:
2.3 frystyk 312: PUBLIC BOOL HTElement_removeAttribute (HTElement * me, char * sName)
2.1 frystyk 313: {
314: return (me && sName) ? HTAssocList_removeObject(me->m_attributes, sName) : NO;
315: }
316:
2.3 frystyk 317: PUBLIC char * HTElement_getAttribute (HTElement * me, char * sName)
2.1 frystyk 318: {
319: return (me && sName) ? HTAssocList_findObjectCaseSensitiveExact(me->m_attributes, sName) : NULL;
320: }
321:
2.3 frystyk 322: PUBLIC char * HTElement_getAttribute2 (HTElement * me, char * sNamespace, char * sName)
2.1 frystyk 323: {
2.3 frystyk 324: char * fValue = NULL;
325: char * fName = NULL;
2.1 frystyk 326: if (me && sNamespace && sName) {
327: StrAllocMCopy(&fName, sNamespace, sName, NULL);
328: fValue = HTAssocList_findObjectCaseSensitiveExact(me->m_attributes, fName);
329: HT_FREE(fName);
330: }
331: return fValue;
332: }
333:
334: PUBLIC BOOL HTElement_addTarget (HTElement * me, HTElement * element)
335: {
336: return (me && element) ? HTList_addObject(me->m_vTargets, element) : NO;
337: }
338:
339: PUBLIC HTElement * HTElement_target (HTElement * me)
340: {
341: return me ? (HTElement *) HTList_lastObject(me->m_vTargets) : NULL;
342: }
343:
344: PUBLIC BOOL HTElement_instanceOfData (HTElement * me)
345: {
346: return (me && me->m_sContent) ? YES : NO;
347: }
348:
349: /* ------------------------------------------------------------------------- */
350: /* EXPAT HANDLERS */
351: /* ------------------------------------------------------------------------- */
352:
353: /*
354: * Called for each new element.
355: * Build up the document tree using an element stack
356: */
357: PRIVATE void XML_startElement (void * userData,
358: const XML_Char * name, const XML_Char ** atts)
359: {
360: HTRDF * rdfp = (HTRDF *) userData;
361: HTAssocList * namespaces = HTAssocList_new();
362: HTAssocList * newAL = HTAssocList_new();
363: int i = 0;
2.5 barstow 364:
2.1 frystyk 365: /**
366: * The following loop tries to identify special xmlns prefix
367: * attributes and update the namespace stack accordingly.
368: * While doing all this, it builds another AttributeList instance
369: * which will hold the expanded names of the attributes
370: * (I think this approach is only useful for RDF which uses
371: * attributes as an abbreviated syntax for element names)
372: */
373: if (atts) {
374: while (atts[i]) {
2.3 frystyk 375: char * aName = (char * ) atts[i];
2.1 frystyk 376: if (!strcmp(aName, "xmlns")) {
2.3 frystyk 377: char * aValue = (char *) atts[i+1];
2.1 frystyk 378: int len = aValue ? strlen(aValue) : -1;
379: if (len == 0 && !rdfp->m_sSource)
380: aValue = rdfp->m_sSource;
381: HTAssocList_addObject(namespaces, aName, aValue);
382: /* save all non-RDF schema addresses */
383: if (!HTList_contains(rdfp->m_vAllNameSpaces, aValue) &&
384: strncmp(aValue, RDFMS, strlen(RDFMS)) &&
385: strncmp(aValue, RDFSCHEMA, strlen(RDFSCHEMA))) {
2.3 frystyk 386: char * nname = NULL;
2.1 frystyk 387: StrAllocCopy(nname, aValue);
388: HTList_addObject(rdfp->m_vAllNameSpaces, nname);
389: }
390:
391: /* Special case: Don't save document's own address */
392: if (rdfp->m_sSource &&
393: !strncmp(aValue, rdfp->m_sSource, strlen(rdfp->m_sSource))) {
2.3 frystyk 394: char * nname = NULL;
2.1 frystyk 395: StrAllocCopy(nname, aValue);
396: HTList_addObject(rdfp->m_vAllNameSpaces, nname);
397: }
398: } else if (!strncmp(aName, "xmlns:", 6)) {
2.3 frystyk 399: char * aValue = (char *) atts[i+1];
400: char * nName = NULL;
2.1 frystyk 401: int len = aValue ? strlen(aValue) : -1;
402: if (len == 0 && !rdfp->m_sSource)
403: aValue = rdfp->m_sSource;
404: StrAllocCopy(nName, &(aName[6]));
405: HTAssocList_addObject(namespaces, nName, aValue);
406: HT_FREE(nName);
407:
408: /* Save all non-RDF schema addresses */
409: if (!HTList_contains(rdfp->m_vAllNameSpaces, aValue) &&
410: strncmp(aValue, RDFMS, strlen(RDFMS)) &&
411: strncmp(aValue, RDFSCHEMA, strlen(RDFSCHEMA))) {
2.3 frystyk 412: char * nname = NULL;
2.1 frystyk 413: StrAllocCopy(nname, aValue);
414: HTList_addObject(rdfp->m_vAllNameSpaces, nname);
415: }
416:
417: /* Special case: Don't save document's own address */
418: if (rdfp->m_sSource &&
419: !strncmp(aValue, rdfp->m_sSource, strlen(rdfp->m_sSource))) {
2.3 frystyk 420: char * nname = NULL;
2.1 frystyk 421: StrAllocCopy(nname, aValue);
422: HTList_addObject(rdfp->m_vAllNameSpaces, nname);
423: }
424: }
425: i+=2;
426: } /* end of while */
427: } /* end of if */
428:
429: /*
430: ** Place new namespace declarations into the stack
431: ** (Yes, I could optimize this a bit, not it wastes space
432: ** if there are no xmlns definitions)
433: */
434: HTList_addObject(rdfp->m_namespaceStack, namespaces);
435:
436: /*
437: ** Figure out the prefix part if it exists and
438: ** determine the namespace of the element accordingly
439: */
440: {
2.3 frystyk 441: char * sNamespace = NULL;
442: char * sElementName = NULL;
443: char * sPrefix2 = NULL;
2.1 frystyk 444: HTElement *newElement = NULL;
445: char *pindex = strchr(name, ':');
446: int ix = pindex ? (int) (pindex - name) : -1 ;
447: if (ix > 0) {
448: if (!(sPrefix2 = HT_MALLOC(ix+1)))
449: HT_OUTOFMEM("XML_startELement");
450: strncpy(sPrefix2, name, ix);
451: sPrefix2[ix]='\0';
452: sNamespace = HTRDF_namespace(rdfp, sPrefix2);
453: StrAllocCopy(sElementName, &(name[ix+1]));
454: HT_FREE(sPrefix2);
455: } else {
456: sNamespace = HTRDF_namespace(rdfp, "xmlns");
457: StrAllocCopy(sElementName, name);
458: }
459:
460: /*
461: * Finally look for attributes other than the special xmlns,
462: * expand them, and place to the new Attribute List
463: */
464: i = 0;
465: if (atts) {
466: while (atts[i]) {
2.3 frystyk 467: char * aName = (char *) atts[i];
468: char * sAttributeNamespace = NULL;
2.1 frystyk 469: if (strncmp(aName, "xmlns", 5)) {
2.3 frystyk 470: char * aValue = (char *) atts[i+1];
471: char * sPrefix = NULL;
2.1 frystyk 472: /* Expat does not have type for attributes */
473: pindex = strchr(aName, ':');
474: ix = pindex ? (int) (pindex - aName) : -1;
475: if (ix > 0) {
476: if (!(sPrefix = HT_MALLOC(ix+1)))
477: HT_OUTOFMEM("XML_startELement");
478: strncpy(sPrefix, aName, ix);
479: sPrefix[ix] = '\0';
480: sAttributeNamespace = HTRDF_namespace(rdfp, sPrefix);
481: aName = &(aName[ix+1]);
482: HT_FREE(sPrefix);
483: } else {
484: if (!sNamespace)
485: sAttributeNamespace = HTRDF_namespace(rdfp, "xmlns");
486: else
487: StrAllocCopy(sAttributeNamespace, sNamespace);
488: }
489:
490: if (HTRDF_parseLiteral(rdfp)) {
491: if (!sPrefix) {
492: if (!(sPrefix = HT_MALLOC(8)))
493: HT_OUTOFMEM("XML_startELement");
494: sprintf(sPrefix, "gen%d\n", i);
495: }
496: {
2.3 frystyk 497: char * fName = NULL;
2.1 frystyk 498: StrAllocMCopy(&fName, sPrefix, ":", aValue, NULL);
499: HTAssocList_addObject(newAL, fName, aValue);
500: HT_FREE(fName);
501: StrAllocMCopy(&fName, "xmlns:", sPrefix, NULL);
502: HTAssocList_addObject(newAL, fName, sAttributeNamespace);
503: HT_FREE(fName);
504: }
505: } else {
2.3 frystyk 506: char * fName = NULL;
2.1 frystyk 507: StrAllocMCopy(&fName, sAttributeNamespace, aName, NULL);
508: HTAssocList_addObject(newAL, fName, aValue);
509: HT_FREE(fName);
510: }
511:
512: HT_FREE(sAttributeNamespace);
513:
514: /*
515: ** This call will try to see if the user is using
516: ** RDF look-alike elements from another namespace
517: **
518: ** Note: you can remove the call if you wish
519: */
520: #if 0
521: HTRDF_likeRDF (rdfp, sAttributeNamespace, aName);
522: #endif
523:
524: } /* end of if */
525: i+=2;
526: } /* end of while */
527: } /* end of if atts */
528:
529: /*
530: * If we have parseType="Literal" set earlier, this element
531: * needs some additional attributes to make it stand-alone
532: * piece of XML
533: */
534: if (HTRDF_parseLiteral(rdfp)) {
2.3 frystyk 535: char * fName = NULL;
2.4 barstow 536:
2.1 frystyk 537: if (!sPrefix2) {
538: if (sNamespace)
539: HTAssocList_addObject(newAL, "xmlns:gen", sNamespace);
540: StrAllocMCopy(&fName, "gen", sElementName, NULL);
541: newElement = HTElement_new(fName, newAL);
542: StrAllocCopy(newElement->m_sPrefix, "gen");
543: HT_FREE(fName);
544: } else {
2.3 frystyk 545: char * sAttributeNamespace = HTRDF_namespace(rdfp, sPrefix2);
2.1 frystyk 546: if (sAttributeNamespace) {
547: StrAllocMCopy(&fName, "xmlns:", sPrefix2, NULL);
548: HTAssocList_addObject(newAL, fName, sAttributeNamespace);
549: HT_FREE(fName);
550: }
551: StrAllocMCopy(&fName, sPrefix2, ":", sElementName, NULL);
552: newElement = HTElement_new(fName, newAL);
553: HT_FREE(fName);
554: }
555: } else {
2.3 frystyk 556: char * fName = NULL;
2.1 frystyk 557: StrAllocMCopy(&fName, sNamespace, sElementName, NULL);
558: newElement = HTElement_new(fName, newAL);
559: HT_FREE(fName);
560: /* HTRDF_likeRDF (rdfp, sNamespace, sElementName); */
561: }
562: HT_FREE(sElementName);
563: HT_FREE(sNamespace);
564: HTRDF_checkAttributes(rdfp, newElement);
2.4 barstow 565:
2.1 frystyk 566: /*
567: ** Check parseType
568: */
569: {
2.3 frystyk 570: char * fName = NULL;
571: char * sLiteralValue = NULL;
2.1 frystyk 572: StrAllocMCopy(&fName, RDFMS, "parseType", NULL);
573: sLiteralValue = HTElement_getAttribute(newElement, fName);
574: HT_FREE(fName);
575: if (sLiteralValue && strcmp(sLiteralValue, "Resource")) {
576: /**
577: * This is the management of the element where
578: * parseType="Literal" appears
579: *
580: * You should notice RDF V1.0 conforming implementations
581: * must treat other values than Literal and Resource as
582: * Literal. This is why the condition is !equals("Resource")
583: */
584:
585: HTList_addObject(rdfp->m_parseTypeStack, sLiteralValue);
586: if (!HTList_isEmpty(rdfp->m_elementStack)) {
587: HTElement *e = (HTElement *)
588: HTList_lastObject(rdfp->m_elementStack);
589: HTElement_addChild(e, newElement);
590: }
591: HTList_addObject(rdfp->m_elementStack, newElement);
592: HTList_addObject(rdfp->m_parseElementStack, newElement);
593: HT_FREE(rdfp->m_sLiteral);
594: StrAllocCopy(rdfp->m_sLiteral, "");
595: return;
596: }
597:
598: if (HTRDF_parseLiteral(rdfp)) {
599: /*
600: * This is the management of any element nested within
601: * a parseType="Literal" declaration
602: */
2.4 barstow 603: /* Add the element to the parser's literal buffer */
604: addMarkupStart (rdfp, name, atts);
605:
2.8 barstow 606: /* Add this literal element to the literal stack */
607: if (!HTList_isEmpty(rdfp->m_literalStack)) {
608: HTElement *e = (HTElement *)
609: HTList_lastObject(rdfp->m_literalStack);
610: HTElement_addChild(e, newElement);
611: }
612: HTList_addObject(rdfp->m_literalStack, newElement);
613:
2.1 frystyk 614: HTList_addObject(rdfp->m_elementStack, newElement);
615: return;
616: }
617:
618: /*
619: ** Update the containment hierarchy with the stack.
620: */
621: if (!HTList_isEmpty(rdfp->m_elementStack)) {
622: HTElement *e = (HTElement *)
623: HTList_lastObject(rdfp->m_elementStack);
624: HTElement_addChild(e, newElement);
625: }
626:
627: /*
628: ** Place the new element into the stack
629: */
630: HTList_addObject(rdfp->m_elementStack, newElement);
631: if (sLiteralValue && !strcmp(sLiteralValue, "Resource")) {
632: HTList_addObject(rdfp->m_parseTypeStack, sLiteralValue);
633: HTList_addObject(rdfp->m_parseElementStack, newElement);
634: HT_FREE(rdfp->m_sLiteral);
635: StrAllocCopy(rdfp->m_sLiteral, "");
636:
637: /*
638: * Since parseType="Resource" implies the following
639: * production must match Description, let's create
640: * an additional Description node here in the document tree.
641: */
642: {
2.3 frystyk 643: char * fName = NULL;
2.1 frystyk 644: HTElement *desc = NULL;
645: HTAssocList * al = HTAssocList_new ();
646: StrAllocMCopy(&fName, RDFMS, "Description", NULL);
647: desc = HTElement_new(fName, al);
648: HT_FREE(fName);
649: if (!HTList_isEmpty(rdfp->m_elementStack)) {
650: HTElement *e = (HTElement *)
651: HTList_lastObject(rdfp->m_elementStack);
652: HTElement_addChild(e, desc);
653: }
654: HTList_addObject(rdfp->m_elementStack, desc);
655: }
656: } /* end of if */
657: } /* end of block */
658: } /* end of block */
659: }
660:
661: /*
662: * For each end of an element scope step back in the
663: * element and namespace stack
664: */
665: PRIVATE void XML_endElement (void * userData,
666: const XML_Char * name)
667: {
668: HTRDF * rdfp = (HTRDF *) userData;
669: BOOL bParseLiteral = rdfp ? HTRDF_parseLiteral(rdfp) : NO;
670: HTAssocList * namespaces = HTList_removeLastObject(rdfp->m_namespaceStack);
671: rdfp->m_root = (HTElement *) HTList_removeLastObject(rdfp->m_elementStack);
672: if (namespaces) HTAssocList_delete(namespaces);
673:
674: if (bParseLiteral) {
675: HTElement *pe = (HTElement *)
676: HTList_lastObject(rdfp->m_parseElementStack);
677: if (pe != rdfp->m_root) {
2.4 barstow 678: /* Terminate the literal */
679: addMarkupEnd (rdfp, name);
2.1 frystyk 680: } else {
681: HTElement *de = HTElement_new2(rdfp->m_sLiteral);
682: HTElement_addChild(pe, de);
2.4 barstow 683:
2.1 frystyk 684: HT_FREE(rdfp->m_sLiteral);
685: StrAllocCopy(rdfp->m_sLiteral, "");
686: HTList_removeLastObject(rdfp->m_parseElementStack);
687: HTList_removeLastObject(rdfp->m_parseTypeStack);
688: }
689: } else if (HTRDF_parseResource(rdfp)) {
690: /**
691: * If we are doing parseType="Resource"
692: * we need to explore whether the next element in
693: * the stack is the closing element in which case
694: * we remove it as well (remember, there's an
695: * extra Description element to be removed)
696: */
697: if (!HTList_isEmpty(rdfp->m_elementStack)) {
698: HTElement *pe = (HTElement *)
699: HTList_lastObject(rdfp->m_parseElementStack);
700: HTElement *e = (HTElement *)
701: HTList_lastObject(rdfp->m_elementStack);
702: if (pe == e) {
703: e = (HTElement *) HTList_removeLastObject(rdfp->m_elementStack);
704: HTList_removeLastObject(rdfp->m_parseElementStack);
705: HTList_removeLastObject(rdfp->m_parseTypeStack);
706: }
707: }
708: }
709: }
710:
711: PRIVATE void XML_characterData (void * userData,
712: const XML_Char * s, int len)
713: {
714: /*
715: * Place all characters as Data instance to the containment
716: * hierarchy with the help of the stack.
717: */
718: HTRDF * rdfp = (HTRDF *) userData;
2.3 frystyk 719: HTElement * e = (HTElement *) HTList_lastObject(rdfp->m_elementStack);
720: char * tstr = NULL;
721: char * str = NULL;
722: if (!(str = (char *) HT_MALLOC(len+1)))
2.1 frystyk 723: HT_OUTOFMEM("XML_characterData");
724: strncpy(str, s, len);
725: str[len]='\0';
726: if (HTRDF_parseLiteral(rdfp)) {
727: StrAllocCat(rdfp->m_sLiteral, str);
728: HT_FREE(str);
729: return;
730: }
731: /* JUST FOR EXPAT */
732: {
733: HTElement *lch = (HTElement *) HTList_lastObject(e->m_children);
734: if (lch && HTElement_instanceOfData(lch)) {
735: HTElement_addData(lch, str);
736: HT_FREE(str);
737: return;
738: }
739: }
740: /*
741: * Warning: this is not correct procedure according to XML spec.
742: * All whitespace matters!
743: */
744: tstr = trim(str);
745: if (strlen(tstr) > 0) {
746: HTElement * de = HTElement_new2(tstr);
747: HTElement_addChild(e, de);
748: }
749: HT_FREE(str); HT_FREE(tstr);
750: }
751:
752: PRIVATE void XML_processingInstruction (void * userData,
753: const XML_Char * target,
754: const XML_Char * data)
755: {
756: return;
757: }
758:
759: /*
760: ** This is called for any characters in the XML document for
761: ** which there is no applicable handler. This includes both
762: ** characters that are part of markup which is of a kind that is
763: ** not reported (comments, markup declarations), or characters
764: ** that are part of a construct which could be reported but
765: ** for which no handler has been supplied. The characters are passed
766: ** exactly as they were in the XML document except that
767: ** they will be encoded in UTF-8. Line boundaries are not normalized.
768: ** Note that a byte order mark character is not passed to the default handler.
769: ** If a default handler is set, internal entity references
770: ** are not expanded. There are no guarantees about
771: ** how characters are divided between calls to the default handler:
772: ** for example, a comment might be split between multiple calls.
773: */
774: PRIVATE void XML_default (void * userData,
775: const XML_Char * s, int len)
776: {
777: return;
778: }
779:
780: /*
781: ** This is called for a declaration of an unparsed (NDATA)
782: ** entity. The base argument is whatever was set by XML_SetBase.
783: ** The entityName, systemId and notationName arguments will never be null.
784: ** The other arguments may be.
785: */
786: PRIVATE void XML_unparsedEntityDecl (void * userData,
787: const XML_Char * entityName,
788: const XML_Char * base,
789: const XML_Char * systemId,
790: const XML_Char * publicId,
791: const XML_Char * notationName)
792: {
793: return;
794: }
795:
796: /*
797: ** This is called for a declaration of notation.
798: ** The base argument is whatever was set by XML_SetBase.
799: ** The notationName will never be null. The other arguments can be.
800: */
801: PRIVATE void XML_notationDecl (void * userData,
802: const XML_Char * notationName,
803: const XML_Char * base,
804: const XML_Char * systemId,
805: const XML_Char * publicId)
806: {
807: return;
808: }
809:
810: /*
811: ** This is called for a reference to an external parsed general entity.
812: ** The referenced entity is not automatically parsed.
813: ** The application can parse it immediately or later using
814: ** XML_ExternalEntityParserCreate.
815: ** The parser argument is the parser parsing the entity containing the reference;
816: ** it can be passed as the parser argument to XML_ExternalEntityParserCreate.
817: ** The systemId argument is the system identifier as specified in the entity
818: ** declaration; it will not be null.
819: ** The base argument is the system identifier that should be used as the base for
820: ** resolving systemId if systemId was relative; this is set by XML_SetBase;
821: ** it may be null.
822: ** The publicId argument is the public identifier as specified in the entity declaration,
823: ** or null if none was specified; the whitespace in the public identifier
824: ** will have been normalized as required by the XML spec.
825: ** The openEntityNames argument is a space-separated list of the names of the entities
826: ** that are open for the parse of this entity (including the name of the referenced
827: ** entity); this can be passed as the openEntityNames argument to
828: ** XML_ExternalEntityParserCreate; openEntityNames is valid only until the handler
829: ** returns, so if the referenced entity is to be parsed later, it must be copied.
830: ** The handler should return 0 if processing should not continue because of
831: ** a fatal error in the handling of the external entity.
832: ** In this case the calling parser will return an XML_ERROR_EXTERNAL_ENTITY_HANDLING
833: ** error.
834: ** Note that unlike other handlers the first argument is the parser, not userData.
835: */
836: PRIVATE int XML_externalEntityRef (XML_Parser parser,
837: const XML_Char * openEntityNames,
838: const XML_Char * base,
839: const XML_Char * systemId,
840: const XML_Char * publicId)
841: {
842: return 0;
843: }
844:
845: /*
846: ** This is called for an encoding that is unknown to the parser.
847: ** The encodingHandlerData argument is that which was passed as the
848: ** second argument to XML_SetUnknownEncodingHandler.
849: ** The name argument gives the name of the encoding as specified in
850: ** the encoding declaration.
851: ** If the callback can provide information about the encoding,
852: ** it must fill in the XML_Encoding structure, and return 1.
853: ** Otherwise it must return 0.
854: ** If info does not describe a suitable encoding,
855: ** then the parser will return an XML_UNKNOWN_ENCODING error.
856: */
857: PRIVATE int XML_unknownEncoding (void * encodingHandlerData,
858: const XML_Char * name,
859: XML_Encoding * info)
860: {
861: return 0;
862: }
863:
864: /* ------------------------------------------------------------------------- */
865: /* HTXML STREAM HANDLERS */
866: /* ------------------------------------------------------------------------- */
867:
868: PRIVATE void rdf_setHandlers (XML_Parser me)
869: {
870: XML_SetElementHandler(me, XML_startElement, XML_endElement);
871: XML_SetCharacterDataHandler(me, XML_characterData);
872: XML_SetProcessingInstructionHandler(me, XML_processingInstruction);
873: XML_SetDefaultHandler(me, XML_default);
874: XML_SetUnparsedEntityDeclHandler(me, XML_unparsedEntityDecl);
875: XML_SetNotationDeclHandler(me, XML_notationDecl);
876: XML_SetExternalEntityRefHandler(me, XML_externalEntityRef);
877: XML_SetUnknownEncodingHandler(me, XML_unknownEncoding, NULL);
878: }
879:
880: PRIVATE void rdf_newInstance (HTStream * me,
881: HTRequest * request,
882: HTFormat target_format,
883: HTStream * target_stream,
884: XML_Parser xmlparser,
885: void * context)
886: {
887: if (me && xmlparser) {
888: rdf_setHandlers(xmlparser);
889: XML_SetUserData(xmlparser, context);
890:
891: /* Call the new RDF instance callback (if any) with this new stream */
892: if (RDFInstance)
893: (*RDFInstance)(me, request, target_format, target_stream, context, RDFInstanceContext);
894: }
895: }
896:
897: /* ------------------------------------------------------------------------- */
898: /* RDF PARSER */
899: /* ------------------------------------------------------------------------- */
900:
901: PRIVATE void visit_element_children (HTList *children)
902: {
903: HTElement *child = NULL;
904: HTList *cur = children;
905: while ((child = (HTElement *) HTList_nextObject(cur))) {
906: if (!HTList_isEmpty(child->m_children))
907: visit_element_children(child->m_children);
908: HTElement_delete(child);
909: }
910: }
911:
912: PRIVATE void delete_elements (HTRDF * me)
913: {
914: if (me && me->m_root) {
915: HTElement *r = me->m_root;
916: if (!HTList_isEmpty(r->m_children))
917: visit_element_children(r->m_children);
918: HTElement_delete(r);
919: }
920: }
921:
2.8 barstow 922: PRIVATE void delete_literal_elements (HTRDF * me)
923: {
924: if (me && me->m_literalStack) {
925: HTList *cur = me->m_literalStack;
926: HTElement *e = NULL;
927: while ((e = (HTElement *) HTList_nextObject(cur))) {
928: HTElement_delete(e);
929: }
930: HTList_delete(me->m_literalStack);
931: }
932: }
933:
934:
2.1 frystyk 935: PUBLIC HTRDF * HTRDF_new (void)
936: {
937: HTRDF * me;
938: if ((me = (HTRDF *) HT_CALLOC(1, sizeof(HTRDF))) == NULL)
939: HT_OUTOFMEM("HTRDF_new");
940: me->m_namespaceStack = HTList_new();
941: me->m_elementStack = HTList_new();
2.8 barstow 942: me->m_literalStack = HTList_new();
2.1 frystyk 943:
944: me->m_triples = HTList_new();
945: me->m_vAllNameSpaces = HTList_new();
946:
947: me->m_bCreateBags = FALSE;
948: me->m_bFetchSchemas = FALSE;
949:
950: me->m_parseTypeStack = HTList_new();
951: me->m_parseElementStack = HTList_new();
952:
953: me->m_vResources = HTList_new();
954: me->m_vResolveQueue = HTList_new();
955: me->m_hIDtable = HTHashtable_new(0);
956:
957: return me;
958: }
959:
960: PUBLIC BOOL HTRDF_delete (HTRDF * me)
961: {
962: if (me) {
963: delete_elements(me);
964: if (me->m_namespaceStack) {
965: HTList *cur = me->m_namespaceStack;
966: HTAssocList *alist = NULL;
967: while ((alist = (HTAssocList *) HTList_nextObject(cur))) {
968: HTAssocList_delete(alist);
969: }
970: HTList_delete(me->m_namespaceStack);
971: }
972: if (me->m_elementStack) HTList_delete(me->m_elementStack);
2.8 barstow 973:
974: delete_literal_elements(me);
975:
2.1 frystyk 976: me->m_root = NULL;
977: if (me->m_triples) {
978: HTList *cur = me->m_triples;
979: HTTriple *t = NULL;
980: while ((t = (HTTriple *) HTList_nextObject(cur))) {
981: /*HTTriple_print(t);*/
982: HTTriple_delete(t);
983: }
984: HTList_delete(me->m_triples);
985: }
986: HT_FREE(me->m_sSource);
987: if (me->m_vAllNameSpaces) {
988: HTList *cur = me->m_vAllNameSpaces;
2.3 frystyk 989: char * s = NULL;
990: while ((s = (char *) HTList_nextObject(cur))) {
2.1 frystyk 991: HT_FREE(s);
992: }
993: HTList_delete(me->m_vAllNameSpaces);
994: }
995: if (me->m_parseTypeStack)
996: HTList_delete(me->m_parseTypeStack);
997: if (me->m_parseElementStack)
998: HTList_delete(me->m_parseElementStack);
999: if (me->m_vResources)
1000: HTList_delete(me->m_vResources);
1001: if (me->m_vResolveQueue)
1002: HTList_delete(me->m_vResolveQueue);
1003: if (me->m_hIDtable)
1004: HTHashtable_delete(me->m_hIDtable);
1005: HT_FREE(me->m_sLiteral);
1006: HT_FREE(me);
1007: return YES;
1008: }
1009: return NO;
1010: }
1011:
1012: /*
1013: * setSource method saves the name of the source document for
1014: * later inspection if needed
1015: */
2.3 frystyk 1016: PUBLIC BOOL HTRDF_setSource(HTRDF *me, char * source)
2.1 frystyk 1017: {
1018: if (me && source) {
1019: StrAllocCopy (me->m_sSource, source);
1020: return YES;
1021: }
1022: return NO;
1023: }
1024:
1025: /*
1026: * Go through the m_vResolveQueue and assign
1027: * direct object reference for each symbolic reference
1028: */
1029: PUBLIC BOOL HTRDF_resolve (HTRDF * me)
1030: {
1031: if (me) {
1032: HTList * cur = me->m_vResolveQueue;
1033: HTElement *e = NULL;
1034: HTElement *e2 = NULL;
1035: while ((e = (HTElement *) HTList_nextObject(cur))) {
2.3 frystyk 1036: char * sAbout = HTElement_getAttribute2(e, RDFMS, "about");
1037: char * sResource = HTElement_getAttribute2(e, RDFMS, "resource");
1038: char * sAboutEach = HTElement_getAttribute2(e, RDFMS, "aboutEach");
1039: char * sAboutEachPrefix = HTElement_getAttribute2(e, RDFMS,
2.1 frystyk 1040: "aboutEachPrefix");
1041: if (sAbout) {
1042: if (sAbout[0]=='#')
1043: sAbout = &(sAbout[1]);
1044: e2 = (HTElement *) HTRDF_lookforNode(me, sAbout);
1045: if (e2)
1046: HTElement_addTarget(e, e2);
1047: else
1048: HTPrint("Unresolved internal reference %s\n", sAbout);
1049: }
1050: if (sResource) {
1051: if (sResource[0]=='#')
1052: sResource = &(sResource[1]);
1053: e2 = (HTElement *) HTRDF_lookforNode(me, sResource);
1054: if (e2)
1055: HTElement_addTarget(e, e2);
1056: }
1057:
1058: if (sAboutEach) {
1059: sAboutEach = &(sAboutEach[1]);
1060: e2 = (HTElement *) HTRDF_lookforNode(me, sAboutEach);
1061: if (e2)
1062: HTElement_addTarget(e, e2);
1063: }
1064: if (sAboutEachPrefix) {
1065: HTList * curr = me->m_vResources;
1066: HTElement *ele = NULL;
1067: while ((ele = (HTElement *) HTList_nextObject(curr))) {
2.3 frystyk 1068: char * sA = HTElement_getAttribute2(ele, RDFMS, "about");
2.1 frystyk 1069: if (sA &&
1070: !strncmp(sA, sAboutEachPrefix, strlen(sAboutEachPrefix))) {
1071: HTElement_addTarget(e, ele);
1072: }
1073: }
1074: }
1075: }
1076: HTList_delete(me->m_vResources);
1077: me->m_vResources = HTList_new();
1078: return YES;
1079: }
1080: return NO;
1081: }
1082:
1083: /**
1084: * Check if the element e is from the namespace
1085: * of the RDF schema by comparing only the beginning of
1086: * the expanded element name with the canonical RDFMS
1087: * URI
1088: */
1089: PUBLIC BOOL HTRDF_isRDF(HTRDF * me, HTElement *e)
1090: {
1091: return (me && e && e->m_sName) ?
1092: (!strncmp(e->m_sName, RDFMS, strlen(RDFMS))) : NO;
1093: }
1094:
1095: PUBLIC BOOL HTRDF_isRDFroot (HTRDF * me, HTElement *e)
1096: {
1097: if (me && e && e->m_sName) {
1098: int len = strlen(e->m_sName);
1099: if (len > 3) return (HTRDF_isRDF(me, e) && !strcmp(&(e->m_sName[len-3]), "RDF"));
1100: }
1101: return NO;
1102: }
1103:
1104: /**
1105: * Is the element a Description
1106: */
1107: PUBLIC BOOL HTRDF_isDescription (HTRDF *me, HTElement *e)
1108: {
1109: if (me && e && e->m_sName) {
1110: int len = strlen(e->m_sName);
1111: if (len > 11) return (HTRDF_isRDF(me, e) && !strcmp(&(e->m_sName[len-11]), "Description"));
1112: }
1113: return NO;
1114: }
1115:
1116: /*
1117: * Is the element a ListItem
1118: */
1119: PUBLIC BOOL HTRDF_isListItem (HTRDF *me, HTElement *e)
1120: {
1121: if (me && e && e->m_sName) {
1122: int len = strlen(e->m_sName);
1123: if (len > 2)
1124: return (HTRDF_isRDF(me, e) && (!strcmp(&(e->m_sName[len-2]), "li") || strchr(e->m_sName,'_')));
1125: }
1126: return NO;
1127: }
1128:
1129: /**
1130: * Is the element a Sequence
1131: */
1132: PUBLIC BOOL HTRDF_isSequence (HTRDF *me, HTElement *e)
1133: {
1134: if (me && e && e->m_sName) {
1135: int len = strlen(e->m_sName);
1136: if (len > 3) return (HTRDF_isRDF(me, e) && !strcmp(&(e->m_sName[len-3]), "Seq"));
1137: }
1138: return NO;
1139: }
1140:
1141: /*
1142: * Is the element an Alternative
1143: */
1144: PUBLIC BOOL HTRDF_isAlternative (HTRDF *me, HTElement *e)
1145: {
1146: if (me && e && e->m_sName) {
1147: int len = strlen(e->m_sName);
1148: if (len > 3) return (HTRDF_isRDF(me, e) && !strcmp(&(e->m_sName[len-3]), "Alt"));
1149: }
1150: return NO;
1151: }
1152:
1153: /*
1154: * Is the element a Bag
1155: */
1156: PUBLIC BOOL HTRDF_isBag (HTRDF *me, HTElement *e)
1157: {
1158: if (me && e && e->m_sName) {
1159: int len = strlen(e->m_sName);
1160: if (len > 3) return (HTRDF_isRDF(me, e) && !strcmp(&(e->m_sName[len-3]), "Bag"));
1161: }
1162: return NO;
1163: }
1164:
1165: /**
1166: * Is the element a Container
1167: */
1168: PUBLIC BOOL HTRDF_isContainer (HTRDF *me, HTElement *e)
1169: {
1170: return (HTRDF_isSequence(me, e) ||
1171: HTRDF_isAlternative(me, e) ||
1172: HTRDF_isBag(me, e));
1173: }
1174:
1175: /*
1176: * This method matches all properties but those from RDF namespace
1177: */
1178: PUBLIC BOOL HTRDF_isTypedPredicate(HTRDF *me, HTElement *e)
1179: {
1180: if (me && e && e->m_sName) {
1181: int len = strlen(e->m_sName);
2.3 frystyk 1182: char * tp[] = {"predicate", "subject", "object",
2.1 frystyk 1183: "value", "type", "Property", "Statement"};
1184: int i;
1185: if (HTRDF_isRDF(me, e)) {
1186: for(i = 0; i< 7; i++) {
1187: int ntp = strlen(tp[i]);
1188: if (len > ntp) {
1189: if (!strcmp(&(e->m_sName[len-ntp]), tp[i]))
1190: return YES;
1191: }
1192: }
1193: return NO;
1194: }
1195: if (len > 0) return YES;
1196: }
1197: return NO;
1198: }
1199:
2.3 frystyk 1200: PRIVATE void HTRDF_processListItem (HTRDF * me, char * sID, HTElement *listitem,
2.1 frystyk 1201: int iCounter)
1202: {
1203: /*
1204: * Two different cases for
1205: * 1. LI element without content (resource available)
1206: * 2. LI element with content (resource unavailable)
1207: */
2.3 frystyk 1208: char * cName = NULL;
1209: char * sResource = HTRDF_getResource(me, listitem);
2.1 frystyk 1210: char sdig[20];
1211: sprintf(sdig, "_%d", iCounter);
1212: StrAllocMCopy(&cName, RDFMS, sdig, NULL);
1213: if (sResource) {
1214: HTRDF_addTriple(me, cName, sID, sResource);
1215: /* validity checking */
1216: if (!HTList_isEmpty(listitem->m_children)){
1217: HTPrint("Listitem with resource attribute can not have child nodes");
1218: }
1219: StrAllocCopy(listitem->m_sID, sResource);
1220: } else {
1221: HTList *cur = listitem->m_children;
1222: HTElement *n = NULL;
1223: while ((n = (HTElement *) HTList_nextObject(cur))) {
1224: if (HTElement_instanceOfData(n)) {
1225: HTRDF_addTriple(me, cName, sID, n->m_sContent);
1226: } else if (HTRDF_isDescription(me, n)) {
2.3 frystyk 1227: char * sNodeID = HTRDF_processDescription(me, n, NO, YES, NO);
2.1 frystyk 1228: HTRDF_addTriple(me, cName, sID, sNodeID);
1229: StrAllocCopy(listitem->m_sID, sNodeID);
1230: } else if (HTRDF_isListItem(me, n)) {
1231: HTPrint("Can not nest list item inside list item\n");
1232: } else if (HTRDF_isContainer(me, n)) {
2.3 frystyk 1233: char * c = HTRDF_processContainer(me, n);
2.1 frystyk 1234: HTRDF_addTriple(me, cName, sID, n->m_sID);
1235: HT_FREE(c);
1236: } else if (HTRDF_isTypedPredicate(me, n)) {
2.3 frystyk 1237: char * sNodeID = HTRDF_processTypedNode(me, n);
2.1 frystyk 1238: HTRDF_addTriple(me, cName, sID, sNodeID);
1239: HT_FREE(sNodeID);
1240: }
1241: }
1242: }
1243: HT_FREE(cName);
1244: }
1245:
2.3 frystyk 1246: PRIVATE char * HTRDF_processContainer(HTRDF *me, HTElement *n)
2.1 frystyk 1247: {
2.3 frystyk 1248: char * sID = NULL;
1249: char * tName = NULL;
1250: char * aName = NULL;
1251: char * sName = NULL;
1252: char * bName = NULL;
2.1 frystyk 1253: StrAllocMCopy(&tName, RDFMS, "type", NULL);
1254: StrAllocMCopy(&aName, RDFMS, "Alt", NULL);
1255: StrAllocMCopy(&sName, RDFMS, "Seq", NULL);
1256: StrAllocMCopy(&bName, RDFMS, "Bag", NULL);
1257:
1258: StrAllocCopy(sID, n->m_sID);
1259: if (!sID)
1260: sID = HTRDF_newReificationID(me);
1261: /*
1262: * Do the instantiation only once
1263: */
1264: if (!n->m_bDone) {
1265: if (HTRDF_isSequence(me, n)) {
1266: HTRDF_addTriple(me, tName, sID, sName);
1267: } else if (HTRDF_isAlternative(me, n)) {
1268: HTRDF_addTriple(me, tName, sID, aName);
1269: } else if (HTRDF_isBag(me, n)) {
1270: HTRDF_addTriple(me, tName, sID, bName);
1271: }
1272: n->m_bDone = YES;
1273: }
1274: HTRDF_expandAttributes(me, n, n);
1275:
1276: {
1277: HTList *cur = n->m_children;
1278: HTElement *n2 = NULL;
1279: int iCounter = 1;
1280: if (HTList_isEmpty(cur) && HTRDF_isAlternative(me, n))
1281: HTPrint("An RDF:Alt container must have at least one list item\n");
1282: while ((n2 = (HTElement *) HTList_nextObject(cur))) {
1283: if (HTRDF_isListItem(me, n2)) {
1284: HTRDF_processListItem(me, sID, n2, iCounter);
1285: iCounter++;
1286: } else {
1287: HTPrint("Can not nest %s, inside container\n", n2->m_sName);
1288: }
1289: }
1290: } /* end of block */
1291:
1292: HT_FREE(tName); HT_FREE(sName); HT_FREE(aName); HT_FREE(bName);
1293:
1294: return sID;
1295: }
1296: /*
1297: * Manage the typedNode production in the RDF grammar.
1298: *
1299: */
2.3 frystyk 1300: PUBLIC char * HTRDF_processTypedNode(HTRDF *me, HTElement *typedNode)
2.1 frystyk 1301: {
2.3 frystyk 1302: char * sID = HTElement_getAttribute2(typedNode, RDFMS, "ID");
1303: char * sBagID = HTElement_getAttribute2(typedNode, RDFMS, "bagID");
1304: char * sAbout = HTElement_getAttribute2(typedNode, RDFMS, "about");
1305: char * sAboutEach = HTElement_getAttribute2(typedNode, RDFMS, "aboutEach");
1306: /*char * sAboutEachPrefix = HTElement_getAttribute2(typedNode, RDFMS,
2.1 frystyk 1307: "aboutEachPrefix");*/
2.3 frystyk 1308: char * resource = HTElement_getAttribute2(typedNode, RDFMS, "resource");
1309: char * iName = NULL;
1310: char * bName = NULL;
1311: char * tName = NULL;
2.1 frystyk 1312:
2.3 frystyk 1313: char * sObject = NULL;
2.1 frystyk 1314:
1315: StrAllocMCopy(&iName, RDFMS, "ID", NULL);
1316: StrAllocMCopy(&bName, RDFMS, "bagID", NULL);
1317: StrAllocMCopy(&tName, RDFMS, "type", NULL);
1318:
1319: if (resource)
1320: HTPrint("resource attribute not allowed for a typedNode %s\n",
1321: typedNode->m_sName);
1322:
1323: /*
1324: * We are going to manage this typedNode using the processDescription
1325: * routine later on. Before that, place all properties encoded as
1326: * attributes to separate child nodes.
1327: */
1328: {
1329: HTAssoc * assoc;
1330: HTAssocList *cur = typedNode->m_attributes;
2.3 frystyk 1331: char * sAttribute = NULL;
1332: char * tValue = NULL;
1333: char * sValue = NULL;
2.1 frystyk 1334: while((assoc= (HTAssoc *) HTList_nextObject(cur))) {
1335: sAttribute = HTAssoc_name(assoc);
1336: sValue = HTAssoc_value(assoc);
1337: tValue = trim(sValue);
1338: if (strncmp(sAttribute, RDFMS, strlen(RDFMS)) &&
1339: strncmp(sAttribute, XMLSCHEMA, strlen(XMLSCHEMA))) {
1340: if (strlen(tValue) > 0) {
1341: HTAssocList *newAL = HTAssocList_new();
1342: HTElement *newPredicate = HTElement_new(sAttribute, newAL);
1343: HTElement *d = NULL;
1344: HTElement_addAttribute(newPredicate, iName,
1345: sAbout ? sAbout : sID);
1346: HTElement_addAttribute(newPredicate, bName, sBagID);
1347: d = HTElement_new2(tValue);
1348: HTElement_addChild(newPredicate, d);
1349: HTElement_addChild(typedNode, newPredicate);
1350: }
1351: }
1352: HT_FREE(tValue);
1353: } /* end of while */
1354: }/* end of block */
1355: {
1356: if (sAbout)
1357: StrAllocCopy(sObject, sAbout);
1358: else if (sID)
1359: StrAllocCopy(sObject, sID);
1360: else
1361: sObject = HTRDF_newReificationID(me);
1362: StrAllocCopy(typedNode->m_sID, sObject);
1363:
1364: /* special case: should the typedNode have aboutEach attribute,
1365: ** the type predicate should distribute to pointed
1366: ** collection also -> create a child node to the typedNode
1367: */
1368: if (sAboutEach && !HTList_isEmpty(typedNode->m_vTargets)) {
1369: HTAssocList *newAL = HTAssocList_new();
1370: HTElement *newPredicate = HTElement_new(tName, newAL);
1371: HTElement *d = HTElement_new2(typedNode->m_sName);
1372: HTElement_addChild(newPredicate, d);
1373: HTElement_addChild(typedNode, newPredicate);
1374: } else {
1375: HTRDF_addTriple(me, tName, sObject, typedNode->m_sName);
1376: }
1377: HTRDF_processDescription(me, typedNode, NO, NO, YES);
1378: }/* end of block */
1379:
1380: HT_FREE(iName); HT_FREE(bName); HT_FREE(tName);
1381:
1382: return sObject;
1383: }
1384:
1385: /*
1386: * Start processing an RDF/XML document instance from the
1387: * root element rdf.
1388: *
1389: */
1390: PUBLIC BOOL HTRDF_processRDF (HTRDF *me, HTElement *e)
1391: {
1392: if (me && e) {
1393: HTList *cur = e->m_children;
1394: HTElement *ele = NULL;
1395: if (HTList_isEmpty(e->m_children)) {
1396: HTPrint("Empty RDF Element\n");
1397: return NO;
1398: }
1399: while ((ele= (HTElement *) HTList_nextObject(cur))) {
1400: if (HTRDF_isDescription(me, ele)) {
1401: HTRDF_processDescription(me, ele, NO, me->m_bCreateBags,
1402: me->m_bCreateBags);
1403: } else if (HTRDF_isContainer(me, ele)) {
2.3 frystyk 1404: char * c = HTRDF_processContainer(me, ele);
2.1 frystyk 1405: HT_FREE(c);
1406: } else if (HTRDF_isTypedPredicate(me, ele)) {
2.3 frystyk 1407: char * t = HTRDF_processTypedNode(me, ele);
2.1 frystyk 1408: HT_FREE(t);
1409: }
1410: }
1411: return YES;
1412: }
1413: return NO;
1414: }
1415:
1416: /*
1417: * processPredicate handles all elements not defined as special
1418: * RDF elements.
1419: *
1420: * predicate The predicate element itself
1421: * description Context for the predicate
1422: * sTarget The target resource
1423: * reificate Should this predicate be reificated
1424: *
1425: * return the new ID which can be used to identify the predicate
1426: *
1427: */
/*
 * Overview of the flow below:
 *  1. attributes of the predicate that expand into properties are managed
 *     through a temporary Description element (place_holder)
 *  2. a resource attribute without children gives the value directly
 *  3. a resource attribute with a resolved target manages that target as
 *     a Description
 *  4. otherwise the value comes from the children (or from a newly
 *     generated ID when there are none)
 *
 * The result may be NULL.
 *
 * NOTE(review): every non-NULL value returned below comes from
 * HTRDF_reificate(), HTRDF_processTypedNode() or a StrAllocCopy() into
 * nsStatementID, and this function itself HT_FREEs nsStatementID before
 * assigning it again -- so the result looks like a string the caller is
 * expected to release; confirm against the callers.
 */
2.3 frystyk 1428: PRIVATE char * HTRDF_processPredicate (HTRDF * me,
2.1 frystyk 1429: HTElement * predicate,
1430: HTElement * description,
2.3 frystyk 1431: char * sTarget,
2.1 frystyk 1432: BOOL reificate)
1433: {
2.3 frystyk 1434: char * sStatementID = HTElement_getAttribute2(predicate, RDFMS, "ID");
1435: char * nsStatementID = NULL;
1436: char * sBagID = HTElement_getAttribute2(predicate, RDFMS, "bagID");
1437: char * sResource = HTRDF_getResource(me, predicate);
2.1 frystyk 1438:
1439: /*
1440: ** If a predicate has other attributes than rdf:ID, rdf:bagID,
1441: ** or xmlns... -> generate new triples according to the spec.
1442: ** (See end of Section 6)
1443: */
1444: {
1445: HTElement * place_holder = NULL;
1446: HTAssocList * newAL = HTAssocList_new();
2.3 frystyk 1447: char * fName = NULL;
1448: char * aName = NULL;
2.1 frystyk 1449:
1450: StrAllocMCopy(&fName, RDFMS, "Description", NULL);
1451: place_holder = HTElement_new(fName, newAL);
1452: HT_FREE(fName);
1453:
1454: if (HTRDF_expandAttributes(me, place_holder, predicate)) {
1455:
1456: /* error checking */
1457: if (!HTList_isEmpty(predicate->m_children)) {
1458: HTPrint("%s must be an empty element since it uses propAttr grammar production", predicate->m_sName);
1459: HTElement_delete(place_holder);
1460: return NULL;
1461: }
1462: StrAllocMCopy(&aName, RDFMS, "about", NULL);
1463:
1464: /* determine the 'about' part for the new statements */
1465: if (sStatementID) {
1466: HTElement *data = HTElement_new2(sStatementID);
1467: HTElement_addAttribute(place_holder, aName, sStatementID);
1468:
1469: /* hack: make rdf:ID the value of the predicate */
1470: HTElement_addChild(predicate, data);
/* NOTE(review): with this extra child the predicate presumably no longer
 * has an empty child list, so the "resource without children" case further
 * down is skipped -- confirm that HTElement_addChild() appends to
 * m_children. */
1471: } else if (sResource) {
1472: HTElement_addAttribute(place_holder, aName, sResource);
1473: } else {
1474: nsStatementID = HTRDF_newReificationID(me);
1475: HTElement_addAttribute(place_holder, aName, nsStatementID);
1476: HT_FREE(nsStatementID);
/* The generated ID is only used for the attribute above and is released
 * right away (HT_FREE presumably also resets the pointer -- confirm the
 * macro). */
1477: }
1478: HT_FREE(aName);
1479:
1480: if (sBagID) {
1481: StrAllocMCopy(&fName, RDFMS, "bagID", NULL);
1482: HTElement_addAttribute(place_holder, fName, sBagID);
1483: HT_FREE(fName);
1484: StrAllocCopy(place_holder->m_sBagID, sBagID);
1485: }
/* NOTE(review): place_holder is not deleted on this path after it has been
 * processed -- confirm who owns it from here on. */
1486: HTRDF_processDescription(me, place_holder, NO, NO, me->m_bCreateBags);
1487: } else {
1488:
1489: /* Nothing but xmlns or RDF stuff, so we don't need new element */
1490: HTElement_delete(place_holder);
1491: }
1492: }
1493:
1494: /*
1495: ** Tricky part: if the resource attribute is present for a predicate
1496: ** AND there are no children, the value of the predicate is either
1497: ** 1. the URI in the resource attribute OR
1498: ** 2. the node ID of the resolved #resource attribute
1499: */
1500: if (sResource && HTList_isEmpty(predicate->m_children)) {
/* Case 1: no resolved target, the resource string itself is the object */
1501: if (!HTElement_target(predicate)) {
1502: if (reificate) {
1503: HT_FREE(nsStatementID);
1504: nsStatementID = HTRDF_reificate (me, predicate->m_sName,
1505: sTarget, sResource,
1506: predicate->m_sID);
1507: StrAllocCopy(predicate->m_sID, nsStatementID);
1508: } else {
1509: HTRDF_addTriple(me, predicate->m_sName, sTarget, sResource);
1510: }
1511: } else {
/* Case 2: the ID of the resolved target element is the object */
1512: HTElement *target = HTElement_target(predicate);
1513: if (reificate) {
1514: nsStatementID = HTRDF_reificate (me, predicate->m_sName,
1515: sTarget,
1516: target->m_sID,
1517: predicate->m_sID);
1518: StrAllocCopy(predicate->m_sID, nsStatementID);
1519: } else {
1520: HTRDF_addTriple(me, predicate->m_sName, sTarget, target->m_sID);
1521: }
1522: }
2.7 barstow 1523: if (nsStatementID && predicate->m_sID)
1524: StrAllocCopy(nsStatementID, predicate->m_sID);
2.1 frystyk 1525: return nsStatementID;
1526: }
1527:
1528: /*
1529: ** Does this predicate make a reference somewhere using the
1530: ** sResource attribute
1531: */
1532: if (sResource && HTElement_target(predicate)) {
/* dStatementID is whatever HTRDF_processDescription() returns for the
 * target; it is not released in this function */
2.3 frystyk 1533: char * dStatementID = HTRDF_processDescription(me,
2.1 frystyk 1534: HTElement_target(predicate),
1535: YES, NO, NO);
1536: if (reificate) {
1537: HT_FREE(nsStatementID);
1538: nsStatementID = HTRDF_reificate (me, predicate->m_sName,
1539: sTarget, dStatementID,
1540: predicate->m_sID);
1541: StrAllocCopy(predicate->m_sID, nsStatementID);
1542: } else {
1543: StrAllocCopy(nsStatementID, dStatementID);
1544: HTRDF_addTriple(me, predicate->m_sName, sTarget, nsStatementID);
1545: }
1546: return nsStatementID;
1547: }
1548:
1549: /*
1550: ** Before looping through the children, let's check
1551: ** if there are any. If not, the value of the predicate is
1552: ** an anonymous node
1553: */
1554: {
1555: HTList *cur = predicate->m_children;
1556: BOOL bUsedTypedNodeProduction = NO;
1557: HTElement *n2;
/* NOTE(review): on every path reaching this point nsStatementID has either
 * never been assigned or has been HT_FREEd above, so the copy below looks
 * like it can only run if HT_FREE leaves the pointer non-NULL -- confirm. */
2.7 barstow 1558: if (nsStatementID && sStatementID)
1559: StrAllocCopy(nsStatementID, sStatementID);
2.1 frystyk 1560: if (HTList_isEmpty(cur)) {
/* No children: a newly generated ID stands in as the object */
1561: if (reificate) {
2.3 frystyk 1562: char * nr = HTRDF_newReificationID(me);
2.1 frystyk 1563: HT_FREE(nsStatementID);
1564: nsStatementID = HTRDF_reificate (me, predicate->m_sName,
1565: sTarget, nr,
1566: predicate->m_sID);
1567: HT_FREE(nr);
1568: } else {
2.3 frystyk 1569: char * nr = HTRDF_newReificationID(me);
2.1 frystyk 1570: HTRDF_addTriple(me, predicate->m_sName, sTarget, nr);
1571: HT_FREE(nr);
1572: }
1573: }
/* Each child is handled by the first matching case below; when there are
 * several children, nsStatementID ends up holding the value set by the last
 * child that assigned it */
1574: while ((n2= (HTElement *) HTList_nextObject(cur))) {
1575: if (HTRDF_isDescription(me, n2)) {
/* Child is a nested Description: its ID is the object */
1576: HTElement *d2 = n2;
2.3 frystyk 1577: char * dStatementID =HTRDF_processDescription(me, d2, YES, NO, NO);
/* The pointer comparison skips the copy when the returned ID is the
 * element's own m_sID string */
2.7 barstow 1578: if (d2->m_sID && dStatementID && d2->m_sID != dStatementID)
1579: StrAllocCopy(d2->m_sID, dStatementID);
2.1 frystyk 1580:
1581: if (reificate) {
1582: HT_FREE(nsStatementID);
1583: nsStatementID = HTRDF_reificate (me, predicate->m_sName,
1584: sTarget, dStatementID,
1585: predicate->m_sID);
1586: } else {
1587: StrAllocCopy(nsStatementID, dStatementID);
1588: HTRDF_addTriple(me, predicate->m_sName, sTarget,
1589: nsStatementID);
1590: }
1591: } else if (HTElement_instanceOfData(n2)) {
2.3 frystyk 1592: char * tValue = NULL;
1593: char * sValue = n2->m_sContent;
2.1 frystyk 1594: /* we've got real data */
1595: /*
1596: * Only if the content is not empty PCDATA (whitespace that is)
1597: * print the triple
1598: */
1599: tValue = trim(sValue);
1600: if (tValue && strlen(tValue) > 0) {
1601: if (reificate) {
1602: HT_FREE(nsStatementID);
1603: nsStatementID = HTRDF_reificate (me, predicate->m_sName,
1604: sTarget, tValue,
1605: predicate->m_sID);
1606: StrAllocCopy(predicate->m_sID, nsStatementID);
1607: } else {
1608: HTRDF_addTriple(me, predicate->m_sName, sTarget, tValue);
1609: }
1610: }
1611: HT_FREE(tValue);
1612: } else if (HTRDF_isContainer(me, n2)) {
/* Child is a container: the container ID is the object; when the enclosing
 * description has a target, that target's rdf:about value is used as the
 * subject instead of sTarget */
1613: HTElement *target = HTElement_target(description);
2.3 frystyk 1614: char * aboutTarget =
2.1 frystyk 1615: target ?
1616: HTElement_getAttribute2(target, RDFMS, "about") : NULL;
2.3 frystyk 1617: char * sCollectionID = HTRDF_processContainer(me, n2);
2.1 frystyk 1618: StrAllocCopy(nsStatementID, sCollectionID);
1619: /* Attach the collection to the current predicate */
1620: if (target) {
1621: if (reificate) {
1622: HT_FREE(nsStatementID);
1623: nsStatementID=HTRDF_reificate (me, predicate->m_sName,
1624: aboutTarget,
1625: sCollectionID,
1626: predicate->m_sID);
1627: StrAllocCopy(predicate->m_sID, nsStatementID);
1628: } else {
1629: HTRDF_addTriple(me, predicate->m_sName, aboutTarget,
1630: sCollectionID);
1631: }
1632: } else {
1633: if (reificate) {
1634: HT_FREE(nsStatementID);
1635: nsStatementID=HTRDF_reificate (me, predicate->m_sName,
1636: sTarget, sCollectionID,
1637: predicate->m_sID);
1638: StrAllocCopy(predicate->m_sID, nsStatementID);
1639: } else {
1640: HTRDF_addTriple(me, predicate->m_sName, sTarget,
1641: sCollectionID);
1642: }
1643: }
1644: HT_FREE(sCollectionID);
1645: } else if (HTRDF_isTypedPredicate(me, n2)) {
/* Child is a typed node: a second one is reported but still processed */
1646: if (bUsedTypedNodeProduction) {
1647: HTPrint("Only one typedNode allowed inside a predicate (Extra typedNode: %s )\n", n2->m_sName);
1648: } else {
1649: bUsedTypedNodeProduction = YES;
1650: }
1651: HT_FREE(nsStatementID);
1652: nsStatementID = HTRDF_processTypedNode(me, n2);
1653: HTRDF_addTriple(me, predicate->m_sName, sTarget, nsStatementID);
1654: }
1655: }
1656: return nsStatementID;
1657: } /* end of block */
/* Not reached: the block above always returns */
1658: return NULL;
1659: }
1660:
1661: /*
1662: * processDescription manages Description elements
1663: *
1664: * description The Description element itself
1665: * inPredicate Is this is a nested description
1666: * reificate Do we need to reificate
1667: * createBag Do we create a bag container
1668: *
1669: * return An ID for the description
1670: *
1671: */
2.3 frystyk 1672: PUBLIC char * HTRDF_processDescription (HTRDF * me,
2.1 frystyk 1673: HTElement * description,
1674: BOOL inPredicate,
1675: BOOL reificate,
1676: BOOL createBag)
1677: {
1678: int iChildCount = 1;
1679: BOOL bOnce = YES;
1680:
2.3 frystyk 1681: char * sAbout = HTElement_getAttribute2(description, RDFMS, "about");
1682: char * sAboutEach = HTElement_getAttribute2(description, RDFMS, "aboutEach");
1683: char * sAboutEachPrefix = HTElement_getAttribute2(description, RDFMS,
2.1 frystyk 1684: "aboutEachPrefix");
2.3 frystyk 1685: char * sBagid = HTElement_getAttribute2(description, RDFMS, "bagID");
1686: char * sID = HTElement_getAttribute2(description, RDFMS, "ID");
2.1 frystyk 1687: HTElement *target = HTElement_target(description);
1688: BOOL hasTarget = HTList_isEmpty(description->m_vTargets) ? NO : YES;
1689: BOOL targetIsContainer = NO;
2.3 frystyk 1690: char * sTargetAbout = NULL;
1691: char * sTargetBagID = NULL;
1692: char * sTargetID = NULL;
1693: char * dName = NULL;
1694: char * aName = NULL;
2.1 frystyk 1695:
1696: /*
1697: ** Return immediately if the description has already been managed
1698: */
1699: if (description->m_bDone) return description->m_sID;
1700:
1701: StrAllocMCopy(&dName, RDFMS, "Description", NULL);
1702: StrAllocMCopy(&aName, RDFMS, "about", NULL);
1703:
1704: /*
1705: ** Determine what the target of the Description reference is
1706: */
1707: if (hasTarget) {
2.3 frystyk 1708: char * sTargetID2 = HTElement_getAttribute2(target, RDFMS, "ID");
2.1 frystyk 1709: sTargetAbout = HTElement_getAttribute2(target, RDFMS, "about");
1710: sTargetBagID = HTElement_getAttribute2(target, RDFMS, "bagID");
1711: if (me->m_sSource && sTargetID2) {
1712: StrAllocMCopy(&sTargetID, me->m_sSource, sTargetID2, NULL);
1713: } else {
1714: StrAllocCopy(sTargetID, sTargetID2);
1715: }
1716: /*
1717: * Target is collection if
1718: * 1. it is identified with bagID attribute
1719: * 2. it is identified with ID attribute and is a collection
1720: */
1721: if (sTargetBagID && sAbout) {
1722: targetIsContainer = !strcmp(&(sAbout[1]), sTargetBagID);
1723: } else {
1724: if (sTargetID && sAbout && !strcmp(&(sAbout[1]), sTargetID) &&
1725: HTRDF_isContainer(me, target))
1726: targetIsContainer = YES;
1727: }
1728: HT_FREE(sTargetID);
1729: }
1730:
1731: /*
1732: * Check if there are properties encoded using the abbreviated
1733: * syntax
1734: */
1735: HTRDF_expandAttributes(me, description, description);
1736:
1737: /*
1738: * Manage the aboutEach attribute here
1739: */
1740: if (sAboutEach && hasTarget) {
1741: if (HTRDF_isContainer(me, target)) {
1742: HTList *cur = target->m_children;
1743: HTElement *ele = NULL;
1744: while ((ele= (HTElement *) HTList_nextObject(cur))) {
1745: if (HTRDF_isListItem(me, ele)) {
2.3 frystyk 1746: char * sResource = HTRDF_getResource(me, ele);
2.1 frystyk 1747: if (sResource) {
1748: HTElement * newDescription = NULL;
1749: HTElement * ele2;
1750: HTList * cur2 = description->m_children;
1751:
1752: /*
1753: * Manage <li resource="..." /> case
1754: */
1755: if (sResource) {
1756: HTAssocList *newAL = HTAssocList_new();
1757: newDescription = HTElement_new(dName, newAL);
1758: HTElement_addAttribute(newDescription, aName, sResource);
1759: }
1760:
1761: while ((ele2 = (HTElement *) HTList_nextObject(cur2))){
1762: if (newDescription) HTElement_addChild(newDescription, ele2);
1763: }
1764:
1765: if (newDescription)
1766: HTRDF_processDescription(me, newDescription, NO, NO, NO);
1767:
1768: /* Not needed anymore */
1769: HTElement_delete(newDescription);
1770:
1771: } else {
1772: /**
1773: * Otherwise we have a structured value inside <li>
1774: *
1775: * loop through the children of <li>
1776: * (can be only one)
1777: */
1778: HTList *cur2 = ele->m_children;
1779: HTElement *ele2 = NULL;
1780: while ((ele2 = (HTElement *) HTList_nextObject(cur2))) {
1781: HTAssocList *newAL = HTAssocList_new();
1782: HTElement *newNode = HTElement_new(dName, newAL);
1783: HTList *cur3 = description->m_children;
1784: HTElement *ele3 = NULL;
1785: /* loop through the items in the
1786: * description with aboutEach
1787: * and add them to the target
1788: */
1789: while ((ele3 = (HTElement *)
1790: HTList_nextObject(cur3))) {
1791: HTElement_addChild(newNode, ele3);
1792: }
1793: HTElement_addTarget(newNode, ele2);
1794: HTRDF_processDescription(me, newNode, YES, NO, NO);
1795: }
1796: }
1797: } else if (HTRDF_isTypedPredicate(me, ele)) {
1798: HTAssocList *newAL = HTAssocList_new();
1799: HTElement *newNode = HTElement_new(dName, newAL);
1800: HTList *cur2 = description->m_children;
1801: HTElement *ele2 = NULL;
1802: while ((ele2 = (HTElement *) HTList_nextObject(cur2))) {
1803: HTElement_addChild(newNode, ele2);
1804: }
1805: HTElement_addTarget(newNode, ele);
1806: HTRDF_processDescription(me, newNode, YES, NO, NO);
1807: }
1808: } /* end of while */
1809: } else if (HTRDF_isDescription(me, target)) {
1810: HTList *cur = target->m_children;
1811: HTElement *ele = NULL;
1812: while ((ele = (HTElement *) HTList_nextObject(cur))) {
1813: HTAssocList *newAL = HTAssocList_new();
1814: HTElement *newNode = HTElement_new(dName, newAL);
1815: HTList *cur2 = description->m_children;
1816: HTElement *ele2 = NULL;
1817: while ((ele2 = (HTElement *) HTList_nextObject(cur2))) {
1818: HTElement_addChild(newNode, ele2);
1819: }
1820: HTElement_addTarget(newNode, ele);
1821: HTRDF_processDescription(me, newNode, YES, NO, NO);
1822: } /* end of while */
1823: }
1824:
1825: HT_FREE(dName);
1826: HT_FREE(aName);
1827: return NULL;
1828: }
1829:
1830: /*
1831: * Manage the aboutEachPrefix attribute here
1832: */
1833: if (sAboutEachPrefix) {
1834: if (hasTarget) {
1835: HTList *cur = description->m_vTargets;
1836: HTElement *target = NULL;
1837: while ((target = (HTElement *) HTList_nextObject(cur))) {
1838: HTList *cur2 = description->m_children;
1839: HTElement *ele2 = NULL;
1840: HTElement *newDescription = NULL;
1841: HTAssocList *newAL = HTAssocList_new();
1842: sTargetAbout = HTElement_getAttribute2(target, RDFMS, "about");
1843: newDescription = HTElement_new(dName, newAL);
1844: HTElement_addAttribute(newDescription, aName, sTargetAbout);
1845: while ((ele2 = (HTElement *) HTList_nextObject(cur2))) {
1846: HTElement_addChild(newDescription, ele2);
1847: }
1848: HTRDF_processDescription(me, newDescription, NO, NO, NO);
1849: }
1850: }
1851:
1852: HT_FREE(dName);
1853: HT_FREE(aName);
1854: return NULL;
1855: }
1856: /*
1857: * Enumerate through the children
1858: */
1859: {
1860: HTList *cur = description->m_children;
1861: HTElement *n = NULL;
1862: while ((n = (HTElement *) HTList_nextObject(cur))) {
1863: if (HTRDF_isDescription(me, n))
1864: HTPrint("Can not nest Description inside Description\n");
1865: else if (HTRDF_isListItem(me, n))
1866: HTPrint("Can not nest List Item inside Description\n");
1867: else if (HTRDF_isContainer(me, n))
1868: HTPrint("Can not nest Container inside Description\n");
1869: else if (HTRDF_isTypedPredicate(me, n)) {
2.3 frystyk 1870: char * sChildID = NULL;
2.1 frystyk 1871: if (hasTarget && targetIsContainer) {
1872: sChildID = HTRDF_processPredicate(me, n, description,
1873: target->m_sBagID ?
1874: target->m_sBagID :
1875: target->m_sID, NO);
1876: StrAllocCopy(description->m_sID, sChildID);
1877: createBag = NO;
1878: } else if (hasTarget) {
1879: sChildID = HTRDF_processPredicate(me, n, description,
1880: target->m_sBagID ?
1881: target->m_sBagID :
1882: target->m_sID, reificate);
1883: StrAllocCopy(description->m_sID, sChildID);
1884: } else if (!hasTarget && !inPredicate) {
1885: if (!description->m_sID) {
2.3 frystyk 1886: char * nr = HTRDF_newReificationID(me);
2.1 frystyk 1887: StrAllocCopy(description->m_sID, nr);
1888: HT_FREE(nr);
1889: }
1890: if (!sAbout) {
1891: if (sID)
1892: sAbout = sID;
1893: else
1894: sAbout = description->m_sID;
1895: }
1896: sChildID = HTRDF_processPredicate(me, n, description,
1897: sAbout, sBagid ?
1898: YES : reificate);
1899:
1900: } else if (!hasTarget && inPredicate) {
1901: if (!sAbout) {
1902: if (sID) {
1903: StrAllocCopy(description->m_sID, sID);
1904: sAbout = sID;
1905: } else {
1906: if (!description->m_sID) {
2.3 frystyk 1907: char * nr = HTRDF_newReificationID(me);
2.1 frystyk 1908: StrAllocCopy(description->m_sID, nr);
1909: HT_FREE(nr);
1910: }
1911: sAbout = description->m_sID;
1912: }
1913: } else {
2.7 barstow 1914: if (description->m_sID != sAbout)
1915: StrAllocCopy(description->m_sID, sAbout);
2.1 frystyk 1916: }
1917: sChildID = HTRDF_processPredicate(me, n, description, sAbout, NO);
1918: }
1919: /*
1920: * Each Description block creates also a Bag node which
1921: * has links to all properties within the block IF
1922: * the m_bCreateBags variable is true
1923: */
1924: if (sBagid || (me->m_bCreateBags && createBag)) {
2.3 frystyk 1925: char * sNamespace = RDFMS;
2.1 frystyk 1926: if (bOnce && sChildID) {
2.3 frystyk 1927: char * tName = NULL;
1928: char * bName = NULL;
2.1 frystyk 1929: bOnce = NO;
1930: if (!description->m_sBagID) {
2.3 frystyk 1931: char * nr = HTRDF_newReificationID(me);
2.1 frystyk 1932: StrAllocCopy(description->m_sBagID, nr);
1933: HT_FREE(nr);
1934: }
1935: if (!description->m_sID)
1936: StrAllocCopy(description->m_sID,
1937: description->m_sBagID);
1938: StrAllocMCopy(&tName, sNamespace, "type", NULL);
1939: StrAllocMCopy(&bName, sNamespace, "Bag", NULL);
1940: HTRDF_addTriple(me, tName, description->m_sBagID, bName);
1941: HT_FREE(tName);
1942: HT_FREE(bName);
1943:
1944: }
1945: if (sChildID) {
2.3 frystyk 1946: char * tName = NULL;
2.1 frystyk 1947: char si[20];
1948: sprintf(si, "%d", iChildCount);
1949: StrAllocMCopy(&tName, sNamespace, "_", si, NULL);
1950: HTRDF_addTriple(me, tName, description->m_sBagID, sChildID);
1951: iChildCount++;
1952: HT_FREE(tName);
1953: }
1954: }
1955: HT_FREE(sChildID);
1956: }
1957: }
1958: } /* end of block*/
1959:
1960: description->m_bDone = YES;
1961:
1962: HT_FREE(dName);
1963: HT_FREE(aName);
1964: return (description->m_sID);
1965: }
1966:
1967: /*
1968: * Given an XML document (well-formed HTML, for example),
1969: * look for a suitable element to start parsing from
1970: *
1971: */
1972: PUBLIC BOOL HTRDF_processXML (HTRDF *me, HTElement *ele)
1973: {
1974: if (me && ele) {
1975: if (HTRDF_isRDF(me, ele)) {
1976: if (HTRDF_isRDFroot(me, ele)) {
1977: HTRDF_processRDF(me, ele);
1978: } else if (HTRDF_isDescription(me, ele)) {
1979: HTRDF_processDescription(me, ele, NO, me->m_bCreateBags,
1980: me->m_bCreateBags);
1981: }
1982: } else {
1983: HTList *cur = ele->m_children;
1984: HTElement *child = NULL;
1985: while ((child = (HTElement *) HTList_nextObject(cur))) {
1986: HTRDF_processXML(me, child);
1987: }
1988: }
1989:
1990: /* MISSING RECURSION */
1991:
1992: return YES;
1993: }
1994: return NO;
1995: }
1996:
1997: /*
1998: * Return the root element pointer. This requires the parsing
1999: * has been already done.
2000: */
2001: PUBLIC HTElement * HTRDF_root (HTRDF *me)
2002: {
2003: return me ? me->m_root : NULL;
2004: }
2005:
2006: /*
2007: * Return the full namespace URI for a given prefix sPrefix.
2008: * The default namespace is identified with xmlns prefix.
2009: * The namespace of xmlns attribute is an empty string.
2010: */
2011:
2.3 frystyk 2012: PUBLIC char * HTRDF_namespace(HTRDF * me, char * sPrefix)
2.1 frystyk 2013: {
2.3 frystyk 2014: char * nPrefix = NULL;
2.1 frystyk 2015: HTAssocList * calist;
2016: HTList * cur = me->m_namespaceStack;
2017:
2018: if (!sPrefix)
2019: StrAllocCopy(nPrefix, "xmlns");
2020:
2021: while ((calist = (HTAssocList *) HTList_nextObject(cur))) {
2.3 frystyk 2022: char * sValue = HTAssocList_findObjectCaseSensitiveExact(calist, sPrefix);
2.1 frystyk 2023: if (sValue) {
2024: StrAllocCopy(nPrefix, sValue);
2025: return nPrefix;
2026: }
2027: }
2028: /*
2029: * Give error only if
2030: * 1. the prefix is not from the reserved xml namespace
2031: * 2. the prefix is not xmlns which is to look for the default
2032: * namespace
2033: */
2034: if (!strcmp(sPrefix, XMLSCHEMA)) {
2035: StrAllocCopy(nPrefix, sPrefix);
2036: return nPrefix;
2037: } else if (!strcmp(sPrefix, "xmlns")) {
2038: StrAllocCopy(nPrefix, "");
2039: return nPrefix;
2040: } else
2041: HTPrint("Unresolved Namespace prefix %s\n", sPrefix);
2042:
2043: StrAllocCopy(nPrefix, "");
2044: return nPrefix;
2045: }
2046:
2047: /*
2048: * Methods to determine whether we are parsing
2049: * parseType="Literal" or parseType="Resource"
2050: */
2051:
2052: PUBLIC BOOL HTRDF_parseLiteral(HTRDF *me)
2053: {
2054: HTElement *e = NULL;
2055: HTList *cur = me->m_elementStack;
2056: if (!HTList_isEmpty(me->m_elementStack)) {
2057: while((e = (HTElement *) HTList_nextObject(cur))) {
2.3 frystyk 2058: char * sParseType = NULL;
2.1 frystyk 2059: sParseType = HTElement_getAttribute2(e, RDFMS, "parseType");
2060: if (sParseType) {
2061: if (strcmp(sParseType, "Resource"))
2062: return YES;
2063: }
2064: }
2065: }
2066: return NO;
2067: }
2068:
2069: /*
2070: * Methods to determine whether we are parsing
2071: * parseType="Literal" or parseType="Resource"
2072: */
2073:
2074: PUBLIC BOOL HTRDF_parseResource(HTRDF *me)
2075: {
2076: HTElement *e = NULL;
2077: HTList *cur = me->m_elementStack;
2078: if (!HTList_isEmpty(me->m_elementStack)) {
2079: while((e = (HTElement *) HTList_nextObject(cur))) {
2.3 frystyk 2080: char * sParseType = NULL;
2.1 frystyk 2081: sParseType = HTElement_getAttribute2(e, RDFMS, "parseType");
2082: if (sParseType) {
2083: if (!strcmp(sParseType, "Resource"))
2084: return YES;
2085: }
2086: }
2087: }
2088: return NO;
2089: }
2090: /*
2091: * checkAttributes goes through the attributes of element e<
2092: * to see
2093: * 1. if there are symbolic references to other nodes in the data model.
2094: * in which case they must be stored for later resolving with
2095: * resolveLater method.
2096: * 2. if there is an identity attribute, it is registered using
2097: * registerResource or registerID method.
2098: *
2099: */
2100:
2101: PRIVATE void HTRDF_checkAttributes(HTRDF *me, HTElement *e)
2102: {
2103: {
2.3 frystyk 2104: char * sResource = HTElement_getAttribute2(e, RDFMS, "resource");
2.1 frystyk 2105:
2106: if (sResource && sResource[0] == '#')
2107: HTRDF_resolveLater(me, e);
2108: }
2109: {
2.3 frystyk 2110: char * sAboutEach = HTElement_getAttribute2(e, RDFMS, "aboutEach");
2.1 frystyk 2111:
2112: if (sAboutEach && sAboutEach[0] == '#')
2113: HTRDF_resolveLater(me, e);
2114: }
2115: {
2.3 frystyk 2116: char * sAboutEachPrefix = HTElement_getAttribute2(e, RDFMS,
2.1 frystyk 2117: "aboutEachPrefix");
2118:
2119: if (sAboutEachPrefix && sAboutEachPrefix[0] == '#')
2120: HTRDF_resolveLater(me, e);
2121: }
2122: {
2.3 frystyk 2123: char * sAbout = HTElement_getAttribute2(e, RDFMS, "about");
2.1 frystyk 2124: if (sAbout) {
2125: if (sAbout[0] == '#')
2126: HTRDF_resolveLater(me, e);
2127: else
2128: HTRDF_registerResource(me, e);
2129: }
2130: }
2131:
2132: {
2.3 frystyk 2133: char * sBagID = HTElement_getAttribute2(e, RDFMS, "bagID");
2.1 frystyk 2134:
2135: if (sBagID) {
2136: HTRDF_registerID(me, sBagID, e);
2137: StrAllocCopy(e->m_sBagID, sBagID);
2138: }
2139: }
2140: {
2.3 frystyk 2141: char * sID = HTElement_getAttribute2(e, RDFMS, "ID");
2.1 frystyk 2142: if (sID) {
2143: HTRDF_registerID(me, sID, e);
2144: StrAllocCopy(e->m_sID, sID);
2145: }
2146: }
2147: }
2148: /*
2149: * Add the element e to the m_vResolveQueue
2150: * to be resolved later.
2151: */
2152: PUBLIC void HTRDF_resolveLater(HTRDF *me, HTElement *e)
2153: {
2154: HTList_addObject(me->m_vResolveQueue, e);
2155: }
2156: /*
2157: * Add an element e to the Hashtable m_hIDtable
2158: * which stores all nodes with an ID
2159: */
2160:
2.3 frystyk 2161: PUBLIC void HTRDF_registerID(HTRDF *me, char * sID, HTElement *e)
2.1 frystyk 2162: {
2163: if (HTHashtable_object(me->m_hIDtable, sID))
2164: HTPrint("Node ID %s redefined", sID);
2165: HTHashtable_addObject(me->m_hIDtable, sID, e);
2166: }
2167: /*
2168: * Add an element e to the Vector m_vResources
2169: * which stores all nodes with an URI
2170: */
2171: PUBLIC void HTRDF_registerResource(HTRDF *me, HTElement *e)
2172: {
2173: HTList_addObject(me->m_vResources, e);
2174: }
2175:
2176: /*
2177: * Look for a node by name sID from the Hashtable
2178: * m_hIDtable of all registered IDs.
2179: */
2180:
2.3 frystyk 2181: PUBLIC HTElement *HTRDF_lookforNode(HTRDF *me, char * sID)
2.1 frystyk 2182: {
2183: if (sID)
2184: return (HTElement *) HTHashtable_object(me->m_hIDtable, sID);
2185: return NULL;
2186: }
2187:
2188: /*
2189: ** Special method to deal with rdf:resource attribute
2190: */
2.3 frystyk 2191: PUBLIC char * HTRDF_getResource(HTRDF *me, HTElement *e)
2.1 frystyk 2192: {
2.3 frystyk 2193: char * sResource = HTElement_getAttribute2(e, RDFMS, "resource");
2.1 frystyk 2194: if (sResource != NULL && sResource[0] == '\0')
2195: sResource = me->m_sSource;
2196: return sResource;
2197: }
2198:
2199: /*
2200: ** Take an element ele with its parent element parent
2201: ** and evaluate all its attributes to see if they are non-RDF specific
2202: ** and non-XML specific in which case they must become children of
2203: ** the ele node.
2204: */
2205: PRIVATE BOOL HTRDF_expandAttributes (HTRDF * me, HTElement * parent, HTElement * ele)
2206: {
2207: BOOL foundAbbreviation = NO;
2.3 frystyk 2208: char * sAttribute = NULL;
2209: char * sValue = NULL;
2.1 frystyk 2210: HTAssoc * assoc;
2211: HTAssocList * cur = ele->m_attributes;
2212: int lxmlschema = strlen(XMLSCHEMA);
2213: int lrdfms = strlen(RDFMS);
2214:
2215: while ((assoc= (HTAssoc *) HTList_nextObject(cur))) {
2216: int latt;
2217: sAttribute = HTAssoc_name(assoc);
2218: sValue = HTAssoc_value(assoc);
2.4 barstow 2219:
2.1 frystyk 2220: latt = strlen(sAttribute);
2221: if (!strncmp(sAttribute, XMLSCHEMA, lxmlschema))
2222: continue;
2223:
2224: if (!strncmp(sAttribute, RDFMS, lrdfms) &&
2225: (sAttribute[lrdfms]!='_') &&
2226: latt > 5 && strcmp(&(sAttribute[latt-5]), "value") &&
2227: strcmp(&(sAttribute[latt-4]), "type"))
2228: continue;
2229:
2230: if (strlen(sValue) > 0) {
2231: HTAssocList * newAL = HTAssocList_new();
2232: HTElement * newElement = HTElement_new(sAttribute, newAL);
2233: HTElement * newData = HTElement_new2(sValue);
2234: HTElement_addChild(newElement, newData);
2235: HTElement_addChild(parent, newElement);
2236: foundAbbreviation = YES;
2237: }
2238: }
2239: return foundAbbreviation;
2240: }
2241:
2242: /**
2243: * Create a new reification ID by using a name part and an
2244: * incremental counter m_iReificationCounter.
2245: */
2.3 frystyk 2246: PUBLIC char * HTRDF_newReificationID (HTRDF *me)
2.1 frystyk 2247: {
2.3 frystyk 2248: char * nsid = NULL;
2.1 frystyk 2249: char nsrc[20];
2250: me->m_iReificationCounter++;
2251: sprintf(nsrc, "%d", me->m_iReificationCounter);
2252: if (!me->m_sSource) {
2253: StrAllocMCopy(&nsid, "genid", nsrc, NULL);
2254: } else {
2255: StrAllocMCopy(&nsid, me->m_sSource, "#genid", nsrc, NULL);
2256: }
2257: return nsid;
2258: }
2259:
2260: /*
2261: * reificate creates one new node and four new triples
2262: * and returns the ID of the new node
2263: */
2264:
2.3 frystyk 2265: PRIVATE char * HTRDF_reificate(HTRDF *me, char * sPredicate, char * sSubject,
2266: char * sObject, char * sNodeID)
2.1 frystyk 2267: {
2.3 frystyk 2268: char * sName = NULL;
2269: char * pName = NULL;
2270: char * oName = NULL;
2271: char * tName = NULL;
2272: char * stName = NULL;
2273: char * tNodeID = NULL;
2.1 frystyk 2274:
2275: if (!sNodeID)
2276: tNodeID = HTRDF_newReificationID(me);
2277: else
2278: StrAllocCopy(tNodeID, sNodeID);
2279:
2280: StrAllocMCopy(&sName, RDFMS, "subject", NULL);
2281: StrAllocMCopy(&pName, RDFMS, "predicate", NULL);
2282: StrAllocMCopy(&oName, RDFMS, "object", NULL);
2283: StrAllocMCopy(&tName, RDFMS, "type", NULL);
2284: StrAllocMCopy(&stName, RDFMS, "Statement", NULL);
2285:
2286: /*
2287: * The original statement must remain in the data model
2288: */
2289: HTRDF_addTriple(me, sPredicate, sSubject, sObject);
2290:
2291: /*
2292: * Do not reificate reificated properties
2293: */
2294: if (strcmp(sPredicate, sName) && strcmp(sPredicate, pName) &&
2295: strcmp(sPredicate, oName) && strcmp(sPredicate, tName)) {
2296:
2297: /* Reificate by creating 4 new triples */
2298: HTRDF_addTriple(me, pName, tNodeID, sPredicate);
2299: HTRDF_addTriple(me, sName, tNodeID, (sSubject[0]=='\0' ? me->m_sSource: sSubject));
2300: HTRDF_addTriple(me, oName, tNodeID, sObject);
2301: HTRDF_addTriple(me, tName, tNodeID, stName);
2302: } else
2303: HT_FREE(tNodeID);
2304:
2305: HT_FREE(sName);
2306: HT_FREE(pName);
2307: HT_FREE(oName);
2308: HT_FREE(tName);
2309: HT_FREE(stName);
2310:
2311: return tNodeID;
2312: }
2313: /*
2314: * Create a new triple and add it to the m_triples List
2315: * Send the triple to the Output stream
2316: */
2317:
2.3 frystyk 2318: PUBLIC void HTRDF_addTriple (HTRDF *me, char * sPredicate, char * sSubject,
2319: char * sObject)
2.1 frystyk 2320: {
2321: HTTriple *t = NULL;
2322:
2323: /*
2324: * If there is no subject (about=""), then use the URI/filename where
2325: * the RDF description came from
2326: */
2327: if (!sPredicate || !sSubject || !sObject) {
2328: HTPrint("Predicate %s when subject %s and object %s \n",
2329: sPredicate ? sPredicate : "null",
2330: sSubject ? sSubject : "null",
2331: sObject ? sObject : "null");
2332: return;
2333: }
2334:
2335: if (sSubject[0]=='\0')
2336: sSubject = me->m_sSource;
2337:
2338: t = HTTriple_new(sPredicate, sSubject, sObject);
2339:
2340: /* Call the triple callback handler (if any) with this new triple */
2341: if (me->newTripleInstance && t) (*(me->newTripleInstance))(me, t, me->tripleContext);
2342:
2343: HTList_addObject(me->m_triples, t);
2344: }
2345:
2346: /*
2347: * createBags method allows one to determine whether SiRPAC
2348: * produces Bag instances for each Description block.
2349: * The default setting is not to generate them.
2350: */
2351:
2352: PUBLIC void HTRDF_createBags(HTRDF *me, BOOL b)
2353: {
2354: if (me)
2355: me->m_bCreateBags = b;
2356: }
2357:
2358: /*
2359: Set output stream for RDF parser
2360: */
2361:
2362: PUBLIC void HTRDF_setOutputStream(HTRDF *me, HTStream *ostream)
2363: {
2364: if (me)
2365: me->ostream = ostream;
2366: }
2367:
2368: PUBLIC BOOL HTRDF_registerNewTripleCallback (HTRDF * me, HTTripleCallback_new * cbf, void * context)
2369: {
2370: if (me) {
2371: me->newTripleInstance = cbf;
2372: me->tripleContext = context;
2373: return YES;
2374: }
2375: return NO;
2376: }
2377:
2378: PUBLIC BOOL HTRDF_registerNewParserCallback (HTRDFCallback_new * me, void * context)
2379: {
2380: RDFInstance = me;
2381: RDFInstanceContext = context;
2382: return YES;
2383: }
2384:
2385: /* ------------------------------------------------------------------------- */
2386: /* HTRDFTriples STREAM HANDLERS */
2387: /* ------------------------------------------------------------------------- */
2388:
2389: PRIVATE int generate_triples(HTStream *me)
2390: {
2391: HTRDF *rdfp = me ? me->rdfparser : NULL;
2392: if (rdfp) {
2393:
2394: HTRDF_resolve(rdfp);
2395:
2396: HTRDF_processXML(rdfp, HTRDF_root(rdfp));
2397:
2398: return HT_OK;
2399: }
2400: return HT_ERROR;
2401: }
2402:
2403: PRIVATE int HTRDFTriples_flush (HTStream * me)
2404: {
2405: if (me->target)
2406: return (*me->target->isa->flush)(me->target);
2407: return HT_OK;
2408: }
2409:
2410: PRIVATE int HTRDFTriples_free (HTStream * me)
2411: {
2412: int status = HT_OK;
2413:
2414: status = generate_triples(me);
2415:
2416: HTRDF_delete(me->rdfparser);
2417:
2418: if (me->target) {
2419: if ((status = (*me->target->isa->_free)(me->target)) == HT_WOULD_BLOCK)
2420: return HT_WOULD_BLOCK;
2421: }
2422: HTTRACE(XML_TRACE, "RDF Parser.. FREEING...\n");
2423: HT_FREE(me);
2424: return status;
2425: }
2426:
2427: PRIVATE int HTRDFTriples_abort (HTStream * me, HTList * e)
2428: {
2429: HTTRACE(XML_TRACE, "RDF Parser.. ABORTING...\n");
2430: HTRDF_delete(me->rdfparser);
2431: if (me->target)
2432: (*me->target->isa->abort)(me->target, NULL);
2433: HT_FREE(me);
2434: return HT_ERROR;
2435: }
2436:
2437: PRIVATE int HTRDFTriples_write (HTStream * me, const char * buf, int len)
2438: {
2439: return HT_OK;
2440: }
2441:
2442: PRIVATE int HTRDFTriples_putCharacter (HTStream * me, char c)
2443: {
2444: return HTRDFTriples_write(me, &c, 1);
2445: }
2446:
2447: PRIVATE int HTRDFTriples_putString (HTStream * me, const char * s)
2448: {
2449: return HTRDFTriples_write(me, s, (int) strlen(s));
2450: }
2451:
2452: PRIVATE const HTStreamClass HTRDFTriplesClass =
2453: {
2454: "rdf",
2455: HTRDFTriples_flush,
2456: HTRDFTriples_free,
2457: HTRDFTriples_abort,
2458: HTRDFTriples_putCharacter,
2459: HTRDFTriples_putString,
2460: HTRDFTriples_write
2461: };
2462:
2463: PRIVATE HTStream * RDFParser_new (HTRequest * request,
2464: void * param,
2465: HTFormat input_format,
2466: HTFormat output_format,
2467: HTStream * output_stream)
2468: {
2469: HTStream * me = NULL;
2470: if ((me = (HTStream *) HT_CALLOC(1, sizeof(HTStream))) == NULL)
2471: HT_OUTOFMEM("HTRDFTriples_new");
2472: me->isa = &HTRDFTriplesClass;
2473: me->state = HT_OK;
2474: me->request = request;
2475: me->target = output_stream ? output_stream : HTErrorStream();
2476:
2477: /* Now create the RDF parser instance */
2478: if ((me->rdfparser = HTRDF_new()) == NULL) {
2479: HT_FREE(me);
2480: return HTErrorStream();
2481: }
2482:
2483: /* Set the source (I guess mostly to follow SiRPAC API) */
2484: {
2485: char * uri = HTAnchor_address((HTAnchor *) HTRequest_anchor(request));
2486: HTRDF_setSource(me->rdfparser, uri);
2487: HT_FREE(uri);
2488: }
2489:
2490: /* Where are we putting data? */
2491: HTRDF_setOutputStream(me->rdfparser, me);
2492:
2493: /* If you want to create Bags, change it to YES */
2494: HTRDF_createBags(me->rdfparser, NO);
2495:
2496: /* Register our new XML Instance handler */
2497: /* @@@ THIS SHOULD BE DONE USING XML NAMESPACE SO THAT WE DON'T CONFLICT @@@ */
2498: HTXMLCallback_registerNew(rdf_newInstance, me->rdfparser);
2499:
2500: HTTRACE(XML_TRACE, "RDF Parser.. Stream created\n");
2501:
2502: return me;
2503: }
2504:
2505: PUBLIC HTStream * HTRDFParser_new (HTRequest * request,
2506: void * param,
2507: HTFormat input_format,
2508: HTFormat output_format,
2509: HTStream * output_stream)
2510: {
2511: return HTXML_new(request, param, input_format, output_format,
2512: RDFParser_new(request, param, input_format, output_format, output_stream));
2513: }
2514:
2515: PRIVATE void triple_newInstance (HTRDF * rdfp, HTTriple * t, void * context)
2516: {
2517: if (rdfp && t) {
2518: HTStream *ostream = rdfp->ostream;
2519: if (ostream) {
2520: PUTC(ostream,'(');
2521: PUTS(ostream, t->m_sPredicate);
2522: PUTC(ostream,',');
2523: PUTS(ostream, t->m_sSubject);
2524: PUTC(ostream,',');
2525: PUTS(ostream, t->m_sObject);
2526: PUTC(ostream,')');
2527: PUTC(ostream,'\n');
2528: }
2529: }
2530: }
2531:
2532: PUBLIC HTStream * HTRDFToTriples (HTRequest * request,
2533: void * param,
2534: HTFormat input_format,
2535: HTFormat output_format,
2536: HTStream * output_stream)
2537: {
2.2 frystyk 2538: HTStream * me = RDFParser_new(request, param, input_format, output_format, output_stream);
2.1 frystyk 2539: HTTRACE(XML_TRACE, "RDF Converter. To Triples\n");
2540:
2541: /* Register our own tripple instance handler */
2542: HTRDF_registerNewTripleCallback(me->rdfparser, triple_newInstance, NULL);
2543:
2544: /* Create an XML parser instance and return */
2545: return HTXML_new(request, param, input_format, output_format, me);
2546: }
2547:
2.9 barstow 2548: /*
2549: ** This function initializes the XML parser and RDF parsers
2550: ** that are used to synchronously parse a file of RDF or a
2551: ** a buffer of RDF.
2552: **
2.11 barstow 2553: ** Parameters:
2554: ** xmlparser - MODIFIED the XML parser to create. The caller is
2555: ** responsible for free'ing this pointer.
2556: ** rdfparser - MODIFIED the RDF parser to create. The caller is
2557: ** responsible for free'ing this pointer.
2558: ** stream - MODIFIED the HTStream needed by the RDF parser. The
2559: ** caller is responsible for free'ing this pointer.
2560: ** uri - the URI created from name. It is used by the RDF parser
2561: ** when creating anonymous node names. The caller is
2562: ** responsible for freeing this pointer.
2563: ** new_triple_callback - the callback invoked when a new triple
2564: ** is created. If NULL, the default handler will be invoked.
2.12 barstow 2565: ** context - a void pointer to pass to the new_triple_callback.
2566: ** Should set NULL if no data needs to be passed to the
2567: ** callback.
2.11 barstow 2568: ** name - the file name or buffer name to be used when the RDF
2569: ** parser needs a document name
2570: **
2571: ** Returns:
2572: ** YES if the initialization succeeds; otherwise NO is returned
2573: ** and an error message is logged.
2.9 barstow 2574: **/
2575:
2.11 barstow 2576: PRIVATE BOOL initialize_parsers(XML_Parser *xmlparser, HTRDF **rdfparser,
2.9 barstow 2577: HTStream **stream, char **uri, HTTripleCallback_new * new_triple_callback,
2.12 barstow 2578: void *context, const char * name)
2.9 barstow 2579: {
2580: /* Create an XML parser */
2581: #ifdef USE_NS
2582: *xmlparser = XML_ParserCreateNS (NULL, ':');
2583: #else
2584: *xmlparser = XML_ParserCreate (NULL);
2585: #endif /* USE_NS */
2586:
2.11 barstow 2587: if (!*xmlparser) {
2588: HTTRACE(XML_TRACE, "RDF_Parser. Could not allocate memory for XML parser.\n");
2589: return NO;
2590: }
2.9 barstow 2591:
2592: /* We need also need RDF parser to create the triples */
2593: *rdfparser = HTRDF_new();
2594: if (!*rdfparser) {
2.11 barstow 2595: XML_ParserFree(*xmlparser);
2596: HTTRACE(XML_TRACE, "RDF_Parser. Could not allocate memory for RDF parser.\n");
2597: return NO;
2.9 barstow 2598: }
2599:
2600: /* Must construct a URI from name for the parser */
2601: *uri = HTLocalToWWW (name, "file:");
2602: HTRDF_setSource(*rdfparser, *uri);
2603:
2604: HTRDF_createBags(*rdfparser, NO);
2605:
2606: if (new_triple_callback)
2.12 barstow 2607: HTRDF_registerNewTripleCallback(*rdfparser, new_triple_callback, context);
2.9 barstow 2608: else
2.12 barstow 2609: HTRDF_registerNewTripleCallback(*rdfparser, triple_newInstance, context);
2.9 barstow 2610:
2611: rdf_setHandlers(*xmlparser);
2612: XML_SetUserData(*xmlparser, *rdfparser);
2613:
2614: /* Create a stream to be used to process the triple output */
2615: if ((*stream = (HTStream *) HT_CALLOC(1, sizeof(HTStream))) == NULL) {
2616: HT_FREE(*uri);
2617: XML_ParserFree(*xmlparser);
2618: HTRDF_delete(*rdfparser);
2.11 barstow 2619: HTTRACE(XML_TRACE, "RDF_Parser. Could not allocate memory for HTStream.\n");
2620: return NO;
2.9 barstow 2621: }
2622: (*stream)->isa = &HTRDFTriplesClass;
2623: (*stream)->state = HT_OK;
2624: (*stream)->request = NULL; /* Don't have a request */
2625: (*stream)->target = NULL; /* Don't have another stream */
2626: (*stream)->rdfparser = *rdfparser;
2627:
2.11 barstow 2628: return YES;
2.9 barstow 2629: }
2630:
2.10 kahan 2631: /* HTRDF_parseFile
2.6 kahan 2632: ** ---------------
2633: ** This function parses a file of RDF in a synchronous, non-blocking
2.9 barstow 2634: ** way. In other words, the file is not asynchronously loaded.
2635: **
2.10 kahan 2636: ** Parameters:
2637: ** file_name the name of the file to parse
2638: ** new_triple_callback the callback that is invoked when a triple
2639: ** is created. If NULL, the default triple handler is
2640: ** invoked.
2.12 barstow 2641: ** context - a void pointer to pass to the new_triple_callback.
2642: ** Should set NULL if no data needs to be passed to the
2643: ** callback.
2.10 kahan 2644: ** Returns:
2.11 barstow 2645: ** Returns YES if the file is successfully parsed; otherwise NO is
2646: ** returned and an error message is logged.
2.6 kahan 2647: */
2648:
2.12 barstow 2649: PUBLIC BOOL HTRDF_parseFile (const char *file_name,
2650: HTTripleCallback_new * new_triple_callback, void *context)
2.5 barstow 2651: {
2652: char buff[512]; /* the file input buffer */
2653: FILE *fp;
2654: XML_Parser xmlparser;
2655: HTRDF *rdfparser;
2656: HTStream * stream = NULL;
2.6 kahan 2657: char *uri = NULL;
2.11 barstow 2658: BOOL status;
2.5 barstow 2659:
2660: /* Sanity check */
2.6 kahan 2661: if (!file_name) {
2.11 barstow 2662: HTTRACE(XML_TRACE, "HTRDF_parseFile. file_name is NULL\n");
2663: return NO;
2.6 kahan 2664: }
2665:
2.5 barstow 2666: /* If the file does not exist, return now */
2667: fp = fopen (file_name, "r");
2.6 kahan 2668: if (!fp) { /* annotation index file doesn't exist */
2.11 barstow 2669: HTTRACE(XML_TRACE, "HTRDF_parseFile. File open failed.");
2670: return NO;
2.6 kahan 2671: }
2.5 barstow 2672:
2.11 barstow 2673: /* Initialize the XML and RDF parsers */
2674: status = initialize_parsers(&xmlparser, &rdfparser, &stream, &uri,
2.12 barstow 2675: new_triple_callback, context, file_name);
2.11 barstow 2676: if (!status) {
2.6 kahan 2677: fclose (fp);
2.11 barstow 2678: return NO;
2.5 barstow 2679: }
2680:
2681: /*
2682: * The parsing occurs on one read buffer at a time instead of
2683: * reading everything into memory and then parsing
2684: */
2685: for (;;) {
2686: int done;
2687: int buff_len;
2688: fgets(buff, sizeof(buff), fp);
2689: if (ferror(fp)) {
2.6 kahan 2690: HT_FREE(uri);
2.5 barstow 2691: fclose (fp);
2692: XML_ParserFree(xmlparser);
2693: HTRDF_delete(rdfparser);
2694: HT_FREE(stream);
2.11 barstow 2695: HTTRACE(XML_TRACE, "HTRDF_parseFile. Error reading file.");
2696: return NO;
2.5 barstow 2697: }
2698: done = feof(fp);
2699: if (done)
2700: buff_len = 0;
2701: else
2702: buff_len = strlen (buff);
2703: if (! XML_Parse(xmlparser, buff, buff_len, done)) {
2704: fprintf (stderr, "Parse error at line %d:\n%s\n",
2705: XML_GetCurrentLineNumber(xmlparser),
2706: XML_ErrorString(XML_GetErrorCode(xmlparser)));
2.6 kahan 2707: HT_FREE(uri);
2.5 barstow 2708: fclose(fp);
2709: XML_ParserFree(xmlparser);
2710: HTRDF_delete(rdfparser);
2711: HT_FREE(stream);
2.11 barstow 2712: HTTRACE(XML_TRACE, "HTRDF_parseFile. Parse error.");
2713: return NO;
2.5 barstow 2714: }
2715: if (done)
2716: break;
2717: }
2718:
2719: /* The file has been parsed, generate the triples */
2720: generate_triples(stream);
2721:
2722: /* Cleanup */
2.6 kahan 2723: HT_FREE(uri);
2.5 barstow 2724: fclose (fp);
2.9 barstow 2725: XML_ParserFree(xmlparser);
2726: HTRDF_delete(rdfparser);
2727: HT_FREE(stream);
2728:
2.11 barstow 2729: return YES;
2.9 barstow 2730: }
2731:
2.10 kahan 2732: /* HTRDF_parseBuffer
2.9 barstow 2733: ** ---------------
2734: ** This function parses a buffer of RDF in a synchronous, non-blocking
2735: ** way.
2736: **
2.10 kahan 2737: ** Parameters:
2738: ** buffer the buffer to parse
2739: ** buffer_name the buffer's name. This is used by the parser
2740: ** when naming "anonymous" subjects
2741: ** buffer_len the buffer's length (number of bytes)
2742: ** new_triple_callback the callback that is invoked when a triple
2743: ** is created. If NULL, the default triple handler is
2744: ** invoked.
2.12 barstow 2745: ** context - a void pointer to pass to the new_triple_callback.
2746: ** Should set NULL if no data needs to be passed to the
2747: ** callback.
2.10 kahan 2748: ** Returns:
2.11 barstow 2749: ** Returns YES if the buffer is successfully parsed; otherwise NO is
2750: ** returned and an error message is logged.
2.9 barstow 2751: */
2752:
2.12 barstow 2753: PUBLIC BOOL HTRDF_parseBuffer (const char *buffer, const char *buffer_name,
2754: int buffer_len, HTTripleCallback_new * new_triple_callback, void *context)
2.9 barstow 2755: {
2756: XML_Parser xmlparser;
2757: HTRDF *rdfparser;
2758: HTStream * stream = NULL;
2759: char *uri;
2.11 barstow 2760: BOOL status;
2.9 barstow 2761:
2762: /* Sanity checks */
2.11 barstow 2763: if (!buffer) {
2764: HTTRACE(XML_TRACE, "HTRDF_parseBuffer. buffer is NULL");
2765: return NO;
2766: }
2767: if (buffer_len <= 0) {
2768: HTTRACE(XML_TRACE, "HTRDF_parseBuffer. buffer_len is <=0");
2769: return NO;
2770: }
2771: if (!buffer_name) {
2772: HTTRACE(XML_TRACE, "HTRDF_parseBuffer. buffer_name is NULL");
2773: return NO;
2774: }
2775:
2776: status = initialize_parsers(&xmlparser, &rdfparser, &stream, &uri,
2.12 barstow 2777: new_triple_callback, context, buffer_name);
2.11 barstow 2778: if (!status)
2779: return NO;
2.9 barstow 2780:
2781: if (! XML_Parse(xmlparser, buffer, buffer_len, 1)) {
2782: fprintf(stderr, "Parse error at line %d:\n%s\n",
2783: XML_GetCurrentLineNumber(xmlparser),
2784: XML_ErrorString(XML_GetErrorCode(xmlparser)));
2785: HT_FREE(uri);
2786: XML_ParserFree(xmlparser);
2787: HTRDF_delete(rdfparser);
2788: HT_FREE(stream);
2.11 barstow 2789: HTTRACE(XML_TRACE, "HTRDF_parseBuffer. Parse error.");
2790: return NO;
2.9 barstow 2791: }
2792:
2793: /* The buffer has been parsed, generate the triples */
2794: generate_triples(stream);
2795:
2796: /* Cleanup */
2797: HT_FREE(uri);
2.5 barstow 2798: XML_ParserFree(xmlparser);
2799: HTRDF_delete(rdfparser);
2800: HT_FREE(stream);
2801:
2.11 barstow 2802: return YES;
2.5 barstow 2803: }
Webmaster