Annotation of libwww/Library/src/HTRDF.c, revision 2.12
2.1 frystyk 1: /*
2: ** RDF PARSER
3: **
2.12 ! barstow 4: ** @(#) $Id: HTRDF.c,v 2.11 2000/08/18 13:25:39 barstow Exp $
2.1 frystyk 5: **
6: ** Copyright © 1995-1998 World Wide Web Consortium, (Massachusetts
7: ** Institute of Technology, Institut National de Recherche en
8: ** Informatique et en Automatique, Keio University). All Rights
9: ** Reserved. This program is distributed under the W3C's Software
10: ** Intellectual Property License. This program is distributed in the hope
11: ** that it will be useful, but WITHOUT ANY WARRANTY; without even the
12: ** implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
13: ** PURPOSE. See W3C License http://www.w3.org/Consortium/Legal/ for more
14: ** details.
15: **
16: ** This module requires expat
17: **
18: ** AUTHOR:
19: ** John Punin <puninj@cs.rpi.edu>,
20: ** Henrik
21: */
22:
23: #include "WWWLib.h"
24: #include "WWWInit.h"
25: #include "WWWXML.h"
26: #include "HTRDF.h"
27:
28: struct _HTStream {
29: const HTStreamClass * isa;
30: int state;
31: HTRequest * request;
32: HTStream * target;
33: HTRDF * rdfparser;
34: };
35:
/* Hand a single character / a string on to the target of stream `me' */
#define PUTC(me, ch)    (*(me)->target->isa->put_character)((me)->target, (ch))
#define PUTS(me, str)   (*(me)->target->isa->put_string)((me)->target, (str))
38:
/*
** One RDF statement. HTTriple_new() stores private copies of all three
** strings and HTTriple_delete() frees them.
*/
struct _HTTriple {
    char *  m_sPredicate;
    char *  m_sSubject;
    char *  m_sObject;
};
44:
45: struct _HTElement {
2.3 frystyk 46: char * m_sName;
2.1 frystyk 47: HTAssocList * m_attributes;
48: HTList * m_children;
2.3 frystyk 49: char * m_sID;
50: char * m_sBagID;
2.1 frystyk 51: HTList * m_vTargets;
52: BOOL m_bDone;
2.3 frystyk 53: char * m_sPrefix;
54: char * m_sContent;
2.1 frystyk 55: };
56:
57: struct _HTRDFParser {
58: HTList * m_namespaceStack;
59: HTList * m_elementStack;
2.8 barstow 60: HTList * m_literalStack;
2.1 frystyk 61: HTElement * m_root;
62: HTList * m_triples;
2.3 frystyk 63: char * m_sSource;
2.1 frystyk 64: HTList * m_vAllNameSpaces;
65:
66: BOOL m_bCreateBags;
67: BOOL m_bFetchSchemas;
68:
69: HTList * m_parseTypeStack;
70: HTList * m_parseElementStack;
2.3 frystyk 71: char * m_sLiteral;
2.1 frystyk 72:
73: HTList * m_vResources;
74: HTList * m_vResolveQueue;
75: HTHashtable * m_hIDtable;
76: int m_iReificationCounter;
77:
78: HTStream * ostream;
79:
80: HTTripleCallback_new * newTripleInstance;
81: void * tripleContext;
82: };
83:
84: /* @@@ Should not be global but controlled by name spaces @@@ */
85: PRIVATE HTRDFCallback_new * RDFInstance = NULL;
86: PRIVATE void * RDFInstanceContext = NULL;
87:
2.3 frystyk 88: PRIVATE char * HTRDF_processContainer (HTRDF *me, HTElement *e);
89: PRIVATE char * HTRDF_processPredicate (HTRDF *me, HTElement *predicate,
2.1 frystyk 90: HTElement *description,
2.3 frystyk 91: char * sTarget,
2.1 frystyk 92: BOOL reificate);
2.3 frystyk 93: PRIVATE void HTRDF_processListItem (HTRDF *me,char * sID, HTElement *listitem,
2.1 frystyk 94: int iCounter);
95: PRIVATE void HTRDF_checkAttributes (HTRDF *me,HTElement *Element);
96: PRIVATE BOOL HTRDF_expandAttributes (HTRDF *me, HTElement *parent, HTElement *ele);
2.3 frystyk 97: PRIVATE char * HTRDF_reificate (HTRDF *me, char * sPredicate, char * sSubject,
98: char * sObject, char * sNodeID);
2.4 barstow 99: /* ------------------------------------------------------------------------- */
100:
101: /*
102: ** Append the markup for the given element and its attribute to the
103: ** parser's "Literal" buffer. This buffer is filled in when parseType="Literal".
104: */
105: PRIVATE void addMarkupStart (HTRDF *rdfp, const char *name, const char **atts)
106: {
107: int i=0;
108:
109: if (!rdfp || !name) return;
110:
2.7 barstow 111: StrAllocMCat(&rdfp->m_sLiteral, "<", name, NULL);
2.4 barstow 112:
113: while (atts[i]) {
2.7 barstow 114: StrAllocMCat(&rdfp->m_sLiteral, " ", atts[i], "=\"", atts[i+1], "\"", NULL);
2.4 barstow 115: i+=2;
116: }
117:
118: StrAllocCat(rdfp->m_sLiteral, ">");
119: }
120:
121: /*
122: ** Terminate this element's "Literal" buffer. This buffer is filled in when
123: ** parseType="Literal".
124: */
125: PRIVATE void addMarkupEnd (HTRDF *rdfp, const char *name)
126: {
127: if (!rdfp || !name) return;
128:
2.7 barstow 129: StrAllocMCat(&rdfp->m_sLiteral, "</", name, ">", NULL);
2.4 barstow 130: }
2.1 frystyk 131:
132: /* ------------------------------------------------------------------------- */
133:
134: /*
135: ** Searches a whole list of Strings and returns true if the String is found.
136: */
2.3 frystyk 137: PRIVATE BOOL HTList_contains (HTList *list, char * s)
2.1 frystyk 138: {
139: HTList *cur = list;
2.3 frystyk 140: char * cs = NULL;
141: while ((cs = (char *) HTList_nextObject(cur))) {
2.1 frystyk 142: if (!strcmp(cs, s)) return YES;
143: }
144: return NO;
145: }
146:
147: /*
148: ** Useful function that Trims a string
149: ** @@@ Should use HTStrip() @@@
150: */
151: PRIVATE char * trim (char *s)
152: {
153: char *p = NULL, *t = NULL;
154: int len = s ? strlen(s) : -1;
155: if (s && len > 0) {
156: StrAllocCopy(t, s);
157: p = &(s[len-1]);
158: while(p!=s) {
159: if (!isspace((int)(*p)))
160: break;
161: p--;
162: }
163: t[(int)(p-s)+1] = '\0';
164: if (isspace((int) t[(int)(p-s)]))
165: t[(int)(p-s)] = '\0';
166: }
167: return t;
168: }
169:
170: /* ------------------------------------------------------------------------- */
171: /* TRIPLE of RDF */
172: /* ------------------------------------------------------------------------- */
173:
2.3 frystyk 174: PUBLIC HTTriple * HTTriple_new (char * p, char * s, char * o)
2.1 frystyk 175: {
176: HTTriple * me = NULL;
177: if (p && s && o) {
178: if ((me = (HTTriple *) HT_CALLOC(1, sizeof(HTTriple))) == NULL)
179: HT_OUTOFMEM("HTTriple_new");
180: StrAllocCopy(me->m_sPredicate, p);
181: StrAllocCopy(me->m_sSubject, s);
182: StrAllocCopy(me->m_sObject, o);
183: }
184: return me;
185: }
186:
187: PUBLIC BOOL HTTriple_delete (HTTriple * me)
188: {
189: if (me) {
190: HT_FREE(me->m_sPredicate);
191: HT_FREE(me->m_sSubject);
192: HT_FREE(me->m_sObject);
193: HT_FREE(me);
194: return YES;
195: }
196: return NO;
197: }
198:
199: PUBLIC void HTTriple_print (HTTriple * me)
200: {
201: if (me)
202: HTPrint("TRIPLE(%s,%s,%s)\n", me->m_sPredicate, me->m_sSubject,
203: me->m_sObject);
204: }
205:
2.3 frystyk 206: PUBLIC char * HTTriple_subject (HTTriple * me)
2.1 frystyk 207: {
208: return me ? me->m_sSubject : NULL;
209: }
210:
2.3 frystyk 211: PUBLIC char * HTTriple_predicate (HTTriple * me)
2.1 frystyk 212: {
213: return me ? me->m_sPredicate : NULL;
214: }
215:
2.3 frystyk 216: PUBLIC char * HTTriple_object (HTTriple * me)
2.1 frystyk 217: {
218: return me ? me->m_sObject : NULL;
219: }
220:
221: /* ------------------------------------------------------------------------- */
222: /* ELEMENT of RDF */
223: /* ------------------------------------------------------------------------- */
224:
2.3 frystyk 225: PUBLIC HTElement * HTElement_new (char * sName, HTAssocList * al)
2.1 frystyk 226: {
227: HTElement * me = NULL;
228: if (sName) {
229: if ((me = (HTElement *) HT_CALLOC(1, sizeof(HTElement))) == NULL)
230: HT_OUTOFMEM("HTElement_new");
231: StrAllocCopy(me->m_sName, sName);
232: me->m_attributes = al ? al : HTAssocList_new();
233: me->m_children = HTList_new();
234: /*me->m_nodes = HTAssocList_new();*/ /* Sirpac does not use nodes list */
235: me->m_vTargets = HTList_new();
236: me->m_bDone = FALSE;
237: }
238: return me;
239: }
240:
241: /*
242: ** Creates a Data Element and saves the data in the Content field.
243: ** Data Element does not have attributes
244: */
2.3 frystyk 245: PUBLIC HTElement * HTElement_new2 (char * sContent)
2.1 frystyk 246: {
247: HTElement * me = NULL;
248: if (sContent) {
249: if ((me = (HTElement *) HT_CALLOC(1, sizeof(HTElement))) == NULL)
250: HT_OUTOFMEM("HTElement_new2");
251: StrAllocMCopy(&me->m_sName, "[DATA: ", sContent, "]", NULL);
252: me->m_attributes = NULL;
253: me->m_children = HTList_new();
254: /*me->m_nodes = HTAssocList_new();*/
255: me->m_vTargets = HTList_new();
256: me->m_bDone = FALSE;
257: StrAllocCopy(me->m_sContent, sContent);
258: }
259: return me;
260: }
261:
2.3 frystyk 262: PUBLIC BOOL HTElement_addData (HTElement *me, char * sContent)
2.1 frystyk 263: {
264: if (me && sContent) {
265: int l = strlen(me->m_sName);
266: StrAllocCat(me->m_sContent, sContent);
267: me->m_sName[l-1]='\0';
268: StrAllocMCat(&me->m_sName, sContent, "]", NULL);
269: return YES;
270: }
271: return NO;
272: }
273:
274: PUBLIC BOOL HTElement_delete (HTElement * me)
275: {
276: if (me) {
277: HT_FREE(me->m_sName);
278: if (me->m_attributes) HTAssocList_delete(me->m_attributes);
279: if (me->m_children) HTList_delete(me->m_children);
280: HT_FREE(me->m_sID);
281: HT_FREE(me->m_sBagID);
282: if (me->m_vTargets) HTList_delete(me->m_vTargets);
283: HT_FREE(me->m_sPrefix);
284: HT_FREE(me->m_sContent);
285: HT_FREE(me);
286: return YES;
287: }
288: return NO;
289: }
290:
291: PUBLIC BOOL HTElement_addChild (HTElement * me, HTElement * element)
292: {
293: return (me && element) ? HTList_appendObject(me->m_children, element) : NO;
294: }
295:
2.3 frystyk 296: PUBLIC BOOL HTElement_addAttribute (HTElement * me, char * sName, char * sValue)
2.1 frystyk 297: {
298: return (me && sName && sValue) ?
299: HTAssocList_addObject(me->m_attributes, sName, sValue) : NO;
300: }
301:
2.3 frystyk 302: PUBLIC BOOL HTElement_removeAttribute (HTElement * me, char * sName)
2.1 frystyk 303: {
304: return (me && sName) ? HTAssocList_removeObject(me->m_attributes, sName) : NO;
305: }
306:
2.3 frystyk 307: PUBLIC char * HTElement_getAttribute (HTElement * me, char * sName)
2.1 frystyk 308: {
309: return (me && sName) ? HTAssocList_findObjectCaseSensitiveExact(me->m_attributes, sName) : NULL;
310: }
311:
2.3 frystyk 312: PUBLIC char * HTElement_getAttribute2 (HTElement * me, char * sNamespace, char * sName)
2.1 frystyk 313: {
2.3 frystyk 314: char * fValue = NULL;
315: char * fName = NULL;
2.1 frystyk 316: if (me && sNamespace && sName) {
317: StrAllocMCopy(&fName, sNamespace, sName, NULL);
318: fValue = HTAssocList_findObjectCaseSensitiveExact(me->m_attributes, fName);
319: HT_FREE(fName);
320: }
321: return fValue;
322: }
323:
324: PUBLIC BOOL HTElement_addTarget (HTElement * me, HTElement * element)
325: {
326: return (me && element) ? HTList_addObject(me->m_vTargets, element) : NO;
327: }
328:
329: PUBLIC HTElement * HTElement_target (HTElement * me)
330: {
331: return me ? (HTElement *) HTList_lastObject(me->m_vTargets) : NULL;
332: }
333:
334: PUBLIC BOOL HTElement_instanceOfData (HTElement * me)
335: {
336: return (me && me->m_sContent) ? YES : NO;
337: }
338:
339: /* ------------------------------------------------------------------------- */
340: /* EXPAT HANDLERS */
341: /* ------------------------------------------------------------------------- */
342:
343: /*
344: * Called for each new element.
345: * Build up the document tree using an element stack
346: */
347: PRIVATE void XML_startElement (void * userData,
348: const XML_Char * name, const XML_Char ** atts)
349: {
350: HTRDF * rdfp = (HTRDF *) userData;
351: HTAssocList * namespaces = HTAssocList_new();
352: HTAssocList * newAL = HTAssocList_new();
353: int i = 0;
2.5 barstow 354:
2.1 frystyk 355: /**
356: * The following loop tries to identify special xmlns prefix
357: * attributes and update the namespace stack accordingly.
358: * While doing all this, it builds another AttributeList instance
359: * which will hold the expanded names of the attributes
360: * (I think this approach is only useful for RDF which uses
361: * attributes as an abbreviated syntax for element names)
362: */
363: if (atts) {
364: while (atts[i]) {
2.3 frystyk 365: char * aName = (char * ) atts[i];
2.1 frystyk 366: if (!strcmp(aName, "xmlns")) {
2.3 frystyk 367: char * aValue = (char *) atts[i+1];
2.1 frystyk 368: int len = aValue ? strlen(aValue) : -1;
369: if (len == 0 && !rdfp->m_sSource)
370: aValue = rdfp->m_sSource;
371: HTAssocList_addObject(namespaces, aName, aValue);
372: /* save all non-RDF schema addresses */
373: if (!HTList_contains(rdfp->m_vAllNameSpaces, aValue) &&
374: strncmp(aValue, RDFMS, strlen(RDFMS)) &&
375: strncmp(aValue, RDFSCHEMA, strlen(RDFSCHEMA))) {
2.3 frystyk 376: char * nname = NULL;
2.1 frystyk 377: StrAllocCopy(nname, aValue);
378: HTList_addObject(rdfp->m_vAllNameSpaces, nname);
379: }
380:
381: /* Special case: Don't save document's own address */
382: if (rdfp->m_sSource &&
383: !strncmp(aValue, rdfp->m_sSource, strlen(rdfp->m_sSource))) {
2.3 frystyk 384: char * nname = NULL;
2.1 frystyk 385: StrAllocCopy(nname, aValue);
386: HTList_addObject(rdfp->m_vAllNameSpaces, nname);
387: }
388: } else if (!strncmp(aName, "xmlns:", 6)) {
2.3 frystyk 389: char * aValue = (char *) atts[i+1];
390: char * nName = NULL;
2.1 frystyk 391: int len = aValue ? strlen(aValue) : -1;
392: if (len == 0 && !rdfp->m_sSource)
393: aValue = rdfp->m_sSource;
394: StrAllocCopy(nName, &(aName[6]));
395: HTAssocList_addObject(namespaces, nName, aValue);
396: HT_FREE(nName);
397:
398: /* Save all non-RDF schema addresses */
399: if (!HTList_contains(rdfp->m_vAllNameSpaces, aValue) &&
400: strncmp(aValue, RDFMS, strlen(RDFMS)) &&
401: strncmp(aValue, RDFSCHEMA, strlen(RDFSCHEMA))) {
2.3 frystyk 402: char * nname = NULL;
2.1 frystyk 403: StrAllocCopy(nname, aValue);
404: HTList_addObject(rdfp->m_vAllNameSpaces, nname);
405: }
406:
407: /* Special case: Don't save document's own address */
408: if (rdfp->m_sSource &&
409: !strncmp(aValue, rdfp->m_sSource, strlen(rdfp->m_sSource))) {
2.3 frystyk 410: char * nname = NULL;
2.1 frystyk 411: StrAllocCopy(nname, aValue);
412: HTList_addObject(rdfp->m_vAllNameSpaces, nname);
413: }
414: }
415: i+=2;
416: } /* end of while */
417: } /* end of if */
418:
419: /*
420: ** Place new namespace declarations into the stack
421: ** (Yes, I could optimize this a bit, not it wastes space
422: ** if there are no xmlns definitions)
423: */
424: HTList_addObject(rdfp->m_namespaceStack, namespaces);
425:
426: /*
427: ** Figure out the prefix part if it exists and
428: ** determine the namespace of the element accordingly
429: */
430: {
2.3 frystyk 431: char * sNamespace = NULL;
432: char * sElementName = NULL;
433: char * sPrefix2 = NULL;
2.1 frystyk 434: HTElement *newElement = NULL;
435: char *pindex = strchr(name, ':');
436: int ix = pindex ? (int) (pindex - name) : -1 ;
437: if (ix > 0) {
438: if (!(sPrefix2 = HT_MALLOC(ix+1)))
439: HT_OUTOFMEM("XML_startELement");
440: strncpy(sPrefix2, name, ix);
441: sPrefix2[ix]='\0';
442: sNamespace = HTRDF_namespace(rdfp, sPrefix2);
443: StrAllocCopy(sElementName, &(name[ix+1]));
444: HT_FREE(sPrefix2);
445: } else {
446: sNamespace = HTRDF_namespace(rdfp, "xmlns");
447: StrAllocCopy(sElementName, name);
448: }
449:
450: /*
451: * Finally look for attributes other than the special xmlns,
452: * expand them, and place to the new Attribute List
453: */
454: i = 0;
455: if (atts) {
456: while (atts[i]) {
2.3 frystyk 457: char * aName = (char *) atts[i];
458: char * sAttributeNamespace = NULL;
2.1 frystyk 459: if (strncmp(aName, "xmlns", 5)) {
2.3 frystyk 460: char * aValue = (char *) atts[i+1];
461: char * sPrefix = NULL;
2.1 frystyk 462: /* Expat does not have type for attributes */
463: pindex = strchr(aName, ':');
464: ix = pindex ? (int) (pindex - aName) : -1;
465: if (ix > 0) {
466: if (!(sPrefix = HT_MALLOC(ix+1)))
467: HT_OUTOFMEM("XML_startELement");
468: strncpy(sPrefix, aName, ix);
469: sPrefix[ix] = '\0';
470: sAttributeNamespace = HTRDF_namespace(rdfp, sPrefix);
471: aName = &(aName[ix+1]);
472: HT_FREE(sPrefix);
473: } else {
474: if (!sNamespace)
475: sAttributeNamespace = HTRDF_namespace(rdfp, "xmlns");
476: else
477: StrAllocCopy(sAttributeNamespace, sNamespace);
478: }
479:
480: if (HTRDF_parseLiteral(rdfp)) {
481: if (!sPrefix) {
482: if (!(sPrefix = HT_MALLOC(8)))
483: HT_OUTOFMEM("XML_startELement");
484: sprintf(sPrefix, "gen%d\n", i);
485: }
486: {
2.3 frystyk 487: char * fName = NULL;
2.1 frystyk 488: StrAllocMCopy(&fName, sPrefix, ":", aValue, NULL);
489: HTAssocList_addObject(newAL, fName, aValue);
490: HT_FREE(fName);
491: StrAllocMCopy(&fName, "xmlns:", sPrefix, NULL);
492: HTAssocList_addObject(newAL, fName, sAttributeNamespace);
493: HT_FREE(fName);
494: }
495: } else {
2.3 frystyk 496: char * fName = NULL;
2.1 frystyk 497: StrAllocMCopy(&fName, sAttributeNamespace, aName, NULL);
498: HTAssocList_addObject(newAL, fName, aValue);
499: HT_FREE(fName);
500: }
501:
502: HT_FREE(sAttributeNamespace);
503:
504: /*
505: ** This call will try to see if the user is using
506: ** RDF look-alike elements from another namespace
507: **
508: ** Note: you can remove the call if you wish
509: */
510: #if 0
511: HTRDF_likeRDF (rdfp, sAttributeNamespace, aName);
512: #endif
513:
514: } /* end of if */
515: i+=2;
516: } /* end of while */
517: } /* end of if atts */
518:
519: /*
520: * If we have parseType="Literal" set earlier, this element
521: * needs some additional attributes to make it stand-alone
522: * piece of XML
523: */
524: if (HTRDF_parseLiteral(rdfp)) {
2.3 frystyk 525: char * fName = NULL;
2.4 barstow 526:
2.1 frystyk 527: if (!sPrefix2) {
528: if (sNamespace)
529: HTAssocList_addObject(newAL, "xmlns:gen", sNamespace);
530: StrAllocMCopy(&fName, "gen", sElementName, NULL);
531: newElement = HTElement_new(fName, newAL);
532: StrAllocCopy(newElement->m_sPrefix, "gen");
533: HT_FREE(fName);
534: } else {
2.3 frystyk 535: char * sAttributeNamespace = HTRDF_namespace(rdfp, sPrefix2);
2.1 frystyk 536: if (sAttributeNamespace) {
537: StrAllocMCopy(&fName, "xmlns:", sPrefix2, NULL);
538: HTAssocList_addObject(newAL, fName, sAttributeNamespace);
539: HT_FREE(fName);
540: }
541: StrAllocMCopy(&fName, sPrefix2, ":", sElementName, NULL);
542: newElement = HTElement_new(fName, newAL);
543: HT_FREE(fName);
544: }
545: } else {
2.3 frystyk 546: char * fName = NULL;
2.1 frystyk 547: StrAllocMCopy(&fName, sNamespace, sElementName, NULL);
548: newElement = HTElement_new(fName, newAL);
549: HT_FREE(fName);
550: /* HTRDF_likeRDF (rdfp, sNamespace, sElementName); */
551: }
552: HT_FREE(sElementName);
553: HT_FREE(sNamespace);
554: HTRDF_checkAttributes(rdfp, newElement);
2.4 barstow 555:
2.1 frystyk 556: /*
557: ** Check parseType
558: */
559: {
2.3 frystyk 560: char * fName = NULL;
561: char * sLiteralValue = NULL;
2.1 frystyk 562: StrAllocMCopy(&fName, RDFMS, "parseType", NULL);
563: sLiteralValue = HTElement_getAttribute(newElement, fName);
564: HT_FREE(fName);
565: if (sLiteralValue && strcmp(sLiteralValue, "Resource")) {
566: /**
567: * This is the management of the element where
568: * parseType="Literal" appears
569: *
570: * You should notice RDF V1.0 conforming implementations
571: * must treat other values than Literal and Resource as
572: * Literal. This is why the condition is !equals("Resource")
573: */
574:
575: HTList_addObject(rdfp->m_parseTypeStack, sLiteralValue);
576: if (!HTList_isEmpty(rdfp->m_elementStack)) {
577: HTElement *e = (HTElement *)
578: HTList_lastObject(rdfp->m_elementStack);
579: HTElement_addChild(e, newElement);
580: }
581: HTList_addObject(rdfp->m_elementStack, newElement);
582: HTList_addObject(rdfp->m_parseElementStack, newElement);
583: HT_FREE(rdfp->m_sLiteral);
584: StrAllocCopy(rdfp->m_sLiteral, "");
585: return;
586: }
587:
588: if (HTRDF_parseLiteral(rdfp)) {
589: /*
590: * This is the management of any element nested within
591: * a parseType="Literal" declaration
592: */
2.4 barstow 593: /* Add the element to the parser's literal buffer */
594: addMarkupStart (rdfp, name, atts);
595:
2.8 barstow 596: /* Add this literal element to the literal stack */
597: if (!HTList_isEmpty(rdfp->m_literalStack)) {
598: HTElement *e = (HTElement *)
599: HTList_lastObject(rdfp->m_literalStack);
600: HTElement_addChild(e, newElement);
601: }
602: HTList_addObject(rdfp->m_literalStack, newElement);
603:
2.1 frystyk 604: HTList_addObject(rdfp->m_elementStack, newElement);
605: return;
606: }
607:
608: /*
609: ** Update the containment hierarchy with the stack.
610: */
611: if (!HTList_isEmpty(rdfp->m_elementStack)) {
612: HTElement *e = (HTElement *)
613: HTList_lastObject(rdfp->m_elementStack);
614: HTElement_addChild(e, newElement);
615: }
616:
617: /*
618: ** Place the new element into the stack
619: */
620: HTList_addObject(rdfp->m_elementStack, newElement);
621: if (sLiteralValue && !strcmp(sLiteralValue, "Resource")) {
622: HTList_addObject(rdfp->m_parseTypeStack, sLiteralValue);
623: HTList_addObject(rdfp->m_parseElementStack, newElement);
624: HT_FREE(rdfp->m_sLiteral);
625: StrAllocCopy(rdfp->m_sLiteral, "");
626:
627: /*
628: * Since parseType="Resource" implies the following
629: * production must match Description, let's create
630: * an additional Description node here in the document tree.
631: */
632: {
2.3 frystyk 633: char * fName = NULL;
2.1 frystyk 634: HTElement *desc = NULL;
635: HTAssocList * al = HTAssocList_new ();
636: StrAllocMCopy(&fName, RDFMS, "Description", NULL);
637: desc = HTElement_new(fName, al);
638: HT_FREE(fName);
639: if (!HTList_isEmpty(rdfp->m_elementStack)) {
640: HTElement *e = (HTElement *)
641: HTList_lastObject(rdfp->m_elementStack);
642: HTElement_addChild(e, desc);
643: }
644: HTList_addObject(rdfp->m_elementStack, desc);
645: }
646: } /* end of if */
647: } /* end of block */
648: } /* end of block */
649: }
650:
651: /*
652: * For each end of an element scope step back in the
653: * element and namespace stack
654: */
655: PRIVATE void XML_endElement (void * userData,
656: const XML_Char * name)
657: {
658: HTRDF * rdfp = (HTRDF *) userData;
659: BOOL bParseLiteral = rdfp ? HTRDF_parseLiteral(rdfp) : NO;
660: HTAssocList * namespaces = HTList_removeLastObject(rdfp->m_namespaceStack);
661: rdfp->m_root = (HTElement *) HTList_removeLastObject(rdfp->m_elementStack);
662: if (namespaces) HTAssocList_delete(namespaces);
663:
664: if (bParseLiteral) {
665: HTElement *pe = (HTElement *)
666: HTList_lastObject(rdfp->m_parseElementStack);
667: if (pe != rdfp->m_root) {
2.4 barstow 668: /* Terminate the literal */
669: addMarkupEnd (rdfp, name);
2.1 frystyk 670: } else {
671: HTElement *de = HTElement_new2(rdfp->m_sLiteral);
672: HTElement_addChild(pe, de);
2.4 barstow 673:
2.1 frystyk 674: HT_FREE(rdfp->m_sLiteral);
675: StrAllocCopy(rdfp->m_sLiteral, "");
676: HTList_removeLastObject(rdfp->m_parseElementStack);
677: HTList_removeLastObject(rdfp->m_parseTypeStack);
678: }
679: } else if (HTRDF_parseResource(rdfp)) {
680: /**
681: * If we are doing parseType="Resource"
682: * we need to explore whether the next element in
683: * the stack is the closing element in which case
684: * we remove it as well (remember, there's an
685: * extra Description element to be removed)
686: */
687: if (!HTList_isEmpty(rdfp->m_elementStack)) {
688: HTElement *pe = (HTElement *)
689: HTList_lastObject(rdfp->m_parseElementStack);
690: HTElement *e = (HTElement *)
691: HTList_lastObject(rdfp->m_elementStack);
692: if (pe == e) {
693: e = (HTElement *) HTList_removeLastObject(rdfp->m_elementStack);
694: HTList_removeLastObject(rdfp->m_parseElementStack);
695: HTList_removeLastObject(rdfp->m_parseTypeStack);
696: }
697: }
698: }
699: }
700:
701: PRIVATE void XML_characterData (void * userData,
702: const XML_Char * s, int len)
703: {
704: /*
705: * Place all characters as Data instance to the containment
706: * hierarchy with the help of the stack.
707: */
708: HTRDF * rdfp = (HTRDF *) userData;
2.3 frystyk 709: HTElement * e = (HTElement *) HTList_lastObject(rdfp->m_elementStack);
710: char * tstr = NULL;
711: char * str = NULL;
712: if (!(str = (char *) HT_MALLOC(len+1)))
2.1 frystyk 713: HT_OUTOFMEM("XML_characterData");
714: strncpy(str, s, len);
715: str[len]='\0';
716: if (HTRDF_parseLiteral(rdfp)) {
717: StrAllocCat(rdfp->m_sLiteral, str);
718: HT_FREE(str);
719: return;
720: }
721: /* JUST FOR EXPAT */
722: {
723: HTElement *lch = (HTElement *) HTList_lastObject(e->m_children);
724: if (lch && HTElement_instanceOfData(lch)) {
725: HTElement_addData(lch, str);
726: HT_FREE(str);
727: return;
728: }
729: }
730: /*
731: * Warning: this is not correct procedure according to XML spec.
732: * All whitespace matters!
733: */
734: tstr = trim(str);
735: if (strlen(tstr) > 0) {
736: HTElement * de = HTElement_new2(tstr);
737: HTElement_addChild(e, de);
738: }
739: HT_FREE(str); HT_FREE(tstr);
740: }
741:
742: PRIVATE void XML_processingInstruction (void * userData,
743: const XML_Char * target,
744: const XML_Char * data)
745: {
746: return;
747: }
748:
749: /*
750: ** This is called for any characters in the XML document for
751: ** which there is no applicable handler. This includes both
752: ** characters that are part of markup which is of a kind that is
753: ** not reported (comments, markup declarations), or characters
754: ** that are part of a construct which could be reported but
755: ** for which no handler has been supplied. The characters are passed
756: ** exactly as they were in the XML document except that
757: ** they will be encoded in UTF-8. Line boundaries are not normalized.
758: ** Note that a byte order mark character is not passed to the default handler.
759: ** If a default handler is set, internal entity references
760: ** are not expanded. There are no guarantees about
761: ** how characters are divided between calls to the default handler:
762: ** for example, a comment might be split between multiple calls.
763: */
764: PRIVATE void XML_default (void * userData,
765: const XML_Char * s, int len)
766: {
767: return;
768: }
769:
770: /*
771: ** This is called for a declaration of an unparsed (NDATA)
772: ** entity. The base argument is whatever was set by XML_SetBase.
773: ** The entityName, systemId and notationName arguments will never be null.
774: ** The other arguments may be.
775: */
776: PRIVATE void XML_unparsedEntityDecl (void * userData,
777: const XML_Char * entityName,
778: const XML_Char * base,
779: const XML_Char * systemId,
780: const XML_Char * publicId,
781: const XML_Char * notationName)
782: {
783: return;
784: }
785:
786: /*
787: ** This is called for a declaration of notation.
788: ** The base argument is whatever was set by XML_SetBase.
789: ** The notationName will never be null. The other arguments can be.
790: */
791: PRIVATE void XML_notationDecl (void * userData,
792: const XML_Char * notationName,
793: const XML_Char * base,
794: const XML_Char * systemId,
795: const XML_Char * publicId)
796: {
797: return;
798: }
799:
800: /*
801: ** This is called for a reference to an external parsed general entity.
802: ** The referenced entity is not automatically parsed.
803: ** The application can parse it immediately or later using
804: ** XML_ExternalEntityParserCreate.
805: ** The parser argument is the parser parsing the entity containing the reference;
806: ** it can be passed as the parser argument to XML_ExternalEntityParserCreate.
807: ** The systemId argument is the system identifier as specified in the entity
808: ** declaration; it will not be null.
809: ** The base argument is the system identifier that should be used as the base for
810: ** resolving systemId if systemId was relative; this is set by XML_SetBase;
811: ** it may be null.
812: ** The publicId argument is the public identifier as specified in the entity declaration,
813: ** or null if none was specified; the whitespace in the public identifier
814: ** will have been normalized as required by the XML spec.
815: ** The openEntityNames argument is a space-separated list of the names of the entities
816: ** that are open for the parse of this entity (including the name of the referenced
817: ** entity); this can be passed as the openEntityNames argument to
818: ** XML_ExternalEntityParserCreate; openEntityNames is valid only until the handler
819: ** returns, so if the referenced entity is to be parsed later, it must be copied.
820: ** The handler should return 0 if processing should not continue because of
821: ** a fatal error in the handling of the external entity.
822: ** In this case the calling parser will return an XML_ERROR_EXTERNAL_ENTITY_HANDLING
823: ** error.
824: ** Note that unlike other handlers the first argument is the parser, not userData.
825: */
826: PRIVATE int XML_externalEntityRef (XML_Parser parser,
827: const XML_Char * openEntityNames,
828: const XML_Char * base,
829: const XML_Char * systemId,
830: const XML_Char * publicId)
831: {
832: return 0;
833: }
834:
835: /*
836: ** This is called for an encoding that is unknown to the parser.
837: ** The encodingHandlerData argument is that which was passed as the
838: ** second argument to XML_SetUnknownEncodingHandler.
839: ** The name argument gives the name of the encoding as specified in
840: ** the encoding declaration.
841: ** If the callback can provide information about the encoding,
842: ** it must fill in the XML_Encoding structure, and return 1.
843: ** Otherwise it must return 0.
844: ** If info does not describe a suitable encoding,
845: ** then the parser will return an XML_UNKNOWN_ENCODING error.
846: */
847: PRIVATE int XML_unknownEncoding (void * encodingHandlerData,
848: const XML_Char * name,
849: XML_Encoding * info)
850: {
851: return 0;
852: }
853:
854: /* ------------------------------------------------------------------------- */
855: /* HTXML STREAM HANDLERS */
856: /* ------------------------------------------------------------------------- */
857:
858: PRIVATE void rdf_setHandlers (XML_Parser me)
859: {
860: XML_SetElementHandler(me, XML_startElement, XML_endElement);
861: XML_SetCharacterDataHandler(me, XML_characterData);
862: XML_SetProcessingInstructionHandler(me, XML_processingInstruction);
863: XML_SetDefaultHandler(me, XML_default);
864: XML_SetUnparsedEntityDeclHandler(me, XML_unparsedEntityDecl);
865: XML_SetNotationDeclHandler(me, XML_notationDecl);
866: XML_SetExternalEntityRefHandler(me, XML_externalEntityRef);
867: XML_SetUnknownEncodingHandler(me, XML_unknownEncoding, NULL);
868: }
869:
870: PRIVATE void rdf_newInstance (HTStream * me,
871: HTRequest * request,
872: HTFormat target_format,
873: HTStream * target_stream,
874: XML_Parser xmlparser,
875: void * context)
876: {
877: if (me && xmlparser) {
878: rdf_setHandlers(xmlparser);
879: XML_SetUserData(xmlparser, context);
880:
881: /* Call the new RDF instance callback (if any) with this new stream */
882: if (RDFInstance)
883: (*RDFInstance)(me, request, target_format, target_stream, context, RDFInstanceContext);
884: }
885: }
886:
887: /* ------------------------------------------------------------------------- */
888: /* RDF PARSER */
889: /* ------------------------------------------------------------------------- */
890:
891: PRIVATE void visit_element_children (HTList *children)
892: {
893: HTElement *child = NULL;
894: HTList *cur = children;
895: while ((child = (HTElement *) HTList_nextObject(cur))) {
896: if (!HTList_isEmpty(child->m_children))
897: visit_element_children(child->m_children);
898: HTElement_delete(child);
899: }
900: }
901:
902: PRIVATE void delete_elements (HTRDF * me)
903: {
904: if (me && me->m_root) {
905: HTElement *r = me->m_root;
906: if (!HTList_isEmpty(r->m_children))
907: visit_element_children(r->m_children);
908: HTElement_delete(r);
909: }
910: }
911:
2.8 barstow 912: PRIVATE void delete_literal_elements (HTRDF * me)
913: {
914: if (me && me->m_literalStack) {
915: HTList *cur = me->m_literalStack;
916: HTElement *e = NULL;
917: while ((e = (HTElement *) HTList_nextObject(cur))) {
918: HTElement_delete(e);
919: }
920: HTList_delete(me->m_literalStack);
921: }
922: }
923:
924:
2.1 frystyk 925: PUBLIC HTRDF * HTRDF_new (void)
926: {
927: HTRDF * me;
928: if ((me = (HTRDF *) HT_CALLOC(1, sizeof(HTRDF))) == NULL)
929: HT_OUTOFMEM("HTRDF_new");
930: me->m_namespaceStack = HTList_new();
931: me->m_elementStack = HTList_new();
2.8 barstow 932: me->m_literalStack = HTList_new();
2.1 frystyk 933:
934: me->m_triples = HTList_new();
935: me->m_vAllNameSpaces = HTList_new();
936:
937: me->m_bCreateBags = FALSE;
938: me->m_bFetchSchemas = FALSE;
939:
940: me->m_parseTypeStack = HTList_new();
941: me->m_parseElementStack = HTList_new();
942:
943: me->m_vResources = HTList_new();
944: me->m_vResolveQueue = HTList_new();
945: me->m_hIDtable = HTHashtable_new(0);
946:
947: return me;
948: }
949:
950: PUBLIC BOOL HTRDF_delete (HTRDF * me)
951: {
952: if (me) {
953: delete_elements(me);
954: if (me->m_namespaceStack) {
955: HTList *cur = me->m_namespaceStack;
956: HTAssocList *alist = NULL;
957: while ((alist = (HTAssocList *) HTList_nextObject(cur))) {
958: HTAssocList_delete(alist);
959: }
960: HTList_delete(me->m_namespaceStack);
961: }
962: if (me->m_elementStack) HTList_delete(me->m_elementStack);
2.8 barstow 963:
964: delete_literal_elements(me);
965:
2.1 frystyk 966: me->m_root = NULL;
967: if (me->m_triples) {
968: HTList *cur = me->m_triples;
969: HTTriple *t = NULL;
970: while ((t = (HTTriple *) HTList_nextObject(cur))) {
971: /*HTTriple_print(t);*/
972: HTTriple_delete(t);
973: }
974: HTList_delete(me->m_triples);
975: }
976: HT_FREE(me->m_sSource);
977: if (me->m_vAllNameSpaces) {
978: HTList *cur = me->m_vAllNameSpaces;
2.3 frystyk 979: char * s = NULL;
980: while ((s = (char *) HTList_nextObject(cur))) {
2.1 frystyk 981: HT_FREE(s);
982: }
983: HTList_delete(me->m_vAllNameSpaces);
984: }
985: if (me->m_parseTypeStack)
986: HTList_delete(me->m_parseTypeStack);
987: if (me->m_parseElementStack)
988: HTList_delete(me->m_parseElementStack);
989: if (me->m_vResources)
990: HTList_delete(me->m_vResources);
991: if (me->m_vResolveQueue)
992: HTList_delete(me->m_vResolveQueue);
993: if (me->m_hIDtable)
994: HTHashtable_delete(me->m_hIDtable);
995: HT_FREE(me->m_sLiteral);
996: HT_FREE(me);
997: return YES;
998: }
999: return NO;
1000: }
1001:
1002: /*
1003: * setSource method saves the name of the source document for
1004: * later inspection if needed
1005: */
2.3 frystyk 1006: PUBLIC BOOL HTRDF_setSource(HTRDF *me, char * source)
2.1 frystyk 1007: {
1008: if (me && source) {
1009: StrAllocCopy (me->m_sSource, source);
1010: return YES;
1011: }
1012: return NO;
1013: }
1014:
1015: /*
1016: * Go through the m_vResolveQueue and assign
1017: * direct object reference for each symbolic reference
1018: */
1019: PUBLIC BOOL HTRDF_resolve (HTRDF * me)
1020: {
1021: if (me) {
1022: HTList * cur = me->m_vResolveQueue;
1023: HTElement *e = NULL;
1024: HTElement *e2 = NULL;
1025: while ((e = (HTElement *) HTList_nextObject(cur))) {
2.3 frystyk 1026: char * sAbout = HTElement_getAttribute2(e, RDFMS, "about");
1027: char * sResource = HTElement_getAttribute2(e, RDFMS, "resource");
1028: char * sAboutEach = HTElement_getAttribute2(e, RDFMS, "aboutEach");
1029: char * sAboutEachPrefix = HTElement_getAttribute2(e, RDFMS,
2.1 frystyk 1030: "aboutEachPrefix");
1031: if (sAbout) {
1032: if (sAbout[0]=='#')
1033: sAbout = &(sAbout[1]);
1034: e2 = (HTElement *) HTRDF_lookforNode(me, sAbout);
1035: if (e2)
1036: HTElement_addTarget(e, e2);
1037: else
1038: HTPrint("Unresolved internal reference %s\n", sAbout);
1039: }
1040: if (sResource) {
1041: if (sResource[0]=='#')
1042: sResource = &(sResource[1]);
1043: e2 = (HTElement *) HTRDF_lookforNode(me, sResource);
1044: if (e2)
1045: HTElement_addTarget(e, e2);
1046: }
1047:
1048: if (sAboutEach) {
1049: sAboutEach = &(sAboutEach[1]);
1050: e2 = (HTElement *) HTRDF_lookforNode(me, sAboutEach);
1051: if (e2)
1052: HTElement_addTarget(e, e2);
1053: }
1054: if (sAboutEachPrefix) {
1055: HTList * curr = me->m_vResources;
1056: HTElement *ele = NULL;
1057: while ((ele = (HTElement *) HTList_nextObject(curr))) {
2.3 frystyk 1058: char * sA = HTElement_getAttribute2(ele, RDFMS, "about");
2.1 frystyk 1059: if (sA &&
1060: !strncmp(sA, sAboutEachPrefix, strlen(sAboutEachPrefix))) {
1061: HTElement_addTarget(e, ele);
1062: }
1063: }
1064: }
1065: }
1066: HTList_delete(me->m_vResources);
1067: me->m_vResources = HTList_new();
1068: return YES;
1069: }
1070: return NO;
1071: }
1072:
1073: /**
1074: * Check if the element e is from the namespace
1075: * of the RDF schema by comparing only the beginning of
1076: * the expanded element name with the canonical RDFMS
1077: * URI
1078: */
1079: PUBLIC BOOL HTRDF_isRDF(HTRDF * me, HTElement *e)
1080: {
1081: return (me && e && e->m_sName) ?
1082: (!strncmp(e->m_sName, RDFMS, strlen(RDFMS))) : NO;
1083: }
1084:
1085: PUBLIC BOOL HTRDF_isRDFroot (HTRDF * me, HTElement *e)
1086: {
1087: if (me && e && e->m_sName) {
1088: int len = strlen(e->m_sName);
1089: if (len > 3) return (HTRDF_isRDF(me, e) && !strcmp(&(e->m_sName[len-3]), "RDF"));
1090: }
1091: return NO;
1092: }
1093:
1094: /**
1095: * Is the element a Description
1096: */
1097: PUBLIC BOOL HTRDF_isDescription (HTRDF *me, HTElement *e)
1098: {
1099: if (me && e && e->m_sName) {
1100: int len = strlen(e->m_sName);
1101: if (len > 11) return (HTRDF_isRDF(me, e) && !strcmp(&(e->m_sName[len-11]), "Description"));
1102: }
1103: return NO;
1104: }
1105:
1106: /*
1107: * Is the element a ListItem
1108: */
1109: PUBLIC BOOL HTRDF_isListItem (HTRDF *me, HTElement *e)
1110: {
1111: if (me && e && e->m_sName) {
1112: int len = strlen(e->m_sName);
1113: if (len > 2)
1114: return (HTRDF_isRDF(me, e) && (!strcmp(&(e->m_sName[len-2]), "li") || strchr(e->m_sName,'_')));
1115: }
1116: return NO;
1117: }
1118:
1119: /**
1120: * Is the element a Sequence
1121: */
1122: PUBLIC BOOL HTRDF_isSequence (HTRDF *me, HTElement *e)
1123: {
1124: if (me && e && e->m_sName) {
1125: int len = strlen(e->m_sName);
1126: if (len > 3) return (HTRDF_isRDF(me, e) && !strcmp(&(e->m_sName[len-3]), "Seq"));
1127: }
1128: return NO;
1129: }
1130:
1131: /*
1132: * Is the element an Alternative
1133: */
1134: PUBLIC BOOL HTRDF_isAlternative (HTRDF *me, HTElement *e)
1135: {
1136: if (me && e && e->m_sName) {
1137: int len = strlen(e->m_sName);
1138: if (len > 3) return (HTRDF_isRDF(me, e) && !strcmp(&(e->m_sName[len-3]), "Alt"));
1139: }
1140: return NO;
1141: }
1142:
1143: /*
1144: * Is the element a Bag
1145: */
1146: PUBLIC BOOL HTRDF_isBag (HTRDF *me, HTElement *e)
1147: {
1148: if (me && e && e->m_sName) {
1149: int len = strlen(e->m_sName);
1150: if (len > 3) return (HTRDF_isRDF(me, e) && !strcmp(&(e->m_sName[len-3]), "Bag"));
1151: }
1152: return NO;
1153: }
1154:
1155: /**
1156: * Is the element a Container
1157: */
1158: PUBLIC BOOL HTRDF_isContainer (HTRDF *me, HTElement *e)
1159: {
1160: return (HTRDF_isSequence(me, e) ||
1161: HTRDF_isAlternative(me, e) ||
1162: HTRDF_isBag(me, e));
1163: }
1164:
1165: /*
1166: * This method matches all properties but those from RDF namespace
1167: */
1168: PUBLIC BOOL HTRDF_isTypedPredicate(HTRDF *me, HTElement *e)
1169: {
1170: if (me && e && e->m_sName) {
1171: int len = strlen(e->m_sName);
2.3 frystyk 1172: char * tp[] = {"predicate", "subject", "object",
2.1 frystyk 1173: "value", "type", "Property", "Statement"};
1174: int i;
1175: if (HTRDF_isRDF(me, e)) {
1176: for(i = 0; i< 7; i++) {
1177: int ntp = strlen(tp[i]);
1178: if (len > ntp) {
1179: if (!strcmp(&(e->m_sName[len-ntp]), tp[i]))
1180: return YES;
1181: }
1182: }
1183: return NO;
1184: }
1185: if (len > 0) return YES;
1186: }
1187: return NO;
1188: }
1189:
2.3 frystyk 1190: PRIVATE void HTRDF_processListItem (HTRDF * me, char * sID, HTElement *listitem,
2.1 frystyk 1191: int iCounter)
1192: {
1193: /*
1194: * Two different cases for
1195: * 1. LI element without content (resource available)
1196: * 2. LI element with content (resource unavailable)
1197: */
2.3 frystyk 1198: char * cName = NULL;
1199: char * sResource = HTRDF_getResource(me, listitem);
2.1 frystyk 1200: char sdig[20];
1201: sprintf(sdig, "_%d", iCounter);
1202: StrAllocMCopy(&cName, RDFMS, sdig, NULL);
1203: if (sResource) {
1204: HTRDF_addTriple(me, cName, sID, sResource);
1205: /* validity checking */
1206: if (!HTList_isEmpty(listitem->m_children)){
1207: HTPrint("Listitem with resource attribute can not have child nodes");
1208: }
1209: StrAllocCopy(listitem->m_sID, sResource);
1210: } else {
1211: HTList *cur = listitem->m_children;
1212: HTElement *n = NULL;
1213: while ((n = (HTElement *) HTList_nextObject(cur))) {
1214: if (HTElement_instanceOfData(n)) {
1215: HTRDF_addTriple(me, cName, sID, n->m_sContent);
1216: } else if (HTRDF_isDescription(me, n)) {
2.3 frystyk 1217: char * sNodeID = HTRDF_processDescription(me, n, NO, YES, NO);
2.1 frystyk 1218: HTRDF_addTriple(me, cName, sID, sNodeID);
1219: StrAllocCopy(listitem->m_sID, sNodeID);
1220: } else if (HTRDF_isListItem(me, n)) {
1221: HTPrint("Can not nest list item inside list item\n");
1222: } else if (HTRDF_isContainer(me, n)) {
2.3 frystyk 1223: char * c = HTRDF_processContainer(me, n);
2.1 frystyk 1224: HTRDF_addTriple(me, cName, sID, n->m_sID);
1225: HT_FREE(c);
1226: } else if (HTRDF_isTypedPredicate(me, n)) {
2.3 frystyk 1227: char * sNodeID = HTRDF_processTypedNode(me, n);
2.1 frystyk 1228: HTRDF_addTriple(me, cName, sID, sNodeID);
1229: HT_FREE(sNodeID);
1230: }
1231: }
1232: }
1233: HT_FREE(cName);
1234: }
1235:
2.3 frystyk 1236: PRIVATE char * HTRDF_processContainer(HTRDF *me, HTElement *n)
2.1 frystyk 1237: {
2.3 frystyk 1238: char * sID = NULL;
1239: char * tName = NULL;
1240: char * aName = NULL;
1241: char * sName = NULL;
1242: char * bName = NULL;
2.1 frystyk 1243: StrAllocMCopy(&tName, RDFMS, "type", NULL);
1244: StrAllocMCopy(&aName, RDFMS, "Alt", NULL);
1245: StrAllocMCopy(&sName, RDFMS, "Seq", NULL);
1246: StrAllocMCopy(&bName, RDFMS, "Bag", NULL);
1247:
1248: StrAllocCopy(sID, n->m_sID);
1249: if (!sID)
1250: sID = HTRDF_newReificationID(me);
1251: /*
1252: * Do the instantiation only once
1253: */
1254: if (!n->m_bDone) {
1255: if (HTRDF_isSequence(me, n)) {
1256: HTRDF_addTriple(me, tName, sID, sName);
1257: } else if (HTRDF_isAlternative(me, n)) {
1258: HTRDF_addTriple(me, tName, sID, aName);
1259: } else if (HTRDF_isBag(me, n)) {
1260: HTRDF_addTriple(me, tName, sID, bName);
1261: }
1262: n->m_bDone = YES;
1263: }
1264: HTRDF_expandAttributes(me, n, n);
1265:
1266: {
1267: HTList *cur = n->m_children;
1268: HTElement *n2 = NULL;
1269: int iCounter = 1;
1270: if (HTList_isEmpty(cur) && HTRDF_isAlternative(me, n))
1271: HTPrint("An RDF:Alt container must have at least one list item\n");
1272: while ((n2 = (HTElement *) HTList_nextObject(cur))) {
1273: if (HTRDF_isListItem(me, n2)) {
1274: HTRDF_processListItem(me, sID, n2, iCounter);
1275: iCounter++;
1276: } else {
1277: HTPrint("Can not nest %s, inside container\n", n2->m_sName);
1278: }
1279: }
1280: } /* end of block */
1281:
1282: HT_FREE(tName); HT_FREE(sName); HT_FREE(aName); HT_FREE(bName);
1283:
1284: return sID;
1285: }
1286: /*
1287: * Manage the typedNode production in the RDF grammar.
1288: *
1289: */
2.3 frystyk 1290: PUBLIC char * HTRDF_processTypedNode(HTRDF *me, HTElement *typedNode)
2.1 frystyk 1291: {
2.3 frystyk 1292: char * sID = HTElement_getAttribute2(typedNode, RDFMS, "ID");
1293: char * sBagID = HTElement_getAttribute2(typedNode, RDFMS, "bagID");
1294: char * sAbout = HTElement_getAttribute2(typedNode, RDFMS, "about");
1295: char * sAboutEach = HTElement_getAttribute2(typedNode, RDFMS, "aboutEach");
1296: /*char * sAboutEachPrefix = HTElement_getAttribute2(typedNode, RDFMS,
2.1 frystyk 1297: "aboutEachPrefix");*/
2.3 frystyk 1298: char * resource = HTElement_getAttribute2(typedNode, RDFMS, "resource");
1299: char * iName = NULL;
1300: char * bName = NULL;
1301: char * tName = NULL;
2.1 frystyk 1302:
2.3 frystyk 1303: char * sObject = NULL;
2.1 frystyk 1304:
1305: StrAllocMCopy(&iName, RDFMS, "ID", NULL);
1306: StrAllocMCopy(&bName, RDFMS, "bagID", NULL);
1307: StrAllocMCopy(&tName, RDFMS, "type", NULL);
1308:
1309: if (resource)
1310: HTPrint("resource attribute not allowed for a typedNode %s\n",
1311: typedNode->m_sName);
1312:
1313: /*
1314: * We are going to manage this typedNode using the processDescription
1315: * routine later on. Before that, place all properties encoded as
1316: * attributes to separate child nodes.
1317: */
1318: {
1319: HTAssoc * assoc;
1320: HTAssocList *cur = typedNode->m_attributes;
2.3 frystyk 1321: char * sAttribute = NULL;
1322: char * tValue = NULL;
1323: char * sValue = NULL;
2.1 frystyk 1324: while((assoc= (HTAssoc *) HTList_nextObject(cur))) {
1325: sAttribute = HTAssoc_name(assoc);
1326: sValue = HTAssoc_value(assoc);
1327: tValue = trim(sValue);
1328: if (strncmp(sAttribute, RDFMS, strlen(RDFMS)) &&
1329: strncmp(sAttribute, XMLSCHEMA, strlen(XMLSCHEMA))) {
1330: if (strlen(tValue) > 0) {
1331: HTAssocList *newAL = HTAssocList_new();
1332: HTElement *newPredicate = HTElement_new(sAttribute, newAL);
1333: HTElement *d = NULL;
1334: HTElement_addAttribute(newPredicate, iName,
1335: sAbout ? sAbout : sID);
1336: HTElement_addAttribute(newPredicate, bName, sBagID);
1337: d = HTElement_new2(tValue);
1338: HTElement_addChild(newPredicate, d);
1339: HTElement_addChild(typedNode, newPredicate);
1340: }
1341: }
1342: HT_FREE(tValue);
1343: } /* end of while */
1344: }/* end of block */
1345: {
1346: if (sAbout)
1347: StrAllocCopy(sObject, sAbout);
1348: else if (sID)
1349: StrAllocCopy(sObject, sID);
1350: else
1351: sObject = HTRDF_newReificationID(me);
1352: StrAllocCopy(typedNode->m_sID, sObject);
1353:
1354: /* special case: should the typedNode have aboutEach attribute,
1355: ** the type predicate should distribute to pointed
1356: ** collection also -> create a child node to the typedNode
1357: */
1358: if (sAboutEach && !HTList_isEmpty(typedNode->m_vTargets)) {
1359: HTAssocList *newAL = HTAssocList_new();
1360: HTElement *newPredicate = HTElement_new(tName, newAL);
1361: HTElement *d = HTElement_new2(typedNode->m_sName);
1362: HTElement_addChild(newPredicate, d);
1363: HTElement_addChild(typedNode, newPredicate);
1364: } else {
1365: HTRDF_addTriple(me, tName, sObject, typedNode->m_sName);
1366: }
1367: HTRDF_processDescription(me, typedNode, NO, NO, YES);
1368: }/* end of block */
1369:
1370: HT_FREE(iName); HT_FREE(bName); HT_FREE(tName);
1371:
1372: return sObject;
1373: }
1374:
1375: /*
1376: * Start processing an RDF/XML document instance from the
1377: * root element rdf.
1378: *
1379: */
1380: PUBLIC BOOL HTRDF_processRDF (HTRDF *me, HTElement *e)
1381: {
1382: if (me && e) {
1383: HTList *cur = e->m_children;
1384: HTElement *ele = NULL;
1385: if (HTList_isEmpty(e->m_children)) {
1386: HTPrint("Empty RDF Element\n");
1387: return NO;
1388: }
1389: while ((ele= (HTElement *) HTList_nextObject(cur))) {
1390: if (HTRDF_isDescription(me, ele)) {
1391: HTRDF_processDescription(me, ele, NO, me->m_bCreateBags,
1392: me->m_bCreateBags);
1393: } else if (HTRDF_isContainer(me, ele)) {
2.3 frystyk 1394: char * c = HTRDF_processContainer(me, ele);
2.1 frystyk 1395: HT_FREE(c);
1396: } else if (HTRDF_isTypedPredicate(me, ele)) {
2.3 frystyk 1397: char * t = HTRDF_processTypedNode(me, ele);
2.1 frystyk 1398: HT_FREE(t);
1399: }
1400: }
1401: return YES;
1402: }
1403: return NO;
1404: }
1405:
1406: /*
1407: * processPredicate handles all elements not defined as special
1408: * RDF elements.
1409: *
1410: * predicate The predicate element itself
1411: * description Context for the predicate
1412: * sTarget The target resource
1413: * reificate Should this predicate be reificated
1414: *
1415: * return the new ID which can be used to identify the predicate
1416: *
 * The function works in four stages:
 *   1. attributes other than the RDF/xmlns ones are expanded into a
 *      temporary Description ("place_holder") which is processed at once;
 *   2. a resource attribute with no children makes the resource (or the
 *      resolved target's ID) the value of the predicate;
 *   3. a resource attribute with a resolved target makes the processed
 *      target Description the value;
 *   4. otherwise every child (Description, data, container, typed node)
 *      is turned into a triple or a reified statement.
 *
 * The result may be NULL.  A non-NULL result is assigned from
 * HTRDF_reificate(), StrAllocCopy() or HTRDF_processTypedNode() and is
 * released with HT_FREE by HTRDF_processDescription(), so it is treated
 * as owned by the caller.
1417: */
2.3 frystyk 1418: PRIVATE char * HTRDF_processPredicate (HTRDF * me,
2.1 frystyk 1419: HTElement * predicate,
1420: HTElement * description,
2.3 frystyk 1421: char * sTarget,
2.1 frystyk 1422: BOOL reificate)
1423: {
2.3 frystyk 1424: char * sStatementID = HTElement_getAttribute2(predicate, RDFMS, "ID");
1425: char * nsStatementID = NULL;
1426: char * sBagID = HTElement_getAttribute2(predicate, RDFMS, "bagID");
1427: char * sResource = HTRDF_getResource(me, predicate);
2.1 frystyk 1428:
1429: /*
1430: ** If a predicate has other attributes than rdf:ID, rdf:bagID,
1431: ** or xmlns... -> generate new triples according to the spec.
1432: ** (See end of Section 6)
1433: */
1434: {
1435: HTElement * place_holder = NULL;
1436: HTAssocList * newAL = HTAssocList_new();
2.3 frystyk 1437: char * fName = NULL;
1438: char * aName = NULL;
2.1 frystyk 1439:
1440: StrAllocMCopy(&fName, RDFMS, "Description", NULL);
1441: place_holder = HTElement_new(fName, newAL);
1442: HT_FREE(fName);
1443:
/* NOTE(review): HTRDF_expandAttributes is defined outside this view; a
** non-zero result is taken here to mean that property attributes of the
** predicate were expanded onto place_holder - confirm. */
1444: if (HTRDF_expandAttributes(me, place_holder, predicate)) {
1445:
1446: /* error checking */
1447: if (!HTList_isEmpty(predicate->m_children)) {
1448: HTPrint("%s must be an empty element since it uses propAttr grammar production", predicate->m_sName);
/* Give up on this predicate: the temporary element is discarded and
** no ID is returned */
1449: HTElement_delete(place_holder);
1450: return NULL;
1451: }
1452: StrAllocMCopy(&aName, RDFMS, "about", NULL);
1453:
1454: /* determine the 'about' part for the new statements */
1455: if (sStatementID) {
1456: HTElement *data = HTElement_new2(sStatementID);
1457: HTElement_addAttribute(place_holder, aName, sStatementID);
1458:
1459: /* hack: make rdf:ID the value of the predicate */
1460: HTElement_addChild(predicate, data);
1461: } else if (sResource) {
1462: HTElement_addAttribute(place_holder, aName, sResource);
1463: } else {
/* The generated ID is only needed as the 'about' value and is
** released as soon as it has been handed to addAttribute */
1464: nsStatementID = HTRDF_newReificationID(me);
1465: HTElement_addAttribute(place_holder, aName, nsStatementID);
1466: HT_FREE(nsStatementID);
1467: }
1468: HT_FREE(aName);
1469:
1470: if (sBagID) {
1471: StrAllocMCopy(&fName, RDFMS, "bagID", NULL);
1472: HTElement_addAttribute(place_holder, fName, sBagID);
1473: HT_FREE(fName);
1474: StrAllocCopy(place_holder->m_sBagID, sBagID);
1475: }
/* NOTE(review): place_holder is not deleted on this path; who owns it
** after processDescription is not visible from here */
1476: HTRDF_processDescription(me, place_holder, NO, NO, me->m_bCreateBags);
1477: } else {
1478:
1479: /* Nothing but xmlns or RDF stuff, so we don't need new element */
1480: HTElement_delete(place_holder);
1481: }
1482: }
1483:
1484: /*
1485: ** Tricky part: if the resource attribute is present for a predicate
1486: ** AND there are no children, the value of the predicate is either
1487: ** 1. the URI in the resource attribute OR
1488: ** 2. the node ID of the resolved #resource attribute
1489: */
1490: if (sResource && HTList_isEmpty(predicate->m_children)) {
1491: if (!HTElement_target(predicate)) {
/* Case 1: no resolved target, the resource string itself is the object */
1492: if (reificate) {
1493: HT_FREE(nsStatementID);
1494: nsStatementID = HTRDF_reificate (me, predicate->m_sName,
1495: sTarget, sResource,
1496: predicate->m_sID);
1497: StrAllocCopy(predicate->m_sID, nsStatementID);
1498: } else {
1499: HTRDF_addTriple(me, predicate->m_sName, sTarget, sResource);
1500: }
1501: } else {
/* Case 2: the object is the ID of the resolved target element */
1502: HTElement *target = HTElement_target(predicate);
1503: if (reificate) {
1504: nsStatementID = HTRDF_reificate (me, predicate->m_sName,
1505: sTarget,
1506: target->m_sID,
1507: predicate->m_sID);
1508: StrAllocCopy(predicate->m_sID, nsStatementID);
1509: } else {
1510: HTRDF_addTriple(me, predicate->m_sName, sTarget, target->m_sID);
1511: }
1512: }
/* When a reified statement ID exists, return a copy of the ID now
** stored on the predicate */
2.7 barstow 1513: if (nsStatementID && predicate->m_sID)
1514: StrAllocCopy(nsStatementID, predicate->m_sID);
2.1 frystyk 1515: return nsStatementID;
1516: }
1517:
1518: /*
1519: ** Does this predicate make a reference somewhere using the
1520: ** sResource attribute
1521: */
1522: if (sResource && HTElement_target(predicate)) {
/* dStatementID is whatever processDescription returns for the target;
** it is not freed in this function */
2.3 frystyk 1523: char * dStatementID = HTRDF_processDescription(me,
2.1 frystyk 1524: HTElement_target(predicate),
1525: YES, NO, NO);
1526: if (reificate) {
1527: HT_FREE(nsStatementID);
1528: nsStatementID = HTRDF_reificate (me, predicate->m_sName,
1529: sTarget, dStatementID,
1530: predicate->m_sID);
1531: StrAllocCopy(predicate->m_sID, nsStatementID);
1532: } else {
1533: StrAllocCopy(nsStatementID, dStatementID);
1534: HTRDF_addTriple(me, predicate->m_sName, sTarget, nsStatementID);
1535: }
1536: return nsStatementID;
1537: }
1538:
1539: /*
1540: ** Before looping through the children, let's check
1541: ** if there are any. If not, the value of the predicate is
1542: ** an anonymous node
1543: */
1544: {
1545: HTList *cur = predicate->m_children;
1546: BOOL bUsedTypedNodeProduction = NO;
1547: HTElement *n2;
2.7 barstow 1548: if (nsStatementID && sStatementID)
1549: StrAllocCopy(nsStatementID, sStatementID);
2.1 frystyk 1550: if (HTList_isEmpty(cur)) {
/* No children: a freshly generated ID stands in for the anonymous
** object and is released once the statement has been recorded */
1551: if (reificate) {
2.3 frystyk 1552: char * nr = HTRDF_newReificationID(me);
2.1 frystyk 1553: HT_FREE(nsStatementID);
1554: nsStatementID = HTRDF_reificate (me, predicate->m_sName,
1555: sTarget, nr,
1556: predicate->m_sID);
1557: HT_FREE(nr);
1558: } else {
2.3 frystyk 1559: char * nr = HTRDF_newReificationID(me);
2.1 frystyk 1560: HTRDF_addTriple(me, predicate->m_sName, sTarget, nr);
1561: HT_FREE(nr);
1562: }
1563: }
/* Each child kind is handled in turn.  Branches that assign
** nsStatementID replace the previous value, so the value returned
** comes from the last child that assigned it. */
1564: while ((n2= (HTElement *) HTList_nextObject(cur))) {
1565: if (HTRDF_isDescription(me, n2)) {
1566: HTElement *d2 = n2;
2.3 frystyk 1567: char * dStatementID =HTRDF_processDescription(me, d2, YES, NO, NO);
/* Skip the copy when the returned pointer already is d2->m_sID */
2.7 barstow 1568: if (d2->m_sID && dStatementID && d2->m_sID != dStatementID)
1569: StrAllocCopy(d2->m_sID, dStatementID);
2.1 frystyk 1570:
1571: if (reificate) {
1572: HT_FREE(nsStatementID);
1573: nsStatementID = HTRDF_reificate (me, predicate->m_sName,
1574: sTarget, dStatementID,
1575: predicate->m_sID);
1576: } else {
1577: StrAllocCopy(nsStatementID, dStatementID);
1578: HTRDF_addTriple(me, predicate->m_sName, sTarget,
1579: nsStatementID);
1580: }
1581: } else if (HTElement_instanceOfData(n2)) {
2.3 frystyk 1582: char * tValue = NULL;
1583: char * sValue = n2->m_sContent;
2.1 frystyk 1584: /* we've got real data */
1585: /*
1586: * Only if the content is not empty PCDATA (whitespace that is)
1587: * print the triple
1588: */
1589: tValue = trim(sValue);
1590: if (tValue && strlen(tValue) > 0) {
1591: if (reificate) {
1592: HT_FREE(nsStatementID);
1593: nsStatementID = HTRDF_reificate (me, predicate->m_sName,
1594: sTarget, tValue,
1595: predicate->m_sID);
1596: StrAllocCopy(predicate->m_sID, nsStatementID);
1597: } else {
1598: HTRDF_addTriple(me, predicate->m_sName, sTarget, tValue);
1599: }
1600: }
1601: HT_FREE(tValue);
1602: } else if (HTRDF_isContainer(me, n2)) {
/* When the enclosing description has a target, the statement is made
** about that target's rdf:about value instead of sTarget */
1603: HTElement *target = HTElement_target(description);
2.3 frystyk 1604: char * aboutTarget =
2.1 frystyk 1605: target ?
1606: HTElement_getAttribute2(target, RDFMS, "about") : NULL;
2.3 frystyk 1607: char * sCollectionID = HTRDF_processContainer(me, n2);
2.1 frystyk 1608: StrAllocCopy(nsStatementID, sCollectionID);
1609: /* Attach the collection to the current predicate */
1610: if (target) {
1611: if (reificate) {
1612: HT_FREE(nsStatementID);
1613: nsStatementID=HTRDF_reificate (me, predicate->m_sName,
1614: aboutTarget,
1615: sCollectionID,
1616: predicate->m_sID);
1617: StrAllocCopy(predicate->m_sID, nsStatementID);
1618: } else {
1619: HTRDF_addTriple(me, predicate->m_sName, aboutTarget,
1620: sCollectionID);
1621: }
1622: } else {
1623: if (reificate) {
1624: HT_FREE(nsStatementID);
1625: nsStatementID=HTRDF_reificate (me, predicate->m_sName,
1626: sTarget, sCollectionID,
1627: predicate->m_sID);
1628: StrAllocCopy(predicate->m_sID, nsStatementID);
1629: } else {
1630: HTRDF_addTriple(me, predicate->m_sName, sTarget,
1631: sCollectionID);
1632: }
1633: }
1634: HT_FREE(sCollectionID);
1635: } else if (HTRDF_isTypedPredicate(me, n2)) {
/* A second typed node is reported but still processed below */
1636: if (bUsedTypedNodeProduction) {
1637: HTPrint("Only one typedNode allowed inside a predicate (Extra typedNode: %s )\n", n2->m_sName);
1638: } else {
1639: bUsedTypedNodeProduction = YES;
1640: }
1641: HT_FREE(nsStatementID);
1642: nsStatementID = HTRDF_processTypedNode(me, n2);
1643: HTRDF_addTriple(me, predicate->m_sName, sTarget, nsStatementID);
1644: }
1645: }
1646: return nsStatementID;
1647: } /* end of block */
/* Not reached: the block above always returns */
1648: return NULL;
1649: }
1650:
1651: /*
1652: * processDescription manages Description elements
1653: *
1654: * description The Description element itself
1655: * inPredicate Is this is a nested description
1656: * reificate Do we need to reificate
1657: * createBag Do we create a bag container
1658: *
1659: * return An ID for the description
1660: *
1661: */
2.3 frystyk 1662: PUBLIC char * HTRDF_processDescription (HTRDF * me,
2.1 frystyk 1663: HTElement * description,
1664: BOOL inPredicate,
1665: BOOL reificate,
1666: BOOL createBag)
1667: {
1668: int iChildCount = 1;
1669: BOOL bOnce = YES;
1670:
2.3 frystyk 1671: char * sAbout = HTElement_getAttribute2(description, RDFMS, "about");
1672: char * sAboutEach = HTElement_getAttribute2(description, RDFMS, "aboutEach");
1673: char * sAboutEachPrefix = HTElement_getAttribute2(description, RDFMS,
2.1 frystyk 1674: "aboutEachPrefix");
2.3 frystyk 1675: char * sBagid = HTElement_getAttribute2(description, RDFMS, "bagID");
1676: char * sID = HTElement_getAttribute2(description, RDFMS, "ID");
2.1 frystyk 1677: HTElement *target = HTElement_target(description);
1678: BOOL hasTarget = HTList_isEmpty(description->m_vTargets) ? NO : YES;
1679: BOOL targetIsContainer = NO;
2.3 frystyk 1680: char * sTargetAbout = NULL;
1681: char * sTargetBagID = NULL;
1682: char * sTargetID = NULL;
1683: char * dName = NULL;
1684: char * aName = NULL;
2.1 frystyk 1685:
1686: /*
1687: ** Return immediately if the description has already been managed
1688: */
1689: if (description->m_bDone) return description->m_sID;
1690:
1691: StrAllocMCopy(&dName, RDFMS, "Description", NULL);
1692: StrAllocMCopy(&aName, RDFMS, "about", NULL);
1693:
1694: /*
1695: ** Determine what the target of the Description reference is
1696: */
1697: if (hasTarget) {
2.3 frystyk 1698: char * sTargetID2 = HTElement_getAttribute2(target, RDFMS, "ID");
2.1 frystyk 1699: sTargetAbout = HTElement_getAttribute2(target, RDFMS, "about");
1700: sTargetBagID = HTElement_getAttribute2(target, RDFMS, "bagID");
1701: if (me->m_sSource && sTargetID2) {
1702: StrAllocMCopy(&sTargetID, me->m_sSource, sTargetID2, NULL);
1703: } else {
1704: StrAllocCopy(sTargetID, sTargetID2);
1705: }
1706: /*
1707: * Target is collection if
1708: * 1. it is identified with bagID attribute
1709: * 2. it is identified with ID attribute and is a collection
1710: */
1711: if (sTargetBagID && sAbout) {
1712: targetIsContainer = !strcmp(&(sAbout[1]), sTargetBagID);
1713: } else {
1714: if (sTargetID && sAbout && !strcmp(&(sAbout[1]), sTargetID) &&
1715: HTRDF_isContainer(me, target))
1716: targetIsContainer = YES;
1717: }
1718: HT_FREE(sTargetID);
1719: }
1720:
1721: /*
1722: * Check if there are properties encoded using the abbreviated
1723: * syntax
1724: */
1725: HTRDF_expandAttributes(me, description, description);
1726:
1727: /*
1728: * Manage the aboutEach attribute here
1729: */
1730: if (sAboutEach && hasTarget) {
1731: if (HTRDF_isContainer(me, target)) {
1732: HTList *cur = target->m_children;
1733: HTElement *ele = NULL;
1734: while ((ele= (HTElement *) HTList_nextObject(cur))) {
1735: if (HTRDF_isListItem(me, ele)) {
2.3 frystyk 1736: char * sResource = HTRDF_getResource(me, ele);
2.1 frystyk 1737: if (sResource) {
1738: HTElement * newDescription = NULL;
1739: HTElement * ele2;
1740: HTList * cur2 = description->m_children;
1741:
1742: /*
1743: * Manage <li resource="..." /> case
1744: */
1745: if (sResource) {
1746: HTAssocList *newAL = HTAssocList_new();
1747: newDescription = HTElement_new(dName, newAL);
1748: HTElement_addAttribute(newDescription, aName, sResource);
1749: }
1750:
1751: while ((ele2 = (HTElement *) HTList_nextObject(cur2))){
1752: if (newDescription) HTElement_addChild(newDescription, ele2);
1753: }
1754:
1755: if (newDescription)
1756: HTRDF_processDescription(me, newDescription, NO, NO, NO);
1757:
1758: /* Not needed anymore */
1759: HTElement_delete(newDescription);
1760:
1761: } else {
1762: /**
1763: * Otherwise we have a structured value inside <li>
1764: *
1765: * loop through the children of <li>
1766: * (can be only one)
1767: */
1768: HTList *cur2 = ele->m_children;
1769: HTElement *ele2 = NULL;
1770: while ((ele2 = (HTElement *) HTList_nextObject(cur2))) {
1771: HTAssocList *newAL = HTAssocList_new();
1772: HTElement *newNode = HTElement_new(dName, newAL);
1773: HTList *cur3 = description->m_children;
1774: HTElement *ele3 = NULL;
1775: /* loop through the items in the
1776: * description with aboutEach
1777: * and add them to the target
1778: */
1779: while ((ele3 = (HTElement *)
1780: HTList_nextObject(cur3))) {
1781: HTElement_addChild(newNode, ele3);
1782: }
1783: HTElement_addTarget(newNode, ele2);
1784: HTRDF_processDescription(me, newNode, YES, NO, NO);
1785: }
1786: }
1787: } else if (HTRDF_isTypedPredicate(me, ele)) {
1788: HTAssocList *newAL = HTAssocList_new();
1789: HTElement *newNode = HTElement_new(dName, newAL);
1790: HTList *cur2 = description->m_children;
1791: HTElement *ele2 = NULL;
1792: while ((ele2 = (HTElement *) HTList_nextObject(cur2))) {
1793: HTElement_addChild(newNode, ele2);
1794: }
1795: HTElement_addTarget(newNode, ele);
1796: HTRDF_processDescription(me, newNode, YES, NO, NO);
1797: }
1798: } /* end of while */
1799: } else if (HTRDF_isDescription(me, target)) {
1800: HTList *cur = target->m_children;
1801: HTElement *ele = NULL;
1802: while ((ele = (HTElement *) HTList_nextObject(cur))) {
1803: HTAssocList *newAL = HTAssocList_new();
1804: HTElement *newNode = HTElement_new(dName, newAL);
1805: HTList *cur2 = description->m_children;
1806: HTElement *ele2 = NULL;
1807: while ((ele2 = (HTElement *) HTList_nextObject(cur2))) {
1808: HTElement_addChild(newNode, ele2);
1809: }
1810: HTElement_addTarget(newNode, ele);
1811: HTRDF_processDescription(me, newNode, YES, NO, NO);
1812: } /* end of while */
1813: }
1814:
1815: HT_FREE(dName);
1816: HT_FREE(aName);
1817: return NULL;
1818: }
1819:
1820: /*
1821: * Manage the aboutEachPrefix attribute here
1822: */
1823: if (sAboutEachPrefix) {
1824: if (hasTarget) {
1825: HTList *cur = description->m_vTargets;
1826: HTElement *target = NULL;
1827: while ((target = (HTElement *) HTList_nextObject(cur))) {
1828: HTList *cur2 = description->m_children;
1829: HTElement *ele2 = NULL;
1830: HTElement *newDescription = NULL;
1831: HTAssocList *newAL = HTAssocList_new();
1832: sTargetAbout = HTElement_getAttribute2(target, RDFMS, "about");
1833: newDescription = HTElement_new(dName, newAL);
1834: HTElement_addAttribute(newDescription, aName, sTargetAbout);
1835: while ((ele2 = (HTElement *) HTList_nextObject(cur2))) {
1836: HTElement_addChild(newDescription, ele2);
1837: }
1838: HTRDF_processDescription(me, newDescription, NO, NO, NO);
1839: }
1840: }
1841:
1842: HT_FREE(dName);
1843: HT_FREE(aName);
1844: return NULL;
1845: }
1846: /*
1847: * Enumerate through the children
1848: */
1849: {
1850: HTList *cur = description->m_children;
1851: HTElement *n = NULL;
1852: while ((n = (HTElement *) HTList_nextObject(cur))) {
1853: if (HTRDF_isDescription(me, n))
1854: HTPrint("Can not nest Description inside Description\n");
1855: else if (HTRDF_isListItem(me, n))
1856: HTPrint("Can not nest List Item inside Description\n");
1857: else if (HTRDF_isContainer(me, n))
1858: HTPrint("Can not nest Container inside Description\n");
1859: else if (HTRDF_isTypedPredicate(me, n)) {
2.3 frystyk 1860: char * sChildID = NULL;
2.1 frystyk 1861: if (hasTarget && targetIsContainer) {
1862: sChildID = HTRDF_processPredicate(me, n, description,
1863: target->m_sBagID ?
1864: target->m_sBagID :
1865: target->m_sID, NO);
1866: StrAllocCopy(description->m_sID, sChildID);
1867: createBag = NO;
1868: } else if (hasTarget) {
1869: sChildID = HTRDF_processPredicate(me, n, description,
1870: target->m_sBagID ?
1871: target->m_sBagID :
1872: target->m_sID, reificate);
1873: StrAllocCopy(description->m_sID, sChildID);
1874: } else if (!hasTarget && !inPredicate) {
1875: if (!description->m_sID) {
2.3 frystyk 1876: char * nr = HTRDF_newReificationID(me);
2.1 frystyk 1877: StrAllocCopy(description->m_sID, nr);
1878: HT_FREE(nr);
1879: }
1880: if (!sAbout) {
1881: if (sID)
1882: sAbout = sID;
1883: else
1884: sAbout = description->m_sID;
1885: }
1886: sChildID = HTRDF_processPredicate(me, n, description,
1887: sAbout, sBagid ?
1888: YES : reificate);
1889:
1890: } else if (!hasTarget && inPredicate) {
1891: if (!sAbout) {
1892: if (sID) {
1893: StrAllocCopy(description->m_sID, sID);
1894: sAbout = sID;
1895: } else {
1896: if (!description->m_sID) {
2.3 frystyk 1897: char * nr = HTRDF_newReificationID(me);
2.1 frystyk 1898: StrAllocCopy(description->m_sID, nr);
1899: HT_FREE(nr);
1900: }
1901: sAbout = description->m_sID;
1902: }
1903: } else {
2.7 barstow 1904: if (description->m_sID != sAbout)
1905: StrAllocCopy(description->m_sID, sAbout);
2.1 frystyk 1906: }
1907: sChildID = HTRDF_processPredicate(me, n, description, sAbout, NO);
1908: }
1909: /*
1910: * Each Description block creates also a Bag node which
1911: * has links to all properties within the block IF
1912: * the m_bCreateBags variable is true
1913: */
1914: if (sBagid || (me->m_bCreateBags && createBag)) {
2.3 frystyk 1915: char * sNamespace = RDFMS;
2.1 frystyk 1916: if (bOnce && sChildID) {
2.3 frystyk 1917: char * tName = NULL;
1918: char * bName = NULL;
2.1 frystyk 1919: bOnce = NO;
1920: if (!description->m_sBagID) {
2.3 frystyk 1921: char * nr = HTRDF_newReificationID(me);
2.1 frystyk 1922: StrAllocCopy(description->m_sBagID, nr);
1923: HT_FREE(nr);
1924: }
1925: if (!description->m_sID)
1926: StrAllocCopy(description->m_sID,
1927: description->m_sBagID);
1928: StrAllocMCopy(&tName, sNamespace, "type", NULL);
1929: StrAllocMCopy(&bName, sNamespace, "Bag", NULL);
1930: HTRDF_addTriple(me, tName, description->m_sBagID, bName);
1931: HT_FREE(tName);
1932: HT_FREE(bName);
1933:
1934: }
1935: if (sChildID) {
2.3 frystyk 1936: char * tName = NULL;
2.1 frystyk 1937: char si[20];
1938: sprintf(si, "%d", iChildCount);
1939: StrAllocMCopy(&tName, sNamespace, "_", si, NULL);
1940: HTRDF_addTriple(me, tName, description->m_sBagID, sChildID);
1941: iChildCount++;
1942: HT_FREE(tName);
1943: }
1944: }
1945: HT_FREE(sChildID);
1946: }
1947: }
1948: } /* end of block*/
1949:
1950: description->m_bDone = YES;
1951:
1952: HT_FREE(dName);
1953: HT_FREE(aName);
1954: return (description->m_sID);
1955: }
1956:
1957: /*
1958: * Given an XML document (well-formed HTML, for example),
1959: * look for a suitable element to start parsing from
1960: *
1961: */
1962: PUBLIC BOOL HTRDF_processXML (HTRDF *me, HTElement *ele)
1963: {
1964: if (me && ele) {
1965: if (HTRDF_isRDF(me, ele)) {
1966: if (HTRDF_isRDFroot(me, ele)) {
1967: HTRDF_processRDF(me, ele);
1968: } else if (HTRDF_isDescription(me, ele)) {
1969: HTRDF_processDescription(me, ele, NO, me->m_bCreateBags,
1970: me->m_bCreateBags);
1971: }
1972: } else {
1973: HTList *cur = ele->m_children;
1974: HTElement *child = NULL;
1975: while ((child = (HTElement *) HTList_nextObject(cur))) {
1976: HTRDF_processXML(me, child);
1977: }
1978: }
1979:
1980: /* MISSING RECURSION */
1981:
1982: return YES;
1983: }
1984: return NO;
1985: }
1986:
1987: /*
1988: * Return the root element pointer. This requires the parsing
1989: * has been already done.
1990: */
1991: PUBLIC HTElement * HTRDF_root (HTRDF *me)
1992: {
1993: return me ? me->m_root : NULL;
1994: }
1995:
1996: /*
1997: * Return the full namespace URI for a given prefix sPrefix.
1998: * The default namespace is identified with xmlns prefix.
1999: * The namespace of xmlns attribute is an empty string.
2000: */
2001:
2.3 frystyk 2002: PUBLIC char * HTRDF_namespace(HTRDF * me, char * sPrefix)
2.1 frystyk 2003: {
2.3 frystyk 2004: char * nPrefix = NULL;
2.1 frystyk 2005: HTAssocList * calist;
2006: HTList * cur = me->m_namespaceStack;
2007:
2008: if (!sPrefix)
2009: StrAllocCopy(nPrefix, "xmlns");
2010:
2011: while ((calist = (HTAssocList *) HTList_nextObject(cur))) {
2.3 frystyk 2012: char * sValue = HTAssocList_findObjectCaseSensitiveExact(calist, sPrefix);
2.1 frystyk 2013: if (sValue) {
2014: StrAllocCopy(nPrefix, sValue);
2015: return nPrefix;
2016: }
2017: }
2018: /*
2019: * Give error only if
2020: * 1. the prefix is not from the reserved xml namespace
2021: * 2. the prefix is not xmlns which is to look for the default
2022: * namespace
2023: */
2024: if (!strcmp(sPrefix, XMLSCHEMA)) {
2025: StrAllocCopy(nPrefix, sPrefix);
2026: return nPrefix;
2027: } else if (!strcmp(sPrefix, "xmlns")) {
2028: StrAllocCopy(nPrefix, "");
2029: return nPrefix;
2030: } else
2031: HTPrint("Unresolved Namespace prefix %s\n", sPrefix);
2032:
2033: StrAllocCopy(nPrefix, "");
2034: return nPrefix;
2035: }
2036:
2037: /*
2038: * Methods to determine whether we are parsing
2039: * parseType="Literal" or parseType="Resource"
2040: */
2041:
2042: PUBLIC BOOL HTRDF_parseLiteral(HTRDF *me)
2043: {
2044: HTElement *e = NULL;
2045: HTList *cur = me->m_elementStack;
2046: if (!HTList_isEmpty(me->m_elementStack)) {
2047: while((e = (HTElement *) HTList_nextObject(cur))) {
2.3 frystyk 2048: char * sParseType = NULL;
2.1 frystyk 2049: sParseType = HTElement_getAttribute2(e, RDFMS, "parseType");
2050: if (sParseType) {
2051: if (strcmp(sParseType, "Resource"))
2052: return YES;
2053: }
2054: }
2055: }
2056: return NO;
2057: }
2058:
2059: /*
2060: * Methods to determine whether we are parsing
2061: * parseType="Literal" or parseType="Resource"
2062: */
2063:
2064: PUBLIC BOOL HTRDF_parseResource(HTRDF *me)
2065: {
2066: HTElement *e = NULL;
2067: HTList *cur = me->m_elementStack;
2068: if (!HTList_isEmpty(me->m_elementStack)) {
2069: while((e = (HTElement *) HTList_nextObject(cur))) {
2.3 frystyk 2070: char * sParseType = NULL;
2.1 frystyk 2071: sParseType = HTElement_getAttribute2(e, RDFMS, "parseType");
2072: if (sParseType) {
2073: if (!strcmp(sParseType, "Resource"))
2074: return YES;
2075: }
2076: }
2077: }
2078: return NO;
2079: }
2080: /*
2081: * checkAttributes goes through the attributes of element e<
2082: * to see
2083: * 1. if there are symbolic references to other nodes in the data model.
2084: * in which case they must be stored for later resolving with
2085: * resolveLater method.
2086: * 2. if there is an identity attribute, it is registered using
2087: * registerResource or registerID method.
2088: *
2089: */
2090:
2091: PRIVATE void HTRDF_checkAttributes(HTRDF *me, HTElement *e)
2092: {
2093: {
2.3 frystyk 2094: char * sResource = HTElement_getAttribute2(e, RDFMS, "resource");
2.1 frystyk 2095:
2096: if (sResource && sResource[0] == '#')
2097: HTRDF_resolveLater(me, e);
2098: }
2099: {
2.3 frystyk 2100: char * sAboutEach = HTElement_getAttribute2(e, RDFMS, "aboutEach");
2.1 frystyk 2101:
2102: if (sAboutEach && sAboutEach[0] == '#')
2103: HTRDF_resolveLater(me, e);
2104: }
2105: {
2.3 frystyk 2106: char * sAboutEachPrefix = HTElement_getAttribute2(e, RDFMS,
2.1 frystyk 2107: "aboutEachPrefix");
2108:
2109: if (sAboutEachPrefix && sAboutEachPrefix[0] == '#')
2110: HTRDF_resolveLater(me, e);
2111: }
2112: {
2.3 frystyk 2113: char * sAbout = HTElement_getAttribute2(e, RDFMS, "about");
2.1 frystyk 2114: if (sAbout) {
2115: if (sAbout[0] == '#')
2116: HTRDF_resolveLater(me, e);
2117: else
2118: HTRDF_registerResource(me, e);
2119: }
2120: }
2121:
2122: {
2.3 frystyk 2123: char * sBagID = HTElement_getAttribute2(e, RDFMS, "bagID");
2.1 frystyk 2124:
2125: if (sBagID) {
2126: HTRDF_registerID(me, sBagID, e);
2127: StrAllocCopy(e->m_sBagID, sBagID);
2128: }
2129: }
2130: {
2.3 frystyk 2131: char * sID = HTElement_getAttribute2(e, RDFMS, "ID");
2.1 frystyk 2132: if (sID) {
2133: HTRDF_registerID(me, sID, e);
2134: StrAllocCopy(e->m_sID, sID);
2135: }
2136: }
2137: }
2138: /*
2139: * Add the element e to the m_vResolveQueue
2140: * to be resolved later.
2141: */
2142: PUBLIC void HTRDF_resolveLater(HTRDF *me, HTElement *e)
2143: {
2144: HTList_addObject(me->m_vResolveQueue, e);
2145: }
2146: /*
2147: * Add an element e to the Hashtable m_hIDtable
2148: * which stores all nodes with an ID
2149: */
2150:
2.3 frystyk 2151: PUBLIC void HTRDF_registerID(HTRDF *me, char * sID, HTElement *e)
2.1 frystyk 2152: {
2153: if (HTHashtable_object(me->m_hIDtable, sID))
2154: HTPrint("Node ID %s redefined", sID);
2155: HTHashtable_addObject(me->m_hIDtable, sID, e);
2156: }
2157: /*
2158: * Add an element e to the Vector m_vResources
2159: * which stores all nodes with an URI
2160: */
2161: PUBLIC void HTRDF_registerResource(HTRDF *me, HTElement *e)
2162: {
2163: HTList_addObject(me->m_vResources, e);
2164: }
2165:
2166: /*
2167: * Look for a node by name sID from the Hashtable
2168: * m_hIDtable of all registered IDs.
2169: */
2170:
2.3 frystyk 2171: PUBLIC HTElement *HTRDF_lookforNode(HTRDF *me, char * sID)
2.1 frystyk 2172: {
2173: if (sID)
2174: return (HTElement *) HTHashtable_object(me->m_hIDtable, sID);
2175: return NULL;
2176: }
2177:
2178: /*
2179: ** Special method to deal with rdf:resource attribute
2180: */
2.3 frystyk 2181: PUBLIC char * HTRDF_getResource(HTRDF *me, HTElement *e)
2.1 frystyk 2182: {
2.3 frystyk 2183: char * sResource = HTElement_getAttribute2(e, RDFMS, "resource");
2.1 frystyk 2184: if (sResource != NULL && sResource[0] == '\0')
2185: sResource = me->m_sSource;
2186: return sResource;
2187: }
2188:
2189: /*
2190: ** Take an element ele with its parent element parent
2191: ** and evaluate all its attributes to see if they are non-RDF specific
2192: ** and non-XML specific in which case they must become children of
2193: ** the ele node.
2194: */
2195: PRIVATE BOOL HTRDF_expandAttributes (HTRDF * me, HTElement * parent, HTElement * ele)
2196: {
2197: BOOL foundAbbreviation = NO;
2.3 frystyk 2198: char * sAttribute = NULL;
2199: char * sValue = NULL;
2.1 frystyk 2200: HTAssoc * assoc;
2201: HTAssocList * cur = ele->m_attributes;
2202: int lxmlschema = strlen(XMLSCHEMA);
2203: int lrdfms = strlen(RDFMS);
2204:
2205: while ((assoc= (HTAssoc *) HTList_nextObject(cur))) {
2206: int latt;
2207: sAttribute = HTAssoc_name(assoc);
2208: sValue = HTAssoc_value(assoc);
2.4 barstow 2209:
2.1 frystyk 2210: latt = strlen(sAttribute);
2211: if (!strncmp(sAttribute, XMLSCHEMA, lxmlschema))
2212: continue;
2213:
2214: if (!strncmp(sAttribute, RDFMS, lrdfms) &&
2215: (sAttribute[lrdfms]!='_') &&
2216: latt > 5 && strcmp(&(sAttribute[latt-5]), "value") &&
2217: strcmp(&(sAttribute[latt-4]), "type"))
2218: continue;
2219:
2220: if (strlen(sValue) > 0) {
2221: HTAssocList * newAL = HTAssocList_new();
2222: HTElement * newElement = HTElement_new(sAttribute, newAL);
2223: HTElement * newData = HTElement_new2(sValue);
2224: HTElement_addChild(newElement, newData);
2225: HTElement_addChild(parent, newElement);
2226: foundAbbreviation = YES;
2227: }
2228: }
2229: return foundAbbreviation;
2230: }
2231:
2232: /**
2233: * Create a new reification ID by using a name part and an
2234: * incremental counter m_iReificationCounter.
2235: */
2.3 frystyk 2236: PUBLIC char * HTRDF_newReificationID (HTRDF *me)
2.1 frystyk 2237: {
2.3 frystyk 2238: char * nsid = NULL;
2.1 frystyk 2239: char nsrc[20];
2240: me->m_iReificationCounter++;
2241: sprintf(nsrc, "%d", me->m_iReificationCounter);
2242: if (!me->m_sSource) {
2243: StrAllocMCopy(&nsid, "genid", nsrc, NULL);
2244: } else {
2245: StrAllocMCopy(&nsid, me->m_sSource, "#genid", nsrc, NULL);
2246: }
2247: return nsid;
2248: }
2249:
2250: /*
2251: * reificate creates one new node and four new triples
2252: * and returns the ID of the new node
2253: */
2254:
2.3 frystyk 2255: PRIVATE char * HTRDF_reificate(HTRDF *me, char * sPredicate, char * sSubject,
2256: char * sObject, char * sNodeID)
2.1 frystyk 2257: {
2.3 frystyk 2258: char * sName = NULL;
2259: char * pName = NULL;
2260: char * oName = NULL;
2261: char * tName = NULL;
2262: char * stName = NULL;
2263: char * tNodeID = NULL;
2.1 frystyk 2264:
2265: if (!sNodeID)
2266: tNodeID = HTRDF_newReificationID(me);
2267: else
2268: StrAllocCopy(tNodeID, sNodeID);
2269:
2270: StrAllocMCopy(&sName, RDFMS, "subject", NULL);
2271: StrAllocMCopy(&pName, RDFMS, "predicate", NULL);
2272: StrAllocMCopy(&oName, RDFMS, "object", NULL);
2273: StrAllocMCopy(&tName, RDFMS, "type", NULL);
2274: StrAllocMCopy(&stName, RDFMS, "Statement", NULL);
2275:
2276: /*
2277: * The original statement must remain in the data model
2278: */
2279: HTRDF_addTriple(me, sPredicate, sSubject, sObject);
2280:
2281: /*
2282: * Do not reificate reificated properties
2283: */
2284: if (strcmp(sPredicate, sName) && strcmp(sPredicate, pName) &&
2285: strcmp(sPredicate, oName) && strcmp(sPredicate, tName)) {
2286:
2287: /* Reificate by creating 4 new triples */
2288: HTRDF_addTriple(me, pName, tNodeID, sPredicate);
2289: HTRDF_addTriple(me, sName, tNodeID, (sSubject[0]=='\0' ? me->m_sSource: sSubject));
2290: HTRDF_addTriple(me, oName, tNodeID, sObject);
2291: HTRDF_addTriple(me, tName, tNodeID, stName);
2292: } else
2293: HT_FREE(tNodeID);
2294:
2295: HT_FREE(sName);
2296: HT_FREE(pName);
2297: HT_FREE(oName);
2298: HT_FREE(tName);
2299: HT_FREE(stName);
2300:
2301: return tNodeID;
2302: }
2303: /*
2304: * Create a new triple and add it to the m_triples List
2305: * Send the triple to the Output stream
2306: */
2307:
2.3 frystyk 2308: PUBLIC void HTRDF_addTriple (HTRDF *me, char * sPredicate, char * sSubject,
2309: char * sObject)
2.1 frystyk 2310: {
2311: HTTriple *t = NULL;
2312:
2313: /*
2314: * If there is no subject (about=""), then use the URI/filename where
2315: * the RDF description came from
2316: */
2317: if (!sPredicate || !sSubject || !sObject) {
2318: HTPrint("Predicate %s when subject %s and object %s \n",
2319: sPredicate ? sPredicate : "null",
2320: sSubject ? sSubject : "null",
2321: sObject ? sObject : "null");
2322: return;
2323: }
2324:
2325: if (sSubject[0]=='\0')
2326: sSubject = me->m_sSource;
2327:
2328: t = HTTriple_new(sPredicate, sSubject, sObject);
2329:
2330: /* Call the triple callback handler (if any) with this new triple */
2331: if (me->newTripleInstance && t) (*(me->newTripleInstance))(me, t, me->tripleContext);
2332:
2333: HTList_addObject(me->m_triples, t);
2334: }
2335:
2336: /*
2337: * createBags method allows one to determine whether SiRPAC
2338: * produces Bag instances for each Description block.
2339: * The default setting is not to generate them.
2340: */
2341:
2342: PUBLIC void HTRDF_createBags(HTRDF *me, BOOL b)
2343: {
2344: if (me)
2345: me->m_bCreateBags = b;
2346: }
2347:
2348: /*
2349: Set output stream for RDF parser
2350: */
2351:
2352: PUBLIC void HTRDF_setOutputStream(HTRDF *me, HTStream *ostream)
2353: {
2354: if (me)
2355: me->ostream = ostream;
2356: }
2357:
2358: PUBLIC BOOL HTRDF_registerNewTripleCallback (HTRDF * me, HTTripleCallback_new * cbf, void * context)
2359: {
2360: if (me) {
2361: me->newTripleInstance = cbf;
2362: me->tripleContext = context;
2363: return YES;
2364: }
2365: return NO;
2366: }
2367:
2368: PUBLIC BOOL HTRDF_registerNewParserCallback (HTRDFCallback_new * me, void * context)
2369: {
2370: RDFInstance = me;
2371: RDFInstanceContext = context;
2372: return YES;
2373: }
2374:
2375: /* ------------------------------------------------------------------------- */
2376: /* HTRDFTriples STREAM HANDLERS */
2377: /* ------------------------------------------------------------------------- */
2378:
2379: PRIVATE int generate_triples(HTStream *me)
2380: {
2381: HTRDF *rdfp = me ? me->rdfparser : NULL;
2382: if (rdfp) {
2383:
2384: HTRDF_resolve(rdfp);
2385:
2386: HTRDF_processXML(rdfp, HTRDF_root(rdfp));
2387:
2388: return HT_OK;
2389: }
2390: return HT_ERROR;
2391: }
2392:
2393: PRIVATE int HTRDFTriples_flush (HTStream * me)
2394: {
2395: if (me->target)
2396: return (*me->target->isa->flush)(me->target);
2397: return HT_OK;
2398: }
2399:
2400: PRIVATE int HTRDFTriples_free (HTStream * me)
2401: {
2402: int status = HT_OK;
2403:
2404: status = generate_triples(me);
2405:
2406: HTRDF_delete(me->rdfparser);
2407:
2408: if (me->target) {
2409: if ((status = (*me->target->isa->_free)(me->target)) == HT_WOULD_BLOCK)
2410: return HT_WOULD_BLOCK;
2411: }
2412: HTTRACE(XML_TRACE, "RDF Parser.. FREEING...\n");
2413: HT_FREE(me);
2414: return status;
2415: }
2416:
2417: PRIVATE int HTRDFTriples_abort (HTStream * me, HTList * e)
2418: {
2419: HTTRACE(XML_TRACE, "RDF Parser.. ABORTING...\n");
2420: HTRDF_delete(me->rdfparser);
2421: if (me->target)
2422: (*me->target->isa->abort)(me->target, NULL);
2423: HT_FREE(me);
2424: return HT_ERROR;
2425: }
2426:
2427: PRIVATE int HTRDFTriples_write (HTStream * me, const char * buf, int len)
2428: {
2429: return HT_OK;
2430: }
2431:
2432: PRIVATE int HTRDFTriples_putCharacter (HTStream * me, char c)
2433: {
2434: return HTRDFTriples_write(me, &c, 1);
2435: }
2436:
2437: PRIVATE int HTRDFTriples_putString (HTStream * me, const char * s)
2438: {
2439: return HTRDFTriples_write(me, s, (int) strlen(s));
2440: }
2441:
2442: PRIVATE const HTStreamClass HTRDFTriplesClass =
2443: {
2444: "rdf",
2445: HTRDFTriples_flush,
2446: HTRDFTriples_free,
2447: HTRDFTriples_abort,
2448: HTRDFTriples_putCharacter,
2449: HTRDFTriples_putString,
2450: HTRDFTriples_write
2451: };
2452:
2453: PRIVATE HTStream * RDFParser_new (HTRequest * request,
2454: void * param,
2455: HTFormat input_format,
2456: HTFormat output_format,
2457: HTStream * output_stream)
2458: {
2459: HTStream * me = NULL;
2460: if ((me = (HTStream *) HT_CALLOC(1, sizeof(HTStream))) == NULL)
2461: HT_OUTOFMEM("HTRDFTriples_new");
2462: me->isa = &HTRDFTriplesClass;
2463: me->state = HT_OK;
2464: me->request = request;
2465: me->target = output_stream ? output_stream : HTErrorStream();
2466:
2467: /* Now create the RDF parser instance */
2468: if ((me->rdfparser = HTRDF_new()) == NULL) {
2469: HT_FREE(me);
2470: return HTErrorStream();
2471: }
2472:
2473: /* Set the source (I guess mostly to follow SiRPAC API) */
2474: {
2475: char * uri = HTAnchor_address((HTAnchor *) HTRequest_anchor(request));
2476: HTRDF_setSource(me->rdfparser, uri);
2477: HT_FREE(uri);
2478: }
2479:
2480: /* Where are we putting data? */
2481: HTRDF_setOutputStream(me->rdfparser, me);
2482:
2483: /* If you want to create Bags, change it to YES */
2484: HTRDF_createBags(me->rdfparser, NO);
2485:
2486: /* Register our new XML Instance handler */
2487: /* @@@ THIS SHOULD BE DONE USING XML NAMESPACE SO THAT WE DON'T CONFLICT @@@ */
2488: HTXMLCallback_registerNew(rdf_newInstance, me->rdfparser);
2489:
2490: HTTRACE(XML_TRACE, "RDF Parser.. Stream created\n");
2491:
2492: return me;
2493: }
2494:
2495: PUBLIC HTStream * HTRDFParser_new (HTRequest * request,
2496: void * param,
2497: HTFormat input_format,
2498: HTFormat output_format,
2499: HTStream * output_stream)
2500: {
2501: return HTXML_new(request, param, input_format, output_format,
2502: RDFParser_new(request, param, input_format, output_format, output_stream));
2503: }
2504:
2505: PRIVATE void triple_newInstance (HTRDF * rdfp, HTTriple * t, void * context)
2506: {
2507: if (rdfp && t) {
2508: HTStream *ostream = rdfp->ostream;
2509: if (ostream) {
2510: PUTC(ostream,'(');
2511: PUTS(ostream, t->m_sPredicate);
2512: PUTC(ostream,',');
2513: PUTS(ostream, t->m_sSubject);
2514: PUTC(ostream,',');
2515: PUTS(ostream, t->m_sObject);
2516: PUTC(ostream,')');
2517: PUTC(ostream,'\n');
2518: }
2519: }
2520: }
2521:
2522: PUBLIC HTStream * HTRDFToTriples (HTRequest * request,
2523: void * param,
2524: HTFormat input_format,
2525: HTFormat output_format,
2526: HTStream * output_stream)
2527: {
2.2 frystyk 2528: HTStream * me = RDFParser_new(request, param, input_format, output_format, output_stream);
2.1 frystyk 2529: HTTRACE(XML_TRACE, "RDF Converter. To Triples\n");
2530:
2531: /* Register our own tripple instance handler */
2532: HTRDF_registerNewTripleCallback(me->rdfparser, triple_newInstance, NULL);
2533:
2534: /* Create an XML parser instance and return */
2535: return HTXML_new(request, param, input_format, output_format, me);
2536: }
2537:
2.9 barstow 2538: /*
2539: ** This function initializes the XML parser and RDF parsers
2540: ** that are used to synchronously parse a file of RDF or a
2541: ** a buffer of RDF.
2542: **
2.11 barstow 2543: ** Parameters:
2544: ** xmlparser - MODIFIED the XML parser to create. The caller is
2545: ** responsible for free'ing this pointer.
2546: ** rdfparser - MODIFIED the RDF parser to create. The caller is
2547: ** responsible for free'ing this pointer.
2548: ** stream - MODIFIED the HTStream needed by the RDF parser. The
2549: ** caller is responsible for free'ing this pointer.
2550: ** uri - the URI created from name. It is used by the RDF parser
2551: ** when creating anonymous node names. The caller is
2552: ** responsible for freeing this pointer.
2553: ** new_triple_callback - the callback invoked when a new triple
2554: ** is created. If NULL, the default handler will be invoked.
2.12 ! barstow 2555: ** context - a void pointer to pass to the new_triple_callback.
! 2556: ** Should set NULL if no data needs to be passed to the
! 2557: ** callback.
2.11 barstow 2558: ** name - the file name or buffer name to be used when the RDF
2559: ** parser needs a document name
2560: **
2561: ** Returns:
2562: ** YES if the initialization succeeds; otherwise NO is returned
2563: ** and an error message is logged.
2.9 barstow 2564: **/
2565:
2.11 barstow 2566: PRIVATE BOOL initialize_parsers(XML_Parser *xmlparser, HTRDF **rdfparser,
2.9 barstow 2567: HTStream **stream, char **uri, HTTripleCallback_new * new_triple_callback,
2.12 ! barstow 2568: void *context, const char * name)
2.9 barstow 2569: {
2570: /* Create an XML parser */
2571: #ifdef USE_NS
2572: *xmlparser = XML_ParserCreateNS (NULL, ':');
2573: #else
2574: *xmlparser = XML_ParserCreate (NULL);
2575: #endif /* USE_NS */
2576:
2.11 barstow 2577: if (!*xmlparser) {
2578: HTTRACE(XML_TRACE, "RDF_Parser. Could not allocate memory for XML parser.\n");
2579: return NO;
2580: }
2.9 barstow 2581:
2582: /* We need also need RDF parser to create the triples */
2583: *rdfparser = HTRDF_new();
2584: if (!*rdfparser) {
2.11 barstow 2585: XML_ParserFree(*xmlparser);
2586: HTTRACE(XML_TRACE, "RDF_Parser. Could not allocate memory for RDF parser.\n");
2587: return NO;
2.9 barstow 2588: }
2589:
2590: /* Must construct a URI from name for the parser */
2591: *uri = HTLocalToWWW (name, "file:");
2592: HTRDF_setSource(*rdfparser, *uri);
2593:
2594: HTRDF_createBags(*rdfparser, NO);
2595:
2596: if (new_triple_callback)
2.12 ! barstow 2597: HTRDF_registerNewTripleCallback(*rdfparser, new_triple_callback, context);
2.9 barstow 2598: else
2.12 ! barstow 2599: HTRDF_registerNewTripleCallback(*rdfparser, triple_newInstance, context);
2.9 barstow 2600:
2601: rdf_setHandlers(*xmlparser);
2602: XML_SetUserData(*xmlparser, *rdfparser);
2603:
2604: /* Create a stream to be used to process the triple output */
2605: if ((*stream = (HTStream *) HT_CALLOC(1, sizeof(HTStream))) == NULL) {
2606: HT_FREE(*uri);
2607: XML_ParserFree(*xmlparser);
2608: HTRDF_delete(*rdfparser);
2.11 barstow 2609: HTTRACE(XML_TRACE, "RDF_Parser. Could not allocate memory for HTStream.\n");
2610: return NO;
2.9 barstow 2611: }
2612: (*stream)->isa = &HTRDFTriplesClass;
2613: (*stream)->state = HT_OK;
2614: (*stream)->request = NULL; /* Don't have a request */
2615: (*stream)->target = NULL; /* Don't have another stream */
2616: (*stream)->rdfparser = *rdfparser;
2617:
2.11 barstow 2618: return YES;
2.9 barstow 2619: }
2620:
2.10 kahan 2621: /* HTRDF_parseFile
2.6 kahan 2622: ** ---------------
2623: ** This function parses a file of RDF in a synchronous, non-blocking
2.9 barstow 2624: ** way. In other words, the file is not asynchronously loaded.
2625: **
2.10 kahan 2626: ** Parameters:
2627: ** file_name the name of the file to parse
2628: ** new_triple_callback the callback that is invoked when a triple
2629: ** is created. If NULL, the default triple handler is
2630: ** invoked.
2.12 ! barstow 2631: ** context - a void pointer to pass to the new_triple_callback.
! 2632: ** Should set NULL if no data needs to be passed to the
! 2633: ** callback.
2.10 kahan 2634: ** Returns:
2.11 barstow 2635: ** Returns YES if the file is successfully parsed; otherwise NO is
2636: ** returned and an error message is logged.
2.6 kahan 2637: */
2638:
2.12 ! barstow 2639: PUBLIC BOOL HTRDF_parseFile (const char *file_name,
! 2640: HTTripleCallback_new * new_triple_callback, void *context)
2.5 barstow 2641: {
2642: char buff[512]; /* the file input buffer */
2643: FILE *fp;
2644: XML_Parser xmlparser;
2645: HTRDF *rdfparser;
2646: HTStream * stream = NULL;
2.6 kahan 2647: char *uri = NULL;
2.11 barstow 2648: BOOL status;
2.5 barstow 2649:
2650: /* Sanity check */
2.6 kahan 2651: if (!file_name) {
2.11 barstow 2652: HTTRACE(XML_TRACE, "HTRDF_parseFile. file_name is NULL\n");
2653: return NO;
2.6 kahan 2654: }
2655:
2.5 barstow 2656: /* If the file does not exist, return now */
2657: fp = fopen (file_name, "r");
2.6 kahan 2658: if (!fp) { /* annotation index file doesn't exist */
2.11 barstow 2659: HTTRACE(XML_TRACE, "HTRDF_parseFile. File open failed.");
2660: return NO;
2.6 kahan 2661: }
2.5 barstow 2662:
2.11 barstow 2663: /* Initialize the XML and RDF parsers */
2664: status = initialize_parsers(&xmlparser, &rdfparser, &stream, &uri,
2.12 ! barstow 2665: new_triple_callback, context, file_name);
2.11 barstow 2666: if (!status) {
2.6 kahan 2667: fclose (fp);
2.11 barstow 2668: return NO;
2.5 barstow 2669: }
2670:
2671: /*
2672: * The parsing occurs on one read buffer at a time instead of
2673: * reading everything into memory and then parsing
2674: */
2675: for (;;) {
2676: int done;
2677: int buff_len;
2678: fgets(buff, sizeof(buff), fp);
2679: if (ferror(fp)) {
2.6 kahan 2680: HT_FREE(uri);
2.5 barstow 2681: fclose (fp);
2682: XML_ParserFree(xmlparser);
2683: HTRDF_delete(rdfparser);
2684: HT_FREE(stream);
2.11 barstow 2685: HTTRACE(XML_TRACE, "HTRDF_parseFile. Error reading file.");
2686: return NO;
2.5 barstow 2687: }
2688: done = feof(fp);
2689: if (done)
2690: buff_len = 0;
2691: else
2692: buff_len = strlen (buff);
2693: if (! XML_Parse(xmlparser, buff, buff_len, done)) {
2694: fprintf (stderr, "Parse error at line %d:\n%s\n",
2695: XML_GetCurrentLineNumber(xmlparser),
2696: XML_ErrorString(XML_GetErrorCode(xmlparser)));
2.6 kahan 2697: HT_FREE(uri);
2.5 barstow 2698: fclose(fp);
2699: XML_ParserFree(xmlparser);
2700: HTRDF_delete(rdfparser);
2701: HT_FREE(stream);
2.11 barstow 2702: HTTRACE(XML_TRACE, "HTRDF_parseFile. Parse error.");
2703: return NO;
2.5 barstow 2704: }
2705: if (done)
2706: break;
2707: }
2708:
2709: /* The file has been parsed, generate the triples */
2710: generate_triples(stream);
2711:
2712: /* Cleanup */
2.6 kahan 2713: HT_FREE(uri);
2.5 barstow 2714: fclose (fp);
2.9 barstow 2715: XML_ParserFree(xmlparser);
2716: HTRDF_delete(rdfparser);
2717: HT_FREE(stream);
2718:
2.11 barstow 2719: return YES;
2.9 barstow 2720: }
2721:
2.10 kahan 2722: /* HTRDF_parseBuffer
2.9 barstow 2723: ** ---------------
2724: ** This function parses a buffer of RDF in a synchronous, non-blocking
2725: ** way.
2726: **
2.10 kahan 2727: ** Parameters:
2728: ** buffer the buffer to parse
2729: ** buffer_name the buffer's name. This is used by the parser
2730: ** when naming "anonymous" subjects
2731: ** buffer_len the buffer's length (number of bytes)
2732: ** new_triple_callback the callback that is invoked when a triple
2733: ** is created. If NULL, the default triple handler is
2734: ** invoked.
2.12 ! barstow 2735: ** context - a void pointer to pass to the new_triple_callback.
! 2736: ** Should set NULL if no data needs to be passed to the
! 2737: ** callback.
2.10 kahan 2738: ** Returns:
2.11 barstow 2739: ** Returns YES if the buffer is successfully parsed; otherwise NO is
2740: ** returned and an error message is logged.
2.9 barstow 2741: */
2742:
2.12 ! barstow 2743: PUBLIC BOOL HTRDF_parseBuffer (const char *buffer, const char *buffer_name,
! 2744: int buffer_len, HTTripleCallback_new * new_triple_callback, void *context)
2.9 barstow 2745: {
2746: XML_Parser xmlparser;
2747: HTRDF *rdfparser;
2748: HTStream * stream = NULL;
2749: char *uri;
2.11 barstow 2750: BOOL status;
2.9 barstow 2751:
2752: /* Sanity checks */
2.11 barstow 2753: if (!buffer) {
2754: HTTRACE(XML_TRACE, "HTRDF_parseBuffer. buffer is NULL");
2755: return NO;
2756: }
2757: if (buffer_len <= 0) {
2758: HTTRACE(XML_TRACE, "HTRDF_parseBuffer. buffer_len is <=0");
2759: return NO;
2760: }
2761: if (!buffer_name) {
2762: HTTRACE(XML_TRACE, "HTRDF_parseBuffer. buffer_name is NULL");
2763: return NO;
2764: }
2765:
2766: status = initialize_parsers(&xmlparser, &rdfparser, &stream, &uri,
2.12 ! barstow 2767: new_triple_callback, context, buffer_name);
2.11 barstow 2768: if (!status)
2769: return NO;
2.9 barstow 2770:
2771: if (! XML_Parse(xmlparser, buffer, buffer_len, 1)) {
2772: fprintf(stderr, "Parse error at line %d:\n%s\n",
2773: XML_GetCurrentLineNumber(xmlparser),
2774: XML_ErrorString(XML_GetErrorCode(xmlparser)));
2775: HT_FREE(uri);
2776: XML_ParserFree(xmlparser);
2777: HTRDF_delete(rdfparser);
2778: HT_FREE(stream);
2.11 barstow 2779: HTTRACE(XML_TRACE, "HTRDF_parseBuffer. Parse error.");
2780: return NO;
2.9 barstow 2781: }
2782:
2783: /* The buffer has been parsed, generate the triples */
2784: generate_triples(stream);
2785:
2786: /* Cleanup */
2787: HT_FREE(uri);
2.5 barstow 2788: XML_ParserFree(xmlparser);
2789: HTRDF_delete(rdfparser);
2790: HT_FREE(stream);
2791:
2.11 barstow 2792: return YES;
2.5 barstow 2793: }
Webmaster