Annotation of libwww/Library/src/HTRDF.c, revision 2.9
2.1 frystyk 1: /*
2: ** RDF PARSER
3: **
2.9 ! barstow 4: ** @(#) $Id: HTRDF.c,v 2.8 2000/08/17 13:39:31 barstow Exp $
2.1 frystyk 5: **
6: ** Copyright © 1995-1998 World Wide Web Consortium, (Massachusetts
7: ** Institute of Technology, Institut National de Recherche en
8: ** Informatique et en Automatique, Keio University). All Rights
9: ** Reserved. This program is distributed under the W3C's Software
10: ** Intellectual Property License. This program is distributed in the hope
11: ** that it will be useful, but WITHOUT ANY WARRANTY; without even the
12: ** implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
13: ** PURPOSE. See W3C License http://www.w3.org/Consortium/Legal/ for more
14: ** details.
15: **
16: ** This module requires expat
17: **
18: ** AUTHOR:
19: ** John Punin <puninj@cs.rpi.edu>,
20: ** Henrik
21: */
22:
23: #include "WWWLib.h"
24: #include "WWWInit.h"
25: #include "WWWXML.h"
26: #include "HTRDF.h"
27:
28: struct _HTStream {
29: const HTStreamClass * isa;
30: int state;
31: HTRequest * request;
32: HTStream * target;
33: HTRDF * rdfparser;
34: };
35:
/* Hand one character (PUTC) or one string (PUTS) to the target of stream t */
#define PUTC(t,c)       ((*(t)->target->isa->put_character)((t)->target, (c)))
#define PUTS(t,s)       ((*(t)->target->isa->put_string)((t)->target, (s)))
38:
/*
**  A triple holds three strings: predicate, subject and object.
**  HTTriple_new() copies them in and HTTriple_delete() releases them.
*/
struct _HTTriple {
    char *      m_sPredicate;
    char *      m_sSubject;
    char *      m_sObject;
};
44:
45: struct _HTElement {
2.3 frystyk 46: char * m_sName;
2.1 frystyk 47: HTAssocList * m_attributes;
48: HTList * m_children;
2.3 frystyk 49: char * m_sID;
50: char * m_sBagID;
2.1 frystyk 51: HTList * m_vTargets;
52: BOOL m_bDone;
2.3 frystyk 53: char * m_sPrefix;
54: char * m_sContent;
2.1 frystyk 55: };
56:
57: struct _HTRDFParser {
58: HTList * m_namespaceStack;
59: HTList * m_elementStack;
2.8 barstow 60: HTList * m_literalStack;
2.1 frystyk 61: HTElement * m_root;
62: HTList * m_triples;
2.3 frystyk 63: char * m_sSource;
2.1 frystyk 64: HTList * m_vAllNameSpaces;
65:
66: BOOL m_bCreateBags;
67: BOOL m_bFetchSchemas;
68:
69: HTList * m_parseTypeStack;
70: HTList * m_parseElementStack;
2.3 frystyk 71: char * m_sLiteral;
2.1 frystyk 72:
73: HTList * m_vResources;
74: HTList * m_vResolveQueue;
75: HTHashtable * m_hIDtable;
76: int m_iReificationCounter;
77:
78: HTStream * ostream;
79:
80: HTTripleCallback_new * newTripleInstance;
81: void * tripleContext;
82: };
83:
84: /* @@@ Should not be global but controlled by name spaces @@@ */
85: PRIVATE HTRDFCallback_new * RDFInstance = NULL;
86: PRIVATE void * RDFInstanceContext = NULL;
87:
2.3 frystyk 88: PRIVATE char * HTRDF_processContainer (HTRDF *me, HTElement *e);
89: PRIVATE char * HTRDF_processPredicate (HTRDF *me, HTElement *predicate,
2.1 frystyk 90: HTElement *description,
2.3 frystyk 91: char * sTarget,
2.1 frystyk 92: BOOL reificate);
2.3 frystyk 93: PRIVATE void HTRDF_processListItem (HTRDF *me,char * sID, HTElement *listitem,
2.1 frystyk 94: int iCounter);
95: PRIVATE void HTRDF_checkAttributes (HTRDF *me,HTElement *Element);
96: PRIVATE BOOL HTRDF_expandAttributes (HTRDF *me, HTElement *parent, HTElement *ele);
2.3 frystyk 97: PRIVATE char * HTRDF_reificate (HTRDF *me, char * sPredicate, char * sSubject,
98: char * sObject, char * sNodeID);
2.4 barstow 99: /* ------------------------------------------------------------------------- */
100:
101: /*
102: ** Append the markup for the given element and its attribute to the
103: ** parser's "Literal" buffer. This buffer is filled in when parseType="Literal".
104: */
105: PRIVATE void addMarkupStart (HTRDF *rdfp, const char *name, const char **atts)
106: {
107: int i=0;
108:
109: if (!rdfp || !name) return;
110:
2.7 barstow 111: StrAllocMCat(&rdfp->m_sLiteral, "<", name, NULL);
2.4 barstow 112:
113: while (atts[i]) {
2.7 barstow 114: StrAllocMCat(&rdfp->m_sLiteral, " ", atts[i], "=\"", atts[i+1], "\"", NULL);
2.4 barstow 115: i+=2;
116: }
117:
118: StrAllocCat(rdfp->m_sLiteral, ">");
119: }
120:
121: /*
122: ** Terminate this element's "Literal" buffer. This buffer is filled in when
123: ** parseType="Literal".
124: */
125: PRIVATE void addMarkupEnd (HTRDF *rdfp, const char *name)
126: {
127: if (!rdfp || !name) return;
128:
2.7 barstow 129: StrAllocMCat(&rdfp->m_sLiteral, "</", name, ">", NULL);
2.4 barstow 130: }
2.1 frystyk 131:
132: /* ------------------------------------------------------------------------- */
133:
134: /*
135: ** Searches a whole list of Strings and returns true if the String is found.
136: */
2.3 frystyk 137: PRIVATE BOOL HTList_contains (HTList *list, char * s)
2.1 frystyk 138: {
139: HTList *cur = list;
2.3 frystyk 140: char * cs = NULL;
141: while ((cs = (char *) HTList_nextObject(cur))) {
2.1 frystyk 142: if (!strcmp(cs, s)) return YES;
143: }
144: return NO;
145: }
146:
147: /*
148: ** Useful function that Trims a string
149: ** @@@ Should use HTStrip() @@@
150: */
151: PRIVATE char * trim (char *s)
152: {
153: char *p = NULL, *t = NULL;
154: int len = s ? strlen(s) : -1;
155: if (s && len > 0) {
156: StrAllocCopy(t, s);
157: p = &(s[len-1]);
158: while(p!=s) {
159: if (!isspace((int)(*p)))
160: break;
161: p--;
162: }
163: t[(int)(p-s)+1] = '\0';
164: if (isspace((int) t[(int)(p-s)]))
165: t[(int)(p-s)] = '\0';
166: }
167: return t;
168: }
169:
170: /* ------------------------------------------------------------------------- */
171: /* TRIPLE of RDF */
172: /* ------------------------------------------------------------------------- */
173:
2.3 frystyk 174: PUBLIC HTTriple * HTTriple_new (char * p, char * s, char * o)
2.1 frystyk 175: {
176: HTTriple * me = NULL;
177: if (p && s && o) {
178: if ((me = (HTTriple *) HT_CALLOC(1, sizeof(HTTriple))) == NULL)
179: HT_OUTOFMEM("HTTriple_new");
180: StrAllocCopy(me->m_sPredicate, p);
181: StrAllocCopy(me->m_sSubject, s);
182: StrAllocCopy(me->m_sObject, o);
183: }
184: return me;
185: }
186:
187: PUBLIC BOOL HTTriple_delete (HTTriple * me)
188: {
189: if (me) {
190: HT_FREE(me->m_sPredicate);
191: HT_FREE(me->m_sSubject);
192: HT_FREE(me->m_sObject);
193: HT_FREE(me);
194: return YES;
195: }
196: return NO;
197: }
198:
199: PUBLIC void HTTriple_print (HTTriple * me)
200: {
201: if (me)
202: HTPrint("TRIPLE(%s,%s,%s)\n", me->m_sPredicate, me->m_sSubject,
203: me->m_sObject);
204: }
205:
2.3 frystyk 206: PUBLIC char * HTTriple_subject (HTTriple * me)
2.1 frystyk 207: {
208: return me ? me->m_sSubject : NULL;
209: }
210:
2.3 frystyk 211: PUBLIC char * HTTriple_predicate (HTTriple * me)
2.1 frystyk 212: {
213: return me ? me->m_sPredicate : NULL;
214: }
215:
2.3 frystyk 216: PUBLIC char * HTTriple_object (HTTriple * me)
2.1 frystyk 217: {
218: return me ? me->m_sObject : NULL;
219: }
220:
221: /* ------------------------------------------------------------------------- */
222: /* ELEMENT of RDF */
223: /* ------------------------------------------------------------------------- */
224:
2.3 frystyk 225: PUBLIC HTElement * HTElement_new (char * sName, HTAssocList * al)
2.1 frystyk 226: {
227: HTElement * me = NULL;
228: if (sName) {
229: if ((me = (HTElement *) HT_CALLOC(1, sizeof(HTElement))) == NULL)
230: HT_OUTOFMEM("HTElement_new");
231: StrAllocCopy(me->m_sName, sName);
232: me->m_attributes = al ? al : HTAssocList_new();
233: me->m_children = HTList_new();
234: /*me->m_nodes = HTAssocList_new();*/ /* Sirpac does not use nodes list */
235: me->m_vTargets = HTList_new();
236: me->m_bDone = FALSE;
237: }
238: return me;
239: }
240:
241: /*
242: ** Creates a Data Element and saves the data in the Content field.
243: ** Data Element does not have attributes
244: */
2.3 frystyk 245: PUBLIC HTElement * HTElement_new2 (char * sContent)
2.1 frystyk 246: {
247: HTElement * me = NULL;
248: if (sContent) {
249: if ((me = (HTElement *) HT_CALLOC(1, sizeof(HTElement))) == NULL)
250: HT_OUTOFMEM("HTElement_new2");
251: StrAllocMCopy(&me->m_sName, "[DATA: ", sContent, "]", NULL);
252: me->m_attributes = NULL;
253: me->m_children = HTList_new();
254: /*me->m_nodes = HTAssocList_new();*/
255: me->m_vTargets = HTList_new();
256: me->m_bDone = FALSE;
257: StrAllocCopy(me->m_sContent, sContent);
258: }
259: return me;
260: }
261:
2.3 frystyk 262: PUBLIC BOOL HTElement_addData (HTElement *me, char * sContent)
2.1 frystyk 263: {
264: if (me && sContent) {
265: int l = strlen(me->m_sName);
266: StrAllocCat(me->m_sContent, sContent);
267: me->m_sName[l-1]='\0';
268: StrAllocMCat(&me->m_sName, sContent, "]", NULL);
269: return YES;
270: }
271: return NO;
272: }
273:
274: PUBLIC BOOL HTElement_delete (HTElement * me)
275: {
276: if (me) {
277: HT_FREE(me->m_sName);
278: if (me->m_attributes) HTAssocList_delete(me->m_attributes);
279: if (me->m_children) HTList_delete(me->m_children);
280: HT_FREE(me->m_sID);
281: HT_FREE(me->m_sBagID);
282: if (me->m_vTargets) HTList_delete(me->m_vTargets);
283: HT_FREE(me->m_sPrefix);
284: HT_FREE(me->m_sContent);
285: HT_FREE(me);
286: return YES;
287: }
288: return NO;
289: }
290:
291: PUBLIC BOOL HTElement_addChild (HTElement * me, HTElement * element)
292: {
293: return (me && element) ? HTList_appendObject(me->m_children, element) : NO;
294: }
295:
2.3 frystyk 296: PUBLIC BOOL HTElement_addAttribute (HTElement * me, char * sName, char * sValue)
2.1 frystyk 297: {
298: return (me && sName && sValue) ?
299: HTAssocList_addObject(me->m_attributes, sName, sValue) : NO;
300: }
301:
2.3 frystyk 302: PUBLIC BOOL HTElement_removeAttribute (HTElement * me, char * sName)
2.1 frystyk 303: {
304: return (me && sName) ? HTAssocList_removeObject(me->m_attributes, sName) : NO;
305: }
306:
2.3 frystyk 307: PUBLIC char * HTElement_getAttribute (HTElement * me, char * sName)
2.1 frystyk 308: {
309: return (me && sName) ? HTAssocList_findObjectCaseSensitiveExact(me->m_attributes, sName) : NULL;
310: }
311:
2.3 frystyk 312: PUBLIC char * HTElement_getAttribute2 (HTElement * me, char * sNamespace, char * sName)
2.1 frystyk 313: {
2.3 frystyk 314: char * fValue = NULL;
315: char * fName = NULL;
2.1 frystyk 316: if (me && sNamespace && sName) {
317: StrAllocMCopy(&fName, sNamespace, sName, NULL);
318: fValue = HTAssocList_findObjectCaseSensitiveExact(me->m_attributes, fName);
319: HT_FREE(fName);
320: }
321: return fValue;
322: }
323:
324: PUBLIC BOOL HTElement_addTarget (HTElement * me, HTElement * element)
325: {
326: return (me && element) ? HTList_addObject(me->m_vTargets, element) : NO;
327: }
328:
329: PUBLIC HTElement * HTElement_target (HTElement * me)
330: {
331: return me ? (HTElement *) HTList_lastObject(me->m_vTargets) : NULL;
332: }
333:
334: PUBLIC BOOL HTElement_instanceOfData (HTElement * me)
335: {
336: return (me && me->m_sContent) ? YES : NO;
337: }
338:
339: /* ------------------------------------------------------------------------- */
340: /* EXPAT HANDLERS */
341: /* ------------------------------------------------------------------------- */
342:
343: /*
344: * Called for each new element.
345: * Build up the document tree using an element stack
346: */
347: PRIVATE void XML_startElement (void * userData,
348: const XML_Char * name, const XML_Char ** atts)
349: {
350: HTRDF * rdfp = (HTRDF *) userData;
351: HTAssocList * namespaces = HTAssocList_new();
352: HTAssocList * newAL = HTAssocList_new();
353: int i = 0;
2.5 barstow 354:
2.1 frystyk 355: /**
356: * The following loop tries to identify special xmlns prefix
357: * attributes and update the namespace stack accordingly.
358: * While doing all this, it builds another AttributeList instance
359: * which will hold the expanded names of the attributes
360: * (I think this approach is only useful for RDF which uses
361: * attributes as an abbreviated syntax for element names)
362: */
363: if (atts) {
364: while (atts[i]) {
2.3 frystyk 365: char * aName = (char * ) atts[i];
2.1 frystyk 366: if (!strcmp(aName, "xmlns")) {
2.3 frystyk 367: char * aValue = (char *) atts[i+1];
2.1 frystyk 368: int len = aValue ? strlen(aValue) : -1;
369: if (len == 0 && !rdfp->m_sSource)
370: aValue = rdfp->m_sSource;
371: HTAssocList_addObject(namespaces, aName, aValue);
372: /* save all non-RDF schema addresses */
373: if (!HTList_contains(rdfp->m_vAllNameSpaces, aValue) &&
374: strncmp(aValue, RDFMS, strlen(RDFMS)) &&
375: strncmp(aValue, RDFSCHEMA, strlen(RDFSCHEMA))) {
2.3 frystyk 376: char * nname = NULL;
2.1 frystyk 377: StrAllocCopy(nname, aValue);
378: HTList_addObject(rdfp->m_vAllNameSpaces, nname);
379: }
380:
381: /* Special case: Don't save document's own address */
382: if (rdfp->m_sSource &&
383: !strncmp(aValue, rdfp->m_sSource, strlen(rdfp->m_sSource))) {
2.3 frystyk 384: char * nname = NULL;
2.1 frystyk 385: StrAllocCopy(nname, aValue);
386: HTList_addObject(rdfp->m_vAllNameSpaces, nname);
387: }
388: } else if (!strncmp(aName, "xmlns:", 6)) {
2.3 frystyk 389: char * aValue = (char *) atts[i+1];
390: char * nName = NULL;
2.1 frystyk 391: int len = aValue ? strlen(aValue) : -1;
392: if (len == 0 && !rdfp->m_sSource)
393: aValue = rdfp->m_sSource;
394: StrAllocCopy(nName, &(aName[6]));
395: HTAssocList_addObject(namespaces, nName, aValue);
396: HT_FREE(nName);
397:
398: /* Save all non-RDF schema addresses */
399: if (!HTList_contains(rdfp->m_vAllNameSpaces, aValue) &&
400: strncmp(aValue, RDFMS, strlen(RDFMS)) &&
401: strncmp(aValue, RDFSCHEMA, strlen(RDFSCHEMA))) {
2.3 frystyk 402: char * nname = NULL;
2.1 frystyk 403: StrAllocCopy(nname, aValue);
404: HTList_addObject(rdfp->m_vAllNameSpaces, nname);
405: }
406:
407: /* Special case: Don't save document's own address */
408: if (rdfp->m_sSource &&
409: !strncmp(aValue, rdfp->m_sSource, strlen(rdfp->m_sSource))) {
2.3 frystyk 410: char * nname = NULL;
2.1 frystyk 411: StrAllocCopy(nname, aValue);
412: HTList_addObject(rdfp->m_vAllNameSpaces, nname);
413: }
414: }
415: i+=2;
416: } /* end of while */
417: } /* end of if */
418:
419: /*
420: ** Place new namespace declarations into the stack
421: ** (Yes, I could optimize this a bit, not it wastes space
422: ** if there are no xmlns definitions)
423: */
424: HTList_addObject(rdfp->m_namespaceStack, namespaces);
425:
426: /*
427: ** Figure out the prefix part if it exists and
428: ** determine the namespace of the element accordingly
429: */
430: {
2.3 frystyk 431: char * sNamespace = NULL;
432: char * sElementName = NULL;
433: char * sPrefix2 = NULL;
2.1 frystyk 434: HTElement *newElement = NULL;
435: char *pindex = strchr(name, ':');
436: int ix = pindex ? (int) (pindex - name) : -1 ;
437: if (ix > 0) {
438: if (!(sPrefix2 = HT_MALLOC(ix+1)))
439: HT_OUTOFMEM("XML_startELement");
440: strncpy(sPrefix2, name, ix);
441: sPrefix2[ix]='\0';
442: sNamespace = HTRDF_namespace(rdfp, sPrefix2);
443: StrAllocCopy(sElementName, &(name[ix+1]));
444: HT_FREE(sPrefix2);
445: } else {
446: sNamespace = HTRDF_namespace(rdfp, "xmlns");
447: StrAllocCopy(sElementName, name);
448: }
449:
450: /*
451: * Finally look for attributes other than the special xmlns,
452: * expand them, and place to the new Attribute List
453: */
454: i = 0;
455: if (atts) {
456: while (atts[i]) {
2.3 frystyk 457: char * aName = (char *) atts[i];
458: char * sAttributeNamespace = NULL;
2.1 frystyk 459: if (strncmp(aName, "xmlns", 5)) {
2.3 frystyk 460: char * aValue = (char *) atts[i+1];
461: char * sPrefix = NULL;
2.1 frystyk 462: /* Expat does not have type for attributes */
463: pindex = strchr(aName, ':');
464: ix = pindex ? (int) (pindex - aName) : -1;
465: if (ix > 0) {
466: if (!(sPrefix = HT_MALLOC(ix+1)))
467: HT_OUTOFMEM("XML_startELement");
468: strncpy(sPrefix, aName, ix);
469: sPrefix[ix] = '\0';
470: sAttributeNamespace = HTRDF_namespace(rdfp, sPrefix);
471: aName = &(aName[ix+1]);
472: HT_FREE(sPrefix);
473: } else {
474: if (!sNamespace)
475: sAttributeNamespace = HTRDF_namespace(rdfp, "xmlns");
476: else
477: StrAllocCopy(sAttributeNamespace, sNamespace);
478: }
479:
480: if (HTRDF_parseLiteral(rdfp)) {
481: if (!sPrefix) {
482: if (!(sPrefix = HT_MALLOC(8)))
483: HT_OUTOFMEM("XML_startELement");
484: sprintf(sPrefix, "gen%d\n", i);
485: }
486: {
2.3 frystyk 487: char * fName = NULL;
2.1 frystyk 488: StrAllocMCopy(&fName, sPrefix, ":", aValue, NULL);
489: HTAssocList_addObject(newAL, fName, aValue);
490: HT_FREE(fName);
491: StrAllocMCopy(&fName, "xmlns:", sPrefix, NULL);
492: HTAssocList_addObject(newAL, fName, sAttributeNamespace);
493: HT_FREE(fName);
494: }
495: } else {
2.3 frystyk 496: char * fName = NULL;
2.1 frystyk 497: StrAllocMCopy(&fName, sAttributeNamespace, aName, NULL);
498: HTAssocList_addObject(newAL, fName, aValue);
499: HT_FREE(fName);
500: }
501:
502: HT_FREE(sAttributeNamespace);
503:
504: /*
505: ** This call will try to see if the user is using
506: ** RDF look-alike elements from another namespace
507: **
508: ** Note: you can remove the call if you wish
509: */
510: #if 0
511: HTRDF_likeRDF (rdfp, sAttributeNamespace, aName);
512: #endif
513:
514: } /* end of if */
515: i+=2;
516: } /* end of while */
517: } /* end of if atts */
518:
519: /*
520: * If we have parseType="Literal" set earlier, this element
521: * needs some additional attributes to make it stand-alone
522: * piece of XML
523: */
524: if (HTRDF_parseLiteral(rdfp)) {
2.3 frystyk 525: char * fName = NULL;
2.4 barstow 526:
2.1 frystyk 527: if (!sPrefix2) {
528: if (sNamespace)
529: HTAssocList_addObject(newAL, "xmlns:gen", sNamespace);
530: StrAllocMCopy(&fName, "gen", sElementName, NULL);
531: newElement = HTElement_new(fName, newAL);
532: StrAllocCopy(newElement->m_sPrefix, "gen");
533: HT_FREE(fName);
534: } else {
2.3 frystyk 535: char * sAttributeNamespace = HTRDF_namespace(rdfp, sPrefix2);
2.1 frystyk 536: if (sAttributeNamespace) {
537: StrAllocMCopy(&fName, "xmlns:", sPrefix2, NULL);
538: HTAssocList_addObject(newAL, fName, sAttributeNamespace);
539: HT_FREE(fName);
540: }
541: StrAllocMCopy(&fName, sPrefix2, ":", sElementName, NULL);
542: newElement = HTElement_new(fName, newAL);
543: HT_FREE(fName);
544: }
545: } else {
2.3 frystyk 546: char * fName = NULL;
2.1 frystyk 547: StrAllocMCopy(&fName, sNamespace, sElementName, NULL);
548: newElement = HTElement_new(fName, newAL);
549: HT_FREE(fName);
550: /* HTRDF_likeRDF (rdfp, sNamespace, sElementName); */
551: }
552: HT_FREE(sElementName);
553: HT_FREE(sNamespace);
554: HTRDF_checkAttributes(rdfp, newElement);
2.4 barstow 555:
2.1 frystyk 556: /*
557: ** Check parseType
558: */
559: {
2.3 frystyk 560: char * fName = NULL;
561: char * sLiteralValue = NULL;
2.1 frystyk 562: StrAllocMCopy(&fName, RDFMS, "parseType", NULL);
563: sLiteralValue = HTElement_getAttribute(newElement, fName);
564: HT_FREE(fName);
565: if (sLiteralValue && strcmp(sLiteralValue, "Resource")) {
566: /**
567: * This is the management of the element where
568: * parseType="Literal" appears
569: *
570: * You should notice RDF V1.0 conforming implementations
571: * must treat other values than Literal and Resource as
572: * Literal. This is why the condition is !equals("Resource")
573: */
574:
575: HTList_addObject(rdfp->m_parseTypeStack, sLiteralValue);
576: if (!HTList_isEmpty(rdfp->m_elementStack)) {
577: HTElement *e = (HTElement *)
578: HTList_lastObject(rdfp->m_elementStack);
579: HTElement_addChild(e, newElement);
580: }
581: HTList_addObject(rdfp->m_elementStack, newElement);
582: HTList_addObject(rdfp->m_parseElementStack, newElement);
583: HT_FREE(rdfp->m_sLiteral);
584: StrAllocCopy(rdfp->m_sLiteral, "");
585: return;
586: }
587:
588: if (HTRDF_parseLiteral(rdfp)) {
589: /*
590: * This is the management of any element nested within
591: * a parseType="Literal" declaration
592: */
2.4 barstow 593: /* Add the element to the parser's literal buffer */
594: addMarkupStart (rdfp, name, atts);
595:
2.8 barstow 596: /* Add this literal element to the literal stack */
597: if (!HTList_isEmpty(rdfp->m_literalStack)) {
598: HTElement *e = (HTElement *)
599: HTList_lastObject(rdfp->m_literalStack);
600: HTElement_addChild(e, newElement);
601: }
602: HTList_addObject(rdfp->m_literalStack, newElement);
603:
2.1 frystyk 604: HTList_addObject(rdfp->m_elementStack, newElement);
605: return;
606: }
607:
608: /*
609: ** Update the containment hierarchy with the stack.
610: */
611: if (!HTList_isEmpty(rdfp->m_elementStack)) {
612: HTElement *e = (HTElement *)
613: HTList_lastObject(rdfp->m_elementStack);
614: HTElement_addChild(e, newElement);
615: }
616:
617: /*
618: ** Place the new element into the stack
619: */
620: HTList_addObject(rdfp->m_elementStack, newElement);
621: if (sLiteralValue && !strcmp(sLiteralValue, "Resource")) {
622: HTList_addObject(rdfp->m_parseTypeStack, sLiteralValue);
623: HTList_addObject(rdfp->m_parseElementStack, newElement);
624: HT_FREE(rdfp->m_sLiteral);
625: StrAllocCopy(rdfp->m_sLiteral, "");
626:
627: /*
628: * Since parseType="Resource" implies the following
629: * production must match Description, let's create
630: * an additional Description node here in the document tree.
631: */
632: {
2.3 frystyk 633: char * fName = NULL;
2.1 frystyk 634: HTElement *desc = NULL;
635: HTAssocList * al = HTAssocList_new ();
636: StrAllocMCopy(&fName, RDFMS, "Description", NULL);
637: desc = HTElement_new(fName, al);
638: HT_FREE(fName);
639: if (!HTList_isEmpty(rdfp->m_elementStack)) {
640: HTElement *e = (HTElement *)
641: HTList_lastObject(rdfp->m_elementStack);
642: HTElement_addChild(e, desc);
643: }
644: HTList_addObject(rdfp->m_elementStack, desc);
645: }
646: } /* end of if */
647: } /* end of block */
648: } /* end of block */
649: }
650:
651: /*
652: * For each end of an element scope step back in the
653: * element and namespace stack
654: */
655: PRIVATE void XML_endElement (void * userData,
656: const XML_Char * name)
657: {
658: HTRDF * rdfp = (HTRDF *) userData;
659: BOOL bParseLiteral = rdfp ? HTRDF_parseLiteral(rdfp) : NO;
660: HTAssocList * namespaces = HTList_removeLastObject(rdfp->m_namespaceStack);
661: rdfp->m_root = (HTElement *) HTList_removeLastObject(rdfp->m_elementStack);
662: if (namespaces) HTAssocList_delete(namespaces);
663:
664: if (bParseLiteral) {
665: HTElement *pe = (HTElement *)
666: HTList_lastObject(rdfp->m_parseElementStack);
667: if (pe != rdfp->m_root) {
2.4 barstow 668: /* Terminate the literal */
669: addMarkupEnd (rdfp, name);
2.1 frystyk 670: } else {
671: HTElement *de = HTElement_new2(rdfp->m_sLiteral);
672: HTElement_addChild(pe, de);
2.4 barstow 673:
2.1 frystyk 674: HT_FREE(rdfp->m_sLiteral);
675: StrAllocCopy(rdfp->m_sLiteral, "");
676: HTList_removeLastObject(rdfp->m_parseElementStack);
677: HTList_removeLastObject(rdfp->m_parseTypeStack);
678: }
679: } else if (HTRDF_parseResource(rdfp)) {
680: /**
681: * If we are doing parseType="Resource"
682: * we need to explore whether the next element in
683: * the stack is the closing element in which case
684: * we remove it as well (remember, there's an
685: * extra Description element to be removed)
686: */
687: if (!HTList_isEmpty(rdfp->m_elementStack)) {
688: HTElement *pe = (HTElement *)
689: HTList_lastObject(rdfp->m_parseElementStack);
690: HTElement *e = (HTElement *)
691: HTList_lastObject(rdfp->m_elementStack);
692: if (pe == e) {
693: e = (HTElement *) HTList_removeLastObject(rdfp->m_elementStack);
694: HTList_removeLastObject(rdfp->m_parseElementStack);
695: HTList_removeLastObject(rdfp->m_parseTypeStack);
696: }
697: }
698: }
699: }
700:
701: PRIVATE void XML_characterData (void * userData,
702: const XML_Char * s, int len)
703: {
704: /*
705: * Place all characters as Data instance to the containment
706: * hierarchy with the help of the stack.
707: */
708: HTRDF * rdfp = (HTRDF *) userData;
2.3 frystyk 709: HTElement * e = (HTElement *) HTList_lastObject(rdfp->m_elementStack);
710: char * tstr = NULL;
711: char * str = NULL;
712: if (!(str = (char *) HT_MALLOC(len+1)))
2.1 frystyk 713: HT_OUTOFMEM("XML_characterData");
714: strncpy(str, s, len);
715: str[len]='\0';
716: if (HTRDF_parseLiteral(rdfp)) {
717: StrAllocCat(rdfp->m_sLiteral, str);
718: HT_FREE(str);
719: return;
720: }
721: /* JUST FOR EXPAT */
722: {
723: HTElement *lch = (HTElement *) HTList_lastObject(e->m_children);
724: if (lch && HTElement_instanceOfData(lch)) {
725: HTElement_addData(lch, str);
726: HT_FREE(str);
727: return;
728: }
729: }
730: /*
731: * Warning: this is not correct procedure according to XML spec.
732: * All whitespace matters!
733: */
734: tstr = trim(str);
735: if (strlen(tstr) > 0) {
736: HTElement * de = HTElement_new2(tstr);
737: HTElement_addChild(e, de);
738: }
739: HT_FREE(str); HT_FREE(tstr);
740: }
741:
742: PRIVATE void XML_processingInstruction (void * userData,
743: const XML_Char * target,
744: const XML_Char * data)
745: {
746: return;
747: }
748:
749: /*
750: ** This is called for any characters in the XML document for
751: ** which there is no applicable handler. This includes both
752: ** characters that are part of markup which is of a kind that is
753: ** not reported (comments, markup declarations), or characters
754: ** that are part of a construct which could be reported but
755: ** for which no handler has been supplied. The characters are passed
756: ** exactly as they were in the XML document except that
757: ** they will be encoded in UTF-8. Line boundaries are not normalized.
758: ** Note that a byte order mark character is not passed to the default handler.
759: ** If a default handler is set, internal entity references
760: ** are not expanded. There are no guarantees about
761: ** how characters are divided between calls to the default handler:
762: ** for example, a comment might be split between multiple calls.
763: */
764: PRIVATE void XML_default (void * userData,
765: const XML_Char * s, int len)
766: {
767: return;
768: }
769:
770: /*
771: ** This is called for a declaration of an unparsed (NDATA)
772: ** entity. The base argument is whatever was set by XML_SetBase.
773: ** The entityName, systemId and notationName arguments will never be null.
774: ** The other arguments may be.
775: */
776: PRIVATE void XML_unparsedEntityDecl (void * userData,
777: const XML_Char * entityName,
778: const XML_Char * base,
779: const XML_Char * systemId,
780: const XML_Char * publicId,
781: const XML_Char * notationName)
782: {
783: return;
784: }
785:
786: /*
787: ** This is called for a declaration of notation.
788: ** The base argument is whatever was set by XML_SetBase.
789: ** The notationName will never be null. The other arguments can be.
790: */
791: PRIVATE void XML_notationDecl (void * userData,
792: const XML_Char * notationName,
793: const XML_Char * base,
794: const XML_Char * systemId,
795: const XML_Char * publicId)
796: {
797: return;
798: }
799:
800: /*
801: ** This is called for a reference to an external parsed general entity.
802: ** The referenced entity is not automatically parsed.
803: ** The application can parse it immediately or later using
804: ** XML_ExternalEntityParserCreate.
805: ** The parser argument is the parser parsing the entity containing the reference;
806: ** it can be passed as the parser argument to XML_ExternalEntityParserCreate.
807: ** The systemId argument is the system identifier as specified in the entity
808: ** declaration; it will not be null.
809: ** The base argument is the system identifier that should be used as the base for
810: ** resolving systemId if systemId was relative; this is set by XML_SetBase;
811: ** it may be null.
812: ** The publicId argument is the public identifier as specified in the entity declaration,
813: ** or null if none was specified; the whitespace in the public identifier
814: ** will have been normalized as required by the XML spec.
815: ** The openEntityNames argument is a space-separated list of the names of the entities
816: ** that are open for the parse of this entity (including the name of the referenced
817: ** entity); this can be passed as the openEntityNames argument to
818: ** XML_ExternalEntityParserCreate; openEntityNames is valid only until the handler
819: ** returns, so if the referenced entity is to be parsed later, it must be copied.
820: ** The handler should return 0 if processing should not continue because of
821: ** a fatal error in the handling of the external entity.
822: ** In this case the calling parser will return an XML_ERROR_EXTERNAL_ENTITY_HANDLING
823: ** error.
824: ** Note that unlike other handlers the first argument is the parser, not userData.
825: */
826: PRIVATE int XML_externalEntityRef (XML_Parser parser,
827: const XML_Char * openEntityNames,
828: const XML_Char * base,
829: const XML_Char * systemId,
830: const XML_Char * publicId)
831: {
832: return 0;
833: }
834:
835: /*
836: ** This is called for an encoding that is unknown to the parser.
837: ** The encodingHandlerData argument is that which was passed as the
838: ** second argument to XML_SetUnknownEncodingHandler.
839: ** The name argument gives the name of the encoding as specified in
840: ** the encoding declaration.
841: ** If the callback can provide information about the encoding,
842: ** it must fill in the XML_Encoding structure, and return 1.
843: ** Otherwise it must return 0.
844: ** If info does not describe a suitable encoding,
845: ** then the parser will return an XML_UNKNOWN_ENCODING error.
846: */
847: PRIVATE int XML_unknownEncoding (void * encodingHandlerData,
848: const XML_Char * name,
849: XML_Encoding * info)
850: {
851: return 0;
852: }
853:
854: /* ------------------------------------------------------------------------- */
855: /* HTXML STREAM HANDLERS */
856: /* ------------------------------------------------------------------------- */
857:
858: PRIVATE void rdf_setHandlers (XML_Parser me)
859: {
860: XML_SetElementHandler(me, XML_startElement, XML_endElement);
861: XML_SetCharacterDataHandler(me, XML_characterData);
862: XML_SetProcessingInstructionHandler(me, XML_processingInstruction);
863: XML_SetDefaultHandler(me, XML_default);
864: XML_SetUnparsedEntityDeclHandler(me, XML_unparsedEntityDecl);
865: XML_SetNotationDeclHandler(me, XML_notationDecl);
866: XML_SetExternalEntityRefHandler(me, XML_externalEntityRef);
867: XML_SetUnknownEncodingHandler(me, XML_unknownEncoding, NULL);
868: }
869:
870: PRIVATE void rdf_newInstance (HTStream * me,
871: HTRequest * request,
872: HTFormat target_format,
873: HTStream * target_stream,
874: XML_Parser xmlparser,
875: void * context)
876: {
877: if (me && xmlparser) {
878: rdf_setHandlers(xmlparser);
879: XML_SetUserData(xmlparser, context);
880:
881: /* Call the new RDF instance callback (if any) with this new stream */
882: if (RDFInstance)
883: (*RDFInstance)(me, request, target_format, target_stream, context, RDFInstanceContext);
884: }
885: }
886:
887: /* ------------------------------------------------------------------------- */
888: /* RDF PARSER */
889: /* ------------------------------------------------------------------------- */
890:
891: PRIVATE void visit_element_children (HTList *children)
892: {
893: HTElement *child = NULL;
894: HTList *cur = children;
895: while ((child = (HTElement *) HTList_nextObject(cur))) {
896: if (!HTList_isEmpty(child->m_children))
897: visit_element_children(child->m_children);
898: HTElement_delete(child);
899: }
900: }
901:
902: PRIVATE void delete_elements (HTRDF * me)
903: {
904: if (me && me->m_root) {
905: HTElement *r = me->m_root;
906: if (!HTList_isEmpty(r->m_children))
907: visit_element_children(r->m_children);
908: HTElement_delete(r);
909: }
910: }
911:
2.8 barstow 912: PRIVATE void delete_literal_elements (HTRDF * me)
913: {
914: if (me && me->m_literalStack) {
915: HTList *cur = me->m_literalStack;
916: HTElement *e = NULL;
917: while ((e = (HTElement *) HTList_nextObject(cur))) {
918: HTElement_delete(e);
919: }
920: HTList_delete(me->m_literalStack);
921: }
922: }
923:
924:
2.1 frystyk 925: PUBLIC HTRDF * HTRDF_new (void)
926: {
927: HTRDF * me;
928: if ((me = (HTRDF *) HT_CALLOC(1, sizeof(HTRDF))) == NULL)
929: HT_OUTOFMEM("HTRDF_new");
930: me->m_namespaceStack = HTList_new();
931: me->m_elementStack = HTList_new();
2.8 barstow 932: me->m_literalStack = HTList_new();
2.1 frystyk 933:
934: me->m_triples = HTList_new();
935: me->m_vAllNameSpaces = HTList_new();
936:
937: me->m_bCreateBags = FALSE;
938: me->m_bFetchSchemas = FALSE;
939:
940: me->m_parseTypeStack = HTList_new();
941: me->m_parseElementStack = HTList_new();
942:
943: me->m_vResources = HTList_new();
944: me->m_vResolveQueue = HTList_new();
945: me->m_hIDtable = HTHashtable_new(0);
946:
947: return me;
948: }
949:
950: PUBLIC BOOL HTRDF_delete (HTRDF * me)
951: {
952: if (me) {
953: delete_elements(me);
954: if (me->m_namespaceStack) {
955: HTList *cur = me->m_namespaceStack;
956: HTAssocList *alist = NULL;
957: while ((alist = (HTAssocList *) HTList_nextObject(cur))) {
958: HTAssocList_delete(alist);
959: }
960: HTList_delete(me->m_namespaceStack);
961: }
962: if (me->m_elementStack) HTList_delete(me->m_elementStack);
2.8 barstow 963:
964: delete_literal_elements(me);
965:
2.1 frystyk 966: me->m_root = NULL;
967: if (me->m_triples) {
968: HTList *cur = me->m_triples;
969: HTTriple *t = NULL;
970: while ((t = (HTTriple *) HTList_nextObject(cur))) {
971: /*HTTriple_print(t);*/
972: HTTriple_delete(t);
973: }
974: HTList_delete(me->m_triples);
975: }
976: HT_FREE(me->m_sSource);
977: if (me->m_vAllNameSpaces) {
978: HTList *cur = me->m_vAllNameSpaces;
2.3 frystyk 979: char * s = NULL;
980: while ((s = (char *) HTList_nextObject(cur))) {
2.1 frystyk 981: HT_FREE(s);
982: }
983: HTList_delete(me->m_vAllNameSpaces);
984: }
985: if (me->m_parseTypeStack)
986: HTList_delete(me->m_parseTypeStack);
987: if (me->m_parseElementStack)
988: HTList_delete(me->m_parseElementStack);
989: if (me->m_vResources)
990: HTList_delete(me->m_vResources);
991: if (me->m_vResolveQueue)
992: HTList_delete(me->m_vResolveQueue);
993: if (me->m_hIDtable)
994: HTHashtable_delete(me->m_hIDtable);
995: HT_FREE(me->m_sLiteral);
996: HT_FREE(me);
997: return YES;
998: }
999: return NO;
1000: }
1001:
1002: /*
1003: * setSource method saves the name of the source document for
1004: * later inspection if needed
1005: */
2.3 frystyk 1006: PUBLIC BOOL HTRDF_setSource(HTRDF *me, char * source)
2.1 frystyk 1007: {
1008: if (me && source) {
1009: StrAllocCopy (me->m_sSource, source);
1010: return YES;
1011: }
1012: return NO;
1013: }
1014:
1015: /*
1016: * Go through the m_vResolveQueue and assign
1017: * direct object reference for each symbolic reference
1018: */
1019: PUBLIC BOOL HTRDF_resolve (HTRDF * me)
1020: {
1021: if (me) {
1022: HTList * cur = me->m_vResolveQueue;
1023: HTElement *e = NULL;
1024: HTElement *e2 = NULL;
1025: while ((e = (HTElement *) HTList_nextObject(cur))) {
2.3 frystyk 1026: char * sAbout = HTElement_getAttribute2(e, RDFMS, "about");
1027: char * sResource = HTElement_getAttribute2(e, RDFMS, "resource");
1028: char * sAboutEach = HTElement_getAttribute2(e, RDFMS, "aboutEach");
1029: char * sAboutEachPrefix = HTElement_getAttribute2(e, RDFMS,
2.1 frystyk 1030: "aboutEachPrefix");
1031: if (sAbout) {
1032: if (sAbout[0]=='#')
1033: sAbout = &(sAbout[1]);
1034: e2 = (HTElement *) HTRDF_lookforNode(me, sAbout);
1035: if (e2)
1036: HTElement_addTarget(e, e2);
1037: else
1038: HTPrint("Unresolved internal reference %s\n", sAbout);
1039: }
1040: if (sResource) {
1041: if (sResource[0]=='#')
1042: sResource = &(sResource[1]);
1043: e2 = (HTElement *) HTRDF_lookforNode(me, sResource);
1044: if (e2)
1045: HTElement_addTarget(e, e2);
1046: }
1047:
1048: if (sAboutEach) {
1049: sAboutEach = &(sAboutEach[1]);
1050: e2 = (HTElement *) HTRDF_lookforNode(me, sAboutEach);
1051: if (e2)
1052: HTElement_addTarget(e, e2);
1053: }
1054: if (sAboutEachPrefix) {
1055: HTList * curr = me->m_vResources;
1056: HTElement *ele = NULL;
1057: while ((ele = (HTElement *) HTList_nextObject(curr))) {
2.3 frystyk 1058: char * sA = HTElement_getAttribute2(ele, RDFMS, "about");
2.1 frystyk 1059: if (sA &&
1060: !strncmp(sA, sAboutEachPrefix, strlen(sAboutEachPrefix))) {
1061: HTElement_addTarget(e, ele);
1062: }
1063: }
1064: }
1065: }
1066: HTList_delete(me->m_vResources);
1067: me->m_vResources = HTList_new();
1068: return YES;
1069: }
1070: return NO;
1071: }
1072:
1073: /**
1074: * Check if the element e is from the namespace
1075: * of the RDF schema by comparing only the beginning of
1076: * the expanded element name with the canonical RDFMS
1077: * URI
1078: */
1079: PUBLIC BOOL HTRDF_isRDF(HTRDF * me, HTElement *e)
1080: {
1081: return (me && e && e->m_sName) ?
1082: (!strncmp(e->m_sName, RDFMS, strlen(RDFMS))) : NO;
1083: }
1084:
1085: PUBLIC BOOL HTRDF_isRDFroot (HTRDF * me, HTElement *e)
1086: {
1087: if (me && e && e->m_sName) {
1088: int len = strlen(e->m_sName);
1089: if (len > 3) return (HTRDF_isRDF(me, e) && !strcmp(&(e->m_sName[len-3]), "RDF"));
1090: }
1091: return NO;
1092: }
1093:
1094: /**
1095: * Is the element a Description
1096: */
1097: PUBLIC BOOL HTRDF_isDescription (HTRDF *me, HTElement *e)
1098: {
1099: if (me && e && e->m_sName) {
1100: int len = strlen(e->m_sName);
1101: if (len > 11) return (HTRDF_isRDF(me, e) && !strcmp(&(e->m_sName[len-11]), "Description"));
1102: }
1103: return NO;
1104: }
1105:
1106: /*
1107: * Is the element a ListItem
1108: */
1109: PUBLIC BOOL HTRDF_isListItem (HTRDF *me, HTElement *e)
1110: {
1111: if (me && e && e->m_sName) {
1112: int len = strlen(e->m_sName);
1113: if (len > 2)
1114: return (HTRDF_isRDF(me, e) && (!strcmp(&(e->m_sName[len-2]), "li") || strchr(e->m_sName,'_')));
1115: }
1116: return NO;
1117: }
1118:
1119: /**
1120: * Is the element a Sequence
1121: */
1122: PUBLIC BOOL HTRDF_isSequence (HTRDF *me, HTElement *e)
1123: {
1124: if (me && e && e->m_sName) {
1125: int len = strlen(e->m_sName);
1126: if (len > 3) return (HTRDF_isRDF(me, e) && !strcmp(&(e->m_sName[len-3]), "Seq"));
1127: }
1128: return NO;
1129: }
1130:
1131: /*
1132: * Is the element an Alternative
1133: */
1134: PUBLIC BOOL HTRDF_isAlternative (HTRDF *me, HTElement *e)
1135: {
1136: if (me && e && e->m_sName) {
1137: int len = strlen(e->m_sName);
1138: if (len > 3) return (HTRDF_isRDF(me, e) && !strcmp(&(e->m_sName[len-3]), "Alt"));
1139: }
1140: return NO;
1141: }
1142:
1143: /*
1144: * Is the element a Bag
1145: */
1146: PUBLIC BOOL HTRDF_isBag (HTRDF *me, HTElement *e)
1147: {
1148: if (me && e && e->m_sName) {
1149: int len = strlen(e->m_sName);
1150: if (len > 3) return (HTRDF_isRDF(me, e) && !strcmp(&(e->m_sName[len-3]), "Bag"));
1151: }
1152: return NO;
1153: }
1154:
1155: /**
1156: * Is the element a Container
1157: */
1158: PUBLIC BOOL HTRDF_isContainer (HTRDF *me, HTElement *e)
1159: {
1160: return (HTRDF_isSequence(me, e) ||
1161: HTRDF_isAlternative(me, e) ||
1162: HTRDF_isBag(me, e));
1163: }
1164:
1165: /*
1166: * This method matches all properties but those from RDF namespace
1167: */
1168: PUBLIC BOOL HTRDF_isTypedPredicate(HTRDF *me, HTElement *e)
1169: {
1170: if (me && e && e->m_sName) {
1171: int len = strlen(e->m_sName);
2.3 frystyk 1172: char * tp[] = {"predicate", "subject", "object",
2.1 frystyk 1173: "value", "type", "Property", "Statement"};
1174: int i;
1175: if (HTRDF_isRDF(me, e)) {
1176: for(i = 0; i< 7; i++) {
1177: int ntp = strlen(tp[i]);
1178: if (len > ntp) {
1179: if (!strcmp(&(e->m_sName[len-ntp]), tp[i]))
1180: return YES;
1181: }
1182: }
1183: return NO;
1184: }
1185: if (len > 0) return YES;
1186: }
1187: return NO;
1188: }
1189:
2.3 frystyk 1190: PRIVATE void HTRDF_processListItem (HTRDF * me, char * sID, HTElement *listitem,
2.1 frystyk 1191: int iCounter)
1192: {
1193: /*
1194: * Two different cases for
1195: * 1. LI element without content (resource available)
1196: * 2. LI element with content (resource unavailable)
1197: */
2.3 frystyk 1198: char * cName = NULL;
1199: char * sResource = HTRDF_getResource(me, listitem);
2.1 frystyk 1200: char sdig[20];
1201: sprintf(sdig, "_%d", iCounter);
1202: StrAllocMCopy(&cName, RDFMS, sdig, NULL);
1203: if (sResource) {
1204: HTRDF_addTriple(me, cName, sID, sResource);
1205: /* validity checking */
1206: if (!HTList_isEmpty(listitem->m_children)){
1207: HTPrint("Listitem with resource attribute can not have child nodes");
1208: }
1209: StrAllocCopy(listitem->m_sID, sResource);
1210: } else {
1211: HTList *cur = listitem->m_children;
1212: HTElement *n = NULL;
1213: while ((n = (HTElement *) HTList_nextObject(cur))) {
1214: if (HTElement_instanceOfData(n)) {
1215: HTRDF_addTriple(me, cName, sID, n->m_sContent);
1216: } else if (HTRDF_isDescription(me, n)) {
2.3 frystyk 1217: char * sNodeID = HTRDF_processDescription(me, n, NO, YES, NO);
2.1 frystyk 1218: HTRDF_addTriple(me, cName, sID, sNodeID);
1219: StrAllocCopy(listitem->m_sID, sNodeID);
1220: } else if (HTRDF_isListItem(me, n)) {
1221: HTPrint("Can not nest list item inside list item\n");
1222: } else if (HTRDF_isContainer(me, n)) {
2.3 frystyk 1223: char * c = HTRDF_processContainer(me, n);
2.1 frystyk 1224: HTRDF_addTriple(me, cName, sID, n->m_sID);
1225: HT_FREE(c);
1226: } else if (HTRDF_isTypedPredicate(me, n)) {
2.3 frystyk 1227: char * sNodeID = HTRDF_processTypedNode(me, n);
2.1 frystyk 1228: HTRDF_addTriple(me, cName, sID, sNodeID);
1229: HT_FREE(sNodeID);
1230: }
1231: }
1232: }
1233: HT_FREE(cName);
1234: }
1235:
2.3 frystyk 1236: PRIVATE char * HTRDF_processContainer(HTRDF *me, HTElement *n)
2.1 frystyk 1237: {
2.3 frystyk 1238: char * sID = NULL;
1239: char * tName = NULL;
1240: char * aName = NULL;
1241: char * sName = NULL;
1242: char * bName = NULL;
2.1 frystyk 1243: StrAllocMCopy(&tName, RDFMS, "type", NULL);
1244: StrAllocMCopy(&aName, RDFMS, "Alt", NULL);
1245: StrAllocMCopy(&sName, RDFMS, "Seq", NULL);
1246: StrAllocMCopy(&bName, RDFMS, "Bag", NULL);
1247:
1248: StrAllocCopy(sID, n->m_sID);
1249: if (!sID)
1250: sID = HTRDF_newReificationID(me);
1251: /*
1252: * Do the instantiation only once
1253: */
1254: if (!n->m_bDone) {
1255: if (HTRDF_isSequence(me, n)) {
1256: HTRDF_addTriple(me, tName, sID, sName);
1257: } else if (HTRDF_isAlternative(me, n)) {
1258: HTRDF_addTriple(me, tName, sID, aName);
1259: } else if (HTRDF_isBag(me, n)) {
1260: HTRDF_addTriple(me, tName, sID, bName);
1261: }
1262: n->m_bDone = YES;
1263: }
1264: HTRDF_expandAttributes(me, n, n);
1265:
1266: {
1267: HTList *cur = n->m_children;
1268: HTElement *n2 = NULL;
1269: int iCounter = 1;
1270: if (HTList_isEmpty(cur) && HTRDF_isAlternative(me, n))
1271: HTPrint("An RDF:Alt container must have at least one list item\n");
1272: while ((n2 = (HTElement *) HTList_nextObject(cur))) {
1273: if (HTRDF_isListItem(me, n2)) {
1274: HTRDF_processListItem(me, sID, n2, iCounter);
1275: iCounter++;
1276: } else {
1277: HTPrint("Can not nest %s, inside container\n", n2->m_sName);
1278: }
1279: }
1280: } /* end of block */
1281:
1282: HT_FREE(tName); HT_FREE(sName); HT_FREE(aName); HT_FREE(bName);
1283:
1284: return sID;
1285: }
1286: /*
1287: * Manage the typedNode production in the RDF grammar.
1288: *
1289: */
2.3 frystyk 1290: PUBLIC char * HTRDF_processTypedNode(HTRDF *me, HTElement *typedNode)
2.1 frystyk 1291: {
2.3 frystyk 1292: char * sID = HTElement_getAttribute2(typedNode, RDFMS, "ID");
1293: char * sBagID = HTElement_getAttribute2(typedNode, RDFMS, "bagID");
1294: char * sAbout = HTElement_getAttribute2(typedNode, RDFMS, "about");
1295: char * sAboutEach = HTElement_getAttribute2(typedNode, RDFMS, "aboutEach");
1296: /*char * sAboutEachPrefix = HTElement_getAttribute2(typedNode, RDFMS,
2.1 frystyk 1297: "aboutEachPrefix");*/
2.3 frystyk 1298: char * resource = HTElement_getAttribute2(typedNode, RDFMS, "resource");
1299: char * iName = NULL;
1300: char * bName = NULL;
1301: char * tName = NULL;
2.1 frystyk 1302:
2.3 frystyk 1303: char * sObject = NULL;
2.1 frystyk 1304:
1305: StrAllocMCopy(&iName, RDFMS, "ID", NULL);
1306: StrAllocMCopy(&bName, RDFMS, "bagID", NULL);
1307: StrAllocMCopy(&tName, RDFMS, "type", NULL);
1308:
1309: if (resource)
1310: HTPrint("resource attribute not allowed for a typedNode %s\n",
1311: typedNode->m_sName);
1312:
1313: /*
1314: * We are going to manage this typedNode using the processDescription
1315: * routine later on. Before that, place all properties encoded as
1316: * attributes to separate child nodes.
1317: */
1318: {
1319: HTAssoc * assoc;
1320: HTAssocList *cur = typedNode->m_attributes;
2.3 frystyk 1321: char * sAttribute = NULL;
1322: char * tValue = NULL;
1323: char * sValue = NULL;
2.1 frystyk 1324: while((assoc= (HTAssoc *) HTList_nextObject(cur))) {
1325: sAttribute = HTAssoc_name(assoc);
1326: sValue = HTAssoc_value(assoc);
1327: tValue = trim(sValue);
1328: if (strncmp(sAttribute, RDFMS, strlen(RDFMS)) &&
1329: strncmp(sAttribute, XMLSCHEMA, strlen(XMLSCHEMA))) {
1330: if (strlen(tValue) > 0) {
1331: HTAssocList *newAL = HTAssocList_new();
1332: HTElement *newPredicate = HTElement_new(sAttribute, newAL);
1333: HTElement *d = NULL;
1334: HTElement_addAttribute(newPredicate, iName,
1335: sAbout ? sAbout : sID);
1336: HTElement_addAttribute(newPredicate, bName, sBagID);
1337: d = HTElement_new2(tValue);
1338: HTElement_addChild(newPredicate, d);
1339: HTElement_addChild(typedNode, newPredicate);
1340: HTElement_removeAttribute(typedNode, sAttribute);
1341: }
1342: }
1343: HT_FREE(tValue);
1344: } /* end of while */
1345: }/* end of block */
1346: {
1347: if (sAbout)
1348: StrAllocCopy(sObject, sAbout);
1349: else if (sID)
1350: StrAllocCopy(sObject, sID);
1351: else
1352: sObject = HTRDF_newReificationID(me);
1353: StrAllocCopy(typedNode->m_sID, sObject);
1354:
1355: /* special case: should the typedNode have aboutEach attribute,
1356: ** the type predicate should distribute to pointed
1357: ** collection also -> create a child node to the typedNode
1358: */
1359: if (sAboutEach && !HTList_isEmpty(typedNode->m_vTargets)) {
1360: HTAssocList *newAL = HTAssocList_new();
1361: HTElement *newPredicate = HTElement_new(tName, newAL);
1362: HTElement *d = HTElement_new2(typedNode->m_sName);
1363: HTElement_addChild(newPredicate, d);
1364: HTElement_addChild(typedNode, newPredicate);
1365: } else {
1366: HTRDF_addTriple(me, tName, sObject, typedNode->m_sName);
1367: }
1368: HTRDF_processDescription(me, typedNode, NO, NO, YES);
1369: }/* end of block */
1370:
1371: HT_FREE(iName); HT_FREE(bName); HT_FREE(tName);
1372:
1373: return sObject;
1374: }
1375:
1376: /*
1377: * Start processing an RDF/XML document instance from the
1378: * root element rdf.
1379: *
1380: */
1381: PUBLIC BOOL HTRDF_processRDF (HTRDF *me, HTElement *e)
1382: {
1383: if (me && e) {
1384: HTList *cur = e->m_children;
1385: HTElement *ele = NULL;
1386: if (HTList_isEmpty(e->m_children)) {
1387: HTPrint("Empty RDF Element\n");
1388: return NO;
1389: }
1390: while ((ele= (HTElement *) HTList_nextObject(cur))) {
1391: if (HTRDF_isDescription(me, ele)) {
1392: HTRDF_processDescription(me, ele, NO, me->m_bCreateBags,
1393: me->m_bCreateBags);
1394: } else if (HTRDF_isContainer(me, ele)) {
2.3 frystyk 1395: char * c = HTRDF_processContainer(me, ele);
2.1 frystyk 1396: HT_FREE(c);
1397: } else if (HTRDF_isTypedPredicate(me, ele)) {
2.3 frystyk 1398: char * t = HTRDF_processTypedNode(me, ele);
2.1 frystyk 1399: HT_FREE(t);
1400: }
1401: }
1402: return YES;
1403: }
1404: return NO;
1405: }
1406:
1407: /*
1408: * processPredicate handles all elements not defined as special
1409: * RDF elements.
1410: *
1411: * predicate The predicate element itself
1412: * description Context for the predicate
1413: * sTarget The target resource
1414: * reificate Should this predicate be reificated
1415: *
1416: * return the new ID which can be used to identify the predicate
1417: *
 * The function works in four steps:
 *  1. Attributes other than rdf:ID, rdf:bagID and xmlns are expanded into
 *     a temporary Description element which is processed on its own.
 *  2. A predicate with a resource and no children yields one statement
 *     whose object is the resource (or the ID of the resolved target).
 *  3. A predicate with a resource and a resolved target processes that
 *     target as a Description and uses its ID as the object.
 *  4. Otherwise the children decide the object: none (anonymous node),
 *     Description, character data, container or typed node.
 * In each case the statement is either added directly with
 * HTRDF_addTriple() or, when `reificate' is set, through HTRDF_reificate().
 *
 * The returned string, when not NULL, comes from HTRDF_reificate(),
 * HTRDF_processTypedNode() or StrAllocCopy(); the caller in this file
 * releases it with HT_FREE(). NULL is returned when the predicate mixes
 * the propAttr production with child elements, and may be returned when
 * no ID was produced.
1418: */
2.3 frystyk 1419: PRIVATE char * HTRDF_processPredicate (HTRDF * me,
2.1 frystyk 1420: HTElement * predicate,
1421: HTElement * description,
2.3 frystyk 1422: char * sTarget,
2.1 frystyk 1423: BOOL reificate)
1424: {
/* Attribute values looked up on the predicate; none of them is freed in this function */
2.3 frystyk 1425: char * sStatementID = HTElement_getAttribute2(predicate, RDFMS, "ID");
/* nsStatementID is the (allocated) result that is eventually returned */
1426: char * nsStatementID = NULL;
1427: char * sBagID = HTElement_getAttribute2(predicate, RDFMS, "bagID");
1428: char * sResource = HTRDF_getResource(me, predicate);
2.1 frystyk 1429:
1430: /*
1431: ** If a predicate has other attributes than rdf:ID, rdf:bagID,
1432: ** or xmlns... -> generate new triples according to the spec.
1433: ** (See end of Section 6)
1434: */
1435: {
1436: HTElement * place_holder = NULL;
1437: HTAssocList * newAL = HTAssocList_new();
2.3 frystyk 1438: char * fName = NULL;
1439: char * aName = NULL;
2.1 frystyk 1440:
/* Temporary Description element that receives the expanded attributes */
1441: StrAllocMCopy(&fName, RDFMS, "Description", NULL);
1442: place_holder = HTElement_new(fName, newAL);
1443: HT_FREE(fName);
1444:
1445: if (HTRDF_expandAttributes(me, place_holder, predicate)) {
1446:
1447: /* error checking */
1448: if (!HTList_isEmpty(predicate->m_children)) {
1449: HTPrint("%s must be an empty element since it uses propAttr grammar production", predicate->m_sName);
1450: HTElement_delete(place_holder);
/* Bail out: nothing has been allocated for the result at this point */
1451: return NULL;
1452: }
1453: StrAllocMCopy(&aName, RDFMS, "about", NULL);
1454:
1455: /* determine the 'about' part for the new statements */
/* Preference order: rdf:ID, then the resource, then a generated ID */
1456: if (sStatementID) {
1457: HTElement *data = HTElement_new2(sStatementID);
1458: HTElement_addAttribute(place_holder, aName, sStatementID);
1459:
1460: /* hack: make rdf:ID the value of the predicate */
1461: HTElement_addChild(predicate, data);
1462: } else if (sResource) {
1463: HTElement_addAttribute(place_holder, aName, sResource);
1464: } else {
1465: nsStatementID = HTRDF_newReificationID(me);
1466: HTElement_addAttribute(place_holder, aName, nsStatementID);
/* NOTE(review): the later `if (nsStatementID ...)' tests presumably rely on HT_FREE resetting the pointer to NULL - confirm */
1467: HT_FREE(nsStatementID);
1468: }
1469: HT_FREE(aName);
1470:
1471: if (sBagID) {
1472: StrAllocMCopy(&fName, RDFMS, "bagID", NULL);
1473: HTElement_addAttribute(place_holder, fName, sBagID);
1474: HT_FREE(fName);
1475: StrAllocCopy(place_holder->m_sBagID, sBagID);
1476: }
/* NOTE(review): place_holder is not deleted on this path - confirm who owns it afterwards */
1477: HTRDF_processDescription(me, place_holder, NO, NO, me->m_bCreateBags);
1478: } else {
1479:
1480: /* Nothing but xmlns or RDF stuff, so we don't need new element */
1481: HTElement_delete(place_holder);
1482: }
1483: }
1484:
1485: /*
1486: ** Tricky part: if the resource attribute is present for a predicate
1487: ** AND there are no children, the value of the predicate is either
1488: ** 1. the URI in the resource attribute OR
1489: ** 2. the node ID of the resolved #resource attribute
1490: */
1491: if (sResource && HTList_isEmpty(predicate->m_children)) {
/* Case 1: no resolved target, the resource string itself is the object */
1492: if (!HTElement_target(predicate)) {
1493: if (reificate) {
1494: HT_FREE(nsStatementID);
1495: nsStatementID = HTRDF_reificate (me, predicate->m_sName,
1496: sTarget, sResource,
1497: predicate->m_sID);
1498: StrAllocCopy(predicate->m_sID, nsStatementID);
1499: } else {
1500: HTRDF_addTriple(me, predicate->m_sName, sTarget, sResource);
1501: }
1502: } else {
/* Case 2: the resource was resolved, the target's ID is the object */
1503: HTElement *target = HTElement_target(predicate);
1504: if (reificate) {
1505: nsStatementID = HTRDF_reificate (me, predicate->m_sName,
1506: sTarget,
1507: target->m_sID,
1508: predicate->m_sID);
1509: StrAllocCopy(predicate->m_sID, nsStatementID);
1510: } else {
1511: HTRDF_addTriple(me, predicate->m_sName, sTarget, target->m_sID);
1512: }
1513: }
/* Hand back a copy of the predicate's ID when both are available */
2.7 barstow 1514: if (nsStatementID && predicate->m_sID)
1515: StrAllocCopy(nsStatementID, predicate->m_sID);
2.1 frystyk 1516: return nsStatementID;
1517: }
1518:
1519: /*
1520: ** Does this predicate make a reference somewhere using the
1521: ** sResource attribute
1522: */
1523: if (sResource && HTElement_target(predicate)) {
/* dStatementID is whatever HTRDF_processDescription() returns; it is not freed here */
2.3 frystyk 1524: char * dStatementID = HTRDF_processDescription(me,
2.1 frystyk 1525: HTElement_target(predicate),
1526: YES, NO, NO);
1527: if (reificate) {
1528: HT_FREE(nsStatementID);
1529: nsStatementID = HTRDF_reificate (me, predicate->m_sName,
1530: sTarget, dStatementID,
1531: predicate->m_sID);
1532: StrAllocCopy(predicate->m_sID, nsStatementID);
1533: } else {
1534: StrAllocCopy(nsStatementID, dStatementID);
1535: HTRDF_addTriple(me, predicate->m_sName, sTarget, nsStatementID);
1536: }
1537: return nsStatementID;
1538: }
1539:
1540: /*
1541: ** Before looping through the children, let's check
1542: ** if there are any. If not, the value of the predicate is
1543: ** an anonymous node
1544: */
1545: {
1546: HTList *cur = predicate->m_children;
1547: BOOL bUsedTypedNodeProduction = NO;
1548: HTElement *n2;
2.7 barstow 1549: if (nsStatementID && sStatementID)
1550: StrAllocCopy(nsStatementID, sStatementID);
2.1 frystyk 1551: if (HTList_isEmpty(cur)) {
/* No children: a freshly generated ID stands in for the anonymous object */
1552: if (reificate) {
2.3 frystyk 1553: char * nr = HTRDF_newReificationID(me);
2.1 frystyk 1554: HT_FREE(nsStatementID);
1555: nsStatementID = HTRDF_reificate (me, predicate->m_sName,
1556: sTarget, nr,
1557: predicate->m_sID);
1558: HT_FREE(nr);
1559: } else {
2.3 frystyk 1560: char * nr = HTRDF_newReificationID(me);
2.1 frystyk 1561: HTRDF_addTriple(me, predicate->m_sName, sTarget, nr);
1562: HT_FREE(nr);
1563: }
1564: }
/* One statement per child; nsStatementID ends up reflecting the last child that set it */
1565: while ((n2= (HTElement *) HTList_nextObject(cur))) {
1566: if (HTRDF_isDescription(me, n2)) {
1567: HTElement *d2 = n2;
2.3 frystyk 1568: char * dStatementID =HTRDF_processDescription(me, d2, YES, NO, NO);
/* The pointer comparison skips the copy when source and destination are the same string */
2.7 barstow 1569: if (d2->m_sID && dStatementID && d2->m_sID != dStatementID)
1570: StrAllocCopy(d2->m_sID, dStatementID);
2.1 frystyk 1571:
1572: if (reificate) {
1573: HT_FREE(nsStatementID);
1574: nsStatementID = HTRDF_reificate (me, predicate->m_sName,
1575: sTarget, dStatementID,
1576: predicate->m_sID);
1577: } else {
1578: StrAllocCopy(nsStatementID, dStatementID);
1579: HTRDF_addTriple(me, predicate->m_sName, sTarget,
1580: nsStatementID);
1581: }
1582: } else if (HTElement_instanceOfData(n2)) {
2.3 frystyk 1583: char * tValue = NULL;
1584: char * sValue = n2->m_sContent;
2.1 frystyk 1585: /* we've got real data */
1586: /*
1587: * Only if the content is not empty PCDATA (whitespace that is)
1588: * print the triple
1589: */
/* tValue is released with HT_FREE below, so trim() evidently returns an allocated string */
1590: tValue = trim(sValue);
1591: if (tValue && strlen(tValue) > 0) {
1592: if (reificate) {
1593: HT_FREE(nsStatementID);
1594: nsStatementID = HTRDF_reificate (me, predicate->m_sName,
1595: sTarget, tValue,
1596: predicate->m_sID);
1597: StrAllocCopy(predicate->m_sID, nsStatementID);
1598: } else {
1599: HTRDF_addTriple(me, predicate->m_sName, sTarget, tValue);
1600: }
1601: }
1602: HT_FREE(tValue);
1603: } else if (HTRDF_isContainer(me, n2)) {
/* When the enclosing description has a target, that target's rdf:about is the subject instead of sTarget */
1604: HTElement *target = HTElement_target(description);
2.3 frystyk 1605: char * aboutTarget =
2.1 frystyk 1606: target ?
1607: HTElement_getAttribute2(target, RDFMS, "about") : NULL;
/* sCollectionID is freed at the end of this branch */
2.3 frystyk 1608: char * sCollectionID = HTRDF_processContainer(me, n2);
2.1 frystyk 1609: StrAllocCopy(nsStatementID, sCollectionID);
1610: /* Attach the collection to the current predicate */
1611: if (target) {
1612: if (reificate) {
1613: HT_FREE(nsStatementID);
1614: nsStatementID=HTRDF_reificate (me, predicate->m_sName,
1615: aboutTarget,
1616: sCollectionID,
1617: predicate->m_sID);
1618: StrAllocCopy(predicate->m_sID, nsStatementID);
1619: } else {
1620: HTRDF_addTriple(me, predicate->m_sName, aboutTarget,
1621: sCollectionID);
1622: }
1623: } else {
1624: if (reificate) {
1625: HT_FREE(nsStatementID);
1626: nsStatementID=HTRDF_reificate (me, predicate->m_sName,
1627: sTarget, sCollectionID,
1628: predicate->m_sID);
1629: StrAllocCopy(predicate->m_sID, nsStatementID);
1630: } else {
1631: HTRDF_addTriple(me, predicate->m_sName, sTarget,
1632: sCollectionID);
1633: }
1634: }
1635: HT_FREE(sCollectionID);
1636: } else if (HTRDF_isTypedPredicate(me, n2)) {
/* A second typed node is reported but still processed below */
1637: if (bUsedTypedNodeProduction) {
1638: HTPrint("Only one typedNode allowed inside a predicate (Extra typedNode: %s )\n", n2->m_sName);
1639: } else {
1640: bUsedTypedNodeProduction = YES;
1641: }
1642: HT_FREE(nsStatementID);
1643: nsStatementID = HTRDF_processTypedNode(me, n2);
1644: HTRDF_addTriple(me, predicate->m_sName, sTarget, nsStatementID);
1645: }
1646: }
1647: return nsStatementID;
1648: } /* end of block */
/* Not reached: the block above always returns */
1649: return NULL;
1650: }
1651:
1652: /*
1653: * processDescription manages Description elements
1654: *
1655: * description The Description element itself
1656: * inPredicate Is this is a nested description
1657: * reificate Do we need to reificate
1658: * createBag Do we create a bag container
1659: *
1660: * return An ID for the description
1661: *
1662: */
2.3 frystyk 1663: PUBLIC char * HTRDF_processDescription (HTRDF * me,
2.1 frystyk 1664: HTElement * description,
1665: BOOL inPredicate,
1666: BOOL reificate,
1667: BOOL createBag)
1668: {
1669: int iChildCount = 1;
1670: BOOL bOnce = YES;
1671:
2.3 frystyk 1672: char * sAbout = HTElement_getAttribute2(description, RDFMS, "about");
1673: char * sAboutEach = HTElement_getAttribute2(description, RDFMS, "aboutEach");
1674: char * sAboutEachPrefix = HTElement_getAttribute2(description, RDFMS,
2.1 frystyk 1675: "aboutEachPrefix");
2.3 frystyk 1676: char * sBagid = HTElement_getAttribute2(description, RDFMS, "bagID");
1677: char * sID = HTElement_getAttribute2(description, RDFMS, "ID");
2.1 frystyk 1678: HTElement *target = HTElement_target(description);
1679: BOOL hasTarget = HTList_isEmpty(description->m_vTargets) ? NO : YES;
1680: BOOL targetIsContainer = NO;
2.3 frystyk 1681: char * sTargetAbout = NULL;
1682: char * sTargetBagID = NULL;
1683: char * sTargetID = NULL;
1684: char * dName = NULL;
1685: char * aName = NULL;
2.1 frystyk 1686:
1687: /*
1688: ** Return immediately if the description has already been managed
1689: */
1690: if (description->m_bDone) return description->m_sID;
1691:
1692: StrAllocMCopy(&dName, RDFMS, "Description", NULL);
1693: StrAllocMCopy(&aName, RDFMS, "about", NULL);
1694:
1695: /*
1696: ** Determine what the target of the Description reference is
1697: */
1698: if (hasTarget) {
2.3 frystyk 1699: char * sTargetID2 = HTElement_getAttribute2(target, RDFMS, "ID");
2.1 frystyk 1700: sTargetAbout = HTElement_getAttribute2(target, RDFMS, "about");
1701: sTargetBagID = HTElement_getAttribute2(target, RDFMS, "bagID");
1702: if (me->m_sSource && sTargetID2) {
1703: StrAllocMCopy(&sTargetID, me->m_sSource, sTargetID2, NULL);
1704: } else {
1705: StrAllocCopy(sTargetID, sTargetID2);
1706: }
1707: /*
1708: * Target is collection if
1709: * 1. it is identified with bagID attribute
1710: * 2. it is identified with ID attribute and is a collection
1711: */
1712: if (sTargetBagID && sAbout) {
1713: targetIsContainer = !strcmp(&(sAbout[1]), sTargetBagID);
1714: } else {
1715: if (sTargetID && sAbout && !strcmp(&(sAbout[1]), sTargetID) &&
1716: HTRDF_isContainer(me, target))
1717: targetIsContainer = YES;
1718: }
1719: HT_FREE(sTargetID);
1720: }
1721:
1722: /*
1723: * Check if there are properties encoded using the abbreviated
1724: * syntax
1725: */
1726: HTRDF_expandAttributes(me, description, description);
1727:
1728: /*
1729: * Manage the aboutEach attribute here
1730: */
1731: if (sAboutEach && hasTarget) {
1732: if (HTRDF_isContainer(me, target)) {
1733: HTList *cur = target->m_children;
1734: HTElement *ele = NULL;
1735: while ((ele= (HTElement *) HTList_nextObject(cur))) {
1736: if (HTRDF_isListItem(me, ele)) {
2.3 frystyk 1737: char * sResource = HTRDF_getResource(me, ele);
2.1 frystyk 1738: if (sResource) {
1739: HTElement * newDescription = NULL;
1740: HTElement * ele2;
1741: HTList * cur2 = description->m_children;
1742:
1743: /*
1744: * Manage <li resource="..." /> case
1745: */
1746: if (sResource) {
1747: HTAssocList *newAL = HTAssocList_new();
1748: newDescription = HTElement_new(dName, newAL);
1749: HTElement_addAttribute(newDescription, aName, sResource);
1750: }
1751:
1752: while ((ele2 = (HTElement *) HTList_nextObject(cur2))){
1753: if (newDescription) HTElement_addChild(newDescription, ele2);
1754: }
1755:
1756: if (newDescription)
1757: HTRDF_processDescription(me, newDescription, NO, NO, NO);
1758:
1759: /* Not needed anymore */
1760: HTElement_delete(newDescription);
1761:
1762: } else {
1763: /**
1764: * Otherwise we have a structured value inside <li>
1765: *
1766: * loop through the children of <li>
1767: * (can be only one)
1768: */
1769: HTList *cur2 = ele->m_children;
1770: HTElement *ele2 = NULL;
1771: while ((ele2 = (HTElement *) HTList_nextObject(cur2))) {
1772: HTAssocList *newAL = HTAssocList_new();
1773: HTElement *newNode = HTElement_new(dName, newAL);
1774: HTList *cur3 = description->m_children;
1775: HTElement *ele3 = NULL;
1776: /* loop through the items in the
1777: * description with aboutEach
1778: * and add them to the target
1779: */
1780: while ((ele3 = (HTElement *)
1781: HTList_nextObject(cur3))) {
1782: HTElement_addChild(newNode, ele3);
1783: }
1784: HTElement_addTarget(newNode, ele2);
1785: HTRDF_processDescription(me, newNode, YES, NO, NO);
1786: }
1787: }
1788: } else if (HTRDF_isTypedPredicate(me, ele)) {
1789: HTAssocList *newAL = HTAssocList_new();
1790: HTElement *newNode = HTElement_new(dName, newAL);
1791: HTList *cur2 = description->m_children;
1792: HTElement *ele2 = NULL;
1793: while ((ele2 = (HTElement *) HTList_nextObject(cur2))) {
1794: HTElement_addChild(newNode, ele2);
1795: }
1796: HTElement_addTarget(newNode, ele);
1797: HTRDF_processDescription(me, newNode, YES, NO, NO);
1798: }
1799: } /* end of while */
1800: } else if (HTRDF_isDescription(me, target)) {
1801: HTList *cur = target->m_children;
1802: HTElement *ele = NULL;
1803: while ((ele = (HTElement *) HTList_nextObject(cur))) {
1804: HTAssocList *newAL = HTAssocList_new();
1805: HTElement *newNode = HTElement_new(dName, newAL);
1806: HTList *cur2 = description->m_children;
1807: HTElement *ele2 = NULL;
1808: while ((ele2 = (HTElement *) HTList_nextObject(cur2))) {
1809: HTElement_addChild(newNode, ele2);
1810: }
1811: HTElement_addTarget(newNode, ele);
1812: HTRDF_processDescription(me, newNode, YES, NO, NO);
1813: } /* end of while */
1814: }
1815:
1816: HT_FREE(dName);
1817: HT_FREE(aName);
1818: return NULL;
1819: }
1820:
1821: /*
1822: * Manage the aboutEachPrefix attribute here
1823: */
1824: if (sAboutEachPrefix) {
1825: if (hasTarget) {
1826: HTList *cur = description->m_vTargets;
1827: HTElement *target = NULL;
1828: while ((target = (HTElement *) HTList_nextObject(cur))) {
1829: HTList *cur2 = description->m_children;
1830: HTElement *ele2 = NULL;
1831: HTElement *newDescription = NULL;
1832: HTAssocList *newAL = HTAssocList_new();
1833: sTargetAbout = HTElement_getAttribute2(target, RDFMS, "about");
1834: newDescription = HTElement_new(dName, newAL);
1835: HTElement_addAttribute(newDescription, aName, sTargetAbout);
1836: while ((ele2 = (HTElement *) HTList_nextObject(cur2))) {
1837: HTElement_addChild(newDescription, ele2);
1838: }
1839: HTRDF_processDescription(me, newDescription, NO, NO, NO);
1840: }
1841: }
1842:
1843: HT_FREE(dName);
1844: HT_FREE(aName);
1845: return NULL;
1846: }
1847: /*
1848: * Enumerate through the children
1849: */
1850: {
1851: HTList *cur = description->m_children;
1852: HTElement *n = NULL;
1853: while ((n = (HTElement *) HTList_nextObject(cur))) {
1854: if (HTRDF_isDescription(me, n))
1855: HTPrint("Can not nest Description inside Description\n");
1856: else if (HTRDF_isListItem(me, n))
1857: HTPrint("Can not nest List Item inside Description\n");
1858: else if (HTRDF_isContainer(me, n))
1859: HTPrint("Can not nest Container inside Description\n");
1860: else if (HTRDF_isTypedPredicate(me, n)) {
2.3 frystyk 1861: char * sChildID = NULL;
2.1 frystyk 1862: if (hasTarget && targetIsContainer) {
1863: sChildID = HTRDF_processPredicate(me, n, description,
1864: target->m_sBagID ?
1865: target->m_sBagID :
1866: target->m_sID, NO);
1867: StrAllocCopy(description->m_sID, sChildID);
1868: createBag = NO;
1869: } else if (hasTarget) {
1870: sChildID = HTRDF_processPredicate(me, n, description,
1871: target->m_sBagID ?
1872: target->m_sBagID :
1873: target->m_sID, reificate);
1874: StrAllocCopy(description->m_sID, sChildID);
1875: } else if (!hasTarget && !inPredicate) {
1876: if (!description->m_sID) {
2.3 frystyk 1877: char * nr = HTRDF_newReificationID(me);
2.1 frystyk 1878: StrAllocCopy(description->m_sID, nr);
1879: HT_FREE(nr);
1880: }
1881: if (!sAbout) {
1882: if (sID)
1883: sAbout = sID;
1884: else
1885: sAbout = description->m_sID;
1886: }
1887: sChildID = HTRDF_processPredicate(me, n, description,
1888: sAbout, sBagid ?
1889: YES : reificate);
1890:
1891: } else if (!hasTarget && inPredicate) {
1892: if (!sAbout) {
1893: if (sID) {
1894: StrAllocCopy(description->m_sID, sID);
1895: sAbout = sID;
1896: } else {
1897: if (!description->m_sID) {
2.3 frystyk 1898: char * nr = HTRDF_newReificationID(me);
2.1 frystyk 1899: StrAllocCopy(description->m_sID, nr);
1900: HT_FREE(nr);
1901: }
1902: sAbout = description->m_sID;
1903: }
1904: } else {
2.7 barstow 1905: if (description->m_sID != sAbout)
1906: StrAllocCopy(description->m_sID, sAbout);
2.1 frystyk 1907: }
1908: sChildID = HTRDF_processPredicate(me, n, description, sAbout, NO);
1909: }
1910: /*
1911: * Each Description block creates also a Bag node which
1912: * has links to all properties within the block IF
1913: * the m_bCreateBags variable is true
1914: */
1915: if (sBagid || (me->m_bCreateBags && createBag)) {
2.3 frystyk 1916: char * sNamespace = RDFMS;
2.1 frystyk 1917: if (bOnce && sChildID) {
2.3 frystyk 1918: char * tName = NULL;
1919: char * bName = NULL;
2.1 frystyk 1920: bOnce = NO;
1921: if (!description->m_sBagID) {
2.3 frystyk 1922: char * nr = HTRDF_newReificationID(me);
2.1 frystyk 1923: StrAllocCopy(description->m_sBagID, nr);
1924: HT_FREE(nr);
1925: }
1926: if (!description->m_sID)
1927: StrAllocCopy(description->m_sID,
1928: description->m_sBagID);
1929: StrAllocMCopy(&tName, sNamespace, "type", NULL);
1930: StrAllocMCopy(&bName, sNamespace, "Bag", NULL);
1931: HTRDF_addTriple(me, tName, description->m_sBagID, bName);
1932: HT_FREE(tName);
1933: HT_FREE(bName);
1934:
1935: }
1936: if (sChildID) {
2.3 frystyk 1937: char * tName = NULL;
2.1 frystyk 1938: char si[20];
1939: sprintf(si, "%d", iChildCount);
1940: StrAllocMCopy(&tName, sNamespace, "_", si, NULL);
1941: HTRDF_addTriple(me, tName, description->m_sBagID, sChildID);
1942: iChildCount++;
1943: HT_FREE(tName);
1944: }
1945: }
1946: HT_FREE(sChildID);
1947: }
1948: }
1949: } /* end of block*/
1950:
1951: description->m_bDone = YES;
1952:
1953: HT_FREE(dName);
1954: HT_FREE(aName);
1955: return (description->m_sID);
1956: }
1957:
1958: /*
1959: * Given an XML document (well-formed HTML, for example),
1960: * look for a suitable element to start parsing from
1961: *
1962: */
1963: PUBLIC BOOL HTRDF_processXML (HTRDF *me, HTElement *ele)
1964: {
1965: if (me && ele) {
1966: if (HTRDF_isRDF(me, ele)) {
1967: if (HTRDF_isRDFroot(me, ele)) {
1968: HTRDF_processRDF(me, ele);
1969: } else if (HTRDF_isDescription(me, ele)) {
1970: HTRDF_processDescription(me, ele, NO, me->m_bCreateBags,
1971: me->m_bCreateBags);
1972: }
1973: } else {
1974: HTList *cur = ele->m_children;
1975: HTElement *child = NULL;
1976: while ((child = (HTElement *) HTList_nextObject(cur))) {
1977: HTRDF_processXML(me, child);
1978: }
1979: }
1980:
1981: /* MISSING RECURSION */
1982:
1983: return YES;
1984: }
1985: return NO;
1986: }
1987:
1988: /*
1989: * Return the root element pointer. This requires the parsing
1990: * has been already done.
1991: */
1992: PUBLIC HTElement * HTRDF_root (HTRDF *me)
1993: {
1994: return me ? me->m_root : NULL;
1995: }
1996:
1997: /*
1998: * Return the full namespace URI for a given prefix sPrefix.
1999: * The default namespace is identified with xmlns prefix.
2000: * The namespace of xmlns attribute is an empty string.
2001: */
2002:
2.3 frystyk 2003: PUBLIC char * HTRDF_namespace(HTRDF * me, char * sPrefix)
2.1 frystyk 2004: {
2.3 frystyk 2005: char * nPrefix = NULL;
2.1 frystyk 2006: HTAssocList * calist;
2007: HTList * cur = me->m_namespaceStack;
2008:
2009: if (!sPrefix)
2010: StrAllocCopy(nPrefix, "xmlns");
2011:
2012: while ((calist = (HTAssocList *) HTList_nextObject(cur))) {
2.3 frystyk 2013: char * sValue = HTAssocList_findObjectCaseSensitiveExact(calist, sPrefix);
2.1 frystyk 2014: if (sValue) {
2015: StrAllocCopy(nPrefix, sValue);
2016: return nPrefix;
2017: }
2018: }
2019: /*
2020: * Give error only if
2021: * 1. the prefix is not from the reserved xml namespace
2022: * 2. the prefix is not xmlns which is to look for the default
2023: * namespace
2024: */
2025: if (!strcmp(sPrefix, XMLSCHEMA)) {
2026: StrAllocCopy(nPrefix, sPrefix);
2027: return nPrefix;
2028: } else if (!strcmp(sPrefix, "xmlns")) {
2029: StrAllocCopy(nPrefix, "");
2030: return nPrefix;
2031: } else
2032: HTPrint("Unresolved Namespace prefix %s\n", sPrefix);
2033:
2034: StrAllocCopy(nPrefix, "");
2035: return nPrefix;
2036: }
2037:
2038: /*
2039: * Methods to determine whether we are parsing
2040: * parseType="Literal" or parseType="Resource"
2041: */
2042:
2043: PUBLIC BOOL HTRDF_parseLiteral(HTRDF *me)
2044: {
2045: HTElement *e = NULL;
2046: HTList *cur = me->m_elementStack;
2047: if (!HTList_isEmpty(me->m_elementStack)) {
2048: while((e = (HTElement *) HTList_nextObject(cur))) {
2.3 frystyk 2049: char * sParseType = NULL;
2.1 frystyk 2050: sParseType = HTElement_getAttribute2(e, RDFMS, "parseType");
2051: if (sParseType) {
2052: if (strcmp(sParseType, "Resource"))
2053: return YES;
2054: }
2055: }
2056: }
2057: return NO;
2058: }
2059:
2060: /*
2061: * Methods to determine whether we are parsing
2062: * parseType="Literal" or parseType="Resource"
2063: */
2064:
2065: PUBLIC BOOL HTRDF_parseResource(HTRDF *me)
2066: {
2067: HTElement *e = NULL;
2068: HTList *cur = me->m_elementStack;
2069: if (!HTList_isEmpty(me->m_elementStack)) {
2070: while((e = (HTElement *) HTList_nextObject(cur))) {
2.3 frystyk 2071: char * sParseType = NULL;
2.1 frystyk 2072: sParseType = HTElement_getAttribute2(e, RDFMS, "parseType");
2073: if (sParseType) {
2074: if (!strcmp(sParseType, "Resource"))
2075: return YES;
2076: }
2077: }
2078: }
2079: return NO;
2080: }
2081: /*
2082: * checkAttributes goes through the attributes of element e<
2083: * to see
2084: * 1. if there are symbolic references to other nodes in the data model.
2085: * in which case they must be stored for later resolving with
2086: * resolveLater method.
2087: * 2. if there is an identity attribute, it is registered using
2088: * registerResource or registerID method.
2089: *
2090: */
2091:
2092: PRIVATE void HTRDF_checkAttributes(HTRDF *me, HTElement *e)
2093: {
2094: {
2.3 frystyk 2095: char * sResource = HTElement_getAttribute2(e, RDFMS, "resource");
2.1 frystyk 2096:
2097: if (sResource && sResource[0] == '#')
2098: HTRDF_resolveLater(me, e);
2099: }
2100: {
2.3 frystyk 2101: char * sAboutEach = HTElement_getAttribute2(e, RDFMS, "aboutEach");
2.1 frystyk 2102:
2103: if (sAboutEach && sAboutEach[0] == '#')
2104: HTRDF_resolveLater(me, e);
2105: }
2106: {
2.3 frystyk 2107: char * sAboutEachPrefix = HTElement_getAttribute2(e, RDFMS,
2.1 frystyk 2108: "aboutEachPrefix");
2109:
2110: if (sAboutEachPrefix && sAboutEachPrefix[0] == '#')
2111: HTRDF_resolveLater(me, e);
2112: }
2113: {
2.3 frystyk 2114: char * sAbout = HTElement_getAttribute2(e, RDFMS, "about");
2.1 frystyk 2115: if (sAbout) {
2116: if (sAbout[0] == '#')
2117: HTRDF_resolveLater(me, e);
2118: else
2119: HTRDF_registerResource(me, e);
2120: }
2121: }
2122:
2123: {
2.3 frystyk 2124: char * sBagID = HTElement_getAttribute2(e, RDFMS, "bagID");
2.1 frystyk 2125:
2126: if (sBagID) {
2127: HTRDF_registerID(me, sBagID, e);
2128: StrAllocCopy(e->m_sBagID, sBagID);
2129: }
2130: }
2131: {
2.3 frystyk 2132: char * sID = HTElement_getAttribute2(e, RDFMS, "ID");
2.1 frystyk 2133: if (sID) {
2134: HTRDF_registerID(me, sID, e);
2135: StrAllocCopy(e->m_sID, sID);
2136: }
2137: }
2138: }
2139: /*
2140: * Add the element e to the m_vResolveQueue
2141: * to be resolved later.
2142: */
2143: PUBLIC void HTRDF_resolveLater(HTRDF *me, HTElement *e)
2144: {
2145: HTList_addObject(me->m_vResolveQueue, e);
2146: }
2147: /*
2148: * Add an element e to the Hashtable m_hIDtable
2149: * which stores all nodes with an ID
2150: */
2151:
2.3 frystyk 2152: PUBLIC void HTRDF_registerID(HTRDF *me, char * sID, HTElement *e)
2.1 frystyk 2153: {
2154: if (HTHashtable_object(me->m_hIDtable, sID))
2155: HTPrint("Node ID %s redefined", sID);
2156: HTHashtable_addObject(me->m_hIDtable, sID, e);
2157: }
2158: /*
2159: * Add an element e to the Vector m_vResources
2160: * which stores all nodes with an URI
2161: */
2162: PUBLIC void HTRDF_registerResource(HTRDF *me, HTElement *e)
2163: {
2164: HTList_addObject(me->m_vResources, e);
2165: }
2166:
2167: /*
2168: * Look for a node by name sID from the Hashtable
2169: * m_hIDtable of all registered IDs.
2170: */
2171:
2.3 frystyk 2172: PUBLIC HTElement *HTRDF_lookforNode(HTRDF *me, char * sID)
2.1 frystyk 2173: {
2174: if (sID)
2175: return (HTElement *) HTHashtable_object(me->m_hIDtable, sID);
2176: return NULL;
2177: }
2178:
2179: /*
2180: ** Special method to deal with rdf:resource attribute
2181: */
2.3 frystyk 2182: PUBLIC char * HTRDF_getResource(HTRDF *me, HTElement *e)
2.1 frystyk 2183: {
2.3 frystyk 2184: char * sResource = HTElement_getAttribute2(e, RDFMS, "resource");
2.1 frystyk 2185: if (sResource != NULL && sResource[0] == '\0')
2186: sResource = me->m_sSource;
2187: return sResource;
2188: }
2189:
2190: /*
2191: ** Take an element ele with its parent element parent
2192: ** and evaluate all its attributes to see if they are non-RDF specific
2193: ** and non-XML specific in which case they must become children of
2194: ** the ele node.
2195: */
2196: PRIVATE BOOL HTRDF_expandAttributes (HTRDF * me, HTElement * parent, HTElement * ele)
2197: {
2198: BOOL foundAbbreviation = NO;
2.3 frystyk 2199: char * sAttribute = NULL;
2200: char * sValue = NULL;
2.1 frystyk 2201: HTAssoc * assoc;
2202: HTAssocList * cur = ele->m_attributes;
2203: int lxmlschema = strlen(XMLSCHEMA);
2204: int lrdfms = strlen(RDFMS);
2205:
2206: while ((assoc= (HTAssoc *) HTList_nextObject(cur))) {
2207: int latt;
2208: sAttribute = HTAssoc_name(assoc);
2209: sValue = HTAssoc_value(assoc);
2.4 barstow 2210:
2.1 frystyk 2211: latt = strlen(sAttribute);
2212: if (!strncmp(sAttribute, XMLSCHEMA, lxmlschema))
2213: continue;
2214:
2215: if (!strncmp(sAttribute, RDFMS, lrdfms) &&
2216: (sAttribute[lrdfms]!='_') &&
2217: latt > 5 && strcmp(&(sAttribute[latt-5]), "value") &&
2218: strcmp(&(sAttribute[latt-4]), "type"))
2219: continue;
2220:
2221: if (strlen(sValue) > 0) {
2222: HTAssocList * newAL = HTAssocList_new();
2223: HTElement * newElement = HTElement_new(sAttribute, newAL);
2224: HTElement * newData = HTElement_new2(sValue);
2225: HTElement_addChild(newElement, newData);
2226: HTElement_addChild(parent, newElement);
2227: foundAbbreviation = YES;
2228: }
2229: }
2230: return foundAbbreviation;
2231: }
2232:
2233: /**
2234: * Create a new reification ID by using a name part and an
2235: * incremental counter m_iReificationCounter.
2236: */
2.3 frystyk 2237: PUBLIC char * HTRDF_newReificationID (HTRDF *me)
2.1 frystyk 2238: {
2.3 frystyk 2239: char * nsid = NULL;
2.1 frystyk 2240: char nsrc[20];
2241: me->m_iReificationCounter++;
2242: sprintf(nsrc, "%d", me->m_iReificationCounter);
2243: if (!me->m_sSource) {
2244: StrAllocMCopy(&nsid, "genid", nsrc, NULL);
2245: } else {
2246: StrAllocMCopy(&nsid, me->m_sSource, "#genid", nsrc, NULL);
2247: }
2248: return nsid;
2249: }
2250:
2251: /*
2252: * reificate creates one new node and four new triples
2253: * and returns the ID of the new node
2254: */
2255:
2.3 frystyk 2256: PRIVATE char * HTRDF_reificate(HTRDF *me, char * sPredicate, char * sSubject,
2257: char * sObject, char * sNodeID)
2.1 frystyk 2258: {
2.3 frystyk 2259: char * sName = NULL;
2260: char * pName = NULL;
2261: char * oName = NULL;
2262: char * tName = NULL;
2263: char * stName = NULL;
2264: char * tNodeID = NULL;
2.1 frystyk 2265:
2266: if (!sNodeID)
2267: tNodeID = HTRDF_newReificationID(me);
2268: else
2269: StrAllocCopy(tNodeID, sNodeID);
2270:
2271: StrAllocMCopy(&sName, RDFMS, "subject", NULL);
2272: StrAllocMCopy(&pName, RDFMS, "predicate", NULL);
2273: StrAllocMCopy(&oName, RDFMS, "object", NULL);
2274: StrAllocMCopy(&tName, RDFMS, "type", NULL);
2275: StrAllocMCopy(&stName, RDFMS, "Statement", NULL);
2276:
2277: /*
2278: * The original statement must remain in the data model
2279: */
2280: HTRDF_addTriple(me, sPredicate, sSubject, sObject);
2281:
2282: /*
2283: * Do not reificate reificated properties
2284: */
2285: if (strcmp(sPredicate, sName) && strcmp(sPredicate, pName) &&
2286: strcmp(sPredicate, oName) && strcmp(sPredicate, tName)) {
2287:
2288: /* Reificate by creating 4 new triples */
2289: HTRDF_addTriple(me, pName, tNodeID, sPredicate);
2290: HTRDF_addTriple(me, sName, tNodeID, (sSubject[0]=='\0' ? me->m_sSource: sSubject));
2291: HTRDF_addTriple(me, oName, tNodeID, sObject);
2292: HTRDF_addTriple(me, tName, tNodeID, stName);
2293: } else
2294: HT_FREE(tNodeID);
2295:
2296: HT_FREE(sName);
2297: HT_FREE(pName);
2298: HT_FREE(oName);
2299: HT_FREE(tName);
2300: HT_FREE(stName);
2301:
2302: return tNodeID;
2303: }
2304: /*
2305: * Create a new triple and add it to the m_triples List
2306: * Send the triple to the Output stream
2307: */
2308:
2.3 frystyk 2309: PUBLIC void HTRDF_addTriple (HTRDF *me, char * sPredicate, char * sSubject,
2310: char * sObject)
2.1 frystyk 2311: {
2312: HTTriple *t = NULL;
2313:
2314: /*
2315: * If there is no subject (about=""), then use the URI/filename where
2316: * the RDF description came from
2317: */
2318: if (!sPredicate || !sSubject || !sObject) {
2319: HTPrint("Predicate %s when subject %s and object %s \n",
2320: sPredicate ? sPredicate : "null",
2321: sSubject ? sSubject : "null",
2322: sObject ? sObject : "null");
2323: return;
2324: }
2325:
2326: if (sSubject[0]=='\0')
2327: sSubject = me->m_sSource;
2328:
2329: t = HTTriple_new(sPredicate, sSubject, sObject);
2330:
2331: /* Call the triple callback handler (if any) with this new triple */
2332: if (me->newTripleInstance && t) (*(me->newTripleInstance))(me, t, me->tripleContext);
2333:
2334: HTList_addObject(me->m_triples, t);
2335: }
2336:
2337: /*
2338: * createBags method allows one to determine whether SiRPAC
2339: * produces Bag instances for each Description block.
2340: * The default setting is not to generate them.
2341: */
2342:
2343: PUBLIC void HTRDF_createBags(HTRDF *me, BOOL b)
2344: {
2345: if (me)
2346: me->m_bCreateBags = b;
2347: }
2348:
2349: /*
2350: Set output stream for RDF parser
2351: */
2352:
2353: PUBLIC void HTRDF_setOutputStream(HTRDF *me, HTStream *ostream)
2354: {
2355: if (me)
2356: me->ostream = ostream;
2357: }
2358:
2359: PUBLIC BOOL HTRDF_registerNewTripleCallback (HTRDF * me, HTTripleCallback_new * cbf, void * context)
2360: {
2361: if (me) {
2362: me->newTripleInstance = cbf;
2363: me->tripleContext = context;
2364: return YES;
2365: }
2366: return NO;
2367: }
2368:
2369: PUBLIC BOOL HTRDF_registerNewParserCallback (HTRDFCallback_new * me, void * context)
2370: {
2371: RDFInstance = me;
2372: RDFInstanceContext = context;
2373: return YES;
2374: }
2375:
2376: /* ------------------------------------------------------------------------- */
2377: /* HTRDFTriples STREAM HANDLERS */
2378: /* ------------------------------------------------------------------------- */
2379:
2380: PRIVATE int generate_triples(HTStream *me)
2381: {
2382: HTRDF *rdfp = me ? me->rdfparser : NULL;
2383: if (rdfp) {
2384:
2385: HTRDF_resolve(rdfp);
2386:
2387: HTRDF_processXML(rdfp, HTRDF_root(rdfp));
2388:
2389: return HT_OK;
2390: }
2391: return HT_ERROR;
2392: }
2393:
2394: PRIVATE int HTRDFTriples_flush (HTStream * me)
2395: {
2396: if (me->target)
2397: return (*me->target->isa->flush)(me->target);
2398: return HT_OK;
2399: }
2400:
2401: PRIVATE int HTRDFTriples_free (HTStream * me)
2402: {
2403: int status = HT_OK;
2404:
2405: status = generate_triples(me);
2406:
2407: HTRDF_delete(me->rdfparser);
2408:
2409: if (me->target) {
2410: if ((status = (*me->target->isa->_free)(me->target)) == HT_WOULD_BLOCK)
2411: return HT_WOULD_BLOCK;
2412: }
2413: HTTRACE(XML_TRACE, "RDF Parser.. FREEING...\n");
2414: HT_FREE(me);
2415: return status;
2416: }
2417:
2418: PRIVATE int HTRDFTriples_abort (HTStream * me, HTList * e)
2419: {
2420: HTTRACE(XML_TRACE, "RDF Parser.. ABORTING...\n");
2421: HTRDF_delete(me->rdfparser);
2422: if (me->target)
2423: (*me->target->isa->abort)(me->target, NULL);
2424: HT_FREE(me);
2425: return HT_ERROR;
2426: }
2427:
2428: PRIVATE int HTRDFTriples_write (HTStream * me, const char * buf, int len)
2429: {
2430: return HT_OK;
2431: }
2432:
2433: PRIVATE int HTRDFTriples_putCharacter (HTStream * me, char c)
2434: {
2435: return HTRDFTriples_write(me, &c, 1);
2436: }
2437:
2438: PRIVATE int HTRDFTriples_putString (HTStream * me, const char * s)
2439: {
2440: return HTRDFTriples_write(me, s, (int) strlen(s));
2441: }
2442:
2443: PRIVATE const HTStreamClass HTRDFTriplesClass =
2444: {
2445: "rdf",
2446: HTRDFTriples_flush,
2447: HTRDFTriples_free,
2448: HTRDFTriples_abort,
2449: HTRDFTriples_putCharacter,
2450: HTRDFTriples_putString,
2451: HTRDFTriples_write
2452: };
2453:
2454: PRIVATE HTStream * RDFParser_new (HTRequest * request,
2455: void * param,
2456: HTFormat input_format,
2457: HTFormat output_format,
2458: HTStream * output_stream)
2459: {
2460: HTStream * me = NULL;
2461: if ((me = (HTStream *) HT_CALLOC(1, sizeof(HTStream))) == NULL)
2462: HT_OUTOFMEM("HTRDFTriples_new");
2463: me->isa = &HTRDFTriplesClass;
2464: me->state = HT_OK;
2465: me->request = request;
2466: me->target = output_stream ? output_stream : HTErrorStream();
2467:
2468: /* Now create the RDF parser instance */
2469: if ((me->rdfparser = HTRDF_new()) == NULL) {
2470: HT_FREE(me);
2471: return HTErrorStream();
2472: }
2473:
2474: /* Set the source (I guess mostly to follow SiRPAC API) */
2475: {
2476: char * uri = HTAnchor_address((HTAnchor *) HTRequest_anchor(request));
2477: HTRDF_setSource(me->rdfparser, uri);
2478: HT_FREE(uri);
2479: }
2480:
2481: /* Where are we putting data? */
2482: HTRDF_setOutputStream(me->rdfparser, me);
2483:
2484: /* If you want to create Bags, change it to YES */
2485: HTRDF_createBags(me->rdfparser, NO);
2486:
2487: /* Register our new XML Instance handler */
2488: /* @@@ THIS SHOULD BE DONE USING XML NAMESPACE SO THAT WE DON'T CONFLICT @@@ */
2489: HTXMLCallback_registerNew(rdf_newInstance, me->rdfparser);
2490:
2491: HTTRACE(XML_TRACE, "RDF Parser.. Stream created\n");
2492:
2493: return me;
2494: }
2495:
2496: PUBLIC HTStream * HTRDFParser_new (HTRequest * request,
2497: void * param,
2498: HTFormat input_format,
2499: HTFormat output_format,
2500: HTStream * output_stream)
2501: {
2502: return HTXML_new(request, param, input_format, output_format,
2503: RDFParser_new(request, param, input_format, output_format, output_stream));
2504: }
2505:
2506: PRIVATE void triple_newInstance (HTRDF * rdfp, HTTriple * t, void * context)
2507: {
2508: if (rdfp && t) {
2509: HTStream *ostream = rdfp->ostream;
2510: if (ostream) {
2511: PUTC(ostream,'(');
2512: PUTS(ostream, t->m_sPredicate);
2513: PUTC(ostream,',');
2514: PUTS(ostream, t->m_sSubject);
2515: PUTC(ostream,',');
2516: PUTS(ostream, t->m_sObject);
2517: PUTC(ostream,')');
2518: PUTC(ostream,'\n');
2519: }
2520: }
2521: }
2522:
2523: PUBLIC HTStream * HTRDFToTriples (HTRequest * request,
2524: void * param,
2525: HTFormat input_format,
2526: HTFormat output_format,
2527: HTStream * output_stream)
2528: {
2.2 frystyk 2529: HTStream * me = RDFParser_new(request, param, input_format, output_format, output_stream);
2.1 frystyk 2530: HTTRACE(XML_TRACE, "RDF Converter. To Triples\n");
2531:
2532: /* Register our own tripple instance handler */
2533: HTRDF_registerNewTripleCallback(me->rdfparser, triple_newInstance, NULL);
2534:
2535: /* Create an XML parser instance and return */
2536: return HTXML_new(request, param, input_format, output_format, me);
2537: }
2538:
2.9 ! barstow 2539: /*
! 2540: ** This function initializes the XML parser and RDF parsers
! 2541: ** that are used to synchronously parse a file of RDF or a
! 2542: ** a buffer of RDF.
! 2543: **
! 2544: *@param xmlparser MODIFIED the XML parser to create. The caller is
! 2545: * responsible for free'ing this pointer.
! 2546: *@param rdfparser MODIFIED the RDF parser to create. The caller is
! 2547: * responsible for free'ing this pointer.
! 2548: *@param stream MODIFIED the HTStream needed by the RDF parser. The
! 2549: * caller is responsible for free'ing this pointer.
! 2550: *@param uri the URI created from name. It is used by the RDF parser
! 2551: * when creating anonymous node names. The caller is responsible for
! 2552: * freeing this pointer.
! 2553: *@param new_triple_callback the callback invoked when a new triple
! 2554: * is created. If NULL, the default handler will be invoked.
! 2555: *@param name the file name or buffer name to be used when the RDF
! 2556: * parser needs a document name
! 2557: *@return NULL if the initialization succeeds; otherwise a pointer
! 2558: * to a static string is returned. The caller must NOT free this
! 2559: * pointer.
! 2560: **/
! 2561:
! 2562: PRIVATE char * initialize_parsers(XML_Parser *xmlparser, HTRDF **rdfparser,
! 2563: HTStream **stream, char **uri, HTTripleCallback_new * new_triple_callback,
! 2564: const char * name)
! 2565: {
! 2566: /* Create an XML parser */
! 2567: #ifdef USE_NS
! 2568: *xmlparser = XML_ParserCreateNS (NULL, ':');
! 2569: #else
! 2570: *xmlparser = XML_ParserCreate (NULL);
! 2571: #endif /* USE_NS */
! 2572:
! 2573: if (!*xmlparser)
! 2574: return "Could not create an XML parser";
! 2575:
! 2576: /* We need also need RDF parser to create the triples */
! 2577: *rdfparser = HTRDF_new();
! 2578: if (!*rdfparser) {
! 2579: XML_ParserFree(*xmlparser);
! 2580: return "Could not allocate memory for RDF parser";
! 2581: }
! 2582:
! 2583: /* Must construct a URI from name for the parser */
! 2584: *uri = HTLocalToWWW (name, "file:");
! 2585: HTRDF_setSource(*rdfparser, *uri);
! 2586:
! 2587: HTRDF_createBags(*rdfparser, NO);
! 2588:
! 2589: if (new_triple_callback)
! 2590: HTRDF_registerNewTripleCallback(*rdfparser, new_triple_callback, NULL);
! 2591: else
! 2592: HTRDF_registerNewTripleCallback(*rdfparser, triple_newInstance, NULL);
! 2593:
! 2594: rdf_setHandlers(*xmlparser);
! 2595: XML_SetUserData(*xmlparser, *rdfparser);
! 2596:
! 2597: /* Create a stream to be used to process the triple output */
! 2598: if ((*stream = (HTStream *) HT_CALLOC(1, sizeof(HTStream))) == NULL) {
! 2599: HT_FREE(*uri);
! 2600: XML_ParserFree(*xmlparser);
! 2601: HTRDF_delete(*rdfparser);
! 2602: return "Could not allocate memory for HTStream";
! 2603: }
! 2604: (*stream)->isa = &HTRDFTriplesClass;
! 2605: (*stream)->state = HT_OK;
! 2606: (*stream)->request = NULL; /* Don't have a request */
! 2607: (*stream)->target = NULL; /* Don't have another stream */
! 2608: (*stream)->rdfparser = *rdfparser;
! 2609:
! 2610: return NULL;
! 2611: }
! 2612:
2.6 kahan 2613: /* HTRDFParseFile
2614: ** ---------------
2615: ** This function parses a file of RDF in a synchronous, non-blocking
2.9 ! barstow 2616: ** way. In other words, the file is not asynchronously loaded.
! 2617: **
! 2618: *@param file_name the name of the file to parse
! 2619: *@param new_triple_callback the callback that is invoked when a triple
! 2620: * is created. If NULL, the default triple handler is invoked.
! 2621: *@return NULL if the buffer is successfully parsed; otherwise a
! 2622: * pointer to a static error message is returned. The caller must NOT
! 2623: * free this pointer.
2.6 kahan 2624: */
2625:
2.5 barstow 2626: PUBLIC char * HTRDFParseFile (const char *file_name, HTTripleCallback_new * new_triple_callback)
2627: {
2628: char buff[512]; /* the file input buffer */
2629: FILE *fp;
2630: XML_Parser xmlparser;
2631: HTRDF *rdfparser;
2632: HTStream * stream = NULL;
2.6 kahan 2633: char *uri = NULL;
2.9 ! barstow 2634: char *s;
2.5 barstow 2635:
2636: /* Sanity check */
2.6 kahan 2637: if (!file_name) {
2638: HTTRACE(XML_TRACE, "RDFParseFile.. file name is NULL\n");
2639: return "RDFParseFile: file_name is NULL";
2640: }
2641:
2.5 barstow 2642: /* If the file does not exist, return now */
2643: fp = fopen (file_name, "r");
2.6 kahan 2644: if (!fp) { /* annotation index file doesn't exist */
2645: HTTRACE(XML_TRACE, "RDFParseFile.. file open failed\n");
2646: return "RDFParseFile: file open failed";
2647: }
2.5 barstow 2648:
2.9 ! barstow 2649: s = initialize_parsers(&xmlparser, &rdfparser, &stream, &uri, new_triple_callback, file_name);
! 2650: if (s) {
2.6 kahan 2651: fclose (fp);
2.9 ! barstow 2652: return s;
2.5 barstow 2653: }
2654:
2655: /*
2656: * The parsing occurs on one read buffer at a time instead of
2657: * reading everything into memory and then parsing
2658: */
2659: for (;;) {
2660: int done;
2661: int buff_len;
2662: fgets(buff, sizeof(buff), fp);
2663: if (ferror(fp)) {
2.6 kahan 2664: HT_FREE(uri);
2.5 barstow 2665: fclose (fp);
2666: XML_ParserFree(xmlparser);
2667: HTRDF_delete(rdfparser);
2668: HT_FREE(stream);
2669: return "RDFParseFile: error reading file";
2670: }
2671: done = feof(fp);
2672: if (done)
2673: buff_len = 0;
2674: else
2675: buff_len = strlen (buff);
2676: if (! XML_Parse(xmlparser, buff, buff_len, done)) {
2677: fprintf (stderr, "Parse error at line %d:\n%s\n",
2678: XML_GetCurrentLineNumber(xmlparser),
2679: XML_ErrorString(XML_GetErrorCode(xmlparser)));
2.6 kahan 2680: HT_FREE(uri);
2.5 barstow 2681: fclose(fp);
2682: XML_ParserFree(xmlparser);
2683: HTRDF_delete(rdfparser);
2684: HT_FREE(stream);
2685: return "RDFParseFile: parse error";
2686: }
2687: if (done)
2688: break;
2689: }
2690:
2691: /* The file has been parsed, generate the triples */
2692: generate_triples(stream);
2693:
2694: /* Cleanup */
2.6 kahan 2695: HT_FREE(uri);
2.5 barstow 2696: fclose (fp);
2.9 ! barstow 2697: XML_ParserFree(xmlparser);
! 2698: HTRDF_delete(rdfparser);
! 2699: HT_FREE(stream);
! 2700:
! 2701: return NULL;
! 2702: }
! 2703:
! 2704: /* HTRDFParseBuffer
! 2705: ** ---------------
! 2706: ** This function parses a buffer of RDF in a synchronous, non-blocking
! 2707: ** way.
! 2708: **
! 2709: *@param buffer the buffer to parse
! 2710: *@param buffer_name the buffer's name. This is used by the parser
! 2711: * when naming "anonymous" subjects
! 2712: *@param buffer_len the buffer's length (number of bytes)
! 2713: *@param new_triple_callback the callback that is invoked when a triple
! 2714: * is created. If NULL, the default triple handler is invoked.
! 2715: *@return NULL if the buffer is successfully parsed; otherwise a
! 2716: * pointer to a static error message is returned. The caller must NOT
! 2717: * free this pointer.
! 2718: */
! 2719:
! 2720: PUBLIC char * HTRDFParseBuffer (const char *buffer, const char *buffer_name, int buffer_len, HTTripleCallback_new * new_triple_callback)
! 2721: {
! 2722: XML_Parser xmlparser;
! 2723: HTRDF *rdfparser;
! 2724: HTStream * stream = NULL;
! 2725: char *uri;
! 2726: char *s;
! 2727:
! 2728: /* Sanity checks */
! 2729: if (!buffer)
! 2730: return "RDFParseBuffer: buffer is NULL";
! 2731: if (buffer_len <= 0)
! 2732: return "RDFParseBuffer: buffer_len is <=0";
! 2733: if (!buffer_name)
! 2734: return "RDFParseBuffer: buffer_name is NULL";
! 2735:
! 2736: s = initialize_parsers(&xmlparser, &rdfparser, &stream, &uri, new_triple_callback, buffer_name);
! 2737: if (s)
! 2738: return s;
! 2739:
! 2740: if (! XML_Parse(xmlparser, buffer, buffer_len, 1)) {
! 2741: fprintf(stderr, "Parse error at line %d:\n%s\n",
! 2742: XML_GetCurrentLineNumber(xmlparser),
! 2743: XML_ErrorString(XML_GetErrorCode(xmlparser)));
! 2744: HT_FREE(uri);
! 2745: XML_ParserFree(xmlparser);
! 2746: HTRDF_delete(rdfparser);
! 2747: HT_FREE(stream);
! 2748: return "RDFParseBuffer: parse error";
! 2749: }
! 2750:
! 2751: /* The buffer has been parsed, generate the triples */
! 2752: generate_triples(stream);
! 2753:
! 2754: /* Cleanup */
! 2755: HT_FREE(uri);
2.5 barstow 2756: XML_ParserFree(xmlparser);
2757: HTRDF_delete(rdfparser);
2758: HT_FREE(stream);
2759:
2760: return NULL;
2761: }
Webmaster