Annotation of libwww/Library/src/HTRDF.c, revision 2.5
2.1 frystyk 1: /*
2: ** RDF PARSER
3: **
2.4 barstow 4: ** @(#) $Id: HTRDF.c,v 2.3 1999/05/05 18:41:49 frystyk Exp $
2.1 frystyk 5: **
6: ** Copyright © 1995-1998 World Wide Web Consortium, (Massachusetts
7: ** Institute of Technology, Institut National de Recherche en
8: ** Informatique et en Automatique, Keio University). All Rights
9: ** Reserved. This program is distributed under the W3C's Software
10: ** Intellectual Property License. This program is distributed in the hope
11: ** that it will be useful, but WITHOUT ANY WARRANTY; without even the
12: ** implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
13: ** PURPOSE. See W3C License http://www.w3.org/Consortium/Legal/ for more
14: ** details.
15: **
16: ** This module requires expat
17: **
18: ** AUTHOR:
19: ** John Punin <puninj@cs.rpi.edu>,
20: ** Henrik
21: */
22:
23: #include "WWWLib.h"
24: #include "WWWInit.h"
25: #include "WWWXML.h"
26: #include "HTRDF.h"
27:
/* Textual prefix of "file" scheme URIs. */
static const char * FILE_SCHEME = "file://";
! 29:
2.1 frystyk 30: struct _HTStream {
31: const HTStreamClass * isa;
32: int state;
33: HTRequest * request;
34: HTStream * target;
35: HTRDF * rdfparser;
36: };
37:
/* Hand one character (PUTC) or one string (PUTS) to the target of stream t */
#define PUTC(t,c) \
    ((*(t)->target->isa->put_character)((t)->target, (c)))
#define PUTS(t,s) \
    ((*(t)->target->isa->put_string)((t)->target, (s)))
40:
/*
** An RDF triple. Each member is a private copy of the string handed to
** HTTriple_new() and is released by HTTriple_delete().
*/
struct _HTTriple {
    char *      m_sPredicate;
    char *      m_sSubject;
    char *      m_sObject;
};
46:
47: struct _HTElement {
2.3 frystyk 48: char * m_sName;
2.1 frystyk 49: HTAssocList * m_attributes;
50: HTList * m_children;
2.3 frystyk 51: char * m_sID;
52: char * m_sBagID;
2.1 frystyk 53: HTList * m_vTargets;
54: BOOL m_bDone;
2.3 frystyk 55: char * m_sPrefix;
56: char * m_sContent;
2.1 frystyk 57: };
58:
59: struct _HTRDFParser {
60: HTList * m_namespaceStack;
61: HTList * m_elementStack;
62: HTElement * m_root;
63: HTList * m_triples;
2.3 frystyk 64: char * m_sSource;
2.1 frystyk 65: HTList * m_vAllNameSpaces;
66:
67: BOOL m_bCreateBags;
68: BOOL m_bFetchSchemas;
69:
70: HTList * m_parseTypeStack;
71: HTList * m_parseElementStack;
2.3 frystyk 72: char * m_sLiteral;
2.1 frystyk 73:
74: HTList * m_vResources;
75: HTList * m_vResolveQueue;
76: HTHashtable * m_hIDtable;
77: int m_iReificationCounter;
78:
79: HTStream * ostream;
80:
81: HTTripleCallback_new * newTripleInstance;
82: void * tripleContext;
83: };
84:
85: /* @@@ Should not be global but controlled by name spaces @@@ */
86: PRIVATE HTRDFCallback_new * RDFInstance = NULL;
87: PRIVATE void * RDFInstanceContext = NULL;
88:
2.3 frystyk 89: PRIVATE char * HTRDF_processContainer (HTRDF *me, HTElement *e);
90: PRIVATE char * HTRDF_processPredicate (HTRDF *me, HTElement *predicate,
2.1 frystyk 91: HTElement *description,
2.3 frystyk 92: char * sTarget,
2.1 frystyk 93: BOOL reificate);
2.3 frystyk 94: PRIVATE void HTRDF_processListItem (HTRDF *me,char * sID, HTElement *listitem,
2.1 frystyk 95: int iCounter);
96: PRIVATE void HTRDF_checkAttributes (HTRDF *me,HTElement *Element);
97: PRIVATE BOOL HTRDF_expandAttributes (HTRDF *me, HTElement *parent, HTElement *ele);
2.3 frystyk 98: PRIVATE char * HTRDF_reificate (HTRDF *me, char * sPredicate, char * sSubject,
99: char * sObject, char * sNodeID);
2.4 barstow 100: /* ------------------------------------------------------------------------- */
101:
102: /*
103: ** Append the markup for the given element and its attribute to the
104: ** parser's "Literal" buffer. This buffer is filled in when parseType="Literal".
105: */
106: PRIVATE void addMarkupStart (HTRDF *rdfp, const char *name, const char **atts)
107: {
108: int i=0;
109:
110: if (!rdfp || !name) return;
111:
112: StrAllocCat(rdfp->m_sLiteral, "<");
113: StrAllocCat(rdfp->m_sLiteral, name);
114:
115: while (atts[i]) {
116: StrAllocCat(rdfp->m_sLiteral, " ");
117: StrAllocCat(rdfp->m_sLiteral, atts[i]);
118: StrAllocCat(rdfp->m_sLiteral, "=\"");
119: StrAllocCat(rdfp->m_sLiteral, atts[i+1]);
120: StrAllocCat(rdfp->m_sLiteral, "\"");
121: i+=2;
122: }
123:
124: StrAllocCat(rdfp->m_sLiteral, ">");
125: }
126:
127: /*
128: ** Terminate this element's "Literal" buffer. This buffer is filled in when
129: ** parseType="Literal".
130: */
131: PRIVATE void addMarkupEnd (HTRDF *rdfp, const char *name)
132: {
133: if (!rdfp || !name) return;
134:
135: StrAllocCat(rdfp->m_sLiteral, "</");
136: StrAllocCat(rdfp->m_sLiteral, name);
137: StrAllocCat(rdfp->m_sLiteral, ">");
138: }
2.1 frystyk 139:
140: /* ------------------------------------------------------------------------- */
141:
142: /*
143: ** Searches a whole list of Strings and returns true if the String is found.
144: */
2.3 frystyk 145: PRIVATE BOOL HTList_contains (HTList *list, char * s)
2.1 frystyk 146: {
147: HTList *cur = list;
2.3 frystyk 148: char * cs = NULL;
149: while ((cs = (char *) HTList_nextObject(cur))) {
2.1 frystyk 150: if (!strcmp(cs, s)) return YES;
151: }
152: return NO;
153: }
154:
155: /*
156: ** Useful function that Trims a string
157: ** @@@ Should use HTStrip() @@@
158: */
159: PRIVATE char * trim (char *s)
160: {
161: char *p = NULL, *t = NULL;
162: int len = s ? strlen(s) : -1;
163: if (s && len > 0) {
164: StrAllocCopy(t, s);
165: p = &(s[len-1]);
166: while(p!=s) {
167: if (!isspace((int)(*p)))
168: break;
169: p--;
170: }
171: t[(int)(p-s)+1] = '\0';
172: if (isspace((int) t[(int)(p-s)]))
173: t[(int)(p-s)] = '\0';
174: }
175: return t;
176: }
177:
178: /* ------------------------------------------------------------------------- */
179: /* TRIPLE of RDF */
180: /* ------------------------------------------------------------------------- */
181:
2.3 frystyk 182: PUBLIC HTTriple * HTTriple_new (char * p, char * s, char * o)
2.1 frystyk 183: {
184: HTTriple * me = NULL;
185: if (p && s && o) {
186: if ((me = (HTTriple *) HT_CALLOC(1, sizeof(HTTriple))) == NULL)
187: HT_OUTOFMEM("HTTriple_new");
188: StrAllocCopy(me->m_sPredicate, p);
189: StrAllocCopy(me->m_sSubject, s);
190: StrAllocCopy(me->m_sObject, o);
191: }
192: return me;
193: }
194:
195: PUBLIC BOOL HTTriple_delete (HTTriple * me)
196: {
197: if (me) {
198: HT_FREE(me->m_sPredicate);
199: HT_FREE(me->m_sSubject);
200: HT_FREE(me->m_sObject);
201: HT_FREE(me);
202: return YES;
203: }
204: return NO;
205: }
206:
207: PUBLIC void HTTriple_print (HTTriple * me)
208: {
209: if (me)
210: HTPrint("TRIPLE(%s,%s,%s)\n", me->m_sPredicate, me->m_sSubject,
211: me->m_sObject);
212: }
213:
2.3 frystyk 214: PUBLIC char * HTTriple_subject (HTTriple * me)
2.1 frystyk 215: {
216: return me ? me->m_sSubject : NULL;
217: }
218:
2.3 frystyk 219: PUBLIC char * HTTriple_predicate (HTTriple * me)
2.1 frystyk 220: {
221: return me ? me->m_sPredicate : NULL;
222: }
223:
2.3 frystyk 224: PUBLIC char * HTTriple_object (HTTriple * me)
2.1 frystyk 225: {
226: return me ? me->m_sObject : NULL;
227: }
228:
229: /* ------------------------------------------------------------------------- */
230: /* ELEMENT of RDF */
231: /* ------------------------------------------------------------------------- */
232:
2.3 frystyk 233: PUBLIC HTElement * HTElement_new (char * sName, HTAssocList * al)
2.1 frystyk 234: {
235: HTElement * me = NULL;
236: if (sName) {
237: if ((me = (HTElement *) HT_CALLOC(1, sizeof(HTElement))) == NULL)
238: HT_OUTOFMEM("HTElement_new");
239: StrAllocCopy(me->m_sName, sName);
240: me->m_attributes = al ? al : HTAssocList_new();
241: me->m_children = HTList_new();
242: /*me->m_nodes = HTAssocList_new();*/ /* Sirpac does not use nodes list */
243: me->m_vTargets = HTList_new();
244: me->m_bDone = FALSE;
245: }
246: return me;
247: }
248:
249: /*
250: ** Creates a Data Element and saves the data in the Content field.
251: ** Data Element does not have attributes
252: */
2.3 frystyk 253: PUBLIC HTElement * HTElement_new2 (char * sContent)
2.1 frystyk 254: {
255: HTElement * me = NULL;
256: if (sContent) {
257: if ((me = (HTElement *) HT_CALLOC(1, sizeof(HTElement))) == NULL)
258: HT_OUTOFMEM("HTElement_new2");
259: StrAllocMCopy(&me->m_sName, "[DATA: ", sContent, "]", NULL);
260: me->m_attributes = NULL;
261: me->m_children = HTList_new();
262: /*me->m_nodes = HTAssocList_new();*/
263: me->m_vTargets = HTList_new();
264: me->m_bDone = FALSE;
265: StrAllocCopy(me->m_sContent, sContent);
266: }
267: return me;
268: }
269:
2.3 frystyk 270: PUBLIC BOOL HTElement_addData (HTElement *me, char * sContent)
2.1 frystyk 271: {
272: if (me && sContent) {
273: int l = strlen(me->m_sName);
274: StrAllocCat(me->m_sContent, sContent);
275: me->m_sName[l-1]='\0';
276: StrAllocMCat(&me->m_sName, sContent, "]", NULL);
277: return YES;
278: }
279: return NO;
280: }
281:
282: PUBLIC BOOL HTElement_delete (HTElement * me)
283: {
284: if (me) {
285: HT_FREE(me->m_sName);
286: if (me->m_attributes) HTAssocList_delete(me->m_attributes);
287: if (me->m_children) HTList_delete(me->m_children);
288: HT_FREE(me->m_sID);
289: HT_FREE(me->m_sBagID);
290: if (me->m_vTargets) HTList_delete(me->m_vTargets);
291: HT_FREE(me->m_sPrefix);
292: HT_FREE(me->m_sContent);
293: HT_FREE(me);
294: return YES;
295: }
296: return NO;
297: }
298:
299: PUBLIC BOOL HTElement_addChild (HTElement * me, HTElement * element)
300: {
301: return (me && element) ? HTList_appendObject(me->m_children, element) : NO;
302: }
303:
2.3 frystyk 304: PUBLIC BOOL HTElement_addAttribute (HTElement * me, char * sName, char * sValue)
2.1 frystyk 305: {
306: return (me && sName && sValue) ?
307: HTAssocList_addObject(me->m_attributes, sName, sValue) : NO;
308: }
309:
2.3 frystyk 310: PUBLIC BOOL HTElement_removeAttribute (HTElement * me, char * sName)
2.1 frystyk 311: {
312: return (me && sName) ? HTAssocList_removeObject(me->m_attributes, sName) : NO;
313: }
314:
2.3 frystyk 315: PUBLIC char * HTElement_getAttribute (HTElement * me, char * sName)
2.1 frystyk 316: {
317: return (me && sName) ? HTAssocList_findObjectCaseSensitiveExact(me->m_attributes, sName) : NULL;
318: }
319:
2.3 frystyk 320: PUBLIC char * HTElement_getAttribute2 (HTElement * me, char * sNamespace, char * sName)
2.1 frystyk 321: {
2.3 frystyk 322: char * fValue = NULL;
323: char * fName = NULL;
2.1 frystyk 324: if (me && sNamespace && sName) {
325: StrAllocMCopy(&fName, sNamespace, sName, NULL);
326: fValue = HTAssocList_findObjectCaseSensitiveExact(me->m_attributes, fName);
327: HT_FREE(fName);
328: }
329: return fValue;
330: }
331:
332: PUBLIC BOOL HTElement_addTarget (HTElement * me, HTElement * element)
333: {
334: return (me && element) ? HTList_addObject(me->m_vTargets, element) : NO;
335: }
336:
337: PUBLIC HTElement * HTElement_target (HTElement * me)
338: {
339: return me ? (HTElement *) HTList_lastObject(me->m_vTargets) : NULL;
340: }
341:
342: PUBLIC BOOL HTElement_instanceOfData (HTElement * me)
343: {
344: return (me && me->m_sContent) ? YES : NO;
345: }
346:
347: /* ------------------------------------------------------------------------- */
348: /* EXPAT HANDLERS */
349: /* ------------------------------------------------------------------------- */
350:
351: /*
352: * Called for each new element.
353: * Build up the document tree using an element stack
354: */
355: PRIVATE void XML_startElement (void * userData,
356: const XML_Char * name, const XML_Char ** atts)
357: {
358: HTRDF * rdfp = (HTRDF *) userData;
359: HTAssocList * namespaces = HTAssocList_new();
360: HTAssocList * newAL = HTAssocList_new();
361: int i = 0;
2.5 ! barstow 362:
2.1 frystyk 363: /**
364: * The following loop tries to identify special xmlns prefix
365: * attributes and update the namespace stack accordingly.
366: * While doing all this, it builds another AttributeList instance
367: * which will hold the expanded names of the attributes
368: * (I think this approach is only useful for RDF which uses
369: * attributes as an abbreviated syntax for element names)
370: */
371: if (atts) {
372: while (atts[i]) {
2.3 frystyk 373: char * aName = (char * ) atts[i];
2.1 frystyk 374: if (!strcmp(aName, "xmlns")) {
2.3 frystyk 375: char * aValue = (char *) atts[i+1];
2.1 frystyk 376: int len = aValue ? strlen(aValue) : -1;
377: if (len == 0 && !rdfp->m_sSource)
378: aValue = rdfp->m_sSource;
379: HTAssocList_addObject(namespaces, aName, aValue);
380: /* save all non-RDF schema addresses */
381: if (!HTList_contains(rdfp->m_vAllNameSpaces, aValue) &&
382: strncmp(aValue, RDFMS, strlen(RDFMS)) &&
383: strncmp(aValue, RDFSCHEMA, strlen(RDFSCHEMA))) {
2.3 frystyk 384: char * nname = NULL;
2.1 frystyk 385: StrAllocCopy(nname, aValue);
386: HTList_addObject(rdfp->m_vAllNameSpaces, nname);
387: }
388:
389: /* Special case: Don't save document's own address */
390: if (rdfp->m_sSource &&
391: !strncmp(aValue, rdfp->m_sSource, strlen(rdfp->m_sSource))) {
2.3 frystyk 392: char * nname = NULL;
2.1 frystyk 393: StrAllocCopy(nname, aValue);
394: HTList_addObject(rdfp->m_vAllNameSpaces, nname);
395: }
396: } else if (!strncmp(aName, "xmlns:", 6)) {
2.3 frystyk 397: char * aValue = (char *) atts[i+1];
398: char * nName = NULL;
2.1 frystyk 399: int len = aValue ? strlen(aValue) : -1;
400: if (len == 0 && !rdfp->m_sSource)
401: aValue = rdfp->m_sSource;
402: StrAllocCopy(nName, &(aName[6]));
403: HTAssocList_addObject(namespaces, nName, aValue);
404: HT_FREE(nName);
405:
406: /* Save all non-RDF schema addresses */
407: if (!HTList_contains(rdfp->m_vAllNameSpaces, aValue) &&
408: strncmp(aValue, RDFMS, strlen(RDFMS)) &&
409: strncmp(aValue, RDFSCHEMA, strlen(RDFSCHEMA))) {
2.3 frystyk 410: char * nname = NULL;
2.1 frystyk 411: StrAllocCopy(nname, aValue);
412: HTList_addObject(rdfp->m_vAllNameSpaces, nname);
413: }
414:
415: /* Special case: Don't save document's own address */
416: if (rdfp->m_sSource &&
417: !strncmp(aValue, rdfp->m_sSource, strlen(rdfp->m_sSource))) {
2.3 frystyk 418: char * nname = NULL;
2.1 frystyk 419: StrAllocCopy(nname, aValue);
420: HTList_addObject(rdfp->m_vAllNameSpaces, nname);
421: }
422: }
423: i+=2;
424: } /* end of while */
425: } /* end of if */
426:
427: /*
428: ** Place new namespace declarations into the stack
429: ** (Yes, I could optimize this a bit, not it wastes space
430: ** if there are no xmlns definitions)
431: */
432: HTList_addObject(rdfp->m_namespaceStack, namespaces);
433:
434: /*
435: ** Figure out the prefix part if it exists and
436: ** determine the namespace of the element accordingly
437: */
438: {
2.3 frystyk 439: char * sNamespace = NULL;
440: char * sElementName = NULL;
441: char * sPrefix2 = NULL;
2.1 frystyk 442: HTElement *newElement = NULL;
443: char *pindex = strchr(name, ':');
444: int ix = pindex ? (int) (pindex - name) : -1 ;
445: if (ix > 0) {
446: if (!(sPrefix2 = HT_MALLOC(ix+1)))
447: HT_OUTOFMEM("XML_startELement");
448: strncpy(sPrefix2, name, ix);
449: sPrefix2[ix]='\0';
450: sNamespace = HTRDF_namespace(rdfp, sPrefix2);
451: StrAllocCopy(sElementName, &(name[ix+1]));
452: HT_FREE(sPrefix2);
453: } else {
454: sNamespace = HTRDF_namespace(rdfp, "xmlns");
455: StrAllocCopy(sElementName, name);
456: }
457:
458: /*
459: * Finally look for attributes other than the special xmlns,
460: * expand them, and place to the new Attribute List
461: */
462: i = 0;
463: if (atts) {
464: while (atts[i]) {
2.3 frystyk 465: char * aName = (char *) atts[i];
466: char * sAttributeNamespace = NULL;
2.1 frystyk 467: if (strncmp(aName, "xmlns", 5)) {
2.3 frystyk 468: char * aValue = (char *) atts[i+1];
469: char * sPrefix = NULL;
2.1 frystyk 470: /* Expat does not have type for attributes */
471: pindex = strchr(aName, ':');
472: ix = pindex ? (int) (pindex - aName) : -1;
473: if (ix > 0) {
474: if (!(sPrefix = HT_MALLOC(ix+1)))
475: HT_OUTOFMEM("XML_startELement");
476: strncpy(sPrefix, aName, ix);
477: sPrefix[ix] = '\0';
478: sAttributeNamespace = HTRDF_namespace(rdfp, sPrefix);
479: aName = &(aName[ix+1]);
480: HT_FREE(sPrefix);
481: } else {
482: if (!sNamespace)
483: sAttributeNamespace = HTRDF_namespace(rdfp, "xmlns");
484: else
485: StrAllocCopy(sAttributeNamespace, sNamespace);
486: }
487:
488: if (HTRDF_parseLiteral(rdfp)) {
489: if (!sPrefix) {
490: if (!(sPrefix = HT_MALLOC(8)))
491: HT_OUTOFMEM("XML_startELement");
492: sprintf(sPrefix, "gen%d\n", i);
493: }
494: {
2.3 frystyk 495: char * fName = NULL;
2.1 frystyk 496: StrAllocMCopy(&fName, sPrefix, ":", aValue, NULL);
497: HTAssocList_addObject(newAL, fName, aValue);
498: HT_FREE(fName);
499: StrAllocMCopy(&fName, "xmlns:", sPrefix, NULL);
500: HTAssocList_addObject(newAL, fName, sAttributeNamespace);
501: HT_FREE(fName);
502: }
503: } else {
2.3 frystyk 504: char * fName = NULL;
2.1 frystyk 505: StrAllocMCopy(&fName, sAttributeNamespace, aName, NULL);
506: HTAssocList_addObject(newAL, fName, aValue);
507: HT_FREE(fName);
508: }
509:
510: HT_FREE(sAttributeNamespace);
511:
512: /*
513: ** This call will try to see if the user is using
514: ** RDF look-alike elements from another namespace
515: **
516: ** Note: you can remove the call if you wish
517: */
518: #if 0
519: HTRDF_likeRDF (rdfp, sAttributeNamespace, aName);
520: #endif
521:
522: } /* end of if */
523: i+=2;
524: } /* end of while */
525: } /* end of if atts */
526:
527: /*
528: * If we have parseType="Literal" set earlier, this element
529: * needs some additional attributes to make it stand-alone
530: * piece of XML
531: */
532: if (HTRDF_parseLiteral(rdfp)) {
2.3 frystyk 533: char * fName = NULL;
2.4 barstow 534:
2.1 frystyk 535: if (!sPrefix2) {
536: if (sNamespace)
537: HTAssocList_addObject(newAL, "xmlns:gen", sNamespace);
538: StrAllocMCopy(&fName, "gen", sElementName, NULL);
539: newElement = HTElement_new(fName, newAL);
540: StrAllocCopy(newElement->m_sPrefix, "gen");
541: HT_FREE(fName);
542: } else {
2.3 frystyk 543: char * sAttributeNamespace = HTRDF_namespace(rdfp, sPrefix2);
2.1 frystyk 544: if (sAttributeNamespace) {
545: StrAllocMCopy(&fName, "xmlns:", sPrefix2, NULL);
546: HTAssocList_addObject(newAL, fName, sAttributeNamespace);
547: HT_FREE(fName);
548: }
549: StrAllocMCopy(&fName, sPrefix2, ":", sElementName, NULL);
550: newElement = HTElement_new(fName, newAL);
551: HT_FREE(fName);
552: }
553: } else {
2.3 frystyk 554: char * fName = NULL;
2.1 frystyk 555: StrAllocMCopy(&fName, sNamespace, sElementName, NULL);
556: newElement = HTElement_new(fName, newAL);
557: HT_FREE(fName);
558: /* HTRDF_likeRDF (rdfp, sNamespace, sElementName); */
559: }
560: HT_FREE(sElementName);
561: HT_FREE(sNamespace);
562: HTRDF_checkAttributes(rdfp, newElement);
2.4 barstow 563:
2.1 frystyk 564: /*
565: ** Check parseType
566: */
567: {
2.3 frystyk 568: char * fName = NULL;
569: char * sLiteralValue = NULL;
2.1 frystyk 570: StrAllocMCopy(&fName, RDFMS, "parseType", NULL);
571: sLiteralValue = HTElement_getAttribute(newElement, fName);
572: HT_FREE(fName);
573: if (sLiteralValue && strcmp(sLiteralValue, "Resource")) {
574: /**
575: * This is the management of the element where
576: * parseType="Literal" appears
577: *
578: * You should notice RDF V1.0 conforming implementations
579: * must treat other values than Literal and Resource as
580: * Literal. This is why the condition is !equals("Resource")
581: */
582:
583: HTList_addObject(rdfp->m_parseTypeStack, sLiteralValue);
584: if (!HTList_isEmpty(rdfp->m_elementStack)) {
585: HTElement *e = (HTElement *)
586: HTList_lastObject(rdfp->m_elementStack);
587: HTElement_addChild(e, newElement);
588: }
589: HTList_addObject(rdfp->m_elementStack, newElement);
590: HTList_addObject(rdfp->m_parseElementStack, newElement);
591: HT_FREE(rdfp->m_sLiteral);
592: StrAllocCopy(rdfp->m_sLiteral, "");
593: return;
594: }
595:
596: if (HTRDF_parseLiteral(rdfp)) {
597: /*
598: * This is the management of any element nested within
599: * a parseType="Literal" declaration
600: */
2.4 barstow 601: /* Add the element to the parser's literal buffer */
602: addMarkupStart (rdfp, name, atts);
603:
2.1 frystyk 604: HTList_addObject(rdfp->m_elementStack, newElement);
605: return;
606: }
607:
608: /*
609: ** Update the containment hierarchy with the stack.
610: */
611: if (!HTList_isEmpty(rdfp->m_elementStack)) {
612: HTElement *e = (HTElement *)
613: HTList_lastObject(rdfp->m_elementStack);
614: HTElement_addChild(e, newElement);
615: }
616:
617: /*
618: ** Place the new element into the stack
619: */
620: HTList_addObject(rdfp->m_elementStack, newElement);
621: if (sLiteralValue && !strcmp(sLiteralValue, "Resource")) {
622: HTList_addObject(rdfp->m_parseTypeStack, sLiteralValue);
623: HTList_addObject(rdfp->m_parseElementStack, newElement);
624: HT_FREE(rdfp->m_sLiteral);
625: StrAllocCopy(rdfp->m_sLiteral, "");
626:
627: /*
628: * Since parseType="Resource" implies the following
629: * production must match Description, let's create
630: * an additional Description node here in the document tree.
631: */
632: {
2.3 frystyk 633: char * fName = NULL;
2.1 frystyk 634: HTElement *desc = NULL;
635: HTAssocList * al = HTAssocList_new ();
636: StrAllocMCopy(&fName, RDFMS, "Description", NULL);
637: desc = HTElement_new(fName, al);
638: HT_FREE(fName);
639: if (!HTList_isEmpty(rdfp->m_elementStack)) {
640: HTElement *e = (HTElement *)
641: HTList_lastObject(rdfp->m_elementStack);
642: HTElement_addChild(e, desc);
643: }
644: HTList_addObject(rdfp->m_elementStack, desc);
645: }
646: } /* end of if */
647: } /* end of block */
648: } /* end of block */
649: }
650:
651: /*
652: * For each end of an element scope step back in the
653: * element and namespace stack
654: */
655: PRIVATE void XML_endElement (void * userData,
656: const XML_Char * name)
657: {
658: HTRDF * rdfp = (HTRDF *) userData;
659: BOOL bParseLiteral = rdfp ? HTRDF_parseLiteral(rdfp) : NO;
660: HTAssocList * namespaces = HTList_removeLastObject(rdfp->m_namespaceStack);
661: rdfp->m_root = (HTElement *) HTList_removeLastObject(rdfp->m_elementStack);
662: if (namespaces) HTAssocList_delete(namespaces);
663:
664: if (bParseLiteral) {
665: HTElement *pe = (HTElement *)
666: HTList_lastObject(rdfp->m_parseElementStack);
667: if (pe != rdfp->m_root) {
2.4 barstow 668: /* Terminate the literal */
669: addMarkupEnd (rdfp, name);
2.1 frystyk 670: } else {
671: HTElement *de = HTElement_new2(rdfp->m_sLiteral);
672: HTElement_addChild(pe, de);
2.4 barstow 673:
2.1 frystyk 674: HT_FREE(rdfp->m_sLiteral);
675: StrAllocCopy(rdfp->m_sLiteral, "");
676: HTList_removeLastObject(rdfp->m_parseElementStack);
677: HTList_removeLastObject(rdfp->m_parseTypeStack);
678: }
679: } else if (HTRDF_parseResource(rdfp)) {
680: /**
681: * If we are doing parseType="Resource"
682: * we need to explore whether the next element in
683: * the stack is the closing element in which case
684: * we remove it as well (remember, there's an
685: * extra Description element to be removed)
686: */
687: if (!HTList_isEmpty(rdfp->m_elementStack)) {
688: HTElement *pe = (HTElement *)
689: HTList_lastObject(rdfp->m_parseElementStack);
690: HTElement *e = (HTElement *)
691: HTList_lastObject(rdfp->m_elementStack);
692: if (pe == e) {
693: e = (HTElement *) HTList_removeLastObject(rdfp->m_elementStack);
694: HTList_removeLastObject(rdfp->m_parseElementStack);
695: HTList_removeLastObject(rdfp->m_parseTypeStack);
696: }
697: }
698: }
699: }
700:
701: PRIVATE void XML_characterData (void * userData,
702: const XML_Char * s, int len)
703: {
704: /*
705: * Place all characters as Data instance to the containment
706: * hierarchy with the help of the stack.
707: */
708: HTRDF * rdfp = (HTRDF *) userData;
2.3 frystyk 709: HTElement * e = (HTElement *) HTList_lastObject(rdfp->m_elementStack);
710: char * tstr = NULL;
711: char * str = NULL;
712: if (!(str = (char *) HT_MALLOC(len+1)))
2.1 frystyk 713: HT_OUTOFMEM("XML_characterData");
714: strncpy(str, s, len);
715: str[len]='\0';
716: if (HTRDF_parseLiteral(rdfp)) {
717: StrAllocCat(rdfp->m_sLiteral, str);
718: HT_FREE(str);
719: return;
720: }
721: /* JUST FOR EXPAT */
722: {
723: HTElement *lch = (HTElement *) HTList_lastObject(e->m_children);
724: if (lch && HTElement_instanceOfData(lch)) {
725: HTElement_addData(lch, str);
726: HT_FREE(str);
727: return;
728: }
729: }
730: /*
731: * Warning: this is not correct procedure according to XML spec.
732: * All whitespace matters!
733: */
734: tstr = trim(str);
735: if (strlen(tstr) > 0) {
736: HTElement * de = HTElement_new2(tstr);
737: HTElement_addChild(e, de);
738: }
739: HT_FREE(str); HT_FREE(tstr);
740: }
741:
742: PRIVATE void XML_processingInstruction (void * userData,
743: const XML_Char * target,
744: const XML_Char * data)
745: {
746: return;
747: }
748:
749: /*
750: ** This is called for any characters in the XML document for
751: ** which there is no applicable handler. This includes both
752: ** characters that are part of markup which is of a kind that is
753: ** not reported (comments, markup declarations), or characters
754: ** that are part of a construct which could be reported but
755: ** for which no handler has been supplied. The characters are passed
756: ** exactly as they were in the XML document except that
757: ** they will be encoded in UTF-8. Line boundaries are not normalized.
758: ** Note that a byte order mark character is not passed to the default handler.
759: ** If a default handler is set, internal entity references
760: ** are not expanded. There are no guarantees about
761: ** how characters are divided between calls to the default handler:
762: ** for example, a comment might be split between multiple calls.
763: */
764: PRIVATE void XML_default (void * userData,
765: const XML_Char * s, int len)
766: {
767: return;
768: }
769:
770: /*
771: ** This is called for a declaration of an unparsed (NDATA)
772: ** entity. The base argument is whatever was set by XML_SetBase.
773: ** The entityName, systemId and notationName arguments will never be null.
774: ** The other arguments may be.
775: */
776: PRIVATE void XML_unparsedEntityDecl (void * userData,
777: const XML_Char * entityName,
778: const XML_Char * base,
779: const XML_Char * systemId,
780: const XML_Char * publicId,
781: const XML_Char * notationName)
782: {
783: return;
784: }
785:
786: /*
787: ** This is called for a declaration of notation.
788: ** The base argument is whatever was set by XML_SetBase.
789: ** The notationName will never be null. The other arguments can be.
790: */
791: PRIVATE void XML_notationDecl (void * userData,
792: const XML_Char * notationName,
793: const XML_Char * base,
794: const XML_Char * systemId,
795: const XML_Char * publicId)
796: {
797: return;
798: }
799:
800: /*
801: ** This is called for a reference to an external parsed general entity.
802: ** The referenced entity is not automatically parsed.
803: ** The application can parse it immediately or later using
804: ** XML_ExternalEntityParserCreate.
805: ** The parser argument is the parser parsing the entity containing the reference;
806: ** it can be passed as the parser argument to XML_ExternalEntityParserCreate.
807: ** The systemId argument is the system identifier as specified in the entity
808: ** declaration; it will not be null.
809: ** The base argument is the system identifier that should be used as the base for
810: ** resolving systemId if systemId was relative; this is set by XML_SetBase;
811: ** it may be null.
812: ** The publicId argument is the public identifier as specified in the entity declaration,
813: ** or null if none was specified; the whitespace in the public identifier
814: ** will have been normalized as required by the XML spec.
815: ** The openEntityNames argument is a space-separated list of the names of the entities
816: ** that are open for the parse of this entity (including the name of the referenced
817: ** entity); this can be passed as the openEntityNames argument to
818: ** XML_ExternalEntityParserCreate; openEntityNames is valid only until the handler
819: ** returns, so if the referenced entity is to be parsed later, it must be copied.
820: ** The handler should return 0 if processing should not continue because of
821: ** a fatal error in the handling of the external entity.
822: ** In this case the calling parser will return an XML_ERROR_EXTERNAL_ENTITY_HANDLING
823: ** error.
824: ** Note that unlike other handlers the first argument is the parser, not userData.
825: */
826: PRIVATE int XML_externalEntityRef (XML_Parser parser,
827: const XML_Char * openEntityNames,
828: const XML_Char * base,
829: const XML_Char * systemId,
830: const XML_Char * publicId)
831: {
832: return 0;
833: }
834:
835: /*
836: ** This is called for an encoding that is unknown to the parser.
837: ** The encodingHandlerData argument is that which was passed as the
838: ** second argument to XML_SetUnknownEncodingHandler.
839: ** The name argument gives the name of the encoding as specified in
840: ** the encoding declaration.
841: ** If the callback can provide information about the encoding,
842: ** it must fill in the XML_Encoding structure, and return 1.
843: ** Otherwise it must return 0.
844: ** If info does not describe a suitable encoding,
845: ** then the parser will return an XML_UNKNOWN_ENCODING error.
846: */
847: PRIVATE int XML_unknownEncoding (void * encodingHandlerData,
848: const XML_Char * name,
849: XML_Encoding * info)
850: {
851: return 0;
852: }
853:
854: /* ------------------------------------------------------------------------- */
855: /* HTXML STREAM HANDLERS */
856: /* ------------------------------------------------------------------------- */
857:
858: PRIVATE void rdf_setHandlers (XML_Parser me)
859: {
860: XML_SetElementHandler(me, XML_startElement, XML_endElement);
861: XML_SetCharacterDataHandler(me, XML_characterData);
862: XML_SetProcessingInstructionHandler(me, XML_processingInstruction);
863: XML_SetDefaultHandler(me, XML_default);
864: XML_SetUnparsedEntityDeclHandler(me, XML_unparsedEntityDecl);
865: XML_SetNotationDeclHandler(me, XML_notationDecl);
866: XML_SetExternalEntityRefHandler(me, XML_externalEntityRef);
867: XML_SetUnknownEncodingHandler(me, XML_unknownEncoding, NULL);
868: }
869:
870: PRIVATE void rdf_newInstance (HTStream * me,
871: HTRequest * request,
872: HTFormat target_format,
873: HTStream * target_stream,
874: XML_Parser xmlparser,
875: void * context)
876: {
877: if (me && xmlparser) {
878: rdf_setHandlers(xmlparser);
879: XML_SetUserData(xmlparser, context);
880:
881: /* Call the new RDF instance callback (if any) with this new stream */
882: if (RDFInstance)
883: (*RDFInstance)(me, request, target_format, target_stream, context, RDFInstanceContext);
884: }
885: }
886:
887: /* ------------------------------------------------------------------------- */
888: /* RDF PARSER */
889: /* ------------------------------------------------------------------------- */
890:
891: PRIVATE void visit_element_children (HTList *children)
892: {
893: HTElement *child = NULL;
894: HTList *cur = children;
895: while ((child = (HTElement *) HTList_nextObject(cur))) {
896: if (!HTList_isEmpty(child->m_children))
897: visit_element_children(child->m_children);
898: HTElement_delete(child);
899: }
900: }
901:
902: PRIVATE void delete_elements (HTRDF * me)
903: {
904: if (me && me->m_root) {
905: HTElement *r = me->m_root;
906: if (!HTList_isEmpty(r->m_children))
907: visit_element_children(r->m_children);
908: HTElement_delete(r);
909: }
910: }
911:
912: PUBLIC HTRDF * HTRDF_new (void)
913: {
914: HTRDF * me;
915: if ((me = (HTRDF *) HT_CALLOC(1, sizeof(HTRDF))) == NULL)
916: HT_OUTOFMEM("HTRDF_new");
917: me->m_namespaceStack = HTList_new();
918: me->m_elementStack = HTList_new();
919:
920: me->m_triples = HTList_new();
921: me->m_vAllNameSpaces = HTList_new();
922:
923: me->m_bCreateBags = FALSE;
924: me->m_bFetchSchemas = FALSE;
925:
926: me->m_parseTypeStack = HTList_new();
927: me->m_parseElementStack = HTList_new();
928:
929: me->m_vResources = HTList_new();
930: me->m_vResolveQueue = HTList_new();
931: me->m_hIDtable = HTHashtable_new(0);
932:
933: return me;
934: }
935:
936: PUBLIC BOOL HTRDF_delete (HTRDF * me)
937: {
938: if (me) {
939: delete_elements(me);
940: if (me->m_namespaceStack) {
941: HTList *cur = me->m_namespaceStack;
942: HTAssocList *alist = NULL;
943: while ((alist = (HTAssocList *) HTList_nextObject(cur))) {
944: HTAssocList_delete(alist);
945: }
946: HTList_delete(me->m_namespaceStack);
947: }
948: if (me->m_elementStack) HTList_delete(me->m_elementStack);
949: me->m_root = NULL;
950: if (me->m_triples) {
951: HTList *cur = me->m_triples;
952: HTTriple *t = NULL;
953: while ((t = (HTTriple *) HTList_nextObject(cur))) {
954: /*HTTriple_print(t);*/
955: HTTriple_delete(t);
956: }
957: HTList_delete(me->m_triples);
958: }
959: HT_FREE(me->m_sSource);
960: if (me->m_vAllNameSpaces) {
961: HTList *cur = me->m_vAllNameSpaces;
2.3 frystyk 962: char * s = NULL;
963: while ((s = (char *) HTList_nextObject(cur))) {
2.1 frystyk 964: HT_FREE(s);
965: }
966: HTList_delete(me->m_vAllNameSpaces);
967: }
968: if (me->m_parseTypeStack)
969: HTList_delete(me->m_parseTypeStack);
970: if (me->m_parseElementStack)
971: HTList_delete(me->m_parseElementStack);
972: if (me->m_vResources)
973: HTList_delete(me->m_vResources);
974: if (me->m_vResolveQueue)
975: HTList_delete(me->m_vResolveQueue);
976: if (me->m_hIDtable)
977: HTHashtable_delete(me->m_hIDtable);
978: HT_FREE(me->m_sLiteral);
979: HT_FREE(me);
980: return YES;
981: }
982: return NO;
983: }
984:
985: /*
986: * setSource method saves the name of the source document for
987: * later inspection if needed
988: */
2.3 frystyk 989: PUBLIC BOOL HTRDF_setSource(HTRDF *me, char * source)
2.1 frystyk 990: {
991: if (me && source) {
992: StrAllocCopy (me->m_sSource, source);
993: return YES;
994: }
995: return NO;
996: }
997:
998: /*
999: * Go through the m_vResolveQueue and assign
1000: * direct object reference for each symbolic reference
1001: */
1002: PUBLIC BOOL HTRDF_resolve (HTRDF * me)
1003: {
1004: if (me) {
1005: HTList * cur = me->m_vResolveQueue;
1006: HTElement *e = NULL;
1007: HTElement *e2 = NULL;
1008: while ((e = (HTElement *) HTList_nextObject(cur))) {
2.3 frystyk 1009: char * sAbout = HTElement_getAttribute2(e, RDFMS, "about");
1010: char * sResource = HTElement_getAttribute2(e, RDFMS, "resource");
1011: char * sAboutEach = HTElement_getAttribute2(e, RDFMS, "aboutEach");
1012: char * sAboutEachPrefix = HTElement_getAttribute2(e, RDFMS,
2.1 frystyk 1013: "aboutEachPrefix");
1014: if (sAbout) {
1015: if (sAbout[0]=='#')
1016: sAbout = &(sAbout[1]);
1017: e2 = (HTElement *) HTRDF_lookforNode(me, sAbout);
1018: if (e2)
1019: HTElement_addTarget(e, e2);
1020: else
1021: HTPrint("Unresolved internal reference %s\n", sAbout);
1022: }
1023: if (sResource) {
1024: if (sResource[0]=='#')
1025: sResource = &(sResource[1]);
1026: e2 = (HTElement *) HTRDF_lookforNode(me, sResource);
1027: if (e2)
1028: HTElement_addTarget(e, e2);
1029: }
1030:
1031: if (sAboutEach) {
1032: sAboutEach = &(sAboutEach[1]);
1033: e2 = (HTElement *) HTRDF_lookforNode(me, sAboutEach);
1034: if (e2)
1035: HTElement_addTarget(e, e2);
1036: }
1037: if (sAboutEachPrefix) {
1038: HTList * curr = me->m_vResources;
1039: HTElement *ele = NULL;
1040: while ((ele = (HTElement *) HTList_nextObject(curr))) {
2.3 frystyk 1041: char * sA = HTElement_getAttribute2(ele, RDFMS, "about");
2.1 frystyk 1042: if (sA &&
1043: !strncmp(sA, sAboutEachPrefix, strlen(sAboutEachPrefix))) {
1044: HTElement_addTarget(e, ele);
1045: }
1046: }
1047: }
1048: }
1049: HTList_delete(me->m_vResources);
1050: me->m_vResources = HTList_new();
1051: return YES;
1052: }
1053: return NO;
1054: }
1055:
1056: /**
1057: * Check if the element e is from the namespace
1058: * of the RDF schema by comparing only the beginning of
1059: * the expanded element name with the canonical RDFMS
1060: * URI
1061: */
1062: PUBLIC BOOL HTRDF_isRDF(HTRDF * me, HTElement *e)
1063: {
1064: return (me && e && e->m_sName) ?
1065: (!strncmp(e->m_sName, RDFMS, strlen(RDFMS))) : NO;
1066: }
1067:
1068: PUBLIC BOOL HTRDF_isRDFroot (HTRDF * me, HTElement *e)
1069: {
1070: if (me && e && e->m_sName) {
1071: int len = strlen(e->m_sName);
1072: if (len > 3) return (HTRDF_isRDF(me, e) && !strcmp(&(e->m_sName[len-3]), "RDF"));
1073: }
1074: return NO;
1075: }
1076:
1077: /**
1078: * Is the element a Description
1079: */
1080: PUBLIC BOOL HTRDF_isDescription (HTRDF *me, HTElement *e)
1081: {
1082: if (me && e && e->m_sName) {
1083: int len = strlen(e->m_sName);
1084: if (len > 11) return (HTRDF_isRDF(me, e) && !strcmp(&(e->m_sName[len-11]), "Description"));
1085: }
1086: return NO;
1087: }
1088:
1089: /*
1090: * Is the element a ListItem
1091: */
1092: PUBLIC BOOL HTRDF_isListItem (HTRDF *me, HTElement *e)
1093: {
1094: if (me && e && e->m_sName) {
1095: int len = strlen(e->m_sName);
1096: if (len > 2)
1097: return (HTRDF_isRDF(me, e) && (!strcmp(&(e->m_sName[len-2]), "li") || strchr(e->m_sName,'_')));
1098: }
1099: return NO;
1100: }
1101:
1102: /**
1103: * Is the element a Sequence
1104: */
1105: PUBLIC BOOL HTRDF_isSequence (HTRDF *me, HTElement *e)
1106: {
1107: if (me && e && e->m_sName) {
1108: int len = strlen(e->m_sName);
1109: if (len > 3) return (HTRDF_isRDF(me, e) && !strcmp(&(e->m_sName[len-3]), "Seq"));
1110: }
1111: return NO;
1112: }
1113:
1114: /*
1115: * Is the element an Alternative
1116: */
1117: PUBLIC BOOL HTRDF_isAlternative (HTRDF *me, HTElement *e)
1118: {
1119: if (me && e && e->m_sName) {
1120: int len = strlen(e->m_sName);
1121: if (len > 3) return (HTRDF_isRDF(me, e) && !strcmp(&(e->m_sName[len-3]), "Alt"));
1122: }
1123: return NO;
1124: }
1125:
1126: /*
1127: * Is the element a Bag
1128: */
1129: PUBLIC BOOL HTRDF_isBag (HTRDF *me, HTElement *e)
1130: {
1131: if (me && e && e->m_sName) {
1132: int len = strlen(e->m_sName);
1133: if (len > 3) return (HTRDF_isRDF(me, e) && !strcmp(&(e->m_sName[len-3]), "Bag"));
1134: }
1135: return NO;
1136: }
1137:
1138: /**
1139: * Is the element a Container
1140: */
1141: PUBLIC BOOL HTRDF_isContainer (HTRDF *me, HTElement *e)
1142: {
1143: return (HTRDF_isSequence(me, e) ||
1144: HTRDF_isAlternative(me, e) ||
1145: HTRDF_isBag(me, e));
1146: }
1147:
1148: /*
1149: * This method matches all properties but those from RDF namespace
1150: */
1151: PUBLIC BOOL HTRDF_isTypedPredicate(HTRDF *me, HTElement *e)
1152: {
1153: if (me && e && e->m_sName) {
1154: int len = strlen(e->m_sName);
2.3 frystyk 1155: char * tp[] = {"predicate", "subject", "object",
2.1 frystyk 1156: "value", "type", "Property", "Statement"};
1157: int i;
1158: if (HTRDF_isRDF(me, e)) {
1159: for(i = 0; i< 7; i++) {
1160: int ntp = strlen(tp[i]);
1161: if (len > ntp) {
1162: if (!strcmp(&(e->m_sName[len-ntp]), tp[i]))
1163: return YES;
1164: }
1165: }
1166: return NO;
1167: }
1168: if (len > 0) return YES;
1169: }
1170: return NO;
1171: }
1172:
2.3 frystyk 1173: PRIVATE void HTRDF_processListItem (HTRDF * me, char * sID, HTElement *listitem,
2.1 frystyk 1174: int iCounter)
1175: {
1176: /*
1177: * Two different cases for
1178: * 1. LI element without content (resource available)
1179: * 2. LI element with content (resource unavailable)
1180: */
2.3 frystyk 1181: char * cName = NULL;
1182: char * sResource = HTRDF_getResource(me, listitem);
2.1 frystyk 1183: char sdig[20];
1184: sprintf(sdig, "_%d", iCounter);
1185: StrAllocMCopy(&cName, RDFMS, sdig, NULL);
1186: if (sResource) {
1187: HTRDF_addTriple(me, cName, sID, sResource);
1188: /* validity checking */
1189: if (!HTList_isEmpty(listitem->m_children)){
1190: HTPrint("Listitem with resource attribute can not have child nodes");
1191: }
1192: StrAllocCopy(listitem->m_sID, sResource);
1193: } else {
1194: HTList *cur = listitem->m_children;
1195: HTElement *n = NULL;
1196: while ((n = (HTElement *) HTList_nextObject(cur))) {
1197: if (HTElement_instanceOfData(n)) {
1198: HTRDF_addTriple(me, cName, sID, n->m_sContent);
1199: } else if (HTRDF_isDescription(me, n)) {
2.3 frystyk 1200: char * sNodeID = HTRDF_processDescription(me, n, NO, YES, NO);
2.1 frystyk 1201: HTRDF_addTriple(me, cName, sID, sNodeID);
1202: StrAllocCopy(listitem->m_sID, sNodeID);
1203: } else if (HTRDF_isListItem(me, n)) {
1204: HTPrint("Can not nest list item inside list item\n");
1205: } else if (HTRDF_isContainer(me, n)) {
2.3 frystyk 1206: char * c = HTRDF_processContainer(me, n);
2.1 frystyk 1207: HTRDF_addTriple(me, cName, sID, n->m_sID);
1208: HT_FREE(c);
1209: } else if (HTRDF_isTypedPredicate(me, n)) {
2.3 frystyk 1210: char * sNodeID = HTRDF_processTypedNode(me, n);
2.1 frystyk 1211: HTRDF_addTriple(me, cName, sID, sNodeID);
1212: HT_FREE(sNodeID);
1213: }
1214: }
1215: }
1216: HT_FREE(cName);
1217: }
1218:
2.3 frystyk 1219: PRIVATE char * HTRDF_processContainer(HTRDF *me, HTElement *n)
2.1 frystyk 1220: {
2.3 frystyk 1221: char * sID = NULL;
1222: char * tName = NULL;
1223: char * aName = NULL;
1224: char * sName = NULL;
1225: char * bName = NULL;
2.1 frystyk 1226: StrAllocMCopy(&tName, RDFMS, "type", NULL);
1227: StrAllocMCopy(&aName, RDFMS, "Alt", NULL);
1228: StrAllocMCopy(&sName, RDFMS, "Seq", NULL);
1229: StrAllocMCopy(&bName, RDFMS, "Bag", NULL);
1230:
1231: StrAllocCopy(sID, n->m_sID);
1232: if (!sID)
1233: sID = HTRDF_newReificationID(me);
1234: /*
1235: * Do the instantiation only once
1236: */
1237: if (!n->m_bDone) {
1238: if (HTRDF_isSequence(me, n)) {
1239: HTRDF_addTriple(me, tName, sID, sName);
1240: } else if (HTRDF_isAlternative(me, n)) {
1241: HTRDF_addTriple(me, tName, sID, aName);
1242: } else if (HTRDF_isBag(me, n)) {
1243: HTRDF_addTriple(me, tName, sID, bName);
1244: }
1245: n->m_bDone = YES;
1246: }
1247: HTRDF_expandAttributes(me, n, n);
1248:
1249: {
1250: HTList *cur = n->m_children;
1251: HTElement *n2 = NULL;
1252: int iCounter = 1;
1253: if (HTList_isEmpty(cur) && HTRDF_isAlternative(me, n))
1254: HTPrint("An RDF:Alt container must have at least one list item\n");
1255: while ((n2 = (HTElement *) HTList_nextObject(cur))) {
1256: if (HTRDF_isListItem(me, n2)) {
1257: HTRDF_processListItem(me, sID, n2, iCounter);
1258: iCounter++;
1259: } else {
1260: HTPrint("Can not nest %s, inside container\n", n2->m_sName);
1261: }
1262: }
1263: } /* end of block */
1264:
1265: HT_FREE(tName); HT_FREE(sName); HT_FREE(aName); HT_FREE(bName);
1266:
1267: return sID;
1268: }
1269: /*
1270: * Manage the typedNode production in the RDF grammar.
1271: *
1272: */
2.3 frystyk 1273: PUBLIC char * HTRDF_processTypedNode(HTRDF *me, HTElement *typedNode)
2.1 frystyk 1274: {
2.3 frystyk 1275: char * sID = HTElement_getAttribute2(typedNode, RDFMS, "ID");
1276: char * sBagID = HTElement_getAttribute2(typedNode, RDFMS, "bagID");
1277: char * sAbout = HTElement_getAttribute2(typedNode, RDFMS, "about");
1278: char * sAboutEach = HTElement_getAttribute2(typedNode, RDFMS, "aboutEach");
1279: /*char * sAboutEachPrefix = HTElement_getAttribute2(typedNode, RDFMS,
2.1 frystyk 1280: "aboutEachPrefix");*/
2.3 frystyk 1281: char * resource = HTElement_getAttribute2(typedNode, RDFMS, "resource");
1282: char * iName = NULL;
1283: char * bName = NULL;
1284: char * tName = NULL;
2.1 frystyk 1285:
2.3 frystyk 1286: char * sObject = NULL;
2.1 frystyk 1287:
1288: StrAllocMCopy(&iName, RDFMS, "ID", NULL);
1289: StrAllocMCopy(&bName, RDFMS, "bagID", NULL);
1290: StrAllocMCopy(&tName, RDFMS, "type", NULL);
1291:
1292: if (resource)
1293: HTPrint("resource attribute not allowed for a typedNode %s\n",
1294: typedNode->m_sName);
1295:
1296: /*
1297: * We are going to manage this typedNode using the processDescription
1298: * routine later on. Before that, place all properties encoded as
1299: * attributes to separate child nodes.
1300: */
1301: {
1302: HTAssoc * assoc;
1303: HTAssocList *cur = typedNode->m_attributes;
2.3 frystyk 1304: char * sAttribute = NULL;
1305: char * tValue = NULL;
1306: char * sValue = NULL;
2.1 frystyk 1307: while((assoc= (HTAssoc *) HTList_nextObject(cur))) {
1308: sAttribute = HTAssoc_name(assoc);
1309: sValue = HTAssoc_value(assoc);
1310: tValue = trim(sValue);
1311: if (strncmp(sAttribute, RDFMS, strlen(RDFMS)) &&
1312: strncmp(sAttribute, XMLSCHEMA, strlen(XMLSCHEMA))) {
1313: if (strlen(tValue) > 0) {
1314: HTAssocList *newAL = HTAssocList_new();
1315: HTElement *newPredicate = HTElement_new(sAttribute, newAL);
1316: HTElement *d = NULL;
1317: HTElement_addAttribute(newPredicate, iName,
1318: sAbout ? sAbout : sID);
1319: HTElement_addAttribute(newPredicate, bName, sBagID);
1320: d = HTElement_new2(tValue);
1321: HTElement_addChild(newPredicate, d);
1322: HTElement_addChild(typedNode, newPredicate);
1323: HTElement_removeAttribute(typedNode, sAttribute);
1324: }
1325: }
1326: HT_FREE(tValue);
1327: } /* end of while */
1328: }/* end of block */
1329: {
1330: if (sAbout)
1331: StrAllocCopy(sObject, sAbout);
1332: else if (sID)
1333: StrAllocCopy(sObject, sID);
1334: else
1335: sObject = HTRDF_newReificationID(me);
1336: StrAllocCopy(typedNode->m_sID, sObject);
1337:
1338: /* special case: should the typedNode have aboutEach attribute,
1339: ** the type predicate should distribute to pointed
1340: ** collection also -> create a child node to the typedNode
1341: */
1342: if (sAboutEach && !HTList_isEmpty(typedNode->m_vTargets)) {
1343: HTAssocList *newAL = HTAssocList_new();
1344: HTElement *newPredicate = HTElement_new(tName, newAL);
1345: HTElement *d = HTElement_new2(typedNode->m_sName);
1346: HTElement_addChild(newPredicate, d);
1347: HTElement_addChild(typedNode, newPredicate);
1348: } else {
1349: HTRDF_addTriple(me, tName, sObject, typedNode->m_sName);
1350: }
1351: HTRDF_processDescription(me, typedNode, NO, NO, YES);
1352: }/* end of block */
1353:
1354: HT_FREE(iName); HT_FREE(bName); HT_FREE(tName);
1355:
1356: return sObject;
1357: }
1358:
1359: /*
1360: * Start processing an RDF/XML document instance from the
1361: * root element rdf.
1362: *
1363: */
1364: PUBLIC BOOL HTRDF_processRDF (HTRDF *me, HTElement *e)
1365: {
1366: if (me && e) {
1367: HTList *cur = e->m_children;
1368: HTElement *ele = NULL;
1369: if (HTList_isEmpty(e->m_children)) {
1370: HTPrint("Empty RDF Element\n");
1371: return NO;
1372: }
1373: while ((ele= (HTElement *) HTList_nextObject(cur))) {
1374: if (HTRDF_isDescription(me, ele)) {
1375: HTRDF_processDescription(me, ele, NO, me->m_bCreateBags,
1376: me->m_bCreateBags);
1377: } else if (HTRDF_isContainer(me, ele)) {
2.3 frystyk 1378: char * c = HTRDF_processContainer(me, ele);
2.1 frystyk 1379: HT_FREE(c);
1380: } else if (HTRDF_isTypedPredicate(me, ele)) {
2.3 frystyk 1381: char * t = HTRDF_processTypedNode(me, ele);
2.1 frystyk 1382: HT_FREE(t);
1383: }
1384: }
1385: return YES;
1386: }
1387: return NO;
1388: }
1389:
1390: /*
1391: * processPredicate handles all elements not defined as special
1392: * RDF elements.
1393: *
1394: * predicate The predicate element itself
1395: * description Context for the predicate
1396: * sTarget The target resource
1397: * reificate Should this predicate be reificated
1398: *
1399: * return the new ID which can be used to identify the predicate
1400: *
1401: */
2.3 frystyk 1402: PRIVATE char * HTRDF_processPredicate (HTRDF * me,
2.1 frystyk 1403: HTElement * predicate,
1404: HTElement * description,
2.3 frystyk 1405: char * sTarget,
2.1 frystyk 1406: BOOL reificate)
1407: {
2.3 frystyk 1408: char * sStatementID = HTElement_getAttribute2(predicate, RDFMS, "ID");
1409: char * nsStatementID = NULL;
1410: char * sBagID = HTElement_getAttribute2(predicate, RDFMS, "bagID");
1411: char * sResource = HTRDF_getResource(me, predicate);
2.1 frystyk 1412:
1413: /*
1414: ** If a predicate has other attributes than rdf:ID, rdf:bagID,
1415: ** or xmlns... -> generate new triples according to the spec.
1416: ** (See end of Section 6)
1417: */
1418: {
1419: HTElement * place_holder = NULL;
1420: HTAssocList * newAL = HTAssocList_new();
2.3 frystyk 1421: char * fName = NULL;
1422: char * aName = NULL;
2.1 frystyk 1423:
1424: StrAllocMCopy(&fName, RDFMS, "Description", NULL);
1425: place_holder = HTElement_new(fName, newAL);
1426: HT_FREE(fName);
1427:
1428: if (HTRDF_expandAttributes(me, place_holder, predicate)) {
1429:
1430: /* error checking */
1431: if (!HTList_isEmpty(predicate->m_children)) {
1432: HTPrint("%s must be an empty element since it uses propAttr grammar production", predicate->m_sName);
1433: HTElement_delete(place_holder);
1434: return NULL;
1435: }
1436: StrAllocMCopy(&aName, RDFMS, "about", NULL);
1437:
1438: /* determine the 'about' part for the new statements */
1439: if (sStatementID) {
1440: HTElement *data = HTElement_new2(sStatementID);
1441: HTElement_addAttribute(place_holder, aName, sStatementID);
1442:
1443: /* hack: make rdf:ID the value of the predicate */
1444: HTElement_addChild(predicate, data);
1445: } else if (sResource) {
1446: HTElement_addAttribute(place_holder, aName, sResource);
1447: } else {
1448: nsStatementID = HTRDF_newReificationID(me);
1449: HTElement_addAttribute(place_holder, aName, nsStatementID);
1450: HT_FREE(nsStatementID);
1451: }
1452: HT_FREE(aName);
1453:
1454: if (sBagID) {
1455: StrAllocMCopy(&fName, RDFMS, "bagID", NULL);
1456: HTElement_addAttribute(place_holder, fName, sBagID);
1457: HT_FREE(fName);
1458: StrAllocCopy(place_holder->m_sBagID, sBagID);
1459: }
1460: HTRDF_processDescription(me, place_holder, NO, NO, me->m_bCreateBags);
1461: } else {
1462:
1463: /* Nothing but xmlns or RDF stuff, so we don't need new element */
1464: HTElement_delete(place_holder);
1465: }
1466: }
1467:
1468: /*
1469: ** Tricky part: if the resource attribute is present for a predicate
1470: ** AND there are no children, the value of the predicate is either
1471: ** 1. the URI in the resource attribute OR
1472: ** 2. the node ID of the resolved #resource attribute
1473: */
1474: if (sResource && HTList_isEmpty(predicate->m_children)) {
1475: if (!HTElement_target(predicate)) {
1476: if (reificate) {
1477: HT_FREE(nsStatementID);
1478: nsStatementID = HTRDF_reificate (me, predicate->m_sName,
1479: sTarget, sResource,
1480: predicate->m_sID);
1481: StrAllocCopy(predicate->m_sID, nsStatementID);
1482: } else {
1483: HTRDF_addTriple(me, predicate->m_sName, sTarget, sResource);
1484: }
1485: } else {
1486: HTElement *target = HTElement_target(predicate);
1487: if (reificate) {
1488: nsStatementID = HTRDF_reificate (me, predicate->m_sName,
1489: sTarget,
1490: target->m_sID,
1491: predicate->m_sID);
1492: StrAllocCopy(predicate->m_sID, nsStatementID);
1493: } else {
1494: HTRDF_addTriple(me, predicate->m_sName, sTarget, target->m_sID);
1495: }
1496: }
1497: StrAllocCopy(nsStatementID, predicate->m_sID);
1498: return nsStatementID;
1499: }
1500:
1501: /*
1502: ** Does this predicate make a reference somewhere using the
1503: ** sResource attribute
1504: */
1505: if (sResource && HTElement_target(predicate)) {
2.3 frystyk 1506: char * dStatementID = HTRDF_processDescription(me,
2.1 frystyk 1507: HTElement_target(predicate),
1508: YES, NO, NO);
1509: if (reificate) {
1510: HT_FREE(nsStatementID);
1511: nsStatementID = HTRDF_reificate (me, predicate->m_sName,
1512: sTarget, dStatementID,
1513: predicate->m_sID);
1514: StrAllocCopy(predicate->m_sID, nsStatementID);
1515: } else {
1516: StrAllocCopy(nsStatementID, dStatementID);
1517: HTRDF_addTriple(me, predicate->m_sName, sTarget, nsStatementID);
1518: }
1519: return nsStatementID;
1520: }
1521:
1522: /*
1523: ** Before looping through the children, let's check
1524: ** if there are any. If not, the value of the predicate is
1525: ** an anonymous node
1526: */
1527: {
1528: HTList *cur = predicate->m_children;
1529: BOOL bUsedTypedNodeProduction = NO;
1530: HTElement *n2;
1531: StrAllocCopy(nsStatementID, sStatementID);
1532: if (HTList_isEmpty(cur)) {
1533: if (reificate) {
2.3 frystyk 1534: char * nr = HTRDF_newReificationID(me);
2.1 frystyk 1535: HT_FREE(nsStatementID);
1536: nsStatementID = HTRDF_reificate (me, predicate->m_sName,
1537: sTarget, nr,
1538: predicate->m_sID);
1539: HT_FREE(nr);
1540: } else {
2.3 frystyk 1541: char * nr = HTRDF_newReificationID(me);
2.1 frystyk 1542: HTRDF_addTriple(me, predicate->m_sName, sTarget, nr);
1543: HT_FREE(nr);
1544: }
1545: }
1546: while ((n2= (HTElement *) HTList_nextObject(cur))) {
1547: if (HTRDF_isDescription(me, n2)) {
1548: HTElement *d2 = n2;
2.3 frystyk 1549: char * dStatementID =HTRDF_processDescription(me, d2, YES, NO, NO);
2.1 frystyk 1550: StrAllocCopy(d2->m_sID, dStatementID);
1551:
1552: if (reificate) {
1553: HT_FREE(nsStatementID);
1554: nsStatementID = HTRDF_reificate (me, predicate->m_sName,
1555: sTarget, dStatementID,
1556: predicate->m_sID);
1557: } else {
1558: StrAllocCopy(nsStatementID, dStatementID);
1559: HTRDF_addTriple(me, predicate->m_sName, sTarget,
1560: nsStatementID);
1561: }
1562: } else if (HTElement_instanceOfData(n2)) {
2.3 frystyk 1563: char * tValue = NULL;
1564: char * sValue = n2->m_sContent;
2.1 frystyk 1565: /* we've got real data */
1566: /*
1567: * Only if the content is not empty PCDATA (whitespace that is)
1568: * print the triple
1569: */
1570: tValue = trim(sValue);
1571: if (tValue && strlen(tValue) > 0) {
1572: if (reificate) {
1573: HT_FREE(nsStatementID);
1574: nsStatementID = HTRDF_reificate (me, predicate->m_sName,
1575: sTarget, tValue,
1576: predicate->m_sID);
1577: StrAllocCopy(predicate->m_sID, nsStatementID);
1578: } else {
1579: HTRDF_addTriple(me, predicate->m_sName, sTarget, tValue);
1580: }
1581: }
1582: HT_FREE(tValue);
1583: } else if (HTRDF_isContainer(me, n2)) {
1584: HTElement *target = HTElement_target(description);
2.3 frystyk 1585: char * aboutTarget =
2.1 frystyk 1586: target ?
1587: HTElement_getAttribute2(target, RDFMS, "about") : NULL;
2.3 frystyk 1588: char * sCollectionID = HTRDF_processContainer(me, n2);
2.1 frystyk 1589: StrAllocCopy(nsStatementID, sCollectionID);
1590: /* Attach the collection to the current predicate */
1591: if (target) {
1592: if (reificate) {
1593: HT_FREE(nsStatementID);
1594: nsStatementID=HTRDF_reificate (me, predicate->m_sName,
1595: aboutTarget,
1596: sCollectionID,
1597: predicate->m_sID);
1598: StrAllocCopy(predicate->m_sID, nsStatementID);
1599: } else {
1600: HTRDF_addTriple(me, predicate->m_sName, aboutTarget,
1601: sCollectionID);
1602: }
1603: } else {
1604: if (reificate) {
1605: HT_FREE(nsStatementID);
1606: nsStatementID=HTRDF_reificate (me, predicate->m_sName,
1607: sTarget, sCollectionID,
1608: predicate->m_sID);
1609: StrAllocCopy(predicate->m_sID, nsStatementID);
1610: } else {
1611: HTRDF_addTriple(me, predicate->m_sName, sTarget,
1612: sCollectionID);
1613: }
1614: }
1615: HT_FREE(sCollectionID);
1616: } else if (HTRDF_isTypedPredicate(me, n2)) {
1617: if (bUsedTypedNodeProduction) {
1618: HTPrint("Only one typedNode allowed inside a predicate (Extra typedNode: %s )\n", n2->m_sName);
1619: } else {
1620: bUsedTypedNodeProduction = YES;
1621: }
1622: HT_FREE(nsStatementID);
1623: nsStatementID = HTRDF_processTypedNode(me, n2);
1624: HTRDF_addTriple(me, predicate->m_sName, sTarget, nsStatementID);
1625: }
1626: }
1627: return nsStatementID;
1628: } /* end of block */
1629: return NULL;
1630: }
1631:
1632: /*
1633: * processDescription manages Description elements
1634: *
1635: * description The Description element itself
1636: * inPredicate Is this is a nested description
1637: * reificate Do we need to reificate
1638: * createBag Do we create a bag container
1639: *
1640: * return An ID for the description
1641: *
1642: */
2.3 frystyk 1643: PUBLIC char * HTRDF_processDescription (HTRDF * me,
2.1 frystyk 1644: HTElement * description,
1645: BOOL inPredicate,
1646: BOOL reificate,
1647: BOOL createBag)
1648: {
1649: int iChildCount = 1;
1650: BOOL bOnce = YES;
1651:
2.3 frystyk 1652: char * sAbout = HTElement_getAttribute2(description, RDFMS, "about");
1653: char * sAboutEach = HTElement_getAttribute2(description, RDFMS, "aboutEach");
1654: char * sAboutEachPrefix = HTElement_getAttribute2(description, RDFMS,
2.1 frystyk 1655: "aboutEachPrefix");
2.3 frystyk 1656: char * sBagid = HTElement_getAttribute2(description, RDFMS, "bagID");
1657: char * sID = HTElement_getAttribute2(description, RDFMS, "ID");
2.1 frystyk 1658: HTElement *target = HTElement_target(description);
1659: BOOL hasTarget = HTList_isEmpty(description->m_vTargets) ? NO : YES;
1660: BOOL targetIsContainer = NO;
2.3 frystyk 1661: char * sTargetAbout = NULL;
1662: char * sTargetBagID = NULL;
1663: char * sTargetID = NULL;
1664: char * dName = NULL;
1665: char * aName = NULL;
2.1 frystyk 1666:
1667: /*
1668: ** Return immediately if the description has already been managed
1669: */
1670: if (description->m_bDone) return description->m_sID;
1671:
1672: StrAllocMCopy(&dName, RDFMS, "Description", NULL);
1673: StrAllocMCopy(&aName, RDFMS, "about", NULL);
1674:
1675: /*
1676: ** Determine what the target of the Description reference is
1677: */
1678: if (hasTarget) {
2.3 frystyk 1679: char * sTargetID2 = HTElement_getAttribute2(target, RDFMS, "ID");
2.1 frystyk 1680: sTargetAbout = HTElement_getAttribute2(target, RDFMS, "about");
1681: sTargetBagID = HTElement_getAttribute2(target, RDFMS, "bagID");
1682: if (me->m_sSource && sTargetID2) {
1683: StrAllocMCopy(&sTargetID, me->m_sSource, sTargetID2, NULL);
1684: } else {
1685: StrAllocCopy(sTargetID, sTargetID2);
1686: }
1687: /*
1688: * Target is collection if
1689: * 1. it is identified with bagID attribute
1690: * 2. it is identified with ID attribute and is a collection
1691: */
1692: if (sTargetBagID && sAbout) {
1693: targetIsContainer = !strcmp(&(sAbout[1]), sTargetBagID);
1694: } else {
1695: if (sTargetID && sAbout && !strcmp(&(sAbout[1]), sTargetID) &&
1696: HTRDF_isContainer(me, target))
1697: targetIsContainer = YES;
1698: }
1699: HT_FREE(sTargetID);
1700: }
1701:
1702: /*
1703: * Check if there are properties encoded using the abbreviated
1704: * syntax
1705: */
1706: HTRDF_expandAttributes(me, description, description);
1707:
1708: /*
1709: * Manage the aboutEach attribute here
1710: */
1711: if (sAboutEach && hasTarget) {
1712: if (HTRDF_isContainer(me, target)) {
1713: HTList *cur = target->m_children;
1714: HTElement *ele = NULL;
1715: while ((ele= (HTElement *) HTList_nextObject(cur))) {
1716: if (HTRDF_isListItem(me, ele)) {
2.3 frystyk 1717: char * sResource = HTRDF_getResource(me, ele);
2.1 frystyk 1718: if (sResource) {
1719: HTElement * newDescription = NULL;
1720: HTElement * ele2;
1721: HTList * cur2 = description->m_children;
1722:
1723: /*
1724: * Manage <li resource="..." /> case
1725: */
1726: if (sResource) {
1727: HTAssocList *newAL = HTAssocList_new();
1728: newDescription = HTElement_new(dName, newAL);
1729: HTElement_addAttribute(newDescription, aName, sResource);
1730: }
1731:
1732: while ((ele2 = (HTElement *) HTList_nextObject(cur2))){
1733: if (newDescription) HTElement_addChild(newDescription, ele2);
1734: }
1735:
1736: if (newDescription)
1737: HTRDF_processDescription(me, newDescription, NO, NO, NO);
1738:
1739: /* Not needed anymore */
1740: HTElement_delete(newDescription);
1741:
1742: } else {
1743: /**
1744: * Otherwise we have a structured value inside <li>
1745: *
1746: * loop through the children of <li>
1747: * (can be only one)
1748: */
1749: HTList *cur2 = ele->m_children;
1750: HTElement *ele2 = NULL;
1751: while ((ele2 = (HTElement *) HTList_nextObject(cur2))) {
1752: HTAssocList *newAL = HTAssocList_new();
1753: HTElement *newNode = HTElement_new(dName, newAL);
1754: HTList *cur3 = description->m_children;
1755: HTElement *ele3 = NULL;
1756: /* loop through the items in the
1757: * description with aboutEach
1758: * and add them to the target
1759: */
1760: while ((ele3 = (HTElement *)
1761: HTList_nextObject(cur3))) {
1762: HTElement_addChild(newNode, ele3);
1763: }
1764: HTElement_addTarget(newNode, ele2);
1765: HTRDF_processDescription(me, newNode, YES, NO, NO);
1766: }
1767: }
1768: } else if (HTRDF_isTypedPredicate(me, ele)) {
1769: HTAssocList *newAL = HTAssocList_new();
1770: HTElement *newNode = HTElement_new(dName, newAL);
1771: HTList *cur2 = description->m_children;
1772: HTElement *ele2 = NULL;
1773: while ((ele2 = (HTElement *) HTList_nextObject(cur2))) {
1774: HTElement_addChild(newNode, ele2);
1775: }
1776: HTElement_addTarget(newNode, ele);
1777: HTRDF_processDescription(me, newNode, YES, NO, NO);
1778: }
1779: } /* end of while */
1780: } else if (HTRDF_isDescription(me, target)) {
1781: HTList *cur = target->m_children;
1782: HTElement *ele = NULL;
1783: while ((ele = (HTElement *) HTList_nextObject(cur))) {
1784: HTAssocList *newAL = HTAssocList_new();
1785: HTElement *newNode = HTElement_new(dName, newAL);
1786: HTList *cur2 = description->m_children;
1787: HTElement *ele2 = NULL;
1788: while ((ele2 = (HTElement *) HTList_nextObject(cur2))) {
1789: HTElement_addChild(newNode, ele2);
1790: }
1791: HTElement_addTarget(newNode, ele);
1792: HTRDF_processDescription(me, newNode, YES, NO, NO);
1793: } /* end of while */
1794: }
1795:
1796: HT_FREE(dName);
1797: HT_FREE(aName);
1798: return NULL;
1799: }
1800:
1801: /*
1802: * Manage the aboutEachPrefix attribute here
1803: */
1804: if (sAboutEachPrefix) {
1805: if (hasTarget) {
1806: HTList *cur = description->m_vTargets;
1807: HTElement *target = NULL;
1808: while ((target = (HTElement *) HTList_nextObject(cur))) {
1809: HTList *cur2 = description->m_children;
1810: HTElement *ele2 = NULL;
1811: HTElement *newDescription = NULL;
1812: HTAssocList *newAL = HTAssocList_new();
1813: sTargetAbout = HTElement_getAttribute2(target, RDFMS, "about");
1814: newDescription = HTElement_new(dName, newAL);
1815: HTElement_addAttribute(newDescription, aName, sTargetAbout);
1816: while ((ele2 = (HTElement *) HTList_nextObject(cur2))) {
1817: HTElement_addChild(newDescription, ele2);
1818: }
1819: HTRDF_processDescription(me, newDescription, NO, NO, NO);
1820: }
1821: }
1822:
1823: HT_FREE(dName);
1824: HT_FREE(aName);
1825: return NULL;
1826: }
1827: /*
1828: * Enumerate through the children
1829: */
1830: {
1831: HTList *cur = description->m_children;
1832: HTElement *n = NULL;
1833: while ((n = (HTElement *) HTList_nextObject(cur))) {
1834: if (HTRDF_isDescription(me, n))
1835: HTPrint("Can not nest Description inside Description\n");
1836: else if (HTRDF_isListItem(me, n))
1837: HTPrint("Can not nest List Item inside Description\n");
1838: else if (HTRDF_isContainer(me, n))
1839: HTPrint("Can not nest Container inside Description\n");
1840: else if (HTRDF_isTypedPredicate(me, n)) {
2.3 frystyk 1841: char * sChildID = NULL;
2.1 frystyk 1842: if (hasTarget && targetIsContainer) {
1843: sChildID = HTRDF_processPredicate(me, n, description,
1844: target->m_sBagID ?
1845: target->m_sBagID :
1846: target->m_sID, NO);
1847: StrAllocCopy(description->m_sID, sChildID);
1848: createBag = NO;
1849: } else if (hasTarget) {
1850: sChildID = HTRDF_processPredicate(me, n, description,
1851: target->m_sBagID ?
1852: target->m_sBagID :
1853: target->m_sID, reificate);
1854: StrAllocCopy(description->m_sID, sChildID);
1855: } else if (!hasTarget && !inPredicate) {
1856: if (!description->m_sID) {
2.3 frystyk 1857: char * nr = HTRDF_newReificationID(me);
2.1 frystyk 1858: StrAllocCopy(description->m_sID, nr);
1859: HT_FREE(nr);
1860: }
1861: if (!sAbout) {
1862: if (sID)
1863: sAbout = sID;
1864: else
1865: sAbout = description->m_sID;
1866: }
1867: sChildID = HTRDF_processPredicate(me, n, description,
1868: sAbout, sBagid ?
1869: YES : reificate);
1870:
1871: } else if (!hasTarget && inPredicate) {
1872: if (!sAbout) {
1873: if (sID) {
1874: StrAllocCopy(description->m_sID, sID);
1875: sAbout = sID;
1876: } else {
1877: if (!description->m_sID) {
2.3 frystyk 1878: char * nr = HTRDF_newReificationID(me);
2.1 frystyk 1879: StrAllocCopy(description->m_sID, nr);
1880: HT_FREE(nr);
1881: }
1882: sAbout = description->m_sID;
1883: }
1884: } else {
1885: StrAllocCopy(description->m_sID, sAbout);
1886: }
1887: sChildID = HTRDF_processPredicate(me, n, description, sAbout, NO);
1888: }
1889: /*
1890: * Each Description block creates also a Bag node which
1891: * has links to all properties within the block IF
1892: * the m_bCreateBags variable is true
1893: */
1894: if (sBagid || (me->m_bCreateBags && createBag)) {
2.3 frystyk 1895: char * sNamespace = RDFMS;
2.1 frystyk 1896: if (bOnce && sChildID) {
2.3 frystyk 1897: char * tName = NULL;
1898: char * bName = NULL;
2.1 frystyk 1899: bOnce = NO;
1900: if (!description->m_sBagID) {
2.3 frystyk 1901: char * nr = HTRDF_newReificationID(me);
2.1 frystyk 1902: StrAllocCopy(description->m_sBagID, nr);
1903: HT_FREE(nr);
1904: }
1905: if (!description->m_sID)
1906: StrAllocCopy(description->m_sID,
1907: description->m_sBagID);
1908: StrAllocMCopy(&tName, sNamespace, "type", NULL);
1909: StrAllocMCopy(&bName, sNamespace, "Bag", NULL);
1910: HTRDF_addTriple(me, tName, description->m_sBagID, bName);
1911: HT_FREE(tName);
1912: HT_FREE(bName);
1913:
1914: }
1915: if (sChildID) {
2.3 frystyk 1916: char * tName = NULL;
2.1 frystyk 1917: char si[20];
1918: sprintf(si, "%d", iChildCount);
1919: StrAllocMCopy(&tName, sNamespace, "_", si, NULL);
1920: HTRDF_addTriple(me, tName, description->m_sBagID, sChildID);
1921: iChildCount++;
1922: HT_FREE(tName);
1923: }
1924: }
1925: HT_FREE(sChildID);
1926: }
1927: }
1928: } /* end of block*/
1929:
1930: description->m_bDone = YES;
1931:
1932: HT_FREE(dName);
1933: HT_FREE(aName);
1934: return (description->m_sID);
1935: }
1936:
1937: /*
1938: * Given an XML document (well-formed HTML, for example),
1939: * look for a suitable element to start parsing from
1940: *
1941: */
1942: PUBLIC BOOL HTRDF_processXML (HTRDF *me, HTElement *ele)
1943: {
1944: if (me && ele) {
1945: if (HTRDF_isRDF(me, ele)) {
1946: if (HTRDF_isRDFroot(me, ele)) {
1947: HTRDF_processRDF(me, ele);
1948: } else if (HTRDF_isDescription(me, ele)) {
1949: HTRDF_processDescription(me, ele, NO, me->m_bCreateBags,
1950: me->m_bCreateBags);
1951: }
1952: } else {
1953: HTList *cur = ele->m_children;
1954: HTElement *child = NULL;
1955: while ((child = (HTElement *) HTList_nextObject(cur))) {
1956: HTRDF_processXML(me, child);
1957: }
1958: }
1959:
1960: /* MISSING RECURSION */
1961:
1962: return YES;
1963: }
1964: return NO;
1965: }
1966:
1967: /*
1968: * Return the root element pointer. This requires the parsing
1969: * has been already done.
1970: */
1971: PUBLIC HTElement * HTRDF_root (HTRDF *me)
1972: {
1973: return me ? me->m_root : NULL;
1974: }
1975:
1976: /*
1977: * Return the full namespace URI for a given prefix sPrefix.
1978: * The default namespace is identified with xmlns prefix.
1979: * The namespace of xmlns attribute is an empty string.
1980: */
1981:
2.3 frystyk 1982: PUBLIC char * HTRDF_namespace(HTRDF * me, char * sPrefix)
2.1 frystyk 1983: {
2.3 frystyk 1984: char * nPrefix = NULL;
2.1 frystyk 1985: HTAssocList * calist;
1986: HTList * cur = me->m_namespaceStack;
1987:
1988: if (!sPrefix)
1989: StrAllocCopy(nPrefix, "xmlns");
1990:
1991: while ((calist = (HTAssocList *) HTList_nextObject(cur))) {
2.3 frystyk 1992: char * sValue = HTAssocList_findObjectCaseSensitiveExact(calist, sPrefix);
2.1 frystyk 1993: if (sValue) {
1994: StrAllocCopy(nPrefix, sValue);
1995: return nPrefix;
1996: }
1997: }
1998: /*
1999: * Give error only if
2000: * 1. the prefix is not from the reserved xml namespace
2001: * 2. the prefix is not xmlns which is to look for the default
2002: * namespace
2003: */
2004: if (!strcmp(sPrefix, XMLSCHEMA)) {
2005: StrAllocCopy(nPrefix, sPrefix);
2006: return nPrefix;
2007: } else if (!strcmp(sPrefix, "xmlns")) {
2008: StrAllocCopy(nPrefix, "");
2009: return nPrefix;
2010: } else
2011: HTPrint("Unresolved Namespace prefix %s\n", sPrefix);
2012:
2013: StrAllocCopy(nPrefix, "");
2014: return nPrefix;
2015: }
2016:
2017: /*
2018: * Methods to determine whether we are parsing
2019: * parseType="Literal" or parseType="Resource"
2020: */
2021:
2022: PUBLIC BOOL HTRDF_parseLiteral(HTRDF *me)
2023: {
2024: HTElement *e = NULL;
2025: HTList *cur = me->m_elementStack;
2026: if (!HTList_isEmpty(me->m_elementStack)) {
2027: while((e = (HTElement *) HTList_nextObject(cur))) {
2.3 frystyk 2028: char * sParseType = NULL;
2.1 frystyk 2029: sParseType = HTElement_getAttribute2(e, RDFMS, "parseType");
2030: if (sParseType) {
2031: if (strcmp(sParseType, "Resource"))
2032: return YES;
2033: }
2034: }
2035: }
2036: return NO;
2037: }
2038:
2039: /*
2040: * Methods to determine whether we are parsing
2041: * parseType="Literal" or parseType="Resource"
2042: */
2043:
2044: PUBLIC BOOL HTRDF_parseResource(HTRDF *me)
2045: {
2046: HTElement *e = NULL;
2047: HTList *cur = me->m_elementStack;
2048: if (!HTList_isEmpty(me->m_elementStack)) {
2049: while((e = (HTElement *) HTList_nextObject(cur))) {
2.3 frystyk 2050: char * sParseType = NULL;
2.1 frystyk 2051: sParseType = HTElement_getAttribute2(e, RDFMS, "parseType");
2052: if (sParseType) {
2053: if (!strcmp(sParseType, "Resource"))
2054: return YES;
2055: }
2056: }
2057: }
2058: return NO;
2059: }
2060: /*
2061: * checkAttributes goes through the attributes of element e<
2062: * to see
2063: * 1. if there are symbolic references to other nodes in the data model.
2064: * in which case they must be stored for later resolving with
2065: * resolveLater method.
2066: * 2. if there is an identity attribute, it is registered using
2067: * registerResource or registerID method.
2068: *
2069: */
2070:
2071: PRIVATE void HTRDF_checkAttributes(HTRDF *me, HTElement *e)
2072: {
2073: {
2.3 frystyk 2074: char * sResource = HTElement_getAttribute2(e, RDFMS, "resource");
2.1 frystyk 2075:
2076: if (sResource && sResource[0] == '#')
2077: HTRDF_resolveLater(me, e);
2078: }
2079: {
2.3 frystyk 2080: char * sAboutEach = HTElement_getAttribute2(e, RDFMS, "aboutEach");
2.1 frystyk 2081:
2082: if (sAboutEach && sAboutEach[0] == '#')
2083: HTRDF_resolveLater(me, e);
2084: }
2085: {
2.3 frystyk 2086: char * sAboutEachPrefix = HTElement_getAttribute2(e, RDFMS,
2.1 frystyk 2087: "aboutEachPrefix");
2088:
2089: if (sAboutEachPrefix && sAboutEachPrefix[0] == '#')
2090: HTRDF_resolveLater(me, e);
2091: }
2092: {
2.3 frystyk 2093: char * sAbout = HTElement_getAttribute2(e, RDFMS, "about");
2.1 frystyk 2094: if (sAbout) {
2095: if (sAbout[0] == '#')
2096: HTRDF_resolveLater(me, e);
2097: else
2098: HTRDF_registerResource(me, e);
2099: }
2100: }
2101:
2102: {
2.3 frystyk 2103: char * sBagID = HTElement_getAttribute2(e, RDFMS, "bagID");
2.1 frystyk 2104:
2105: if (sBagID) {
2106: HTRDF_registerID(me, sBagID, e);
2107: StrAllocCopy(e->m_sBagID, sBagID);
2108: }
2109: }
2110: {
2.3 frystyk 2111: char * sID = HTElement_getAttribute2(e, RDFMS, "ID");
2.1 frystyk 2112: if (sID) {
2113: HTRDF_registerID(me, sID, e);
2114: StrAllocCopy(e->m_sID, sID);
2115: }
2116: }
2117: }
2118: /*
2119: * Add the element e to the m_vResolveQueue
2120: * to be resolved later.
2121: */
2122: PUBLIC void HTRDF_resolveLater(HTRDF *me, HTElement *e)
2123: {
2124: HTList_addObject(me->m_vResolveQueue, e);
2125: }
2126: /*
2127: * Add an element e to the Hashtable m_hIDtable
2128: * which stores all nodes with an ID
2129: */
2130:
2.3 frystyk 2131: PUBLIC void HTRDF_registerID(HTRDF *me, char * sID, HTElement *e)
2.1 frystyk 2132: {
2133: if (HTHashtable_object(me->m_hIDtable, sID))
2134: HTPrint("Node ID %s redefined", sID);
2135: HTHashtable_addObject(me->m_hIDtable, sID, e);
2136: }
2137: /*
2138: * Add an element e to the Vector m_vResources
2139: * which stores all nodes with an URI
2140: */
2141: PUBLIC void HTRDF_registerResource(HTRDF *me, HTElement *e)
2142: {
2143: HTList_addObject(me->m_vResources, e);
2144: }
2145:
2146: /*
2147: * Look for a node by name sID from the Hashtable
2148: * m_hIDtable of all registered IDs.
2149: */
2150:
2.3 frystyk 2151: PUBLIC HTElement *HTRDF_lookforNode(HTRDF *me, char * sID)
2.1 frystyk 2152: {
2153: if (sID)
2154: return (HTElement *) HTHashtable_object(me->m_hIDtable, sID);
2155: return NULL;
2156: }
2157:
2158: /*
2159: ** Special method to deal with rdf:resource attribute
2160: */
2.3 frystyk 2161: PUBLIC char * HTRDF_getResource(HTRDF *me, HTElement *e)
2.1 frystyk 2162: {
2.3 frystyk 2163: char * sResource = HTElement_getAttribute2(e, RDFMS, "resource");
2.1 frystyk 2164: if (sResource != NULL && sResource[0] == '\0')
2165: sResource = me->m_sSource;
2166: return sResource;
2167: }
2168:
2169: /*
2170: ** Take an element ele with its parent element parent
2171: ** and evaluate all its attributes to see if they are non-RDF specific
2172: ** and non-XML specific in which case they must become children of
2173: ** the ele node.
2174: */
2175: PRIVATE BOOL HTRDF_expandAttributes (HTRDF * me, HTElement * parent, HTElement * ele)
2176: {
2177: BOOL foundAbbreviation = NO;
2.3 frystyk 2178: char * sAttribute = NULL;
2179: char * sValue = NULL;
2.1 frystyk 2180: HTAssoc * assoc;
2181: HTAssocList * cur = ele->m_attributes;
2182: int lxmlschema = strlen(XMLSCHEMA);
2183: int lrdfms = strlen(RDFMS);
2184:
2185: while ((assoc= (HTAssoc *) HTList_nextObject(cur))) {
2186: int latt;
2187: sAttribute = HTAssoc_name(assoc);
2188: sValue = HTAssoc_value(assoc);
2.4 barstow 2189:
2.1 frystyk 2190: latt = strlen(sAttribute);
2191: if (!strncmp(sAttribute, XMLSCHEMA, lxmlschema))
2192: continue;
2193:
2194: if (!strncmp(sAttribute, RDFMS, lrdfms) &&
2195: (sAttribute[lrdfms]!='_') &&
2196: latt > 5 && strcmp(&(sAttribute[latt-5]), "value") &&
2197: strcmp(&(sAttribute[latt-4]), "type"))
2198: continue;
2199:
2200: if (strlen(sValue) > 0) {
2201: HTAssocList * newAL = HTAssocList_new();
2202: HTElement * newElement = HTElement_new(sAttribute, newAL);
2203: HTElement * newData = HTElement_new2(sValue);
2204: HTElement_addChild(newElement, newData);
2205: HTElement_addChild(parent, newElement);
2206: foundAbbreviation = YES;
2207: }
2208: }
2209: return foundAbbreviation;
2210: }
2211:
2212: /**
2213: * Create a new reification ID by using a name part and an
2214: * incremental counter m_iReificationCounter.
2215: */
2.3 frystyk 2216: PUBLIC char * HTRDF_newReificationID (HTRDF *me)
2.1 frystyk 2217: {
2.3 frystyk 2218: char * nsid = NULL;
2.1 frystyk 2219: char nsrc[20];
2220: me->m_iReificationCounter++;
2221: sprintf(nsrc, "%d", me->m_iReificationCounter);
2222: if (!me->m_sSource) {
2223: StrAllocMCopy(&nsid, "genid", nsrc, NULL);
2224: } else {
2225: StrAllocMCopy(&nsid, me->m_sSource, "#genid", nsrc, NULL);
2226: }
2227: return nsid;
2228: }
2229:
2230: /*
2231: * reificate creates one new node and four new triples
2232: * and returns the ID of the new node
2233: */
2234:
2.3 frystyk 2235: PRIVATE char * HTRDF_reificate(HTRDF *me, char * sPredicate, char * sSubject,
2236: char * sObject, char * sNodeID)
2.1 frystyk 2237: {
2.3 frystyk 2238: char * sName = NULL;
2239: char * pName = NULL;
2240: char * oName = NULL;
2241: char * tName = NULL;
2242: char * stName = NULL;
2243: char * tNodeID = NULL;
2.1 frystyk 2244:
2245: if (!sNodeID)
2246: tNodeID = HTRDF_newReificationID(me);
2247: else
2248: StrAllocCopy(tNodeID, sNodeID);
2249:
2250: StrAllocMCopy(&sName, RDFMS, "subject", NULL);
2251: StrAllocMCopy(&pName, RDFMS, "predicate", NULL);
2252: StrAllocMCopy(&oName, RDFMS, "object", NULL);
2253: StrAllocMCopy(&tName, RDFMS, "type", NULL);
2254: StrAllocMCopy(&stName, RDFMS, "Statement", NULL);
2255:
2256: /*
2257: * The original statement must remain in the data model
2258: */
2259: HTRDF_addTriple(me, sPredicate, sSubject, sObject);
2260:
2261: /*
2262: * Do not reificate reificated properties
2263: */
2264: if (strcmp(sPredicate, sName) && strcmp(sPredicate, pName) &&
2265: strcmp(sPredicate, oName) && strcmp(sPredicate, tName)) {
2266:
2267: /* Reificate by creating 4 new triples */
2268: HTRDF_addTriple(me, pName, tNodeID, sPredicate);
2269: HTRDF_addTriple(me, sName, tNodeID, (sSubject[0]=='\0' ? me->m_sSource: sSubject));
2270: HTRDF_addTriple(me, oName, tNodeID, sObject);
2271: HTRDF_addTriple(me, tName, tNodeID, stName);
2272: } else
2273: HT_FREE(tNodeID);
2274:
2275: HT_FREE(sName);
2276: HT_FREE(pName);
2277: HT_FREE(oName);
2278: HT_FREE(tName);
2279: HT_FREE(stName);
2280:
2281: return tNodeID;
2282: }
2283: /*
2284: * Create a new triple and add it to the m_triples List
2285: * Send the triple to the Output stream
2286: */
2287:
2.3 frystyk 2288: PUBLIC void HTRDF_addTriple (HTRDF *me, char * sPredicate, char * sSubject,
2289: char * sObject)
2.1 frystyk 2290: {
2291: HTTriple *t = NULL;
2292:
2293: /*
2294: * If there is no subject (about=""), then use the URI/filename where
2295: * the RDF description came from
2296: */
2297: if (!sPredicate || !sSubject || !sObject) {
2298: HTPrint("Predicate %s when subject %s and object %s \n",
2299: sPredicate ? sPredicate : "null",
2300: sSubject ? sSubject : "null",
2301: sObject ? sObject : "null");
2302: return;
2303: }
2304:
2305: if (sSubject[0]=='\0')
2306: sSubject = me->m_sSource;
2307:
2308: t = HTTriple_new(sPredicate, sSubject, sObject);
2309:
2310: /* Call the triple callback handler (if any) with this new triple */
2311: if (me->newTripleInstance && t) (*(me->newTripleInstance))(me, t, me->tripleContext);
2312:
2313: HTList_addObject(me->m_triples, t);
2314: }
2315:
2316: /*
2317: * createBags method allows one to determine whether SiRPAC
2318: * produces Bag instances for each Description block.
2319: * The default setting is not to generate them.
2320: */
2321:
2322: PUBLIC void HTRDF_createBags(HTRDF *me, BOOL b)
2323: {
2324: if (me)
2325: me->m_bCreateBags = b;
2326: }
2327:
2328: /*
2329: Set output stream for RDF parser
2330: */
2331:
2332: PUBLIC void HTRDF_setOutputStream(HTRDF *me, HTStream *ostream)
2333: {
2334: if (me)
2335: me->ostream = ostream;
2336: }
2337:
2338: PUBLIC BOOL HTRDF_registerNewTripleCallback (HTRDF * me, HTTripleCallback_new * cbf, void * context)
2339: {
2340: if (me) {
2341: me->newTripleInstance = cbf;
2342: me->tripleContext = context;
2343: return YES;
2344: }
2345: return NO;
2346: }
2347:
2348: PUBLIC BOOL HTRDF_registerNewParserCallback (HTRDFCallback_new * me, void * context)
2349: {
2350: RDFInstance = me;
2351: RDFInstanceContext = context;
2352: return YES;
2353: }
2354:
2355: /* ------------------------------------------------------------------------- */
2356: /* HTRDFTriples STREAM HANDLERS */
2357: /* ------------------------------------------------------------------------- */
2358:
2359: PRIVATE int generate_triples(HTStream *me)
2360: {
2361: HTRDF *rdfp = me ? me->rdfparser : NULL;
2362: if (rdfp) {
2363:
2364: HTRDF_resolve(rdfp);
2365:
2366: HTRDF_processXML(rdfp, HTRDF_root(rdfp));
2367:
2368: return HT_OK;
2369: }
2370: return HT_ERROR;
2371: }
2372:
2373: PRIVATE int HTRDFTriples_flush (HTStream * me)
2374: {
2375: if (me->target)
2376: return (*me->target->isa->flush)(me->target);
2377: return HT_OK;
2378: }
2379:
2380: PRIVATE int HTRDFTriples_free (HTStream * me)
2381: {
2382: int status = HT_OK;
2383:
2384: status = generate_triples(me);
2385:
2386: HTRDF_delete(me->rdfparser);
2387:
2388: if (me->target) {
2389: if ((status = (*me->target->isa->_free)(me->target)) == HT_WOULD_BLOCK)
2390: return HT_WOULD_BLOCK;
2391: }
2392: HTTRACE(XML_TRACE, "RDF Parser.. FREEING...\n");
2393: HT_FREE(me);
2394: return status;
2395: }
2396:
2397: PRIVATE int HTRDFTriples_abort (HTStream * me, HTList * e)
2398: {
2399: HTTRACE(XML_TRACE, "RDF Parser.. ABORTING...\n");
2400: HTRDF_delete(me->rdfparser);
2401: if (me->target)
2402: (*me->target->isa->abort)(me->target, NULL);
2403: HT_FREE(me);
2404: return HT_ERROR;
2405: }
2406:
2407: PRIVATE int HTRDFTriples_write (HTStream * me, const char * buf, int len)
2408: {
2409: return HT_OK;
2410: }
2411:
2412: PRIVATE int HTRDFTriples_putCharacter (HTStream * me, char c)
2413: {
2414: return HTRDFTriples_write(me, &c, 1);
2415: }
2416:
2417: PRIVATE int HTRDFTriples_putString (HTStream * me, const char * s)
2418: {
2419: return HTRDFTriples_write(me, s, (int) strlen(s));
2420: }
2421:
2422: PRIVATE const HTStreamClass HTRDFTriplesClass =
2423: {
2424: "rdf",
2425: HTRDFTriples_flush,
2426: HTRDFTriples_free,
2427: HTRDFTriples_abort,
2428: HTRDFTriples_putCharacter,
2429: HTRDFTriples_putString,
2430: HTRDFTriples_write
2431: };
2432:
2433: PRIVATE HTStream * RDFParser_new (HTRequest * request,
2434: void * param,
2435: HTFormat input_format,
2436: HTFormat output_format,
2437: HTStream * output_stream)
2438: {
2439: HTStream * me = NULL;
2440: if ((me = (HTStream *) HT_CALLOC(1, sizeof(HTStream))) == NULL)
2441: HT_OUTOFMEM("HTRDFTriples_new");
2442: me->isa = &HTRDFTriplesClass;
2443: me->state = HT_OK;
2444: me->request = request;
2445: me->target = output_stream ? output_stream : HTErrorStream();
2446:
2447: /* Now create the RDF parser instance */
2448: if ((me->rdfparser = HTRDF_new()) == NULL) {
2449: HT_FREE(me);
2450: return HTErrorStream();
2451: }
2452:
2453: /* Set the source (I guess mostly to follow SiRPAC API) */
2454: {
2455: char * uri = HTAnchor_address((HTAnchor *) HTRequest_anchor(request));
2456: HTRDF_setSource(me->rdfparser, uri);
2457: HT_FREE(uri);
2458: }
2459:
2460: /* Where are we putting data? */
2461: HTRDF_setOutputStream(me->rdfparser, me);
2462:
2463: /* If you want to create Bags, change it to YES */
2464: HTRDF_createBags(me->rdfparser, NO);
2465:
2466: /* Register our new XML Instance handler */
2467: /* @@@ THIS SHOULD BE DONE USING XML NAMESPACE SO THAT WE DON'T CONFLICT @@@ */
2468: HTXMLCallback_registerNew(rdf_newInstance, me->rdfparser);
2469:
2470: HTTRACE(XML_TRACE, "RDF Parser.. Stream created\n");
2471:
2472: return me;
2473: }
2474:
2475: PUBLIC HTStream * HTRDFParser_new (HTRequest * request,
2476: void * param,
2477: HTFormat input_format,
2478: HTFormat output_format,
2479: HTStream * output_stream)
2480: {
2481: return HTXML_new(request, param, input_format, output_format,
2482: RDFParser_new(request, param, input_format, output_format, output_stream));
2483: }
2484:
2485: PRIVATE void triple_newInstance (HTRDF * rdfp, HTTriple * t, void * context)
2486: {
2487: if (rdfp && t) {
2488: HTStream *ostream = rdfp->ostream;
2489: if (ostream) {
2490: PUTC(ostream,'(');
2491: PUTS(ostream, t->m_sPredicate);
2492: PUTC(ostream,',');
2493: PUTS(ostream, t->m_sSubject);
2494: PUTC(ostream,',');
2495: PUTS(ostream, t->m_sObject);
2496: PUTC(ostream,')');
2497: PUTC(ostream,'\n');
2498: }
2499: }
2500: }
2501:
2502: PUBLIC HTStream * HTRDFToTriples (HTRequest * request,
2503: void * param,
2504: HTFormat input_format,
2505: HTFormat output_format,
2506: HTStream * output_stream)
2507: {
2.2 frystyk 2508: HTStream * me = RDFParser_new(request, param, input_format, output_format, output_stream);
2.1 frystyk 2509: HTTRACE(XML_TRACE, "RDF Converter. To Triples\n");
2510:
2511: /* Register our own tripple instance handler */
2512: HTRDF_registerNewTripleCallback(me->rdfparser, triple_newInstance, NULL);
2513:
2514: /* Create an XML parser instance and return */
2515: return HTXML_new(request, param, input_format, output_format, me);
2516: }
2517:
2.5 ! barstow 2518: PUBLIC char * HTRDFParseFile (const char *file_name, HTTripleCallback_new * new_triple_callback)
! 2519: {
! 2520: char buff[512]; /* the file input buffer */
! 2521: FILE *fp;
! 2522: XML_Parser xmlparser;
! 2523: HTRDF *rdfparser;
! 2524: HTStream * stream = NULL;
! 2525: char *uri;
! 2526: BOOL free_uri = YES;
! 2527:
! 2528: /* Sanity check */
! 2529: if (!file_name)
! 2530: return "RDFParseFile: file_name is NULL";
! 2531:
! 2532: /* If the file does not exist, return now */
! 2533: fp = fopen (file_name, "r");
! 2534: if (!fp) /* annotation index file doesn't exist */
! 2535: return "RDFParseFile: file open failed";
! 2536:
! 2537: /* We need an XML parser */
! 2538: #ifdef USE_NS
! 2539: xmlparser = XML_ParserCreateNS (NULL, ':');
! 2540: #else
! 2541: xmlparser = XML_ParserCreate (NULL);
! 2542: #endif /* USE_NS */
! 2543:
! 2544: if (!xmlparser) {
! 2545: fclose (fp);
! 2546: return "RDFParseFile: Could not create an XML parser";
! 2547: }
! 2548:
! 2549: /* We need also need RDF parser to create the triples */
! 2550: rdfparser = HTRDF_new();
! 2551: if (!rdfparser) {
! 2552: fclose (fp);
! 2553: XML_ParserFree(xmlparser);
! 2554: return "RDFParseFile: Could not allocate memory for RDF parser";
! 2555: }
! 2556:
! 2557: /* Must construct a URI from file_name for the parser */
! 2558: if (strncmp (file_name, FILE_SCHEME, 7)) {
! 2559: uri = HT_MALLOC (strlen(FILE_SCHEME) + strlen(file_name) + 1);
! 2560: if (!uri) {
! 2561: fclose (fp);
! 2562: XML_ParserFree(xmlparser);
! 2563: HTRDF_delete(rdfparser);
! 2564: return "RDFParseFile: memory allocation error";
! 2565: }
! 2566: (void) strcpy (uri, FILE_SCHEME);
! 2567: (void) strcat (uri, file_name);
! 2568: free_uri = YES;
! 2569: }
! 2570:
! 2571: HTRDF_setSource(rdfparser, uri);
! 2572: HTRDF_createBags(rdfparser, NO);
! 2573:
! 2574: if (new_triple_callback)
! 2575: HTRDF_registerNewTripleCallback(rdfparser, new_triple_callback, NULL);
! 2576: else
! 2577: HTRDF_registerNewTripleCallback(rdfparser, triple_newInstance, NULL);
! 2578:
! 2579: rdf_setHandlers(xmlparser);
! 2580: XML_SetUserData(xmlparser, rdfparser);
! 2581:
! 2582: /* Create a stream to be used to process the triple output */
! 2583: if ((stream = (HTStream *) HT_CALLOC(1, sizeof(HTStream))) == NULL) {
! 2584: if (free_uri) HT_FREE(uri);
! 2585: fclose (fp);
! 2586: XML_ParserFree(xmlparser);
! 2587: HTRDF_delete(rdfparser);
! 2588: return "RDFParseFile: Could not allocate memory for HTStream";
! 2589: }
! 2590: stream->isa = &HTRDFTriplesClass;
! 2591: stream->state = HT_OK;
! 2592: stream->request = NULL; /* Don't have a request */
! 2593: stream->target = NULL; /* Don't have another stream */
! 2594: stream->rdfparser = rdfparser;
! 2595:
! 2596: /*
! 2597: * The parsing occurs on one read buffer at a time instead of
! 2598: * reading everything into memory and then parsing
! 2599: */
! 2600: for (;;) {
! 2601: int done;
! 2602: int buff_len;
! 2603: fgets(buff, sizeof(buff), fp);
! 2604: if (ferror(fp)) {
! 2605: if (free_uri) HT_FREE(uri);
! 2606: fclose (fp);
! 2607: XML_ParserFree(xmlparser);
! 2608: HTRDF_delete(rdfparser);
! 2609: HT_FREE(stream);
! 2610: return "RDFParseFile: error reading file";
! 2611: }
! 2612: done = feof(fp);
! 2613: if (done)
! 2614: buff_len = 0;
! 2615: else
! 2616: buff_len = strlen (buff);
! 2617: if (! XML_Parse(xmlparser, buff, buff_len, done)) {
! 2618: fprintf (stderr, "Parse error at line %d:\n%s\n",
! 2619: XML_GetCurrentLineNumber(xmlparser),
! 2620: XML_ErrorString(XML_GetErrorCode(xmlparser)));
! 2621: if (free_uri) HT_FREE(uri);
! 2622: fclose(fp);
! 2623: XML_ParserFree(xmlparser);
! 2624: HTRDF_delete(rdfparser);
! 2625: HT_FREE(stream);
! 2626: return "RDFParseFile: parse error";
! 2627: }
! 2628: if (done)
! 2629: break;
! 2630: }
! 2631:
! 2632: /* The file has been parsed, generate the triples */
! 2633: generate_triples(stream);
! 2634:
! 2635: /* Cleanup */
! 2636: if (free_uri) HT_FREE(uri);
! 2637: fclose (fp);
! 2638: XML_ParserFree(xmlparser);
! 2639: HTRDF_delete(rdfparser);
! 2640: HT_FREE(stream);
! 2641:
! 2642: return NULL;
! 2643: }
Webmaster