Annotation of libwww/Library/src/HTRDF.c, revision 2.6
2.1 frystyk 1: /*
2: ** RDF PARSER
3: **
2.6 ! kahan 4: ** @(#) $Id: HTRDF.c,v 2.5 2000/08/09 14:19:07 barstow Exp $
2.1 frystyk 5: **
6: ** Copyright © 1995-1998 World Wide Web Consortium, (Massachusetts
7: ** Institute of Technology, Institut National de Recherche en
8: ** Informatique et en Automatique, Keio University). All Rights
9: ** Reserved. This program is distributed under the W3C's Software
10: ** Intellectual Property License. This program is distributed in the hope
11: ** that it will be useful, but WITHOUT ANY WARRANTY; without even the
12: ** implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
13: ** PURPOSE. See W3C License http://www.w3.org/Consortium/Legal/ for more
14: ** details.
15: **
16: ** This module requires expat
17: **
18: ** AUTHOR:
19: ** John Punin <puninj@cs.rpi.edu>,
20: ** Henrik
21: */
22:
23: #include "WWWLib.h"
24: #include "WWWInit.h"
25: #include "WWWXML.h"
26: #include "HTRDF.h"
27:
28: struct _HTStream {
29: const HTStreamClass * isa;
30: int state;
31: HTRequest * request;
32: HTStream * target;
33: HTRDF * rdfparser;
34: };
35:
/* Forward a single character / a string to the target stream of (t) */
#define PUTC(t,c)       ((t)->target->isa->put_character((t)->target, (c)))
#define PUTS(t,s)       ((t)->target->isa->put_string((t)->target, (s)))
38:
/*
** One RDF statement. Each member is a private copy made by HTTriple_new()
** and released by HTTriple_delete().
*/
struct _HTTriple {
    char *      m_sPredicate;
    char *      m_sSubject;
    char *      m_sObject;
};
44:
45: struct _HTElement {
2.3 frystyk 46: char * m_sName;
2.1 frystyk 47: HTAssocList * m_attributes;
48: HTList * m_children;
2.3 frystyk 49: char * m_sID;
50: char * m_sBagID;
2.1 frystyk 51: HTList * m_vTargets;
52: BOOL m_bDone;
2.3 frystyk 53: char * m_sPrefix;
54: char * m_sContent;
2.1 frystyk 55: };
56:
57: struct _HTRDFParser {
58: HTList * m_namespaceStack;
59: HTList * m_elementStack;
60: HTElement * m_root;
61: HTList * m_triples;
2.3 frystyk 62: char * m_sSource;
2.1 frystyk 63: HTList * m_vAllNameSpaces;
64:
65: BOOL m_bCreateBags;
66: BOOL m_bFetchSchemas;
67:
68: HTList * m_parseTypeStack;
69: HTList * m_parseElementStack;
2.3 frystyk 70: char * m_sLiteral;
2.1 frystyk 71:
72: HTList * m_vResources;
73: HTList * m_vResolveQueue;
74: HTHashtable * m_hIDtable;
75: int m_iReificationCounter;
76:
77: HTStream * ostream;
78:
79: HTTripleCallback_new * newTripleInstance;
80: void * tripleContext;
81: };
82:
83: /* @@@ Should not be global but controlled by name spaces @@@ */
84: PRIVATE HTRDFCallback_new * RDFInstance = NULL;
85: PRIVATE void * RDFInstanceContext = NULL;
86:
2.3 frystyk 87: PRIVATE char * HTRDF_processContainer (HTRDF *me, HTElement *e);
88: PRIVATE char * HTRDF_processPredicate (HTRDF *me, HTElement *predicate,
2.1 frystyk 89: HTElement *description,
2.3 frystyk 90: char * sTarget,
2.1 frystyk 91: BOOL reificate);
2.3 frystyk 92: PRIVATE void HTRDF_processListItem (HTRDF *me,char * sID, HTElement *listitem,
2.1 frystyk 93: int iCounter);
94: PRIVATE void HTRDF_checkAttributes (HTRDF *me,HTElement *Element);
95: PRIVATE BOOL HTRDF_expandAttributes (HTRDF *me, HTElement *parent, HTElement *ele);
2.3 frystyk 96: PRIVATE char * HTRDF_reificate (HTRDF *me, char * sPredicate, char * sSubject,
97: char * sObject, char * sNodeID);
2.4 barstow 98: /* ------------------------------------------------------------------------- */
99:
100: /*
101: ** Append the markup for the given element and its attribute to the
102: ** parser's "Literal" buffer. This buffer is filled in when parseType="Literal".
103: */
104: PRIVATE void addMarkupStart (HTRDF *rdfp, const char *name, const char **atts)
105: {
106: int i=0;
107:
108: if (!rdfp || !name) return;
109:
110: StrAllocCat(rdfp->m_sLiteral, "<");
111: StrAllocCat(rdfp->m_sLiteral, name);
112:
113: while (atts[i]) {
114: StrAllocCat(rdfp->m_sLiteral, " ");
115: StrAllocCat(rdfp->m_sLiteral, atts[i]);
116: StrAllocCat(rdfp->m_sLiteral, "=\"");
117: StrAllocCat(rdfp->m_sLiteral, atts[i+1]);
118: StrAllocCat(rdfp->m_sLiteral, "\"");
119: i+=2;
120: }
121:
122: StrAllocCat(rdfp->m_sLiteral, ">");
123: }
124:
125: /*
126: ** Terminate this element's "Literal" buffer. This buffer is filled in when
127: ** parseType="Literal".
128: */
129: PRIVATE void addMarkupEnd (HTRDF *rdfp, const char *name)
130: {
131: if (!rdfp || !name) return;
132:
133: StrAllocCat(rdfp->m_sLiteral, "</");
134: StrAllocCat(rdfp->m_sLiteral, name);
135: StrAllocCat(rdfp->m_sLiteral, ">");
136: }
2.1 frystyk 137:
138: /* ------------------------------------------------------------------------- */
139:
140: /*
141: ** Searches a whole list of Strings and returns true if the String is found.
142: */
2.3 frystyk 143: PRIVATE BOOL HTList_contains (HTList *list, char * s)
2.1 frystyk 144: {
145: HTList *cur = list;
2.3 frystyk 146: char * cs = NULL;
147: while ((cs = (char *) HTList_nextObject(cur))) {
2.1 frystyk 148: if (!strcmp(cs, s)) return YES;
149: }
150: return NO;
151: }
152:
153: /*
154: ** Useful function that Trims a string
155: ** @@@ Should use HTStrip() @@@
156: */
157: PRIVATE char * trim (char *s)
158: {
159: char *p = NULL, *t = NULL;
160: int len = s ? strlen(s) : -1;
161: if (s && len > 0) {
162: StrAllocCopy(t, s);
163: p = &(s[len-1]);
164: while(p!=s) {
165: if (!isspace((int)(*p)))
166: break;
167: p--;
168: }
169: t[(int)(p-s)+1] = '\0';
170: if (isspace((int) t[(int)(p-s)]))
171: t[(int)(p-s)] = '\0';
172: }
173: return t;
174: }
175:
176: /* ------------------------------------------------------------------------- */
177: /* TRIPLE of RDF */
178: /* ------------------------------------------------------------------------- */
179:
2.3 frystyk 180: PUBLIC HTTriple * HTTriple_new (char * p, char * s, char * o)
2.1 frystyk 181: {
182: HTTriple * me = NULL;
183: if (p && s && o) {
184: if ((me = (HTTriple *) HT_CALLOC(1, sizeof(HTTriple))) == NULL)
185: HT_OUTOFMEM("HTTriple_new");
186: StrAllocCopy(me->m_sPredicate, p);
187: StrAllocCopy(me->m_sSubject, s);
188: StrAllocCopy(me->m_sObject, o);
189: }
190: return me;
191: }
192:
193: PUBLIC BOOL HTTriple_delete (HTTriple * me)
194: {
195: if (me) {
196: HT_FREE(me->m_sPredicate);
197: HT_FREE(me->m_sSubject);
198: HT_FREE(me->m_sObject);
199: HT_FREE(me);
200: return YES;
201: }
202: return NO;
203: }
204:
205: PUBLIC void HTTriple_print (HTTriple * me)
206: {
207: if (me)
208: HTPrint("TRIPLE(%s,%s,%s)\n", me->m_sPredicate, me->m_sSubject,
209: me->m_sObject);
210: }
211:
2.3 frystyk 212: PUBLIC char * HTTriple_subject (HTTriple * me)
2.1 frystyk 213: {
214: return me ? me->m_sSubject : NULL;
215: }
216:
2.3 frystyk 217: PUBLIC char * HTTriple_predicate (HTTriple * me)
2.1 frystyk 218: {
219: return me ? me->m_sPredicate : NULL;
220: }
221:
2.3 frystyk 222: PUBLIC char * HTTriple_object (HTTriple * me)
2.1 frystyk 223: {
224: return me ? me->m_sObject : NULL;
225: }
226:
227: /* ------------------------------------------------------------------------- */
228: /* ELEMENT of RDF */
229: /* ------------------------------------------------------------------------- */
230:
2.3 frystyk 231: PUBLIC HTElement * HTElement_new (char * sName, HTAssocList * al)
2.1 frystyk 232: {
233: HTElement * me = NULL;
234: if (sName) {
235: if ((me = (HTElement *) HT_CALLOC(1, sizeof(HTElement))) == NULL)
236: HT_OUTOFMEM("HTElement_new");
237: StrAllocCopy(me->m_sName, sName);
238: me->m_attributes = al ? al : HTAssocList_new();
239: me->m_children = HTList_new();
240: /*me->m_nodes = HTAssocList_new();*/ /* Sirpac does not use nodes list */
241: me->m_vTargets = HTList_new();
242: me->m_bDone = FALSE;
243: }
244: return me;
245: }
246:
247: /*
248: ** Creates a Data Element and saves the data in the Content field.
249: ** Data Element does not have attributes
250: */
2.3 frystyk 251: PUBLIC HTElement * HTElement_new2 (char * sContent)
2.1 frystyk 252: {
253: HTElement * me = NULL;
254: if (sContent) {
255: if ((me = (HTElement *) HT_CALLOC(1, sizeof(HTElement))) == NULL)
256: HT_OUTOFMEM("HTElement_new2");
257: StrAllocMCopy(&me->m_sName, "[DATA: ", sContent, "]", NULL);
258: me->m_attributes = NULL;
259: me->m_children = HTList_new();
260: /*me->m_nodes = HTAssocList_new();*/
261: me->m_vTargets = HTList_new();
262: me->m_bDone = FALSE;
263: StrAllocCopy(me->m_sContent, sContent);
264: }
265: return me;
266: }
267:
2.3 frystyk 268: PUBLIC BOOL HTElement_addData (HTElement *me, char * sContent)
2.1 frystyk 269: {
270: if (me && sContent) {
271: int l = strlen(me->m_sName);
272: StrAllocCat(me->m_sContent, sContent);
273: me->m_sName[l-1]='\0';
274: StrAllocMCat(&me->m_sName, sContent, "]", NULL);
275: return YES;
276: }
277: return NO;
278: }
279:
280: PUBLIC BOOL HTElement_delete (HTElement * me)
281: {
282: if (me) {
283: HT_FREE(me->m_sName);
284: if (me->m_attributes) HTAssocList_delete(me->m_attributes);
285: if (me->m_children) HTList_delete(me->m_children);
286: HT_FREE(me->m_sID);
287: HT_FREE(me->m_sBagID);
288: if (me->m_vTargets) HTList_delete(me->m_vTargets);
289: HT_FREE(me->m_sPrefix);
290: HT_FREE(me->m_sContent);
291: HT_FREE(me);
292: return YES;
293: }
294: return NO;
295: }
296:
297: PUBLIC BOOL HTElement_addChild (HTElement * me, HTElement * element)
298: {
299: return (me && element) ? HTList_appendObject(me->m_children, element) : NO;
300: }
301:
2.3 frystyk 302: PUBLIC BOOL HTElement_addAttribute (HTElement * me, char * sName, char * sValue)
2.1 frystyk 303: {
304: return (me && sName && sValue) ?
305: HTAssocList_addObject(me->m_attributes, sName, sValue) : NO;
306: }
307:
2.3 frystyk 308: PUBLIC BOOL HTElement_removeAttribute (HTElement * me, char * sName)
2.1 frystyk 309: {
310: return (me && sName) ? HTAssocList_removeObject(me->m_attributes, sName) : NO;
311: }
312:
2.3 frystyk 313: PUBLIC char * HTElement_getAttribute (HTElement * me, char * sName)
2.1 frystyk 314: {
315: return (me && sName) ? HTAssocList_findObjectCaseSensitiveExact(me->m_attributes, sName) : NULL;
316: }
317:
2.3 frystyk 318: PUBLIC char * HTElement_getAttribute2 (HTElement * me, char * sNamespace, char * sName)
2.1 frystyk 319: {
2.3 frystyk 320: char * fValue = NULL;
321: char * fName = NULL;
2.1 frystyk 322: if (me && sNamespace && sName) {
323: StrAllocMCopy(&fName, sNamespace, sName, NULL);
324: fValue = HTAssocList_findObjectCaseSensitiveExact(me->m_attributes, fName);
325: HT_FREE(fName);
326: }
327: return fValue;
328: }
329:
330: PUBLIC BOOL HTElement_addTarget (HTElement * me, HTElement * element)
331: {
332: return (me && element) ? HTList_addObject(me->m_vTargets, element) : NO;
333: }
334:
335: PUBLIC HTElement * HTElement_target (HTElement * me)
336: {
337: return me ? (HTElement *) HTList_lastObject(me->m_vTargets) : NULL;
338: }
339:
340: PUBLIC BOOL HTElement_instanceOfData (HTElement * me)
341: {
342: return (me && me->m_sContent) ? YES : NO;
343: }
344:
345: /* ------------------------------------------------------------------------- */
346: /* EXPAT HANDLERS */
347: /* ------------------------------------------------------------------------- */
348:
349: /*
350: * Called for each new element.
351: * Build up the document tree using an element stack
352: */
353: PRIVATE void XML_startElement (void * userData,
354: const XML_Char * name, const XML_Char ** atts)
355: {
356: HTRDF * rdfp = (HTRDF *) userData;
357: HTAssocList * namespaces = HTAssocList_new();
358: HTAssocList * newAL = HTAssocList_new();
359: int i = 0;
2.5 barstow 360:
2.1 frystyk 361: /**
362: * The following loop tries to identify special xmlns prefix
363: * attributes and update the namespace stack accordingly.
364: * While doing all this, it builds another AttributeList instance
365: * which will hold the expanded names of the attributes
366: * (I think this approach is only useful for RDF which uses
367: * attributes as an abbreviated syntax for element names)
368: */
369: if (atts) {
370: while (atts[i]) {
2.3 frystyk 371: char * aName = (char * ) atts[i];
2.1 frystyk 372: if (!strcmp(aName, "xmlns")) {
2.3 frystyk 373: char * aValue = (char *) atts[i+1];
2.1 frystyk 374: int len = aValue ? strlen(aValue) : -1;
375: if (len == 0 && !rdfp->m_sSource)
376: aValue = rdfp->m_sSource;
377: HTAssocList_addObject(namespaces, aName, aValue);
378: /* save all non-RDF schema addresses */
379: if (!HTList_contains(rdfp->m_vAllNameSpaces, aValue) &&
380: strncmp(aValue, RDFMS, strlen(RDFMS)) &&
381: strncmp(aValue, RDFSCHEMA, strlen(RDFSCHEMA))) {
2.3 frystyk 382: char * nname = NULL;
2.1 frystyk 383: StrAllocCopy(nname, aValue);
384: HTList_addObject(rdfp->m_vAllNameSpaces, nname);
385: }
386:
387: /* Special case: Don't save document's own address */
388: if (rdfp->m_sSource &&
389: !strncmp(aValue, rdfp->m_sSource, strlen(rdfp->m_sSource))) {
2.3 frystyk 390: char * nname = NULL;
2.1 frystyk 391: StrAllocCopy(nname, aValue);
392: HTList_addObject(rdfp->m_vAllNameSpaces, nname);
393: }
394: } else if (!strncmp(aName, "xmlns:", 6)) {
2.3 frystyk 395: char * aValue = (char *) atts[i+1];
396: char * nName = NULL;
2.1 frystyk 397: int len = aValue ? strlen(aValue) : -1;
398: if (len == 0 && !rdfp->m_sSource)
399: aValue = rdfp->m_sSource;
400: StrAllocCopy(nName, &(aName[6]));
401: HTAssocList_addObject(namespaces, nName, aValue);
402: HT_FREE(nName);
403:
404: /* Save all non-RDF schema addresses */
405: if (!HTList_contains(rdfp->m_vAllNameSpaces, aValue) &&
406: strncmp(aValue, RDFMS, strlen(RDFMS)) &&
407: strncmp(aValue, RDFSCHEMA, strlen(RDFSCHEMA))) {
2.3 frystyk 408: char * nname = NULL;
2.1 frystyk 409: StrAllocCopy(nname, aValue);
410: HTList_addObject(rdfp->m_vAllNameSpaces, nname);
411: }
412:
413: /* Special case: Don't save document's own address */
414: if (rdfp->m_sSource &&
415: !strncmp(aValue, rdfp->m_sSource, strlen(rdfp->m_sSource))) {
2.3 frystyk 416: char * nname = NULL;
2.1 frystyk 417: StrAllocCopy(nname, aValue);
418: HTList_addObject(rdfp->m_vAllNameSpaces, nname);
419: }
420: }
421: i+=2;
422: } /* end of while */
423: } /* end of if */
424:
425: /*
426: ** Place new namespace declarations into the stack
427: ** (Yes, I could optimize this a bit, not it wastes space
428: ** if there are no xmlns definitions)
429: */
430: HTList_addObject(rdfp->m_namespaceStack, namespaces);
431:
432: /*
433: ** Figure out the prefix part if it exists and
434: ** determine the namespace of the element accordingly
435: */
436: {
2.3 frystyk 437: char * sNamespace = NULL;
438: char * sElementName = NULL;
439: char * sPrefix2 = NULL;
2.1 frystyk 440: HTElement *newElement = NULL;
441: char *pindex = strchr(name, ':');
442: int ix = pindex ? (int) (pindex - name) : -1 ;
443: if (ix > 0) {
444: if (!(sPrefix2 = HT_MALLOC(ix+1)))
445: HT_OUTOFMEM("XML_startELement");
446: strncpy(sPrefix2, name, ix);
447: sPrefix2[ix]='\0';
448: sNamespace = HTRDF_namespace(rdfp, sPrefix2);
449: StrAllocCopy(sElementName, &(name[ix+1]));
450: HT_FREE(sPrefix2);
451: } else {
452: sNamespace = HTRDF_namespace(rdfp, "xmlns");
453: StrAllocCopy(sElementName, name);
454: }
455:
456: /*
457: * Finally look for attributes other than the special xmlns,
458: * expand them, and place to the new Attribute List
459: */
460: i = 0;
461: if (atts) {
462: while (atts[i]) {
2.3 frystyk 463: char * aName = (char *) atts[i];
464: char * sAttributeNamespace = NULL;
2.1 frystyk 465: if (strncmp(aName, "xmlns", 5)) {
2.3 frystyk 466: char * aValue = (char *) atts[i+1];
467: char * sPrefix = NULL;
2.1 frystyk 468: /* Expat does not have type for attributes */
469: pindex = strchr(aName, ':');
470: ix = pindex ? (int) (pindex - aName) : -1;
471: if (ix > 0) {
472: if (!(sPrefix = HT_MALLOC(ix+1)))
473: HT_OUTOFMEM("XML_startELement");
474: strncpy(sPrefix, aName, ix);
475: sPrefix[ix] = '\0';
476: sAttributeNamespace = HTRDF_namespace(rdfp, sPrefix);
477: aName = &(aName[ix+1]);
478: HT_FREE(sPrefix);
479: } else {
480: if (!sNamespace)
481: sAttributeNamespace = HTRDF_namespace(rdfp, "xmlns");
482: else
483: StrAllocCopy(sAttributeNamespace, sNamespace);
484: }
485:
486: if (HTRDF_parseLiteral(rdfp)) {
487: if (!sPrefix) {
488: if (!(sPrefix = HT_MALLOC(8)))
489: HT_OUTOFMEM("XML_startELement");
490: sprintf(sPrefix, "gen%d\n", i);
491: }
492: {
2.3 frystyk 493: char * fName = NULL;
2.1 frystyk 494: StrAllocMCopy(&fName, sPrefix, ":", aValue, NULL);
495: HTAssocList_addObject(newAL, fName, aValue);
496: HT_FREE(fName);
497: StrAllocMCopy(&fName, "xmlns:", sPrefix, NULL);
498: HTAssocList_addObject(newAL, fName, sAttributeNamespace);
499: HT_FREE(fName);
500: }
501: } else {
2.3 frystyk 502: char * fName = NULL;
2.1 frystyk 503: StrAllocMCopy(&fName, sAttributeNamespace, aName, NULL);
504: HTAssocList_addObject(newAL, fName, aValue);
505: HT_FREE(fName);
506: }
507:
508: HT_FREE(sAttributeNamespace);
509:
510: /*
511: ** This call will try to see if the user is using
512: ** RDF look-alike elements from another namespace
513: **
514: ** Note: you can remove the call if you wish
515: */
516: #if 0
517: HTRDF_likeRDF (rdfp, sAttributeNamespace, aName);
518: #endif
519:
520: } /* end of if */
521: i+=2;
522: } /* end of while */
523: } /* end of if atts */
524:
525: /*
526: * If we have parseType="Literal" set earlier, this element
527: * needs some additional attributes to make it stand-alone
528: * piece of XML
529: */
530: if (HTRDF_parseLiteral(rdfp)) {
2.3 frystyk 531: char * fName = NULL;
2.4 barstow 532:
2.1 frystyk 533: if (!sPrefix2) {
534: if (sNamespace)
535: HTAssocList_addObject(newAL, "xmlns:gen", sNamespace);
536: StrAllocMCopy(&fName, "gen", sElementName, NULL);
537: newElement = HTElement_new(fName, newAL);
538: StrAllocCopy(newElement->m_sPrefix, "gen");
539: HT_FREE(fName);
540: } else {
2.3 frystyk 541: char * sAttributeNamespace = HTRDF_namespace(rdfp, sPrefix2);
2.1 frystyk 542: if (sAttributeNamespace) {
543: StrAllocMCopy(&fName, "xmlns:", sPrefix2, NULL);
544: HTAssocList_addObject(newAL, fName, sAttributeNamespace);
545: HT_FREE(fName);
546: }
547: StrAllocMCopy(&fName, sPrefix2, ":", sElementName, NULL);
548: newElement = HTElement_new(fName, newAL);
549: HT_FREE(fName);
550: }
551: } else {
2.3 frystyk 552: char * fName = NULL;
2.1 frystyk 553: StrAllocMCopy(&fName, sNamespace, sElementName, NULL);
554: newElement = HTElement_new(fName, newAL);
555: HT_FREE(fName);
556: /* HTRDF_likeRDF (rdfp, sNamespace, sElementName); */
557: }
558: HT_FREE(sElementName);
559: HT_FREE(sNamespace);
560: HTRDF_checkAttributes(rdfp, newElement);
2.4 barstow 561:
2.1 frystyk 562: /*
563: ** Check parseType
564: */
565: {
2.3 frystyk 566: char * fName = NULL;
567: char * sLiteralValue = NULL;
2.1 frystyk 568: StrAllocMCopy(&fName, RDFMS, "parseType", NULL);
569: sLiteralValue = HTElement_getAttribute(newElement, fName);
570: HT_FREE(fName);
571: if (sLiteralValue && strcmp(sLiteralValue, "Resource")) {
572: /**
573: * This is the management of the element where
574: * parseType="Literal" appears
575: *
576: * You should notice RDF V1.0 conforming implementations
577: * must treat other values than Literal and Resource as
578: * Literal. This is why the condition is !equals("Resource")
579: */
580:
581: HTList_addObject(rdfp->m_parseTypeStack, sLiteralValue);
582: if (!HTList_isEmpty(rdfp->m_elementStack)) {
583: HTElement *e = (HTElement *)
584: HTList_lastObject(rdfp->m_elementStack);
585: HTElement_addChild(e, newElement);
586: }
587: HTList_addObject(rdfp->m_elementStack, newElement);
588: HTList_addObject(rdfp->m_parseElementStack, newElement);
589: HT_FREE(rdfp->m_sLiteral);
590: StrAllocCopy(rdfp->m_sLiteral, "");
591: return;
592: }
593:
594: if (HTRDF_parseLiteral(rdfp)) {
595: /*
596: * This is the management of any element nested within
597: * a parseType="Literal" declaration
598: */
2.4 barstow 599: /* Add the element to the parser's literal buffer */
600: addMarkupStart (rdfp, name, atts);
601:
2.1 frystyk 602: HTList_addObject(rdfp->m_elementStack, newElement);
603: return;
604: }
605:
606: /*
607: ** Update the containment hierarchy with the stack.
608: */
609: if (!HTList_isEmpty(rdfp->m_elementStack)) {
610: HTElement *e = (HTElement *)
611: HTList_lastObject(rdfp->m_elementStack);
612: HTElement_addChild(e, newElement);
613: }
614:
615: /*
616: ** Place the new element into the stack
617: */
618: HTList_addObject(rdfp->m_elementStack, newElement);
619: if (sLiteralValue && !strcmp(sLiteralValue, "Resource")) {
620: HTList_addObject(rdfp->m_parseTypeStack, sLiteralValue);
621: HTList_addObject(rdfp->m_parseElementStack, newElement);
622: HT_FREE(rdfp->m_sLiteral);
623: StrAllocCopy(rdfp->m_sLiteral, "");
624:
625: /*
626: * Since parseType="Resource" implies the following
627: * production must match Description, let's create
628: * an additional Description node here in the document tree.
629: */
630: {
2.3 frystyk 631: char * fName = NULL;
2.1 frystyk 632: HTElement *desc = NULL;
633: HTAssocList * al = HTAssocList_new ();
634: StrAllocMCopy(&fName, RDFMS, "Description", NULL);
635: desc = HTElement_new(fName, al);
636: HT_FREE(fName);
637: if (!HTList_isEmpty(rdfp->m_elementStack)) {
638: HTElement *e = (HTElement *)
639: HTList_lastObject(rdfp->m_elementStack);
640: HTElement_addChild(e, desc);
641: }
642: HTList_addObject(rdfp->m_elementStack, desc);
643: }
644: } /* end of if */
645: } /* end of block */
646: } /* end of block */
647: }
648:
649: /*
650: * For each end of an element scope step back in the
651: * element and namespace stack
652: */
653: PRIVATE void XML_endElement (void * userData,
654: const XML_Char * name)
655: {
656: HTRDF * rdfp = (HTRDF *) userData;
657: BOOL bParseLiteral = rdfp ? HTRDF_parseLiteral(rdfp) : NO;
658: HTAssocList * namespaces = HTList_removeLastObject(rdfp->m_namespaceStack);
659: rdfp->m_root = (HTElement *) HTList_removeLastObject(rdfp->m_elementStack);
660: if (namespaces) HTAssocList_delete(namespaces);
661:
662: if (bParseLiteral) {
663: HTElement *pe = (HTElement *)
664: HTList_lastObject(rdfp->m_parseElementStack);
665: if (pe != rdfp->m_root) {
2.4 barstow 666: /* Terminate the literal */
667: addMarkupEnd (rdfp, name);
2.1 frystyk 668: } else {
669: HTElement *de = HTElement_new2(rdfp->m_sLiteral);
670: HTElement_addChild(pe, de);
2.4 barstow 671:
2.1 frystyk 672: HT_FREE(rdfp->m_sLiteral);
673: StrAllocCopy(rdfp->m_sLiteral, "");
674: HTList_removeLastObject(rdfp->m_parseElementStack);
675: HTList_removeLastObject(rdfp->m_parseTypeStack);
676: }
677: } else if (HTRDF_parseResource(rdfp)) {
678: /**
679: * If we are doing parseType="Resource"
680: * we need to explore whether the next element in
681: * the stack is the closing element in which case
682: * we remove it as well (remember, there's an
683: * extra Description element to be removed)
684: */
685: if (!HTList_isEmpty(rdfp->m_elementStack)) {
686: HTElement *pe = (HTElement *)
687: HTList_lastObject(rdfp->m_parseElementStack);
688: HTElement *e = (HTElement *)
689: HTList_lastObject(rdfp->m_elementStack);
690: if (pe == e) {
691: e = (HTElement *) HTList_removeLastObject(rdfp->m_elementStack);
692: HTList_removeLastObject(rdfp->m_parseElementStack);
693: HTList_removeLastObject(rdfp->m_parseTypeStack);
694: }
695: }
696: }
697: }
698:
699: PRIVATE void XML_characterData (void * userData,
700: const XML_Char * s, int len)
701: {
702: /*
703: * Place all characters as Data instance to the containment
704: * hierarchy with the help of the stack.
705: */
706: HTRDF * rdfp = (HTRDF *) userData;
2.3 frystyk 707: HTElement * e = (HTElement *) HTList_lastObject(rdfp->m_elementStack);
708: char * tstr = NULL;
709: char * str = NULL;
710: if (!(str = (char *) HT_MALLOC(len+1)))
2.1 frystyk 711: HT_OUTOFMEM("XML_characterData");
712: strncpy(str, s, len);
713: str[len]='\0';
714: if (HTRDF_parseLiteral(rdfp)) {
715: StrAllocCat(rdfp->m_sLiteral, str);
716: HT_FREE(str);
717: return;
718: }
719: /* JUST FOR EXPAT */
720: {
721: HTElement *lch = (HTElement *) HTList_lastObject(e->m_children);
722: if (lch && HTElement_instanceOfData(lch)) {
723: HTElement_addData(lch, str);
724: HT_FREE(str);
725: return;
726: }
727: }
728: /*
729: * Warning: this is not correct procedure according to XML spec.
730: * All whitespace matters!
731: */
732: tstr = trim(str);
733: if (strlen(tstr) > 0) {
734: HTElement * de = HTElement_new2(tstr);
735: HTElement_addChild(e, de);
736: }
737: HT_FREE(str); HT_FREE(tstr);
738: }
739:
740: PRIVATE void XML_processingInstruction (void * userData,
741: const XML_Char * target,
742: const XML_Char * data)
743: {
744: return;
745: }
746:
747: /*
748: ** This is called for any characters in the XML document for
749: ** which there is no applicable handler. This includes both
750: ** characters that are part of markup which is of a kind that is
751: ** not reported (comments, markup declarations), or characters
752: ** that are part of a construct which could be reported but
753: ** for which no handler has been supplied. The characters are passed
754: ** exactly as they were in the XML document except that
755: ** they will be encoded in UTF-8. Line boundaries are not normalized.
756: ** Note that a byte order mark character is not passed to the default handler.
757: ** If a default handler is set, internal entity references
758: ** are not expanded. There are no guarantees about
759: ** how characters are divided between calls to the default handler:
760: ** for example, a comment might be split between multiple calls.
761: */
762: PRIVATE void XML_default (void * userData,
763: const XML_Char * s, int len)
764: {
765: return;
766: }
767:
768: /*
769: ** This is called for a declaration of an unparsed (NDATA)
770: ** entity. The base argument is whatever was set by XML_SetBase.
771: ** The entityName, systemId and notationName arguments will never be null.
772: ** The other arguments may be.
773: */
774: PRIVATE void XML_unparsedEntityDecl (void * userData,
775: const XML_Char * entityName,
776: const XML_Char * base,
777: const XML_Char * systemId,
778: const XML_Char * publicId,
779: const XML_Char * notationName)
780: {
781: return;
782: }
783:
784: /*
785: ** This is called for a declaration of notation.
786: ** The base argument is whatever was set by XML_SetBase.
787: ** The notationName will never be null. The other arguments can be.
788: */
789: PRIVATE void XML_notationDecl (void * userData,
790: const XML_Char * notationName,
791: const XML_Char * base,
792: const XML_Char * systemId,
793: const XML_Char * publicId)
794: {
795: return;
796: }
797:
798: /*
799: ** This is called for a reference to an external parsed general entity.
800: ** The referenced entity is not automatically parsed.
801: ** The application can parse it immediately or later using
802: ** XML_ExternalEntityParserCreate.
803: ** The parser argument is the parser parsing the entity containing the reference;
804: ** it can be passed as the parser argument to XML_ExternalEntityParserCreate.
805: ** The systemId argument is the system identifier as specified in the entity
806: ** declaration; it will not be null.
807: ** The base argument is the system identifier that should be used as the base for
808: ** resolving systemId if systemId was relative; this is set by XML_SetBase;
809: ** it may be null.
810: ** The publicId argument is the public identifier as specified in the entity declaration,
811: ** or null if none was specified; the whitespace in the public identifier
812: ** will have been normalized as required by the XML spec.
813: ** The openEntityNames argument is a space-separated list of the names of the entities
814: ** that are open for the parse of this entity (including the name of the referenced
815: ** entity); this can be passed as the openEntityNames argument to
816: ** XML_ExternalEntityParserCreate; openEntityNames is valid only until the handler
817: ** returns, so if the referenced entity is to be parsed later, it must be copied.
818: ** The handler should return 0 if processing should not continue because of
819: ** a fatal error in the handling of the external entity.
820: ** In this case the calling parser will return an XML_ERROR_EXTERNAL_ENTITY_HANDLING
821: ** error.
822: ** Note that unlike other handlers the first argument is the parser, not userData.
823: */
824: PRIVATE int XML_externalEntityRef (XML_Parser parser,
825: const XML_Char * openEntityNames,
826: const XML_Char * base,
827: const XML_Char * systemId,
828: const XML_Char * publicId)
829: {
830: return 0;
831: }
832:
833: /*
834: ** This is called for an encoding that is unknown to the parser.
835: ** The encodingHandlerData argument is that which was passed as the
836: ** second argument to XML_SetUnknownEncodingHandler.
837: ** The name argument gives the name of the encoding as specified in
838: ** the encoding declaration.
839: ** If the callback can provide information about the encoding,
840: ** it must fill in the XML_Encoding structure, and return 1.
841: ** Otherwise it must return 0.
842: ** If info does not describe a suitable encoding,
843: ** then the parser will return an XML_UNKNOWN_ENCODING error.
844: */
845: PRIVATE int XML_unknownEncoding (void * encodingHandlerData,
846: const XML_Char * name,
847: XML_Encoding * info)
848: {
849: return 0;
850: }
851:
852: /* ------------------------------------------------------------------------- */
853: /* HTXML STREAM HANDLERS */
854: /* ------------------------------------------------------------------------- */
855:
856: PRIVATE void rdf_setHandlers (XML_Parser me)
857: {
858: XML_SetElementHandler(me, XML_startElement, XML_endElement);
859: XML_SetCharacterDataHandler(me, XML_characterData);
860: XML_SetProcessingInstructionHandler(me, XML_processingInstruction);
861: XML_SetDefaultHandler(me, XML_default);
862: XML_SetUnparsedEntityDeclHandler(me, XML_unparsedEntityDecl);
863: XML_SetNotationDeclHandler(me, XML_notationDecl);
864: XML_SetExternalEntityRefHandler(me, XML_externalEntityRef);
865: XML_SetUnknownEncodingHandler(me, XML_unknownEncoding, NULL);
866: }
867:
868: PRIVATE void rdf_newInstance (HTStream * me,
869: HTRequest * request,
870: HTFormat target_format,
871: HTStream * target_stream,
872: XML_Parser xmlparser,
873: void * context)
874: {
875: if (me && xmlparser) {
876: rdf_setHandlers(xmlparser);
877: XML_SetUserData(xmlparser, context);
878:
879: /* Call the new RDF instance callback (if any) with this new stream */
880: if (RDFInstance)
881: (*RDFInstance)(me, request, target_format, target_stream, context, RDFInstanceContext);
882: }
883: }
884:
885: /* ------------------------------------------------------------------------- */
886: /* RDF PARSER */
887: /* ------------------------------------------------------------------------- */
888:
889: PRIVATE void visit_element_children (HTList *children)
890: {
891: HTElement *child = NULL;
892: HTList *cur = children;
893: while ((child = (HTElement *) HTList_nextObject(cur))) {
894: if (!HTList_isEmpty(child->m_children))
895: visit_element_children(child->m_children);
896: HTElement_delete(child);
897: }
898: }
899:
900: PRIVATE void delete_elements (HTRDF * me)
901: {
902: if (me && me->m_root) {
903: HTElement *r = me->m_root;
904: if (!HTList_isEmpty(r->m_children))
905: visit_element_children(r->m_children);
906: HTElement_delete(r);
907: }
908: }
909:
910: PUBLIC HTRDF * HTRDF_new (void)
911: {
912: HTRDF * me;
913: if ((me = (HTRDF *) HT_CALLOC(1, sizeof(HTRDF))) == NULL)
914: HT_OUTOFMEM("HTRDF_new");
915: me->m_namespaceStack = HTList_new();
916: me->m_elementStack = HTList_new();
917:
918: me->m_triples = HTList_new();
919: me->m_vAllNameSpaces = HTList_new();
920:
921: me->m_bCreateBags = FALSE;
922: me->m_bFetchSchemas = FALSE;
923:
924: me->m_parseTypeStack = HTList_new();
925: me->m_parseElementStack = HTList_new();
926:
927: me->m_vResources = HTList_new();
928: me->m_vResolveQueue = HTList_new();
929: me->m_hIDtable = HTHashtable_new(0);
930:
931: return me;
932: }
933:
934: PUBLIC BOOL HTRDF_delete (HTRDF * me)
935: {
936: if (me) {
937: delete_elements(me);
938: if (me->m_namespaceStack) {
939: HTList *cur = me->m_namespaceStack;
940: HTAssocList *alist = NULL;
941: while ((alist = (HTAssocList *) HTList_nextObject(cur))) {
942: HTAssocList_delete(alist);
943: }
944: HTList_delete(me->m_namespaceStack);
945: }
946: if (me->m_elementStack) HTList_delete(me->m_elementStack);
947: me->m_root = NULL;
948: if (me->m_triples) {
949: HTList *cur = me->m_triples;
950: HTTriple *t = NULL;
951: while ((t = (HTTriple *) HTList_nextObject(cur))) {
952: /*HTTriple_print(t);*/
953: HTTriple_delete(t);
954: }
955: HTList_delete(me->m_triples);
956: }
957: HT_FREE(me->m_sSource);
958: if (me->m_vAllNameSpaces) {
959: HTList *cur = me->m_vAllNameSpaces;
2.3 frystyk 960: char * s = NULL;
961: while ((s = (char *) HTList_nextObject(cur))) {
2.1 frystyk 962: HT_FREE(s);
963: }
964: HTList_delete(me->m_vAllNameSpaces);
965: }
966: if (me->m_parseTypeStack)
967: HTList_delete(me->m_parseTypeStack);
968: if (me->m_parseElementStack)
969: HTList_delete(me->m_parseElementStack);
970: if (me->m_vResources)
971: HTList_delete(me->m_vResources);
972: if (me->m_vResolveQueue)
973: HTList_delete(me->m_vResolveQueue);
974: if (me->m_hIDtable)
975: HTHashtable_delete(me->m_hIDtable);
976: HT_FREE(me->m_sLiteral);
977: HT_FREE(me);
978: return YES;
979: }
980: return NO;
981: }
982:
983: /*
984: * setSource method saves the name of the source document for
985: * later inspection if needed
986: */
2.3 frystyk 987: PUBLIC BOOL HTRDF_setSource(HTRDF *me, char * source)
2.1 frystyk 988: {
989: if (me && source) {
990: StrAllocCopy (me->m_sSource, source);
991: return YES;
992: }
993: return NO;
994: }
995:
996: /*
997: * Go through the m_vResolveQueue and assign
998: * direct object reference for each symbolic reference
999: */
1000: PUBLIC BOOL HTRDF_resolve (HTRDF * me)
1001: {
1002: if (me) {
1003: HTList * cur = me->m_vResolveQueue;
1004: HTElement *e = NULL;
1005: HTElement *e2 = NULL;
1006: while ((e = (HTElement *) HTList_nextObject(cur))) {
2.3 frystyk 1007: char * sAbout = HTElement_getAttribute2(e, RDFMS, "about");
1008: char * sResource = HTElement_getAttribute2(e, RDFMS, "resource");
1009: char * sAboutEach = HTElement_getAttribute2(e, RDFMS, "aboutEach");
1010: char * sAboutEachPrefix = HTElement_getAttribute2(e, RDFMS,
2.1 frystyk 1011: "aboutEachPrefix");
1012: if (sAbout) {
1013: if (sAbout[0]=='#')
1014: sAbout = &(sAbout[1]);
1015: e2 = (HTElement *) HTRDF_lookforNode(me, sAbout);
1016: if (e2)
1017: HTElement_addTarget(e, e2);
1018: else
1019: HTPrint("Unresolved internal reference %s\n", sAbout);
1020: }
1021: if (sResource) {
1022: if (sResource[0]=='#')
1023: sResource = &(sResource[1]);
1024: e2 = (HTElement *) HTRDF_lookforNode(me, sResource);
1025: if (e2)
1026: HTElement_addTarget(e, e2);
1027: }
1028:
1029: if (sAboutEach) {
1030: sAboutEach = &(sAboutEach[1]);
1031: e2 = (HTElement *) HTRDF_lookforNode(me, sAboutEach);
1032: if (e2)
1033: HTElement_addTarget(e, e2);
1034: }
1035: if (sAboutEachPrefix) {
1036: HTList * curr = me->m_vResources;
1037: HTElement *ele = NULL;
1038: while ((ele = (HTElement *) HTList_nextObject(curr))) {
2.3 frystyk 1039: char * sA = HTElement_getAttribute2(ele, RDFMS, "about");
2.1 frystyk 1040: if (sA &&
1041: !strncmp(sA, sAboutEachPrefix, strlen(sAboutEachPrefix))) {
1042: HTElement_addTarget(e, ele);
1043: }
1044: }
1045: }
1046: }
1047: HTList_delete(me->m_vResources);
1048: me->m_vResources = HTList_new();
1049: return YES;
1050: }
1051: return NO;
1052: }
1053:
1054: /**
1055: * Check if the element e is from the namespace
1056: * of the RDF schema by comparing only the beginning of
1057: * the expanded element name with the canonical RDFMS
1058: * URI
1059: */
1060: PUBLIC BOOL HTRDF_isRDF(HTRDF * me, HTElement *e)
1061: {
1062: return (me && e && e->m_sName) ?
1063: (!strncmp(e->m_sName, RDFMS, strlen(RDFMS))) : NO;
1064: }
1065:
1066: PUBLIC BOOL HTRDF_isRDFroot (HTRDF * me, HTElement *e)
1067: {
1068: if (me && e && e->m_sName) {
1069: int len = strlen(e->m_sName);
1070: if (len > 3) return (HTRDF_isRDF(me, e) && !strcmp(&(e->m_sName[len-3]), "RDF"));
1071: }
1072: return NO;
1073: }
1074:
1075: /**
1076: * Is the element a Description
1077: */
1078: PUBLIC BOOL HTRDF_isDescription (HTRDF *me, HTElement *e)
1079: {
1080: if (me && e && e->m_sName) {
1081: int len = strlen(e->m_sName);
1082: if (len > 11) return (HTRDF_isRDF(me, e) && !strcmp(&(e->m_sName[len-11]), "Description"));
1083: }
1084: return NO;
1085: }
1086:
1087: /*
1088: * Is the element a ListItem
1089: */
1090: PUBLIC BOOL HTRDF_isListItem (HTRDF *me, HTElement *e)
1091: {
1092: if (me && e && e->m_sName) {
1093: int len = strlen(e->m_sName);
1094: if (len > 2)
1095: return (HTRDF_isRDF(me, e) && (!strcmp(&(e->m_sName[len-2]), "li") || strchr(e->m_sName,'_')));
1096: }
1097: return NO;
1098: }
1099:
1100: /**
1101: * Is the element a Sequence
1102: */
1103: PUBLIC BOOL HTRDF_isSequence (HTRDF *me, HTElement *e)
1104: {
1105: if (me && e && e->m_sName) {
1106: int len = strlen(e->m_sName);
1107: if (len > 3) return (HTRDF_isRDF(me, e) && !strcmp(&(e->m_sName[len-3]), "Seq"));
1108: }
1109: return NO;
1110: }
1111:
1112: /*
1113: * Is the element an Alternative
1114: */
1115: PUBLIC BOOL HTRDF_isAlternative (HTRDF *me, HTElement *e)
1116: {
1117: if (me && e && e->m_sName) {
1118: int len = strlen(e->m_sName);
1119: if (len > 3) return (HTRDF_isRDF(me, e) && !strcmp(&(e->m_sName[len-3]), "Alt"));
1120: }
1121: return NO;
1122: }
1123:
1124: /*
1125: * Is the element a Bag
1126: */
1127: PUBLIC BOOL HTRDF_isBag (HTRDF *me, HTElement *e)
1128: {
1129: if (me && e && e->m_sName) {
1130: int len = strlen(e->m_sName);
1131: if (len > 3) return (HTRDF_isRDF(me, e) && !strcmp(&(e->m_sName[len-3]), "Bag"));
1132: }
1133: return NO;
1134: }
1135:
1136: /**
1137: * Is the element a Container
1138: */
1139: PUBLIC BOOL HTRDF_isContainer (HTRDF *me, HTElement *e)
1140: {
1141: return (HTRDF_isSequence(me, e) ||
1142: HTRDF_isAlternative(me, e) ||
1143: HTRDF_isBag(me, e));
1144: }
1145:
1146: /*
1147: * This method matches all properties but those from RDF namespace
1148: */
1149: PUBLIC BOOL HTRDF_isTypedPredicate(HTRDF *me, HTElement *e)
1150: {
1151: if (me && e && e->m_sName) {
1152: int len = strlen(e->m_sName);
2.3 frystyk 1153: char * tp[] = {"predicate", "subject", "object",
2.1 frystyk 1154: "value", "type", "Property", "Statement"};
1155: int i;
1156: if (HTRDF_isRDF(me, e)) {
1157: for(i = 0; i< 7; i++) {
1158: int ntp = strlen(tp[i]);
1159: if (len > ntp) {
1160: if (!strcmp(&(e->m_sName[len-ntp]), tp[i]))
1161: return YES;
1162: }
1163: }
1164: return NO;
1165: }
1166: if (len > 0) return YES;
1167: }
1168: return NO;
1169: }
1170:
2.3 frystyk 1171: PRIVATE void HTRDF_processListItem (HTRDF * me, char * sID, HTElement *listitem,
2.1 frystyk 1172: int iCounter)
1173: {
1174: /*
1175: * Two different cases for
1176: * 1. LI element without content (resource available)
1177: * 2. LI element with content (resource unavailable)
1178: */
2.3 frystyk 1179: char * cName = NULL;
1180: char * sResource = HTRDF_getResource(me, listitem);
2.1 frystyk 1181: char sdig[20];
1182: sprintf(sdig, "_%d", iCounter);
1183: StrAllocMCopy(&cName, RDFMS, sdig, NULL);
1184: if (sResource) {
1185: HTRDF_addTriple(me, cName, sID, sResource);
1186: /* validity checking */
1187: if (!HTList_isEmpty(listitem->m_children)){
1188: HTPrint("Listitem with resource attribute can not have child nodes");
1189: }
1190: StrAllocCopy(listitem->m_sID, sResource);
1191: } else {
1192: HTList *cur = listitem->m_children;
1193: HTElement *n = NULL;
1194: while ((n = (HTElement *) HTList_nextObject(cur))) {
1195: if (HTElement_instanceOfData(n)) {
1196: HTRDF_addTriple(me, cName, sID, n->m_sContent);
1197: } else if (HTRDF_isDescription(me, n)) {
2.3 frystyk 1198: char * sNodeID = HTRDF_processDescription(me, n, NO, YES, NO);
2.1 frystyk 1199: HTRDF_addTriple(me, cName, sID, sNodeID);
1200: StrAllocCopy(listitem->m_sID, sNodeID);
1201: } else if (HTRDF_isListItem(me, n)) {
1202: HTPrint("Can not nest list item inside list item\n");
1203: } else if (HTRDF_isContainer(me, n)) {
2.3 frystyk 1204: char * c = HTRDF_processContainer(me, n);
2.1 frystyk 1205: HTRDF_addTriple(me, cName, sID, n->m_sID);
1206: HT_FREE(c);
1207: } else if (HTRDF_isTypedPredicate(me, n)) {
2.3 frystyk 1208: char * sNodeID = HTRDF_processTypedNode(me, n);
2.1 frystyk 1209: HTRDF_addTriple(me, cName, sID, sNodeID);
1210: HT_FREE(sNodeID);
1211: }
1212: }
1213: }
1214: HT_FREE(cName);
1215: }
1216:
2.3 frystyk 1217: PRIVATE char * HTRDF_processContainer(HTRDF *me, HTElement *n)
2.1 frystyk 1218: {
2.3 frystyk 1219: char * sID = NULL;
1220: char * tName = NULL;
1221: char * aName = NULL;
1222: char * sName = NULL;
1223: char * bName = NULL;
2.1 frystyk 1224: StrAllocMCopy(&tName, RDFMS, "type", NULL);
1225: StrAllocMCopy(&aName, RDFMS, "Alt", NULL);
1226: StrAllocMCopy(&sName, RDFMS, "Seq", NULL);
1227: StrAllocMCopy(&bName, RDFMS, "Bag", NULL);
1228:
1229: StrAllocCopy(sID, n->m_sID);
1230: if (!sID)
1231: sID = HTRDF_newReificationID(me);
1232: /*
1233: * Do the instantiation only once
1234: */
1235: if (!n->m_bDone) {
1236: if (HTRDF_isSequence(me, n)) {
1237: HTRDF_addTriple(me, tName, sID, sName);
1238: } else if (HTRDF_isAlternative(me, n)) {
1239: HTRDF_addTriple(me, tName, sID, aName);
1240: } else if (HTRDF_isBag(me, n)) {
1241: HTRDF_addTriple(me, tName, sID, bName);
1242: }
1243: n->m_bDone = YES;
1244: }
1245: HTRDF_expandAttributes(me, n, n);
1246:
1247: {
1248: HTList *cur = n->m_children;
1249: HTElement *n2 = NULL;
1250: int iCounter = 1;
1251: if (HTList_isEmpty(cur) && HTRDF_isAlternative(me, n))
1252: HTPrint("An RDF:Alt container must have at least one list item\n");
1253: while ((n2 = (HTElement *) HTList_nextObject(cur))) {
1254: if (HTRDF_isListItem(me, n2)) {
1255: HTRDF_processListItem(me, sID, n2, iCounter);
1256: iCounter++;
1257: } else {
1258: HTPrint("Can not nest %s, inside container\n", n2->m_sName);
1259: }
1260: }
1261: } /* end of block */
1262:
1263: HT_FREE(tName); HT_FREE(sName); HT_FREE(aName); HT_FREE(bName);
1264:
1265: return sID;
1266: }
1267: /*
1268: * Manage the typedNode production in the RDF grammar.
1269: *
1270: */
2.3 frystyk 1271: PUBLIC char * HTRDF_processTypedNode(HTRDF *me, HTElement *typedNode)
2.1 frystyk 1272: {
2.3 frystyk 1273: char * sID = HTElement_getAttribute2(typedNode, RDFMS, "ID");
1274: char * sBagID = HTElement_getAttribute2(typedNode, RDFMS, "bagID");
1275: char * sAbout = HTElement_getAttribute2(typedNode, RDFMS, "about");
1276: char * sAboutEach = HTElement_getAttribute2(typedNode, RDFMS, "aboutEach");
1277: /*char * sAboutEachPrefix = HTElement_getAttribute2(typedNode, RDFMS,
2.1 frystyk 1278: "aboutEachPrefix");*/
2.3 frystyk 1279: char * resource = HTElement_getAttribute2(typedNode, RDFMS, "resource");
1280: char * iName = NULL;
1281: char * bName = NULL;
1282: char * tName = NULL;
2.1 frystyk 1283:
2.3 frystyk 1284: char * sObject = NULL;
2.1 frystyk 1285:
1286: StrAllocMCopy(&iName, RDFMS, "ID", NULL);
1287: StrAllocMCopy(&bName, RDFMS, "bagID", NULL);
1288: StrAllocMCopy(&tName, RDFMS, "type", NULL);
1289:
1290: if (resource)
1291: HTPrint("resource attribute not allowed for a typedNode %s\n",
1292: typedNode->m_sName);
1293:
1294: /*
1295: * We are going to manage this typedNode using the processDescription
1296: * routine later on. Before that, place all properties encoded as
1297: * attributes to separate child nodes.
1298: */
1299: {
1300: HTAssoc * assoc;
1301: HTAssocList *cur = typedNode->m_attributes;
2.3 frystyk 1302: char * sAttribute = NULL;
1303: char * tValue = NULL;
1304: char * sValue = NULL;
2.1 frystyk 1305: while((assoc= (HTAssoc *) HTList_nextObject(cur))) {
1306: sAttribute = HTAssoc_name(assoc);
1307: sValue = HTAssoc_value(assoc);
1308: tValue = trim(sValue);
1309: if (strncmp(sAttribute, RDFMS, strlen(RDFMS)) &&
1310: strncmp(sAttribute, XMLSCHEMA, strlen(XMLSCHEMA))) {
1311: if (strlen(tValue) > 0) {
1312: HTAssocList *newAL = HTAssocList_new();
1313: HTElement *newPredicate = HTElement_new(sAttribute, newAL);
1314: HTElement *d = NULL;
1315: HTElement_addAttribute(newPredicate, iName,
1316: sAbout ? sAbout : sID);
1317: HTElement_addAttribute(newPredicate, bName, sBagID);
1318: d = HTElement_new2(tValue);
1319: HTElement_addChild(newPredicate, d);
1320: HTElement_addChild(typedNode, newPredicate);
1321: HTElement_removeAttribute(typedNode, sAttribute);
1322: }
1323: }
1324: HT_FREE(tValue);
1325: } /* end of while */
1326: }/* end of block */
1327: {
1328: if (sAbout)
1329: StrAllocCopy(sObject, sAbout);
1330: else if (sID)
1331: StrAllocCopy(sObject, sID);
1332: else
1333: sObject = HTRDF_newReificationID(me);
1334: StrAllocCopy(typedNode->m_sID, sObject);
1335:
1336: /* special case: should the typedNode have aboutEach attribute,
1337: ** the type predicate should distribute to pointed
1338: ** collection also -> create a child node to the typedNode
1339: */
1340: if (sAboutEach && !HTList_isEmpty(typedNode->m_vTargets)) {
1341: HTAssocList *newAL = HTAssocList_new();
1342: HTElement *newPredicate = HTElement_new(tName, newAL);
1343: HTElement *d = HTElement_new2(typedNode->m_sName);
1344: HTElement_addChild(newPredicate, d);
1345: HTElement_addChild(typedNode, newPredicate);
1346: } else {
1347: HTRDF_addTriple(me, tName, sObject, typedNode->m_sName);
1348: }
1349: HTRDF_processDescription(me, typedNode, NO, NO, YES);
1350: }/* end of block */
1351:
1352: HT_FREE(iName); HT_FREE(bName); HT_FREE(tName);
1353:
1354: return sObject;
1355: }
1356:
1357: /*
1358: * Start processing an RDF/XML document instance from the
1359: * root element rdf.
1360: *
1361: */
1362: PUBLIC BOOL HTRDF_processRDF (HTRDF *me, HTElement *e)
1363: {
1364: if (me && e) {
1365: HTList *cur = e->m_children;
1366: HTElement *ele = NULL;
1367: if (HTList_isEmpty(e->m_children)) {
1368: HTPrint("Empty RDF Element\n");
1369: return NO;
1370: }
1371: while ((ele= (HTElement *) HTList_nextObject(cur))) {
1372: if (HTRDF_isDescription(me, ele)) {
1373: HTRDF_processDescription(me, ele, NO, me->m_bCreateBags,
1374: me->m_bCreateBags);
1375: } else if (HTRDF_isContainer(me, ele)) {
2.3 frystyk 1376: char * c = HTRDF_processContainer(me, ele);
2.1 frystyk 1377: HT_FREE(c);
1378: } else if (HTRDF_isTypedPredicate(me, ele)) {
2.3 frystyk 1379: char * t = HTRDF_processTypedNode(me, ele);
2.1 frystyk 1380: HT_FREE(t);
1381: }
1382: }
1383: return YES;
1384: }
1385: return NO;
1386: }
1387:
1388: /*
1389: * processPredicate handles all elements not defined as special
1390: * RDF elements.
1391: *
1392: * predicate The predicate element itself
1393: * description Context for the predicate
1394: * sTarget The target resource
1395: * reificate Should this predicate be reificated
1396: *
1397: * return the new ID which can be used to identify the predicate
1398: *
1399: */
2.3 frystyk 1400: PRIVATE char * HTRDF_processPredicate (HTRDF * me,
2.1 frystyk 1401: HTElement * predicate,
1402: HTElement * description,
2.3 frystyk 1403: char * sTarget,
2.1 frystyk 1404: BOOL reificate)
1405: {
2.3 frystyk 1406: char * sStatementID = HTElement_getAttribute2(predicate, RDFMS, "ID");
1407: char * nsStatementID = NULL;
1408: char * sBagID = HTElement_getAttribute2(predicate, RDFMS, "bagID");
1409: char * sResource = HTRDF_getResource(me, predicate);
2.1 frystyk 1410:
1411: /*
1412: ** If a predicate has other attributes than rdf:ID, rdf:bagID,
1413: ** or xmlns... -> generate new triples according to the spec.
1414: ** (See end of Section 6)
1415: */
1416: {
1417: HTElement * place_holder = NULL;
1418: HTAssocList * newAL = HTAssocList_new();
2.3 frystyk 1419: char * fName = NULL;
1420: char * aName = NULL;
2.1 frystyk 1421:
1422: StrAllocMCopy(&fName, RDFMS, "Description", NULL);
1423: place_holder = HTElement_new(fName, newAL);
1424: HT_FREE(fName);
1425:
1426: if (HTRDF_expandAttributes(me, place_holder, predicate)) {
1427:
1428: /* error checking */
1429: if (!HTList_isEmpty(predicate->m_children)) {
1430: HTPrint("%s must be an empty element since it uses propAttr grammar production", predicate->m_sName);
1431: HTElement_delete(place_holder);
1432: return NULL;
1433: }
1434: StrAllocMCopy(&aName, RDFMS, "about", NULL);
1435:
1436: /* determine the 'about' part for the new statements */
1437: if (sStatementID) {
1438: HTElement *data = HTElement_new2(sStatementID);
1439: HTElement_addAttribute(place_holder, aName, sStatementID);
1440:
1441: /* hack: make rdf:ID the value of the predicate */
1442: HTElement_addChild(predicate, data);
1443: } else if (sResource) {
1444: HTElement_addAttribute(place_holder, aName, sResource);
1445: } else {
1446: nsStatementID = HTRDF_newReificationID(me);
1447: HTElement_addAttribute(place_holder, aName, nsStatementID);
1448: HT_FREE(nsStatementID);
1449: }
1450: HT_FREE(aName);
1451:
1452: if (sBagID) {
1453: StrAllocMCopy(&fName, RDFMS, "bagID", NULL);
1454: HTElement_addAttribute(place_holder, fName, sBagID);
1455: HT_FREE(fName);
1456: StrAllocCopy(place_holder->m_sBagID, sBagID);
1457: }
1458: HTRDF_processDescription(me, place_holder, NO, NO, me->m_bCreateBags);
1459: } else {
1460:
1461: /* Nothing but xmlns or RDF stuff, so we don't need new element */
1462: HTElement_delete(place_holder);
1463: }
1464: }
1465:
1466: /*
1467: ** Tricky part: if the resource attribute is present for a predicate
1468: ** AND there are no children, the value of the predicate is either
1469: ** 1. the URI in the resource attribute OR
1470: ** 2. the node ID of the resolved #resource attribute
1471: */
1472: if (sResource && HTList_isEmpty(predicate->m_children)) {
1473: if (!HTElement_target(predicate)) {
1474: if (reificate) {
1475: HT_FREE(nsStatementID);
1476: nsStatementID = HTRDF_reificate (me, predicate->m_sName,
1477: sTarget, sResource,
1478: predicate->m_sID);
1479: StrAllocCopy(predicate->m_sID, nsStatementID);
1480: } else {
1481: HTRDF_addTriple(me, predicate->m_sName, sTarget, sResource);
1482: }
1483: } else {
1484: HTElement *target = HTElement_target(predicate);
1485: if (reificate) {
1486: nsStatementID = HTRDF_reificate (me, predicate->m_sName,
1487: sTarget,
1488: target->m_sID,
1489: predicate->m_sID);
1490: StrAllocCopy(predicate->m_sID, nsStatementID);
1491: } else {
1492: HTRDF_addTriple(me, predicate->m_sName, sTarget, target->m_sID);
1493: }
1494: }
1495: StrAllocCopy(nsStatementID, predicate->m_sID);
1496: return nsStatementID;
1497: }
1498:
1499: /*
1500: ** Does this predicate make a reference somewhere using the
1501: ** sResource attribute
1502: */
1503: if (sResource && HTElement_target(predicate)) {
2.3 frystyk 1504: char * dStatementID = HTRDF_processDescription(me,
2.1 frystyk 1505: HTElement_target(predicate),
1506: YES, NO, NO);
1507: if (reificate) {
1508: HT_FREE(nsStatementID);
1509: nsStatementID = HTRDF_reificate (me, predicate->m_sName,
1510: sTarget, dStatementID,
1511: predicate->m_sID);
1512: StrAllocCopy(predicate->m_sID, nsStatementID);
1513: } else {
1514: StrAllocCopy(nsStatementID, dStatementID);
1515: HTRDF_addTriple(me, predicate->m_sName, sTarget, nsStatementID);
1516: }
1517: return nsStatementID;
1518: }
1519:
1520: /*
1521: ** Before looping through the children, let's check
1522: ** if there are any. If not, the value of the predicate is
1523: ** an anonymous node
1524: */
1525: {
1526: HTList *cur = predicate->m_children;
1527: BOOL bUsedTypedNodeProduction = NO;
1528: HTElement *n2;
1529: StrAllocCopy(nsStatementID, sStatementID);
1530: if (HTList_isEmpty(cur)) {
1531: if (reificate) {
2.3 frystyk 1532: char * nr = HTRDF_newReificationID(me);
2.1 frystyk 1533: HT_FREE(nsStatementID);
1534: nsStatementID = HTRDF_reificate (me, predicate->m_sName,
1535: sTarget, nr,
1536: predicate->m_sID);
1537: HT_FREE(nr);
1538: } else {
2.3 frystyk 1539: char * nr = HTRDF_newReificationID(me);
2.1 frystyk 1540: HTRDF_addTriple(me, predicate->m_sName, sTarget, nr);
1541: HT_FREE(nr);
1542: }
1543: }
1544: while ((n2= (HTElement *) HTList_nextObject(cur))) {
1545: if (HTRDF_isDescription(me, n2)) {
1546: HTElement *d2 = n2;
2.3 frystyk 1547: char * dStatementID =HTRDF_processDescription(me, d2, YES, NO, NO);
2.1 frystyk 1548: StrAllocCopy(d2->m_sID, dStatementID);
1549:
1550: if (reificate) {
1551: HT_FREE(nsStatementID);
1552: nsStatementID = HTRDF_reificate (me, predicate->m_sName,
1553: sTarget, dStatementID,
1554: predicate->m_sID);
1555: } else {
1556: StrAllocCopy(nsStatementID, dStatementID);
1557: HTRDF_addTriple(me, predicate->m_sName, sTarget,
1558: nsStatementID);
1559: }
1560: } else if (HTElement_instanceOfData(n2)) {
2.3 frystyk 1561: char * tValue = NULL;
1562: char * sValue = n2->m_sContent;
2.1 frystyk 1563: /* we've got real data */
1564: /*
1565: * Only if the content is not empty PCDATA (whitespace that is)
1566: * print the triple
1567: */
1568: tValue = trim(sValue);
1569: if (tValue && strlen(tValue) > 0) {
1570: if (reificate) {
1571: HT_FREE(nsStatementID);
1572: nsStatementID = HTRDF_reificate (me, predicate->m_sName,
1573: sTarget, tValue,
1574: predicate->m_sID);
1575: StrAllocCopy(predicate->m_sID, nsStatementID);
1576: } else {
1577: HTRDF_addTriple(me, predicate->m_sName, sTarget, tValue);
1578: }
1579: }
1580: HT_FREE(tValue);
1581: } else if (HTRDF_isContainer(me, n2)) {
1582: HTElement *target = HTElement_target(description);
2.3 frystyk 1583: char * aboutTarget =
2.1 frystyk 1584: target ?
1585: HTElement_getAttribute2(target, RDFMS, "about") : NULL;
2.3 frystyk 1586: char * sCollectionID = HTRDF_processContainer(me, n2);
2.1 frystyk 1587: StrAllocCopy(nsStatementID, sCollectionID);
1588: /* Attach the collection to the current predicate */
1589: if (target) {
1590: if (reificate) {
1591: HT_FREE(nsStatementID);
1592: nsStatementID=HTRDF_reificate (me, predicate->m_sName,
1593: aboutTarget,
1594: sCollectionID,
1595: predicate->m_sID);
1596: StrAllocCopy(predicate->m_sID, nsStatementID);
1597: } else {
1598: HTRDF_addTriple(me, predicate->m_sName, aboutTarget,
1599: sCollectionID);
1600: }
1601: } else {
1602: if (reificate) {
1603: HT_FREE(nsStatementID);
1604: nsStatementID=HTRDF_reificate (me, predicate->m_sName,
1605: sTarget, sCollectionID,
1606: predicate->m_sID);
1607: StrAllocCopy(predicate->m_sID, nsStatementID);
1608: } else {
1609: HTRDF_addTriple(me, predicate->m_sName, sTarget,
1610: sCollectionID);
1611: }
1612: }
1613: HT_FREE(sCollectionID);
1614: } else if (HTRDF_isTypedPredicate(me, n2)) {
1615: if (bUsedTypedNodeProduction) {
1616: HTPrint("Only one typedNode allowed inside a predicate (Extra typedNode: %s )\n", n2->m_sName);
1617: } else {
1618: bUsedTypedNodeProduction = YES;
1619: }
1620: HT_FREE(nsStatementID);
1621: nsStatementID = HTRDF_processTypedNode(me, n2);
1622: HTRDF_addTriple(me, predicate->m_sName, sTarget, nsStatementID);
1623: }
1624: }
1625: return nsStatementID;
1626: } /* end of block */
1627: return NULL;
1628: }
1629:
1630: /*
1631: * processDescription manages Description elements
1632: *
1633: * description The Description element itself
1634: * inPredicate Is this is a nested description
1635: * reificate Do we need to reificate
1636: * createBag Do we create a bag container
1637: *
1638: * return An ID for the description
1639: *
1640: */
2.3 frystyk 1641: PUBLIC char * HTRDF_processDescription (HTRDF * me,
2.1 frystyk 1642: HTElement * description,
1643: BOOL inPredicate,
1644: BOOL reificate,
1645: BOOL createBag)
1646: {
1647: int iChildCount = 1;
1648: BOOL bOnce = YES;
1649:
2.3 frystyk 1650: char * sAbout = HTElement_getAttribute2(description, RDFMS, "about");
1651: char * sAboutEach = HTElement_getAttribute2(description, RDFMS, "aboutEach");
1652: char * sAboutEachPrefix = HTElement_getAttribute2(description, RDFMS,
2.1 frystyk 1653: "aboutEachPrefix");
2.3 frystyk 1654: char * sBagid = HTElement_getAttribute2(description, RDFMS, "bagID");
1655: char * sID = HTElement_getAttribute2(description, RDFMS, "ID");
2.1 frystyk 1656: HTElement *target = HTElement_target(description);
1657: BOOL hasTarget = HTList_isEmpty(description->m_vTargets) ? NO : YES;
1658: BOOL targetIsContainer = NO;
2.3 frystyk 1659: char * sTargetAbout = NULL;
1660: char * sTargetBagID = NULL;
1661: char * sTargetID = NULL;
1662: char * dName = NULL;
1663: char * aName = NULL;
2.1 frystyk 1664:
1665: /*
1666: ** Return immediately if the description has already been managed
1667: */
1668: if (description->m_bDone) return description->m_sID;
1669:
1670: StrAllocMCopy(&dName, RDFMS, "Description", NULL);
1671: StrAllocMCopy(&aName, RDFMS, "about", NULL);
1672:
1673: /*
1674: ** Determine what the target of the Description reference is
1675: */
1676: if (hasTarget) {
2.3 frystyk 1677: char * sTargetID2 = HTElement_getAttribute2(target, RDFMS, "ID");
2.1 frystyk 1678: sTargetAbout = HTElement_getAttribute2(target, RDFMS, "about");
1679: sTargetBagID = HTElement_getAttribute2(target, RDFMS, "bagID");
1680: if (me->m_sSource && sTargetID2) {
1681: StrAllocMCopy(&sTargetID, me->m_sSource, sTargetID2, NULL);
1682: } else {
1683: StrAllocCopy(sTargetID, sTargetID2);
1684: }
1685: /*
1686: * Target is collection if
1687: * 1. it is identified with bagID attribute
1688: * 2. it is identified with ID attribute and is a collection
1689: */
1690: if (sTargetBagID && sAbout) {
1691: targetIsContainer = !strcmp(&(sAbout[1]), sTargetBagID);
1692: } else {
1693: if (sTargetID && sAbout && !strcmp(&(sAbout[1]), sTargetID) &&
1694: HTRDF_isContainer(me, target))
1695: targetIsContainer = YES;
1696: }
1697: HT_FREE(sTargetID);
1698: }
1699:
1700: /*
1701: * Check if there are properties encoded using the abbreviated
1702: * syntax
1703: */
1704: HTRDF_expandAttributes(me, description, description);
1705:
1706: /*
1707: * Manage the aboutEach attribute here
1708: */
1709: if (sAboutEach && hasTarget) {
1710: if (HTRDF_isContainer(me, target)) {
1711: HTList *cur = target->m_children;
1712: HTElement *ele = NULL;
1713: while ((ele= (HTElement *) HTList_nextObject(cur))) {
1714: if (HTRDF_isListItem(me, ele)) {
2.3 frystyk 1715: char * sResource = HTRDF_getResource(me, ele);
2.1 frystyk 1716: if (sResource) {
1717: HTElement * newDescription = NULL;
1718: HTElement * ele2;
1719: HTList * cur2 = description->m_children;
1720:
1721: /*
1722: * Manage <li resource="..." /> case
1723: */
1724: if (sResource) {
1725: HTAssocList *newAL = HTAssocList_new();
1726: newDescription = HTElement_new(dName, newAL);
1727: HTElement_addAttribute(newDescription, aName, sResource);
1728: }
1729:
1730: while ((ele2 = (HTElement *) HTList_nextObject(cur2))){
1731: if (newDescription) HTElement_addChild(newDescription, ele2);
1732: }
1733:
1734: if (newDescription)
1735: HTRDF_processDescription(me, newDescription, NO, NO, NO);
1736:
1737: /* Not needed anymore */
1738: HTElement_delete(newDescription);
1739:
1740: } else {
1741: /**
1742: * Otherwise we have a structured value inside <li>
1743: *
1744: * loop through the children of <li>
1745: * (can be only one)
1746: */
1747: HTList *cur2 = ele->m_children;
1748: HTElement *ele2 = NULL;
1749: while ((ele2 = (HTElement *) HTList_nextObject(cur2))) {
1750: HTAssocList *newAL = HTAssocList_new();
1751: HTElement *newNode = HTElement_new(dName, newAL);
1752: HTList *cur3 = description->m_children;
1753: HTElement *ele3 = NULL;
1754: /* loop through the items in the
1755: * description with aboutEach
1756: * and add them to the target
1757: */
1758: while ((ele3 = (HTElement *)
1759: HTList_nextObject(cur3))) {
1760: HTElement_addChild(newNode, ele3);
1761: }
1762: HTElement_addTarget(newNode, ele2);
1763: HTRDF_processDescription(me, newNode, YES, NO, NO);
1764: }
1765: }
1766: } else if (HTRDF_isTypedPredicate(me, ele)) {
1767: HTAssocList *newAL = HTAssocList_new();
1768: HTElement *newNode = HTElement_new(dName, newAL);
1769: HTList *cur2 = description->m_children;
1770: HTElement *ele2 = NULL;
1771: while ((ele2 = (HTElement *) HTList_nextObject(cur2))) {
1772: HTElement_addChild(newNode, ele2);
1773: }
1774: HTElement_addTarget(newNode, ele);
1775: HTRDF_processDescription(me, newNode, YES, NO, NO);
1776: }
1777: } /* end of while */
1778: } else if (HTRDF_isDescription(me, target)) {
1779: HTList *cur = target->m_children;
1780: HTElement *ele = NULL;
1781: while ((ele = (HTElement *) HTList_nextObject(cur))) {
1782: HTAssocList *newAL = HTAssocList_new();
1783: HTElement *newNode = HTElement_new(dName, newAL);
1784: HTList *cur2 = description->m_children;
1785: HTElement *ele2 = NULL;
1786: while ((ele2 = (HTElement *) HTList_nextObject(cur2))) {
1787: HTElement_addChild(newNode, ele2);
1788: }
1789: HTElement_addTarget(newNode, ele);
1790: HTRDF_processDescription(me, newNode, YES, NO, NO);
1791: } /* end of while */
1792: }
1793:
1794: HT_FREE(dName);
1795: HT_FREE(aName);
1796: return NULL;
1797: }
1798:
1799: /*
1800: * Manage the aboutEachPrefix attribute here
1801: */
1802: if (sAboutEachPrefix) {
1803: if (hasTarget) {
1804: HTList *cur = description->m_vTargets;
1805: HTElement *target = NULL;
1806: while ((target = (HTElement *) HTList_nextObject(cur))) {
1807: HTList *cur2 = description->m_children;
1808: HTElement *ele2 = NULL;
1809: HTElement *newDescription = NULL;
1810: HTAssocList *newAL = HTAssocList_new();
1811: sTargetAbout = HTElement_getAttribute2(target, RDFMS, "about");
1812: newDescription = HTElement_new(dName, newAL);
1813: HTElement_addAttribute(newDescription, aName, sTargetAbout);
1814: while ((ele2 = (HTElement *) HTList_nextObject(cur2))) {
1815: HTElement_addChild(newDescription, ele2);
1816: }
1817: HTRDF_processDescription(me, newDescription, NO, NO, NO);
1818: }
1819: }
1820:
1821: HT_FREE(dName);
1822: HT_FREE(aName);
1823: return NULL;
1824: }
1825: /*
1826: * Enumerate through the children
1827: */
1828: {
1829: HTList *cur = description->m_children;
1830: HTElement *n = NULL;
1831: while ((n = (HTElement *) HTList_nextObject(cur))) {
1832: if (HTRDF_isDescription(me, n))
1833: HTPrint("Can not nest Description inside Description\n");
1834: else if (HTRDF_isListItem(me, n))
1835: HTPrint("Can not nest List Item inside Description\n");
1836: else if (HTRDF_isContainer(me, n))
1837: HTPrint("Can not nest Container inside Description\n");
1838: else if (HTRDF_isTypedPredicate(me, n)) {
2.3 frystyk 1839: char * sChildID = NULL;
2.1 frystyk 1840: if (hasTarget && targetIsContainer) {
1841: sChildID = HTRDF_processPredicate(me, n, description,
1842: target->m_sBagID ?
1843: target->m_sBagID :
1844: target->m_sID, NO);
1845: StrAllocCopy(description->m_sID, sChildID);
1846: createBag = NO;
1847: } else if (hasTarget) {
1848: sChildID = HTRDF_processPredicate(me, n, description,
1849: target->m_sBagID ?
1850: target->m_sBagID :
1851: target->m_sID, reificate);
1852: StrAllocCopy(description->m_sID, sChildID);
1853: } else if (!hasTarget && !inPredicate) {
1854: if (!description->m_sID) {
2.3 frystyk 1855: char * nr = HTRDF_newReificationID(me);
2.1 frystyk 1856: StrAllocCopy(description->m_sID, nr);
1857: HT_FREE(nr);
1858: }
1859: if (!sAbout) {
1860: if (sID)
1861: sAbout = sID;
1862: else
1863: sAbout = description->m_sID;
1864: }
1865: sChildID = HTRDF_processPredicate(me, n, description,
1866: sAbout, sBagid ?
1867: YES : reificate);
1868:
1869: } else if (!hasTarget && inPredicate) {
1870: if (!sAbout) {
1871: if (sID) {
1872: StrAllocCopy(description->m_sID, sID);
1873: sAbout = sID;
1874: } else {
1875: if (!description->m_sID) {
2.3 frystyk 1876: char * nr = HTRDF_newReificationID(me);
2.1 frystyk 1877: StrAllocCopy(description->m_sID, nr);
1878: HT_FREE(nr);
1879: }
1880: sAbout = description->m_sID;
1881: }
1882: } else {
1883: StrAllocCopy(description->m_sID, sAbout);
1884: }
1885: sChildID = HTRDF_processPredicate(me, n, description, sAbout, NO);
1886: }
1887: /*
1888: * Each Description block creates also a Bag node which
1889: * has links to all properties within the block IF
1890: * the m_bCreateBags variable is true
1891: */
1892: if (sBagid || (me->m_bCreateBags && createBag)) {
2.3 frystyk 1893: char * sNamespace = RDFMS;
2.1 frystyk 1894: if (bOnce && sChildID) {
2.3 frystyk 1895: char * tName = NULL;
1896: char * bName = NULL;
2.1 frystyk 1897: bOnce = NO;
1898: if (!description->m_sBagID) {
2.3 frystyk 1899: char * nr = HTRDF_newReificationID(me);
2.1 frystyk 1900: StrAllocCopy(description->m_sBagID, nr);
1901: HT_FREE(nr);
1902: }
1903: if (!description->m_sID)
1904: StrAllocCopy(description->m_sID,
1905: description->m_sBagID);
1906: StrAllocMCopy(&tName, sNamespace, "type", NULL);
1907: StrAllocMCopy(&bName, sNamespace, "Bag", NULL);
1908: HTRDF_addTriple(me, tName, description->m_sBagID, bName);
1909: HT_FREE(tName);
1910: HT_FREE(bName);
1911:
1912: }
1913: if (sChildID) {
2.3 frystyk 1914: char * tName = NULL;
2.1 frystyk 1915: char si[20];
1916: sprintf(si, "%d", iChildCount);
1917: StrAllocMCopy(&tName, sNamespace, "_", si, NULL);
1918: HTRDF_addTriple(me, tName, description->m_sBagID, sChildID);
1919: iChildCount++;
1920: HT_FREE(tName);
1921: }
1922: }
1923: HT_FREE(sChildID);
1924: }
1925: }
1926: } /* end of block*/
1927:
1928: description->m_bDone = YES;
1929:
1930: HT_FREE(dName);
1931: HT_FREE(aName);
1932: return (description->m_sID);
1933: }
1934:
1935: /*
1936: * Given an XML document (well-formed HTML, for example),
1937: * look for a suitable element to start parsing from
1938: *
1939: */
1940: PUBLIC BOOL HTRDF_processXML (HTRDF *me, HTElement *ele)
1941: {
1942: if (me && ele) {
1943: if (HTRDF_isRDF(me, ele)) {
1944: if (HTRDF_isRDFroot(me, ele)) {
1945: HTRDF_processRDF(me, ele);
1946: } else if (HTRDF_isDescription(me, ele)) {
1947: HTRDF_processDescription(me, ele, NO, me->m_bCreateBags,
1948: me->m_bCreateBags);
1949: }
1950: } else {
1951: HTList *cur = ele->m_children;
1952: HTElement *child = NULL;
1953: while ((child = (HTElement *) HTList_nextObject(cur))) {
1954: HTRDF_processXML(me, child);
1955: }
1956: }
1957:
1958: /* MISSING RECURSION */
1959:
1960: return YES;
1961: }
1962: return NO;
1963: }
1964:
1965: /*
1966: * Return the root element pointer. This requires the parsing
1967: * has been already done.
1968: */
1969: PUBLIC HTElement * HTRDF_root (HTRDF *me)
1970: {
1971: return me ? me->m_root : NULL;
1972: }
1973:
1974: /*
1975: * Return the full namespace URI for a given prefix sPrefix.
1976: * The default namespace is identified with xmlns prefix.
1977: * The namespace of xmlns attribute is an empty string.
1978: */
1979:
2.3 frystyk 1980: PUBLIC char * HTRDF_namespace(HTRDF * me, char * sPrefix)
2.1 frystyk 1981: {
2.3 frystyk 1982: char * nPrefix = NULL;
2.1 frystyk 1983: HTAssocList * calist;
1984: HTList * cur = me->m_namespaceStack;
1985:
1986: if (!sPrefix)
1987: StrAllocCopy(nPrefix, "xmlns");
1988:
1989: while ((calist = (HTAssocList *) HTList_nextObject(cur))) {
2.3 frystyk 1990: char * sValue = HTAssocList_findObjectCaseSensitiveExact(calist, sPrefix);
2.1 frystyk 1991: if (sValue) {
1992: StrAllocCopy(nPrefix, sValue);
1993: return nPrefix;
1994: }
1995: }
1996: /*
1997: * Give error only if
1998: * 1. the prefix is not from the reserved xml namespace
1999: * 2. the prefix is not xmlns which is to look for the default
2000: * namespace
2001: */
2002: if (!strcmp(sPrefix, XMLSCHEMA)) {
2003: StrAllocCopy(nPrefix, sPrefix);
2004: return nPrefix;
2005: } else if (!strcmp(sPrefix, "xmlns")) {
2006: StrAllocCopy(nPrefix, "");
2007: return nPrefix;
2008: } else
2009: HTPrint("Unresolved Namespace prefix %s\n", sPrefix);
2010:
2011: StrAllocCopy(nPrefix, "");
2012: return nPrefix;
2013: }
2014:
2015: /*
2016: * Methods to determine whether we are parsing
2017: * parseType="Literal" or parseType="Resource"
2018: */
2019:
2020: PUBLIC BOOL HTRDF_parseLiteral(HTRDF *me)
2021: {
2022: HTElement *e = NULL;
2023: HTList *cur = me->m_elementStack;
2024: if (!HTList_isEmpty(me->m_elementStack)) {
2025: while((e = (HTElement *) HTList_nextObject(cur))) {
2.3 frystyk 2026: char * sParseType = NULL;
2.1 frystyk 2027: sParseType = HTElement_getAttribute2(e, RDFMS, "parseType");
2028: if (sParseType) {
2029: if (strcmp(sParseType, "Resource"))
2030: return YES;
2031: }
2032: }
2033: }
2034: return NO;
2035: }
2036:
2037: /*
2038: * Methods to determine whether we are parsing
2039: * parseType="Literal" or parseType="Resource"
2040: */
2041:
2042: PUBLIC BOOL HTRDF_parseResource(HTRDF *me)
2043: {
2044: HTElement *e = NULL;
2045: HTList *cur = me->m_elementStack;
2046: if (!HTList_isEmpty(me->m_elementStack)) {
2047: while((e = (HTElement *) HTList_nextObject(cur))) {
2.3 frystyk 2048: char * sParseType = NULL;
2.1 frystyk 2049: sParseType = HTElement_getAttribute2(e, RDFMS, "parseType");
2050: if (sParseType) {
2051: if (!strcmp(sParseType, "Resource"))
2052: return YES;
2053: }
2054: }
2055: }
2056: return NO;
2057: }
2058: /*
2059: * checkAttributes goes through the attributes of element e<
2060: * to see
2061: * 1. if there are symbolic references to other nodes in the data model.
2062: * in which case they must be stored for later resolving with
2063: * resolveLater method.
2064: * 2. if there is an identity attribute, it is registered using
2065: * registerResource or registerID method.
2066: *
2067: */
2068:
2069: PRIVATE void HTRDF_checkAttributes(HTRDF *me, HTElement *e)
2070: {
2071: {
2.3 frystyk 2072: char * sResource = HTElement_getAttribute2(e, RDFMS, "resource");
2.1 frystyk 2073:
2074: if (sResource && sResource[0] == '#')
2075: HTRDF_resolveLater(me, e);
2076: }
2077: {
2.3 frystyk 2078: char * sAboutEach = HTElement_getAttribute2(e, RDFMS, "aboutEach");
2.1 frystyk 2079:
2080: if (sAboutEach && sAboutEach[0] == '#')
2081: HTRDF_resolveLater(me, e);
2082: }
2083: {
2.3 frystyk 2084: char * sAboutEachPrefix = HTElement_getAttribute2(e, RDFMS,
2.1 frystyk 2085: "aboutEachPrefix");
2086:
2087: if (sAboutEachPrefix && sAboutEachPrefix[0] == '#')
2088: HTRDF_resolveLater(me, e);
2089: }
2090: {
2.3 frystyk 2091: char * sAbout = HTElement_getAttribute2(e, RDFMS, "about");
2.1 frystyk 2092: if (sAbout) {
2093: if (sAbout[0] == '#')
2094: HTRDF_resolveLater(me, e);
2095: else
2096: HTRDF_registerResource(me, e);
2097: }
2098: }
2099:
2100: {
2.3 frystyk 2101: char * sBagID = HTElement_getAttribute2(e, RDFMS, "bagID");
2.1 frystyk 2102:
2103: if (sBagID) {
2104: HTRDF_registerID(me, sBagID, e);
2105: StrAllocCopy(e->m_sBagID, sBagID);
2106: }
2107: }
2108: {
2.3 frystyk 2109: char * sID = HTElement_getAttribute2(e, RDFMS, "ID");
2.1 frystyk 2110: if (sID) {
2111: HTRDF_registerID(me, sID, e);
2112: StrAllocCopy(e->m_sID, sID);
2113: }
2114: }
2115: }
2116: /*
2117: * Add the element e to the m_vResolveQueue
2118: * to be resolved later.
2119: */
2120: PUBLIC void HTRDF_resolveLater(HTRDF *me, HTElement *e)
2121: {
2122: HTList_addObject(me->m_vResolveQueue, e);
2123: }
2124: /*
2125: * Add an element e to the Hashtable m_hIDtable
2126: * which stores all nodes with an ID
2127: */
2128:
2.3 frystyk 2129: PUBLIC void HTRDF_registerID(HTRDF *me, char * sID, HTElement *e)
2.1 frystyk 2130: {
2131: if (HTHashtable_object(me->m_hIDtable, sID))
2132: HTPrint("Node ID %s redefined", sID);
2133: HTHashtable_addObject(me->m_hIDtable, sID, e);
2134: }
2135: /*
2136: * Add an element e to the Vector m_vResources
2137: * which stores all nodes with an URI
2138: */
2139: PUBLIC void HTRDF_registerResource(HTRDF *me, HTElement *e)
2140: {
2141: HTList_addObject(me->m_vResources, e);
2142: }
2143:
2144: /*
2145: * Look for a node by name sID from the Hashtable
2146: * m_hIDtable of all registered IDs.
2147: */
2148:
2.3 frystyk 2149: PUBLIC HTElement *HTRDF_lookforNode(HTRDF *me, char * sID)
2.1 frystyk 2150: {
2151: if (sID)
2152: return (HTElement *) HTHashtable_object(me->m_hIDtable, sID);
2153: return NULL;
2154: }
2155:
2156: /*
2157: ** Special method to deal with rdf:resource attribute
2158: */
2.3 frystyk 2159: PUBLIC char * HTRDF_getResource(HTRDF *me, HTElement *e)
2.1 frystyk 2160: {
2.3 frystyk 2161: char * sResource = HTElement_getAttribute2(e, RDFMS, "resource");
2.1 frystyk 2162: if (sResource != NULL && sResource[0] == '\0')
2163: sResource = me->m_sSource;
2164: return sResource;
2165: }
2166:
2167: /*
2168: ** Take an element ele with its parent element parent
2169: ** and evaluate all its attributes to see if they are non-RDF specific
2170: ** and non-XML specific in which case they must become children of
2171: ** the ele node.
2172: */
2173: PRIVATE BOOL HTRDF_expandAttributes (HTRDF * me, HTElement * parent, HTElement * ele)
2174: {
2175: BOOL foundAbbreviation = NO;
2.3 frystyk 2176: char * sAttribute = NULL;
2177: char * sValue = NULL;
2.1 frystyk 2178: HTAssoc * assoc;
2179: HTAssocList * cur = ele->m_attributes;
2180: int lxmlschema = strlen(XMLSCHEMA);
2181: int lrdfms = strlen(RDFMS);
2182:
2183: while ((assoc= (HTAssoc *) HTList_nextObject(cur))) {
2184: int latt;
2185: sAttribute = HTAssoc_name(assoc);
2186: sValue = HTAssoc_value(assoc);
2.4 barstow 2187:
2.1 frystyk 2188: latt = strlen(sAttribute);
2189: if (!strncmp(sAttribute, XMLSCHEMA, lxmlschema))
2190: continue;
2191:
2192: if (!strncmp(sAttribute, RDFMS, lrdfms) &&
2193: (sAttribute[lrdfms]!='_') &&
2194: latt > 5 && strcmp(&(sAttribute[latt-5]), "value") &&
2195: strcmp(&(sAttribute[latt-4]), "type"))
2196: continue;
2197:
2198: if (strlen(sValue) > 0) {
2199: HTAssocList * newAL = HTAssocList_new();
2200: HTElement * newElement = HTElement_new(sAttribute, newAL);
2201: HTElement * newData = HTElement_new2(sValue);
2202: HTElement_addChild(newElement, newData);
2203: HTElement_addChild(parent, newElement);
2204: foundAbbreviation = YES;
2205: }
2206: }
2207: return foundAbbreviation;
2208: }
2209:
2210: /**
2211: * Create a new reification ID by using a name part and an
2212: * incremental counter m_iReificationCounter.
2213: */
2.3 frystyk 2214: PUBLIC char * HTRDF_newReificationID (HTRDF *me)
2.1 frystyk 2215: {
2.3 frystyk 2216: char * nsid = NULL;
2.1 frystyk 2217: char nsrc[20];
2218: me->m_iReificationCounter++;
2219: sprintf(nsrc, "%d", me->m_iReificationCounter);
2220: if (!me->m_sSource) {
2221: StrAllocMCopy(&nsid, "genid", nsrc, NULL);
2222: } else {
2223: StrAllocMCopy(&nsid, me->m_sSource, "#genid", nsrc, NULL);
2224: }
2225: return nsid;
2226: }
2227:
2228: /*
2229: * reificate creates one new node and four new triples
2230: * and returns the ID of the new node
2231: */
2232:
2.3 frystyk 2233: PRIVATE char * HTRDF_reificate(HTRDF *me, char * sPredicate, char * sSubject,
2234: char * sObject, char * sNodeID)
2.1 frystyk 2235: {
2.3 frystyk 2236: char * sName = NULL;
2237: char * pName = NULL;
2238: char * oName = NULL;
2239: char * tName = NULL;
2240: char * stName = NULL;
2241: char * tNodeID = NULL;
2.1 frystyk 2242:
2243: if (!sNodeID)
2244: tNodeID = HTRDF_newReificationID(me);
2245: else
2246: StrAllocCopy(tNodeID, sNodeID);
2247:
2248: StrAllocMCopy(&sName, RDFMS, "subject", NULL);
2249: StrAllocMCopy(&pName, RDFMS, "predicate", NULL);
2250: StrAllocMCopy(&oName, RDFMS, "object", NULL);
2251: StrAllocMCopy(&tName, RDFMS, "type", NULL);
2252: StrAllocMCopy(&stName, RDFMS, "Statement", NULL);
2253:
2254: /*
2255: * The original statement must remain in the data model
2256: */
2257: HTRDF_addTriple(me, sPredicate, sSubject, sObject);
2258:
2259: /*
2260: * Do not reificate reificated properties
2261: */
2262: if (strcmp(sPredicate, sName) && strcmp(sPredicate, pName) &&
2263: strcmp(sPredicate, oName) && strcmp(sPredicate, tName)) {
2264:
2265: /* Reificate by creating 4 new triples */
2266: HTRDF_addTriple(me, pName, tNodeID, sPredicate);
2267: HTRDF_addTriple(me, sName, tNodeID, (sSubject[0]=='\0' ? me->m_sSource: sSubject));
2268: HTRDF_addTriple(me, oName, tNodeID, sObject);
2269: HTRDF_addTriple(me, tName, tNodeID, stName);
2270: } else
2271: HT_FREE(tNodeID);
2272:
2273: HT_FREE(sName);
2274: HT_FREE(pName);
2275: HT_FREE(oName);
2276: HT_FREE(tName);
2277: HT_FREE(stName);
2278:
2279: return tNodeID;
2280: }
2281: /*
2282: * Create a new triple and add it to the m_triples List
2283: * Send the triple to the Output stream
2284: */
2285:
2.3 frystyk 2286: PUBLIC void HTRDF_addTriple (HTRDF *me, char * sPredicate, char * sSubject,
2287: char * sObject)
2.1 frystyk 2288: {
2289: HTTriple *t = NULL;
2290:
2291: /*
2292: * If there is no subject (about=""), then use the URI/filename where
2293: * the RDF description came from
2294: */
2295: if (!sPredicate || !sSubject || !sObject) {
2296: HTPrint("Predicate %s when subject %s and object %s \n",
2297: sPredicate ? sPredicate : "null",
2298: sSubject ? sSubject : "null",
2299: sObject ? sObject : "null");
2300: return;
2301: }
2302:
2303: if (sSubject[0]=='\0')
2304: sSubject = me->m_sSource;
2305:
2306: t = HTTriple_new(sPredicate, sSubject, sObject);
2307:
2308: /* Call the triple callback handler (if any) with this new triple */
2309: if (me->newTripleInstance && t) (*(me->newTripleInstance))(me, t, me->tripleContext);
2310:
2311: HTList_addObject(me->m_triples, t);
2312: }
2313:
2314: /*
2315: * createBags method allows one to determine whether SiRPAC
2316: * produces Bag instances for each Description block.
2317: * The default setting is not to generate them.
2318: */
2319:
2320: PUBLIC void HTRDF_createBags(HTRDF *me, BOOL b)
2321: {
2322: if (me)
2323: me->m_bCreateBags = b;
2324: }
2325:
2326: /*
2327: Set output stream for RDF parser
2328: */
2329:
2330: PUBLIC void HTRDF_setOutputStream(HTRDF *me, HTStream *ostream)
2331: {
2332: if (me)
2333: me->ostream = ostream;
2334: }
2335:
2336: PUBLIC BOOL HTRDF_registerNewTripleCallback (HTRDF * me, HTTripleCallback_new * cbf, void * context)
2337: {
2338: if (me) {
2339: me->newTripleInstance = cbf;
2340: me->tripleContext = context;
2341: return YES;
2342: }
2343: return NO;
2344: }
2345:
2346: PUBLIC BOOL HTRDF_registerNewParserCallback (HTRDFCallback_new * me, void * context)
2347: {
2348: RDFInstance = me;
2349: RDFInstanceContext = context;
2350: return YES;
2351: }
2352:
2353: /* ------------------------------------------------------------------------- */
2354: /* HTRDFTriples STREAM HANDLERS */
2355: /* ------------------------------------------------------------------------- */
2356:
2357: PRIVATE int generate_triples(HTStream *me)
2358: {
2359: HTRDF *rdfp = me ? me->rdfparser : NULL;
2360: if (rdfp) {
2361:
2362: HTRDF_resolve(rdfp);
2363:
2364: HTRDF_processXML(rdfp, HTRDF_root(rdfp));
2365:
2366: return HT_OK;
2367: }
2368: return HT_ERROR;
2369: }
2370:
2371: PRIVATE int HTRDFTriples_flush (HTStream * me)
2372: {
2373: if (me->target)
2374: return (*me->target->isa->flush)(me->target);
2375: return HT_OK;
2376: }
2377:
2378: PRIVATE int HTRDFTriples_free (HTStream * me)
2379: {
2380: int status = HT_OK;
2381:
2382: status = generate_triples(me);
2383:
2384: HTRDF_delete(me->rdfparser);
2385:
2386: if (me->target) {
2387: if ((status = (*me->target->isa->_free)(me->target)) == HT_WOULD_BLOCK)
2388: return HT_WOULD_BLOCK;
2389: }
2390: HTTRACE(XML_TRACE, "RDF Parser.. FREEING...\n");
2391: HT_FREE(me);
2392: return status;
2393: }
2394:
2395: PRIVATE int HTRDFTriples_abort (HTStream * me, HTList * e)
2396: {
2397: HTTRACE(XML_TRACE, "RDF Parser.. ABORTING...\n");
2398: HTRDF_delete(me->rdfparser);
2399: if (me->target)
2400: (*me->target->isa->abort)(me->target, NULL);
2401: HT_FREE(me);
2402: return HT_ERROR;
2403: }
2404:
2405: PRIVATE int HTRDFTriples_write (HTStream * me, const char * buf, int len)
2406: {
2407: return HT_OK;
2408: }
2409:
2410: PRIVATE int HTRDFTriples_putCharacter (HTStream * me, char c)
2411: {
2412: return HTRDFTriples_write(me, &c, 1);
2413: }
2414:
2415: PRIVATE int HTRDFTriples_putString (HTStream * me, const char * s)
2416: {
2417: return HTRDFTriples_write(me, s, (int) strlen(s));
2418: }
2419:
2420: PRIVATE const HTStreamClass HTRDFTriplesClass =
2421: {
2422: "rdf",
2423: HTRDFTriples_flush,
2424: HTRDFTriples_free,
2425: HTRDFTriples_abort,
2426: HTRDFTriples_putCharacter,
2427: HTRDFTriples_putString,
2428: HTRDFTriples_write
2429: };
2430:
2431: PRIVATE HTStream * RDFParser_new (HTRequest * request,
2432: void * param,
2433: HTFormat input_format,
2434: HTFormat output_format,
2435: HTStream * output_stream)
2436: {
2437: HTStream * me = NULL;
2438: if ((me = (HTStream *) HT_CALLOC(1, sizeof(HTStream))) == NULL)
2439: HT_OUTOFMEM("HTRDFTriples_new");
2440: me->isa = &HTRDFTriplesClass;
2441: me->state = HT_OK;
2442: me->request = request;
2443: me->target = output_stream ? output_stream : HTErrorStream();
2444:
2445: /* Now create the RDF parser instance */
2446: if ((me->rdfparser = HTRDF_new()) == NULL) {
2447: HT_FREE(me);
2448: return HTErrorStream();
2449: }
2450:
2451: /* Set the source (I guess mostly to follow SiRPAC API) */
2452: {
2453: char * uri = HTAnchor_address((HTAnchor *) HTRequest_anchor(request));
2454: HTRDF_setSource(me->rdfparser, uri);
2455: HT_FREE(uri);
2456: }
2457:
2458: /* Where are we putting data? */
2459: HTRDF_setOutputStream(me->rdfparser, me);
2460:
2461: /* If you want to create Bags, change it to YES */
2462: HTRDF_createBags(me->rdfparser, NO);
2463:
2464: /* Register our new XML Instance handler */
2465: /* @@@ THIS SHOULD BE DONE USING XML NAMESPACE SO THAT WE DON'T CONFLICT @@@ */
2466: HTXMLCallback_registerNew(rdf_newInstance, me->rdfparser);
2467:
2468: HTTRACE(XML_TRACE, "RDF Parser.. Stream created\n");
2469:
2470: return me;
2471: }
2472:
2473: PUBLIC HTStream * HTRDFParser_new (HTRequest * request,
2474: void * param,
2475: HTFormat input_format,
2476: HTFormat output_format,
2477: HTStream * output_stream)
2478: {
2479: return HTXML_new(request, param, input_format, output_format,
2480: RDFParser_new(request, param, input_format, output_format, output_stream));
2481: }
2482:
2483: PRIVATE void triple_newInstance (HTRDF * rdfp, HTTriple * t, void * context)
2484: {
2485: if (rdfp && t) {
2486: HTStream *ostream = rdfp->ostream;
2487: if (ostream) {
2488: PUTC(ostream,'(');
2489: PUTS(ostream, t->m_sPredicate);
2490: PUTC(ostream,',');
2491: PUTS(ostream, t->m_sSubject);
2492: PUTC(ostream,',');
2493: PUTS(ostream, t->m_sObject);
2494: PUTC(ostream,')');
2495: PUTC(ostream,'\n');
2496: }
2497: }
2498: }
2499:
2500: PUBLIC HTStream * HTRDFToTriples (HTRequest * request,
2501: void * param,
2502: HTFormat input_format,
2503: HTFormat output_format,
2504: HTStream * output_stream)
2505: {
2.2 frystyk 2506: HTStream * me = RDFParser_new(request, param, input_format, output_format, output_stream);
2.1 frystyk 2507: HTTRACE(XML_TRACE, "RDF Converter. To Triples\n");
2508:
2509: /* Register our own tripple instance handler */
2510: HTRDF_registerNewTripleCallback(me->rdfparser, triple_newInstance, NULL);
2511:
2512: /* Create an XML parser instance and return */
2513: return HTXML_new(request, param, input_format, output_format, me);
2514: }
2515:
2.6 ! kahan 2516: /* HTRDFParseFile
! 2517: ** ---------------
! 2518: ** This function parses a file of RDF in a synchronous, non-blocking
! 2519: ** way. In other words, the file is not asynchronously loaded. If
! 2520: ** the file is successfully parsed, NULL is returned; otherwise a
! 2521: ** pointer to an error message is returned. The caller must NOT
! 2522: ** free the pointer returned by this function.
! 2523: */
! 2524:
2.5 barstow 2525: PUBLIC char * HTRDFParseFile (const char *file_name, HTTripleCallback_new * new_triple_callback)
2526: {
2527: char buff[512]; /* the file input buffer */
2528: FILE *fp;
2529: XML_Parser xmlparser;
2530: HTRDF *rdfparser;
2531: HTStream * stream = NULL;
2.6 ! kahan 2532: char *uri = NULL;
2.5 barstow 2533:
2534: /* Sanity check */
2.6 ! kahan 2535: if (!file_name) {
! 2536: HTTRACE(XML_TRACE, "RDFParseFile.. file name is NULL\n");
! 2537: return "RDFParseFile: file_name is NULL";
! 2538: }
! 2539:
2.5 barstow 2540:
2541: /* If the file does not exist, return now */
2542: fp = fopen (file_name, "r");
2.6 ! kahan 2543: if (!fp) { /* annotation index file doesn't exist */
! 2544: HTTRACE(XML_TRACE, "RDFParseFile.. file open failed\n");
! 2545: return "RDFParseFile: file open failed";
! 2546: }
2.5 barstow 2547:
2548: /* We need an XML parser */
2549: #ifdef USE_NS
2550: xmlparser = XML_ParserCreateNS (NULL, ':');
2551: #else
2552: xmlparser = XML_ParserCreate (NULL);
2553: #endif /* USE_NS */
2554:
2555: if (!xmlparser) {
2.6 ! kahan 2556: fclose (fp);
! 2557: HTTRACE(XML_TRACE, "RDFParseFile.. Could not create an XML parser\n");
! 2558: return "RDFParseFile: Could not create an XML parser";
2.5 barstow 2559: }
2560:
2561: /* We need also need RDF parser to create the triples */
2562: rdfparser = HTRDF_new();
2563: if (!rdfparser) {
2564: fclose (fp);
2565: XML_ParserFree(xmlparser);
2566: return "RDFParseFile: Could not allocate memory for RDF parser";
2567: }
2568:
2569: /* Must construct a URI from file_name for the parser */
2.6 ! kahan 2570: uri = HTLocalToWWW (file_name, "file:");
2.5 barstow 2571:
2572: HTRDF_setSource(rdfparser, uri);
2573: HTRDF_createBags(rdfparser, NO);
2574:
2575: if (new_triple_callback)
2576: HTRDF_registerNewTripleCallback(rdfparser, new_triple_callback, NULL);
2577: else
2578: HTRDF_registerNewTripleCallback(rdfparser, triple_newInstance, NULL);
2579:
2580: rdf_setHandlers(xmlparser);
2581: XML_SetUserData(xmlparser, rdfparser);
2582:
2583: /* Create a stream to be used to process the triple output */
2584: if ((stream = (HTStream *) HT_CALLOC(1, sizeof(HTStream))) == NULL) {
2.6 ! kahan 2585: HT_FREE(uri);
2.5 barstow 2586: fclose (fp);
2587: XML_ParserFree(xmlparser);
2588: HTRDF_delete(rdfparser);
2589: return "RDFParseFile: Could not allocate memory for HTStream";
2590: }
2591: stream->isa = &HTRDFTriplesClass;
2592: stream->state = HT_OK;
2593: stream->request = NULL; /* Don't have a request */
2594: stream->target = NULL; /* Don't have another stream */
2595: stream->rdfparser = rdfparser;
2596:
2597: /*
2598: * The parsing occurs on one read buffer at a time instead of
2599: * reading everything into memory and then parsing
2600: */
2601: for (;;) {
2602: int done;
2603: int buff_len;
2604: fgets(buff, sizeof(buff), fp);
2605: if (ferror(fp)) {
2.6 ! kahan 2606: HT_FREE(uri);
2.5 barstow 2607: fclose (fp);
2608: XML_ParserFree(xmlparser);
2609: HTRDF_delete(rdfparser);
2610: HT_FREE(stream);
2611: return "RDFParseFile: error reading file";
2612: }
2613: done = feof(fp);
2614: if (done)
2615: buff_len = 0;
2616: else
2617: buff_len = strlen (buff);
2618: if (! XML_Parse(xmlparser, buff, buff_len, done)) {
2619: fprintf (stderr, "Parse error at line %d:\n%s\n",
2620: XML_GetCurrentLineNumber(xmlparser),
2621: XML_ErrorString(XML_GetErrorCode(xmlparser)));
2.6 ! kahan 2622: HT_FREE(uri);
2.5 barstow 2623: fclose(fp);
2624: XML_ParserFree(xmlparser);
2625: HTRDF_delete(rdfparser);
2626: HT_FREE(stream);
2627: return "RDFParseFile: parse error";
2628: }
2629: if (done)
2630: break;
2631: }
2632:
2633: /* The file has been parsed, generate the triples */
2634: generate_triples(stream);
2635:
2636: /* Cleanup */
2.6 ! kahan 2637: HT_FREE(uri);
2.5 barstow 2638: fclose (fp);
2639: XML_ParserFree(xmlparser);
2640: HTRDF_delete(rdfparser);
2641: HT_FREE(stream);
2642:
2643: return NULL;
2644: }
2.6 ! kahan 2645:
! 2646:
! 2647:
! 2648:
! 2649:
! 2650:
! 2651:
Webmaster