Annotation of libwww/Library/src/HTRDF.c, revision 2.7
2.1 frystyk 1: /*
2: ** RDF PARSER
3: **
2.7 ! barstow 4: ** @(#) $Id: HTRDF.c,v 2.6 2000/08/09 15:42:52 kahan Exp $
2.1 frystyk 5: **
6: ** Copyright © 1995-1998 World Wide Web Consortium, (Massachusetts
7: ** Institute of Technology, Institut National de Recherche en
8: ** Informatique et en Automatique, Keio University). All Rights
9: ** Reserved. This program is distributed under the W3C's Software
10: ** Intellectual Property License. This program is distributed in the hope
11: ** that it will be useful, but WITHOUT ANY WARRANTY; without even the
12: ** implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
13: ** PURPOSE. See W3C License http://www.w3.org/Consortium/Legal/ for more
14: ** details.
15: **
16: ** This module requires expat
17: **
18: ** AUTHOR:
19: ** John Punin <puninj@cs.rpi.edu>,
20: ** Henrik
21: */
22:
23: #include "WWWLib.h"
24: #include "WWWInit.h"
25: #include "WWWXML.h"
26: #include "HTRDF.h"
27:
28: struct _HTStream {
29: const HTStreamClass * isa;
30: int state;
31: HTRequest * request;
32: HTStream * target;
33: HTRDF * rdfparser;
34: };
35:
/* Forward a single character / a string to the target of stream `t` */
#define PUTC(t,c) \
    (*(t)->target->isa->put_character)((t)->target, (c))
#define PUTS(t,s) \
    (*(t)->target->isa->put_string)((t)->target, (s))
38:
/*
** One RDF statement. All three strings are private heap copies made by
** HTTriple_new() and released by HTTriple_delete().
*/
struct _HTTriple {
    char *m_sPredicate;
    char *m_sSubject;
    char *m_sObject;
};
44:
45: struct _HTElement {
2.3 frystyk 46: char * m_sName;
2.1 frystyk 47: HTAssocList * m_attributes;
48: HTList * m_children;
2.3 frystyk 49: char * m_sID;
50: char * m_sBagID;
2.1 frystyk 51: HTList * m_vTargets;
52: BOOL m_bDone;
2.3 frystyk 53: char * m_sPrefix;
54: char * m_sContent;
2.1 frystyk 55: };
56:
57: struct _HTRDFParser {
58: HTList * m_namespaceStack;
59: HTList * m_elementStack;
60: HTElement * m_root;
61: HTList * m_triples;
2.3 frystyk 62: char * m_sSource;
2.1 frystyk 63: HTList * m_vAllNameSpaces;
64:
65: BOOL m_bCreateBags;
66: BOOL m_bFetchSchemas;
67:
68: HTList * m_parseTypeStack;
69: HTList * m_parseElementStack;
2.3 frystyk 70: char * m_sLiteral;
2.1 frystyk 71:
72: HTList * m_vResources;
73: HTList * m_vResolveQueue;
74: HTHashtable * m_hIDtable;
75: int m_iReificationCounter;
76:
77: HTStream * ostream;
78:
79: HTTripleCallback_new * newTripleInstance;
80: void * tripleContext;
81: };
82:
83: /* @@@ Should not be global but controlled by name spaces @@@ */
84: PRIVATE HTRDFCallback_new * RDFInstance = NULL;
85: PRIVATE void * RDFInstanceContext = NULL;
86:
2.3 frystyk 87: PRIVATE char * HTRDF_processContainer (HTRDF *me, HTElement *e);
88: PRIVATE char * HTRDF_processPredicate (HTRDF *me, HTElement *predicate,
2.1 frystyk 89: HTElement *description,
2.3 frystyk 90: char * sTarget,
2.1 frystyk 91: BOOL reificate);
2.3 frystyk 92: PRIVATE void HTRDF_processListItem (HTRDF *me,char * sID, HTElement *listitem,
2.1 frystyk 93: int iCounter);
94: PRIVATE void HTRDF_checkAttributes (HTRDF *me,HTElement *Element);
95: PRIVATE BOOL HTRDF_expandAttributes (HTRDF *me, HTElement *parent, HTElement *ele);
2.3 frystyk 96: PRIVATE char * HTRDF_reificate (HTRDF *me, char * sPredicate, char * sSubject,
97: char * sObject, char * sNodeID);
2.4 barstow 98: /* ------------------------------------------------------------------------- */
99:
100: /*
101: ** Append the markup for the given element and its attribute to the
102: ** parser's "Literal" buffer. This buffer is filled in when parseType="Literal".
103: */
104: PRIVATE void addMarkupStart (HTRDF *rdfp, const char *name, const char **atts)
105: {
106: int i=0;
107:
108: if (!rdfp || !name) return;
109:
2.7 ! barstow 110: StrAllocMCat(&rdfp->m_sLiteral, "<", name, NULL);
2.4 barstow 111:
112: while (atts[i]) {
2.7 ! barstow 113: StrAllocMCat(&rdfp->m_sLiteral, " ", atts[i], "=\"", atts[i+1], "\"", NULL);
2.4 barstow 114: i+=2;
115: }
116:
117: StrAllocCat(rdfp->m_sLiteral, ">");
118: }
119:
120: /*
121: ** Terminate this element's "Literal" buffer. This buffer is filled in when
122: ** parseType="Literal".
123: */
124: PRIVATE void addMarkupEnd (HTRDF *rdfp, const char *name)
125: {
126: if (!rdfp || !name) return;
127:
2.7 ! barstow 128: StrAllocMCat(&rdfp->m_sLiteral, "</", name, ">", NULL);
2.4 barstow 129: }
2.1 frystyk 130:
131: /* ------------------------------------------------------------------------- */
132:
133: /*
134: ** Searches a whole list of Strings and returns true if the String is found.
135: */
2.3 frystyk 136: PRIVATE BOOL HTList_contains (HTList *list, char * s)
2.1 frystyk 137: {
138: HTList *cur = list;
2.3 frystyk 139: char * cs = NULL;
140: while ((cs = (char *) HTList_nextObject(cur))) {
2.1 frystyk 141: if (!strcmp(cs, s)) return YES;
142: }
143: return NO;
144: }
145:
146: /*
147: ** Useful function that Trims a string
148: ** @@@ Should use HTStrip() @@@
149: */
150: PRIVATE char * trim (char *s)
151: {
152: char *p = NULL, *t = NULL;
153: int len = s ? strlen(s) : -1;
154: if (s && len > 0) {
155: StrAllocCopy(t, s);
156: p = &(s[len-1]);
157: while(p!=s) {
158: if (!isspace((int)(*p)))
159: break;
160: p--;
161: }
162: t[(int)(p-s)+1] = '\0';
163: if (isspace((int) t[(int)(p-s)]))
164: t[(int)(p-s)] = '\0';
165: }
166: return t;
167: }
168:
169: /* ------------------------------------------------------------------------- */
170: /* TRIPLE of RDF */
171: /* ------------------------------------------------------------------------- */
172:
2.3 frystyk 173: PUBLIC HTTriple * HTTriple_new (char * p, char * s, char * o)
2.1 frystyk 174: {
175: HTTriple * me = NULL;
176: if (p && s && o) {
177: if ((me = (HTTriple *) HT_CALLOC(1, sizeof(HTTriple))) == NULL)
178: HT_OUTOFMEM("HTTriple_new");
179: StrAllocCopy(me->m_sPredicate, p);
180: StrAllocCopy(me->m_sSubject, s);
181: StrAllocCopy(me->m_sObject, o);
182: }
183: return me;
184: }
185:
186: PUBLIC BOOL HTTriple_delete (HTTriple * me)
187: {
188: if (me) {
189: HT_FREE(me->m_sPredicate);
190: HT_FREE(me->m_sSubject);
191: HT_FREE(me->m_sObject);
192: HT_FREE(me);
193: return YES;
194: }
195: return NO;
196: }
197:
198: PUBLIC void HTTriple_print (HTTriple * me)
199: {
200: if (me)
201: HTPrint("TRIPLE(%s,%s,%s)\n", me->m_sPredicate, me->m_sSubject,
202: me->m_sObject);
203: }
204:
2.3 frystyk 205: PUBLIC char * HTTriple_subject (HTTriple * me)
2.1 frystyk 206: {
207: return me ? me->m_sSubject : NULL;
208: }
209:
2.3 frystyk 210: PUBLIC char * HTTriple_predicate (HTTriple * me)
2.1 frystyk 211: {
212: return me ? me->m_sPredicate : NULL;
213: }
214:
2.3 frystyk 215: PUBLIC char * HTTriple_object (HTTriple * me)
2.1 frystyk 216: {
217: return me ? me->m_sObject : NULL;
218: }
219:
220: /* ------------------------------------------------------------------------- */
221: /* ELEMENT of RDF */
222: /* ------------------------------------------------------------------------- */
223:
2.3 frystyk 224: PUBLIC HTElement * HTElement_new (char * sName, HTAssocList * al)
2.1 frystyk 225: {
226: HTElement * me = NULL;
227: if (sName) {
228: if ((me = (HTElement *) HT_CALLOC(1, sizeof(HTElement))) == NULL)
229: HT_OUTOFMEM("HTElement_new");
230: StrAllocCopy(me->m_sName, sName);
231: me->m_attributes = al ? al : HTAssocList_new();
232: me->m_children = HTList_new();
233: /*me->m_nodes = HTAssocList_new();*/ /* Sirpac does not use nodes list */
234: me->m_vTargets = HTList_new();
235: me->m_bDone = FALSE;
236: }
237: return me;
238: }
239:
240: /*
241: ** Creates a Data Element and saves the data in the Content field.
242: ** Data Element does not have attributes
243: */
2.3 frystyk 244: PUBLIC HTElement * HTElement_new2 (char * sContent)
2.1 frystyk 245: {
246: HTElement * me = NULL;
247: if (sContent) {
248: if ((me = (HTElement *) HT_CALLOC(1, sizeof(HTElement))) == NULL)
249: HT_OUTOFMEM("HTElement_new2");
250: StrAllocMCopy(&me->m_sName, "[DATA: ", sContent, "]", NULL);
251: me->m_attributes = NULL;
252: me->m_children = HTList_new();
253: /*me->m_nodes = HTAssocList_new();*/
254: me->m_vTargets = HTList_new();
255: me->m_bDone = FALSE;
256: StrAllocCopy(me->m_sContent, sContent);
257: }
258: return me;
259: }
260:
2.3 frystyk 261: PUBLIC BOOL HTElement_addData (HTElement *me, char * sContent)
2.1 frystyk 262: {
263: if (me && sContent) {
264: int l = strlen(me->m_sName);
265: StrAllocCat(me->m_sContent, sContent);
266: me->m_sName[l-1]='\0';
267: StrAllocMCat(&me->m_sName, sContent, "]", NULL);
268: return YES;
269: }
270: return NO;
271: }
272:
273: PUBLIC BOOL HTElement_delete (HTElement * me)
274: {
275: if (me) {
276: HT_FREE(me->m_sName);
277: if (me->m_attributes) HTAssocList_delete(me->m_attributes);
278: if (me->m_children) HTList_delete(me->m_children);
279: HT_FREE(me->m_sID);
280: HT_FREE(me->m_sBagID);
281: if (me->m_vTargets) HTList_delete(me->m_vTargets);
282: HT_FREE(me->m_sPrefix);
283: HT_FREE(me->m_sContent);
284: HT_FREE(me);
285: return YES;
286: }
287: return NO;
288: }
289:
290: PUBLIC BOOL HTElement_addChild (HTElement * me, HTElement * element)
291: {
292: return (me && element) ? HTList_appendObject(me->m_children, element) : NO;
293: }
294:
2.3 frystyk 295: PUBLIC BOOL HTElement_addAttribute (HTElement * me, char * sName, char * sValue)
2.1 frystyk 296: {
297: return (me && sName && sValue) ?
298: HTAssocList_addObject(me->m_attributes, sName, sValue) : NO;
299: }
300:
2.3 frystyk 301: PUBLIC BOOL HTElement_removeAttribute (HTElement * me, char * sName)
2.1 frystyk 302: {
303: return (me && sName) ? HTAssocList_removeObject(me->m_attributes, sName) : NO;
304: }
305:
2.3 frystyk 306: PUBLIC char * HTElement_getAttribute (HTElement * me, char * sName)
2.1 frystyk 307: {
308: return (me && sName) ? HTAssocList_findObjectCaseSensitiveExact(me->m_attributes, sName) : NULL;
309: }
310:
2.3 frystyk 311: PUBLIC char * HTElement_getAttribute2 (HTElement * me, char * sNamespace, char * sName)
2.1 frystyk 312: {
2.3 frystyk 313: char * fValue = NULL;
314: char * fName = NULL;
2.1 frystyk 315: if (me && sNamespace && sName) {
316: StrAllocMCopy(&fName, sNamespace, sName, NULL);
317: fValue = HTAssocList_findObjectCaseSensitiveExact(me->m_attributes, fName);
318: HT_FREE(fName);
319: }
320: return fValue;
321: }
322:
323: PUBLIC BOOL HTElement_addTarget (HTElement * me, HTElement * element)
324: {
325: return (me && element) ? HTList_addObject(me->m_vTargets, element) : NO;
326: }
327:
328: PUBLIC HTElement * HTElement_target (HTElement * me)
329: {
330: return me ? (HTElement *) HTList_lastObject(me->m_vTargets) : NULL;
331: }
332:
333: PUBLIC BOOL HTElement_instanceOfData (HTElement * me)
334: {
335: return (me && me->m_sContent) ? YES : NO;
336: }
337:
338: /* ------------------------------------------------------------------------- */
339: /* EXPAT HANDLERS */
340: /* ------------------------------------------------------------------------- */
341:
342: /*
343: * Called for each new element.
344: * Build up the document tree using an element stack
345: */
346: PRIVATE void XML_startElement (void * userData,
347: const XML_Char * name, const XML_Char ** atts)
348: {
349: HTRDF * rdfp = (HTRDF *) userData;
350: HTAssocList * namespaces = HTAssocList_new();
351: HTAssocList * newAL = HTAssocList_new();
352: int i = 0;
2.5 barstow 353:
2.1 frystyk 354: /**
355: * The following loop tries to identify special xmlns prefix
356: * attributes and update the namespace stack accordingly.
357: * While doing all this, it builds another AttributeList instance
358: * which will hold the expanded names of the attributes
359: * (I think this approach is only useful for RDF which uses
360: * attributes as an abbreviated syntax for element names)
361: */
362: if (atts) {
363: while (atts[i]) {
2.3 frystyk 364: char * aName = (char * ) atts[i];
2.1 frystyk 365: if (!strcmp(aName, "xmlns")) {
2.3 frystyk 366: char * aValue = (char *) atts[i+1];
2.1 frystyk 367: int len = aValue ? strlen(aValue) : -1;
368: if (len == 0 && !rdfp->m_sSource)
369: aValue = rdfp->m_sSource;
370: HTAssocList_addObject(namespaces, aName, aValue);
371: /* save all non-RDF schema addresses */
372: if (!HTList_contains(rdfp->m_vAllNameSpaces, aValue) &&
373: strncmp(aValue, RDFMS, strlen(RDFMS)) &&
374: strncmp(aValue, RDFSCHEMA, strlen(RDFSCHEMA))) {
2.3 frystyk 375: char * nname = NULL;
2.1 frystyk 376: StrAllocCopy(nname, aValue);
377: HTList_addObject(rdfp->m_vAllNameSpaces, nname);
378: }
379:
380: /* Special case: Don't save document's own address */
381: if (rdfp->m_sSource &&
382: !strncmp(aValue, rdfp->m_sSource, strlen(rdfp->m_sSource))) {
2.3 frystyk 383: char * nname = NULL;
2.1 frystyk 384: StrAllocCopy(nname, aValue);
385: HTList_addObject(rdfp->m_vAllNameSpaces, nname);
386: }
387: } else if (!strncmp(aName, "xmlns:", 6)) {
2.3 frystyk 388: char * aValue = (char *) atts[i+1];
389: char * nName = NULL;
2.1 frystyk 390: int len = aValue ? strlen(aValue) : -1;
391: if (len == 0 && !rdfp->m_sSource)
392: aValue = rdfp->m_sSource;
393: StrAllocCopy(nName, &(aName[6]));
394: HTAssocList_addObject(namespaces, nName, aValue);
395: HT_FREE(nName);
396:
397: /* Save all non-RDF schema addresses */
398: if (!HTList_contains(rdfp->m_vAllNameSpaces, aValue) &&
399: strncmp(aValue, RDFMS, strlen(RDFMS)) &&
400: strncmp(aValue, RDFSCHEMA, strlen(RDFSCHEMA))) {
2.3 frystyk 401: char * nname = NULL;
2.1 frystyk 402: StrAllocCopy(nname, aValue);
403: HTList_addObject(rdfp->m_vAllNameSpaces, nname);
404: }
405:
406: /* Special case: Don't save document's own address */
407: if (rdfp->m_sSource &&
408: !strncmp(aValue, rdfp->m_sSource, strlen(rdfp->m_sSource))) {
2.3 frystyk 409: char * nname = NULL;
2.1 frystyk 410: StrAllocCopy(nname, aValue);
411: HTList_addObject(rdfp->m_vAllNameSpaces, nname);
412: }
413: }
414: i+=2;
415: } /* end of while */
416: } /* end of if */
417:
418: /*
419: ** Place new namespace declarations into the stack
420: ** (Yes, I could optimize this a bit, not it wastes space
421: ** if there are no xmlns definitions)
422: */
423: HTList_addObject(rdfp->m_namespaceStack, namespaces);
424:
425: /*
426: ** Figure out the prefix part if it exists and
427: ** determine the namespace of the element accordingly
428: */
429: {
2.3 frystyk 430: char * sNamespace = NULL;
431: char * sElementName = NULL;
432: char * sPrefix2 = NULL;
2.1 frystyk 433: HTElement *newElement = NULL;
434: char *pindex = strchr(name, ':');
435: int ix = pindex ? (int) (pindex - name) : -1 ;
436: if (ix > 0) {
437: if (!(sPrefix2 = HT_MALLOC(ix+1)))
438: HT_OUTOFMEM("XML_startELement");
439: strncpy(sPrefix2, name, ix);
440: sPrefix2[ix]='\0';
441: sNamespace = HTRDF_namespace(rdfp, sPrefix2);
442: StrAllocCopy(sElementName, &(name[ix+1]));
443: HT_FREE(sPrefix2);
444: } else {
445: sNamespace = HTRDF_namespace(rdfp, "xmlns");
446: StrAllocCopy(sElementName, name);
447: }
448:
449: /*
450: * Finally look for attributes other than the special xmlns,
451: * expand them, and place to the new Attribute List
452: */
453: i = 0;
454: if (atts) {
455: while (atts[i]) {
2.3 frystyk 456: char * aName = (char *) atts[i];
457: char * sAttributeNamespace = NULL;
2.1 frystyk 458: if (strncmp(aName, "xmlns", 5)) {
2.3 frystyk 459: char * aValue = (char *) atts[i+1];
460: char * sPrefix = NULL;
2.1 frystyk 461: /* Expat does not have type for attributes */
462: pindex = strchr(aName, ':');
463: ix = pindex ? (int) (pindex - aName) : -1;
464: if (ix > 0) {
465: if (!(sPrefix = HT_MALLOC(ix+1)))
466: HT_OUTOFMEM("XML_startELement");
467: strncpy(sPrefix, aName, ix);
468: sPrefix[ix] = '\0';
469: sAttributeNamespace = HTRDF_namespace(rdfp, sPrefix);
470: aName = &(aName[ix+1]);
471: HT_FREE(sPrefix);
472: } else {
473: if (!sNamespace)
474: sAttributeNamespace = HTRDF_namespace(rdfp, "xmlns");
475: else
476: StrAllocCopy(sAttributeNamespace, sNamespace);
477: }
478:
479: if (HTRDF_parseLiteral(rdfp)) {
480: if (!sPrefix) {
481: if (!(sPrefix = HT_MALLOC(8)))
482: HT_OUTOFMEM("XML_startELement");
483: sprintf(sPrefix, "gen%d\n", i);
484: }
485: {
2.3 frystyk 486: char * fName = NULL;
2.1 frystyk 487: StrAllocMCopy(&fName, sPrefix, ":", aValue, NULL);
488: HTAssocList_addObject(newAL, fName, aValue);
489: HT_FREE(fName);
490: StrAllocMCopy(&fName, "xmlns:", sPrefix, NULL);
491: HTAssocList_addObject(newAL, fName, sAttributeNamespace);
492: HT_FREE(fName);
493: }
494: } else {
2.3 frystyk 495: char * fName = NULL;
2.1 frystyk 496: StrAllocMCopy(&fName, sAttributeNamespace, aName, NULL);
497: HTAssocList_addObject(newAL, fName, aValue);
498: HT_FREE(fName);
499: }
500:
501: HT_FREE(sAttributeNamespace);
502:
503: /*
504: ** This call will try to see if the user is using
505: ** RDF look-alike elements from another namespace
506: **
507: ** Note: you can remove the call if you wish
508: */
509: #if 0
510: HTRDF_likeRDF (rdfp, sAttributeNamespace, aName);
511: #endif
512:
513: } /* end of if */
514: i+=2;
515: } /* end of while */
516: } /* end of if atts */
517:
518: /*
519: * If we have parseType="Literal" set earlier, this element
520: * needs some additional attributes to make it stand-alone
521: * piece of XML
522: */
523: if (HTRDF_parseLiteral(rdfp)) {
2.3 frystyk 524: char * fName = NULL;
2.4 barstow 525:
2.1 frystyk 526: if (!sPrefix2) {
527: if (sNamespace)
528: HTAssocList_addObject(newAL, "xmlns:gen", sNamespace);
529: StrAllocMCopy(&fName, "gen", sElementName, NULL);
530: newElement = HTElement_new(fName, newAL);
531: StrAllocCopy(newElement->m_sPrefix, "gen");
532: HT_FREE(fName);
533: } else {
2.3 frystyk 534: char * sAttributeNamespace = HTRDF_namespace(rdfp, sPrefix2);
2.1 frystyk 535: if (sAttributeNamespace) {
536: StrAllocMCopy(&fName, "xmlns:", sPrefix2, NULL);
537: HTAssocList_addObject(newAL, fName, sAttributeNamespace);
538: HT_FREE(fName);
539: }
540: StrAllocMCopy(&fName, sPrefix2, ":", sElementName, NULL);
541: newElement = HTElement_new(fName, newAL);
542: HT_FREE(fName);
543: }
544: } else {
2.3 frystyk 545: char * fName = NULL;
2.1 frystyk 546: StrAllocMCopy(&fName, sNamespace, sElementName, NULL);
547: newElement = HTElement_new(fName, newAL);
548: HT_FREE(fName);
549: /* HTRDF_likeRDF (rdfp, sNamespace, sElementName); */
550: }
551: HT_FREE(sElementName);
552: HT_FREE(sNamespace);
553: HTRDF_checkAttributes(rdfp, newElement);
2.4 barstow 554:
2.1 frystyk 555: /*
556: ** Check parseType
557: */
558: {
2.3 frystyk 559: char * fName = NULL;
560: char * sLiteralValue = NULL;
2.1 frystyk 561: StrAllocMCopy(&fName, RDFMS, "parseType", NULL);
562: sLiteralValue = HTElement_getAttribute(newElement, fName);
563: HT_FREE(fName);
564: if (sLiteralValue && strcmp(sLiteralValue, "Resource")) {
565: /**
566: * This is the management of the element where
567: * parseType="Literal" appears
568: *
569: * You should notice RDF V1.0 conforming implementations
570: * must treat other values than Literal and Resource as
571: * Literal. This is why the condition is !equals("Resource")
572: */
573:
574: HTList_addObject(rdfp->m_parseTypeStack, sLiteralValue);
575: if (!HTList_isEmpty(rdfp->m_elementStack)) {
576: HTElement *e = (HTElement *)
577: HTList_lastObject(rdfp->m_elementStack);
578: HTElement_addChild(e, newElement);
579: }
580: HTList_addObject(rdfp->m_elementStack, newElement);
581: HTList_addObject(rdfp->m_parseElementStack, newElement);
582: HT_FREE(rdfp->m_sLiteral);
583: StrAllocCopy(rdfp->m_sLiteral, "");
584: return;
585: }
586:
587: if (HTRDF_parseLiteral(rdfp)) {
588: /*
589: * This is the management of any element nested within
590: * a parseType="Literal" declaration
591: */
2.4 barstow 592: /* Add the element to the parser's literal buffer */
593: addMarkupStart (rdfp, name, atts);
594:
2.1 frystyk 595: HTList_addObject(rdfp->m_elementStack, newElement);
596: return;
597: }
598:
599: /*
600: ** Update the containment hierarchy with the stack.
601: */
602: if (!HTList_isEmpty(rdfp->m_elementStack)) {
603: HTElement *e = (HTElement *)
604: HTList_lastObject(rdfp->m_elementStack);
605: HTElement_addChild(e, newElement);
606: }
607:
608: /*
609: ** Place the new element into the stack
610: */
611: HTList_addObject(rdfp->m_elementStack, newElement);
612: if (sLiteralValue && !strcmp(sLiteralValue, "Resource")) {
613: HTList_addObject(rdfp->m_parseTypeStack, sLiteralValue);
614: HTList_addObject(rdfp->m_parseElementStack, newElement);
615: HT_FREE(rdfp->m_sLiteral);
616: StrAllocCopy(rdfp->m_sLiteral, "");
617:
618: /*
619: * Since parseType="Resource" implies the following
620: * production must match Description, let's create
621: * an additional Description node here in the document tree.
622: */
623: {
2.3 frystyk 624: char * fName = NULL;
2.1 frystyk 625: HTElement *desc = NULL;
626: HTAssocList * al = HTAssocList_new ();
627: StrAllocMCopy(&fName, RDFMS, "Description", NULL);
628: desc = HTElement_new(fName, al);
629: HT_FREE(fName);
630: if (!HTList_isEmpty(rdfp->m_elementStack)) {
631: HTElement *e = (HTElement *)
632: HTList_lastObject(rdfp->m_elementStack);
633: HTElement_addChild(e, desc);
634: }
635: HTList_addObject(rdfp->m_elementStack, desc);
636: }
637: } /* end of if */
638: } /* end of block */
639: } /* end of block */
640: }
641:
642: /*
643: * For each end of an element scope step back in the
644: * element and namespace stack
645: */
646: PRIVATE void XML_endElement (void * userData,
647: const XML_Char * name)
648: {
649: HTRDF * rdfp = (HTRDF *) userData;
650: BOOL bParseLiteral = rdfp ? HTRDF_parseLiteral(rdfp) : NO;
651: HTAssocList * namespaces = HTList_removeLastObject(rdfp->m_namespaceStack);
652: rdfp->m_root = (HTElement *) HTList_removeLastObject(rdfp->m_elementStack);
653: if (namespaces) HTAssocList_delete(namespaces);
654:
655: if (bParseLiteral) {
656: HTElement *pe = (HTElement *)
657: HTList_lastObject(rdfp->m_parseElementStack);
658: if (pe != rdfp->m_root) {
2.4 barstow 659: /* Terminate the literal */
660: addMarkupEnd (rdfp, name);
2.1 frystyk 661: } else {
662: HTElement *de = HTElement_new2(rdfp->m_sLiteral);
663: HTElement_addChild(pe, de);
2.4 barstow 664:
2.1 frystyk 665: HT_FREE(rdfp->m_sLiteral);
666: StrAllocCopy(rdfp->m_sLiteral, "");
667: HTList_removeLastObject(rdfp->m_parseElementStack);
668: HTList_removeLastObject(rdfp->m_parseTypeStack);
669: }
670: } else if (HTRDF_parseResource(rdfp)) {
671: /**
672: * If we are doing parseType="Resource"
673: * we need to explore whether the next element in
674: * the stack is the closing element in which case
675: * we remove it as well (remember, there's an
676: * extra Description element to be removed)
677: */
678: if (!HTList_isEmpty(rdfp->m_elementStack)) {
679: HTElement *pe = (HTElement *)
680: HTList_lastObject(rdfp->m_parseElementStack);
681: HTElement *e = (HTElement *)
682: HTList_lastObject(rdfp->m_elementStack);
683: if (pe == e) {
684: e = (HTElement *) HTList_removeLastObject(rdfp->m_elementStack);
685: HTList_removeLastObject(rdfp->m_parseElementStack);
686: HTList_removeLastObject(rdfp->m_parseTypeStack);
687: }
688: }
689: }
690: }
691:
692: PRIVATE void XML_characterData (void * userData,
693: const XML_Char * s, int len)
694: {
695: /*
696: * Place all characters as Data instance to the containment
697: * hierarchy with the help of the stack.
698: */
699: HTRDF * rdfp = (HTRDF *) userData;
2.3 frystyk 700: HTElement * e = (HTElement *) HTList_lastObject(rdfp->m_elementStack);
701: char * tstr = NULL;
702: char * str = NULL;
703: if (!(str = (char *) HT_MALLOC(len+1)))
2.1 frystyk 704: HT_OUTOFMEM("XML_characterData");
705: strncpy(str, s, len);
706: str[len]='\0';
707: if (HTRDF_parseLiteral(rdfp)) {
708: StrAllocCat(rdfp->m_sLiteral, str);
709: HT_FREE(str);
710: return;
711: }
712: /* JUST FOR EXPAT */
713: {
714: HTElement *lch = (HTElement *) HTList_lastObject(e->m_children);
715: if (lch && HTElement_instanceOfData(lch)) {
716: HTElement_addData(lch, str);
717: HT_FREE(str);
718: return;
719: }
720: }
721: /*
722: * Warning: this is not correct procedure according to XML spec.
723: * All whitespace matters!
724: */
725: tstr = trim(str);
726: if (strlen(tstr) > 0) {
727: HTElement * de = HTElement_new2(tstr);
728: HTElement_addChild(e, de);
729: }
730: HT_FREE(str); HT_FREE(tstr);
731: }
732:
733: PRIVATE void XML_processingInstruction (void * userData,
734: const XML_Char * target,
735: const XML_Char * data)
736: {
737: return;
738: }
739:
740: /*
741: ** This is called for any characters in the XML document for
742: ** which there is no applicable handler. This includes both
743: ** characters that are part of markup which is of a kind that is
744: ** not reported (comments, markup declarations), or characters
745: ** that are part of a construct which could be reported but
746: ** for which no handler has been supplied. The characters are passed
747: ** exactly as they were in the XML document except that
748: ** they will be encoded in UTF-8. Line boundaries are not normalized.
749: ** Note that a byte order mark character is not passed to the default handler.
750: ** If a default handler is set, internal entity references
751: ** are not expanded. There are no guarantees about
752: ** how characters are divided between calls to the default handler:
753: ** for example, a comment might be split between multiple calls.
754: */
755: PRIVATE void XML_default (void * userData,
756: const XML_Char * s, int len)
757: {
758: return;
759: }
760:
761: /*
762: ** This is called for a declaration of an unparsed (NDATA)
763: ** entity. The base argument is whatever was set by XML_SetBase.
764: ** The entityName, systemId and notationName arguments will never be null.
765: ** The other arguments may be.
766: */
767: PRIVATE void XML_unparsedEntityDecl (void * userData,
768: const XML_Char * entityName,
769: const XML_Char * base,
770: const XML_Char * systemId,
771: const XML_Char * publicId,
772: const XML_Char * notationName)
773: {
774: return;
775: }
776:
777: /*
778: ** This is called for a declaration of notation.
779: ** The base argument is whatever was set by XML_SetBase.
780: ** The notationName will never be null. The other arguments can be.
781: */
782: PRIVATE void XML_notationDecl (void * userData,
783: const XML_Char * notationName,
784: const XML_Char * base,
785: const XML_Char * systemId,
786: const XML_Char * publicId)
787: {
788: return;
789: }
790:
791: /*
792: ** This is called for a reference to an external parsed general entity.
793: ** The referenced entity is not automatically parsed.
794: ** The application can parse it immediately or later using
795: ** XML_ExternalEntityParserCreate.
796: ** The parser argument is the parser parsing the entity containing the reference;
797: ** it can be passed as the parser argument to XML_ExternalEntityParserCreate.
798: ** The systemId argument is the system identifier as specified in the entity
799: ** declaration; it will not be null.
800: ** The base argument is the system identifier that should be used as the base for
801: ** resolving systemId if systemId was relative; this is set by XML_SetBase;
802: ** it may be null.
803: ** The publicId argument is the public identifier as specified in the entity declaration,
804: ** or null if none was specified; the whitespace in the public identifier
805: ** will have been normalized as required by the XML spec.
806: ** The openEntityNames argument is a space-separated list of the names of the entities
807: ** that are open for the parse of this entity (including the name of the referenced
808: ** entity); this can be passed as the openEntityNames argument to
809: ** XML_ExternalEntityParserCreate; openEntityNames is valid only until the handler
810: ** returns, so if the referenced entity is to be parsed later, it must be copied.
811: ** The handler should return 0 if processing should not continue because of
812: ** a fatal error in the handling of the external entity.
813: ** In this case the calling parser will return an XML_ERROR_EXTERNAL_ENTITY_HANDLING
814: ** error.
815: ** Note that unlike other handlers the first argument is the parser, not userData.
816: */
817: PRIVATE int XML_externalEntityRef (XML_Parser parser,
818: const XML_Char * openEntityNames,
819: const XML_Char * base,
820: const XML_Char * systemId,
821: const XML_Char * publicId)
822: {
823: return 0;
824: }
825:
826: /*
827: ** This is called for an encoding that is unknown to the parser.
828: ** The encodingHandlerData argument is that which was passed as the
829: ** second argument to XML_SetUnknownEncodingHandler.
830: ** The name argument gives the name of the encoding as specified in
831: ** the encoding declaration.
832: ** If the callback can provide information about the encoding,
833: ** it must fill in the XML_Encoding structure, and return 1.
834: ** Otherwise it must return 0.
835: ** If info does not describe a suitable encoding,
836: ** then the parser will return an XML_UNKNOWN_ENCODING error.
837: */
838: PRIVATE int XML_unknownEncoding (void * encodingHandlerData,
839: const XML_Char * name,
840: XML_Encoding * info)
841: {
842: return 0;
843: }
844:
845: /* ------------------------------------------------------------------------- */
846: /* HTXML STREAM HANDLERS */
847: /* ------------------------------------------------------------------------- */
848:
849: PRIVATE void rdf_setHandlers (XML_Parser me)
850: {
851: XML_SetElementHandler(me, XML_startElement, XML_endElement);
852: XML_SetCharacterDataHandler(me, XML_characterData);
853: XML_SetProcessingInstructionHandler(me, XML_processingInstruction);
854: XML_SetDefaultHandler(me, XML_default);
855: XML_SetUnparsedEntityDeclHandler(me, XML_unparsedEntityDecl);
856: XML_SetNotationDeclHandler(me, XML_notationDecl);
857: XML_SetExternalEntityRefHandler(me, XML_externalEntityRef);
858: XML_SetUnknownEncodingHandler(me, XML_unknownEncoding, NULL);
859: }
860:
861: PRIVATE void rdf_newInstance (HTStream * me,
862: HTRequest * request,
863: HTFormat target_format,
864: HTStream * target_stream,
865: XML_Parser xmlparser,
866: void * context)
867: {
868: if (me && xmlparser) {
869: rdf_setHandlers(xmlparser);
870: XML_SetUserData(xmlparser, context);
871:
872: /* Call the new RDF instance callback (if any) with this new stream */
873: if (RDFInstance)
874: (*RDFInstance)(me, request, target_format, target_stream, context, RDFInstanceContext);
875: }
876: }
877:
878: /* ------------------------------------------------------------------------- */
879: /* RDF PARSER */
880: /* ------------------------------------------------------------------------- */
881:
882: PRIVATE void visit_element_children (HTList *children)
883: {
884: HTElement *child = NULL;
885: HTList *cur = children;
886: while ((child = (HTElement *) HTList_nextObject(cur))) {
887: if (!HTList_isEmpty(child->m_children))
888: visit_element_children(child->m_children);
889: HTElement_delete(child);
890: }
891: }
892:
893: PRIVATE void delete_elements (HTRDF * me)
894: {
895: if (me && me->m_root) {
896: HTElement *r = me->m_root;
897: if (!HTList_isEmpty(r->m_children))
898: visit_element_children(r->m_children);
899: HTElement_delete(r);
900: }
901: }
902:
903: PUBLIC HTRDF * HTRDF_new (void)
904: {
905: HTRDF * me;
906: if ((me = (HTRDF *) HT_CALLOC(1, sizeof(HTRDF))) == NULL)
907: HT_OUTOFMEM("HTRDF_new");
908: me->m_namespaceStack = HTList_new();
909: me->m_elementStack = HTList_new();
910:
911: me->m_triples = HTList_new();
912: me->m_vAllNameSpaces = HTList_new();
913:
914: me->m_bCreateBags = FALSE;
915: me->m_bFetchSchemas = FALSE;
916:
917: me->m_parseTypeStack = HTList_new();
918: me->m_parseElementStack = HTList_new();
919:
920: me->m_vResources = HTList_new();
921: me->m_vResolveQueue = HTList_new();
922: me->m_hIDtable = HTHashtable_new(0);
923:
924: return me;
925: }
926:
927: PUBLIC BOOL HTRDF_delete (HTRDF * me)
928: {
929: if (me) {
930: delete_elements(me);
931: if (me->m_namespaceStack) {
932: HTList *cur = me->m_namespaceStack;
933: HTAssocList *alist = NULL;
934: while ((alist = (HTAssocList *) HTList_nextObject(cur))) {
935: HTAssocList_delete(alist);
936: }
937: HTList_delete(me->m_namespaceStack);
938: }
939: if (me->m_elementStack) HTList_delete(me->m_elementStack);
940: me->m_root = NULL;
941: if (me->m_triples) {
942: HTList *cur = me->m_triples;
943: HTTriple *t = NULL;
944: while ((t = (HTTriple *) HTList_nextObject(cur))) {
945: /*HTTriple_print(t);*/
946: HTTriple_delete(t);
947: }
948: HTList_delete(me->m_triples);
949: }
950: HT_FREE(me->m_sSource);
951: if (me->m_vAllNameSpaces) {
952: HTList *cur = me->m_vAllNameSpaces;
2.3 frystyk 953: char * s = NULL;
954: while ((s = (char *) HTList_nextObject(cur))) {
2.1 frystyk 955: HT_FREE(s);
956: }
957: HTList_delete(me->m_vAllNameSpaces);
958: }
959: if (me->m_parseTypeStack)
960: HTList_delete(me->m_parseTypeStack);
961: if (me->m_parseElementStack)
962: HTList_delete(me->m_parseElementStack);
963: if (me->m_vResources)
964: HTList_delete(me->m_vResources);
965: if (me->m_vResolveQueue)
966: HTList_delete(me->m_vResolveQueue);
967: if (me->m_hIDtable)
968: HTHashtable_delete(me->m_hIDtable);
969: HT_FREE(me->m_sLiteral);
970: HT_FREE(me);
971: return YES;
972: }
973: return NO;
974: }
975:
976: /*
977: * setSource method saves the name of the source document for
978: * later inspection if needed
979: */
2.3 frystyk 980: PUBLIC BOOL HTRDF_setSource(HTRDF *me, char * source)
2.1 frystyk 981: {
982: if (me && source) {
983: StrAllocCopy (me->m_sSource, source);
984: return YES;
985: }
986: return NO;
987: }
988:
989: /*
990: * Go through the m_vResolveQueue and assign
991: * direct object reference for each symbolic reference
992: */
993: PUBLIC BOOL HTRDF_resolve (HTRDF * me)
994: {
995: if (me) {
996: HTList * cur = me->m_vResolveQueue;
997: HTElement *e = NULL;
998: HTElement *e2 = NULL;
999: while ((e = (HTElement *) HTList_nextObject(cur))) {
2.3 frystyk 1000: char * sAbout = HTElement_getAttribute2(e, RDFMS, "about");
1001: char * sResource = HTElement_getAttribute2(e, RDFMS, "resource");
1002: char * sAboutEach = HTElement_getAttribute2(e, RDFMS, "aboutEach");
1003: char * sAboutEachPrefix = HTElement_getAttribute2(e, RDFMS,
2.1 frystyk 1004: "aboutEachPrefix");
1005: if (sAbout) {
1006: if (sAbout[0]=='#')
1007: sAbout = &(sAbout[1]);
1008: e2 = (HTElement *) HTRDF_lookforNode(me, sAbout);
1009: if (e2)
1010: HTElement_addTarget(e, e2);
1011: else
1012: HTPrint("Unresolved internal reference %s\n", sAbout);
1013: }
1014: if (sResource) {
1015: if (sResource[0]=='#')
1016: sResource = &(sResource[1]);
1017: e2 = (HTElement *) HTRDF_lookforNode(me, sResource);
1018: if (e2)
1019: HTElement_addTarget(e, e2);
1020: }
1021:
1022: if (sAboutEach) {
1023: sAboutEach = &(sAboutEach[1]);
1024: e2 = (HTElement *) HTRDF_lookforNode(me, sAboutEach);
1025: if (e2)
1026: HTElement_addTarget(e, e2);
1027: }
1028: if (sAboutEachPrefix) {
1029: HTList * curr = me->m_vResources;
1030: HTElement *ele = NULL;
1031: while ((ele = (HTElement *) HTList_nextObject(curr))) {
2.3 frystyk 1032: char * sA = HTElement_getAttribute2(ele, RDFMS, "about");
2.1 frystyk 1033: if (sA &&
1034: !strncmp(sA, sAboutEachPrefix, strlen(sAboutEachPrefix))) {
1035: HTElement_addTarget(e, ele);
1036: }
1037: }
1038: }
1039: }
1040: HTList_delete(me->m_vResources);
1041: me->m_vResources = HTList_new();
1042: return YES;
1043: }
1044: return NO;
1045: }
1046:
1047: /**
1048: * Check if the element e is from the namespace
1049: * of the RDF schema by comparing only the beginning of
1050: * the expanded element name with the canonical RDFMS
1051: * URI
1052: */
1053: PUBLIC BOOL HTRDF_isRDF(HTRDF * me, HTElement *e)
1054: {
1055: return (me && e && e->m_sName) ?
1056: (!strncmp(e->m_sName, RDFMS, strlen(RDFMS))) : NO;
1057: }
1058:
1059: PUBLIC BOOL HTRDF_isRDFroot (HTRDF * me, HTElement *e)
1060: {
1061: if (me && e && e->m_sName) {
1062: int len = strlen(e->m_sName);
1063: if (len > 3) return (HTRDF_isRDF(me, e) && !strcmp(&(e->m_sName[len-3]), "RDF"));
1064: }
1065: return NO;
1066: }
1067:
1068: /**
1069: * Is the element a Description
1070: */
1071: PUBLIC BOOL HTRDF_isDescription (HTRDF *me, HTElement *e)
1072: {
1073: if (me && e && e->m_sName) {
1074: int len = strlen(e->m_sName);
1075: if (len > 11) return (HTRDF_isRDF(me, e) && !strcmp(&(e->m_sName[len-11]), "Description"));
1076: }
1077: return NO;
1078: }
1079:
1080: /*
1081: * Is the element a ListItem
1082: */
1083: PUBLIC BOOL HTRDF_isListItem (HTRDF *me, HTElement *e)
1084: {
1085: if (me && e && e->m_sName) {
1086: int len = strlen(e->m_sName);
1087: if (len > 2)
1088: return (HTRDF_isRDF(me, e) && (!strcmp(&(e->m_sName[len-2]), "li") || strchr(e->m_sName,'_')));
1089: }
1090: return NO;
1091: }
1092:
1093: /**
1094: * Is the element a Sequence
1095: */
1096: PUBLIC BOOL HTRDF_isSequence (HTRDF *me, HTElement *e)
1097: {
1098: if (me && e && e->m_sName) {
1099: int len = strlen(e->m_sName);
1100: if (len > 3) return (HTRDF_isRDF(me, e) && !strcmp(&(e->m_sName[len-3]), "Seq"));
1101: }
1102: return NO;
1103: }
1104:
1105: /*
1106: * Is the element an Alternative
1107: */
1108: PUBLIC BOOL HTRDF_isAlternative (HTRDF *me, HTElement *e)
1109: {
1110: if (me && e && e->m_sName) {
1111: int len = strlen(e->m_sName);
1112: if (len > 3) return (HTRDF_isRDF(me, e) && !strcmp(&(e->m_sName[len-3]), "Alt"));
1113: }
1114: return NO;
1115: }
1116:
1117: /*
1118: * Is the element a Bag
1119: */
1120: PUBLIC BOOL HTRDF_isBag (HTRDF *me, HTElement *e)
1121: {
1122: if (me && e && e->m_sName) {
1123: int len = strlen(e->m_sName);
1124: if (len > 3) return (HTRDF_isRDF(me, e) && !strcmp(&(e->m_sName[len-3]), "Bag"));
1125: }
1126: return NO;
1127: }
1128:
1129: /**
1130: * Is the element a Container
1131: */
1132: PUBLIC BOOL HTRDF_isContainer (HTRDF *me, HTElement *e)
1133: {
1134: return (HTRDF_isSequence(me, e) ||
1135: HTRDF_isAlternative(me, e) ||
1136: HTRDF_isBag(me, e));
1137: }
1138:
1139: /*
1140: * This method matches all properties but those from RDF namespace
1141: */
1142: PUBLIC BOOL HTRDF_isTypedPredicate(HTRDF *me, HTElement *e)
1143: {
1144: if (me && e && e->m_sName) {
1145: int len = strlen(e->m_sName);
2.3 frystyk 1146: char * tp[] = {"predicate", "subject", "object",
2.1 frystyk 1147: "value", "type", "Property", "Statement"};
1148: int i;
1149: if (HTRDF_isRDF(me, e)) {
1150: for(i = 0; i< 7; i++) {
1151: int ntp = strlen(tp[i]);
1152: if (len > ntp) {
1153: if (!strcmp(&(e->m_sName[len-ntp]), tp[i]))
1154: return YES;
1155: }
1156: }
1157: return NO;
1158: }
1159: if (len > 0) return YES;
1160: }
1161: return NO;
1162: }
1163:
2.3 frystyk 1164: PRIVATE void HTRDF_processListItem (HTRDF * me, char * sID, HTElement *listitem,
2.1 frystyk 1165: int iCounter)
1166: {
1167: /*
1168: * Two different cases for
1169: * 1. LI element without content (resource available)
1170: * 2. LI element with content (resource unavailable)
1171: */
2.3 frystyk 1172: char * cName = NULL;
1173: char * sResource = HTRDF_getResource(me, listitem);
2.1 frystyk 1174: char sdig[20];
1175: sprintf(sdig, "_%d", iCounter);
1176: StrAllocMCopy(&cName, RDFMS, sdig, NULL);
1177: if (sResource) {
1178: HTRDF_addTriple(me, cName, sID, sResource);
1179: /* validity checking */
1180: if (!HTList_isEmpty(listitem->m_children)){
1181: HTPrint("Listitem with resource attribute can not have child nodes");
1182: }
1183: StrAllocCopy(listitem->m_sID, sResource);
1184: } else {
1185: HTList *cur = listitem->m_children;
1186: HTElement *n = NULL;
1187: while ((n = (HTElement *) HTList_nextObject(cur))) {
1188: if (HTElement_instanceOfData(n)) {
1189: HTRDF_addTriple(me, cName, sID, n->m_sContent);
1190: } else if (HTRDF_isDescription(me, n)) {
2.3 frystyk 1191: char * sNodeID = HTRDF_processDescription(me, n, NO, YES, NO);
2.1 frystyk 1192: HTRDF_addTriple(me, cName, sID, sNodeID);
1193: StrAllocCopy(listitem->m_sID, sNodeID);
1194: } else if (HTRDF_isListItem(me, n)) {
1195: HTPrint("Can not nest list item inside list item\n");
1196: } else if (HTRDF_isContainer(me, n)) {
2.3 frystyk 1197: char * c = HTRDF_processContainer(me, n);
2.1 frystyk 1198: HTRDF_addTriple(me, cName, sID, n->m_sID);
1199: HT_FREE(c);
1200: } else if (HTRDF_isTypedPredicate(me, n)) {
2.3 frystyk 1201: char * sNodeID = HTRDF_processTypedNode(me, n);
2.1 frystyk 1202: HTRDF_addTriple(me, cName, sID, sNodeID);
1203: HT_FREE(sNodeID);
1204: }
1205: }
1206: }
1207: HT_FREE(cName);
1208: }
1209:
2.3 frystyk 1210: PRIVATE char * HTRDF_processContainer(HTRDF *me, HTElement *n)
2.1 frystyk 1211: {
2.3 frystyk 1212: char * sID = NULL;
1213: char * tName = NULL;
1214: char * aName = NULL;
1215: char * sName = NULL;
1216: char * bName = NULL;
2.1 frystyk 1217: StrAllocMCopy(&tName, RDFMS, "type", NULL);
1218: StrAllocMCopy(&aName, RDFMS, "Alt", NULL);
1219: StrAllocMCopy(&sName, RDFMS, "Seq", NULL);
1220: StrAllocMCopy(&bName, RDFMS, "Bag", NULL);
1221:
1222: StrAllocCopy(sID, n->m_sID);
1223: if (!sID)
1224: sID = HTRDF_newReificationID(me);
1225: /*
1226: * Do the instantiation only once
1227: */
1228: if (!n->m_bDone) {
1229: if (HTRDF_isSequence(me, n)) {
1230: HTRDF_addTriple(me, tName, sID, sName);
1231: } else if (HTRDF_isAlternative(me, n)) {
1232: HTRDF_addTriple(me, tName, sID, aName);
1233: } else if (HTRDF_isBag(me, n)) {
1234: HTRDF_addTriple(me, tName, sID, bName);
1235: }
1236: n->m_bDone = YES;
1237: }
1238: HTRDF_expandAttributes(me, n, n);
1239:
1240: {
1241: HTList *cur = n->m_children;
1242: HTElement *n2 = NULL;
1243: int iCounter = 1;
1244: if (HTList_isEmpty(cur) && HTRDF_isAlternative(me, n))
1245: HTPrint("An RDF:Alt container must have at least one list item\n");
1246: while ((n2 = (HTElement *) HTList_nextObject(cur))) {
1247: if (HTRDF_isListItem(me, n2)) {
1248: HTRDF_processListItem(me, sID, n2, iCounter);
1249: iCounter++;
1250: } else {
1251: HTPrint("Can not nest %s, inside container\n", n2->m_sName);
1252: }
1253: }
1254: } /* end of block */
1255:
1256: HT_FREE(tName); HT_FREE(sName); HT_FREE(aName); HT_FREE(bName);
1257:
1258: return sID;
1259: }
1260: /*
1261: * Manage the typedNode production in the RDF grammar.
1262: *
1263: */
2.3 frystyk 1264: PUBLIC char * HTRDF_processTypedNode(HTRDF *me, HTElement *typedNode)
2.1 frystyk 1265: {
2.3 frystyk 1266: char * sID = HTElement_getAttribute2(typedNode, RDFMS, "ID");
1267: char * sBagID = HTElement_getAttribute2(typedNode, RDFMS, "bagID");
1268: char * sAbout = HTElement_getAttribute2(typedNode, RDFMS, "about");
1269: char * sAboutEach = HTElement_getAttribute2(typedNode, RDFMS, "aboutEach");
1270: /*char * sAboutEachPrefix = HTElement_getAttribute2(typedNode, RDFMS,
2.1 frystyk 1271: "aboutEachPrefix");*/
2.3 frystyk 1272: char * resource = HTElement_getAttribute2(typedNode, RDFMS, "resource");
1273: char * iName = NULL;
1274: char * bName = NULL;
1275: char * tName = NULL;
2.1 frystyk 1276:
2.3 frystyk 1277: char * sObject = NULL;
2.1 frystyk 1278:
1279: StrAllocMCopy(&iName, RDFMS, "ID", NULL);
1280: StrAllocMCopy(&bName, RDFMS, "bagID", NULL);
1281: StrAllocMCopy(&tName, RDFMS, "type", NULL);
1282:
1283: if (resource)
1284: HTPrint("resource attribute not allowed for a typedNode %s\n",
1285: typedNode->m_sName);
1286:
1287: /*
1288: * We are going to manage this typedNode using the processDescription
1289: * routine later on. Before that, place all properties encoded as
1290: * attributes to separate child nodes.
1291: */
1292: {
1293: HTAssoc * assoc;
1294: HTAssocList *cur = typedNode->m_attributes;
2.3 frystyk 1295: char * sAttribute = NULL;
1296: char * tValue = NULL;
1297: char * sValue = NULL;
2.1 frystyk 1298: while((assoc= (HTAssoc *) HTList_nextObject(cur))) {
1299: sAttribute = HTAssoc_name(assoc);
1300: sValue = HTAssoc_value(assoc);
1301: tValue = trim(sValue);
1302: if (strncmp(sAttribute, RDFMS, strlen(RDFMS)) &&
1303: strncmp(sAttribute, XMLSCHEMA, strlen(XMLSCHEMA))) {
1304: if (strlen(tValue) > 0) {
1305: HTAssocList *newAL = HTAssocList_new();
1306: HTElement *newPredicate = HTElement_new(sAttribute, newAL);
1307: HTElement *d = NULL;
1308: HTElement_addAttribute(newPredicate, iName,
1309: sAbout ? sAbout : sID);
1310: HTElement_addAttribute(newPredicate, bName, sBagID);
1311: d = HTElement_new2(tValue);
1312: HTElement_addChild(newPredicate, d);
1313: HTElement_addChild(typedNode, newPredicate);
1314: HTElement_removeAttribute(typedNode, sAttribute);
1315: }
1316: }
1317: HT_FREE(tValue);
1318: } /* end of while */
1319: }/* end of block */
1320: {
1321: if (sAbout)
1322: StrAllocCopy(sObject, sAbout);
1323: else if (sID)
1324: StrAllocCopy(sObject, sID);
1325: else
1326: sObject = HTRDF_newReificationID(me);
1327: StrAllocCopy(typedNode->m_sID, sObject);
1328:
1329: /* special case: should the typedNode have aboutEach attribute,
1330: ** the type predicate should distribute to pointed
1331: ** collection also -> create a child node to the typedNode
1332: */
1333: if (sAboutEach && !HTList_isEmpty(typedNode->m_vTargets)) {
1334: HTAssocList *newAL = HTAssocList_new();
1335: HTElement *newPredicate = HTElement_new(tName, newAL);
1336: HTElement *d = HTElement_new2(typedNode->m_sName);
1337: HTElement_addChild(newPredicate, d);
1338: HTElement_addChild(typedNode, newPredicate);
1339: } else {
1340: HTRDF_addTriple(me, tName, sObject, typedNode->m_sName);
1341: }
1342: HTRDF_processDescription(me, typedNode, NO, NO, YES);
1343: }/* end of block */
1344:
1345: HT_FREE(iName); HT_FREE(bName); HT_FREE(tName);
1346:
1347: return sObject;
1348: }
1349:
1350: /*
1351: * Start processing an RDF/XML document instance from the
1352: * root element rdf.
1353: *
1354: */
1355: PUBLIC BOOL HTRDF_processRDF (HTRDF *me, HTElement *e)
1356: {
1357: if (me && e) {
1358: HTList *cur = e->m_children;
1359: HTElement *ele = NULL;
1360: if (HTList_isEmpty(e->m_children)) {
1361: HTPrint("Empty RDF Element\n");
1362: return NO;
1363: }
1364: while ((ele= (HTElement *) HTList_nextObject(cur))) {
1365: if (HTRDF_isDescription(me, ele)) {
1366: HTRDF_processDescription(me, ele, NO, me->m_bCreateBags,
1367: me->m_bCreateBags);
1368: } else if (HTRDF_isContainer(me, ele)) {
2.3 frystyk 1369: char * c = HTRDF_processContainer(me, ele);
2.1 frystyk 1370: HT_FREE(c);
1371: } else if (HTRDF_isTypedPredicate(me, ele)) {
2.3 frystyk 1372: char * t = HTRDF_processTypedNode(me, ele);
2.1 frystyk 1373: HT_FREE(t);
1374: }
1375: }
1376: return YES;
1377: }
1378: return NO;
1379: }
1380:
1381: /*
1382: * processPredicate handles all elements not defined as special
1383: * RDF elements.
1384: *
1385: * predicate The predicate element itself
1386: * description Context for the predicate
1387: * sTarget The target resource
1388: * reificate Should this predicate be reificated
1389: *
1390: * return the new ID which can be used to identify the predicate
1391: *
1392: */
/*
**	HTRDF_processPredicate: turn one predicate element into triples.
**
**	The statement subject is sTarget. The object is taken from the
**	resource attribute (when the predicate has no children), from the
**	resolved target element, or from the child nodes. With reificate set
**	the statement goes through HTRDF_reificate, and on most of those
**	paths the ID it returns is also stored in predicate->m_sID; otherwise
**	the triple is added directly with HTRDF_addTriple.
**
**	Returns nsStatementID: a string obtained from HTRDF_reificate,
**	StrAllocCopy or HTRDF_processTypedNode, or NULL when no ID was
**	produced (this includes the propAttr error path).
**	HTRDF_processDescription releases the returned value with HT_FREE.
*/
2.3 frystyk 1393: PRIVATE char * HTRDF_processPredicate (HTRDF * me,
2.1 frystyk 1394: HTElement * predicate,
1395: HTElement * description,
2.3 frystyk 1396: char * sTarget,
2.1 frystyk 1397: BOOL reificate)
1398: {
2.3 frystyk 1399: char * sStatementID = HTElement_getAttribute2(predicate, RDFMS, "ID");
1400: char * nsStatementID = NULL;
1401: char * sBagID = HTElement_getAttribute2(predicate, RDFMS, "bagID");
1402: char * sResource = HTRDF_getResource(me, predicate);
2.1 frystyk 1403:
1404: /*
1405: ** If a predicate has other attributes than rdf:ID, rdf:bagID,
1406: ** or xmlns... -> generate new triples according to the spec.
1407: ** (See end of Section 6)
1408: */
1409: {
1410: HTElement * place_holder = NULL;
1411: HTAssocList * newAL = HTAssocList_new();
2.3 frystyk 1412: char * fName = NULL;
1413: char * aName = NULL;
2.1 frystyk 1414:
1415: StrAllocMCopy(&fName, RDFMS, "Description", NULL);
1416: place_holder = HTElement_new(fName, newAL);
1417: HT_FREE(fName);
1418:
/* A true result from HTRDF_expandAttributes is taken to mean that the
** predicate carried property attributes, which are now represented under
** place_holder. */
1419: if (HTRDF_expandAttributes(me, place_holder, predicate)) {
1420:
1421: /* error checking */
1422: if (!HTList_isEmpty(predicate->m_children)) {
1423: HTPrint("%s must be an empty element since it uses propAttr grammar production", predicate->m_sName);
1424: HTElement_delete(place_holder);
1425: return NULL;
1426: }
1427: StrAllocMCopy(&aName, RDFMS, "about", NULL);
1428:
1429: /* determine the 'about' part for the new statements */
1430: if (sStatementID) {
1431: HTElement *data = HTElement_new2(sStatementID);
1432: HTElement_addAttribute(place_holder, aName, sStatementID);
1433:
1434: /* hack: make rdf:ID the value of the predicate */
1435: HTElement_addChild(predicate, data);
1436: } else if (sResource) {
1437: HTElement_addAttribute(place_holder, aName, sResource);
1438: } else {
/* The generated ID is released right after it has been added as the
** attribute value, so nsStatementID is not carried past this block.
** NOTE(review): this relies on HTElement_addAttribute keeping its own copy
** of the value - confirm. */
1439: nsStatementID = HTRDF_newReificationID(me);
1440: HTElement_addAttribute(place_holder, aName, nsStatementID);
1441: HT_FREE(nsStatementID);
1442: }
1443: HT_FREE(aName);
1444:
1445: if (sBagID) {
1446: StrAllocMCopy(&fName, RDFMS, "bagID", NULL);
1447: HTElement_addAttribute(place_holder, fName, sBagID);
1448: HT_FREE(fName);
1449: StrAllocCopy(place_holder->m_sBagID, sBagID);
1450: }
/* NOTE(review): place_holder is not deleted on this path after it has been
** processed - confirm whether something else takes ownership of it. */
1451: HTRDF_processDescription(me, place_holder, NO, NO, me->m_bCreateBags);
1452: } else {
1453:
1454: /* Nothing but xmlns or RDF stuff, so we don't need new element */
1455: HTElement_delete(place_holder);
1456: }
1457: }
1458:
1459: /*
1460: ** Tricky part: if the resource attribute is present for a predicate
1461: ** AND there are no children, the value of the predicate is either
1462: ** 1. the URI in the resource attribute OR
1463: ** 2. the node ID of the resolved #resource attribute
1464: */
1465: if (sResource && HTList_isEmpty(predicate->m_children)) {
1466: if (!HTElement_target(predicate)) {
1467: if (reificate) {
1468: HT_FREE(nsStatementID);
1469: nsStatementID = HTRDF_reificate (me, predicate->m_sName,
1470: sTarget, sResource,
1471: predicate->m_sID);
1472: StrAllocCopy(predicate->m_sID, nsStatementID);
1473: } else {
1474: HTRDF_addTriple(me, predicate->m_sName, sTarget, sResource);
1475: }
1476: } else {
1477: HTElement *target = HTElement_target(predicate);
1478: if (reificate) {
1479: nsStatementID = HTRDF_reificate (me, predicate->m_sName,
1480: sTarget,
1481: target->m_sID,
1482: predicate->m_sID);
1483: StrAllocCopy(predicate->m_sID, nsStatementID);
1484: } else {
1485: HTRDF_addTriple(me, predicate->m_sName, sTarget, target->m_sID);
1486: }
1487: }
/* Only when a statement ID was produced above: replace it with a copy of
** the ID now stored on the predicate. Without reification nsStatementID is
** still NULL here and NULL is returned. */
2.7 ! barstow 1488: if (nsStatementID && predicate->m_sID)
! 1489: StrAllocCopy(nsStatementID, predicate->m_sID);
2.1 frystyk 1490: return nsStatementID;
1491: }
1492:
1493: /*
1494: ** Does this predicate make a reference somewhere using the
1495: ** sResource attribute
1496: */
1497: if (sResource && HTElement_target(predicate)) {
/* dStatementID is the value returned by HTRDF_processDescription; it is
** only copied or passed on here, never freed. */
2.3 frystyk 1498: char * dStatementID = HTRDF_processDescription(me,
2.1 frystyk 1499: HTElement_target(predicate),
1500: YES, NO, NO);
1501: if (reificate) {
1502: HT_FREE(nsStatementID);
1503: nsStatementID = HTRDF_reificate (me, predicate->m_sName,
1504: sTarget, dStatementID,
1505: predicate->m_sID);
1506: StrAllocCopy(predicate->m_sID, nsStatementID);
1507: } else {
1508: StrAllocCopy(nsStatementID, dStatementID);
1509: HTRDF_addTriple(me, predicate->m_sName, sTarget, nsStatementID);
1510: }
1511: return nsStatementID;
1512: }
1513:
1514: /*
1515: ** Before looping through the children, let's check
1516: ** if there are any. If not, the value of the predicate is
1517: ** an anonymous node
1518: */
1519: {
1520: HTList *cur = predicate->m_children;
1521: BOOL bUsedTypedNodeProduction = NO;
1522: HTElement *n2;
2.7 ! barstow 1523: if (nsStatementID && sStatementID)
! 1524: StrAllocCopy(nsStatementID, sStatementID);
2.1 frystyk 1525: if (HTList_isEmpty(cur)) {
1526: if (reificate) {
2.3 frystyk 1527: char * nr = HTRDF_newReificationID(me);
2.1 frystyk 1528: HT_FREE(nsStatementID);
1529: nsStatementID = HTRDF_reificate (me, predicate->m_sName,
1530: sTarget, nr,
1531: predicate->m_sID);
1532: HT_FREE(nr);
1533: } else {
2.3 frystyk 1534: char * nr = HTRDF_newReificationID(me);
2.1 frystyk 1535: HTRDF_addTriple(me, predicate->m_sName, sTarget, nr);
1536: HT_FREE(nr);
1537: }
1538: }
/* One statement per child. nsStatementID is overwritten for each child that
** produces an ID, so the value returned is the one from the last such
** child. */
1539: while ((n2= (HTElement *) HTList_nextObject(cur))) {
1540: if (HTRDF_isDescription(me, n2)) {
1541: HTElement *d2 = n2;
2.3 frystyk 1542: char * dStatementID =HTRDF_processDescription(me, d2, YES, NO, NO);
2.7 ! barstow 1543: if (d2->m_sID && dStatementID && d2->m_sID != dStatementID)
! 1544: StrAllocCopy(d2->m_sID, dStatementID);
2.1 frystyk 1545:
1546: if (reificate) {
1547: HT_FREE(nsStatementID);
1548: nsStatementID = HTRDF_reificate (me, predicate->m_sName,
1549: sTarget, dStatementID,
1550: predicate->m_sID);
1551: } else {
1552: StrAllocCopy(nsStatementID, dStatementID);
1553: HTRDF_addTriple(me, predicate->m_sName, sTarget,
1554: nsStatementID);
1555: }
1556: } else if (HTElement_instanceOfData(n2)) {
2.3 frystyk 1557: char * tValue = NULL;
1558: char * sValue = n2->m_sContent;
2.1 frystyk 1559: /* we've got real data */
1560: /*
1561: * Only if the content is not empty PCDATA (whitespace that is)
1562: * print the triple
1563: */
1564: tValue = trim(sValue);
1565: if (tValue && strlen(tValue) > 0) {
1566: if (reificate) {
1567: HT_FREE(nsStatementID);
1568: nsStatementID = HTRDF_reificate (me, predicate->m_sName,
1569: sTarget, tValue,
1570: predicate->m_sID);
1571: StrAllocCopy(predicate->m_sID, nsStatementID);
1572: } else {
1573: HTRDF_addTriple(me, predicate->m_sName, sTarget, tValue);
1574: }
1575: }
1576: HT_FREE(tValue);
1577: } else if (HTRDF_isContainer(me, n2)) {
/* When the enclosing description has a target, the subject is that
** target's rdf:about value rather than sTarget. */
1578: HTElement *target = HTElement_target(description);
2.3 frystyk 1579: char * aboutTarget =
2.1 frystyk 1580: target ?
1581: HTElement_getAttribute2(target, RDFMS, "about") : NULL;
2.3 frystyk 1582: char * sCollectionID = HTRDF_processContainer(me, n2);
2.1 frystyk 1583: StrAllocCopy(nsStatementID, sCollectionID);
1584: /* Attach the collection to the current predicate */
1585: if (target) {
1586: if (reificate) {
1587: HT_FREE(nsStatementID);
1588: nsStatementID=HTRDF_reificate (me, predicate->m_sName,
1589: aboutTarget,
1590: sCollectionID,
1591: predicate->m_sID);
1592: StrAllocCopy(predicate->m_sID, nsStatementID);
1593: } else {
1594: HTRDF_addTriple(me, predicate->m_sName, aboutTarget,
1595: sCollectionID);
1596: }
1597: } else {
1598: if (reificate) {
1599: HT_FREE(nsStatementID);
1600: nsStatementID=HTRDF_reificate (me, predicate->m_sName,
1601: sTarget, sCollectionID,
1602: predicate->m_sID);
1603: StrAllocCopy(predicate->m_sID, nsStatementID);
1604: } else {
1605: HTRDF_addTriple(me, predicate->m_sName, sTarget,
1606: sCollectionID);
1607: }
1608: }
1609: HT_FREE(sCollectionID);
1610: } else if (HTRDF_isTypedPredicate(me, n2)) {
/* An extra typed node is reported but still processed below. */
1611: if (bUsedTypedNodeProduction) {
1612: HTPrint("Only one typedNode allowed inside a predicate (Extra typedNode: %s )\n", n2->m_sName);
1613: } else {
1614: bUsedTypedNodeProduction = YES;
1615: }
1616: HT_FREE(nsStatementID);
1617: nsStatementID = HTRDF_processTypedNode(me, n2);
1618: HTRDF_addTriple(me, predicate->m_sName, sTarget, nsStatementID);
1619: }
1620: }
1621: return nsStatementID;
1622: } /* end of block */
/* Not reached: the block above returns on every path. */
1623: return NULL;
1624: }
1625:
1626: /*
1627: * processDescription manages Description elements
1628: *
1629: * description The Description element itself
1630: * inPredicate Is this is a nested description
1631: * reificate Do we need to reificate
1632: * createBag Do we create a bag container
1633: *
1634: * return An ID for the description
1635: *
1636: */
/*
**	HTRDF_processDescription: generate the triples for one Description
**	element (also used for typed nodes and for the temporary Description
**	elements built inside this file).
**
**	me		the parser
**	description	the element to process
**	inPredicate	YES when the description is nested in a predicate
**	reificate	passed on to HTRDF_processPredicate on most paths
**	createBag	together with me->m_bCreateBags, or an rdf:bagID
**			attribute, enables the Bag triples; cleared locally
**			when the target is a container
**
**	Returns description->m_sID. The string stays with the element: the
**	callers in this file copy it and do not free it. Returns NULL on the
**	aboutEach (with target) and aboutEachPrefix paths, where the work is
**	delegated to recursive calls on new Description elements.
*/
2.3 frystyk 1637: PUBLIC char * HTRDF_processDescription (HTRDF * me,
2.1 frystyk 1638: HTElement * description,
1639: BOOL inPredicate,
1640: BOOL reificate,
1641: BOOL createBag)
1642: {
1643: int iChildCount = 1;
1644: BOOL bOnce = YES;
1645:
2.3 frystyk 1646: char * sAbout = HTElement_getAttribute2(description, RDFMS, "about");
1647: char * sAboutEach = HTElement_getAttribute2(description, RDFMS, "aboutEach");
1648: char * sAboutEachPrefix = HTElement_getAttribute2(description, RDFMS,
2.1 frystyk 1649: "aboutEachPrefix");
2.3 frystyk 1650: char * sBagid = HTElement_getAttribute2(description, RDFMS, "bagID");
1651: char * sID = HTElement_getAttribute2(description, RDFMS, "ID");
2.1 frystyk 1652: HTElement *target = HTElement_target(description);
1653: BOOL hasTarget = HTList_isEmpty(description->m_vTargets) ? NO : YES;
1654: BOOL targetIsContainer = NO;
2.3 frystyk 1655: char * sTargetAbout = NULL;
1656: char * sTargetBagID = NULL;
1657: char * sTargetID = NULL;
1658: char * dName = NULL;
1659: char * aName = NULL;
2.1 frystyk 1660:
1661: /*
1662: ** Return immediately if the description has already been managed
1663: */
1664: if (description->m_bDone) return description->m_sID;
1665:
/* dName and aName are allocated from here on, so every later return has to
** free both. */
1666: StrAllocMCopy(&dName, RDFMS, "Description", NULL);
1667: StrAllocMCopy(&aName, RDFMS, "about", NULL);
1668:
1669: /*
1670: ** Determine what the target of the Description reference is
1671: */
1672: if (hasTarget) {
2.3 frystyk 1673: char * sTargetID2 = HTElement_getAttribute2(target, RDFMS, "ID");
2.1 frystyk 1674: sTargetAbout = HTElement_getAttribute2(target, RDFMS, "about");
1675: sTargetBagID = HTElement_getAttribute2(target, RDFMS, "bagID");
1676: if (me->m_sSource && sTargetID2) {
1677: StrAllocMCopy(&sTargetID, me->m_sSource, sTargetID2, NULL);
1678: } else {
1679: StrAllocCopy(sTargetID, sTargetID2);
1680: }
1681: /*
1682: * Target is collection if
1683: * 1. it is identified with bagID attribute
1684: * 2. it is identified with ID attribute and is a collection
1685: */
/* NOTE(review): &(sAbout[1]) skips the first character of rdf:about without
** looking at it - presumably a leading '#'; confirm, and note that an empty
** value is not guarded against. */
1686: if (sTargetBagID && sAbout) {
1687: targetIsContainer = !strcmp(&(sAbout[1]), sTargetBagID);
1688: } else {
1689: if (sTargetID && sAbout && !strcmp(&(sAbout[1]), sTargetID) &&
1690: HTRDF_isContainer(me, target))
1691: targetIsContainer = YES;
1692: }
1693: HT_FREE(sTargetID);
1694: }
1695:
1696: /*
1697: * Check if there are properties encoded using the abbreviated
1698: * syntax
1699: */
1700: HTRDF_expandAttributes(me, description, description);
1701:
1702: /*
1703: * Manage the aboutEach attribute here
1704: */
1705: if (sAboutEach && hasTarget) {
1706: if (HTRDF_isContainer(me, target)) {
1707: HTList *cur = target->m_children;
1708: HTElement *ele = NULL;
1709: while ((ele= (HTElement *) HTList_nextObject(cur))) {
1710: if (HTRDF_isListItem(me, ele)) {
2.3 frystyk 1711: char * sResource = HTRDF_getResource(me, ele);
2.1 frystyk 1712: if (sResource) {
1713: HTElement * newDescription = NULL;
1714: HTElement * ele2;
1715: HTList * cur2 = description->m_children;
1716:
1717: /*
1718: * Manage <li resource="..." /> case
1719: */
/* sResource is already known to be non-NULL here, so newDescription is
** always created on this path. */
1720: if (sResource) {
1721: HTAssocList *newAL = HTAssocList_new();
1722: newDescription = HTElement_new(dName, newAL);
1723: HTElement_addAttribute(newDescription, aName, sResource);
1724: }
1725:
/* The children of this description are added to the new element as well;
** they are not copied. */
1726: while ((ele2 = (HTElement *) HTList_nextObject(cur2))){
1727: if (newDescription) HTElement_addChild(newDescription, ele2);
1728: }
1729:
1730: if (newDescription)
1731: HTRDF_processDescription(me, newDescription, NO, NO, NO);
1732:
1733: /* Not needed anymore */
1734: HTElement_delete(newDescription);
1735:
1736: } else {
1737: /**
1738: * Otherwise we have a structured value inside <li>
1739: *
1740: * loop through the children of <li>
1741: * (can be only one)
1742: */
1743: HTList *cur2 = ele->m_children;
1744: HTElement *ele2 = NULL;
1745: while ((ele2 = (HTElement *) HTList_nextObject(cur2))) {
1746: HTAssocList *newAL = HTAssocList_new();
1747: HTElement *newNode = HTElement_new(dName, newAL);
1748: HTList *cur3 = description->m_children;
1749: HTElement *ele3 = NULL;
1750: /* loop through the items in the
1751: * description with aboutEach
1752: * and add them to the target
1753: */
1754: while ((ele3 = (HTElement *)
1755: HTList_nextObject(cur3))) {
1756: HTElement_addChild(newNode, ele3);
1757: }
1758: HTElement_addTarget(newNode, ele2);
/* NOTE(review): unlike newDescription above, newNode is not deleted after
** it has been processed - confirm whether that is intended. */
1759: HTRDF_processDescription(me, newNode, YES, NO, NO);
1760: }
1761: }
1762: } else if (HTRDF_isTypedPredicate(me, ele)) {
1763: HTAssocList *newAL = HTAssocList_new();
1764: HTElement *newNode = HTElement_new(dName, newAL);
1765: HTList *cur2 = description->m_children;
1766: HTElement *ele2 = NULL;
1767: while ((ele2 = (HTElement *) HTList_nextObject(cur2))) {
1768: HTElement_addChild(newNode, ele2);
1769: }
1770: HTElement_addTarget(newNode, ele);
1771: HTRDF_processDescription(me, newNode, YES, NO, NO);
1772: }
1773: } /* end of while */
1774: } else if (HTRDF_isDescription(me, target)) {
1775: HTList *cur = target->m_children;
1776: HTElement *ele = NULL;
1777: while ((ele = (HTElement *) HTList_nextObject(cur))) {
1778: HTAssocList *newAL = HTAssocList_new();
1779: HTElement *newNode = HTElement_new(dName, newAL);
1780: HTList *cur2 = description->m_children;
1781: HTElement *ele2 = NULL;
1782: while ((ele2 = (HTElement *) HTList_nextObject(cur2))) {
1783: HTElement_addChild(newNode, ele2);
1784: }
1785: HTElement_addTarget(newNode, ele);
1786: HTRDF_processDescription(me, newNode, YES, NO, NO);
1787: } /* end of while */
1788: }
1789:
/* The aboutEach case ends here: m_bDone is not set and no ID is returned. */
1790: HT_FREE(dName);
1791: HT_FREE(aName);
1792: return NULL;
1793: }
1794:
1795: /*
1796: * Manage the aboutEachPrefix attribute here
1797: */
1798: if (sAboutEachPrefix) {
1799: if (hasTarget) {
/* One new Description per target, about the target's rdf:about value, with
** the children of this description added to it. The inner 'target'
** declared below shadows the outer one. */
1800: HTList *cur = description->m_vTargets;
1801: HTElement *target = NULL;
1802: while ((target = (HTElement *) HTList_nextObject(cur))) {
1803: HTList *cur2 = description->m_children;
1804: HTElement *ele2 = NULL;
1805: HTElement *newDescription = NULL;
1806: HTAssocList *newAL = HTAssocList_new();
1807: sTargetAbout = HTElement_getAttribute2(target, RDFMS, "about");
1808: newDescription = HTElement_new(dName, newAL);
1809: HTElement_addAttribute(newDescription, aName, sTargetAbout);
1810: while ((ele2 = (HTElement *) HTList_nextObject(cur2))) {
1811: HTElement_addChild(newDescription, ele2);
1812: }
1813: HTRDF_processDescription(me, newDescription, NO, NO, NO);
1814: }
1815: }
1816:
1817: HT_FREE(dName);
1818: HT_FREE(aName);
1819: return NULL;
1820: }
1821: /*
1822: * Enumerate through the children
1823: */
1824: {
1825: HTList *cur = description->m_children;
1826: HTElement *n = NULL;
1827: while ((n = (HTElement *) HTList_nextObject(cur))) {
1828: if (HTRDF_isDescription(me, n))
1829: HTPrint("Can not nest Description inside Description\n");
1830: else if (HTRDF_isListItem(me, n))
1831: HTPrint("Can not nest List Item inside Description\n");
1832: else if (HTRDF_isContainer(me, n))
1833: HTPrint("Can not nest Container inside Description\n");
1834: else if (HTRDF_isTypedPredicate(me, n)) {
/* sChildID receives the value returned by HTRDF_processPredicate and is
** released with HT_FREE at the end of this branch. */
2.3 frystyk 1835: char * sChildID = NULL;
2.1 frystyk 1836: if (hasTarget && targetIsContainer) {
/* Target is a container: the subject is the target's bag ID, or else its
** ID; reification and bag creation are switched off. */
1837: sChildID = HTRDF_processPredicate(me, n, description,
1838: target->m_sBagID ?
1839: target->m_sBagID :
1840: target->m_sID, NO);
1841: StrAllocCopy(description->m_sID, sChildID);
1842: createBag = NO;
1843: } else if (hasTarget) {
1844: sChildID = HTRDF_processPredicate(me, n, description,
1845: target->m_sBagID ?
1846: target->m_sBagID :
1847: target->m_sID, reificate);
1848: StrAllocCopy(description->m_sID, sChildID);
1849: } else if (!hasTarget && !inPredicate) {
/* Top-level description: the subject is rdf:about, else rdf:ID, else the
** ID stored on the element (generated here if missing). An rdf:bagID
** attribute forces reification. */
1850: if (!description->m_sID) {
2.3 frystyk 1851: char * nr = HTRDF_newReificationID(me);
2.1 frystyk 1852: StrAllocCopy(description->m_sID, nr);
1853: HT_FREE(nr);
1854: }
1855: if (!sAbout) {
1856: if (sID)
1857: sAbout = sID;
1858: else
1859: sAbout = description->m_sID;
1860: }
1861: sChildID = HTRDF_processPredicate(me, n, description,
1862: sAbout, sBagid ?
1863: YES : reificate);
1864:
1865: } else if (!hasTarget && inPredicate) {
1866: if (!sAbout) {
1867: if (sID) {
1868: StrAllocCopy(description->m_sID, sID);
1869: sAbout = sID;
1870: } else {
1871: if (!description->m_sID) {
2.3 frystyk 1872: char * nr = HTRDF_newReificationID(me);
2.1 frystyk 1873: StrAllocCopy(description->m_sID, nr);
1874: HT_FREE(nr);
1875: }
1876: sAbout = description->m_sID;
1877: }
1878: } else {
/* sAbout may already be description->m_sID from an earlier pass of this
** loop; the pointer test avoids copying the string onto itself. */
2.7 ! barstow 1879: if (description->m_sID != sAbout)
! 1880: StrAllocCopy(description->m_sID, sAbout);
2.1 frystyk 1881: }
1882: sChildID = HTRDF_processPredicate(me, n, description, sAbout, NO);
1883: }
1884: /*
1885: * Each Description block creates also a Bag node which
1886: * has links to all properties within the block IF
1887: * the m_bCreateBags variable is true
1888: */
1889: if (sBagid || (me->m_bCreateBags && createBag)) {
2.3 frystyk 1890: char * sNamespace = RDFMS;
/* bOnce: the Bag type triple is generated a single time, for the first
** child that produced an ID. */
2.1 frystyk 1891: if (bOnce && sChildID) {
2.3 frystyk 1892: char * tName = NULL;
1893: char * bName = NULL;
2.1 frystyk 1894: bOnce = NO;
1895: if (!description->m_sBagID) {
2.3 frystyk 1896: char * nr = HTRDF_newReificationID(me);
2.1 frystyk 1897: StrAllocCopy(description->m_sBagID, nr);
1898: HT_FREE(nr);
1899: }
1900: if (!description->m_sID)
1901: StrAllocCopy(description->m_sID,
1902: description->m_sBagID);
1903: StrAllocMCopy(&tName, sNamespace, "type", NULL);
1904: StrAllocMCopy(&bName, sNamespace, "Bag", NULL);
1905: HTRDF_addTriple(me, tName, description->m_sBagID, bName);
1906: HT_FREE(tName);
1907: HT_FREE(bName);
1908:
1909: }
/* Membership triple RDFMS + "_<n>", numbered from 1 by iChildCount. */
1910: if (sChildID) {
2.3 frystyk 1911: char * tName = NULL;
2.1 frystyk 1912: char si[20];
1913: sprintf(si, "%d", iChildCount);
1914: StrAllocMCopy(&tName, sNamespace, "_", si, NULL);
1915: HTRDF_addTriple(me, tName, description->m_sBagID, sChildID);
1916: iChildCount++;
1917: HT_FREE(tName);
1918: }
1919: }
1920: HT_FREE(sChildID);
1921: }
1922: }
1923: } /* end of block*/
1924:
1925: description->m_bDone = YES;
1926:
1927: HT_FREE(dName);
1928: HT_FREE(aName);
1929: return (description->m_sID);
1930: }
1931:
1932: /*
1933: * Given an XML document (well-formed HTML, for example),
1934: * look for a suitable element to start parsing from
1935: *
1936: */
1937: PUBLIC BOOL HTRDF_processXML (HTRDF *me, HTElement *ele)
1938: {
1939: if (me && ele) {
1940: if (HTRDF_isRDF(me, ele)) {
1941: if (HTRDF_isRDFroot(me, ele)) {
1942: HTRDF_processRDF(me, ele);
1943: } else if (HTRDF_isDescription(me, ele)) {
1944: HTRDF_processDescription(me, ele, NO, me->m_bCreateBags,
1945: me->m_bCreateBags);
1946: }
1947: } else {
1948: HTList *cur = ele->m_children;
1949: HTElement *child = NULL;
1950: while ((child = (HTElement *) HTList_nextObject(cur))) {
1951: HTRDF_processXML(me, child);
1952: }
1953: }
1954:
1955: /* MISSING RECURSION */
1956:
1957: return YES;
1958: }
1959: return NO;
1960: }
1961:
1962: /*
1963: * Return the root element pointer. This requires the parsing
1964: * has been already done.
1965: */
1966: PUBLIC HTElement * HTRDF_root (HTRDF *me)
1967: {
1968: return me ? me->m_root : NULL;
1969: }
1970:
1971: /*
1972: * Return the full namespace URI for a given prefix sPrefix.
1973: * The default namespace is identified with xmlns prefix.
1974: * The namespace of xmlns attribute is an empty string.
1975: */
1976:
2.3 frystyk 1977: PUBLIC char * HTRDF_namespace(HTRDF * me, char * sPrefix)
2.1 frystyk 1978: {
2.3 frystyk 1979: char * nPrefix = NULL;
2.1 frystyk 1980: HTAssocList * calist;
1981: HTList * cur = me->m_namespaceStack;
1982:
1983: if (!sPrefix)
1984: StrAllocCopy(nPrefix, "xmlns");
1985:
1986: while ((calist = (HTAssocList *) HTList_nextObject(cur))) {
2.3 frystyk 1987: char * sValue = HTAssocList_findObjectCaseSensitiveExact(calist, sPrefix);
2.1 frystyk 1988: if (sValue) {
1989: StrAllocCopy(nPrefix, sValue);
1990: return nPrefix;
1991: }
1992: }
1993: /*
1994: * Give error only if
1995: * 1. the prefix is not from the reserved xml namespace
1996: * 2. the prefix is not xmlns which is to look for the default
1997: * namespace
1998: */
1999: if (!strcmp(sPrefix, XMLSCHEMA)) {
2000: StrAllocCopy(nPrefix, sPrefix);
2001: return nPrefix;
2002: } else if (!strcmp(sPrefix, "xmlns")) {
2003: StrAllocCopy(nPrefix, "");
2004: return nPrefix;
2005: } else
2006: HTPrint("Unresolved Namespace prefix %s\n", sPrefix);
2007:
2008: StrAllocCopy(nPrefix, "");
2009: return nPrefix;
2010: }
2011:
2012: /*
2013: * Methods to determine whether we are parsing
2014: * parseType="Literal" or parseType="Resource"
2015: */
2016:
2017: PUBLIC BOOL HTRDF_parseLiteral(HTRDF *me)
2018: {
2019: HTElement *e = NULL;
2020: HTList *cur = me->m_elementStack;
2021: if (!HTList_isEmpty(me->m_elementStack)) {
2022: while((e = (HTElement *) HTList_nextObject(cur))) {
2.3 frystyk 2023: char * sParseType = NULL;
2.1 frystyk 2024: sParseType = HTElement_getAttribute2(e, RDFMS, "parseType");
2025: if (sParseType) {
2026: if (strcmp(sParseType, "Resource"))
2027: return YES;
2028: }
2029: }
2030: }
2031: return NO;
2032: }
2033:
2034: /*
2035: * Methods to determine whether we are parsing
2036: * parseType="Literal" or parseType="Resource"
2037: */
2038:
2039: PUBLIC BOOL HTRDF_parseResource(HTRDF *me)
2040: {
2041: HTElement *e = NULL;
2042: HTList *cur = me->m_elementStack;
2043: if (!HTList_isEmpty(me->m_elementStack)) {
2044: while((e = (HTElement *) HTList_nextObject(cur))) {
2.3 frystyk 2045: char * sParseType = NULL;
2.1 frystyk 2046: sParseType = HTElement_getAttribute2(e, RDFMS, "parseType");
2047: if (sParseType) {
2048: if (!strcmp(sParseType, "Resource"))
2049: return YES;
2050: }
2051: }
2052: }
2053: return NO;
2054: }
2055: /*
2056: * checkAttributes goes through the attributes of element e<
2057: * to see
2058: * 1. if there are symbolic references to other nodes in the data model.
2059: * in which case they must be stored for later resolving with
2060: * resolveLater method.
2061: * 2. if there is an identity attribute, it is registered using
2062: * registerResource or registerID method.
2063: *
2064: */
2065:
2066: PRIVATE void HTRDF_checkAttributes(HTRDF *me, HTElement *e)
2067: {
2068: {
2.3 frystyk 2069: char * sResource = HTElement_getAttribute2(e, RDFMS, "resource");
2.1 frystyk 2070:
2071: if (sResource && sResource[0] == '#')
2072: HTRDF_resolveLater(me, e);
2073: }
2074: {
2.3 frystyk 2075: char * sAboutEach = HTElement_getAttribute2(e, RDFMS, "aboutEach");
2.1 frystyk 2076:
2077: if (sAboutEach && sAboutEach[0] == '#')
2078: HTRDF_resolveLater(me, e);
2079: }
2080: {
2.3 frystyk 2081: char * sAboutEachPrefix = HTElement_getAttribute2(e, RDFMS,
2.1 frystyk 2082: "aboutEachPrefix");
2083:
2084: if (sAboutEachPrefix && sAboutEachPrefix[0] == '#')
2085: HTRDF_resolveLater(me, e);
2086: }
2087: {
2.3 frystyk 2088: char * sAbout = HTElement_getAttribute2(e, RDFMS, "about");
2.1 frystyk 2089: if (sAbout) {
2090: if (sAbout[0] == '#')
2091: HTRDF_resolveLater(me, e);
2092: else
2093: HTRDF_registerResource(me, e);
2094: }
2095: }
2096:
2097: {
2.3 frystyk 2098: char * sBagID = HTElement_getAttribute2(e, RDFMS, "bagID");
2.1 frystyk 2099:
2100: if (sBagID) {
2101: HTRDF_registerID(me, sBagID, e);
2102: StrAllocCopy(e->m_sBagID, sBagID);
2103: }
2104: }
2105: {
2.3 frystyk 2106: char * sID = HTElement_getAttribute2(e, RDFMS, "ID");
2.1 frystyk 2107: if (sID) {
2108: HTRDF_registerID(me, sID, e);
2109: StrAllocCopy(e->m_sID, sID);
2110: }
2111: }
2112: }
2113: /*
2114: * Add the element e to the m_vResolveQueue
2115: * to be resolved later.
2116: */
2117: PUBLIC void HTRDF_resolveLater(HTRDF *me, HTElement *e)
2118: {
2119: HTList_addObject(me->m_vResolveQueue, e);
2120: }
2121: /*
2122: * Add an element e to the Hashtable m_hIDtable
2123: * which stores all nodes with an ID
2124: */
2125:
2.3 frystyk 2126: PUBLIC void HTRDF_registerID(HTRDF *me, char * sID, HTElement *e)
2.1 frystyk 2127: {
2128: if (HTHashtable_object(me->m_hIDtable, sID))
2129: HTPrint("Node ID %s redefined", sID);
2130: HTHashtable_addObject(me->m_hIDtable, sID, e);
2131: }
2132: /*
2133: * Add an element e to the Vector m_vResources
2134: * which stores all nodes with an URI
2135: */
2136: PUBLIC void HTRDF_registerResource(HTRDF *me, HTElement *e)
2137: {
2138: HTList_addObject(me->m_vResources, e);
2139: }
2140:
2141: /*
2142: * Look for a node by name sID from the Hashtable
2143: * m_hIDtable of all registered IDs.
2144: */
2145:
2.3 frystyk 2146: PUBLIC HTElement *HTRDF_lookforNode(HTRDF *me, char * sID)
2.1 frystyk 2147: {
2148: if (sID)
2149: return (HTElement *) HTHashtable_object(me->m_hIDtable, sID);
2150: return NULL;
2151: }
2152:
2153: /*
2154: ** Special method to deal with rdf:resource attribute
2155: */
2.3 frystyk 2156: PUBLIC char * HTRDF_getResource(HTRDF *me, HTElement *e)
2.1 frystyk 2157: {
2.3 frystyk 2158: char * sResource = HTElement_getAttribute2(e, RDFMS, "resource");
2.1 frystyk 2159: if (sResource != NULL && sResource[0] == '\0')
2160: sResource = me->m_sSource;
2161: return sResource;
2162: }
2163:
2164: /*
2165: ** Take an element ele with its parent element parent
2166: ** and evaluate all its attributes to see if they are non-RDF specific
2167: ** and non-XML specific in which case they must become children of
2168: ** the ele node.
2169: */
2170: PRIVATE BOOL HTRDF_expandAttributes (HTRDF * me, HTElement * parent, HTElement * ele)
2171: {
2172: BOOL foundAbbreviation = NO;
2.3 frystyk 2173: char * sAttribute = NULL;
2174: char * sValue = NULL;
2.1 frystyk 2175: HTAssoc * assoc;
2176: HTAssocList * cur = ele->m_attributes;
2177: int lxmlschema = strlen(XMLSCHEMA);
2178: int lrdfms = strlen(RDFMS);
2179:
2180: while ((assoc= (HTAssoc *) HTList_nextObject(cur))) {
2181: int latt;
2182: sAttribute = HTAssoc_name(assoc);
2183: sValue = HTAssoc_value(assoc);
2.4 barstow 2184:
2.1 frystyk 2185: latt = strlen(sAttribute);
2186: if (!strncmp(sAttribute, XMLSCHEMA, lxmlschema))
2187: continue;
2188:
2189: if (!strncmp(sAttribute, RDFMS, lrdfms) &&
2190: (sAttribute[lrdfms]!='_') &&
2191: latt > 5 && strcmp(&(sAttribute[latt-5]), "value") &&
2192: strcmp(&(sAttribute[latt-4]), "type"))
2193: continue;
2194:
2195: if (strlen(sValue) > 0) {
2196: HTAssocList * newAL = HTAssocList_new();
2197: HTElement * newElement = HTElement_new(sAttribute, newAL);
2198: HTElement * newData = HTElement_new2(sValue);
2199: HTElement_addChild(newElement, newData);
2200: HTElement_addChild(parent, newElement);
2201: foundAbbreviation = YES;
2202: }
2203: }
2204: return foundAbbreviation;
2205: }
2206:
2207: /**
2208: * Create a new reification ID by using a name part and an
2209: * incremental counter m_iReificationCounter.
2210: */
2.3 frystyk 2211: PUBLIC char * HTRDF_newReificationID (HTRDF *me)
2.1 frystyk 2212: {
2.3 frystyk 2213: char * nsid = NULL;
2.1 frystyk 2214: char nsrc[20];
2215: me->m_iReificationCounter++;
2216: sprintf(nsrc, "%d", me->m_iReificationCounter);
2217: if (!me->m_sSource) {
2218: StrAllocMCopy(&nsid, "genid", nsrc, NULL);
2219: } else {
2220: StrAllocMCopy(&nsid, me->m_sSource, "#genid", nsrc, NULL);
2221: }
2222: return nsid;
2223: }
2224:
2225: /*
2226: * reificate creates one new node and four new triples
2227: * and returns the ID of the new node
2228: */
2229:
2.3 frystyk 2230: PRIVATE char * HTRDF_reificate(HTRDF *me, char * sPredicate, char * sSubject,
2231: char * sObject, char * sNodeID)
2.1 frystyk 2232: {
2.3 frystyk 2233: char * sName = NULL;
2234: char * pName = NULL;
2235: char * oName = NULL;
2236: char * tName = NULL;
2237: char * stName = NULL;
2238: char * tNodeID = NULL;
2.1 frystyk 2239:
2240: if (!sNodeID)
2241: tNodeID = HTRDF_newReificationID(me);
2242: else
2243: StrAllocCopy(tNodeID, sNodeID);
2244:
2245: StrAllocMCopy(&sName, RDFMS, "subject", NULL);
2246: StrAllocMCopy(&pName, RDFMS, "predicate", NULL);
2247: StrAllocMCopy(&oName, RDFMS, "object", NULL);
2248: StrAllocMCopy(&tName, RDFMS, "type", NULL);
2249: StrAllocMCopy(&stName, RDFMS, "Statement", NULL);
2250:
2251: /*
2252: * The original statement must remain in the data model
2253: */
2254: HTRDF_addTriple(me, sPredicate, sSubject, sObject);
2255:
2256: /*
2257: * Do not reificate reificated properties
2258: */
2259: if (strcmp(sPredicate, sName) && strcmp(sPredicate, pName) &&
2260: strcmp(sPredicate, oName) && strcmp(sPredicate, tName)) {
2261:
2262: /* Reificate by creating 4 new triples */
2263: HTRDF_addTriple(me, pName, tNodeID, sPredicate);
2264: HTRDF_addTriple(me, sName, tNodeID, (sSubject[0]=='\0' ? me->m_sSource: sSubject));
2265: HTRDF_addTriple(me, oName, tNodeID, sObject);
2266: HTRDF_addTriple(me, tName, tNodeID, stName);
2267: } else
2268: HT_FREE(tNodeID);
2269:
2270: HT_FREE(sName);
2271: HT_FREE(pName);
2272: HT_FREE(oName);
2273: HT_FREE(tName);
2274: HT_FREE(stName);
2275:
2276: return tNodeID;
2277: }
2278: /*
2279: * Create a new triple and add it to the m_triples List
2280: * Send the triple to the Output stream
2281: */
2282:
2.3 frystyk 2283: PUBLIC void HTRDF_addTriple (HTRDF *me, char * sPredicate, char * sSubject,
2284: char * sObject)
2.1 frystyk 2285: {
2286: HTTriple *t = NULL;
2287:
2288: /*
2289: * If there is no subject (about=""), then use the URI/filename where
2290: * the RDF description came from
2291: */
2292: if (!sPredicate || !sSubject || !sObject) {
2293: HTPrint("Predicate %s when subject %s and object %s \n",
2294: sPredicate ? sPredicate : "null",
2295: sSubject ? sSubject : "null",
2296: sObject ? sObject : "null");
2297: return;
2298: }
2299:
2300: if (sSubject[0]=='\0')
2301: sSubject = me->m_sSource;
2302:
2303: t = HTTriple_new(sPredicate, sSubject, sObject);
2304:
2305: /* Call the triple callback handler (if any) with this new triple */
2306: if (me->newTripleInstance && t) (*(me->newTripleInstance))(me, t, me->tripleContext);
2307:
2308: HTList_addObject(me->m_triples, t);
2309: }
2310:
2311: /*
2312: * createBags method allows one to determine whether SiRPAC
2313: * produces Bag instances for each Description block.
2314: * The default setting is not to generate them.
2315: */
2316:
2317: PUBLIC void HTRDF_createBags(HTRDF *me, BOOL b)
2318: {
2319: if (me)
2320: me->m_bCreateBags = b;
2321: }
2322:
2323: /*
2324: Set output stream for RDF parser
2325: */
2326:
2327: PUBLIC void HTRDF_setOutputStream(HTRDF *me, HTStream *ostream)
2328: {
2329: if (me)
2330: me->ostream = ostream;
2331: }
2332:
2333: PUBLIC BOOL HTRDF_registerNewTripleCallback (HTRDF * me, HTTripleCallback_new * cbf, void * context)
2334: {
2335: if (me) {
2336: me->newTripleInstance = cbf;
2337: me->tripleContext = context;
2338: return YES;
2339: }
2340: return NO;
2341: }
2342:
2343: PUBLIC BOOL HTRDF_registerNewParserCallback (HTRDFCallback_new * me, void * context)
2344: {
2345: RDFInstance = me;
2346: RDFInstanceContext = context;
2347: return YES;
2348: }
2349:
2350: /* ------------------------------------------------------------------------- */
2351: /* HTRDFTriples STREAM HANDLERS */
2352: /* ------------------------------------------------------------------------- */
2353:
2354: PRIVATE int generate_triples(HTStream *me)
2355: {
2356: HTRDF *rdfp = me ? me->rdfparser : NULL;
2357: if (rdfp) {
2358:
2359: HTRDF_resolve(rdfp);
2360:
2361: HTRDF_processXML(rdfp, HTRDF_root(rdfp));
2362:
2363: return HT_OK;
2364: }
2365: return HT_ERROR;
2366: }
2367:
2368: PRIVATE int HTRDFTriples_flush (HTStream * me)
2369: {
2370: if (me->target)
2371: return (*me->target->isa->flush)(me->target);
2372: return HT_OK;
2373: }
2374:
2375: PRIVATE int HTRDFTriples_free (HTStream * me)
2376: {
2377: int status = HT_OK;
2378:
2379: status = generate_triples(me);
2380:
2381: HTRDF_delete(me->rdfparser);
2382:
2383: if (me->target) {
2384: if ((status = (*me->target->isa->_free)(me->target)) == HT_WOULD_BLOCK)
2385: return HT_WOULD_BLOCK;
2386: }
2387: HTTRACE(XML_TRACE, "RDF Parser.. FREEING...\n");
2388: HT_FREE(me);
2389: return status;
2390: }
2391:
2392: PRIVATE int HTRDFTriples_abort (HTStream * me, HTList * e)
2393: {
2394: HTTRACE(XML_TRACE, "RDF Parser.. ABORTING...\n");
2395: HTRDF_delete(me->rdfparser);
2396: if (me->target)
2397: (*me->target->isa->abort)(me->target, NULL);
2398: HT_FREE(me);
2399: return HT_ERROR;
2400: }
2401:
2402: PRIVATE int HTRDFTriples_write (HTStream * me, const char * buf, int len)
2403: {
2404: return HT_OK;
2405: }
2406:
2407: PRIVATE int HTRDFTriples_putCharacter (HTStream * me, char c)
2408: {
2409: return HTRDFTriples_write(me, &c, 1);
2410: }
2411:
2412: PRIVATE int HTRDFTriples_putString (HTStream * me, const char * s)
2413: {
2414: return HTRDFTriples_write(me, s, (int) strlen(s));
2415: }
2416:
2417: PRIVATE const HTStreamClass HTRDFTriplesClass =
2418: {
2419: "rdf",
2420: HTRDFTriples_flush,
2421: HTRDFTriples_free,
2422: HTRDFTriples_abort,
2423: HTRDFTriples_putCharacter,
2424: HTRDFTriples_putString,
2425: HTRDFTriples_write
2426: };
2427:
2428: PRIVATE HTStream * RDFParser_new (HTRequest * request,
2429: void * param,
2430: HTFormat input_format,
2431: HTFormat output_format,
2432: HTStream * output_stream)
2433: {
2434: HTStream * me = NULL;
2435: if ((me = (HTStream *) HT_CALLOC(1, sizeof(HTStream))) == NULL)
2436: HT_OUTOFMEM("HTRDFTriples_new");
2437: me->isa = &HTRDFTriplesClass;
2438: me->state = HT_OK;
2439: me->request = request;
2440: me->target = output_stream ? output_stream : HTErrorStream();
2441:
2442: /* Now create the RDF parser instance */
2443: if ((me->rdfparser = HTRDF_new()) == NULL) {
2444: HT_FREE(me);
2445: return HTErrorStream();
2446: }
2447:
2448: /* Set the source (I guess mostly to follow SiRPAC API) */
2449: {
2450: char * uri = HTAnchor_address((HTAnchor *) HTRequest_anchor(request));
2451: HTRDF_setSource(me->rdfparser, uri);
2452: HT_FREE(uri);
2453: }
2454:
2455: /* Where are we putting data? */
2456: HTRDF_setOutputStream(me->rdfparser, me);
2457:
2458: /* If you want to create Bags, change it to YES */
2459: HTRDF_createBags(me->rdfparser, NO);
2460:
2461: /* Register our new XML Instance handler */
2462: /* @@@ THIS SHOULD BE DONE USING XML NAMESPACE SO THAT WE DON'T CONFLICT @@@ */
2463: HTXMLCallback_registerNew(rdf_newInstance, me->rdfparser);
2464:
2465: HTTRACE(XML_TRACE, "RDF Parser.. Stream created\n");
2466:
2467: return me;
2468: }
2469:
2470: PUBLIC HTStream * HTRDFParser_new (HTRequest * request,
2471: void * param,
2472: HTFormat input_format,
2473: HTFormat output_format,
2474: HTStream * output_stream)
2475: {
2476: return HTXML_new(request, param, input_format, output_format,
2477: RDFParser_new(request, param, input_format, output_format, output_stream));
2478: }
2479:
2480: PRIVATE void triple_newInstance (HTRDF * rdfp, HTTriple * t, void * context)
2481: {
2482: if (rdfp && t) {
2483: HTStream *ostream = rdfp->ostream;
2484: if (ostream) {
2485: PUTC(ostream,'(');
2486: PUTS(ostream, t->m_sPredicate);
2487: PUTC(ostream,',');
2488: PUTS(ostream, t->m_sSubject);
2489: PUTC(ostream,',');
2490: PUTS(ostream, t->m_sObject);
2491: PUTC(ostream,')');
2492: PUTC(ostream,'\n');
2493: }
2494: }
2495: }
2496:
2497: PUBLIC HTStream * HTRDFToTriples (HTRequest * request,
2498: void * param,
2499: HTFormat input_format,
2500: HTFormat output_format,
2501: HTStream * output_stream)
2502: {
2.2 frystyk 2503: HTStream * me = RDFParser_new(request, param, input_format, output_format, output_stream);
2.1 frystyk 2504: HTTRACE(XML_TRACE, "RDF Converter. To Triples\n");
2505:
2506: /* Register our own tripple instance handler */
2507: HTRDF_registerNewTripleCallback(me->rdfparser, triple_newInstance, NULL);
2508:
2509: /* Create an XML parser instance and return */
2510: return HTXML_new(request, param, input_format, output_format, me);
2511: }
2512:
2.6 kahan 2513: /* HTRDFParseFile
2514: ** ---------------
2515: ** This function parses a file of RDF in a synchronous, non-blocking
2516: ** way. In other words, the file is not asynchronously loaded. If
2517: ** the file is successfully parsed, NULL is returned; otherwise a
2518: ** pointer to an error message is returned. The caller must NOT
2519: ** free the pointer returned by this function.
2520: */
2521:
2.5 barstow 2522: PUBLIC char * HTRDFParseFile (const char *file_name, HTTripleCallback_new * new_triple_callback)
2523: {
2524: char buff[512]; /* the file input buffer */
2525: FILE *fp;
2526: XML_Parser xmlparser;
2527: HTRDF *rdfparser;
2528: HTStream * stream = NULL;
2.6 kahan 2529: char *uri = NULL;
2.5 barstow 2530:
2531: /* Sanity check */
2.6 kahan 2532: if (!file_name) {
2533: HTTRACE(XML_TRACE, "RDFParseFile.. file name is NULL\n");
2534: return "RDFParseFile: file_name is NULL";
2535: }
2536:
2.5 barstow 2537:
2538: /* If the file does not exist, return now */
2539: fp = fopen (file_name, "r");
2.6 kahan 2540: if (!fp) { /* annotation index file doesn't exist */
2541: HTTRACE(XML_TRACE, "RDFParseFile.. file open failed\n");
2542: return "RDFParseFile: file open failed";
2543: }
2.5 barstow 2544:
2545: /* We need an XML parser */
2546: #ifdef USE_NS
2547: xmlparser = XML_ParserCreateNS (NULL, ':');
2548: #else
2549: xmlparser = XML_ParserCreate (NULL);
2550: #endif /* USE_NS */
2551:
2552: if (!xmlparser) {
2.6 kahan 2553: fclose (fp);
2554: HTTRACE(XML_TRACE, "RDFParseFile.. Could not create an XML parser\n");
2555: return "RDFParseFile: Could not create an XML parser";
2.5 barstow 2556: }
2557:
2558: /* We need also need RDF parser to create the triples */
2559: rdfparser = HTRDF_new();
2560: if (!rdfparser) {
2561: fclose (fp);
2562: XML_ParserFree(xmlparser);
2563: return "RDFParseFile: Could not allocate memory for RDF parser";
2564: }
2565:
2566: /* Must construct a URI from file_name for the parser */
2.6 kahan 2567: uri = HTLocalToWWW (file_name, "file:");
2.5 barstow 2568:
2569: HTRDF_setSource(rdfparser, uri);
2570: HTRDF_createBags(rdfparser, NO);
2571:
2572: if (new_triple_callback)
2573: HTRDF_registerNewTripleCallback(rdfparser, new_triple_callback, NULL);
2574: else
2575: HTRDF_registerNewTripleCallback(rdfparser, triple_newInstance, NULL);
2576:
2577: rdf_setHandlers(xmlparser);
2578: XML_SetUserData(xmlparser, rdfparser);
2579:
2580: /* Create a stream to be used to process the triple output */
2581: if ((stream = (HTStream *) HT_CALLOC(1, sizeof(HTStream))) == NULL) {
2.6 kahan 2582: HT_FREE(uri);
2.5 barstow 2583: fclose (fp);
2584: XML_ParserFree(xmlparser);
2585: HTRDF_delete(rdfparser);
2586: return "RDFParseFile: Could not allocate memory for HTStream";
2587: }
2588: stream->isa = &HTRDFTriplesClass;
2589: stream->state = HT_OK;
2590: stream->request = NULL; /* Don't have a request */
2591: stream->target = NULL; /* Don't have another stream */
2592: stream->rdfparser = rdfparser;
2593:
2594: /*
2595: * The parsing occurs on one read buffer at a time instead of
2596: * reading everything into memory and then parsing
2597: */
2598: for (;;) {
2599: int done;
2600: int buff_len;
2601: fgets(buff, sizeof(buff), fp);
2602: if (ferror(fp)) {
2.6 kahan 2603: HT_FREE(uri);
2.5 barstow 2604: fclose (fp);
2605: XML_ParserFree(xmlparser);
2606: HTRDF_delete(rdfparser);
2607: HT_FREE(stream);
2608: return "RDFParseFile: error reading file";
2609: }
2610: done = feof(fp);
2611: if (done)
2612: buff_len = 0;
2613: else
2614: buff_len = strlen (buff);
2615: if (! XML_Parse(xmlparser, buff, buff_len, done)) {
2616: fprintf (stderr, "Parse error at line %d:\n%s\n",
2617: XML_GetCurrentLineNumber(xmlparser),
2618: XML_ErrorString(XML_GetErrorCode(xmlparser)));
2.6 kahan 2619: HT_FREE(uri);
2.5 barstow 2620: fclose(fp);
2621: XML_ParserFree(xmlparser);
2622: HTRDF_delete(rdfparser);
2623: HT_FREE(stream);
2624: return "RDFParseFile: parse error";
2625: }
2626: if (done)
2627: break;
2628: }
2629:
2630: /* The file has been parsed, generate the triples */
2631: generate_triples(stream);
2632:
2633: /* Cleanup */
2.6 kahan 2634: HT_FREE(uri);
2.5 barstow 2635: fclose (fp);
2636: XML_ParserFree(xmlparser);
2637: HTRDF_delete(rdfparser);
2638: HT_FREE(stream);
2639:
2640: return NULL;
2641: }
2.6 kahan 2642:
2643:
2644:
2645:
2646:
2647:
2648:
Webmaster