Annotation of libwww/Library/src/HTRDF.c, revision 2.2
2.1 frystyk 1: /*
2: ** RDF PARSER
3: **
2.2 ! frystyk 4: ** @(#) $Id: HTRDF.c,v 2.1 1999/04/18 20:23:51 frystyk Exp $
2.1 frystyk 5: **
6: ** Copyright © 1995-1998 World Wide Web Consortium, (Massachusetts
7: ** Institute of Technology, Institut National de Recherche en
8: ** Informatique et en Automatique, Keio University). All Rights
9: ** Reserved. This program is distributed under the W3C's Software
10: ** Intellectual Property License. This program is distributed in the hope
11: ** that it will be useful, but WITHOUT ANY WARRANTY; without even the
12: ** implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
13: ** PURPOSE. See W3C License http://www.w3.org/Consortium/Legal/ for more
14: ** details.
15: **
16: ** This module requires expat
17: **
18: ** AUTHOR:
19: ** John Punin <puninj@cs.rpi.edu>,
20: ** Henrik
21: */
22:
23: #include "WWWLib.h"
24: #include "WWWInit.h"
25: #include "WWWXML.h"
26: #include "HTRDF.h"
27:
28: struct _HTStream {
29: const HTStreamClass * isa;
30: int state;
31: HTRequest * request;
32: HTStream * target;
33: HTRDF * rdfparser;
34: };
35:
/* Hand one character, or one string, to the target stream of stream `t` */
#define PUTC(t,c)       (*(t)->target->isa->put_character)((t)->target, (c))
#define PUTS(t,s)       (*(t)->target->isa->put_string)((t)->target, (s))
38:
39: struct _HTTriple {
40: String m_sPredicate;
41: String m_sSubject;
42: String m_sObject;
43: };
44:
45: struct _HTElement {
46: String m_sName;
47: HTAssocList * m_attributes;
48: HTList * m_children;
49: String m_sID;
50: String m_sBagID;
51: HTList * m_vTargets;
52: BOOL m_bDone;
53: String m_sPrefix;
54: String m_sContent;
55: };
56:
57: struct _HTRDFParser {
58: HTList * m_namespaceStack;
59: HTList * m_elementStack;
60: HTElement * m_root;
61: HTList * m_triples;
62: String m_sSource;
63: HTList * m_vAllNameSpaces;
64:
65: BOOL m_bCreateBags;
66: BOOL m_bFetchSchemas;
67:
68: HTList * m_parseTypeStack;
69: HTList * m_parseElementStack;
70: String m_sLiteral;
71:
72: HTList * m_vResources;
73: HTList * m_vResolveQueue;
74: HTHashtable * m_hIDtable;
75: int m_iReificationCounter;
76:
77: HTStream * ostream;
78:
79: HTTripleCallback_new * newTripleInstance;
80: void * tripleContext;
81: };
82:
83: /* @@@ Should not be global but controlled by name spaces @@@ */
84: PRIVATE HTRDFCallback_new * RDFInstance = NULL;
85: PRIVATE void * RDFInstanceContext = NULL;
86:
87: PRIVATE String HTRDF_processContainer (HTRDF *me, HTElement *e);
88: PRIVATE String HTRDF_processPredicate (HTRDF *me, HTElement *predicate,
89: HTElement *description,
90: String sTarget,
91: BOOL reificate);
92: PRIVATE void HTRDF_processListItem (HTRDF *me,String sID, HTElement *listitem,
93: int iCounter);
94: PRIVATE void HTRDF_checkAttributes (HTRDF *me,HTElement *Element);
95: PRIVATE BOOL HTRDF_expandAttributes (HTRDF *me, HTElement *parent, HTElement *ele);
96: PRIVATE String HTRDF_reificate (HTRDF *me, String sPredicate, String sSubject,
97: String sObject, String sNodeID);
98:
99: /* ------------------------------------------------------------------------- */
100:
101: /*
102: ** Searches a whole list of Strings and returns true if the String is found.
103: */
104: PRIVATE BOOL HTList_contains (HTList *list, String s)
105: {
106: HTList *cur = list;
107: String cs = NULL;
108: while ((cs = (String) HTList_nextObject(cur))) {
109: if (!strcmp(cs, s)) return YES;
110: }
111: return NO;
112: }
113:
114: /*
115: ** Useful function that Trims a string
116: ** @@@ Should use HTStrip() @@@
117: */
118: PRIVATE char * trim (char *s)
119: {
120: char *p = NULL, *t = NULL;
121: int len = s ? strlen(s) : -1;
122: if (s && len > 0) {
123: StrAllocCopy(t, s);
124: p = &(s[len-1]);
125: while(p!=s) {
126: if (!isspace((int)(*p)))
127: break;
128: p--;
129: }
130: t[(int)(p-s)+1] = '\0';
131: if (isspace((int) t[(int)(p-s)]))
132: t[(int)(p-s)] = '\0';
133: }
134: return t;
135: }
136:
137: /* ------------------------------------------------------------------------- */
138: /* TRIPLE of RDF */
139: /* ------------------------------------------------------------------------- */
140:
141: PUBLIC HTTriple * HTTriple_new (String p, String s, String o)
142: {
143: HTTriple * me = NULL;
144: if (p && s && o) {
145: if ((me = (HTTriple *) HT_CALLOC(1, sizeof(HTTriple))) == NULL)
146: HT_OUTOFMEM("HTTriple_new");
147: StrAllocCopy(me->m_sPredicate, p);
148: StrAllocCopy(me->m_sSubject, s);
149: StrAllocCopy(me->m_sObject, o);
150: }
151: return me;
152: }
153:
154: PUBLIC BOOL HTTriple_delete (HTTriple * me)
155: {
156: if (me) {
157: HT_FREE(me->m_sPredicate);
158: HT_FREE(me->m_sSubject);
159: HT_FREE(me->m_sObject);
160: HT_FREE(me);
161: return YES;
162: }
163: return NO;
164: }
165:
166: PUBLIC void HTTriple_print (HTTriple * me)
167: {
168: if (me)
169: HTPrint("TRIPLE(%s,%s,%s)\n", me->m_sPredicate, me->m_sSubject,
170: me->m_sObject);
171: }
172:
173: PUBLIC String HTTriple_subject (HTTriple * me)
174: {
175: return me ? me->m_sSubject : NULL;
176: }
177:
178: PUBLIC String HTTriple_predicate (HTTriple * me)
179: {
180: return me ? me->m_sPredicate : NULL;
181: }
182:
183: PUBLIC String HTTriple_object (HTTriple * me)
184: {
185: return me ? me->m_sObject : NULL;
186: }
187:
188: /* ------------------------------------------------------------------------- */
189: /* ELEMENT of RDF */
190: /* ------------------------------------------------------------------------- */
191:
192: PUBLIC HTElement * HTElement_new (String sName, HTAssocList * al)
193: {
194: HTElement * me = NULL;
195: if (sName) {
196: if ((me = (HTElement *) HT_CALLOC(1, sizeof(HTElement))) == NULL)
197: HT_OUTOFMEM("HTElement_new");
198: StrAllocCopy(me->m_sName, sName);
199: me->m_attributes = al ? al : HTAssocList_new();
200: me->m_children = HTList_new();
201: /*me->m_nodes = HTAssocList_new();*/ /* Sirpac does not use nodes list */
202: me->m_vTargets = HTList_new();
203: me->m_bDone = FALSE;
204: }
205: return me;
206: }
207:
208: /*
209: ** Creates a Data Element and saves the data in the Content field.
210: ** Data Element does not have attributes
211: */
212: PUBLIC HTElement * HTElement_new2 (String sContent)
213: {
214: HTElement * me = NULL;
215: if (sContent) {
216: if ((me = (HTElement *) HT_CALLOC(1, sizeof(HTElement))) == NULL)
217: HT_OUTOFMEM("HTElement_new2");
218: StrAllocMCopy(&me->m_sName, "[DATA: ", sContent, "]", NULL);
219: me->m_attributes = NULL;
220: me->m_children = HTList_new();
221: /*me->m_nodes = HTAssocList_new();*/
222: me->m_vTargets = HTList_new();
223: me->m_bDone = FALSE;
224: StrAllocCopy(me->m_sContent, sContent);
225: }
226: return me;
227: }
228:
229: PUBLIC BOOL HTElement_addData (HTElement *me, String sContent)
230: {
231: if (me && sContent) {
232: int l = strlen(me->m_sName);
233: StrAllocCat(me->m_sContent, sContent);
234: me->m_sName[l-1]='\0';
235: StrAllocMCat(&me->m_sName, sContent, "]", NULL);
236: return YES;
237: }
238: return NO;
239: }
240:
241: PUBLIC BOOL HTElement_delete (HTElement * me)
242: {
243: if (me) {
244: HT_FREE(me->m_sName);
245: if (me->m_attributes) HTAssocList_delete(me->m_attributes);
246: if (me->m_children) HTList_delete(me->m_children);
247: HT_FREE(me->m_sID);
248: HT_FREE(me->m_sBagID);
249: if (me->m_vTargets) HTList_delete(me->m_vTargets);
250: HT_FREE(me->m_sPrefix);
251: HT_FREE(me->m_sContent);
252: HT_FREE(me);
253: return YES;
254: }
255: return NO;
256: }
257:
258: PUBLIC BOOL HTElement_addChild (HTElement * me, HTElement * element)
259: {
260: return (me && element) ? HTList_appendObject(me->m_children, element) : NO;
261: }
262:
263: PUBLIC BOOL HTElement_addAttribute (HTElement * me, String sName, String sValue)
264: {
265: return (me && sName && sValue) ?
266: HTAssocList_addObject(me->m_attributes, sName, sValue) : NO;
267: }
268:
269: PUBLIC BOOL HTElement_removeAttribute (HTElement * me, String sName)
270: {
271: return (me && sName) ? HTAssocList_removeObject(me->m_attributes, sName) : NO;
272: }
273:
274: PUBLIC String HTElement_getAttribute (HTElement * me, String sName)
275: {
276: return (me && sName) ? HTAssocList_findObjectCaseSensitiveExact(me->m_attributes, sName) : NULL;
277: }
278:
279: PUBLIC String HTElement_getAttribute2 (HTElement * me, String sNamespace, String sName)
280: {
281: String fValue = NULL;
282: String fName = NULL;
283: if (me && sNamespace && sName) {
284: StrAllocMCopy(&fName, sNamespace, sName, NULL);
285: fValue = HTAssocList_findObjectCaseSensitiveExact(me->m_attributes, fName);
286: HT_FREE(fName);
287: }
288: return fValue;
289: }
290:
291: PUBLIC BOOL HTElement_addTarget (HTElement * me, HTElement * element)
292: {
293: return (me && element) ? HTList_addObject(me->m_vTargets, element) : NO;
294: }
295:
296: PUBLIC HTElement * HTElement_target (HTElement * me)
297: {
298: return me ? (HTElement *) HTList_lastObject(me->m_vTargets) : NULL;
299: }
300:
301: PUBLIC BOOL HTElement_instanceOfData (HTElement * me)
302: {
303: return (me && me->m_sContent) ? YES : NO;
304: }
305:
306: /* ------------------------------------------------------------------------- */
307: /* EXPAT HANDLERS */
308: /* ------------------------------------------------------------------------- */
309:
310: /*
311: * Called for each new element.
312: * Build up the document tree using an element stack
313: */
314: PRIVATE void XML_startElement (void * userData,
315: const XML_Char * name, const XML_Char ** atts)
316: {
317: HTRDF * rdfp = (HTRDF *) userData;
318: HTAssocList * namespaces = HTAssocList_new();
319: HTAssocList * newAL = HTAssocList_new();
320: int i = 0;
321: /**
322: * The following loop tries to identify special xmlns prefix
323: * attributes and update the namespace stack accordingly.
324: * While doing all this, it builds another AttributeList instance
325: * which will hold the expanded names of the attributes
326: * (I think this approach is only useful for RDF which uses
327: * attributes as an abbreviated syntax for element names)
328: */
329:
330: if (atts) {
331: while (atts[i]) {
332: String aName = (String ) atts[i];
333: if (!strcmp(aName, "xmlns")) {
334: String aValue = (String) atts[i+1];
335: int len = aValue ? strlen(aValue) : -1;
336: if (len == 0 && !rdfp->m_sSource)
337: aValue = rdfp->m_sSource;
338: HTAssocList_addObject(namespaces, aName, aValue);
339: /* save all non-RDF schema addresses */
340: if (!HTList_contains(rdfp->m_vAllNameSpaces, aValue) &&
341: strncmp(aValue, RDFMS, strlen(RDFMS)) &&
342: strncmp(aValue, RDFSCHEMA, strlen(RDFSCHEMA))) {
343: String nname = NULL;
344: StrAllocCopy(nname, aValue);
345: HTList_addObject(rdfp->m_vAllNameSpaces, nname);
346: }
347:
348: /* Special case: Don't save document's own address */
349: if (rdfp->m_sSource &&
350: !strncmp(aValue, rdfp->m_sSource, strlen(rdfp->m_sSource))) {
351: String nname = NULL;
352: StrAllocCopy(nname, aValue);
353: HTList_addObject(rdfp->m_vAllNameSpaces, nname);
354: }
355: } else if (!strncmp(aName, "xmlns:", 6)) {
356: String aValue = (String) atts[i+1];
357: String nName = NULL;
358: int len = aValue ? strlen(aValue) : -1;
359: if (len == 0 && !rdfp->m_sSource)
360: aValue = rdfp->m_sSource;
361: StrAllocCopy(nName, &(aName[6]));
362: HTAssocList_addObject(namespaces, nName, aValue);
363: HT_FREE(nName);
364:
365: /* Save all non-RDF schema addresses */
366: if (!HTList_contains(rdfp->m_vAllNameSpaces, aValue) &&
367: strncmp(aValue, RDFMS, strlen(RDFMS)) &&
368: strncmp(aValue, RDFSCHEMA, strlen(RDFSCHEMA))) {
369: String nname = NULL;
370: StrAllocCopy(nname, aValue);
371: HTList_addObject(rdfp->m_vAllNameSpaces, nname);
372: }
373:
374: /* Special case: Don't save document's own address */
375: if (rdfp->m_sSource &&
376: !strncmp(aValue, rdfp->m_sSource, strlen(rdfp->m_sSource))) {
377: String nname = NULL;
378: StrAllocCopy(nname, aValue);
379: HTList_addObject(rdfp->m_vAllNameSpaces, nname);
380: }
381: }
382: i+=2;
383: } /* end of while */
384: } /* end of if */
385:
386: /*
387: ** Place new namespace declarations into the stack
388: ** (Yes, I could optimize this a bit, not it wastes space
389: ** if there are no xmlns definitions)
390: */
391: HTList_addObject(rdfp->m_namespaceStack, namespaces);
392:
393: /*
394: ** Figure out the prefix part if it exists and
395: ** determine the namespace of the element accordingly
396: */
397: {
398: String sNamespace = NULL;
399: String sElementName = NULL;
400: String sPrefix2 = NULL;
401: HTElement *newElement = NULL;
402: char *pindex = strchr(name, ':');
403: int ix = pindex ? (int) (pindex - name) : -1 ;
404: if (ix > 0) {
405: if (!(sPrefix2 = HT_MALLOC(ix+1)))
406: HT_OUTOFMEM("XML_startELement");
407: strncpy(sPrefix2, name, ix);
408: sPrefix2[ix]='\0';
409: sNamespace = HTRDF_namespace(rdfp, sPrefix2);
410: StrAllocCopy(sElementName, &(name[ix+1]));
411: HT_FREE(sPrefix2);
412: } else {
413: sNamespace = HTRDF_namespace(rdfp, "xmlns");
414: StrAllocCopy(sElementName, name);
415: }
416:
417: /*
418: * Finally look for attributes other than the special xmlns,
419: * expand them, and place to the new Attribute List
420: */
421: i = 0;
422: if (atts) {
423: while (atts[i]) {
424: String aName = (String) atts[i];
425: String sAttributeNamespace = NULL;
426: if (strncmp(aName, "xmlns", 5)) {
427: String aValue = (String) atts[i+1];
428: String sPrefix = NULL;
429: /* Expat does not have type for attributes */
430: pindex = strchr(aName, ':');
431: ix = pindex ? (int) (pindex - aName) : -1;
432: if (ix > 0) {
433: if (!(sPrefix = HT_MALLOC(ix+1)))
434: HT_OUTOFMEM("XML_startELement");
435: strncpy(sPrefix, aName, ix);
436: sPrefix[ix] = '\0';
437: sAttributeNamespace = HTRDF_namespace(rdfp, sPrefix);
438: aName = &(aName[ix+1]);
439: HT_FREE(sPrefix);
440: } else {
441: if (!sNamespace)
442: sAttributeNamespace = HTRDF_namespace(rdfp, "xmlns");
443: else
444: StrAllocCopy(sAttributeNamespace, sNamespace);
445: }
446:
447: if (HTRDF_parseLiteral(rdfp)) {
448: if (!sPrefix) {
449: if (!(sPrefix = HT_MALLOC(8)))
450: HT_OUTOFMEM("XML_startELement");
451: sprintf(sPrefix, "gen%d\n", i);
452: }
453: {
454: String fName = NULL;
455: StrAllocMCopy(&fName, sPrefix, ":", aValue, NULL);
456: HTAssocList_addObject(newAL, fName, aValue);
457: HT_FREE(fName);
458: StrAllocMCopy(&fName, "xmlns:", sPrefix, NULL);
459: HTAssocList_addObject(newAL, fName, sAttributeNamespace);
460: HT_FREE(fName);
461: }
462: } else {
463: String fName = NULL;
464: StrAllocMCopy(&fName, sAttributeNamespace, aName, NULL);
465: HTAssocList_addObject(newAL, fName, aValue);
466: HT_FREE(fName);
467: }
468:
469: HT_FREE(sAttributeNamespace);
470:
471: /*
472: ** This call will try to see if the user is using
473: ** RDF look-alike elements from another namespace
474: **
475: ** Note: you can remove the call if you wish
476: */
477: #if 0
478: HTRDF_likeRDF (rdfp, sAttributeNamespace, aName);
479: #endif
480:
481: } /* end of if */
482: i+=2;
483: } /* end of while */
484: } /* end of if atts */
485:
486: /*
487: * If we have parseType="Literal" set earlier, this element
488: * needs some additional attributes to make it stand-alone
489: * piece of XML
490: */
491: if (HTRDF_parseLiteral(rdfp)) {
492: String fName = NULL;
493: if (!sPrefix2) {
494: if (sNamespace)
495: HTAssocList_addObject(newAL, "xmlns:gen", sNamespace);
496: StrAllocMCopy(&fName, "gen", sElementName, NULL);
497: newElement = HTElement_new(fName, newAL);
498: StrAllocCopy(newElement->m_sPrefix, "gen");
499: HT_FREE(fName);
500: } else {
501: String sAttributeNamespace = HTRDF_namespace(rdfp, sPrefix2);
502: if (sAttributeNamespace) {
503: StrAllocMCopy(&fName, "xmlns:", sPrefix2, NULL);
504: HTAssocList_addObject(newAL, fName, sAttributeNamespace);
505: HT_FREE(fName);
506: }
507: StrAllocMCopy(&fName, sPrefix2, ":", sElementName, NULL);
508: newElement = HTElement_new(fName, newAL);
509: HT_FREE(fName);
510: }
511: } else {
512: String fName = NULL;
513: StrAllocMCopy(&fName, sNamespace, sElementName, NULL);
514: newElement = HTElement_new(fName, newAL);
515: HT_FREE(fName);
516: /* HTRDF_likeRDF (rdfp, sNamespace, sElementName); */
517: }
518: HT_FREE(sElementName);
519: HT_FREE(sNamespace);
520: HTRDF_checkAttributes(rdfp, newElement);
521:
522: /*
523: ** Check parseType
524: */
525: {
526: String fName = NULL;
527: String sLiteralValue = NULL;
528: StrAllocMCopy(&fName, RDFMS, "parseType", NULL);
529: sLiteralValue = HTElement_getAttribute(newElement, fName);
530: HT_FREE(fName);
531: if (sLiteralValue && strcmp(sLiteralValue, "Resource")) {
532: /**
533: * This is the management of the element where
534: * parseType="Literal" appears
535: *
536: * You should notice RDF V1.0 conforming implementations
537: * must treat other values than Literal and Resource as
538: * Literal. This is why the condition is !equals("Resource")
539: */
540:
541: HTList_addObject(rdfp->m_parseTypeStack, sLiteralValue);
542: if (!HTList_isEmpty(rdfp->m_elementStack)) {
543: HTElement *e = (HTElement *)
544: HTList_lastObject(rdfp->m_elementStack);
545: HTElement_addChild(e, newElement);
546: }
547: HTList_addObject(rdfp->m_elementStack, newElement);
548: HTList_addObject(rdfp->m_parseElementStack, newElement);
549: HT_FREE(rdfp->m_sLiteral);
550: StrAllocCopy(rdfp->m_sLiteral, "");
551: return;
552: }
553:
554: if (HTRDF_parseLiteral(rdfp)) {
555: /*
556: * This is the management of any element nested within
557: * a parseType="Literal" declaration
558: */
559: HTList_addObject(rdfp->m_elementStack, newElement);
560: return;
561: }
562:
563: /*
564: ** Update the containment hierarchy with the stack.
565: */
566: if (!HTList_isEmpty(rdfp->m_elementStack)) {
567: HTElement *e = (HTElement *)
568: HTList_lastObject(rdfp->m_elementStack);
569: HTElement_addChild(e, newElement);
570: }
571:
572: /*
573: ** Place the new element into the stack
574: */
575: HTList_addObject(rdfp->m_elementStack, newElement);
576: if (sLiteralValue && !strcmp(sLiteralValue, "Resource")) {
577: HTList_addObject(rdfp->m_parseTypeStack, sLiteralValue);
578: HTList_addObject(rdfp->m_parseElementStack, newElement);
579: HT_FREE(rdfp->m_sLiteral);
580: StrAllocCopy(rdfp->m_sLiteral, "");
581:
582: /*
583: * Since parseType="Resource" implies the following
584: * production must match Description, let's create
585: * an additional Description node here in the document tree.
586: */
587: {
588: String fName = NULL;
589: HTElement *desc = NULL;
590: HTAssocList * al = HTAssocList_new ();
591: StrAllocMCopy(&fName, RDFMS, "Description", NULL);
592: desc = HTElement_new(fName, al);
593: HT_FREE(fName);
594: if (!HTList_isEmpty(rdfp->m_elementStack)) {
595: HTElement *e = (HTElement *)
596: HTList_lastObject(rdfp->m_elementStack);
597: HTElement_addChild(e, desc);
598: }
599: HTList_addObject(rdfp->m_elementStack, desc);
600: }
601: } /* end of if */
602: } /* end of block */
603: } /* end of block */
604: }
605:
606: /*
607: * For each end of an element scope step back in the
608: * element and namespace stack
609: */
610: PRIVATE void XML_endElement (void * userData,
611: const XML_Char * name)
612: {
613: HTRDF * rdfp = (HTRDF *) userData;
614: BOOL bParseLiteral = rdfp ? HTRDF_parseLiteral(rdfp) : NO;
615: HTAssocList * namespaces = HTList_removeLastObject(rdfp->m_namespaceStack);
616: rdfp->m_root = (HTElement *) HTList_removeLastObject(rdfp->m_elementStack);
617: if (namespaces) HTAssocList_delete(namespaces);
618:
619: if (bParseLiteral) {
620: HTElement *pe = (HTElement *)
621: HTList_lastObject(rdfp->m_parseElementStack);
622: if (pe != rdfp->m_root) {
623: /* do nothing */
624: } else {
625: HTElement *de = HTElement_new2(rdfp->m_sLiteral);
626: HTElement_addChild(pe, de);
627: HT_FREE(rdfp->m_sLiteral);
628: StrAllocCopy(rdfp->m_sLiteral, "");
629: HTList_removeLastObject(rdfp->m_parseElementStack);
630: HTList_removeLastObject(rdfp->m_parseTypeStack);
631: }
632: } else if (HTRDF_parseResource(rdfp)) {
633: /**
634: * If we are doing parseType="Resource"
635: * we need to explore whether the next element in
636: * the stack is the closing element in which case
637: * we remove it as well (remember, there's an
638: * extra Description element to be removed)
639: */
640:
641: if (!HTList_isEmpty(rdfp->m_elementStack)) {
642: HTElement *pe = (HTElement *)
643: HTList_lastObject(rdfp->m_parseElementStack);
644: HTElement *e = (HTElement *)
645: HTList_lastObject(rdfp->m_elementStack);
646: if (pe == e) {
647: e = (HTElement *) HTList_removeLastObject(rdfp->m_elementStack);
648: HTList_removeLastObject(rdfp->m_parseElementStack);
649: HTList_removeLastObject(rdfp->m_parseTypeStack);
650: }
651: }
652: }
653: }
654:
655: PRIVATE void XML_characterData (void * userData,
656: const XML_Char * s, int len)
657: {
658: /*
659: * Place all characters as Data instance to the containment
660: * hierarchy with the help of the stack.
661: */
662: HTRDF * rdfp = (HTRDF *) userData;
663: HTElement *e = (HTElement *)
664: HTList_lastObject(rdfp->m_elementStack);
665: String tstr = NULL, str = NULL;
666: if (!(str = HT_MALLOC(len+1)))
667: HT_OUTOFMEM("XML_characterData");
668: strncpy(str, s, len);
669: str[len]='\0';
670: if (HTRDF_parseLiteral(rdfp)) {
671: StrAllocCat(rdfp->m_sLiteral, str);
672: HT_FREE(str);
673: return;
674: }
675: /* JUST FOR EXPAT */
676: {
677: HTElement *lch = (HTElement *) HTList_lastObject(e->m_children);
678: if (lch && HTElement_instanceOfData(lch)) {
679: HTElement_addData(lch, str);
680: HT_FREE(str);
681: return;
682: }
683: }
684: /*
685: * Warning: this is not correct procedure according to XML spec.
686: * All whitespace matters!
687: */
688: tstr = trim(str);
689: if (strlen(tstr) > 0) {
690: HTElement * de = HTElement_new2(tstr);
691: HTElement_addChild(e, de);
692: }
693: HT_FREE(str); HT_FREE(tstr);
694: }
695:
696: PRIVATE void XML_processingInstruction (void * userData,
697: const XML_Char * target,
698: const XML_Char * data)
699: {
700: return;
701: }
702:
703: /*
704: ** This is called for any characters in the XML document for
705: ** which there is no applicable handler. This includes both
706: ** characters that are part of markup which is of a kind that is
707: ** not reported (comments, markup declarations), or characters
708: ** that are part of a construct which could be reported but
709: ** for which no handler has been supplied. The characters are passed
710: ** exactly as they were in the XML document except that
711: ** they will be encoded in UTF-8. Line boundaries are not normalized.
712: ** Note that a byte order mark character is not passed to the default handler.
713: ** If a default handler is set, internal entity references
714: ** are not expanded. There are no guarantees about
715: ** how characters are divided between calls to the default handler:
716: ** for example, a comment might be split between multiple calls.
717: */
718: PRIVATE void XML_default (void * userData,
719: const XML_Char * s, int len)
720: {
721: return;
722: }
723:
724: /*
725: ** This is called for a declaration of an unparsed (NDATA)
726: ** entity. The base argument is whatever was set by XML_SetBase.
727: ** The entityName, systemId and notationName arguments will never be null.
728: ** The other arguments may be.
729: */
730: PRIVATE void XML_unparsedEntityDecl (void * userData,
731: const XML_Char * entityName,
732: const XML_Char * base,
733: const XML_Char * systemId,
734: const XML_Char * publicId,
735: const XML_Char * notationName)
736: {
737: return;
738: }
739:
740: /*
741: ** This is called for a declaration of notation.
742: ** The base argument is whatever was set by XML_SetBase.
743: ** The notationName will never be null. The other arguments can be.
744: */
745: PRIVATE void XML_notationDecl (void * userData,
746: const XML_Char * notationName,
747: const XML_Char * base,
748: const XML_Char * systemId,
749: const XML_Char * publicId)
750: {
751: return;
752: }
753:
754: /*
755: ** This is called for a reference to an external parsed general entity.
756: ** The referenced entity is not automatically parsed.
757: ** The application can parse it immediately or later using
758: ** XML_ExternalEntityParserCreate.
759: ** The parser argument is the parser parsing the entity containing the reference;
760: ** it can be passed as the parser argument to XML_ExternalEntityParserCreate.
761: ** The systemId argument is the system identifier as specified in the entity
762: ** declaration; it will not be null.
763: ** The base argument is the system identifier that should be used as the base for
764: ** resolving systemId if systemId was relative; this is set by XML_SetBase;
765: ** it may be null.
766: ** The publicId argument is the public identifier as specified in the entity declaration,
767: ** or null if none was specified; the whitespace in the public identifier
768: ** will have been normalized as required by the XML spec.
769: ** The openEntityNames argument is a space-separated list of the names of the entities
770: ** that are open for the parse of this entity (including the name of the referenced
771: ** entity); this can be passed as the openEntityNames argument to
772: ** XML_ExternalEntityParserCreate; openEntityNames is valid only until the handler
773: ** returns, so if the referenced entity is to be parsed later, it must be copied.
774: ** The handler should return 0 if processing should not continue because of
775: ** a fatal error in the handling of the external entity.
776: ** In this case the calling parser will return an XML_ERROR_EXTERNAL_ENTITY_HANDLING
777: ** error.
778: ** Note that unlike other handlers the first argument is the parser, not userData.
779: */
780: PRIVATE int XML_externalEntityRef (XML_Parser parser,
781: const XML_Char * openEntityNames,
782: const XML_Char * base,
783: const XML_Char * systemId,
784: const XML_Char * publicId)
785: {
786: return 0;
787: }
788:
789: /*
790: ** This is called for an encoding that is unknown to the parser.
791: ** The encodingHandlerData argument is that which was passed as the
792: ** second argument to XML_SetUnknownEncodingHandler.
793: ** The name argument gives the name of the encoding as specified in
794: ** the encoding declaration.
795: ** If the callback can provide information about the encoding,
796: ** it must fill in the XML_Encoding structure, and return 1.
797: ** Otherwise it must return 0.
798: ** If info does not describe a suitable encoding,
799: ** then the parser will return an XML_UNKNOWN_ENCODING error.
800: */
801: PRIVATE int XML_unknownEncoding (void * encodingHandlerData,
802: const XML_Char * name,
803: XML_Encoding * info)
804: {
805: return 0;
806: }
807:
808: /* ------------------------------------------------------------------------- */
809: /* HTXML STREAM HANDLERS */
810: /* ------------------------------------------------------------------------- */
811:
812: PRIVATE void rdf_setHandlers (XML_Parser me)
813: {
814: XML_SetElementHandler(me, XML_startElement, XML_endElement);
815: XML_SetCharacterDataHandler(me, XML_characterData);
816: XML_SetProcessingInstructionHandler(me, XML_processingInstruction);
817: XML_SetDefaultHandler(me, XML_default);
818: XML_SetUnparsedEntityDeclHandler(me, XML_unparsedEntityDecl);
819: XML_SetNotationDeclHandler(me, XML_notationDecl);
820: XML_SetExternalEntityRefHandler(me, XML_externalEntityRef);
821: XML_SetUnknownEncodingHandler(me, XML_unknownEncoding, NULL);
822: }
823:
824: PRIVATE void rdf_newInstance (HTStream * me,
825: HTRequest * request,
826: HTFormat target_format,
827: HTStream * target_stream,
828: XML_Parser xmlparser,
829: void * context)
830: {
831: if (me && xmlparser) {
832: rdf_setHandlers(xmlparser);
833: XML_SetUserData(xmlparser, context);
834:
835: /* Call the new RDF instance callback (if any) with this new stream */
836: if (RDFInstance)
837: (*RDFInstance)(me, request, target_format, target_stream, context, RDFInstanceContext);
838: }
839: }
840:
841: /* ------------------------------------------------------------------------- */
842: /* RDF PARSER */
843: /* ------------------------------------------------------------------------- */
844:
845: PRIVATE void visit_element_children (HTList *children)
846: {
847: HTElement *child = NULL;
848: HTList *cur = children;
849: while ((child = (HTElement *) HTList_nextObject(cur))) {
850: if (!HTList_isEmpty(child->m_children))
851: visit_element_children(child->m_children);
852: HTElement_delete(child);
853: }
854: }
855:
856: PRIVATE void delete_elements (HTRDF * me)
857: {
858: if (me && me->m_root) {
859: HTElement *r = me->m_root;
860: if (!HTList_isEmpty(r->m_children))
861: visit_element_children(r->m_children);
862: HTElement_delete(r);
863: }
864: }
865:
866: PUBLIC HTRDF * HTRDF_new (void)
867: {
868: HTRDF * me;
869: if ((me = (HTRDF *) HT_CALLOC(1, sizeof(HTRDF))) == NULL)
870: HT_OUTOFMEM("HTRDF_new");
871: me->m_namespaceStack = HTList_new();
872: me->m_elementStack = HTList_new();
873:
874: me->m_triples = HTList_new();
875: me->m_vAllNameSpaces = HTList_new();
876:
877: me->m_bCreateBags = FALSE;
878: me->m_bFetchSchemas = FALSE;
879:
880: me->m_parseTypeStack = HTList_new();
881: me->m_parseElementStack = HTList_new();
882:
883: me->m_vResources = HTList_new();
884: me->m_vResolveQueue = HTList_new();
885: me->m_hIDtable = HTHashtable_new(0);
886:
887: return me;
888: }
889:
890: PUBLIC BOOL HTRDF_delete (HTRDF * me)
891: {
892: if (me) {
893: delete_elements(me);
894: if (me->m_namespaceStack) {
895: HTList *cur = me->m_namespaceStack;
896: HTAssocList *alist = NULL;
897: while ((alist = (HTAssocList *) HTList_nextObject(cur))) {
898: HTAssocList_delete(alist);
899: }
900: HTList_delete(me->m_namespaceStack);
901: }
902: if (me->m_elementStack) HTList_delete(me->m_elementStack);
903: me->m_root = NULL;
904: if (me->m_triples) {
905: HTList *cur = me->m_triples;
906: HTTriple *t = NULL;
907: while ((t = (HTTriple *) HTList_nextObject(cur))) {
908: /*HTTriple_print(t);*/
909: HTTriple_delete(t);
910: }
911: HTList_delete(me->m_triples);
912: }
913: HT_FREE(me->m_sSource);
914: if (me->m_vAllNameSpaces) {
915: HTList *cur = me->m_vAllNameSpaces;
916: String s = NULL;
917: while ((s = (String) HTList_nextObject(cur))) {
918: HT_FREE(s);
919: }
920: HTList_delete(me->m_vAllNameSpaces);
921: }
922: if (me->m_parseTypeStack)
923: HTList_delete(me->m_parseTypeStack);
924: if (me->m_parseElementStack)
925: HTList_delete(me->m_parseElementStack);
926: if (me->m_vResources)
927: HTList_delete(me->m_vResources);
928: if (me->m_vResolveQueue)
929: HTList_delete(me->m_vResolveQueue);
930: if (me->m_hIDtable)
931: HTHashtable_delete(me->m_hIDtable);
932: HT_FREE(me->m_sLiteral);
933: HT_FREE(me);
934: return YES;
935: }
936: return NO;
937: }
938:
939: /*
940: * setSource method saves the name of the source document for
941: * later inspection if needed
942: */
943: PUBLIC BOOL HTRDF_setSource(HTRDF *me, String source)
944: {
945: if (me && source) {
946: StrAllocCopy (me->m_sSource, source);
947: return YES;
948: }
949: return NO;
950: }
951:
952: /*
953: * Go through the m_vResolveQueue and assign
954: * direct object reference for each symbolic reference
955: */
956: PUBLIC BOOL HTRDF_resolve (HTRDF * me)
957: {
958: if (me) {
959: HTList * cur = me->m_vResolveQueue;
960: HTElement *e = NULL;
961: HTElement *e2 = NULL;
962: while ((e = (HTElement *) HTList_nextObject(cur))) {
963: String sAbout = HTElement_getAttribute2(e, RDFMS, "about");
964: String sResource = HTElement_getAttribute2(e, RDFMS, "resource");
965: String sAboutEach = HTElement_getAttribute2(e, RDFMS, "aboutEach");
966: String sAboutEachPrefix = HTElement_getAttribute2(e, RDFMS,
967: "aboutEachPrefix");
968: if (sAbout) {
969: if (sAbout[0]=='#')
970: sAbout = &(sAbout[1]);
971: e2 = (HTElement *) HTRDF_lookforNode(me, sAbout);
972: if (e2)
973: HTElement_addTarget(e, e2);
974: else
975: HTPrint("Unresolved internal reference %s\n", sAbout);
976: }
977: if (sResource) {
978: if (sResource[0]=='#')
979: sResource = &(sResource[1]);
980: e2 = (HTElement *) HTRDF_lookforNode(me, sResource);
981: if (e2)
982: HTElement_addTarget(e, e2);
983: }
984:
985: if (sAboutEach) {
986: sAboutEach = &(sAboutEach[1]);
987: e2 = (HTElement *) HTRDF_lookforNode(me, sAboutEach);
988: if (e2)
989: HTElement_addTarget(e, e2);
990: }
991: if (sAboutEachPrefix) {
992: HTList * curr = me->m_vResources;
993: HTElement *ele = NULL;
994: while ((ele = (HTElement *) HTList_nextObject(curr))) {
995: String sA = HTElement_getAttribute2(ele, RDFMS, "about");
996: if (sA &&
997: !strncmp(sA, sAboutEachPrefix, strlen(sAboutEachPrefix))) {
998: HTElement_addTarget(e, ele);
999: }
1000: }
1001: }
1002: }
1003: HTList_delete(me->m_vResources);
1004: me->m_vResources = HTList_new();
1005: return YES;
1006: }
1007: return NO;
1008: }
1009:
1010: /**
1011: * Check if the element e is from the namespace
1012: * of the RDF schema by comparing only the beginning of
1013: * the expanded element name with the canonical RDFMS
1014: * URI
1015: */
1016: PUBLIC BOOL HTRDF_isRDF(HTRDF * me, HTElement *e)
1017: {
1018: return (me && e && e->m_sName) ?
1019: (!strncmp(e->m_sName, RDFMS, strlen(RDFMS))) : NO;
1020: }
1021:
1022: PUBLIC BOOL HTRDF_isRDFroot (HTRDF * me, HTElement *e)
1023: {
1024: if (me && e && e->m_sName) {
1025: int len = strlen(e->m_sName);
1026: if (len > 3) return (HTRDF_isRDF(me, e) && !strcmp(&(e->m_sName[len-3]), "RDF"));
1027: }
1028: return NO;
1029: }
1030:
1031: /**
1032: * Is the element a Description
1033: */
1034: PUBLIC BOOL HTRDF_isDescription (HTRDF *me, HTElement *e)
1035: {
1036: if (me && e && e->m_sName) {
1037: int len = strlen(e->m_sName);
1038: if (len > 11) return (HTRDF_isRDF(me, e) && !strcmp(&(e->m_sName[len-11]), "Description"));
1039: }
1040: return NO;
1041: }
1042:
1043: /*
1044: * Is the element a ListItem
1045: */
1046: PUBLIC BOOL HTRDF_isListItem (HTRDF *me, HTElement *e)
1047: {
1048: if (me && e && e->m_sName) {
1049: int len = strlen(e->m_sName);
1050: if (len > 2)
1051: return (HTRDF_isRDF(me, e) && (!strcmp(&(e->m_sName[len-2]), "li") || strchr(e->m_sName,'_')));
1052: }
1053: return NO;
1054: }
1055:
1056: /**
1057: * Is the element a Sequence
1058: */
1059: PUBLIC BOOL HTRDF_isSequence (HTRDF *me, HTElement *e)
1060: {
1061: if (me && e && e->m_sName) {
1062: int len = strlen(e->m_sName);
1063: if (len > 3) return (HTRDF_isRDF(me, e) && !strcmp(&(e->m_sName[len-3]), "Seq"));
1064: }
1065: return NO;
1066: }
1067:
1068: /*
1069: * Is the element an Alternative
1070: */
1071: PUBLIC BOOL HTRDF_isAlternative (HTRDF *me, HTElement *e)
1072: {
1073: if (me && e && e->m_sName) {
1074: int len = strlen(e->m_sName);
1075: if (len > 3) return (HTRDF_isRDF(me, e) && !strcmp(&(e->m_sName[len-3]), "Alt"));
1076: }
1077: return NO;
1078: }
1079:
1080: /*
1081: * Is the element a Bag
1082: */
1083: PUBLIC BOOL HTRDF_isBag (HTRDF *me, HTElement *e)
1084: {
1085: if (me && e && e->m_sName) {
1086: int len = strlen(e->m_sName);
1087: if (len > 3) return (HTRDF_isRDF(me, e) && !strcmp(&(e->m_sName[len-3]), "Bag"));
1088: }
1089: return NO;
1090: }
1091:
1092: /**
1093: * Is the element a Container
1094: */
1095: PUBLIC BOOL HTRDF_isContainer (HTRDF *me, HTElement *e)
1096: {
1097: return (HTRDF_isSequence(me, e) ||
1098: HTRDF_isAlternative(me, e) ||
1099: HTRDF_isBag(me, e));
1100: }
1101:
1102: /*
1103: * This method matches all properties but those from RDF namespace
1104: */
1105: PUBLIC BOOL HTRDF_isTypedPredicate(HTRDF *me, HTElement *e)
1106: {
1107: if (me && e && e->m_sName) {
1108: int len = strlen(e->m_sName);
1109: String tp[] = {"predicate", "subject", "object",
1110: "value", "type", "Property", "Statement"};
1111: int i;
1112: if (HTRDF_isRDF(me, e)) {
1113: for(i = 0; i< 7; i++) {
1114: int ntp = strlen(tp[i]);
1115: if (len > ntp) {
1116: if (!strcmp(&(e->m_sName[len-ntp]), tp[i]))
1117: return YES;
1118: }
1119: }
1120: return NO;
1121: }
1122: if (len > 0) return YES;
1123: }
1124: return NO;
1125: }
1126:
1127: PRIVATE void HTRDF_processListItem (HTRDF * me, String sID, HTElement *listitem,
1128: int iCounter)
1129: {
1130: /*
1131: * Two different cases for
1132: * 1. LI element without content (resource available)
1133: * 2. LI element with content (resource unavailable)
1134: */
1135: String cName = NULL;
1136: String sResource = HTRDF_getResource(me, listitem);
1137: char sdig[20];
1138: sprintf(sdig, "_%d", iCounter);
1139: StrAllocMCopy(&cName, RDFMS, sdig, NULL);
1140: if (sResource) {
1141: HTRDF_addTriple(me, cName, sID, sResource);
1142: /* validity checking */
1143: if (!HTList_isEmpty(listitem->m_children)){
1144: HTPrint("Listitem with resource attribute can not have child nodes");
1145: }
1146: StrAllocCopy(listitem->m_sID, sResource);
1147: } else {
1148: HTList *cur = listitem->m_children;
1149: HTElement *n = NULL;
1150: while ((n = (HTElement *) HTList_nextObject(cur))) {
1151: if (HTElement_instanceOfData(n)) {
1152: HTRDF_addTriple(me, cName, sID, n->m_sContent);
1153: } else if (HTRDF_isDescription(me, n)) {
1154: String sNodeID = HTRDF_processDescription(me, n, NO, YES, NO);
1155: HTRDF_addTriple(me, cName, sID, sNodeID);
1156: StrAllocCopy(listitem->m_sID, sNodeID);
1157: } else if (HTRDF_isListItem(me, n)) {
1158: HTPrint("Can not nest list item inside list item\n");
1159: } else if (HTRDF_isContainer(me, n)) {
1160: String c = HTRDF_processContainer(me, n);
1161: HTRDF_addTriple(me, cName, sID, n->m_sID);
1162: HT_FREE(c);
1163: } else if (HTRDF_isTypedPredicate(me, n)) {
1164: String sNodeID = HTRDF_processTypedNode(me, n);
1165: HTRDF_addTriple(me, cName, sID, sNodeID);
1166: HT_FREE(sNodeID);
1167: }
1168: }
1169: }
1170: HT_FREE(cName);
1171: }
1172:
1173: PRIVATE String HTRDF_processContainer(HTRDF *me, HTElement *n)
1174: {
1175: String sID = NULL;
1176: String tName = NULL;
1177: String aName = NULL;
1178: String sName = NULL;
1179: String bName = NULL;
1180: StrAllocMCopy(&tName, RDFMS, "type", NULL);
1181: StrAllocMCopy(&aName, RDFMS, "Alt", NULL);
1182: StrAllocMCopy(&sName, RDFMS, "Seq", NULL);
1183: StrAllocMCopy(&bName, RDFMS, "Bag", NULL);
1184:
1185: StrAllocCopy(sID, n->m_sID);
1186: if (!sID)
1187: sID = HTRDF_newReificationID(me);
1188: /*
1189: * Do the instantiation only once
1190: */
1191: if (!n->m_bDone) {
1192: if (HTRDF_isSequence(me, n)) {
1193: HTRDF_addTriple(me, tName, sID, sName);
1194: } else if (HTRDF_isAlternative(me, n)) {
1195: HTRDF_addTriple(me, tName, sID, aName);
1196: } else if (HTRDF_isBag(me, n)) {
1197: HTRDF_addTriple(me, tName, sID, bName);
1198: }
1199: n->m_bDone = YES;
1200: }
1201: HTRDF_expandAttributes(me, n, n);
1202:
1203: {
1204: HTList *cur = n->m_children;
1205: HTElement *n2 = NULL;
1206: int iCounter = 1;
1207: if (HTList_isEmpty(cur) && HTRDF_isAlternative(me, n))
1208: HTPrint("An RDF:Alt container must have at least one list item\n");
1209: while ((n2 = (HTElement *) HTList_nextObject(cur))) {
1210: if (HTRDF_isListItem(me, n2)) {
1211: HTRDF_processListItem(me, sID, n2, iCounter);
1212: iCounter++;
1213: } else {
1214: HTPrint("Can not nest %s, inside container\n", n2->m_sName);
1215: }
1216: }
1217: } /* end of block */
1218:
1219: HT_FREE(tName); HT_FREE(sName); HT_FREE(aName); HT_FREE(bName);
1220:
1221: return sID;
1222: }
1223: /*
1224: * Manage the typedNode production in the RDF grammar.
1225: *
1226: */
1227: PUBLIC String HTRDF_processTypedNode(HTRDF *me, HTElement *typedNode)
1228: {
1229: String sID = HTElement_getAttribute2(typedNode, RDFMS, "ID");
1230: String sBagID = HTElement_getAttribute2(typedNode, RDFMS, "bagID");
1231: String sAbout = HTElement_getAttribute2(typedNode, RDFMS, "about");
1232: String sAboutEach = HTElement_getAttribute2(typedNode, RDFMS, "aboutEach");
1233: /*String sAboutEachPrefix = HTElement_getAttribute2(typedNode, RDFMS,
1234: "aboutEachPrefix");*/
1235: String resource = HTElement_getAttribute2(typedNode, RDFMS, "resource");
1236: String iName = NULL;
1237: String bName = NULL;
1238: String tName = NULL;
1239:
1240: String sObject = NULL;
1241:
1242: StrAllocMCopy(&iName, RDFMS, "ID", NULL);
1243: StrAllocMCopy(&bName, RDFMS, "bagID", NULL);
1244: StrAllocMCopy(&tName, RDFMS, "type", NULL);
1245:
1246: if (resource)
1247: HTPrint("resource attribute not allowed for a typedNode %s\n",
1248: typedNode->m_sName);
1249:
1250: /*
1251: * We are going to manage this typedNode using the processDescription
1252: * routine later on. Before that, place all properties encoded as
1253: * attributes to separate child nodes.
1254: */
1255: {
1256: HTAssoc * assoc;
1257: HTAssocList *cur = typedNode->m_attributes;
1258: String sAttribute = NULL;
1259: String tValue = NULL, sValue = NULL;
1260: while((assoc= (HTAssoc *) HTList_nextObject(cur))) {
1261: sAttribute = HTAssoc_name(assoc);
1262: sValue = HTAssoc_value(assoc);
1263: tValue = trim(sValue);
1264: if (strncmp(sAttribute, RDFMS, strlen(RDFMS)) &&
1265: strncmp(sAttribute, XMLSCHEMA, strlen(XMLSCHEMA))) {
1266: if (strlen(tValue) > 0) {
1267: HTAssocList *newAL = HTAssocList_new();
1268: HTElement *newPredicate = HTElement_new(sAttribute, newAL);
1269: HTElement *d = NULL;
1270: HTElement_addAttribute(newPredicate, iName,
1271: sAbout ? sAbout : sID);
1272: HTElement_addAttribute(newPredicate, bName, sBagID);
1273: d = HTElement_new2(tValue);
1274: HTElement_addChild(newPredicate, d);
1275: HTElement_addChild(typedNode, newPredicate);
1276: HTElement_removeAttribute(typedNode, sAttribute);
1277: }
1278: }
1279: HT_FREE(tValue);
1280: } /* end of while */
1281: }/* end of block */
1282: {
1283: if (sAbout)
1284: StrAllocCopy(sObject, sAbout);
1285: else if (sID)
1286: StrAllocCopy(sObject, sID);
1287: else
1288: sObject = HTRDF_newReificationID(me);
1289: StrAllocCopy(typedNode->m_sID, sObject);
1290:
1291: /* special case: should the typedNode have aboutEach attribute,
1292: ** the type predicate should distribute to pointed
1293: ** collection also -> create a child node to the typedNode
1294: */
1295: if (sAboutEach && !HTList_isEmpty(typedNode->m_vTargets)) {
1296: HTAssocList *newAL = HTAssocList_new();
1297: HTElement *newPredicate = HTElement_new(tName, newAL);
1298: HTElement *d = HTElement_new2(typedNode->m_sName);
1299: HTElement_addChild(newPredicate, d);
1300: HTElement_addChild(typedNode, newPredicate);
1301: } else {
1302: HTRDF_addTriple(me, tName, sObject, typedNode->m_sName);
1303: }
1304: HTRDF_processDescription(me, typedNode, NO, NO, YES);
1305: }/* end of block */
1306:
1307: HT_FREE(iName); HT_FREE(bName); HT_FREE(tName);
1308:
1309: return sObject;
1310: }
1311:
1312: /*
1313: * Start processing an RDF/XML document instance from the
1314: * root element rdf.
1315: *
1316: */
1317: PUBLIC BOOL HTRDF_processRDF (HTRDF *me, HTElement *e)
1318: {
1319: if (me && e) {
1320: HTList *cur = e->m_children;
1321: HTElement *ele = NULL;
1322: if (HTList_isEmpty(e->m_children)) {
1323: HTPrint("Empty RDF Element\n");
1324: return NO;
1325: }
1326: while ((ele= (HTElement *) HTList_nextObject(cur))) {
1327: if (HTRDF_isDescription(me, ele)) {
1328: HTRDF_processDescription(me, ele, NO, me->m_bCreateBags,
1329: me->m_bCreateBags);
1330: } else if (HTRDF_isContainer(me, ele)) {
1331: String c = HTRDF_processContainer(me, ele);
1332: HT_FREE(c);
1333: } else if (HTRDF_isTypedPredicate(me, ele)) {
1334: String t = HTRDF_processTypedNode(me, ele);
1335: HT_FREE(t);
1336: }
1337: }
1338: return YES;
1339: }
1340: return NO;
1341: }
1342:
1343: /*
1344: * processPredicate handles all elements not defined as special
1345: * RDF elements.
1346: *
1347: * predicate The predicate element itself
1348: * description Context for the predicate
1349: * sTarget The target resource
1350: * reificate Should this predicate be reificated
1351: *
1352: * return the new ID which can be used to identify the predicate
1353: *
1354: */
1355: PRIVATE String HTRDF_processPredicate (HTRDF * me,
1356: HTElement * predicate,
1357: HTElement * description,
1358: String sTarget,
1359: BOOL reificate)
1360: {
1361: String sStatementID = HTElement_getAttribute2(predicate, RDFMS, "ID");
1362: String nsStatementID = NULL;
1363: String sBagID = HTElement_getAttribute2(predicate, RDFMS, "bagID");
1364: String sResource = HTRDF_getResource(me, predicate);
1365:
1366: /*
1367: ** If a predicate has other attributes than rdf:ID, rdf:bagID,
1368: ** or xmlns... -> generate new triples according to the spec.
1369: ** (See end of Section 6)
1370: */
1371: {
1372: HTElement * place_holder = NULL;
1373: HTAssocList * newAL = HTAssocList_new();
1374: String fName = NULL;
1375: String aName = NULL;
1376:
1377: StrAllocMCopy(&fName, RDFMS, "Description", NULL);
1378: place_holder = HTElement_new(fName, newAL);
1379: HT_FREE(fName);
1380:
1381: if (HTRDF_expandAttributes(me, place_holder, predicate)) {
1382:
1383: /* error checking */
1384: if (!HTList_isEmpty(predicate->m_children)) {
1385: HTPrint("%s must be an empty element since it uses propAttr grammar production", predicate->m_sName);
1386: HTElement_delete(place_holder);
1387: return NULL;
1388: }
1389: StrAllocMCopy(&aName, RDFMS, "about", NULL);
1390:
1391: /* determine the 'about' part for the new statements */
1392: if (sStatementID) {
1393: HTElement *data = HTElement_new2(sStatementID);
1394: HTElement_addAttribute(place_holder, aName, sStatementID);
1395:
1396: /* hack: make rdf:ID the value of the predicate */
1397: HTElement_addChild(predicate, data);
1398: } else if (sResource) {
1399: HTElement_addAttribute(place_holder, aName, sResource);
1400: } else {
1401: nsStatementID = HTRDF_newReificationID(me);
1402: HTElement_addAttribute(place_holder, aName, nsStatementID);
1403: HT_FREE(nsStatementID);
1404: }
1405: HT_FREE(aName);
1406:
1407: if (sBagID) {
1408: StrAllocMCopy(&fName, RDFMS, "bagID", NULL);
1409: HTElement_addAttribute(place_holder, fName, sBagID);
1410: HT_FREE(fName);
1411: StrAllocCopy(place_holder->m_sBagID, sBagID);
1412: }
1413: HTRDF_processDescription(me, place_holder, NO, NO, me->m_bCreateBags);
1414: } else {
1415:
1416: /* Nothing but xmlns or RDF stuff, so we don't need new element */
1417: HTElement_delete(place_holder);
1418: }
1419: }
1420:
1421: /*
1422: ** Tricky part: if the resource attribute is present for a predicate
1423: ** AND there are no children, the value of the predicate is either
1424: ** 1. the URI in the resource attribute OR
1425: ** 2. the node ID of the resolved #resource attribute
1426: */
1427: if (sResource && HTList_isEmpty(predicate->m_children)) {
1428: if (!HTElement_target(predicate)) {
1429: if (reificate) {
1430: HT_FREE(nsStatementID);
1431: nsStatementID = HTRDF_reificate (me, predicate->m_sName,
1432: sTarget, sResource,
1433: predicate->m_sID);
1434: StrAllocCopy(predicate->m_sID, nsStatementID);
1435: } else {
1436: HTRDF_addTriple(me, predicate->m_sName, sTarget, sResource);
1437: }
1438: } else {
1439: HTElement *target = HTElement_target(predicate);
1440: if (reificate) {
1441: nsStatementID = HTRDF_reificate (me, predicate->m_sName,
1442: sTarget,
1443: target->m_sID,
1444: predicate->m_sID);
1445: StrAllocCopy(predicate->m_sID, nsStatementID);
1446: } else {
1447: HTRDF_addTriple(me, predicate->m_sName, sTarget, target->m_sID);
1448: }
1449: }
1450: StrAllocCopy(nsStatementID, predicate->m_sID);
1451: return nsStatementID;
1452: }
1453:
1454: /*
1455: ** Does this predicate make a reference somewhere using the
1456: ** sResource attribute
1457: */
1458: if (sResource && HTElement_target(predicate)) {
1459: String dStatementID = HTRDF_processDescription(me,
1460: HTElement_target(predicate),
1461: YES, NO, NO);
1462: if (reificate) {
1463: HT_FREE(nsStatementID);
1464: nsStatementID = HTRDF_reificate (me, predicate->m_sName,
1465: sTarget, dStatementID,
1466: predicate->m_sID);
1467: StrAllocCopy(predicate->m_sID, nsStatementID);
1468: } else {
1469: StrAllocCopy(nsStatementID, dStatementID);
1470: HTRDF_addTriple(me, predicate->m_sName, sTarget, nsStatementID);
1471: }
1472: return nsStatementID;
1473: }
1474:
1475: /*
1476: ** Before looping through the children, let's check
1477: ** if there are any. If not, the value of the predicate is
1478: ** an anonymous node
1479: */
1480: {
1481: HTList *cur = predicate->m_children;
1482: BOOL bUsedTypedNodeProduction = NO;
1483: HTElement *n2;
1484: StrAllocCopy(nsStatementID, sStatementID);
1485: if (HTList_isEmpty(cur)) {
1486: if (reificate) {
1487: String nr = HTRDF_newReificationID(me);
1488: HT_FREE(nsStatementID);
1489: nsStatementID = HTRDF_reificate (me, predicate->m_sName,
1490: sTarget, nr,
1491: predicate->m_sID);
1492: HT_FREE(nr);
1493: } else {
1494: String nr = HTRDF_newReificationID(me);
1495: HTRDF_addTriple(me, predicate->m_sName, sTarget, nr);
1496: HT_FREE(nr);
1497: }
1498: }
1499: while ((n2= (HTElement *) HTList_nextObject(cur))) {
1500: if (HTRDF_isDescription(me, n2)) {
1501: HTElement *d2 = n2;
1502: String dStatementID =HTRDF_processDescription(me, d2, YES, NO, NO);
1503: StrAllocCopy(d2->m_sID, dStatementID);
1504:
1505: if (reificate) {
1506: HT_FREE(nsStatementID);
1507: nsStatementID = HTRDF_reificate (me, predicate->m_sName,
1508: sTarget, dStatementID,
1509: predicate->m_sID);
1510: } else {
1511: StrAllocCopy(nsStatementID, dStatementID);
1512: HTRDF_addTriple(me, predicate->m_sName, sTarget,
1513: nsStatementID);
1514: }
1515: } else if (HTElement_instanceOfData(n2)) {
1516: String tValue = NULL, sValue = n2->m_sContent;
1517: /* we've got real data */
1518: /*
1519: * Only if the content is not empty PCDATA (whitespace that is)
1520: * print the triple
1521: */
1522: tValue = trim(sValue);
1523: if (tValue && strlen(tValue) > 0) {
1524: if (reificate) {
1525: HT_FREE(nsStatementID);
1526: nsStatementID = HTRDF_reificate (me, predicate->m_sName,
1527: sTarget, tValue,
1528: predicate->m_sID);
1529: StrAllocCopy(predicate->m_sID, nsStatementID);
1530: } else {
1531: HTRDF_addTriple(me, predicate->m_sName, sTarget, tValue);
1532: }
1533: }
1534: HT_FREE(tValue);
1535: } else if (HTRDF_isContainer(me, n2)) {
1536: HTElement *target = HTElement_target(description);
1537: String aboutTarget =
1538: target ?
1539: HTElement_getAttribute2(target, RDFMS, "about") : NULL;
1540: String sCollectionID = HTRDF_processContainer(me, n2);
1541: StrAllocCopy(nsStatementID, sCollectionID);
1542: /* Attach the collection to the current predicate */
1543: if (target) {
1544: if (reificate) {
1545: HT_FREE(nsStatementID);
1546: nsStatementID=HTRDF_reificate (me, predicate->m_sName,
1547: aboutTarget,
1548: sCollectionID,
1549: predicate->m_sID);
1550: StrAllocCopy(predicate->m_sID, nsStatementID);
1551: } else {
1552: HTRDF_addTriple(me, predicate->m_sName, aboutTarget,
1553: sCollectionID);
1554: }
1555: } else {
1556: if (reificate) {
1557: HT_FREE(nsStatementID);
1558: nsStatementID=HTRDF_reificate (me, predicate->m_sName,
1559: sTarget, sCollectionID,
1560: predicate->m_sID);
1561: StrAllocCopy(predicate->m_sID, nsStatementID);
1562: } else {
1563: HTRDF_addTriple(me, predicate->m_sName, sTarget,
1564: sCollectionID);
1565: }
1566: }
1567: HT_FREE(sCollectionID);
1568: } else if (HTRDF_isTypedPredicate(me, n2)) {
1569: if (bUsedTypedNodeProduction) {
1570: HTPrint("Only one typedNode allowed inside a predicate (Extra typedNode: %s )\n", n2->m_sName);
1571: } else {
1572: bUsedTypedNodeProduction = YES;
1573: }
1574: HT_FREE(nsStatementID);
1575: nsStatementID = HTRDF_processTypedNode(me, n2);
1576: HTRDF_addTriple(me, predicate->m_sName, sTarget, nsStatementID);
1577: }
1578: }
1579: return nsStatementID;
1580: } /* end of block */
1581: return NULL;
1582: }
1583:
1584: /*
1585: * processDescription manages Description elements
1586: *
1587: * description The Description element itself
1588: * inPredicate Is this is a nested description
1589: * reificate Do we need to reificate
1590: * createBag Do we create a bag container
1591: *
1592: * return An ID for the description
1593: *
1594: */
1595: PUBLIC String HTRDF_processDescription (HTRDF * me,
1596: HTElement * description,
1597: BOOL inPredicate,
1598: BOOL reificate,
1599: BOOL createBag)
1600: {
1601: int iChildCount = 1;
1602: BOOL bOnce = YES;
1603:
1604: String sAbout = HTElement_getAttribute2(description, RDFMS, "about");
1605: String sAboutEach = HTElement_getAttribute2(description, RDFMS, "aboutEach");
1606: String sAboutEachPrefix = HTElement_getAttribute2(description, RDFMS,
1607: "aboutEachPrefix");
1608: String sBagid = HTElement_getAttribute2(description, RDFMS, "bagID");
1609: String sID = HTElement_getAttribute2(description, RDFMS, "ID");
1610: HTElement *target = HTElement_target(description);
1611: BOOL hasTarget = HTList_isEmpty(description->m_vTargets) ? NO : YES;
1612: BOOL targetIsContainer = NO;
1613: String sTargetAbout = NULL;
1614: String sTargetBagID = NULL;
1615: String sTargetID = NULL;
1616: String dName = NULL;
1617: String aName = NULL;
1618:
1619: /*
1620: ** Return immediately if the description has already been managed
1621: */
1622: if (description->m_bDone) return description->m_sID;
1623:
1624: StrAllocMCopy(&dName, RDFMS, "Description", NULL);
1625: StrAllocMCopy(&aName, RDFMS, "about", NULL);
1626:
1627: /*
1628: ** Determine what the target of the Description reference is
1629: */
1630: if (hasTarget) {
1631: String sTargetID2 = HTElement_getAttribute2(target, RDFMS, "ID");
1632: sTargetAbout = HTElement_getAttribute2(target, RDFMS, "about");
1633: sTargetBagID = HTElement_getAttribute2(target, RDFMS, "bagID");
1634: if (me->m_sSource && sTargetID2) {
1635: StrAllocMCopy(&sTargetID, me->m_sSource, sTargetID2, NULL);
1636: } else {
1637: StrAllocCopy(sTargetID, sTargetID2);
1638: }
1639: /*
1640: * Target is collection if
1641: * 1. it is identified with bagID attribute
1642: * 2. it is identified with ID attribute and is a collection
1643: */
1644: if (sTargetBagID && sAbout) {
1645: targetIsContainer = !strcmp(&(sAbout[1]), sTargetBagID);
1646: } else {
1647: if (sTargetID && sAbout && !strcmp(&(sAbout[1]), sTargetID) &&
1648: HTRDF_isContainer(me, target))
1649: targetIsContainer = YES;
1650: }
1651: HT_FREE(sTargetID);
1652: }
1653:
1654: /*
1655: * Check if there are properties encoded using the abbreviated
1656: * syntax
1657: */
1658: HTRDF_expandAttributes(me, description, description);
1659:
1660: /*
1661: * Manage the aboutEach attribute here
1662: */
1663: if (sAboutEach && hasTarget) {
1664: if (HTRDF_isContainer(me, target)) {
1665: HTList *cur = target->m_children;
1666: HTElement *ele = NULL;
1667: while ((ele= (HTElement *) HTList_nextObject(cur))) {
1668: if (HTRDF_isListItem(me, ele)) {
1669: String sResource = HTRDF_getResource(me, ele);
1670: if (sResource) {
1671: HTElement * newDescription = NULL;
1672: HTElement * ele2;
1673: HTList * cur2 = description->m_children;
1674:
1675: /*
1676: * Manage <li resource="..." /> case
1677: */
1678: if (sResource) {
1679: HTAssocList *newAL = HTAssocList_new();
1680: newDescription = HTElement_new(dName, newAL);
1681: HTElement_addAttribute(newDescription, aName, sResource);
1682: }
1683:
1684: while ((ele2 = (HTElement *) HTList_nextObject(cur2))){
1685: if (newDescription) HTElement_addChild(newDescription, ele2);
1686: }
1687:
1688: if (newDescription)
1689: HTRDF_processDescription(me, newDescription, NO, NO, NO);
1690:
1691: /* Not needed anymore */
1692: HTElement_delete(newDescription);
1693:
1694: } else {
1695: /**
1696: * Otherwise we have a structured value inside <li>
1697: *
1698: * loop through the children of <li>
1699: * (can be only one)
1700: */
1701: HTList *cur2 = ele->m_children;
1702: HTElement *ele2 = NULL;
1703: while ((ele2 = (HTElement *) HTList_nextObject(cur2))) {
1704: HTAssocList *newAL = HTAssocList_new();
1705: HTElement *newNode = HTElement_new(dName, newAL);
1706: HTList *cur3 = description->m_children;
1707: HTElement *ele3 = NULL;
1708: /* loop through the items in the
1709: * description with aboutEach
1710: * and add them to the target
1711: */
1712: while ((ele3 = (HTElement *)
1713: HTList_nextObject(cur3))) {
1714: HTElement_addChild(newNode, ele3);
1715: }
1716: HTElement_addTarget(newNode, ele2);
1717: HTRDF_processDescription(me, newNode, YES, NO, NO);
1718: }
1719: }
1720: } else if (HTRDF_isTypedPredicate(me, ele)) {
1721: HTAssocList *newAL = HTAssocList_new();
1722: HTElement *newNode = HTElement_new(dName, newAL);
1723: HTList *cur2 = description->m_children;
1724: HTElement *ele2 = NULL;
1725: while ((ele2 = (HTElement *) HTList_nextObject(cur2))) {
1726: HTElement_addChild(newNode, ele2);
1727: }
1728: HTElement_addTarget(newNode, ele);
1729: HTRDF_processDescription(me, newNode, YES, NO, NO);
1730: }
1731: } /* end of while */
1732: } else if (HTRDF_isDescription(me, target)) {
1733: HTList *cur = target->m_children;
1734: HTElement *ele = NULL;
1735: while ((ele = (HTElement *) HTList_nextObject(cur))) {
1736: HTAssocList *newAL = HTAssocList_new();
1737: HTElement *newNode = HTElement_new(dName, newAL);
1738: HTList *cur2 = description->m_children;
1739: HTElement *ele2 = NULL;
1740: while ((ele2 = (HTElement *) HTList_nextObject(cur2))) {
1741: HTElement_addChild(newNode, ele2);
1742: }
1743: HTElement_addTarget(newNode, ele);
1744: HTRDF_processDescription(me, newNode, YES, NO, NO);
1745: } /* end of while */
1746: }
1747:
1748: HT_FREE(dName);
1749: HT_FREE(aName);
1750: return NULL;
1751: }
1752:
1753: /*
1754: * Manage the aboutEachPrefix attribute here
1755: */
1756: if (sAboutEachPrefix) {
1757: if (hasTarget) {
1758: HTList *cur = description->m_vTargets;
1759: HTElement *target = NULL;
1760: while ((target = (HTElement *) HTList_nextObject(cur))) {
1761: HTList *cur2 = description->m_children;
1762: HTElement *ele2 = NULL;
1763: HTElement *newDescription = NULL;
1764: HTAssocList *newAL = HTAssocList_new();
1765: sTargetAbout = HTElement_getAttribute2(target, RDFMS, "about");
1766: newDescription = HTElement_new(dName, newAL);
1767: HTElement_addAttribute(newDescription, aName, sTargetAbout);
1768: while ((ele2 = (HTElement *) HTList_nextObject(cur2))) {
1769: HTElement_addChild(newDescription, ele2);
1770: }
1771: HTRDF_processDescription(me, newDescription, NO, NO, NO);
1772: }
1773: }
1774:
1775: HT_FREE(dName);
1776: HT_FREE(aName);
1777: return NULL;
1778: }
1779: /*
1780: * Enumerate through the children
1781: */
1782: {
1783: HTList *cur = description->m_children;
1784: HTElement *n = NULL;
1785: while ((n = (HTElement *) HTList_nextObject(cur))) {
1786: if (HTRDF_isDescription(me, n))
1787: HTPrint("Can not nest Description inside Description\n");
1788: else if (HTRDF_isListItem(me, n))
1789: HTPrint("Can not nest List Item inside Description\n");
1790: else if (HTRDF_isContainer(me, n))
1791: HTPrint("Can not nest Container inside Description\n");
1792: else if (HTRDF_isTypedPredicate(me, n)) {
1793: String sChildID = NULL;
1794: if (hasTarget && targetIsContainer) {
1795: sChildID = HTRDF_processPredicate(me, n, description,
1796: target->m_sBagID ?
1797: target->m_sBagID :
1798: target->m_sID, NO);
1799: StrAllocCopy(description->m_sID, sChildID);
1800: createBag = NO;
1801: } else if (hasTarget) {
1802: sChildID = HTRDF_processPredicate(me, n, description,
1803: target->m_sBagID ?
1804: target->m_sBagID :
1805: target->m_sID, reificate);
1806: StrAllocCopy(description->m_sID, sChildID);
1807: } else if (!hasTarget && !inPredicate) {
1808: if (!description->m_sID) {
1809: String nr = HTRDF_newReificationID(me);
1810: StrAllocCopy(description->m_sID, nr);
1811: HT_FREE(nr);
1812: }
1813: if (!sAbout) {
1814: if (sID)
1815: sAbout = sID;
1816: else
1817: sAbout = description->m_sID;
1818: }
1819: sChildID = HTRDF_processPredicate(me, n, description,
1820: sAbout, sBagid ?
1821: YES : reificate);
1822:
1823: } else if (!hasTarget && inPredicate) {
1824: if (!sAbout) {
1825: if (sID) {
1826: StrAllocCopy(description->m_sID, sID);
1827: sAbout = sID;
1828: } else {
1829: if (!description->m_sID) {
1830: String nr = HTRDF_newReificationID(me);
1831: StrAllocCopy(description->m_sID, nr);
1832: HT_FREE(nr);
1833: }
1834: sAbout = description->m_sID;
1835: }
1836: } else {
1837: StrAllocCopy(description->m_sID, sAbout);
1838: }
1839: sChildID = HTRDF_processPredicate(me, n, description, sAbout, NO);
1840: }
1841: /*
1842: * Each Description block creates also a Bag node which
1843: * has links to all properties within the block IF
1844: * the m_bCreateBags variable is true
1845: */
1846: if (sBagid || (me->m_bCreateBags && createBag)) {
1847: String sNamespace = RDFMS;
1848: if (bOnce && sChildID) {
1849: String tName = NULL;
1850: String bName = NULL;
1851: bOnce = NO;
1852: if (!description->m_sBagID) {
1853: String nr = HTRDF_newReificationID(me);
1854: StrAllocCopy(description->m_sBagID, nr);
1855: HT_FREE(nr);
1856: }
1857: if (!description->m_sID)
1858: StrAllocCopy(description->m_sID,
1859: description->m_sBagID);
1860: StrAllocMCopy(&tName, sNamespace, "type", NULL);
1861: StrAllocMCopy(&bName, sNamespace, "Bag", NULL);
1862: HTRDF_addTriple(me, tName, description->m_sBagID, bName);
1863: HT_FREE(tName);
1864: HT_FREE(bName);
1865:
1866: }
1867: if (sChildID) {
1868: String tName = NULL;
1869: char si[20];
1870: sprintf(si, "%d", iChildCount);
1871: StrAllocMCopy(&tName, sNamespace, "_", si, NULL);
1872: HTRDF_addTriple(me, tName, description->m_sBagID, sChildID);
1873: iChildCount++;
1874: HT_FREE(tName);
1875: }
1876: }
1877: HT_FREE(sChildID);
1878: }
1879: }
1880: } /* end of block*/
1881:
1882: description->m_bDone = YES;
1883:
1884: HT_FREE(dName);
1885: HT_FREE(aName);
1886: return (description->m_sID);
1887: }
1888:
1889: /*
1890: * Given an XML document (well-formed HTML, for example),
1891: * look for a suitable element to start parsing from
1892: *
1893: */
1894: PUBLIC BOOL HTRDF_processXML (HTRDF *me, HTElement *ele)
1895: {
1896: if (me && ele) {
1897: if (HTRDF_isRDF(me, ele)) {
1898: if (HTRDF_isRDFroot(me, ele)) {
1899: HTRDF_processRDF(me, ele);
1900: } else if (HTRDF_isDescription(me, ele)) {
1901: HTRDF_processDescription(me, ele, NO, me->m_bCreateBags,
1902: me->m_bCreateBags);
1903: }
1904: } else {
1905: HTList *cur = ele->m_children;
1906: HTElement *child = NULL;
1907: while ((child = (HTElement *) HTList_nextObject(cur))) {
1908: HTRDF_processXML(me, child);
1909: }
1910: }
1911:
1912: /* MISSING RECURSION */
1913:
1914: return YES;
1915: }
1916: return NO;
1917: }
1918:
1919: /*
1920: * Return the root element pointer. This requires the parsing
1921: * has been already done.
1922: */
1923: PUBLIC HTElement * HTRDF_root (HTRDF *me)
1924: {
1925: return me ? me->m_root : NULL;
1926: }
1927:
1928: /*
1929: * Return the full namespace URI for a given prefix sPrefix.
1930: * The default namespace is identified with xmlns prefix.
1931: * The namespace of xmlns attribute is an empty string.
1932: */
1933:
1934: PUBLIC String HTRDF_namespace(HTRDF * me, String sPrefix)
1935: {
1936: String nPrefix = NULL;
1937: HTAssocList * calist;
1938: HTList * cur = me->m_namespaceStack;
1939:
1940: if (!sPrefix)
1941: StrAllocCopy(nPrefix, "xmlns");
1942:
1943: while ((calist = (HTAssocList *) HTList_nextObject(cur))) {
1944: String sValue = HTAssocList_findObjectCaseSensitiveExact(calist, sPrefix);
1945: if (sValue) {
1946: StrAllocCopy(nPrefix, sValue);
1947: return nPrefix;
1948: }
1949: }
1950: /*
1951: * Give error only if
1952: * 1. the prefix is not from the reserved xml namespace
1953: * 2. the prefix is not xmlns which is to look for the default
1954: * namespace
1955: */
1956: if (!strcmp(sPrefix, XMLSCHEMA)) {
1957: StrAllocCopy(nPrefix, sPrefix);
1958: return nPrefix;
1959: } else if (!strcmp(sPrefix, "xmlns")) {
1960: StrAllocCopy(nPrefix, "");
1961: return nPrefix;
1962: } else
1963: HTPrint("Unresolved Namespace prefix %s\n", sPrefix);
1964:
1965: StrAllocCopy(nPrefix, "");
1966: return nPrefix;
1967: }
1968:
1969: /*
1970: * Methods to determine whether we are parsing
1971: * parseType="Literal" or parseType="Resource"
1972: */
1973:
1974: PUBLIC BOOL HTRDF_parseLiteral(HTRDF *me)
1975: {
1976: HTElement *e = NULL;
1977: HTList *cur = me->m_elementStack;
1978: if (!HTList_isEmpty(me->m_elementStack)) {
1979: while((e = (HTElement *) HTList_nextObject(cur))) {
1980: String sParseType = NULL;
1981: sParseType = HTElement_getAttribute2(e, RDFMS, "parseType");
1982: if (sParseType) {
1983: if (strcmp(sParseType, "Resource"))
1984: return YES;
1985: }
1986: }
1987: }
1988: return NO;
1989: }
1990:
1991: /*
1992: * Methods to determine whether we are parsing
1993: * parseType="Literal" or parseType="Resource"
1994: */
1995:
1996: PUBLIC BOOL HTRDF_parseResource(HTRDF *me)
1997: {
1998: HTElement *e = NULL;
1999: HTList *cur = me->m_elementStack;
2000: if (!HTList_isEmpty(me->m_elementStack)) {
2001: while((e = (HTElement *) HTList_nextObject(cur))) {
2002: String sParseType = NULL;
2003: sParseType = HTElement_getAttribute2(e, RDFMS, "parseType");
2004: if (sParseType) {
2005: if (!strcmp(sParseType, "Resource"))
2006: return YES;
2007: }
2008: }
2009: }
2010: return NO;
2011: }
2012: /*
2013: * checkAttributes goes through the attributes of element e<
2014: * to see
2015: * 1. if there are symbolic references to other nodes in the data model.
2016: * in which case they must be stored for later resolving with
2017: * resolveLater method.
2018: * 2. if there is an identity attribute, it is registered using
2019: * registerResource or registerID method.
2020: *
2021: */
2022:
2023: PRIVATE void HTRDF_checkAttributes(HTRDF *me, HTElement *e)
2024: {
2025: {
2026: String sResource = HTElement_getAttribute2(e, RDFMS, "resource");
2027:
2028: if (sResource && sResource[0] == '#')
2029: HTRDF_resolveLater(me, e);
2030: }
2031: {
2032: String sAboutEach = HTElement_getAttribute2(e, RDFMS, "aboutEach");
2033:
2034: if (sAboutEach && sAboutEach[0] == '#')
2035: HTRDF_resolveLater(me, e);
2036: }
2037: {
2038: String sAboutEachPrefix = HTElement_getAttribute2(e, RDFMS,
2039: "aboutEachPrefix");
2040:
2041: if (sAboutEachPrefix && sAboutEachPrefix[0] == '#')
2042: HTRDF_resolveLater(me, e);
2043: }
2044: {
2045: String sAbout = HTElement_getAttribute2(e, RDFMS, "about");
2046: if (sAbout) {
2047: if (sAbout[0] == '#')
2048: HTRDF_resolveLater(me, e);
2049: else
2050: HTRDF_registerResource(me, e);
2051: }
2052: }
2053:
2054: {
2055: String sBagID = HTElement_getAttribute2(e, RDFMS, "bagID");
2056:
2057: if (sBagID) {
2058: HTRDF_registerID(me, sBagID, e);
2059: StrAllocCopy(e->m_sBagID, sBagID);
2060: }
2061: }
2062: {
2063: String sID = HTElement_getAttribute2(e, RDFMS, "ID");
2064: if (sID) {
2065: HTRDF_registerID(me, sID, e);
2066: StrAllocCopy(e->m_sID, sID);
2067: }
2068: }
2069: }
2070: /*
2071: * Add the element e to the m_vResolveQueue
2072: * to be resolved later.
2073: */
2074: PUBLIC void HTRDF_resolveLater(HTRDF *me, HTElement *e)
2075: {
2076: HTList_addObject(me->m_vResolveQueue, e);
2077: }
2078: /*
2079: * Add an element e to the Hashtable m_hIDtable
2080: * which stores all nodes with an ID
2081: */
2082:
2083: PUBLIC void HTRDF_registerID(HTRDF *me, String sID, HTElement *e)
2084: {
2085: if (HTHashtable_object(me->m_hIDtable, sID))
2086: HTPrint("Node ID %s redefined", sID);
2087: HTHashtable_addObject(me->m_hIDtable, sID, e);
2088: }
2089: /*
2090: * Add an element e to the Vector m_vResources
2091: * which stores all nodes with an URI
2092: */
2093: PUBLIC void HTRDF_registerResource(HTRDF *me, HTElement *e)
2094: {
2095: HTList_addObject(me->m_vResources, e);
2096: }
2097:
2098: /*
2099: * Look for a node by name sID from the Hashtable
2100: * m_hIDtable of all registered IDs.
2101: */
2102:
2103: PUBLIC HTElement *HTRDF_lookforNode(HTRDF *me, String sID)
2104: {
2105: if (sID)
2106: return (HTElement *) HTHashtable_object(me->m_hIDtable, sID);
2107: return NULL;
2108: }
2109:
2110: /*
2111: ** Special method to deal with rdf:resource attribute
2112: */
2113: PUBLIC String HTRDF_getResource(HTRDF *me, HTElement *e)
2114: {
2115: String sResource = HTElement_getAttribute2(e, RDFMS, "resource");
2116: if (sResource != NULL && sResource[0] == '\0')
2117: sResource = me->m_sSource;
2118: return sResource;
2119: }
2120:
2121: /*
2122: ** Take an element ele with its parent element parent
2123: ** and evaluate all its attributes to see if they are non-RDF specific
2124: ** and non-XML specific in which case they must become children of
2125: ** the ele node.
2126: */
2127: PRIVATE BOOL HTRDF_expandAttributes (HTRDF * me, HTElement * parent, HTElement * ele)
2128: {
2129: BOOL foundAbbreviation = NO;
2130: String sAttribute = NULL;
2131: String sValue = NULL;
2132: HTAssoc * assoc;
2133: HTAssocList * cur = ele->m_attributes;
2134: int lxmlschema = strlen(XMLSCHEMA);
2135: int lrdfms = strlen(RDFMS);
2136:
2137: while ((assoc= (HTAssoc *) HTList_nextObject(cur))) {
2138: int latt;
2139: sAttribute = HTAssoc_name(assoc);
2140: sValue = HTAssoc_value(assoc);
2141: latt = strlen(sAttribute);
2142: if (!strncmp(sAttribute, XMLSCHEMA, lxmlschema))
2143: continue;
2144:
2145: if (!strncmp(sAttribute, RDFMS, lrdfms) &&
2146: (sAttribute[lrdfms]!='_') &&
2147: latt > 5 && strcmp(&(sAttribute[latt-5]), "value") &&
2148: strcmp(&(sAttribute[latt-4]), "type"))
2149: continue;
2150:
2151: if (strlen(sValue) > 0) {
2152: HTAssocList * newAL = HTAssocList_new();
2153: HTElement * newElement = HTElement_new(sAttribute, newAL);
2154: HTElement * newData = HTElement_new2(sValue);
2155: HTElement_addChild(newElement, newData);
2156: HTElement_addChild(parent, newElement);
2157: foundAbbreviation = YES;
2158: }
2159: }
2160: return foundAbbreviation;
2161: }
2162:
2163: /**
2164: * Create a new reification ID by using a name part and an
2165: * incremental counter m_iReificationCounter.
2166: */
2167: PUBLIC String HTRDF_newReificationID (HTRDF *me)
2168: {
2169: String nsid = NULL;
2170: char nsrc[20];
2171: me->m_iReificationCounter++;
2172: sprintf(nsrc, "%d", me->m_iReificationCounter);
2173: if (!me->m_sSource) {
2174: StrAllocMCopy(&nsid, "genid", nsrc, NULL);
2175: } else {
2176: StrAllocMCopy(&nsid, me->m_sSource, "#genid", nsrc, NULL);
2177: }
2178: return nsid;
2179: }
2180:
2181: /*
2182: * reificate creates one new node and four new triples
2183: * and returns the ID of the new node
2184: */
2185:
2186: PRIVATE String HTRDF_reificate(HTRDF *me, String sPredicate, String sSubject,
2187: String sObject, String sNodeID)
2188: {
2189: String sName = NULL;
2190: String pName = NULL;
2191: String oName = NULL;
2192: String tName = NULL;
2193: String stName = NULL;
2194: String tNodeID = NULL;
2195:
2196: if (!sNodeID)
2197: tNodeID = HTRDF_newReificationID(me);
2198: else
2199: StrAllocCopy(tNodeID, sNodeID);
2200:
2201: StrAllocMCopy(&sName, RDFMS, "subject", NULL);
2202: StrAllocMCopy(&pName, RDFMS, "predicate", NULL);
2203: StrAllocMCopy(&oName, RDFMS, "object", NULL);
2204: StrAllocMCopy(&tName, RDFMS, "type", NULL);
2205: StrAllocMCopy(&stName, RDFMS, "Statement", NULL);
2206:
2207: /*
2208: * The original statement must remain in the data model
2209: */
2210: HTRDF_addTriple(me, sPredicate, sSubject, sObject);
2211:
2212: /*
2213: * Do not reificate reificated properties
2214: */
2215: if (strcmp(sPredicate, sName) && strcmp(sPredicate, pName) &&
2216: strcmp(sPredicate, oName) && strcmp(sPredicate, tName)) {
2217:
2218: /* Reificate by creating 4 new triples */
2219: HTRDF_addTriple(me, pName, tNodeID, sPredicate);
2220: HTRDF_addTriple(me, sName, tNodeID, (sSubject[0]=='\0' ? me->m_sSource: sSubject));
2221: HTRDF_addTriple(me, oName, tNodeID, sObject);
2222: HTRDF_addTriple(me, tName, tNodeID, stName);
2223: } else
2224: HT_FREE(tNodeID);
2225:
2226: HT_FREE(sName);
2227: HT_FREE(pName);
2228: HT_FREE(oName);
2229: HT_FREE(tName);
2230: HT_FREE(stName);
2231:
2232: return tNodeID;
2233: }
2234: /*
2235: * Create a new triple and add it to the m_triples List
2236: * Send the triple to the Output stream
2237: */
2238:
2239: PUBLIC void HTRDF_addTriple (HTRDF *me, String sPredicate, String sSubject,
2240: String sObject)
2241: {
2242: HTTriple *t = NULL;
2243:
2244: /*
2245: * If there is no subject (about=""), then use the URI/filename where
2246: * the RDF description came from
2247: */
2248: if (!sPredicate || !sSubject || !sObject) {
2249: HTPrint("Predicate %s when subject %s and object %s \n",
2250: sPredicate ? sPredicate : "null",
2251: sSubject ? sSubject : "null",
2252: sObject ? sObject : "null");
2253: return;
2254: }
2255:
2256: if (sSubject[0]=='\0')
2257: sSubject = me->m_sSource;
2258:
2259: t = HTTriple_new(sPredicate, sSubject, sObject);
2260:
2261: /* Call the triple callback handler (if any) with this new triple */
2262: if (me->newTripleInstance && t) (*(me->newTripleInstance))(me, t, me->tripleContext);
2263:
2264: HTList_addObject(me->m_triples, t);
2265: }
2266:
2267: /*
2268: * createBags method allows one to determine whether SiRPAC
2269: * produces Bag instances for each Description block.
2270: * The default setting is not to generate them.
2271: */
2272:
2273: PUBLIC void HTRDF_createBags(HTRDF *me, BOOL b)
2274: {
2275: if (me)
2276: me->m_bCreateBags = b;
2277: }
2278:
2279: /*
2280: Set output stream for RDF parser
2281: */
2282:
2283: PUBLIC void HTRDF_setOutputStream(HTRDF *me, HTStream *ostream)
2284: {
2285: if (me)
2286: me->ostream = ostream;
2287: }
2288:
2289: PUBLIC BOOL HTRDF_registerNewTripleCallback (HTRDF * me, HTTripleCallback_new * cbf, void * context)
2290: {
2291: if (me) {
2292: me->newTripleInstance = cbf;
2293: me->tripleContext = context;
2294: return YES;
2295: }
2296: return NO;
2297: }
2298:
2299: PUBLIC BOOL HTRDF_registerNewParserCallback (HTRDFCallback_new * me, void * context)
2300: {
2301: RDFInstance = me;
2302: RDFInstanceContext = context;
2303: return YES;
2304: }
2305:
2306: /* ------------------------------------------------------------------------- */
2307: /* HTRDFTriples STREAM HANDLERS */
2308: /* ------------------------------------------------------------------------- */
2309:
2310: PRIVATE int generate_triples(HTStream *me)
2311: {
2312: HTRDF *rdfp = me ? me->rdfparser : NULL;
2313: if (rdfp) {
2314:
2315: HTRDF_resolve(rdfp);
2316:
2317: HTRDF_processXML(rdfp, HTRDF_root(rdfp));
2318:
2319: return HT_OK;
2320: }
2321: return HT_ERROR;
2322: }
2323:
2324: PRIVATE int HTRDFTriples_flush (HTStream * me)
2325: {
2326: if (me->target)
2327: return (*me->target->isa->flush)(me->target);
2328: return HT_OK;
2329: }
2330:
2331: PRIVATE int HTRDFTriples_free (HTStream * me)
2332: {
2333: int status = HT_OK;
2334:
2335: status = generate_triples(me);
2336:
2337: HTRDF_delete(me->rdfparser);
2338:
2339: if (me->target) {
2340: if ((status = (*me->target->isa->_free)(me->target)) == HT_WOULD_BLOCK)
2341: return HT_WOULD_BLOCK;
2342: }
2343: HTTRACE(XML_TRACE, "RDF Parser.. FREEING...\n");
2344: HT_FREE(me);
2345: return status;
2346: }
2347:
2348: PRIVATE int HTRDFTriples_abort (HTStream * me, HTList * e)
2349: {
2350: HTTRACE(XML_TRACE, "RDF Parser.. ABORTING...\n");
2351: HTRDF_delete(me->rdfparser);
2352: if (me->target)
2353: (*me->target->isa->abort)(me->target, NULL);
2354: HT_FREE(me);
2355: return HT_ERROR;
2356: }
2357:
2358: PRIVATE int HTRDFTriples_write (HTStream * me, const char * buf, int len)
2359: {
2360: return HT_OK;
2361: }
2362:
2363: PRIVATE int HTRDFTriples_putCharacter (HTStream * me, char c)
2364: {
2365: return HTRDFTriples_write(me, &c, 1);
2366: }
2367:
2368: PRIVATE int HTRDFTriples_putString (HTStream * me, const char * s)
2369: {
2370: return HTRDFTriples_write(me, s, (int) strlen(s));
2371: }
2372:
2373: PRIVATE const HTStreamClass HTRDFTriplesClass =
2374: {
2375: "rdf",
2376: HTRDFTriples_flush,
2377: HTRDFTriples_free,
2378: HTRDFTriples_abort,
2379: HTRDFTriples_putCharacter,
2380: HTRDFTriples_putString,
2381: HTRDFTriples_write
2382: };
2383:
2384: PRIVATE HTStream * RDFParser_new (HTRequest * request,
2385: void * param,
2386: HTFormat input_format,
2387: HTFormat output_format,
2388: HTStream * output_stream)
2389: {
2390: HTStream * me = NULL;
2391: if ((me = (HTStream *) HT_CALLOC(1, sizeof(HTStream))) == NULL)
2392: HT_OUTOFMEM("HTRDFTriples_new");
2393: me->isa = &HTRDFTriplesClass;
2394: me->state = HT_OK;
2395: me->request = request;
2396: me->target = output_stream ? output_stream : HTErrorStream();
2397:
2398: /* Now create the RDF parser instance */
2399: if ((me->rdfparser = HTRDF_new()) == NULL) {
2400: HT_FREE(me);
2401: return HTErrorStream();
2402: }
2403:
2404: /* Set the source (I guess mostly to follow SiRPAC API) */
2405: {
2406: char * uri = HTAnchor_address((HTAnchor *) HTRequest_anchor(request));
2407: HTRDF_setSource(me->rdfparser, uri);
2408: HT_FREE(uri);
2409: }
2410:
2411: /* Where are we putting data? */
2412: HTRDF_setOutputStream(me->rdfparser, me);
2413:
2414: /* If you want to create Bags, change it to YES */
2415: HTRDF_createBags(me->rdfparser, NO);
2416:
2417: /* Register our new XML Instance handler */
2418: /* @@@ THIS SHOULD BE DONE USING XML NAMESPACE SO THAT WE DON'T CONFLICT @@@ */
2419: HTXMLCallback_registerNew(rdf_newInstance, me->rdfparser);
2420:
2421: HTTRACE(XML_TRACE, "RDF Parser.. Stream created\n");
2422:
2423: return me;
2424: }
2425:
2426: PUBLIC HTStream * HTRDFParser_new (HTRequest * request,
2427: void * param,
2428: HTFormat input_format,
2429: HTFormat output_format,
2430: HTStream * output_stream)
2431: {
2432: return HTXML_new(request, param, input_format, output_format,
2433: RDFParser_new(request, param, input_format, output_format, output_stream));
2434: }
2435:
2436: PRIVATE void triple_newInstance (HTRDF * rdfp, HTTriple * t, void * context)
2437: {
2438: if (rdfp && t) {
2439: HTStream *ostream = rdfp->ostream;
2440: if (ostream) {
2441: PUTC(ostream,'(');
2442: PUTS(ostream, t->m_sPredicate);
2443: PUTC(ostream,',');
2444: PUTS(ostream, t->m_sSubject);
2445: PUTC(ostream,',');
2446: PUTS(ostream, t->m_sObject);
2447: PUTC(ostream,')');
2448: PUTC(ostream,'\n');
2449: }
2450: }
2451: }
2452:
2453: PUBLIC HTStream * HTRDFToTriples (HTRequest * request,
2454: void * param,
2455: HTFormat input_format,
2456: HTFormat output_format,
2457: HTStream * output_stream)
2458: {
2.2 ! frystyk 2459: HTStream * me = RDFParser_new(request, param, input_format, output_format, output_stream);
2.1 frystyk 2460: HTTRACE(XML_TRACE, "RDF Converter. To Triples\n");
2461:
2462: /* Register our own tripple instance handler */
2463: HTRDF_registerNewTripleCallback(me->rdfparser, triple_newInstance, NULL);
2464:
2465: /* Create an XML parser instance and return */
2466: return HTXML_new(request, param, input_format, output_format, me);
2467: }
2468:
Webmaster