Annotation of libwww/Library/src/HTMLPDTD.c, revision 2.15
2.12 frystyk 1: /* HTMLPDTD.c
2: ** STATIC DTD FOR HTML
3: **
4: ** (c) COPYRIGHT CERN 1994.
5: ** Please first read the full copyright statement in the file COPYRIGH.
2.1 timbl 6: **
7: ** 6 Nov 93 MD Increased size of img_attr array to make space
8: ** for terminator.
9: */
10:
2.14 frystyk 11: /* Library include files */
12: #include "tcp.h"
13: #include "HTUtils.h"
2.1 timbl 14: #include "HTMLPDTD.h"
15:
16: /* Entity Names
17: ** ------------
18: **
19: ** This table must be matched exactly with ALL the translation tables
20: */
2.11 timbl 21:
2.10 timbl 22: static CONST char* entities[HTML_ENTITIES] = {
2.1 timbl 23: "AElig", /* capital AE diphthong (ligature) */
24: "Aacute", /* capital A, acute accent */
25: "Acirc", /* capital A, circumflex accent */
26: "Agrave", /* capital A, grave accent */
27: "Aring", /* capital A, ring */
28: "Atilde", /* capital A, tilde */
29: "Auml", /* capital A, dieresis or umlaut mark */
30: "Ccedil", /* capital C, cedilla */
31: "ETH", /* capital Eth, Icelandic */
32: "Eacute", /* capital E, acute accent */
33: "Ecirc", /* capital E, circumflex accent */
34: "Egrave", /* capital E, grave accent */
35: "Euml", /* capital E, dieresis or umlaut mark */
36: "Iacute", /* capital I, acute accent */
37: "Icirc", /* capital I, circumflex accent */
38: "Igrave", /* capital I, grave accent */
39: "Iuml", /* capital I, dieresis or umlaut mark */
40: "Ntilde", /* capital N, tilde */
41: "Oacute", /* capital O, acute accent */
42: "Ocirc", /* capital O, circumflex accent */
43: "Ograve", /* capital O, grave accent */
44: "Oslash", /* capital O, slash */
45: "Otilde", /* capital O, tilde */
46: "Ouml", /* capital O, dieresis or umlaut mark */
47: "THORN", /* capital THORN, Icelandic */
48: "Uacute", /* capital U, acute accent */
49: "Ucirc", /* capital U, circumflex accent */
50: "Ugrave", /* capital U, grave accent */
51: "Uuml", /* capital U, dieresis or umlaut mark */
52: "Yacute", /* capital Y, acute accent */
53: "aacute", /* small a, acute accent */
54: "acirc", /* small a, circumflex accent */
55: "aelig", /* small ae diphthong (ligature) */
56: "agrave", /* small a, grave accent */
57: "amp", /* ampersand */
58: "aring", /* small a, ring */
59: "atilde", /* small a, tilde */
60: "auml", /* small a, dieresis or umlaut mark */
61: "ccedil", /* small c, cedilla */
62: "eacute", /* small e, acute accent */
63: "ecirc", /* small e, circumflex accent */
64: "egrave", /* small e, grave accent */
65: "eth", /* small eth, Icelandic */
66: "euml", /* small e, dieresis or umlaut mark */
67: "gt", /* greater than */
68: "iacute", /* small i, acute accent */
69: "icirc", /* small i, circumflex accent */
70: "igrave", /* small i, grave accent */
71: "iuml", /* small i, dieresis or umlaut mark */
72: "lt", /* less than */
73: "ntilde", /* small n, tilde */
74: "oacute", /* small o, acute accent */
75: "ocirc", /* small o, circumflex accent */
76: "ograve", /* small o, grave accent */
77: "oslash", /* small o, slash */
78: "otilde", /* small o, tilde */
2.10 timbl 79: "ouml", /* small o, dieresis or umlaut mark */
80: "quot", /* double quote sign - June 1994 */
2.1 timbl 81: "szlig", /* small sharp s, German (sz ligature) */
82: "thorn", /* small thorn, Icelandic */
83: "uacute", /* small u, acute accent */
84: "ucirc", /* small u, circumflex accent */
85: "ugrave", /* small u, grave accent */
86: "uuml", /* small u, dieresis or umlaut mark */
87: "yacute", /* small y, acute accent */
88: "yuml", /* small y, dieresis or umlaut mark */
89: };
90:
91:
92:
93: /* Attribute Lists
94: ** ---------------
95: **
96: ** Lists must be in alphabetical order by attribute name
97: ** The tag elements contain the number of attributes
98: */
99: static attr no_attr[1] =
100: {{ 0 }};
101:
102: static attr a_attr[HTML_A_ATTRIBUTES+1] = { /* Anchor attributes */
103: { "EFFECT" },
104: { "HREF"},
105: { "ID" },
106: { "METHODS" },
107: { "NAME" }, /* Should be ID */
108: { "PRINT" },
109: { "REL" }, /* Relationship */
110: { "REV" }, /* Reverse relationship */
111: { "SHAPE" },
112: { "TITLE" },
113: { 0 } /* Terminate list */
114: };
115:
116: static attr base_attr[] = { /* BASE attributes */
117: { "HREF"},
118: { 0 } /* Terminate list */
119: };
120:
121:
122: static attr changed_attr[] = {
123: { "ID" },
124: { "IDREF" },
125: { 0 } /* terminate list */
126: };
127:
128: static attr fig_attr[] = { /* Figures */
129: { "ALIGN" },
130: { "ID" },
131: { "INDEX" },
132: { "ISMAP" }, /* @@@ NO! */
133: { "LANG" },
134: { "SRC" },
135: { 0 } /* terminate list */
136: };
137:
138: static attr form_attr[] = { /* General, for many things */
139: { "ACTION" },
140: { "ID" },
141: { "INDEX" },
142: { "LANG" },
143: { "METHOD" },
144: { 0 } /* terminate list */
145: };
146:
147: static attr gen_attr[] = { /* General, for many things */
148: { "ID" },
149: { "INDEX" },
150: { "LANG" },
151: { 0 } /* terminate list */
152: };
153:
154: static attr htmlplus_attr[] = { /* wrapper HTMLPLUS */
155: { "FORMS" },
156: { "VERSION" },
157: { 0 } /* terminate list */
158: };
159:
160: static attr id_attr[2] = {
161: { "ID" },
162: { 0 } /* terminate list */
163: };
164:
165: static attr image_attr[HTML_IMAGE_ATTRIBUTES+1] = { /* Image attributes */
166: { "ALIGN" },
167: { "ISMAP"}, /* Use HTTP SpaceJump instead */
168: { "LANG" },
169: { "SEETHRU"},
170: { "SRC"},
171: { 0 } /* Terminate list */
172: };
173:
174: static attr img_attr[HTML_IMG_ATTRIBUTES+1] = { /* IMG attributes */
175: { "ALIGN" },
176: { "ALT" },
177: { "ISMAP"}, /* Use HTTP SpaceJump instead */
178: { "SEETHRU"},
179: { "SRC"},
180: { 0 } /* Terminate list */
181: };
182:
183: static attr input_attr[HTML_INPUT_ATTRIBUTES+1] = {
184: { "ALIGN" },
185: { "CHECKED" },
186: { "DISABLED" },
187: { "ERROR" },
2.15 ! frystyk 188: { "MAXLENGTH" }, /* WSM bug fix, was MAX */
2.1 timbl 189: { "MIN" },
190: { "NAME" },
191: { "SIZE" },
192: { "SRC" },
193: { "TYPE" },
194: { "VALUE" },
2.2 timbl 195: { "0" }
2.1 timbl 196: };
197:
198: static attr l_attr[] = {
199: { "ALIGN"},
200: { "ID" },
201: { "LANG" },
202: { "INDEX" },
203: { 0 } /* Terminate list */
204: };
205:
206: static attr li_attr[] = {
207: { "ID" },
208: { "LANG" },
209: { "INDEX" },
210: { "SRC"},
211: { 0 } /* Terminate list */
212: };
213:
214: static attr link_attr[HTML_LINK_ATTRIBUTES+1] = { /* link attributes */
215: { "HREF"},
216: { "IDREF" },
217: { "METHODS" },
218: { "REL" }, /* Relationship */
219: { "REV" }, /* Reverse relationship */
220: { 0 } /* Terminate list */
221: };
222:
223: static attr list_attr[] = {
224: { "COMPACT"},
225: { "ID" },
226: { "LANG" },
227: { "INDEX" },
228: { 0 } /* Terminate list */
229: };
230:
2.3 frystyk 231: static attr glossary_attr[HTML_DL_ATTRIBUTES+1] = {
232: { "ID" },
233: { "COMPACT " },
234: { "INDEX" },
2.1 timbl 235: { 0 } /* Terminate list */
236: };
237:
238: static attr nextid_attr[HTML_NEXTID_ATTRIBUTES+1] = {
239: { "N" },
240: { 0 } /* Terminate list */
241: };
242:
243: static attr note_attr[HTML_NOTE_ATTRIBUTES+1] = { /* Footnotes etc etc */
244: { "ID" },
245: { "INDEX" },
246: { "LANG" },
247: { "ROLE" },
248: { 0 } /* terminate list */
249: };
250:
251: static attr option_attr[HTML_OPTION_ATTRIBUTES+1] = {
252: { "DISABLED" },
253: { "LANG" },
254: { "SELECTED" },
255: { 0 }
256: };
257:
#ifdef OLD_CODE
/* PRE attributes.  Compiled only when OLD_CODE is defined; the "PRE"
** entry of the element table uses gen_attr instead. */
static attr pre_attr[HTML_PRE_ATTRIBUTES+1] = {
    { "WIDTH" },
    { 0 }			/* Terminate list */
};
#endif
264:
265: static attr render_attr[HTML_RENDER_ATTRIBUTES+1] = {
266: { "STYLE" },
267: { "TAG" },
268: { 0 } /* Terminate list */
269: };
270:
271: static attr select_attr[HTML_SELECT_ATTRIBUTES+1] = {
272: { "ERROR" },
273: { "LANG" },
2.15 ! frystyk 274: { "MULTIPLE" }, /* WSM bug fix, was SEVERAL */
2.1 timbl 275: { "NAME" },
2.15 ! frystyk 276: { "SIZE" }, /* WSM bug fix, missing */
2.1 timbl 277: { 0 },
278: };
279:
280: static attr tab_attr[HTML_TAB_ATTRIBUTES+1] = {
281: { "ALIGN" },
282: { "AT" },
283: { 0 }
284: };
285:
286: static attr table_attr[HTML_TABLE_ATTRIBUTES+1] = {
287: { "BORDER" },
288: { "ID" },
289: { "INDEX" },
290: { "LANG" },
291: { 0 }
292: };
293:
294: static attr td_attr[HTML_TD_ATTRIBUTES+1] = {
295: { "ALIGN" },
296: { "COLSPAN" },
297: { "ROWSPAN" },
298: { "TAG" },
299: { 0 }
300: };
301:
302: static attr textarea_attr[HTML_TEXTAREA_ATTRIBUTES+1] = {
303: { "COLS" },
304: { "DISABLED" },
305: { "ERROR" },
306: { "LANG" },
307: { "NAME" },
308: { "ROWS" },
309: { 0 }
310: };
311:
312: static attr ul_attr[HTML_UL_ATTRIBUTES+1] = {
313: { "COMPACT" },
314: { "ID" },
315: { "INDEX" },
316: { "LANG" },
317: { "PLAIN" },
318: { "WRAP" },
319: { 0 }
320: };
321:
322:
323: /* Elements
324: ** --------
325: **
326: ** Must match definitions in HTMLPDTD.html!
327: ** Must be in alphabetical order.
328: **
329: ** HTML is included to allow HTML documents to be parsed as a subset.
2.4 frystyk 330: ** TITLE is changed to type SGML_MIXED so it get parsed as well
331: ** Henrik 08/03-94
2.1 timbl 332: **
333: ** Name, Attributes, content
334: */
335: static HTTag tags[HTMLP_ELEMENTS] = {
336: { "A" , a_attr, HTML_A_ATTRIBUTES, SGML_MIXED },
337: { "ABBREV" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED },
338: { "ABSTRACT" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED },
339: { "ACRONYM" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED },
340: { "ADDED" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED },
341: { "ADDRESS" , no_attr, 0, SGML_MIXED },
342: { "ARG" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED },
343: { "B" , no_attr, 0, SGML_MIXED },
344: { "BASE" , base_attr, HTML_BASE_ATTRIBUTES, SGML_MIXED },
345: { "BLOCKQUOTE", no_attr, 0, SGML_MIXED },
346: { "BODY" , no_attr, 0, SGML_MIXED },
347: { "BOX" , no_attr, 0, SGML_MIXED },
348: { "BR" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_EMPTY },
349: { "BYLINE" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED },
350: { "CAPTION" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED },
351: { "CHANGED" , changed_attr, HTML_CHANGED_ATTRIBUTES, SGML_MIXED },
352: { "CITE" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED },
353: { "CMD" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED },
354: { "CODE" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED },
355: { "COMMENT" , no_attr, 0, SGML_MIXED },
356: { "DD" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_EMPTY },
357: { "DFN" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED },
358: { "DIR" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED },
359: { "DL" , glossary_attr,HTML_DL_ATTRIBUTES, SGML_MIXED },
360: { "DT" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_EMPTY },
361: { "EM" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED },
362: { "FIG" , fig_attr, HTML_FIG_ATTRIBUTES, SGML_MIXED },
363: { "FOOTNOTE" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED },
364: { "FORM" , form_attr, HTML_FORM_ATTRIBUTES, SGML_MIXED },
365: { "H1" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED },
366: { "H2" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED },
367: { "H3" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED },
368: { "H4" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED },
369: { "H5" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED },
370: { "H6" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED },
371: { "H7" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED },
372: { "HEAD" , no_attr, 0, SGML_MIXED },
373: { "HR" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_EMPTY },
374: { "HTML" , no_attr, 0, SGML_MIXED }, /* */
2.2 timbl 375: { "HTMLPLUS", htmlplus_attr,HTML_HTMLPLUS_ATTRIBUTES, SGML_MIXED },
2.1 timbl 376: { "I" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED },
377: { "IMAGE" , image_attr, HTML_IMAGE_ATTRIBUTES, SGML_EMPTY },
2.7 frystyk 378: { "IMG" , img_attr, HTML_IMG_ATTRIBUTES, SGML_EMPTY },
2.1 timbl 379: { "INPUT" , input_attr, HTML_INPUT_ATTRIBUTES, SGML_EMPTY },
380: { "ISINDEX" , no_attr, 0, SGML_EMPTY },
381: { "KBD" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED },
382: { "L" , l_attr, HTML_L_ATTRIBUTES, SGML_MIXED },
383: { "LI" , li_attr, HTML_LI_ATTRIBUTES, SGML_EMPTY },
384: { "LINK" , link_attr, HTML_LINK_ATTRIBUTES, SGML_EMPTY },
385: { "LISTING" , no_attr, 0, SGML_LITERAL },
386: { "LIT" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED },
387: { "MARGIN" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED },
388: { "MATH" , id_attr, HTML_ID_ATTRIBUTE, SGML_MIXED },
389: { "MENU" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED },
390: { "NEXTID" , nextid_attr, 1, SGML_EMPTY },
391: { "NOTE" , note_attr, HTML_NOTE_ATTRIBUTES, SGML_EMPTY },
392: { "OL" , list_attr, HTML_LIST_ATTRIBUTES, SGML_MIXED },
2.8 timbl 393: { "OPTION" , option_attr, HTML_OPTION_ATTRIBUTES, SGML_EMPTY },/*Mixed?*/
2.1 timbl 394: { "OVER" , no_attr, 0, SGML_MIXED },
395: { "P" , l_attr, HTML_L_ATTRIBUTES, SGML_EMPTY },
396: { "PERSON" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED },
397: { "PLAINTEXT", no_attr, 0, SGML_LITERAL },
398: { "PRE" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED },
399: { "Q" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED },
400: { "QUOTE" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED },
401: { "RENDER" , render_attr, HTML_RENDER_ATTRIBUTES, SGML_MIXED },
402: { "REMOVED" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED },
403: { "S" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED },
404: { "SAMP" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED },
405: { "SELECT" , select_attr, HTML_SELECT_ATTRIBUTES, SGML_MIXED },
406: { "STRONG" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED },
407: { "SUB" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED },
408: { "SUP" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED },
409: { "TAB" , tab_attr, HTML_TAB_ATTRIBUTES, SGML_EMPTY },
410: { "TABLE" , table_attr, HTML_TABLE_ATTRIBUTES, SGML_MIXED },
411: { "TD" , td_attr, HTML_TABLE_ATTRIBUTES, SGML_MIXED },
412: { "TEXTAREA", textarea_attr,HTML_TEXTAREA_ATTRIBUTES,SGML_MIXED},
413: { "TH" , td_attr, HTML_TD_ATTRIBUTES, SGML_MIXED },
2.4 frystyk 414: { "TITLE" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED },
2.1 timbl 415: { "TR" , id_attr, HTML_ID_ATTRIBUTE, SGML_MIXED },
416: { "TT" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED },
417: { "U" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED },
418: { "UL" , ul_attr, HTML_UL_ATTRIBUTES, SGML_MIXED },
419: { "VAR" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED },
420: { "XMP" , no_attr, 0, SGML_LITERAL }
421: };
422:
423:
#ifdef OLD_CODE
/* Plain-HTML DTD descriptor.  Compiled only when OLD_CODE is defined. */
PUBLIC CONST SGML_dtd HTML_dtd = {
    tags,				/* element table */
    HTML_ELEMENTS,			/* element count */
    entities,				/* entity name table */
    sizeof(entities)/sizeof(char**)	/* entity count */
};
#endif
432:
433: PUBLIC CONST SGML_dtd HTMLP_dtd = {
434: tags,
435: HTMLP_ELEMENTS,
436: entities,
437: sizeof(entities)/sizeof(char**)
438: };
439:
440: /* Utility Routine: useful for people building HTML objects */
441:
442: /* Start anchor element
443: ** --------------------
444: **
445: ** It is kinda convenient to have a particular routine for
446: ** starting an anchor element, as everything else for HTML is
447: ** simple anyway.
448: */
449: struct _HTStructured {
450: HTStructuredClass * isa;
451: /* ... */
452: };
453:
454: PUBLIC void HTStartAnchor ARGS3(HTStructured *, obj,
455: CONST char *, name,
456: CONST char *, href)
457: {
458: BOOL present[HTML_A_ATTRIBUTES];
459: CONST char* value[HTML_A_ATTRIBUTES];
460:
461: {
462: int i;
463: for(i=0; i<HTML_A_ATTRIBUTES; i++)
464: present[i] = NO;
465: }
466: if (name) {
467: present[HTML_A_NAME] = YES;
468: value[HTML_A_NAME] = name;
469: }
470: if (href) {
471: present[HTML_A_HREF] = YES;
472: value[HTML_A_HREF] = href;
473: }
474:
475: (*obj->isa->start_element)(obj, HTML_A , present, value);
476:
477: }
478:
2.5 frystyk 479: /* Utility Routine: useful for people building HTML objects */
480:
481: /* Put image element
482: ** --------------------
483: **
484: ** Hopefully as usefull as HTStartAnchor. Henrik 23/03-94
485: */
486: PUBLIC void HTMLPutImg ARGS4(HTStructured *, obj,
487: CONST char *, src,
488: CONST char *, alt,
489: CONST char *, align)
490: {
491: BOOL present[HTML_IMG_ATTRIBUTES];
492: CONST char* value[HTML_IMG_ATTRIBUTES];
493:
494: {
495: int i;
496: for(i=0; i<HTML_IMG_ATTRIBUTES; i++)
497: present[i] = NO;
498: }
499: if (src) {
500: present[HTML_IMG_SRC] = YES;
501: value[HTML_IMG_SRC] = src;
502: }
503: if (alt) {
504: present[HTML_IMG_ALT] = YES;
505: value[HTML_IMG_ALT] = alt;
506: }
507: if (align) {
508: present[HTML_IMG_ALIGN] = YES;
509: value[HTML_IMG_ALIGN] = align;
510: }
511: (*obj->isa->start_element)(obj, HTML_IMG , present, value);
2.7 frystyk 512: /* (*obj->isa->end_element)(obj, HTML_IMG); */
2.5 frystyk 513: }
514:
515:
2.1 timbl 516: PUBLIC void HTNextID ARGS2(HTStructured *, obj,
2.9 timbl 517: CONST char *, next_one)
2.1 timbl 518: {
519: BOOL present[HTML_NEXTID_ATTRIBUTES];
520: CONST char* value[HTML_NEXTID_ATTRIBUTES];
521: {
522: int i;
523: for(i=0; i<HTML_NEXTID_ATTRIBUTES; i++)
524: present[i] = NO;
525: }
526: present[HTML_NEXTID_N] = YES;
2.9 timbl 527: value[HTML_NEXTID_N] = next_one;
2.1 timbl 528:
529: (*obj->isa->start_element)(obj, HTML_NEXTID , present, value);
530:
531: }
2.5 frystyk 532:
533:
2.1 timbl 534:
Webmaster