The end_element function in sgml.c handles ill-nested tags pretty well, ignoring bad end tags or assuming corresponding start tags. The rule used now states that if we encounter an ending tag without the corresponding start tag on the top of the element stack, we do one of the following:
<HTML> <BODY> <UL> <LI> <A...> <B> ... </A> </I> </B> <LI> ... </UL> </BODY> </HTML>
There are two errors in this example:
<HTML> <BODY> <UL> <LI> <A...> <B> ... </B> </A> </UL> </BODY> <LI> ... </HTML>
This is because:
So, after this modification the example is parsed as:
<HTML> <BODY> <UL> <LI> <A...> <B> ... </B> </A> <LI> ... </UL> </BODY> </HTML>
Here's the patch:
/* ** Helper function to check if the tag is on the stack */ PRIVATE BOOL lookup_element_stack (HTElement* stack, HTTag *tag) { HTElement* elem; for (elem = stack; elem != NULL; elem = elem->next) { if (elem->tag == tag) return TRUE; } return FALSE; } /* ** Modified end_element function ** Only one line is added, it's marked with <== */ PRIVATE void end_element (HTStream * context, HTTag * old_tag) { if (SGML_TRACE) TTYPrint(TDEST, "SGML: End </%s>\n", old_tag->name); if (old_tag->contents == SGML_EMPTY) { if (SGML_TRACE) TTYPrint(TDEST,"SGML: Illegal end tag </%s> found.\n", old_tag->name); return; } while (context->element_stack) {/* Loop is error path only */ HTElement * N = context->element_stack; HTTag * t = N->tag; if (old_tag != t) { /* Mismatch: syntax error */ if (context->element_stack->next /* This is not the last level */ && lookup_element_stack(context->element_stack, old_tag)) { /* <== */ if (SGML_TRACE) TTYPrint(TDEST, "SGML: Found </%s> when expecting </%s>. </%s> assumed.\n", old_tag->name, t->name, t->name); } else { /* last level */ if (SGML_TRACE) TTYPrint(TDEST, "SGML: Found </%s> when expecting </%s>. </%s> Ignored.\n", old_tag->name, t->name, old_tag->name); return; /* Ignore */ } } context->element_stack = N->next; /* Remove from stack */ free(N); (*context->actions->end_element)(context->target, t - context->dtd->tags); if (old_tag == t) return; /* Correct sequence */ /* Syntax error path only */ } if (SGML_TRACE) TTYPrint(TDEST, "SGML: Extra end tag </%s> found and ignored.\n", old_tag->name); }Tha't all for now. I have also another proposition concerning SGML module (better handling of <P> tags), but in my opinion it will need some discussion, and I don't know if you are interested in this.