Hi,
mostly just wanted to report my observation:
it seems to me that sgml_parse wrongly flags end tags as explicit, because at the same time it emits a warning that those same end tags were omitted. I can certainly work around this by handling the warnings, although not exactly easily. My example code is listed below, followed by its output and then the input file it was run on:
% Load the SGML/XML parser library. This must be a directive (":- ..."):
% without the prefix the term is read as a clause for use_module/1.
:- use_module(library(sgml)).
% Run main/1 as the program entry point.
:- initialization(main, main).
%!  main(+Argv) is semidet.
%
%   Entry point. Argv must be a one-element list holding the name of the
%   file to parse. The file is parsed with an SGML parser; every error or
%   warning is reported through print_error/3 and every end tag through
%   print_term/2.
%
%   Both the input stream and the parser are released through
%   setup_call_cleanup/3, so they are cleaned up even when parsing fails
%   or raises an exception.
main([Filename]) :-
    setup_call_cleanup(
        open(Filename, read, In),
        setup_call_cleanup(
            new_sgml_parser(Parser, []),
            (   % Attach the file name to the parser.
                set_sgml_parser(Parser, file(Filename)),
                % NOTE(review): the sample input declares the DOCTYPE
                % NETSCAPE-Bookmark-file-1, while html is set here --
                % confirm this is the intended doctype.
                set_sgml_parser(Parser, doctype(html)),
                sgml_parse(Parser,
                           [ source(In),
                             call(error, print_error),
                             call(end, print_term)
                           ])
            ),
            free_sgml_parser(Parser)),
        close(In)).
%!  print_error(?Severity, ?Message, ?Parser) is det.
%
%   Error callback. The "DTD does not exist" error for the Netscape
%   bookmark doctype is silently ignored; any other Severity/Message pair
%   is written as a two-element list on its own line. The third argument
%   is not used.
print_error(Severity, Message, _Parser) :-
    (   Severity = error,
        Message = 'DTD "netscape-bookmark-file-1" does not exist'
    ->  true
    ;   writeln([Severity, Message])
    ).
%!  print_term(+Tag, +Parser) is det.
%
%   End-tag callback. Asks Parser for the event class of the current
%   event and writes it together with Tag; if the parser does not report
%   an event class, a 'no class: ' line is written instead.
print_term(Tag, Parser) :-
    (   get_sgml_parser(Parser, event_class(Class))
    ->  Line = [Class, ' actually: ', Tag]
    ;   Line = ['no class: ', Tag]
    ),
    writeln(Line).
output:
[explicit, actually: ,title]
[explicit, actually: ,h1]
[explicit, actually: ,h3]
[explicit, actually: ,p]
[warning,Inserted omitted end-tag for "p"]
[explicit, actually: ,dl]
[explicit, actually: ,h3]
[explicit, actually: ,p]
[warning,Inserted omitted end-tag for "p"]
[explicit, actually: ,dl]
[explicit, actually: ,h3]
[explicit, actually: ,a]
[explicit, actually: ,a]
[explicit, actually: ,a]
[explicit, actually: ,a]
[explicit, actually: ,a]
[explicit, actually: ,a]
[explicit, actually: ,a]
[explicit, actually: ,a]
[explicit, actually: ,dt]
[warning,Inserted omitted end-tag for "dt"]
[explicit, actually: ,dt]
[warning,Inserted omitted end-tag for "dt"]
[explicit, actually: ,dt]
[warning,Inserted omitted end-tag for "dt"]
[explicit, actually: ,dt]
[warning,Inserted omitted end-tag for "dt"]
[explicit, actually: ,dt]
[warning,Inserted omitted end-tag for "dt"]
[explicit, actually: ,dt]
[warning,Inserted omitted end-tag for "dt"]
[explicit, actually: ,dt]
[warning,Inserted omitted end-tag for "dt"]
[explicit, actually: ,dt]
[warning,Inserted omitted end-tag for "dt"]
[explicit, actually: ,p]
[warning,Inserted omitted end-tag for "p"]
[explicit, actually: ,dl]
[explicit, actually: ,p]
[warning,Inserted omitted end-tag for "p"]
[explicit, actually: ,dt]
[warning,Inserted omitted end-tag for "dt"]
[explicit, actually: ,p]
[warning,Inserted omitted end-tag for "p"]
[explicit, actually: ,dt]
[warning,Inserted omitted end-tag for "dt"]
[explicit, actually: ,p]
[warning,Inserted omitted end-tag for "p"]
[explicit, actually: ,dt]
[warning,Inserted omitted end-tag for "dt"]
[explicit, actually: ,p]
[warning,Inserted omitted end-tag for "p"]
[explicit, actually: ,dl]
[warning,Inserted omitted end-tag for "p"]
[omitted, actually: ,p]
[warning,Inserted omitted end-tag for "meta"]
[explicit, actually: ,meta]
input file:
<!DOCTYPE NETSCAPE-Bookmark-file-1>
<!-- This is an automatically generated file.
It will be read and overwritten.
DO NOT EDIT! -->
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
<TITLE>Bookmarks</TITLE>
<H1>Bookmarks</H1>
<DL><p>
<DT><H3>Pasek zakładek</H3>
<DL><p>
</DL><p>
<DT><H3>Menu zakładek</H3>
<DL><p>
</DL><p>
<DT><H3>Nieposortowane zakładki</H3>
<DL><p>
<DT><A HREF="http">Stories: Halloween Memo w/o Raymond</A>
<DT><A HREF="https">The decommoditization of protocols</A>
<DT><A HREF="https">The Monospace Web</A>
<DT><A HREF="https">Ask Ubuntu</A>
<DT><A HREF="https">rums</A>
<DT><A HREF="https">Wiki</A>
<DT><A HREF="https">Task::Kensho</A>
<DT><A HREF="https">Stack Overflow</A>
</DL><p>
</DL><p>
(That is the end of the file. It is just one of those browsers’ Netscape-bookmarks HTML exports, from the Falkon browser in this case.)
P.S. Sorry for shadowing print_term/2, it was a draft