Interesting StackOverflow question on replacing text in HTML. Looking for better way to do this

Here’s my approach, using library(sgml) to parse it into something structured. I was first thinking about XPath, but I don’t think that’s easy to use for editing a document (just for selecting).

:- module(html, []).

:- use_module(library(sgml)).
:- use_module(library(sgml_write)).

change_server_in_footer(HtmlIn, HtmlOut) :-
    open_string(HtmlIn, HtmlInStream),
    load_html(stream(HtmlInStream), DOM, []),
    transform(DOM, HtmlOut).

transform([Elt|Elts], [TElt|TElts]) :- !,
    transform(Elt, TElt),
    transform(Elts, TElts).
transform(element(div, Attrs, Body),
          element(div, Attrs, TBody)) :-
    memberchk(class=footer, Attrs), !,
    replace_path(Body, TBody).
transform(element(E, A, B), element(E, A, TB)) :- !,
    transform(B, TB).
transform(Elt, Elt).

replace_path([E|Es], [TE|TEs]) :- !,
    replace_path(E, TE),
    replace_path(Es, TEs).
replace_path(element(a, Attrs, Body), element(a, EditAttrs, Body)) :- !,
    selectchk(href=_OldPath, Attrs, AttrsRest),
    EditAttrs = [href='http://new.path'|AttrsRest].
replace_path(element(E, A, B), element(E, A, RB)) :- !,
    replace_path(B, RB).
replace_path(X, X).

test :-
    HTML = "
<body>\n
   <a href=\"http://hardcoded/something\">This is ok</a>      \n
   <div class=\"footer\">\n
       <a href=\"http://hardcoded/something\">Change this one</a>     \n
     <span class=\"outer\"><span class=\"inner\">
          <a href=\"http://hardcoded/something\">This too</a>
     </span></span>\n
   </div>\n
</body>\n
    ",
    change_server_in_footer(HTML, HtmlOut),
    html_write(user_output, HtmlOut, []).

Edit: handle more deeply nested links in footer

1 Like