Hello Everyone, I am experiencing a very weird issue with my code. In short, I am writing a parser for an indentation sensitive language that also has left-recursive rules. I came across the PackRat parsing paper Paper Link and figured it would be a great strategy to tackle the problem.
To my surprise, the original version I wrote at home on my ubuntu machine with SWIPL 9.1.3 (compiled locally) manages to parse the file as expected. However, when I run it on my Mac work machine with SWIPL 9.2.2 from brew, I get an unexpected failure. To further complicate matters, when I run the code on a docker container (running on the mac) with the official SWI image w/ version 9.2.2 I also get the (Correct) expected behavior.
The code in question (The complete repository is available Here):
N.B. To simplify analysis I have included two predicates test_tokens/1 and test_parse/1 that avoid having to run the lexer to examine the issue and allows for testing with this file as a standalone file. You can run test_parse/1 to observe the described behavior.
:- module(uvl, [parse/3]).
:- set_prolog_flag(double_quotes, string).
:- set_prolog_flag(encoding, utf8).
:- encoding(utf8).
:- use_module(library(dcg/basics)).
:- use_module(library(dcg/high_order)).
:- use_module(lexer).
%:- use_module(library(tabling)).
:- use_module(library(clpfd)).
%%Main entrypoint for the parser for UVL files
%%Is conformant to the grammar as defined in the uvl-parser
%%repository
parse(File, AST, Stop) :-
lex_uvl(Tokens, File), !,
phrase(uvl(AST), Tokens, Stop).
test_tokens(
[
features,indent,id_strict("Sandwich"),indent,mandatory,indent,id_strict("Bread"),dedent,
optional,indent,id_strict("Sauce"),indent,alternative,indent,id_strict("Ketchup"),
id_strict("Mustard"),dedent,dedent,id_strict("Cheese"),dedent,dedent,dedent,constraints,
indent,id_strict("Ketchup"),impl,id_strict("Cheese"),dedent
]
).
test_parse(AST) :-
test_tokens(Ts),
phrase(uvl(AST), Ts).
uvl(ast(H,F,C)) -->
header(H),
optional(features(F),{F = nil}),
optional(constraints(C),{C = nil}).
header(header(namespace(N),includes(In),imports(Im))) -->
optional(namespace(N),{N = []}),
optional(includes(In),{In = []}),
optional(imports(Im),{Im = []}).
namespace(N) --> [namespace], reference(N), !.
reference(N) --> ([id_strict(N)], !) | ([id_not_strict(N)], !).
fully_qualified_reference(FQN) --> sequence(reference, [dot], FQN), {length(FQN, Len), Len #> 0}.
includes(Is) --> [include, indent], includes_(Is), [dedent].
includes_([I|Is]) --> language_level(I), !, includes_(Is).
includes_([]) --> [].
language_level(lang_level(Major,Minor)) -->
major_level(Major), optional(minor_level(Minor), {Minor = []}).
major_level(Major) -->
([boolean_t], {Major = boolean_level}, ! ) |
([arithmetic_t], {Major = arithmetic_level}, ! ) |
([type_t], {Major = type_level}, ! ).
minor_level(Minor) -->
[dot], !, (
([group_card], {Minor = group_card}, !) |
([feature_card], {Minor = feature_card}, !) |
([agg_fun], {Minor = agg_fun}, !) |
([string_constraints], {Minor = string_constraints}, !) |
([mul], {Minor = minor_all}, !)
).
imports(Is) --> [imports, indent], imports_(Is), [dedent].
imports_([import(NS,Alias)|Is]) -->
reference(NS),
optional(([as], reference(Alias)), {Alias = []}), !,
imports_(Is).
imports_([]) --> [].
features(Fs) --> [features, indent], features_(Fs), [dedent].
features_(
[feature(
name(N), type(FeatureType), cardinality(Card),
attributes(Attrs), group(Gs)
)|Fs]
) -->
optional(feature_type(FeatureType), {FeatureType = boolean}),
fully_qualified_reference(N),
optional(feature_cardinality(Card), {Card = nil}),
optional(attributes(Attrs), {Attrs = nil}),
optional(([indent], groups(Gs), [dedent]), {Gs = nil}), !,
features_(Fs).
features_([]) --> [].
feature_type(boolean) --> [boolean_t], !.
feature_type(integer) --> [integer_t], !.
feature_type(string) --> [string_t], !.
feature_type(real) --> [real_t], !.
feature_cardinality(card(From,To)) -->
[cardinality, lbracket, integer(From)],
optional(([range], ([integer(To)] | ([mul], {To = inf}))), {To = nil}),
[rbracket].
cardinality(card(From,To)) -->
[lbracket, integer(From)],
optional(([range], ([integer(To)] | ([mul], {To = inf}))), {To = nil}),
[rbracket].
attributes(Attrs) --> [lbrace], attributes_(Attrs), [rbrace].
attributes_([A|As]) -->
(value_attr(A) | constraint_attr(A)), [comma], !, attributes_(As).
attributes_([A]) --> (value_attr(A) | constraint_attr(A)), !.
attributes_([]) --> [], !.
value_attr(attr(key(K),val(V))) -->
id(K), optional(value(V), {V = nil}).
value(bool(B)) --> [boolean(B)], !.
value(integer(I)) --> [integer(I)], !.
value(float(F)) --> [float(F)], !.
value(string(S)) --> [normal_string(S)], !.
value(attributes(Attrs)) --> attributes(Attrs), !.
value(vector(V)) --> [lbracket], vector(V), [rbracket], !.
vector([V|Vs]) --> value(V), [comma], !, vector(Vs).
vector([V]) --> value(V), !.
vector([]) --> [], !.
groups([G|Gs]) --> group(G), !, groups(Gs).
groups([]) --> [].
group(or_group(S)) --> [or_group], group_spec(S), !.
group(alternative(S)) --> [alternative], group_spec(S), !.
group(optional(S)) --> [optional], group_spec(S), !.
group(mandatory(S)) --> [mandatory], group_spec(S), !.
group(cardinality(Card,S)) --> cardinality(Card), group_spec(S), !.
group_spec(Fs) --> [indent], features_(Fs), {length(Fs,L), L > 0}, [dedent].
constraint_attr(C) --> [constraint], constraint(C), !.
constraint_attr(Cs) --> [constraints], constraint_list(Cs), !.
:- table constraint/3.
constraint(equation(E)) --> equation(E).
constraint(paren_constraint(C)) --> [lparen], constraint(C), [rparen].
constraint(not_constraint(C)) --> [not], constraint(C).
constraint(and_constraint(C1,C2)) --> constraint(C1), [and], constraint(C2).
constraint(or_constraint(C1,C2)) --> constraint(C1), [or], constraint(C2).
constraint(impl_constraint(C1,C2)) --> constraint(C1), [impl], constraint(C2).
constraint(eq_constraint(C1,C2)) --> constraint(C1), [equivalence], constraint(C2).
constraint(literal_constraint(C)) --> fully_qualified_reference(C).
constraint_list(Cs) --> [lbracket], constraint_list_(Cs), [rbracket].
constraint_list_([C|Cs]) --> constraint(C), [comma], !, constraint_list_(Cs).
constraint_list_([C]) --> constraint(C), !.
constraint_list_([]) --> [], !.
constraints(Cs) --> [constraints, indent], constraints_(Cs), [dedent].
constraints_([C|Cs]) --> constraint(C), !, constraints_(Cs).
constraints_([]) --> [].
:- table equation/3.
equation(equal(E1,E2)) --> expression(E1), [equal], expression(E2).
equation(lt(E1,E2)) --> expression(E1), [lt], expression(E2).
equation(gt(E1,E2)) --> expression(E1), [gt], expression(E2).
equation(lte(E1,E2)) --> expression(E1), [lte], expression(E2).
equation(gte(E1,E2)) --> expression(E1), [gte], expression(E2).
equation(neq(E1,E2)) --> expression(E1), [neq], expression(E2).
:- table expression/3.
expression(E) --> ([float(E)] | [integer(E)] | [string(E)]).
expression(E) --> aggregate_function(E).
expression(E) --> fully_qualified_reference(E).
expression(sub_expression(E)) --> [lparen], expression(E), [rparen].
expression(add(E1,E2)) --> expression(E1), [add], expression(E2).
expression(sub(E1,E2)) --> expression(E1), [sub], expression(E2).
expression(mul(E1,E2)) --> expression(E1), [mul], expression(E2).
expression(div(E1,E2)) --> expression(E1), [div], expression(E2).
aggregate_function(sum(ref(R),ref_op(ROP))) -->
[sum, lparen], optional((fully_qualified_reference(ROP),[comma]), {ROP = nil}),
fully_qualified_reference(R), [rparen], !.
aggregate_function(avg(ref(R),ref_op(ROP))) -->
[avg, lparen], optional((fully_qualified_reference(ROP),[comma]), {ROP = nil}),
fully_qualified_reference(R), [rparen], !.
aggregate_function(len(ref(R))) -->
[len, lparen], fully_qualified_reference(R), [rparen], !.
aggregate_function(floor(ref(R))) -->
[floor, lparen], fully_qualified_reference(R), [rparen], !.
aggregate_function(ceil(ref(R))) -->
[ceil, lparen], fully_qualified_reference(R), [rparen], !.
id(ID) --> [id_strict(ID)] | [id_not_strict(ID)].
Checking the output of the parse on the given example, I observe that the linux output is the following (as expected):
A = ast(header(namespace([]),includes([]),imports([])),
[feature(
name(["Sandwich"]),
type(boolean),
cardinality(nil),
attributes(nil),
group([
mandatory(
[ feature(name(["Bread"]),
type(boolean),
cardinality(nil),
attributes(nil),
group(nil))
]),
optional(
[ feature(name(["Sauce"]),
type(boolean),
cardinality(nil),
attributes(nil),
group([
alternative(
[ feature(name(["Ketchup"]),
type(boolean),
cardinality(nil),
attributes(nil),
group(nil)),
feature(name(["Mustard"]),
type(boolean),
cardinality(nil),
attributes(nil),
group(nil))
])
])),
feature(name(["Cheese"]),
type(boolean),
cardinality(nil),
attributes(nil),
group(nil))
])
]))
],
[impl_constraint(literal_constraint(["Ketchup"]),literal_constraint(["Cheese"]))])
Meanwhile, when I examine the output with phrase/3 to see the rest, I obtain the following partial parse on the mac side:
A = ast(header(namespace([]),includes([]),imports([])),
[ feature(name(["Sandwich"]),
type(boolean),
cardinality(nil),
attributes(nil),
group([ mandatory(
[ feature(name(["Bread"]),
type(boolean),
cardinality(nil),
attributes(nil),
group(nil))
]),
optional(
[ feature(name(["Sauce"]),
type(boolean),
cardinality(nil),
attributes(nil),
group([ alternative(
[ feature(name(["Ketchup"]),
type(boolean),
cardinality(nil),
attributes(nil),
group(nil)),
feature(name(["Mustard"]),
type(boolean),
cardinality(nil),
attributes(nil),
group(nil))
])
])),
feature(name(["Cheese"]),
type(boolean),
cardinality(nil),
attributes(nil),
group(nil))
])
]))
],
nil)
Rest = [constraints,indent,id_strict("Ketchup"),impl,id_strict("Cheese"),dedent]
Steps I’ve taken to try to solve the issue
I observed with the debugger and with phrase/3, that the issue seems to stem from the constraint//1 rule above. To examine this further, I isolated the code into the following and created an even smaller test case available in the example/1 predicate.
:- module(packrat, [constraints/3]).
:- use_module(library(dcg/basics)).
:- use_module(library(dcg/high_order)).
:- use_module(library(tabling)).
:- table constraint/3.
constraint(equation(E)) --> equation(E).
constraint(paren_constraint(C)) --> [lparen], constraint(C), [rparen].
constraint(not_constraint(C)) --> [not], constraint(C).
constraint(and_constraint(C1,C2)) --> constraint(C1), [and], constraint(C2).
constraint(or_constraint(C1,C2)) --> constraint(C1), [or], constraint(C2).
constraint(impl_constraint(C1,C2)) --> constraint(C1), [impl], constraint(C2).
constraint(eq_constraint(C1,C2)) --> constraint(C1), [equivalence], constraint(C2).
constraint(literal_constraint(C)) --> fully_qualified_reference(C).
constraint_list(Cs) --> [lbracket], constraint_list_(Cs), [rbracket].
constraint_list_([C|Cs]) --> constraint(C), [comma], !, constraint_list_(Cs).
constraint_list_([C]) --> constraint(C), !.
constraint_list_([]) --> [], !.
constraints(Cs) --> [constraints, indent], constraints_(Cs), [dedent].
constraints_([C|Cs]) --> constraint(C), !, constraints_(Cs).
constraints_([]) --> [].
:- table equation/3.
equation(equal(E1,E2)) --> expression(E1), [equal], expression(E2).
equation(lt(E1,E2)) --> expression(E1), [lt], expression(E2).
equation(gt(E1,E2)) --> expression(E1), [gt], expression(E2).
equation(lte(E1,E2)) --> expression(E1), [lte], expression(E2).
equation(gte(E1,E2)) --> expression(E1), [gte], expression(E2).
equation(neq(E1,E2)) --> expression(E1), [neq], expression(E2).
:- table expression/3.
expression(E) --> ([float(E)] | [integer(E)] | [string(E)]).
expression(E) --> aggregate_function(E).
expression(E) --> fully_qualified_reference(E).
expression(sub_expression(E)) --> [lparen], expression(E), [rparen].
expression(add(E1,E2)) --> expression(E1), [add], expression(E2).
expression(sub(E1,E2)) --> expression(E1), [sub], expression(E2).
expression(mul(E1,E2)) --> expression(E1), [mul], expression(E2).
expression(div(E1,E2)) --> expression(E1), [div], expression(E2).
aggregate_function(sum(ref(R),ref_op(ROP))) -->
[sum, lparen], optional((fully_qualified_reference(ROP),[comma]), {ROP = nil}),
fully_qualified_reference(R), [rparen], !.
aggregate_function(avg(ref(R),ref_op(ROP))) -->
[avg, lparen], optional((fully_qualified_reference(ROP),[comma]), {ROP = nil}),
fully_qualified_reference(R), [rparen], !.
aggregate_function(len(ref(R))) -->
[len, lparen], fully_qualified_reference(R), [rparen], !.
aggregate_function(floor(ref(R))) -->
[floor, lparen], fully_qualified_reference(R), [rparen], !.
aggregate_function(ceil(ref(R))) -->
[ceil, lparen], fully_qualified_reference(R), [rparen], !.
id(ID) --> [id_strict(ID)] | [id_not_strict(ID)].
fully_qualified_reference(R) --> id(R).
example([constraints, indent, id_strict(abc), impl, id_strict(bcd), dedent]).
and ran the following query:
?- example(L), phrase(constraints(C),L).
L = [constraints, indent, id_strict(abc), impl, id_strict(bcd), dedent],
C = [impl_constraint(literal_constraint(abc), literal_constraint(bcd))].
And, to my absolute surprise, this does indeed work BOTH on linux and mac, which makes this issue even more surprising.
I am at a complete loss and haven’t the slightest idea why this may be happening and would appreciate any and all help.
Thank you all for your time.
Sincerely,
K