Solving the 8-Puzzle with group theory

@kuniaki.mukai wrote:

Here is an old codes of mine for solving n-puzzle, which is still
in the directory util of pack library pac. The codes produces long
redundant series of basic moves for even parity permutations,
and I have still no idea on how to find minimum (minimal ?) solutions.

I have checked that the codes, which
was written 30 years ago, still works for cgi.

:- module(npuzzle,[]).
% N-puzzle program    K. Mukai SFC Keio
% 1995.6.26 20:22  
% 
%   unsolvable case   
% | ?- npuzzle([[15,14,13,12],[11,10,9,8],[7,6,5,4],[3,2,1,0]],X).
%
%   solvable case
% | ?- npuzzle([[14,15,13,12],[11,10,9,8],[7,6,5,4],[3,2,1,0]],X).
% | ?- npuzzle([[24,23,22,21,20],[19,18,17,16,15],[14,13,12,11,10],
%      [9,8,7,6,5],[4,3,2,1,0]],P).
% :- use_module(library(lists)).

non_member(X,Y):- member(X,Y),!,fail.
non_member(_,_).

% for generating benchmark tests
makepuzzle(N,X):-J is N*N-1, matrix(N,N,X),setnpuzzle(J,X).

matrix(1,N,[X]):-!,length(X,N).
matrix(K,N,[T|R]):- J is K-1, length(T,N), matrix(J,N,R).

setnpuzzle(_,[]):-!.
setnpuzzle(X,[[]|Y]):-!,setnpuzzle(X,Y).
setnpuzzle(X,[[X|Y]|Z]):-!,U is X-1, setnpuzzle(U,[Y|Z]).

test(N,X,L):- 
	makepuzzle(N,A),
        npuzzle(A,P),
	flatten(P,Pf),
	length(Pf,L),
        reverse2(A,[],Ar),
        opposite(P,Po),
        apply(Po,Ar,B),
        reverse2(B,[],X).

% | ?- test(5,X,_).
%
% X = [[ 1, 2, 3, 4, 5],
%      [ 6, 7, 8, 9,10],
%      [11,12,13,14,15],
%      [16,17,18,19,20],
%      [21,22,23,24, 0]] 
%

% main predicate

npuzzle_web(X, 'Result' = Y) :- npuzzle(X,Y).

npuzzle(X,Y):-
	reverse2(X,[],Xr),     % reverse rows and columns
	makelinear(Xr,[_|Z]),  % see as a list of integers
	bubblesort(Z,_,G),     % lists of switches (i,j)
	length(G,N),
	0 =:= N mod 2,         % even permutation means being solvable.
	!,
	switch2cycle(G,L),     % even permutaions to 3-cyclic permutaions.
	cycle2path(L,Xr,_,Y1),
	opposite(Y1,Y2),
	flatten(Y2,Y).			% left-right, up-down  reverse
npuzzle(_,'unsolvable: odd permutation').

cycle2path([],X,X,[]).
cycle2path([X|Y],Z,U,[P,Q]):-
	cycle(X,Z,Z1,P),
	cycle2path(Y,Z1,U,Q).

makelinear([],[]).
makelinear([X|Y],Z):-
	makelinear(Y,Z1),
	append(X,Z1,Z).

bubblesort([],[],[]).
bubblesort([X|Y],Z,U):-
	bubblesort(Y,V,W),
	insert(X,V,Z,P),
	append(W,P,U).

insert(X,[],[X],[]):-!.
insert(X,[Y|Z],[X,Y|Z],[]):-X>Y,!.
insert(X,[Y|Z],[Y|U],[[Y,X]|V]):-insert(X,Z,U,V).

% product of two 2-cyclic permutations is a product of 
% 3-cyclic permutations.
makecyclic([X,Y],[Z,U],[]):-member(X,[Z,U]),member(Y,[Z,U]),!.
makecyclic([X,Y],[X,U],[[X,Y,U]]):-!.
makecyclic([X,Y],[U,X],[[X,Y,U]]):-!.
makecyclic([Y,X],[U,X],[[X,Y,U]]):-!.
makecyclic([Y,X],[X,U],[[X,Y,U]]):-!.
makecyclic([X,U],[Y,Z],[[X,Y,Z],[X,Y,U]]).

switch2cycle([],[]).
switch2cycle([X,Y|Z],U):-
	makecyclic(X,Y,C),
	switch2cycle(Z,U1),
	append(C,U1,U).

cycle(A,[X,Y|Z],[X1,Y1|Z],P):-
	contain(A,[X,Y]),!,
        cycle2n(A,[X,Y],[X1,Y1],P).
cycle(A,X,Y,P):-
	X=[[_,_]|_],!,  % two columns matrix
	zip(X,X1), % for reducing to two rows matrix.
	cycle(A,X1,Y1,P1),
	zip(Y1,Y),
	zip_path(P1,P).
cycle(A,[[0|X],Y|Z],[R1,R2|Z2],[P,d,Q,u,Pi]):-
	cleartop(A,[[0|X],Y],[[0|X1],[U|Y1]],P),
	inverse(P,Pi),
	cycle(A,[[0|Y1]|Z],[[0|Y2]|Z2],Q),
	apply([u,Pi],[[U|X1],[0|Y2]],[R1,R2]).

cleartop(A,[[0,X|R],[Z,U|S]|W],[R1,R2|W],[P,Q]):-
	cleartop1(A,[[0,X],[Z,U]],[[0,X1],[Z1,U1]],P),
	cleartop2(A,[[0,X1|R],[Z1,U1|S]],[R1,R2],Q).

cleartop1(A,[[0,X],[Z,U]],[[0,X],[Z,U]],[]):-non_member(Z,A),!.
cleartop1(A,[[0,X],[Z,U]],V,P):-non_member(X,A),!,
	triple2path([Z,X,U],[[0,X],[Z,U]],V,P).
cleartop1(_,[[0,X],[Z,U]],V,P):-
	triple2path([Z,U,X],[[0,X],[Z,U]],V,P).

cleartop2(A,[[0,X|R],[Y,Z|S]],D,[P,Q]):-
	member(U,A),
	member(U,[X|R]),!,
	cleartop3(U,A,[[0,X|R],[Y,Z|S]],T,P),
	cleartop2(A,T,D,Q).
cleartop2(_,X,X,[]).

cleartop3(N,A,[X,[Y,Z,U|S]],T,P):-member(Z,A),!,
	cycle2n([Z,N,U],[X,[Y,Z,U|S]],T,P).
cleartop3(N,_,[X,[Y,Z,U|S]],T,P):-
	cycle2n([N,Z,U],[X,[Y,Z,U|S]],T,P).

% | ?- cycle2n([1,2,3],[[0,1,4],[2,5,3]],X,P).
% P = [r,[[l,d,r,u],[[r,d,l,u],[[l,d,r,u],[]]]],[r,d,l,u],
%         [[[[],[d,l,u,r]],[d,r,u,l]],[d,l,u,r]],l]
% X = [[0,2,4],[3,5,1]] ? 
cycle2n([A,B,C],[[0,X|R],[Y,Z|S]],[[0,X1|R],[Y1,Z1|S]],Q):-
	member(X,[A,B,C]),
	member(Y,[A,B,C]),
	member(Z,[A,B,C]),!,
	triple2path([A,B,C],[[0,X],[Y,Z]],[[0,X1],[Y1,Z1]],Q).
cycle2n(P,[[0,X,U|R],[Y,Z,V|S]],
       [[0,X3,U3|R2],[Y3,Z3,V3|S2]],
	 [r,H,G,Hi,l]):-
 clearleft(P,[[X,0,U],[Y,Z,V]],[[X1,0,U1],[Y1,Z1,V1]],H),
 inverse(H,Hi),
 cycle2n(P,[[0,U1|R],[Z1,V1|S]],[[0,U2|R2],[Z2,V2|S2]],G),
 apply(Hi,[[X1,0,U2],[Y1,Z2,V2]],[[X3,0,U3],[Y3,Z3,V3]]).

equalcycperm([A,B,C],[A,B,C]).
equalcycperm([B,C,A],[A,B,C]).
equalcycperm([C,A,B],[A,B,C]).

move1(r,[[0,X],Y],[[X,0],Y]).  
move1(d,[[X,0],[Y,Z]],[[X,Z],[Y,0]]).
move1(l,[X,[Z,0]],[X,[0,Z]]).
move1(u,[[X,Y],[0,Z]],[[0,Y],[X,Z]]).
move1(r,[Y,[0,X]],[Y,[X,0]]).
move1(u,[[X,Y],[Z,0]],[[X,0],[Z,Y]]).
move1(l,[[X,0],Y],[[0,X],Y]).
move1(d,[[0,X],[Y,Z]],[[Y,X],[0,Z]]).

turn(Z,[[A,B|X1],[C,D|X2]|X],[[E,F|X1],[G,H|X2]|X]):-
	turn1(Z,[[A,B],[C,D]],[[E,F],[G,H]]).

turn1([d,r,u,l],[[0,A],[B,C]],[[0,B],[C,A]]).
turn1([r,d,l,u],[[0,A],[B,C]],[[0,C],[A,B]]).
turn1([l,d,r,u],[[A,0],[B,C]],[[B,0],[C,A]]).
turn1([d,l,u,r],[[A,0],[B,C]],[[C,0],[A,B]]).
turn1([l,u,r,d],[[A,B],[C,0]],[[B,C],[A,0]]).
turn1([u,l,d,r],[[A,B],[C,0]],[[C,A],[B,0]]).
turn1([r,u,l,d],[[A,B],[0,C]],[[C,A],[0,B]]).
turn1([u,r,d,l],[[A,B],[0,C]],[[B,C],[0,A]]).
         
% | ?- apply([r,d,r,u],[[0,1,2],[3,4,5]],X).
% X = [[1,4,0],[3,5,2]] ? 

apply(X,Y,Z):-apply(X,[],[],Y,M,N,U),restore(M,N,U,Z).

apply([],M,N,X,M,N,X).
apply([A,B,C,D|W],M,N,X,M1,N1,X1):-
	turn([A,B,C,D],X,X2),!,
	apply(W,M,N,X2,M1,N1,X1).
apply([A|W],M,N,X,M1,N1,X1):-!,
	apply(A,M,N,X,M2,N2,X2),
	apply(W,M2,N2,X2,M1,N1,X1).
apply(A,M,N,[[X,Y|R],[U,V|S]|T],M,N,[[X1,Y1|R],[U1,V1|S]|T]):-
	move1(A,[[X,Y],[U,V]],[[X1,Y1],[U1,V1]]),!.
apply(u,M,[R|Rs],X,M1,N1,X1):-apply(u,M,Rs,[R|X],M1,N1,X1).
apply(d,M,N,[X|Y],M1,N1,X1):-apply(d,M,[X|N],Y,M1,N1,X1).
apply(l,[C|M],N,X,M1,N1,X1):-!,
	revmulticons(C,N,N2,C2),
	multicons(C2,X,X3),
	apply(l,M,N2,X3,M1,N1,X1).
apply(r,M,N,X,M1,N1,X1):-
	firstcolumn(N,Cn,N2),
	firstcolumn(X,Cx,X2),
	reverse(Cn,Cx,C),
	apply(r,[C|M],N2,X2,M1,N1,X1).

triple2path(T,[[0,X],[Z,Y]],[[0,Y],[X,Z]],[r,d,l,u]):-
	equalcycperm(T,[X,Y,Z]),!.
triple2path(T,[[Z,0],[Y,X]],[[X,0],[Z,Y]],[d,l,u,r]):-
	equalcycperm(T,[X,Y,Z]),!.
triple2path(T,[[Y,Z],[X,0]],[[Z,X],[Y,0]],[l,u,r,d]):-
	equalcycperm(T,[X,Y,Z]),!.
triple2path(T,[[X,Y],[0,Z]],[[Y,Z],[0,X]],[u,r,d,l]):-
	equalcycperm(T,[X,Y,Z]),!.
triple2path(_,[[0,X],[Z,Y]],[[0,Z],[Y,X]],[d,r,u,l]):-!.
triple2path(_,[[Z,0],[Y,X]],[[Y,0],[X,Z]],[l,d,r,u]):-!.
triple2path(_,[[Y,Z],[X,0]],[[X,Y],[Z,0]],[u,l,d,r]):-!.
triple2path(_,[[X,Y],[0,Z]],[[Z,X],[0,Y]],[r,u,l,d]).

% | ?- clearleft([1,2,3],[[1,0,4],[2,5,3]],R,P).
% P = [[l,d,r,u],[[r,d,l,u],[[l,d,r,u],[]]]],
% R = [[5,0,3],[4,2,1]] ? 
clearleft(C,[[X,0,Y],[Z,U,V]],[[X,0,Y],[Z,U,V]],[]):-
	non_member(X,C),
	non_member(Z,C),!.
clearleft(C,[[X,0,Y],[Z,U,V]],R,[P,Q]):-
	member(U,C),!,
	out(A,[Y,V],C),
	remove(A,[Y,V],[B]),
	triple2path([U,A,B],[[0,Y],[U,V]],[M,N],P),
	clearleft(C,[[X|M],[Z|N]],R,Q).
clearleft(C,[[X,0,Y],[Z,U,V]],R,[P,Q]):-
	member(X,C),!,
	triple2path([U,X,Z],[[X,0],[Z,U]],[[X1,0],[Z1,U1]],P),
	clearleft(C,[[X1,0,Y],[Z1,U1,V]],R,Q).
clearleft(C,[[X,0,Y],[Z,U,V]],R,[P,Q]):-
	triple2path([U,Z,X],[[X,0],[Z,U]],[[X1,0],[Z1,U1]],P),
	clearleft(C,[[X1,0,Y],[Z1,U1,V]],R,Q).

%
% member(X,[X|_]).
% member(X,[_|Y]):-member(X,Y).

%flatten(X,Y):-flatten(X,Y,[]).
%flatten([],X,X):-!.
%flatten(X,[X|Z],Z):-atomic(X),!.
%flatten([X|Y],Z,U):-flatten(X,Z,V),flatten(Y,V,U).

remove(_,[],[]).
remove(X,[X|Y],Y).
remove(X,[U|Y],[U|Z]):-X\==U,remove(X,Y,Z).

out(A,X,Y):-member(A,X),non_member(A,Y).

reverse([],X,X).
reverse([X|Y],Z,U):-reverse(Y,[X|Z],U).

reverse2([],X,X).
reverse2([X|Y],Z,U):-reverse(X,[],Xr), reverse2(Y,[Xr|Z],U).

restore(X,Y,Z,U):-reverse(Y,Z,Z1),restorecol(X,Z1,U).

restorecol([],X,X).
restorecol([X|Y],Z,U):-multicons(X,Z,Z1),restorecol(Y,Z1,U).

multicons([],[],[]).
multicons([X|Y],[Z|U],[[X|Z]|V]):-multicons(Y,U,V).

revmulticons(A,[],[],A).
revmulticons(A,[X|Y],[[D|X]|Y1],B):-revmulticons(A,Y,Y1,[D|B]).

contain([],_).
contain([X|Y],Z):-contain1(X,Z),contain(Y,Z).

contain1(X,[Y|_]):-member(X,Y),!.
contain1(X,[_|R]):-contain1(X,R).

firstcolumn([],[],[]).
firstcolumn([[X|X1]|R],[X|Y],[X1|S]):-firstcolumn(R,Y,S).

zip_path([],[]).
zip_path([X|Y],[Xt|Yt]):-
	zip_path(X,Xt),
	zip_path(Y,Yt).
zip_path(X,Y):-zip_path1(X,Y).

zip_path1(d,r).
zip_path1(r,d).
zip_path1(u,l).
zip_path1(l,u).

opposite(u,d).
opposite(d,u).
opposite(l,r).
opposite(r,l).

opposite([],[]).
opposite([X|Y],[Xo|Yo]):-opposite(X,Xo), opposite(Y,Yo).

inverse([],[]).
inverse(r,l).
inverse(l,r).
inverse(d,u).
inverse(u,d).
inverse([X|Y],Z):-inverse([X|Y],[],Z).

inverse([],X,X).
inverse([X|Y],Z,U):-inverse(X,Xi), inverse(Y,[Xi|Z],U).

Does it do better than heuristic search? Does your code
only work for 15-puzzle or can it also solve 8-puzzle?

Ok, I see how to use it. Its worse than heuristic method
with Manhatten distance in the example. But it does have
the advantage that it implements the parity test:

?- npuzzle:npuzzle([[1,2,3],[4,5,6],[8,7,0]], X).
X = 'unsolvable: odd permutation'.

?- npuzzle:npuzzle([[8,7,6],[5,4,3],[2,1,0]], X), length(X, N).
X = [u, l, l, u, r, d, u, r, d|...],
N = 478 .

So it gives a solution with 478 steps, whereas Manhattan
distances and the slago ordering of state transition rules,
gives a solution with 220 steps:

?- X = [-1, 7, 6, 5, 4, 3, 2, 1, 0], 
    time(best(X, [], [], [], L)), length(L, N).
% 446,556 inferences, 0.047 CPU in 0.045 seconds (104% CPU, 9526528 Lips)
X = [-1, 7, 6, 5, 4, 3, 2, 1, 0],
L = [up, left, down, left, up, right, right, down, left|...],
N = 220 .

Source code:

Edit 24.05.2024

If I use slagos iterative deepening solution, its extremly slow,
but I guess it is guaranteed that it shows me a shortest solution,
which is only 30 steps:

?- time(ids).
* 7 6
5 4 3
2 1 0
[...]
* 0 1
2 3 4
5 6 7
moves = 30
% 572,148,838 inferences, 154.094 CPU in 154.335 seconds
(100% CPU, 3712992 Lips)
true.

It seems good work. Unfortunately I have little experience on Manhattan distance, though I know such approach based on metric space are powerful and main stream. Sorry for this general but poor comment. Anyway congratulation !

Whats even more amazing, going from Best First Search to A* Search.
In Best First Search I only use the Manhattan Distance as a selection
criteria, this is basically a lower bound estimated distance:

best(S, L, A, V, R) :-
   findall(W-(T,[M|L]), (move(S,T,M), \+ member(T, V), weight(T,W)), B),

In A* search I use the length of the already traveled path as well.
In the case of the 8 puzzle this length is discrete with even steps.
In real world example the already traveled path might be continous

astar(S, L, A, V, R) :-
   length(L, N),
   findall(W-(T,[M|L]), (move(S,T,M), \+ member(T, V), weight(T,J), 
      W is J+N), B),

and with uneven steps. It now finds the optimum for the slago example,
you can lookup the optimum on stackoverflow here. And for my example
it finds the optimum as well. Not sure whether it finds always the optimum?

/* slago example */
?- start(X), time(astar(X, [], [], [], L)), length(L, N).
% 10,427,657 inferences, 0.703 CPU in 0.709 seconds (99% CPU, 14830446 Lips)
X = [6, 1, 3, 4, -1, 5, 7, 2, 0],
L = [left, left, up, right, right, down, left, left, up|...],
N = 26 .

/* reverse example */
?- X = [-1, 7, 6, 5, 4, 3, 2, 1, 0], time(astar(X, [], [], [], L)), 
   length(L, N).
% 176,585,611 inferences, 13.563 CPU in 13.564 seconds 
(100% CPU, 13020137 Lips)
X = [-1, 7, 6, 5, 4, 3, 2, 1, 0],
L = [up, left, left, down, right, up, right, down, left|...],
N = 30 .

Maybe we have solved the riddle of the missing heuristic for the 8-puzzle
now? The problem was not that the Manhattan distance doesn’t work,
the problem was that we only looked into Best First and not into A*?

It will find an optimum with A* if your heuristic never overestimates. It is easy to prove, remember that you expand the node with lowest “distance so far” + “remaining estimate”.

And yes, you must use something like a priority heap for efficient implementation.

8 squares is too easily brute-forcible. 15 squares is a worthy puzzle, which I took a look at, a couple of years ago, before fizzling out :woozy_face:.

The usual aim is to produce a solution which has a guaranteed minimal length.

Here’s some links with relevant insight:

Nigel Galloway’s C++ code at 15 puzzle solver - Rosetta Code seems to be an amazing algorithm which many people then reimplement in various languages - it is not an easy algorithm to understand (I don’t :grinning:) - an attempted explanation is at 15 puzzle solver - Rosetta Code which links to http://kenogo.org/literate_programming/15_puzzle_solver.pdf

(Nothing of Importance)

There is an earlier alternative suggestion to Takahashi walking
distance from 2002. Its also an alternative to Korf et al.
Pattern Database approach from 2004.

You get an itch more heuristic power by adding on top
of Manhattan distance a goal conflict measure. It was
publish in 1992 by Hansson, Mayer and Yung:

image
image

https://www.cse.sc.edu/~mgv/csce580sp15/gradPres/HanssonMayerYung1992.pdf

The above could be some interesting test cases. But I was
not yet able to code the linear conflict heuristic in Prolog. In
as far not yet sure which heuristic is the more performant?

Now I have to thank @kuniaki.mukai that he published
his code, especially the bubblesort, which he uses to
count inversions. This counting does a good job as a

conflict measure. I now got a new heuristic by replacing the
keysort/2 in my manhattan distance calculation, by a DCG
variant bubblesort/4 which doesn’t use append/3 anymore:

% bubblesort(+Pairs, -Pairs, -List, +List)
bubblesort([],[]) --> [].
bubblesort([X|Y],Z) -->
	bubblesort(Y,V),
	insert(X,V,Z).

% insert(+Pair, -List, +List)
insert(X,[],[X]) --> !.
insert(X-P,[Y-Q|Z],[X-P,Y-Q|Z]) --> {X<Y}, !.
insert(X-P,[Y-Q|Z],[Y-Q|U]) --> kind(X,Y), insert(X-P,Z,U).

% kind(+Integer, +Integer, -List, +List)
kind(P, Q) --> {P div 3 =:= Q div 3}, !, [*].
kind(P, Q) --> {P mod 3 =:= Q mod 3}, !, [*].
kind(_, _) --> [].

Was weighting the inversions count by 1/3 which might have a
group theory justification. It gives some speed up to slagos and the
reverse example. Further it seems to give still optimum solutions:

/* slago example */
?- start(X), time(astar(X, [], [], [], L)), length(L, N).
% 6,431,573 inferences, 0.453 CPU in 0.457 seconds (99% CPU, 14193816 Lips)
X = [6, 1, 3, 4, -1, 5, 7, 2, 0],
L = [left, left, up, right, right, down, left, left, up|...],
N = 26.

/* reverse example */
?- X = [-1, 7, 6, 5, 4, 3, 2, 1, 0], time(astar(X, [], [], [], L)), 
   length(L, N).
% 50,214,313 inferences, 3.500 CPU in 3.504 seconds
(100% CPU, 14346947 Lips)
X = [-1, 7, 6, 5, 4, 3, 2, 1, 0],
L = [up, left, left, down, right, up, right, down, left|...],
N = 30.

Source code:

conflict.p.log (6,1 KB)

It’s good to hear that you have much improved “conflict measure” based on bubble sort which I used, and was at my best.

Some optimization hints, from my memory:

If an “edge” of the playing area is already completely in order, then there is never a good reason to enter/disturb it again. There are 4 edges: top, bottom, left, right. This rule also applies at 2 levels deep, e.g. if the top 2 rows are in order, don’t enter into those top 2 rows.

A move is either “good” (pushing a digit towards its destination position) or “bad” (pushing a digit away from its destination position). So, minimize the number of bad moves, to find the fastest solution. Interesting question: Is there a heuristic to guide on the number of bad moves needed?