Is there a way to find out the connection between actions and state fluents (SF)? Take the DBN of the grounded fluent robot-at(x4,y6) as an example: the next state depends on whether robot-at(x4,y5) ^ move-north holds, robot-at(x3,y6) ^ move-east holds, or the robot has already arrived at the goal. As a graphical representation I mean something like:
robot-at(x4,y5) + move-north --> robot-at(x4,y6)
robot-at(x3,y6) + move-east --> robot-at(x4,y6)
robot-at(x4,y6) --> robot-at(x4,y6)
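For this domain, those edges can be read directly off the third branch of the cpfs block in the file below: robot-at(?x,?y2) ^ move-north is a parent of robot-at'(?x,?y) exactly when the instance declares NORTH(?y2,?y), and likewise for the other three directions; the first branch contributes the self-loop once the goal is reached. A minimal Python sketch of that idea follows. It hard-codes the adjacency non-fluents from nf_navigation_inst_mdp__0 rather than parsing the RDDL file, and dbn_edges is my own helper name, so this is an illustration of the grounding, not a general tool.

# Adjacency non-fluents, hard-coded from nf_navigation_inst_mdp__0 below
# (a real tool would extract these from the grounded RDDL instance).
NORTH = {("y1","y2"), ("y2","y3"), ("y3","y4"), ("y4","y5"), ("y5","y6")}
EAST  = {("x1","x2"), ("x2","x3"), ("x3","x4")}
SOUTH = {(b, a) for (a, b) in NORTH}
WEST  = {(b, a) for (a, b) in EAST}

def dbn_edges(x, y):
    """Parents of robot-at'(x,y) in the DBN, read off branch 3 of the CPF:
    robot-at(?x,?y2) ^ move-north is a parent iff NORTH(?y2,?y) holds, etc."""
    for src, dst in NORTH:
        if dst == y:
            yield f"robot-at({x},{src})", "move-north"
    for src, dst in SOUTH:
        if dst == y:
            yield f"robot-at({x},{src})", "move-south"
    for src, dst in EAST:
        if dst == x:
            yield f"robot-at({src},{y})", "move-east"
    for src, dst in WEST:
        if dst == x:
            yield f"robot-at({src},{y})", "move-west"

for parent, action in dbn_edges("x4", "y6"):
    print(f"{parent} + {action} --> robot-at(x4,y6)")
# Branch 1 of the CPF (the absorbing goal) contributes the self-loop:
print("robot-at(x4,y6) --> robot-at(x4,y6)")

Running this prints exactly the three edges listed above.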
////////////////////////////////////////////////////////////////////
//
// Navigation MDP
//
// Author: Scott Sanner (ssanner [at] gmail.com)
//
// In a grid, a robot (R) must get to a goal (G). Every cell offers
// the robot a (different) chance of disappearing. The robot needs
// to choose a path which gets it to the goal most reliably within
// the finite horizon time.
//
// ***********************
// * 0 0 0 0 R *
// * .1 .3 .5 .7 .9 *
// * .1 .3 .5 .7 .9 *
// * .1 .3 .5 .7 .9 *
// * .1 .3 .5 .7 .9 *
// * 0 0 0 0 G *
// ***********************
//
// This is a good domain to test determinized planners because
// one can see here that the path using the .3 chance of failure
// leads to a most-likely single-step outcome of survival, but
// a poor 4-step chance of survival (.7^4 ~= .24), whereas the path
// using a .1 chance of failure is much safer.
//
// The domain generators for navigation have a flag to produce slightly
// obfuscated files to discourage hand-coded policies, but
// rddl.viz.NavigationDisplay can properly display these grids, e.g.,
//
// ./run rddl.sim.Simulator files/final_comp/rddl rddl.policy.RandomBoolPolicy
// navigation_inst_mdp__1 rddl.viz.NavigationDisplay
//
// (Movement was not made stochastic due to the lack of intermediate
// variables and synchronic arcs to support both the PPDDL and SPUDD
// translations.)
//
////////////////////////////////////////////////////////////////////
domain navigation_mdp {

    requirements = {
        // constrained-state,
        reward-deterministic
    };

    types {
        xpos : object;
        ypos : object;
    };
    pvariables {

        // Non-fluents: grid topology and per-cell disappearance probabilities
        NORTH(ypos, ypos) : {non-fluent, bool, default = false};
        SOUTH(ypos, ypos) : {non-fluent, bool, default = false};
        EAST(xpos, xpos)  : {non-fluent, bool, default = false};
        WEST(xpos, xpos)  : {non-fluent, bool, default = false};

        MIN-XPOS(xpos) : {non-fluent, bool, default = false};
        MAX-XPOS(xpos) : {non-fluent, bool, default = false};
        MIN-YPOS(ypos) : {non-fluent, bool, default = false};
        MAX-YPOS(ypos) : {non-fluent, bool, default = false};

        P(xpos, ypos)    : {non-fluent, real, default = 0.0};
        GOAL(xpos, ypos) : {non-fluent, bool, default = false};

        // Fluents
        robot-at(xpos, ypos) : {state-fluent, bool, default = false};

        // Actions
        move-north : {action-fluent, bool, default = false};
        move-south : {action-fluent, bool, default = false};
        move-east  : {action-fluent, bool, default = false};
        move-west  : {action-fluent, bool, default = false};
    };
    cpfs {

        robot-at'(?x,?y) =

            // Branch 1: the goal is absorbing; once reached, stay there.
            if ( GOAL(?x,?y) ^ robot-at(?x,?y) )
                then KronDelta(true)

            // Branch 2: this cell becomes empty, either because the goal
            // was reached (somewhere) or the robot moves out of (?x,?y).
            else if (( exists_{?x2 : xpos, ?y2 : ypos} [ GOAL(?x2,?y2) ^ robot-at(?x2,?y2) ] )
                     | ( move-north ^ exists_{?y2 : ypos} [ NORTH(?y,?y2) ^ robot-at(?x,?y) ] )
                     | ( move-south ^ exists_{?y2 : ypos} [ SOUTH(?y,?y2) ^ robot-at(?x,?y) ] )
                     | ( move-east  ^ exists_{?x2 : xpos} [ EAST(?x,?x2) ^ robot-at(?x,?y) ] )
                     | ( move-west  ^ exists_{?x2 : xpos} [ WEST(?x,?x2) ^ robot-at(?x,?y) ] ))
                then KronDelta(false)

            // Branch 3: the robot moves into (?x,?y) from an adjacent cell
            // and survives the new cell with probability 1 - P(?x,?y).
            else if (( move-north ^ exists_{?y2 : ypos} [ NORTH(?y2,?y) ^ robot-at(?x,?y2) ] )
                     | ( move-south ^ exists_{?y2 : ypos} [ SOUTH(?y2,?y) ^ robot-at(?x,?y2) ] )
                     | ( move-east  ^ exists_{?x2 : xpos} [ EAST(?x2,?x) ^ robot-at(?x2,?y) ] )
                     | ( move-west  ^ exists_{?x2 : xpos} [ WEST(?x2,?x) ^ robot-at(?x2,?y) ] ))
                then Bernoulli( 1.0 - P(?x,?y) )

            // Branch 4: otherwise the fluent persists.
            else KronDelta( robot-at(?x,?y) );
    };
    // 0 reward for reaching goal, -1 in all other cases
    reward = [sum_{?x : xpos, ?y : ypos} -(GOAL(?x,?y) ^ ~robot-at(?x,?y))];
}
non-fluents nf_navigation_inst_mdp__0 {

    domain = navigation_mdp;

    objects {
        xpos : {x1, x2, x3, x4};
        ypos : {y1, y2, y3, y4, y5, y6};
    };

    non-fluents {
        NORTH(y1,y2); SOUTH(y2,y1);
        NORTH(y2,y3); SOUTH(y3,y2);
        NORTH(y3,y4); SOUTH(y4,y3);
        NORTH(y4,y5); SOUTH(y5,y4);
        NORTH(y5,y6); SOUTH(y6,y5);

        EAST(x1,x2); WEST(x2,x1);
        EAST(x2,x3); WEST(x3,x2);
        EAST(x3,x4); WEST(x4,x3);

        MIN-XPOS(x1); MAX-XPOS(x4);
        MIN-YPOS(y1); MAX-YPOS(y6);

        GOAL(x4,y6);

        P(x1,y2) = 0.05619587033111525;
        P(x1,y3) = 0.04836146267135677;
        P(x1,y4) = 0.04224611192322229;
        P(x1,y5) = 0.027581761688785046;
        P(x2,y2) = 0.3290676024238835;
        P(x2,y3) = 0.31491139267692797;
        P(x2,y4) = 0.3367653188138302;
        P(x2,y5) = 0.33334257087090663;
        P(x3,y2) = 0.6136325686764752;
        P(x3,y3) = 0.6107738967872153;
        P(x3,y4) = 0.6151765929039653;
        P(x3,y5) = 0.6120255439526258;
        P(x4,y2) = 0.9491586061795277;
        P(x4,y3) = 0.9140435651595797;
        P(x4,y4) = 0.9300722126485798;
        P(x4,y5) = 0.920351061739235;
    };
}
instance navigation_inst_mdp__0 {
    domain = navigation_mdp;
    non-fluents = nf_navigation_inst_mdp__0;
    init-state {
        robot-at(x4,y1);
    };
    max-nondef-actions = 1;
    horizon = 40;
    discount = 1.0;
}
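To make the grounding concrete for robot-at'(x4,y6): the only adjacency facts pointing into that cell are NORTH(y5,y6) and EAST(x3,x4), so branch 3 of the CPF reduces to (move-north ^ robot-at(x4,y5)) | (move-east ^ robot-at(x3,y6)); and since P(x4,y6) is never set in the non-fluents block, its default 0.0 applies and Bernoulli(1.0 - 0.0) is deterministically true. Here is a hand-grounded sketch of that CPF as a transition probability (the function name and state encoding are mine, not part of RDDL):

P_X4_Y6 = 0.0  # P(x4,y6) keeps its default, so moving in always succeeds

def prob_robot_at_x4_y6_next(robot_at, action):
    """P(robot-at'(x4,y6) = true | state, action); robot_at maps (x,y)
    pairs to bools, action is one of the four move action-fluents."""
    # Branch 1: GOAL(x4,y6) ^ robot-at(x4,y6), the goal is absorbing.
    if robot_at[("x4", "y6")]:
        return 1.0
    # Branch 2 (KronDelta(false)) is vacuous here: robot-at(x4,y6) is
    # false, so neither "goal reached" nor "moved away" can fire.
    # Branch 3: step into (x4,y6) via NORTH(y5,y6) or EAST(x3,x4) and
    # survive the new cell with probability 1 - P(x4,y6).
    if action == "move-north" and robot_at[("x4", "y5")]:
        return 1.0 - P_X4_Y6
    if action == "move-east" and robot_at[("x3", "y6")]:
        return 1.0 - P_X4_Y6
    # Branch 4: persistence; robot-at(x4,y6) stays false.
    return 0.0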