def setUp(self):
    """Build a batch-mode compiler, simulation cell, initial state and
    default action for each of the two benchmark domains.

    Exposes the same attributes as before: rddl1/2, compiler1/2, cell1/2,
    initial_state1/2 and default_action1/2.
    """
    self.batch_size = 10
    for idx, domain in enumerate(('Reservoir-8', 'Navigation-v2'), start=1):
        rddl = rddlgym.make(domain, mode=rddlgym.AST)
        compiler = Compiler(rddl, batch_mode=True)
        setattr(self, 'rddl{}'.format(idx), rddl)
        setattr(self, 'compiler{}'.format(idx), compiler)
        setattr(self, 'cell{}'.format(idx), ActionSimulationCell(compiler))
        setattr(self, 'initial_state{}'.format(idx),
                compiler.compile_initial_state(batch_size=self.batch_size))
        # the default action is deliberately compiled with batch_size=1,
        # unlike the state batch of size 10
        setattr(self, 'default_action{}'.format(idx),
                compiler.compile_default_action(batch_size=1))
def setUpClass(cls):
    """Compile the Navigation domain once and build an ActionOptimizer
    over an open-loop policy, shared by every test in this case."""
    # hyper-parameters shared by the whole test case
    cls.horizon = 40
    cls.batch_size = 64
    cls.epochs = 50
    cls.learning_rate = 0.01

    # read the RDDL source, then parse and build the model
    with open('rddl/deterministic/Navigation.rddl') as reader:
        rddl_source = reader.read()
    parser = RDDLParser()
    parser.build()
    rddl = parser.parse(rddl_source)
    rddl.build()

    # RDDL-to-TensorFlow compiler in batch mode
    cls.rddl2tf = Compiler(rddl, batch_mode=True)

    # open-loop policy over the planning horizon
    cls.policy = OpenLoopPolicy(cls.rddl2tf, cls.batch_size, cls.horizon)
    cls.policy.build('test')

    # action optimizer built from the learning rate, batch size and horizon
    cls.optimizer = ActionOptimizer(cls.rddl2tf, cls.policy)
    cls.optimizer.build(cls.learning_rate, cls.batch_size, cls.horizon)
def setUpClass(cls):
    """Compile the Navigation domain, build an open-loop policy, and
    unroll it over the horizon from the compiled initial state."""
    # hyper-parameters shared by the whole test case
    cls.horizon = 40
    cls.batch_size = 64

    # read the RDDL source, then parse and build the model
    with open('rddl/deterministic/Navigation.rddl') as reader:
        rddl_source = reader.read()
    parser = RDDLParser()
    parser.build()
    rddl = parser.parse(rddl_source)
    rddl.build()

    # RDDL-to-TensorFlow compiler in batch mode
    cls.rddl2tf = Compiler(rddl, batch_mode=True)

    # open-loop policy over the planning horizon
    cls.policy = OpenLoopPolicy(cls.rddl2tf, cls.batch_size, cls.horizon)
    cls.policy.build('test')

    # execute the policy for each timestep, counting down from horizon-1 to 0
    with cls.rddl2tf.graph.as_default():
        cls.state = cls.rddl2tf.compile_initial_state(cls.batch_size)
        cls.actions = []
        for step in reversed(range(cls.horizon)):
            timestep = tf.constant(
                step, dtype=tf.float32, shape=(cls.batch_size, 1))
            cls.actions.append(cls.policy(cls.state, timestep))
def setUpClass(cls):
    """Compile the Navigation domain, sample random open-loop policy
    variables, and build an ActionEvaluator for the tests."""
    # hyper-parameters shared by the whole test case
    cls.horizon = 40
    cls.batch_size = 1

    # read the RDDL source, then parse and build the model
    with open('rddl/deterministic/Navigation.rddl') as reader:
        rddl_source = reader.read()
    parser = RDDLParser()
    parser.build()
    rddl = parser.parse(rddl_source)
    rddl.build()

    # RDDL-to-TensorFlow compiler in batch mode
    cls.rddl2tf = Compiler(rddl, batch_mode=True)

    # open-loop policy over the planning horizon
    cls.policy = OpenLoopPolicy(cls.rddl2tf, cls.batch_size, cls.horizon)
    cls.policy.build('test')

    # one uniform random tensor per action fluent, with an extra
    # leading horizon dimension prepended to each fluent shape
    cls.policy_variables = [
        np.random.uniform(low=-1.0, high=1.0, size=[cls.horizon] + list(shape))
        for shape in cls.rddl2tf.rddl.action_size
    ]

    # action evaluator under test
    cls.evaluator = ActionEvaluator(cls.rddl2tf, cls.policy)
def setUp(self):
    """Build a feedforward policy and its Value function for Navigation-v3."""
    # network configuration for the feedforward policy
    config = {
        'layers': [64],
        'activation': 'elu',
        'input_layer_norm': False,
    }
    self.rddl1 = rddlgym.make('Navigation-v3', mode=rddlgym.AST)
    self.compiler1 = Compiler(self.rddl1, batch_mode=True)
    self.policy1 = FeedforwardPolicy(self.compiler1, config)
    self.policy1.build()
    self.valuefn1 = Value(self.compiler1, self.policy1)
def setUp(self):
    """Create an AST model and a (non-batch) compiler for each of the
    seven benchmark domains, exposed as rddl1..rddl7 / compiler1..compiler7."""
    domains = (
        'Reservoir-8',
        'Mars_Rover',
        'HVAC-v1',
        'CrossingTraffic-10',
        'GameOfLife-10',
        'CarParking-v1',
        'Navigation-v3',
    )
    for idx, domain in enumerate(domains, start=1):
        rddl = rddlgym.make(domain, mode=rddlgym.AST)
        setattr(self, 'rddl{}'.format(idx), rddl)
        setattr(self, 'compiler{}'.format(idx), Compiler(rddl))
class TestCompiler(unittest.TestCase):
    """Unit tests for the RDDL-to-TensorFlow Compiler across seven domains.

    Fix over the original: ``test_compile_action_lower_bound_constraints``
    was defined twice with byte-identical bodies; the first definition was
    silently shadowed by the second, so the duplicate has been removed.
    """

    def setUp(self):
        # one AST model and one (non-batch) compiler per benchmark domain
        self.rddl1 = rddlgym.make('Reservoir-8', mode=rddlgym.AST)
        self.rddl2 = rddlgym.make('Mars_Rover', mode=rddlgym.AST)
        self.rddl3 = rddlgym.make('HVAC-v1', mode=rddlgym.AST)
        self.rddl4 = rddlgym.make('CrossingTraffic-10', mode=rddlgym.AST)
        self.rddl5 = rddlgym.make('GameOfLife-10', mode=rddlgym.AST)
        self.rddl6 = rddlgym.make('CarParking-v1', mode=rddlgym.AST)
        self.rddl7 = rddlgym.make('Navigation-v3', mode=rddlgym.AST)
        self.compiler1 = Compiler(self.rddl1)
        self.compiler2 = Compiler(self.rddl2)
        self.compiler3 = Compiler(self.rddl3)
        self.compiler4 = Compiler(self.rddl4)
        self.compiler5 = Compiler(self.rddl5)
        self.compiler6 = Compiler(self.rddl6)
        self.compiler7 = Compiler(self.rddl7)

    def test_compile_state_action_constraints(self):
        """State-action constraints are boolean scalar fluents; each one is
        batched only if it depends on a batched fluent."""
        batch_size = 1000
        compilers = [self.compiler4, self.compiler5]
        # (number of constraints, per-constraint batch flag)
        expected_preconds = [(12, [True] + [False] * 11), (1, [False])]
        for compiler, expected in zip(compilers, expected_preconds):
            compiler.batch_mode_on()
            state = compiler.compile_initial_state(batch_size)
            action = compiler.compile_default_action(batch_size)
            constraints = compiler.compile_state_action_constraints(
                state, action)
            self.assertIsInstance(constraints, list)
            self.assertEqual(len(constraints), expected[0])
            for c, batch_mode in zip(constraints, expected[1]):
                self.assertIsInstance(c, TensorFluent)
                self.assertEqual(c.dtype, tf.bool)
                if batch_mode:
                    self.assertEqual(c.shape.batch_size, batch_size)
                else:
                    # non-batched constraints carry a singleton batch dim
                    self.assertEqual(c.shape.batch_size, 1)
                self.assertEqual(c.shape.batch, batch_mode)
                self.assertTupleEqual(c.shape.fluent_shape, ())

    def test_compile_action_preconditions(self):
        """Action preconditions compile to batched boolean scalar fluents."""
        batch_size = 1000
        compilers = [self.compiler1, self.compiler2]
        expected_preconds = [2, 1]  # number of preconditions per domain
        for compiler, expected in zip(compilers, expected_preconds):
            compiler.batch_mode_on()
            state = compiler.compile_initial_state(batch_size)
            action = compiler.compile_default_action(batch_size)
            preconds = compiler.compile_action_preconditions(state, action)
            self.assertIsInstance(preconds, list)
            self.assertEqual(len(preconds), expected)
            for p in preconds:
                self.assertIsInstance(p, TensorFluent)
                self.assertEqual(p.dtype, tf.bool)
                self.assertEqual(p.shape.batch_size, batch_size)
                self.assertTrue(p.shape.batch)
                self.assertTupleEqual(p.shape.fluent_shape, ())

    def test_compile_state_invariants(self):
        """State invariants compile to boolean scalar fluents."""
        batch_size = 1000
        compilers = [self.compiler1, self.compiler2]
        expected_invariants = [2, 0]  # number of invariants per domain
        for compiler, expected in zip(compilers, expected_invariants):
            compiler.batch_mode_on()
            state = compiler.compile_initial_state(batch_size)
            invariants = compiler.compile_state_invariants(state)
            self.assertIsInstance(invariants, list)
            self.assertEqual(len(invariants), expected)
            for p in invariants:
                self.assertIsInstance(p, TensorFluent)
                self.assertEqual(p.dtype, tf.bool)
                self.assertTupleEqual(p.shape.fluent_shape, ())

    def test_compile_action_preconditions_checking(self):
        """Precondition checking reduces to a single boolean tensor per batch."""
        batch_size = 1000
        compilers = [self.compiler1, self.compiler2]
        for compiler in compilers:
            compiler.batch_mode_on()
            state = compiler.compile_initial_state(batch_size)
            action = compiler.compile_default_action(batch_size)
            checking = compiler.compile_action_preconditions_checking(
                state, action)
            self.assertIsInstance(checking, tf.Tensor)
            self.assertEqual(checking.dtype, tf.bool)
            self.assertListEqual(checking.shape.as_list(), [batch_size])

    def test_compile_action_lower_bound_constraints(self):
        """Lower bounds of action bound constraints have the expected
        name, shape and dtype."""
        batch_size = 1000
        compilers = [self.compiler1, self.compiler3]
        expected = [[('outflow/1', [], tf.int32)], [('AIR/1', [], tf.int32)]]
        for compiler, expected_bounds in zip(compilers, expected):
            compiler.batch_mode_on()
            initial_state = compiler.compile_initial_state(batch_size)
            # NOTE(review): result unused, but the call is kept in case
            # compiling the default action has graph side effects — confirm
            default_action_fluents = compiler.compile_default_action(
                batch_size)
            bounds = compiler.compile_action_bound_constraints(initial_state)
            self.assertIsInstance(bounds, dict)
            self.assertEqual(len(bounds), len(expected_bounds))
            for fluent_name, shape, dtype in expected_bounds:
                self.assertIn(fluent_name, bounds)
                self.assertIsInstance(bounds[fluent_name], tuple)
                self.assertEqual(len(bounds[fluent_name]), 2)
                lower, _ = bounds[fluent_name]
                self.assertIsInstance(lower, TensorFluent)
                self.assertListEqual(lower.shape.as_list(), shape)
                self.assertEqual(lower.dtype, dtype)

    def test_compile_action_upper_bound_constraints(self):
        """Upper bounds of action bound constraints have the expected
        name, shape and dtype."""
        batch_size = 1000
        compilers = [self.compiler1, self.compiler3]
        expected = [[('outflow/1', [batch_size, 8], tf.float32)],
                    [('AIR/1', [3], tf.float32)]]
        for compiler, expected_bounds in zip(compilers, expected):
            compiler.batch_mode_on()
            initial_state = compiler.compile_initial_state(batch_size)
            # NOTE(review): result unused, but the call is kept in case
            # compiling the default action has graph side effects — confirm
            default_action_fluents = compiler.compile_default_action(
                batch_size)
            bounds = compiler.compile_action_bound_constraints(initial_state)
            self.assertIsInstance(bounds, dict)
            self.assertEqual(len(bounds), len(expected_bounds))
            for fluent_name, shape, dtype in expected_bounds:
                self.assertIn(fluent_name, bounds)
                self.assertIsInstance(bounds[fluent_name], tuple)
                self.assertEqual(len(bounds[fluent_name]), 2)
                _, upper = bounds[fluent_name]
                self.assertIsInstance(upper, TensorFluent)
                self.assertEqual(upper.dtype, dtype)
                self.assertListEqual(upper.shape.as_list(), shape)

    def test_initialize_non_fluents(self):
        """Non-fluents compile with expected names, shapes, dtypes and values."""
        nf = dict(self.compiler1.compile_non_fluents())

        expected_non_fluents = {
            'MAX_RES_CAP/1': {'shape': [8], 'dtype': tf.float32},
            'UPPER_BOUND/1': {'shape': [8], 'dtype': tf.float32},
            'LOWER_BOUND/1': {'shape': [8], 'dtype': tf.float32},
            'RAIN_SHAPE/1': {'shape': [8], 'dtype': tf.float32},
            'RAIN_SCALE/1': {'shape': [8], 'dtype': tf.float32},
            'DOWNSTREAM/2': {'shape': [8, 8], 'dtype': tf.bool},
            'SINK_RES/1': {'shape': [8], 'dtype': tf.bool},
            'MAX_WATER_EVAP_FRAC_PER_TIME_UNIT/0': {
                'shape': [],
                'dtype': tf.float32
            },
            'LOW_PENALTY/1': {'shape': [8], 'dtype': tf.float32},
            'HIGH_PENALTY/1': {'shape': [8], 'dtype': tf.float32}
        }
        self.assertIsInstance(nf, dict)
        self.assertEqual(len(nf), len(expected_non_fluents))
        for name, fluent in nf.items():
            self.assertIn(name, expected_non_fluents)
            shape = expected_non_fluents[name]['shape']
            dtype = expected_non_fluents[name]['dtype']
            # tensor names replace '/' with '-' to stay valid TF names
            self.assertEqual(fluent.name,
                             'non_fluents/{}:0'.format(name.replace('/', '-')))
            self.assertIsInstance(fluent, TensorFluent)
            self.assertEqual(fluent.dtype, dtype)
            self.assertEqual(fluent.shape.as_list(), shape)

        expected_initializers = {
            'MAX_RES_CAP/1': [100., 100., 200., 300., 400., 500., 800., 1000.],
            'UPPER_BOUND/1': [80., 80., 180., 280., 380., 480., 780., 980.],
            'LOWER_BOUND/1': [20., 20., 20., 20., 20., 20., 20., 20.],
            'RAIN_SHAPE/1': [1., 1., 1., 1., 1., 1., 1., 1.],
            'RAIN_SCALE/1': [5., 3., 9., 7., 15., 13., 25., 30.],
            'DOWNSTREAM/2':
            [[False, False, False, False, False, True, False, False],
             [False, False, True, False, False, False, False, False],
             [False, False, False, False, True, False, False, False],
             [False, False, False, False, False, False, False, True],
             [False, False, False, False, False, False, True, False],
             [False, False, False, False, False, False, True, False],
             [False, False, False, False, False, False, False, True],
             [False, False, False, False, False, False, False, False]],
            'SINK_RES/1': [False, False, False, False, False, False, False, True],
            'MAX_WATER_EVAP_FRAC_PER_TIME_UNIT/0': 0.05,
            'LOW_PENALTY/1': [-5., -5., -5., -5., -5., -5., -5., -5.],
            'HIGH_PENALTY/1': [-10., -10., -10., -10., -10., -10., -10., -10.]
        }
        # evaluate each tensor and compare element-wise with the RDDL values
        with tf.Session(graph=self.compiler1.graph) as sess:
            for name, fluent in nf.items():
                value = sess.run(fluent.tensor)
                list1 = list(value.flatten())
                list2 = list(np.array(expected_initializers[name]).flatten())
                for v1, v2 in zip(list1, list2):
                    self.assertAlmostEqual(v1, v2)

    def test_initialize_initial_state_fluents(self):
        """Initial state fluents compile with expected metadata and values."""
        sf = dict(self.compiler1.compile_initial_state())

        expected_state_fluents = {
            'rlevel/1': {'shape': [8], 'dtype': tf.float32}
        }
        self.assertIsInstance(sf, dict)
        self.assertEqual(len(sf), len(expected_state_fluents))
        for name, fluent in sf.items():
            self.assertIn(name, expected_state_fluents)
            shape = expected_state_fluents[name]['shape']
            dtype = expected_state_fluents[name]['dtype']
            self.assertEqual(
                fluent.name,
                'initial_state/{}:0'.format(name.replace('/', '-')))
            self.assertIsInstance(fluent, TensorFluent)
            self.assertEqual(fluent.dtype, dtype)
            self.assertEqual(fluent.shape.as_list(), shape)

        expected_initializers = {
            'rlevel/1': [75., 50., 50., 50., 50., 50., 50., 50.]
        }
        with tf.Session(graph=self.compiler1.graph) as sess:
            for name, fluent in sf.items():
                value = sess.run(fluent.tensor)
                list1 = list(value.flatten())
                list2 = list(np.array(expected_initializers[name]).flatten())
                for v1, v2 in zip(list1, list2):
                    self.assertAlmostEqual(v1, v2)

    def test_initialize_default_action_fluents(self):
        """Default action fluents compile as (name, TensorFluent) pairs with
        expected metadata and zero-valued initializers."""
        action_fluents = self.compiler1.compile_default_action()
        self.assertIsInstance(action_fluents, list)
        for fluent in action_fluents:
            self.assertIsInstance(fluent, tuple)
            self.assertEqual(len(fluent), 2)
            self.assertIsInstance(fluent[0], str)
            self.assertIsInstance(fluent[1], TensorFluent)

        af = dict(action_fluents)
        expected_action_fluents = {
            'outflow/1': {'shape': [8], 'dtype': tf.float32}
        }
        self.assertEqual(len(af), len(expected_action_fluents))
        for name, fluent in af.items():
            self.assertIn(name, expected_action_fluents)
            shape = expected_action_fluents[name]['shape']
            dtype = expected_action_fluents[name]['dtype']
            self.assertEqual(
                fluent.name,
                'default_action/{}:0'.format(name.replace('/', '-')))
            self.assertIsInstance(fluent, TensorFluent)
            self.assertEqual(fluent.dtype, dtype)
            self.assertEqual(fluent.shape.as_list(), shape)

        expected_initializers = {'outflow/1': [0., 0., 0., 0., 0., 0., 0., 0.]}
        with tf.Session(graph=self.compiler1.graph) as sess:
            for name, fluent in af.items():
                value = sess.run(fluent.tensor)
                list1 = list(value.flatten())
                list2 = list(np.array(expected_initializers[name]).flatten())
                for v1, v2 in zip(list1, list2):
                    self.assertAlmostEqual(v1, v2)

    def test_state_scope(self):
        """State scope keys follow the domain's state fluent ordering."""
        compilers = [
            self.compiler1, self.compiler2, self.compiler3, self.compiler4,
            self.compiler5, self.compiler6, self.compiler7
        ]
        for compiler in compilers:
            fluents = compiler.compile_initial_state()
            scope = dict(fluents)
            self.assertEqual(len(fluents), len(scope))
            for i, name in enumerate(
                    compiler.rddl.domain.state_fluent_ordering):
                self.assertIs(scope[name], fluents[i][1])

    def test_action_scope(self):
        """Action scope keys follow the domain's action fluent ordering."""
        compilers = [
            self.compiler1, self.compiler2, self.compiler3, self.compiler4,
            self.compiler5, self.compiler6, self.compiler7
        ]
        for compiler in compilers:
            fluents = compiler.compile_default_action()
            scope = dict(fluents)
            self.assertEqual(len(fluents), len(scope))
            for i, name in enumerate(
                    compiler.rddl.domain.action_fluent_ordering):
                self.assertIs(scope[name], fluents[i][1])

    def test_compile_expressions(self):
        """CPF and reward expressions compile to fluents with expected
        shape, dtype and parameter scope."""
        expected = {
            # rddl1: RESERVOIR ================================================
            'rainfall/1': {'shape': [8], 'dtype': tf.float32, 'scope': ['?r']},
            'evaporated/1': {'shape': [8], 'dtype': tf.float32, 'scope': ['?r']},
            'overflow/1': {'shape': [8], 'dtype': tf.float32, 'scope': ['?r']},
            'inflow/1': {'shape': [8], 'dtype': tf.float32, 'scope': ['?r']},
            "rlevel'/1": {'shape': [8], 'dtype': tf.float32, 'scope': ['?r']},

            # rddl2: MARS ROVER ===============================================
            "xPos'/0": {'shape': [], 'dtype': tf.float32, 'scope': []},
            "yPos'/0": {'shape': [], 'dtype': tf.float32, 'scope': []},
            "time'/0": {'shape': [], 'dtype': tf.float32, 'scope': []},
            "picTaken'/1": {'shape': [3], 'dtype': tf.bool, 'scope': ['?p']}
        }

        compilers = [self.compiler1, self.compiler2]
        rddls = [self.rddl1, self.rddl2]
        for compiler, rddl in zip(compilers, rddls):
            nf = compiler.non_fluents_scope()
            sf = dict(compiler.compile_initial_state())
            af = dict(compiler.compile_default_action())
            scope = {}
            scope.update(nf)
            scope.update(sf)
            scope.update(af)

            # compile each CPF in turn, adding it to the scope so later
            # CPFs can reference earlier ones
            _, cpfs = rddl.domain.cpfs
            for cpf in cpfs:
                name = cpf.name
                expr = cpf.expr
                t = compiler._compile_expression(expr, scope)
                scope[name] = t
                self.assertIsInstance(t, TensorFluent)
                self.assertEqual(t.shape.as_list(), expected[name]['shape'])
                self.assertEqual(t.dtype, expected[name]['dtype'])
                self.assertEqual(t.scope.as_list(), expected[name]['scope'])

            # the reward is a scalar float with empty scope
            reward_expr = rddl.domain.reward
            t = compiler._compile_expression(reward_expr, scope)
            self.assertIsInstance(t, TensorFluent)
            self.assertEqual(t.shape.as_list(), [])
            self.assertEqual(t.dtype, tf.float32)
            self.assertEqual(t.scope.as_list(), [])

    def test_compile_cpfs(self):
        """compile_cpfs returns intermediate and next-state fluents in the
        expected order."""
        compilers = [self.compiler1, self.compiler2]
        expected = [
            (['evaporated/1', 'overflow/1', 'rainfall/1', 'inflow/1'],
             ["rlevel'/1"]),
            ([], ["picTaken'/1", "time'/0", "xPos'/0", "yPos'/0"]),
        ]
        for compiler, (expected_interm, expected_state) in zip(compilers,
                                                               expected):
            nf = compiler.non_fluents_scope()
            sf = dict(compiler.compile_initial_state())
            af = dict(compiler.compile_default_action())
            scope = {**nf, **sf, **af}

            interm_fluents, next_state_fluents = compiler.compile_cpfs(scope)

            self.assertIsInstance(interm_fluents, list)
            self.assertEqual(len(interm_fluents), len(expected_interm))
            for fluent, expected_fluent in zip(interm_fluents,
                                               expected_interm):
                self.assertEqual(fluent[0], expected_fluent)

            self.assertIsInstance(next_state_fluents, list)
            self.assertEqual(len(next_state_fluents), len(expected_state))
            for fluent, expected_fluent in zip(next_state_fluents,
                                               expected_state):
                self.assertEqual(fluent[0], expected_fluent)

    def test_compile_state_cpfs(self):
        """Next-state CPFs yield one renamed fluent per state fluent."""
        compilers = [
            self.compiler1, self.compiler2, self.compiler3, self.compiler4,
            self.compiler5, self.compiler6, self.compiler7
        ]
        for compiler in compilers:
            nf = compiler.non_fluents_scope()
            sf = dict(compiler.compile_initial_state())
            af = dict(compiler.compile_default_action())
            scope = {**nf, **sf, **af}

            # intermediate CPFs must be in scope before state CPFs compile
            interm_fluents = compiler.compile_intermediate_cpfs(scope)
            scope.update(dict(interm_fluents))
            next_state_fluents = compiler.compile_state_cpfs(scope)

            self.assertIsInstance(next_state_fluents, list)
            for cpf in next_state_fluents:
                self.assertIsInstance(cpf, tuple)
            self.assertEqual(len(next_state_fluents), len(sf))

            next_state_fluents = dict(next_state_fluents)
            for fluent in sf:
                next_fluent = utils.rename_state_fluent(fluent)
                self.assertIn(next_fluent, next_state_fluents)
                self.assertIsInstance(next_state_fluents[next_fluent],
                                      TensorFluent)

    def test_compile_intermediate_cpfs(self):
        """Intermediate CPFs compile in the domain's declared ordering."""
        compilers = [
            self.compiler1, self.compiler2, self.compiler3, self.compiler4,
            self.compiler5, self.compiler6, self.compiler7
        ]
        for compiler in compilers:
            fluents = compiler.rddl.domain.interm_fluent_ordering

            nf = compiler.non_fluents_scope()
            sf = dict(compiler.compile_initial_state())
            af = dict(compiler.compile_default_action())
            scope = {**nf, **sf, **af}

            interm_fluents = compiler.compile_intermediate_cpfs(scope)
            self.assertIsInstance(interm_fluents, list)
            self.assertEqual(len(interm_fluents), len(fluents))
            for actual, expected in zip(interm_fluents, fluents):
                self.assertIsInstance(actual, tuple)
                self.assertEqual(len(actual), 2)
                self.assertIsInstance(actual[0], str)
                self.assertIsInstance(actual[1], TensorFluent)
                self.assertEqual(actual[0], expected)

    def test_compile_reward(self):
        """The compiled reward is a batched scalar TensorFluent."""
        # TODO: self.compiler4
        compilers = [
            self.compiler1, self.compiler2, self.compiler3, self.compiler5,
            self.compiler6, self.compiler7
        ]
        batch_size = 32
        for compiler in compilers:
            compiler.batch_mode_on()
            state = compiler.compile_initial_state(batch_size)
            action = compiler.compile_default_action(batch_size)
            scope = compiler.transition_scope(state, action)
            interm_fluents, next_state_fluents = compiler.compile_cpfs(scope)
            scope.update(next_state_fluents)
            reward = compiler.compile_reward(scope)
            self.assertIsInstance(reward, TensorFluent)
            self.assertEqual(reward.shape.as_list(), [batch_size])

    def test_compile_probabilistic_normal_random_variable(self):
        """Normal(0, 1) random-variable expressions compile to samples."""
        mean = Expression(('number', 0.0))
        var = Expression(('number', 1.0))
        normal = Expression(('randomvar', ('Normal', (mean, var))))
        expressions = [normal]
        self._test_random_variable_expressions(expressions)

    def test_compile_probabilistic_gamma_random_variable(self):
        """Gamma(5, 1) random-variable expressions compile to samples."""
        shape = Expression(('number', 5.0))
        scale = Expression(('number', 1.0))
        gamma = Expression(('randomvar', ('Gamma', (shape, scale))))
        expressions = [gamma]
        self._test_random_variable_expressions(expressions)

    def __get_batch_compiler_with_state_action_scope(self):
        """Yield (compiler, batch_size, scope) triples for sampling tests."""
        compilers = [self.compiler2]
        batch_sizes = [8]
        for compiler, batch_size in zip(compilers, batch_sizes):
            compiler.batch_mode_on()
            nf = compiler.non_fluents_scope()
            sf = dict(compiler.compile_initial_state())
            af = dict(compiler.compile_default_action())
            scope = {**nf, **sf, **af}
            yield (compiler, batch_size, scope)

    def _test_random_variable_expressions(self, expressions):
        """Check each expression against every batch compiler fixture."""
        for compiler, batch_size, scope in \
                self.__get_batch_compiler_with_state_action_scope():
            for expr in expressions:
                self._test_random_variable_expression(expr, compiler, scope,
                                                      batch_size)

    def _test_random_variable_expression(self, expr, compiler, scope,
                                         batch_size):
        """Compile a single random-variable expression and check the sample."""
        sample = compiler._compile_random_variable_expression(
            expr, scope, batch_size)
        self._test_sample_fluents(sample, batch_size)
        self._test_sample_fluent(sample)

    def _test_sample_fluents(self, sample, batch_size=None):
        """The sample is a TensorFluent with the expected batch dimension."""
        self.assertIsInstance(sample, TensorFluent)
        if batch_size is not None:
            self.assertEqual(sample.shape[0], batch_size)

    def _test_sample_fluent(self, sample):
        """The sample's tensor op is named after the distribution sampler."""
        self.assertTrue(sample.tensor.name.startswith('sample'), sample.tensor)

    def _test_conditional_sample(self, sample):
        """A conditional sample is a select over (mask, stop-grad, sample)."""
        inputs = sample.tensor.op.inputs
        self.assertEqual(len(inputs), 3)
        self.assertTrue(inputs[0].name.startswith('LogicalNot'), inputs[0])
        self.assertTrue(inputs[1].name.startswith('StopGradient'), inputs[1])
        self.assertTrue(inputs[2].name.startswith('sample'), inputs[2])
def setUpClass(cls):
    """Compile the two benchmark domains once for the whole test case,
    exposing rddl1/rddl2 and batch-mode compiler1/compiler2."""
    for idx, domain in enumerate(('Reservoir-8', 'Mars_Rover'), start=1):
        rddl = rddlgym.make(domain, mode=rddlgym.AST)
        setattr(cls, 'rddl{}'.format(idx), rddl)
        setattr(cls, 'compiler{}'.format(idx), Compiler(rddl, batch_mode=True))
def compile_model(filename):
    """Parse the model stored in `filename` and return a Compiler for it."""
    return Compiler(parse_model(filename))
def __init__(self, compiler: Compiler, policy: Policy, batch_size: int) -> None:
    """Build the policy simulation cell and cache the non-fluent tensors.

    Args:
        compiler: the RDDL-to-TensorFlow compiler.
        policy: the policy to simulate.
        batch_size: number of trajectories simulated in parallel.
    """
    self._cell = PolicySimulationCell(compiler, policy, batch_size)
    # keep only the underlying tensors; the fluent names are not needed here
    compiled = compiler.compile_non_fluents()
    self._non_fluents = [nf.tensor for _, nf in compiled]
class TestActionSimulationCell(unittest.TestCase):
    """Unit tests for ActionSimulationCell over the Reservoir-8 and
    Navigation-v2 domains."""

    def setUp(self):
        # one batch-mode compiler, simulation cell, initial state
        # (batch of 10) and default action (batch of 1) per domain
        self.batch_size = 10
        self.rddl1 = rddlgym.make('Reservoir-8', mode=rddlgym.AST)
        self.rddl2 = rddlgym.make('Navigation-v2', mode=rddlgym.AST)
        self.compiler1 = Compiler(self.rddl1, batch_mode=True)
        self.compiler2 = Compiler(self.rddl2, batch_mode=True)
        self.cell1 = ActionSimulationCell(self.compiler1)
        self.initial_state1 = self.compiler1.compile_initial_state(batch_size=self.batch_size)
        self.default_action1 = self.compiler1.compile_default_action(batch_size=1)
        self.cell2 = ActionSimulationCell(self.compiler2)
        self.initial_state2 = self.compiler2.compile_initial_state(batch_size=self.batch_size)
        self.default_action2 = self.compiler2.compile_default_action(batch_size=1)

    def test_state_size(self):
        """state_size mirrors the initial-state tensor shapes sans batch dim."""
        state_size1 = self.cell1.state_size
        self.assertIsInstance(state_size1, tuple)
        self.assertEqual(len(state_size1), len(self.initial_state1))
        for shape, tensor in zip(state_size1, self.initial_state1):
            # drop the leading batch dimension from the tensor shape
            self.assertListEqual(list(shape), tensor.shape.as_list()[1:])
        state_size2 = self.cell2.state_size
        self.assertIsInstance(state_size2, tuple)
        self.assertEqual(len(state_size2), len(self.initial_state2))
        for shape, tensor in zip(state_size2, self.initial_state2):
            self.assertListEqual(list(shape), tensor.shape.as_list()[1:])

    def test_interm_size(self):
        """interm_size matches the per-domain intermediate fluent shapes."""
        expected = [((8,), (8,), (8,), (8,)), ((2,), (2,))]
        cells = [self.cell1, self.cell2]
        for cell, sz in zip(cells, expected):
            interm_size = cell.interm_size
            self.assertIsInstance(interm_size, tuple)
            self.assertTupleEqual(interm_size, sz)

    def test_output_size(self):
        """output_size is the 4-tuple (state, action, interm, reward=1)."""
        cells = [self.cell1, self.cell2]
        for cell in cells:
            output_size = cell.output_size
            state_size = cell.state_size
            interm_size = cell.interm_size
            action_size = cell.action_size
            self.assertEqual(output_size,
                             (state_size, action_size, interm_size, 1))

    def test_next_state(self):
        """Calling the cell yields correctly shaped state/action/reward tensors."""
        cells = [self.cell1, self.cell2]
        actions = [self.default_action1, self.default_action2]
        states = [self.initial_state1, self.initial_state2]
        for cell, inputs, state in zip(cells, actions, states):
            output, next_state = cell(inputs, state)
            self.assertIsInstance(output, tuple)
            self.assertEqual(len(output), 4)
            next_state, action, interm, reward = output
            state_size, action_size, interm_size, reward_size = cell.output_size

            # TODO: check the intermediate fluent tensors (`interm`) as well

            # next_state: one tensor per state fluent, batched
            self.assertIsInstance(next_state, tuple)
            self.assertEqual(len(next_state), len(state_size))
            for s, sz in zip(next_state, state_size):
                self.assertIsInstance(s, tf.Tensor)
                self.assertListEqual(s.shape.as_list(),
                                     [self.batch_size] + list(sz))

            # action: one tensor per action fluent, with batch size 1
            # (the default action fixture was compiled with batch_size=1)
            self.assertIsInstance(action, tuple)
            self.assertEqual(len(action), len(action_size))
            for a, sz in zip(action, action_size):
                self.assertIsInstance(a, tf.Tensor)
                self.assertListEqual(a.shape.as_list(), [1] + list(sz))

            # reward: one scalar per batch entry
            self.assertIsInstance(reward, tf.Tensor)
            self.assertListEqual(reward.shape.as_list(),
                                 [self.batch_size, reward_size])

    def test_output(self):
        """The reward output is a float32 tensor of shape [batch_size, 1]."""
        (output1, next_state1) = self.cell1(self.default_action1,
                                            self.initial_state1)
        next_state1, action1, interm_state1, reward1 = output1
        self.assertIsInstance(reward1, tf.Tensor)
        self.assertListEqual(reward1.shape.as_list(), [self.batch_size, 1])
        self.assertEqual(reward1.dtype, tf.float32)

        (output2, next_state2) = self.cell2(self.default_action2,
                                            self.initial_state2)
        next_state2, action2, interm_state2, reward2 = output2
        self.assertIsInstance(reward2, tf.Tensor)
        self.assertListEqual(reward2.shape.as_list(), [self.batch_size, 1])
        self.assertEqual(reward2.dtype, tf.float32)
def __init__(self, compiler: Compiler, batch_size: int) -> None:
    """Compile and cache the default action fluents for `batch_size` samples.

    Args:
        compiler: the RDDL-to-TensorFlow compiler.
        batch_size: batch dimension used when compiling the default action.
    """
    self._default = compiler.compile_default_action(batch_size)