    def setUp(self):
        self.batch_size = 10

        self.rddl1 = rddlgym.make('Reservoir-8', mode=rddlgym.AST)
        self.rddl2 = rddlgym.make('Navigation-v2', mode=rddlgym.AST)
        self.compiler1 = Compiler(self.rddl1, batch_mode=True)
        self.compiler2 = Compiler(self.rddl2, batch_mode=True)

        self.cell1 = ActionSimulationCell(self.compiler1)
        self.initial_state1 = self.compiler1.compile_initial_state(batch_size=self.batch_size)
        self.default_action1 = self.compiler1.compile_default_action(batch_size=1)

        self.cell2 = ActionSimulationCell(self.compiler2)
        self.initial_state2 = self.compiler2.compile_initial_state(batch_size=self.batch_size)
        self.default_action2 = self.compiler2.compile_default_action(batch_size=1)
Example #2
def _session_run(planner, fetches):
    env = rddlgym.make(planner.rddl, mode=rddlgym.GYM)

    with tf.Session(graph=planner.compiler.graph) as sess:
        sess.run(tf.global_variables_initializer())
        feed_dict = _get_feed_dict(sess, planner, env)
        return sess.run(fetches, feed_dict=feed_dict)
Example #3
    def setUpClass(cls):

        # hyper-parameters
        cls.batch_size = 64
        cls.horizon = 20
        cls.learning_rate = 0.001
        cls.regularization_rate = 0.1

        # rddl
        cls.compiler = rddlgym.make('Navigation-v2', rddlgym.SCG)
        cls.compiler.batch_mode_on()

        # policy
        cls.policy = FeedforwardPolicy(cls.compiler, {
            'layers': [256],
            'activation': 'elu',
            'input_layer_norm': False
        })
        cls.policy.build()

        # planner
        cls.config = {
            'batch_size': cls.batch_size,
            'horizon': cls.horizon,
            'learning_rate': cls.learning_rate,
            'regularization_rate': cls.regularization_rate
        }
        cls.planner = MinimaxOptimizationPlanner(cls.compiler, cls.config)
        cls.planner.build(cls.policy, loss='mse', optimizer='RMSProp')
Example #4
def run(config):
    # pylint: disable=import-outside-toplevel

    import os

    import psutil
    import rddlgym
    import tensorflow as tf

    import tfplan

    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

    # os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
    os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
    os.environ["OMP_NUM_THREADS"] = str(psutil.cpu_count(logical=False))

    planner = config["planner"]
    rddl = config["rddl"]
    filepath = os.path.join(config["logdir"], "data.csv")

    config["optimization"] = {
        "optimizer": config["optimizer"],
        "learning_rate": config["learning_rate"],
    }

    env = rddlgym.make(rddl, mode=rddlgym.GYM, config=config)
    env.set_horizon(config["horizon"])

    planner = tfplan.make(planner, rddl, config)

    with rddlgym.Runner(env, planner, debug=config["verbose"]) as runner:
        trajectory = runner.run()
        trajectory.save(filepath)
        print(trajectory.as_dataframe())
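
A hedged sketch of how this entry point might be called. The keys below are the ones run() reads directly; the planner name, domain id, log directory, and hyper-parameter values are illustrative assumptions, and a particular planner may require additional keys.

config = {
    "planner": "tensorplan",     # assumed planner name, not taken from the snippet above
    "rddl": "Navigation-v2",     # domain id borrowed from other examples on this page
    "logdir": "/tmp/tfplan",     # assumed output directory for data.csv
    "optimizer": "RMSProp",
    "learning_rate": 0.001,
    "horizon": 20,
    "verbose": False,
}
run(config)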
Example #5
    def setUpClass(cls):

        # hyper-parameters
        cls.horizon = 20
        cls.batch_size = 64

        # rddl
        cls.compiler = rddlgym.make('Navigation-v2', rddlgym.SCG)
        cls.compiler.batch_mode_on()

        # initial state
        cls.initial_state = cls.compiler.compile_initial_state(cls.batch_size)

        # default action
        cls.default_action = cls.compiler.compile_default_action(cls.batch_size)

        # policy
        cls.policy = FeedforwardPolicy(cls.compiler, {
            'layers': [32, 32],
            'activation': 'elu',
            'input_layer_norm': False
        })
        cls.policy.build()

        # model
        cls.config = {}
        cls.model = ReparameterizationSampling(cls.compiler, cls.config)
        cls.model.build(cls.policy)
        cls.output = cls.model(cls.initial_state, cls.horizon)
Example #6
    def setUpClass(cls):

        # hyper-parameters
        cls.batch_size = 64
        cls.horizon = 20
        cls.learning_rate = 0.001

        # rddl
        cls.compiler = rddlgym.make('Reservoir-8', rddlgym.SCG)
        cls.compiler.init()
        cls.compiler.batch_size = cls.batch_size

        # policy
        cls.policy = FeedforwardPolicy(cls.compiler, {
            'layers': [256],
            'activation': 'elu',
            'input_layer_norm': True
        })
        cls.policy.build()

        # planner
        cls.config = {
            'batch_size': cls.batch_size,
            'horizon': cls.horizon,
            'learning_rate': cls.learning_rate
        }
        cls.planner = PathwiseOptimizationPlanner(cls.compiler, cls.config)
        cls.planner.build(cls.policy, loss='mse', optimizer='RMSProp')
Example #7
    def __init__(self, rddl, config=None):
        self._compiler = rddlgym.make(rddl, mode=rddlgym.SCG)
        self._compiler.init()

        self.config = config

        self._graph = self._compiler.graph

        self._config_proto = tf.ConfigProto(
            inter_op_parallelism_threads=1,
            intra_op_parallelism_threads=1,
            log_device_placement=False,
        )

        self._sess = tf.Session(graph=self._graph, config=self._config_proto)

        self.observation_space = self._create_observation_space()
        self.action_space = self._create_action_space()

        self.non_fluents = self._eval_non_fluents()

        with self._compiler.graph.as_default():
            self._state_inputs = self._build_state_inputs()
            self._action_inputs = self._build_action_inputs()
            self._interms, self._next_state, self._reward = self._build_model_ops()

        self._state = None
        self._timestep = None

        self._horizon = None
Example #8
    def setUpClass(cls):

        # hyper-parameters
        cls.horizon = 40
        cls.batch_size = 16

        # rddl
        cls.compiler = rddlgym.make('Reservoir-8', mode=rddlgym.SCG)
        cls.compiler.init()
        cls.compiler.batch_size = cls.batch_size

        # initial state
        cls.initial_state = cls.compiler.initial_state()

        # default action
        cls.default_action = cls.compiler.default_action()

        # policy
        cls.policy = FeedforwardPolicy(cls.compiler, {
            'layers': [64, 64],
            'activation': 'relu',
            'input_layer_norm': True
        })
        cls.policy.build()

        # cell
        cls.cell = BasicMarkovCell(cls.compiler, cls.policy)

        with cls.cell.graph.as_default():
            # timestep
            cls.timestep = tf.constant(cls.horizon, dtype=tf.float32)
            cls.timestep = tf.expand_dims(cls.timestep, -1)
            cls.timestep = tf.stack([cls.timestep] * cls.batch_size)
Example #9
    def setUpClass(cls):

        # hyper-parameters
        cls.horizon = 40
        cls.batch_size = 128

        # rddl
        cls.compiler = rddlgym.make('Reservoir-8', rddlgym.SCG)
        cls.compiler.init()
        cls.compiler.batch_size = cls.batch_size

        # initial state
        cls.initial_state = cls.compiler.initial_state()

        # default action
        cls.default_action = cls.compiler.default_action()

        # policy
        cls.policy = FeedforwardPolicy(cls.compiler, {
            'layers': [32, 32],
            'activation': 'elu',
            'input_layer_norm': True
        })
        cls.policy.build()

        # model
        cls.config = {}
        cls.model = MonteCarloSampling(cls.compiler, cls.config)
        cls.model.build(cls.policy)
Example #10
    def setUpClass(cls):

        # hyper-parameters
        cls.batch_size = 16
        cls.horizon = 15

        # model
        cls.compiler = rddlgym.make('Reservoir-8', mode=rddlgym.SCG)
        cls.compiler.init()
        cls.compiler.batch_size = cls.batch_size

        # initial state
        cls.initial_state = cls.compiler.initial_state()

        # default action
        cls.default_action = cls.compiler.default_action()

        # policy
        cls.config = {
            'layers': [128, 64, 32],
            'activation': 'elu',
            'input_layer_norm': True
        }
        cls.policy = FeedforwardPolicy(cls.compiler, cls.config)
        cls.policy.build()
Example #11
def test_runner(planner):
    # pylint: disable=protected-access
    rddl = planner.rddl
    env = rddlgym.make(rddl, mode=rddlgym.GYM)
    env._horizon = 3
    runner = rddlgym.Runner(env, planner)
    trajectory = runner.run()
    assert len(trajectory) == env._horizon
Example #12
def cell(request):
    rddl = request.param
    model = rddlgym.make(rddl, mode=rddlgym.AST)
    compiler = DefaultCompiler(model, batch_size=BATCH_SIZE)
    compiler.init()
    policy = OpenLoopPolicy(compiler, HORIZON, parallel_plans=True)
    policy.build("tensorplan")
    yield SimulationCell(compiler, policy)
Example #13
def runner(request):
    rddl = request.param
    env = make(rddl, mode=GYM)

    def planner(state, timestep):
        # pylint: disable=unused-argument
        return env.action_space.sample()

    return Runner(env, planner)
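
A hedged sketch of the same pattern outside the pytest fixture: a GYM environment driven by a random policy through Runner. The domain id is an assumption borrowed from other snippets on this page.

env = make("Navigation-v2", mode=GYM)  # assumed domain id

def random_policy(state, timestep):
    # pylint: disable=unused-argument
    # same (state, timestep) signature expected by Runner, as in the fixture above
    return env.action_space.sample()

trajectory = Runner(env, random_policy).run()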
Example #14
def run(model_id, logdir, layers, activation, batch_size, learning_rate, horizon, epochs):
    compiler = rddlgym.make(model_id, mode=rddlgym.SCG)
    compiler.batch_mode_on()
    input_layer_norm = True
    hidden_layer_norm = False
    planner = PolicyOptimizationPlanner(compiler, layers, activation, input_layer_norm, hidden_layer_norm, logdir=logdir)
    planner.build(learning_rate, batch_size, horizon)
    rewards, policy, _ = planner.run(epochs)
    return rewards, policy
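
A hedged call sketch for the helper above; the domain id mirrors other examples on this page, and the remaining argument values are illustrative only.

rewards, policy = run(
    model_id="Navigation-v2",   # domain id seen in other snippets
    logdir="/tmp/policy-opt",   # assumed log directory
    layers=[256],
    activation="elu",
    batch_size=64,
    learning_rate=0.001,
    horizon=20,
    epochs=200,
)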
Example #15
    def setUp(self):
        self.rddl1 = rddlgym.make('Navigation-v3', mode=rddlgym.AST)
        self.compiler1 = Compiler(self.rddl1, batch_mode=True)

        self.policy1 = FeedforwardPolicy(self.compiler1, {
            'layers': [64],
            'activation': 'elu',
            'input_layer_norm': False
        })
        self.policy1.build()
        self.valuefn1 = Value(self.compiler1, self.policy1)
Example #16
    def setUpClass(cls):
        # hyper-parameters
        cls.batch_size = 16
        cls.horizon = 15

        # model
        cls.compiler = rddlgym.make('Reservoir-8', mode=rddlgym.SCG)
        cls.compiler.batch_mode_on()

        # initial state
        cls.initial_state = cls.compiler.compile_initial_state(cls.batch_size)
Example #17
def test_get_batch_initial_state(planner):
    # pylint: disable=protected-access
    env = rddlgym.make(planner.rddl, mode=rddlgym.GYM)

    with planner.compiler.graph.as_default():
        state = env.observation_space.sample()
        batch_state = planner._get_batch_initial_state(state)
        assert len(state) == len(batch_state)

        for fluent, batch_fluent in zip(state.values(), batch_state):
            assert fluent.dtype == batch_fluent.dtype
            assert fluent.shape == batch_fluent.shape[1:]
            assert batch_fluent.shape[0] == planner.compiler.batch_size
Example #18
def simulator(request):
    rddl = request.param
    model = rddlgym.make(rddl, mode=rddlgym.AST)

    compiler = ReparameterizationCompiler(model, batch_size=BATCH_SIZE)
    compiler.init()

    policy = OpenLoopPolicy(compiler, HORIZON, parallel_plans=True)
    policy.build("planning")

    simulator = Simulator(compiler, policy, config=None)
    simulator.build()
    yield simulator
Example #19
    def __init__(self, rddl, compiler_cls, config):
        self.rddl = rddl
        self.model = rddlgym.make(rddl, mode=rddlgym.AST)
        self.compiler = compiler_cls(self.model, batch_size=config["batch_size"])
        self.config = config

        self.compiler.init()

        config = tf.ConfigProto(
            inter_op_parallelism_threads=1,
            intra_op_parallelism_threads=1,
            log_device_placement=False,
        )
        self._sess = tf.Session(graph=self.graph, config=config)
Example #20
    def setUpClass(cls):
        # hyper-parameters
        cls.batch_size = 16
        cls.horizon = 15

        # model
        cls.compiler = rddlgym.make('Reservoir-8', mode=rddlgym.SCG)
        cls.compiler.init()
        cls.compiler.batch_size = cls.batch_size

        # initial state
        cls.initial_state = cls.compiler.initial_state()

        # default action
        cls.default_action = cls.compiler.default_action()
Example #21
    def setUpClass(cls):
        # hyper-parameters
        cls.batch_size = 16
        cls.horizon = 20

        cls.compiler = rddlgym.make('Navigation-v2', mode=rddlgym.SCG)
        cls.compiler.batch_mode_on()

        cls.noise_shapes = get_cpfs_reparameterization(cls.compiler.rddl)

        with cls.compiler.graph.as_default():
            cls.noise_variables = utils.get_noise_variables(
                cls.noise_shapes, cls.batch_size, cls.horizon)
            cls.inputs, cls.encoding = utils.encode_noise_as_inputs(
                cls.noise_variables)
Example #22
    def setUp(self):
        self.horizon = 40
        self.batch_size = 128

        self.rddl1 = rddlgym.make('Navigation-v3', mode=rddlgym.AST)
        self.compiler1 = rddl2tf.compilers.DefaultCompiler(
            self.rddl1, batch_size=self.batch_size)
        self.compiler1.init()

        self.policy1 = FeedforwardPolicy(self.compiler1, {
            'layers': [64],
            'activation': 'elu',
            'input_layer_norm': False
        })
        self.policy1.build()
        self.valuefn1 = Value(self.compiler1, self.policy1)
Example #23
    def setUpClass(cls):

        # hyper-parameters
        cls.horizon = 40
        cls.batch_size = 16

        # rddl
        rddl = rddlgym.make('Navigation-v2', mode=rddlgym.AST)
        cls.compiler = rddl2tf.compilers.ReparameterizationCompiler(
            rddl, batch_size=cls.batch_size)
        cls.compiler.init()

        # initial state
        cls.initial_state = cls.compiler.initial_state()

        # default action
        cls.default_action = cls.compiler.default_action()

        # policy
        cls.policy = FeedforwardPolicy(cls.compiler, {
            'layers': [64, 64],
            'activation': 'relu',
            'input_layer_norm': True
        })
        cls.policy.build()

        with cls.compiler.graph.as_default():

            # reparameterization
            cls.noise_shapes = cls.compiler.get_cpfs_reparameterization()
            cls.noise_variables = utils.get_noise_variables(
                cls.noise_shapes, cls.batch_size, cls.horizon)
            cls.noise_inputs, cls.encoding = utils.encode_noise_as_inputs(
                cls.noise_variables)

            # timestep
            cls.timestep = tf.constant(cls.horizon, dtype=tf.float32)
            cls.timestep = tf.expand_dims(cls.timestep, -1)
            cls.timestep = tf.stack([cls.timestep] * cls.batch_size)

            # inputs
            cls.inputs = tf.concat([cls.timestep, cls.noise_inputs[:, 0, :]],
                                   axis=1)

        # cell
        cls.config = {'encoding': cls.encoding}
        cls.cell = ReparameterizationCell(cls.compiler, cls.policy, cls.config)
Example #24
def test_get_action(planner):
    # pylint: disable=protected-access
    env = rddlgym.make(planner.rddl, mode=rddlgym.GYM)

    with tf.Session(graph=planner.compiler.graph) as sess:
        sess.run(tf.global_variables_initializer())
        feed_dict = _get_feed_dict(sess, planner, env)

        actions_ = planner._get_action(planner.action, feed_dict)

        action_fluents = planner.compiler.default_action_fluents
        assert isinstance(actions_, OrderedDict)
        assert len(actions_) == len(action_fluents)
        for action_, action_fluent in zip(actions_.values(), action_fluents):
            assert tf.dtypes.as_dtype(action_.dtype) == action_fluent[1].dtype
            assert list(action_.shape) == list(
                action_fluent[1].shape.fluent_shape)
Example #25
    def setUp(self):
        self.rddl1 = rddlgym.make('Reservoir-8', mode=rddlgym.AST)
        self.rddl2 = rddlgym.make('Mars_Rover', mode=rddlgym.AST)
        self.rddl3 = rddlgym.make('HVAC-v1', mode=rddlgym.AST)
        self.rddl4 = rddlgym.make('CrossingTraffic-10', mode=rddlgym.AST)
        self.rddl5 = rddlgym.make('GameOfLife-10', mode=rddlgym.AST)
        self.rddl6 = rddlgym.make('CarParking-v1', mode=rddlgym.AST)
        self.rddl7 = rddlgym.make('Navigation-v3', mode=rddlgym.AST)
        self.compiler1 = Compiler(self.rddl1)
        self.compiler2 = Compiler(self.rddl2)
        self.compiler3 = Compiler(self.rddl3)
        self.compiler4 = Compiler(self.rddl4)
        self.compiler5 = Compiler(self.rddl5)
        self.compiler6 = Compiler(self.rddl6)
        self.compiler7 = Compiler(self.rddl7)
Example #26
def reparameterization(request):
    rddl = request.param
    model = rddlgym.make(rddl, mode=rddlgym.AST)
    compiler = ReparameterizationCompiler(model, batch_size=BATCH_SIZE)
    compiler.init()

    with compiler.graph.as_default():
        mapping = compiler.get_cpfs_reparameterization()
        samples = utils.get_noise_samples(mapping, BATCH_SIZE, HORIZON)

        inputs, encoding = utils.encode_noise_samples_as_inputs(samples)

        decoded_samples = utils.decode_inputs_as_noise_samples(
            inputs[:, 0, ...], encoding)

    return Reparameterization(compiler, mapping, samples, inputs, encoding,
                              decoded_samples)
Example #27
    def setUpClass(cls):
        # hyper-parameters
        cls.batch_size = 16
        cls.horizon = 20

        rddl = rddlgym.make('Navigation-v2', mode=rddlgym.AST)
        cls.compiler = rddl2tf.compilers.ReparameterizationCompiler(
            rddl, batch_size=cls.batch_size)
        cls.compiler.init()

        cls.noise_shapes = cls.compiler.get_cpfs_reparameterization()

        with cls.compiler.graph.as_default():
            cls.noise_variables = utils.get_noise_variables(
                cls.noise_shapes, cls.batch_size, cls.horizon)
            cls.inputs, cls.encoding = utils.encode_noise_as_inputs(
                cls.noise_variables)
Example #28
def cell(request):
    rddl = request.param
    model = rddlgym.make(rddl, mode=rddlgym.AST)

    compiler = ReparameterizationCompiler(model, batch_size=BATCH_SIZE)
    compiler.init()

    policy = OpenLoopPolicy(compiler, HORIZON, parallel_plans=True)
    policy.build("planning")

    with compiler.graph.as_default():
        reparameterization_map = compiler.get_cpfs_reparameterization()
        cell_samples = utils.get_noise_samples(reparameterization_map,
                                               BATCH_SIZE,
                                               horizon=1)
        cell_noise, encoding = utils.encode_noise_samples_as_inputs(
            cell_samples)

    cell = SimulationCell(compiler, policy, config={"encoding": encoding})
    cell.cell_noise = cell_noise
    yield cell
Example #29
    def _init_domain(self, model_id):
        compiler = rddlgym.make(model_id, mode=rddlgym.SCG)
        compiler.batch_mode_on()

        initial_state = compiler.compile_initial_state(batch_size=1)
        default_action = compiler.compile_default_action(batch_size=1)

        planner = OnlineOpenLoopPlanner(compiler, self.batch_size,
                                        self.horizon)
        planner.build(self.learning_rate,
                      epochs=self.epochs,
                      show_progress=False)

        online_planner = OnlinePlanning(compiler, planner)
        online_planner.build()

        return {
            'initial_state': initial_state,
            'default_action': default_action,
            'online_planner': online_planner
        }
Example #30
def test_get_action(planner):
    # pylint: disable=protected-access
    env = rddlgym.make(planner.rddl, mode=rddlgym.GYM)

    with tf.Session(graph=planner.compiler.graph) as sess:
        sess.run(tf.global_variables_initializer())
        state = env.observation_space.sample()
        batch_state = planner._get_batch_initial_state(state)
        samples = utils.evaluate_noise_samples_as_inputs(
            sess, planner.simulator.samples)
        feed_dict = {
            planner.initial_state: batch_state,
            planner.simulator.noise: samples,
            planner.steps_to_go: HORIZON,
        }
        actions_ = planner._get_action(planner.trajectory.actions, feed_dict)
        action_fluents = planner.compiler.default_action_fluents
        assert isinstance(actions_, OrderedDict)
        assert len(actions_) == len(action_fluents)
        for action_, action_fluent in zip(actions_.values(), action_fluents):
            assert tf.dtypes.as_dtype(action_.dtype) == action_fluent[1].dtype
            assert list(action_.shape) == list(
                action_fluent[1].shape.fluent_shape)