def setUpClass(cls):
    """Set up shared test fixtures: hyper-parameters, compiled model, policy, optimizer."""
    # hyper-parameters shared by every test in the class
    cls.horizon = 40
    cls.batch_size = 64
    cls.epochs = 50
    cls.learning_rate = 0.01

    # read and parse the RDDL domain description
    with open('rddl/deterministic/Navigation.rddl') as rddl_file:
        model_text = rddl_file.read()
    rddl_parser = RDDLParser()
    rddl_parser.build()
    rddl = rddl_parser.parse(model_text)
    rddl.build()

    # compile the parsed model to TensorFlow ops in batch mode
    cls.rddl2tf = Compiler(rddl, batch_mode=True)

    # open-loop policy over the full planning horizon
    cls.policy = OpenLoopPolicy(cls.rddl2tf, cls.batch_size, cls.horizon)
    cls.policy.build('test')

    # optimizer over the policy's action variables
    cls.optimizer = ActionOptimizer(cls.rddl2tf, cls.policy)
    cls.optimizer.build(cls.learning_rate, cls.batch_size, cls.horizon)
def setUpClass(cls):
    """Set up fixtures: compile the model, build a policy, and unroll it over the horizon."""
    # hyper-parameters shared by every test in the class
    cls.horizon = 40
    cls.batch_size = 64

    # read and parse the RDDL domain description
    with open('rddl/deterministic/Navigation.rddl') as rddl_file:
        model_text = rddl_file.read()
    rddl_parser = RDDLParser()
    rddl_parser.build()
    rddl = rddl_parser.parse(model_text)
    rddl.build()

    # compile the parsed model to TensorFlow ops in batch mode
    cls.rddl2tf = Compiler(rddl, batch_mode=True)

    # open-loop policy over the full planning horizon
    cls.policy = OpenLoopPolicy(cls.rddl2tf, cls.batch_size, cls.horizon)
    cls.policy.build('test')

    # unroll the policy from the initial state, feeding timesteps
    # horizon-1 down to 0 (time-to-go decreases as the rollout proceeds)
    with cls.rddl2tf.graph.as_default():
        cls.state = cls.rddl2tf.compile_initial_state(cls.batch_size)
        cls.actions = []
        for step in reversed(range(cls.horizon)):
            timestep = tf.constant(
                step, dtype=tf.float32, shape=(cls.batch_size, 1))
            cls.actions.append(cls.policy(cls.state, timestep))
def setUpClass(cls):
    """Set up fixtures: compile the model, sample policy variables, build evaluator."""
    # hyper-parameters shared by every test in the class
    cls.horizon = 40
    cls.batch_size = 1

    # read and parse the RDDL domain description
    with open('rddl/deterministic/Navigation.rddl') as rddl_file:
        model_text = rddl_file.read()
    rddl_parser = RDDLParser()
    rddl_parser.build()
    rddl = rddl_parser.parse(model_text)
    rddl.build()

    # compile the parsed model to TensorFlow ops in batch mode
    cls.rddl2tf = Compiler(rddl, batch_mode=True)

    # open-loop policy over the full planning horizon
    cls.policy = OpenLoopPolicy(cls.rddl2tf, cls.batch_size, cls.horizon)
    cls.policy.build('test')

    # one random tensor per action fluent, with a leading horizon dimension,
    # used to initialize the open-loop policy
    cls.policy_variables = [
        np.random.uniform(low=-1.0, high=1.0, size=[cls.horizon] + list(shape))
        for shape in cls.rddl2tf.rddl.action_size
    ]

    # evaluator that runs the policy's actions through the model
    cls.evaluator = ActionEvaluator(cls.rddl2tf, cls.policy)
def _build_scenario_policy_ops(self):
    """Create and build the open-loop policy used for scenario rollouts."""
    # scenario rollouts cover one step less than the full planning horizon
    scenario_horizon = self.horizon - 1
    self.scenario_policy = OpenLoopPolicy(
        self.compiler, scenario_horizon, parallel_plans=True)
    self.scenario_policy.build("scenario_policy")
def cell(request):
    """Pytest fixture: a SimulationCell over a freshly compiled model.

    The RDDL identifier comes from the fixture's parameterization.
    """
    model = rddlgym.make(request.param, mode=rddlgym.AST)

    compiler = DefaultCompiler(model, batch_size=BATCH_SIZE)
    compiler.init()

    policy = OpenLoopPolicy(compiler, HORIZON, parallel_plans=True)
    policy.build("tensorplan")

    yield SimulationCell(compiler, policy)
def _build_policy_ops(self):
    """Build the planning policy; prepare warm-start ops when configured."""
    self.policy = OpenLoopPolicy(
        self.compiler, self.horizon, parallel_plans=False)
    self.policy.build("planning")

    # NOTE(review): reaches into OpenLoopPolicy's private API — confirm
    # there is no public warm-start entry point
    if "warm_start" in self.config:
        self.warm_start_op = self.policy._build_warm_start_op()
def simulator(request):
    """Pytest fixture: a built Simulator over a reparameterized model.

    The RDDL identifier comes from the fixture's parameterization.
    """
    model = rddlgym.make(request.param, mode=rddlgym.AST)

    compiler = ReparameterizationCompiler(model, batch_size=BATCH_SIZE)
    compiler.init()

    plan = OpenLoopPolicy(compiler, HORIZON, parallel_plans=True)
    plan.build("planning")

    sim = Simulator(compiler, plan, config=None)
    sim.build()
    yield sim
def cell(request):
    """Pytest fixture: a SimulationCell fed with encoded noise inputs.

    The RDDL identifier comes from the fixture's parameterization.
    """
    model = rddlgym.make(request.param, mode=rddlgym.AST)

    compiler = ReparameterizationCompiler(model, batch_size=BATCH_SIZE)
    compiler.init()

    policy = OpenLoopPolicy(compiler, HORIZON, parallel_plans=True)
    policy.build("planning")

    # sample a single step of reparameterization noise and encode it
    # in the form the cell expects as inputs
    with compiler.graph.as_default():
        reparam_map = compiler.get_cpfs_reparameterization()
        noise_samples = utils.get_noise_samples(
            reparam_map, BATCH_SIZE, horizon=1)
        noise, encoding = utils.encode_noise_samples_as_inputs(noise_samples)

    simulation_cell = SimulationCell(
        compiler, policy, config={"encoding": encoding})
    simulation_cell.cell_noise = noise
    yield simulation_cell
def _build_policy_graph(self) -> None:
    '''Creates the open-loop policy and builds its planning ops.'''
    self._policy = OpenLoopPolicy(
        self._compiler,
        self.batch_size,
        self.horizon,
        self.parallel_plans)
    self._policy.build('planning')
def non_parallel_plans(compiler):
    """Fixture: a built open-loop policy with parallel plans disabled."""
    plan = OpenLoopPolicy(compiler, HORIZON, parallel_plans=False)
    plan.build("non_parallel_plans")
    return plan
def parallel_plans(compiler):
    """Fixture: a built open-loop policy with parallel plans enabled."""
    plan = OpenLoopPolicy(compiler, HORIZON, parallel_plans=True)
    plan.build("parallel_plans")
    return plan
def _build_policy_ops(self):
    """Create and build the tensorplan open-loop policy.

    The planning horizon is taken from the planner's configuration.
    """
    self.policy = OpenLoopPolicy(
        self.compiler, self.config["horizon"], parallel_plans=True)
    self.policy.build("tensorplan")
def _build_base_policy_ops(self):
    """Create and build the single-step base policy."""
    # the base policy always spans exactly one timestep
    base_horizon = 1
    self.base_policy = OpenLoopPolicy(
        self.compiler, base_horizon, parallel_plans=False)
    self.base_policy.build("base_policy")