def make_hillcar_problem(
    step_len, n_steps, damp, jitter, discount, bounds, cost_radius, actions
):
    """Assemble the hill-car ``Problem`` from its component objects.

    Args:
        step_len: Forwarded to ``HillcarTransitionFunction`` as ``step``.
        n_steps: Forwarded to ``HillcarTransitionFunction`` as ``num_steps``.
        damp: Forwarded to ``HillcarTransitionFunction`` as ``dampening``.
        jitter: Forwarded to ``HillcarTransitionFunction`` as ``jitter``.
        discount: Passed straight through to ``Problem``.
        bounds: Passed to ``HillcarBoundary``.
        cost_radius: Passed to ``BallSetFn`` together with a zero 2-vector.
        actions: Array whose ``shape`` unpacks into exactly two values, the
            second being 1.  Its first and last entries along axis 0 make up
            the single ``(low, high)`` pair handed to ``Problem``.

    Returns:
        The ``Problem`` built from the generative model, the action pair and
        ``discount``.

    Raises:
        AssertionError: If the second dimension of ``actions`` is not 1.
    """
    _, action_dim = actions.shape
    assert action_dim == 1
    state_dim = 2

    # Keyword arguments for the hill-car transition function (mass is fixed).
    dynamics_kwargs = utils.kwargify(
        mass=1.0,
        step=step_len,
        num_steps=n_steps,
        dampening=damp,
        jitter=jitter,
    )
    dynamics = HillcarTransitionFunction(**dynamics_kwargs)

    # HillcarBoundary is the active choice; SaturationBoundary(bounds) was
    # left disabled in the original code.
    edge = HillcarBoundary(bounds)

    cost = CostWrapper(BallSetFn(np.zeros(2), cost_radius))
    cost.favored = np.array([0.0])

    # Author's note: if a cost of 100 is seen leaking in, there is a problem
    # with velocity saturation in the boundary containment.  Otherwise a
    # penalty is paid at the boundary, but it is 0 afterwards.
    edge_costs = np.array([0, 0, 0, 0])

    model = GenerativeModel(
        dynamics, edge, cost, state_dim, action_dim, edge_costs
    )
    action_limits = [(actions[0], actions[-1])]
    return Problem(model, action_limits, discount)
def make_di_problem(
    step_len, n_steps, damp, jitter, discount, bounds, cost_radius, actions
):
    """Assemble the double-integrator ``Problem`` from its component objects.

    Args:
        step_len: Forwarded to ``DoubleIntegratorTransitionFunction`` as
            ``step``.
        n_steps: Forwarded as ``num_steps``.
        damp: Forwarded as ``dampening``.
        jitter: Forwarded as ``control_jitter``.
        discount: Passed straight through to ``Problem``.
        bounds: Passed to ``TorusBoundary``.
        cost_radius: Passed to ``BallSetFn`` together with a zero 2-vector.
        actions: Array whose ``shape`` unpacks into exactly two values, the
            second being 1, and whose first entry equals the negation of its
            last entry.  Those two entries make up the single ``(low, high)``
            pair handed to ``Problem``.

    Returns:
        The ``Problem`` built from the generative model, the action pair and
        ``discount``.

    Raises:
        AssertionError: If the second dimension of ``actions`` is not 1, or
            if ``actions[0] != -actions[-1]``.
    """
    _, action_dim = actions.shape
    assert action_dim == 1
    # The action set must be symmetric about zero.
    assert actions[0] == -actions[-1]
    state_dim = 2

    # Keyword arguments for the double-integrator transition function.
    dynamics_kwargs = utils.kwargify(
        step=step_len,
        num_steps=n_steps,
        dampening=damp,
        control_jitter=jitter,
    )
    dynamics = DoubleIntegratorTransitionFunction(**dynamics_kwargs)

    # TorusBoundary is the active choice; DoubleIntBoundary(bounds) and
    # SaturationBoundary(bounds) were left disabled in the original code.
    edge = TorusBoundary(bounds)

    # A TargetZoneFn over [-0.5, 0.5] x [-0.5, 0.5] was also left disabled in
    # the original code as an alternative state-cost function.
    cost = CostWrapper(BallSetFn(np.zeros(2), cost_radius))

    # One entry of 100 for each of the 2 * state_dim out-of-bounds slots.
    edge_costs = np.array([100] * (2 * state_dim))

    model = GenerativeModel(
        dynamics, edge, cost, state_dim, action_dim, edge_costs
    )
    action_limits = [(actions[0], actions[-1])]
    return Problem(model, action_limits, discount)
def f(g):
    """Record ``g`` as a plugin and return it unchanged.

    A ``Plugin`` built from ``name``, ``keys``, ``kwargify(g)`` and ``kwargs``
    is appended to ``self._plugins``.  ``self``, ``name``, ``keys`` and
    ``kwargs`` are free variables here; they are expected to come from the
    enclosing scope, which is not visible in this excerpt.
    """
    registry = self._plugins
    registry.append(Plugin(name, keys, kwargify(g), kwargs))
    # Hand back the original callable so the markers can be chained.
    return g