def execute(env, init_state, steps, init_mean, init_var, model, config,
            last_action_seq, task_likelihoods, pred_high, pred_low, recorder):
    """Roll out `steps` control steps, re-planning with `RS_opt` every step.

    At each step a fresh `Cost` objective is built from the current state and
    the task likelihoods, a random-shooting optimizer solves for an action
    sequence, and only the first action is applied (receding-horizon control).

    Args:
        env: environment exposing `state`/`reset()`, `step(a)`,
            `action_space`, and (optionally) `goal`.
        init_state: unused; kept for signature compatibility with callers.
        steps: number of control steps to execute.
        init_mean, init_var: unused (random shooting ignores the warm start);
            kept for signature compatibility.
        model: dynamics model handed to `Cost`.
        config: run configuration; mutated in place (`"goal"`, `"cost_fn"`).
        last_action_seq: unused; kept for signature compatibility.
        task_likelihoods: per-task weights forwarded to `Cost`.
        pred_high, pred_low: prediction clamping bounds forwarded to `Cost`.
        recorder: video recorder used when `config["record_video"]` is truthy.

    Returns:
        (trajectory, traject_cost): `trajectory` is a list of
        `[state, action, state_delta, -reward]` entries; `traject_cost` is
        the summed negative reward.

    NOTE(review): a later function in this file re-defines `execute` with a
    different signature and will shadow this one at import time — confirm
    which definition callers expect.
    """
    # In online mode continue from the env's current state; otherwise reset.
    current_state = copy.copy(env.state) if config['online'] else env.reset()
    try:
        config["goal"] = env.goal
    except AttributeError:
        # Some environments define no goal; leave config["goal"] untouched.
        pass

    trajectory = []
    traject_cost = 0
    bar = ProgBar(steps, track_time=True, title='\nExecuting....',
                  bar_char='▒')
    for i in range(steps):
        # Re-plan from scratch at every step.
        cost_object = Cost(
            model=model, init_state=current_state, horizon=config["horizon"],
            task_likelihoods=task_likelihoods,
            action_dim=env.action_space.shape[0], goal=config["goal"],
            pred_high=pred_high, pred_low=pred_low)
        config["cost_fn"] = cost_object.cost_fn
        optimizer = RS_opt(config)
        sol = optimizer.obtain_solution()

        # Apply only the first action of the planned sequence.
        a = sol[0:env.action_space.shape[0]]
        if config["record_video"]:
            recorder.capture_frame()
        next_state, r, _, _ = env.step(a)
        trajectory.append(
            [current_state.copy(), a.copy(), next_state - current_state, -r])
        current_state = next_state
        traject_cost += -r
        bar.update(item_id=" Step " + str(i) + " ")

    if config["record_video"]:
        recorder.capture_frame()
        recorder.close()
    return trajectory, traject_cost
def execute_random(env, steps, init_state):
    """Roll out `steps` uniformly random actions in `env`.

    Args:
        env: environment with `reset()`, `step(a)` and
            `action_space.sample()`.
        steps: number of steps to take.
        init_state: unused; kept for signature compatibility with callers.

    Returns:
        (trajectory, traject_cost): `trajectory` is an array whose rows are
        `[state, action, state_delta, -reward]`; `traject_cost` is the
        summed negative reward.
    """
    current_state = env.reset()
    trajectory = []
    traject_cost = 0
    for _ in range(steps):
        a = env.action_space.sample()
        next_state, r, _, _ = env.step(a)
        trajectory.append(
            [current_state.copy(), a.copy(), next_state - current_state, -r])
        current_state = next_state
        traject_cost += -r
    return np.array(trajectory), traject_cost
def execute(env, init_state, steps, init_mean, init_var, model, config,
            last_action_seq, pred_high, pred_low):
    """Roll out `steps` receding-horizon control steps with an ensemble model.

    Each step builds a `Cost` objective around the current state, solves it
    with random shooting (`RS_opt`), applies the first action, and also
    accumulates the one-step model prediction error via `test_model`.

    Args:
        env: environment with `reset()`, `step(a)`, `action_space`, and
            (optionally) `goal`.
        init_state, init_mean, init_var, last_action_seq: unused; kept for
            signature compatibility with callers.
        steps: number of control steps to execute.
        model: ensemble dynamics model passed to `Cost` and `test_model`.
        config: run configuration; mutated in place (`"goal"`, `"cost_fn"`).
        pred_high, pred_low: prediction clamping bounds forwarded to `Cost`.

    Returns:
        (trajectory, traject_cost): `trajectory` is an array whose rows are
        `[state, action, state_delta, -reward]`; `traject_cost` is the
        summed negative reward.

    NOTE(review): this re-defines `execute` and shadows the earlier
    definition in this file at import time — confirm which one callers
    expect.
    """
    current_state = env.reset()
    try:
        config["goal"] = env.goal
    except AttributeError:
        # Environment defines no goal; leave any existing value alone.
        pass

    trajectory = []
    traject_cost = 0
    model_error = 0
    bar = ProgBar(steps, track_time=True, title='\nExecuting....',
                  bar_char='▒')
    for i in range(steps):
        cost_object = Cost(
            ensemble_model=model, init_state=current_state,
            horizon=config["horizon"],
            action_dim=env.action_space.shape[0], goal=config["goal"],
            pred_high=pred_high, pred_low=pred_low)
        config["cost_fn"] = cost_object.cost_fn
        optimizer = RS_opt(config)
        sol = optimizer.obtain_solution()

        # Apply only the first action of the planned sequence.
        a = sol[0:env.action_space.shape[0]]
        next_state, r, _, _ = env.step(a)
        trajectory.append(
            [current_state.copy(), a.copy(), next_state - current_state, -r])
        # One-step prediction error of the learned dynamics model.
        model_error += test_model(model, current_state.copy(), a.copy(),
                                  next_state - current_state)
        current_state = next_state
        traject_cost += -r
        bar.update(item_id=" Step " + str(i) + " ")

    print("Model error: ", model_error)
    return np.array(trajectory), traject_cost