# Shared dependencies used across the run_policy variants below.
import json
import os
import os.path as osp
import pickle
import random
import time

import gym
import numpy as np
import pandas as pd
import torch
from tqdm import tqdm

from spinup.utils.logx import EpochLogger


def run_policy(env, get_action, max_ep_len=None, num_episodes=10):
    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    logger = EpochLogger()
    o, r, d, ep_ret, ep_len, n = env.reset(), 0, False, 0, 0, 0
    while n < num_episodes:
        a = get_action(o)
        # Inject uncertainty (optional): resample the action from a Gaussian
        # centered on the deterministic policy output.
        # log_std = -0.5 * np.ones(3, dtype=np.float32)
        # std = torch.exp(torch.nn.Parameter(torch.as_tensor(log_std)))
        # a = Normal(torch.as_tensor(a), std).sample().numpy()
        o, r, d, _ = env.step(a)
        ep_ret += r
        ep_len += 1

        if ep_ret == 10:
            print("Success!")

        if d or (ep_len == max_ep_len):
            logger.store(EpRet=ep_ret, EpLen=ep_len)
            print('Episode %d \t EpRet %.3f \t EpLen %d' % (n, ep_ret, ep_len))
            o, r, d, ep_ret, ep_len = env.reset(), 0, False, 0, 0
            n += 1

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()
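# The commented-out block above sketches Gaussian exploration noise at
# evaluation time. A minimal runnable version of that idea, written as a
# wrapper around an arbitrary get_action; act_dim and log_std are assumed
# illustrative values, not part of the original code.
from torch.distributions.normal import Normal


def make_noisy_action(get_action, act_dim=3, log_std=-0.5):
    """Resample each action from a Gaussian centered on the policy output."""
    std = torch.exp(log_std * torch.ones(act_dim, dtype=torch.float32))

    def noisy_get_action(o):
        mu = torch.as_tensor(get_action(o), dtype=torch.float32)
        return Normal(mu, std).sample().numpy()

    return noisy_get_action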
def run_policy(env, get_action, max_ep_len=None, num_episodes=100, render=True):
    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    logger = EpochLogger()
    o, r, d, ep_ret, ep_len, n = env.reset(), 0, False, 0, 0, 0
    while n < num_episodes:
        if render:
            env.render()
            time.sleep(1e-3)

        # The policy expects a batched torch tensor; unbatch the result.
        a = get_action(torch.Tensor(o.reshape(1, -1)))[0]
        o, r, d, _ = env.step(a.data.numpy()[0])
        ep_ret += r
        ep_len += 1

        if d or (ep_len == max_ep_len):
            logger.store(EpRet=ep_ret, EpLen=ep_len)
            print('Episode %d \t EpRet %.3f \t EpLen %d' % (n, ep_ret, ep_len))
            o, r, d, ep_ret, ep_len = env.reset(), 0, False, 0, 0
            n += 1

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()
def run_policy(env, get_action, max_ep_len=None, num_episodes=100, render=True,
               sleep=1e-3, log=True, verbose=True, reset_state=None,
               q_action=None, action_parameters=None, random=False):
    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    if log:
        logger = EpochLogger()
    r, d, ep_ret, ep_len, n = 0, False, 0, 0, 0
    # set_state is assumed to be a helper defined elsewhere in the module.
    o = env.reset() if reset_state is None else set_state(reset_state, env)
    action_parameters = {} if action_parameters is None else action_parameters
    while n < num_episodes:
        img = None
        if render:
            img = env.render(mode='rgb_array')
            time.sleep(sleep)

        if ep_len == 0 and q_action is not None:
            a = q_action
        elif random:
            a = env.action_space.sample()
        else:
            a = get_action(o, **action_parameters)

        o_prev = o
        o, r, d, _ = env.step(a)
        ep_ret += r
        ep_len += 1
        results = {'img': img, 'a': a, 'r': r, 'd': d,
                   'score': ep_ret, 't': ep_len, 'o': o_prev}
        yield results

        if d or (ep_len == max_ep_len):
            if log:
                logger.store(EpRet=ep_ret, EpLen=ep_len)
            if verbose:
                print('Episode %d \t EpRet %.3f \t EpLen %d' % (n, ep_ret, ep_len))
            # Reset the episode state, but keep the episode counter running.
            r, d, ep_ret, ep_len = 0, False, 0, 0
            o = env.reset() if reset_state is None else set_state(reset_state, env)
            n += 1

    if log:
        logger.log_tabular('EpRet', with_min_and_max=True)
        logger.log_tabular('EpLen', average_only=True)
        logger.dump_tabular()
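# This variant is a generator: the caller drives the rollout and consumes
# per-step data as it arrives. A minimal usage sketch; env and get_action
# are assumed to come from the caller's own loading code.
frames, rewards = [], []
for step in run_policy(env, get_action, max_ep_len=200, num_episodes=1):
    frames.append(step['img'])   # None unless render=True
    rewards.append(step['r'])
print('steps: %d \t return: %.3f' % (len(rewards), sum(rewards)))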
def run_policy(env, get_action, max_ep_len=None, num_episodes=100, render=True):
    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    def unscale_action(action_space, scaled_action):
        """
        Rescale the action from [-1, 1] to [low, high]
        (no need for symmetric action space)

        :param action_space: (gym.spaces.box.Box)
        :param scaled_action: (np.ndarray)
        :return: (np.ndarray)
        """
        low, high = action_space.low, action_space.high
        return low + (0.5 * (scaled_action + 1.0) * (high - low))

    logger = EpochLogger()
    o, r, d, ep_ret, ep_len, n = env.reset(), 0, False, 0, 0, 0
    while n < num_episodes:
        if render:
            env.render()
            time.sleep(1e-3)

        a = get_action(o)
        unscaled_action = unscale_action(env.action_space, a)
        o, r, d, _ = env.step(unscaled_action)
        ep_ret += r
        ep_len += 1

        if d or (ep_len == max_ep_len):
            logger.store(EpRet=ep_ret, EpLen=ep_len)
            print('Episode %d \t EpRet %.3f \t EpLen %d' % (n, ep_ret, ep_len))
            o, r, d, ep_ret, ep_len = env.reset(), 0, False, 0, 0
            n += 1

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()
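# Quick sanity check of the affine rescaling above: -1 maps to the low
# bound, 0 to the midpoint, +1 to the high bound. The Box(-2, 2) space
# below is an assumed example, not from the original code.
from gym.spaces import Box

_space = Box(low=-2.0, high=2.0, shape=(1,), dtype=np.float32)
for _scaled in (-1.0, 0.0, 1.0):
    _low, _high = _space.low, _space.high
    print(_scaled, '->', _low + 0.5 * (_scaled + 1.0) * (_high - _low))
# -1.0 -> [-2.], 0.0 -> [0.], 1.0 -> [2.]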
def run_policy(env, get_action, max_ep_len=None, num_episodes=100, render=True):
    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    goal_env = hasattr(env, 'goal')

    def cat_obs(o):
        return np.concatenate([o['observation'], o['desired_goal']], axis=-1)

    logger = EpochLogger()
    o, r, d, ep_ret, ep_len, n = env.reset(), 0, False, 0, 0, 0
    if goal_env:
        o = cat_obs(o)
    while n < num_episodes:
        if render:
            env.render()
            time.sleep(1e-3)

        a = get_action(o)
        o, r, d, _ = env.step(a)
        ep_ret += r
        ep_len += 1
        if goal_env:
            o = cat_obs(o)

        if d or (ep_len == max_ep_len):
            logger.store(EpRet=ep_ret, EpLen=ep_len)
            print('Episode %d \t EpRet %.3f \t EpLen %d' % (n, ep_ret, ep_len))
            o, r, d, ep_ret, ep_len = env.reset(), 0, False, 0, 0
            if goal_env:
                o = cat_obs(o)
            n += 1

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()
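# Goal-based envs return dict observations; cat_obs flattens them so a
# plain MLP policy can consume them. Minimal shape check with assumed
# illustrative dimensions (10-d observation, 3-d goal):
_o = {'observation': np.zeros(10), 'desired_goal': np.zeros(3)}
assert np.concatenate([_o['observation'], _o['desired_goal']], axis=-1).shape == (13,)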
def run_policy(env, get_action, max_ep_len=None, num_episodes=100, render=True):
    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    logger = EpochLogger()
    all_feats = []
    o, r, d, ep_ret, ep_len, n = env.reset(), 0, False, 0, 0, 0
    while n < num_episodes:
        if render:
            env.render(episode=n)
            time.sleep(1e-3)

        a = get_action(o)
        o, r, d, info = env.step(a)
        ep_ret += r
        ep_len += 1
        if "all_feats" in info.keys():
            all_feats.append(info["all_feats"])

        if d or (ep_len == max_ep_len):
            print(f"Coeff: {o[-env.coeff_dim:]}")
            print("All feats", np.array(all_feats).sum(axis=0))
            logger.store(EpRet=ep_ret, EpLen=ep_len)
            print('Episode %d \t EpRet %.3f \t EpLen %d' % (n, ep_ret, ep_len))
            o, r, d, ep_ret, ep_len = env.reset(), 0, False, 0, 0
            all_feats = []
            n += 1

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()
def run_policy(env, policy, max_ep_len=None, num_episodes=100, render=True):
    logger = EpochLogger()
    obs, reward, done, ep_ret, ep_len, n = env.reset(), 0, False, 0, 0, 0
    while n < num_episodes:
        if render:
            env.render()
            time.sleep(1e-3)

        action = policy(obs)
        obs, reward, done, _ = env.step(action)
        ep_ret += reward
        ep_len += 1

        if done or (ep_len == max_ep_len):
            logger.store(EpRet=ep_ret, EpLen=ep_len)
            print('Episode %d \t EpRet %.3f \t EpLen %d' % (n, ep_ret, ep_len))
            obs, reward, done, ep_ret, ep_len = env.reset(), 0, False, 0, 0
            n += 1

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()
def run_policy(env, get_action, max_ep_len=None, num_episodes=100, render=True,
               gamma=1, key='danger'):
    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    logger = EpochLogger()
    o, r, d, ep_ret, ep_len, n, ep_info = env.reset(), 0, False, 0, 0, 0, 0
    while n < num_episodes:
        if render:
            env.render()
            time.sleep(1e-3)

        a = get_action(o)
        o, r, d, info = env.step(a)
        # Track the worst info[key] value seen this episode and accumulate
        # the gamma-discounted return.
        ep_info = max(ep_info, info[key])
        ep_ret += r * gamma**ep_len
        ep_len += 1

        if d or (ep_len == max_ep_len):
            logger.store(EpRet=ep_ret, EpLen=ep_len, perf=ep_ret, fail=ep_info)
            print('Episode %d \t EpRet %.3f \t EpLen %d' % (n, ep_ret, ep_len))
            o, r, d, ep_ret, ep_len, ep_info = env.reset(), 0, False, 0, 0, 0
            n += 1

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.log_tabular('perf', average_only=True)
    logger.log_tabular('fail', average_only=True)
    logger.dump_tabular()
def run_policy(env, get_action, max_ep_len=None, num_episodes=1000, out_name="", render=False):
    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    logger = EpochLogger()
    o, r, d, ep_ret, ep_len, n = env.reset(), 0, False, 0, 0, 0
    results = {}
    while n < num_episodes:
        a = get_action(o)
        o, r, d, _ = env.step(a)
        ep_ret += r
        ep_len += 1

        if d or (ep_len == max_ep_len):
            logger.store(EpRet=ep_ret, EpLen=ep_len)
            results[n] = ep_len
            print('Episode %d \t EpRet %.3f \t EpLen %d' % (n, ep_ret, ep_len))
            o, r, d, ep_ret, ep_len = env.reset(), 0, False, 0, 0
            n += 1

    # Save per-episode lengths for offline analysis.
    with open("outputs/" + out_name + ".json", 'w') as f:
        json.dump(results, f)

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()
def run_adversarial_policy(env, ego_action, opp_action, env_init, ego_agent, opp_agent,
                           max_ep_len=None, num_episodes=100, render=True):
    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    logger = EpochLogger()
    r, d, ep_ret, ep_len, n = 0, False, 0, 0, 0
    # Pick one of the two initial x-positions at random.
    init_positions = np.random.randint(0, 2)
    o = env.reset({
        'x': env_init['initial_x'][init_positions],
        'y': env_init['initial_y'],
        'theta': env_init['initial_theta']
    })
    while n < num_episodes:
        if render:
            env.render()
            # time.sleep(1e-3)

        # Convert the raw observation into each agent's RL observation.
        RLobs = ego_agent.process_obs(o)
        Oppobs = opp_agent.process_obs(o)

        # Take deterministic actions at test time.
        a = ego_action(RLobs, action_mask=ego_agent.aval_paths, deterministic=True)
        ego_speed, ego_steer, a = ego_agent.plan(o, a)

        # Opponent decision.
        a_opp = opp_action(Oppobs, action_mask=opp_agent.aval_paths, deterministic=True)
        opp_speed, opp_steer, _ = opp_agent.plan(o, a_opp)

        action = {
            'ego_idx': 0,
            'speed': [ego_speed, opp_speed],
            'steer': [ego_steer, opp_steer]
        }
        o, r, d, _ = env.step(action)
        ep_ret += r
        ep_len += 1

        if d or (ep_len == max_ep_len):
            logger.store(EpRet=ep_ret, EpLen=ep_len)
            print('Episode %d \t EpRet %.3f \t EpLen %d' % (n, ep_ret, ep_len))
            init_positions = np.random.randint(0, 2)
            o, r, d, ep_ret, ep_len = env.reset({
                'x': env_init['initial_x'][init_positions],
                'y': env_init['initial_y'],
                'theta': env_init['initial_theta']
            }), 0, False, 0, 0
            n += 1

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()
def run_policy(env, get_action, max_ep_len=None, num_episodes=100, render=True, env_name=None):
    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    logger = EpochLogger()
    o, r, d, ep_ret, ep_len, n = env.reset(), 0, False, 0, 0, 0

    # Per-episode and aggregate feature counters for each supported env.
    num_violations = 0
    num_target = 0
    violations = []
    target = []
    hit_by_opponent = 0
    score_hit = 0
    avoid_opponent = 0
    hit_feat_counts = []
    score_feat_counts = []
    avoid_feat_counts = []
    episode_feat_counts = []
    pellet_counts = 0
    power_pellet_counts = 0
    eat_ghost_counts = 0
    eat_cherry_counts = 0
    hit_ghost_counts = 0
    ep_scores = []
    demo_obs = []
    demo_acs = []
    prev_ale = 3
    curr_ale = 3

    while n < num_episodes:
        if render:
            env.render()
            time.sleep(1e-3)

        a = get_action(o)
        o, r, d, info = env.step(a)

        if env_name == 'Boxing-ram-v0':
            # Reward sign distinguishes hits taken, hits landed, and neither.
            if r == 0:
                avoid_opponent += 1
            elif r < 0:
                hit_by_opponent -= int(r)
            else:
                score_hit += int(r)

        if env_name == 'MsPacman-ram-v0':
            # Decode game events from the reward value and remaining lives.
            curr_ale = env.ale.lives()
            if r == 10:
                pellet_counts += 1
            if r == 50:
                power_pellet_counts += 1
            if r == 200 or r == 400 or r == 800 or r == 1600:
                eat_ghost_counts += 1
            if r == 100:
                eat_cherry_counts += 1
            else:
                if curr_ale == prev_ale - 1:
                    hit_ghost_counts += 1
            prev_ale = curr_ale

        if env_name == 'reacher':
            if info['constraint']:
                num_violations += 1
            if env.get_features()[0]:
                num_target += 1

        ep_ret += r
        ep_len += 1
        demo_obs.append(o)
        demo_acs.append(a)

        if d or (ep_len == max_ep_len):
            logger.store(EpRet=ep_ret, EpLen=ep_len)
            print('Episode %d \t EpRet %.3f \t EpLen %d' % (n, ep_ret, ep_len))
            if env_name == 'reacher':
                print('Violations %d, Target %d' % (num_violations, num_target))
            if env_name == 'Boxing-ram-v0':
                print("damage %d" % hit_by_opponent)
                print("scores %d" % score_hit)
                print("avoid %d" % avoid_opponent)
                episode_feat_counts.append([hit_by_opponent, score_hit, avoid_opponent, ep_ret])
            if env_name == 'MsPacman-ram-v0':
                print("pellet %d" % pellet_counts)
                print("power pellet %d" % power_pellet_counts)
                print("ghosts eaten %d" % eat_ghost_counts)
                print("cherry %d" % eat_cherry_counts)
                print("hit ghost %d" % hit_ghost_counts)
                episode_feat_counts.append([pellet_counts, power_pellet_counts,
                                            eat_ghost_counts, eat_cherry_counts,
                                            hit_ghost_counts])

            ep_scores.append(ep_ret)
            o, r, d, ep_ret, ep_len = env.reset(), 0, False, 0, 0
            violations.append(num_violations)
            target.append(num_target)
            hit_feat_counts.append(hit_by_opponent)
            score_feat_counts.append(score_hit)
            avoid_feat_counts.append(avoid_opponent)

            # Reset per-episode counters.
            num_violations = 0
            num_target = 0
            hit_by_opponent = 0
            avoid_opponent = 0
            score_hit = 0
            pellet_counts = 0
            power_pellet_counts = 0
            eat_ghost_counts = 0
            eat_cherry_counts = 0
            hit_ghost_counts = 0
            prev_ale = 3
            n += 1

    if env_name == 'reacher':
        print(violations)
        print(target)
    if env_name == 'Boxing-ram-v0':
        features = {'Features': episode_feat_counts, "Obs": demo_obs,
                    "Scores": ep_scores, "Acs": demo_acs}
        pickle.dump(features, open('boxing_demos.pkl', 'wb'))
    if env_name == 'MsPacman-ram-v0':
        features = {'Features': episode_feat_counts, "Scores": ep_scores,
                    "Obs": demo_obs, "Acs": demo_acs}
        pickle.dump(features, open('pacman_demos.pkl', 'wb'))

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()
def run_policy(env, get_action, max_ep_len=None, num_episodes=100, render=False,
               params={}, verbose=False):
    from upn.visualize.render import forward_env
    from numpngw import write_apng

    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    # Build the evaluation environments named in params.
    test_envs, test_env_names = [], params["test_env_names"][0]
    for name in test_env_names:
        test_envs.append(gym.make(name))

    logger = EpochLogger()
    for env_name, env in zip(test_env_names, test_envs):
        all_feats = []
        all_rews = []
        o, r, d, ep_ret, ep_len, n = env.reset(), 0, False, 0, 0, 0
        coeff = o[-env.coeff_dim:]
        acs = []
        pbar = tqdm(total=num_episodes)
        while n < num_episodes:
            if render:
                env.render()
                time.sleep(1e-3)

            a = get_action(o)
            acs.append(a)
            o, r, d, info = env.step(a)
            ep_ret += r
            ep_len += 1
            if "all_feats" in info.keys():
                all_feats.append(info["all_feats"])

            if d or (ep_len == max_ep_len):
                if verbose:
                    print(f"Coeff: {coeff}")
                    print("All feats", np.array(all_feats).sum(axis=0))
                logger.store(**{f"{env_name}_EpRet": ep_ret})
                logger.store(**{f"{env_name}_EpLen": ep_len})
                all_rews.append(ep_ret)
                if verbose:
                    print('Episode %d \t EpRet %.3f \t EpLen %d' % (n, ep_ret, ep_len))
                    print(f"{env_name}: reward {ep_ret:.03f}")
                if render:
                    # Re-run the recorded actions to produce frames, then save
                    # an animated PNG. args.folder is assumed to be a
                    # module-level namespace parsed from the command line.
                    frames = forward_env(env, np.array(acs), batch=False,
                                         subrender=False, resize=0.4)
                    fps = 10
                    fname = f"{env_name}_{n:02d}_rew_{ep_ret:.03f}.png"
                    write_apng(os.path.join(args.folder, fname), frames, delay=1000 / fps)

                o, r, d, ep_ret, ep_len = env.reset(), 0, False, 0, 0
                all_feats = []
                acs = []
                n += 1
                pbar.update(1)

        print(f"{env_name}: mean reward {np.mean(all_rews):.03f}")
        pbar.close()
        logger.log_tabular(f'{env_name}_EpRet', with_min_and_max=True)
        logger.log_tabular(f'{env_name}_EpLen', average_only=True)
        logger.dump_tabular()
def run_policy(env, get_action, max_ep_len=None, num_episodes=100, render=True, seed=None):
    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    logger = EpochLogger()
    success_num = 0
    Handlog = {'maxVelocity': np.array([]), 'maxTorque': np.array([])}
    CMAESlog = {'maxVelocity': np.array([]), 'maxTorque': np.array([])}
    DRLlog = {
        'maxVelocity': np.array([]),
        'maxTorque': np.array([]),
        'successNum': 0
    }
    tmpMaxVelocity = np.array([])
    tmpMaxTorque = np.array([])

    # Re-initialize the simulator in GUI mode. SIMULATIONFREQUENCY is assumed
    # to be a module-level constant.
    env.__init__("GUI", seed=seed)
    o, r, d, ep_ret, ep_len, n = env.reset(), 0, False, 0, 0, 0
    while n < num_episodes:
        if render:
            env.render()
            time.sleep(1e-3)

        a = get_action(o)
        # Hold each action for 25 simulation steps, tracking peak torque
        # and joint velocity.
        for i in range(25):
            o, r, d, o_dict = env.step(a)
            time.sleep(1 / SIMULATIONFREQUENCY)
            tmpMaxTorque = np.append(tmpMaxTorque, np.abs(o_dict['torque']).max())
            tmpMaxVelocity = np.append(tmpMaxVelocity, np.abs(o_dict['velocity']).max())

        ep_ret += r
        ep_len += 1

        if ep_len == max_ep_len:
            logger.store(EpRet=ep_ret, EpLen=ep_len)
            print('Episode %d \t EpRet %.3f \t EpLen %d' % (n, ep_ret, ep_len))
            satisfy = d
            o, r, d, ep_ret, ep_len = env.reset(), 0, False, 0, 0
            if satisfy:
                print("done!")
                success_num += 1
                # Keep a running average of per-step peaks over successful episodes.
                if len(DRLlog['maxVelocity']) != 0:
                    DRLlog['maxVelocity'] += tmpMaxVelocity
                    DRLlog['maxVelocity'] /= success_num
                    DRLlog['maxTorque'] += tmpMaxTorque
                    DRLlog['maxTorque'] /= success_num
                else:
                    DRLlog['maxVelocity'] = tmpMaxVelocity
                    DRLlog['maxTorque'] = tmpMaxTorque

            # Disabled baselines: accumulate the hand-tuned and CMA-ES
            # controllers into Handlog / CMAESlog the same way.
            # tmpMaxVelocity, tmpMaxTorque, success = run_Hand(env)
            # if len(Handlog['maxVelocity']) != 0:
            #     Handlog['maxVelocity'] += tmpMaxVelocity
            #     Handlog['maxVelocity'] /= success_num
            #     Handlog['maxTorque'] += tmpMaxTorque
            #     Handlog['maxTorque'] /= success_num
            # else:
            #     Handlog['maxVelocity'] = tmpMaxVelocity
            #     Handlog['maxTorque'] = tmpMaxTorque

            # tmpMaxVelocity, tmpMaxTorque = run_CMAES()
            # if len(CMAESlog['maxVelocity']) != 0:
            #     CMAESlog['maxVelocity'] += tmpMaxVelocity
            #     CMAESlog['maxVelocity'] /= success_num
            #     CMAESlog['maxTorque'] += tmpMaxTorque
            #     CMAESlog['maxTorque'] /= success_num
            # else:
            #     CMAESlog['maxVelocity'] = tmpMaxVelocity
            #     CMAESlog['maxTorque'] = tmpMaxTorque

            tmpMaxVelocity = np.array([])
            tmpMaxTorque = np.array([])
            n += 1

    DRLlog['successNum'] = success_num
    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()
    return DRLlog, Handlog, CMAESlog
def run_policy(env, get_action, max_ep_len=None, num_episodes=100, render=True,
               try_rollouts=0, steps_per_try_rollout=0):
    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    # Fix seeds for reproducible evaluation.
    torch.manual_seed(3)
    np.random.seed(3)
    random.seed(3)

    logger = EpochLogger()
    o, r, done, ep_ret, ep_len, n = env.reset(), 0, False, 0, 0, 0
    rollout = []
    while n < num_episodes:
        if try_rollouts != 0:
            # Replay actions from lookahead rollouts, asserting that the env
            # transitions match what the rollout predicted. do_rollouts is
            # assumed to be defined elsewhere in the module.
            if not rollout:
                rollout = do_rollouts(get_action, env, o, steps_per_try_rollout,
                                      try_rollouts, is_eval=True,
                                      take_worst_rollout=False)
            a, v, logp, _o, _r, _done, _info = rollout.pop(0)
            o, r, done, info = env.step(a)
            assert np.array_equal(o, _o)
            assert r == _r
            assert done == _done
            step_output = o, r, done, info
        else:
            a = get_action(o)[0]
            step_output = env.step(a)

        if render:
            env.render()
            # time.sleep(1e-3)

        if hasattr(env, 'last_step_output'):
            step_output = env.last_step_output

        o, r, done, info = step_output
        ep_ret += r
        ep_len += 1

        if done or (ep_len == max_ep_len):
            logger.store(EpRet=ep_ret, EpLen=ep_len)
            print('Episode %d \t EpRet %.3f \t EpLen %d' % (n, ep_ret, ep_len))
            o, r, done, ep_ret, ep_len = env.reset(), 0, False, 0, 0
            n += 1

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()
def run_policy(env, get_action, save_dir, max_ep_len=10000, num_episodes=10, render=True):
    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    # sample_step_per_trj and save_movie are assumed to be module-level settings.
    dir_name = 'trajectory{}st_{}episode'.format(sample_step_per_trj, num_episodes)
    dir_path = osp.join(save_dir, dir_name)
    os.makedirs(dir_path)  # raises if it already exists; pass exist_ok=True to allow reuse

    if save_movie:
        env = gym.wrappers.Monitor(env, dir_path + '/movies',
                                   video_callable=(lambda n: n < 10))

    logger = EpochLogger()
    o, r, d, ep_ret, ep_len, n = env.reset(), 0, False, 0, 0, 0
    observations = []
    actions = []
    results = []
    while n < num_episodes:
        for t in range(max_ep_len):
            if render:
                env.render()
                time.sleep(1e-5)

            a = get_action(o)
            # Record only the first sample_step_per_trj steps of each trajectory.
            if t < sample_step_per_trj:
                observations.append(o)
                actions.append(a)

            o, r, d, _ = env.step(a)
            ep_ret += r
            ep_len += 1

            if d or (ep_len == max_ep_len):
                logger.store(EpRet=ep_ret, EpLen=ep_len)
                print('Episode %d \t EpRet %.3f \t EpLen %d' % (n, ep_ret, ep_len))
                results.append([n, ep_ret, ep_len])
                o, r, d, ep_ret, ep_len = env.reset(), 0, False, 0, 0
                n += 1
                break

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()

    # Save trajectories and per-episode results as CSV.
    df_obs = pd.DataFrame(observations)
    df_act = pd.DataFrame(actions)
    df_results = pd.DataFrame(results, columns=['Episode', 'EpRet', 'Eplen'])
    df_obs.to_csv(osp.join(dir_path, "observations.csv"), sep=",", header=False, index=False)
    df_act.to_csv(osp.join(dir_path, "actions.csv"), sep=",", header=False, index=False)
    df_results.to_csv(osp.join(dir_path, "each_results.csv"), sep=",", index=False)
    df_results.describe().to_csv(osp.join(dir_path, "results_describe.csv"), sep=",")
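# All of these variants follow the Spinning Up convention: an env plus a
# get_action callable recovered from a saved experiment. A minimal driver
# sketch, assuming the standard spinup loader and targeting the common
# (env, get_action, max_ep_len, num_episodes, render) signature shared by
# most variants above; the fpath below is a hypothetical placeholder.
from spinup.utils.test_policy import load_policy_and_env

fpath = osp.expanduser('~/data/my_experiment/my_experiment_s0')
env, get_action = load_policy_and_env(fpath, itr='last', deterministic=True)
run_policy(env, get_action, max_ep_len=1000, num_episodes=10, render=True)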