Example No. 1
# Standard-library and third-party imports needed by this snippet.
# `ClothEnv`, `analytic`, and `analytic_corners` are assumed to be importable
# from the surrounding project module (a cloth-simulation gym environment).
import pickle
from collections import defaultdict
from os.path import join
import yaml


def run(config_file,
        render_path,
        file_path,
        result_path,
        load_state,
        max_episodes,
        random_pol=False):
    """Run a policy. Use this as the main testbed before running baselines-fork.
    """
    with open(config_file, 'r') as fh:
        cfg = yaml.safe_load(fh)
        seed = cfg['seed']
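        # Encode key env settings in the result file name for bookkeeping.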
        stuff = '-clip_a-{}-delta_a-{}-obs-{}'.format(
            cfg['env']['clip_act_space'], cfg['env']['delta_actions'],
            cfg['env']['obs_type'])
        result_path = result_path.replace('.pkl', '{}.pkl'.format(stuff))

    # Save states into local directory, load from nfs diskstation.
    NFS = '/nfs/diskstation/seita/clothsim'
    state_path = join(file_path, "state_init.pkl")
    load_state_path = join(NFS, 'state_init_med_49_coverage.pkl')
    num_episodes = 0
    stats_all = []

    # Should seed env this way, following gym conventions.
    if load_state:
        env = ClothEnv(config_file, start_state_path=load_state_path)
    else:
        env = ClothEnv(config_file)
    env.seed(seed)
    env.render(filepath=render_path)

    for ep in range(max_episodes):
        obs = env.reset()
        env.save_state(state_path)
        # Go through one episode and put information in `stats_ep`.
        # Put the first observation here to start.
        stats_ep = defaultdict(list)
        stats_ep['obs'].append(obs)
        done = False
        num_steps = 0

        while not done:
            if random_pol:
                #action = env.get_random_action(atype='touch_cloth')
                action = env.get_random_action(atype='over_xy_plane')
            else:
                #action = analytic(env, t=num_steps, cfg=cfg)
                action = analytic_corners(env, t=num_steps, cfg=cfg)

            # Apply the action.
            obs, rew, done, info = env.step(action)
            stats_ep['obs'].append(obs)
            stats_ep['rew'].append(rew)
            stats_ep['act'].append(action)
            stats_ep['done'].append(done)
            stats_ep['info'].append(info)
            num_steps += 1

        num_episodes += 1
        print("\nFinished {} episodes: {}\n".format(num_episodes, info))
        stats_all.append(stats_ep)

        # Just dump here to keep saving and overwriting.
        with open(result_path, 'wb') as fh:
            pickle.dump(stats_all, fh)

    assert len(stats_all) == max_episodes, len(stats_all)
    if env.render_proc is not None:
        env.render_proc.terminate()
        env.cloth.stop_render()
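
A minimal sketch of how this `run` might be invoked, assuming hypothetical placeholder paths (the argument names come directly from the signature above):

if __name__ == "__main__":
    # All paths below are hypothetical placeholders; adjust for your setup.
    run(config_file='cfg/sim_config.yaml',
        render_path='render/',
        file_path='logs/',
        result_path='logs/results.pkl',
        load_state=False,
        max_episodes=10,
        random_pol=True)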
Example No. 2
# Imports as in Example No. 1; `ClothEnv`, `_create_scene_and_offscreen_render`,
# and `_save_trimesh` are assumed to come from the surrounding project module.
import pickle
from collections import defaultdict
from os.path import join
import yaml


def run(config_file, render_path, file_path, result_path, load_state, max_episodes):
    """Run a policy.

    Note that there are many possible interpretations of 'random' actions.
    It's faster if we have `load_state=True`, so if there isn't a state ready
    to load, then run one call to `env.reset()` to get one.

    Actually, we can also do an analytic one where we grip the highest point.
    That part is trivial, but determining the length and direction can be more
    complicated. We can just use hard-coded rules.
    """
    with open(config_file, 'r') as fh:
        cfg = yaml.safe_load(fh)
        seed = cfg['seed']

    # Save states into local directory, load from nfs diskstation.
    NFS = '/nfs/diskstation/seita/clothsim'
    state_path = join(file_path, "state_init.pkl")
    load_state_path = join(NFS, 'state_init_med_49_coverage.pkl')
    random_pol = True
    num_episodes = 0
    stats_all = []

    # Should seed env this way, following gym conventions.
    if load_state:
        env = ClothEnv(config_file, start_state_path=load_state_path)
    else:
        env = ClothEnv(config_file)
    env.seed(seed)
    env.render(filepath=render_path)

    # Fix a Pyrender scene, so that we don't keep re-creating.
    pyr_scene, pyr_rend = _create_scene_and_offscreen_render()

    for ep in range(max_episodes):
        # Run one episode and put information in `stats_ep`. Save the starting state.
        obs = env.reset()
        env.save_state(state_path)
        stats_ep = defaultdict(list)
        done = False
        num_steps = 0

        while not done:
            if random_pol:
                #action = env.get_random_action(atype='over_xy_plane')
                action = env.get_random_action(atype='touch_cloth')
            else:
                raise NotImplementedError()
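            # Save an offscreen render of the current cloth mesh before stepping
            # (project-specific helper defined elsewhere in this module).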
            _save_trimesh(env, pyr_scene, pyr_rend)
            obs, rew, done, info = env.step(action)
            stats_ep['obs'].append(obs)
            stats_ep['rew'].append(rew)
            stats_ep['act'].append(action)
            stats_ep['done'].append(done)
            stats_ep['info'].append(info)
            num_steps += 1

        num_episodes += 1
        print("\nFinished {} episodes: {}\n".format(num_episodes, info))
        stats_all.append(stats_ep)

    assert len(stats_all) == max_episodes, len(stats_all)
    with open(result_path, 'wb') as fh:
        pickle.dump(stats_all, fh)
    if env.render_proc is not None:
        env.render_proc.terminate()
        env.cloth.stop_render()
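
The pickle written by both examples is a list with one `defaultdict` per episode, keyed by 'obs', 'rew', 'act', 'done', and 'info'. A minimal sketch of loading it back and summarizing rewards, assuming a hypothetical result file name:

import pickle

# Hypothetical file name; use whatever was passed as `result_path`.
with open('logs/results.pkl', 'rb') as fh:
    stats_all = pickle.load(fh)

for ep, stats_ep in enumerate(stats_all):
    print("episode {}: {} steps, total reward {:.3f}".format(
        ep, len(stats_ep['rew']), sum(stats_ep['rew'])))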