Example No. 1
def run(args, policy, model_name, cost_fn='L2'):
    """Run an analytic policy, using similar setups as baselines-fork.

    If we have a random seed in the args, we use that instead of the config
    file. That way we can run several instances of the policy in parallel for
    faster data collection.

    model_name and cost_fn are only meaningful for the 'vismpc' policy.
    """
    with open(args.cfg_file, 'r') as fh:
        cfg = yaml.safe_load(fh)
        if args.seed is not None:
            seed = args.seed
            cfg['seed'] = seed  # Probably redundant, but harmless.
        else:
            seed = cfg['seed']
        if seed == 1500 or seed == 1600:
            print('Ideally, avoid using these two seeds.')
            sys.exit()
        if args.policy != 'vismpc':
            model_name = 'NA'
            cost_fn = 'NA'
        stuff = '-seed-{}-{}-model-{}-cost-{}_epis_{}'.format(
            seed, cfg['init']['type'], model_name.replace('/', '_'), cost_fn,
            args.max_episodes)
        result_path = args.result_path.replace('.pkl', '{}.pkl'.format(stuff))
        #assert not cfg['env']['force_grab'], 'Do not need force_grab for analytic'
        print('\nOur result_path:\n\t{}'.format(result_path))
    np.random.seed(seed)

    # Should seed env this way, following gym conventions.  NOTE: we pass in
    # args.cfg_file here, but then it's immediately loaded by ClothEnv. When
    # env.reset() is called, it uses the ALREADY loaded parameters, and does
    # NOT re-query the file again for parameters (that'd be bad!).
    env = ClothEnv(args.cfg_file)
    env.seed(seed)
    env.render(filepath=args.render_path)
    if args.policy == 'vismpc':
        policy.set_env_cfg(env, cfg, model_name, cost_fn)
    else:
        policy.set_env_cfg(env, cfg)

    # Book-keeping.
    num_episodes = 0
    stats_all = []
    coverage = []
    variance_inv = []
    nb_steps = []

    for ep in range(args.max_episodes):
        obs = env.reset()
        # Go through one episode and put information in `stats_ep`.
        # Don't forget the first obs, since we need t _and_ t+1.
        stats_ep = defaultdict(list)
        stats_ep['obs'].append(obs)
        done = False
        num_steps = 0

        while not done:
            action = policy.get_action(obs, t=num_steps)
            obs, rew, done, info = env.step(action)
            stats_ep['obs'].append(obs)
            stats_ep['rew'].append(rew)
            stats_ep['act'].append(action)
            stats_ep['done'].append(done)
            stats_ep['info'].append(info)
            num_steps += 1
        num_episodes += 1
        coverage.append(info['actual_coverage'])
        variance_inv.append(info['variance_inv'])
        nb_steps.append(num_steps)
        stats_all.append(stats_ep)
        print("\nInfo for most recent episode: {}".format(info))
        print("Finished {} episodes.".format(num_episodes))
        print('  {:.3f} +/- {:.3f} (coverage)'.format(np.mean(coverage),
                                                      np.std(coverage)))
        print('  {:.2f} +/- {:.1f} ((inv)variance)'.format(
            np.mean(variance_inv), np.std(variance_inv)))
        print('  {:.2f} +/- {:.2f} (steps per episode)'.format(
            np.mean(nb_steps), np.std(nb_steps)))

        # Just dump here to keep saving and overwriting.
        with open(result_path, 'wb') as fh:
            pickle.dump(stats_all, fh)

    assert len(stats_all) == args.max_episodes, len(stats_all)
    if env.render_proc is not None:
        env.render_proc.terminate()
        env.cloth.stop_render()
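
A minimal usage sketch for the docstring's parallel-seed idea: launch several copies of the entry-point script, each with a different --seed, so every worker writes its own result file. The script name, config path, policy name, and flag spellings below are assumptions for illustration; only args.cfg_file, args.seed, args.policy, args.max_episodes, args.result_path, and args.render_path are visible in the code above.

import subprocess

# Hypothetical entry point and flags -- adjust to the repo's actual CLI.
procs = []
for seed in (100, 200, 300, 400):
    cmd = ['python', 'run_analytic.py',              # assumed script name
           '--cfg_file', 'cfg/demo_baselines.yaml',  # assumed config path
           '--seed', str(seed),
           '--policy', 'oracle',                     # assumed policy name
           '--max_episodes', '10',
           '--result_path', 'results/analytic.pkl',
           '--render_path', 'render/build']
    procs.append(subprocess.Popen(cmd))
for p in procs:
    p.wait()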
Example No. 2
def run(config_file,
        render_path,
        file_path,
        result_path,
        load_state,
        max_episodes,
        random_pol=False):
    """Run a policy. Use this as the main testbed before running baselines-fork.
    """
    with open(config_file, 'r') as fh:
        cfg = yaml.safe_load(fh)
        seed = cfg['seed']
        stuff = '-clip_a-{}-delta_a-{}-obs-{}'.format(
            cfg['env']['clip_act_space'], cfg['env']['delta_actions'],
            cfg['env']['obs_type'])
        result_path = result_path.replace('.pkl', '{}.pkl'.format(stuff))

    # Save states into local directory, load from nfs diskstation.
    NFS = '/nfs/diskstation/seita/clothsim'
    state_path = join(file_path, "state_init.pkl")
    load_state_path = join(NFS, 'state_init_med_49_coverage.pkl')
    num_episodes = 0
    stats_all = []

    # Should seed env this way, following gym conventions.
    if load_state:
        env = ClothEnv(config_file, start_state_path=load_state_path)
    else:
        env = ClothEnv(config_file)
    env.seed(seed)
    env.render(filepath=render_path)

    for ep in range(max_episodes):
        obs = env.reset()
        env.save_state(state_path)
        # Go through one episode and put information in `stats_ep`.
        # Put the first observation here to start.
        stats_ep = defaultdict(list)
        stats_ep['obs'].append(obs)
        done = False
        num_steps = 0

        while not done:
            if random_pol:
                #action = env.get_random_action(atype='touch_cloth')
                action = env.get_random_action(atype='over_xy_plane')
            else:
                #action = analytic(env, t=num_steps, cfg=cfg)
                action = analytic_corners(env, t=num_steps, cfg=cfg)

            # Apply the action.
            obs, rew, done, info = env.step(action)
            stats_ep['obs'].append(obs)
            stats_ep['rew'].append(rew)
            stats_ep['act'].append(action)
            stats_ep['done'].append(done)
            stats_ep['info'].append(info)
            num_steps += 1

        num_episodes += 1
        print("\nFinished {} episodes: {}\n".format(num_episodes, info))
        stats_all.append(stats_ep)

        # Just dump here to keep saving and overwriting.
        with open(result_path, 'wb') as fh:
            pickle.dump(stats_all, fh)

    assert len(stats_all) == max_episodes, len(stats_all)
    if env.render_proc is not None:
        env.render_proc.terminate()
        env.cloth.stop_render()
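
The result file written above is a plain pickle of stats_all, a list with one defaultdict per episode holding 'obs', 'rew', 'act', 'done', and 'info' lists. A small sketch for reading one back; the file name with the '-clip_a-...' suffix is only an example, since the actual suffix depends on the config values.

import pickle
import numpy as np

# Example file name; adjust to the suffix produced by the config above.
with open('results/demo-clip_a-True-delta_a-True-obs-blender.pkl', 'rb') as fh:
    stats_all = pickle.load(fh)

ep_lengths = [len(ep['rew']) for ep in stats_all]
print('episodes: {}, mean length: {:.1f}'.format(
    len(stats_all), np.mean(ep_lengths)))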
Example No. 3
def test_init(config_file):
    """Test environment initialization, and then hard-coded actions.

    For rendering, use the environment's `env.render()` method.
    """
    load_state = True

    env = ClothEnv(config_file)
    print("\nObs space: {}".format(env.observation_space))
    print("Act space: {}".format(env.action_space))

    # Start the environment and render it.
    this_dir = os.path.dirname(os.path.realpath(__file__))
    path_to_renderer = os.path.join(this_dir, "../render/build")
    env.render(filepath=path_to_renderer)
    start = time.time()

    # Save or load the state -- but use a FULL path. Otherwise it saves
    # relative to a different working directory (possibly a Cython quirk).
    state_path = os.path.join(this_dir, "test_saving_state_bed.pkl")
    if load_state:
        obs = env.load_state(cloth_file=state_path)
    else:
        obs = env.reset()
        env.save_state(state_path)

    print("reset took {}".format(time.time() - start))
    print("\nJust reset, obs is:\n{}\nshape: {}".format(obs, obs.shape))

    # Do your hard-coded actions here.

    p0 = env.cloth.pts[-1]
    action = ((p0.x, p0.y), 0.60, 2)
    obs, rew, done, info = env.step(action)
    print("demo_bed.py, done: {}".format(done))

    p1 = env.cloth.pts[-25]
    action = ((p1.x, p1.y), 0.50, 7)
    obs, rew, done, info = env.step(action)
    print("demo_bed.py, done: {}".format(done))

    p2 = env.cloth.pts[-25]
    action = ((p2.x, p2.y), 0.50, 3)
    obs, rew, done, info = env.step(action)
    print("demo_bed.py, done: {}".format(done))

    p3 = env.cloth.pts[-25]
    action = ((p3.x, p3.y), 0.30, 2)
    obs, rew, done, info = env.step(action)
    print("demo_bed.py, done: {}".format(done))

    p4 = env.cloth.pts[-(25 * 3) - 1]
    action = ((p4.x, p4.y), 0.30, 1)
    obs, rew, done, info = env.step(action)
    print("demo_bed.py, done: {}".format(done))

    # Kill the render process. Normally it's invoked when a state is done but
    # we can also hard-code it like this.
    print("\nstopping the render proc and renderer")
    env.render_proc.terminate()
    env.cloth.stop_render()
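
The hard-coded actions above are tuples of the form ((x, y), length, direction), with pick points taken from env.cloth.pts. Assuming the cloth is a 25 x 25 grid of points (suggested by the -25 and -(25 * 3) - 1 offsets), a helper like the following could pick the four corners; the grid size and the meaning of the last two tuple entries are assumptions, not documented behavior.

def corner_points(cloth, n=25):
    """Return the four corner points of an n x n cloth grid (assumed layout)."""
    pts = cloth.pts
    return [pts[0], pts[n - 1], pts[-n], pts[-1]]

# Hypothetical usage, mirroring the hard-coded steps above:
# for p in corner_points(env.cloth):
#     obs, rew, done, info = env.step(((p.x, p.y), 0.50, 2))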
Example No. 4
def run(args, policy):
    """Run an analytic policy, using similar setups as baselines-fork.

    If we have a random seed in the args, we use that instead of the config
    file. That way we can run several instances of the policy in parallel for
    faster data collection.
    """
    with open(args.cfg_file, 'r') as fh:
        cfg = yaml.safe_load(fh)
        if args.seed is not None:
            seed = args.seed
            cfg['seed'] = seed  # Probably redundant, but harmless.
        else:
            seed = cfg['seed']
        if seed == 1500 or seed == 1600:
            print('Ideally, avoid using these two seeds.')
            sys.exit()
        assert cfg['env']['clip_act_space'] and cfg['env']['delta_actions']
        stuff = '-seed-{}-obs-{}-depth-{}-rgbd-{}-{}_epis_{}'.format(
            seed, cfg['env']['obs_type'], cfg['env']['use_depth'],
            cfg['env']['use_rgbd'], cfg['init']['type'], args.max_episodes)
        result_path = args.result_path.replace('.pkl', '{}.pkl'.format(stuff))
        assert not cfg['env']['force_grab'], 'Do not need force_grab for analytic'
        print('\nOur result_path:\n\t{}'.format(result_path))
    np.random.seed(seed)

    # Should seed env this way, following gym conventions.  NOTE: we pass in
    # args.cfg_file here, but then it's immediately loaded by ClothEnv. When
    # env.reset() is called, it uses the ALREADY loaded parameters, and does
    # NOT re-query the file again for parameters (that'd be bad!).
    env = ClothEnv(args.cfg_file)
    env.seed(seed)
    env.render(filepath=args.render_path)
    policy.set_env_cfg(env, cfg)
    # policy = UpActor()
    # Book-keeping.
    num_episodes = 0
    stats_all = []
    coverage = []
    variance_inv = []
    nb_steps = []

    cloth_tier = args.tier

    cloth_obs = []

    for ep in range(args.max_episodes):
        obs, obs_1d = env.reset()
        if cloth_tier == 1:
            cloth_obs.extend(obs_1d)
        # cloth_npy = np.array(cloth_obs)
        # test_time_sequence(obs_1d)
        # plot_2d(obs_1d, 0, cloth_tier)
        # Go through one episode and put information in `stats_ep`.
        # Don't forget the first obs, since we need t _and_ t+1.
        stats_ep = defaultdict(list)
        stats_ep['obs'].append(obs)

        done = False
        num_steps = 0

        while not done:
            action = policy.get_action(obs, t=num_steps)
            obs, rew, done, info = env.step(action)
            stats_ep['obs'].append(obs)
            stats_ep['rew'].append(rew)
            stats_ep['act'].append(action)
            stats_ep['done'].append(done)
            stats_ep['info'].append(info)
            obs_1d = info['obs_1d']
            test_time_sequence(obs_1d)
            num_steps += 1
            cloth_obs.extend(obs_1d)
            plot_2d(obs_1d, num_steps, cloth_tier)
            if num_steps > 80:
                print("Finished")
                break  # Safety cap: end the episode after 80 steps.
        num_episodes += 1
        coverage.append(info['actual_coverage'])
        variance_inv.append(info['variance_inv'])
        nb_steps.append(num_steps)
        stats_all.append(stats_ep)
        print("\nInfo for most recent episode: {}".format(info))
        print("Finished {} episodes.".format(num_episodes))
        print('  {:.2f} +/- {:.1f} (coverage)'.format(np.mean(coverage),
                                                      np.std(coverage)))
        print('  {:.2f} +/- {:.1f} ((inv)variance)'.format(
            np.mean(variance_inv), np.std(variance_inv)))
        print('  {:.2f} +/- {:.1f} (steps per episode)'.format(
            np.mean(nb_steps), np.std(nb_steps)))

        # This variant only collects one episode: dump the cloth data with
        # hickle, shut down the renderer, and return. The pickle dump and
        # assert below are therefore never reached.
        filename = "cloth_action_data_tier_" + str(cloth_tier) + ".hkl"
        cloth_npy = np.array(cloth_obs)
        hkl.dump(cloth_npy, filename, mode='w')
        env.render_proc.terminate()
        # env.cloth.stop_render()
        return 0  # End after the first episode.

        # Just dump here to keep saving and overwriting.
        with open(result_path, 'wb') as fh:
            pickle.dump(stats_all, fh)

    assert len(stats_all) == args.max_episodes, len(stats_all)
    if env.render_proc is not None:
        env.render_proc.terminate()
        env.cloth.stop_render()
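
This variant stores the per-step 1-D cloth observations with hickle rather than pickle. A short sketch for loading that file back; the tier in the file name is just an example.

import hickle as hkl
import numpy as np

cloth_npy = hkl.load('cloth_action_data_tier_1.hkl')
print('cloth observation array shape:', np.asarray(cloth_npy).shape)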
Example No. 5
def run(config_file, render_path, file_path, result_path, load_state, max_episodes):
    """Run a policy.

    Note that there are many possible interpretations of 'random' actions.
    It's faster if we have `load_state=True`, so if there isn't a state ready
    to load, then run one call to `env.reset()` to get one.

    Actually, we can also do an analytic one where we grip the highest point.
    That part is trivial, but determining the length and direction can be more
    complicated. We can just use hard-coded rules.
    """
    with open(config_file, 'r') as fh:
        cfg = yaml.safe_load(fh)
        seed = cfg['seed']

    # Save states into local directory, load from nfs diskstation.
    NFS = '/nfs/diskstation/seita/clothsim'
    state_path = join(file_path, "state_init.pkl")
    load_state_path = join(NFS, 'state_init_med_49_coverage.pkl')
    random_pol = True
    num_episodes = 0
    stats_all = []

    # Should seed env this way, following gym conventions.
    if load_state:
        env = ClothEnv(config_file, start_state_path=load_state_path)
    else:
        env = ClothEnv(config_file)
    env.seed(seed)
    env.render(filepath=render_path)

    # Fix a Pyrender scene, so that we don't keep re-creating.
    pyr_scene, pyr_rend = _create_scene_and_offscreen_render()

    for ep in range(max_episodes):
        # Do one episode and put information in `stats_ep`. Save the starting state.
        obs = env.reset()
        env.save_state(state_path)
        stats_ep = defaultdict(list)
        done = False
        num_steps = 0

        while not done:
            if random_pol:
                #action = env.get_random_action(atype='over_xy_plane')
                action = env.get_random_action(atype='touch_cloth')
            else:
                raise NotImplementedError()
            _save_trimesh(env, pyr_scene, pyr_rend)
            obs, rew, done, info = env.step(action)
            stats_ep['obs'].append(obs)
            stats_ep['rew'].append(rew)
            stats_ep['act'].append(action)
            stats_ep['done'].append(done)
            stats_ep['info'].append(info)
            num_steps += 1

        num_episodes += 1
        print("\nFinished {} episodes: {}\n".format(num_episodes, info))
        stats_all.append(stats_ep)

    assert len(stats_all) == max_episodes, len(stats_all)
    with open(result_path, 'wb') as fh:
        pickle.dump(stats_all, fh)
    if env.render_proc is not None:
        env.render_proc.terminate()
        env.cloth.stop_render()
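
The helpers _create_scene_and_offscreen_render() and _save_trimesh() are not shown in this example. A minimal sketch of the first one, assuming it is built on the pyrender package; the ambient light and viewport size are illustrative guesses rather than the repository's actual values.

import pyrender

def _create_scene_and_offscreen_render(width=640, height=480):
    """Create one pyrender Scene and OffscreenRenderer so they can be reused."""
    scene = pyrender.Scene(ambient_light=[0.3, 0.3, 0.3])
    renderer = pyrender.OffscreenRenderer(viewport_width=width,
                                          viewport_height=height)
    return scene, renderer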