def _run_model(env: gym.Env,
               controller_fun,
               test_data: pd.DataFrame,
               episode_length=200,
               embed_knowledge=False,
               perfect_knowledge=False,
               vision=False):

    raw_env = env.unwrapped
    assert isinstance(raw_env, GaussianPendulumEnv)

    initial_states = test_data.initial_state
    mass_distr_params = test_data.mass_distr_params
    sampled_masses = test_data.sampled_mass
    episodes = len(test_data)
    pd_index = list(test_data.index)

    # configure the environment
    raw_env.configure(mass_mean=np.mean(sampled_masses),
                      mass_stdev=np.std(sampled_masses),
                      embed_knowledge=embed_knowledge,
                      perfect_knowledge=perfect_knowledge,
                      gym_env=env)

    # Note that the mass mean and stdev passed to configure() are not actually
    # used by the environment itself, since we impose our own predefined
    # masses below. Passing sensible values is still useful because they
    # define the bounds of the observation space.

    # run controller_fun on environment
    rewards = np.zeros(episodes)
    for i in tqdm(range(episodes)):
        pdi = pd_index[i]

        # reset the environment to ensure that everything is clean
        env.reset()

        # impose initial state
        raw_env.state = initial_states[pdi]

        # impose sampled mass
        raw_env.sampled_mass = sampled_masses[pdi]

        # inform env about the original distribution used
        raw_env.mass_distr_params = mass_distr_params[pdi]

        observations = get_observations(raw_env)
        for t in range(episode_length):
            if vision:
                observations = env.render(mode='rgb_array')
            action = controller_fun(observations)
            observations, reward, _, _ = env.step(action)
            rewards[i] += reward

    return rewards
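
# --- Hedged usage sketch (added example, not from the original code). ---
# Shows how _run_model might be driven with a trivial random controller.
# The env id 'GaussianPendulum-v0' and the literal test values are
# assumptions; only the DataFrame column names (initial_state,
# mass_distr_params, sampled_mass) are taken from _run_model above.
def _demo_run_model():
    import gym
    import numpy as np
    import pandas as pd

    env = gym.make('GaussianPendulum-v0')  # assumed registered id

    def random_controller(observations):
        # ignore the observations and act randomly
        return env.action_space.sample()

    n = 10
    test_data = pd.DataFrame(dict(
        initial_state=[np.array([np.pi, 0.0])] * n,    # assumed [angle, velocity]
        mass_distr_params=[np.array([1.0, 0.1])] * n,  # assumed (mean, stdev)
        sampled_mass=np.random.normal(1.0, 0.1, n),
    ))

    rewards = _run_model(env, random_controller, test_data,
                         episode_length=200)
    print(rewards.mean(), rewards.std())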
def main():

    if torch.cuda.is_available():
        print("CUDA available, proceeding with GPU...")
        device = torch.device("cuda")
    else:
        print("No GPU found, proceeding with CPU...")
        device = torch.device("cpu")

    env = gym.make(ENV_ID)
    raw_env = env.unwrapped

    np_random = raw_env.np_random
    observation_space = get_observation_space(env)
    n_inputs = observation_space.low.size + env.action_space.low.size
    n_outputs = observation_space.low.size

    model = MlpModel(n_inputs,
                     n_outputs,
                     hidden_units=(64, 32),
                     np_random=np_random,
                     device=device)

    model_path = Path(f'./out/{EXP_NAME}_model.pkl')
    # model_path = Path(f'./out/{EXP_NAME}_model_e30.pkl')
    data_path = Path(f'./out/{EXP_NAME}_data.pkl')

    do_train = True
    if model_path.exists():
        print('Found existing model.')
        if OVERWRITE_EXISTING:
            print('Overwriting...')
        else:
            model = MlpModel.load(model_path, device)
            do_train = False
    else:
        print('Existing model not found.')

    if do_train:

        df = pd.read_pickle('../data/push_sphere_v0_details.pkl')
        episode_length = df['step'].max() + 1
        n_episodes = df['episode'].max() + 1

        episodes = []
        for i in range(n_episodes):
            ep_df = df[df['episode'] == i]
            actions = np.array(ep_df['raw_action'].tolist())
            states = np.array(ep_df['raw_obs'].tolist())
            episodes.append((states, actions[1:]))

        # Disabled alternative: generate fresh episodes with EnvDataset
        # instead of loading the pickled push-sphere data above.
        if False:
            dataset = EnvDataset(env)
            dataset.generate(n_episodes, episode_length)
            episodes = dataset.data

        def epoch_callback(epoch, loss):
            print(epoch, loss)
            if epoch % 10 == 0:
                path = Path(f'./out/{EXP_NAME}_model_e{epoch}.pkl')
                model.save(path)
                evaluate(MlpModel,
                         path,
                         'FetchPushSphereDense-v1',
                         strategy=push_strategy,
                         strategy_period=episode_length)
                # evaluate(MlpModel, path, 'FetchReachDense-v1')

        print('Training...')
        losses = model.train(episodes,
                             epochs=TRAINING_EPOCHS,
                             batch_size=BATCH_SIZE,
                             epoch_callback=epoch_callback,
                             scale_data=True,
                             shuffle_data=True)

        print('Saving model...')
        model.save(model_path)

    if VISUAL_TEST:
        print('Testing model...')

        reward_fn = RewardFunction.simplified_push_reward(env)
        controller = MPC(env,
                         model,
                         MPC_HORIZON,
                         MPC_SEQUENCES,
                         np_random,
                         reward_function=reward_fn)

        for e in range(2000):
            env.reset()
            controller.forget_history()

            for s in range(100):
                env.render()

                tic = datetime.now()

                obs = get_observations(env)
                action = controller.get_action(obs)
                _, rewards, dones, info = env.step(action)

                toc = datetime.now()
                print((toc - tic).total_seconds())
def generate(config_name: str, overwrite=False, test_run=False):

    if test_run:
        print('******* THIS IS A TEST RUN! *******')

    config = CONFIGS[config_name]
    episodes = config.get('episodes', 200)
    episode_length = config.get('episode_length', 5)
    img_size = config.get('size', (256, 256))
    rgb_options = config.get('rgb_options', None)
    env_setup = config.get('env_setup', None)
    env_reset = config.get('env_reset', None)
    env_step = config.get('env_step', None)
    action_strategy = config.get('action_strategy', None)
    action_strategy_eps = config.get('action_strategy_eps', 1.0)

    images_path = f'../data/{config_name}_imgs.npz'
    details_path = f'../data/{config_name}_details.pkl'
    if not overwrite and (Path(images_path).exists()
                          or Path(details_path).exists()):
        print(f'Skipping {config_name}.')
        return

    env = gym.make(config['env'])
    raw_env = env.unwrapped

    render_kwargs = dict(mode='rgb_array')
    if rgb_options is not None:
        render_kwargs['rgb_options'] = rgb_options

    if callable(env_setup):
        env_setup(env)

    print(f'Generating {config_name}...')
    images = []
    details = []

    for e in tqdm(range(episodes)):
        env_obs = env.reset()

        # zero-valued action with the correct shape for this env
        action = 0.0 * raw_env.action_space.sample()
        episode_info = None
        if callable(env_reset):
            episode_info = env_reset(env)
        episode_info = episode_info or dict()

        # with probability action_strategy_eps, hold one random action for
        # the whole episode instead of querying the action strategy
        rand_action = raw_env.action_space.sample()
        sample_action = np.random.uniform() < action_strategy_eps

        for s in range(episode_length):

            raw_obs = get_observations(env)

            step_info = None
            if callable(env_step):
                step_info = env_step(env, env_obs, action)
            step_info = step_info or dict()

            # re-render until the frame comes back uncorrupted
            while True:
                img = env.render(**render_kwargs)
                img = resize(img, dsize=img_size, interpolation=INTER_AREA)
                if not is_image_corrupted(img):
                    break
                print('Corrupted image!')

            img_i = len(images)
            images.append(img)

            details.append(
                dict(image_index=img_i,
                     step=s,
                     episode=e,
                     raw_obs=raw_obs,
                     raw_action=action,
                     **episode_info,
                     **step_info))

            if sample_action:
                action = rand_action
            elif callable(action_strategy):
                action = action_strategy(env, raw_obs)
            else:
                raise ValueError('Action strategy must be callable!')

            env_obs, _, _, _ = env.step(action)

            if test_run:
                # print(pd.DataFrame([details[-1]]))
                for k, v in step_info.items():
                    print(k, v)

    if not test_run:
        images = np.array(images)
        np.savez_compressed(images_path, images)

        details = pd.DataFrame(details)
        pd.to_pickle(details, details_path)

    print('Done.')
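
# --- Hedged sketch (added example): reading back what generate() writes. ---
# The config name 'push_sphere_v0' is taken from the pickle path used in
# main() above; 'arr_0' is the default key numpy assigns to an unnamed
# array passed to savez_compressed.
def _demo_load_generated(config_name='push_sphere_v0'):
    import numpy as np
    import pandas as pd

    images = np.load(f'../data/{config_name}_imgs.npz')['arr_0']
    details = pd.read_pickle(f'../data/{config_name}_details.pkl')

    assert len(images) == len(details)  # one details row per rendered frame
    first = images[details.iloc[0]['image_index']]
    print(images.shape, details.columns.tolist(), first.dtype)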
def main():

    if torch.cuda.is_available():
        print("CUDA available, proceeding with GPU...")
        device = torch.device("cuda")
    else:
        print("No GPU found, proceeding with CPU...")
        device = torch.device("cpu")

    env = gym.make(ENV_ID)
    raw_env = env.unwrapped

    np_random = raw_env.np_random
    observation_space = get_observation_space(env)
    n_inputs = observation_space.low.size + env.action_space.low.size
    n_outputs = observation_space.low.size

    model = MDN_Model(n_inputs,
                      n_outputs,
                      MDN_COMPONENTS,
                      hidden_units=(20, ),
                      np_random=np_random,
                      device=device)

    model_path = Path(f'./out/{EXP_NAME}_model.pkl')
    # model_path = Path(f'./out/{EXP_NAME}_model_e10.pkl')
    data_path = Path(f'./out/{EXP_NAME}_data.pkl')

    do_train = True
    if model_path.exists():
        print('Found existing model.')
        if OVERWRITE_EXISTING:
            print('Overwriting...')
        else:
            model = MDN_Model.load(model_path, device)
            do_train = False
    else:
        print('Existing model not found.')

    if do_train:

        do_generate = False
        dataset = EnvDataset(env)
        if data_path.exists():
            print('Loading data...')
            dataset.load(data_path)
            if dataset.episodes != N_EPISODES or dataset.episode_length != EPISODE_LENGTH:
                print(
                    'Existing data is not compatible with the desired parameters.'
                )
                do_generate = True
        else:
            do_generate = True

        if do_generate:
            print('Generating data...')
            dataset.generate(N_EPISODES, EPISODE_LENGTH, strategy=STRATEGY)
            dataset.save(data_path)
        episodes = dataset.data

        def epoch_callback(epoch, loss):
            print(epoch, loss)
            if epoch % 10 == 0:
                path = Path(f'./out/{EXP_NAME}_model_e{epoch}.pkl')
                model.save(path)
                evaluate_mdn(path, ENV_ID, strategy=STRATEGY)

        # Disabled diagnostic: count the episodes in which the sphere
        # position (obs[3:6]) changed at least once.
        if False:
            eps_with_changes = 0
            for e in episodes:
                prev_pos = np.zeros(3)
                changed = -1
                for s in e[0]:
                    sphere_pos = s[3:6].copy()
                    changed += int(
                        not np.allclose(sphere_pos, prev_pos, atol=0.001))
                    prev_pos = sphere_pos
                eps_with_changes += int(changed > 0)
            print(eps_with_changes)

        print('Training...')
        losses = model.train(episodes,
                             TRAINING_EPOCHS,
                             batch_size=BATCH_SIZE,
                             epoch_callback=epoch_callback,
                             scale_data=True,
                             shuffle_data=True)

        print('Saving model...')
        model.save(model_path)

    if VISUAL_TEST:
        print('Testing model...')
        controller = MPC(env, model, MPC_HORIZON, MPC_SEQUENCES, np_random)

        for e in range(2000):
            env.reset()

            for s in range(100):
                env.render()

                tic = datetime.now()

                obs = get_observations(env)
                action = controller.get_action(obs)
                _, rewards, dones, info = env.step(action)

                toc = datetime.now()
                print((toc - tic).total_seconds())
def main():

    if torch.cuda.is_available():
        print("CUDA available, proceeding with GPU...")
        device = torch.device("cuda")
    else:
        print("No GPU found, proceeding with CPU...")
        device = torch.device("cpu")

    env = gym.make(ENV_ID)
    raw_env = env.unwrapped

    np_random = raw_env.np_random
    observation_space = get_observation_space(env)
    n_inputs = 16 + env.action_space.low.size  # 16: hard-coded state/latent size
    n_outputs = observation_space.low.size

    model = LSTM_Model(n_inputs,
                       32,
                       n_outputs,
                       n_layers=2,
                       np_random=np_random,
                       device=device,
                       window_size=5)

    model_path = Path(f'./out/{EXP_NAME}_model.pkl')
    # model_path = Path(f'./out/{EXP_NAME}_model_e30.pkl')

    do_train = True
    if model_path.exists():
        print('Found existing model.')
        if OVERWRITE_EXISTING:
            print('Overwriting...')
        else:
            model = LSTM_Model.load(model_path, device)
            do_train = False
    else:
        print('Existing model not found.')

    if do_train:

        ##########################################################

        df = pd.read_pickle(DATA_PATH)
        n_episodes = df['episode'].max() + 1
        # episode_length = df['step'].max() + 1

        episodes = []
        targets, all_z = None, None

        if Z_TO_OBS:
            all_z = np.load(Z_PATH)['arr_0']
            targets = []

        for i in range(n_episodes):
            ep_df = df[df['episode'] == i]
            actions = np.array(ep_df['raw_action'].tolist())
            raw_obs = np.array(ep_df['raw_obs'].tolist())

            if Z_TO_OBS:
                targets.append(raw_obs[1:])
                z = all_z[i]
                episodes.append((z, actions[1:]))
            else:
                episodes.append((raw_obs, actions[1:]))

        if targets is not None:
            targets = np.array(targets)

        ##########################################################

        def epoch_callback(epoch, loss):
            print(epoch, loss)
            if epoch % 10 == 0:
                path = Path(f'./out/{EXP_NAME}_model_e{epoch}.pkl')
                model.save(path)
                # evaluate(LSTM_Model, path, ENV_ID)

        print('Training...')
        losses = model.train(episodes,
                             targets=targets,
                             epochs=TRAINING_EPOCHS,
                             batch_size=BATCH_SIZE,
                             epoch_callback=epoch_callback,
                             scale_data=True,
                             scale_targets=True)

        print('Saving model...')
        model.save(model_path)

    if VISUAL_TEST:
        print('Testing model...')

        controller = MPC(env, model, MPC_HORIZON, MPC_SEQUENCES, np_random)

        for e in range(2000):
            env.reset()
            controller.forget_history()

            for s in range(100):
                env.render()

                tic = datetime.now()

                obs = get_observations(env)
                action = controller.get_action(obs)
                _, rewards, dones, info = env.step(action)

                toc = datetime.now()
                print((toc - tic).total_seconds())
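
# --- Hedged refactoring sketch (added, not from the original code). ---
# The visual-test loop is repeated almost verbatim in all three main()
# variants above; a helper like this could replace it. forget_history()
# only exists on some of the controllers used, hence the hasattr guard;
# get_observations is the module-level helper already used above.
def _visual_test(env, controller, episodes=2000, steps=100):
    from datetime import datetime

    for _ in range(episodes):
        env.reset()
        if hasattr(controller, 'forget_history'):
            controller.forget_history()

        for _ in range(steps):
            env.render()
            tic = datetime.now()

            obs = get_observations(env)
            action = controller.get_action(obs)
            env.step(action)

            # seconds per control step, as the original loops print
            print((datetime.now() - tic).total_seconds())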