def _run_model(env: gym.Env, controller_fun, test_data: pd.DataFrame, episode_length=200,
               embed_knowledge=False, perfect_knowledge=False, vision=False):
    raw_env = env.unwrapped
    assert isinstance(raw_env, GaussianPendulumEnv)

    initial_states = test_data.initial_state
    mass_distr_params = test_data.mass_distr_params
    sampled_masses = test_data.sampled_mass
    episodes = len(test_data)
    pd_index = list(test_data.index)

    # configure the environment
    raw_env.configure(
        mass_mean=np.mean(sampled_masses),
        mass_stdev=np.std(sampled_masses),
        embed_knowledge=embed_knowledge,
        perfect_knowledge=perfect_knowledge,
        gym_env=env,
    )
    # Note that the mass mean and stdev passed to configure() are not actually used by the
    # environment itself, since we impose our predefined masses below. It is still useful to
    # pass sensible numbers here because they define the bounds of the observation space.

    # run controller_fun on the environment
    rewards = np.zeros(episodes)
    for i in tqdm(range(episodes)):
        pdi = pd_index[i]

        # reset the environment to ensure that everything is clean
        env.reset()

        # impose the initial state
        raw_env.state = initial_states[pdi]

        # impose the sampled mass
        raw_env.sampled_mass = sampled_masses[pdi]

        # inform the env about the original distribution used
        raw_env.mass_distr_params = mass_distr_params[pdi]

        observations = get_observations(raw_env)
        for t in range(episode_length):
            if vision:
                observations = env.render(mode='rgb_array')
            action = controller_fun(observations)
            observations, reward, _, _ = env.step(action)
            rewards[i] += reward

    return rewards
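
# Illustrative usage sketch (not part of the original script). It shows the DataFrame layout
# that _run_model expects, based on the columns it reads above. The gym id 'GaussianPendulum-v0',
# the (angle, angular velocity) initial state, and the (mean, stdev) distribution parameters are
# assumptions made only for this example.
def _example_run_random_controller():
    env = gym.make('GaussianPendulum-v0')  # assumed registration name of GaussianPendulumEnv
    test_data = pd.DataFrame(dict(
        initial_state=[np.array([np.pi, 0.0])],    # assumed (theta, theta_dot) at episode start
        mass_distr_params=[np.array([1.0, 0.1])],  # assumed (mean, stdev) of the mass distribution
        sampled_mass=[1.05],                       # mass imposed on the environment for this episode
    ))
    random_controller = lambda obs: env.action_space.sample()
    return _run_model(env, random_controller, test_data, episode_length=50)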
def main():
    if torch.cuda.is_available():
        print("CUDA available, proceeding with GPU...")
        device = torch.device("cuda")
    else:
        print("No GPU found, proceeding with CPU...")
        device = torch.device("cpu")

    env = gym.make(ENV_ID)
    raw_env = env.unwrapped
    np_random = raw_env.np_random

    observation_space = get_observation_space(env)
    n_inputs = observation_space.low.size + env.action_space.low.size
    n_outputs = observation_space.low.size

    model = MlpModel(n_inputs, n_outputs, hidden_units=(64, 32), np_random=np_random, device=device)

    model_path = Path(f'./out/{EXP_NAME}_model.pkl')
    # model_path = Path(f'./out/{EXP_NAME}_model_e30.pkl')
    data_path = Path(f'./out/{EXP_NAME}_data.pkl')

    do_train = True
    if model_path.exists():
        print('Found existing model.')
        if OVERWRITE_EXISTING:
            print('Overwriting...')
        else:
            model = MlpModel.load(model_path, device)
            do_train = False
    else:
        print('Existing model not found.')

    if do_train:
        # load pre-generated episodes from the dataset pickle
        df = pd.read_pickle('../data/push_sphere_v0_details.pkl')
        episode_length = df['step'].max() + 1
        n_episodes = df['episode'].max() + 1
        episodes = []
        for i in range(n_episodes):
            ep_df = df[df['episode'] == i]
            actions = np.array(ep_df['raw_action'].tolist())
            states = np.array(ep_df['raw_obs'].tolist())
            episodes.append((states, actions[1:]))

        if False:
            # disabled alternative: generate the data on the fly instead of loading it
            dataset = EnvDataset(env)
            dataset.generate(n_episodes, episode_length)
            episodes = dataset.data

        def epoch_callback(epoch, loss):
            print(epoch, loss)
            if epoch % 10 == 0:
                path = Path(f'./out/{EXP_NAME}_model_e{epoch}.pkl')
                model.save(path)
                evaluate(MlpModel, path, 'FetchPushSphereDense-v1',
                         strategy=push_strategy, strategy_period=episode_length)
                # evaluate(MlpModel, path, 'FetchReachDense-v1')

        print('Training...')
        losses = model.train(episodes,
                             epochs=TRAINING_EPOCHS,
                             batch_size=BATCH_SIZE,
                             epoch_callback=epoch_callback,
                             scale_data=True,
                             shuffle_data=True)

        print('Saving model...')
        model.save(model_path)

    if VISUAL_TEST:
        print('Testing model...')
        reward_fn = RewardFunction.simplified_push_reward(env)
        controller = MPC(env, model, MPC_HORIZON, MPC_SEQUENCES, np_random, reward_function=reward_fn)
        for e in range(2000):
            env.reset()
            controller.forget_history()
            for s in range(100):
                env.render()
                tic = datetime.now()
                obs = get_observations(env)
                action = controller.get_action(obs)
                _, rewards, dones, info = env.step(action)
                toc = datetime.now()
                print((toc - tic).total_seconds())
def generate(config_name: str, overwrite=False, test_run=False):
    if test_run:
        print('******* THIS IS A TEST RUN! *******')

    config = CONFIGS[config_name]
    episodes = config.get('episodes', 200)
    episode_length = config.get('episode_length', 5)
    img_size = config.get('size', (256, 256))
    rgb_options = config.get('rgb_options', None)
    env_setup = config.get('env_setup', None)
    env_reset = config.get('env_reset', None)
    env_step = config.get('env_step', None)
    action_strategy = config.get('action_strategy', None)
    action_strategy_eps = config.get('action_strategy_eps', 1.0)

    images_path = f'../data/{config_name}_imgs.npz'
    details_path = f'../data/{config_name}_details.pkl'
    if not overwrite and (Path(images_path).exists() or Path(details_path).exists()):
        print(f'Skipping {config_name}.')
        return

    env = gym.make(config['env'])
    raw_env = env.unwrapped

    render_kwargs = dict(mode='rgb_array')
    if rgb_options is not None:
        render_kwargs['rgb_options'] = rgb_options

    if callable(env_setup):
        env_setup(env)

    print(f'Generating {config_name}...')
    images = []
    details = []
    for e in tqdm(range(episodes)):
        env_obs = env.reset()
        action = 0.0 * raw_env.action_space.sample()

        episode_info = None
        if callable(env_reset):
            episode_info = env_reset(env)
        episode_info = episode_info or dict()

        rand_action = raw_env.action_space.sample()
        sample_action = np.random.uniform() < action_strategy_eps

        for s in range(episode_length):
            raw_obs = get_observations(env)

            step_info = None
            if callable(env_step):
                step_info = env_step(env, env_obs, action)
            step_info = step_info or dict()

            # re-render until we get an uncorrupted frame
            while True:
                img = env.render(**render_kwargs)
                img = resize(img, dsize=img_size, interpolation=INTER_AREA)
                if not is_image_corrupted(img):
                    break
                print('Corrupted image!')

            img_i = len(images)
            images.append(img)
            details.append(dict(image_index=img_i,
                                step=s,
                                episode=e,
                                raw_obs=raw_obs,
                                raw_action=action,
                                **episode_info,
                                **step_info))

            if sample_action:
                action = rand_action
            elif callable(action_strategy):
                action = action_strategy(env, raw_obs)
            else:
                raise ValueError('Action strategy must be callable!')

            env_obs, _, _, _ = env.step(action)

        if test_run:
            # print(pd.DataFrame([details[-1]]))
            for k, v in step_info.items():
                print(k, v)

    if not test_run:
        images = np.array(images)
        np.savez_compressed(images_path, images)
        details = pd.DataFrame(details)
        pd.to_pickle(details, details_path)

    print('Done.')
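
# Illustrative CONFIGS entry (an assumption, not taken from the original CONFIGS dict). It lists
# the keys generate() reads above together with their defaults; only 'env' is mandatory. The gym
# id and the lambda used for 'action_strategy' are placeholders.
_EXAMPLE_CONFIG = dict(
    env='FetchPushSphereDense-v1',       # gym id to instantiate (assumed)
    episodes=200,                        # number of episodes to record
    episode_length=5,                    # steps recorded per episode
    size=(256, 256),                     # target image size passed to resize()
    rgb_options=None,                    # optional kwargs forwarded to env.render()
    env_setup=None,                      # callable(env), run once before generation
    env_reset=None,                      # callable(env), run after every env.reset()
    env_step=None,                       # callable(env, env_obs, action), run at every step
    action_strategy=lambda env, obs: env.action_space.sample(),  # fallback policy (placeholder)
    action_strategy_eps=1.0,             # probability of holding one random action for a whole episode
)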
def main():
    if torch.cuda.is_available():
        print("CUDA available, proceeding with GPU...")
        device = torch.device("cuda")
    else:
        print("No GPU found, proceeding with CPU...")
        device = torch.device("cpu")

    env = gym.make(ENV_ID)
    raw_env = env.unwrapped
    np_random = raw_env.np_random

    observation_space = get_observation_space(env)
    n_inputs = observation_space.low.size + env.action_space.low.size
    n_outputs = observation_space.low.size

    model = MDN_Model(n_inputs, n_outputs, MDN_COMPONENTS,
                      hidden_units=(20,),
                      np_random=np_random,
                      device=device)

    model_path = Path(f'./out/{EXP_NAME}_model.pkl')
    # model_path = Path(f'./out/{EXP_NAME}_model_e10.pkl')
    data_path = Path(f'./out/{EXP_NAME}_data.pkl')

    do_train = True
    if model_path.exists():
        print('Found existing model.')
        if OVERWRITE_EXISTING:
            print('Overwriting...')
        else:
            model = MDN_Model.load(model_path, device)
            do_train = False
    else:
        print('Existing model not found.')

    if do_train:
        do_generate = False
        dataset = EnvDataset(env)
        if data_path.exists():
            print('Loading data...')
            dataset.load(data_path)
            if dataset.episodes != N_EPISODES or dataset.episode_length != EPISODE_LENGTH:
                print('Existing data is not compatible with the desired parameters.')
                do_generate = True
        else:
            do_generate = True

        if do_generate:
            print('Generating data...')
            dataset.generate(N_EPISODES, EPISODE_LENGTH, strategy=STRATEGY)
            dataset.save(data_path)

        episodes = dataset.data

        def epoch_callback(epoch, loss):
            print(epoch, loss)
            if epoch % 10 == 0:
                path = Path(f'./out/{EXP_NAME}_model_e{epoch}.pkl')
                model.save(path)
                evaluate_mdn(path, ENV_ID, strategy=STRATEGY)

        if False:
            # disabled diagnostic: count the episodes in which the tracked position actually changed
            eps_with_changes = 0
            for e in episodes:
                prev_pos = np.zeros(3)
                changed = -1
                for s in e[0]:
                    sphere_pos = s[3:6].copy()
                    changed += int(not np.allclose(sphere_pos, prev_pos, atol=0.001))
                    prev_pos = sphere_pos
                eps_with_changes += int(changed > 0)
            print(eps_with_changes)

        print('Training...')
        losses = model.train(episodes, TRAINING_EPOCHS,
                             batch_size=BATCH_SIZE,
                             epoch_callback=epoch_callback,
                             scale_data=True,
                             shuffle_data=True)

        print('Saving model...')
        model.save(model_path)

    if VISUAL_TEST:
        print('Testing model...')
        controller = MPC(env, model, MPC_HORIZON, MPC_SEQUENCES, np_random)
        for e in range(2000):
            env.reset()
            for s in range(100):
                env.render()
                tic = datetime.now()
                obs = get_observations(env)
                action = controller.get_action(obs)
                _, rewards, dones, info = env.step(action)
                toc = datetime.now()
                print((toc - tic).total_seconds())
def main():
    if torch.cuda.is_available():
        print("CUDA available, proceeding with GPU...")
        device = torch.device("cuda")
    else:
        print("No GPU found, proceeding with CPU...")
        device = torch.device("cpu")

    env = gym.make(ENV_ID)
    raw_env = env.unwrapped
    np_random = raw_env.np_random

    observation_space = get_observation_space(env)
    n_inputs = 16 + env.action_space.low.size
    n_outputs = observation_space.low.size

    model = LSTM_Model(n_inputs, 32, n_outputs,
                       n_layers=2,
                       np_random=np_random,
                       device=device,
                       window_size=5)

    model_path = Path(f'./out/{EXP_NAME}_model.pkl')
    # model_path = Path(f'./out/{EXP_NAME}_model_e30.pkl')

    do_train = True
    if model_path.exists():
        print('Found existing model.')
        if OVERWRITE_EXISTING:
            print('Overwriting...')
        else:
            model = LSTM_Model.load(model_path, device)
            do_train = False
    else:
        print('Existing model not found.')

    if do_train:
        ##########################################################
        df = pd.read_pickle(DATA_PATH)
        n_episodes = df['episode'].max() + 1
        # episode_length = df['step'].max() + 1

        episodes = []
        targets, all_z = None, None
        if Z_TO_OBS:
            all_z = np.load(Z_PATH)['arr_0']
            targets = []

        for i in range(n_episodes):
            ep_df = df[df['episode'] == i]
            actions = np.array(ep_df['raw_action'].tolist())
            raw_obs = np.array(ep_df['raw_obs'].tolist())
            if Z_TO_OBS:
                targets.append(raw_obs[1:])
                z = all_z[i]
                episodes.append((z, actions[1:]))
            else:
                episodes.append((raw_obs, actions[1:]))

        if targets is not None:
            targets = np.array(targets)
        ##########################################################

        def epoch_callback(epoch, loss):
            print(epoch, loss)
            if epoch % 10 == 0:
                path = Path(f'./out/{EXP_NAME}_model_e{epoch}.pkl')
                model.save(path)
                # evaluate(LSTM_Model, path, ENV_ID)

        print('Training...')
        losses = model.train(episodes,
                             targets=targets,
                             epochs=TRAINING_EPOCHS,
                             batch_size=BATCH_SIZE,
                             epoch_callback=epoch_callback,
                             scale_data=True,
                             scale_targets=True)

        print('Saving model...')
        model.save(model_path)

    if VISUAL_TEST:
        print('Testing model...')
        controller = MPC(env, model, MPC_HORIZON, MPC_SEQUENCES, np_random)
        for e in range(2000):
            env.reset()
            controller.forget_history()
            for s in range(100):
                env.render()
                tic = datetime.now()
                obs = get_observations(env)
                action = controller.get_action(obs)
                _, rewards, dones, info = env.step(action)
                toc = datetime.now()
                print((toc - tic).total_seconds())