import os.path as osp
import time

import cv2
import joblib
import numpy as np

# Project-specific imports; the module paths below are assumptions based on
# the railrl codebase and may need adjusting to the local layout.
from railrl.envs.multitask.point2d import MultitaskImagePoint2DEnv
from railrl.envs.multitask.multitask_env import MultitaskToFlatEnv
from railrl.envs.vae_wrappers import VAEWrappedEnv
from railrl.envs.wrappers import NormalizedBoxEnv
from railrl.exploration_strategies.base import PolicyWrappedWithExplorationStrategy
from railrl.exploration_strategies.epsilon_greedy import EpsilonGreedy
from railrl.exploration_strategies.gaussian_strategy import GaussianStrategy
from railrl.exploration_strategies.ou_strategy import OUStrategy
from railrl.torch.networks import FlattenMlp, TanhMlpPolicy
from railrl.torch.td3.td3 import TD3
import railrl.torch.pytorch_util as ptu


def get_data(N=10000, test_p=0.9, use_cached=True, render=False):
    """Collect (or load cached) 84x84 image observations from the point-mass env."""
    filename = "/tmp/point2d_" + str(N) + ".npy"
    if use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:  # not cached: roll out random actions and record rendered frames
        now = time.time()
        e = MultitaskImagePoint2DEnv(render_size=84, render_onscreen=False,
                                     ball_radius=1)
        dataset = np.zeros((N, 84 * 84))
        for i in range(N):
            if i % 100 == 0:
                e.reset()
            u = np.random.rand(2) * 2 - 1  # uniform random action in [-1, 1]^2
            img, _, _, _ = e.step(u)
            dataset[i, :] = img
            if render:
                cv2.imshow('img', img.reshape(1, 84, 84).transpose())
                cv2.waitKey(1)
            # dataset[i, :] = e.reset()
        print("done making training data", filename, time.time() - now)
        np.save(filename, dataset)

    # Note: despite its name, test_p sets the *train* fraction; the first
    # int(N * test_p) frames go to the training set, the rest to the test set.
    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset
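
# A minimal usage sketch for get_data() (the helper name below is hypothetical,
# not part of the original script). The shapes follow from the 84x84 render
# size and the 90/10 split above; running it requires the point-mass env.
def _sanity_check_get_data():
    train, test = get_data(N=1000, use_cached=False)
    assert train.shape == (900, 84 * 84)  # first 90% of frames -> train
    assert test.shape == (100, 84 * 84)   # remaining 10% -> test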

def experiment(variant):
    # Load a pretrained VAE checkpoint for the requested latent dimension.
    rdim = variant["rdim"]
    vae_paths = {
        2: "/home/ashvin/data/s3doodad/ashvin/vae/point2d-conv-sweep2/run0/id1/params.pkl",
        4: "/home/ashvin/data/s3doodad/ashvin/vae/point2d-conv-sweep2/run0/id4/params.pkl",
    }
    vae_path = vae_paths[rdim]
    vae = joblib.load(vae_path)
    print("loaded", vae_path)

    # Build the environment. Note: if variant['multitask'] is False, env is
    # never assigned and the function raises a NameError below; the
    # single-task branch is left commented out as in the original.
    if variant['multitask']:
        env = MultitaskImagePoint2DEnv(**variant['env_kwargs'])
        env = VAEWrappedEnv(env, vae, use_vae_obs=True,
                            use_vae_reward=False, use_vae_goals=False)
        env = MultitaskToFlatEnv(env)
    # else:
    #     env = Pusher2DEnv(**variant['env_kwargs'])
    if variant['normalize']:
        env = NormalizedBoxEnv(env)

    exploration_type = variant['exploration_type']
    if exploration_type == 'ou':
        es = OUStrategy(action_space=env.action_space)
    elif exploration_type == 'gaussian':
        es = GaussianStrategy(
            action_space=env.action_space,
            max_sigma=0.1,
            min_sigma=0.1,  # constant sigma
        )
    elif exploration_type == 'epsilon':
        es = EpsilonGreedy(
            action_space=env.action_space,
            prob_random_action=0.1,
        )
    else:
        raise Exception("Invalid type: " + exploration_type)

    # TD3 components: twin Q-networks plus a deterministic tanh policy.
    obs_dim = env.observation_space.low.size
    action_dim = env.action_space.low.size
    qf1 = FlattenMlp(
        input_size=obs_dim + action_dim,
        output_size=1,
        hidden_sizes=[400, 300],
    )
    qf2 = FlattenMlp(
        input_size=obs_dim + action_dim,
        output_size=1,
        hidden_sizes=[400, 300],
    )
    policy = TanhMlpPolicy(
        input_size=obs_dim,
        output_size=action_dim,
        hidden_sizes=[400, 300],
    )
    exploration_policy = PolicyWrappedWithExplorationStrategy(
        exploration_strategy=es,
        policy=policy,
    )
    algorithm = TD3(
        env,
        training_env=env,
        qf1=qf1,
        qf2=qf2,
        policy=policy,
        exploration_policy=exploration_policy,
        **variant['algo_kwargs']
    )

    print("use_gpu", variant["use_gpu"], bool(variant["use_gpu"]))
    if variant["use_gpu"]:
        gpu_id = variant["gpu_id"]
        ptu.set_gpu_mode(True)
        ptu.set_device(gpu_id)
        algorithm.to(ptu.device)
        env._wrapped_env.vae.to(ptu.device)
    algorithm.train()
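
# A minimal invocation sketch (an assumption, not part of the original
# script). The variant keys mirror exactly what experiment() reads above;
# the values are illustrative placeholders, not tuned settings.
if __name__ == "__main__":
    variant = dict(
        rdim=4,                  # must be a key of vae_paths (2 or 4)
        multitask=True,          # experiment() currently requires this to be True
        env_kwargs=dict(),       # forwarded to MultitaskImagePoint2DEnv
        normalize=True,
        exploration_type='ou',   # one of 'ou', 'gaussian', 'epsilon'
        algo_kwargs=dict(),      # forwarded to TD3; fill in algorithm hyperparameters
        use_gpu=False,
        gpu_id=0,
    )
    experiment(variant)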