Exemplo n.º 1
0
    def __init__(self, env, model, nsteps=5, gamma=0.99):
        """Initialise the runner and, if enabled, the imitation featurizer.

        Args:
            env: forwarded to the parent constructor.
            model: forwarded to the parent constructor; its ``train_model``
                provides the action shape and the observation dtype.
            nsteps: forwarded to the parent constructor.
            gamma: stored unchanged as ``self.gamma``.
        """
        super().__init__(env=env, model=model, nsteps=nsteps)
        self.gamma = gamma

        # Dimensions reported as None in the action shape are stored as -1.
        action_dims = model.train_model.action.shape.as_list()
        self.batch_action_shape = [
            -1 if dim is None else dim for dim in action_dims
        ]
        self.ob_dtype = model.train_model.X.dtype.as_numpy_dtype

        # Everything below is only needed when the imitation setup is on.
        if not USE_IMMITATION_ENV:
            return

        print("a2c runner")
        self.featurizer = TDCFeaturizer(
            92, 92, 84, 84,
            feature_vector_size=1024,
            learning_rate=0,
            experiment_name='PE',
        )
        self.featurizer.load("zelda")

        # Only the first entry of the generated dataset is featurized.
        videos = generate_dataset('zelda', framerate=60, width=84, height=84)
        video_dataset = videos[0]
        self.featurized_dataset = self.featurizer.featurize(video_dataset)
        self.checkpoint_indexes = None

        self.rewards = 0
        self.counter = 0
 def __init__(self, *args, **kwargs):
     """Wrap the Montezuma's Revenge gym env in an ImmitationWrapper.

     ``args`` and ``kwargs`` are only printed; nothing else reads them.
     The observation/action spaces, RNG and ALE handle of the unwrapped
     environment are re-exported as attributes of this object.
     """
     print("MontezumaImmitationEnv")
     print(args)
     print(kwargs)

     tdc = TDCFeaturizer(
         92, 92, 84, 84,
         feature_vector_size=1024,
         learning_rate=0,
         experiment_name='default',
     )
     tdc.load()

     # Only the first entry of the generated dataset is featurized.
     demo_videos = generate_dataset('default',
                                    framerate=30 / 15,
                                    width=84,
                                    height=84)
     demo_features = tdc.featurize(demo_videos[0])

     base_env = gym.make('MontezumaRevengeNoFrameskip-v4')
     self._env = ImmitationWrapper(base_env,
                                   featurizer=tdc,
                                   featurized_dataset=demo_features)

     raw_env = self._env.unwrapped
     self.observation_space = raw_env.observation_space
     self.action_space = raw_env.action_space
     self.np_random = raw_env.np_random
     self.ale = raw_env.ale
Exemplo n.º 3
0
    def __init__(self, env_fns, spaces=None):
        """
        Start one worker process per entry of env_fns and query the spaces.

        env_fns: sized sequence; each item is wrapped in CloudpickleWrapper
                 and handed to its own worker process
        spaces: not read by this constructor
        """
        self.waiting = False
        self.closed = False
        nenvs = len(env_fns)
        self.nenvs = nenvs

        # One pipe per environment: this process keeps `remotes`, the workers
        # receive `work_remotes`.
        pipe_pairs = [Pipe() for _ in range(nenvs)]
        self.remotes, self.work_remotes = zip(*pipe_pairs)

        processes = []
        for work_end, parent_end, make_env in zip(self.work_remotes,
                                                  self.remotes, env_fns):
            worker_args = (work_end, parent_end, CloudpickleWrapper(make_env))
            processes.append(Process(target=worker, args=worker_args))
        self.ps = processes

        for proc in self.ps:
            # Daemonic workers: if the main process crashes, they must not
            # keep things hanging.
            proc.daemon = True
            proc.start()

        # The worker-side pipe ends are closed in this process once the
        # workers have been started.
        for work_end in self.work_remotes:
            work_end.close()

        # The first worker is asked for the spaces.
        first_remote = self.remotes[0]
        first_remote.send(('get_spaces', None))
        observation_space, action_space = first_remote.recv()
        self.viewer = None
        VecEnv.__init__(self, len(env_fns), observation_space, action_space)

        if not USE_IMMITATION_ENV:
            return

        self.featurizer = TDCFeaturizer(
            92, 92, 84, 84,
            feature_vector_size=1024,
            learning_rate=0,
            experiment_name='default',
        )
        self.featurizer.load()

        # Only the first entry of the generated dataset is featurized.
        videos = generate_dataset('default', framerate=30/15, width=84, height=84)
        video_dataset = videos[0]
        self.featurized_dataset = self.featurizer.featurize(video_dataset)
        self.checkpoint_indexes = [0] * nenvs

        self.rewards = 0
        self.counter = 0
Exemplo n.º 4
0
def visualize_features(videos_path, featurizer_type, initial_width,
                       initial_height, desired_width, desired_height, costar):
    """Featurize the first two generated video sets and visualize them.

    Args:
        videos_path: first argument passed to ``generate_dataset``.
        featurizer_type: one of ``'vae'``, ``'tdc'`` or ``'forward_model'``.
        initial_width, initial_height: passed to both the featurizer
            constructor and ``generate_dataset``.
        desired_width, desired_height: passed to the featurizer constructor.
        costar: passed to both the featurizer constructor and
            ``generate_dataset``.

    Raises:
        TypeError: if ``featurizer_type`` is not one of the supported names.
    """
    if featurizer_type not in ('vae', 'tdc', 'forward_model'):
        raise TypeError

    # The lambdas keep the class lookup lazy, so only the selected class name
    # has to be resolvable when this function runs.
    class_lookup = {
        'vae': lambda: VAEFeaturizer,
        'tdc': lambda: TDCFeaturizer,
        'forward_model': lambda: ForwardModelFeaturizer,
    }
    featurizer_class = class_lookup[featurizer_type]()

    featurizer = featurizer_class(
        initial_width,
        initial_height,
        desired_width,
        desired_height,
        costar,
        feature_vector_size=1024,
        learning_rate=0.0001,
        experiment_name='default',
    )

    videos = generate_dataset(videos_path, 10, initial_width, initial_height,
                              costar)

    # Only the first two entries of the dataset are embedded.
    first_embedding = featurizer.featurize(videos[0])
    second_embedding = featurizer.featurize(videos[1])
    visualize_embeddings([first_embedding, second_embedding])
Exemplo n.º 5
0
    def __init__(self,
                 env_fns,
                 spaces=None,
                 level_selector=None,
                 experiment=None,
                 dataset=None):
        """
        Build the environments in-process and allocate the per-env buffers.

        Arguments:
        env_fns: iterable of callables      functions that build environments
        spaces: not read by this constructor
        level_selector: stored unchanged as ``self.level_selector``
        experiment: experiment name given to TDCFeaturizer and
                    generate_dataset (only read when USE_IMMITATION_ENV)
        dataset: argument given to ``featurizer.load`` (only read when
                 USE_IMMITATION_ENV)
        """
        self.envs = [fn() for fn in env_fns]
        # Count the built environments instead of env_fns so that any
        # iterable (not only sized sequences) is accepted.
        self.num_envs = len(self.envs)
        env = self.envs[0]
        VecEnv.__init__(self, self.num_envs, env.observation_space,
                        env.action_space)

        obs_space = env.observation_space
        self.keys, shapes, dtypes = obs_space_info(obs_space)

        # One buffer per observation key, with a leading axis of num_envs.
        self.buf_obs = {
            k: np.zeros((self.num_envs, ) + tuple(shapes[k]), dtype=dtypes[k])
            for k in self.keys
        }
        # Builtin bool instead of the np.bool alias, which NumPy 1.24
        # removed; the resulting dtype is the same.
        self.buf_dones = np.zeros((self.num_envs, ), dtype=bool)
        self.buf_rews = np.zeros((self.num_envs, ), dtype=np.float32)
        self.buf_infos = [{} for _ in range(self.num_envs)]
        self.actions = None
        self.spec = self.envs[0].spec

        # NOTE(review): "finsihed" is misspelled, but the attribute name is
        # kept because code outside this method may read it.
        self.finsihed = [False for _ in range(self.num_envs)]
        self.last_mes = [None for _ in range(self.num_envs)]
        self.level_selector = level_selector
        if USE_IMMITATION_ENV:
            self.featurizer = TDCFeaturizer(92,
                                            92,
                                            84,
                                            84,
                                            feature_vector_size=1024,
                                            learning_rate=0,
                                            experiment_name=experiment)
            self.featurizer.load(dataset)
            # Only the first entry of the generated dataset is featurized.
            video_dataset = generate_dataset(experiment,
                                             framerate=60,
                                             width=84,
                                             height=84)[0]
            self.featurized_dataset = self.featurizer.featurize(video_dataset)
            # One checkpoint index per environment. The original used the
            # undefined name `nenvs` here, which raised NameError.
            self.checkpoint_indexes = [0] * self.num_envs

            self.rewards = 0
            self.counter = 0
 def __init__(self, *args, **kwargs):
     """Wrap the GVGAI Zelda level-0 gym env in an ImmitationWrapper.

     ``args`` and ``kwargs`` are accepted but not read. The observation and
     action spaces of the unwrapped environment are re-exported as
     attributes of this object.
     """
     print("ZeldaImmitationEnv")

     tdc = TDCFeaturizer(
         84, 84, 84, 84,
         feature_vector_size=1024,
         learning_rate=0,
         experiment_name='PE',
     )
     tdc.load("zelda")

     # Only the first entry of the generated dataset is featurized.
     demo_videos = generate_dataset('PE', framerate=60, width=84, height=84)
     demo_features = tdc.featurize(demo_videos[0])

     base_env = gym.make('gvgai-zelda-lvl0-v0')
     self._env = ImmitationWrapper(base_env,
                                   featurizer=tdc,
                                   featurized_dataset=demo_features)

     raw_env = self._env.unwrapped
     self.observation_space = raw_env.observation_space
     self.action_space = raw_env.action_space
Exemplo n.º 7
0
    def __init__(self, *, env, model, nsteps, gamma, lam):
        """Initialise the runner and, if enabled, the imitation featurizer.

        All arguments are keyword-only.

        Args:
            env: forwarded to the parent constructor.
            model: forwarded to the parent constructor.
            nsteps: forwarded to the parent constructor.
            gamma: stored unchanged as ``self.gamma``.
            lam: stored unchanged as ``self.lam``.
        """
        super().__init__(env=env, model=model, nsteps=nsteps)
        self.lam = lam
        self.gamma = gamma

        # Everything below is only needed when the imitation setup is on.
        if not USE_IMMITATION_ENV:
            return

        self.featurizer = TDCFeaturizer(
            92, 92, 84, 84,
            feature_vector_size=1024,
            learning_rate=0,
            experiment_name='default',
            is_variational=False,
        )
        self.featurizer.load()

        # Only the first entry of the generated dataset is featurized.
        videos = generate_dataset('default',
                                  framerate=30 / 15,
                                  width=84,
                                  height=84)
        video_dataset = videos[0]
        self.featurized_dataset = self.featurizer.featurize(video_dataset)
        self.checkpoint_indexes = None

        self.rewards = 0
        self.counter = 0
Exemplo n.º 8
0
from train_featurizer import generate_dataset
from embedding_visualization import visualize_embeddings
from TDCFeaturizer import TDCFeaturizer

# Script: load the 'montezuma' featurizer, embed the first three generated
# video sets and hand the embeddings to the visualizer.
featurizer = TDCFeaturizer(92, 92, 84, 84)
featurizer.load('montezuma')

# NOTE(review): the positional arguments are presumably framerate, width and
# height; confirm against generate_dataset's signature.
d = generate_dataset('montezuma.txt', .25, 84, 84)

# The sets are featurized in index order 0, 1, 2.
features1, features2, features3 = [
    featurizer.featurize(d[index]) for index in range(3)
]

features_all = [features1, features2, features3]
visualize_embeddings(features_all, experiment_name='montezuma')