def __init__(self,
              make_env_f,
              *args,
             gpus=get_available_gpus() * 4,  # repeat the device list: four workers per device
              input_queue=None,
              done_queue=None,
              **kwargs):
     self.sess = None
     if not gpus:
         # No GPUs detected; fall back to a single CPU worker.
         gpus = ['/cpu:0']
     # Grab a reference batch of 128 observations in a throwaway session;
     # models that need a fixed reference batch (e.g. for virtual batch
     # normalization) normalize against it.
     with tf.Session() as sess:
         import gym_tensorflow
         ref_batch = gym_tensorflow.get_ref_batch(make_env_f, sess, 128)
         ref_batch = ref_batch[:, ...]
     if input_queue is None and done_queue is None:
         # Standalone mode: one evaluation worker per device, fed through a hub.
         self.workers = [
             RLEvalutionWorker(make_env_f,
                               *args,
                               ref_batch=ref_batch,
                               **dict(kwargs, device=gpus[i]))
             for i in range(len(gpus))
         ]
         self.model = self.workers[0].model
         self.steps_counter = sum([w.steps_counter for w in self.workers])
         self.async_hub = AsyncTaskHub()
         self.hub = WorkerHub(self.workers, self.async_hub.input_queue,
                              self.async_hub)
     else:
         # Shared-queue mode: reuse the provided queues. A single worker is
         # built only to expose its model; no work is scheduled on it.
         fake_worker = RLEvalutionWorker(make_env_f,
                                         *args,
                                         ref_batch=ref_batch,
                                         **dict(kwargs, device=gpus[0]))
         self.model = fake_worker.model
         self.workers = []
         self.hub = None
         self.steps_counter = tf.constant(0)
         self.async_hub = AsyncTaskHub(input_queue, done_queue)
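
# Usage sketch (hypothetical): the snippet above only shows the constructor;
# the wrapper class name `ConcurrentWorkers` and the keyword arguments below
# are assumptions about the surrounding code, not part of it.
def make_frostbite_env(batch_size):
    # gym_tensorflow.make builds a batched, in-graph Atari environment.
    return gym_tensorflow.make(game='frostbite', batch_size=batch_size)

# workers = ConcurrentWorkers(make_frostbite_env, batch_size=64)
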
 def __init__(self,
              make_env_fs,
              *args,
              gpus=get_available_gpus() * 4,
              **kwargs):
     tlogger.info("=== Calling MTConcurrentWorkers()")
     self.sess = None
     if not gpus:
         gpus = ['/cpu:0']
     print("GPUS: {}".format(gpus))
     with tf.Session() as sess:
         import gym_tensorflow
         self.workers = []
         for i in range(len(gpus)):
             # Alternate between the two games for multi-task learning:
             # even-indexed workers evaluate game 0, odd-indexed workers game 1.
             game_index = i % 2
             game_make_env = make_env_fs[game_index]
             ref_batch = gym_tensorflow.get_ref_batch(
                 game_make_env, sess, 128, game_max_action_space=4)
             ref_batch = ref_batch[:, ...]
             worker = RLEvalutionWorkerCappedActionSpace(
                 game_index,
                 game_make_env,
                 *args,
                 ref_batch=ref_batch,
                 **dict(kwargs, device=gpus[i]))
             self.workers.append(worker)
         self.model = self.workers[0].model
         self.steps_counter = sum([w.steps_counter for w in self.workers])
         self.async_hub = AsyncTaskHub()
         self.hub = WorkerHub(self.workers, self.async_hub.input_queue,
                              self.async_hub)
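
# Usage sketch (hypothetical): `MTConcurrentWorkers` is the name in the log
# line above; the per-game factory signature and keyword arguments are
# assumptions about the surrounding code.
def make_pong_env(batch_size):
    return gym_tensorflow.make(game='pong', batch_size=batch_size)

def make_breakout_env(batch_size):
    return gym_tensorflow.make(game='breakout', batch_size=batch_size)

# workers = MTConcurrentWorkers([make_pong_env, make_breakout_env], batch_size=64)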
def main(game,
         filename=None,
         outfile=None,
         model_name="LargeModel",
         no_video=False,
         add_text=False,
         num_runs=RUNS,
         graph=None):

    seeds = default_seeds
    outvid = None
    viewer = None
    iteration = None
    state = None

    if filename:
        with open(filename, 'rb+') as file:
            state = pickle.load(file)
            #if hasattr(state, 'best_score'):
            #    seeds = state.best_score.seeds
            #    iteration = len(seeds)
            #    print("Loading GA snapshot from best_score, iteration: ", len(seeds))
            if hasattr(state, 'elite'):
                seeds = state.elite.seeds
                iteration = state.it
                print("Loading GA snapshot from elite, iteration: {} / {}",
                      len(seeds), iteration)
            else:
                seeds = None
                iteration = state.it
                print("Loading ES snapshot, iteration: {}".format(state.it))

    if outfile:
        fourcc = cv.VideoWriter_fourcc(*'MJPG')
        outvid = cv.VideoWriter(outfile, fourcc, 16, (VIDEO_SIZE, VIDEO_SIZE))

    env = gym_tensorflow.make(game, 1)

    model = get_model(model_name)
    obs_op = env.observation()
    reset_op = env.reset()

    if model.requires_ref_batch:

        def make_env(b):
            return gym_tensorflow.make(game=game, batch_size=1)

        with tf.Session() as sess:
            ref_batch = gym_tensorflow.get_ref_batch(make_env, sess, 128)
            ref_batch = ref_batch[:, ...]
    else:
        ref_batch = None

    action_op = model.make_net(tf.expand_dims(obs_op, axis=1),
                               env.action_space,
                               batch_size=1,
                               ref_batch=ref_batch)
    if env.discrete_action:
        action_op = tf.argmax(action_op, axis=-1, output_type=tf.int32)
    rew_op, done_op = env.step(action_op)

    if not no_video:
        from gym.envs.classic_control import rendering
        viewer = rendering.SimpleImageViewer()

    if hasattr(env.unwrapped, 'render'):
        obs_op = env.unwrapped.render()

        def display_obs(im):
            # OR two consecutive frames together to undo Atari sprite flicker.
            if im.shape[1] > 1:
                im = np.bitwise_or(im[0, 0, ...], im[0, 1, ...])
            else:
                im = im[0, 0, ...]
            handle_frame(im, outvid, viewer, game, iteration, add_text)
    else:

        def display_obs(im):
            # Take the newest frame from the stack and convert the grayscale
            # float image to RGB uint8 for display.
            im = im[0, :, :, -1]
            im = np.stack([im] * 3, axis=-1)
            im = (im * 255).astype(np.uint8)
            handle_frame(im, outvid, viewer, game, iteration, add_text)

    rewards = []

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        model.initialize()
        tlogger.info(model.description)

        if seeds:
            noise = SharedNoiseTable()
            weights = model.compute_weights_from_seeds(noise, seeds)
            model.load(sess, 0, weights, seeds)
        else:
            weights = state.theta
            model.load(sess, 0, weights, (weights, 0))

        if graph:
            # A Saver must exist before the session can be exported.
            saver = tf.train.Saver()
            saver.save(sess, graph)

        for i in range(num_runs):
            sess.run(reset_op)
            sess.run(obs_op)
            #recorder.capture_frame()
            display_obs(sess.run(obs_op))

            total_rew = 0
            num_frames = 0
            while True:
                rew, done = sess.run([rew_op, done_op])
                num_frames += 1
                total_rew += rew[0]
                display_obs(sess.run(obs_op))
                time.sleep(4 / 60)
                if done[0]:
                    break

            rewards += [total_rew]
            print('Final reward: ', total_rew, 'after', num_frames, 'steps')

    print(rewards)
    print("Mean: ", np.mean(rewards))
    print("Std: ", np.std(rewards))

    if outvid:
        outvid.release()
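
# handle_frame is called above but not defined in this snippet. A minimal
# sketch, assuming it scales the frame, optionally overlays a caption, and
# feeds the video writer and/or the live viewer; the real helper may differ.
def handle_frame(im, outvid, viewer, game, iteration, add_text):
    im = cv.resize(im, (VIDEO_SIZE, VIDEO_SIZE),
                   interpolation=cv.INTER_NEAREST)
    if add_text:
        caption = '{} iteration {}'.format(game, iteration)
        cv.putText(im, caption, (10, 20), cv.FONT_HERSHEY_SIMPLEX,
                   0.5, (255, 255, 255), 1)
    if outvid is not None:
        # OpenCV video writers expect BGR channel order.
        outvid.write(cv.cvtColor(im, cv.COLOR_RGB2BGR))
    if viewer is not None:
        viewer.imshow(im)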
def main(game, filename=None, out_dir=None, model_name='LargeModel',
         add_text=False, num_runs=RUNS, layer=None):

    seeds = default_seeds
    outvid = None
    viewer = None
    iteration = None
    state = None

    if filename:
        with open(filename, 'rb+') as file:
            state = pickle.load(file)
            #if hasattr(state, 'best_score'):
            #    seeds = state.best_score.seeds
            #    iteration = len(seeds)
            #    print("Loading GA snapshot from best_score, iteration: ", len(seeds))
            if hasattr(state, 'elite'):
                seeds = state.elite.seeds
                iteration = state.it
                print("Loading GA snapshot from elite, iteration: {} / {}".format(len(seeds), iteration))
            else:
                seeds = None
                iteration = state.it
                print("Loading ES snapshot, iteration: {}", state.it)

    fourcc = cv.VideoWriter_fourcc(*'H264')

    env = gym_tensorflow.make(game, 1)

    model = get_model(model_name)
    obs_op = env.observation()
    reset_op = env.reset()

    if model.requires_ref_batch:
        def make_env(b):
            return gym_tensorflow.make(game=game, batch_size=1)
        with tf.Session() as sess:
            ref_batch = gym_tensorflow.get_ref_batch(make_env, sess, 128)
            ref_batch = ref_batch[:, ...]
    else:
        ref_batch = None

    input_op = tf.expand_dims(obs_op, axis=1)
    action_op = model.make_net(input_op, env.action_space, batch_size=1, ref_batch=ref_batch)
    if env.discrete_action:
        action_op = tf.argmax(action_op, axis=-1, output_type=tf.int32)
    rew_op, done_op = env.step(action_op)

    out_vids = {'all': cv.VideoWriter(os.path.join(out_dir, 'all.mp4'),
                                      fourcc, 16, (VIDEO_SIZE, VIDEO_SIZE))}

    if hasattr(env.unwrapped, 'render'):
        obs_op = env.unwrapped.render()

        def display_obs(im, viz):
            # OR two consecutive frames together to undo Atari sprite flicker.
            if im.shape[1] > 1:
                im = np.bitwise_or(im[0, 0, ...], im[0, 1, ...])
            else:
                im = im[0, 0, ...]
            for key in out_vids.keys():
                # Build a fresh frame per key; reassigning `im` would stack
                # earlier keys' visualizations onto later ones.
                frame = combine_viz(im, viz, key)
                handle_frame(frame, out_vids[key], viewer, game, iteration,
                             add_text)
    else:
        def display_obs(im, viz):
            # Take the newest frame from the stack and convert the grayscale
            # float image to RGB uint8 for display.
            im = im[0, :, :, -1]
            im = np.stack([im] * 3, axis=-1)
            im = (im * 255).astype(np.uint8)
            for key in out_vids.keys():
                frame = combine_viz(im, viz, key)
                handle_frame(frame, out_vids[key], viewer, game, iteration,
                             add_text)

    rewards = []

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        model.initialize()
        tlogger.info(model.description)

        if seeds:
            noise = SharedNoiseTable()
            weights = model.compute_weights_from_seeds(noise, seeds)
            model.load(sess, 0, weights, seeds)
        else:
            weights = state.theta
            model.load(sess, 0, weights, (weights, 0))

        success, images = get_nn_images(sess, input_op, model)

        for key in images.keys():
            out_vids[key] = cv.VideoWriter(
                os.path.join(out_dir, '{}.mp4'.format(key.replace('/', '-'))),
                fourcc, 16, (VIDEO_SIZE, VIDEO_SIZE))

        for i in range(num_runs):
            sess.run(reset_op)
            # recorder.capture_frame()

            total_rew = 0
            num_frames = 0
            while True:
                img = sess.run(obs_op)
                success, images = get_nn_images(sess, input_op, model)

                rew, done = sess.run([rew_op, done_op])
                num_frames += 1
                total_rew += rew[0]
                display_obs(img, images)
                # time.sleep(4/60)
                if done[0] or num_frames == 50:
                    rewards += [total_rew]
                    print('Final reward: ', total_rew, 'after', num_frames, 'steps')
                    break

    print(rewards)
    print("Mean: ", np.mean(rewards))
    print("Std: ", np.std(rewards))

    for key in out_vids:
        out_vids[key].release()
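
# combine_viz is called above but not defined in this snippet. A minimal
# sketch, assuming the 'all' key passes the game frame through unchanged and
# every other key tiles that layer's activation image beside the frame; the
# real helper may differ.
def combine_viz(im, viz, key):
    if key == 'all' or key not in viz:
        return im
    layer_im = cv.resize(viz[key], (im.shape[1], im.shape[0]),
                         interpolation=cv.INTER_NEAREST)
    if layer_im.ndim == 2:
        layer_im = np.stack([layer_im] * 3, axis=-1)
    return np.concatenate([im, layer_im.astype(im.dtype)], axis=1)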