Exemplo n.º 1
0
 def init(self):
     """Build this gather-worker's environment, policy graph, and the ops
     that copy broadcast weights into its trainable variables, then seed
     the first (empty) episode buffers."""
     import tensorflow as tf
     self.env_opts = environments.get_env_options(self.env_name, self.env_producer.get_use_gpu())
     self.env = self.env_producer.get_new_environment()
     self.s0 = self.env.reset()
     self.session = utils.create_session(self.env_opts, False)
     scope_name = "gather-%s" % self.idx
     with tf.device("/cpu:0"):
         with tf.variable_scope(scope_name):
             pol = get_policy(self.env_opts, self.session)
             self.agent = PPOAgent(pol, self.session, scope_name, self.env_opts)
             self.trainable_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope_name)
             # One non-trainable buffer per trainable variable; weights
             # received from the master land here before being assigned.
             self.accum_vars = [tf.Variable(tf.zeros_like(tv.initialized_value()), trainable=False)
                                for tv in self.trainable_vars]
             assign_ops = [var.assign(buf)
                           for var, buf in zip(self.trainable_vars, self.accum_vars)]
             self.assign_op = tf.group(assign_ops)
         self.session.run(tf.global_variables_initializer())
         self.cur_hidden_state = self.agent.get_init_hidden_state()
         # Episode tuple: (states, ...,  hidden states, ...) — starts with
         # the initial observation and hidden state already recorded.
         self.episode = [self.s0], [], [], [], [], [self.cur_hidden_state], []
Exemplo n.º 2
0
def start(env):
    """Render a trained PPO agent in *env* and save the frames as a GIF.

    Restores the latest checkpoint from models/<env-id>/ (falling back to a
    fresh initialization when restoring fails), plays greedy episodes until
    at least 1000 environment steps have elapsed while collecting rendered
    RGB frames, then writes images/<env-id>.gif at ~60 fps.
    """
    env = gym.make(env)
    frames = []
    MASTER_NAME = "master-0"
    IMAGE_PATH = "images/%s.gif" % env.spec.id
    tf.reset_default_graph()

    with tf.Session() as session:
        with tf.variable_scope(MASTER_NAME) as scope:
            # NOTE(review): get_env_options receives the env *object* here,
            # while other call sites pass an env name — confirm both work.
            env_opts = environments.get_env_options(env, False)
            policy = get_policy(env_opts, session)
            master_agent = PPOAgent(policy, session, MASTER_NAME, env_opts)

        saver = tf.train.Saver(max_to_keep=1)
        # Bug fix: the restore calls used to sit *outside* the try block
        # (which guarded only `pass`), so a missing/corrupt checkpoint
        # crashed instead of triggering the from-scratch fallback. Note a
        # missing checkpoint raises TypeError on `None + ".meta"`.
        try:
            saver = tf.train.import_meta_graph(
                tf.train.latest_checkpoint("models/%s/" % env.spec.id) + ".meta")
            saver.restore(session,
                          tf.train.latest_checkpoint("models/%s/" % env.spec.id))
        except Exception:
            print("Failed to restore model, starting from scratch")
            session.run(tf.global_variables_initializer())

        global_step = 0
        while global_step < 1000:
            terminal = False
            s0 = env.reset()
            cum_rew = 0
            cur_hidden_state = master_agent.get_init_hidden_state()
            episode_count = 0
            while not terminal:
                episode_count += 1
                frames.append(env.render(mode='rgb_array'))
                action, h_out = master_agent.get_strict_sample(
                    s0, cur_hidden_state)
                cur_hidden_state = h_out
                s0, r, terminal, _ = env.step(action)
                cum_rew += r
                global_step += 1
            print(episode_count, cum_rew)
        imageio.mimsave(IMAGE_PATH, frames, duration=1.0 / 60.0)
Exemplo n.º 3
0
    def init_agent(self):
        """Construct this worker's policy graph plus the assign ops used to
        copy in broadcast weights and optimizer state, initialize all
        variables, and enter the worker run loop."""
        import tensorflow as tf
        env_opts = environments.get_env_options(
            self.env_name, self.env_producer.get_use_gpu())
        self.session = utils.create_session(env_opts, True)
        scope_name = "worker-%s" % self.idx

        def shadow(variables):
            # Non-trainable, zero-initialized receive buffers, one per var.
            return [tf.Variable(tf.zeros_like(v.initialized_value()),
                                trainable=False) for v in variables]

        with tf.variable_scope(scope_name):
            pol = get_policy(env_opts, self.session)
            self.agent = PPOAgent(pol, self.session, scope_name, env_opts)
            self.trainable_vars = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope_name)
            self.accum_vars = shadow(self.trainable_vars)
            p_vars = self.agent.p_opt.variables()
            v_vars = self.agent.v_opt.variables()
            self.p_opt_vars = shadow(p_vars)
            self.v_opt_vars = shadow(v_vars)
            p_assign_ops = [var.assign(buf)
                            for var, buf in zip(p_vars, self.p_opt_vars)]
            v_assign_ops = [var.assign(buf)
                            for var, buf in zip(v_vars, self.v_opt_vars)]
            assign_ops = [var.assign(buf)
                          for var, buf in zip(self.trainable_vars, self.accum_vars)]
            # Single grouped op: one session.run applies weights + both
            # optimizers' state.
            self.assign_op = tf.group(assign_ops + p_assign_ops + v_assign_ops)

        self.session.run(tf.global_variables_initializer())
        self.run()
def start(env, gpu):
    """Launch distributed PPO training for *env*.

    Prepares log/model directories (wiping any stale logs for this env) and
    constructs the SimpleMaster that owns the worker/gather processes.

    Args:
        env: gym environment id string.
        gpu: whether workers should run on the GPU.
    """
    env = gym.make(env)

    env_name = env.spec.id

    # makedirs(exist_ok=True) avoids the check-then-create race of the
    # original exists()/mkdir() pattern.
    os.makedirs('logs', exist_ok=True)
    os.makedirs('models', exist_ok=True)

    # Best-effort wipe of stale logs; narrowed from a bare `except:` to
    # OSError so unrelated errors (e.g. KeyboardInterrupt) still propagate.
    try:
        shutil.rmtree("logs/" + env_name)
    except OSError:
        pass

    env_producer = environments.EnvironmentProducer(env.spec.id)
    env_opts = environments.get_env_options(env, gpu)
    worker_num = env_opts["worker_num"]
    gather_per_worker = env_opts["gather_per_worker"]
    # Constructing SimpleMaster starts the training machinery; the instance
    # is bound only for clarity/debugging.
    master = SimpleMaster(worker_num, gather_per_worker, env_opts,
                          env_producer)
Exemplo n.º 5
0
def start(env):
    """Run a trained PPO agent in *env* indefinitely, rendering each step.

    Restores the latest checkpoint from models/<env-id>/ (falling back to a
    fresh initialization when restoring fails), then plays greedy episodes
    forever, printing each episode's length and cumulative reward.
    """
    env = gym.make(env)

    MASTER_NAME = "master-0"

    tf.reset_default_graph()

    with tf.Session() as session:
        with tf.variable_scope(MASTER_NAME) as scope:
            env_opts = environments.get_env_options(env, False)
            policy = get_policy(env_opts, session)
            master_agent = PPOAgent(policy, session, MASTER_NAME, env_opts)

        saver = tf.train.Saver(max_to_keep=1)
        # Bug fix: the restore calls used to sit *outside* the try block
        # (which guarded only `pass`), so a missing/corrupt checkpoint
        # crashed instead of triggering the from-scratch fallback. Note a
        # missing checkpoint raises TypeError on `None + ".meta"`.
        try:
            saver = tf.train.import_meta_graph(tf.train.latest_checkpoint("models/%s/" % env.spec.id) + ".meta")
            saver.restore(session, tf.train.latest_checkpoint("models/%s/" % env.spec.id))
        except Exception:
            print("Failed to restore model, starting from scratch")
            session.run(tf.global_variables_initializer())

        while True:
            terminal = False
            s0 = env.reset()
            cum_rew = 0
            cur_hidden_state = master_agent.get_init_hidden_state()
            episode_count = 0
            while not terminal:
                episode_count += 1
                env.render()
                action, h_out = master_agent.get_strict_sample(s0, cur_hidden_state)
                cur_hidden_state = h_out
                s0, r, terminal, _ = env.step(action)
                cum_rew += r
            print(episode_count, cum_rew)
Exemplo n.º 6
0
    def start(self):
        """Build the master's policy graph and weight/optimizer-state merge
        ops, restore any saved checkpoint, then loop forever coordinating
        workers: broadcast weights, merge results back, and checkpoint every
        10 iterations."""
        import tensorflow as tf
        env_opts = environments.get_env_options(
            self.env_name, self.env_producer.get_use_gpu())
        self.summary_writer = tf.summary.FileWriter("logs/%s" % self.env_name)
        self.session = utils.create_session(env_opts, True)
        with tf.variable_scope("master-0"):
            pol = get_policy(env_opts, self.session)
            self.agent = PPOAgent(pol, self.session, "master-0", env_opts)
            self.trainable_vars = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, "master-0")
            # Non-trainable buffers that receive merged worker weights.
            self.accum_vars = [
                tf.Variable(tf.zeros_like(tv.initialized_value()),
                            trainable=False) for tv in self.trainable_vars
            ]
            p_vars = self.agent.p_opt.variables()
            v_vars = self.agent.v_opt.variables()
            # Matching buffers for the policy/value optimizers' slot vars.
            self.p_opt_vars = [
                tf.Variable(tf.zeros_like(tv.initialized_value()),
                            trainable=False) for tv in p_vars
            ]
            self.v_opt_vars = [
                tf.Variable(tf.zeros_like(tv.initialized_value()),
                            trainable=False) for tv in v_vars
            ]
            p_assign_ops = [
                p_vars[i].assign(self.p_opt_vars[i])
                for i in range(len(p_vars))
            ]
            v_assign_ops = [
                v_vars[i].assign(self.v_opt_vars[i])
                for i in range(len(v_vars))
            ]

            assign_ops = [
                self.trainable_vars[i].assign(self.accum_vars[i])
                for i in range(len(self.trainable_vars))
            ]
            # One grouped op applies weights plus both optimizers' state.
            self.assign_op = tf.group(assign_ops + p_assign_ops + v_assign_ops)

        self.restore_variables()
        self.saver = tf.train.Saver(max_to_keep=1)
        self.session.run(tf.global_variables_initializer())
        try:
            self.saver = tf.train.import_meta_graph(
                tf.train.latest_checkpoint("models/%s/" %
                                           env_opts["env_name"]) + ".meta")
            self.saver.restore(
                self.session,
                tf.train.latest_checkpoint("models/%s/" %
                                           env_opts["env_name"]))
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
            # still propagate. Any restore failure (including a missing
            # checkpoint, which raises TypeError on `None + ".meta"`) falls
            # through to the fresh initialization performed above.
            print("failed to restore model")

        while True:
            if self.iter_count % 10 == 0:
                print("Saving model...")
                self.save_variables()
                self.saver.save(self.session, self.model_path, self.iter_count)
                print("Model saved")
            self.broadcast_weights()
            self.merge_weights()
            self.iter_count += 1