# Example #1
# 0
def main(_):
    """Entry point for a distributed DQN run (Supervisor-based variant).

    Builds a TF cluster from FLAGS; a "ps" task hosts variables and blocks
    forever, while a "worker" task builds the agent graph and trains or
    plays under a tf.train.Supervisor-managed session.
    """
    config = get_config(FLAGS) or FLAGS
    config.cnn_format = 'NHWC'

    ps_hosts = FLAGS.ps_hosts.split(",")
    worker_hosts = FLAGS.worker_hosts.split(",")

    cluster = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts})
    server = tf.train.Server(cluster,
                             job_name=FLAGS.job_name,
                             task_index=FLAGS.task_index)

    if FLAGS.job_name == "ps":
        # Parameter servers only serve variables; block until the job dies.
        server.join()
    elif FLAGS.job_name == "worker":
        env = GymEnvironment(config)

        # Pin variables to ps tasks and ops to this worker.
        with tf.device(
                tf.train.replica_device_setter(
                    worker_device="/job:worker/task:%d" % FLAGS.task_index,
                    cluster=cluster)):
            lr_op = tf.placeholder('float', None, name='learning_rate')
            optimizer = tf.train.RMSPropOptimizer(lr_op,
                                                  decay=0.99,
                                                  momentum=0,
                                                  epsilon=0.1)
            agent = Agent(config, env, optimizer, lr_op)

            # Per-worker final exploration rate, sampled from a fixed set.
            agent.ep_end = random.choice([0.1, 0.01, 0.5])

        print(agent.model_dir)

        # Create a "supervisor", which oversees the training process.
        is_chief = (FLAGS.task_index == 0)
        sv = tf.train.Supervisor(is_chief=is_chief,
                                 logdir="./logs/" + agent.model_dir,
                                 init_op=agent.init_op,
                                 summary_op=None,
                                 saver=agent.saver,
                                 global_step=agent.step_op,
                                 save_model_secs=600)

        if FLAGS.is_train:
            # Only the chief writes summaries to avoid duplicate events.
            train_or_play = agent.train_with_summary if is_chief else agent.train
        else:
            train_or_play = agent.play

        with sv.managed_session(server.target) as sess:
            agent.sess = sess
            agent.update_target_q_network()

            train_or_play(sv, is_chief)

        # Ask for all the services to stop.
        # Fix: this must stay inside the worker branch — `sv` is undefined
        # for any other job_name, so the old function-level call could
        # raise NameError.
        sv.stop()
# Example #2
# 0
def main(_):
  """Entry point for a distributed DQN run (Supervisor-based variant).

  Builds a TF cluster from FLAGS; a "ps" task hosts variables and blocks
  forever, while a "worker" task builds the agent graph and trains or
  plays under a tf.train.Supervisor-managed session.
  """
  config = get_config(FLAGS) or FLAGS
  config.cnn_format = 'NHWC'

  ps_hosts = FLAGS.ps_hosts.split(",")
  worker_hosts = FLAGS.worker_hosts.split(",")

  cluster = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts})
  server = tf.train.Server(cluster,
                           job_name=FLAGS.job_name,
                           task_index=FLAGS.task_index)

  if FLAGS.job_name == "ps":
    # Parameter servers only serve variables; block until the job dies.
    server.join()
  elif FLAGS.job_name == "worker":
    env = GymEnvironment(config)

    # Pin variables to ps tasks and ops to this worker.
    with tf.device(tf.train.replica_device_setter(
        worker_device="/job:worker/task:%d" % FLAGS.task_index,
        cluster=cluster)):
      lr_op = tf.placeholder('float', None, name='learning_rate')
      optimizer = tf.train.RMSPropOptimizer(
          lr_op, decay=0.99, momentum=0, epsilon=0.1)
      agent = Agent(config, env, optimizer, lr_op)

      # Per-worker final exploration rate, sampled from a fixed set.
      agent.ep_end = random.choice([0.1, 0.01, 0.5])

    print(agent.model_dir)

    # Create a "supervisor", which oversees the training process.
    is_chief = (FLAGS.task_index == 0)
    sv = tf.train.Supervisor(is_chief=is_chief,
                             logdir="./logs/" + agent.model_dir,
                             init_op=agent.init_op,
                             summary_op=None,
                             saver=agent.saver,
                             global_step=agent.step_op,
                             save_model_secs=600)

    if FLAGS.is_train:
      # Only the chief writes summaries to avoid duplicate events.
      train_or_play = agent.train_with_summary if is_chief else agent.train
    else:
      train_or_play = agent.play

    with sv.managed_session(server.target) as sess:
      agent.sess = sess
      agent.update_target_q_network()

      train_or_play(sv, is_chief)

    # Ask for all the services to stop.
    # Fix: this must stay inside the worker branch — `sv` is undefined for
    # any other job_name, so the old function-level call could raise
    # NameError.
    sv.stop()
# Example #3
# 0
def main(_):
    """Entry point for a distributed DQN run (MonitoredTrainingSession variant).

    Builds a TF cluster from FLAGS; a "ps" task hosts variables and blocks
    forever, while a "worker" task builds the agent graph and trains or
    plays inside a tf.train.MonitoredTrainingSession, which handles
    init/recovery, checkpointing, and stop conditions via hooks.
    """
    config = get_config(FLAGS) or FLAGS
    config.cnn_format = 'NHWC'

    ps_hosts = FLAGS.ps_hosts.split(",")
    worker_hosts = FLAGS.worker_hosts.split(",")

    cluster = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts})
    server = tf.train.Server(cluster,
                             job_name=FLAGS.job_name,
                             task_index=FLAGS.task_index)

    if FLAGS.job_name == "ps":
        # Parameter servers only serve variables; block until the job dies.
        server.join()
    elif FLAGS.job_name == "worker":
        env = GymEnvironment(config)

        # Pin variables to ps tasks and ops to this worker.
        with tf.device(
                tf.train.replica_device_setter(
                    worker_device="/job:worker/task:%d" % FLAGS.task_index,
                    cluster=cluster)):
            lr_op = tf.placeholder('float', None, name='learning_rate')
            optimizer = tf.train.RMSPropOptimizer(lr_op,
                                                  decay=0.99,
                                                  momentum=0,
                                                  epsilon=0.1)
            agent = Agent(config, env, optimizer, lr_op)

            # Per-worker final exploration rate, sampled from a fixed set.
            agent.ep_end = random.choice([0.1, 0.01, 0.5])

        print(agent.model_dir)
        logdir = "./logs/" + agent.model_dir

        is_chief = (FLAGS.task_index == 0)
        if is_chief:
            # Summaries are written manually by the agent; only the chief
            # gets a writer so events are not duplicated across workers.
            agent.summary_writer = tf.summary.FileWriter(logdir)

        if FLAGS.is_train:
            train_or_play = agent.train_with_summary if is_chief else agent.train
        else:
            train_or_play = agent.play

        hooks = [
            # Stop the session once the global step reaches max_step.
            tf.train.StopAtStepHook(last_step=config.max_step),
        ]
        scaffold = tf.train.Scaffold(saver=agent.saver)

        with tf.train.MonitoredTrainingSession(
                master=server.target,
                is_chief=is_chief,
                scaffold=scaffold,
                checkpoint_dir=logdir,
                save_summaries_steps=None,  # Disable default SummarySaverHook.
                hooks=hooks) as sess:
            agent.sess = sess
            agent.update_target_q_network()

            train_or_play(is_chief)