def main(_):
  config = get_config(FLAGS) or FLAGS
  config.cnn_format = 'NHWC'

  ps_hosts = FLAGS.ps_hosts.split(",")
  worker_hosts = FLAGS.worker_hosts.split(",")
  cluster = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts})
  server = tf.train.Server(cluster,
                           job_name=FLAGS.job_name,
                           task_index=FLAGS.task_index)

  if FLAGS.job_name == "ps":
    # Parameter servers only host the shared variables; block forever.
    server.join()
  elif FLAGS.job_name == "worker":
    env = GymEnvironment(config)

    # Pin variables to the ps tasks and computation to this worker.
    with tf.device(tf.train.replica_device_setter(
        worker_device="/job:worker/task:%d" % FLAGS.task_index,
        cluster=cluster)):
      lr_op = tf.placeholder('float', None, name='learning_rate')
      optimizer = tf.train.RMSPropOptimizer(
          lr_op, decay=0.99, momentum=0, epsilon=0.1)
      agent = Agent(config, env, optimizer, lr_op)

    # Give each worker a different final exploration rate.
    agent.ep_end = random.choice([0.1, 0.01, 0.5])
    print(agent.model_dir)

    # Create a "supervisor", which oversees the training process.
    is_chief = (FLAGS.task_index == 0)
    sv = tf.train.Supervisor(is_chief=is_chief,
                             logdir="./logs/" + agent.model_dir,
                             init_op=agent.init_op,
                             summary_op=None,
                             saver=agent.saver,
                             global_step=agent.step_op,
                             save_model_secs=600)

    if FLAGS.is_train:
      # Only the chief writes summaries; the other workers just train.
      train_or_play = agent.train_with_summary if is_chief else agent.train
    else:
      train_or_play = agent.play

    # The supervisor handles session initialization and restoring from
    # a checkpoint, and closes the session when done or on error.
    with sv.managed_session(server.target) as sess:
      agent.sess = sess
      agent.update_target_q_network()
      train_or_play(sv, is_chief)

    # Ask for all the services to stop.
    sv.stop()
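main assumes a flag and entry-point preamble defined elsewhere in the file. Only the flag names come from the code above; the types, defaults, filename, and launch commands below are a minimal sketch:

import random

import tensorflow as tf

flags = tf.app.flags
flags.DEFINE_string("ps_hosts", "localhost:2222",
                    "Comma-separated host:port pairs for the ps tasks")
flags.DEFINE_string("worker_hosts", "localhost:2223,localhost:2224",
                    "Comma-separated host:port pairs for the worker tasks")
flags.DEFINE_string("job_name", "worker", "Either 'ps' or 'worker'")
flags.DEFINE_integer("task_index", 0, "Index of the task within its job")
flags.DEFINE_boolean("is_train", True, "Train if True, otherwise play")
FLAGS = flags.FLAGS

if __name__ == "__main__":
  tf.app.run()  # Parses the flags, then calls main(_).

# Example launch, one process per line (hypothetical filename):
#   python main.py --job_name=ps --task_index=0
#   python main.py --job_name=worker --task_index=0
#   python main.py --job_name=worker --task_index=1

tf.train.Supervisor has since been deprecated in favor of tf.train.MonitoredTrainingSession, which takes over initialization, checkpoint recovery, and periodic saving. The same main rewritten on top of it: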
def main(_):
  config = get_config(FLAGS) or FLAGS
  config.cnn_format = 'NHWC'

  ps_hosts = FLAGS.ps_hosts.split(",")
  worker_hosts = FLAGS.worker_hosts.split(",")
  cluster = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts})
  server = tf.train.Server(cluster,
                           job_name=FLAGS.job_name,
                           task_index=FLAGS.task_index)

  if FLAGS.job_name == "ps":
    server.join()
  elif FLAGS.job_name == "worker":
    env = GymEnvironment(config)

    with tf.device(tf.train.replica_device_setter(
        worker_device="/job:worker/task:%d" % FLAGS.task_index,
        cluster=cluster)):
      lr_op = tf.placeholder('float', None, name='learning_rate')
      optimizer = tf.train.RMSPropOptimizer(
          lr_op, decay=0.99, momentum=0, epsilon=0.1)
      agent = Agent(config, env, optimizer, lr_op)

    agent.ep_end = random.choice([0.1, 0.01, 0.5])
    print(agent.model_dir)

    logdir = "./logs/" + agent.model_dir
    is_chief = (FLAGS.task_index == 0)
    if is_chief:
      # With the default summary saving disabled below, the chief
      # writes its own summaries through this writer.
      agent.summary_writer = tf.summary.FileWriter(logdir)

    if FLAGS.is_train:
      train_or_play = agent.train_with_summary if is_chief else agent.train
    else:
      train_or_play = agent.play

    hooks = [
        # Stop every worker once the global step reaches max_step.
        tf.train.StopAtStepHook(last_step=config.max_step),
    ]
    scaffold = tf.train.Scaffold(saver=agent.saver)

    with tf.train.MonitoredTrainingSession(
        master=server.target,
        is_chief=is_chief,
        scaffold=scaffold,
        checkpoint_dir=logdir,
        save_summaries_steps=None,  # Disable the default SummarySaverHook.
        hooks=hooks) as sess:
      agent.sess = sess
      agent.update_target_q_network()
      train_or_play(is_chief)
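StopAtStepHook locates the global step through the graph, so agent.step_op (which the Supervisor version passed as global_step) presumably is registered as the graph's global step, and the hook signals shutdown through sess.should_stop(), which the agent's training loop has to poll. A standalone toy, with illustrative names, showing the stop mechanics:

import tensorflow as tf

# A global step the hook can find, plus an op that advances it.
global_step = tf.train.get_or_create_global_step()
increment = tf.assign_add(global_step, 1)

hooks = [tf.train.StopAtStepHook(last_step=100)]
with tf.train.MonitoredTrainingSession(hooks=hooks) as sess:
  while not sess.should_stop():  # Flips to True once last_step is hit.
    step = sess.run(increment)
print("stopped after step", step)  # -> stopped after step 100

In a real training loop the step would typically advance through optimizer.minimize(loss, global_step=...) rather than an explicit assign_add.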