Example #1
 def load_history_policy_model(self, model_file):
     # Rebuild the policy network on the CPU and restore an earlier
     # snapshot of its weights from the given checkpoint file.
     history_policy = PolicyRLNetwork(self.planes,
                                      None,
                                      filters=self.filters,
                                      board_size=self.board_size,
                                      model_dir=self.model_dir,
                                      device="cpu",
                                      distributed_train=False)
     history_policy.session = tf.Session(config=tf.ConfigProto(
         log_device_placement=False, allow_soft_placement=True))
     # tf.initialize_all_variables() is deprecated; the current TF 1.x
     # equivalent is tf.global_variables_initializer().
     history_policy.session.run(tf.global_variables_initializer())
     history_policy.restore_model(model_file=model_file)
     return history_policy
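A minimal sketch of how this loader might be called, assuming the AI_net module and the PolicyRLNetwork class (Example #5) are importable; the checkpoint path is hypothetical:

 # Hypothetical usage: restore an older policy snapshot, e.g. as a
 # self-play opponent during reinforcement learning.
 current_policy = PolicyRLNetwork(3, None, device="cpu", distributed_train=False)
 opponent = current_policy.load_history_policy_model("./policy_rl_models/model-1000")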
Example #2
 def __init__(self, planes, args, phase=1, filters=192, board_size=15, model_dir="./value_net_models",
              model_file=None,
              device="gpu", gpu=1, optimizer="sgd", learn_rate=1e-6, distributed_train=False):
     self.board_size = board_size
     self.phase = phase
     self.planes = planes
     # init network
     if distributed_train:
         ps_device = "/job:ps/task:0/cpu:0"
         worker_device = "/job:worker/task:%d/gpu:%d" % (args.task_index, args.gpu_id)
     else:
         ps_device = "/cpu:0"
         if device == "cpu":
             worker_device = "/cpu:0"
         else:
             worker_device = "/gpu:%d" % gpu
     self.tf_var = dict()
     self.tf_var["in"], self.tf_var["out"] = AI_net.create_value_network(
         planes, ps_device, worker_device, filters=filters, board_size=self.board_size, name_prefix="value_net")
     # super init
     AI_net.SuperNetwork.__init__(self, model_dir=model_dir)
     history_step = int(self.param_unserierlize(init_params={"global_step": 0})["global_step"])
     with tf.device(ps_device):
         self.global_step = tf.Variable(history_step)
     # loss function
     with tf.device(worker_device):
         self.loss_function(optimizer, learn_rate, args.values_net_batch_size)
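The device strings built above use TensorFlow's standard placement syntax: variables are pinned to the parameter-server device, compute ops to the worker device. A standalone illustration of the same pattern under the TF 1.x API, with hypothetical shapes:

 # Minimal tf.device placement sketch (TF 1.x), mirroring the pattern above.
 import tensorflow as tf

 ps_device = "/cpu:0"
 worker_device = "/gpu:0"
 with tf.device(ps_device):
     global_step = tf.Variable(0, name="global_step")  # lives on the PS device
 with tf.device(worker_device):
     boards = tf.placeholder(tf.float32, [None, 15, 15, 3])  # compute on worker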
Example #3
 def __init__(self, planes, corpus, policy_args, filters=192, board_size=15, model_dir="./policy_dl_models",
              device="gpu", gpu=1, optimizer="sgd", learn_rate=1e-6, distributed_train=False):
     self.board_size = board_size
     # init network
     if distributed_train:
         ps_device = "/job:ps/task:0/cpu:0"
         worker_device = "/job:worker/task:%d/gpu:%d" % (policy_args.task_index, policy_args.gpu_id)
     else:
         # ps_device = "/cpu:0"
         if device == "cpu":
             worker_device = "/cpu:0"
         else:
             worker_device = "/gpu:%d" % gpu
         ps_device = worker_device
     self.tf_var = dict({"in": tf.placeholder(tf.float32, [None, board_size, board_size, planes])})
     with tf.device(worker_device):
         with tf.name_scope('tower_%d' % 0) as scope:
             self.tf_var["out"] = AI_net.create_policy_network(self.tf_var["in"],
                                                               planes, filters=filters, board_size=self.board_size,
                                                               layers=5)
     # super init
     AI_net.SuperNetwork.__init__(self, model_dir=model_dir)
     history_step = int(self.param_unserierlize(init_params={"global_step": 0})["global_step"])
     with tf.device(ps_device):
         self.global_step = tf.Variable(history_step)
     # loss function
     with tf.device(worker_device):
         # num_batchs = corpus.num_batchs_per_epochs(policy_args.policy_dl_batch_size)
         # decay_steps = policy_args.policy_dl_epochs_per_decay * num_batchs
         # learn_rate = tf.train.exponential_decay(learn_rate, self.global_step, decay_steps,
         #                                         policy_args.policy_dl_decay_rate, staircase=True)
         self.tf_var["lr"] = tf.Variable(learn_rate)
         self.loss_function(optimizer, learn_rate)
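The commented-out block above sketches a learning-rate schedule. A working form of that idea under the TF 1.x API, with the decay parameters assumed since the real policy_args values are not shown:

 # Hypothetical reconstruction of the commented-out schedule (TF 1.x).
 import tensorflow as tf

 global_step = tf.Variable(0, trainable=False)
 learn_rate = tf.train.exponential_decay(1e-6,        # initial rate, as above
                                         global_step,
                                         10000,       # assumed decay_steps
                                         0.95,        # assumed decay_rate
                                         staircase=True)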
Example #4
 def __init__(self,
              planes,
              corpus,
              policy_args,
              filters=192,
              board_size=15,
              model_dir="./policy_dl_models",
              device="gpu",
              gpu=1,
              optimizer="sgd",
              learn_rate=1e-6,
              distributed_train=False):
     self.board_size = board_size
     # init network
     if distributed_train:
         ps_device = "/job:ps/task:0/cpu:0"
         worker_device = "/job:worker/task:%d/gpu:%d" % (
             policy_args.task_index, policy_args.gpu_id)
     else:
         # ps_device = "/cpu:0"
         if device == "cpu":
             worker_device = "/cpu:0"
         else:
             worker_device = "/gpu:%d" % gpu
         ps_device = worker_device
     self.tf_var = dict({
         "in":
         tf.placeholder(tf.float32, [None, board_size, board_size, planes])
     })
     with tf.device(worker_device):
         with tf.name_scope('tower_%d' % 0) as scope:
             self.tf_var["out"] = AI_net.create_policy_network(
                 self.tf_var["in"],
                 planes,
                 filters=filters,
                 board_size=self.board_size,
                 layers=5)
     # super init
     AI_net.SuperNetwork.__init__(self, model_dir=model_dir)
     history_step = int(
         self.param_unserierlize(
             init_params={"global_step": 0})["global_step"])
     with tf.device(ps_device):
         self.global_step = tf.Variable(history_step)
     # loss function
     with tf.device(worker_device):
         # num_batchs = corpus.num_batchs_per_epochs(policy_args.policy_dl_batch_size)
         # decay_steps = policy_args.policy_dl_epochs_per_decay * num_batchs
         # learn_rate = tf.train.exponential_decay(learn_rate, self.global_step, decay_steps,
         #                                         policy_args.policy_dl_decay_rate, staircase=True)
         self.tf_var["lr"] = tf.Variable(learn_rate)
         self.loss_function(optimizer, learn_rate)
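A hedged construction sketch; the class name PolicyDLNetwork is a guess from the default model_dir and should be treated as hypothetical. On the non-distributed path shown above, corpus and policy_args are never touched, so None is safe for illustration:

 # Hypothetical single-GPU construction (class name assumed).
 policy_net = PolicyDLNetwork(planes=3, corpus=None, policy_args=None,
                              device="gpu", gpu=0)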
Example #5
 def __init__(self,
              planes,
              policy_args,
              phase=1,
              filters=192,
              board_size=15,
              model_dir="./policy_rl_models",
              device="gpu",
              gpu=1,
              optimizer="sgd",
              learn_rate=1e-6,
              distributed_train=False):
     self.board_size = board_size
     self.phase = phase
     self.planes = planes
     self.filters = filters
     # init network
     if distributed_train:
         ps_device = "/job:ps/task:0/cpu:0"
         worker_device = "/job:worker/task:%d/gpu:%d" % (
             policy_args.task_index, policy_args.gpu_id)
     else:
         ps_device = "/cpu:0"
         if device == "cpu":
             worker_device = "/cpu:0"
         else:
             worker_device = "/gpu:%d" % gpu
     self.tf_var = dict({
         "in":
         tf.placeholder(tf.float32, [None, board_size, board_size, planes])
     })
     with tf.device(worker_device):
         with tf.name_scope('tower_%d' % 0) as scope:
             self.tf_var["out"] = AI_net.create_policy_network(
                 self.tf_var["in"],
                 planes,
                 filters=filters,
                 board_size=self.board_size,
                 layers=5)
     # super init
     AI_net.SuperNetwork.__init__(self, model_dir=model_dir)
     history_step = int(
         self.param_unserierlize(
             init_params={"global_step": 0})["global_step"])
     with tf.device(ps_device):
         self.global_step = tf.Variable(history_step)
     # loss function
     with tf.device(worker_device):
         self.loss_function(optimizer, learn_rate)
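When distributed_train is set, the "/job:ps/task:0" and "/job:worker/task:N" device strings must match a cluster definition. A minimal hedged sketch of such a setup with the TF 1.x distributed API; the host:port values are placeholders:

 # Hypothetical cluster matching the job/task device strings used above.
 import tensorflow as tf

 cluster = tf.train.ClusterSpec({
     "ps": ["ps0.example.com:2222"],
     "worker": ["worker0.example.com:2222", "worker1.example.com:2222"],
 })
 server = tf.train.Server(cluster, job_name="worker", task_index=0)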
Example #6
 def __init__(self,
              planes,
              args,
              phase=1,
              filters=192,
              board_size=15,
              model_dir="./value_net_models",
              model_file=None,
              device="gpu",
              gpu=1,
              optimizer="sgd",
              learn_rate=1e-6,
              distributed_train=False):
     self.board_size = board_size
     self.phase = phase
     self.planes = planes
     # init network
     if distributed_train:
         ps_device = "/job:ps/task:0/cpu:0"
         worker_device = "/job:worker/task:%d/gpu:%d" % (args.task_index,
                                                         args.gpu_id)
     else:
         ps_device = "/cpu:0"
         if device == "cpu":
             worker_device = "/cpu:0"
         else:
             worker_device = "/gpu:%d" % gpu
     self.tf_var = dict()
     self.tf_var["in"], self.tf_var["out"] = AI_net.create_value_network(
         planes,
         ps_device,
         worker_device,
         filters=filters,
         board_size=self.board_size,
         name_prefix="value_net")
     # super init
     AI_net.SuperNetwork.__init__(self, model_dir=model_dir)
     history_step = int(
         self.param_unserierlize(
             init_params={"global_step": 0})["global_step"])
     with tf.device(ps_device):
         self.global_step = tf.Variable(history_step)
     # loss function
     with tf.device(worker_device):
         self.loss_function(optimizer, learn_rate,
                            args.values_net_batch_size)
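A hedged inference sketch against the tensors created above, assuming the value network's input placeholder uses the same NHWC board layout as the policy examples, and given an already constructed instance net with an open session (set up as in Example #1):

 # Hypothetical forward pass through the value network.
 import numpy as np

 board = np.zeros((1, 15, 15, 3), dtype=np.float32)  # one empty position
 value = net.session.run(net.tf_var["out"],
                         feed_dict={net.tf_var["in"]: board})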