Example #1
def __init__(self, planes, patterns, policy_args, board_size=15, model_dir="./policy_rollout_models", model_file=None,
             device="gpu", gpu=1, optimizer="sgd", learn_rate=1e-6, distributed_train=False):
    self.board_size = board_size
    # choose devices: long-lived variables go to the parameter server, the graph to a worker
    if distributed_train:
        ps_device = "/job:ps/task:0/cpu:0"
        worker_device = "/job:worker/task:%d/gpu:%d" % (policy_args.task_index, policy_args.gpu_id)
    else:
        ps_device = "/cpu:0"
        if device == "cpu":
            worker_device = "/cpu:0"
        else:
            worker_device = "/gpu:%d" % gpu
    # input placeholder: flattened board planes, shape [batch, board_size * board_size * planes]
    self.tf_var = {"in": tf.placeholder(tf.float32, [None, board_size * board_size * planes])}
    # build the rollout network on the worker device
    with tf.device(worker_device):
        with tf.name_scope('tower_%d' % 0) as scope:
            self.tf_var["out"] = AI_net.create_rollout_network(self.tf_var["in"],
                                                               planes, board_size=self.board_size)
    # super init: set up the model directory and restore the persisted global step
    AI_net.SuperNetwork.__init__(self, model_dir=model_dir)
    history_step = int(self.param_unserierlize(init_params={"global_step": 0})["global_step"])
    with tf.device(ps_device):
        self.global_step = tf.Variable(history_step)
    # loss function and optimizer (exponential learning-rate decay is left disabled)
    with tf.device(worker_device):
        # num_batchs = patterns.num_batchs_per_epochs(policy_args.policy_rollout_batch_size)
        # decay_steps = policy_args.policy_rollout_epochs_per_decay * num_batchs
        # learn_rate = tf.train.exponential_decay(learn_rate, self.global_step, decay_steps,
        #                                         policy_args.policy_rollout_decay_rate, staircase=True)
        self.tf_var["lr"] = tf.Variable(learn_rate)
        self.loss_function(optimizer, learn_rate)
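
The constructor above pins the restored global step to a parameter-server device and builds the rollout network under a worker device, which is the usual TensorFlow 1.x pattern for between-graph replicated training. Below is a minimal standalone sketch of that placement pattern, assuming TensorFlow 1.x; the board shapes, the single dense layer, and all names are illustrative and not taken from the project.

# Minimal sketch of ps/worker device placement (assumes TensorFlow 1.x).
import tensorflow as tf

ps_device = "/cpu:0"        # long-lived state (e.g. the global step) lives here
worker_device = "/cpu:0"    # swap for "/gpu:0" when a GPU is available

with tf.device(ps_device):
    global_step = tf.Variable(0, trainable=False, name="global_step")

with tf.device(worker_device):
    # one flattened 15x15 board with 3 feature planes (illustrative only)
    board_in = tf.placeholder(tf.float32, [None, 15 * 15 * 3])
    logits = tf.layers.dense(board_in, 15 * 15)   # stand-in for the rollout network

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # feed a single zero board and print the output shape, e.g. [1 225]
    print(sess.run(tf.shape(logits), feed_dict={board_in: [[0.0] * (15 * 15 * 3)]}))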