def load_history_policy_model(self, model_file):
    # Rebuild the policy network on CPU with the same architecture, then
    # restore the historical weights into a dedicated session.
    history_policy = PolicyRLNetwork(self.planes, None, filters=self.filters,
                                     board_size=self.board_size,
                                     model_dir=self.model_dir,
                                     device="cpu", distributed_train=False)
    history_policy.session = tf.Session(config=tf.ConfigProto(
        log_device_placement=False, allow_soft_placement=True))
    # tf.initialize_all_variables() is the pre-TF-1.0 name for
    # tf.global_variables_initializer().
    history_policy.session.run(tf.initialize_all_variables())
    history_policy.restore_model(model_file=model_file)
    return history_policy
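# Usage sketch (illustrative, not part of the original file; the variable names
# and checkpoint path are assumptions): during RL self-play an older checkpoint
# can be loaded as a frozen opponent that owns its own tf.Session, independent
# of the training graph.
#
#   rl_net = PolicyRLNetwork(planes=3, policy_args=args)
#   opponent = rl_net.load_history_policy_model(model_file="<path/to/checkpoint>")
#   # opponent.session / opponent.tf_var["out"] can now be evaluated
#   # separately from rl_net's own session.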
def __init__(self, planes, args, phase=1, filters=192, board_size=15,
             model_dir="./value_net_models", model_file=None,
             device="gpu", gpu=1, optimizer="sgd", learn_rate=1e-6,
             distributed_train=False):
    self.board_size = board_size
    self.phase = phase
    self.planes = planes
    # init network
    if distributed_train:
        ps_device = "/job:ps/task:0/cpu:0"
        worker_device = "/job:worker/task:%d/gpu:%d" % (args.task_index, args.gpu_id)
    else:
        ps_device = "/cpu:0"
        if device == "cpu":
            worker_device = "/cpu:0"
        else:
            worker_device = "/gpu:%d" % gpu
    self.tf_var = dict()
    self.tf_var["in"], self.tf_var["out"] = AI_net.create_value_network(
        planes, ps_device, worker_device, filters=filters,
        board_size=self.board_size, name_prefix="value_net")
    # super init
    AI_net.SuperNetwork.__init__(self, model_dir=model_dir)
    history_step = int(self.param_unserierlize(init_params={"global_step": 0})["global_step"])
    with tf.device(ps_device):
        self.global_step = tf.Variable(history_step)
    # loss function
    with tf.device(worker_device):
        self.loss_function(optimizer, learn_rate, args.values_net_batch_size)
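# Device-placement note (illustrative, not in the original file): with
# distributed_train=True and, say, args.task_index=1 and args.gpu_id=0, the
# strings above resolve to
#   ps_device     -> "/job:ps/task:0/cpu:0"      (variables live here)
#   worker_device -> "/job:worker/task:1/gpu:0"  (compute ops live here)
# In the single-machine branch, variables stay on "/cpu:0" and compute goes
# to "/cpu:0" or "/gpu:<gpu>" depending on the `device` argument.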
def __init__(self, planes, corpus, policy_args, filters=192, board_size=15,
             model_dir="./policy_dl_models", device="gpu", gpu=1,
             optimizer="sgd", learn_rate=1e-6, distributed_train=False):
    self.board_size = board_size
    # init network
    if distributed_train:
        ps_device = "/job:ps/task:0/cpu:0"
        worker_device = "/job:worker/task:%d/gpu:%d" % (policy_args.task_index, policy_args.gpu_id)
    else:
        # ps_device = "/cpu:0"
        if device == "cpu":
            worker_device = "/cpu:0"
        else:
            worker_device = "/gpu:%d" % gpu
        # unlike the value/RL networks, variables here share the worker device
        ps_device = worker_device
    self.tf_var = {"in": tf.placeholder(tf.float32, [None, board_size, board_size, planes])}
    with tf.device(worker_device):
        with tf.name_scope('tower_%d' % 0) as scope:
            self.tf_var["out"] = AI_net.create_policy_network(
                self.tf_var["in"], planes, filters=filters,
                board_size=self.board_size, layers=5)
    # super init
    AI_net.SuperNetwork.__init__(self, model_dir=model_dir)
    history_step = int(self.param_unserierlize(init_params={"global_step": 0})["global_step"])
    with tf.device(ps_device):
        self.global_step = tf.Variable(history_step)
    # loss function
    with tf.device(worker_device):
        # learning-rate decay (currently disabled):
        # num_batchs = corpus.num_batchs_per_epochs(policy_args.policy_dl_batch_size)
        # decay_steps = policy_args.policy_dl_epochs_per_decay * num_batchs
        # learn_rate = tf.train.exponential_decay(learn_rate, self.global_step, decay_steps,
        #                                         policy_args.policy_dl_decay_rate, staircase=True)
        self.tf_var["lr"] = tf.Variable(learn_rate)
        self.loss_function(optimizer, learn_rate)
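# Sketch (mirrors the commented-out block above; re-enabling it is an
# assumption, not the shipped behavior): staircase decay would replace the
# constant learning rate with one that drops every `decay_steps` global steps.
#
#   num_batchs = corpus.num_batchs_per_epochs(policy_args.policy_dl_batch_size)
#   decay_steps = policy_args.policy_dl_epochs_per_decay * num_batchs
#   learn_rate = tf.train.exponential_decay(
#       learn_rate, self.global_step, decay_steps,
#       policy_args.policy_dl_decay_rate, staircase=True)
#
# With staircase=True, tf.train.exponential_decay computes
# learn_rate * decay_rate ** (global_step // decay_steps).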
def __init__(self, planes, policy_args, phase=1, filters=192, board_size=15,
             model_dir="./policy_rl_models", device="gpu", gpu=1,
             optimizer="sgd", learn_rate=1e-6, distributed_train=False):
    self.board_size = board_size
    self.phase = phase
    self.planes = planes
    self.filters = filters
    # init network
    if distributed_train:
        ps_device = "/job:ps/task:0/cpu:0"
        worker_device = "/job:worker/task:%d/gpu:%d" % (policy_args.task_index, policy_args.gpu_id)
    else:
        ps_device = "/cpu:0"
        if device == "cpu":
            worker_device = "/cpu:0"
        else:
            worker_device = "/gpu:%d" % gpu
    self.tf_var = {"in": tf.placeholder(tf.float32, [None, board_size, board_size, planes])}
    with tf.device(worker_device):
        with tf.name_scope('tower_%d' % 0) as scope:
            self.tf_var["out"] = AI_net.create_policy_network(
                self.tf_var["in"], planes, filters=filters,
                board_size=self.board_size, layers=5)
    # super init
    AI_net.SuperNetwork.__init__(self, model_dir=model_dir)
    history_step = int(self.param_unserierlize(init_params={"global_step": 0})["global_step"])
    with tf.device(ps_device):
        self.global_step = tf.Variable(history_step)
    # loss function
    with tf.device(worker_device):
        self.loss_function(optimizer, learn_rate)
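# Construction sketch (illustrative; argument values are assumptions): planes
# and filters are kept on self so that load_history_policy_model() above can
# rebuild an identically shaped CPU copy of this network as a historical
# opponent. `args` is assumed to expose task_index and gpu_id only when
# distributed_train=True.
#
#   rl_net = PolicyRLNetwork(planes=3, policy_args=args, phase=1, device="gpu", gpu=0)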