def do_train(self):
    """Run one training step and checkpoint the session every 1000 steps.

    Does nothing until enough experience has been pushed
    (``self.push_done`` reaches ``arg_dict['train_memory']``).
    """
    # Wait for the replay memory to fill before training starts.
    if self.push_done < self.arg_dict['train_memory']:
        return
    # Feed the replay-queue batches plus a snapshot of the tracked
    # variables (evaluated in the current session).
    feed_dict = {}
    for key, placeholder in self.train_ph_dict.items():
        feed_dict[placeholder] = self.queue[key]
    for key, placeholder in self.var_ph_dict.items():
        feed_dict[placeholder] = self.var_dict[key].eval(self.sess)
    _, loss, score_diff = self.sess.run(
        [self.train, self.loss, self.score_diff], feed_dict=feed_dict)
    logging.debug('ZPDDPYFD loss ' + str(loss) + ' ' + str(score_diff))
    self.train_count += 1
    if self.train_count % 1000 == 0:
        checkpoint_path = os.path.join(self.arg_dict['output_path'], 'sess',
                                       str(self.train_count))
        now = time.time()
        # time: ms since training start; time_i: ms since last checkpoint.
        logging.info(
            'CLPNAVGR save session: {}, loss: {}, time: {}, time_i: {}'.
            format(checkpoint_path, loss,
                   int((now - self.timestamp) * 1000),
                   int((now - self.timestamp_last) * 1000)))
        self.timestamp_last = now
        py23.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)
        self.saver.save(self.sess, checkpoint_path)
    logging.debug('HZQQMSQT ' + MY_NAME + ' ' + str(self.train_count))
def do_train(self):
    """Run one training step and checkpoint the session every 1000 steps.

    Does nothing until the replay queue holds at least 100 samples.
    """
    if len(self.queue['state_0']) < 100:
        return
    # Feed the replay-queue batches plus a snapshot of the tracked
    # variables (evaluated in the current session).
    feed_dict = {}
    for k in self.train_ph_dict:
        feed_dict[self.train_ph_dict[k]] = list(self.queue[k])
    for k in self.var_ph_dict:
        feed_dict[self.var_ph_dict[k]] = self.var_dict[k].eval(self.sess)
    _, loss, score_diff = self.sess.run(
        [self.train, self.loss, self.score_diff], feed_dict=feed_dict)
    logging.debug('ZPDDPYFD loss ' + str(loss) + ' ' + str(score_diff))
    self.train_count += 1
    if self.train_count % 1000 == 0:
        # Fix: build the checkpoint path with os.path.join instead of a
        # hand-formatted '/'-separated string — portable and consistent
        # with the other checkpoint-path construction in this project.
        output_file_name = os.path.join(self.arg_dict['output_path'], 'sess',
                                        '{}.ckpt'.format(self.train_count))
        py23.makedirs(os.path.dirname(output_file_name), exist_ok=True)
        self.saver.save(self.sess, output_file_name)
        logging.info('CLPNAVGR save session: {}, loss: {}, time: {}'.format(
            output_file_name, loss,
            int((time.time() - self.timestamp) * 1000)))
    logging.debug('HZQQMSQT ' + MY_NAME + ' ' + str(self.train_count))
def do_train(self):
    """Run one training step and checkpoint the session every 1000 steps.

    Does nothing until enough experience has been pushed
    (``self.push_done`` reaches ``arg_dict['train_memory']``).
    """
    if self.push_done < self.arg_dict['train_memory']:
        return
    # Feed the replay-queue batches plus a snapshot of the tracked
    # variables (evaluated in the current session).
    feed_dict = {}
    for k in self.train_ph_dict:
        feed_dict[self.train_ph_dict[k]] = self.queue[k]
    for k in self.var_ph_dict:
        feed_dict[self.var_ph_dict[k]] = self.var_dict[k].eval(self.sess)
    _, loss, score_diff = self.sess.run(
        [self.train, self.loss, self.score_diff], feed_dict=feed_dict)
    logging.debug('ZPDDPYFD loss ' + str(loss) + ' ' + str(score_diff))
    self.train_count += 1
    if self.train_count % 1000 == 0:
        output_file_name = os.path.join(self.arg_dict['output_path'], 'sess',
                                        str(self.train_count))
        timestamp_last = time.time()
        # Fix: math.log(loss) raises ValueError for loss <= 0, which
        # would kill the training loop at a checkpoint boundary once the
        # loss hits zero; report +inf for the degenerate case instead.
        neg_log_loss = -math.log(loss) if loss > 0 else float('inf')
        # time: ms since training start; time_i: ms since last checkpoint.
        logging.info(
            'CLPNAVGR save session: {}, loss: {}, -log(loss): {}, '
            'time: {}, time_i: {}'.format(
                output_file_name, loss, neg_log_loss,
                int((time.time() - self.timestamp) * 1000),
                int((timestamp_last - self.timestamp_last) * 1000)))
        self.timestamp_last = timestamp_last
        py23.makedirs(os.path.dirname(output_file_name), exist_ok=True)
        self.saver.save(self.sess, output_file_name)
    logging.debug('HZQQMSQT ' + MY_NAME + ' ' + str(self.train_count))
def do_train(self):
    """Run one training step and checkpoint the session every 1000 steps.

    Does nothing until the replay queue holds at least 100 samples.
    """
    if len(self.queue['state_0']) < 100:
        return
    # Feed the replay-queue batches plus a snapshot of the tracked
    # variables (evaluated in the current session).
    feed_dict = {}
    for k in self.train_ph_dict:
        feed_dict[self.train_ph_dict[k]] = list(self.queue[k])
    for k in self.var_ph_dict:
        feed_dict[self.var_ph_dict[k]] = self.var_dict[k].eval(self.sess)
    _, loss, score_diff = self.sess.run(
        [self.train, self.loss, self.score_diff], feed_dict=feed_dict)
    logging.debug('ZPDDPYFD loss ' + str(loss) + ' ' + str(score_diff))
    self.train_count += 1
    if self.train_count % 1000 == 0:
        # Fix: build the checkpoint path with os.path.join instead of a
        # hand-formatted '/'-separated string — portable and consistent
        # with the other checkpoint-path construction in this project.
        output_file_name = os.path.join(self.arg_dict['output_path'], 'sess',
                                        '{}.ckpt'.format(self.train_count))
        py23.makedirs(os.path.dirname(output_file_name), exist_ok=True)
        self.saver.save(self.sess, output_file_name)
        logging.info(
            'CLPNAVGR save session: {}, loss: {}, time: {}'.format(
                output_file_name, loss,
                int((time.time() - self.timestamp) * 1000)))
    logging.debug('HZQQMSQT ' + MY_NAME + ' ' + str(self.train_count))
def main(_):
    """Parse command-line flags, then build and run the self-play game.

    Writes the resolved arguments to ``input_arg_dict.json`` inside the
    output path so a run can be inspected/reproduced later.
    """
    argparser = argparse.ArgumentParser()
    argparser.add_argument(
        '--output_path',
        type=str,
        help='The path to which checkpoints and other outputs '
        'should be saved. This can be either a local or GCS '
        'path.',
        default=None)
    argparser.add_argument('--random_stddev', type=float,
                           help='random_stddev', default=0.1)
    argparser.add_argument('--random_move_chance', type=float,
                           help='RANDOM_MOVE_CHANCE', default=0.05)
    argparser.add_argument('--train_beta', type=float,
                           help='TRAIN_BETA', default=0.99)
    argparser.add_argument('--turn_count', type=int,
                           help='turn_count', default=None)
    argparser.add_argument('--device', type=str, help='device', default=None)
    argparser.add_argument('--train_memory', type=int,
                           help='TRAIN_MEMORY', default=20000)
    # parse_known_args: tolerate extra flags (e.g. those injected by the
    # hosting environment) instead of erroring out.
    args, _ = argparser.parse_known_args()
    arg_dict = vars(args)
    logging.info('YGYMBFMN arg_dict {}'.format(json.dumps(arg_dict)))
    # Fix: compare against None with `is`, not `==` (PEP 8).
    if arg_dict['output_path'] is None:
        timestamp = int(time.time())
        arg_dict['output_path'] = os.path.join('output', MY_NAME, 'deeplearn',
                                               str(timestamp))
    py23.makedirs(arg_dict['output_path'], exist_ok=True)
    # Persist the resolved arguments alongside the outputs.
    with open(os.path.join(arg_dict['output_path'], 'input_arg_dict.json'),
              'w') as out_file:
        json.dump(arg_dict, out_file)
    with tf.device(arg_dict['device']):
        game = Game()
        dl = DeepLearn(arg_dict)
        po = DLPlayer(dl)
        po.set_side(tttl.Pid.O)
        px = DLPlayer(dl)
        px.set_side(tttl.Pid.X)
        game.setPlayer(tttl.Pid.O, po)
        game.setPlayer(tttl.Pid.X, px)
        game.run(turn_count=arg_dict['turn_count'], p=logging.debug)
str(timestamp), 'deeplearn') py23.makedirs(arg_dict['output_path'], exist_ok=True) with open(os.path.join(arg_dict['output_path'], 'input_arg_dict.json'), 'w') as out_file: json.dump(arg_dict, out_file) with tf.device(arg_dict['device']): # with tf.device('/gpu:0'): game = Game() dl = DeepLearn(arg_dict) po = DLPlayer(dl) po.set_side(tttl.Pid.O) px = DLPlayer(dl) px.set_side(tttl.Pid.X) game.setPlayer(tttl.Pid.O, po) game.setPlayer(tttl.Pid.X, px) game.run(turn_count=arg_dict['turn_count'], p=logging.debug) if __name__ == '__main__': log_filename = os.path.join( 'log', '{}-{}-deeplearn.log'.format(str(int(time.time())), MY_NAME)) py23.makedirs(os.path.dirname(log_filename), exist_ok=True) logging.basicConfig(level=logging.INFO, filename=log_filename) tf.app.run()
def run_vs_dict(vs_dict,arg_dict): tf.reset_default_graph() po = create_player(vs_dict['O'],arg_dict) px = create_player(vs_dict['X'],arg_dict) result = vs(po,px,1000) po.close() px.close() logging.info(vs_dict) logging.info(json.dumps(result)) return result if __name__ == '__main__': timestamp = int(time.time()) log_filename = os.path.join('log','{}-{}-dlcompare.log'.format(timestamp,MY_NAME)) py23.makedirs(os.path.dirname(log_filename),exist_ok=True) logging.basicConfig(level=logging.INFO,filename=log_filename) import argparse parser = argparse.ArgumentParser(description='Compare trained model with random and perfect AI') parser.add_argument('--path', metavar='N', type=str, nargs='?', default=None, help='working path') parser.add_argument('--count', metavar='N', type=int, nargs='?', default=None, help='compare count') args = parser.parse_args() arg_dict = vars(args) if arg_dict['path'] == None: scan_dir = os.path.join('output',MY_NAME) filename_list = os.listdir(scan_dir) if len(filename_list) <= 0:
py23.makedirs(arg_dict['output_path'], exist_ok=True) with open(os.path.join(arg_dict['output_path'], 'input_arg_dict.json'), 'w') as out_file: json.dump(arg_dict, out_file) with tf.device(arg_dict['device']): # with tf.device('/gpu:0'): game = Game() dl = DeepLearn(arg_dict) po = DLPlayer(dl) po.set_side(tttl.Pid.O) px = DLPlayer(dl) px.set_side(tttl.Pid.X) game.setPlayer(tttl.Pid.O, po) game.setPlayer(tttl.Pid.X, px) game.run(turn_count=arg_dict['turn_count'], p=logging.debug) if __name__ == '__main__': py23.makedirs(os.path.join('log', MY_NAME), exist_ok=True) logging.basicConfig(level=logging.INFO, filename=os.path.join( 'log', MY_NAME, '{}.log'.format(str(int(time.time()))))) tf.app.run()
def main(_):
    """Parse command-line flags, then build and run the self-play game.

    Supports ``--continue``: resume the most recent run under
    ``output/<MY_NAME>/`` and reuse its saved ``input_arg_dict.json``
    instead of writing a fresh one.
    """
    argparser = argparse.ArgumentParser()
    argparser.add_argument(
        '--output_path',
        type=str,
        help='The path to which checkpoints and other outputs '
        'should be saved. This can be either a local or GCS '
        'path.',
        default=None)
    argparser.add_argument('--random_stddev', type=float,
                           help='random_stddev', default=0.1)
    argparser.add_argument('--random_move_chance', type=float,
                           help='RANDOM_MOVE_CHANCE', default=0.05)
    argparser.add_argument('--train_beta', type=float,
                           help='TRAIN_BETA', default=0.99)
    argparser.add_argument('--turn_count', type=int,
                           help='turn_count', default=None)
    argparser.add_argument('--device', type=str, help='device', default=None)
    # Note: `continue` is a keyword, so this value is only reachable via
    # vars(args)['continue'], which is how it is read below.
    argparser.add_argument('--continue', action='store_true', help='continue')
    argparser.add_argument('--train_memory', type=int,
                           help='TRAIN_MEMORY', default=10000)
    argparser.add_argument('--dropout_keep_prob', type=float,
                           help='dropout_keep_prob', default=0.5)
    # parse_known_args: tolerate extra flags instead of erroring out.
    args, _ = argparser.parse_known_args()
    arg_dict = vars(args)
    logging.info('YGYMBFMN arg_dict {}'.format(json.dumps(arg_dict)))
    # Fix: compare against None with `is`, not `==` (PEP 8).
    if arg_dict['output_path'] is None:
        if not arg_dict['continue']:
            timestamp = int(time.time())
            arg_dict['output_path'] = os.path.join('output', MY_NAME,
                                                   str(timestamp), 'deeplearn')
        else:
            # Resume the newest timestamped run directory.
            # NOTE(review): max() raises on an empty directory listing —
            # presumably a prior run always exists when --continue is
            # passed; confirm before hardening.
            filename_list = os.listdir(os.path.join('output', MY_NAME))
            filename_int_list = [
                util.to_int(filename, -1) for filename in filename_list
            ]
            arg_timestamp = str(max(filename_int_list))
            arg_dict['output_path'] = os.path.join('output', MY_NAME,
                                                   str(arg_timestamp),
                                                   'deeplearn')
    if arg_dict['continue']:
        # Reuse the arguments the original run was started with.
        with open(os.path.join(arg_dict['output_path'],
                               'input_arg_dict.json'),
                  'r') as deeplearn_arg_dict_file:
            deeplearn_arg_dict = json.load(deeplearn_arg_dict_file)
        arg_dict = deeplearn_arg_dict
        arg_dict['continue'] = True
    else:
        py23.makedirs(arg_dict['output_path'], exist_ok=True)
        # Persist the resolved arguments alongside the outputs.
        with open(os.path.join(arg_dict['output_path'],
                               'input_arg_dict.json'), 'w') as out_file:
            json.dump(arg_dict, out_file)
    with tf.device(arg_dict['device']):
        game = Game()
        dl = DeepLearn(arg_dict)
        po = DLPlayer(dl)
        po.set_side(tttl.Pid.O)
        px = DLPlayer(dl)
        px.set_side(tttl.Pid.X)
        game.setPlayer(tttl.Pid.O, po)
        game.setPlayer(tttl.Pid.X, px)
        game.run(turn_count=arg_dict['turn_count'], p=logging.debug)
def main(_):
    """Parse command-line flags, then build and run the self-play game.

    Writes the resolved arguments to ``input_arg_dict.json`` inside the
    output path so a run can be inspected/reproduced later.
    """
    argparser = argparse.ArgumentParser()
    argparser.add_argument(
        '--output_path',
        type=str,
        help='The path to which checkpoints and other outputs '
        'should be saved. This can be either a local or GCS '
        'path.',
        default=None)
    argparser.add_argument('--random_stddev', type=float,
                           help='random_stddev', default=0.1)
    argparser.add_argument('--random_move_chance', type=float,
                           help='RANDOM_MOVE_CHANCE', default=0.05)
    argparser.add_argument('--train_beta', type=float,
                           help='TRAIN_BETA', default=0.99)
    argparser.add_argument('--turn_count', type=int,
                           help='turn_count', default=None)
    argparser.add_argument('--device', type=str, help='device', default=None)
    argparser.add_argument('--train_memory', type=int,
                           help='TRAIN_MEMORY', default=20000)
    # parse_known_args: tolerate extra flags instead of erroring out.
    args, _ = argparser.parse_known_args()
    arg_dict = vars(args)
    logging.info('YGYMBFMN arg_dict {}'.format(json.dumps(arg_dict)))
    # Fixes: `is None` instead of `== None` (PEP 8), and os.path.join
    # instead of hand-formatted '/'-separated path strings (portable and
    # consistent with the rest of this project).
    if arg_dict['output_path'] is None:
        timestamp = int(time.time())
        arg_dict['output_path'] = os.path.join('output', MY_NAME, 'deeplearn',
                                               str(timestamp))
    py23.makedirs(arg_dict['output_path'], exist_ok=True)
    # Persist the resolved arguments alongside the outputs.
    with open(os.path.join(arg_dict['output_path'], 'input_arg_dict.json'),
              'w') as out_file:
        json.dump(arg_dict, out_file)
    with tf.device(arg_dict['device']):
        game = Game()
        dl = DeepLearn(arg_dict)
        po = DLPlayer(dl)
        po.set_side(tttl.Pid.O)
        px = DLPlayer(dl)
        px.set_side(tttl.Pid.X)
        game.setPlayer(tttl.Pid.O, po)
        game.setPlayer(tttl.Pid.X, px)
        game.run(turn_count=arg_dict['turn_count'], p=logging.debug)
def main(_):
    """Parse command-line flags, then build and run the self-play game.

    Supports ``--continue``: resume the most recent run under
    ``output/<MY_NAME>/`` and reuse its saved ``input_arg_dict.json``
    instead of writing a fresh one.
    """
    argparser = argparse.ArgumentParser()
    argparser.add_argument(
        '--output_path',
        type=str,
        help='The path to which checkpoints and other outputs '
        'should be saved. This can be either a local or GCS '
        'path.',
        default=None)
    argparser.add_argument('--random_stddev', type=float,
                           help='random_stddev', default=0.1)
    argparser.add_argument('--random_move_chance', type=float,
                           help='RANDOM_MOVE_CHANCE', default=0.05)
    argparser.add_argument('--train_beta', type=float,
                           help='TRAIN_BETA', default=0.99)
    argparser.add_argument('--turn_count', type=int,
                           help='turn_count', default=None)
    argparser.add_argument('--device', type=str, help='device', default=None)
    # Note: `continue` is a keyword, so this value is only reachable via
    # vars(args)['continue'], which is how it is read below.
    argparser.add_argument('--continue', action='store_true', help='continue')
    argparser.add_argument('--train_memory', type=int,
                           help='TRAIN_MEMORY', default=10000)
    argparser.add_argument('--starter_learning_rate', type=float,
                           help='starter_learning_rate', default=0.01)
    argparser.add_argument('--learning_rate_decay_steps', type=int,
                           help='learning_rate_decay_steps', default=1000000)
    argparser.add_argument('--learning_rate_decay_rate', type=float,
                           help='learning_rate_decay_rate', default=0.5)
    # parse_known_args: tolerate extra flags instead of erroring out.
    args, _ = argparser.parse_known_args()
    arg_dict = vars(args)
    logging.info('YGYMBFMN arg_dict {}'.format(json.dumps(arg_dict)))
    # Fix: compare against None with `is`, not `==` (PEP 8).
    if arg_dict['output_path'] is None:
        if not arg_dict['continue']:
            timestamp = int(time.time())
            arg_dict['output_path'] = os.path.join('output', MY_NAME,
                                                   str(timestamp), 'deeplearn')
        else:
            # Resume the newest timestamped run directory.
            # NOTE(review): max() raises on an empty directory listing —
            # presumably a prior run always exists when --continue is
            # passed; confirm before hardening.
            filename_list = os.listdir(os.path.join('output', MY_NAME))
            filename_int_list = [
                util.to_int(filename, -1) for filename in filename_list
            ]
            arg_timestamp = str(max(filename_int_list))
            arg_dict['output_path'] = os.path.join('output', MY_NAME,
                                                   str(arg_timestamp),
                                                   'deeplearn')
    if arg_dict['continue']:
        # Reuse the arguments the original run was started with.
        with open(os.path.join(arg_dict['output_path'],
                               'input_arg_dict.json'),
                  'r') as deeplearn_arg_dict_file:
            deeplearn_arg_dict = json.load(deeplearn_arg_dict_file)
        arg_dict = deeplearn_arg_dict
        arg_dict['continue'] = True
    else:
        py23.makedirs(arg_dict['output_path'], exist_ok=True)
        # Persist the resolved arguments alongside the outputs.
        with open(os.path.join(arg_dict['output_path'],
                               'input_arg_dict.json'), 'w') as out_file:
            json.dump(arg_dict, out_file)
    with tf.device(arg_dict['device']):
        game = Game()
        dl = DeepLearn(arg_dict)
        po = DLPlayer(dl)
        po.set_side(tttl.Pid.O)
        px = DLPlayer(dl)
        px.set_side(tttl.Pid.X)
        game.setPlayer(tttl.Pid.O, po)
        game.setPlayer(tttl.Pid.X, px)
        game.run(turn_count=arg_dict['turn_count'], p=logging.debug)
logging.info('YGYMBFMN arg_dict {}'.format(json.dumps(arg_dict))) if(arg_dict['output_path']==None): timestamp = int(time.time()) arg_dict['output_path'] = os.path.join('output',MY_NAME,'deeplearn',str(timestamp)) py23.makedirs(arg_dict['output_path'],exist_ok=True) with open(os.path.join(arg_dict['output_path'],'input_arg_dict.json'),'w') as out_file: json.dump(arg_dict,out_file) with tf.device(arg_dict['device']): # with tf.device('/gpu:0'): game = Game() dl = DeepLearn(arg_dict) po = DLPlayer(dl) po.set_side(tttl.Pid.O) px = DLPlayer(dl) px.set_side(tttl.Pid.X) game.setPlayer(tttl.Pid.O, po) game.setPlayer(tttl.Pid.X, px) game.run(turn_count=arg_dict['turn_count'],p=logging.debug) if __name__ == '__main__': py23.makedirs(os.path.join('log',MY_NAME),exist_ok=True) logging.basicConfig(level=logging.INFO,filename=os.path.join('log',MY_NAME,'{}.log'.format(str(int(time.time()))))) tf.app.run()