def gtp_io():
    """Run a blocking GTP (Go Text Protocol) command loop on stdin/stdout.

    Reads one command per line until EOF or ``quit``.  Each command may be
    prefixed with a numeric id, which is echoed back in the response.
    Successful commands are answered with ``=<id> <result>`` and failed or
    unknown ones with ``?<id> ???``; every response is terminated by an
    empty line and stdout is flushed after it.

    The game position is the module-level ``go`` object; ``clear_board``
    replaces it with a fresh ``board.Go()``.  Only 19x19 boards are
    supported.  ``komi`` is accepted and ignored.

    Side effects: mutates/rebinds the global ``go``, writes to stdout,
    writes warnings through ``debug_print`` and terminates the process via
    ``sys.exit(0)`` on ``quit``.
    """
    global go
    known_commands = [
        'boardsize', 'clear_board', 'komi', 'play', 'genmove', 'quit',
        'name', 'version', 'known_command', 'list_commands',
        'protocol_version',
    ]
    # Set when the opponent passes; the next genmove then answers with a
    # pass as well instead of asking the network for a move.
    opponent_passed = False

    while True:
        try:
            line = raw_input().strip()
        except EOFError:
            break
        if line == '':
            continue

        command = [s.lower() for s in line.split()]
        # An optional all-digit first token is the command id.
        if re.match(r'\d+$', command[0]):
            cmdid = command[0]
            command = command[1:]
        else:
            cmdid = ''
        # A line holding only an id has no command name; it is reported as
        # an unknown command below instead of raising IndexError.
        name = command[0] if command else ''
        args = command[1:]

        # ret == '' means "success, empty result"; ret is None means failure.
        ret = ''
        if name == 'boardsize':
            if not (args and args[0] == '19'):
                debug_print(
                    "Warning: Trying to set incompatible boardsize %s (!= %d)"
                    % (args[0] if args else '', 19))
                ret = None
        elif name == 'clear_board':
            go = board.Go()
            opponent_passed = False
        elif name == 'komi':
            pass
        elif name == 'play':
            if len(args) < 2:
                ret = None
            else:
                colour = args[0].upper()
                vertex = args[1].upper()
                if vertex == 'PASS':
                    # -1 is the move number used for a pass on the board.
                    go.place_stone_num(-1)
                    opponent_passed = True
                elif colour in ('B', 'W', 'BLACK', 'WHITE'):
                    go.place_stone_num(util.gtppos_to_num(vertex))
                else:
                    ret = None
        elif name == 'genmove':
            if opponent_passed:
                # NOTE(review): this pass is not recorded on ``go`` (the
                # original code did not record it either) - confirm whether
                # the board should be told about it.
                opponent_passed = False
                ret = 'pass'
            else:
                move = make_prediction()
                if move is None:
                    ret = 'pass'
                elif move == -1:
                    ret = 'resign'
                else:
                    ret = util.pos_to_gtppos(util.num_to_pos(move))
                    go.place_stone_num(move)
        elif name == 'name':
            ret = 'PikachuP, 2018'
        elif name == 'version':
            ret = '0.2'
        elif name == 'known_command':
            ret = 'true' if args and args[0] in known_commands else 'false'
        elif name == 'list_commands':
            ret = '\n'.join(known_commands)
        elif name == 'protocol_version':
            ret = '2'
        elif name == 'quit':
            sys.stdout.write('=%s \n\n' % (cmdid, ))
            sys.stdout.flush()
            sys.exit(0)
        else:
            debug_print("Unknown Command! ")
            ret = None

        if ret is not None:
            sys.stdout.write('=%s %s\n\n' % (cmdid, ret))
        else:
            sys.stdout.write('?%s ???\n\n' % (cmdid, ))
        sys.stdout.flush()
def load_file(self):
    """Load ``.npy`` feature/label files, decode them and publish the result.

    In training mode (``self.is_train`` true) this loops forever: it loads
    the file pair selected by ``self.train_list[self.train_index]``, puts
    ``[training_data, label_data]`` on ``self.queue``, advances the index
    (reshuffling ``self.train_list`` after a full pass) and then blocks on
    the ``self.can_load_file`` event before loading the next pair.

    In validation mode it loads the ``val.npy`` pair once, stores it in
    ``self.data_list`` as two ``mx.ndarray`` arrays on
    ``config.data_device`` and returns.
    """
    while True:
        filename_train = ''
        filename_label = ''
        # Work out which files have to be loaded.
        if self.is_train:
            index = self.train_list[self.train_index]
            my_print('[pl %s' % str(index).rjust(4))
            filename_train = config.train_prefix + str(index) + '.npy'
            filename_label = config.label_prefix + str(index) + '.npy'
        else:
            filename_train = config.train_prefix + 'val.npy'
            filename_label = config.label_prefix + 'val.npy'
            my_print('[pl validate data')
        # Create a fresh game.
        # NOTE(review): no stone is placed inside the loop below and ``res``
        # is only printed, so this looks like leftover debugging that runs
        # 300 predictions per file load - confirm before removing.
        go = board.Go()
        for i in range(300):
            feature = go.generate()
            iter = mx.io.NDArrayIter(data=feature)
            res = self.module.predict(iter).asnumpy()
            print res
        training_data = np.load(filename_train)
        training_data = training_data.reshape(-1, 1, 361)
        # ``exp2`` and ``input_filters`` are defined outside this method;
        # presumably ``exp2`` holds one bit mask per feature plane - TODO
        # confirm.
        exp = exp2.T
        exp = exp.reshape(1, input_filters, 1)
        # Test each mask against every point, turn the booleans into 0/1
        # integers (``+ 0``) and reshape to (N, input_filters, 19, 19).
        training_data = ((np.bitwise_and(training_data, exp) > 0) + 0).reshape(-1, input_filters, 19, 19)
        label = np.load(filename_label)
        NUM = training_data.shape[0]
        # One-hot encode the labels so they can be viewed as 19x19 boards.
        label_data_ = np.zeros((NUM, 361))
        for i in range(NUM):
            label_data_[i][label[i][0]] = 1
        label_data_ = label_data_.reshape(-1, 19, 19)
        if self.is_train and config.apply_symmetry:
            # The return value is ignored, so this is presumably an in-place
            # transform of both arrays - TODO confirm.
            symmetry.apply_random_symmetry(training_data, label_data_)
        # Collapse the one-hot boards back to a point index per sample.
        label_data = label_data_.reshape(-1, 361)
        label_data = np.argmax(label_data, axis=1)
        # Drop the references to the intermediate arrays.
        label = None
        label_data_ = None
        # Signals that loading has finished.
        my_print(']')
        if self.is_train:
            self.queue.put(obj=[training_data, label_data], block=True, timeout=None)
            self.train_index = self.train_index + 1
            # Once every file has been used for training, reshuffle the
            # file order and start over.
            if self.train_index >= len(self.train_list):
                self.train_index = 0
                random.shuffle(self.train_list)
        else:
            # Validation data is loaded in one go.
            self.data_list = [mx.ndarray.array(training_data, config.data_device), \
                              mx.ndarray.array(label_data, config.data_device)]
        gc.collect()  # Request a garbage collection.
        if not self.is_train:
            return
        # Stop here and wait for the signal to load the next file.
        if self.is_train:
            self.can_load_file.wait()
            self.can_load_file.clear()
import zobrist
import time
import math
import random
import go_plot
import config

# Search settings, read once from the project ``config`` module.
ROLLOUTS = config.search_times_ucb
DEPTH = config.search_depth_ucb
UCB_C = config.para_c_ucb
POLICY_WEIGHT = config.policy_weight
# The current global board position ``go``.  Functions that access it
# should declare it with ``global``.
""" 当前全局盘面go,在函数中访问,请用global进行声明。 """
go = board.Go()
# For debugging, uncomment the two lines below.
""" 如果需要进行调试,取消下面两行的注释。 """
# np.set_printoptions(threshold='nan')
# np.set_printoptions(precision=2, suppress=True)
# debug_print is used for human/computer interaction on the console.
""" debug_print用来在console中进行人机交互 """


def debug_print(args):
    """Print ``args`` to stderr and flush the stream immediately."""
    print >> sys.stderr, args
    sys.stderr.flush()
    return
def self_play(): global module label_black = [] label_white = [] t0 = time.clock() black = None white = None TOTAL = 350 NUM = 300 go = board.Go() for i in range(TOTAL): # 生成特征 if i % 10 == 0: print i, feature = go.generate() # 把特征喂入神经网络,获得预测 iter = mx.io.NDArrayIter(data=feature) pred = module.predict(iter).asnumpy() # 将不可入点扔掉 pred = feature[0][2].reshape(1, 361) * pred * 10 # 排个序 out = np.argsort(-pred) predsort = -np.sort(-pred) # print predsort[0: K] # 看看前K个的值 process = out[0][0:K] idx = random_pick(predsort[0][0:K].reshape(-1)) if i == 0: black = feature.reshape(-1, 16, 19, 19) label_black.append(out[0][idx]) go.place_stone_num(out[0][idx]) elif i == 1: white = feature.reshape(-1, 16, 19, 19) label_white.append(out[0][idx]) go.place_stone_num(out[0][idx]) elif i < NUM: f = feature.reshape(-1, 16, 19, 19) if i % 2 == 0: black = np.vstack((black, f)) label_black.append(out[0][idx]) else: white = np.vstack((white, f)) label_white.append(out[0][idx]) go.place_stone_num(out[0][idx]) else: go.place_stone_num(out[0][0]) go = None go = board.Go() # print len(label_black) for i in range(TOTAL): # 生成特征 if i % 10 == 0: print i, feature = go.generate() # 把特征喂入神经网络,获得预测 iter = mx.io.NDArrayIter(data=feature) pred = module.predict(iter).asnumpy() # 将不可入点扔掉 pred = feature[0][2].reshape(1, 361) * pred * 10 # 排个序 out = np.argsort(-pred) predsort = -np.sort(-pred) # print predsort[0: K] # 看看前K个的值 process = out[0][0:K] idx = random_pick(predsort[0][0:K].reshape(-1)) if i < NUM: f = feature.reshape(-1, 16, 19, 19) if i % 2 == 0: black = np.vstack((black, f)) label_black.append(out[0][idx]) else: white = np.vstack((white, f)) label_white.append(out[0][idx]) go.place_stone_num(out[0][idx]) else: go.place_stone_num(out[0][0]) print cnt = 0 for x in label_black: print x, cnt += 1 if cnt == 150: print print label_white BLACK_LABEL = np.zeros((300, 361), dtype=np.int16) WHITE_LABEL = np.zeros((300, 361), dtype=np.int16) for i in range(600): if i % 2 == 0: # 偶数,黑棋盘面 # print "BLACK", i 
BLACK_LABEL[i // 2][label_black[i // 2]] = 1 else: # print "WHITE", i WHITE_LABEL[(i - 1) // 2][label_white[(i - 1) // 2]] = 1 permutation = np.random.permutation(black.shape[0]) black = black[permutation, :] white = white[permutation, :] BLACK_LABEL = BLACK_LABEL.reshape(-1, 19, 19) WHITE_LABEL = WHITE_LABEL.reshape(-1, 19, 19) BLACK_LABEL = BLACK_LABEL.reshape(-1, 361) WHITE_LABEL = WHITE_LABEL.reshape(-1, 361) BLACK_LABEL = BLACK_LABEL[permutation, :] WHITE_LABEL = WHITE_LABEL[permutation, :] BLACK_LABEL = np.argmax(BLACK_LABEL, axis=1) WHITE_LABEL = np.argmax(WHITE_LABEL, axis=1) print BLACK_LABEL print WHITE_LABEL """ # 判断胜负, 黑胜返回1,白胜利返回0 def evaluate(self): """ # go_plot.go_plot(terminal // 50) result = go.evaluate() if result == 1: print("B+") else: print("W+") print "generate_used", time.clock() - t0 BLACK = black WHITE = white # print self.BLACK # print self.WHITE """ 0代表黑胜利,1代表白胜利 """ if result == 1: winner = 1 else: winner = 0 go = None label_black = None label_white = None return result