def main():
    """Worker entry point.

    Receives a model name and a simulation count from the peer over a
    fresh connection, restores the named model, builds the game from the
    dealt initial state, and hands control to game_start.
    """
    conn = connect()
    config.set_device(args.gpu)

    # Busy-wait until the peer sends a non-None model name.
    model_name = None
    while model_name is None:
        model_name = conn.recv()
    model = Model('model_' + model_name)
    model.forced_restore()
    agent = Agent(model)

    # Busy-wait until the peer sends the simulation count.
    sims = None
    while sims is None:
        sims = conn.recv()
    sims = int(sims)
    config.simulations = sims

    env = GameEnv()
    env.load(deal_init(conn))
    game = Game(env, args.player)
    game_start(agent, sims, game, conn)
def __call__(self):
    """Return the next (game, policy, value) sample, or None when the
    underlying data is exhausted. Advances the internal cursor by one."""
    if self.empty():
        return None
    record = self.data[self.iter]
    self.iter += 1
    file, init, player, p, v = record
    env = GameEnv(False)
    env.load(init)
    return Game(env, player), p, v
def unpack(init, ps, player):
    """Replay the action sequence `ps` from the initial state `init`.

    Before each move the current model input is appended to a growing
    prefix; the prefix snapshot taken at every step is returned as a list
    of int8 arrays (one cumulative history per step).
    """
    env = GameEnv(False)
    env.load(init)
    game = Game(env, player)

    histories = []
    prefix = []
    for action in ps:
        # New list each step so earlier snapshots are not mutated.
        prefix = prefix + [game.to_model_input()]
        histories.append(prefix)
        game.move(action)

    return [np.array(h, dtype=np.int8) for h in histories]
def main():
    """Debug driver: replay a fixed deal and action sequence while advancing
    an MCTS tree, then run 2000 simulations at the resulting position and
    dump visit-count / prior statistics for the root's edges."""
    model: Model
    config.set_device(1)
    env = GameEnv()
    # model = Model('model')
    model = Model('model_201912080009')
    # model = Model('model_tencent0824')
    model.forced_restore()
    # agent = Agent(model)
    agent = Agent(model)
    # Hard-coded initial state, 3 rows x 15 columns — presumably per-rank
    # card counts for the three players; verify against GameEnv.load.
    init = [[2, 1, 2, 1, 0, 1, 3, 1, 2, 1, 0, 1, 2, 0, 0],
            [2, 1, 1, 2, 1, 1, 0, 3, 1, 2, 0, 2, 0, 0, 1],
            [0, 1, 1, 1, 3, 2, 0, 0, 1, 1, 3, 1, 2, 1, 0]]
    # Scripted action ids replayed to reach the position under inspection.
    actions = [
        352, 352, 353, 338, 343, 347, 123, 0, 0, 20, 22, 23, 24, 26, 0, 28,
        0, 29, 0, 0, 39, 0, 0, 116, 0, 0, 76, 324, 0, 0, 41, 42, 0, 0, 92,
        317, 320, 0, 0, 31
    ]  # , 42, 0, 0, 15, 18]
    init = np.array(init, dtype=np.int32)
    env.load(init)
    print(env)
    root, player = new_Root(), 2
    game = Game(env, player)
    # Replay the scripted actions, keeping the search-tree root in sync with
    # the game state so the tree can be reused at the final position.
    for action in actions:
        mct = MCT(game, root)
        agent.simulates([mct], store_rnn_states='Always')
        root.root(action)
        game.move(action)
    print(game)
    print('GAUSS', game.gauss())
    print(game.curr_player(), game.my_player())
    print('====')
    print(game.curr_player())
    print(game.my_player())
    print(game.lord_player())
    print(game.hand_cards_num())
    print(game.bottom())
    print('====')
    mct = MCT(game, root)
    # Run 2000 simulations at the final position, printing per-edge
    # statistics (action, prior P, visit count N, value Q) every 10 rounds.
    for cnt in range(2000):
        agent.simulates([mct], store_rnn_states='Always')
        # if cnt == 0:
        #     history, rnn_state = root.get_model_input()
        #     print(history)
        # if (cnt + 1) % 10 == 0:
        if (cnt + 1) % 10 == 0:
            print(cnt + 1)
            for action, P, son in mct.root.edges():
                print('%d: %.8f %d %.3f' % (action, P, son.N(), son.Q()))
            print('-------------------------')
    t = 1.0
    # s: visit counts per root edge; p: priors per root edge.
    s = np.array([son.N() for action, P, son in mct.root.edges()])
    p = np.array([P for action, P, son in mct.root.edges()])
    print(s)
    print(np.mean(s))
    # Tempered weighting: shift counts by (0.001 - mean), clip negatives to
    # zero, then normalize with temperature t for comparison against s and p.
    w = s + 0.001 - np.mean(s)
    w[w < 0] = 0
    w = (w**t) / (w**t).sum()
    print(w)
    print(s / s.sum())
    print(p)
    mct.json()
def main():
    """Debug driver: restore a trained model, replay a hard-coded (or
    sampled) self-play record, and print the model's value/policy
    predictions at every step next to the action actually taken."""
    np.set_printoptions(precision=2, linewidth=128, suppress=True)
    path = 'test'
    config.device = torch.device('cuda:1')
    config.set_device(-1)
    model = Model('model_201912080009')
    model.restore()
    files = os.listdir('gen/%s/' % path)
    files.sort(key=lambda k: k.split('_')[0], reverse=True)
    # print(files[-1])
    # Sample one record among the 100 most recent generation files.
    file = np.random.choice(files[:100])
    # file = '201912101326_85d94af6fe1a588b.pkl'
    print(file)
    data = pickle_load('gen/%s/%s' % (path, file))
    # data = pickle_load('gen/test/' + files[-2])
    # np.random.shuffle(data)
    player = 2
    init, actions, _v = [None, [], -1.0]
    # Hard-coded position overriding the sampled record — presumably
    # per-rank card counts for the three players; verify against
    # GameEnv.load.
    init = [[2, 1, 2, 1, 0, 1, 3, 1, 2, 1, 0, 1, 2, 0, 0],
            [2, 1, 1, 2, 1, 1, 0, 3, 1, 2, 0, 2, 0, 0, 1],
            [0, 1, 1, 1, 3, 2, 0, 0, 1, 1, 3, 1, 2, 1, 0]]
    actions = [
        352, 352, 353, 338, 343, 347, 123, 0, 0, 20, 22, 23, 24, 26, 0, 28,
        0, 29, 0, 0, 39, 0, 0, 116, 0, 0, 76, 324, 0, 0, 41, 42, 0, 0, 92,
        317, 320, 0, 0, 31, 42, 0, 0, 15, 18
    ]
    init = np.array(init, dtype=np.int32)
    # actions = [353, 352, 352, 339, 349, 349, 15]
    # init = [[2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 0, 0, 0, 0, 0],
    #         [2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 0, 0, 0, 0, 0],
    #         [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 4, 4, 4, 1, 1]]
    # init = np.array(init, dtype=np.int32)
    # actions = [353, 352, 352, 344, 345, 346, 151]
    # init, actions, _v = data[player]
    print(_v, player)
    print(init)
    print(actions)
    print('=============================================')
    print('player:', player)
    # Build the cumulative histories for every step and evaluate them all
    # in a single batched forward pass.
    histories = unpack(init, actions, player)
    histories, lengths = history_regularize(histories)
    histories, lengths = to_cuda(histories, lengths)
    vs, ps, _ = model(histories, lengths, None)
    env = GameEnv(False)
    env.load(init)
    game = Game(env, player)
    for v, p, action in zip(vs, ps, actions):
        print('----------------------------------')
        print('my_player: %d, curr_player: %d' % (player, game.curr_player()))
        # for action, _dist in enumerate(dist):
        #     print(action, _dist)
        # Show the model's 8 highest-probability actions with their
        # legality at the current position.
        idx = np.argsort(p)[::-1]
        for i in range(8):
            print(game.could_move(idx[i]), end=' ')
            print('(%d, %.2f%%)' % (idx[i], p[idx[i]] * 100), end='\n')
        print('action: %d, %.2f%%' % (action, p[action] * 100))
        # Early bail-out when action 60 dominates the policy — NOTE(review):
        # looks like a probe for a specific misprediction; confirm intent.
        if idx[0] == 60 and p[idx[0]] > 0.3:
            print(game)
            print(game.policy())
            print(game.hand_cards_num())
            print(game.bottom())
            print(game.curr_player(), game.lord_player())
            return 0
        # model_input = game.to_model_input()
        # for channel in range(26, 28):
        #     print(channel)
        #     print(model_input[channel])
        print('%.1f, %.3f' % (_v, v[0]))
        game.move(action)
        print(game)
        print('Gauss:', game.gauss())
# Replay script: load one random pickled record and step its recorded
# policy through the environment, printing the state after every move.
import os
import time

import numpy as np

from game import Game, GameEnv

path = 'data/'
files = os.listdir(path)
file = np.random.choice(files)
print(file)
# NOTE(review): pickle_load is not imported in this script — presumably a
# project helper; confirm where it comes from.
data = pickle_load(path + file)
init, policy, v = data[0]
env = GameEnv()
print(init)
env.load(init)
print(policy)
for action in policy:
    print(env.curr_player(), env.lord_player())
    print(action)
    # NOTE(review): period 3 appears to be a phase where the recorded action
    # is not replayable and -1 acts as a no-op/pass — confirm against
    # GameEnv.period/move semantics.
    if env.period() != 3:
        env.move(action)
    else:
        env.move(-1)
    print(env)
    print('---------------------')
    # Slow the replay down so the printed states are readable.
    time.sleep(0.5)
print(v)
# TODO: MCTS simulate should be same as action choice