def choose_move(self, possible_moves, own_mark, history):
    """Pick the next move by letting the model complete the current game.

    The history so far is deep-copied, tagged with a desired outcome and
    passed through ``self._model``.  The model output is parsed back into a
    game, and the move it holds at the current turn index is played when it
    is one of ``possible_moves``.  A win (outcome ``1``) is requested first,
    then a draw (outcome ``0``).  If neither reconstruction yields a playable
    move, a random entry of ``possible_moves`` is returned.

    Side effects: ``self._all_moves`` is incremented on every call and
    ``self._random_moves`` whenever the random fallback is used.

    Args:
        possible_moves: Indexable sequence of moves that may be played now.
        own_mark: Not used by this implementation.
        history: Game history so far; it is copied, never modified.

    Returns:
        A move contained in ``possible_moves``.
    """
    current_turn = history.length()
    self._all_moves += 1

    # We use the model to impute the future given the desired outcome, game
    # length and immediate history: first aim for a win, then for a draw.
    for desired_outcome in (1, 0):
        incomplete_game = copy.deepcopy(history)
        incomplete_game.record_outcome(desired_outcome)
        features = torch.tensor(incomplete_game.dense_feature()).float()

        # Pure inference: no autograd graph is needed for the forward pass.
        with torch.no_grad():
            output = self._model(features)

        reconstructed_game = game_history.GameHistory()
        reconstructed_game.parse_dense_feature(output.tolist())

        # There is no guarantee that reconstructed moves are actually possible.
        chosen_move = reconstructed_game.moves()[current_turn]
        if chosen_move in possible_moves:
            return chosen_move

    # At this point the network failed to provide a possible move, so just
    # pick one at random.
    self._random_moves += 1
    random_index = random.randint(0, len(possible_moves) - 1)
    return possible_moves[random_index]
def test_padded_sparse_feature(self):
    """Round-trip a five-move game through the sparse feature encoding.

    Records five moves and an outcome of 1, encodes the history with
    ``sparse_feature()``, parses the result into a new ``GameHistory`` and
    checks that states, moves and outcome all survive the round trip.
    """
    recorded_moves = ((0, 0), (0, 1), (0, 2), (1, 0), (1, 1))
    for move in recorded_moves:
        self.history.record_move(move)
    self.history.record_outcome(1)

    encoded = self.history.sparse_feature()
    restored = game_history.GameHistory()
    restored.parse_sparse_feature(encoded)

    self.assertEqual(self.history.states(), restored.states())
    self.assertEqual(self.history.moves(), restored.moves())
    self.assertEqual(self.history.outcome(), restored.outcome())
def train(self, games, save_path):
    """Train a ``SparseAutoencoder`` on recorded games and save it.

    Each game is encoded with ``sparse_feature()``.  The first 80% of the
    encoded games are used for training and the rest for validation.  The
    model is trained to reproduce its input for 20000 epochs with RMSprop
    and ``BCEWithLogitsLoss``; the validation loss is printed every 100
    epochs.  Afterwards every game is passed through the model, parsed back
    into a ``GameHistory`` and compared with the original, and the error
    percentages are printed.  The trained model is moved to the CPU, stored
    in ``self._model`` and written to ``save_path`` with ``torch.save``.

    Args:
        games: Sequence of games exposing ``sparse_feature()``,
            ``outcome()``, ``moves()`` and ``length()``.
        save_path: Destination passed to ``torch.save``.

    Raises:
        ValueError: If fewer than two games are given, because the 80%
            training split would then be empty.
    """
    data = [game.sparse_feature() for game in games]

    # 80% of data will be used for training and 20% for testing
    training_data_size = int(len(data) * 0.8)
    if training_data_size == 0:
        # An empty training split would otherwise surface later as an opaque
        # "range() arg 3 must not be zero" ValueError from the batch loop.
        raise ValueError(
            "train() needs at least 2 games to build a non-empty training "
            "split, got " + str(len(data))
        )

    training_device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = SparseAutoencoder().to(training_device)

    training_data = torch.tensor(data[:training_data_size]).float().to(training_device)
    testing_data = torch.tensor(data[training_data_size:]).float().to(training_device)

    # Full-batch training: one optimizer step per epoch.  Choosing a smaller
    # batch_size switches on mini-batching, per-epoch shuffling and the extra
    # training-loss report below.
    batch_size = training_data_size

    loss_fn = torch.nn.BCEWithLogitsLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), lr=0.002)
    print("\nTraining using", training_device, "on", training_data_size, "games")

    for t in range(20000):
        # shuffle data between epochs
        if batch_size != training_data_size:
            permutation = torch.randperm(training_data.size()[0]).to(training_device)
            training_data = training_data[permutation]

        for base_index in range(0, training_data_size, batch_size):
            # Forward pass: the autoencoder is trained to reproduce its input.
            batch = training_data[base_index:base_index + batch_size]
            output_games = model(batch)
            loss = loss_fn(output_games, batch)

            # Gradients accumulate across backward() calls by default, so
            # they are cleared before each backward pass.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Check model performance
        if t % 100 == 0:
            # Evaluation only: no autograd graph is needed.
            with torch.no_grad():
                validation_loss = loss_fn(model(testing_data), testing_data)
                if batch_size != training_data_size:
                    training_loss = loss_fn(model(training_data), training_data)
            if batch_size != training_data_size:
                print("Epoch", t, "training loss", training_loss.item(),
                      "validation loss", validation_loss.item())
            else:
                print("Epoch", t, "validation loss", validation_loss.item())

    # output how many games are actually reconstructed right
    move_errors = 0
    parse_errors = 0
    outcome_errors = 0
    valid_reconstruction = 0
    all_data = torch.tensor(data).float().to(training_device)
    with torch.no_grad():
        output_games = model(all_data).cpu()

    for game, output_feature in zip(games, output_games.tolist()):
        reconstructed_game = game_history.GameHistory()
        try:
            reconstructed_game.parse_sparse_feature(output_feature)
        except Exception:
            # The exception type raised for an ill-formed feature is not
            # visible here, so Exception is caught; unlike a bare except this
            # still lets KeyboardInterrupt and SystemExit propagate.
            parse_errors += 1
            continue

        outcome_mismatch = reconstructed_game.outcome() != game.outcome()
        # Only the moves up to the original game's length are compared.
        move_mismatch = reconstructed_game.moves()[:game.length()] != game.moves()
        if outcome_mismatch:
            outcome_errors += 1
        if move_mismatch:
            move_errors += 1
        if not (outcome_mismatch or move_mismatch):
            valid_reconstruction += 1

    total_games = len(all_data)
    print("Testing reconstruction")
    print("Ill-formed features:", 100*parse_errors /total_games, "%")
    print("Outcome errors:", 100*outcome_errors /total_games, "%")
    print("Move errors:", 100*move_errors /total_games, "%")
    print("Valid reconstruction:", 100*valid_reconstruction /total_games, "%\n")

    self._model = model.cpu()
    torch.save(self._model, save_path)
def __init__(self, hostname, port, n_clients, n_iterations, sim_name):
    """Run one complete simulation as the server.

    Binds a TCP socket to ``hostname:port``, waits for ``n_clients`` clients,
    exchanges init messages with each of them, sends ``START`` and then runs
    up to ``n_iterations`` rounds of broadcast / receive / process.  If the
    game is not over after the last round it is declared a draw.  Finally any
    remaining history is saved and the client connections are closed.

    If the socket cannot be bound, the error is printed, the socket is
    closed and the constructor returns early without running the simulation.

    Args:
        hostname: Address to bind the server socket to.
        port: TCP port to bind the server socket to.
        n_clients: Number of client connections to wait for.
        n_iterations: Maximum number of game rounds to run.
        sim_name: Name passed to ``game_history.GameHistory``.
    """
    # Create history object for this simulation
    self.gamehistory = game_history.GameHistory(sim_name)
    self.gamedata = game_data.GameData()

    # Initialize server with values
    print("Started game at " + hostname + ':' + str(port))
    self.hostname = hostname
    self.port = port

    # Creating and binding a server socket to a port
    self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        self.sock.bind((hostname, port))
    except socket.error as e:
        # Report the failure and release the socket instead of leaking it;
        # the early return of the original code is kept.
        print('Could not bind to ' + hostname + ':' + str(port) + ': ' + str(e))
        self.sock.close()
        return

    # Waiting for and connecting to clients
    print('Waiting for ' + str(n_clients) + ' clients...')
    self.sock.listen(n_clients)

    # NOTE(review): self.connections is not assigned in this method; it is
    # presumably initialised elsewhere (e.g. as a class attribute) - confirm.
    # Accepting connections and saving them
    for client_id in range(n_clients):
        conn, addr = self.sock.accept()
        self.connections.append(conn)
        print('Client ' + str(len(self.connections) - 1) + ' Connected at: '
              + str(addr[0]) + ':' + str(addr[1]))

        # Each player starts with three random values; presumably x, y and
        # heading in degrees - confirm against game_data.Player.
        self.gamedata.players.append(
            game_data.Player(client_id,
                             float(random.randint(-8000, 8000)),
                             float(random.randint(-8000, 8000)),
                             float(random.randint(0, 360))))

        print('\tExchanging init information...')
        # The client's init message is received and decoded, but its content
        # is not used any further here.
        cli_init = socket_utilities.recv_data(conn)
        cli_init = socket_utilities.convert_to_object(cli_init)
        ser_init = init_message.InitMessageFromServer(client_id)
        ser_init = socket_utilities.convert_to_bytes(ser_init)
        socket_utilities.send_data(conn, ser_init)
        print('\tFinished exchanging init information...\n')

    # Starting game process
    print('Starting ' + str(n_iterations) + ' iterations...')
    start = time.time()
    for conn in self.connections:
        socket_utilities.send_data(conn, 'START'.encode())

    for _ in range(n_iterations):
        if self.gamedata.game_over:
            break
        self.broadcast_game_data()
        given = self.receive_client_inputs()
        self.process_game_data(given)

    # Running out of iterations without a finished game counts as a draw.
    if not self.gamedata.game_over:
        self.gamedata.game_over = True
        print("The game ended in a draw!")

    # Saves last bit of data
    if not self.gamehistory.is_empty():
        self.gamehistory.save_to_file()

    # Closes client connections
    self.close_connections()

    print('Simulation ending...')
    print('Elapsed time: ' + str(time.time() - start))
def train(self, games, save_path):
    """Train a small dense autoencoder on recorded games and save it.

    Each game is encoded with ``dense_feature()``.  The first 80% of the
    encoded games are used for training and the rest for validation.  A
    10-9-8-7-8-9-10 fully connected network with tanh activations is trained
    to reproduce its input for 10000 full-batch iterations with RMSprop and
    MSE loss; the validation loss is printed every 100 iterations.
    Afterwards every game is passed through the model, parsed back into a
    ``GameHistory`` and compared with the original, and the error
    percentages are printed.  The trained model is moved to the CPU, stored
    in ``self._model`` and written to ``save_path`` with ``torch.save``.

    Args:
        games: Sequence of games exposing ``dense_feature()``, ``outcome()``,
            ``moves()`` and ``length()``.
        save_path: Destination passed to ``torch.save``.
    """
    # Design notes from earlier experiments:
    # - tried ReLU but the model is shallow, so tanh adds more non-linearity
    #   and decreases loss
    # - tried mini batching but it actually increases loss without finding a
    #   new minimum
    # - tried adding a new layer for better generalization but it increases
    #   loss
    # - seems that the dense feature just does not work
    training_device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Feature width is written as 1 + 9 in the layer sizes; presumably one
    # outcome value plus nine moves - confirm against dense_feature().
    model = torch.nn.Sequential(
        torch.nn.Linear(1 + 9, 9),
        torch.nn.Tanh(),
        torch.nn.Linear(9, 8),
        torch.nn.Tanh(),
        torch.nn.Linear(8, 7),
        torch.nn.Tanh(),
        torch.nn.Linear(7, 8),
        torch.nn.Tanh(),
        torch.nn.Linear(8, 9),
        torch.nn.Tanh(),
        torch.nn.Linear(9, 1 + 9),
    ).to(training_device)

    data = [game.dense_feature() for game in games]

    # 80% of data will be used for training and 20% for testing
    training_data_size = int(len(data) * 0.8)
    training_data = torch.tensor(data[:training_data_size]).float().to(training_device)
    testing_data = torch.tensor(data[training_data_size:]).float().to(training_device)

    loss_fn = torch.nn.MSELoss()
    optimizer = torch.optim.RMSprop(model.parameters())
    print("\nTraining using", training_device, "on", training_data_size, "games")

    for t in range(10000):
        output_games = model(training_data)
        loss = loss_fn(output_games, training_data)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Verify model.  The validation loss is only reported every 100th
        # iteration, so it is only computed then, and without autograd.
        if t % 100 == 0:
            with torch.no_grad():
                validation_loss = loss_fn(model(testing_data), testing_data)
            print(t, validation_loss.item())

    # output how many games are actually reconstructed right
    move_errors = 0
    outcome_errors = 0
    valid_reconstruction = 0
    all_data = torch.tensor(data).float().to(training_device)
    with torch.no_grad():
        output_games = model(all_data).cpu()

    # The parser is given plain Python lists, matching how model output is
    # handed to parse_dense_feature when choosing a move.
    for game, output_feature in zip(games, output_games.tolist()):
        reconstructed_game = game_history.GameHistory()
        reconstructed_game.parse_dense_feature(output_feature)

        outcome_mismatch = reconstructed_game.outcome() != game.outcome()
        # Only the moves up to the original game's length are compared.
        move_mismatch = reconstructed_game.moves()[:game.length()] != game.moves()
        if outcome_mismatch:
            outcome_errors += 1
        if move_mismatch:
            move_errors += 1
        if not (outcome_mismatch or move_mismatch):
            valid_reconstruction += 1

    total_games = len(all_data)
    print("Testing reconstruction")
    print("Outcome errors:", 100*outcome_errors /total_games, "%")
    print("Move errors:", 100*move_errors /total_games, "%")
    print("Valid reconstruction:", 100*valid_reconstruction /total_games, "%\n")

    self._model = model.cpu()
    torch.save(self._model, save_path)
def __init__(self, first_player, second_player):
    """Set up a game between two players.

    Stores both players and creates a new ``board.Board`` and a new
    ``game_history.GameHistory`` for this game.

    Args:
        first_player: Stored as ``self._player1``.
        second_player: Stored as ``self._player2``.
    """
    self._player1, self._player2 = first_player, second_player
    self._board = board.Board()
    self._history = game_history.GameHistory()
def setUp(self):
    """Create a new ``GameHistory`` in ``self.history`` for the test."""
    new_history = game_history.GameHistory()
    self.history = new_history