def _generate_samples(gen_function, num_samples):
    """Generate `num_samples` boards together with the score of each board.

    Arguments:
        gen_function (callable): Zero-argument function returning one board.
        num_samples (int): The number of boards to generate.

    Returns:
        (np.ndarray, np.ndarray): The generated boards and, in the same
            order, the score `bs.analyze_board` reports for each of them.
    """
    boards = []
    scores = []
    for _ in range(num_samples):
        board = gen_function()
        # analyze_board returns (score, score_pos); only the score is a label.
        score, _score_pos = bs.analyze_board(board)
        boards.append(board)
        scores.append(score)
    # np.array returns a NEW array, so its result has to be kept and returned.
    return np.array(boards), np.array(scores)


def generate_dataset(gen_function=gen_random_data, num_training_samples=60000,
                     num_testing_samples=10000):
    """Return generated boards and scores as training/testing data.

    Arguments:
        gen_function (callable): The function used to generate one board of
            the dataset; it is called with no arguments.
        num_training_samples (int): The number of training samples to generate.
        num_testing_samples (int): The number of testing samples to generate.

    Returns:
        (tuple), (tuple): `(x_train, y_train), (x_test, y_test)` as numpy
            arrays, where `x_*` holds the boards and `y_*` their scores.
    """
    # Training data is generated before testing data, so the order in which
    # gen_function is called is the same as it has always been.
    training_data = _generate_samples(gen_function, num_training_samples)
    testing_data = _generate_samples(gen_function, num_testing_samples)
    return training_data, testing_data
def choose_swap(self, board, bOffline=False):
    """Pick the swap of two non-empty tiles whose resulting board rates best.

    Every unordered pair of tile indices is considered; pairs where either
    tile is '.' are skipped. Each remaining pair yields a copy of `board`
    with the two tiles exchanged.

    Arguments:
        board: Flat, index-addressable board of `self.game.numTiles` tiles.
        bOffline (bool): When True, rate each candidate board with
            `BoardScorer.analyze_board`; otherwise rate them with `self.nn`.

    Returns:
        (tuple): `(((r1, c1), (r2, c2)), swapped_board)` for the candidate
            with the highest rating.
    """
    game = self.game
    moves = []
    candidates = []
    for first in range(game.numTiles):
        for second in range(first + 1, game.numTiles):
            # Only two occupied tiles can be swapped.
            if board[first] == '.' or board[second] == '.':
                continue

            # Translate the flat indices into (row, column) coordinates.
            pos_first = (int(first // game.numCols), int(first % game.numCols))
            pos_second = (int(second // game.numCols),
                          int(second % game.numCols))

            candidate = np.copy(board)
            candidate[first], candidate[second] = board[second], board[first]

            moves.append((pos_first, pos_second))
            candidates.append(candidate)

    if bOffline:
        ratings = np.array(
            [BoardScorer.analyze_board(candidate)[0] for candidate in candidates])
    else:
        channels = estimatorModel.split_into_channels(candidates)
        ratings = self.nn.predict(channels).flatten()

    best_index = ratings.argmax()
    print("chose index", best_index)
    return (moves[best_index], candidates[best_index])
# NOTE(review): everything up to `return dataset` is the tail of an input
# function whose header (and the `if` matching this `else`) lies above this
# chunk; it is presumably a module-level function, so it is laid out with
# function-body indentation -- confirm against the full file.
    else:
        #inputs = (eval_features, eval_labels)
        # Pair each feature row with its score.
        inputs = (features, scores)
    # Convert the inputs to a Dataset.
    dataset = tf.data.Dataset.from_tensor_slices(inputs)
    # Batch the examples
    assert batch_size is not None, "batch_size must not be None"
    dataset = dataset.batch(batch_size)
    # Return the dataset.
    return dataset


# Script entry point: build the network, then train it on freshly generated
# random boards, one batch of boards per round.
if __name__ == "__main__":
    tf.logging.set_verbosity(tf.logging.INFO)
    nn = CreateNetwork()
    # 100 training rounds.
    for i in range(0, 100):
        batch_boards = []
        batch_scores = []
        # Generate 100000 random boards and label each one with the first
        # element of BoardScorer.analyze_board's result.
        for j in range(0, 100000):
            b = list(BoardGen.gen_random_data())
            batch_boards.append(b)
            batch_scores.append(BoardScorer.analyze_board(b)[0])
        # The lambda reads batch_boards/batch_scores when the input function is
        # invoked; presumably nn.train invokes it before the next round
        # rebinds those names -- confirm.
        nn.train(input_fn=lambda: train_input_fn(batch_boards, batch_scores))
        #nn.evaluate(input_fn=lambda:eval_input_fn(batch_boards, batch_scores))
# Collect (start_board, end_board) transitions and their rewards over 75 games.
# NOTE(review): the indentation below is reconstructed from a flattened
# snippet, and the snippet appears to be cut off -- see the note on the
# `numOpenTiles` check.
print("run 13")
player = SuperBall.SuperBall()
boards = []    # (start_board, end_board) pairs, one per recorded step
rewards = []   # reward for the pair at the same index in `boards`
for i in range(0, 75):
    print("episode", i)
    player.StartGame()
    while not player.gameOver:
        # Snapshot the board before acting so later changes to player.board
        # cannot alter the recorded start state.
        start_board = copy.deepcopy(player.board)
        # NOTE(review): nothing visible here handles numOpenTiles >= 5; that
        # branch presumably follows beyond this chunk.
        if player.numOpenTiles < 5:
            score, score_pos = BoardScorer.analyze_board(start_board)
            # (-1, -1) presumably means analyze_board found no position to
            # score -- confirm. The episode then ends with reward -1 and an
            # unchanged board.
            if score_pos == (-1, -1):
                boards.append((start_board, start_board))
                rewards.append(-1)
                player.gameOver = True
                print("game over")
            else:
                # Score at the reported position; the reward is the value
                # returned by score_ignore_color.
                tiles_scored = player.score_ignore_color( score_pos[0], score_pos[1])
                end_board = copy.deepcopy(player.board)
                boards.append((start_board, end_board))
                rewards.append(tiles_scored)
def generate_episode(self, self_play=True):
    """Play one game to completion, fitting the value network after each move.

    On every turn all actions from `self.get_actions` are rated, the
    best-rated one is applied to `self.game`, and (from the second turn on)
    `self.nn` is fitted so that the previous afterstate maps to
    `reward + self.discount_factor * V(new afterstate)`.

    Arguments:
        self_play (bool): When True, actions are rated by `self.nn`;
            otherwise by the score `bs.analyze_board` gives each afterstate.

    Returns:
        None. The final score is printed, followed by the bell character.
    """
    self.game.StartGame()
    saved_afterstate = None
    iter_count = 0

    while not self.game.gameOver:
        actions, afterstates, action_types = self.get_actions(self.game.board)

        # Evaluate each action's value from self-play or otherwise
        if self_play:
            channels = estimatorModel.split_into_channels(afterstates)
            action_values = self.nn.predict(channels)
        else:
            action_values = np.zeros(len(afterstates))
            for i in range(len(afterstates)):
                score, _score_pos = bs.analyze_board(afterstates[i])
                action_values[i] = score

        # If there are fewer than 5 open tiles, all swaps yield GAME OVER
        # NOTE(review): -1 doubles as the "game over" marker below; a network
        # that rates every remaining action below -1 would make a marked swap
        # win the argmax -- confirm whether that can happen in practice.
        if self.game.numOpenTiles < 5:
            for a in range(len(actions)):
                if action_types[a] == 'swap':
                    action_values[a] = -1

        best_index = action_values.argmax()
        best_action = actions[best_index]

        if action_values[best_index] == -1:
            # All actions lead to game over state
            self.game.gameOver = True
        elif action_types[best_index] == 'swap':
            # A swap action is ((r1, c1), (r2, c2)).
            first, second = best_action[0], best_action[1]
            self.game.swap(first[0], first[1], second[0], second[1])
        elif action_types[best_index] == 'score':
            # A score action is (r, c).
            self.game.score(best_action[0], best_action[1])

        new_afterstate = np.copy(self.game.board)

        # Give large negative reward for losing
        reward = -1000 if self.game.gameOver else 0

        # Find the target for the value function
        channels = estimatorModel.split_into_channels([new_afterstate])
        v_new_afterstate = self.nn.predict(channels)
        v_target = reward + self.discount_factor * v_new_afterstate

        # Move the value of the previous afterstate towards the target. There
        # is no previous afterstate on the first turn, so nothing is fitted.
        if iter_count > 0:
            channels = estimatorModel.split_into_channels([saved_afterstate])
            # Train this agent's own network: the original called
            # `player.nn.fit`, relying on a global `player`, while every other
            # call in this method uses `self.nn`. The prediction for the saved
            # afterstate that used to be made here was never used.
            self.nn.fit(channels, [v_target], epochs=1, batch_size=1)

        saved_afterstate = new_afterstate
        iter_count += 1

    print('Final score:', self.game.totalScore)
    # Sound the bell!
    print('\007')
    return