def load_raw_data(path, i, training_datasets):
    """Load one replay and append its encoder/decoder training triple.

    Args:
        path: Location of the replay file handed to ``utils.HaliteV2``.
        i: Index of the file; only used for the progress message.
        training_datasets: List that is mutated in place. When the replay
            has a game-play list, the entry
            ``[training_input, decoder_input_sequence, decoder_target_sequence]``
            is appended to it.

    Returns:
        None. Nothing is appended when ``game.game_play_list`` is ``None``.

    Raises:
        AssertionError: If the decoder dictionary size is not 450 or the
            replay does not yield exactly 400 sequences.
    """
    print("file idx :", i)
    game = utils.HaliteV2(path)
    print("loading file from ", path)
    if game.game_play_list is None:
        return

    # Features listed by the original author as the encoder input:
    #   1) halite available  2) my ship  3) cargo on my ship
    #   4) my shipyard       5) other players' ships
    # `sequence` holds one string per step recording the actions of
    # ships/shipyards.
    training_input, _, sequence = game.prepare_vae_encoder_input()

    assert inference_decoder.dictionary_size == 450

    # Teacher forcing: the decoder input is opened with "(" and the target
    # is closed with ")".
    # TODO: validate max sequence
    decoder_input_sequence = []
    decoder_target_sequence = []
    for step_moves in sequence:
        decoder_input_sequence.append('( ' + step_moves)
        decoder_target_sequence.append(step_moves + ')')

    assert len(decoder_target_sequence) == len(decoder_input_sequence) == 400

    training_datasets.append(
        [training_input, decoder_input_sequence, decoder_target_sequence]
    )
def test_build_gameplay_list(self):
    """The gameplay list exists and holds ``total_turns - 1`` entries."""
    replay = utils.HaliteV2("replayJson.json")
    self.assertIsNotNone(replay.game_play_list)
    expected_entries = replay.total_turns - 1
    self.assertEqual(expected_entries, len(replay.game_play_list))
def test_convert_to_gameplay(self):
    """Converting step 2 of the replay yields a non-None gameplay object."""
    replay = utils.HaliteV2("replayJson.json")
    self.assertIsNotNone(replay.convert_to_game_play(2))
def test_load_total_turns(self):
    """``total_turns`` of the loaded replay lies in the range (1, 400]."""
    replay = utils.HaliteV2("replayJson.json")
    self.assertGreater(replay.total_turns, 1)
    self.assertLessEqual(replay.total_turns, 400)
def test_load_replay_v2(self):
    """Loading a replay populates both ``replay`` and ``config``."""
    loaded = utils.HaliteV2("replayJson.json")
    self.assertIsNotNone(loaded.replay)
    self.assertIsNotNone(loaded.config)
"""Data Extraction"""
# Collect every replay file below PATH, then load the first replays and
# start building the encoder input for those won by player 0.
PATH = 'train/top_replay/'
replay_files = []
# r=root, d=directories, f = files
for r, d, f in os.walk(PATH):
    for file in f:
        # Match on the extension itself; a substring test would also pick
        # up names such as "replay.json.bak".
        if file.endswith('.json'):
            replay_files.append(os.path.join(r, file))

for f in replay_files:
    print(f)

game = None
seq_list = []
training_datasets = []
for i, path in enumerate(replay_files):
    game = utils.HaliteV2(path)
    print("index", i)
    # Only the replays at index 0 and 1 are processed; the one at index 2
    # is loaded and its index printed before the loop stops.
    if i == 2:
        break
    if game.game_play_list is not None and game.winner_id == 0:
        game.prepare_data_for_vae()
        # Four features as training input:
        #   1) halite available
        #   2) my ship
        #   3) cargo on my ship
        #   4) my shipyard
        training_input = np.zeros((400, 32, 32, 4), dtype=np.float32)
        my_ship_positions = game.ship_position
def save_training_data(replay_dir='top_replay/'):
    """Convert winning replays under *replay_dir* into ``.npz`` archives.

    Every file whose name ends in ``.json`` below ``replay_dir`` is loaded
    with ``utils.HaliteV2``. A replay is encoded only when it has a
    game-play list and its ``winner_id`` is 0. The archive is written with
    ``np.savez`` to the replay's path with the extension removed (NumPy
    appends ``.npz``) and contains:

    * ``encoder_input``: float32 array of shape (400, 32, 32, 4). The
      channels are halite, own ship positions, cargo and own shipyard
      positions; each value is multiplied by 10 and stored 6 cells away
      from the top/left edge.
    * ``decoder_input``: float32 one-hot array of shape (400, 50, vocab)
      holding the move tokens of each step, opened with ``"("``.
    * ``decoder_output``: same shape; the same tokens shifted by one
      position and closed with ``")"``.

    Args:
        replay_dir: Directory that is searched recursively for replays.
            Defaults to ``'top_replay/'``.

    Returns:
        None.

    Raises:
        ValueError: If an encoded move token is not in the vocabulary.
    """
    steps, padded_size, pad_offset, max_tokens = 400, 32, 6, 50

    replay_files = []
    for root, _dirs, names in os.walk(replay_dir):
        for name in names:
            # Suffix test: a substring test would also accept names such
            # as "replay.json.bak".
            if name.endswith('.json'):
                replay_files.append(os.path.join(root, name))
    for replay_file in replay_files:
        print(replay_file)

    for i, path in enumerate(replay_files):
        game = utils.HaliteV2(path)
        print("index", i)
        if game.game_play_list is None or game.winner_id != 0:
            continue
        game.prepare_data_for_vae()

        # Encoder input, one channel per feature.
        training_input = np.zeros(
            (steps, padded_size, padded_size, 4), dtype='float32')
        # 1) halite available (its rows are squeezed before being read)
        _fill_channel(training_input, game.halite, 0, pad_offset,
                      squeeze_rows=True)
        # 2) my ship position
        _fill_channel(training_input, game.ship_position, 1, pad_offset)
        # 3) cargo on my ship
        _fill_channel(training_input, game.cargo, 2, pad_offset)
        # 4) my ship yard position
        _fill_channel(training_input, game.shipyard_position, 3, pad_offset)
        print("training input shape", training_input.shape)

        # One-hot decoder tensors over the move vocabulary.
        vocab = _build_move_vocabulary(game.config["size"])
        decoder_input_data = np.zeros(
            (steps, max_tokens, len(vocab)), dtype='float32')
        decoder_target_data = np.zeros(
            (steps, max_tokens, len(vocab)), dtype='float32')

        # Work on a copy so the replay object itself is left untouched.
        # The last entry is repeated once, presumably so the list reaches
        # one entry per step -- TODO confirm.
        moves = list(game.move_sequence)
        moves.append(moves[-1])
        # TODO: validate max sequence
        for step, move_text in enumerate(moves):
            _encode_teacher_forcing(
                move_text.split(),
                vocab,
                decoder_input_data[step],
                decoder_target_data[step],
            )
        print("target action shape", decoder_target_data.shape)

        # splitext only strips the final extension, so dots elsewhere in
        # the path (e.g. "./dir/replay.v2.json") do not truncate the name.
        archive_path = os.path.splitext(path)[0]
        np.savez(
            archive_path,
            encoder_input=training_input,
            decoder_input=decoder_input_data,
            decoder_output=decoder_target_data,
        )


def _fill_channel(target, frames, channel, pad_offset, squeeze_rows=False):
    """Copy each frame's cells, times 10, into one channel of *target*."""
    for step, frame in enumerate(frames):
        for row_idx, row in enumerate(frame):
            if squeeze_rows:
                row = np.squeeze(row)
            for col_idx, value in enumerate(row):
                target[step, row_idx + pad_offset, col_idx + pad_offset,
                       channel] = value * 10


def _build_move_vocabulary(board_size):
    """Map each cell number (as a string) and each move keyword to an index."""
    tokens = [str(cell) for cell in range(board_size ** 2)]
    tokens += [
        "EAST", "WEST", "SOUTH", "NORTH", "CONVERT", "SPAWN", "NO", "(", ")"
    ]
    return {token: index for index, token in enumerate(tokens)}


def _encode_teacher_forcing(words, vocab, input_row, target_row):
    """One-hot encode *words* into a shifted input/target pair of rows."""
    # One slot is reserved for the bracket, so at most rows - 1 words fit.
    kept = words[:input_row.shape[0] - 1]
    unknown = [word for word in kept if word not in vocab]
    if unknown:
        raise ValueError(f"unknown move token(s): {unknown!r}")
    # Input is "(" followed by the words; target is the words followed by
    # ")". An empty word list therefore encodes as "(" -> ")".
    pairs = zip(["("] + kept, kept + [")"])
    for idx, (fed, expected) in enumerate(pairs):
        input_row[idx, vocab[fed]] = 1.
        target_row[idx, vocab[expected]] = 1.