Exemplo n.º 1
0
        def generator():
            """Yield subsampled batches built from the HDF5 files in ``self.sample_dir``.

            Every dataset whose key contains ``self.H5_COL_KEY`` is read, cast
            to ``bool`` and appended to a running buffer. Whenever the buffer
            holds at least ``self.batch_size`` rows, the first
            ``self.batch_size`` rows are passed to the subsampling function(s)
            and each result is yielded; those rows are then dropped from the
            buffer.

            If ``self.subsampling_functions`` is a list, tuple or ndarray, one
            result is yielded per function for the same batch; otherwise it is
            called directly as a single function.

            Rows left over at the end (fewer than ``self.batch_size``) are
            never yielded.
            """
            directory = os.path.abspath(self.sample_dir)
            h5_paths = files_from_directory(directory, file_type="h5")
            # Rows read so far that have not been emitted as part of a batch.
            pending = np.empty(shape=(0, 15, 773), dtype=bool)

            for path in h5_paths:
                h5_name = correct_file_ending(path, 'h5')

                with h5py.File(h5_name, 'r') as hf:
                    for key in hf.keys():
                        if self.H5_COL_KEY not in key:
                            continue

                        loaded = np.asarray(hf[key][:], dtype=bool)
                        pending = np.concatenate((pending, loaded))

                        # Drain the buffer one full batch at a time.
                        while len(pending) >= self.batch_size:
                            many = isinstance(self.subsampling_functions,
                                              (list, tuple, np.ndarray))
                            if many:
                                for fn in self.subsampling_functions:
                                    yield fn(pending[:self.batch_size])
                            else:
                                yield self.subsampling_functions(
                                    pending[:self.batch_size])
                            pending = pending[self.batch_size:]
Exemplo n.º 2
0
def save_tuples(tuples, file, dset_num=0):
    """Store ``tuples`` as a new gzip-compressed dataset in an HDF5 file.

    The file (``file`` passed through ``correct_file_ending(..., "h5")``) is
    opened in append mode and a boolean dataset named ``tuples_<dset_num>``
    with shape ``(len(tuples), 15, 773)`` is created and filled.

    Args:
        tuples: Sequence/array of tuples to store; must fit the dataset shape.
        file: Path of the target file.
        dset_num: Number used as suffix of the dataset name.
    """
    target_path = correct_file_ending(file, "h5")

    with h5py.File(target_path, "a") as h5_file:
        dataset_name = f"tuples_{dset_num}"
        dataset_shape = (len(tuples), 15, 773)
        dataset = h5_file.create_dataset(dataset_name,
                                         shape=dataset_shape,
                                         dtype=bool,
                                         compression="gzip",
                                         compression_opts=9)
        # Copy the whole input into the freshly created dataset.
        dataset[:] = tuples[:]
Exemplo n.º 3
0
 def number_samples(self):
     """Return the total number of rows stored in the sample files.

     Walks every ``h5`` file found in ``self.sample_dir`` and adds up the
     length of each dataset whose key contains ``self.H5_COL_KEY``.
     """
     directory = os.path.abspath(self.sample_dir)
     total = 0
     for path in files_from_directory(directory, file_type="h5"):
         h5_name = correct_file_ending(path, 'h5')
         with h5py.File(h5_name, 'r') as hf:
             total += sum(
                 len(hf[key]) for key in hf.keys()
                 if self.H5_COL_KEY in key)
     return total
Exemplo n.º 4
0
def tuples_from_file(file, table_id_prefix, tuple_indices=None):
    """Return the selected tuple entries from all matching datasets in a file.

    Every dataset in the HDF5 file whose key contains ``table_id_prefix``
    (substring match) is read with ``dataset[:, tuple_indices]`` and the
    resulting rows are collected in file order.

    Args:
        file: Path of the HDF5 file; passed through
            ``correct_file_ending(file, 'h5')`` before opening.
        table_id_prefix: Text that a dataset key must contain to be read.
        tuple_indices: Indices selected along the second axis of each
            dataset. Defaults to ``[0, 1, 6]``.

    Returns:
        A boolean ``numpy`` array built from all collected rows.
    """
    # Use a None sentinel instead of a shared mutable default list.
    if tuple_indices is None:
        tuple_indices = [0, 1, 6]

    fname = correct_file_ending(file, 'h5')
    tuples = []
    with h5py.File(fname, 'r') as hf:
        for key in hf.keys():
            if table_id_prefix in key:
                tuples.extend(hf[key][:, tuple_indices])
    return np.asarray(tuples, dtype=bool)
Exemplo n.º 5
0
def save_bb(game_list, game_id, file, dset_num=0):
    """Write the positions of a chunk of games, with their game ids, to HDF5.

    All positions of all games are flattened into one list; for every
    position the id of its game (``game_id[i]`` for the i-th game in
    ``game_list``) is recorded alongside. Two gzip-compressed datasets are
    then created in the file, which is opened in append mode:

    * ``position_<dset_num>``: boolean, shape ``(n_positions, 773)``
    * ``game_id_<dset_num>``: integer, shape ``(n_positions,)``

    Args:
        game_list: Iterable of games, each an iterable of positions.
        game_id: Sequence indexed in parallel with ``game_list``.
        file: Path of the target file; passed through
            ``correct_file_ending(file, "h5")``.
        dset_num: Number used as suffix of both dataset names.
    """
    fname = correct_file_ending(file, "h5")
    position = []
    gid = []

    for (i, game) in enumerate(game_list):
        for pos in game:
            position.append(pos)
            gid.append(game_id[i])

    with h5py.File(fname, "a") as f:
        data1 = f.create_dataset(f"position_{dset_num}",
                                 shape=(len(position), 773),
                                 dtype=bool,
                                 compression="gzip",
                                 compression_opts=9)
        # ``np.int`` was only an alias of the builtin ``int`` and has been
        # removed from NumPy (1.24+); the builtin gives the same dtype.
        data2 = f.create_dataset(f"game_id_{dset_num}",
                                 shape=(len(position), ),
                                 dtype=int,
                                 compression="gzip",
                                 compression_opts=9)

        data1[:] = position[:]
        data2[:] = gid[:]
Exemplo n.º 6
0
def pgn_to_bitboard(pgn_file,
                    generate_tuples=False,
                    save_file=None,
                    tuple_file=None,
                    chunksize=100000,
                    game_filter=None):
    """Convert the games of a PGN file to bitboards and save them in chunks.

    Games are read one by one with ``chess.pgn.read_game``. Each game that
    is not filtered out is converted with ``game_bb`` and, if the result is
    non-empty, buffered together with its index in the PGN file. The buffer
    is written with ``save_bb`` (and, optionally, tuples generated from it
    with ``tuple_generator`` are written with ``save_tuples``) whenever the
    per-chunk counter reaches a multiple of ``chunksize`` and once more at
    the end of the file.

    Args:
        pgn_file: Path of the PGN file; passed through
            ``correct_file_ending(pgn_file, "pgn")``.
        generate_tuples: If true, also generate and save tuples per chunk.
        save_file: Target file for the positions. Required.
        tuple_file: Target file for the tuples. Required when
            ``generate_tuples`` is true.
        chunksize: Counter value at which a chunk is flushed. The counter
            starts at 1, so a chunk is flushed after ``chunksize - 1`` games
            have been counted.
        game_filter: Filter passed to ``filter_out`` together with the game
            headers; games for which it returns a true value are skipped.
            ``None`` or an empty filter disables filtering.

    Returns:
        Always ``0``.

    Raises:
        ValueError: If ``save_file`` is missing, or ``tuple_file`` is missing
            while ``generate_tuples`` is true.
    """
    # Fail before parsing anything instead of after a whole chunk was read.
    if save_file is None:
        raise ValueError("Save bitbaord file path not provided.")
    if generate_tuples and tuple_file is None:
        raise ValueError("Save tuple file path not provided.")

    game_list = []
    game_id = []
    counter = 1
    game_index = -1
    save_number = 0
    pgn_name = correct_file_ending(pgn_file, "pgn")

    with open(pgn_name, 'r') as f:
        while True:
            next_game = chess.pgn.read_game(f)
            game_index += 1

            # Truthiness test: the former ``is not {}`` identity comparison
            # was always true, so the filter ran even when none was given.
            if next_game is not None and game_filter and \
                    filter_out(next_game.headers, game_filter):
                continue

            if counter % chunksize == 0 or next_game is None:
                print("")
                save_bb(game_list,
                        game_id,
                        save_file,
                        dset_num=save_number)
                print("Game positions saved.")

                if generate_tuples:
                    tup = tuple_generator(game_list)
                    print("Tuples generated.")
                    save_tuples(tup, tuple_file, dset_num=save_number)
                    print("Tuples saved.")

                print(f"\rChunk {save_number} processed.")
                save_number += 1
                counter = 1
                game_list = []
                # save_bb indexes game_id in parallel with game_list, so the
                # ids must be reset together with the games.
                game_id = []
                if next_game is None:
                    break  # end of file, break the while True loop

            # Reached for regular games and for the game that triggered a
            # flush; the latter becomes the first game of the next chunk
            # instead of being dropped.
            temp_game = game_bb(next_game, game_nr=counter)
            if len(temp_game) > 0:
                game_list.append(temp_game)
                game_id.append(game_index)
            print(
                f" Games parsed: {game_index} Games processed: {counter}",
                end="\r")

            counter += 1

    return 0
Exemplo n.º 7
0
def test_correct_file_ending():
    """Check ``correct_file_ending`` on a path without and with the ending."""
    cases = (
        # (input path, ending, expected result)
        ("data/hello", "txt", "data/hello.txt"),
        ("one_file.pyc", "pyc", "one_file.pyc"),
    )
    for path, ending, expected in cases:
        assert ut.correct_file_ending(path, ending) == expected