Example #1
def create_datasets(output, source, size):

    """Prepare datasets for merged file (based on one of input files).

    The source file only supplies each dataset's per-entry shape and
    dtype; datasets are created (gzip-compressed) with axis 0 stretched
    to the merged size.

    Keyword argument:
    output -- output merged hdf5 file (open, writable)
    source -- path to one of input hdf5 files
    size -- total number of entries per dataset
    """

    data = load(source)

    try:
        for key in data:
            # Keep the per-entry shape, but replace axis 0 with the
            # total merged entry count.
            shape = list(data[key].shape)
            shape[0] = size
            output.create_dataset(key, shape, dtype=data[key].dtype,
                                  compression='gzip')
    finally:
        # Close the source file even if dataset creation fails,
        # so the handle is never leaked on error.
        data.close()
Example #2
def create_datasets(output, source, size):

    """Prepare datasets for merged file (based on one of input files).

    Keyword argument:
    output -- output merged hdf5 file
    source -- path to one of input hdf5 files
    size -- total number of entries per dataset
    """

    data = load(source)

    # For every dataset in the source file, create an equally-named
    # dataset in the output, identical except that axis 0 is resized
    # to hold all merged entries.
    for name in data:
        dataset = data[name]
        new_shape = [size] + list(dataset.shape[1:])
        output.create_dataset(name, new_shape, dtype=dataset.dtype,
                              compression='gzip')

    data.close()
Example #3
"""
import os
from parser import get_args_split as parser
import msg
import hdf5
from combine_big import load
from split import generate_uneven_filelist
from split import save_filelist

if __name__ == '__main__':

    msg.box("HDF5 MANIPULATOR: SPLIT")

    args = parser()
    data = load(args.input)

    # TODO - come up with a clever way to generalize this...
    # Hard-coded train/valid/test boundaries and file-name suffixes.
    split_ranges = [(0, 15000), (15000, 17500), (17500, 20000)]
    suffixes = ['_train.hdf5', '_valid.hdf5', '_test.hdf5']

    # Output names are based on the user-given prefix, or on the input
    # file name with its extension stripped.
    base_name = args.prefix or os.path.splitext(args.input)[0]
    filelist = generate_uneven_filelist(base_name,
                                        zip(suffixes, split_ranges))

    print("\nSaving output files:\n")

    # Write each (begin, end) slice of every dataset to its own file.
    for path, bounds in filelist.items():
        msg.list_fileinfo(path, bounds)
        hdf5.save_subset_big(path, data, bounds[0], bounds[1])
Example #4
import os

import h5py

import check
import hdf5
import msg
from combine_big import load
from parser import get_args_split as parser
from split import generate_filelist
from split import save_filelist

if __name__ == '__main__':

    msg.box("HDF5 MANIPULATOR: SPLIT")

    args = parser()

    data = load(args.input)

    # Output names come from the user-given prefix, or from the input
    # file name with its extension stripped (requires `os`, which the
    # original forgot to import).
    filelist = generate_filelist(
        args.prefix or os.path.splitext(args.input)[0],
        check.get_size(data), int(args.size))

    # Python 3 print() call; the original used the Python 2-only
    # `print "..."` statement, a SyntaxError on Python 3.
    print("\nSaving output files:\n")

    # dict.items() replaces the Python 2-only dict.iteritems(),
    # matching the style of the sibling split script.
    for f, r in filelist.items():
        msg.list_fileinfo(f, r)
        hdf5.save_subset_big(f, data, r[0], r[1])

    if args.filelist:
        save_filelist(args.filelist, filelist.keys())

    data.close()
Rename dataset
"""
import os
import sys
import h5py
from parser import get_args_rename as parser
from combine_big import load
import msg

if __name__ == '__main__':

    # The original banner said "COMBINE" — a copy-paste slip: this
    # script renames a dataset (see get_args_rename and the module
    # docstring "Rename dataset").
    msg.box("HDF5 MANIPULATOR: RENAME")

    args = parser()

    # Open read-write so the dataset can be renamed in place.
    f = load(args.input, 'r+')

    # Refuse to rename a dataset that does not exist.
    if args.dataset not in f:
        msg.error("There is no %(key)s in %(file)s."
                  % {"key": args.dataset, "file": args.input})
        sys.exit(1)

    # Refuse to clobber an existing dataset with the new name.
    if args.name in f:
        msg.error("There is %(key)s already in %(file)s."
                  % {"key": args.name, "file": args.input})
        sys.exit(1)

    # h5py-style rename: link the object under the new name,
    # then delete the old link.
    f[args.name] = f[args.dataset]
    del f[args.dataset]

    f.close()
Example #6
Rename dataset
"""
import os
import sys
import h5py
from parser import get_args_rename as parser
from combine_big import load
import msg

if __name__ == '__main__':

    msg.box("HDF5 MANIPULATOR: COMBINE")

    args = parser()

    f = load(args.input, 'r+')

    if args.dataset not in f:
        msg.error("There is no %(key)s in %(file)s." % {
            "key": args.dataset,
            "file": args.input
        })
        sys.exit(1)

    if args.name in f:
        msg.error("There is %(key)s already in %(file)s." % {
            "key": args.name,
            "file": args.input
        })
        sys.exit(1)