Example #1
split_big_special is a special-purpose, admittedly hacky script that requires
inputs of a specific shape -- edit the script directly if you want to adapt it.
Note that `--size` is a required argument here, but it is only a dummy and its
value is ignored.
"""
import os
from parser import get_args_split as parser
import msg
import hdf5
from combine_big import load
from split import generate_uneven_filelist
from split import save_filelist

if __name__ == '__main__':

    msg.box("HDF5 MANIPULATOR: SPLIT")

    args = parser()

    data = load(args.input)

    # TODO - come up with a clever way to generalize this...
    new_sizes = [(0, 15000), (15000, 17500), (17500, 20000)]
    new_names_ext = ['_train.hdf5', '_valid.hdf5', '_test.hdf5']
    new_filelist = zip(new_names_ext, new_sizes)

    filelist = generate_uneven_filelist(
        args.prefix or os.path.splitext(args.input)[0], new_filelist)

    print("\nSaving output files:\n")
Example #2
    data = hdf5.load(filename)

    print "\nThe following datasets were found in %s:\n" % filename
    msg.list_dataset(data)

    check.key_exists(match, data, filename)

    if keys:
        msg.info("Using only: " + keys)
        update_data(data, [k.strip() for k in keys.split(',')], match)

    return data

if __name__ == '__main__':

    msg.box("HDF5 MANIPULATOR: COMBINE")

    args = parser()

    data1 = get_data(args.input1, args.match, args.keys1)
    data2 = get_data(args.input2, args.match, args.keys2)

    check.different_keys(data1, data2, args.match)

    data = merge_data(data1, data2, args.match,
                      args.print_warnings, args.show_progress)

    print "\nThe following datasets will be saved in %s:\n" % args.output
    msg.list_dataset(data)

    hdf5.save(args.output, data)
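
`merge_data` itself is not part of this excerpt. A rough sketch of the idea, assuming each data dict maps dataset names to NumPy arrays and that `match` names a dataset both files share and that must agree entry by entry; the name `merge_data_sketch` and the error handling are illustrative:

import numpy as np


def merge_data_sketch(data1, data2, match):
    """Combine two dataset dicts that describe the same entries.

    Illustrative only: checks that the shared `match` dataset agrees,
    then returns a single dict with the union of all other datasets.
    """
    if not np.array_equal(data1[match], data2[match]):
        raise ValueError("'%s' does not agree between the inputs" % match)
    merged = dict(data1)
    for key, values in data2.items():
        if key != match:
            merged[key] = values
    return merged


a = {"event_id": np.arange(3), "energy": np.ones(3)}
b = {"event_id": np.arange(3), "label": np.zeros(3)}
print(sorted(merge_data_sketch(a, b, "event_id")))  # ['energy', 'event_id', 'label']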
Example #3
        metavar="[path/to/filename]", required=True,
        help="path to output hdf5 file"
    )
    required.add_argument(
        "--imgnames", action="store", dest="imgnames",
        metavar="[imgs tensor]", required=True,
        help="name of images tensor to be rescaled"
    )

    optional = parser.add_argument_group("optional arguments")
    optional.add_argument(
        "--low", action="store", dest="low", metavar="[lower bound]",
        type=float, required=False, help="lower float bound", default=0.05
    )
    optional.add_argument(
        "--high", action="store", dest="high", metavar="[upper bound]",
        type=float, required=False, help="upper float bound", default=0.95
    )

    return parser.parse_args()


if __name__ == '__main__':
    msg.box("HDF5 MANIPULATOR: UINT8FLOAT_RESCALER")
    args = get_args_conv_rescale()
    print(args)
    data = hdf5.load(args.input)
    data[args.imgnames] = data[args.imgnames] * \
        (args.high - args.low) / 255.0 + args.low
    hdf5.save(args.output, data)
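
The rescaling line maps uint8 pixel values from [0, 255] linearly onto [low, high]. A quick numeric check of the formula with the default bounds 0.05 and 0.95:

import numpy as np

pixels = np.array([0, 128, 255], dtype=np.uint8)
low, high = 0.05, 0.95

# value * (high - low) / 255 + low maps 0 -> low and 255 -> high
print(pixels * (high - low) / 255.0 + low)  # [0.05  0.5018  0.95] (approximately)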
Example #4
"""
import os
import sys
from collections import OrderedDict
from parser import get_args_split as parser
import msg
import hdf5
import h5py
import check
from combine_big import load
from split import generate_filelist
from split import save_filelist

if __name__ == '__main__':

    msg.box("HDF5 MANIPULATOR: SPLIT")

    args = parser()

    data = load(args.input)

    filelist = generate_filelist(
        args.prefix or os.path.splitext(args.input)[0],
        check.get_size(data), int(args.size))

    print "\nSaving output files:\n"

    for f, r in filelist.iteritems():
        msg.list_fileinfo(f, r)
        hdf5.save_subset_big(f, data, r[0], r[1])
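
`hdf5.save_subset_big` lives in the helper module and is not shown here. A minimal sketch, assuming each dataset's first axis indexes entries and `begin`/`end` bound the rows to copy; the helper name and compression choice below are assumptions, not the original implementation:

import h5py


def save_subset_sketch(filename, data, begin, end):
    """Write rows [begin:end) of every dataset to a new HDF5 file.

    Illustrative stand-in for hdf5.save_subset_big: `data` is assumed
    to behave like a dict of array-likes (or an open h5py.File) whose
    first axis indexes entries.
    """
    with h5py.File(filename, "w") as out:
        for key in data:
            out.create_dataset(key, data=data[key][begin:end],
                               compression="gzip")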
Example #5
    output -- output file
    keys -- keys to be copied
    """

    for k in keys:
        if k not in source:
            msg.warning("%s requested, but not found." % k)
            continue
        msg.info("Copying %s" % k)
        source.copy(k, output)


if __name__ == '__main__':

    msg.box("HDF5 MANIPULATOR: EXTRACT")

    args = parser()
    f = h5py.File(args.input, 'r')
    o = h5py.File(args.output, 'w')

    print("The following datasets were found in %s:\n" % args.input)
    msg.list_dataset(f)

    copy(f, o, [k.strip() for k in args.keys.split(',')])

    if len(o):
        print("\nThe following dataset were saved in %s:\n" % args.output)
        msg.list_dataset(o)
    else:
        msg.warning("No datasets were copied.")
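
The copy helper relies on h5py's built-in `Group.copy`, which duplicates a dataset, including its attributes, into another open file. A small self-contained demonstration; the file names are throwaway examples:

import h5py
import numpy as np

# throwaway source and destination files
with h5py.File("demo_src.hdf5", "w") as src, \
        h5py.File("demo_dst.hdf5", "w") as dst:
    src.create_dataset("keep_me", data=np.arange(5))
    src.create_dataset("skip_me", data=np.zeros(3))
    src.copy("keep_me", dst)   # same call the copy() helper uses
    print(list(dst))           # ['keep_me']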
Example #6
    for f in filelist:
        print "\t - %s" % f

    data = OrderedDict()
    attrs = OrderedDict()

    for f in filelist:
        data[f], attrs[f] = hdf5.load(f)

    hdf5.save(outputfile, *merge_data(data, attrs))

    msg.info("Done")


if __name__ == '__main__':

    msg.box("HDF5 MANIPULATOR: MERGE")

    parser = argparse.ArgumentParser(description='')
    parser.add_argument('--input', type=str, nargs='+', help='input hdf5 list')
    parser.add_argument('--output', type=str, help='output hdf5 file')
    args = parser.parse_args()

    filelist = args.input

    if not filelist:
        msg.error("No files matching --input were found.")
        sys.exit(1)

    merge_data_filenames(filelist, args.output)
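
The actual merging happens in `merge_data`, which is not shown. A plausible sketch, assuming every input file exposes the same dataset names and entries are simply stacked along the first axis; the name `merge_data_sketch` and the attribute handling are illustrative:

from collections import OrderedDict

import numpy as np


def merge_data_sketch(data, attrs):
    """Concatenate per-file dataset dicts along the first axis.

    Illustrative only: `data` and `attrs` map filename -> dict, as
    built in the loop above; attributes are taken from the first file
    and are assumed to agree across inputs.
    """
    parts = OrderedDict()
    for per_file in data.values():
        for key, values in per_file.items():
            parts.setdefault(key, []).append(values)
    merged = OrderedDict(
        (key, np.concatenate(chunks)) for key, chunks in parts.items())
    first_attrs = next(iter(attrs.values())) if attrs else {}
    return merged, first_attrs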
Example #7
    """

    data = h5py.File(source, 'r')

    print "\nAdding entries from %(f)s in [%(i)d:%(f)d]" \
          % {"f": source, "i": range[0], "f": range[1]}
    check.check_keys(data, output)
    check.check_shapes(data, output)
    for key in data:
        output[key][range[0]:range[1]] = data[key]

    data.close()

if __name__ == '__main__':

    msg.box("HDF5 MANIPULATOR: MERGE")

    args = parser()

    filelist = get_filelist([f.strip() for f in args.input_files.split(',')])

    if not filelist:
        msg.error("No files matching --input were found.")
        sys.exit(1)

    print "The following input files were found:\n"

    for f in filelist:
        print "\t - %s" % f

    output = h5py.File(args.output, 'w')
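
For the slice assignment `output[key][range[0]:range[1]] = data[key]` to work, the datasets in the output file must already exist with the combined length. A sketch of that preparation step, assuming all input files share dataset names, dtypes and per-entry shapes; the helper name and compression setting are assumptions:

import h5py


def create_output_datasets(output, first_input, total_entries):
    """Pre-create full-length datasets in the merged output file.

    Hypothetical helper: copies each dataset's dtype and per-entry
    shape from the first input file, but stretches the first axis to
    the combined number of entries across all inputs.
    """
    with h5py.File(first_input, "r") as template:
        for key in template:
            shape = (total_entries,) + template[key].shape[1:]
            output.create_dataset(key, shape=shape,
                                  dtype=template[key].dtype,
                                  compression="gzip")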
Example #8
    required = parser.add_argument_group("required arguments")
    required.add_argument("--input",
                          action="store",
                          dest="input",
                          metavar="[path/to/filename]",
                          required=True,
                          help="path to input hdf5 file")
    required.add_argument("--output",
                          action="store",
                          dest="output",
                          metavar="[path/to/filename]",
                          required=True,
                          help="path to output hdf5 file")
    required.add_argument("--tensor",
                          action="store",
                          dest="tensor",
                          metavar="[imgs tensor]",
                          required=True,
                          help="name of tensor to be changed")

    return parser.parse_args()


if __name__ == '__main__':
    msg.box("HDF5 MANIPULATOR: TFTNSR2PTTNSR")
    args = get_args_tnsr()
    data = hdf5.load(args.input)
    data[args.tensor] = np.moveaxis(data[args.tensor], -1, 1)
    hdf5.save(args.output, data)
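
`np.moveaxis(x, -1, 1)` turns a TensorFlow-style NHWC image tensor into the NCHW layout expected by PyTorch. A quick shape check:

import numpy as np

batch = np.zeros((32, 28, 28, 3))        # NHWC: batch, height, width, channels
print(np.moveaxis(batch, -1, 1).shape)   # (32, 3, 28, 28) -- NCHW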
Example #9
    output -- output file
    keys -- keys to be copied
    """

    for k in keys:
        if k not in source:
            msg.warning("%s requested, but not found." % k)
            continue
        msg.info("Copying %s" % k)
        source.copy(k, output)


if __name__ == '__main__':

    msg.box("HDF5 MANIPULATOR: EXTRACT")

    args = parser()
    f = h5py.File(args.input, 'r')
    o = h5py.File(args.output, 'w')

    print "The following datasets were found in %s:\n" % args.input
    msg.list_dataset(f)

    copy(f, o, [k.strip() for k in args.keys.split(',')])

    if len(o):
        print "\nThe following dataset were saved in %s:\n" % args.output
        msg.list_dataset(o)
    else:
        msg.warning("No datasets were copied.")