Example #1
def ingest_mnist(root_dir, overwrite=False):
    '''
    Save MNIST dataset as PNG files
    '''
    out_dir = os.path.join(root_dir, 'mnist')

    set_names = ('train', 'val')
    manifest_files = [os.path.join(out_dir, setn + '-index.csv') for setn in set_names]

    if (all([os.path.exists(manifest) for manifest in manifest_files]) and not overwrite):
        return manifest_files

    dataset = {k: s for k, s in zip(set_names, MNIST(out_dir, False).load_data())}

    # Write out label files and setup directory structure
    lbl_paths, img_paths = dict(), dict(train=dict(), val=dict())
    for lbl in range(10):
        lbl_paths[lbl] = ensure_dirs_exist(os.path.join(out_dir, 'labels', str(lbl) + '.txt'))
        np.savetxt(lbl_paths[lbl], [lbl], fmt='%d')
        for setn in ('train', 'val'):
            img_paths[setn][lbl] = ensure_dirs_exist(os.path.join(out_dir, setn, str(lbl) + '/'))

    # Now write out image files and manifests
    for setn, manifest in zip(set_names, manifest_files):
        records = []
        for idx, (img, lbl) in enumerate(tqdm(zip(*dataset[setn]))):  # noqa pylint: disable=zip-builtin-not-iterating
            img_path = os.path.join(img_paths[setn][lbl], str(idx) + '.png')
            im = Image.fromarray(img)
            im.save(img_path, format='PNG')  # img_path already contains out_dir
            records.append((os.path.relpath(img_path, out_dir),
                            os.path.relpath(lbl_paths[lbl], out_dir)))
        np.savetxt(manifest, records, fmt='%s,%s')

    return manifest_files
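All of the examples on this page route their output paths through ensure_dirs_exist, which the page itself never defines. A minimal sketch consistent with how the examples use it (an assumption, not neon's verbatim source): create any missing parent directories and hand the path back unchanged, doing nothing for a bare filename.

import os

def ensure_dirs_exist(path):
    # Create the directory portion of `path` if it is missing, then
    # return the path so calls can be chained inline, as in the
    # examples above and below.  A bare filename (empty dirname)
    # passes through untouched, matching the empty-dir-path tests.
    outdir = os.path.dirname(path)
    if outdir != '' and not os.path.exists(outdir):
        os.makedirs(outdir)
    return path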
Example #2
def ingest_cifar10(out_dir, padded_size, overwrite=False):
    """
    Save CIFAR-10 dataset as PNG files
    """
    dataset = dict()
    cifar10 = CIFAR10(path=out_dir, normalize=False)
    dataset['train'], dataset['val'], _ = cifar10.load_data()
    pad_size = (padded_size - 32) // 2 if padded_size > 32 else 0
    pad_width = ((0, 0), (pad_size, pad_size), (pad_size, pad_size))

    set_names = ('train', 'val')
    manifest_files = [
        os.path.join(out_dir, setn + '-index.csv') for setn in set_names
    ]

    cfg_file = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                            'train.cfg')
    log_file = os.path.join(out_dir, 'train.log')
    manifest_list_cfg = ', '.join(
        [k + ':' + v for k, v in zip(set_names, manifest_files)])

    with open(cfg_file, 'w') as f:
        f.write('manifest = [{}]\n'.format(manifest_list_cfg))
        f.write('manifest_root = {}\n'.format(out_dir))
        f.write('log = {}\n'.format(log_file))
        f.write('epochs = 165\nrng_seed = 0\nverbose = True\neval_freq = 1\n')
        f.write('backend = gpu\nbatch_size = 64\n')

    if all([os.path.exists(manifest)
            for manifest in manifest_files]) and not overwrite:
        return manifest_files

    # Write out label files and setup directory structure
    lbl_paths, img_paths = dict(), dict(train=dict(), val=dict())
    for lbl in range(10):
        lbl_paths[lbl] = ensure_dirs_exist(
            os.path.join(out_dir, 'labels',
                         str(lbl) + '.txt'))
        np.savetxt(lbl_paths[lbl], [lbl], fmt='%d')
        for setn in ('train', 'val'):
            img_paths[setn][lbl] = ensure_dirs_exist(
                os.path.join(out_dir, setn,
                             str(lbl) + '/'))

    # Now write out image files and manifests
    for setn, manifest in zip(set_names, manifest_files):
        records = []
        for idx, (img, lbl) in enumerate(tqdm(zip(*dataset[setn]))):
            img_path = os.path.join(img_paths[setn][lbl[0]], str(idx) + '.png')
            im = np.pad(img.reshape((3, 32, 32)), pad_width, mode='mean')
            im = Image.fromarray(
                np.uint8(np.transpose(im, axes=[1, 2, 0]).copy()))
            im.save(img_path, format='PNG')
            records.append((os.path.relpath(img_path, out_dir),
                            os.path.relpath(lbl_paths[lbl[0]], out_dir)))
        np.savetxt(manifest, records, fmt='%s,%s')

    return manifest_files
Example #3
def dump_metrics(dump_file, experiment_file, start_time, elapsed_time,
                 backend_name, metrics, field_sep="\t"):
    """
    Write or append collected metric values to the specified flat file.

    Arguments:
        dump_file (str): path to the file to write.  Created if it doesn't
                         exist; appended to (without a header) if it does.
        experiment_file (str): path to yaml file used to run this experiment
        start_time (str): date and time at which experiment was started.
        elapsed_time (float): time taken to run the experiment.
        backend_name (str): name of the backend on which the experiment ran.
        metrics (dict): Collection of metric values, as returned from
                        FitPredictErrorExperiment.run() call.
        field_sep (str, optional): string used to separate each field in
                                   dump_file.  Defaults to tab character.
    """
    if dump_file is None or dump_file == '':
        df = sys.stdout
    elif not os.path.exists(dump_file) or os.path.getsize(dump_file) == 0:
        ensure_dirs_exist(dump_file)
        df = open(dump_file, 'w')
        metric_names = []
        if isinstance(metrics, dict):
            metric_names = ["%s-%s" % (metric.lower(), dset.lower())
                            for metric in sorted(metrics.keys())
                            for dset in sorted(metrics[metric].keys())]
        df.write(field_sep.join(["host", "architecture", "os",
                                 "os_kernel_release", "neon_version",
                                 "backend",
                                 "yaml_name", "yaml_sha1", "start_time",
                                 "elapsed_time"] + metric_names) + "\n")
    else:
        df = open(dump_file, 'a')
    info = os.uname()
    trunc_exp_name = ("..." + os.path.sep +
                      os.path.dirname(experiment_file).split(os.path.sep)[-1] +
                      os.path.sep +
                      os.path.basename(experiment_file))
    # TODO: better handle situation where metrics recorded differ from those
    # already in file
    metric_vals = []
    if isinstance(metrics, dict):
        metric_vals = ["%.5f" % metrics[metric][dset] for metric in
                       sorted(metrics.keys()) for dset in
                       sorted(metrics[metric].keys())]
    df.write(field_sep.join([x.replace("\t", " ") for x in
                             [info[1], info[4], info[0], info[2],
                              neon.__version__, backend_name, trunc_exp_name,
                              hashlib.sha1(open(experiment_file,
                                                'rb').read()).hexdigest(),
                              start_time, "%.3f" % elapsed_time] +
                             metric_vals]) + "\n")
    if df is not sys.stdout:
        df.close()
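A sketch of how an experiment runner might call this; every argument value below is hypothetical:

dump_metrics('results/metrics.tsv', 'experiments/mnist.yaml',
             start_time='2015-06-01 12:00:00', elapsed_time=42.7,
             backend_name='gpu',
             metrics={'MisclassPercentage': {'test': 2.5, 'train': 1.25}})

The first call writes the header row (host, architecture, ..., misclasspercentage-test, misclasspercentage-train) plus one data row; later calls against the same file append data rows only.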
Example #4
 def test_dir_creation(self):
     test_dir = os.path.join('.', 'temp_dir')
     test_file = os.path.join(test_dir, 'temp_file.txt')
     assert not os.path.exists(test_file)
     assert not os.path.isdir(test_dir)
     ensure_dirs_exist(test_file)
     try:
         assert os.path.isdir(test_dir)
     finally:
         try:
             os.rmdir(test_dir)
         except OSError:
             pass
Example #5
 def test_empty_dir_path(self):
     test_file = 'temp_file.txt'
     assert not os.path.exists(test_file)
     assert not os.path.isdir(test_file)
     ensure_dirs_exist(test_file)
     try:
         assert not os.path.isdir(test_file)
         assert not os.path.exists(test_file)
     finally:
         try:
             os.rmdir(test_file)
         except OSError:
             pass
Example #6
    def _target_filename(self, target):
        """
        Return a filename of a file containing a binary representation of
        target.  If no such file exists, make one.
        """
        target_filename = self._target_filenames.get(target)
        if target_filename is None:
            target_filename = os.path.join(self.out_dir, 'labels', str(target) + '.txt')
            ensure_dirs_exist(target_filename)
            np.savetxt(target_filename, [target], '%d')
            self._target_filenames[target] = target_filename

        return target_filename
Example #7
    def _target_filename(self, target):
        """
        Return a filename of a file containing a binary representation of
        target.  If no such file exists, make one.
        """
        target_filename = self._target_filenames.get(target)
        if target_filename is None:
            target_filename = os.path.join(self.out_dir, 'labels',
                                           str(target) + '.txt')
            ensure_dirs_exist(target_filename)
            np.savetxt(target_filename, [target], '%d')
            self._target_filenames[target] = target_filename

        return target_filename
Example #8
File: data.py Project: rlugojr/neon
def ingest_cifar10(out_dir, padded_size, overwrite=False):
    '''
    Save CIFAR-10 dataset as PNG files
    '''
    dataset = dict()
    cifar10 = CIFAR10(path=out_dir, normalize=False)
    dataset['train'], dataset['val'], _ = cifar10.load_data()
    pad_size = (padded_size - 32) // 2 if padded_size > 32 else 0
    pad_width = ((0, 0), (pad_size, pad_size), (pad_size, pad_size))

    set_names = ('train', 'val')
    manifest_files = [os.path.join(out_dir, setn + '-index.csv') for setn in set_names]

    cfg_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'train.cfg')
    log_file = os.path.join(out_dir, 'train.log')
    manifest_list_cfg = ', '.join([k+':'+v for k, v in zip(set_names, manifest_files)])

    with open(cfg_file, 'w') as f:
        f.write('manifest = [{}]\n'.format(manifest_list_cfg))
        f.write('manifest_root = {}\n'.format(out_dir))
        f.write('log = {}\n'.format(log_file))
        f.write('epochs = 165\nrng_seed = 0\nverbose = True\neval_freq = 1\n')
        f.write('backend = gpu\nbatch_size = 64\n')

    if (all([os.path.exists(manifest) for manifest in manifest_files]) and not overwrite):
        return manifest_files

    # Write out label files and setup directory structure
    lbl_paths, img_paths = dict(), dict(train=dict(), val=dict())
    for lbl in range(10):
        lbl_paths[lbl] = ensure_dirs_exist(os.path.join(out_dir, 'labels', str(lbl) + '.txt'))
        np.savetxt(lbl_paths[lbl], [lbl], fmt='%d')
        for setn in ('train', 'val'):
            img_paths[setn][lbl] = ensure_dirs_exist(os.path.join(out_dir, setn, str(lbl) + '/'))

    # Now write out image files and manifests
    for setn, manifest in zip(set_names, manifest_files):
        records = []
        for idx, (img, lbl) in enumerate(tqdm(zip(*dataset[setn]))):
            img_path = os.path.join(img_paths[setn][lbl[0]], str(idx) + '.png')
            im = np.pad(img.reshape((3, 32, 32)), pad_width, mode='mean')
            im = Image.fromarray(np.uint8(np.transpose(im, axes=[1, 2, 0]).copy()))
            im.save(img_path, format='PNG')  # img_path already contains out_dir
            records.append((os.path.relpath(img_path, out_dir),
                            os.path.relpath(lbl_paths[lbl[0]], out_dir)))
        np.savetxt(manifest, records, fmt='%s,%s')

    return manifest_files
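Each manifest these ingest functions emit is a two-column CSV of image and label paths relative to the dataset root. A short sketch of reading one back; the root directory is hypothetical, while the filename matches what Example #8 writes:

import csv
import os

manifest_root = '/data/cifar10'  # the out_dir passed to ingest_cifar10
with open(os.path.join(manifest_root, 'train-index.csv')) as f:
    for img_rel, lbl_rel in csv.reader(f):
        img_path = os.path.join(manifest_root, img_rel)  # padded PNG image
        lbl_path = os.path.join(manifest_root, lbl_rel)  # text file holding one class id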
Example #9
def ingest_cifar10(out_dir, overwrite=False):
    '''
    Save CIFAR-10 dataset as PNG files
    '''
    dataset = dict()
    cifar10 = CIFAR10(path=out_dir, normalize=False)
    dataset['train'], dataset['val'], _ = cifar10.load_data()

    set_names = ('train', 'val')
    manifest_files = [
        os.path.join(out_dir, setn + '-index.csv') for setn in set_names
    ]

    if (all([os.path.exists(manifest) for manifest in manifest_files])
            and not overwrite):
        return manifest_files

    # Write out label files and setup directory structure
    lbl_paths, img_paths = dict(), dict(train=dict(), val=dict())
    for lbl in range(10):
        lbl_paths[lbl] = ensure_dirs_exist(
            os.path.join(out_dir, 'labels',
                         str(lbl) + '.txt'))
        np.savetxt(lbl_paths[lbl], [lbl], fmt='%d')
        for setn in ('train', 'val'):
            img_paths[setn][lbl] = ensure_dirs_exist(
                os.path.join(out_dir, setn,
                             str(lbl) + '/'))

    np.random.seed(0)
    # Now write out image files and manifests
    for setn, manifest in zip(set_names, manifest_files):
        records = []
        for idx, (img, lbl) in tqdm(enumerate(zip(*dataset[setn]))):
            img_path = os.path.join(img_paths[setn][lbl[0]], str(idx) + '.png')
            im = img.reshape((3, 32, 32))
            im = Image.fromarray(
                np.uint8(np.transpose(im, axes=[1, 2, 0]).copy()))
            im.save(img_path, format='PNG')
            records.append((img_path, lbl_paths[lbl[0]]))

        np.random.shuffle(records)
        np.savetxt(manifest, records, fmt='%s,%s')

    return manifest_files
Example #10
                           n_extra_layers=4,
                           batch_norm=True,
                           dis_iters=5,
                           wgan_param_clamp=0.01,
                           wgan_train_sched=True)

# setup optimizer
optimizer = RMSProp(learning_rate=5e-5, decay_rate=0.99, epsilon=1e-8)

# setup data provider
train = make_loader(args.manifest['train'], args.manifest_root, model.be,
                    args.subset_pct, random_seed)

# configure callbacks
callbacks = Callbacks(model, **args.callback_args)
fdir = ensure_dirs_exist(
    os.path.join(os.path.dirname(os.path.realpath(__file__)), 'results/'))
fname = os.path.splitext(os.path.basename(__file__))[0] +\
    '_[' + datetime.now().strftime('%Y-%m-%d-%H-%M-%S') + ']'
im_args = dict(filename=os.path.join(fdir, fname),
               hw=64,
               num_samples=args.batch_size,
               nchan=3,
               sym_range=True)
callbacks.add_callback(GANPlotCallback(**im_args))
callbacks.add_callback(GANCostCallback())

# model fit
model.fit(train,
          optimizer=optimizer,
          num_epochs=args.epochs,
          cost=cost,
          callbacks=callbacks)
Example #11
def ingest_kitti(input_dir, out_dir, train_percent=90, overwrite=False):
    """
    Ingests the KITTI dataset. Performs the following ops:
    0. Unzips the files into the output directory.
    1. Converts annotations to JSON format.
    2. Splits the training data into train and validation sets.
    3. Writes manifest files.
    4. Writes a configuration file.

    Arguments:
        input_dir (string): path to folder with KITTI zip files.
        out_dir (string): path to unzip KITTI data
        train_percent (float): percent of data to use for training.
        overwrite (bool): overwrite existing files
    """

    # define paths
    data_dir = ensure_dirs_exist(os.path.join(out_dir, 'kitti'))
    train_manifest = os.path.join(data_dir, 'train.csv')
    val_manifest = os.path.join(data_dir, 'val.csv')

    if not overwrite and os.path.exists(train_manifest) and os.path.exists(val_manifest):
        print("""Found existing manfiest files, skipping ingest,
              Use --overwrite to rerun ingest anyway.""")
        return (train_manifest, val_manifest)

    # unzip files to output directory
    zipfiles = [os.path.join(input_dir, zipfile) for
                zipfile in ['data_object_image_2.zip', 'data_object_label_2.zip']]

    for file in zipfiles:
        with ZipFile(file, 'r') as zf:
            print("Extracting {} to {}".format(file, data_dir))
            zf.extractall(data_dir)

    # get list of images
    img_path = os.path.join(data_dir, 'training', 'image_2')
    annot_path = os.path.join(data_dir, 'training', 'label_2')

    images = [os.path.splitext(os.path.basename(im))[0] for
              im in glob.glob(os.path.join(img_path, '*.png'))]

    print "Found {} images".format(len(images))
    assert len(images) > 0, "Did not found any images. Check your input_dir."

    # for each image, convert the annotation to json

    # create folder names for annotations
    annot_save_dir = ensure_dirs_exist(os.path.join(data_dir, 'training', 'label_2-json/'))
    annot_save_dir_difficult = ensure_dirs_exist(os.path.join(
                                   data_dir, 'training', 'label_2-json-difficult/'))

    print("Writing annotations to: {} and {}".format(annot_save_dir, annot_save_dir_difficult))
    for im in tqdm(images):
        path = os.path.join(annot_path, im + '.txt')
        im_path = os.path.join(img_path, im + '.png')

        assert os.path.exists(im_path)

        out_path = os.path.join(annot_save_dir, im + '.json')
        convert_annot_to_json(path, im_path, out_path, difficult=False)

        out_path = os.path.join(annot_save_dir_difficult, im + '.json')
        convert_annot_to_json(path, im_path, out_path, difficult=True)

    # shuffle files and split into training and validation set.
    np.random.seed(0)
    np.random.shuffle(images)

    train_count = (len(images) * train_percent) // 100
    train = images[:train_count]
    val = images[train_count:]

    # write manifest files
    create_manifest(train_manifest, train, annot_save_dir, img_path, data_dir)
    create_manifest(val_manifest, val, annot_save_dir_difficult, img_path, data_dir)

    # write configuration file
    config_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'kitti.cfg')
    with open(config_path, 'w') as f:
        f.write('manifest = [train:{}, val:{}]\n'.format(train_manifest, val_manifest))
        f.write('manifest_root = {}\n'.format(data_dir))
        f.write('epochs = 14\n')
        f.write('height = 375\n')
        f.write('width = 1242\n')
        f.write('batch_size = 1\n')
    print("Wrote config file to: {}".format(config_path))
Example #12
random_seed = args.rng_seed if args.rng_seed else 0

# load up the mnist data set, padding images to size 32
dataset = MNIST(path=args.data_dir, sym_range=True, size=32, shuffle=True)
train = dataset.train_iter

# create a GAN
model, cost = create_model(dis_model=args.dmodel, gen_model=args.gmodel,
                           cost_type='wasserstein', noise_type='normal',
                           im_size=32, n_chan=1, n_noise=128,
                           n_gen_ftr=args.n_gen_ftr, n_dis_ftr=args.n_dis_ftr,
                           depth=4, n_extra_layers=4,
                           batch_norm=True, dis_iters=5,
                           wgan_param_clamp=0.01, wgan_train_sched=True)

# setup optimizer
optimizer = RMSProp(learning_rate=2e-4, decay_rate=0.99, epsilon=1e-8)

# configure callbacks
callbacks = Callbacks(model, **args.callback_args)
fdir = ensure_dirs_exist(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'results/'))
fname = os.path.splitext(os.path.basename(__file__))[0] +\
    '_[' + datetime.now().strftime('%Y-%m-%d-%H-%M-%S') + ']'
im_args = dict(filename=os.path.join(fdir, fname), hw=32,
               num_samples=args.batch_size, nchan=1, sym_range=True)
callbacks.add_callback(GANPlotCallback(**im_args))
callbacks.add_callback(GANCostCallback())

# model fit
model.fit(train, optimizer=optimizer, num_epochs=args.epochs, cost=cost, callbacks=callbacks)
Example #13
    def ingest_lsun(self,
                    category,
                    dset,
                    lbl_map,
                    tag='latest',
                    overwrite=False,
                    png_conv=False):
        """
        Save LSUN dataset as WEBP or PNG files and generate config and log files
        Arguments:
            category (str): LSUN category
            dset (str): dataset, "train", "val", or "test"
            lbl_map (dict(str:int)): maps a category to an integer
            tag (str, optional): LSUN release tag passed to download_lsun.
                                 Defaults to 'latest'.
            overwrite (bool): whether to overwrite existing data
            png_conv (bool): whether to convert to PNG images
        """
        self.download_lsun(category, dset, tag=tag, overwrite=overwrite)

        dpath = 'test' if dset == 'test' else '{0}_{1}'.format(category, dset)
        dpath = os.path.join(self.path, dpath)
        manifest_file = '{}_index.csv'.format(dpath)

        if os.path.exists(manifest_file) and not overwrite:
            print("LSUN {0} {1} dataset ingested.".format(category, dset))
            print("Manifest file is: " + manifest_file)
            return manifest_file
        if os.path.exists(dpath):
            shutil.rmtree(dpath)
        if os.path.exists(manifest_file):
            os.remove(manifest_file)
        os.makedirs(dpath)

        lbl_paths = dict()
        for lbl in lbl_map:
            lbl_paths[lbl] = ensure_dirs_exist(
                os.path.join(self.path, 'labels', lbl + '.txt'))
            np.savetxt(lbl_paths[lbl], [lbl_map[lbl]], fmt='%d')

        print('Exporting images...')
        env = lmdb.open(dpath + '_lmdb',
                        map_size=MAP_SIZE,
                        max_readers=MAX_NUM_INGEST_PROC,
                        readonly=True)
        count, records = 0, []
        with env.begin(write=False) as txn:
            cursor = txn.cursor()
            for key, val in tqdm(cursor):
                image_out_path = os.path.join(dpath, key + '.webp')
                with open(image_out_path, 'wb') as fp:  # WEBP bytes, write binary
                    fp.write(val)
                count += 1
                if png_conv:  # in case WEBP is not supported, extra step of conversion to PNG
                    image_out_path_ = image_out_path
                    image_out_path = os.path.join(dpath, key + '.png')
                    im = Image.open(image_out_path_).convert('RGB')
                    im.save(image_out_path, 'png')
                    os.remove(image_out_path_)
                records.append((os.path.relpath(image_out_path, self.path),
                                os.path.relpath(lbl_paths[category],
                                                self.path)))
            np.savetxt(manifest_file, records, fmt='%s\t%s')
        print("LSUN {0} {1} dataset ingested.".format(category, dset))
        print("Manifest file is: " + manifest_file)
        return manifest_file
Example #14
def ingest_lsun(lsun_dir,
                category,
                dset,
                lbl_map,
                overwrite=False,
                png_conv=False):
    """
    Save LSUN dataset as WEBP or PNG files and generate config and log files

    Arguments:
        lsun_dir (str): LSUN data directory
        category (str): LSUN category
        dset (str): dataset, "train", "val", or "test"
        lbl_map (dict(str:int)): maps a category to an integer
        overwrite (bool): whether to overwrite existing data
        png_conv (bool): whether to convert to PNG images
    """
    cfg_file = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                            'train.cfg')
    log_file = os.path.join(lsun_dir, 'train.log')
    dpath = 'test' if dset == 'test' else '{0}_{1}'.format(category, dset)
    dpath = os.path.join(lsun_dir, dpath)
    manifest_file = '{}_index.csv'.format(dpath)

    with open(cfg_file, 'w') as f:
        f.write('manifest = [{}:{}]\n'.format(dset, manifest_file))
        f.write('manifest_root = {}\n'.format(lsun_dir))
        f.write('log = {}\n'.format(log_file))
        f.write('epochs = 8\nrng_seed = 0\nverbose = True\neval_freq = 0\n')
        f.write('backend = gpu\nbatch_size = 64\n')
    if os.path.exists(manifest_file) and not overwrite:
        print("LSUN {0} {1} dataset ingested.".format(category, dset))
        print("Manifest file is: " + manifest_file)
        return manifest_file
    if os.path.exists(dpath):
        shutil.rmtree(dpath)
    if os.path.exists(manifest_file):
        os.remove(manifest_file)
    os.makedirs(dpath)

    lbl_paths = dict()
    for lbl in lbl_map:
        lbl_paths[lbl] = ensure_dirs_exist(
            os.path.join(lsun_dir, 'labels', lbl + '.txt'))
        np.savetxt(lbl_paths[lbl], [lbl_map[lbl]], fmt='%d')

    print('Exporting images...')
    env = lmdb.open(dpath + '_lmdb',
                    map_size=MAP_SIZE,
                    max_readers=MAX_NUM_INGEST_PROC,
                    readonly=True)
    count, records = 0, []
    with env.begin(write=False) as txn:
        cursor = txn.cursor()
        for key, val in tqdm(cursor):
            image_out_path = os.path.join(dpath, key + '.webp')
            with open(image_out_path, 'wb') as fp:  # WEBP bytes, write binary
                fp.write(val)
            count += 1
            if png_conv:  # in case WEBP is not supported, extra step of conversion to PNG
                image_out_path_ = image_out_path
                image_out_path = os.path.join(dpath, key + '.png')
                im = Image.open(image_out_path_).convert('RGB')
                im.save(image_out_path, 'png')
                os.remove(image_out_path_)
            records.append((os.path.relpath(image_out_path, lsun_dir),
                            os.path.relpath(lbl_paths[category], lsun_dir)))
        np.savetxt(manifest_file, records, fmt='%s,%s')
    print("LSUN {0} {1} dataset ingested.".format(category, dset))
    print("Manifest file is: " + manifest_file)
    return manifest_file
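A sketch of a call, assuming the bedroom_train_lmdb export already sits under the data directory; the paths and label map are hypothetical:

manifest = ingest_lsun('/data/lsun', 'bedroom', 'train',
                       lbl_map={'bedroom': 0},
                       overwrite=False, png_conv=True)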
Example #15
def dump_metrics(dump_file,
                 experiment_file,
                 start_time,
                 elapsed_time,
                 backend_name,
                 metrics,
                 field_sep="\t"):
    """
    Write or append collected metric values to the specified flat file.

    Arguments:
        dump_file (str): path to the file to write.  Created if it doesn't
                         exist; appended to (without a header) if it does.
        experiment_file (str): path to yaml file used to run this experiment
        start_time (str): date and time at which experiment was started.
        elapsed_time (float): time taken to run the experiment.
        backend_name (str): name of the backend on which the experiment ran.
        metrics (dict): Collection of metric values, as returned from
                        FitPredictErrorExperiment.run() call.
        field_sep (str, optional): string used to separate each field in
                                   dump_file.  Defaults to tab character.
    """
    if dump_file is None or dump_file == '':
        df = sys.stdout
    elif not os.path.exists(dump_file) or os.path.getsize(dump_file) == 0:
        ensure_dirs_exist(dump_file)
        df = open(dump_file, 'w')
        metric_names = []
        if isinstance(metrics, dict):
            metric_names = [
                "%s-%s" % (metric.lower(), dset.lower())
                for metric in sorted(metrics.keys())
                for dset in sorted(metrics[metric].keys())
            ]
        df.write(
            field_sep.join([
                "host", "architecture", "os", "os_kernel_release",
                "neon_version", "backend", "yaml_name", "yaml_sha1",
                "start_time", "elapsed_time"
            ] + metric_names) + "\n")
    else:
        df = open(dump_file, 'a')
    info = os.uname()
    trunc_exp_name = ("..." + os.path.sep +
                      os.path.dirname(experiment_file).split(os.path.sep)[-1] +
                      os.path.sep + os.path.basename(experiment_file))
    # TODO: better handle situation where metrics recorded differ from those
    # already in file
    metric_vals = []
    if isinstance(metrics, dict):
        metric_vals = [
            "%.5f" % metrics[metric][dset] for metric in sorted(metrics.keys())
            for dset in sorted(metrics[metric].keys())
        ]
    df.write(
        field_sep.join([
            x.replace("\t", " ") for x in [
                info[1], info[4], info[0], info[2], neon.__version__,
                backend_name, trunc_exp_name,
                hashlib.sha1(open(experiment_file,
                                  'rb').read()).hexdigest(), start_time,
                "%.3f" % elapsed_time
            ] + metric_vals
        ]) + "\n")
    if df is not sys.stdout:
        df.close()
Example #16
def ingest_kitti(input_dir, out_dir, train_percent=90, overwrite=False):
    """
    Ingests the KITTI dataset. Performs the following ops:
    0. Unzips the files into the output directory.
    1. Converts annotations to JSON format.
    2. Splits the training data into train and validation sets.
    3. Writes manifest files.
    4. Writes a configuration file.

    Arguments:
        input_dir (string): path to folder with KITTI zip files.
        out_dir (string): path to unzip KITTI data
        train_percent (float): percent of data to use for training.
        overwrite (bool): overwrite existing files
    """

    # define paths
    data_dir = ensure_dirs_exist(os.path.join(out_dir, 'kitti'))
    train_manifest = os.path.join(data_dir, 'train.csv')
    val_manifest = os.path.join(data_dir, 'val.csv')

    if not overwrite and os.path.exists(train_manifest) and os.path.exists(val_manifest):
        print("""Found existing manfiest files, skipping ingest,
              Use --overwrite to rerun ingest anyway.""")
        return (train_manifest, val_manifest)

    # unzip files to output directory
    zipfiles = [os.path.join(input_dir, zipfile) for
                zipfile in ['data_object_image_2.zip', 'data_object_label_2.zip']]

    for file in zipfiles:
        with ZipFile(file, 'r') as zf:
            print("Extracting {} to {}".format(file, data_dir))
            zf.extractall(data_dir)

    # get list of images
    img_path = os.path.join(data_dir, 'training', 'image_2')
    annot_path = os.path.join(data_dir, 'training', 'label_2')

    images = [os.path.splitext(os.path.basename(im))[0] for
              im in glob.glob(os.path.join(img_path, '*.png'))]

    print("Found {} images".format(len(images)))
    assert len(images) > 0, "Did not find any images. Check your input_dir."

    # for each image, convert the annotation to json

    # create folder names for annotations
    annot_save_dir = ensure_dirs_exist(os.path.join(data_dir, 'training', 'label_2-json/'))
    annot_save_dir_difficult = ensure_dirs_exist(os.path.join(
                                   data_dir, 'training', 'label_2-json-difficult/'))

    print("Writing annotations to: {} and {}".format(annot_save_dir, annot_save_dir_difficult))
    for im in tqdm(images):
        path = os.path.join(annot_path, im + '.txt')
        im_path = os.path.join(img_path, im + '.png')

        assert os.path.exists(im_path)

        out_path = os.path.join(annot_save_dir, im + '.json')
        convert_annot_to_json(path, im_path, out_path, difficult=False)

        out_path = os.path.join(annot_save_dir_difficult, im + '.json')
        convert_annot_to_json(path, im_path, out_path, difficult=True)

    # shuffle files and split into training and validation set.
    np.random.seed(0)
    np.random.shuffle(images)

    train_count = (len(images) * train_percent) // 100
    train = images[:train_count]
    val = images[train_count:]

    # write manifest files
    create_manifest(train_manifest, train, annot_save_dir, img_path, data_dir)
    create_manifest(val_manifest, val, annot_save_dir_difficult, img_path, data_dir)

    # write configuration file
    config_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'kitti.cfg')
    with open(config_path, 'w') as f:
        f.write('manifest = [train:{}, val:{}]\n'.format(train_manifest, val_manifest))
        f.write('manifest_root = {}\n'.format(data_dir))
        f.write('epochs = 14\n')
        f.write('height = 375\n')
        f.write('width = 1242\n')
        f.write('batch_size = 1\n')
    print("Wrote config file to: {}".format(config_path))