Example #1
def test_augmentation_data_generator(tmpdir):
    dist = DistributionCollection(examplary_tag_distribution())
    dset_fname = str(tmpdir.join("dset.hdf5"))
    samples = 6000
    dset = DistributionHDF5Dataset(dset_fname,
                                   nb_samples=samples,
                                   distribution=dist)
    labels = dist.sample(samples)
    labels = dist.normalize(labels)
    fake = np.random.random((samples, 1, 8, 8))
    discriminator = np.random.random((samples, 1))
    dset.append(labels=labels, fake=fake, discriminator=discriminator)
    dset.close()

    dset = DistributionHDF5Dataset(dset_fname)
    bs = 32
    names = ['labels', 'fake']
    assert 'labels' in next(dset.iter(bs, names))
    assert next(dset.iter(bs))['labels'].dtype.names == tuple(dist.names)

    dset_iters = [lambda bs: bit_split(dataset_iterator(dset, bs))]
    data_gen = lambda bs: zip_dataset_iterators(dset_iters, bs)
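    # bit_split presumably expands the packed 'bits' label into individual
    # bit_0 ... bit_11 entries, and zip_dataset_iterators merges the batches
    # of several dataset iterators into a single stream.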
    label_names = ['bit_0', 'bit_11', 'x_rotation']
    aug_gen = augmentation_data_generator(data_gen, 'fake', label_names)
    outs = next(aug_gen(bs))
    assert len(outs[0]) == 32
    assert len(outs[1]) == len(label_names)

    gen = aug_gen(bs)
    for i, batch in enumerate(gen):
        if i == 2 * samples // bs:
            break
        assert batch is not None
        assert batch[0].shape == (bs, 1, 8, 8)
        assert len(batch[1]) == len(label_names)
Example #2
    def data_generator_factory(self):
        datasets = [
            DistributionHDF5Dataset(fname) for fname in self.train_sets
        ]
        dist = None
        for dset in datasets:
            if dist is None:
                dist = dset.get_tag_distribution()
            else:
                if dist != dset.get_tag_distribution():
                    raise Exception("Distribution of datasets must match")
        label_output_sizes = self.get_label_output_sizes()
        all_label_names = ['bit_{}'.format(i) for i in range(12)] + \
            [n for n, _ in label_output_sizes]
        dataset_names = ['labels', self.data_name]
        if 'discriminator' in list(dset.keys()):
            dataset_names.append('discriminator')
        dataset_iterators = [
            # bind dset as a default argument so each lambda keeps its own
            # dataset instead of sharing the last one from the loop
            lambda bs, dset=dset: bit_split(
                dataset_iterator(dset, bs, dataset_names,
                                 self.discriminator_threshold,
                                 self.shuffle_data))
            for dset in datasets
        ]

        augmentation = self.augmentation()

        def wrapper(bs):
            for batch in zip_dataset_iterators(dataset_iterators, bs):
                data = batch[self.data_name]
                labels = [batch[l] for l in all_label_names]
                yield augmentation(data), labels

        return wrapper
Example #3
def run(gt_file,
        videos,
        images,
        visualize_debug,
        output,
        fix_utc_2014,
        nb_bits=12):
    """
    Converts bb_binary ground truth Cap'n Proto files to hdf5 files and
    extracts the corresponding rois from videos or images.
    """
    def get_filenames(f):
        if f is None:
            return []
        else:
            return [line.rstrip('\n') for line in f.readlines()]

    gen_factory = FrameGeneratorFactory(get_filenames(videos),
                                        get_filenames(images))
    if os.path.exists(output):
        os.remove(output)

    distribution = DistributionCollection([('bits', Bernoulli(), nb_bits)])
    dset = DistributionHDF5Dataset(output, distribution)
    camIdxs = []
    periods = []
    for fname in gt_file:
        fc = load_frame_container(fname)
        camIdx, start_dt, end_dt = parse_video_fname(fname)
        if fix_utc_2014 and start_dt.year == 2014:
            start_dt -= timedelta(hours=2)
        gt_frames = []
        gen = gen_factory.get_generator(camIdx, start_dt)
        for frame, (video_frame, video_filename) in zip(fc.frames, gen):
            gt = {}
            np_frame = convert_frame_to_numpy(frame)
            rois, mask, positions = extract_gt_rois(np_frame, video_frame,
                                                    start_dt)
            for name in np_frame.dtype.names:
                gt[name] = np_frame[name][mask]
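            # decodedId is unpacked into individual bits and mapped from
            # {0, 1} to {-1, 1}; the tag rois are rescaled to [-1, 1] as well.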
            bits = [int_id_to_binary(id)[::-1] for id in gt["decodedId"]]
            gt["bits"] = 2 * np.array(bits, dtype=np.float) - 1
            gt["tags"] = 2 * (rois / 255.).astype(np.float16) - 1
            gt['filename'] = os.path.basename(video_filename)
            gt['camIdx'] = camIdx
            gt_frames.append(gt)
            print('.', end='', flush=True)
        print()
        gt_period = GTPeriod(camIdx, start_dt, end_dt, fname, gt_frames)

        periods.append(
            [int(gt_period.start.timestamp()),
             int(gt_period.end.timestamp())])
        camIdxs.append(gt_period.camIdx)
        append_gt_to_hdf5(gt_period, dset)

    dset.attrs['periods'] = np.array(periods)
    dset.attrs['camIdxs'] = np.array(camIdxs)
    visualize_detection_tiles(dset, os.path.splitext(output)[0])
    dset.close()
Example #4
def run(g_weights_fname, d_weights_fname, selected_outputs, nb_samples,
        out_fname):
    generator = load_model(g_weights_fname, render_gan_custom_objects())
    discriminator = load_model(d_weights_fname, render_gan_custom_objects())
    generator._make_predict_function()
    discriminator._make_predict_function()
    dist_json = get_hdf5_attr(g_weights_fname, 'distribution').decode('utf-8')
    dist = diktya.distributions.load_from_json(dist_json)
    os.makedirs(os.path.dirname(out_fname), exist_ok=True)
    dset = DistributionHDF5Dataset(out_fname,
                                   mode='w',
                                   nb_samples=nb_samples,
                                   distribution=dist)
    batch_size = 100
    available_datasets = [
        name for name in generator.output_names if name != 'labels'
    ]
    print("Available outputs: " + ", ".join(available_datasets))
    generator_predict = predict_wrapper(
        lambda x: generator.predict(x, batch_size), generator.output_names)

    def sample_generator():
        z_shape = get_layer(generator.inputs[0]).batch_input_shape
        while True:
            z = np.random.uniform(-1, 1, (batch_size, ) + z_shape[1:])
            outs = generator_predict(z)
            raw_labels = outs.pop('labels')
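            # split the flat label matrix returned by the generator back into
            # the named fields of the distribution's normalized dtype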
            pos = 0
            labels = np.zeros(len(raw_labels), dtype=dist.norm_dtype)
            for name, size in dist.norm_nb_elems.items():
                labels[name] = raw_labels[:, pos:pos + size]
                pos += size
            deleted_keys = []
            if selected_outputs != 'all':
                for name in list(outs.keys()):
                    if name not in selected_outputs:
                        del outs[name]
                        deleted_keys.append(name)
            if not outs:
                raise Exception(
                    "Got no outputs. Removed {}. Selected outputs {}".format(
                        deleted_keys, selected_outputs))
            outs['labels'] = labels
            outs['discriminator'] = discriminator.predict(outs['fake'])
            yield outs

    bar = progressbar.ProgressBar(max_value=nb_samples)
    for batch in sample_generator():
        pos = dset.append(**batch)
        bar.update(pos)
        if pos >= nb_samples:
            break
    dset.close()
    print("Saved dataset with fakes and labels to: {}".format(out_fname))
Example #5
    def get_label_distributions(self):
        datasets = [
            DistributionHDF5Dataset(fname) for fname in self.train_sets
        ]
        if self._label_distribution is None:
            for dset in datasets:
                if self._label_distribution is None:
                    self._label_distribution = dset.get_tag_distribution()
                else:
                    if self._label_distribution != dset.get_tag_distribution():
                        raise Exception("Distribution of datasets must match")
        return self._label_distribution
Example #6
def test_distribution_hdf5_dataset(tmpdir):
    with pytest.raises(Exception):
        DistributionHDF5Dataset(str(
            tmpdir.join('dataset_no_distribution.hdf5')),
                                nb_samples=1000)

    dist = DistributionCollection(examplary_tag_distribution(nb_bits=12))
    labels = dist.sample(32)
    image = np.random.random((32, 1, 8, 8))
    dset = DistributionHDF5Dataset(str(tmpdir.join('dataset.hdf5')),
                                   distribution=dist,
                                   nb_samples=1000)
    dset.append(labels=labels, image=image)
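    # each label field gets its own dataset internally, but iterating the
    # dataset yields them packed back into a single structured 'labels' array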
    for name in dist.names:
        assert name in dset
    for batch in dset.iter(batch_size=32):
        for name in dist.names:
            assert name not in batch
        assert 'labels' in batch
        assert batch['labels'].dtype == dist.norm_dtype
        break
Example #7
    def data_generator_factory(self, additional_datasets=[], with_batch=False):
        datasets = [
            DistributionHDF5Dataset(fname) for fname in self.train_sets
        ]
        dist = None
        for dset in datasets:
            if dist is None:
                dist = dset.get_tag_distribution()
            else:
                if dist != dset.get_tag_distribution():
                    raise Exception("Distribution of datasets must match")
        label_output_sizes = self.get_label_output_sizes()
        all_label_names = ['bit_{}'.format(i) for i in range(12)] + \
            [n for n, _ in label_output_sizes]
        dataset_names = ['labels'] + self.iterator_data_names() + \
            additional_datasets
        print("Used datasets: " + str(dataset_names))
        if 'discriminator' in list(dset.keys()):
            dataset_names.append('discriminator')
        dataset_iterators = [
            # bind dset as a default argument so each lambda keeps its own
            # dataset instead of sharing the last one from the loop
            lambda bs, dset=dset: bit_split(
                dataset_iterator(dset, bs, dataset_names,
                                 self.discriminator_threshold,
                                 self.shuffle_data))
            for dset in datasets
        ]

        augmentation = self.augmentation()
        handmade_augmentation = self.get_handmade_augmentation()

        def wrapper(iterator):
            def data_gen(bs):
                for batch in iterator(bs):
                    data = batch[self.data_name]
                    labels = [batch[l] for l in all_label_names]
                    label_mask = [
                        np.ones(l.shape[0], dtype=np.float32) for l in labels
                    ]
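                    # masks of ones presumably act as per-sample loss weights,
                    # so every sample contributes fully to each label's loss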
                    if self.use_handmade_augmentation:
                        data = handmade_augmentation(batch)

                    to_yield = [augmentation(data), labels, label_mask]
                    if with_batch:
                        to_yield.append(batch)
                    yield to_yield

            return data_gen

        return lambda bs: zip_dataset_iterators(
            list(map(wrapper, dataset_iterators)), bs)
Example #8
    def store(self, outputs, fname):
        store_outputs = {}
        for name, arr in outputs.items():
            if name == 'labels':
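                # reinterpret the flat label matrix as the distribution's
                # structured norm_dtype so each field can be stored by name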
                labels = arr.view(dtype=self.distribution.norm_dtype)
                store_outputs['labels'] = labels
            else:
                store_outputs[name] = arr
        nb_samples = len(store_outputs['fake'])
        if os.path.exists(fname) and self.overwrite:
            os.remove(fname)
        dset = DistributionHDF5Dataset(fname,
                                       self.distribution,
                                       nb_samples=nb_samples)

        dset.append(**store_outputs)
        dset.close()
Example #9
def run(tag_dist, output_fname, force, nb_samples):
    os.makedirs(os.path.dirname(output_fname), exist_ok=True)
    if os.path.exists(output_fname) and force:
        print("Deleted {}".format(output_fname))
        os.remove(output_fname)
    else:
        assert not os.path.exists(output_fname), \
            "File {} already exists. Use --force to override it".format(
                output_fname)
    basename, _ = os.path.splitext(output_fname)
    anti_name = basename + "_anti_{}.png"
    hist_name = basename + "_hist_{}.png"
    plot_anitaliasing(tag_dist, anti_name, 1)
    plot_anitaliasing(tag_dist, anti_name, 2)
    plot_anitaliasing(tag_dist, anti_name, 4)
    plot_anitaliasing(tag_dist, anti_name, 8)

    labels, masks, _ = next(generator(tag_dist, 10000, antialiasing=2))
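    # sanity check: the normalized labels should be roughly zero-centered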
    for key in labels.dtype.names:
        m = labels[key].mean()
        s = labels[key].std()
        print("{}: {:.3f}, {:.3f}".format(key, m, s))
        assert abs(m) <= 0.03

    for label_name in sorted(set(labels.dtype.names) - set(['bits'])):
        x = labels[label_name]
        plt.hist(x.flatten(), bins=40, density=True)
        plt.savefig(hist_name.format(label_name))
        plt.clf()

    dset = DistributionHDF5Dataset(output_fname, distribution=tag_dist,
                                   nb_samples=nb_samples, mode='w')
    progbar = Progbar(nb_samples)
    batch_size = min(25000, nb_samples)

    for labels, tags, depth_map in generator(tag_dist, batch_size, antialiasing=4):
        pos = dset.append(labels=labels, tag3d=tags, depth_map=depth_map)
        progbar.update(pos)
        if pos >= nb_samples:
            break

    print("Saved tag 3d dataset to: {}".format(output_fname))
    dist_fname = basename + "_distribution.json"
    with open(dist_fname, "w+") as dist_f:
        dist_f.write(tag_dist.to_json())
        print("Saved distribution to: {}".format(dist_fname))
Example #10
    def _calculate_predictions():
        from deepdecoder.scripts.train_decoder import save_samples
        name = get_marker(data_set)
        print("Loading GT data: {}".format(data_set))
        h5_truth = h5py.File(data_set, 'r')
        decoder = Decoder(tp.model_fname(), {
            'ScaleInTestPhase': ScaleInTestPhase,
            'RandomSwitch': RandomSwitch,
        })
        print(
            "Loaded decoder. Got model with {:.2f} million parameters.".format(
                decoder.model.count_params() / 1e6))
        dist = decoder.distribution
        assert decoder.distribution == tp.get_label_distributions()
        fill_zeros = [(n, s) for (n, s) in dist.norm_nb_elems.items()
                      if n != 'bits']

        batch_size = 128
        gen_factory = tp.truth_generator_factory(h5_truth, fill_zeros,
                                                 tags_name)
        tp.check_generator(gen_factory, os.path.basename(data_set), 400)
        tags, bits, _ = next(gen_factory(20**2))

        nb_bits = 12
        bits = np.array(bits[:nb_bits]).T
        normalize_bits = bits.min() == -1
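        # ground truth bits may be stored in {-1, 1}; if so, they are mapped
        # back to {0, 1} below before being compared to the predictions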
        print(bits.min())
        print(bits.max())
        save_samples(tags[:, 0], bits,
                     tp.outname("evaluate_{}.png".format(name)))
        gen = gen_factory(batch_size)
        bits_true = []
        bits_pred = []
        total_time = 0
        nb_samples = 0
        total_samples = min(len(h5_truth[tags_name]), 100000)

        h5_fname = tp.outname("gt_prediction_{}.hdf5".format(name))
        if os.path.exists(h5_fname):
            os.remove(h5_fname)
        dset_output = DistributionHDF5Dataset(h5_fname,
                                              decoder.distribution,
                                              nb_samples=total_samples)
        with ProgressBar(max_value=total_samples) as pbar:
            for tags, gt, _ in gen:
                if nb_samples + len(tags) > total_samples:
                    break
                nb_samples += len(tags)
                bits = np.array(gt[:nb_bits]).T
                if normalize_bits:
                    bits = bits / 2. + 0.5
                bits_true.append(bits)
                start = time.time()
                outputs = decoder.predict(tags)
                dset_output.append(outputs, tag=tags)
                total_time += time.time() - start
                bits_pred.append(outputs['bits'] / 2. + 0.5)
                pbar.update(nb_samples)

        time_per_sample = total_time / nb_samples
        return np.concatenate(bits_true), np.concatenate(
            bits_pred), time_per_sample
Example #11
def run(output_dir, force, tags_3d_hdf5_fname, nb_units, depth, nb_epoch,
        filter_size, project_factor, nb_dense):
    batch_size = 64
    basename = "network_tags3d_n{}_d{}_e{}".format(nb_units, depth, nb_epoch)
    output_basename = os.path.join(output_dir, basename)

    tag_dataset = DistributionHDF5Dataset(tags_3d_hdf5_fname)
    tag_dataset._dataset_created = True
    print("Got {} images from the 3d model".format(tag_dataset.nb_samples))
    weights_fname = output_basename + ".hdf5"
    if os.path.exists(weights_fname) and not force:
        raise OSError(
            "File {} already exists. Use --force to override it".format(
                weights_fname))
    elif os.path.exists(weights_fname) and force:
        os.remove(weights_fname)
    os.makedirs(output_dir, exist_ok=True)

    def generator(batch_size):
        for batch in tag_dataset.iter(batch_size):
            labels = []
            for name in batch['labels'].dtype.names:
                labels.append(batch['labels'][name])

            assert not np.isnan(batch['tag3d']).any()
            assert not np.isnan(batch['depth_map']).any()
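            # flatten the structured labels into one float matrix; the network
            # maps this label vector to a tag image and a depth map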
            labels = np.concatenate(labels, axis=-1)
            yield labels, [batch['tag3d'], batch['depth_map']]

    labels = next(generator(batch_size))[0]
    print("labels.shape ", labels.shape)
    print("labels.dtype ", labels.dtype)
    nb_input = labels.shape[1]

    x = Input(shape=(nb_input, ))
    tag3d, depth_map = tag3d_network_dense(x,
                                           nb_units=nb_units,
                                           depth=depth,
                                           nb_dense_units=nb_dense)
    g = Model(x, [tag3d, depth_map])
    # optimizer = SGD(momentum=0.8, nesterov=True)
    optimizer = Nadam()

    g.compile(optimizer, loss=['mse', 'mse'], loss_weights=[1, 1 / 3.])

    scheduler = AutomaticLearningRateScheduler(optimizer,
                                               'loss',
                                               epoch_patience=5,
                                               min_improvement=0.0002)
    history = HistoryPerBatch()
    save = SaveModels({basename + '_snapshot_{epoch:^03}.hdf5': g},
                      output_dir=output_dir,
                      hdf5_attrs=tag_dataset.get_distribution_hdf5_attrs())
    history_plot = history.plot_callback(fname=output_basename + "_loss.png",
                                         every_nth_epoch=10)
    g.fit_generator(generator(batch_size),
                    samples_per_epoch=800 * batch_size,
                    nb_epoch=nb_epoch,
                    verbose=1,
                    callbacks=[scheduler, save, history, history_plot])

    nb_visualize = 18**2
    vis_labels, (tags_3d, depth_map) = next(generator(nb_visualize))
    predict_tags_3d, predict_depth_map = g.predict(vis_labels)

    def zip_and_save(fname, *args):
        clipped = list(map(lambda x: np.clip(x, 0, 1)[:, 0], args))
        print(clipped[0].shape)
        tiled = zip_tile(*clipped)
        print(tiled.shape)
        scipy.misc.imsave(fname, tiled)

    zip_and_save(output_basename + "_predict_tags.png", tags_3d,
                 predict_tags_3d)
    zip_and_save(output_basename + "_predict_depth_map.png", depth_map,
                 predict_depth_map)

    save_model(g,
               weights_fname,
               attrs=tag_dataset.get_distribution_hdf5_attrs())
    with open(output_basename + '.json', 'w+') as f:
        f.write(g.to_json())

    with open(output_basename + '_loss_history.json', 'w+') as f:
        json.dump(history.history, f)

    fig, _ = history.plot()
    fig.savefig(output_basename + "_loss.png")
    print("Saved weights to: {}".format(weights_fname))