def test_Mixup():
    """Check nb.Mixup on a single batch for alpha=0.0 and alpha=1.0."""
    first_col = [1, 2, 3]
    second_col = [4, 5, 6]
    samples = list(zip(first_col, second_col))
    build_batch = (
        nb.BuildBatch(3, prefetch=0)
        .input(0, 'number', float)
        .output(1, 'number', float)
    )

    # no mixup, return original batch
    passthrough = nb.Mixup(0.0)
    batches = samples >> build_batch >> passthrough >> Collect()
    inputs, outputs = batches[0]
    assert list(inputs[0]) == first_col
    assert list(outputs[0]) == second_col

    # mixup with alpha=1.0
    blend = nb.Mixup(1.0)
    batches = samples >> build_batch >> blend >> Collect()
    for batch_in, batch_out in batches:
        in_vals, out_vals = batch_in[0], batch_out[0]
        # Values must stay inside the ranges of the original columns.
        assert min(in_vals) >= 1 and max(in_vals) <= 3
        assert min(out_vals) >= 4 and max(out_vals) <= 6
        # Input and output of the first sample must shift by the same amount.
        in_shift = in_vals[0] - samples[0][0]
        out_shift = out_vals[0] - samples[0][1]
        assert approx(in_shift, 1e-3) == out_shift
def test_Network():
    """Exercise train, validate, predict and evaluate of a FakeNetwork."""

    def count_equal(X, y):
        # Metric used with network.evaluate(): number of matching entries.
        return np.sum(X == y)

    weights_file = 'dummy_filepath'
    network = FakeNetwork(FakeModel(), weights_file)
    assert network.weightspath == weights_file

    batches = [((1, 2), (3, 4)), ((5, 6), (7, 8))]

    train_err = batches >> network.train() >> Collect()
    assert train_err == [(4, 8), (12, 16)]

    val_err = batches >> network.validate() >> Collect()
    assert val_err == [(5, 9), (13, 17)]

    prediction = batches >> GetCols(0) >> network.predict() >> Collect()
    assert prediction == [(1, 2), (5, 6)]

    prediction = batches >> GetCols(0) >> network.predict(False) >> Collect()
    assert prediction == [((1, 2),), ((5, 6),)]

    # Inputs equal targets, so all four entries match.
    batches = [((1, 2), (1, 2)), ((5, 6), (5, 6))]
    n_equal = batches >> network.evaluate([count_equal])
    assert n_equal == 4

    # Two input columns per batch; predcol picks the one compared to targets.
    batches = [(((0, 1), (0, 2)), (0, 2)), (((5, 5), (6, 6)), (6, 6))]
    n_equal = batches >> network.evaluate([count_equal], predcol=1)
    assert n_equal == 4
    n_equal = batches >> network.evaluate([count_equal], predcol=0)
    assert n_equal == 1
def test_ImageWriter():
    """Write images via WriteImage using a path pattern and a path function.

    Files written during the test are removed again after being checked.
    """

    def pathfunc(sample, name):
        return 'tests/data/test_{}.jpg'.format(name)

    samples = [('nut_color', 1), ('nut_grayscale', 2)]
    inpath = 'tests/data/img_formats/*.bmp'
    img_samples = samples >> ReadImage(0, inpath) >> Collect()

    # Path pattern plus explicit names: read each file back and compare.
    imagepath = 'tests/data/test_*.bmp'
    names = samples >> Get(0) >> Collect()
    img_samples >> WriteImage(0, imagepath, names) >> Consume()
    for sample, name in zip(img_samples, names):
        filepath = 'tests/data/test_{}.bmp'.format(name)
        arr = load_image(filepath)
        assert np.array_equal(arr, sample[0])
        os.remove(filepath)

    # Path function without names: the test expects files named by index.
    img_samples >> WriteImage(0, pathfunc) >> Consume()
    for i, _ in enumerate(img_samples):
        filepath = 'tests/data/test_{}.jpg'.format(i)
        # The result of os.path.exists() was previously discarded.
        assert os.path.exists(filepath)
        os.remove(filepath)

    # Neither a path string nor a callable.
    with pytest.raises(ValueError) as ex:
        img_samples >> WriteImage(0, ()) >> Consume()
    assert str(ex.value).startswith('Expect path or function')
def test_Boost():
    """Check which samples Boost yields for four fake prediction behaviours."""
    negatives = [(0, 0), (1, 0)]
    positives = [(2, 1), (3, 1), (4, 1)]
    samples = negatives + positives

    build_batch = (
        BuildBatch(3, prefetch=0)
        .input(0, 'number', 'uint8')
        .output(1, 'one_hot', 'uint8', 2)
    )

    # (prediction function, samples expected after boosting, failure message)
    scenarios = [
        (predict_all_positive, negatives, 'Expect negatives boosted'),
        (predict_all_negative, positives, 'Expect positives boosted'),
        (predict_all_perfect, [], 'Expect no samples left for boosting'),
        (predict_all_wrong, samples, 'Expect all samples boosted'),
    ]
    for predictor, expected, message in scenarios:
        network = FakeNetwork(predictor)
        boost = Boost(build_batch, network)
        boosted = samples >> boost >> Collect()
        assert boosted == expected, message
def test_ImagePatchesByMask():
    """Check image/mask patches from ImagePatchesByMask and its size check."""
    image = np.reshape(np.arange(25), (5, 5))
    diagonal = np.eye(5, dtype='uint8') * 255

    np.random.seed(0)
    get_patches = ImagePatchesByMask(0, 1, (3, 3), 1, 1, retlabel=False)
    patches = [(image, diagonal)] >> get_patches >> Collect()
    assert len(patches) == 2

    # (expected image patch, expected mask patch) in output order.
    expected = [
        (np.array([[12, 13, 14], [17, 18, 19], [22, 23, 24]]),
         np.array([[255, 0, 0], [0, 255, 0], [0, 0, 255]])),
        (np.array([[10, 11, 12], [15, 16, 17], [20, 21, 22]]),
         np.array([[0, 0, 255], [0, 0, 0], [0, 0, 0]])),
    ]
    for index, (want_img, want_mask) in enumerate(expected):
        got_img, got_mask = patches[index]
        nt.assert_allclose(got_img, want_img)
        nt.assert_allclose(got_mask, want_mask)

    # A 3x3 mask for a 5x5 image must be rejected.
    with pytest.raises(ValueError) as ex:
        small_mask = np.eye(3, dtype='uint8') * 255
        mismatched = [(image, small_mask)]
        get_patches = ImagePatchesByMask(0, 1, (3, 3), 1, 1)
        mismatched >> get_patches >> Collect()
    assert str(ex.value).startswith('Image and mask size don' 't match!')
def test_ImagePatchesByAnnotation():
    """Check patches from point annotations, with labels and with masks."""
    image = np.reshape(np.arange(25), (5, 5))
    anno = ('point', ((3, 2), (2, 3), ))
    samples = [(image, anno)]

    # Default mode: each result is (patch, label).
    np.random.seed(0)
    get_patches = ImagePatchesByAnnotation(0, 1, (3, 3), 1, 1)
    patches = samples >> get_patches >> Collect()
    assert len(patches) == 3

    # (expected label, expected patch) in output order.
    expected = [
        (0, np.array([[12, 13, 14], [17, 18, 19], [22, 23, 24]])),
        (1, np.array([[11, 12, 13], [16, 17, 18], [21, 22, 23]])),
        (1, np.array([[7, 8, 9], [12, 13, 14], [17, 18, 19]])),
    ]
    for index, (want_label, want_patch) in enumerate(expected):
        patch, label = patches[index]
        assert label == want_label
        nt.assert_allclose(patch, want_patch)

    # retlabel=False: each result is (patch, mask patch).
    np.random.seed(0)
    get_patches = ImagePatchesByAnnotation(0, 1, (3, 3), 1, 1, retlabel=False)
    patches = samples >> get_patches >> Collect()
    patch, mask = patches[0]
    want_patch = np.array([[12, 13, 14], [17, 18, 19], [22, 23, 24]])
    want_mask = np.array([[0, 255, 0], [255, 0, 0], [0, 0, 0]])
    nt.assert_allclose(mask, want_mask)
    nt.assert_allclose(patch, want_patch)
def evaluate(cfg: CFG, net: Network, testdata: Samples) -> (IVec, IVec, float):
    """Run the network over the test data and compute its accuracy.

    Calls ``net.eval()`` and runs the whole pipeline inside ``to.no_grad()``.

    :param cfg: Configuration; only ``cfg.batchsize`` is read here.
    :param net: Network passed to ``PredBatch``.
    :param testdata: Samples fed through ``LoadGraph`` and ``MakeBatch``.
    :return: Tuple of (flattened targets, flattened predictions, accuracy
             in percent).
    """
    net.eval()
    with to.no_grad():
        batcher = MakeBatch(cfg.batchsize)
        unzipped = testdata >> LoadGraph() >> batcher >> PredBatch(net) >> Unzip()
        # The third column (probabilities) is unpacked but not used.
        tars, preds, _probs = unzipped
        tars = tars >> Flatten() >> Collect()
        preds = preds >> Flatten() >> Collect()
        matches = [tar == pred for tar, pred in zip(tars, preds)]
        n_correct = matches.count(True)
        acc = 100.0 * n_correct / len(tars)
        return tars, preds, acc
def test_TransformImage():
    """Check single and chained registered transforms on chosen columns."""
    TransformImage.register('fake_trans1', lambda e: e + 1)
    TransformImage.register('fake_trans2', lambda e, x: e + x)
    samples = [(1, 2), (3, 4)]

    # One transform on column 0 only.
    add_one = TransformImage(0).by('fake_trans1')
    transformed = samples >> add_one >> Collect()
    assert transformed == [(2, 2), (4, 4)]

    # Two chained transforms (+1, then +3) on columns 0 and 1.
    add_four = TransformImage((0, 1)).by('fake_trans1').by('fake_trans2', 3)
    transformed = samples >> add_four >> Collect()
    assert transformed == [(5, 6), (7, 8)]
def test_AugmentImage():
    """Check AugmentImage output for augmentation probabilities 1.0 and 0.0."""
    # NOTE(review): a second test_AugmentImage appears later in this source;
    # if both live in one module the later definition replaces this one and
    # this test is never collected.
    TransformImage.register('fake_trans1', lambda e: e + 1)
    TransformImage.register('fake_trans2', lambda e: e + 2)
    samples = [(1, 2), (3, 4)]

    # Both transforms at probability 1.0: two outputs per sample.
    both = AugmentImage(0).by('fake_trans1', 1.0).by('fake_trans2', 1.0)
    augmented = samples >> both >> Collect()
    assert augmented == [(2, 2), (3, 2), (4, 4), (5, 4)]

    # Second transform at probability 0.0: only the first one is applied.
    first_only = AugmentImage(0).by('fake_trans1', 1.0).by('fake_trans2', 0.0)
    augmented = samples >> first_only >> Collect()
    assert augmented == [(2, 2), (4, 4)]
def test_DplyToList():
    """Check DplyToList on empty and filled frames and on non-frame input."""
    # An empty frame yields no rows.
    empty_frame = dp.DplyFrame(pd.DataFrame())
    rows = empty_frame >> DplyToList() >> Collect()
    assert rows == []

    # Each frame row becomes a list of column values.
    columns = {'c1': [1, 2, 3], 'c2': [4, 5, 6]}
    frame = dp.DplyFrame(pd.DataFrame(data=columns))
    rows = frame >> DplyToList() >> Collect()
    assert rows == [[1, 4], [2, 5], [3, 6]]

    # A plain list is rejected.
    with pytest.raises(ValueError) as ex:
        [1] >> DplyToList() >> Collect()
    assert str(ex.value) == 'Expect Dplyr dataframe!'
def test_LogToFile_reset(filepath):
    """Check that reset=False appends to what an earlier logger wrote."""

    def logged_text():
        with open(filepath) as f:
            return f.read()

    data = [[1, 2], [3, 4]]

    # Log column 0 with reset=True.
    with LogToFile(filepath, cols=0, reset=True) as logtofile:
        passed = data >> logtofile >> Collect()
        assert passed == data
    assert logged_text() == '1\n3\n'

    # Log column 1 with reset=False: new lines follow the existing ones.
    with LogToFile(filepath, cols=1, reset=False) as logtofile:
        passed = data >> logtofile >> Collect()
        assert passed == data
    assert logged_text() == '1\n3\n2\n4\n'
def test_LogToFile_numpy(filepath):
    """Check how LogToFile writes 1-d and 0-d numpy arrays."""

    def logged_text():
        with open(filepath) as f:
            return f.read()

    # 1-d arrays: one comma-separated line per array.
    vectors = [np.array([1, 2]), np.array([3, 4])]
    with LogToFile(filepath) as logtofile:
        passed = vectors >> logtofile >> Collect()
        assert passed == vectors
    assert logged_text() == '1,2\n3,4\n'

    # 0-d arrays: one value per line.
    scalars = [np.array(1), np.array(2)]
    with LogToFile(filepath) as logtofile:
        passed = scalars >> logtofile >> Collect()
        assert passed == scalars
    assert logged_text() == '1\n2\n'
def test_ReadLabelDirs():
    """Check (filepath, label) samples read from the label directories."""
    basedir = 'tests/data/labeldirs'
    pattern = '*.txt'
    numbered_dirs = [
        ('tests/data/labeldirs/0/test0.txt', '0'),
        ('tests/data/labeldirs/1/test1.txt', '1'),
        ('tests/data/labeldirs/1/test11.txt', '1'),
    ]
    underscore_dir = [('tests/data/labeldirs/_2/test2.txt', '_2')]

    # Default arguments: the '_2' directory is not part of the result.
    read = ReadLabelDirs(basedir, pattern)
    samples = read >> Collect()
    assert samples == numbered_dirs

    # With '' as third argument the '_2' directory is included as well.
    read = ReadLabelDirs(basedir, pattern, '')
    samples = read >> Collect()
    assert samples == numbered_dirs + underscore_dir
def test_ImageAnnotationToMask():
    """Check the mask generated from two point annotations on a 3x3 image."""
    blank = np.zeros((3, 3), dtype='uint8')
    points = ('point', ((0, 1), (2, 0)))
    masks = [(blank, points)] >> ImageAnnotationToMask(0, 1) >> Collect()

    expected = np.array([[0, 0, 255], [255, 0, 0], [0, 0, 0]], dtype='uint8')
    mask = masks[0][1]
    # Compared via str() because nt.assert_allclose fails!
    assert str(mask) == str(expected)
def test_BuildBatch():
    """Check that BuildBatch assembles input and output columns per batch."""
    numbers = [4.1, 3.2, 1.1]
    vectors = [np.array([1, 2, 3]), np.array([2, 3, 4]), np.array([3, 4, 5])]
    images = [np.zeros((5, 3)), np.ones((5, 3)), np.ones((5, 3))]
    class_ids = [1, 2, 1]
    samples = zip(numbers, vectors, images, class_ids)

    build_batch = (
        nb.BuildBatch(2, prefetch=0)
        .input(0, 'number', float)
        .input(1, 'vector', np.uint8)
        .input(2, 'image', np.uint8, False)
        .output(3, 'one_hot', 'uint8', 3)
    )

    # Three samples at batch size 2 give two batches.
    batches = samples >> build_batch >> Collect()
    assert len(batches) == 2

    first_batch = batches[0]
    assert len(first_batch) == 2, 'Expect inputs and outputs'
    ins, outs = first_batch
    assert len(ins) == 3, 'Expect three input columns in batch'
    assert len(outs) == 1, 'Expect one output column in batch'

    # Each column must equal what the matching builder makes of two samples.
    want_numbers = nb.build_number_batch(numbers[:2], float)
    assert np.array_equal(ins[0], want_numbers)
    want_vectors = nb.build_vector_batch(vectors[:2], 'uint8')
    assert np.array_equal(ins[1], want_vectors)
    want_images = nb.build_image_batch(images[:2], 'uint8')
    assert np.array_equal(ins[2], want_images)
    want_one_hot = nb.build_one_hot_batch(class_ids[:2], 'uint8', 3)
    assert np.array_equal(outs[0], want_one_hot)
def Boost(iterable, batcher, network, targetcol=-1):
    """
    iterable >> Boost(batcher, network, targetcol=-1)

    Yield the samples to boost, chosen by the predicted probability of
    their true class.

    A sample is yielded when a random number exceeds the probability the
    network predicts for the sample's true class. Targets must be one-hot
    encoded and predictions must have one probability per class.

    |  network = Network()
    |  build_batch = BuildBatch(BATCHSIZE, colspec)
    |  boost = Boost(build_batch, network)
    |  samples >> boost >> build_batch >> network.train() >> Consume()

    :param iterable iterable: Iterable with samples.
    :param nutsml.BuildBatch batcher: Batcher used for network training.
    :param nutsml.Network network: Network used for prediction
    :param int targetcol: Column in sample that contains target values.
    :return: Generator over samples to boost
    :rtype: generator
    """

    def do_boost(probs, target):
        assert len(target) > 1, 'Expect one-hot encoded target: ' + str(target)
        assert len(target) == len(probs), 'Expect softmax probs: ' + str(probs)
        threshold = random()
        true_class = np.argmax(target)
        return threshold > probs[true_class]

    # One copy of the stream is batched, the other supplies the raw samples.
    batch_stream, sample_stream = iterable >> Tee(2)
    for batch in batch_stream >> batcher:
        inputs = batch[:targetcol]
        targets = batch[targetcol]
        predictions = [inputs] >> network.predict() >> Collect()
        # sample_stream must stay the last zip argument so that no sample
        # is consumed once predictions or targets are exhausted.
        for probs, target, sample in zip(predictions, targets, sample_stream):
            if do_boost(probs, target):
                yield sample
def test_LogCols(filepath):
    """Check that LogCols passes data through and writes comma-separated rows."""
    rows = [[1, 2], [3, 4]]
    with LogCols(filepath) as logcols:
        passed = rows >> logcols >> Collect()
        assert passed == rows
    with open(filepath) as f:
        logged = f.read()
    assert logged == '1,2\n3,4\n'
def test_LogToFile_delete(filepath):
    """Check that LogToFile.delete() removes the log file."""
    rows = [[1, 2], [3, 4]]
    logger = LogToFile(filepath)
    passed = rows >> logger >> Collect()
    assert passed == rows

    # The file exists after logging and is gone after delete().
    assert os.path.exists(filepath)
    logger.delete()
    assert not os.path.exists(filepath)
def test_SplitRandom_constraint():
    """Check that a constrained split keeps samples with equal letters together."""
    # NOTE(review): a second test_SplitRandom_constraint appears later in this
    # source; if both live in one module the later definition replaces this
    # one and this test is never collected.

    def same_letter(sample):
        # Constraint key: the letter in the first column.
        return sample[0]

    data = zip('aabbccddee', range(10))
    split = SplitRandom(rand=StableRandom(0), ratio=0.6,
                        constraint=same_letter)
    train, val = data >> split >> Collect()
    print(train)
    print(val)

    expected_train = [('a', 1), ('a', 0), ('d', 7),
                      ('b', 2), ('d', 6), ('b', 3)]
    expected_val = [('c', 5), ('e', 8), ('e', 9), ('c', 4)]
    assert train == expected_train
    assert val == expected_val
def test_AugmentImage():
    """Check AugmentImage for probabilities 1.0, 0.0 and 0.5 and for count 10."""
    TransformImage.register('fake_trans1', lambda e: e + 1)
    TransformImage.register('fake_trans2', lambda e: e + 2)
    samples = [(1, 2), (3, 4)]

    # Both transforms at probability 1.0: two outputs per sample.
    both = AugmentImage(0).by('fake_trans1', 1.0).by('fake_trans2', 1.0)
    augmented = samples >> both >> Collect()
    assert augmented == [(2, 2), (3, 2), (4, 4), (5, 4)]

    # Second transform at probability 0.0: only the first one is applied.
    first_only = AugmentImage(0).by('fake_trans1', 1.0).by('fake_trans2', 0.0)
    augmented = samples >> first_only >> Collect()
    assert augmented == [(2, 2), (4, 4)]

    # Probability 0.5 on 1000 samples: roughly half are emitted.
    many = [(x, ) for x in range(1000)]
    half = AugmentImage(0).by('identical', 0.5)
    n_emitted = many >> half >> Count()
    assert 450 < n_emitted < 550

    # An integer 10 in place of a probability: ten outputs for one sample.
    tenfold = AugmentImage(0).by('identical', 10)
    n_emitted = [(1, )] >> tenfold >> Count()
    assert n_emitted == 10
def test_Stratify():
    """Check that Stratify balances a 1000:100 label distribution.

    The same Stratify instance is applied twice; both results must be
    balanced and must differ from each other.
    """
    # NOTE(review): a second test_Stratify appears later in this source; if
    # both live in one module the later definition replaces this one and
    # this test is never collected.
    samples = [('pos', 1)] * 1000 + [('neg', 0)] * 100
    dist = samples >> CountValues(1)

    stratify = Stratify(1, dist, rand=StableRandom(0))
    stratified1 = samples >> stratify >> Collect()
    stratified2 = samples >> stratify >> Collect()
    assert stratified1 != stratified2

    dist1 = stratified1 >> Get(1) >> CountValues()
    print(dist1)
    assert dist1[0] == 100
    assert 90 < dist1[1] < 110

    # These assertions previously re-checked dist1 (copy-paste error), so
    # the second pass was never verified.
    dist2 = stratified2 >> Get(1) >> CountValues()
    print(dist2)
    assert dist2[0] == 100
    assert 90 < dist2[1] < 110
def test_BuildBatch_exceptions():
    """Check that an unknown builder name makes BuildBatch raise ValueError."""
    class_ids = [1, 2]
    numbers = [4.1, 3.2]
    samples = zip(numbers, class_ids)

    # Construction stays inside the raises block, as in the original.
    with pytest.raises(ValueError) as ex:
        build_batch = (
            nb.BuildBatch(2, prefetch=0)
            .by(0, 'number', float)
            .by(1, 'invalid', 'uint8', 3)
        )
        samples >> build_batch >> Collect()
    message = str(ex.value)
    assert message.startswith('Invalid builder')
def test_CheckNaN():
    """Check that CheckNaN passes clean data and raises on NaN values.

    Uses ``np.nan`` because the ``np.NaN`` alias was removed in NumPy 2.0.
    """
    # Data without NaN passes through unchanged.
    assert [1, 2] >> CheckNaN() >> Collect() == [1, 2]

    # NaN as a plain element.
    with pytest.raises(RuntimeError) as ex:
        [1, np.nan, 3] >> CheckNaN() >> Consume()
    assert str(ex.value).startswith('NaN encountered')

    # NaN inside a tuple sample.
    with pytest.raises(RuntimeError) as ex:
        [(1, np.nan), (2, 4)] >> CheckNaN() >> Consume()
    assert str(ex.value).startswith('NaN encountered')
def test_RegularImagePatches():
    """Check regular-grid patches from one image column and from two."""
    image = np.reshape(np.arange(12), (3, 4))

    # One image column; the second column is carried along unchanged.
    get_patches = RegularImagePatches(0, (2, 2), 2)
    expected = [
        (np.array([[0, 1], [4, 5]]), 0),
        (np.array([[2, 3], [6, 7]]), 0),
    ]
    patches = [(image, 0)] >> get_patches >> Collect()
    for got, want in zip(patches, expected):
        got_patch, got_label = got
        want_patch, want_label = want
        nt.assert_allclose(got_patch, want_patch)
        assert got_label == want_label

    # Two image columns are patched at the same positions.
    get_patches = RegularImagePatches((0, 1), (1, 1), 3)
    expected = [
        (np.array([[0]]), np.array([[1]])),
        (np.array([[3]]), np.array([[4]])),
    ]
    patches = [(image, image + 1)] >> get_patches >> Collect()
    for got_pair, want_pair in zip(patches, expected):
        nt.assert_allclose(got_pair, want_pair)
def test_SplitRandom_constraint():
    """Check a constrained split with rand=None, comparing sorted results."""

    def same_letter(sample):
        # Constraint key: the letter in the first column.
        return sample[0]

    data = zip('aabbccddee', range(10))
    split = SplitRandom(rand=None, ratio=0.6, constraint=same_letter)
    train, val = data >> split >> Collect()

    # The order within each split is not checked, so sort before comparing.
    train.sort()
    val.sort()
    expected_train = [('a', 0), ('a', 1), ('b', 2),
                      ('b', 3), ('d', 6), ('d', 7)]
    expected_val = [('c', 4), ('c', 5), ('e', 8), ('e', 9)]
    assert train == expected_train
    assert val == expected_val
def test_ImageChannelMean():
    """Check providing, computing, re-loading and subtracting channel means.

    The temporary means file is removed in a ``finally`` block so that a
    failing assertion does not leave it behind for the next run, which
    would load it when constructing ``ImageChannelMean`` with that path.
    """
    meansfile = 'tests/data/temp_image_channel_mean.npy'
    img1 = np.dstack([np.ones((3, 3)), np.ones((3, 3))])
    img2 = np.dstack([np.ones((3, 3)), np.ones((3, 3)) * 3])
    samples = [(img1, ), (img2, )]

    try:
        # provide means directly
        img_mean = ImageChannelMean(0, means=[1, 2])
        expected = np.array([[[1., 2.]]])
        nt.assert_allclose(img_mean.means, expected)

        # compute means
        img_mean = ImageChannelMean(0, filepath=meansfile)
        results = samples >> img_mean.train() >> Collect()
        expected = np.array([[[1., 2.]]])
        nt.assert_allclose(img_mean.means, expected)
        assert results == samples
        assert os.path.exists(meansfile)

        # re-loading means from file
        img_mean = ImageChannelMean(0, filepath=meansfile)
        nt.assert_allclose(img_mean.means, expected)

        # subtract means
        results = samples >> img_mean >> Collect()
        expected0 = np.array([[0., 0., 0.], [0., 0., 0.], [0., 0., 0.]])
        expected1 = np.array([[1., 1., 1.], [1., 1., 1.], [1., 1., 1.]])
        nt.assert_allclose(results[1][0][:, :, 0], expected0)
        nt.assert_allclose(results[1][0][:, :, 1], expected1)

        # a sample whose shape does not fit the loaded means
        with pytest.raises(ValueError) as ex:
            other_samples = [(np.eye(5), )]
            img_mean = ImageChannelMean(0, filepath=meansfile)
            other_samples >> img_mean >> Collect()
        assert str(ex.value).startswith('Mean loaded was computed on different')

        # means were neither provided nor computed
        with pytest.raises(ValueError) as ex:
            img_mean = ImageChannelMean(0, 'file does not exist')
            samples >> img_mean >> Collect()
        assert str(ex.value).startswith('Mean has not yet been computed!')
    finally:
        # Guarded so an early failure is not masked by FileNotFoundError.
        if os.path.exists(meansfile):
            os.remove(meansfile)
def test_ReadImage():
    """Check ReadImage with a path pattern, a path function and direct paths."""

    def pathfunc(sample):
        return 'tests/data/img_formats/{0}.jpg'.format(*sample)

    color_arr = np.load('tests/data/img_arrays/nut_color.jpg.npy')
    gray_arr = np.load('tests/data/img_arrays/nut_grayscale.jpg.npy')
    color_path = 'tests/data/img_formats/nut_color.jpg'

    # Path pattern: column 0 holds the image name, column 1 is untouched.
    samples = [('nut_color', 1), ('nut_grayscale', 2)]
    imagepath = 'tests/data/img_formats/*.jpg'
    img_samples = samples >> ReadImage(0, imagepath) >> Collect()
    nt.assert_equal(img_samples[0][0], color_arr)
    nt.assert_equal(img_samples[1][0], gray_arr)
    assert img_samples[0][1] == 1
    assert img_samples[1][1] == 2

    # Path function instead of a pattern.
    img_samples = samples >> ReadImage(0, pathfunc) >> Collect()
    nt.assert_equal(img_samples[0][0], color_arr)
    nt.assert_equal(img_samples[1][0], gray_arr)

    # Full path in column 1; as_grey controls whether channels are kept.
    samples = [('label', color_path)]
    img_samples = samples >> ReadImage(1, as_grey=False) >> Collect()
    assert img_samples[0][1].shape == (213, 320, 3)
    img_samples = samples >> ReadImage(1, as_grey=True) >> Collect()
    assert img_samples[0][1].shape == (213, 320)

    # Column None: the samples themselves are the file paths.
    samples = [color_path]
    img_samples = samples >> ReadImage(None, as_grey=False) >> Collect()
    assert img_samples[0][0].shape == (213, 320, 3)

    # The requested dtype is applied to the loaded image.
    samples = [color_path]
    img_samples = samples >> ReadImage(None, dtype=float) >> Collect()
    assert img_samples[0][0].dtype == float
def DISABLED_test_plotlines():
    """Check that PlotLines passes data through and writes the expected plot.

    The ``DISABLED_`` prefix keeps pytest from collecting this function.
    """
    filepath = 'tests/data/temp_plotlines.png'
    xs = np.arange(0, 6.3, 1.2)
    ysin, ycos = np.sin(xs), np.cos(xs)
    # On Python 3 zip() is a one-shot iterator: materialise it so that the
    # data can be both piped and compared with the collected output.
    data = list(zip(xs, ysin, ycos))

    out = data >> pl.PlotLines(1, 0, filepath=filepath) >> Collect()
    assert out == data

    expected = 'tests/data/img/plotlines.png'
    image = ni.load_image(filepath)
    # Remove the plot before comparing so a mismatch leaves no file behind.
    os.remove(filepath)
    assert_equal_image(expected, image)
def test_Stratify():
    """Check Stratify modes 'up' and 'downrnd' and the unknown-mode error."""
    # Mode 'up': the single negative sample appears twice in the output.
    samples = [('pos', 1), ('pos', 1), ('neg', 0)]
    upsample = Stratify(1, mode='up', rand=StableRandom(0))
    balanced = samples >> upsample >> Sort()
    assert balanced == [('neg', 0), ('neg', 0), ('pos', 1), ('pos', 1)]

    # Mode 'downrnd': only two of the three positive samples remain.
    samples = [('pos', 1), ('pos', 1), ('pos', 1), ('neg1', 0), ('neg2', 0)]
    downsample = Stratify(1, mode='downrnd', rand=StableRandom(0))
    balanced = samples >> downsample >> Sort()
    assert balanced == [('neg1', 0), ('neg2', 0), ('pos', 1), ('pos', 1)]

    # An unknown mode name is rejected.
    with pytest.raises(ValueError) as ex:
        samples >> Stratify(1, mode='invalid') >> Collect()
    message = str(ex.value)
    assert message.startswith('Unknown mode')
def test_LogToFile(filepath):
    """Check LogToFile defaults, delimiter, column selection and column names."""

    def logged_text():
        with open(filepath) as f:
            return f.read()

    data = [[1, 2], [3, 4]]

    # Defaults: all columns, comma separated.
    with LogToFile(filepath) as logtofile:
        passed = data >> logtofile >> Collect()
        assert passed == data
    assert logged_text() == '1,2\n3,4\n'

    # Custom delimiter.
    with LogToFile(filepath, delimiter='; ') as logtofile:
        passed = data >> logtofile >> Collect()
        assert passed == data
    assert logged_text() == '1; 2\n3; 4\n'

    # Column 0 only; two passes through one logger write the rows twice.
    with LogToFile(filepath, cols=0, reset=True) as logtofile:
        passed = data >> logtofile >> Collect()
        assert passed == data
        passed = data >> logtofile >> Collect()
        assert passed == data
    assert logged_text() == '1\n3\n1\n3\n'

    # Reordered columns with a header line of column names.
    with LogToFile(filepath, cols=(1, 0), colnames=('a', 'b')) as logtofile:
        passed = data >> logtofile >> Collect()
        assert passed == data
    assert logged_text() == 'a,b\n2,1\n4,3\n'