def test_group_names_DataGeneratorDisk():
    """Check how `group_names` controls the folders images are read from.

    A 100x100 copy of `images/` is written to `images1/` so that the two
    folders can be told apart by the size of the images they yield:

    * a single group yields one input array;
    * a flat list of two groups yields one input array per group;
    * a nested list of groups is expected to yield images whose width
      varies across freshly created generators.

    The temporary `images1/` folder is removed even when an assertion fails.
    """
    iu.resize_folder('images/',
                     'images1/',
                     image_size_dst=(100, 100),
                     overwrite=True)

    try:
        gp = gen_params.copy()
        gp.inputs = ['filename']
        gp.group_names = ['images/']
        gp.data_path = ''
        g = gr.DataGeneratorDisk(ids, **gp)
        assert gen.get_sizes(g[0]) == '([array<2,224,224,3>], [array<2,1>])'

        gp.group_names = ['images/', 'images1/']
        g = gr.DataGeneratorDisk(ids, **gp)
        assert gen.get_sizes(
            g[0]) == '([array<2,224,224,3>, array<2,100,100,3>], [array<2,1>])'

        gp.group_names = [['images/'], ['images1/']]
        # width of the first input image of the first batch, for 100
        # independently constructed generators
        sizes = [
            gr.DataGeneratorDisk(ids, **gp)[0][0][0].shape[1]
            for _ in range(100)
        ]

        # both image sizes must show up at least once
        assert np.unique(sizes).shape[0] > 1
    finally:
        # clean up the resized copy regardless of the test outcome
        shutil.rmtree('images1/')
def test_DataGeneratorDisk():
    """Exercise input/output column configurations of `DataGeneratorDisk`.

    The generator is built once from the shared `gen_params`; its `inputs`,
    `inputs_df` and `outputs` attributes are then reassigned and the sizes of
    the first batch are compared against the expected layout.
    """
    generator = gr.DataGeneratorDisk(ids, **gen_params)

    # the same image column requested twice -> two image inputs
    generator.inputs = ['filename', 'filename']
    expected = '([array<2,224,224,3>, array<2,224,224,3>], [array<2,1>])'
    assert gen.get_sizes(generator[0]) == expected

    # flat list of DataFrame columns -> one input with two columns
    generator.inputs_df = ['score', 'score']
    generator.inputs = []
    generator.outputs = []
    expected = '([array<2,2>], [])'
    assert gen.get_sizes(generator[0]) == expected

    # nested lists of DataFrame columns -> one input per inner list
    generator.inputs_df = [['score'], ['score', 'score']]
    expected = '([array<2,1>, array<2,2>], [])'
    assert gen.get_sizes(generator[0]) == expected

    # outputs only
    generator.inputs_df = []
    generator.outputs = ['score']
    expected = '([], [array<2,1>])'
    assert gen.get_sizes(generator[0]) == expected

    # mixing a plain column name with a list is rejected
    generator.outputs = ['score', ['score']]
    with pytest.raises(AssertionError):
        generator[0]

    # nested lists of output columns -> one output per inner list
    generator.outputs = [['score'], ['score']]
    expected = '([], [array<2,1>, array<2,1>])'
    assert gen.get_sizes(generator[0]) == expected
def test_DataGeneratorHDF5():
    """Exercise input/output column configurations of `DataGeneratorHDF5`.

    Uses a copy of the shared `gen_params` pointed at `data.h5`, then
    reassigns `inputs`, `inputs_df` and `outputs` on the generator and checks
    the sizes of the first batch each time.
    """
    params = gen_params.copy()
    params.update(data_path='data.h5', inputs=['filename'])
    generator = gr.DataGeneratorHDF5(ids, **params)

    expected = '([array<2,1>], [array<2,1>])'
    assert gen.get_sizes(generator[0]) == expected

    # flat list of DataFrame columns -> one input with two columns
    generator.inputs_df = ['score', 'score']
    generator.inputs = []
    generator.outputs = []
    expected = '([array<2,2>], [])'
    assert gen.get_sizes(generator[0]) == expected

    # nested lists of DataFrame columns -> one input per inner list
    generator.inputs_df = [['score'], ['score', 'score']]
    expected = '([array<2,1>, array<2,2>], [])'
    assert gen.get_sizes(generator[0]) == expected

    # outputs only
    generator.inputs_df = []
    generator.outputs = ['score']
    expected = '([], [array<2,1>])'
    assert gen.get_sizes(generator[0]) == expected

    # mixing a plain column name with a list is rejected
    generator.outputs = ['score', ['score']]
    with pytest.raises(AssertionError):
        generator[0]

    # nested lists of output columns -> one output per inner list
    generator.outputs = [['score'], ['score']]
    expected = '([], [array<2,1>, array<2,1>])'
    assert gen.get_sizes(generator[0]) == expected
def test_read_fn_DataGeneratorDisk():
    """Check that a custom `read_fn` is used to load the generator inputs.

    The custom reader resizes every image to 100x100, so the first batch is
    expected to contain 100x100 images.
    """
    import os

    def read_fn(name, g):
        # g is the parent generator object
        # name is the image name read from the DataFrame
        image_path = os.path.join(g.data_path, name)
        return iu.resize_image(iu.read_image(image_path), (100, 100))

    g = gr.DataGeneratorDisk(ids, read_fn=read_fn, **gen_params)
    # the comparison must be asserted, otherwise the test verifies nothing
    assert gen.get_sizes(g[0]) == '([array<2,100,100,3>], [array<2,1>])'
def test_init_DataGeneratorDisk():
    """Check the structure of the first batch of a default `DataGeneratorDisk`.

    A batch must be a tuple of two lists (inputs, outputs) with the expected
    array sizes, and the first output must hold the values 1 and 2.
    """
    generator = gr.DataGeneratorDisk(ids, **gen_params)

    # container types: (list, list)
    assert isinstance(generator[0], tuple)
    for part in (0, 1):
        assert isinstance(generator[0][part], list)

    # array sizes
    expected_sizes = '([array<2,224,224,3>], [array<2,1>])'
    assert gen.get_sizes(generator[0]) == expected_sizes

    # output values
    first_output = generator[0][1][0]
    assert np.all(first_output == np.array([[1], [2]]))
def test_accessor_function_numpy_array():
    """Check callable `inputs_df` accessors and ndarray-valued columns.

    Column `a` holds integers, column `b` holds one 2x2 ndarray per row.
    """
    frame = pd.DataFrame({
        'a': range(10),
        'b': list(np.random.randint(0, 10, (10, 2, 2))),
    })
    params = Munch(batch_size=4,
                   data_path=None,
                   input_shape=None,
                   inputs_df=lambda df: [df[['a']].values],
                   outputs=['b'],
                   shuffle=False,
                   fixed_batches=True)

    # a function is used to pull the inputs out of the DataFrame, and the
    # items of the output column are ndarrays
    generator = gr.DataGeneratorDisk(frame, **params)
    expected = '([array<4,1>], [array<4,2,2>])'
    assert gen.get_sizes(generator[0]) == expected

    # the same column listed twice as output gives a two-column output
    params.outputs = ['a', 'a']
    generator = gr.DataGeneratorDisk(frame, **params)
    expected = '([array<4,1>], [array<4,2>])'
    assert gen.get_sizes(generator[0]) == expected
def test_callable_outputs_DataGeneratorHDF5():
    """Check that `outputs` may be a callable applied to the batch DataFrame.

    The callable keeps only the `features` of rows whose `mask` equals 1, so
    the output (3 rows) is shorter than the input (5 rows) and is returned as
    a bare array rather than a list.
    """
    frame = pd.DataFrame(data={
        'features': [1, 2, 3, 4, 5],
        'mask': [1, 0, 1, 1, 0],
    })

    def filter_features(df):
        selected = df.loc[df['mask'] == 1, ['features']]
        return np.array(selected)

    params = gen_params.copy()
    params.update(data_path=None,
                  outputs=filter_features,
                  inputs=[],
                  inputs_df=['features'],
                  shuffle=False,
                  batch_size=5)

    generator = gr.DataGeneratorHDF5(frame, **params)

    expected_sizes = '([array<5,1>], array<3,1>)'
    assert gen.get_sizes(generator[0]) == expected_sizes

    # inputs: every feature; outputs: only the features with mask == 1
    assert all(np.squeeze(generator[0][0]) == np.arange(1, 6))
    assert all(np.squeeze(generator[0][1]) == [1, 3, 4])
def test_get_sizes():
    """Check the string produced by `gen.get_sizes` for nested containers."""
    x = np.array([[1, 2, 3]])
    assert gen.get_sizes(([x.T], 1, [4,
                                     5])) == '([array<3,1>], <1>, [<1>, <1>])'

    # Ragged nested sequences need an explicit `dtype=object`: NumPy >= 1.24
    # raises ValueError otherwise. The resulting array has shape (1, 2).
    ragged = np.array([[1, [1, 2]]], dtype=object)
    assert gen.get_sizes(ragged) == 'array<1,2>'
def test_basics_deterministic_shuffle_consistency_group_by():
    """Check batching, shuffling, determinism and `group_by` of the generator.

    Works on a 10-row DataFrame with columns `a` (0..9, used as input),
    `b` (9..0, used as output) and `c` (True for the first five rows, used
    for grouping). The test covers, in order:

    * the `fixed_batches` switch (incomplete last batch dropped or kept);
    * shuffling returns every item exactly once;
    * shuffling keeps inputs and outputs paired and actually reorders data;
    * a `deterministic` seed gives identical first batches;
    * `group_by` returns all items, with a single group per batch.
    """

    ids = pd.DataFrame(
        dict(a=range(10), b=list(range(9, -1, -1)), c=np.arange(10) < 5))

    gen_params = Munch(batch_size=4,
                       data_path=None,
                       input_shape=None,
                       inputs_df=['a'],
                       outputs=['b'],
                       shuffle=False,
                       fixed_batches=True)

    # lookup table from the value of `a` to its row
    ids_ = ids.copy()
    ids_.index = ids_.a

    # check `fixed_batches` switch
    g = gr.DataGeneratorDisk(ids, **gen_params)
    assert np.array_equal(
        [gen.get_sizes(x) for x in g],
        ['([array<4,1>], [array<4,1>])', '([array<4,1>], [array<4,1>])'])
    assert np.array_equal(g[0][0][0].squeeze(), range(4))

    gen_params.fixed_batches = False
    g = gr.DataGeneratorDisk(ids, **gen_params)
    assert np.array_equal([gen.get_sizes(x) for x in g], [
        '([array<4,1>], [array<4,1>])', '([array<4,1>], [array<4,1>])',
        '([array<2,1>], [array<2,1>])'
    ])
    assert np.array_equal(g[2][0][0].squeeze(), [8, 9])

    # check randomized
    gen_params.shuffle = True
    gen_params.fixed_batches = False  # maintain
    g = gr.DataGeneratorDisk(ids, **gen_params)

    # check if it returns all items
    data = list(zip(*list(g)))
    data0 = np.concatenate([l[0] for l in data[0]], axis=0).squeeze()
    data1 = np.concatenate([l[0] for l in data[1]], axis=0).squeeze()
    assert np.array_equal(np.sort(data0), np.arange(10))
    assert np.array_equal(np.sort(data1), np.arange(10))

    # check if randomization is applied, consistently
    num_randoms0 = 0
    num_randoms1 = 0
    for _ in range(100):
        g = gr.DataGeneratorDisk(ids, **gen_params)
        data = list(zip(*list(g)))
        data0 = np.concatenate([l[0] for l in data[0]], axis=0).squeeze()
        data1 = np.concatenate([l[0] for l in data[1]], axis=0).squeeze()

        # check consistency: every output `b` must come from the same row
        # as the input `a` at the same position
        assert np.array_equal(ids_.loc[data0].b, data1)

        # count the runs whose order differs from the unshuffled order of
        # the respective column (`a` is 0..9, `b` is 9..0)
        num_randoms0 += not np.array_equal(data0, ids.a.values)
        num_randoms1 += not np.array_equal(data1, ids.b.values)

    # check randomization, at least once
    assert num_randoms0
    assert num_randoms1

    # check deterministic
    gen_params.shuffle = True
    gen_params.deterministic = np.random.randint(100)
    assert np.array_equal(
        gr.DataGeneratorDisk(ids, **gen_params)[0],
        gr.DataGeneratorDisk(ids, **gen_params)[0])

    gen_params.update(fixed_batches=False,
                      shuffle=True,
                      group_by='c',
                      deterministic=False)

    g = gr.DataGeneratorDisk(ids, **gen_params)
    data = list(zip(*list(g)))
    data = [[l[0] for l in d] for d in data]
    data_conc = [np.concatenate(d, axis=0) for d in data]

    # returns all
    df = pd.DataFrame(np.concatenate(data_conc, axis=1), columns=('a', 'b'))
    x = df.merge(ids, on='a')
    assert np.all(x.b_x == x.b_y)

    # each batch returns a single group; look at every row of the batch
    # (`d[:, 0]`), not only its first one
    for d in data[0]:
        assert ids_.loc[d[:, 0]].c.unique().shape == (1, )