Exemplo n.º 1
0
def test_split_fixed_oversample_unbalanced():
    """Expect ``fixed`` to raise ValueError for a (9, 1) oversampled split.

    The split reads from the ``imgs`` folder next to this test file and
    targets the sibling ``output`` folder.
    NOTE(review): presumably the fixture classes are too small or too
    unbalanced for 9 + 1 fixed items -- confirm against the fixture data.
    """
    here = os.path.dirname(__file__)
    source = os.path.join(here, "imgs")
    target = os.path.join(here, "output")

    # Remove any previous output; ignore_errors tolerates a missing folder.
    shutil.rmtree(target, ignore_errors=True)

    with pytest.raises(ValueError):
        fixed(source, target, fixed=(9, 1), oversample=True)
Exemplo n.º 2
0
def test_wrong_input():
    """Expect ``fixed`` to raise ValueError for each unusable input path.

    Two inputs are tried in order: ``imgsxx`` next to this test file and the
    bare relative name ``peterpan``.
    NOTE(review): both are presumably chosen because they do not exist --
    confirm that no such folders are present in the test tree.
    """
    here = os.path.dirname(__file__)
    target = os.path.join(here, "output")
    bad_inputs = (os.path.join(here, "imgsxx"), "peterpan")

    # Each input must be rejected on its own, so use one raises-context each.
    for bogus in bad_inputs:
        with pytest.raises(ValueError):
            fixed(bogus, target)
Exemplo n.º 3
0
def test_split_fixed_oversample():
    """Run a (2, 2) fixed split with oversampling and compare .jpg totals.

    The test passes when the number of ``.jpg`` files found recursively under
    the output folder differs from the number found under the input folder.
    """
    here = os.path.dirname(__file__)
    source = os.path.join(here, "imgs")
    target = os.path.join(here, "output")

    # Remove any previous output; ignore_errors tolerates a missing folder.
    shutil.rmtree(target, ignore_errors=True)

    fixed(source, target, fixed=(2, 2), oversample=True)

    def jpg_count(root):
        # Recursive count of .jpg files below ``root``.
        return sum(1 for _ in pathlib.Path(root).glob("**/*.jpg"))

    # ensure the number of pics is NOT the same as in the input
    # (presumably because oversampling adds copies -- confirm with the library)
    n_in = jpg_count(source)
    n_out = jpg_count(target)
    assert n_in != n_out
Exemplo n.º 4
0
def test_split_fixed_prefix():
    """Run a (1, 1) fixed split with ``group_prefix=2`` and compare .jpg totals.

    Input is the ``imgs_texts`` folder next to this test file. The test passes
    when the recursive ``.jpg`` count under the output folder equals the count
    under the input folder.
    NOTE(review): ``group_prefix=2`` presumably keeps files that share a name
    prefix together in groups of two -- confirm against the library docs.
    """
    here = os.path.dirname(__file__)
    source = os.path.join(here, "imgs_texts")
    target = os.path.join(here, "output")

    # Remove any previous output; ignore_errors tolerates a missing folder.
    shutil.rmtree(target, ignore_errors=True)

    fixed(source, target, fixed=(1, 1), oversample=False, group_prefix=2)

    def jpg_count(root):
        # Recursive count of .jpg files below ``root``.
        return sum(1 for _ in pathlib.Path(root).glob("**/*.jpg"))

    # ensure the number of pics is the same
    n_in = jpg_count(source)
    n_out = jpg_count(target)
    assert n_in == n_out
# https://youtu.be/C6wbr1jJvVs
"""
pip install split-folders
"""

import splitfolders  # or import split_folders

# Folder holding the dataset that gets split.
input_folder = 'cell_images/'

# Keyword arguments passed unchanged to both split calls below.
# NOTE(review): both calls write into the same output folder; presumably only
# one of them is meant to be used at a time -- confirm before running both.
_shared_options = dict(output="cell_images2", seed=42, group_prefix=None)

# Split with a ratio, given in the order train, val, test.
# To only split into training and validation sets, pass a two-element tuple
# to `ratio`, e.g. `(.8, .2)`.
# NOTE(review): verify against the split-folders docs whether the seed and
# ratio used here match the library defaults.
splitfolders.ratio(input_folder, ratio=(.7, .2, .1), **_shared_options)

# Split val/test with a fixed number of items, e.g. 100 for each set.
# To only split into training and validation sets, pass a single number to
# `fixed`, e.g. `10`.
# `oversample` enables oversampling of imbalanced datasets; it works only
# with the fixed split.
splitfolders.fixed(input_folder,
                   fixed=(35, 20),
                   oversample=False,
                   **_shared_options)