Пример #1
0
def test_by_regex():
    """Check that a ``FilterByRegex`` pipeline drops exactly one file.

    The image fixtures under ``tests/data/image`` are loaded recursively,
    the single pipeline component is configured with the pattern
    ``"sample.jpg"``, and after the run the group must hold one file
    fewer than it did before.
    """
    group = FileGroup(recursive=True)
    group.load_dir("tests/data/image", File, recursive=True)
    group.configure_pipeline(["FilterByRegex"])

    regex_filter = group.pipeline.components[0]
    # NOTE(review): the "." in the pattern is unescaped; if the component
    # treats it as a regular expression it matches any character -- confirm
    # that this is intended.
    regex_filter.configure(pattern="sample.jpg")

    count_before = len(group.files)
    group.run_pipeline()
    count_after = len(group.files)

    assert count_after == count_before - 1
Пример #2
0
def test_subsample():
    """Check that a ``FilterSubsample`` pipeline keeps exactly ``N`` files.

    The image fixtures under ``tests/data/image`` are loaded recursively.
    The test first asserts that more files are present than the requested
    sample size, so the final count can only come from the pipeline run.
    """
    sample_size = 2

    group = FileGroup(recursive=True)
    group.load_dir("tests/data/image", File, recursive=True)
    group.configure_pipeline(["FilterSubsample"])

    # Precondition: the fixture set must be larger than the sample size.
    assert len(group.files) > sample_size

    subsampler = group.pipeline.components[0]
    subsampler.configure(N=sample_size)
    group.run_pipeline()

    assert len(group.files) == sample_size
Пример #3
0
def test_by_label():
    """Check that a ``FilterByLabel`` pipeline drops the one labelled file.

    The image fixtures under ``tests/data/image`` are loaded recursively,
    the component is configured with ``label_to_filter="TRAIN"``, and the
    first loaded file is given that label. After the run the group must
    hold one file fewer than before.
    """
    label = "TRAIN"

    group = FileGroup(recursive=True)
    group.load_dir("tests/data/image", File, recursive=True)
    group.configure_pipeline(["FilterByLabel"])

    label_filter = group.pipeline.components[0]
    label_filter.configure(label_to_filter=label)

    # Tag exactly one file so that exactly one removal is expected.
    first_file = group.files[0]
    first_file.add_label(label)

    count_before = len(group.files)
    group.run_pipeline()
    count_after = len(group.files)

    assert count_after == count_before - 1
Пример #4
0
def test_validation_and_test():
    """Check the split sizes produced by ``LabelerValidationAndTest``.

    Everything under ``tests/data`` is loaded recursively and the component
    is configured with ``val_frac=0.2`` and ``test_frac=0.2``. After the run
    each file is tallied under the first of ``TRAIN``, ``VALIDATE``,
    ``TEST`` that it carries. The validation and test tallies must each
    equal ``round(total * 0.2)``, and the training tally must equal the
    remainder.
    """
    holdout_frac = 0.2
    split_labels = ("TRAIN", "VALIDATE", "TEST")

    group = FileGroup(recursive=True)
    group.load_dir("tests/data", File, recursive=True)
    group.configure_pipeline(["LabelerValidationAndTest"])

    labeler = group.pipeline.components[0]
    labeler.configure(val_frac=holdout_frac, test_frac=holdout_frac)
    group.run_pipeline()

    tally = dict.fromkeys(split_labels, 0)
    for fobj in group.files:
        # Count each file once, under the first matching label in order.
        for split_label in split_labels:
            if fobj.has_label(split_label):
                tally[split_label] += 1
                break

    total = len(group.files)
    expected_holdout = round(total * holdout_frac)

    assert tally["TRAIN"] == total - 2 * expected_holdout
    assert tally["VALIDATE"] == expected_holdout
    assert tally["TEST"] == expected_holdout
Пример #5
0
def test_duplicates():
    """Check that a ``FilterDuplicateFiles`` pipeline reports duplicates.

    The image fixtures under ``tests/data/image`` are loaded recursively and
    the pipeline is run. The ``"duplicates"`` entry of the group's
    ``filtered`` mapping must then be non-empty.
    """
    group = FileGroup(recursive=True)
    group.load_dir("tests/data/image", File, recursive=True)
    group.configure_pipeline(["FilterDuplicateFiles"])
    group.run_pipeline()

    duplicates = group.filtered["duplicates"]
    assert len(duplicates) > 0