Example #1
def test_sampler_list():
    """
    Test ImageFolderDataset with a list of indices as the sampler, and with invalid sampler inputs
    """
    data1 = ds.ImageFolderDataset("../data/dataset/testPK/data", sampler=[1, 3, 5])
    data21 = ds.ImageFolderDataset("../data/dataset/testPK/data", shuffle=False).take(2).skip(1)
    data22 = ds.ImageFolderDataset("../data/dataset/testPK/data", shuffle=False).take(4).skip(3)
    data23 = ds.ImageFolderDataset("../data/dataset/testPK/data", shuffle=False).take(6).skip(5)

    dataset_equal(data1, data21 + data22 + data23, 0)

    data3 = ds.ImageFolderDataset("../data/dataset/testPK/data", sampler=1)
    dataset_equal(data3, data21, 0)

    def bad_pipeline(sampler, msg):
        with pytest.raises(Exception) as info:
            data1 = ds.ImageFolderDataset("../data/dataset/testPK/data", sampler=sampler)
            for _ in data1:
                pass
        assert msg in str(info.value)

    bad_pipeline(sampler=[1.5, 7],
                 msg="Type of indices element must be int, but got list[0]: 1.5, type: <class 'float'>")

    bad_pipeline(sampler=["a", "b"],
                 msg="Type of indices element must be int, but got list[0]: a, type: <class 'str'>.")
    bad_pipeline(sampler="a", msg="Unsupported sampler object of type (<class 'str'>)")
    bad_pipeline(sampler="", msg="Unsupported sampler object of type (<class 'str'>)")
    bad_pipeline(sampler=np.array([1, 2]),
                 msg="Type of indices element must be int, but got list[0]: 1, type: <class 'numpy.int64'>.")
Example #2
def test_deterministic_run_distribution():
    """
    Test deterministic run with setting the seed that is used in a distribution
    """
    logger.info("test_deterministic_run_distribution")

    # Save original configuration values
    num_parallel_workers_original = ds.config.get_num_parallel_workers()
    seed_original = ds.config.get_seed()

    # when we set the seed all operations within our dataset should be deterministic
    ds.config.set_seed(0)
    ds.config.set_num_parallel_workers(1)

    # First dataset
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    random_horizontal_flip_op = c_vision.RandomHorizontalFlip(0.1)
    decode_op = c_vision.Decode()
    data1 = data1.map(input_columns=["image"], operations=decode_op)
    data1 = data1.map(input_columns=["image"], operations=random_horizontal_flip_op)

    # Second dataset
    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    data2 = data2.map(input_columns=["image"], operations=decode_op)
    # Since the seed is set before the constructors are called, the two ops output the same deterministic sequence
    random_horizontal_flip_op2 = c_vision.RandomHorizontalFlip(0.1)
    data2 = data2.map(input_columns=["image"], operations=random_horizontal_flip_op2)

    dataset_equal(data1, data2, 0)

    # Restore original configuration values
    ds.config.set_num_parallel_workers(num_parallel_workers_original)
    ds.config.set_seed(seed_original)
Example #3
def test_seed_deterministic():
    """
    Test deterministic run with setting the seed; this only works with num_parallel_workers = 1
    """
    logger.info("test_seed_deterministic")

    # Save original configuration values
    num_parallel_workers_original = ds.config.get_num_parallel_workers()
    seed_original = ds.config.get_seed()

    ds.config.set_seed(0)
    ds.config.set_num_parallel_workers(1)

    # First dataset
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    # seed will be read in during constructor call
    random_crop_op = c_vision.RandomCrop([512, 512], [200, 200, 200, 200])
    decode_op = c_vision.Decode()
    data1 = data1.map(input_columns=["image"], operations=decode_op)
    data1 = data1.map(input_columns=["image"], operations=random_crop_op)

    # Second dataset
    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    data2 = data2.map(input_columns=["image"], operations=decode_op)
    # Since the seed is set before the constructors are called, the two ops output the same deterministic sequence
    random_crop_op2 = c_vision.RandomCrop([512, 512], [200, 200, 200, 200])
    data2 = data2.map(input_columns=["image"], operations=random_crop_op2)

    dataset_equal(data1, data2, 0)

    # Restore original configuration values
    ds.config.set_num_parallel_workers(num_parallel_workers_original)
    ds.config.set_seed(seed_original)
Example #4
def test_sampler_list():
    """
    Test ImageFolderDataset with a list of indices as the sampler
    """
    data1 = ds.ImageFolderDataset("../data/dataset/testPK/data", sampler=[1, 3, 5])
    data21 = ds.ImageFolderDataset("../data/dataset/testPK/data", shuffle=False).take(2).skip(1)
    data22 = ds.ImageFolderDataset("../data/dataset/testPK/data", shuffle=False).take(4).skip(3)
    data23 = ds.ImageFolderDataset("../data/dataset/testPK/data", shuffle=False).take(6).skip(5)

    dataset_equal(data1, data21 + data22 + data23, 0)
Example #5
def test_schema_file_vs_string():
    """
    Test that a schema loaded from a file matches one built from the equivalent JSON object
    """
    logger.info("test_schema_file_vs_string")

    schema1 = ds.Schema(SCHEMA_FILE)
    with open(SCHEMA_FILE) as file:
        json_obj = json.load(file)
        schema2 = ds.Schema()
        schema2.from_json(json_obj)

    ds1 = ds.TFRecordDataset(FILES, schema1)
    ds2 = ds.TFRecordDataset(FILES, schema2)

    dataset_equal(ds1, ds2, 0)
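
The contents of SCHEMA_FILE are not shown in these snippets; the JSON object handed to Schema.from_json() is assumed to have roughly the shape sketched below (field names and values are illustrative, not the real file):

# Illustrative only; not the actual SCHEMA_FILE used by the test.
example_schema_json = {
    "datasetType": "TF",
    "numRows": 3,
    "columns": {
        "image": {"type": "uint8", "rank": 1},
        "label": {"type": "int64", "rank": 1}
    }
}
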


def test_deterministic_run_fail():
    """
    Test RandomCrop with seed, expected to fail
    """
    logger.info("test_deterministic_run_fail")

    # Save original configuration values
    num_parallel_workers_original = ds.config.get_num_parallel_workers()
    seed_original = ds.config.get_seed()

    # when we set the seed all operations within our dataset should be deterministic
    ds.config.set_seed(0)
    ds.config.set_num_parallel_workers(1)
    # First dataset
    data1 = ds.TFRecordDataset(DATA_DIR,
                               SCHEMA_DIR,
                               columns_list=["image"],
                               shuffle=False)
    # The seed is captured when the constructor is called, so if this op instance is re-used the results
    # will not be the same between the two datasets. For example, with seed (0) the RandomCrop constructor
    # produces a deterministic series of numbers, e.g. "a" = [1, 2, 3, 4, 5, 6] <- pretend these are random
    random_crop_op = c_vision.RandomCrop([512, 512], [200, 200, 200, 200])
    decode_op = c_vision.Decode()
    data1 = data1.map(operations=decode_op, input_columns=["image"])
    data1 = data1.map(operations=random_crop_op, input_columns=["image"])

    # Second dataset
    data2 = ds.TFRecordDataset(DATA_DIR,
                               SCHEMA_DIR,
                               columns_list=["image"],
                               shuffle=False)
    data2 = data2.map(operations=decode_op, input_columns=["image"])
    # The same random_crop_op instance is re-used here, so both datasets draw from the same random sequence
    data2 = data2.map(operations=random_crop_op, input_columns=["image"])

    try:
        dataset_equal(data1, data2, 0)

    except Exception as e:
        # the two datasets split the numbers out of sequence "a" between them, so their outputs differ
        logger.info("Got an exception in DE: {}".format(str(e)))
        assert "Array" in str(e)

    # Restore original configuration values
    ds.config.set_num_parallel_workers(num_parallel_workers_original)
    ds.config.set_seed(seed_original)


def test_seed_undeterministic():
    """
    Test seed with multiple parallel workers using C ops; this test is expected to fail some of the time
    """
    logger.info("test_seed_undeterministic")

    # Save original configuration values
    num_parallel_workers_original = ds.config.get_num_parallel_workers()
    seed_original = ds.config.get_seed()

    ds.config.set_seed(0)
    ds.config.set_num_parallel_workers(3)

    # First dataset
    data1 = ds.TFRecordDataset(DATA_DIR,
                               SCHEMA_DIR,
                               columns_list=["image"],
                               shuffle=False)
    # We get the seed when constructor is called
    random_crop_op = c_vision.RandomCrop([512, 512], [200, 200, 200, 200])
    decode_op = c_vision.Decode()
    data1 = data1.map(operations=decode_op, input_columns=["image"])
    data1 = data1.map(operations=random_crop_op, input_columns=["image"])

    # Second dataset
    data2 = ds.TFRecordDataset(DATA_DIR,
                               SCHEMA_DIR,
                               columns_list=["image"],
                               shuffle=False)
    data2 = data2.map(operations=decode_op, input_columns=["image"])
    # Since the seed is set before the constructors are called, the two ops output the same deterministic sequence.
    # Assume the generated random sequence "a" = [1, 2, 3, 4, 5, 6] <- pretend these are random
    random_crop_op2 = c_vision.RandomCrop([512, 512], [200, 200, 200, 200])
    data2 = data2.map(operations=random_crop_op2, input_columns=["image"])
    try:
        dataset_equal(data1, data2, 0)
    except Exception as e:
        # both datasets use numbers from the generated sequence "a", but with multiple workers they may consume them in a different order
        logger.info("Got an exception in DE: {}".format(str(e)))
        assert "Array" in str(e)

    # Restore original configuration values
    ds.config.set_num_parallel_workers(num_parallel_workers_original)
    ds.config.set_seed(seed_original)