Exemplo n.º 1
0
def skip_test_minddataset(add_and_remove_cv_file=True):
    """Round-trip a MindDataset pipeline through serialize/deserialize.

    Builds a MindDataset over ``CV_FILE_NAME + "0"`` with a
    SubsetRandomSampler of five indices, serializes it, rebuilds it from the
    serialized dictionary, and checks that both serialized forms produce the
    same JSON. The rebuilt pipeline must then report a size of 5 and yield
    exactly 5 rows.

    Args:
        add_and_remove_cv_file: Not read by the body; presumably a pytest
            fixture slot that prepares the CV file (confirm against the
            fixture definition).
    """
    wanted_columns = ["data", "file_name", "label"]
    reader_count = 4
    subset_sampler = ds.SubsetRandomSampler([1, 2, 3, 5, 7])
    pipeline = ds.MindDataset(CV_FILE_NAME + "0",
                              wanted_columns,
                              reader_count,
                              sampler=subset_sampler)

    # First serialized form: python dictionary, then canonical JSON text.
    first_dict = ds.serialize(pipeline)
    first_json = json.dumps(first_dict, sort_keys=True)

    # Rebuild the pipeline from the dictionary and serialize it once more.
    pipeline = ds.deserialize(input_dict=first_dict)
    second_json = json.dumps(ds.serialize(pipeline), sort_keys=True)

    assert first_json == second_json

    # The return value is intentionally discarded; only the call is kept.
    _ = get_data(CV_DIR_NAME)
    assert pipeline.get_dataset_size() == 5

    # Drain one epoch and count the rows the rebuilt pipeline produces.
    row_count = sum(
        1 for _ in pipeline.create_dict_iterator(num_epochs=1, output_numpy=True))
    assert row_count == 5
def test_cv_minddataset_subset_random_sample_negative(add_and_remove_cv_file):
    """Check SubsetRandomSampler on a MindDataset when indices include negatives.

    The sampler receives ``[1, 2, 4, -1, -2]``; the test expects the dataset
    to report a size of 5 and the dict iterator to yield 5 rows, logging the
    ``data``, ``file_name`` and ``label`` columns of each row.

    Args:
        add_and_remove_cv_file: Not read by the body; presumably a pytest
            fixture that prepares the CV file (confirm against the fixture).
    """
    wanted_columns = ["data", "file_name", "label"]
    reader_count = 4
    subset_sampler = ds.SubsetRandomSampler([1, 2, 4, -1, -2])
    data_set = ds.MindDataset(CV_FILE_NAME + "0",
                              wanted_columns,
                              reader_count,
                              sampler=subset_sampler)
    assert data_set.get_dataset_size() == 5

    # Log templates, one per column, emitted in this order for every row.
    row_banner = "-------------- cv reader basic: {} ------------------------"
    column_templates = (
        ("data", "-------------- item[data]: {}  -----------------------------"),
        ("file_name", "-------------- item[file_name]: {} ------------------------"),
        ("label", "-------------- item[label]: {} ----------------------------"),
    )

    rows_seen = 0
    for row_index, row in enumerate(data_set.create_dict_iterator()):
        logger.info(row_banner.format(row_index))
        for column, template in column_templates:
            logger.info(template.format(row[column]))
        rows_seen = row_index + 1
    assert rows_seen == 5
Exemplo n.º 3
0
def skip_test_chained_sampler_08():
    """Chain a SubsetRandomSampler with a 4-shard DistributedSampler child.

    The parent sampler draws 12 samples from a 12-entry index list; the child
    DistributedSampler is configured with ``num_shards=4`` and ``shard_id=1``.
    The test expects the ImageFolderDataset built on the chained sampler to
    report a size of 3 and to yield 3 rows.
    """
    logger.info("Test Case Chained Sampler - SubsetRandom and Distributed, 4 shards")

    # Parent: random subset of 12 indices; child: shard 1 out of 4.
    subset_indices = [0, 1, 2, 3, 4, 5, 12, 13, 14, 15, 16, 11]
    chained = ds.SubsetRandomSampler(subset_indices, num_samples=12)
    chained.add_child(ds.DistributedSampler(num_shards=4, shard_id=1))

    folder_data = ds.ImageFolderDataset(DATA_DIR, sampler=chained)

    # The reported size must already reflect the sharding.
    reported_size = folder_data.get_dataset_size()
    logger.info("dataset size is: {}".format(reported_size))
    assert reported_size == 3

    # Each row is a dictionary with "image" and "label" keys.
    rows_seen = 0
    for row_index, row in enumerate(folder_data.create_dict_iterator(num_epochs=1)):
        logger.info("image is {}".format(row["image"]))
        logger.info("label is {}".format(row["label"]))
        rows_seen = row_index + 1

    logger.info("Number of data in data1: {}".format(rows_seen))
    # 12 samples from the subset sampler, split across 4 shards -> 3 per shard.
    assert rows_seen == 3
Exemplo n.º 4
0
def test_cv_minddataset_subset_random_sample_out_of_range(
        add_and_remove_cv_file):
    """Check SubsetRandomSampler on a MindDataset with large index values.

    The sampler receives ``[1, 2, 4, 11, 13]``; per the test name, some of
    these are meant to be out of range for the dataset (the dataset length is
    not visible here - confirm against the fixture). The test expects a
    reported size of 5 and 5 rows from a single-epoch numpy dict iterator.

    Args:
        add_and_remove_cv_file: Not read by the body; presumably a pytest
            fixture that prepares the CV file (confirm against the fixture).
    """
    wanted_columns = ["data", "file_name", "label"]
    reader_count = 4
    subset_sampler = ds.SubsetRandomSampler([1, 2, 4, 11, 13])
    data_set = ds.MindDataset(CV_FILE_NAME + "0",
                              wanted_columns,
                              reader_count,
                              sampler=subset_sampler)
    assert data_set.get_dataset_size() == 5

    # Log templates, one per column, emitted in this order for every row.
    row_banner = "-------------- cv reader basic: {} ------------------------"
    column_templates = (
        ("data", "-------------- item[data]: {}  -----------------------------"),
        ("file_name", "-------------- item[file_name]: {} ------------------------"),
        ("label", "-------------- item[label]: {} ----------------------------"),
    )

    rows_seen = 0
    row_iterator = data_set.create_dict_iterator(num_epochs=1, output_numpy=True)
    for row_index, row in enumerate(row_iterator):
        logger.info(row_banner.format(row_index))
        for column, template in column_templates:
            logger.info(template.format(row[column]))
        rows_seen = row_index + 1
    assert rows_seen == 5
Exemplo n.º 5
0
def test_cv_minddataset_subset_random_sample_empty(add_and_remove_cv_file):
    """Check SubsetRandomSampler on a MindDataset with an empty index list.

    With ``indices == []`` the test expects ``get_dataset_size()`` to report
    10 while the dict iterator yields no rows at all.

    NOTE(review): the size/iteration mismatch looks like pinned behavior of
    the library version this test was written for - confirm before changing.

    Args:
        add_and_remove_cv_file: Not read by the body; presumably a pytest
            fixture that prepares the CV file (confirm against the fixture).
    """
    wanted_columns = ["data", "file_name", "label"]
    reader_count = 4
    subset_sampler = ds.SubsetRandomSampler([])
    data_set = ds.MindDataset(CV_FILE_NAME + "0",
                              wanted_columns,
                              reader_count,
                              sampler=subset_sampler)
    # The return value is not used; only the call itself is kept.
    _ = get_data(CV_DIR_NAME)
    assert data_set.get_dataset_size() == 10

    # Log templates, one per column, emitted in this order for every row.
    row_banner = "-------------- cv reader basic: {} ------------------------"
    column_templates = (
        ("data", "-------------- item[data]: {}  -----------------------------"),
        ("file_name", "-------------- item[file_name]: {} ------------------------"),
        ("label", "-------------- item[label]: {} ----------------------------"),
    )

    rows_seen = 0
    for row_index, row in enumerate(data_set.create_dict_iterator()):
        logger.info(row_banner.format(row_index))
        for column, template in column_templates:
            logger.info(template.format(row[column]))
        rows_seen = row_index + 1
    assert rows_seen == 0
def test_subset_random_sampler():
    """Iterate an image-folder dataset through a SubsetRandomSampler.

    The sampler is given 12 indices and the dataset is repeated once; the
    test expects the dict iterator to yield exactly 12 rows, each carrying
    "image" and "label" entries.
    """
    logger.info("Test Case SubsetRandomSampler")
    repeat_count = 1

    subset_indices = [0, 1, 2, 3, 4, 5, 12, 13, 14, 15, 16, 11]
    subset_sampler = ds.SubsetRandomSampler(subset_indices)
    # NOTE(review): this uses ds.ImageFolderDatasetV2 whereas other code in
    # this file uses ds.ImageFolderDataset - confirm which name the targeted
    # library version provides.
    folder_data = ds.ImageFolderDatasetV2(
        DATA_DIR, sampler=subset_sampler).repeat(repeat_count)

    # Each row is a dictionary with "image" and "label" keys.
    rows_seen = 0
    for row_index, row in enumerate(folder_data.create_dict_iterator()):
        logger.info("image is {}".format(row["image"]))
        logger.info("label is {}".format(row["label"]))
        rows_seen = row_index + 1

    logger.info("Number of data in data1: {}".format(rows_seen))
    assert rows_seen == 12