def test_cv_file_append_writer():
    """Check that data appended to an existing mindrecord file can be read back.

    The first five samples are written with a fresh ``FileWriter``; the file
    with suffix ``"0"`` is then reopened through
    ``FileWriter.open_for_append`` to add the next five. Reading that file
    back must yield 10 items with 3 fields each.

    The reader is closed and the generated files are removed in ``finally``
    blocks, so a failing assertion does not leave artifacts behind for
    subsequent runs.
    """
    # Files expected on disk: CV3_FILE_NAME + "0".."3", each with a ".db"
    # companion (presumably one pair per shard, given the 4 passed to
    # FileWriter below -- confirm against the FileWriter API).
    shard_paths = ["{}{}".format(CV3_FILE_NAME, x) for x in range(4)]
    reader = None
    try:
        writer = FileWriter(CV3_FILE_NAME, 4)
        data = get_data("../data/mindrecord/testImageNetData/")
        cv_schema_json = {"file_name": {"type": "string"},
                          "label": {"type": "int64"}, "data": {"type": "bytes"}}
        writer.add_schema(cv_schema_json, "img_schema")
        writer.add_index(["file_name", "label"])
        writer.write_raw_data(data[0:5])
        writer.commit()

        # Reopen the already committed file and append the second batch.
        write_append = FileWriter.open_for_append(CV3_FILE_NAME + "0")
        write_append.write_raw_data(data[5:10])
        write_append.commit()

        reader = FileReader(CV3_FILE_NAME + "0")
        count = 0
        for index, x in enumerate(reader.get_next()):
            # One entry per schema field: file_name, label, data.
            assert len(x) == 3
            count = count + 1
            logger.info("#item{}: {}".format(index, x))
        assert count == 10
    finally:
        try:
            if reader is not None:
                reader.close()
        finally:
            # Guard each removal so cleanup after an early failure does not
            # raise FileNotFoundError and mask the original error.
            for path in shard_paths:
                for file_path in (path, "{}.db".format(path)):
                    if os.path.exists(file_path):
                        os.remove(file_path)
# Example #2
# 0
def convert_data_to_mindrecord():
    '''Convert the samples listed in ``dataset_txt_file`` to mindrecord.

    Each line of ``dataset_txt_file`` is split on single spaces: the first
    token is the path of an image file, the remaining tokens are integer
    labels. The raw image bytes and the labels (as an ``int32`` array) are
    appended to the existing mindrecord file ``mindrecord_file_name``.

    Raises:
        ValueError: if the listing yields no samples to write.
    '''
    print('Loading mindrecord...')
    writer = FileWriter.open_for_append(mindrecord_file_name)

    print('Loading train data...')
    total_data = []
    with open(dataset_txt_file, 'r') as ft:
        for line in ft:
            sline = line.strip().split(" ")
            image_file = sline[0]
            labels = [int(item) for item in sline[1:]]

            # Store the file content as-is; no decoding is done here.
            with open(image_file, 'rb') as f:
                img = f.read()

            total_data.append(
                {"image": img, "label": np.array(labels, dtype='int32')})

    print('Writing train data to mindrecord...')
    # total_data is always a list, so test for emptiness rather than None;
    # the previous `is None` check could never trigger.
    if not total_data:
        raise ValueError("None needs writing to mindrecord.")
    writer.write_raw_data(total_data)
    writer.commit()
def convert_yolo_data_to_mindrecord():
    '''Append the image/annotation pairs to an existing mindrecord file.

    The paths come from ``prepare_file_paths()``; every image file is paired
    with the annotation file at the same position and converted to a sample
    by ``get_data`` before being written to ``mindrecord_file_name``.

    Raises:
        AssertionError: if the numbers of image and annotation files differ.
        ValueError: if there are no samples to write.
    '''

    print('Loading mindrecord...')
    writer = FileWriter.open_for_append(mindrecord_file_name)

    print('Loading train data...')
    image_files, anno_files = prepare_file_paths()
    dataset_size = len(anno_files)
    # Raised explicitly (instead of a bare `assert`) so the check still runs
    # under `python -O`; the exception type is unchanged.
    if dataset_size != len(image_files):
        raise AssertionError(
            "image/annotation count mismatch: {} vs {}".format(
                len(image_files), dataset_size))
    logger.info("#size of dataset: {}".format(dataset_size))
    # Both lists have equal length here, so zip pairs every entry.
    data = [get_data(image_file, anno_file)
            for image_file, anno_file in zip(image_files, anno_files)]

    print('Writing train data to mindrecord...')
    # data is always a list, so test for emptiness rather than None;
    # the previous `is None` check could never trigger.
    if not data:
        raise ValueError("None needs writing to mindrecord.")
    writer.write_raw_data(data)
    writer.commit()