Example #1
import logging

import h5py

# Project-local modules; import paths are assumed from the identifiers used
import config
import divide_dataset
import file
from bounding_box import BoundingBox
from preprocessing import augment_images, load_and_preprocess_img


def main():
    logging.basicConfig(filename='preprocess_dataset.log', level=logging.DEBUG)

    logging.info("Scanning content of dataset")
    content = file.scan_content(config.set_path)

    logging.info("Dividing data into groups")
    divided_content = divide_dataset.divide(content)

    bounding_boxes = BoundingBox.get_bounding_boxes(config.bounding_boxes_path)
    logging.info("Starting image preprocessing")
    counter = 0
    for key in divided_content:
        augment = False
        if key == "training":
            augment = True
            logging.info("Training data will be augmented.")
        # One HDF5 file per data split
        database = h5py.File(config.get_convolution_datasets_path(key), 'w')
        for (cls_name, img_name) in divided_content[key]:
            counter += 1
            # Thin the dataset: keep only every nth sample
            if counter % config.take_every_nth_sample != 0:
                continue

            # One HDF5 group per class
            if cls_name not in database:
                database.create_group(cls_name)
            cls_path = file.add_folder(config.set_path, cls_name)
            if augment:
                # Store each augmented variant under an indexed name
                augmented_data = augment_images(
                    load_and_preprocess_img(cls_path, img_name,
                                            bounding_boxes))
                augmented_files = [
                    file.remove_extension(img_name) + "_" + str(i)
                    for i in range(len(augmented_data))
                ]
                for img, name in zip(augmented_data, augmented_files):
                    database[cls_name].create_dataset(name, data=img)
            else:
                database[cls_name].create_dataset(
                    file.remove_extension(img_name),
                    data=load_and_preprocess_img(cls_path, img_name,
                                                 bounding_boxes))
        database.close()
    logging.info("Image loading finished")

Example #2

import logging

import h5py

# Project-local modules; import paths are assumed from the identifiers used.
# execute_sift_extraction is assumed to be defined earlier in this module
# (elided in the source).
import config
import file
from bounding_box import BoundingBox


def multiply_split_features(split_ids, features_db):
    # The source snippet starts mid-function: the function name, its
    # signature, the outer loop over the splits and the output path helper
    # (borrowed from the snippet above) are reconstructions.
    for group_name in split_ids:
        group_db = h5py.File(
            config.get_convolution_datasets_path(group_name), "w")
        for photo in split_ids[group_name]['data']:
            class_name = photo[0]
            if class_name not in group_db:
                group_db.create_group(class_name)
            # Copy every multiplied feature set for this photo into the
            # split's database
            for i in range(config.data_multiplication_factor):
                photo_name = photo[1] + "_" + str(i)
                group_db[class_name].create_dataset(
                    photo_name, data=features_db[class_name][photo_name])
        group_db.close()
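
The split_ids structure is consumed but never constructed in this snippet; a purely illustrative shape, consistent with how it is indexed above (class names are hypothetical):

split_ids = {
    "training": {"data": [("cat", "img_001"), ("dog", "img_042")]},
    "validation": {"data": [("cat", "img_007")]},
}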


if __name__ == "__main__":
    logging.basicConfig(filename="sift.log", level=logging.DEBUG)

    features_db = h5py.File(config.features_db_path, "w")
    bounding_boxes = BoundingBox.get_bounding_boxes(config.bounding_boxes_path)

    counter = 0
    logging.info("Starting extraction")
    # One descriptor group per class directory
    for class_path in file.gen_subdir_path(config.set_path):
        class_descriptors = features_db.create_group(
            file.get_folder(class_path))
        for photo_path, photo_name in file.gen_file_path(class_path):
            counter += 1
            if counter % config.take_every_nth_sample != 0:
                continue
            # The extension-less file name doubles as the bounding-box key
            photo_name_hash = file.remove_extension(photo_name)
            bb = bounding_boxes[photo_name_hash]
            photo_desc = execute_sift_extraction(photo_path, bb, 1)
            for i, pic in enumerate(photo_desc):
                # The source snippet ends here; assumed completion, mirroring
                # the indexed naming read back in multiply_split_features()
                class_descriptors.create_dataset(
                    photo_name_hash + "_" + str(i), data=pic)

    features_db.close()
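
execute_sift_extraction itself is not shown in the source. As a rough, hypothetical stand-in, SIFT descriptors for a bounding-box crop can be computed with OpenCV along these lines (the function name and the (x, y, w, h) box layout are assumptions):

import cv2
import numpy as np

def sift_descriptors(photo_path, bb):
    # Crop to the bounding box, convert to grayscale, run SIFT
    x, y, w, h = bb
    img = cv2.imread(photo_path)
    crop = cv2.cvtColor(img[y:y + h, x:x + w], cv2.COLOR_BGR2GRAY)
    sift = cv2.SIFT_create()
    _, descriptors = sift.detectAndCompute(crop, None)
    # One 128-dimensional float32 row per detected keypoint
    if descriptors is None:
        return np.empty((0, 128), np.float32)
    return descriptors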