def main():
    """Preprocess the dataset into per-split HDF5 databases.

    Scans the dataset directory (``config.set_path``), divides it into
    groups (e.g. "training" / validation / test), then loads, crops
    (using the bounding boxes) and preprocesses every Nth image into an
    HDF5 file per group. Images in the "training" group are additionally
    augmented, producing several datasets per source image.

    Side effects: configures root logging, writes one HDF5 file per
    group via ``config.get_convolution_datasets_path``.
    """
    # NOTE(review): filename is misspelled ("preproces_dateset") but kept
    # byte-identical — external tooling may already reference it.
    logging.basicConfig(filename='preproces_dateset.log', level=logging.DEBUG)
    logging.info("Scanning content of dataset")
    content = file.scan_content(config.set_path)
    logging.info("Dividing data into groups")
    divided_content = divide_dataset.divide(content)
    bounding_boxes = BoundingBox.get_bounding_boxes(config.bounding_boxes_path)

    logging.info("Starting image preprocessing")
    counter = 0  # global image counter, shared across all groups for subsampling
    for key in divided_content:
        # Only the training split is augmented.
        augment = key == "training"
        if augment:
            logging.info("Training data will be augmented.")
        # Context manager guarantees the HDF5 file is closed even if
        # preprocessing raises (the original leaked the handle on error).
        with h5py.File(config.get_convolution_datasets_path(key), 'w') as database:
            for cls_name, img_name in divided_content[key]:
                counter += 1
                # Keep only every Nth image (cheap dataset subsampling).
                if counter % config.take_every_nth_sample != 0:
                    continue
                if cls_name not in database:
                    database.create_group(cls_name)
                cls_path = file.add_folder(config.set_path, cls_name)
                if augment:
                    augmented_data = augment_images(
                        load_and_preprocess_img(cls_path, img_name, bounding_boxes))
                    base_name = file.remove_extension(img_name)
                    # One dataset per augmented variant: "<name>_<i>".
                    for i, img in enumerate(augmented_data):
                        database[cls_name].create_dataset(
                            base_name + "_" + str(i), data=img)
                else:
                    database[cls_name].create_dataset(
                        file.remove_extension(img_name),
                        data=load_and_preprocess_img(cls_path, img_name,
                                                     bounding_boxes))
    logging.info("Image loading finished")
for photo in split_ids[group_name]['data']: class_name = photo[0] if class_name not in group_db.keys(): group_db.create_group(class_name) for i in range(0, config.data_multiplication_factor): photo_name = photo[1] + "_" + str(i) group_db[class_name].create_dataset( photo_name, data=features_db[class_name][photo_name]) group_db.close() if __name__ == "__main__": logging.basicConfig(filename="sift.log", level=logging.DEBUG) features_db = h5py.File(config.features_db_path, "w") bounding_boxes = BoundingBox.get_bounding_boxes(config.bounding_boxes_path) counter = 0 logging.info("Starting extraction") for class_path in file.gen_subdir_path(config.set_path): class_descriptors = features_db.create_group( file.get_folder(class_path)) for photo_path, photo_name in file.gen_file_path(class_path): counter += 1 if counter % config.take_every_nth_sample != 0: continue # removes file extension photo_name_hash = file.remove_extension(photo_name) bb = bounding_boxes[photo_name_hash] photo_desc = execute_sift_extraction(photo_path, bb, 1) for i, pic in enumerate(photo_desc):