def recover_image_info(dataset, filename):
    """Recover image full name on the application as well as corresponding
    ground-truth labels

    Parameters
    ----------
    dataset : str
        Name of the dataset, one of 'mapillary', 'mapillary_agg', 'aerial',
        'tanzania' or 'shapes'
    filename : str
        Basename of the image file, without extension

    Returns
    -------
    dict
        Dictionary that contains image full names (raw images and labelled
        version) and label infos

    Raises
    ------
    ValueError
        If `dataset` is not one of the supported dataset names
    """
    dataset_code = dataset + "_agg" if dataset == "mapillary" else dataset
    image_file = os.path.join(dataset_code, "images", filename + ".png")
    label_file = image_file.replace("images", "labels")
    # Mapillary raw images are stored as JPEG; the labelled version stays PNG
    if dataset in ("mapillary", "mapillary_agg"):
        image_file = image_file.replace(".png", ".jpg")
    server_label_filename = os.path.join(app.static_folder, label_file)
    server_label_image = np.array(Image.open(server_label_filename))
    # Preprocessed-folder name, "<image size>_<aggregation mode>"
    size_aggregations = {
        "mapillary": "400_aggregated",
        "aerial": "250_full",
        "tanzania": "512_full",
        "shapes": "64_full",
    }
    try:
        size_aggregation = size_aggregations[dataset]
    except KeyError:
        raise ValueError(("Unknown dataset. Please choose 'mapillary', "
                          "'aerial', 'tanzania' or 'shapes'."))
    with open(
            os.path.join(
                "data",
                dataset,
                "preprocessed",
                size_aggregation,
                "validation.json",
            )) as fobj:
        config = json.load(fobj)
    # Aerial labels are single-channel; every other dataset uses RGB label
    # images, hence the reshape to a (n_pixels, 3) array of colors
    if dataset != "aerial":
        actual_labels = np.unique(
            server_label_image.reshape([-1, 3]), axis=0
        ).tolist()
    else:
        actual_labels = np.unique(server_label_image).tolist()
    # Only report labels that actually appear on the label image
    printed_labels = [(item["category"], utils.GetHTMLColor(item["color"]))
                      for item in config["labels"]
                      if item["color"] in actual_labels]
    return {
        "image_file": image_file,
        "label_file": label_file,
        "labels": printed_labels,
    }
def summarize_config(config):
    """Extract and reshape dataset configuration information in a
    HTML-printing context

    Parameters
    ----------
    config : list of dict
        Dataset label configuration; each item provides "category" and
        "color" keys

    Returns
    -------
    list of tuple
        Simplified dataset configuration for HTML-printing purpose, as
        (category, HTML color) pairs
    """
    return [(c["category"], utils.GetHTMLColor(c["color"])) for c in config]
def predict(
        filenames,
        dataset,
        problem,
        datapath="./data",
        name=None,
        network=None,
        batch_size=None,
        dropout=None,
        learning_rate=None,
        learning_rate_decay=None,
        output_dir="/tmp/deeposlandia/predicted",
):
    """Make label prediction on images indicated by `filenames`, according to
    considered `problem`

    Parameters
    ----------
    filenames : list of str
        Glob patterns of the image files on the file system
    dataset : str
        Name of the dataset
    problem : str
        Name of the considered model, either `featdet` or `semseg`
    datapath : str
        Relative path of dataset repository
    name : str
        Name of the saved network
    network : str
        Name of the chosen architecture, either `simple`, `vgg` or `inception`
    batch_size : integer
        Batch size used for training the model
    dropout : float
        Dropout rate used for training the model
    learning_rate : float
        Learning rate used for training the model
    learning_rate_decay : float
        Learning rate decay used for training the model
    output_dir : str
        Path of the output directory, where labelled images will be stored
    (useful only if `problem=semseg`)

    Returns
    -------
    dict
        Double predictions (between 0 and 1, acts as percentages) regarding
    each labels

    Raises
    ------
    FileNotFoundError
        If no preprocessed training configuration exists for the parameters
    ValueError
        If `problem` is neither `featdet` nor `semseg`
    """
    # `image_paths` is first got as
    # [[image1, ..., image_i], [image_j, ..., image_n]]
    image_paths = [glob.glob(f) for f in filenames]
    # then it is flattened to get a simple list
    flattened_image_paths = sum(image_paths, [])
    images = extract_images(flattened_image_paths)
    model_input_size = images.shape[1]
    instance_args = [
        name,
        model_input_size,
        network,
        batch_size,
        dropout,
        learning_rate,
        learning_rate_decay,
    ]
    instance_name = utils.list_to_str(instance_args, "_")
    prepro_folder = utils.prepare_preprocessed_folder(
        datapath, dataset, model_input_size
    )
    if os.path.isfile(prepro_folder["training_config"]):
        train_config = utils.read_config(prepro_folder["training_config"])
        # Only labels flagged for evaluation drive the model output size
        label_ids = [
            x["id"] for x in train_config["labels"] if x["is_evaluate"]
        ]
        nb_labels = len(label_ids)
    else:
        raise FileNotFoundError(
            "There is no training data with the given parameters. "
            "Please generate a valid dataset before calling the program."
        )
    output_folder = utils.prepare_output_folder(
        datapath, dataset, model_input_size, problem
    )
    # NOTE(review): `output_folder` is subscripted below ("best-model" is used
    # directly as a file path), so it behaves as a mapping of full paths;
    # joining the mapping itself with os.path.join would raise TypeError.
    # Use the stored path directly — confirm against
    # `utils.prepare_output_folder`.
    instance_path = output_folder["best-instance"]
    dropout, network = utils.recover_instance(instance_path)
    model = init_model(
        problem,
        instance_name,
        model_input_size,
        nb_labels,
        dropout,
        network,
    )
    if os.path.isfile(output_folder["best-model"]):
        model.load_weights(output_folder["best-model"])
        logger.info(
            "Model weights have been recovered from %s",
            output_folder["best-model"],
        )
    else:
        logger.info(
            "No available trained model for this image size with optimized hyperparameters. "
            "The inference will be done on an untrained model")
    y_raw_pred = model.predict(images, batch_size=2, verbose=1)
    result = {}
    if problem == "featdet":
        label_info = [(i["category"], utils.GetHTMLColor(i["color"]))
                      for i in train_config["labels"]]
        # One (category, percentage, color) triple per label, per image
        for filename, prediction in zip(flattened_image_paths, y_raw_pred):
            result[filename] = [(i[0], 100 * round(float(j), 2), i[1])
                                for i, j in zip(label_info, prediction)]
        return result
    elif problem == "semseg":
        os.makedirs(output_dir, exist_ok=True)
        # Pixel-wise most probable label index for each image
        predicted_labels = np.argmax(y_raw_pred, axis=3)
        encountered_labels = np.unique(predicted_labels)
        meaningful_labels = [
            x
            for i, x in enumerate(train_config["labels"])
            if i in encountered_labels
        ]
        # Paint each predicted label index with its configured RGB color
        labelled_images = np.zeros(
            shape=np.append(predicted_labels.shape, 3), dtype=np.int8
        )
        for i in range(nb_labels):
            labelled_images[predicted_labels == i] = (
                train_config["labels"][i]["color"]
            )
        # `labelled_image` deliberately does not reuse the name
        # `predicted_labels` (the original shadowed the outer array)
        for labelled_image, filename in zip(
                labelled_images, flattened_image_paths
        ):
            predicted_image = Image.fromarray(labelled_image, "RGB")
            filename = filename.replace(".jpg", ".png")
            predicted_image_path = os.path.join(
                output_dir, os.path.basename(filename)
            )
            predicted_image.save(predicted_image_path)
            result[filename] = os.path.basename(filename)
        return {
            "labels": summarize_config(meaningful_labels),
            "label_images": result,
        }
    else:
        raise ValueError(
            "Unknown model argument. Please use 'featdet' or 'semseg'.")
def predict(
    filenames,
    dataset,
    problem,
    datapath="./data",
    aggregate=False,
    name=None,
    network=None,
    batch_size=None,
    dropout=None,
    learning_rate=None,
    learning_rate_decay=None,
    output_dir="/tmp/deeposlandia/predicted",
):
    """Make label prediction on image indicated by ̀filename`, according to
    considered `problem`

    Parameters
    ----------
    filenames : list of str
        Glob patterns of the image files on the file system
    dataset : str
        Name of the dataset
    problem : str
        Name of the considered model, either `feature_detection` or
    `semantic_segmentation`
    datapath : str
        Relative path of dataset repository
    aggregate : bool
        Either or not the labels are aggregated
    name : str
        Name of the saved network
    network : str
        Name of the chosen architecture, either `simple`, `vgg` or `inception`
    batch_size : integer
        Batch size used for training the model
    dropout : float
        Dropout rate used for training the model
    learning_rate : float
        Learning rate used for training the model
    learning_rate_decay : float
        Learning rate decay used for training the model
    output_dir : str
        Path of the output directory, where labelled images will be stored
    (useful only if `problem=semantic_segmentation`)

    Returns
    -------
    dict
        Double predictions (between 0 and 1, acts as percentages) regarding
    each labels
    """
    # `image_paths` is first got as
    # [[image1, ..., image_i], [image_j, ..., image_n]]
    image_paths = [glob.glob(f) for f in filenames]
    # then it is flattened to get a simple list
    flattened_image_paths = sum(image_paths, [])
    images = extract_images(flattened_image_paths)
    model_input_size = images.shape[1]
    # Aerial images are tiled: the training tile size differs from the model
    # input size and must be recovered to locate the right preprocessed data
    if dataset == "aerial":
        tile_size = utils.get_tile_size_from_image(model_input_size)
    else:
        tile_size = model_input_size
    aggregate_value = "full" if not aggregate else "aggregated"
    instance_args = [
        name,
        tile_size,
        network,
        batch_size,
        aggregate_value,
        dropout,
        learning_rate,
        learning_rate_decay,
    ]
    instance_name = utils.list_to_str(instance_args, "_")
    prepro_folder = utils.prepare_preprocessed_folder(
        datapath, dataset, tile_size, aggregate_value
    )
    if os.path.isfile(prepro_folder["training_config"]):
        train_config = utils.read_config(prepro_folder["training_config"])
        # Only labels flagged for evaluation drive the model output size
        label_ids = [
            x["id"] for x in train_config["labels"] if x["is_evaluate"]
        ]
        nb_labels = len(label_ids)
    else:
        logger.error(
            (
                "There is no training data with the given "
                "parameters. Please generate a valid dataset "
                "before calling the program."
            )
        )
        sys.exit(1)
    # Any missing hyperparameter triggers the "best recorded instance" path;
    # otherwise the fully-specified instance folder is used
    if any([arg is None for arg in instance_args]):
        logger.info(
            ("Some arguments are None, " "the best model is considered.")
        )
        output_folder = utils.prepare_output_folder(datapath, dataset, problem)
        # Best instance/checkpoint files are keyed on tile size + aggregation
        instance_filename = (
            "best-instance-" + str(tile_size) + "-" + aggregate_value + ".json"
        )
        instance_path = os.path.join(output_folder, instance_filename)
        dropout, network = utils.recover_instance(instance_path)
        model = init_model(
            problem,
            instance_name,
            model_input_size,
            nb_labels,
            dropout,
            network,
        )
        checkpoint_filename = (
            "best-model-" + str(tile_size) + "-" + aggregate_value + ".h5"
        )
        checkpoint_full_path = os.path.join(output_folder, checkpoint_filename)
        if os.path.isfile(checkpoint_full_path):
            logger.info("Checkpoint full path : %s", checkpoint_full_path)
            model.load_weights(checkpoint_full_path)
            logger.info(
                "Model weights have been recovered from %s",
                checkpoint_full_path,
            )
        else:
            # Fall through with random weights: predictions will be
            # meaningless but the pipeline still runs end to end
            logger.info(
                (
                    "No available trained model for this image size"
                    " with optimized hyperparameters. The "
                    "inference will be done on an untrained model"
                )
            )
    else:
        logger.info("All instance arguments are filled out.")
        output_folder = utils.prepare_output_folder(
            datapath, dataset, problem, instance_name
        )
        model = init_model(
            problem,
            instance_name,
            model_input_size,
            nb_labels,
            dropout,
            network,
        )
        checkpoints = [
            item
            for item in os.listdir(output_folder)
            if "checkpoint-epoch" in item
        ]
        if len(checkpoints) > 0:
            # Lexicographic max picks the latest checkpoint — presumably epoch
            # numbers are zero-padded in the filename; verify against training
            model_checkpoint = max(checkpoints)
            checkpoint_full_path = os.path.join(
                output_folder, model_checkpoint
            )
            model.load_weights(checkpoint_full_path)
            logger.info(
                "Model weights have been recovered from %s",
                checkpoint_full_path,
            )
        else:
            logger.info(
                (
                    "No available checkpoint for this configuration. "
                    "The model will be trained from scratch."
                )
            )
    y_raw_pred = model.predict(images)
    result = {}
    if problem == "feature_detection":
        label_info = [
            (i["category"], utils.GetHTMLColor(i["color"]))
            for i in train_config["labels"]
        ]
        # One (category, percentage, color) triple per label, per image
        for filename, prediction in zip(flattened_image_paths, y_raw_pred):
            result[filename] = [
                (i[0], 100 * round(float(j), 2), i[1])
                for i, j in zip(label_info, prediction)
            ]
        return result
    elif problem == "semantic_segmentation":
        os.makedirs(output_dir, exist_ok=True)
        # Pixel-wise most probable label index for each image
        predicted_labels = np.argmax(y_raw_pred, axis=3)
        encountered_labels = np.unique(predicted_labels)
        meaningful_labels = [
            x
            for i, x in enumerate(train_config["labels"])
            if i in encountered_labels
        ]
        # Paint each predicted label index with its configured RGB color
        # NOTE(review): dtype is int8, not uint8 — colors above 127 wrap to
        # negative values; the raw bytes are unchanged so Image.fromarray may
        # still render correctly, but confirm uint8 was not intended
        labelled_images = np.zeros(
            shape=np.append(predicted_labels.shape, 3), dtype=np.int8
        )
        for i in range(nb_labels):
            labelled_images[predicted_labels == i] = train_config["labels"][i][
                "color"
            ]
        # NOTE(review): this loop variable shadows the outer
        # `predicted_labels` array (safe here since the array is no longer
        # read, but worth renaming)
        for predicted_labels, filename in zip(
            labelled_images, flattened_image_paths
        ):
            predicted_image = Image.fromarray(predicted_labels, "RGB")
            filename = filename.replace(".jpg", ".png")
            predicted_image_path = os.path.join(
                output_dir, os.path.basename(filename)
            )
            predicted_image.save(predicted_image_path)
            result[filename] = os.path.basename(filename)
        return {
            "labels": summarize_config(meaningful_labels),
            "label_images": result,
        }
    else:
        logger.error(
            (
                "Unknown model argument. Please use "
                "'feature_detection' or 'semantic_segmentation'."
            )
        )
        sys.exit(1)