Example #1
def predict(
    filenames,
    dataset,
    problem,
    datapath="./data",
    name=None,
    network=None,
    batch_size=None,
    dropout=None,
    learning_rate=None,
    learning_rate_decay=None,
    output_dir="/tmp/deeposlandia/predicted",
):
    """Make label prediction on image indicated by ̀filename`, according to
    considered `problem`

    Parameters
    ----------
    filenames : list of str
        Paths (or glob patterns) of the image files on the file system
    dataset : str
        Name of the dataset
    problem : str
        Name of the considered problem, either `featdet` or `semseg`
    datapath : str
        Relative path of dataset repository
    name : str
        Name of the saved network
    network : str
        Name of the chosen architecture, either `simple`, `vgg` or `inception`
    batch_size : integer
        Batch size used for training the model
    dropout : float
        Dropout rate used for training the model
    learning_rate : float
        Learning rate used for training the model
    learning_rate_decay : float
        Learning rate decay used for training the model
    output_dir : str
        Path of the output directory, where labelled images will be stored
        (useful only if `problem=semseg`)

    Returns
    -------
    dict
        Predictions as floats between 0 and 1 (acting as percentages) for
        each label

    """
    # `image_paths` is first built as a nested list:
    # [[image1, ..., image_i], [image_j, ..., image_n]]
    image_paths = [glob.glob(f) for f in filenames]
    # then it is flattened into a single list
    flattened_image_paths = sum(image_paths, [])
    images = extract_images(flattened_image_paths)
    model_input_size = images.shape[1]

    instance_args = [
        name,
        model_input_size,
        network,
        batch_size,
        dropout,
        learning_rate,
        learning_rate_decay,
    ]
    instance_name = utils.list_to_str(instance_args, "_")

    prepro_folder = utils.prepare_preprocessed_folder(datapath, dataset,
                                                      model_input_size)

    if os.path.isfile(prepro_folder["training_config"]):
        train_config = utils.read_config(prepro_folder["training_config"])
        label_ids = [
            x["id"] for x in train_config["labels"] if x["is_evaluate"]
        ]
        nb_labels = len(label_ids)
    else:
        raise FileNotFoundError(
            "There is no training data with the given parameters. "
            "Please generate a valid dataset before calling the program.")

    output_folder = utils.prepare_output_folder(datapath, dataset,
                                                model_input_size, problem)
    instance_path = os.path.join(output_folder, output_folder["best-instance"])
    dropout, network = utils.recover_instance(instance_path)
    model = init_model(
        problem,
        instance_name,
        model_input_size,
        nb_labels,
        dropout,
        network,
    )
    if os.path.isfile(output_folder["best-model"]):
        model.load_weights(output_folder["best-model"])
        logger.info(
            "Model weights have been recovered from %s",
            output_folder["best-model"],
        )
    else:
        logger.info(
            "No available trained model for this image size with optimized hyperparameters. "
            "The inference will be done on an untrained model")

    y_raw_pred = model.predict(images, batch_size=2, verbose=1)

    result = {}
    if problem == "featdet":
        label_info = [(i["category"], utils.GetHTMLColor(i["color"]))
                      for i in train_config["labels"]]
        for filename, prediction in zip(flattened_image_paths, y_raw_pred):
            result[filename] = [(i[0], 100 * round(float(j), 2), i[1])
                                for i, j in zip(label_info, prediction)]
        return result
    elif problem == "semseg":
        os.makedirs(output_dir, exist_ok=True)
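        # collapse the per-pixel class probabilities into one label index per
        # pixel (axis 3 is the class axis of the prediction tensor)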
        predicted_labels = np.argmax(y_raw_pred, axis=3)
        encountered_labels = np.unique(predicted_labels)
        meaningful_labels = [
            x for i, x in enumerate(train_config["labels"])
            if i in encountered_labels
        ]
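        # paint each pixel with the RGB color of its predicted label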
        labelled_images = np.zeros(shape=np.append(predicted_labels.shape, 3),
                                   dtype=np.int8)
        for i in range(nb_labels):
            labelled_images[predicted_labels ==
                            i] = train_config["labels"][i]["color"]
        for labelled_image, filename in zip(labelled_images,
                                            flattened_image_paths):
            predicted_image = Image.fromarray(labelled_image, "RGB")
            filename = filename.replace(".jpg", ".png")
            predicted_image_path = os.path.join(output_dir,
                                                os.path.basename(filename))
            predicted_image.save(predicted_image_path)
            result[filename] = os.path.basename(filename)
        return {
            "labels": summarize_config(meaningful_labels),
            "label_images": result,
        }
    else:
        raise ValueError(
            "Unknown model argument. Please use 'featdet' or 'semseg'.")
Example #2
def predict(
    filenames,
    dataset,
    problem,
    datapath="./data",
    aggregate=False,
    name=None,
    network=None,
    batch_size=None,
    dropout=None,
    learning_rate=None,
    learning_rate_decay=None,
    output_dir="/tmp/deeposlandia/predicted",
):
    """Make label prediction on image indicated by ̀filename`, according to
    considered `problem`

    Parameters
    ----------
    filenames : list of str
        Paths (or glob patterns) of the image files on the file system
    dataset : str
        Name of the dataset
    problem : str
        Name of the considered problem, either `feature_detection` or
        `semantic_segmentation`
    datapath : str
        Relative path of dataset repository
    aggregate : bool
        Whether or not the labels are aggregated
    name : str
        Name of the saved network
    network : str
        Name of the chosen architecture, either `simple`, `vgg` or `inception`
    batch_size : integer
        Batch size used for training the model
    dropout : float
        Dropout rate used for training the model
    learning_rate : float
        Learning rate used for training the model
    learning_rate_decay : float
        Learning rate decay used for training the model
    output_dir : str
        Path of the output directory, where labelled images will be stored
        (useful only if `problem=semantic_segmentation`)

    Returns
    -------
    dict
        Predictions as floats between 0 and 1 (acting as percentages) for
        each label

    """
    # `image_paths` is first built as a nested list:
    # [[image1, ..., image_i], [image_j, ..., image_n]]
    image_paths = [glob.glob(f) for f in filenames]
    # then it is flattened into a single list
    flattened_image_paths = sum(image_paths, [])
    images = extract_images(flattened_image_paths)
    model_input_size = images.shape[1]
    if dataset == "aerial":
        tile_size = utils.get_tile_size_from_image(model_input_size)
    else:
        tile_size = model_input_size

    aggregate_value = "full" if not aggregate else "aggregated"
    instance_args = [
        name,
        tile_size,
        network,
        batch_size,
        aggregate_value,
        dropout,
        learning_rate,
        learning_rate_decay,
    ]
    instance_name = utils.list_to_str(instance_args, "_")

    prepro_folder = utils.prepare_preprocessed_folder(
        datapath, dataset, tile_size, aggregate_value
    )

    if os.path.isfile(prepro_folder["training_config"]):
        train_config = utils.read_config(prepro_folder["training_config"])
        label_ids = [
            x["id"] for x in train_config["labels"] if x["is_evaluate"]
        ]
        nb_labels = len(label_ids)
    else:
        logger.error(
            (
                "There is no training data with the given "
                "parameters. Please generate a valid dataset "
                "before calling the program."
            )
        )
        sys.exit(1)

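    # if any hyperparameter is left unspecified, fall back to the recorded
    # best instance for this tile size and aggregation mode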
    if any(arg is None for arg in instance_args):
        logger.info("Some arguments are None, the best model is considered.")
        output_folder = utils.prepare_output_folder(datapath, dataset, problem)
        instance_filename = (
            "best-instance-" + str(tile_size) + "-" + aggregate_value + ".json"
        )
        instance_path = os.path.join(output_folder, instance_filename)
        dropout, network = utils.recover_instance(instance_path)
        model = init_model(
            problem,
            instance_name,
            model_input_size,
            nb_labels,
            dropout,
            network,
        )
        checkpoint_filename = (
            "best-model-" + str(tile_size) + "-" + aggregate_value + ".h5"
        )
        checkpoint_full_path = os.path.join(output_folder, checkpoint_filename)
        if os.path.isfile(checkpoint_full_path):
            logger.info("Checkpoint full path : %s", checkpoint_full_path)
            model.load_weights(checkpoint_full_path)
            logger.info(
                "Model weights have been recovered from %s",
                checkpoint_full_path,
            )
        else:
            logger.info(
                (
                    "No available trained model for this image size"
                    " with optimized hyperparameters. The "
                    "inference will be done on an untrained model"
                )
            )
    else:
        logger.info("All instance arguments are filled out.")
        output_folder = utils.prepare_output_folder(
            datapath, dataset, problem, instance_name
        )
        model = init_model(
            problem,
            instance_name,
            model_input_size,
            nb_labels,
            dropout,
            network,
        )
        checkpoints = [
            item
            for item in os.listdir(output_folder)
            if "checkpoint-epoch" in item
        ]
        if len(checkpoints) > 0:
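            # the lexicographically largest filename is taken as the latest
            # checkpoint (this assumes zero-padded epoch numbers in the names)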
            model_checkpoint = max(checkpoints)
            checkpoint_full_path = os.path.join(
                output_folder, model_checkpoint
            )
            model.load_weights(checkpoint_full_path)
            logger.info(
                "Model weights have been recovered from %s",
                checkpoint_full_path,
            )
        else:
            logger.info(
                "No available checkpoint for this configuration. "
                "The inference will be done on an untrained model."
            )

    y_raw_pred = model.predict(images)

    result = {}
    if problem == "feature_detection":
        label_info = [
            (i["category"], utils.GetHTMLColor(i["color"]))
            for i in train_config["labels"]
        ]
        for filename, prediction in zip(flattened_image_paths, y_raw_pred):
            result[filename] = [
                (i[0], 100 * round(float(j), 2), i[1])
                for i, j in zip(label_info, prediction)
            ]
        return result
    elif problem == "semantic_segmentation":
        os.makedirs(output_dir, exist_ok=True)
        predicted_labels = np.argmax(y_raw_pred, axis=3)
        encountered_labels = np.unique(predicted_labels)
        meaningful_labels = [
            x
            for i, x in enumerate(train_config["labels"])
            if i in encountered_labels
        ]
        labelled_images = np.zeros(
            shape=np.append(predicted_labels.shape, 3), dtype=np.int8
        )
        for i in range(nb_labels):
            labelled_images[predicted_labels == i] = train_config["labels"][i][
                "color"
            ]
        for labelled_image, filename in zip(
            labelled_images, flattened_image_paths
        ):
            predicted_image = Image.fromarray(labelled_image, "RGB")
            filename = filename.replace(".jpg", ".png")
            predicted_image_path = os.path.join(
                output_dir, os.path.basename(filename)
            )
            predicted_image.save(predicted_image_path)
            result[filename] = os.path.basename(filename)
        return {
            "labels": summarize_config(meaningful_labels),
            "label_images": result,
        }
    else:
        logger.error(
            (
                "Unknown model argument. Please use "
                "'feature_detection' or 'semantic_segmentation'."
            )
        )
        sys.exit(1)
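

# A minimal usage sketch, not part of the original function: the glob pattern
# and dataset name below are illustrative assumptions. Leaving the
# hyperparameter arguments as None triggers the "best model" branch above.
if __name__ == "__main__":
    predictions = predict(
        filenames=["./data/aerial/preprocessed/testing/images/*.png"],
        dataset="aerial",
        problem="feature_detection",
        datapath="./data",
        aggregate=False,
    )
    # for `feature_detection`, each image maps to a list of
    # (label, percentage, HTML color) tuples
    for image_path, scores in predictions.items():
        print(image_path, scores)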