Exemplo n.º 1
0
def select_deepest_taxonomized_candidates(candidates: List[Prediction],
                                          taxonomy: Taxonomy):
    """Keep only the candidates that sit deepest in the taxonomy.

    A candidate is dropped when another candidate of the list is more
    specific: e.g. a prediction with `value_tag` 'en:meat' is discarded
    as soon as a prediction with `value_tag` 'en:pork' is also present.

    Candidates whose `value_tag` is None are logged as a warning and never
    selected; candidates whose `value_tag` is not in `taxonomy` are not
    selected either.

    :param candidates: The list of candidates to filter
    :param taxonomy: The taxonomy to use
    :return: the candidates (in their original order) whose `value_tag` is
        the id of one of the deepest nodes
    """
    known_tags = set()

    for prediction in candidates:
        tag = prediction.value_tag
        if tag is not None:
            known_tags.add(tag)
            continue
        logger.warning("Unexpected None `value_tag` (candidate: %s)",
                       prediction)

    # Tags absent from the taxonomy cannot be compared, so they are ignored.
    taxonomy_nodes = [taxonomy[tag] for tag in known_tags if tag in taxonomy]
    deepest_ids = {
        node.id for node in taxonomy.find_deepest_nodes(taxonomy_nodes)
    }

    return [
        prediction for prediction in candidates
        if prediction.value_tag in deepest_ids
    ]
def get_deepest_categories(
        taxonomy: Taxonomy,
        categories_tags: Iterable[List[str]]) -> List[List[str]]:
    """Reduce each list of category tags to its deepest taxonomy nodes.

    :param taxonomy: The taxonomy used to look up every tag and to find
        the deepest nodes; each tag is looked up with `taxonomy[tag]`
    :param categories_tags: An iterable of category tag lists
    :return: for each input list, the sorted ids of the nodes returned by
        `taxonomy.find_deepest_nodes`
    """
    deepest_per_entry = []

    for tags in categories_tags:
        nodes = [taxonomy[tag] for tag in tags]
        deepest_ids = [node.id for node in taxonomy.find_deepest_nodes(nodes)]
        deepest_ids.sort()
        deepest_per_entry.append(deepest_ids)

    return deepest_per_entry
Exemplo n.º 3
0
    def process_predictions(
        y_pred: np.ndarray,
        category_names: List[str],
        taxonomy: Taxonomy,
        threshold: float = 0.5,
        deepest_only: bool = False,
    ) -> List[List[CategoryPrediction]]:
        """Convert raw scores into per-row lists of (category, confidence).

        Scores strictly above `threshold` are binarized and passed through
        `fill_ancestors`; every non-zero column of the result is reported
        together with its raw score taken from `y_pred`.

        :param y_pred: score array, indexed as `y_pred[row, column]`
        :param category_names: the category name of each column index
        :param taxonomy: the taxonomy given to `fill_ancestors` and used to
            find the deepest nodes
        :param threshold: only scores strictly greater than this value are
            considered positive before `fill_ancestors` is applied
        :param deepest_only: if True, only keep the categories returned by
            `taxonomy.find_deepest_nodes`
        :return: one list per row of `(category, confidence)` tuples
        """
        binarized = (y_pred > threshold).astype(y_pred.dtype)
        filled = fill_ancestors(binarized,
                                taxonomy=taxonomy,
                                category_names=category_names)

        results = []
        row_count = filled.shape[0]
        for i in range(row_count):
            active_columns = filled[i].nonzero()[0]

            # The confidence is always the raw score, even for columns that
            # are only set in the output of `fill_ancestors`.
            scored = [
                (category_names[column], float(y_pred[i, column]))
                for column in active_columns
            ]
            # Highest confidence first.
            scored.sort(key=operator.itemgetter(1), reverse=True)

            if deepest_only:
                confidence_by_category = dict(scored)
                deepest_nodes = taxonomy.find_deepest_nodes(
                    [taxonomy[name] for name, _ in scored])
                # Resulting order is the one returned by `find_deepest_nodes`.
                scored = [
                    (node.id, confidence_by_category[node.id])
                    for node in deepest_nodes
                ]

            results.append(scored)

        return results
Exemplo n.º 4
0
 def test_find_deepest_nodes(self, taxonomy: Taxonomy,
                             items: List[str],
                             output: List[str]):
     """Check `find_deepest_nodes` on the nodes of `items`.

     :param taxonomy: the taxonomy under test
     :param items: ids of the nodes passed to `find_deepest_nodes`
     :param output: ids of the nodes expected back, in order
     """
     given_nodes = [taxonomy[node_id] for node_id in items]
     expected_nodes = [taxonomy[node_id] for node_id in output]
     deepest_nodes = taxonomy.find_deepest_nodes(given_nodes)
     assert deepest_nodes == expected_nodes