Exemplo n.º 1
0
    def update_product(
            self,
            insight: ProductInsight,
            auth: Optional[OFFAuthentication] = None) -> AnnotationResult:
        """Add the insight's value to the product's ``emb_codes`` field.

        Returns ``MISSING_PRODUCT_RESULT`` when ``get_product`` yields
        ``None``, ``ALREADY_ANNOTATED_RESULT`` when ``self.already_exists``
        reports the value among the current codes, and
        ``UPDATED_ANNOTATION_RESULT`` once ``update_emb_codes`` has been
        called with the extended list.
        """
        new_code: str = insight.value

        fetched = get_product(insight.barcode, ["emb_codes"])
        if fetched is None:
            return MISSING_PRODUCT_RESULT

        # The field is read as a comma-separated string; a falsy value
        # means there are no codes to split.
        raw_codes: str = fetched.get("emb_codes", "")
        known_codes: List[str] = raw_codes.split(",") if raw_codes else []

        if self.already_exists(new_code, known_codes):
            return ALREADY_ANNOTATED_RESULT

        known_codes.append(new_code)
        update_emb_codes(insight.barcode,
                         known_codes,
                         server_domain=insight.server_domain,
                         insight_id=insight.id,
                         auth=auth)
        return UPDATED_ANNOTATION_RESULT
Exemplo n.º 2
0
def updated_product_update_insights(barcode: str):
    """Refresh the insights of a product that has just been updated.

    The product is fetched with ``get_product``; if it cannot be found a
    warning is logged and nothing else happens. Otherwise
    ``updated_product_add_category_insight`` is called, then every
    ``ProductInsight`` row for this barcode whose ``annotation`` is null is
    passed to ``delete_invalid_insight`` together with a validator for its
    type.
    """
    product_dict = get_product(barcode)

    if product_dict is None:
        # ``Logger.warn`` is a deprecated alias; ``warning`` is the
        # supported spelling and emits the same message.
        logger.warning("Updated product does not exist: %s", barcode)
        return

    category_added = updated_product_add_category_insight(
        barcode, product_dict)

    if category_added:
        logger.info("Product %s updated", barcode)

    product = Product(product_dict)
    # One validator per insight type, created lazily and reused across rows.
    validators: Dict[str, InsightValidator] = {}

    pending_insights = ProductInsight.select().where(
        ProductInsight.annotation.is_null(),
        ProductInsight.barcode == barcode)

    for insight in pending_insights.iterator():
        if insight.type not in validators:
            validators[insight.type] = InsightValidatorFactory.create(
                insight.type, None)

        insight_deleted = delete_invalid_insight(
            insight,
            validator=validators[insight.type],
            product=product)

        if insight_deleted:
            logger.info("Insight %s deleted (type: %s)",
                        insight.id, insight.type)
Exemplo n.º 3
0
    def update_product(
            self,
            insight: ProductInsight,
            session_cookie: Optional[str] = None) -> AnnotationResult:
        """Add ``insight.data['text']`` to the product's ``emb_codes`` field.

        Returns ``MISSING_PRODUCT_RESULT`` when ``get_product`` yields
        ``None``, ``ALREADY_ANNOTATED_RESULT`` when ``self.already_exists``
        reports the code among the current ones, and
        ``UPDATED_ANNOTATION_RESULT`` once ``update_emb_codes`` has been
        called with the extended list.
        """
        new_code: str = insight.data['text']

        fetched = get_product(insight.barcode, ['emb_codes'])
        if fetched is None:
            return MISSING_PRODUCT_RESULT

        # The field is read as a comma-separated string; a falsy value
        # means there are no codes to split.
        raw_codes: str = fetched.get('emb_codes', '')
        known_codes: List[str] = raw_codes.split(',') if raw_codes else []

        if self.already_exists(new_code, known_codes):
            return ALREADY_ANNOTATED_RESULT

        known_codes.append(new_code)
        update_emb_codes(
            insight.barcode,
            known_codes,
            insight_id=insight.id,
            session_cookie=session_cookie,
        )
        return UPDATED_ANNOTATION_RESULT
Exemplo n.º 4
0
def is_automatically_processable(insight: ProductInsight,
                                 max_timedelta: datetime.timedelta) -> bool:
    """Tell whether an insight may be processed automatically.

    Returns ``False`` when the insight has no source image or when the
    image file stem is not purely numeric. Otherwise the product's
    ``images`` field is fetched, and the result is ``True`` when either
    ``is_recent_image`` or ``is_selected_image`` accepts the image.

    Raises:
        InvalidInsight: if the product is missing, has no ``images`` field,
            or does not list the image ID.
    """
    source_image = insight.source_image
    if not source_image:
        return False

    # The image ID is the file name without its extension.
    image_id = pathlib.Path(source_image).stem
    if not image_id.isdigit():
        return False

    barcode = insight.barcode
    fetched = get_product(barcode, fields=["images"])

    if fetched is None:
        logger.info("Missing product: {}".format(barcode))
        raise InvalidInsight()

    if "images" not in fetched:
        logger.info("No images for product {}".format(barcode))
        raise InvalidInsight()

    images = fetched["images"]

    if image_id not in images:
        logger.info("Missing image for product {}, ID: {}".format(
            barcode, image_id))
        raise InvalidInsight()

    # Short-circuits exactly like the two consecutive checks it replaces.
    return bool(
        is_recent_image(images, image_id, max_timedelta)
        or is_selected_image(images, image_id))
Exemplo n.º 5
0
def updated_product_update_insights(barcode: str):
    """Refresh the category insight of a product that has just been updated.

    The product is fetched with ``get_product``. When it cannot be found, a
    warning is logged and the function returns without doing anything
    else; otherwise ``updated_product_add_category_insight`` is called and
    a message is logged if it reports that a category was added.
    """
    product = get_product(barcode)

    if product is None:
        # ``Logger.warn`` is a deprecated alias of ``warning``.
        logger.warning("Updated product does not exist: %s", barcode)
        # Stop here: there is no product data to hand to the category
        # insight update below.
        return

    category_added = updated_product_add_category_insight(barcode, product)

    if category_added:
        logger.info("Product %s updated", barcode)
Exemplo n.º 6
0
    def predict_from_barcode(
            self,
            barcode: str,
            deepest_only: bool = False) -> Optional[List[CategoryPrediction]]:
        """Predict categories for the product identified by ``barcode``.

        Only the ``product_name`` and ``ingredients_tags`` fields are
        requested. Returns ``None`` (after logging) when the product is not
        found, otherwise whatever ``self.predict_from_product`` returns.
        """
        requested_fields = ["product_name", "ingredients_tags"]
        fetched = get_product(barcode, fields=requested_fields)

        if fetched is not None:
            return self.predict_from_product(fetched,
                                             deepest_only=deepest_only)

        logger.info("Product {} not found".format(barcode))
        return None
Exemplo n.º 7
0
    def on_get(self, req: falcon.Request, resp: falcon.Response):
        """Respond with the category predictions for a barcode.

        Reads the required ``barcode`` parameter and the optional boolean
        ``deepest_only`` (default ``False``). The response media is
        ``{"categories": [...]}``; the list stays empty when
        ``get_product`` returns a falsy value.
        """
        barcode = req.get_param("barcode", required=True)
        deepest_only = req.get_param_as_bool("deepest_only", default=False)

        found = get_product(barcode)
        if not found:
            resp.media = {"categories": []}
            return

        # The taxonomy is only loaded once a product is actually available.
        classifier = CategoryClassifier(
            get_taxonomy(TaxonomyType.category.name))
        resp.media = {
            "categories": [
                prediction.to_dict()
                for prediction in classifier.predict(found, deepest_only)
            ]
        }
Exemplo n.º 8
0
    def update_product(self, insight: ProductInsight) -> AnnotationResult:
        """Add the insight's label tag to the product unless already present.

        Returns ``MISSING_PRODUCT_RESULT`` when the product is not found,
        ``ALREADY_ANNOTATED_RESULT`` when ``insight.value_tag`` is already in
        the product's ``labels_tags``, and ``UPDATED_ANNOTATION_RESULT``
        after ``add_label_tag`` has been called.
        """
        fetched = get_product(insight.barcode, ['labels_tags'])
        if fetched is None:
            return MISSING_PRODUCT_RESULT

        # A missing or falsy field is treated as "no labels yet".
        existing_tags: List[str] = fetched.get('labels_tags') or []
        if insight.value_tag not in existing_tags:
            add_label_tag(insight.barcode, insight.value_tag)
            return UPDATED_ANNOTATION_RESULT

        return ALREADY_ANNOTATED_RESULT
Exemplo n.º 9
0
    def update_product(
            self,
            insight: ProductInsight,
            session_cookie: Optional[str] = None) -> AnnotationResult:
        """Apply a spellcheck correction to the product's ingredient text.

        The stored ``ProductIngredient`` text must still equal the product's
        current ``ingredients_text``; otherwise an ``error_updated_product``
        result is returned. On success the corrected text is sent with
        ``save_ingredients``, ``self.update_related_insights`` is called and
        the stored ``ProductIngredient`` row is updated.

        Returns ``MISSING_PRODUCT_RESULT`` when the product does not exist,
        an ``error_no_matching_ingredient`` result when no
        ``ProductIngredient`` row matches the barcode, and
        ``UPDATED_ANNOTATION_RESULT`` on success.
        """
        barcode = insight.barcode
        if not product_exists(barcode):
            return MISSING_PRODUCT_RESULT

        stored_query = ProductIngredient.select().where(
            ProductIngredient.barcode == barcode)
        try:
            product_ingredient: ProductIngredient = stored_query.get()
        except ProductIngredient.DoesNotExist:
            logger.warning(
                "Missing product ingredient for product {}".format(barcode))
            return AnnotationResult(
                status="error_no_matching_ingredient",
                description=("no ingredient is associated "
                             "with insight (internal error)"))

        stored_text = product_ingredient.ingredients

        fetched = get_product(barcode, fields=["ingredients_text"])
        if fetched is None:
            logger.warning("Missing product: {}".format(barcode))
            return MISSING_PRODUCT_RESULT

        # The offsets in the insight refer to the stored text, so the
        # correction is refused when the product text differs from it.
        if fetched.get("ingredients_text") != stored_text:
            logger.warning(
                "ingredients have changed since spellcheck insight "
                "creation (product {})".format(barcode))
            return AnnotationResult(
                status=AnnotationStatus.error_updated_product.name,
                description=("the ingredient list has been "
                             "updated since spellcheck"))

        details = insight.data
        corrected_text = self.generate_full_correction(
            stored_text,
            details['start_offset'],
            details['end_offset'],
            details['correction'])

        save_ingredients(
            barcode,
            corrected_text,
            insight_id=insight.id,
            session_cookie=session_cookie,
        )
        self.update_related_insights(insight)

        product_ingredient.ingredients = corrected_text
        product_ingredient.save()
        return UPDATED_ANNOTATION_RESULT
Exemplo n.º 10
0
    def update_product(self, insight: ProductInsight) -> AnnotationResult:
        """Set the product quantity from the insight when none is set yet.

        Returns ``MISSING_PRODUCT_RESULT`` when the product is not found,
        ``ALREADY_ANNOTATED_RESULT`` when the product already has a truthy
        ``quantity``, and ``UPDATED_ANNOTATION_RESULT`` after
        ``update_quantity`` has been called with ``insight.data['text']``.
        """
        fetched = get_product(insight.barcode, ['quantity'])
        if fetched is None:
            return MISSING_PRODUCT_RESULT

        # Any truthy value counts as "already set"; empty values do not.
        if fetched.get('quantity'):
            return ALREADY_ANNOTATED_RESULT

        update_quantity(insight.barcode, insight.data['text'])
        return UPDATED_ANNOTATION_RESULT
Exemplo n.º 11
0
    def update_product(self, insight: ProductInsight) -> AnnotationResult:
        """Set the product expiration date from the insight if none is set.

        Returns ``MISSING_PRODUCT_RESULT`` when the product is not found,
        ``ALREADY_ANNOTATED_RESULT`` when the product already has a truthy
        ``expiration_date``, and ``UPDATED_ANNOTATION_RESULT`` after
        ``update_expiration_date`` has been called.
        """
        # Read up front, before any product lookup is attempted.
        detected_date: str = insight.data['text']

        fetched = get_product(insight.barcode, ['expiration_date'])
        if fetched is None:
            return MISSING_PRODUCT_RESULT

        if fetched.get('expiration_date'):
            return ALREADY_ANNOTATED_RESULT

        update_expiration_date(insight.barcode, detected_date)
        return UPDATED_ANNOTATION_RESULT
Exemplo n.º 12
0
    def update_product(self, insight: ProductInsight) -> AnnotationResult:
        """Add the insight's category tag to the product unless present.

        Returns ``MISSING_PRODUCT_RESULT`` when the product is not found,
        ``ALREADY_ANNOTATED_RESULT`` when ``insight.value_tag`` is already in
        the product's ``categories_tags``, and ``UPDATED_ANNOTATION_RESULT``
        after ``add_category`` has been called.
        """
        fetched = get_product(insight.barcode, ['categories_tags'])
        if fetched is None:
            return MISSING_PRODUCT_RESULT

        # A missing or falsy field is treated as "no categories yet".
        existing_tags: List[str] = fetched.get('categories_tags') or []
        if insight.value_tag not in existing_tags:
            add_category(insight.barcode, insight.value_tag)
            return UPDATED_ANNOTATION_RESULT

        return ALREADY_ANNOTATED_RESULT
Exemplo n.º 13
0
    def update_product(self, insight: ProductInsight) -> AnnotationResult:
        """Add the insight's brand to a product that has no brand yet.

        Returns ``MISSING_PRODUCT_RESULT`` when the product is not found,
        ``ALREADY_ANNOTATED_RESULT`` when the product's ``brands_tags`` is
        non-empty, and ``UPDATED_ANNOTATION_RESULT`` after ``add_brand`` has
        been called with ``insight.data['brand']``.
        """
        # Read up front, before any product lookup is attempted.
        detected_brand: str = insight.data['brand']

        fetched = get_product(insight.barcode, ['brands_tags'])
        if fetched is None:
            return MISSING_PRODUCT_RESULT

        existing_brands: List[str] = fetched.get('brands_tags') or []
        if not existing_brands:
            add_brand(insight.barcode, detected_brand)
            return UPDATED_ANNOTATION_RESULT

        # For now, a product that already carries any brand is left alone.
        return ALREADY_ANNOTATED_RESULT
Exemplo n.º 14
0
    def update_product(self, insight: ProductInsight) -> AnnotationResult:
        """Add the insight's store to the product unless its tag is present.

        Returns ``MISSING_PRODUCT_RESULT`` when the product is not found,
        ``ALREADY_ANNOTATED_RESULT`` when ``insight.value_tag`` is already in
        the product's ``stores_tags``, and ``UPDATED_ANNOTATION_RESULT``
        after ``add_store`` has been called with ``insight.data['store']``.
        """
        # Both values are read up front, before any product lookup.
        store_name: str = insight.data['store']
        wanted_tag: str = insight.value_tag

        fetched = get_product(insight.barcode, ['stores_tags'])
        if fetched is None:
            return MISSING_PRODUCT_RESULT

        existing_tags: List[str] = fetched.get('stores_tags') or []
        if wanted_tag not in existing_tags:
            add_store(insight.barcode, store_name)
            return UPDATED_ANNOTATION_RESULT

        return ALREADY_ANNOTATED_RESULT
Exemplo n.º 15
0
def correct_ingredient(
    country: str,
    ingredient: str,
    pattern: str,
    correction: str,
    comment: str,
    auth: OFFAuthentication,
    dry_run: bool = False,
):
    """Apply a pattern-based correction to the ingredient text of products.

    Every product yielded by ``iter_products(country, ingredient)`` is
    re-fetched with only its ``ingredients_text_<country>`` field, run
    through ``generate_correction`` and, when the text changed and
    ``dry_run`` is false, saved with ``save_ingredients``. Progress is
    printed to stdout.
    """
    if dry_run:
        print("*** Dry run ***")

    field_name = "ingredients_text_{}".format(country)
    matches = list(iter_products(country, ingredient))
    print("{} products".format(len(matches)))
    compiled_patterns = get_patterns(pattern, correction)

    for match in matches:
        barcode = match.get("code")
        base_url = BaseURLProvider().country(country).get()
        print("Fixing {}/product/{}".format(base_url, barcode))

        fetched = get_product(barcode, fields=[field_name])
        if fetched is None:
            print("Product not found: {}".format(barcode))
            continue

        before = fetched[field_name]
        after = generate_correction(before, compiled_patterns)

        if before == after:
            print("No modification after correction, skipping")
            continue

        # Show the text before and after the correction.
        print(before)
        print(after)
        print("-" * 15)

        if dry_run:
            continue

        save_ingredients(
            barcode, after, lang=country, comment=comment, auth=auth
        )
Exemplo n.º 16
0
    def update_product(
            self,
            insight: ProductInsight,
            auth: Optional[OFFAuthentication] = None) -> AnnotationResult:
        """Add the insight's brand to the product.

        Returns ``MISSING_PRODUCT_RESULT`` when ``get_product`` yields
        ``None``; otherwise ``add_brand`` is called with ``insight.value``
        and ``UPDATED_ANNOTATION_RESULT`` is returned.
        """
        # The fetched data is only used to check that the product exists.
        if get_product(insight.barcode, ["brands_tags"]) is None:
            return MISSING_PRODUCT_RESULT

        add_brand(insight.barcode,
                  insight.value,
                  insight_id=insight.id,
                  server_domain=insight.server_domain,
                  auth=auth)
        return UPDATED_ANNOTATION_RESULT
Exemplo n.º 17
0
    def spellcheck(self, req: falcon.Request, resp: falcon.Response):
        """Spellcheck a text, or the French ingredient text of a product.

        The text comes from the ``text`` parameter or, failing that, from
        the ``ingredients_text_fr`` field of the product named by
        ``barcode``. Optional parameters: ``index`` (default
        ``"product_all"``), ``confidence`` (default ``0.5``), ``pipeline``
        and ``safe``, the last two being mutually exclusive.

        The response media holds the input text, the latest correction and
        the list of corrections, or ``{"status": "not_found"}`` when no
        text could be obtained from the product.

        Raises:
            falcon.HTTPBadRequest: if neither ``text`` nor ``barcode`` is
                given, if ``pipeline`` and ``safe`` are both supplied, or
                if ``pipeline`` names an unknown spellchecker.
        """
        text = req.get_param("text")

        if text is None:
            barcode = req.get_param("barcode")
            if barcode is None:
                raise falcon.HTTPBadRequest("text or barcode is required.")

            # A missing product is handled like one without the field.
            fetched = get_product(barcode) or {}
            text = fetched.get("ingredients_text_fr")

        if text is None:
            resp.media = {"status": "not_found"}
            return

        index_name = req.get_param("index", default="product_all")
        confidence = req.get_param_as_float("confidence", default=0.5)
        pipeline = req.get_param_as_list("pipeline") or None
        safe = req.get_param_as_bool("safe", blank_as_true=False)

        if pipeline and safe is not None:
            raise falcon.HTTPBadRequest(
                "pipeline and safe parameters cannot be used together"
            )

        if not pipeline:
            if safe:
                pipeline = ["patterns", "percentages", "vocabulary"]
        else:
            for item in pipeline:
                if item in SPELLCHECKERS:
                    continue
                raise falcon.HTTPBadRequest(f"unknown pipeline item: {item}")

        load_options = {
            "client": es_client,
            "pipeline": pipeline,
            "index_name": index_name,
            "confidence": confidence,
        }
        outcome = Spellchecker.load(**load_options).correct(text)

        resp.media = {
            "text": text,
            "corrected": outcome.latest_correction,
            "corrections": outcome.corrections,
        }
Exemplo n.º 18
0
    def update_product(
            self,
            insight: ProductInsight,
            auth: Optional[OFFAuthentication] = None) -> AnnotationResult:
        """Set the product expiration date from the insight if none is set.

        Returns ``MISSING_PRODUCT_RESULT`` when the product is not found,
        ``ALREADY_ANNOTATED_RESULT`` when the product already has a truthy
        ``expiration_date``, and ``UPDATED_ANNOTATION_RESULT`` after
        ``update_expiration_date`` has been called with ``insight.value``.
        """
        fetched = get_product(insight.barcode, ["expiration_date"])
        if fetched is None:
            return MISSING_PRODUCT_RESULT

        # Any truthy value counts as "already set"; empty values do not.
        if fetched.get("expiration_date"):
            return ALREADY_ANNOTATED_RESULT

        update_expiration_date(insight.barcode,
                               insight.value,
                               insight_id=insight.id,
                               server_domain=insight.server_domain,
                               auth=auth)
        return UPDATED_ANNOTATION_RESULT
Exemplo n.º 19
0
    def update_product(
            self,
            insight: ProductInsight,
            auth: Optional[OFFAuthentication] = None) -> AnnotationResult:
        """Add the insight's store to the product unless its tag is present.

        Returns ``MISSING_PRODUCT_RESULT`` when the product is not found,
        ``ALREADY_ANNOTATED_RESULT`` when ``insight.value_tag`` is already in
        the product's ``stores_tags``, and ``UPDATED_ANNOTATION_RESULT``
        after ``add_store`` has been called with ``insight.value``.
        """
        fetched = get_product(insight.barcode, ["stores_tags"])
        if fetched is None:
            return MISSING_PRODUCT_RESULT

        # A missing or falsy field is treated as "no stores yet".
        existing_tags: List[str] = fetched.get("stores_tags") or []
        if insight.value_tag in existing_tags:
            return ALREADY_ANNOTATED_RESULT

        add_store(insight.barcode,
                  insight.value,
                  insight_id=insight.id,
                  server_domain=insight.server_domain,
                  auth=auth)
        return UPDATED_ANNOTATION_RESULT
Exemplo n.º 20
0
def get_random_insight(insight_type: str = None,
                       country: str = None) -> Optional[ProductInsight]:
    """Pick a random unannotated insight whose product can be fetched.

    Candidates are insights with a null ``annotation``, optionally
    filtered by ``country`` and ``insight_type``. An insight whose product
    cannot be fetched is deleted and another one is drawn, for at most four
    draws in total.

    Returns the selected insight, or ``None`` when no candidate matches or
    all four draws pointed at missing products.
    """
    filters = [ProductInsight.annotation.is_null()]

    if country is not None:
        filters.append(ProductInsight.countries.contains(country))

    if insight_type is not None:
        filters.append(ProductInsight.type == insight_type)

    for _ in range(4):
        drawn = list(
            ProductInsight.select()
            .where(*filters)
            .order_by(peewee.fn.Random())
            .limit(1))

        if not drawn:
            return None

        candidate = drawn[0]

        # Only existence matters here, so a single field is requested;
        # ``get_product`` returns a falsy value when the product is absent.
        if get_product(candidate.barcode, ['code']):
            return candidate

        candidate.delete_instance()
        logger.info("Product not found, insight deleted")

    return None
Exemplo n.º 21
0
    def update_product(
            self,
            insight: ProductInsight,
            auth: Optional[OFFAuthentication] = None) -> AnnotationResult:
        """Set the product quantity from the insight when none is set yet.

        Returns ``MISSING_PRODUCT_RESULT`` when the product is not found,
        ``ALREADY_ANNOTATED_RESULT`` when the product already has a truthy
        ``quantity``, and ``UPDATED_ANNOTATION_RESULT`` after
        ``update_quantity`` has been called with ``insight.value``.
        """
        fetched = get_product(insight.barcode, ["quantity"])
        if fetched is None:
            return MISSING_PRODUCT_RESULT

        # Any truthy value counts as "already set"; empty values do not.
        if fetched.get("quantity"):
            return ALREADY_ANNOTATED_RESULT

        update_quantity(insight.barcode,
                        insight.value,
                        insight_id=insight.id,
                        server_domain=insight.server_domain,
                        auth=auth)
        return UPDATED_ANNOTATION_RESULT
Exemplo n.º 22
0
    def get_source_image_url(barcode: str) -> Optional[str]:
        """Return a display image entry taken from the selected images.

        The ``front``, ``ingredients`` and ``nutrition`` entries of the
        product's ``selected_images`` are tried in that order; the first
        value of the first non-empty ``display`` mapping is returned.
        Returns ``None`` when the product is missing, has no
        ``selected_images`` field, or none of the entries has a display
        image.
        """
        fetched: Optional[JSONType] = get_product(
            barcode, fields=["selected_images"])

        if fetched is None or "selected_images" not in fetched:
            return None

        selected = fetched["selected_images"]

        for image_kind in ("front", "ingredients", "nutrition"):
            if image_kind not in selected:
                continue

            entry = selected[image_kind]
            if "display" not in entry:
                continue

            candidates = list(entry["display"].values())
            if candidates:
                return candidates[0]

        return None
Exemplo n.º 23
0
    def get_source_image_url(barcode: str) -> Optional[str]:
        """Return a display image entry of the product's selected front image.

        The first value of ``selected_images['front']['display']`` is
        returned. Returns ``None`` when the product is missing, when any of
        those keys is absent, or when the ``display`` mapping is empty.
        """
        fetched: Optional[JSONType] = get_product(
            barcode, fields=['selected_images'])

        if fetched is None or 'selected_images' not in fetched:
            return None

        selected = fetched['selected_images']
        if 'front' not in selected:
            return None

        front_entry = selected['front']
        if 'display' not in front_entry:
            return None

        candidates = list(front_entry['display'].values())
        return candidates[0] if candidates else None
Exemplo n.º 24
0
    def predict_from_barcode(
            self, barcode: str) -> Optional[List[CategoryPrediction]]:
        """Request a category prediction for a product from the model server.

        Loads the predictor first if ``self.loaded`` is false, fetches the
        product's ``product_name`` and ``ingredients_tags``, converts them
        with ``self.get_input_from_products`` and POSTs them to
        ``<TF_SERVING_BASE_URL>/<self.NAME>:predict``.

        Returns ``None`` (after logging) when the product is not found,
        otherwise the decoded JSON body of the response. An HTTP error
        status is raised through ``raise_for_status``.

        NOTE(review): the decoded JSON is returned as-is although the
        annotation promises ``List[CategoryPrediction]`` — confirm which
        one callers expect.
        """
        if not self.loaded:
            self.load()

        fetched = get_product(barcode,
                              fields=["product_name", "ingredients_tags"])
        if fetched is None:
            logger.info("Product {} not found".format(barcode))
            return

        # The first (and only) entry of the batch has two array parts,
        # which are converted to plain lists so they can be sent as JSON.
        features = self.get_input_from_products([fetched])[0]
        instance = [features[0].tolist(), features[1].tolist()]

        payload = {"signature_name": "serving_default",
                   "instances": [instance]}
        endpoint = "{}/{}:predict".format(TF_SERVING_BASE_URL, self.NAME)

        reply = http_session.post(endpoint, json=payload)
        reply.raise_for_status()
        return reply.json()
# Load the vocabularies stored alongside the model in ``model_dir``.
category_to_id = load_category_vocabulary(model_dir)
ingredient_to_id = load_ingredient_vocabulary(model_dir)
# Category names ordered by their mapped value in ``category_to_id``.
category_names = [
    category for category, _ in sorted(category_to_id.items(),
                                       key=operator.itemgetter(1))
]

# NLP object for the language given by ``config.lang``.
nlp = get_nlp(config.lang)

product_name_vocabulary = load_product_name_vocabulary(model_dir)
# ``load_model`` is given the path as a string.
model = keras.models.load_model(str(model_path))

while True:
    barcode = input("barcode: ").strip()
    product = get_product(barcode, fields=["product_name", "ingredients_tags"])

    if product is None:
        print("Product {} not found".format(barcode))
        continue

    X = generate_data(
        product=product,
        ingredient_to_id=ingredient_to_id,
        product_name_token_to_int=product_name_vocabulary,
        nlp=nlp,
        product_name_max_length=config.model_config.product_name_max_length,
        product_name_preprocessing_config=config.
        product_name_preprocessing_config,
    )