Exemplo n.º 1
0
def run_import_image_job(
    barcode: str, image_url: str, ocr_url: str, server_domain: str
):
    """Download a product image and run the image import steps on it.

    The function returns early, without touching the database, when
    ``get_image_from_url`` yields ``None`` or when the product store lookup
    for ``barcode`` yields ``None`` (the latter is logged as a warning).

    Otherwise, inside a single ``with db:`` block, two separate
    ``db.atomic()`` blocks are run in order:

    1. ``save_image`` followed by ``import_insights_from_image``;
    2. ``run_object_detection``.

    :param barcode: barcode of the product the image belongs to
    :param image_url: URL the image is downloaded from
    :param ocr_url: forwarded to ``import_insights_from_image``
    :param server_domain: forwarded to every import step
    """
    logger.info(
        f"Running `import_image` for product {barcode} ({server_domain}), image {image_url}"
    )

    # error_raise=False: a failed download is reported as None, not raised.
    downloaded = get_image_from_url(
        image_url, error_raise=False, session=http_session
    )
    if downloaded is None:
        return

    image_source = get_source_from_url(image_url)
    store_entry = get_product_store()[barcode]

    if store_entry is None:
        logger.warning(
            "Product %s does not exist during image import (%s)",
            barcode,
            image_source,
        )
        return

    with db:
        # First transaction: persist the image and import its insights.
        with db.atomic():
            save_image(barcode, image_source, store_entry, server_domain)
            import_insights_from_image(
                barcode, downloaded, image_source, ocr_url, server_domain
            )

        # Second transaction: object detection is kept apart from the
        # image/insight import above.
        with db.atomic():
            run_object_detection(
                barcode, downloaded, image_source, server_domain
            )
Exemplo n.º 2
0
def import_image(barcode: str, image_url: str, ocr_url: str,
                 server_domain: str):
    """Extract insights from a product image and import them type by type.

    Insights come from ``get_insights_from_image``; when it returns ``None``
    nothing is imported. Insights whose type equals
    ``InsightType.image_flag.name`` are handed to
    ``handle_image_flag_insights``. Every other type is imported through the
    importer built by ``InsightImporterFactory``, each in its own
    ``db.atomic()`` block and with ``automatic=True``.
    """
    start_message = "Detect insights for product {}, image {}".format(
        barcode, image_url)
    logger.info(start_message)

    product_store = get_product_store()
    insights_by_type = get_insights_from_image(barcode, image_url, ocr_url)
    if insights_by_type is None:
        return

    for type_name, type_insights in insights_by_type.items():
        if type_name == InsightType.image_flag.name:
            # Image flags are not imported as insights.
            handle_image_flag_insights(type_insights)
        else:
            logger.info("Extracting {}".format(type_name))
            insight_importer: InsightImporter = InsightImporterFactory.create(
                type_name, product_store)

            with db.atomic():
                count = insight_importer.import_insights(
                    [type_insights],
                    server_domain=server_domain,
                    automatic=True,
                )
                logger.info(
                    "Import finished, {} insights imported".format(count))
Exemplo n.º 3
0
def mark_insights():
    """Schedule automatic processing for insights not yet scheduled.

    Selects every ``ProductInsight`` with ``automatic_processing`` true,
    ``latent`` false, and both ``process_after`` and ``annotation`` null,
    then sets ``process_after`` to ten minutes after the current
    ``utcnow()`` value and saves it. All updates run inside one
    ``db.atomic()`` block, and the number of marked insights is logged
    at the end.
    """
    delay = datetime.timedelta(minutes=10)
    marked = 0

    with db:
        with db.atomic():
            candidates = ProductInsight.select().where(
                ProductInsight.automatic_processing == True,  # noqa: E712
                ProductInsight.latent == False,  # noqa: E712
                ProductInsight.process_after.is_null(),
                ProductInsight.annotation.is_null(),
            )

            # enumerate(start=1) keeps `marked` equal to the number of
            # insights handled so far; it stays 0 when nothing matches.
            for marked, insight in enumerate(candidates.iterator(), start=1):
                logger.info(
                    "Marking insight {} as processable automatically "
                    "(product: {})".format(insight.id, insight.barcode)
                )
                insight.process_after = datetime.datetime.utcnow() + delay
                insight.save()

    logger.info("{} insights marked".format(marked))
Exemplo n.º 4
0
def refresh_insights(with_deletion: bool = False):
    """Delete or refresh unannotated insights against the product store.

    The job aborts (with a warning) unless the file at
    ``settings.JSONL_MIN_DATASET_PATH`` was last modified on the same date
    as the current UTC day.

    For every unannotated insight of ``settings.OFF_SERVER_DOMAIN`` whose
    timestamp is not later than today's midnight (UTC):

    * if the product is absent from the product store, the insight is
      deleted, but only when ``with_deletion`` is true;
    * otherwise the insight is passed to ``delete_invalid_insight`` with the
      validator for its type; if that reports a deletion the insight is
      counted as deleted, else ``update_insight_attributes`` is applied and
      counted when it reports an update.

    Everything runs inside a single ``db.atomic()`` block. The deleted and
    updated counts are logged at the end.

    :param with_deletion: delete insights whose product is missing from the
        product store
    """
    deleted = 0
    updated = 0
    product_store = CACHED_PRODUCT_STORE.get()

    # Midnight of the current UTC day.
    datetime_threshold = datetime.datetime.utcnow().replace(hour=0,
                                                            minute=0,
                                                            second=0,
                                                            microsecond=0)
    # NOTE(review): fromtimestamp() yields local time while the threshold
    # above is UTC; the date comparison below assumes both agree (e.g. a
    # server running in UTC) -- confirm.
    dataset_datetime = datetime.datetime.fromtimestamp(
        os.path.getmtime(settings.JSONL_MIN_DATASET_PATH))

    if dataset_datetime.date() != datetime_threshold.date():
        # `Logger.warn` is a deprecated alias; use `Logger.warning`.
        logger.warning(
            "Dataset version is not up to date, aborting insight removal job")
        return

    # One validator per insight type, created lazily on first use.
    validators: Dict[str, InsightValidator] = {}

    with db:
        with db.atomic():
            for insight in (ProductInsight.select().where(
                    ProductInsight.annotation.is_null(),
                    ProductInsight.timestamp <= datetime_threshold,
                    ProductInsight.server_domain == settings.OFF_SERVER_DOMAIN,
            ).iterator()):
                product: Product = product_store[insight.barcode]

                if product is None:
                    if with_deletion:
                        # Product has been deleted from OFF
                        logger.info("Product with barcode {} deleted"
                                    "".format(insight.barcode))
                        deleted += 1
                        insight.delete_instance()
                else:
                    if insight.type not in validators:
                        validators[
                            insight.type] = InsightValidatorFactory.create(
                                insight.type, product_store)

                    validator = validators[insight.type]
                    insight_deleted = delete_invalid_insight(
                        insight, validator)

                    if insight_deleted:
                        deleted += 1
                        logger.info(
                            "invalid insight {} (type: {}), deleting..."
                            "".format(insight.id, insight.type))
                        continue

                    insight_updated = update_insight_attributes(
                        product, insight)

                    if insight_updated:
                        updated += 1

    logger.info("{} insights deleted".format(deleted))
    logger.info("{} insights updated".format(updated))
Exemplo n.º 5
0
    def annotate(
        self,
        insight: ProductInsight,
        annotation: int,
        update=True,
        auth: Optional[OFFAuthentication] = None,
    ) -> AnnotationResult:
        """Store an annotation on ``insight`` and optionally update the product.

        The annotation value and the completion time (``utcnow()``) are
        saved on the insight; when a username can be determined from
        ``auth``, a ``UserAnnotation`` row is created as well. Both writes
        share one ``db.atomic()`` block.

        :param insight: the insight being annotated
        :param annotation: annotation value; ``1`` triggers the product
            update when ``update`` is truthy
        :param update: whether ``update_product`` may be called
        :param auth: optional credentials; a username extracted from the
            session cookie takes precedence over ``auth.username``
        :return: the result of ``update_product`` when it is called,
            otherwise ``SAVED_ANNOTATION_RESULT``
        """
        annotator_name: Optional[str] = None if auth is None else auth.username
        if auth is not None and auth.session_cookie:
            # The session cookie wins over the plain username.
            annotator_name = extract_username(auth.session_cookie)

        with db.atomic():
            insight.annotation = annotation
            insight.completed_at = datetime.datetime.utcnow()
            insight.save()

            if annotator_name:
                UserAnnotation.create(insight=insight, username=annotator_name)

        apply_to_product = annotation == 1 and update
        if not apply_to_product:
            return SAVED_ANNOTATION_RESULT

        return self.update_product(insight, auth=auth)
Exemplo n.º 6
0
def import_image(barcode: str, image_url: str, ocr_url: str,
                 server_domain: str):
    """Save a product image, start object detection and import its insights.

    Steps, in order: the image is saved with the product looked up in the
    product store, ``launch_object_detection_job`` is called, then insights
    are fetched with ``get_insights_from_image``. Insights of type
    ``InsightType.image_flag`` are sent to ``notify_image_flag``; every
    other type is imported through its importer, each in its own
    ``db.atomic()`` block and with ``automatic=True``.
    """
    start_message = "Detect insights for product {}, image {}".format(
        barcode, image_url)
    logger.info(start_message)

    product_store = get_product_store()
    save_image(barcode, image_url, product_store[barcode], server_domain)
    launch_object_detection_job(barcode, image_url, server_domain)

    insights_by_type = get_insights_from_image(barcode, image_url, ocr_url)

    for kind, kind_insights in insights_by_type.items():
        if kind == InsightType.image_flag:
            # Image flags are only notified, never imported.
            notify_image_flag(
                kind_insights.insights,
                kind_insights.source_image,  # type: ignore
                kind_insights.barcode,
            )
        else:
            logger.info("Extracting {}".format(kind.name))
            insight_importer: BaseInsightImporter = (
                InsightImporterFactory.create(kind, product_store))

            with db.atomic():
                count = insight_importer.import_insights(
                    [kind_insights],
                    server_domain=server_domain,
                    automatic=True,
                )
                logger.info(
                    "Import finished, {} insights imported".format(count))
Exemplo n.º 7
0
def delete_product_insights(barcode: str):
    """Delete every ``ProductInsight`` row whose barcode equals ``barcode``.

    The delete query runs inside a ``db.atomic()`` block; the value
    returned by ``execute()`` is logged as the number of deleted insights.
    """
    announcement = "Product {} deleted, deleting associated insights...".format(
        barcode)
    logger.info(announcement)

    with db.atomic():
        delete_query = ProductInsight.delete().where(
            ProductInsight.barcode == barcode)
        deleted = delete_query.execute()

    logger.info("{} insights deleted".format(deleted))
Exemplo n.º 8
0
def import_insights(insight_type: str, items: List[str]):
    """Import JSON-encoded insights of a single type.

    Each element of ``items`` is decoded lazily with ``json.loads`` and fed
    to the importer created for ``insight_type``. The import runs in one
    ``db.atomic()`` block with ``automatic=False``.

    :param insight_type: type passed to ``InsightImporterFactory.create``
    :param items: JSON documents, one insight per string
    """
    store = CACHED_PRODUCT_STORE.get()
    insight_importer: InsightImporter = InsightImporterFactory.create(
        insight_type, store)

    with db.atomic():
        # Generator: items are decoded only as the importer consumes them.
        decoded_items = (json.loads(raw_item) for raw_item in items)
        count = insight_importer.import_insights(decoded_items,
                                                 automatic=False)
        logger.info("Import finished, {} insights imported".format(count))
Exemplo n.º 9
0
def import_insights(insight_type: str, items: List[str], server_domain: str):
    """Import JSON-encoded insights of a single type for a server domain.

    Each element of ``items`` is decoded lazily with ``json.loads`` and fed
    to the importer created for ``insight_type``. The import runs in one
    ``db.atomic()`` block with ``automatic=False``.

    :param insight_type: type passed to ``InsightImporterFactory.create``
    :param items: JSON documents, one insight per string
    :param server_domain: forwarded to the importer
    """
    store = get_product_store()
    insight_importer: InsightImporter = InsightImporterFactory.create(
        insight_type, store)

    with db.atomic():
        # Generator: items are decoded only as the importer consumes them.
        decoded_items = (json.loads(raw_item) for raw_item in items)
        count = insight_importer.import_insights(
            decoded_items,
            server_domain=server_domain,
            automatic=False,
        )
        logger.info("Import finished, {} insights imported".format(count))
Exemplo n.º 10
0
def delete_product_insights(barcode: str, server_domain: str):
    """Delete the unannotated insights of a product on one server domain.

    Only rows matching ``barcode`` and ``server_domain`` whose
    ``annotation`` is null are removed. The delete query runs inside a
    ``db.atomic()`` block and the value returned by ``execute()`` is logged
    as the number of deleted insights.
    """
    announcement = "Product {} deleted, deleting associated insights...".format(
        barcode)
    logger.info(announcement)

    with db.atomic():
        delete_query = ProductInsight.delete().where(
            ProductInsight.barcode == barcode,
            ProductInsight.annotation.is_null(),
            ProductInsight.server_domain == server_domain,
        )
        deleted = delete_query.execute()

    logger.info("{} insights deleted".format(deleted))
Exemplo n.º 11
0
 def annotate(
     self,
     insight: ProductInsight,
     annotation: int,
     update: bool = True,
     data: Optional[Dict] = None,
     auth: Optional[OFFAuthentication] = None,
     automatic: bool = False,
 ) -> AnnotationResult:
     """Run ``self._annotate`` inside a ``db.atomic()`` block.

     All arguments are forwarded positionally, unchanged, to
     ``_annotate`` and its result is returned as-is.
     """
     with db.atomic():
         outcome = self._annotate(
             insight, annotation, update, data, auth, automatic
         )
     return outcome
Exemplo n.º 12
0
def run_task(event_type: str, event_kwargs: Dict) -> None:
    """Run the handler registered for ``event_type`` inside a transaction.

    :param event_type: key into ``EVENT_MAPPING`` selecting the handler
    :param event_kwargs: keyword arguments passed to the handler
    :raises ValueError: if ``event_type`` is not a key of ``EVENT_MAPPING``

    Any exception raised while the handler runs is logged with its
    traceback and is not re-raised.
    """
    if event_type not in EVENT_MAPPING:
        # The closing quote was missing from this message in the original.
        raise ValueError(f"unknown event type: '{event_type}'")

    func = EVENT_MAPPING[event_type]

    try:
        # we run task inside transaction to avoid side effects
        with db:
            with db.atomic():
                func(**event_kwargs)
    except Exception as e:
        # Top-level task boundary: log with traceback, do not propagate.
        logger.error(e, exc_info=True)
Exemplo n.º 13
0
    def update_related_insights(insight: ProductInsight):
        """Shift the offsets of the other spellcheck insights of a product.

        The shift is the length difference between ``insight.data['correction']``
        and ``insight.data['original']``; nothing happens when it is zero.
        Otherwise every other ``ingredient_spellcheck`` insight with the same
        barcode whose ``start_offset`` is not smaller than this insight's
        ``start_offset`` gets both ``start_offset`` and ``end_offset``
        increased by the shift and is saved. All updates share one
        ``db.atomic()`` block.
        """
        shift = (len(insight.data['correction']) -
                 len(insight.data['original']))
        if shift == 0:
            return

        with db.atomic():
            siblings = ProductInsight.select().where(
                ProductInsight.barcode == insight.barcode,
                ProductInsight.id != insight.id,
                ProductInsight.type == InsightType.ingredient_spellcheck.name,
            )
            for other in siblings:
                if insight.data['start_offset'] <= other.data['start_offset']:
                    # Move both ends of the span by the same amount.
                    for key in ('start_offset', 'end_offset'):
                        other.data[key] += shift
                    other.save()
Exemplo n.º 14
0
def import_insights(
    predictions: Iterable[Prediction],
    server_domain: str,
    batch_size: int = 1024,
) -> int:
    """Import predictions in batches, one ``db.atomic()`` block per batch.

    :param predictions: predictions to import
    :param server_domain: forwarded to ``import_insights_``
    :param batch_size: number of predictions passed to ``chunked`` per batch
    :return: the sum of the values returned by ``import_insights_`` for
        every batch (``0`` when there is no batch)
    """
    product_store = get_product_store()

    def import_batch(batch):
        # Each batch gets its own atomic block.
        with db.atomic():
            return import_insights_(
                batch,
                server_domain,
                automatic=False,
                product_store=product_store,
            )

    return sum(
        import_batch(batch) for batch in chunked(predictions, batch_size)
    )
Exemplo n.º 15
0
    def annotate(
        self,
        insight: ProductInsight,
        annotation: int,
        update: bool = True,
        data: Optional[Dict] = None,
        auth: Optional[OFFAuthentication] = None,
        automatic: bool = False,
    ) -> AnnotationResult:
        """Annotate ``insight`` inside a transaction, rolling back on error.

        Latent insights are not annotated: ``LATENT_INSIGHT_RESULT`` is
        returned immediately. Otherwise ``self._annotate`` is called with
        the given arguments inside ``db.atomic()``; if it raises, the
        transaction is rolled back explicitly and the exception propagates.
        """
        if insight.latent:
            return LATENT_INSIGHT_RESULT

        with db.atomic() as transaction:
            try:
                outcome = self._annotate(
                    insight, annotation, update, data, auth, automatic
                )
            except Exception:
                transaction.rollback()
                raise
            else:
                return outcome
Exemplo n.º 16
0
def import_image(barcode: str, image_url: str, ocr_url: str):
    """Extract insights from a product image and import them type by type.

    Insights come from ``get_insights_from_image``; when it returns ``None``
    nothing is imported. Insights whose type equals
    ``InsightType.image_flag.name`` are forwarded to ``notify_image_flag``
    (using their ``'insights'``, ``'source'`` and ``'barcode'`` entries).
    Every other type is imported through its importer, each in its own
    ``db.atomic()`` block and with ``automatic=True``.
    """
    start_message = "Detect insights for product {}, image {}".format(
        barcode, image_url)
    logger.info(start_message)

    product_store = CACHED_PRODUCT_STORE.get()
    insights_by_type = get_insights_from_image(barcode, image_url, ocr_url)
    if insights_by_type is None:
        return

    for type_name, payload in insights_by_type.items():
        if type_name == InsightType.image_flag.name:
            # Image flags are only notified, never imported.
            notify_image_flag(
                payload['insights'],
                payload['source'],
                payload['barcode'],
            )
        else:
            logger.info("Extracting {}".format(type_name))
            insight_importer: InsightImporter = InsightImporterFactory.create(
                type_name, product_store)

            with db.atomic():
                count = insight_importer.import_insights(
                    [payload], automatic=True)
                logger.info(
                    "Import finished, {} insights imported".format(count))
Exemplo n.º 17
0
def save_annotation(
    insight_id: str,
    annotation: int,
    device_id: str,
    update: bool = True,
    data: Optional[Dict] = None,
    auth: Optional[OFFAuthentication] = None,
    trusted_annotator: bool = False,
) -> AnnotationResult:
    """Save an annotation, using either a single response as ground truth or several votes.

    :param insight_id: primary key of the insight to annotate
    :param annotation: annotation value submitted by the caller
    :param device_id: identifier stored with the vote and sent with the
        ``question_answered`` event
    :param update: forwarded to the annotator's ``annotate`` method
    :param data: forwarded to the annotator's ``annotate`` method
    :param auth: optional credentials; used to resolve the username
    :param trusted_annotator: defines whether the given annotation comes from
        an authoritative source (e.g. a trusted user), or whether the
        annotation should be subject to the voting system.
    :return: ``UNKNOWN_INSIGHT_RESULT`` if the insight does not exist,
        ``ALREADY_ANNOTATED_RESULT`` if it already has an annotation,
        ``SAVED_ANNOTATION_VOTE_RESULT`` if the vote was recorded but no
        verdict was reached, otherwise the annotator's result.
    """
    try:
        insight: Union[ProductInsight,
                       None] = ProductInsight.get_by_id(insight_id)
    except ProductInsight.DoesNotExist:
        insight = None

    if not insight:
        return UNKNOWN_INSIGHT_RESULT

    if insight.annotation is not None:
        return ALREADY_ANNOTATED_RESULT

    if not trusted_annotator:
        verified: bool = False

        # Record this vote first so that it is part of the tally below.
        AnnotationVote.create(
            insight_id=insight_id,
            username=auth.get_username() if auth else None,
            value=annotation,
            device_id=device_id,
        )

        with db.atomic() as tx:
            try:
                # One row per distinct vote value, most voted value first.
                existing_votes = list(
                    AnnotationVote.select(
                        AnnotationVote.value,
                        peewee.fn.COUNT(
                            AnnotationVote.value).alias("num_votes"),
                    ).where(AnnotationVote.insight_id == insight_id).group_by(
                        AnnotationVote.value).order_by(
                            peewee.SQL("num_votes").desc()))
                # Total number of votes across all values.
                insight.n_votes = sum(
                    row.num_votes for row in existing_votes)
                insight.save()
            except Exception:
                tx.rollback()
                raise

        # If the top annotation has more than 2 votes, consider applying it to the insight.
        if existing_votes[0].num_votes > 2:
            annotation = existing_votes[0].value
            verified = True

        # But first check for the following cases:
        #  1) The 1st place annotation has >2 votes, and the 2nd place annotation has >= 2 votes.
        #  2) 1st place and 2nd place have 2 votes each.
        #
        # In both cases, we consider this an ambiguous result and mark it with 'I don't know'.
        if (existing_votes[0].num_votes >= 2 and len(existing_votes) > 1
                and existing_votes[1].num_votes >= 2):
            # This credits the last person who voted with an annotation that
            # is potentially not theirs.
            annotation = 0
            verified = True

        if not verified:
            return SAVED_ANNOTATION_VOTE_RESULT

    annotator = InsightAnnotatorFactory.get(insight.type)
    result = annotator.annotate(insight,
                                annotation,
                                update,
                                data=data,
                                auth=auth)
    username = auth.get_username() if auth else "unknown annotator"
    events.event_processor.send_async("question_answered", username, device_id,
                                      insight.barcode)
    return result