def insert_batch(data_path: pathlib.Path, model_name: str, model_version: str) -> int:
    """Import detection results from a JSONL file as image predictions.

    For every record of ``data_path`` whose ``(model_name, source_image)``
    pair is not in the seen set and whose image exists in the database, an
    ``ImagePrediction`` is created from the detections scoring above 0.1,
    and a ``LogoAnnotation`` is created for each of those detections scoring
    at least 0.5.

    :param data_path: path of the JSONL file, read with ``jsonl_iter``
    :param model_name: model name stored on each prediction and used in the
        seen-set key
    :param model_version: model version stored on each prediction
    :return: the number of image predictions inserted
    """
    timestamp = datetime.datetime.utcnow()

    logger.info("Loading seen set...")
    seen_set = get_seen_set()
    logger.info("Seen set loaded")

    inserted = 0
    for record in tqdm.tqdm(jsonl_iter(data_path)):
        source_image = generate_image_path(
            barcode=record["barcode"], image_id=record["image_id"]
        )
        key = (model_name, source_image)

        # Already imported for this model: nothing to do.
        if key in seen_set:
            continue

        image_instance = ImageModel.get_or_none(source_image=source_image)
        if image_instance is None:
            logger.warning("Unknown image in DB: {}".format(source_image))
            continue

        # Only detections above 0.1 are stored with the prediction.
        results = [
            detection for detection in record["result"] if detection["score"] > 0.1
        ]
        max_confidence = max(
            (detection["score"] for detection in results), default=None
        )

        inserted += 1
        image_prediction = ImagePrediction.create(
            type=TYPE,
            image=image_instance,
            timestamp=timestamp,
            model_name=model_name,
            model_version=model_version,
            data={"objects": results},
            max_confidence=max_confidence,
        )

        # The index refers to the position in the stored "objects" list.
        for index, detection in enumerate(results):
            if detection["score"] >= 0.5:
                LogoAnnotation.create(
                    image_prediction=image_prediction,
                    index=index,
                    score=detection["score"],
                    bounding_box=detection["bounding_box"],
                )

        seen_set.add(key)

    return inserted
def on_post(self, req: falcon.Request, resp: falcon.Response):
    """Apply a batch of logo annotations and generate insights from them.

    The request media must hold an ``annotations`` list whose items carry
    ``logo_id``, ``type`` and ``value``. ``server_domain`` is optional and
    falls back to ``settings.OFF_SERVER_DOMAIN``. The response media reports
    the value returned by ``generate_insights_from_annotated_logos`` under
    the ``"created insights"`` key.
    """
    payload = req.media
    server_domain = payload.get("server_domain", settings.OFF_SERVER_DOMAIN)
    annotations = payload["annotations"]

    auth = parse_auth(req)
    username = auth.get_username() if auth is not None else None
    completed_at = datetime.datetime.utcnow()

    annotated_logos = []
    for entry in annotations:
        logo_id = entry["logo_id"]
        type_ = entry["type"]
        # Falsy values (e.g. an empty string) are treated as "no value".
        value = entry["value"] or None

        logo = LogoAnnotation.get_by_id(logo_id)
        if value is not None:
            value_tag = get_tag(value)
            logo.annotation_value = value
            logo.annotation_value_tag = value_tag
            logo.taxonomy_value = match_unprefixed_value(value_tag, type_)

        logo.annotation_type = type_
        logo.username = username
        logo.completed_at = completed_at
        logo.save()
        annotated_logos.append(logo)

    created = generate_insights_from_annotated_logos(annotated_logos, server_domain)
    resp.media = {"created insights": created}
def on_post(self, req: falcon.Request, resp: falcon.Response):
    """Re-label every logo annotated with a given (type, value) pair.

    All four query parameters (``source_value``, ``source_type``,
    ``target_value``, ``target_type``) are required. Logos whose annotation
    type equals ``source_type`` and whose value tag equals the tag of
    ``source_value`` are rewritten with the target type/value. The response
    media carries the result of the update query under ``"updated"``.
    """
    source_value = req.get_param("source_value", required=True)
    source_type = req.get_param("source_type", required=True)
    target_value = req.get_param("target_value", required=True)
    target_type = req.get_param("target_type", required=True)

    auth = parse_auth(req)
    username = auth.get_username() if auth is not None else None
    completed_at = datetime.datetime.utcnow()

    target_value_tag = get_tag(target_value)
    source_value_tag = get_tag(source_value)
    taxonomy_value = match_unprefixed_value(target_value_tag, target_type)

    new_values = {
        LogoAnnotation.annotation_type: target_type,
        LogoAnnotation.annotation_value: target_value,
        LogoAnnotation.annotation_value_tag: target_value_tag,
        LogoAnnotation.taxonomy_value: taxonomy_value,
        LogoAnnotation.username: username,
        LogoAnnotation.completed_at: completed_at,
    }
    matches_source = (
        LogoAnnotation.annotation_type == source_type,
        LogoAnnotation.annotation_value_tag == source_value_tag,
    )

    updated = LogoAnnotation.update(new_values).where(*matches_source).execute()
    resp.media = {"updated": updated}
def export_logo_annotation(
    output: pathlib.Path,
    server_domain: Optional[str] = None,
    annotated: Optional[bool] = None,
):
    """Dump logo annotations as JSONL into ``output``.

    :param output: destination path handed to ``dump_jsonl``
    :param server_domain: when given, keep only logos whose image has this
        server domain
    :param annotated: when ``True`` keep only logos with an annotation
        value, when ``False`` only those without; ``None`` disables the
        filter
    """
    from robotoff.models import db, LogoAnnotation, ImageModel, ImagePrediction
    from robotoff.utils import dump_jsonl

    with db:
        filters = []
        if server_domain is not None:
            filters.append(ImageModel.server_domain == server_domain)
        if annotated is not None:
            filters.append(LogoAnnotation.annotation_value.is_null(not annotated))

        query = LogoAnnotation.select().join(ImagePrediction).join(ImageModel)
        if filters:
            query = query.where(*filters)

        # The generator is consumed lazily by dump_jsonl, so the dump has to
        # happen while the `with db` block is still open.
        dump_jsonl(output, (logo.to_dict() for logo in query.iterator()))
def search(self, req: falcon.Request, resp: falcon.Response):
    """Search logos according to the request query parameters.

    Supported parameters: ``count`` (1-2000, default 25), ``type``,
    ``barcode``, ``value``, ``min_confidence``, ``random``,
    ``server_domain`` and ``annotated`` (default false). The response media
    holds the matching logos (at most ``count``) under ``"logos"`` and the
    total number of matches under ``"count"``.
    """
    count: int = req.get_param_as_int(
        "count", min_value=1, max_value=2000, default=25
    )
    type_: Optional[str] = req.get_param("type")
    barcode: Optional[str] = req.get_param("barcode")
    value: Optional[str] = req.get_param("value")
    min_confidence: Optional[float] = req.get_param_as_float("min_confidence")
    shuffle: bool = req.get_param_as_bool("random", default=False)
    server_domain: Optional[str] = req.get_param("server_domain")
    annotated: bool = req.get_param_as_bool("annotated", default=False)

    # This first filter is always present, so the clause list is never empty.
    where_clauses = [LogoAnnotation.annotation_value.is_null(not annotated)]
    needs_image_prediction = False
    needs_image_model = False

    if server_domain:
        where_clauses.append(ImageModel.server_domain == server_domain)
        needs_image_model = True

    if min_confidence is not None:
        where_clauses.append(ImagePrediction.max_confidence >= min_confidence)
        needs_image_prediction = True

    if barcode is not None:
        where_clauses.append(ImageModel.barcode == barcode)
        needs_image_model = True

    if type_ is not None:
        where_clauses.append(LogoAnnotation.annotation_type == type_)

    if value is not None:
        where_clauses.append(LogoAnnotation.annotation_value_tag == get_tag(value))

    query = LogoAnnotation.select()
    # The image model is joined through the image prediction, so needing the
    # former implies joining the latter first.
    if needs_image_prediction or needs_image_model:
        query = query.join(ImagePrediction)
    if needs_image_model:
        query = query.join(ImageModel)
    query = query.where(*where_clauses)

    # Count before ordering/limiting so it reflects every match.
    query_count = query.count()

    if shuffle:
        query = query.order_by(peewee.fn.Random())
    query = query.limit(count)

    logos = []
    for logo in query.iterator():
        logo_dict = logo.to_dict()
        # Expose the image directly instead of the whole image prediction.
        logo_dict["image"] = logo_dict.pop("image_prediction")["image"]
        logos.append(logo_dict)

    resp.media = {"logos": logos, "count": query_count}
def fetch_logos(self, logo_ids: List[str], resp: falcon.Response):
    """Fetch the logos with the given ids and write them to the response.

    Each logo dict has its ``image_prediction`` entry replaced by an
    ``image`` entry taken from that prediction. The response media holds
    the logos under ``"logos"`` and their number under ``"count"``.
    """
    query = (
        LogoAnnotation.select()
        .join(ImagePrediction)
        .join(ImageModel)
        .where(LogoAnnotation.id.in_(logo_ids))
    )

    logos = []
    for logo in query.iterator():
        logo_dict = logo.to_dict()
        logo_dict["image"] = logo_dict.pop("image_prediction")["image"]
        logos.append(logo_dict)

    resp.media = {"logos": logos, "count": len(logos)}
def on_get(self, req: falcon.Request, resp: falcon.Response, logo_id: int):
    """Return a single logo as a dict, or a 404 status when it is unknown.

    The ``image_prediction`` entry of the logo dict is replaced by an
    ``image`` entry taken from that prediction.
    """
    logo = LogoAnnotation.get_or_none(id=logo_id)
    if logo is None:
        resp.status = falcon.HTTP_404
        return

    payload = logo.to_dict()
    payload["image"] = payload.pop("image_prediction")["image"]
    resp.media = payload
def send_logo_notification(logo: LogoAnnotation, probs: Dict[LogoLabelType, float]):
    """Post a message describing the label probabilities predicted for a logo.

    The message links to the logo crop, the annotation page and the product
    page, followed by one line per label sorted by decreasing probability.
    It is posted to ``settings.SLACK_OFF_ROBOTOFF_ALERT_CHANNEL``.
    """
    crop_url = logo.get_crop_image_url()

    ranked = sorted(probs.items(), key=lambda pair: pair[1], reverse=True)
    lines = [f"{label[0]} - {label[1]}: {prob:.2g}" for label, prob in ranked]
    prob_text = "\n".join(lines)

    barcode = logo.image_prediction.image.barcode
    text = (
        f"Prediction for <{crop_url}|image> "
        f"(<https://hunger.openfoodfacts.org/logos?logo_id={logo.id}|annotate logo>, "
        f"<https://world.openfoodfacts.org/product/{barcode}|product>):\n{prob_text}"
    )
    post_message(text, settings.SLACK_OFF_ROBOTOFF_ALERT_CHANNEL)
def test_crop_image_url(monkeypatch):
    """The crop URL combines the robotoff crop endpoint, the image URL and
    the bounding box coordinates."""
    # Force defaults to apply.
    monkeypatch.delenv("ROBOTOFF_SCHEME", raising=False)

    image = ImageModel(
        barcode="123",
        image_id="image_id",
        source_image="/image",
        width=20,
        height=20,
    )
    image_prediction = ImagePrediction(
        type="label",
        model_name="test-model",
        model_version="1.0",
        image=image,
    )
    logo_annotation = LogoAnnotation(
        image_prediction=image_prediction,
        bounding_box=(1, 1, 2, 2),
    )

    crop_url = logo_annotation.get_crop_image_url()
    expected_url = (
        f"https://robotoff.{settings._robotoff_domain}/api/v1/images/crop"
        + f"?image_url={settings.OFF_IMAGE_BASE_URL}/image&y_min=1&x_min=1&y_max=2&x_max=2"
    )
    assert crop_url == expected_url
def send_logo_notification(self, logo: LogoAnnotation, probs: Dict[LogoLabelType, float]):
    """Post a message describing the label probabilities predicted for a logo.

    The message links to the logo crop, the annotation page and the product
    page (built from ``settings.BaseURLProvider``), followed by one line per
    label sorted by decreasing probability. It is posted to
    ``self.ROBOTOFF_ALERT_CHANNEL``.
    """
    crop_url = logo.get_crop_image_url()

    ranked = sorted(probs.items(), key=lambda pair: pair[1], reverse=True)
    lines = [f"{label[0]} - {label[1]}: {prob:.2g}" for label, prob in ranked]
    prob_text = "\n".join(lines)

    barcode = logo.image_prediction.image.barcode
    base_off_url = settings.BaseURLProvider().get()
    text = (
        f"Prediction for <{crop_url}|image> "
        f"(<https://hunger.openfoodfacts.org/logos?logo_id={logo.id}|annotate logo>, "
        f"<{base_off_url}/product/{barcode}|product>):\n{prob_text}"
    )

    message = _slack_message_block(text)
    self._post_message(message, self.ROBOTOFF_ALERT_CHANNEL)
def get_logo_annotations() -> Dict[int, LogoLabelType]:
    """Map logo ids to their ``(annotation_type, value)`` label.

    Only logos with a non-null annotation type are considered. A logo
    without annotation value maps to ``(annotation_type, None)``; a logo
    with an annotation value is included only when it has a taxonomy value,
    which is then used as the label value.
    """
    query = LogoAnnotation.select(
        LogoAnnotation.id,
        LogoAnnotation.annotation_type,
        LogoAnnotation.annotation_value,
        LogoAnnotation.taxonomy_value,
    ).where(LogoAnnotation.annotation_type.is_null(False))

    annotations: Dict[int, LogoLabelType] = {}
    for logo in query.iterator():
        if logo.annotation_value is None:
            label = (logo.annotation_type, None)
        elif logo.taxonomy_value is not None:
            label = (logo.annotation_type, logo.taxonomy_value)
        else:
            # A value was entered but has no taxonomy value: skip the logo.
            continue
        annotations[logo.id] = label

    return annotations
def test_noop_slack_notifier_logging(caplog):
    """The no-op notifier emits exactly one log record for a logo
    notification, starting with "Alerting on slack channel"."""
    caplog.set_level(logging.INFO)

    image = ImageModel(source_image="/path/to/image.jpg", width=10, height=10)
    logo = LogoAnnotation(
        image_prediction=ImagePrediction(barcode="123", image=image),
        bounding_box=(1, 1, 2, 2),
    )

    notifier = slack.NoopSlackNotifier()
    notifier.send_logo_notification(logo, {})

    # Unpacking doubles as a check that a single record was logged.
    (record,) = caplog.records
    assert record.msg.startswith("Alerting on slack channel")
def run_object_detection(barcode: str, image_url: str, server_domain: str):
    """Run object detection on an image and import the resulting logos.

    An ``ImagePrediction`` is stored per model returned by
    ``predict_objects`` (detections are serialized with a 0.1 threshold),
    and a ``LogoAnnotation`` is created for each detection scoring at least
    0.5. When at least one logo was created, the logos are added to the ANN
    index, their nearest neighbors are saved and logo insights are imported.

    Nothing is done (besides a warning) when the image is not in the DB.
    """
    source_image = get_source_from_image_url(image_url)
    image_instance = ImageModel.get_or_none(source_image=source_image)

    if image_instance is None:
        logger.warning("Missing image in DB for image {}".format(image_url))
        return

    timestamp = datetime.datetime.utcnow()
    results = predict_objects(barcode, image_url, server_domain)

    logos = []
    for model_name, result in results.items():
        detections = result.to_json(threshold=0.1)
        max_confidence = max(
            (detection["score"] for detection in detections), default=None
        )
        image_prediction = ImagePrediction.create(
            image=image_instance,
            type="object_detection",
            model_name=model_name,
            model_version=settings.OBJECT_DETECTION_MODEL_VERSION[model_name],
            data={"objects": detections},
            timestamp=timestamp,
            max_confidence=max_confidence,
        )

        # The index refers to the position in the stored "objects" list.
        for index, detection in enumerate(detections):
            if detection["score"] >= 0.5:
                logos.append(
                    LogoAnnotation.create(
                        image_prediction=image_prediction,
                        index=index,
                        score=detection["score"],
                        bounding_box=detection["bounding_box"],
                    )
                )

    if not logos:
        return

    add_logos_to_ann(image_instance, logos)
    save_nearest_neighbors(logos)
    thresholds = LOGO_CONFIDENCE_THRESHOLDS.get()
    import_logo_insights(logos, thresholds=thresholds, server_domain=server_domain)
def add_logo_to_ann(sleep_time: float):
    """Add logos that have no nearest neighbors yet to the ANN index.

    Logos are processed image by image. An image is skipped when all of its
    logos are already among the stored logo ids. A read timeout while adding
    the logos of an image is logged and that image is skipped.

    :param sleep_time: number of seconds to sleep after each successfully
        processed image; a falsy value disables sleeping
    """
    from itertools import groupby
    import time

    import requests
    import tqdm

    from robotoff.logos import add_logos_to_ann, get_stored_logo_ids
    from robotoff.models import db, ImageModel, ImagePrediction, LogoAnnotation
    from robotoff.utils import get_logger

    logger = get_logger()
    seen = get_stored_logo_ids()

    with db:
        # Ordering by image id is what makes `groupby` yield exactly one
        # group per image.
        logos_iter = tqdm.tqdm(
            LogoAnnotation.select()
            .join(ImagePrediction)
            .join(ImageModel)
            .where(LogoAnnotation.nearest_neighbors.is_null())
            .order_by(ImageModel.id)
            .iterator()
        )

        for _, logo_batch in groupby(
            logos_iter, lambda logo: logo.image_prediction.image.id
        ):
            logos = list(logo_batch)

            if all(logo.id in seen for logo in logos):
                continue

            image = logos[0].image_prediction.image
            logger.info(f"Adding logos of image {image.id}")

            try:
                added = add_logos_to_ann(image, logos)
            except requests.exceptions.ReadTimeout:
                # `Logger.warn` is a deprecated alias of `Logger.warning`.
                logger.warning("Request timed-out during logo addition")
                continue

            logger.info(f"Added: {added}")

            if sleep_time:
                time.sleep(sleep_time)
def on_put(self, req: falcon.Request, resp: falcon.Response, logo_id: int):
    """Update the annotation (type and value) of a single logo.

    The request media must contain ``type`` and ``value``; a falsy value is
    treated as "no value". Responds with 404 when the logo does not exist
    and with 204 otherwise. The logo is only saved (with the requesting
    username and the completion time) when the type or the value changed.
    """
    logo = LogoAnnotation.get_or_none(id=logo_id)
    if logo is None:
        resp.status = falcon.HTTP_404
        return

    type_ = req.media["type"]
    value = req.media["value"] or None

    type_changed = type_ != logo.annotation_type
    value_changed = value != logo.annotation_value

    if type_changed or value_changed:
        logo.annotation_type = type_
        logo.annotation_value = value

        # The taxonomy value is derived from both the value tag and the
        # type, so it must be refreshed when either of them changes;
        # otherwise a type-only change would leave a taxonomy value that
        # was computed for the previous type.
        if value is not None:
            value_tag = get_tag(value)
            logo.annotation_value_tag = value_tag
            logo.taxonomy_value = match_unprefixed_value(value_tag, type_)
        else:
            logo.annotation_value_tag = None
            logo.taxonomy_value = None

        auth = parse_auth(req)
        logo.username = None if auth is None else auth.get_username()
        logo.completed_at = datetime.datetime.utcnow()
        logo.save()

    resp.status = falcon.HTTP_204
def test_image_brand_annotation(client, monkeypatch, fake_taxonomy):
    """Annotating a logo as a brand updates the logo annotation and
    generates exactly one prediction and one insight for the product."""
    ann = LogoAnnotationFactory(
        image_prediction__image__source_image="/images/2.jpg", annotation_type="brand"
    )
    barcode = ann.image_prediction.image.barcode
    _fake_store(monkeypatch, barcode)
    monkeypatch.setattr(
        BRAND_PREFIX_STORE, "get", lambda: {("Etorki", "0000000xxxxxx")}
    )

    payload = {
        "withCredentials": True,
        "annotations": [{"logo_id": ann.id, "value": "etorki", "type": "brand"}],
    }
    start = datetime.utcnow()
    result = client.simulate_post(
        "/api/v1/images/logos/annotate",
        json=payload,
        headers=_AUTH_HEADER,
    )
    end = datetime.utcnow()

    assert result.status_code == 200
    assert result.json == {"created insights": 1}

    # Reload the annotation to check what was persisted.
    ann = LogoAnnotation.get(LogoAnnotation.id == ann.id)
    assert ann.annotation_type == "brand"
    assert ann.annotation_value == "etorki"
    assert ann.annotation_value_tag == "etorki"
    assert ann.taxonomy_value == "Etorki"
    assert ann.username == "a"
    assert start <= ann.completed_at <= end

    # The prediction and the insight are expected to carry the same data.
    expected_data = {
        "logo_id": ann.id,
        "confidence": 1.0,
        "username": "******",
        "is_annotation": True,
        "notify": True,
    }

    # we generate a prediction
    predictions = list(Prediction.select().filter(barcode=barcode).execute())
    assert len(predictions) == 1
    (prediction,) = predictions
    assert prediction.type == "brand"
    assert prediction.data == expected_data
    assert prediction.value == "Etorki"
    assert prediction.value_tag == "Etorki"
    assert prediction.predictor == "universal-logo-detector"
    assert start <= prediction.timestamp <= end
    assert prediction.automatic_processing

    # We check that this prediction in turn generates an insight
    insights = list(ProductInsight.select().filter(barcode=barcode).execute())
    assert len(insights) == 1
    (insight,) = insights
    assert insight.type == "brand"
    assert insight.data == expected_data
    assert insight.value == "Etorki"
    assert insight.value_tag == "Etorki"
    assert insight.predictor == "universal-logo-detector"
    # NOTE(review): this re-checks the prediction timestamp; it may have been
    # meant to check the insight's own timestamp - confirm.
    assert start <= prediction.timestamp <= end
    assert insight.automatic_processing
    assert insight.username == "a"
    # we did not run annotate yet
    assert insight.completed_at is None
def test_image_label_annotation(client, monkeypatch, fake_taxonomy):
    """Check that annotating a logo as a label updates the logo annotation
    and generates exactly one prediction and one insight for the product.
    """
    ann = LogoAnnotationFactory(image_prediction__image__source_image="/images/2.jpg")
    barcode = ann.image_prediction.image.barcode
    _fake_store(monkeypatch, barcode)

    payload = {
        "withCredentials": True,
        "annotations": [
            {"logo_id": ann.id, "value": "EU Organic", "type": "label"}
        ],
    }
    start = datetime.utcnow()
    result = client.simulate_post(
        "/api/v1/images/logos/annotate",
        json=payload,
        headers=_AUTH_HEADER,
    )
    end = datetime.utcnow()

    assert result.status_code == 200
    assert result.json == {"created insights": 1}

    # Reload the annotation to check what was persisted.
    ann = LogoAnnotation.get(LogoAnnotation.id == ann.id)
    assert ann.annotation_type == "label"
    assert ann.annotation_value == "EU Organic"
    assert ann.annotation_value_tag == "eu-organic"
    assert ann.taxonomy_value == "en:eu-organic"
    assert ann.username == "a"
    assert start <= ann.completed_at <= end

    # The prediction and the insight are expected to carry the same data.
    expected_data = {
        "logo_id": ann.id,
        "confidence": 1.0,
        "username": "******",
        "is_annotation": True,
        "notify": True,
    }

    # we generate a prediction
    predictions = list(Prediction.select().filter(barcode=barcode).execute())
    assert len(predictions) == 1
    (prediction,) = predictions
    assert prediction.type == "label"
    assert prediction.data == expected_data
    assert prediction.value is None
    assert prediction.value_tag == "en:eu-organic"
    assert prediction.predictor == "universal-logo-detector"
    assert start <= prediction.timestamp <= end
    assert prediction.automatic_processing

    # We check that this prediction in turn generates an insight
    insights = list(ProductInsight.select().filter(barcode=barcode).execute())
    assert len(insights) == 1
    (insight,) = insights
    assert insight.type == "label"
    assert insight.data == expected_data
    assert insight.value is None
    assert insight.value_tag == "en:eu-organic"
    assert insight.predictor == "universal-logo-detector"
    # NOTE(review): this re-checks the prediction timestamp; it may have been
    # meant to check the insight's own timestamp - confirm.
    assert start <= prediction.timestamp <= end
    assert insight.automatic_processing
    assert insight.username == "a"
    assert insight.completed_at is None
def run_object_detection(
    barcode: str, image: Image.Image, source_image: str, server_domain: str
):
    """Detect logos using the universal logo detector model and generate
    logo-related insights.

    An ``ImagePrediction`` is stored with the detections (serialized with a
    0.1 threshold) and a ``LogoAnnotation`` is created for each detection
    scoring at least 0.5. When at least one logo was created, the logos are
    added to the ANN index, their nearest neighbors are saved (an HTTP error
    at this step is logged and ignored) and logo insights are imported.

    Nothing is done (besides a warning) when the image is not in the DB.

    :param barcode: Product barcode
    :param image: Pillow Image to run the object detection on
    :param source_image: Source path of the image, used to look up the image
        in the DB
    :param server_domain: The server domain associated with the image
    """
    logger.info(
        f"Running object detection for product {barcode} ({server_domain}), "
        f"image {source_image}"
    )
    image_instance = ImageModel.get_or_none(source_image=source_image)

    if image_instance is None:
        logger.warning("Missing image in DB for image %s", source_image)
        return

    timestamp = datetime.datetime.utcnow()
    model_name = "universal-logo-detector"
    results = ObjectDetectionModelRegistry.get(model_name).detect_from_image(
        image, output_image=False
    )
    data = results.to_json(threshold=0.1)
    max_confidence = max([item["score"] for item in data], default=None)

    image_prediction = ImagePrediction.create(
        image=image_instance,
        type="object_detection",
        model_name=model_name,
        model_version=settings.OBJECT_DETECTION_MODEL_VERSION[model_name],
        data={"objects": data},
        timestamp=timestamp,
        max_confidence=max_confidence,
    )

    logos = []
    # The index refers to the position in the stored "objects" list.
    for i, item in enumerate(data):
        if item["score"] >= 0.5:
            logo = LogoAnnotation.create(
                image_prediction=image_prediction,
                index=i,
                score=item["score"],
                bounding_box=item["bounding_box"],
            )
            logos.append(logo)

    logger.info(f"{len(logos)} logos found for image {source_image}")

    if logos:
        add_logos_to_ann(image_instance, logos)

        try:
            save_nearest_neighbors(logos)
        except requests.exceptions.HTTPError as e:
            response = e.response
            # Use %-style arguments only: mixing an f-string with %-args
            # would break if the interpolated part ever contained a "%".
            logger.warning(
                "Could not save nearest neighbors in ANN: %s: %s",
                response.status_code,
                response.text,
            )

        thresholds = LOGO_CONFIDENCE_THRESHOLDS.get()
        import_logo_insights(logos, thresholds=thresholds, server_domain=server_domain)
import json

from robotoff import settings
from robotoff.models import LogoAnnotation, db

# Export the taxonomy value of every logo that has one, keyed by logo id,
# as a single JSON object written to DATASET_DIR / "annotations.jsonl".
with db:
    taxonomized_logos = LogoAnnotation.select(
        LogoAnnotation.id, LogoAnnotation.taxonomy_value
    ).where(LogoAnnotation.taxonomy_value.is_null(False))
    annotations = {
        logo_annotation.id: logo_annotation.taxonomy_value
        for logo_annotation in taxonomized_logos.iterator()
    }

output_path = settings.DATASET_DIR / "annotations.jsonl"
with output_path.open("w") as f:
    json.dump(annotations, f)