def import_insights(
    self,
    data: Iterable[ProductInsights],
    server_domain: str,
    automatic: bool,
) -> int:
    """Convert product insights and store them in the DB.

    Insights are processed, enriched with common fields, and written in
    chunks of 50. A latent insight is skipped when an equivalent latent
    insight is already stored (checked via ``exist_latent``).

    :return: the number of rows inserted.
    """
    now = datetime.datetime.utcnow()
    processed: Iterator[Insight] = self.process_insights(
        data, server_domain, automatic)
    enriched = self.add_fields(processed, now, server_domain)

    total = 0
    for chunk in chunked(enriched, 50):
        to_insert: List[JSONType] = []
        for candidate in chunk:
            as_dict = candidate.to_dict()
            # Non-latent insights are always kept; latent ones only
            # when no equivalent latent insight exists yet.
            if not candidate.latent or not exist_latent(as_dict):
                to_insert.append(as_dict)
        total += batch_insert(ProductInsight, to_insert, 50)
    return total
def import_insights(
    cls,
    predictions: List[Prediction],
    server_domain: str,
    automatic: bool,
    product_store: DBProductStore,
) -> int:
    """Import insights, this is the main method.

    Rejects any prediction whose type is not required by this importer,
    then applies each (to_create, to_delete) pair yielded by
    ``generate_insights``: obsolete insights are deleted, new ones are
    bulk-inserted.

    :return: the number of insights that were imported.
    """
    allowed_types = cls.get_required_prediction_types()
    for candidate in predictions:
        if candidate.type not in allowed_types:
            raise ValueError(
                f"unexpected prediction type: '{candidate.type}'")

    imported = 0
    for to_create, to_delete in cls.generate_insights(
            predictions, server_domain, automatic, product_store):
        if to_delete:
            stale_ids = [insight.id for insight in to_delete]
            logger.info(
                f"Deleting insight IDs: {[str(x) for x in stale_ids]}")
            ProductInsight.delete().where(
                ProductInsight.id.in_(stale_ids)).execute()
        if to_create:
            # Stream dicts into batch_insert instead of materializing.
            rows = (model_to_dict(insight) for insight in to_create)
            imported += batch_insert(ProductInsight, rows, 50)
    return imported
def import_insights(self, data: Iterable[Dict], automatic: bool = False) -> int:
    """Import ingredient-spellcheck corrections as insights.

    Each item carries a barcode, the full ingredient text and a list of
    corrections. One ProductIngredient row is stored per distinct
    barcode, and one insight per unique
    (barcode, start_offset, end_offset) correction. Pending rows are
    flushed to the DB whenever a buffer reaches 50 entries, with a
    final flush at the end.

    :return: the number of inserted insights.
    """
    now = datetime.datetime.utcnow()
    seen_barcodes: Set[str] = set()
    seen_keys: Set = set()
    pending_insights = []
    pending_ingredients = []
    inserted = 0

    for item in data:
        barcode = item['barcode']
        text = item['text']

        if barcode not in seen_barcodes:
            pending_ingredients.append({
                'barcode': barcode,
                'ingredients': item['text'],
            })
            seen_barcodes.add(barcode)

        for correction in item['corrections']:
            start_offset = correction['start_offset']
            end_offset = correction['end_offset']
            key = (barcode, start_offset, end_offset)
            if key in seen_keys:
                # Duplicate correction span for this product: skip it.
                continue
            seen_keys.add(key)
            pending_insights.append({
                'id': str(uuid.uuid4()),
                'type': InsightType.ingredient_spellcheck.name,
                'barcode': barcode,
                'timestamp': now,
                'automatic_processing': False,
                'data': {
                    **correction,
                    'original_snippet': self.generate_snippet(
                        text, start_offset, end_offset,
                        correction['original']),
                    'corrected_snippet': self.generate_snippet(
                        text, start_offset, end_offset,
                        correction['correction']),
                },
            })

        if len(pending_ingredients) >= 50:
            batch_insert(ProductIngredient, pending_ingredients, 50)
            pending_ingredients = []

        if len(pending_insights) >= 50:
            inserted += batch_insert(ProductInsight, pending_insights, 50)
            pending_insights = []

    # Flush whatever is left in both buffers.
    batch_insert(ProductIngredient, pending_ingredients, 50)
    inserted += batch_insert(ProductInsight, pending_insights, 50)
    return inserted
def import_insights(self, data: Iterable[Dict], automatic: bool = False) -> int:
    """Group raw OCR insights by barcode, deduplicate and prioritize
    each group, then bulk-insert the resulting rows.

    :return: the number of inserted insights.
    """
    by_barcode: GroupedByOCRInsights = self.group_by_barcode(data)
    rows: List[Dict] = []
    now = datetime.datetime.utcnow()

    for barcode, barcode_insights in by_barcode.items():
        deduplicated = list(self.deduplicate_insights(barcode_insights))
        prioritized = self.sort_by_priority(deduplicated)
        rows.extend(
            self._process_product_insights(barcode, prioritized, now, automatic))

    return batch_insert(ProductInsight, rows, 50)
def on_post(self, req: falcon.Request, resp: falcon.Response):
    """Bulk-insert the image predictions posted in the request body.

    Every prediction dict receives a shared timestamp plus derived
    ``server_domain``/``server_type``/``source_image`` fields before
    being inserted into ImagePrediction.
    """
    now = datetime.datetime.utcnow()
    rows = []
    for prediction in req.media["predictions"]:
        server_domain: str = prediction.get(
            "server_domain", settings.OFF_SERVER_DOMAIN)
        # NOTE: "image_id" is popped so it does not leak into the row
        # via **prediction; dict items evaluate left-to-right, so the
        # pop happens before the unpacking below.
        rows.append({
            "timestamp": now,
            "server_domain": server_domain,
            "server_type": get_server_type(server_domain).name,
            "source_image": generate_image_path(
                prediction["barcode"], prediction.pop("image_id")),
            **prediction,
        })
    inserted = batch_insert(ImagePrediction, rows)
    logger.info("{} image predictions inserted".format(inserted))
def import_product_predictions(
    barcode: str,
    product_predictions_iter: Iterable[Prediction],
    server_domain: str,
):
    """Import predictions for a specific product.

    A prediction matching an existing DB row on
    (barcode, type, server_domain, source_image, value, value_tag) is
    skipped rather than imported again.

    :param barcode: Barcode of the product. All `product_predictions`
        must have the same barcode.
    :param product_predictions_iter: Iterable of Predictions.
    :param server_domain: The server domain associated with the
        predictions.
    :return: The number of items imported in DB.
    """
    now = datetime.datetime.utcnow()
    known = set(
        PredictionModel.select(
            PredictionModel.type,
            PredictionModel.server_domain,
            PredictionModel.source_image,
            PredictionModel.value_tag,
            PredictionModel.value,
        )
        .where(PredictionModel.barcode == barcode)
        .tuples()
    )
    # NOTE: replacing older predictions that share the same key could
    # be supported here; it is not implemented yet.
    fresh_rows = (
        create_prediction_model(prediction, server_domain, now)
        for prediction in product_predictions_iter
        if (
            prediction.type,
            server_domain,
            prediction.source_image,
            prediction.value_tag,
            prediction.value,
        )
        not in known
    )
    return batch_insert(PredictionModel, fresh_rows, 50)
def import_insights(self, data: Iterable[Dict], automatic: bool = False) -> int:
    """Transform raw items into insight rows and bulk-insert them.

    :return: the number of inserted insights.
    """
    rows = self.process_product_insights(data, automatic)
    return batch_insert(ProductInsight, rows, 50)
def import_insights(self, data: Iterable[JSONType], server_domain: str,
                    automatic: bool) -> int:
    """Process raw insights, stamp them with the shared fields
    (timestamp, server domain) and insert them in batches of 50.

    :return: the number of inserted insights.
    """
    now = datetime.datetime.utcnow()
    processed = self.process_insights(data, server_domain, automatic)
    stamped = self.add_fields(processed, now, server_domain)
    return batch_insert(ProductInsight, stamped, 50)