from robotoff import settings
from robotoff.products import ProductDataset
from robotoff.utils import dump_jsonl, get_logger

logger = get_logger()


def images_dimension_iter():
    """Yield one ``[width, height, code, image_id]`` row per numbered image.

    The product dataset is loaded lazily, on the first iteration of the
    generator. Only products kept by
    ``filter_nonempty_text_field("code")`` are visited. For each of them,
    every entry of the ``"images"`` mapping is inspected and a row is
    produced when:

    * the image key consists of digits only, and
    * the image's ``"sizes"`` mapping has a ``"full"`` entry.

    Yields:
        list: ``[int(w), int(h), product["code"], str(image_id)]`` where
        ``w`` and ``h`` are read from ``sizes["full"]``.

    Raises:
        KeyError: if a numbered image has no ``"sizes"`` key, or its
            ``"full"`` entry lacks ``"w"`` or ``"h"``.
    """
    products = ProductDataset.load().stream().filter_nonempty_text_field("code")

    for item in products:
        for key, meta in item.get("images", {}).items():
            # Non-numeric keys are skipped before "sizes" is ever looked up,
            # so entries without that key are harmless as long as their ID
            # is not purely digits.
            if key.isdigit() and "full" in meta["sizes"]:
                full_size = meta["sizes"]["full"]
                # Read both dimensions before converting either one.
                raw_w, raw_h = full_size["w"], full_size["h"]
                yield [int(raw_w), int(raw_h), item["code"], str(key)]


# Runs at import time: the generator is handed to dump_jsonl together with the
# destination path (presumably one JSON document per yielded row; see
# robotoff.utils.dump_jsonl to confirm).
dump_jsonl(
    settings.PROJECT_DIR / "images_dimension.jsonl",
    images_dimension_iter(),
)
from robotoff.insights.importer import (
    AUTHORIZED_LABELS_STORE,
    import_insights as import_insights_,
)
from robotoff.insights.ocr import (
    extract_insights,
    get_barcode_from_path,
    ocr_iter,
    OCRResult,
)
from robotoff.models import db, ProductInsight
from robotoff.off import get_product
from robotoff.products import get_product_store
from robotoff.utils import get_logger, jsonl_iter

logger = get_logger(__name__)


def run_from_ocr_archive(
    input_: Union[str, TextIO],
    insight_type: InsightType,
    output: Optional[str] = None,
    keep_empty: bool = False,
):
    """Generate insights from an OCR archive and pick the output stream.

    Args:
        input_: Path or already-open text stream; forwarded unchanged to
            ``generate_from_ocr_archive``.
        insight_type: Forwarded unchanged to ``generate_from_ocr_archive``.
        output: When not ``None``, this path is opened in text write mode
            (``"w"``, which truncates an existing file). When ``None``,
            ``sys.stdout`` is used instead.
        keep_empty: Forwarded unchanged to ``generate_from_ocr_archive``.

    NOTE(review): the visible body ends right after the output stream is
    selected. ``insights`` is never consumed and ``output_f`` is neither
    written to nor closed here. This looks like a truncated view of the
    function -- confirm against the full file before relying on it, and
    make sure a file opened from ``output`` is closed (but not
    ``sys.stdout``).
    """
    # NOTE(review): Union, TextIO, Optional, InsightType, sys and
    # generate_from_ocr_archive are not among the imports visible above;
    # presumably they are brought into scope elsewhere in the module.
    insights = generate_from_ocr_archive(input_, insight_type, keep_empty)

    if output is not None:
        # No explicit encoding is passed, so the platform default applies.
        output_f = open(output, "w")
    else:
        output_f = sys.stdout
def generate_ocr_insights(input_: str, insight_type: str, output: str):
    """Run OCR-archive insight generation through ``robotoff.cli.insights``.

    Args:
        input_: Passed as the first argument to ``run_from_ocr_archive``.
        insight_type: Passed as the second argument to
            ``run_from_ocr_archive``.
        output: Passed as the third argument to ``run_from_ocr_archive``.
    """
    # Imports are kept local to the function, as in the original, so nothing
    # from robotoff is loaded until the function is actually called.
    from robotoff.cli import insights as insights_cli
    from robotoff import utils as robotoff_utils

    # The return value is deliberately discarded; the call is made only for
    # its side effects (presumably logging setup -- verify in robotoff.utils).
    robotoff_utils.get_logger()

    insights_cli.run_from_ocr_archive(input_, insight_type, output)