Exemplo n.º 1
0
 def test_monter_products(self):
     # Runs the Monter fixture feed through handle_products with a config that
     # declares a key mapping from the source field "NOBB" to "nobb", then
     # checks the overall result shape and the first product's core fields.
     config = generate_handle_config(
         {
             "provenance": "monter.no",
             "namespace": "monter.no",
             "market": "no",
             "collection_name": "byggoffers",
             "fieldMapping": [
                 {
                     "source": "NOBB",
                     "destination": "nobb",
                     "replace_type": "key",
                 },
             ],
         }
     )
     actual = handle_products(self.monter_products, config)
     pprint(actual[0])
     self.assertIsInstance(actual, list)
     # Exactly one handled product is expected per input product (the original
     # asserted this twice; once is enough).
     self.assertEqual(len(actual), len(self.monter_products))
     first = actual[0]
     self.assertIsNotNone(first["gtins"])
     self.assertIsNotNone(first["gtins"]["nobb"])
     # Remaining top-level fields that must be populated on the first product.
     for key in ("title", "pricing", "href", "uri", "sku", "imageUrl"):
         self.assertIsNotNone(first[key], f"{key} should not be None")
Exemplo n.º 2
0
 def test_meny_products(self):
     # Runs the Meny fixture feed through handle_products and checks the
     # result shape plus the fields expected on the first handled product.
     field_mapping = [
         {"source": "sku", "destination": "ean", "replace_type": "key"},
         {
             "source": "product_variant",
             "destination": "description",
             "replace_type": "key",
         },
     ]
     raw_config = {
         "provenance": "meny",
         "namespace": "meny",
         "market": "no",
         "collection_name": "groceryoffers",
         "categoriesLimits": [],
         "extractQuantityFields": ["unit_price_raw", "subtitle", "title"],
         "fieldMapping": field_mapping,
     }
     handle_config = generate_handle_config(raw_config)
     handled = handle_products(self.meny_products, handle_config)
     first = handled[0]
     pprint(first)
     self.assertIsInstance(handled, list)
     # One handled product per input product.
     self.assertEqual(len(handled), len(self.meny_products))
     for key in ("title", "pricing", "href", "uri"):
         self.assertIsNotNone(first[key])
     self.assertIsNotNone(first["quantity"]["size"])
     self.assertIsNotNone(first["sku"])
     self.assertIsNotNone(first["gtins"]["gtin13"])
Exemplo n.º 3
0
 def test_iherb_products(self):
     # Runs the iHerb fixture feed through handle_products with a config that
     # declares a key mapping from "sku" to "mpn", then checks the first product.
     sku_to_mpn = {
         "source": "sku",
         "destination": "mpn",
         "replace_type": "key",
     }
     handle_config = generate_handle_config(
         {
             "provenance": "iherb",
             "namespace": "iherb",
             "market": "no",
             "collection_name": "iherboffers",
             "fieldMapping": [sku_to_mpn],
         }
     )
     handled = handle_products(self.iherb_products, handle_config)
     first = handled[0]
     pprint(first)
     self.assertIsInstance(handled, list)
     # One handled product per input product.
     self.assertEqual(len(handled), len(self.iherb_products))
     for key in ("title", "pricing", "href", "uri"):
         self.assertIsNotNone(first[key])
     self.assertIsNotNone(first["quantity"]["size"])
     for key in ("mpn", "sku", "imageUrl"):
         self.assertIsNotNone(first[key])
Exemplo n.º 4
0
 def test_europris_products(self):
     # Runs the Europris fixture feed through handle_products with key mappings
     # name -> title and link -> href, then checks the first handled product.
     field_mapping = [
         {
             "source": "name",
             "destination": "title",
             "replace_type": "key",
         },
         {
             "source": "link",
             "destination": "href",
             "replace_type": "key",
         },
     ]
     raw_config = {
         "provenance": "europris",
         "namespace": "europris",
         "market": "no",
         "collection_name": "groceryoffers",
         "categoriesLimits": [],
         "extractQuantityFields": ["description", "name"],
         "fieldMapping": field_mapping,
     }
     handle_config = generate_handle_config(raw_config)
     handled = handle_products(self.europris_products, handle_config)
     first = handled[0]
     pprint(first)
     self.assertIsInstance(handled, list)
     # One handled product per input product.
     self.assertEqual(len(handled), len(self.europris_products))
     for key in ("title", "pricing", "href", "uri", "sku"):
         self.assertIsNotNone(first[key])
Exemplo n.º 5
0
 def test_shopgun_products(self):
     # Runs the Shopgun fixture feed through handle_products using a config
     # without field mappings and checks the first handled product.
     raw_config = {
         "provenance": "shopgun",
         "namespace": "shopgun",
         "market": "no",
         "collection_name": "groceryoffers",
     }
     handle_config = generate_handle_config(raw_config)
     handled = handle_products(self.shopgun_products, handle_config)
     first = handled[0]
     pprint(first)
     self.assertIsInstance(handled, list)
     # One handled product per input product.
     self.assertEqual(len(handled), len(self.shopgun_products))
     for key in ("title", "pricing", "href", "uri"):
         self.assertIsNotNone(first[key])
     self.assertIsNotNone(first["quantity"]["size"])
Exemplo n.º 6
0
 def test_byggmax_products(self):
     # Runs the Byggmax fixture feed through handle_products using a config
     # without field mappings and checks the first handled product.
     raw_config = {
         "provenance": "byggmax.no",
         "namespace": "byggmax.no",
         "market": "no",
         "collection_name": "byggoffers",
     }
     handle_config = generate_handle_config(raw_config)
     handled = handle_products(self.byggmax_products, handle_config)
     first = handled[0]
     pprint(first)
     self.assertIsInstance(handled, list)
     # One handled product per input product.
     self.assertEqual(len(handled), len(self.byggmax_products))
     for key in ("title", "pricing", "href", "uri", "sku", "imageUrl"):
         self.assertIsNotNone(first[key])
Exemplo n.º 7
0
 def test_single_meny_product(self):
     # Feeds one hand-written Meny offer through handle_products and checks the
     # standard-size minimum value computed for it.
     field_mapping = [
         {"source": "sku", "destination": "ean", "replace_type": "key"},
         {
             "source": "product_variant",
             "destination": "description",
             "replace_type": "key",
         },
     ]
     handle_config = generate_handle_config(
         {
             "provenance": "meny",
             "namespace": "meny",
             "market": "no",
             "collection_name": "groceryoffers",
             "categoriesLimits": [],
             "extractQuantityFields": ["unit_price_raw", "unit_raw", "title"],
             "fieldMapping": field_mapping,
         }
     )
     product_page = "https://meny.no/varer/frukt-gront/gronnsaker/tomater/tomat-stykk-2000406400006"
     offer = {
         "price": 3.0,
         "title": "Tomat stykk",
         "unit_price_raw": "kr\u00a039,90/kg",
         "unit_raw": "80\u00a0g",
         "quantity_info": "80\u00a0g",
         "image_url": "https://res.cloudinary.com/norgesgruppen/image/upload/c_pad,b_white,f_auto,h_320,q_50,w_320/v1558839429/Product/2000406400006.png",
         "product_url": product_page,
         "meny_id": "2000406400006",
         "provenance": "meny",
         "url_fingerprint": "460cae747c054fc03fac9a0a88d8a9ef172c279d",
         "url": product_page,
         "canonical_url": product_page,
         "sku": "2000406400006",
         "gtin13": "2000406400006",
         "provenanceId": "2000406400006",
         "priceCurrency": "NOK",
         "collection_name": "groceryoffers",
     }
     handled = handle_products([offer], handle_config)
     # Expected 39.9 when the value string is parsed; it would be 37.5 if the
     # value were inferred from the quantity instead.
     standard_size = handled[0]["value"]["size"]["standard"]
     self.assertEqual(standard_size["min"], 39.9)
Exemplo n.º 8
0
    def test_filter_products_with_gt(self):
        # Applies a "gt" filter on pricing.price (target 200) to the obsbygg
        # fixture feed and checks how many products remain.
        price_filter = {
            "source": "pricing.price",
            "operator": "gt",
            "target": 200,
        }
        config = dict(
            provenance="obsbygg",
            namespace="obsbygg",
            market="no",
            categoriesLimits=[],
            fieldMapping=[],
            extractQuantityFields=["title"],
            ignore_none=False,
            collection_name="byggoffers",
            filters=[price_filter],
        )

        filtered = handle_products(self.obsbygg_products, config)

        self.assertEqual(
            len(filtered), 54, "Should be 54 offers price higher than 200"
        )
Exemplo n.º 9
0
    def test_filter_products_with_has(self):
        # Applies a "has" filter on categories (target "skruer og spiker") to
        # the obsbygg fixture feed and checks how many products remain.
        category_filter = {
            "source": "categories",
            "operator": "has",
            "target": "skruer og spiker",
        }
        config = dict(
            provenance="obsbygg",
            namespace="obsbygg",
            market="no",
            categoriesLimits=[],
            fieldMapping=[],
            extractQuantityFields=["title"],
            ignore_none=False,
            collection_name="byggoffers",
            filters=[category_filter],
        )

        filtered = handle_products(self.obsbygg_products, config)

        self.assertEqual(
            len(filtered), 24, "Should be 24 offers with target category"
        )
Exemplo n.º 10
0
def _limit_for_stage(products: list) -> list:
    """Return at most 512 products when STAGE is "dev", otherwise all of them."""
    # Only 512 offers when in dev to avoid filling up Elastic Search storage
    if os.getenv("STAGE") == "dev":
        return products[:512]
    return products


def _publish_feed_handled(sns_client, topic_arn, message_data) -> None:
    """Publish ``message_data`` to ``topic_arn`` as a JSON-structured SNS message."""
    sns_message = json.dumps(
        {"default": json.dumps(message_data, default=json_handler)}
    )
    sns_client.publish(
        Message=sns_message,
        MessageStructure="json",
        TargetArn=topic_arn,
    )


def handle_feed_with_config(feed: list, config: HandleConfig) -> BulkWriteResult:
    """Handle a scraped feed, record the run, save the products and notify via SNS.

    Steps:
      1. Validate that ``config`` has truthy ``namespace``, ``collection_name``
         and ``scrape_time`` entries.
      2. Run ``handle_products`` on the feed, tag every product with
         ``siteCollection`` and pass the list through ``add_affiliate_links``.
      3. Build a handle-run record (config, up to 100 example items, elapsed
         time, item count, timestamps, log URL). It is logged when the
         ``IS_LOCAL`` environment variable is set, otherwise stored with
         ``store_handle_run``.
      4. Save the products (capped at 512 when ``STAGE`` is ``"dev"``) and
         publish an SNS message. Collections named ``"bookoffers"`` use
         ``save_book_offers`` and the book topic; everything else uses
         ``save_scraped_products`` and the scraper topic.

    Args:
        feed: Raw feed items passed to ``handle_products``.
        config: Handle configuration mapping.

    Returns:
        ``{"message": "No offers to save"}`` when no products were produced,
        the return value of ``save_book_offers`` for ``"bookoffers"``, and
        ``result.bulk_api_result`` of ``save_scraped_products`` otherwise.
        NOTE(review): the declared return annotation does not cover the
        dict returned on the empty path; kept unchanged for callers.

    Raises:
        Exception: If a required config entry is missing or falsy. Errors
            raised while saving are logged with ``log_traceback`` and
            re-raised.
    """
    # ``.get`` so that a missing key produces the descriptive error below
    # instead of a bare KeyError.
    for required_key in ("namespace", "collection_name", "scrape_time"):
        if not config.get(required_key):
            raise Exception(f"Config needs {required_key}")

    collection_name = config["collection_name"]
    start_time = datetime.now()

    sns_client = boto3.client("sns")  # type: botostubs.SNS

    products = [
        {**product, "siteCollection": collection_name}
        for product in handle_products(feed, config)
    ]
    products = add_affiliate_links(products)

    end_time = datetime.now()

    handle_run = {
        **config,
        "example_items": products[:100],
        "time_elapsed_seconds": (end_time - start_time).total_seconds(),
        "items_handled": len(products),
        "createdAt": end_time,
        "updatedAt": end_time,
        "logs": aws_config.get_log_group_url(),
    }

    if os.getenv("IS_LOCAL"):
        # Local runs only log the record, trimmed to a single example item.
        logging.info({**handle_run, "example_items": products[:1]})
    else:
        store_handle_run(handle_run)

    if not products:
        return {"message": "No offers to save"}

    if collection_name == "bookoffers":
        try:
            # Book offers get their gtins recomputed and use ``book_uri`` as uri.
            products = [
                {
                    **product,
                    "gtins": get_book_gtins(product),
                    "uri": product["book_uri"],
                }
                for product in products
            ]
            result = save_book_offers(_limit_for_stage(products))
            _publish_feed_handled(
                sns_client,
                BOOK_FEED_HANDLED_TOPIC_ARN,
                {"namespace": config["namespace"]},
            )
            return result
        except Exception as e:
            log_traceback(e)
            raise

    try:
        result = save_scraped_products(_limit_for_stage(products), collection_name)
    except Exception as e:
        log_traceback(e)
        raise

    # To allow event-driven behaviour, we publish an sns topic when products are saved successfully.
    _publish_feed_handled(sns_client, SCRAPER_FEED_HANDLED_TOPIC_ARN, {**config})

    return result.bulk_api_result