Пример #1
0
def add_identical_offers(collection_name,
                         offer_limit,
                         n_highest,
                         provenance=None):
    """Run identical-offer matching over still-valid offers, batch by batch.

    Offers are read from the "mpnoffers" collection, restricted to documents
    whose ``validThrough`` is later than the current local time and whose
    ``siteCollection`` equals ``collection_name``.

    Args:
        collection_name: Site collection to filter on; also passed to
            ``load_model_from_s3`` and to every batch call.
        offer_limit: Maximum number of offers requested from the database.
        n_highest: Forwarded unchanged to ``add_identical_offers_to_batch``.
        provenance: Optional extra filter on the ``provenance`` field; it is
            only applied when truthy.

    Returns:
        A list with one ``add_identical_offers_to_batch`` result per chunk of
        ``CHUNK_SIZE`` offers.
    """
    offers_collection = get_collection("mpnoffers")
    model = load_model_from_s3(collection_name)

    query = {
        "validThrough": {"$gt": datetime.now()},
        "siteCollection": collection_name,
    }
    if provenance:
        query["provenance"] = provenance

    cursor = offers_collection.find(
        query,
        projection=MONGO_PROJECTION,
        limit=offer_limit,
    )
    # Materialize the cursor once so it can be split into fixed-size chunks.
    fetched_offers = list(cursor)

    return [
        add_identical_offers_to_batch(chunk, model, collection_name, n_highest)
        for chunk in pydash.chunk(fetched_offers, CHUNK_SIZE)
    ]
Пример #2
0
def create_models(collection_name, offer_limit):
    """Build a model from still-valid offers and store it in S3.

    Offers are read from the "mpnoffers" collection, restricted to documents
    whose ``validThrough`` is later than the current local time and whose
    ``siteCollection`` equals ``collection_name``.

    Args:
        collection_name: Site collection to filter on; also used as the key
            passed to ``save_model_to_s3``.
        offer_limit: Maximum number of offers to fetch. When it is missing or
            not positive, the previous hard-coded cap of ``2**17`` is used.

    Returns:
        Whatever ``save_model_to_s3`` returns.
    """
    # MongoDB rejects a non-positive $limit, so fall back to the former
    # hard-coded cap instead of forwarding an unusable value.
    limit = offer_limit if offer_limit and offer_limit > 0 else 2**17

    collection = get_collection("mpnoffers")
    now = datetime.now()
    start_time = time()
    # Report the limit that is actually applied to the query.
    print(f"Fetching up to {limit} offers from db.")
    offers = collection.aggregate(
        [
            {
                "$match": {
                    "validThrough": {
                        "$gt": now
                    },
                    "siteCollection": collection_name
                }
            },
            {
                "$project": MONGO_PROJECTION
            },
            {
                "$limit": limit
            },
        ],
        allowDiskUse=True,
    )
    offers_list = list(offers)
    print(f"Time spent: {time() - start_time} s.")
    print(f"Got {len(offers_list)} after filter")
    model = get_model(offers_list)
    result = save_model_to_s3(model, collection_name)

    return result
Пример #3
0
def save_scraped_products(products: Iterable, offers_collection_name: str):
    """Upsert scraped products into "mpnoffers", keyed on ``uri``.

    Meta-field documents from "mpnoffersmeta" that were updated within the
    last ``OVERWRITE_EDIT_LIMIT_DAYS`` days (UTC) are converted to a dict and
    handed to ``remove_protected_fields`` together with the products before
    the write.

    Args:
        products: Scraped product documents.
        offers_collection_name: Accepted but not read by this function; the
            target collection is fixed to "mpnoffers".

    Returns:
        The result of ``bulk_upsert``.
    """
    cutoff = datetime.utcnow() - timedelta(days=OVERWRITE_EDIT_LIMIT_DAYS)
    recent_meta_fields = get_collection("mpnoffersmeta").find(
        {"updatedAt": {"$gt": cutoff}})
    fields_by_uri = meta_fields_result_to_dict(recent_meta_fields)
    writable_products = remove_protected_fields(products, fields_by_uri)
    return bulk_upsert(writable_products, "mpnoffers", "uri")
Пример #4
0
def bulk_upsert(iterable: Iterable,
                collection_name: str,
                id_field: str = "uri"):
    """Write every item of ``iterable`` to a Mongo collection in one bulk call.

    Each item is turned into a write request by ``get_update_one``.

    Args:
        iterable: Items to write.
        collection_name: Name of the target collection.
        id_field: Accepted for interface compatibility; the visible code does
            not pass it on to ``get_update_one``.

    Returns:
        The result of the collection's ``bulk_write`` call.
    """
    print(f"Start saving to Mongo collection: {collection_name}")
    target = get_collection(collection_name)
    operations = [get_update_one(item) for item in iterable]
    print(f"{len(operations)} items to write")
    return target.bulk_write(operations)
Пример #5
0
def save_similar_offers(updates: list):
    """Store the ``similarOffers`` value of each update on its offer.

    Args:
        updates: Mappings that each provide a ``"uri"`` (used to select the
            offer in "mpnoffers") and a ``"similarOffers"`` value to set.

    Returns:
        The result of the collection's ``bulk_write`` call.
    """
    collection = get_collection("mpnoffers")
    operations = []
    for entry in updates:
        selector = {"uri": entry["uri"]}
        change = {"$set": {"similarOffers": entry["similarOffers"]}}
        operations.append(UpdateOne(selector, change))
    return collection.bulk_write(operations)
Пример #6
0
def add_identical_offer_relations(uris_lists: Iterable[Iterable[str]]):
    """Record groups of offer URIs as "identical" relations.

    Per the original note, each group holds offers that share GTINs. For
    every group two operations are queued, in this order:

    1. An upsert that inserts a new "identical" relation containing the group
       when no existing relation's ``offerSet`` contains any of its URIs.
    2. An update that adds all URIs of the group to a matching relation's
       ``offerSet`` and refreshes ``updatedAt``.

    The bulk write is ordered so the insert of step 1 runs before step 2.

    Args:
        uris_lists: Groups of offer URIs. Each group is copied into a list
            before use, so any iterable (set, tuple, generator) is accepted.

    Returns:
        The result of the ``bulk_write`` call on "offerbirelations".

    NOTE(review): an empty ``uris_lists`` yields an empty operation list,
    which pymongo's ``bulk_write`` is expected to reject; confirm callers
    never pass one.
    """
    operations = []
    now = datetime.now()
    for uris in uris_lists:
        # The group is embedded in the documents sent to MongoDB three times;
        # copy it to a list so sets and one-shot iterators are encodable.
        uri_list = list(uris)
        insert_if_missing = UpdateOne(
            {
                "relationType": "identical",
                "offerSet": {
                    "$in": uri_list
                },
            },
            {
                "$setOnInsert": {
                    "createdAt": now,
                    "updatedAt": now,
                    "relationType": "identical",
                    "offerSet": uri_list,
                    "selectMethod": "auto"
                },
            },
            upsert=True,
        )
        operations.append(insert_if_missing)
        merge_into_existing = UpdateOne(
            {
                "relationType": "identical",
                "offerSet": {
                    "$in": uri_list
                },
            },
            {
                "$set": {
                    "updatedAt": now
                },
                "$addToSet": {
                    "offerSet": {
                        "$each": uri_list
                    },
                },
            },
            upsert=False,
        )
        operations.append(merge_into_existing)

    print(f"{len(operations)} operations to add identical offers")

    collection = get_collection("offerbirelations")

    bulk_write_result = collection.bulk_write(operations, ordered=True)

    return bulk_write_result
Пример #7
0
def save_promoted_offers(df, collection_name: str):
    """Flag the offer behind every row of ``df`` as promoted.

    Args:
        df: Object exposing ``iterrows()``; each row's ``id`` attribute is
            combined with the SHOPGUN provenance to build the offer URI.
        collection_name: Name of the collection to update.

    Returns:
        The result of the collection's ``bulk_write`` call.
    """
    collection = get_collection(collection_name)
    operations = []
    for _, row in df.iterrows():
        selector = {"uri": get_product_uri(provenances.SHOPGUN, row.id)}
        change = {
            "$set": {
                "is_promoted": True,
                "select_method": select_methods.AUTO,
            }
        }
        operations.append(UpdateOne(selector, change))
    return collection.bulk_write(operations)
Пример #8
0
def get_handle_configs(provenance: str):
    """Return all handle configs for a provenance that are not disabled.

    Args:
        provenance: Value matched against the ``provenance`` field of the
            "handleconfigs" collection.

    Returns:
        A non-empty list of matching documents.

    Raises:
        NoHandleConfigError: If no document matches.
    """
    cursor = get_collection("handleconfigs").find({
        "provenance": provenance,
        "status": {"$ne": "disabled"},
    })
    configs = list(cursor)
    if not configs:
        raise NoHandleConfigError(
            f"No handleconfig found for provenance: {provenance}.")
    return configs
Пример #9
0
def get_offers_by_uris(uris):
    """Query "mpnoffers" for offers whose ``uri`` is one of ``uris``.

    The result is limited to the fields in ``MONGO_PROJECTION`` and returned
    as the un-consumed result of ``find``.
    """
    uri_filter = {"uri": {"$in": uris}}
    offers_collection = get_collection("mpnoffers")
    return offers_collection.find(uri_filter, projection=MONGO_PROJECTION)
Пример #10
0
def get_offers_with_product(
    provenance: str,
    collection_name: str,
    target_collection_name: str,
    relation_collection_name: str,
    limit: int = 0,
) -> Iterable[dict]:
    """Join offers with GTIN-sharing products and their product relations.

    Each offer from ``collection_name`` whose ``gtins`` is not null gets:

    * ``gtin_products``: documents from ``target_collection_name`` that have
      at least one GTIN key/value pair in common with the offer.
    * ``product_relations``: documents from ``relation_collection_name``
      whose ``offer`` equals the offer's ``_id``.

    Args:
        provenance: Currently not applied to the pipeline.
        collection_name: Source collection the aggregation runs on.
        target_collection_name: Collection searched for shared GTINs.
        relation_collection_name: Collection holding offer/product relations.
        limit: Maximum number of results; values ``<= 0`` mean no limit.

    Returns:
        The result of the collection's ``aggregate`` call.
    """
    collection = get_collection(collection_name)

    # Turn the ``gtins`` object into an array of {k, v} pairs so two
    # documents' GTINs can be compared as sets.
    gtins_as_list = {
        "$addFields": {
            "gtin_list": {"$objectToArray": "$gtins"},
        },
    }
    has_gtins = {"$match": {"gtins": {"$ne": None}}}

    # Sub-pipeline executed against the target collection for every offer.
    shared_gtin_pipeline = [
        {
            "$addFields": {
                "gtin_list": {"$objectToArray": "$gtins"},
            },
        },
        {
            "$addFields": {
                "same_gtins": {
                    "$setIntersection": ["$$source_gtin_list", "$gtin_list"],
                },
            },
        },
        # Keep only targets whose intersection with the offer is non-empty.
        {"$match": {"$expr": {"$gt": ["$same_gtins", []]}}},
        {"$project": {"_id": 1, "provenance": 1, "same_gtins": 1}},
    ]
    lookup_gtin_products = {
        "$lookup": {
            "from": target_collection_name,
            "let": {"source_gtin_list": "$gtin_list"},
            "pipeline": shared_gtin_pipeline,
            "as": "gtin_products",
        },
    }

    # Sub-pipeline executed against the relation collection for every offer.
    relation_pipeline = [
        {"$match": {"$expr": {"$eq": ["$$source_id", "$offer"]}}},
        {"$project": {"_id": 1, "product": 1}},
    ]
    lookup_product_relations = {
        "$lookup": {
            "from": relation_collection_name,
            "let": {"source_id": "$_id"},
            "pipeline": relation_pipeline,
            "as": "product_relations",
        }
    }

    pipeline = [
        has_gtins,
        gtins_as_list,
        lookup_gtin_products,
        lookup_product_relations,
    ]
    if limit > 0:
        pipeline.append({"$limit": limit})
    return collection.aggregate(pipeline)
Пример #11
0
def store_handle_run(handle_run_config):
    """Insert ``handle_run_config`` as one document into "handleruns".

    Returns:
        The result of the collection's ``insert_one`` call.
    """
    return get_collection("handleruns").insert_one(handle_run_config)
Пример #12
0
def get_offers_same_gtin_offers(
    provenance: str,
    collection_name: str,
    limit: int = 0,
) -> Iterable[dict]:
    """Find still-valid offers that share GTINs with other offers.

    Source offers come from ``collection_name`` and must have the given
    ``provenance``, a ``validThrough`` later than the current local time and
    a non-empty ``gtins`` object. Each one is matched against the documents
    of the "mpnoffers" collection (fixed, regardless of ``collection_name``),
    excluding the document with the same ``_id``. Matches with at least one
    common GTIN key/value pair are attached as ``gtin_products``, and offers
    without any match are dropped.

    Args:
        provenance: Provenance the source offers must have.
        collection_name: Collection the aggregation runs on.
        limit: Maximum number of results; values ``<= 0`` mean no limit.

    Returns:
        The result of the collection's ``aggregate`` call.
    """
    collection = get_collection(collection_name)
    now = datetime.now()

    select_sources = {
        "$match": {
            "validThrough": {"$gt": now},
            "provenance": provenance,
            "gtins": {"$ne": {}, "$exists": True},
        }
    }
    keep_needed_fields = {
        "$project": {"gtins": 1, "provenance": 1, "uri": 1}
    }
    # Expose the GTIN pairs and the own id under names the lookup can bind.
    prepare_lookup_vars = {
        "$addFields": {
            "gtin_list": {"$objectToArray": "$gtins"},
            "source_id": "$_id",
        },
    }

    # Sub-pipeline executed against "mpnoffers" for every source offer.
    other_offers_pipeline = [
        # Candidates need GTINs and must not be the source offer itself.
        {
            "$match": {
                "gtins": {"$ne": {}, "$exists": True},
                "$expr": {"$ne": ["$$source_id", "$_id"]},
            },
        },
        {
            "$addFields": {
                "gtin_list": {"$objectToArray": "$gtins"},
            },
        },
        {
            "$addFields": {
                "same_gtins": {
                    "$setIntersection": ["$$source_gtin_list", "$gtin_list"],
                },
            },
        },
        # Keep only candidates whose intersection with the source is non-empty.
        {
            "$match": {
                "same_gtins": {"$exists": True},
                "$expr": {"$gt": ["$same_gtins", []]},
            },
        },
        {
            "$project": {
                "_id": 1,
                "provenance": 1,
                "same_gtins": 1,
                "uri": 1,
            }
        },
    ]
    lookup_other_offers = {
        "$lookup": {
            "from": "mpnoffers",
            "let": {
                "source_gtin_list": "$gtin_list",
                "source_id": "$source_id",
            },
            "pipeline": other_offers_pipeline,
            "as": "gtin_products",
        },
    }
    drop_unmatched = {
        "$match": {"gtin_products": {"$not": {"$size": 0}}}
    }

    pipeline = [
        select_sources,
        keep_needed_fields,
        prepare_lookup_vars,
        lookup_other_offers,
        drop_unmatched,
    ]
    if limit > 0:
        pipeline.append({"$limit": limit})
    return collection.aggregate(pipeline)