Пример #1
0
def price_info_in_pg(db: PostgresDAO.PostgreSQLdb, product_attribute: str):
    """Create and fill a table called {product_attribute}_price_information in PostgreSQL
    with information about the average price and standard price deviation of products with a certain value of a certain
    product_attribute.

    PostgreSQL table will be called {product_attribute}_price_information.

    The table will have 3 rows:
    -a row for the value of the product attribute, with the name of the product attribute as primary key.
    -a row for the average price of products with this product attribute value called average_price.
    -a row for the standard deviation of products with this product attribute value called standard_deviation.

    Args:
        db: The PostgreSQL database to query.
        product_attribute: the product attribute to get product information by. For instance, 'sub_sub_category'."""
    create_product_attribute_and_price_information_table(db, product_attribute)

    price_information = get_attribute_price_information(db, product_attribute)

    grouped_price_information = group_attribute_prices(price_information)

    dataset = grouped_attribute_prices_to_PostgreSQL_dataset(grouped_price_information)

    query = query_functions.construct_insert_query(f"{product_attribute}_price_information",
                                                    [product_attribute, "average_price", "standard_deviation"])

    db.many_update_queries(query, dataset, fast_execution=True)
Пример #2
0
    def write_all_to_pg(db: PostgresDAO.PostgreSQLdb):
        """Write all profiles in Profile.tracker to the corresponding entry in the Profiles table in PostgreSQL.

        Args:
            db: The PostgreSQL database to write to."""
        dataset = []
        for profile in Profile.tracker.values():
            dataset.append((profile.budget_segment, profile.id))
        query = "UPDATE Profiles SET budget_preference = %s WHERE profile_id = %s;"
        db.many_update_queries(query, dataset, fast_execution=True)
Пример #3
0
def determine_user_price_preferences(db: PostgresDAO.PostgreSQLdb,
                                     product_attribute: str,
                                     deviation_amount: int or float = 1):
    """Call all necessary methods to gather data about price preference, process it, and write it to PostgreSQL.

    Args:
        db: The PostgreSQL database to query.
        product_attribute: the product attribute budget preference should be judged by.
        deviation_amount:
            the average amount of standard price deviations a profile should vary from the average
            in whatever sub_sub_categories they have bought products you should start drawing conclusions from.
            1 by default."""
    db.query(
        "UPDATE Profiles SET budget_preference = NULL WHERE budget_preference IS NOT NULL",
        commit_changes=True)
    Profile.get_all_from_pg(db, product_attribute)
    Profile.calculate_budget_segment_ALL(deviation_amount)
    Profile.write_all_to_pg(db)
def popularity_recommendation(current_date: datetime.datetime,
                              db: PostgresDAO.PostgreSQLdb):
    """Call all necessary recommendations to fill the popularity_recommendation table in PostgreSQL,
    according to provided settings.

    Args:
        current_date, the date / time the algorithm should think it is.
        db: the PostgreSQL DB to query."""
    Product.get_from_pg(db, current_date)
    Product.score_all()
    top = Top(4)
    top.insert_multiple(Product.tracker.values())
    insert_dataset = [tuple([product.product_id for product in top.data])]
    query_functions.create_rec_table_query(PostgresDAO.db,
                                           'popularity_recommendation', '')
    db.query("INSERT INTO popularity_recommendation VALUES %s",
             insert_dataset,
             commit_changes=True)
    def get_from_pg(db: PostgresDAO.PostgreSQLdb,
                    up_until_date: datetime.datetime = None):
        """Get all ordered_products from PostgreSQL, and assign them to / create the proper Product instances.

        Args:
            db: the PostgreSQL database to query for ordered_products.
            up_until_date: the date up until which session should be included (for time-travelling purposes)."""
        query = """SELECT Ordered_products.product_id, Ordered_products.quantity, Sessions.session_end
            FROM SESSIONS
            INNER JOIN Ordered_products ON Sessions.session_id = Ordered_products.session_id
            """

        if not up_until_date is None:
            query += f"WHERE Sessions.session_end < '{up_until_date.strftime('%Y-%m-%d')}'"

        query += ";"

        order_dataset = db.query(query, expect_return=True)

        for result in order_dataset:
            if not result[0] in Product.tracker:
                Product(result[0], TODAY)
            Product.tracker[result[0]].add_order(result)
Пример #6
0
def get_attribute_price_information(db: PostgresDAO.PostgreSQLdb,
                                    product_attribute: str) -> dict:
    """Query the PostgreSQL for price information about a certain product attribute
    as entered into the PostgreSQL database by subsubcat_price_information.py.

    Args:
        db: The PostgreSQL database to query.
        product_attribute: the product attribute to get price information about (for instance, 'sub_sub_category').

    Returns:
        a dictionairy with every value of the provided product attribute as keys,
        and a tuple containing:
            0: the average price of products with that value of the given product attribute.
            1: the price standard deviation of products with that value of the given product attribute.
        as values.
        """
    price_information = {}
    query_result = db.query(
        f"""SELECT {product_attribute}, average_price, standard_deviation
FROM {product_attribute}_price_information;""",
        expect_return=True)
    for att_value, avg, dev in query_result:
        price_information[att_value] = (avg, dev)
    return price_information
Пример #7
0
def profile_order_products_from_postgreSQL(
        db: PostgresDAO.PostgreSQLdb, product_attribute: str) -> list[tuple]:
    """Query the PostgreSQL database for the profile_id, a certain product attribute, the quantity, and the selling
    price for every product that has been ordered by a profile.

    Args:
        db: The PostgreSQL database to query.
        product_attribute: the product attribute to get for every ordered product (for instance, 'sub_sub_category').

    Returns:
        a list containing a tuple for every product ordered in a session by a profile, that contains:
            0: the profile_id associated with the order.
            1: the value of the given product attribute.
            2: the amount of times that product was ordered in that session.
            3: the product's price in cents."""
    query = f"""SELECT
	Profiles.profile_id,
	Products.{product_attribute},
	Ordered_products.quantity,
	Products.selling_price
FROM
	Profiles
INNER JOIN
	Bu
	ON Profiles.profile_id = Bu.profile_id
INNER JOIN
	Sessions
	ON Bu.bu_id = Sessions.bu_id
INNER JOIN
	Ordered_products
	ON Sessions.session_id = Ordered_products.session_id
INNER JOIN
	Products
	ON Ordered_products.product_id = Products.product_id
;"""
    return db.query(query, expect_return=True)
Пример #8
0
def simple_mongo_to_sql(mongo_collection_name: str,
                        postgres_db: PostgresDAO.PostgreSQLdb,
                        postgres_table_name: str,
                        mongo_attribute_list: list[str or list[str]],
                        postgres_attribute_list: list[str],
                        unpack_method_dict: dict = {"shouldnothappen": "shouldnothappen"},
                        reject_if_null_amount: int = 0,
                        remember_in_existance: int = None):
    """A function to do a 'simple', one to one conversion of certain attributes of a MongoDB collection to a PostgreSQL table.

    Args:
        mongo_collection_name: The name of the MongoDB collection to retrieve products from.
        postgres_db: the postgres DB object (from PostgresDAO.py) to insert into.
        postgres_table_name: the postgres_table_name to insert into.
        mongo_attribute_list:
            a list that's allowed to contain either a string, or a list containing strings, for every attribute that has to be retrieved from mongoDB.
            -the key as string if the attribute is directly stored under a single key in the item in MongoDB.
                For instance: "name" if the name is stored under the key name.
            -list containing keys as strings if the attribute is stored within an object (or several, stored within eachother) in MongoDB.
                For instance: ["price", "selling_price"] if the selling price is stored under "selling_price" within an object stored under "price".
        postgres_attribute_list:
            the list of attribute names in PostGreSQL, in the same order as the corresponding attributes in mongo_attribute_list.
            it is recommended to put any primary keys/not null constrained attributes first.
        unpack_method_dict:
            a dictionairy with keys that represent indexes in postgres_attribute_list.
            the values are methods to unpack certain MongoDB attributes in case they need further unpacking.
            for instance, bu_id in the sessions collection is always stored in a length = 1 array.
            a simple function to help unpack this should be passed with unpack_method_dict.
        reject_if_null_amount:
            an integer that represents the first n attributes in both of the attribute lists.
            any entries that have the value null within these attributes will not be entered into PostGreSQL.
        remember_unique_attributes:
            the index of an attribute that entries in existence have to be stored in python for.
            if True, returns set of all entries of a certain attribute in existence.

    Returns:
        if remember_unique_attributes is True, returns set of all entries of a certain attribute in existence.
        returns None otherwise
        """
    collection = MongodbDAO.getDocuments(mongo_collection_name)
    data_list = []
    if not remember_in_existance is None: #TODO: think of better var names
        remember_things_in_existance_set = set()
    for item in collection:
        value_list = []
        for i in range(0, len(mongo_attribute_list)):
            key = mongo_attribute_list[i]
            unpack_method = unpack_method_dict.get(i)
            if type(key) == list:
                value = unpack(item, key)
            else:
                value = item.get(key)
            if unpack_method != None:
                value = unpack_method(value)
            if not(isinstance(value, str) or isinstance(value, int) or isinstance(value, float) or value is None):#because pymongo keeps giving us wonky datatypes. TODO: Consider always using str() without checking for type
                value = str(value)
            if remember_in_existance == i:
                remember_things_in_existance_set.add(value)
            value_list.append(value)
        if not (None in value_list[0:reject_if_null_amount]):
            data_list.append(tuple(value_list))
    q = construct_insert_query(postgres_table_name, postgres_attribute_list)
    postgres_db.many_update_queries(q, data_list, fast_execution=True)
    if not remember_in_existance is None:
        return remember_things_in_existance_set
Пример #9
0
def fill_sessions_profiles_bu(db: PostgresDAO.PostgreSQLdb, valid_product_ids: set):
    """Fill the session, profile and bu tables in the PostgreSQL db using the profiles and session collections in MongoDB.

    Not very modular, but considering the specific nature of (almost) every PostgreSQL table, a specific function is
    probably the most pragmatic way to go about it.



    Args:
        db: the PostgresDAO.postgreSQLdb object to fill."""
    profile_collection = MongodbDAO.getDocuments("profiles")
    session_collection = MongodbDAO.getDocuments("sessions")

    session_dataset = []
    profile_dataset = []
    buid_dataset = []
    ordered_products_dataset = []
    ordered_products_dict = {} #keeps track of all the orders with (session_id, product_id) as key and quantity as value
    profile_set = set() #keeps track of all the profile_ids that exist in the profiles collection
    buid_dict = {} #keeps track of all the buids that exist in the session collection dataset (as keys) and what profile they're associated with (if any) as values.

    for session in session_collection:
        #get session information and add to session dataset
        session_id = session.get("_id")
        if session_id is None:
            continue
        session_segment = session.get("segment")
        session_buid = unpack(session, ["buid", 0])
        session_end = session.get("session_end")

        if isinstance(session_buid, list):
            session_buid = unpack(session_buid, [0])

        session_id = string_or_none(session_id)
        session_segment = string_or_none(session_segment)
        session_buid = string_or_none(session_buid)

        session_tuple = (session_id, session_segment, session_buid, session_end)
        session_dataset.append(session_tuple)

        #add session_buid to buid_dict
        if not session_buid is None:
            buid_dict[session_buid] = None

        #add products that have been ordered to ordered_products_dict
        session_order = unpack(session, ["order", "products"])
        if session_order != None:
            for product in session_order:
                product_id = string_or_none(product.get("id"))
                if product_id in valid_product_ids:
                    if (session_id, product_id) in ordered_products_dict:
                        ordered_products_dict[(session_id, product_id)] += 1
                    else:
                        ordered_products_dict[(session_id, product_id)] = 1

    for profile in profile_collection:
        #get profile information and add to profile_set
        profile_id = string_or_none(profile.get("_id"))
        profile_buids = profile.get("buids")
        profile_set.add(profile_id)

        #assign profile_id associated buid in buid_Dict
        if isinstance(profile_buids, list):
            for profile_buid in profile_buids:
                profile_buid = string_or_none(profile_buid)
                if profile_buid in buid_dict and not profile_id is None:
                    buid_dict[profile_buid] = profile_id

    #fill buid_dataset from buid_dict
    for buid, profile in buid_dict.items():
        buid_dataset.append((buid, profile))

    #fill profile_dataset from profile_set
    profile_dataset = [(x,) for x in profile_set]

    #fill ordered_products_dataset from ordered_products_dict
    for k, v in ordered_products_dict.items():
        ordered_products_dataset.append((k[0], k[1], v))

    #construct insert queries for PostgreSQL insertions
    profile_query = construct_insert_query("Profiles", ["profile_id"])
    bu_query = construct_insert_query("Bu", ["bu_id", "profile_id"])
    session_query = construct_insert_query("Sessions", ["session_id", "segment", "bu_id", "session_end"])
    ordered_products_query = construct_insert_query("Ordered_products", ["session_id", "product_id", "quantity"])

    #insert into PostgreSQL
    db.many_update_queries(profile_query, profile_dataset, fast_execution=True)
    db.many_update_queries(bu_query, buid_dataset, fast_execution=True)
    db.many_update_queries(session_query, session_dataset, fast_execution=True)
    db.many_update_queries(ordered_products_query, ordered_products_dataset, fast_execution=True)