示例#1
0
def recommend_products_by_category(product_id: str, modification_type: str) -> list:
    """Recommend products from the category neighbourhood of ``product_id``.

    The product's category is read from ``item_category_list`` (only the first
    row returned is used). Candidates are then collected according to
    ``modification_type``:

    * ``"same_category"`` - every item listed in that category;
    * ``"sibling_category"`` - every item listed in a category that has the
      same parent category, or in the parent category itself.

    Args:
        product_id: Id of the product the recommendations are for.
        modification_type: ``"same_category"`` or ``"sibling_category"``.

    Returns:
        The result of ``get_top_10_by_rating`` for the candidates, with every
        occurrence of ``product_id`` removed. An empty list is returned
        directly when the product has no category row, when its category is
        missing from ``category``, or when ``modification_type`` is not one of
        the two supported values.
    """
    with create_connection() as connection:
        category_row = connection.execute(
            "SELECT categoryId FROM item_category_list WHERE itemId=(?)",
            (product_id,)).fetchone()
        if category_row is None:
            # No category row for this product: nothing to recommend from.
            return []
        product_category_id = category_row[0]

        if modification_type == "same_category":
            cursor = connection.execute("SELECT itemId FROM item_category_list "
                                        "WHERE categoryId=(?)", (product_category_id,))

        elif modification_type == "sibling_category":
            parent_row = connection.execute(
                "SELECT parentCategoryId FROM category WHERE id=(?)",
                (product_category_id,)).fetchone()
            if parent_row is None:
                return []
            parent_category_id = parent_row[0]

            # Resolve the sibling categories with a sub-query so no id is ever
            # formatted into the SQL text.
            query = ("SELECT itemId FROM item_category_list WHERE categoryId "
                     "IN (SELECT id FROM category WHERE parentCategoryId=(?))")
            parameters = [parent_category_id]
            if parent_category_id:
                # The parent category itself also counts as a source of items.
                query += " OR categoryId=(?)"
                parameters.append(parent_category_id)
            cursor = connection.execute(query, parameters)

        else:
            # Unsupported type: do not fall through and re-read the cursor of
            # the category lookup above.
            return []

        # An item may be listed more than once (several matching categories),
        # so filter out every occurrence of the product itself.
        result_items = [row[0] for row in cursor.fetchall() if row[0] != product_id]

        return get_top_10_by_rating(result_items)
    def _prep_data(self):
        """
        Load every review and build the inputs of the recommender.

        Returns a tuple of:
        1. the item-user rating matrix as a scipy CSR sparse matrix (one row
           per itemId, one column per userId, missing ratings stored as 0)
        2. a dict mapping each row index of that matrix to the itemId of
           that row
        """
        connection = create_connection()

        with connection:
            # fetch the whole review table
            review_cursor = connection.cursor()
            review_cursor.execute("SELECT * FROM review")
            review_rows = review_cursor.fetchall()
            review_cursor.close()

            ratings_frame = pd.DataFrame(
                review_rows,
                columns=['id', 'userId', 'itemId', 'rating', 'reviewTime'])

            # rows are items, columns are users; unrated pairs become 0
            rating_table = ratings_frame.pivot(
                index='itemId', columns='userId', values='rating')
            rating_table = rating_table.fillna(0)

            # row position in the matrix -> itemId found at that position
            row_to_item = dict(enumerate(rating_table.index))

            return csr_matrix(rating_table.values), row_to_item
示例#3
0
def get_random_item_id():
    """Return the id of one row of ``item`` picked at random by the database."""
    connection = create_connection()

    with connection:
        random_row = connection.execute(
            "SELECT id FROM item ORDER BY RANDOM() LIMIT 1").fetchone()
        return random_row[0]
示例#4
0
def index():
    """Render ``main_page.html`` with up to 120 randomly selected products."""
    connection = create_connection()

    with connection:
        id_rows = connection.execute(
            "SELECT id FROM item ORDER BY RANDOM() LIMIT 120").fetchall()
        # one product dict per selected id, in the order the ids came back
        random_ids = (id_row[0] for id_row in id_rows)
        products = list(map(get_item_dict, random_ids))

    return render_template("main_page.html", products=products)
def algorithm_related(product_id: str, recommendations_count: int) -> list:
    """Return the ids of products recommended for ``product_id``.

    A ``SimilarityRecommender`` is built on a fresh connection and asked for
    ``recommendations_count`` products using ``find_similar_items_related``
    and ``count_similarities_related``.

    Args:
        product_id: Id of the product the recommendations are for.
        recommendations_count: Number of recommendations requested from the
            recommender.

    Returns:
        The ``id`` attribute of every recommended product, in the order the
        recommender returned them.
    """
    connection = create_connection()
    try:
        similarity_recommender = SimilarityRecommender(connection, product_id)
        recommended = similarity_recommender.recommend_products(
            recommendations_count, find_similar_items_related,
            count_similarities_related)
        # Collect the ids while the connection is still open.
        return [product.id for product in recommended]
    finally:
        # Release the connection even when the recommender raises.
        connection.close()
示例#6
0
def get_top_10_by_rating(items: list) -> list:
    """Pick up to 10 of the given item ids, favouring the best rated ones.

    The ``overallRating`` of every given id is read from ``item``. The 20
    highest rated ids are shuffled and the first 10 of them are returned, so
    repeated calls for the same input vary a little.

    Args:
        items: Item ids to choose from. Each id is bound to the query as its
            ``str()`` form; duplicates are ignored.

    Returns:
        At most 10 ids that exist in ``item``. An empty list when ``items``
        is empty (no query is issued in that case).
    """
    # Deduplicate (keeping order) so an id cannot be fetched twice once the
    # ids are split into batches below.
    unique_ids = list(dict.fromkeys(str(item_id) for item_id in items))
    if not unique_ids:
        return []

    # Ids are bound as parameters rather than formatted into the SQL text.
    # Batching keeps each statement well below the database's limit on bound
    # parameters (presumably SQLite here, where older builds default to 999).
    batch_size = 500
    rated_pairs = []
    with create_connection() as connection:
        for start in range(0, len(unique_ids), batch_size):
            batch = unique_ids[start:start + batch_size]
            placeholders = ",".join("?" * len(batch))
            cursor = connection.execute(
                "SELECT id, overallRating FROM item WHERE id "
                "IN ({})".format(placeholders), batch)
            rated_pairs.extend((row[0], row[1]) for row in cursor.fetchall())

    rated_pairs.sort(key=lambda pair: pair[1], reverse=True)

    # adds a bit of randomness to the result
    top_rated = rated_pairs[:20]
    shuffle(top_rated)

    return [pair[0] for pair in top_rated[:10]]
示例#7
0
def recommend_products_by_related(product_id: str, modification_type: str) -> list:
    """Recommend products that ``item_related_list`` links to ``product_id``.

    When ``modification_type`` is ``"all"`` every related item of the product
    is a candidate; any other value is used as the ``relation`` the rows must
    have. The candidates are passed to ``get_top_10_by_rating`` and its result
    is returned.
    """
    if modification_type == "all":
        query = "SELECT relatedItemId FROM item_related_list WHERE itemId=(?)"
        parameters = (product_id,)
    else:
        query = "SELECT relatedItemId FROM item_related_list WHERE itemId=(?) AND relation=(?)"
        parameters = (product_id, modification_type)

    with create_connection() as connection:
        related_rows = connection.execute(query, parameters).fetchall()
        related_ids = [related_row[0] for related_row in related_rows]

        return get_top_10_by_rating(related_ids)
def recommend_products_randomly(product_id: str) -> list:
    """Return up to 10 randomly chosen item ids, never ``product_id`` itself.

    Args:
        product_id: Id of the product the recommendations are shown for; it
            is excluded from the result.

    Returns:
        At most 10 ids from ``item`` in random order. Fewer than 10 are
        returned only when the table does not hold enough other items.
    """
    connection = create_connection()

    with connection:
        # Ask for one spare row so that dropping product_id still leaves 10.
        # Filtering a single random sample avoids re-drawing replacements one
        # by one, which could add an id already in the list or loop forever
        # when the table holds no other item.
        cursor = connection.execute(
            "SELECT id FROM item ORDER BY RANDOM() LIMIT 11;")
        candidates = [row[0] for row in cursor.fetchall() if row[0] != product_id]

        return candidates[:10]
def get_mean_per_item_list():
    """Return the mean review rating of every reviewed item.

    Returns:
        A pandas Series indexed by ``itemId`` whose values are the mean of
        that item's ratings; missing ratings are skipped. Several reviews of
        one item by the same user are all included in the mean.
    """
    connection = create_connection()

    with connection:
        cursor = connection.cursor()
        cursor.execute("SELECT * FROM review")
        reviews = cursor.fetchall()
        cursor.close()

    columns = ['id', 'userId', 'itemId', 'rating', 'reviewTime']
    df_ratings = pd.DataFrame(reviews, columns=columns)

    # Group the ratings by item directly: this yields the same per-item means
    # as averaging the rows of an item x user pivot, without materialising
    # that (mostly empty) matrix.
    mean_per_item = df_ratings.groupby('itemId')['rating'].mean()
    # Row means of a pivoted frame carry no name; keep the result unnamed.
    mean_per_item.name = None
    return mean_per_item
def categories_statistics():
    """Write category size statistics to two text files.

    The number of ``item_category_list`` rows per ``categoryId`` is read from
    the database, then:

    * ``../data/statistics/category_products_counts.txt`` receives every
      count in ascending order, one per line;
    * ``../data/statistics/category_products_counts_statistics.txt`` receives
      tab-separated ``x<TAB>y`` lines, meaning that ``y`` categories have
      exactly ``x`` products, ordered by ``x``.

    Both files start with a one-line Czech header and are overwritten. The
    paths are relative to the current working directory.
    """
    connection = create_connection()
    try:
        counts_cursor = connection.execute(
            "SELECT count(itemId) FROM item_category_list GROUP BY categoryId;")
        category_items_counts = [row[0] for row in counts_cursor]
    finally:
        # Every row has been read into the list above, so the connection can
        # be released before any file is written.
        connection.close()

    category_items_counts_sorted = sorted(category_items_counts)
    # (count, number of categories with that count) pairs ordered by count
    counts_histogram = sorted(Counter(category_items_counts).items())

    with open('../data/statistics/category_products_counts.txt', 'w', encoding='utf-8') as outp:
        # Header (Czech): "Product counts for the individual categories, one after another."
        outp.write('Postupně pro jednotlivé kategorie počty produktů.\n')
        outp.writelines('{}\n'.format(count) for count in category_items_counts_sorted)

    with open('../data/statistics/category_products_counts_statistics.txt', 'w', encoding='utf-8') as outp:
        # Header (Czech): "x<TAB>y ... There are y categories that have exactly x products."
        outp.write('x\ty ... Je y kategorií, které mají právě x produktů.\n')
        outp.writelines('{}\t{}\n'.format(products, categories)
                        for products, categories in counts_histogram)
示例#11
0
    def run(self):
        """Prepare the database tables, load all input files and report the duration.

        Steps, in order: reset the module-level ``LAST_TIME_CHECKPOINT``,
        prepare the tables, parse the review file, parse the meta file (using
        the ratings gathered from the reviews), parse related items and
        categories, switch ``foreign_keys`` on, and print the elapsed time.

        The connection is closed even when one of the steps raises; the
        exception itself still propagates to the caller.
        """
        global LAST_TIME_CHECKPOINT
        LAST_TIME_CHECKPOINT = datetime.now()
        start_time = datetime.now()

        # prepare DB
        self.log("Preparing DB...")
        db_con = create_connection()
        try:
            self.prepare_tables(db_con)
            self.log_billboard(["Preparation of DB is DONE!"])

            # parse input files
            ratings_by_id = self.parse_review_file(db_con)
            self.parse_meta_file(db_con, ratings_by_id)
            self.parse_related_and_categories(db_con)

            # NOTE(review): if this is SQLite, the foreign_keys pragma only
            # applies to the current connection, so enabling it right before
            # close() presumably has no lasting effect - confirm the intent.
            db_con.execute("PRAGMA foreign_keys = on")
        finally:
            db_con.close()
        end_time = datetime.now()

        print("DONE - Script execution took: {}".format(end_time - start_time))
示例#12
0
def get_item_dict(item_id: str) -> dict:
    """Load one item and return it as a dict ready for rendering.

    The row is read positionally, so the code relies on the columns of
    ``item`` being ordered: id, title, description, price, imageUrl,
    salesCategory, salesRank, overallRating.

    Args:
        item_id: Id of the item to load. It is bound as a query parameter,
            never formatted into the SQL text.

    Returns:
        A dict with the keys ``id``, ``title``, ``description``, ``price``,
        ``imageUrl``, ``salesCategory``, ``salesRank``, ``overallRating`` and
        ``percentageRating`` (``overallRating`` as a percentage of 5). Text
        fields are HTML-unescaped; empty values and the literal text
        ``"None"`` are replaced by a placeholder or an empty string.

    Raises:
        Whatever ``abort(404)`` raises when no item has the given id.
    """
    def unescaped_or(value, fallback):
        # Stored text may be empty or the literal string "None".
        return html.unescape(value) if value and value != "None" else fallback

    connection = create_connection()

    with connection:
        cursor = connection.execute("SELECT * FROM item WHERE id=(?)", (item_id,))
        item = cursor.fetchone()

        if item is None:
            abort(404)

        return {
            "id": item[0],
            "title": unescaped_or(item[1], "<TITLE>"),
            "description": unescaped_or(item[2], ""),
            "price": item[3] if item[3] else "<PRICE>",
            "imageUrl": item[4] if item[4] else "",
            "salesCategory": unescaped_or(item[5], ""),
            "salesRank": item[6],
            "overallRating": item[7],
            "percentageRating": ((item[7] * 100) / 5)
        }
示例#13
0
def feedback(item_id: str):
    """Store the submitted algorithm ratings for ``item_id`` and move on.

    For every name in ``TYPES_OF_ALGORITHMS`` the matching form field is read
    and converted with ``get_int_value``; the values are inserted, together
    with ``item_id``, as one row of ``algo_evaluation``. All values are bound
    as query parameters, so a value of ``None`` is stored as NULL.

    Args:
        item_id: Id of the item the feedback belongs to.

    Returns:
        A redirect to the product page of a randomly chosen item, with
        ``submitted_feedback=true`` in the query string.
    """
    connection = create_connection()

    with connection:
        to_insert = [item_id]
        # NOTE(review): presumably TYPES_OF_ALGORITHMS lists the nine form
        # fields in the same order as the columns after itemId - verify.
        to_insert.extend(
            get_int_value(request.form.get(algo_type, None))
            for algo_type in TYPES_OF_ALGORITHMS)

        # One placeholder per value; nothing user-supplied reaches the SQL text.
        placeholders = ", ".join("?" * len(to_insert))
        query = '''
            INSERT INTO algo_evaluation(
                itemId,
                random,
                relatedAll, relatedAlsoBought, relatedAlsoViewed,
                sameCategory, siblingCategory,
                collaborativeFiltering,
                contentBased, contentBasedWithCategory)
            VALUES ({})
        '''.format(placeholders)
        connection.execute(query, to_insert)

    item_id = get_random_item_id()
    new_url = "/product/{}?submitted_feedback=true".format(item_id)

    return redirect(new_url)
 def __init__(self):
     """Create a connection with ``create_connection()`` and keep it on ``self.connection``."""
     self.connection = create_connection()