示例#1
0
def parse_products():
    """Merge the latest product listings into ``products.json``.

    Products for the categories named in ``categories_with_specs`` are read
    from ``{SPECS_DIR}/{date}/{category}-list.json``. Every non-hidden entry
    of the latest products directory is then loaded as well, skipping any
    product whose ``source_id`` was already taken from a specs list so the
    same product is not dumped twice.

    The merged list is written to ``{DB_DUMPS_DIR}/products.json`` as UTF-8
    with non-ASCII characters left unescaped.
    """
    date_latest = utils.parse_latest_date(PRODUCTS_DIR)
    products_latest = f"{PRODUCTS_DIR}/{date_latest}"

    # Hidden entries (names starting with ".") are not category files.
    categories = [
        c for c in os.listdir(products_latest) if not c.startswith(".")
    ]
    categories_with_specs = (
        "desktops",
        "notebooks",
    )

    products = []
    used_products = set()

    for category in categories_with_specs:
        category_products = load_json(
            f"{SPECS_DIR}/{date_latest}/{category}-list.json")
        used_products.update(p["source_id"] for p in category_products)
        products.extend(category_products)

    for category in categories:
        category_products = load_json(f"{products_latest}/{category}")
        # The specs lists take precedence: drop products already collected
        # above. Entries without a "source_id" are kept as they are.
        products.extend(
            p for p in category_products
            if p.get("source_id") not in used_products
        )

    # ensure_ascii=False emits raw non-ASCII text, so the file encoding must
    # be pinned instead of depending on the platform locale.
    with open(f"{DB_DUMPS_DIR}/products.json", "w", encoding="utf-8") as f:
        json.dump(products, f, ensure_ascii=False)
示例#2
0
def parse_reviews():
    """Flatten the latest scraped reviews into ``reviews.json``.

    Walks ``{REVIEWS_DIR}/{latest date}/{category}/{product}.json``, ignoring
    entries whose name starts with ".", and turns every item of each file's
    ``"data"`` list into one flat dict. The product id is the file name up to
    ".json"; the review date is parsed with the ``%d.%m.%Y`` format. The
    result is serialized with ``orjson`` into
    ``{DB_DUMPS_DIR}/reviews.json``.
    """
    latest = utils.parse_latest_date(REVIEWS_DIR)
    reviews_root = f"{REVIEWS_DIR}/{latest}"

    collected = []
    for category in os.listdir(reviews_root):
        if category.startswith("."):
            continue

        for filename in os.listdir(f"{reviews_root}/{category}"):
            if filename.startswith("."):
                continue

            # File name without the ".json" suffix identifies the product.
            product_id = filename[: filename.index(".json")]
            payload = load_json(f"{reviews_root}/{category}/{filename}")

            for raw in payload["data"]:
                comment = raw["comment"]
                entry = {
                    "product_id": product_id,
                    "source_id": raw["id"],
                    "date": datetime.strptime(raw["date"], "%d.%m.%Y"),
                    "rating": raw["rating"],
                    "comment_plus": comment["plus"],
                    "comment_minus": comment["minus"],
                    "comment_text": comment["text"],
                }

                approved, rated = _parse_approved_rated(
                    raw["feedback"]["reviewsRating"])
                entry["review_approved"] = approved
                entry["review_rated"] = rated
                collected.append(entry)

    # orjson.dumps returns bytes, hence the binary mode.
    with open(f"{DB_DUMPS_DIR}/reviews.json", "wb") as f:
        f.write(orjson.dumps(collected))
示例#3
0
def download_tweet_sets():
    """Show the tweets-set download form and create a set on submit.

    A user with an active test session is redirected to ``test_tweets``.
    On a valid submission a new set is created via ``create_tweets_set``,
    recorded in the tweets-sets file, and the page is reloaded. Otherwise
    the form is rendered together with the known sets, newest first.
    """
    if "session_id" in session:
        return redirect(url_for("test_tweets"))

    sets_file = app.config['TWEETS_SETS_FILE']
    known_sets = load_json(sets_file) if os.path.exists(sets_file) else []

    form = TweetsSetDownloadForm()
    if not form.validate_on_submit():
        return render_template("tweets_set_download.html",
                               form=form,
                               tweets_sets=known_sets[::-1])

    set_name = form.set_name.data
    search_query = form.search_query.data

    # The id is the set name (spaces replaced by underscores) followed by
    # the current Unix timestamp in whole seconds.
    created_at = int(datetime.now().timestamp())
    set_id = set_name.replace(" ", "_") + "_" + str(created_at)

    tweets_number = int(form.tweets_number.data)
    bufale_pages = int(form.bufale_pages.data)

    create_tweets_set(set_id, search_query, tweets_number, bufale_pages)

    known_sets.append({
        "id": set_id,
        "set_name": set_name,
        "search_query": search_query,
        "tweets_number": tweets_number,
        "bufale_pages": bufale_pages,
    })
    write_json(sets_file, known_sets)
    return redirect(url_for("download_tweet_sets"))
示例#4
0
def start_test():
    """Register a participant and start a test session.

    A user who already has a session goes straight to ``test_tweets``. If no
    tweets-sets file exists yet, the user is sent to ``download_tweet_sets``.
    On a valid submission the participant is appended to the sessions file,
    the details are stored in the Flask session, and the user is redirected
    to ``test_tweets``. Otherwise the start form is rendered.
    """
    if "session_id" in session:
        return redirect(url_for("test_tweets"))

    sessions_file = app.config['SESSIONS_FILE']
    if os.path.exists(sessions_file):
        past_sessions = load_json(sessions_file)
    else:
        past_sessions = []

    sets_file = app.config['TWEETS_SETS_FILE']
    if not os.path.exists(sets_file):
        return redirect(url_for("download_tweet_sets"))

    # Choices for the select field, newest set first: (value, label) pairs
    # where the label shows the set id and its search query.
    set_choices = []
    for entry in reversed(load_json(sets_file)):
        label = entry['id'] + " (" + entry["search_query"] + ")"
        set_choices.append((entry['id'], label))

    form = UserForm()
    form.tweets_set_to_use.choices = set_choices
    if not form.validate_on_submit():
        return render_template("start_test.html", form=form)

    username = form.username.data
    # Session id: username with underscores for spaces plus the current
    # Unix timestamp in whole seconds.
    session_id = username.replace(" ", "_") + "_" + str(
        int(datetime.now().timestamp()))

    past_sessions.append({
        "id": session_id,
        "username": username,
        "age": form.age.data,
        "gender": form.gender.data,
    })
    write_json(sessions_file, past_sessions)

    session["session_id"] = session_id
    session["username"] = username
    session["age"] = form.age.data
    session["gender"] = form.gender.data
    session["tweets_set_id"] = form.tweets_set_to_use.data
    session["start_timestamp"] = int(datetime.now().timestamp())

    return redirect(url_for("test_tweets"))
示例#5
0
def parse_categories():
    """Collect the distinct product categories into ``categories.json``.

    Category (name, id) pairs are taken first from the specs lists of the
    "desktops" and "notebooks" categories, then from every non-hidden file
    of the latest products directory. Products whose ``category_id`` was
    already seen in a specs list are skipped in the second pass; the
    remaining names have "%20" replaced by a space. The pairs are sorted by
    name, then id, and dumped as ``{"name", "source_id"}`` objects.
    """
    latest = utils.parse_latest_date(f"{PRODUCTS_DIR}")
    listing_dir = f"{PRODUCTS_DIR}/{latest}"
    listing_files = [
        name for name in os.listdir(listing_dir) if not name.startswith(".")
    ]

    pairs = set()
    spec_category_ids = set()
    for spec_category in ("desktops", "notebooks"):
        spec_items = load_json(
            f"{SPECS_DIR}/{latest}/{spec_category}-list.json")
        for item in spec_items:
            pairs.add((item["category_name"], item["category_id"]))
            spec_category_ids.add(item["category_id"])

    for listing in listing_files:
        for item in load_json(f"{listing_dir}/{listing}"):
            if item["category_id"] not in spec_category_ids:
                decoded_name = item["category_name"].replace("%20", " ")
                pairs.add((decoded_name, item["category_id"]))

    # Plain tuple ordering already sorts by name first, then by id.
    dump = [
        {"name": name, "source_id": source_id}
        for name, source_id in sorted(pairs)
    ]

    with open(f"{DB_DUMPS_DIR}/categories.json", "w") as f:
        json.dump(dump, f)
示例#6
0
    def start_requests(self):
        """Yield one request per product that has no dumped details yet.

        Products come from the JSON file at ``self.products_json``. A
        product is skipped when its ``source_id`` is already in the set
        returned by ``db_utils.get_dumped_product_details()``. Ids are added
        to that set as requests are issued, so a ``source_id`` repeated in
        the file is requested only once.
        """
        seen_ids = db_utils.get_dumped_product_details()

        for item in load_json(self.products_json):
            if item["source_id"] not in seen_ids:
                yield scrapy.Request(
                    url=item["url"],
                    callback=self.parse_product,
                    cb_kwargs={"product": item},
                )
                seen_ids.add(item["source_id"])
示例#7
0
def parse_products():
    """Concatenate the latest product listings into ``products.json``.

    Every non-hidden entry of ``{PRODUCTS_DIR}/{latest date}`` is loaded as
    a JSON list of products. The lists are concatenated in directory-listing
    order and written to ``{DB_DUMPS_DIR}/products.json`` as UTF-8 with
    non-ASCII characters left unescaped.
    """
    date_latest = utils.parse_latest_date(PRODUCTS_DIR)
    products_latest = f"{PRODUCTS_DIR}/{date_latest}"

    products = []
    for category in os.listdir(products_latest):
        # Hidden entries (names starting with ".") are not category files.
        if category.startswith("."):
            continue
        products.extend(load_json(f"{products_latest}/{category}"))

    # ensure_ascii=False emits raw non-ASCII text, so the file encoding must
    # be pinned instead of depending on the platform locale.
    with open(f"{DB_DUMPS_DIR}/products.json", "w", encoding="utf-8") as f:
        json.dump(products, f, ensure_ascii=False)
示例#8
0
def parse_specs():
    """Dump the processed specs of the latest scrape into ``specs.json``.

    For each of the "desktops" and "notebooks" categories, loads
    ``{SPECS_DIR}/{latest date}/{category}-specs.json`` and passes every
    product through ``processed_specs``. The results are written to
    ``{DB_DUMPS_DIR}/specs.json`` as UTF-8 with non-ASCII characters left
    unescaped.
    """
    date_latest = utils.parse_latest_date(SPECS_DIR)
    categories_with_specs = (
        "desktops",
        "notebooks",
    )

    specs = []
    for category in categories_with_specs:
        products = load_json(
            f"{SPECS_DIR}/{date_latest}/{category}-specs.json")
        specs.extend(processed_specs(product) for product in products)

    # ensure_ascii=False emits raw non-ASCII text, so the file encoding must
    # be pinned instead of depending on the platform locale.
    with open(f"{DB_DUMPS_DIR}/specs.json", "w", encoding="utf-8") as f:
        json.dump(specs, f, ensure_ascii=False)
示例#9
0
def test_tweets():
    """Run the tweet-rating test for the current session.

    Users without a session are sent to ``start_test``. The tweets set
    selected for the session is loaded and a form with one required
    True / Maybe / Fake radio field per tweet is built dynamically. On a
    valid submission the answers and the session details are written to
    ``{SESSIONS_DIR}/{session_id}.json`` and the user is redirected to
    ``results``. Otherwise the test page is rendered.
    """
    if "session_id" not in session:
        return redirect(url_for("start_test"))

    set_path = os.path.join(app.config["TWEETS_SETS_DIR"],
                            session["tweets_set_id"] + ".json")
    tweets = load_json(set_path)

    # Fields are attached to a throwaway subclass so that the shared
    # TestForm class is not modified.
    class DynamicTestForm(TestForm):
        pass

    for tweet in tweets:
        field_name = tweet["progressive"]
        radio = RadioField(
            field_name,
            choices=[(label, label) for label in ("True", "Maybe", "Fake")],
            id=tweet["id"],
            validators=[InputRequired()],
        )
        setattr(DynamicTestForm, field_name, radio)

    form = DynamicTestForm()

    if not form.validate_on_submit():
        # Pass the tweets and their count to the test page.
        return render_template("test_tweets.html",
                               set_length=len(tweets),
                               tweets=tweets,
                               form=form)

    # Map each tweet id to the option the user picked for it.
    answers = {
        tweet["id"]: form[tweet["progressive"]].data for tweet in tweets
    }

    finished_session = {
        "id": session["session_id"],
        "username": session["username"],
        "age": session["age"],
        "gender": session["gender"],
        "tweets_set_id": session["tweets_set_id"],
        "start_timestamp": session["start_timestamp"],
        "finish_timestamp": int(datetime.now().timestamp()),
        "user_choices": answers,
    }

    result_path = os.path.join(app.config['SESSIONS_DIR'],
                               session["session_id"] + ".json")
    write_json(result_path, finished_session)
    return redirect(url_for("results"))
示例#10
0
def _insert_from_source(source_file, query):
    """Bulk-insert the rows of a dump file from the latest dump directory.

    Loads ``{DB_DUMPS_DIR}/{latest date}/{source_file}`` and hands the
    loaded data to ``bulk_insert_dicts`` on a fresh ``LocalSession``
    together with ``query``.
    """
    session = LocalSession()
    latest = utils.parse_latest_date(DB_DUMPS_DIR)

    dump_path = f"{DB_DUMPS_DIR}/{latest}/{source_file}"
    rows = load_json(dump_path)  # expected to be a list of dicts
    session.bulk_insert_dicts(query, rows)
示例#11
0
""" A file full of constants.

September 2, 2016
"""

from app import utils

# Application settings; among other things they hold the paths of the data
# files loaded below.
settings = utils.load_json("data/settings.json")

# ---------------------------- CONSTANTS ----------------------------

# Each constant is the parsed content of the JSON file whose path is stored
# under the matching "<name>_path" key in the settings.
assignments, students, info = (
    utils.load_json(settings[path_key])
    for path_key in ('assignments_path', 'students_path', 'info_path')
)
示例#12
0
from flask import request, render_template, redirect, url_for
from app import app, utils, recommender, plotting
import os

# Data files loaded once at import time and shared by the views below.
tsne_weights = utils.load_npy('data/tsne_weights.npy')
embeddings = utils.load_npy('data/embeddings.npy')
# The JSON file holds exactly two items, unpacked as d and inv_d
# (presumably a subreddit lookup and its inverse -- TODO confirm).
d, inv_d = utils.load_json('data/subreddit_dicts.json')


@app.route('/', methods=['GET', 'POST'])
def base():
    """Landing page: show the dataset plot or forward a submitted query.

    On POST, reads ``input_name`` and ``num_recommendations`` from the
    submitted form and redirects to ``recs_for_subreddit`` for that
    subreddit. On any other request, renders ``base.html`` with the t-SNE
    3D plot built from ``tsne_weights`` and ``d``.
    """
    if request.method == 'POST':
        input_subreddit = request.form['input_name']
        num_recommendations = int(request.form['num_recommendations'])
        return redirect(
            url_for('recs_for_subreddit',
                    subreddit=input_subreddit,
                    num_recommendations=num_recommendations))

    # The plot is only needed when the page is rendered, so it is built
    # here rather than before the POST branch, which never uses it.
    dataset_tsne_3dplot = plotting.dataset_tsne_3dplot_subset(
        tsne_weights, d, 10000)
    return render_template('base.html', plot=dataset_tsne_3dplot)


@app.route('/<subreddit>', methods=['GET', 'POST'])
def recs_for_subreddit(subreddit=None):
    num_recommendations = request.args.get('num_recommendations')
    num_recommendations = int(
        num_recommendations) if num_recommendations is not None else 10
    num_recommendations = 10 if num_recommendations not in [
        5, 10, 15, 20
    ] else num_recommendations