# Example #1
def price_tracker_job():
    """Scrape every tracked product once, record prices, and send alerts.

    Pulls all product URLs from the database, scrapes each page, appends
    the current price to the price history, and emails users whose price
    just changed. Finally notifies the admin with the list of URLs that
    failed to scrape and prints the total elapsed time.
    """
    started = time.perf_counter()
    # First column of each product row is the URL.
    urls = [row[0] for row in db_utils.get_all_products()]
    unreachable = []

    for url in tqdm(urls):
        # Fetch and parse the product page; a falsy result means the scrape failed.
        info = scraper_utils.extract_amazon_url(url)
        if not info:
            unreachable.append(url)
            print("Cannot scrape URL: {}".format(url))
            continue

        # Record one (asin, price, timestamp) sample in the price history.
        db_utils.insert_price((info["ASIN"], info["price"], datetime.now()))

        history = db_utils.get_price_from_asin(info["ASIN"])["price"]
        # Email users only when the latest price differs from the previous one.
        if is_price_change(history):
            db_utils.alert_user_email(info["ASIN"], info["name"],
                                      info["price"], history[-2])

    elapsed = time.perf_counter() - started
    db_utils.alert_admin_tracker(unreachable, urls)
    print("...finish tracking at {}, in {} seconds ...".format(
        datetime.now(), elapsed))
def track_url(URL):
    """Scrape a single product page and persist its data.

    Registers the product in the database on first sight, then appends
    the current price to the price history. Returns None either way; an
    unscrapable URL (404) is silently skipped.
    """
    info = scraper_utils.extract_amazon_url(URL)

    if not info:
        # Invalid URL (404) -- nothing to record.
        return

    # First time we see this ASIN: register the product itself.
    if not db_utils.get_product_from_asin(info["ASIN"]):
        db_utils.insert_product((
            info["ASIN"], info["name"], int(info["isDeal"]),
            info["cat1"], info["cat2"], info["rating"], info["nVotes"],
            info["availability"], info["imageURL"], info["url"],
        ))

    # Append one (asin, price, timestamp) sample to the price history.
    db_utils.insert_price((info["ASIN"], info["price"], datetime.now()))
def track_url(URL):
    """Scrape one tracked URL, record its price, and alert users on change.

    Sleeps WAIT_TIME first to throttle requests. Returns the URL when the
    scrape fails (so the caller can collect failures), otherwise None.
    Closes the database session after a successful update.
    """
    time.sleep(WAIT_TIME)

    info = scraper_utils.extract_amazon_url(URL)
    if not info:
        print("Cannot scrape URL: {}".format(URL))
        return URL

    # Record one (asin, price, timestamp) sample in the price history.
    db_utils.insert_price((info["ASIN"], info["price"], datetime.now()))

    history = db_utils.get_price_from_asin(info["ASIN"])["price"]
    # Email users only when the latest price differs from the previous one.
    if is_price_change(history):
        db_utils.alert_user_email(info["ASIN"], info["name"],
                                  info["price"], history[-2])

    # Close db session after each update
    db_utils.db_close()
# Example #4
def helper_add_new_product_from_user(product_url):
    """Add a user-submitted product URL to the database.

    Scrapes the page, inserts the product (best-effort: an insert
    failure, e.g. the product already exists, is logged and ignored),
    then records the current price.

    Args:
        product_url: Amazon product page URL supplied by the user.

    Returns:
        A ``(details, price_details)`` tuple on success, or ``None``
        when the URL cannot be scraped.
    """
    from datetime import datetime

    details = extract_amazon_url(product_url)
    if not details:
        # Invalid URL -- the scraper returned nothing.
        return None

    try:
        product_details = (details["ASIN"], details["name"], int(details["isDeal"]),
                           details["cat1"], details["cat2"], details["rating"],
                           details["nVotes"], details["availability"],
                           details["imageURL"], details["url"])
        db_utils.insert_product(product_details)
    except Exception as exc:
        # Best-effort insert: the product may already be in the table.
        # Was a bare `except: pass`, which also swallowed
        # KeyboardInterrupt/SystemExit and hid real failures silently.
        print("Could not insert product {}: {}".format(product_url, exc))

    # Insert data into prices -> (asin, price, datetime)
    curr_time = datetime.now()
    price_details = (details["ASIN"], details["price"], curr_time)
    db_utils.insert_price(price_details)

    return details, price_details
my_dir = os.path.dirname(os.path.realpath(__file__))
root_dir = os.path.join(my_dir, r"../")

sys.path.append(root_dir)  # root directory
import file_path

sys.path.append(file_path.database_dir)
import db_utils

with open(os.path.join(my_dir, "URL_first_time_only.txt"), 'r') as f:
    # Skip the two header lines, then strip the trailing newline from
    # each URL. rstrip('\n') (instead of the old url[:-1]) avoids
    # chopping the last character of the final line when the file does
    # not end with a newline.
    all_lines = f.readlines()[2:]
    URLs = [url.rstrip('\n') for url in all_lines]

# One-time bootstrap: scrape every seed URL, registering unseen products
# and recording an initial price sample for each.
for URL in tqdm(URLs):
    # Fetch and parse the product page.
    details = scraper_utils.extract_amazon_url(URL)

    if not details:
        # Invalid URL (404) -- skip it.
        continue

    # Register the product the first time this ASIN is seen.
    if not db_utils.get_product_from_asin(details["ASIN"]):
        db_utils.insert_product((
            details["ASIN"], details["name"], int(details["isDeal"]),
            details["cat1"], details["cat2"], details["rating"], details["nVotes"],
            details["availability"], details["imageURL"], details["url"],
        ))

    # Append one (asin, price, timestamp) sample to the price history.
    db_utils.insert_price((details["ASIN"], details["price"], datetime.now()))