def price_tracker_job():
    """Scrape every tracked product once, store its price and send alerts.

    Product rows come from ``db_utils.get_all_products()``; the first element
    of each row is used as the product URL. For every URL that can be
    scraped, the current price is inserted and, when ``is_price_change``
    reports a change in the stored price list, ``db_utils.alert_user_email``
    is called with the new price and the previous one. URLs that could not be
    scraped are collected and handed to ``db_utils.alert_admin_tracker``
    together with the full URL list. The elapsed time is printed at the end.
    """
    started_at = time.perf_counter()

    product_urls = [row[0] for row in db_utils.get_all_products()]
    unscraped_urls = []

    for URL in tqdm(product_urls):
        details = scraper_utils.extract_amazon_url(URL)
        if details:
            asin = details["ASIN"]
            current_price = details["price"]

            # Price row layout: (asin, price, datetime)
            db_utils.insert_price((asin, current_price, datetime.now()))

            price_history = db_utils.get_price_from_asin(asin)["price"]
            if is_price_change(price_history):
                # price_history[-2] is the entry just before the one
                # inserted above, i.e. the previous price.
                db_utils.alert_user_email(
                    asin, details["name"], current_price, price_history[-2])
        else:
            unscraped_urls.append(URL)
            print("Cannot scrape URL: {}".format(URL))

    # The timer stops before the admin notification is sent.
    finished_at = time.perf_counter()
    db_utils.alert_admin_tracker(unscraped_urls, product_urls)
    print("...finish tracking at {}, in {} seconds ...".format(
        datetime.now(), finished_at - started_at))
def track_url(URL):
    """Scrape one product page and store the product (if new) and its price.

    Returns ``None`` in every case. When ``scraper_utils.extract_amazon_url``
    yields nothing for ``URL``, no database call is made.
    """
    details = scraper_utils.extract_amazon_url(URL)
    if not details:
        # Nothing was scraped: None means an invalid URL (404).
        return

    # Register the product only when its ASIN is not in the database yet.
    known_product = db_utils.get_product_from_asin(details["ASIN"])
    if not known_product:
        db_utils.insert_product((
            details["ASIN"],
            details["name"],
            int(details["isDeal"]),
            details["cat1"],
            details["cat2"],
            details["rating"],
            details["nVotes"],
            details["availability"],
            details["imageURL"],
            details["url"],
        ))

    # Price row layout: (asin, price, datetime)
    db_utils.insert_price((details["ASIN"], details["price"], datetime.now()))
def track_url(URL):
    """Scrape one product URL, record its price and alert users on a change.

    The call first sleeps for ``WAIT_TIME`` seconds (presumably to throttle
    requests to the site -- confirm against the caller), then scrapes the
    page.

    Args:
        URL: Product page URL to scrape.

    Returns:
        ``URL`` when the page could not be scraped, so that callers can
        collect failures; ``None`` when the price was recorded.
    """
    time.sleep(WAIT_TIME)

    # Request HTML response from the page and extract info from it
    details = scraper_utils.extract_amazon_url(URL)
    if not details:
        print("Cannot scrape URL: {}".format(URL))
        return URL

    try:
        # Insert data into prices -> (asin, price, datetime)
        price_details = (details["ASIN"], details["price"], datetime.now())
        db_utils.insert_price(price_details)

        list_prices = db_utils.get_price_from_asin(details["ASIN"])["price"]

        # Email alert users only if price changes with the previous one
        if is_price_change(list_prices):
            db_utils.alert_user_email(details["ASIN"], details["name"],
                                      details["price"], list_prices[-2])
    finally:
        # Close db session after each update, including when an insert,
        # lookup or e-mail alert raises, so a failure does not leave the
        # session open.
        db_utils.db_close()

    return None
def helper_add_new_product_from_user(product_url):
    """Scrape a user-supplied product URL and store the product and its price.

    Inserting the product row is best-effort: a failure there (for example
    because the product is already stored -- an assumption, the exact cause
    is not visible here) is logged and does not stop the price from being
    recorded.

    Args:
        product_url: URL of the product page submitted by the user.

    Returns:
        ``None`` when the URL could not be scraped; otherwise the tuple
        ``(details, price_details)`` where ``price_details`` is
        ``(asin, price, datetime)``.
    """
    import logging
    from datetime import datetime

    details = extract_amazon_url(product_url)
    if not details:
        # Invalid URL
        return None

    try:
        product_details = (
            details["ASIN"], details["name"], int(details["isDeal"]),
            details["cat1"], details["cat2"], details["rating"],
            details["nVotes"], details["availability"], details["imageURL"],
            details["url"],
        )
        db_utils.insert_product(product_details)
    except Exception as exc:
        # Keep the insert best-effort, but no longer swallow
        # KeyboardInterrupt/SystemExit, and leave a trace of what went wrong.
        logging.getLogger(__name__).warning(
            "Could not insert product for %s: %r", product_url, exc)

    # Insert data into prices -> (asin, price, datetime)
    curr_time = datetime.now()
    price_details = (details["ASIN"], details["price"], curr_time)
    db_utils.insert_price(price_details)

    return details, price_details
# One-off seeding script: reads product URLs from "URL_first_time_only.txt"
# (next to this file), scrapes each one, inserts products that are not in the
# database yet and records their current price.

# Make the project root and the database directory importable.
my_dir = os.path.dirname(os.path.realpath(__file__))
root_dir = os.path.join(my_dir, r"../")
sys.path.append(root_dir)  # root directory
import file_path

sys.path.append(file_path.database_dir)
import db_utils

with open(os.path.join(my_dir, "URL_first_time_only.txt"), 'r') as f:
    # The first two lines of the file are skipped.
    all_lines = f.readlines()[2:]

# Strip surrounding whitespace instead of blindly dropping the last character:
# the final line may not end with a newline, and slicing would then truncate
# that URL. Blank lines are ignored.
URLs = [line.strip() for line in all_lines if line.strip()]

for URL in tqdm(URLs):
    # Request HTML response from the page and extract info from it
    details = scraper_utils.extract_amazon_url(URL)
    if not details:
        # If None, invalid URL (404)
        continue

    # Insert product if it is not in database yet
    if not db_utils.get_product_from_asin(details["ASIN"]):
        product_details = (
            details["ASIN"], details["name"], int(details["isDeal"]),
            details["cat1"], details["cat2"], details["rating"],
            details["nVotes"], details["availability"], details["imageURL"],
            details["url"],
        )
        db_utils.insert_product(product_details)

    # Insert data into prices -> (asin, price, datetime)
    price_details = (details["ASIN"], details["price"], datetime.now())
    db_utils.insert_price(price_details)