def get_all_sites(category: str = None, domain: str = None):
    """
    Get all site urls, optionally filtered by category or domain
    :return:
    """
    conn = get_mysql_conn()
    cursor = conn.cursor()
    sql = """
        SELECT site_url, sitemap_url FROM scrapy_sites
    """
    # build the optional filters so that both can be combined in one WHERE clause
    conditions = []
    params = []
    if category:
        conditions.append("category = %s")
        params.append(category)
    if domain:
        conditions.append("site_url LIKE %s")
        params.append("%{}%".format(domain))
    if conditions:
        sql += " WHERE " + " AND ".join(conditions)
    cursor.execute(sql, params)
    conn.commit()
    rows = dictfecth(cursor)
    cursor.close()
    return rows
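# NOTE: get_mysql_conn(), dictfecth() and get_domain() are assumed to be imported
# from a shared db/helpers module that is not part of this file (the dictfecth
# spelling is kept exactly as it is used throughout the codebase). As a rough,
# assumption-based sketch only, a dict-fetch helper over a DB-API cursor usually
# looks something like this:
def _dictfetch_sketch(cursor):
    """Illustrative only: map each row of the current result set to a dict."""
    columns = [col[0] for col in cursor.description]
    return [dict(zip(columns, row)) for row in cursor.fetchall()]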
def save_link_fb_shares(site_link: str, r: dict) -> None:
    conn = get_mysql_conn()
    cursor = conn.cursor()
    site_link_title = r.get('og_object', {}).get('title', '')
    fb_description = r.get('og_object', {}).get('description', '')
    fb_type = r.get('og_object', {}).get('type', '')
    fb_shares = r.get('engagement', {}).get('share_count', 0)
    fb_updated_time = r.get('og_object', {}).get('updated_time', '')
    fb_graph_object = ujson.dumps(r)
    # is_viral = if the link got a share it could be viral
    is_viral = "yes"
    if fb_shares < 1:
        is_viral = "no"
    sql = """
        INSERT INTO facebook_most_shared(site_link, site_link_title, fb_description, fb_type,
        fb_shares, fb_updated_time, fb_graph_object, is_viral)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
    """
    try:
        # parameterized query: the driver handles quoting/escaping
        cursor.execute(sql, (site_link, site_link_title, fb_description, fb_type,
                             fb_shares, fb_updated_time, fb_graph_object, is_viral))
        conn.commit()
    except Exception as e:
        print("\nERROR! ({})".format(str(e)))
        conn.rollback()
    cursor.close()
    return
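def _example_save_link_fb_shares():
    """Illustrative usage only for save_link_fb_shares().

    The dict below only mimics the shape the function reads (og_object /
    engagement keys, as returned by a Facebook Graph API lookup); the URL and
    values are invented for the example.
    """
    sample_response = {
        "og_object": {
            "title": "Example title",
            "description": "Example description",
            "type": "article",
            "updated_time": "2019-01-01T00:00:00+0000",
        },
        "engagement": {"share_count": 42},
    }
    save_link_fb_shares("https://example.com/example-post", sample_response)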
def update_link_fb_shares(site_link: str, r: dict):
    conn = get_mysql_conn()
    cursor = conn.cursor()
    fb_shares = r.get('engagement', {}).get('share_count', 0)
    fb_updated_time = r.get('og_object', {}).get('updated_time', '')
    fb_graph_object = ujson.dumps(r)
    # is_viral = if the link got a share it could be viral
    is_viral = "yes"
    if fb_shares < 1:
        is_viral = "no"
    sql = """
        UPDATE facebook_most_shared
        SET fb_shares = %s, fb_updated_time = %s, fb_graph_object = %s, is_viral = %s
        WHERE site_link = %s
    """
    try:
        cursor.execute(sql, (fb_shares, fb_updated_time, fb_graph_object, is_viral, site_link))
        conn.commit()
    except Exception as e:
        print("ERROR! ({})\n".format(str(e)))
        conn.rollback()
    cursor.close()
    return
def get_fb_link_title(site_link):
    """
    Get the FB record for a given site_link
    :param site_link:
    :return:
    """
    conn = get_mysql_conn()
    cursor = conn.cursor()
    sql = """
        SELECT * FROM facebook_most_shared WHERE site_link = %s
    """
    try:
        cursor.execute(sql, (site_link,))
        conn.commit()
        rows = dictfecth(cursor)
        cursor.close()
    except Exception as e:
        print("\nERROR! ({})".format(str(e)))
        conn.rollback()
        return
    return rows
def save_ga_report(title, path, avg_top, top, start_date, users, adsense_clicks, adsense_revenue) -> None:
    conn = get_mysql_conn()
    cursor = conn.cursor()
    sql = """
        INSERT INTO ga_reports(page_title, page_path, avg_time_on_page, time_on_page, start_date,
        users, adsense_clicks, adsense_revenue)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
    """
    print("Saving GA report for: {}...\n".format(title))
    try:
        # round to 5 decimals
        avg_top = format(float(avg_top), '.5f')
        top = format(float(top), '.5f')
        adsense_clicks = format(float(adsense_clicks), '.5f')
        adsense_revenue = format(float(adsense_revenue), '.5f')
        cursor.execute(sql, (title, path, avg_top, top, start_date, users, adsense_clicks, adsense_revenue))
        conn.commit()
    except Exception as e:
        print("ERROR! ({})\n".format(str(e)))
        conn.rollback()
    cursor.close()
    return
def update_ga_report(avg_top, top, path, users, adsense_clicks, adsense_revenue) -> None:
    conn = get_mysql_conn()
    cursor = conn.cursor()
    # round to 5 decimals
    avg_top = format(float(avg_top), '.5f')
    top = format(float(top), '.5f')
    adsense_clicks = format(float(adsense_clicks), '.5f')
    adsense_revenue = format(float(adsense_revenue), '.5f')
    sql = """UPDATE ga_reports
        SET avg_time_on_page = %s, time_on_page = %s, users = %s, adsense_clicks = %s, adsense_revenue = %s
        WHERE page_path = %s
    """
    print("Updating GA report for: {}...\n".format(path))
    try:
        cursor.execute(sql, (avg_top, top, users, adsense_clicks, adsense_revenue, path))
        conn.commit()
    except Exception as e:
        print("ERROR! ({})\n".format(str(e)))
        conn.rollback()
    cursor.close()
    return
def save_campaign_report(r: dict) -> None:
    conn = get_mysql_conn()
    cursor = conn.cursor()
    email_subject = r.get('subject_line', 'No subject')
    emails_sent = r.get('emails_sent', 0)
    unique_opens = r.get('opens', {}).get('unique_opens', 0)
    open_rate = round(r.get('opens', {}).get('open_rate', 0) * 100, 2)
    response_object = ujson.dumps(r)
    sql = """
        INSERT INTO mailchimp_reports(email_subject, emails_sent, unique_opens, open_rate, response_object)
        VALUES (%s, %s, %s, %s, %s)
    """
    print("Saving report for: {}...\n".format(email_subject))
    try:
        cursor.execute(sql, (email_subject, emails_sent, unique_opens, open_rate, response_object))
        conn.commit()
    except Exception as e:
        print("ERROR! ({})\n".format(str(e)))
        conn.rollback()
    cursor.close()
    return
def get_top_game_last_entry(game_id):
    """
    Get the last stats for a given game
    :return:
    """
    conn = get_mysql_conn()
    cursor = conn.cursor()
    sql = """
        SELECT current_position FROM twitch_top_games
        WHERE game_id = %s
        ORDER BY insert_time DESC LIMIT 0,1
    """
    print(sql)
    cursor.execute(sql, (game_id,))
    conn.commit()
    rows = dictfecth(cursor)
    cursor.close()
    if rows:
        return rows[0]
    return False
def insert_top_game_stats(r, current_position, last_position):
    conn = get_mysql_conn()
    cursor = conn.cursor()
    game_id = r.get('id', 0)
    game_name = r.get('name', "")
    box_art_url = r.get('box_art_url', "")
    # current_position = r.get('current_position', "")
    # last_position = r.get('last_position', "")
    delta_position = int(last_position) - int(current_position)
    sql = """
        INSERT INTO twitch_top_games(game_id, game_name, box_art_url, current_position,
        last_position, delta_position)
        VALUES (%s, %s, %s, %s, %s, %s)
    """
    try:
        cursor.execute(sql, (game_id, game_name, box_art_url, current_position, last_position, delta_position))
        conn.commit()
    except Exception as e:
        print("ERROR! ({})\n".format(str(e)))
        conn.rollback()
    cursor.close()
    return
def update_campaign_report(email_subject: str, r: dict) -> None:
    conn = get_mysql_conn()
    cursor = conn.cursor()
    emails_sent = r.get('emails_sent', 0)
    unique_opens = r.get('opens', {}).get('unique_opens', 0)
    open_rate = round(r.get('opens', {}).get('open_rate', 0) * 100, 2)
    response_object = ujson.dumps(r)
    sql = """UPDATE mailchimp_reports
        SET unique_opens = %s, open_rate = %s, emails_sent = %s, response_object = %s
        WHERE email_subject = %s
    """
    print("Updating report for: {}...\n".format(email_subject))
    try:
        cursor.execute(sql, (unique_opens, open_rate, emails_sent, response_object, email_subject))
        conn.commit()
    except Exception as e:
        print("ERROR! ({})\n".format(str(e)))
        conn.rollback()
    cursor.close()
    return
def save_tweet(t: dict, keyword: str) -> None:
    conn = get_mysql_conn()
    cursor = conn.cursor()
    retweet_count = int(t.get('retweet_count', 0))
    created_at = t['created_at']
    tweet_id = t['id_str']
    tweet = t['text']
    user_id = t['user']['id_str']
    user_name = t['user']['screen_name']
    blob = ujson.dumps(t)
    tweet_link = "https://twitter.com/{}/status/{}".format(user_name, tweet_id)
    # TODO: add a site_link field and extract the link from the tweet
    sql = """
        INSERT INTO twitter_most_retweeted(created_at, tweet_id, retweet_count, tweet, user_id,
        user_name, tweet_blob, tweet_link, keyword)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
    """
    print("\nSaving tweet: {}...".format(tweet_link))
    try:
        cursor.execute(sql, (created_at, tweet_id, retweet_count, tweet, user_id, user_name, blob, tweet_link, keyword))
        conn.commit()
    except Exception as e:
        print("\nERROR! ({})".format(str(e)))
        conn.rollback()
    cursor.close()
    return
def get_allowed_domains():
    """
    Get all domains
    :return:
    """
    conn = get_mysql_conn()
    cursor = conn.cursor()
    sql = """
        SELECT site_url, sitemap_url FROM scrapy_sites
    """
    cursor.execute(sql)
    conn.commit()
    rows = dictfecth(cursor)
    # extract domains
    domains = list()
    for r in rows:
        d = get_domain(r['site_url'])
        domains.append(d)
    cursor.close()
    return domains
def update_link_title_descrip(id, title, description):
    """
    Update a link's title and description from a social network
    """
    conn = get_mysql_conn()
    cursor = conn.cursor()
    # update the link's title and description
    sql = """
        UPDATE links_shares
        SET link_title = %s, link_description = %s
        WHERE idlinks_shares = %s
    """
    try:
        cursor.execute(sql, (title, description, id))
        print("'{}' -> link updated.".format(title))
        conn.commit()
    except Exception as e:
        print("ERROR! ({})\n".format(str(e)))
        conn.rollback()
        return False
    cursor.close()
    return True
def is_fb_link_viral(site_link: str) -> bool:
    conn = get_mysql_conn()
    cursor = conn.cursor()
    sql = """
        SELECT is_viral FROM facebook_most_shared WHERE site_link = %s
    """
    try:
        cursor.execute(sql, (site_link,))
        conn.commit()
        rows = dictfecth(cursor)
        cursor.close()
        if rows:
            v = rows[0].get('is_viral', 'unknown')
            if v == 'unknown' or v == 'yes':
                print("{} is VIRAL!".format(site_link))
                return True
            else:
                print("{} is NOT VIRAL :(".format(site_link))
                return False
        else:
            print("{} is NEW, it COULD BE VIRAL 8)".format(site_link))
            return True
    except Exception as e:
        print("\nERROR! ({})".format(str(e)))
        conn.rollback()
        return False
def save_link_retweets(query: str, t: dict) -> None:
    conn = get_mysql_conn()
    cursor = conn.cursor()
    retweet_count = int(t.get('retweet_count', 0))
    created_at = t['created_at']
    tweet_id = t['id_str']
    tweet = t['text']
    user_id = t['user']['id_str']
    user_name = t['user']['screen_name']
    # no manual escaping needed: the blob is passed as a query parameter below
    blob = ujson.dumps(t)
    tweet_link = "https://twitter.com/{}/status/{}".format(user_name, tweet_id)
    sql = """
        INSERT INTO twitter_most_retweeted(created_at, tweet_id, retweet_count, tweet, user_id,
        user_name, tweet_blob, tweet_link, query)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
    """
    print("\nSaving tweet: {}...".format(tweet_link))
    try:
        cursor.execute(sql, (created_at, tweet_id, retweet_count, tweet, user_id, user_name, blob, tweet_link, query))
        conn.commit()
    except Exception as e:
        print("\nERROR! ({})".format(str(e)))
        conn.rollback()
    cursor.close()
    return
def update_link_retweets(query: str, t: dict):
    conn = get_mysql_conn()
    cursor = conn.cursor()
    retweet_count = int(t.get('retweet_count', 0))
    tweet_id = t['id_str']
    user_name = t['user']['screen_name']
    blob = ujson.dumps(t)
    tweet_link = "https://twitter.com/{}/status/{}".format(user_name, tweet_id)
    sql = """
        UPDATE twitter_most_retweeted
        SET retweet_count = %s, tweet_blob = %s
        WHERE tweet_id = %s
    """
    try:
        cursor.execute(sql, (retweet_count, blob, tweet_id))
        conn.commit()
    except Exception as e:
        print("ERROR! ({})\n".format(str(e)))
        conn.rollback()
    cursor.close()
    return
def update_member(email: str, item: dict) -> None:
    conn = get_mysql_conn()
    cursor = conn.cursor()
    rating = item.get('member_rating', 0)
    open_rate = item.get('stats', {}).get('avg_open_rate', 0)
    click_rate = item.get('stats', {}).get('avg_click_rate', 0)
    sql = """UPDATE mailchimp_members
        SET rating = %s, open_rate = %s, click_rate = %s
        WHERE email = %s
    """
    print("Updating member: {}...\n".format(email))
    try:
        cursor.execute(sql, (rating, open_rate, click_rate, email))
        conn.commit()
    except Exception as e:
        print("ERROR! ({})\n".format(str(e)))
        conn.rollback()
    cursor.close()
    return
def save_member(item: dict) -> None:
    conn = get_mysql_conn()
    cursor = conn.cursor()
    city = item.get('merge_fields', {}).get('FCITY')
    role = item.get('merge_fields', {}).get('FROLE')
    country_code = item.get('location', {}).get('country_code')
    name = item.get('merge_fields', {}).get('FNAME')
    email = item.get('email_address')
    rating = item.get('member_rating', 0)
    open_rate = item.get('stats', {}).get('avg_open_rate', 0)
    click_rate = item.get('stats', {}).get('avg_click_rate', 0)
    sql = """
        INSERT INTO mailchimp_members(name, email, country_code, city, role, rating, open_rate, click_rate)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
    """
    print("Saving member: {}...\n".format(name))
    try:
        cursor.execute(sql, (name, email, country_code, city, role, rating, open_rate, click_rate))
        conn.commit()
    except Exception as e:
        print("ERROR! ({})\n".format(str(e)))
        conn.rollback()
    cursor.close()
    return
def update_link_stats(link: str, t: dict) -> None:
    conn = get_mysql_conn()
    cursor = conn.cursor()
    total = int(t.get('total', 0))
    facebook = int(t.get('shares', {}).get('facebook', 0))
    linkedin = int(t.get('shares', {}).get('linkedin', 0))
    twitter = int(t.get('shares', {}).get('twitter', 0))
    blob = ujson.dumps(t)
    sql = """
        UPDATE sharethis_stats
        SET total = %s, facebook = %s, linkedin = %s, twitter = %s, response_blob = %s
        WHERE site_link = %s
    """
    print(sql)
    try:
        cursor.execute(sql, (total, facebook, linkedin, twitter, blob, link))
        conn.commit()
    except Exception as e:
        print("ERROR! ({})\n".format(str(e)))
        conn.rollback()
    cursor.close()
    return
def get_top_stream_last_entry(stream_id):
    """
    Get the last stats for a given stream
    :return:
    """
    conn = get_mysql_conn()
    cursor = conn.cursor()
    sql = """
        SELECT viewer_count FROM twitch_top_streams
        WHERE stream_id = %s
        ORDER BY insert_time DESC LIMIT 0,1
    """
    print(sql)
    cursor.execute(sql, (stream_id,))
    conn.commit()
    rows = dictfecth(cursor)
    cursor.close()
    if rows:
        return rows[0]
    return False
def get_fb_shares_by_domain(domain: str, threshold: int = 1, limit: int = 0):
    """
    Get links and shares for a given domain
    :param domain:
    :param threshold:
    :param limit:
    :return:
    """
    conn = get_mysql_conn()
    cursor = conn.cursor()
    sql = """
        SELECT site_link, site_link_title, fb_shares FROM facebook_most_shared
        WHERE site_link LIKE %s AND fb_shares > %s
        ORDER BY fb_shares
    """
    params = ["%{}%".format(domain), threshold]
    if limit > 0:
        sql += " LIMIT 0,%s"
        params.append(limit)
    cursor.execute(sql, params)
    conn.commit()
    rows = dictfecth(cursor)
    cursor.close()
    return rows
def word_weight_upsert(word, weight):
    conn = get_mysql_conn()
    cursor = conn.cursor()
    # check if the word exists
    sql = """
        SELECT idprediction_blog_titles FROM prediction_blog_titles WHERE word = %s;
    """
    r = cursor.execute(sql, (word,))
    if r > 0:
        conn.commit()
        rows = dictfecth(cursor)
        # update the existing word's weight
        id = rows[0]["idprediction_blog_titles"]
        sql = """
            UPDATE prediction_blog_titles
            SET weight = %s
            WHERE idprediction_blog_titles = %s
        """
        try:
            cursor.execute(sql, (weight, id))
            print("{} = {} weight updated.".format(word, weight))
            conn.commit()
        except Exception as e:
            print("ERROR! ({})\n".format(str(e)))
            conn.rollback()
            return False
        cursor.close()
        return True
    else:
        # insert the new word with its weight
        sql = """
            INSERT INTO prediction_blog_titles(word, weight)
            VALUES (%s, %s)
        """
        try:
            cursor.execute(sql, (word, weight))
            print("{} = {} weight inserted.".format(word, weight))
            conn.commit()
        except Exception as e:
            print("\nERROR! ({})".format(str(e)))
            conn.rollback()
            return False
        cursor.close()
        return True
def get_site_links_by_category(category: str):
    conn = get_mysql_conn()
    cursor = conn.cursor()
    sql = """
        SELECT A.site_link as site_link FROM scrapy_sites_links as A
        INNER JOIN scrapy_sites as B ON A.site_url = B.site_url
        WHERE B.category = %s
    """
    cursor.execute(sql, (category,))
    conn.commit()
    rows = dictfecth(cursor)
    cursor.close()
    return rows
def get_top_tweets(retweet_threshold: int = 5, limit: int = 100):
    conn = get_mysql_conn()
    cursor = conn.cursor()
    sql = """
        SELECT * FROM twitter_most_retweeted
        WHERE retweet_count > {}
        ORDER BY retweet_count DESC
        LIMIT 0,{};
    """.format(retweet_threshold, limit)
    cursor.execute(sql)
    conn.commit()
    rows = dictfecth(cursor)
    cursor.close()
    return rows
def get_open_rate_by_country():
    conn = get_mysql_conn()
    cursor = conn.cursor()
    sql = """
        select country_code as country, AVG(open_rate) as total from mailchimp_members
        WHERE country_code <> ''
        GROUP BY country_code;
    """
    cursor.execute(sql)
    conn.commit()
    rows = dictfecth(cursor)
    cursor.close()
    return rows
def get_members_by_country():
    conn = get_mysql_conn()
    cursor = conn.cursor()
    sql = """
        SELECT country_code as country, COUNT(*) as total from mailchimp_members
        WHERE country_code <> ''
        GROUP BY country_code;
    """
    cursor.execute(sql)
    conn.commit()
    rows = dictfecth(cursor)
    cursor.close()
    return rows
def get_top_open_rate(limit: int = 10):
    conn = get_mysql_conn()
    cursor = conn.cursor()
    sql = """
        select email_subject, unique_opens, open_rate, emails_sent from mailchimp_reports
        where emails_sent > 100
        order by open_rate desc
        limit 0,{};
    """.format(limit)
    cursor.execute(sql)
    conn.commit()
    rows = dictfecth(cursor)
    cursor.close()
    return rows
def get_word_weight(word):
    conn = get_mysql_conn()
    cursor = conn.cursor()
    sql = """
        select weight from prediction_blog_titles WHERE word = %s;
    """
    r = cursor.execute(sql, (word,))
    if r > 0:
        conn.commit()
        rows = dictfecth(cursor)
        cursor.close()
        return rows[0]["weight"]
    cursor.close()
    return 0
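def _example_word_weights():
    """Illustrative usage only: upsert a word weight, then read it back.

    The word and weight below are made-up values; the prediction_blog_titles
    table must already exist.
    """
    word_weight_upsert("python", 0.75)
    print(get_word_weight("python"))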
def get_retweets_by_domain(domain: str, threshold: int = 10, limit: int = 0):
    conn = get_mysql_conn()
    cursor = conn.cursor()
    sql = """
        SELECT * FROM twitter_most_retweeted
        WHERE query LIKE %s AND retweet_count > %s
        ORDER BY retweet_count DESC
    """
    params = ["%{}%".format(domain), threshold]
    if limit > 0:
        sql += " LIMIT 0,%s"
        params.append(limit)
    cursor.execute(sql, params)
    conn.commit()
    rows = dictfecth(cursor)
    cursor.close()
    return rows
def get_domains():
    """
    Get all domains
    :return:
    """
    conn = get_mysql_conn()
    cursor = conn.cursor()
    sql = """
        SELECT * FROM domain_stats;
    """
    cursor.execute(sql)
    conn.commit()
    rows = dictfecth(cursor)
    cursor.close()
    return rows
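def _example_read_queries():
    """Illustrative usage only for a few of the read helpers above.

    The category, domain and limits are placeholders; results depend on what
    is already stored in the database.
    """
    for site in get_all_sites(category="tech"):
        print(site["site_url"], site["sitemap_url"])

    for link in get_fb_shares_by_domain("example.com", threshold=10, limit=5):
        print(link["fb_shares"], link["site_link"])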