def reset_db(db='test', **db_settings):
    """
    Flush the Redis cache and delete every row from the tweet tables.

    :param db: name of a built-in preset (``'test'`` or ``'mbp2'``); any
        other value means the connection is described by ``db_settings``
    :type db: str
    :param db_settings: keyword arguments for ``MySQLdb.connect``. They are
        only used when ``db`` is not a preset (presets overwrite them).
        An optional ``redis_settings`` dict is removed from them and passed
        to ``SafeRedis``; when it is absent the Redis flush is skipped.
    :raises ValueError: if ``db`` is not a preset and no connection
        settings were supplied
    """
    if db == 'test':
        db_settings = {
            'host': 'localhost',
            'db': 'tweets_test',
            'user': '******',
            'passwd': environ.get('TWEETS_TESTDB_PASSWORD'),
        }
        redis_settings = {'host': 'localhost', 'db': '0'}
    elif db == 'mbp2':
        db_settings = {
            'host': 'localhost',
            'db': 'twitter',
            'user': '******',
            'passwd': environ.get('TWEETS_DB_PASSWORD'),
        }
        redis_settings = {'host': 'localhost', 'db': 2}
    else:
        # Custom connection: Redis settings are opt-in.  Previously this
        # branch left `redis_settings` undefined and crashed with NameError.
        redis_settings = db_settings.pop('redis_settings', None)
        if not db_settings:
            raise ValueError("Must supply valid db name or settings")

    if redis_settings is not None:
        cache = SafeRedis(**redis_settings)
        cache.flushdb()
        print("Flushed Redis cache")
    else:
        # Never guess which Redis db to wipe for a custom connection.
        print("No Redis settings supplied; Redis cache not flushed")

    # Table order is kept exactly as it was.
    # NOTE(review): presumably link tables come first so deletes do not
    # violate foreign keys -- confirm against the schema.
    tables = (
        'cluster', 'hashtag', 'hashtag_to_cluster', 'hashtag_to_tweet',
        'tag_word', 'tag_word_to_hashtag', 'url', 'url_to_tweet',
        'user_to_tweet', 'user_to_url', 'word', 'word_to_tweet',
        'twitter_user', 'tweet',
    )

    # Use a separate name for the connection so `db` keeps the caller's
    # value and the message below shows a database name, not a connection.
    connection = MySQLdb.connect(**db_settings)
    try:
        cursor = connection.cursor()
        print('Wiping {db_name}'.format(db_name=db_settings.get('db', db)))
        try:
            for table in tables:
                cursor.execute(
                    """DELETE FROM `{table}`;""".format(table=table))
                print("Wiped {table}".format(table=table))
        finally:
            # As before, commit whatever was deleted even if a later
            # DELETE failed.
            connection.commit()
    finally:
        connection.close()
def __init__(self, use_testdb=False, show_sql=True):
    """
    Set up the MySQL and Redis connections used by the Inserter.

    :param use_testdb: connect to the test database and test Redis db
        instead of the main ones
    :type use_testdb: bool
    :param show_sql: stored unchanged on the instance as ``show_sql``
    :type show_sql: bool
    """
    # (MySQL schema, env var holding its password, Redis db index)
    if use_testdb:
        target = ('tweets_test', 'TWEETS_TESTDB_PASSWORD', '0')
    else:
        target = ('twitter', 'TWEETS_DB_PASSWORD', '2')
    schema, password_var, redis_db = target

    self.show_sql = show_sql
    self.DB = MySQLdb.connect(
        host='localhost',
        db=schema,
        user='******',
        passwd=environ.get(password_var),
    )
    self.cursor = self.DB.cursor()
    self.cache = SafeRedis(host='localhost', db=redis_db)

    # Make sure every 'pk_<table>' key exists, starting it at 0 when the
    # cache has no value for it yet.
    for name in ('cluster', 'hashtag', 'tag_word', 'url', 'word'):
        counter_key = 'pk_{table}'.format(table=name)
        if self.cache.get(counter_key) is None:
            self.cache.set(counter_key, 0)

    self.escape_word = self.DB.escape
ROUTER_IP = '24.186.113.22' REDIS_HOST = '24.186.113.22' REDIS_PORT = '6666' REDIS_DB = 1 DOMAIN_PAT = re.compile(r'https?://([\w\d\.\-]+\.\w{2,3})') ERR_PAT = re.compile(r'host=\'([\w\d\.]+)\'') if os.path.exists(os.path.expanduser("~/PycharmProjects/tweet_stuff")): home_dir = os.path.expanduser("~/PycharmProjects/tweet_stuff") elif os.path.exists(os.path.expanduser("~/tweet_stuff")): home_dir = os.path.expanduser("~/tweet_stuff") IN_DIR = os.path.join(home_dir, "extracted2") OUT_DIR = os.path.join(home_dir, "fixed") CONN = SafeRedis(host=REDIS_HOST, port=REDIS_PORT, db=REDIS_DB) def resolve_redirects(url): print "Resolving {}".format(url) cached = CONN.get(url) if cached: return cached session = requests.session() try: with closing(session.head(url, timeout=300)) as req: r = req except (exceptions.RequestException, socket.error) as e: try: requests.head('http://{}'.format(ROUTER_IP)).close() CONN.set(url, url)