import twitter_api


def tweet(temperatures):
    """Post the current sensor readings as a single status update."""
    if len(temperatures) == 0:
        return
    api = twitter_api.get_api()
    the_tweet = ''
    for sensor in temperatures:
        the_tweet += "%s: %s, " % (sensor, temperatures[sensor])
    # strip the trailing ", "
    the_tweet = the_tweet[:-2]
    api.PostUpdate(the_tweet)
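
# --- Usage sketch (not part of the original module; the readings dict and the
# __main__ guard are illustrative assumptions). tweet() expects a mapping of
# sensor names to readings and joins them into one tweet, so a call could look
# like this:
if __name__ == '__main__':
    readings = {'livingroom': 21.5, 'bedroom': 19.0, 'outside': -3.2}
    tweet(readings)  # posts "livingroom: 21.5, bedroom: 19.0, outside: -3.2"
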
import sqlite3 as sqlite
import db
from twitter_api import get_api3 as get_api
import datetime

con = db.get_connection()
cursor = con.cursor()
con_1 = db.get_connection_1()
cursor_1 = con_1.cursor()
api = get_api()


class TwitterUser():

    def __init__(self, user_id, user_id_str, scrn_name, name, foer_cnt,
                 friend_cnt, desc, location, created_at, status_cnt,
                 verified, scanned):
        self.user_id = user_id
        self.user_id_str = user_id_str
        self.scrn_name = scrn_name
        self.name = name
        self.foer_cnt = foer_cnt
        self.friend_cnt = friend_cnt
        self.desc = desc
        self.location = location
        if isinstance(created_at, datetime.datetime):
            self.created_at = created_at.date()
        else:
            self.created_at = created_at
        self.status_cnt = status_cnt
        self.verified = verified
        self.scanned = scanned
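
# --- Construction sketch (not part of the original file; all field values are
# made-up examples). It illustrates that a datetime passed as created_at is
# normalised to a date by __init__:
example_user = TwitterUser(
    user_id=12345,
    user_id_str='12345',
    scrn_name='example',
    name='Example User',
    foer_cnt=10,
    friend_cnt=20,
    desc='just an example account',
    location='nowhere',
    created_at=datetime.datetime(2015, 6, 1, 12, 30),
    status_cnt=100,
    verified=False,
    scanned=False,
)
assert example_user.created_at == datetime.date(2015, 6, 1)
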
def main():
    # simultaneous crawlers
    n_threads = 6

    # load tokenizer
    tokenizer = Tokenizer()

    # load news account IDs
    news_accounts = settings['news_accounts']

    # load api
    api = get_api(keys[0])

    session = SessionPool().get_session()

    # for every hour:
    # Get headline tweets published from one hour ago
    headlines = []
    one_hour_before = datetime.utcnow() - timedelta(hours=1)
    with closing(get_saver(session)) as saver:
        saver.send(None)
        for screen_name in news_accounts:
            news_sources_tweets = get_latest_tweets(screen_name, one_hour_before, api)
            if not news_sources_tweets:
                continue
            for tweet in news_sources_tweets:
                text = tweet.text
                if is_retweet(tweet):
                    text = tweet.retweeted_status.text
                headlines.append(text)
                saver.send(tweet_tuple(tweet=tweet, is_headline=True, event_id=None))

    headlines_preprocessed = []
    for headline in headlines:
        doc = set()
        for term in tokenizer.tokenize(headline):
            term = ' '.join([t.lower_ for t in term])
            doc.add(term)
        headlines_preprocessed.append(doc)

    keywords = detect_keywords(headlines_preprocessed, threshold=2)

    events = []
    keyword_sets = []
    with session.begin():
        # take n_threads first keyword sets (top score first)
        # for each keyword set, take first 3 keywords (random order)
        for kwd in keywords[:n_threads]:
            tmp = list(kwd[0])[:3]
            event = Event(keyword1=tmp[0],
                          keyword2=tmp[1],
                          keyword3=tmp[2] if len(tmp) == 3 else None)
            session.add(event)
            events.append(event)
            keyword_sets.append(' '.join(tmp))
        for event in events:
            session.refresh(event)

    keyword_sets = list(zip(keyword_sets, map(lambda x: x.id, events)))

    # collect tweet sets per keyword set for 1 hour
    total_time = 1 * hour
    logger.info(f"Collecting tweets for {total_time / 60} minutes.")
    collect_tweets(keyword_sets, limit=total_time)

    return keywords, headlines
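
# --- Helper sketch (is_retweet is provided elsewhere in the project; this
# one-attribute check is an assumption, shown only because main() reads
# tweet.retweeted_status.text whenever is_retweet() is true):
def is_retweet(tweet):
    """Return True when the status object carries a retweeted_status attribute."""
    return getattr(tweet, 'retweeted_status', None) is not None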