Example #1
import twitter_api

def tweet(temperatures):
    # Nothing to post if no sensor readings were collected.
    if not temperatures:
        return
    api = twitter_api.get_api()
    # Build one status line such as "kitchen: 22, attic: 18".
    the_tweet = ", ".join(
        "%s: %s" % (sensor, temp) for sensor, temp in temperatures.items()
    )
    api.PostUpdate(the_tweet)
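
A minimal usage sketch, assuming temperatures maps sensor names to their latest readings; the readings dict below is hypothetical:

readings = {"kitchen": 22.5, "attic": 18.0}
tweet(readings)  # posts "kitchen: 22.5, attic: 18.0"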
Example #2
import sqlite3 as sqlite
import db
from twitter_api import get_api3 as get_api
import datetime

con = db.get_connection()
cursor = con.cursor()
con_1 = db.get_connection_1()
cursor_1 = con_1.cursor()
api = get_api()


class TwitterUser:
    def __init__(self, user_id, user_id_str, scrn_name, name,
                 foer_cnt, friend_cnt,
                 desc, location, created_at, status_cnt, verified,
                 scanned):
        self.user_id = user_id
        self.user_id_str = user_id_str
        self.scrn_name = scrn_name
        self.name = name
        self.foer_cnt = foer_cnt
        self.friend_cnt = friend_cnt
        self.desc = desc
        self.location = location
        # Normalize full datetimes down to a date; pass through anything
        # that is already a date (or a raw string).
        if isinstance(created_at, datetime.datetime):
            self.created_at = created_at.date()
        else:
            self.created_at = created_at
        self.status_cnt = status_cnt
        self.verified = verified
        self.scanned = scanned
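
A minimal construction sketch, assuming the field values normally come from a Twitter API user object; every literal below is hypothetical:

user = TwitterUser(
    user_id=12345, user_id_str="12345", scrn_name="example",
    name="Example User", foer_cnt=10, friend_cnt=20,
    desc="bio text", location="somewhere",
    created_at=datetime.datetime(2015, 3, 1, 12, 30),
    status_cnt=100, verified=False, scanned=False,
)
print(user.created_at)  # datetime.date(2015, 3, 1): the time part is dropped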
Example #3
from contextlib import closing
from datetime import datetime, timedelta

# Tokenizer, settings, keys, get_api, SessionPool, get_saver, get_latest_tweets,
# is_retweet, tweet_tuple, detect_keywords, Event, hour, logger, and
# collect_tweets are project-level helpers imported elsewhere in the codebase.


def main():
    # number of simultaneous crawler threads
    n_threads = 6

    # load tokenizer
    tokenizer = Tokenizer()

    # load news account IDs
    news_accounts = settings['news_accounts']

    # authenticate with the first set of API keys
    api = get_api(keys[0])

    session = SessionPool().get_session()

    # Collect headline tweets published within the past hour.
    headlines = []
    one_hour_before = datetime.utcnow() - timedelta(hours=1)

    with closing(get_saver(session)) as saver:
        saver.send(None)  # prime the saver coroutine

        for screen_name in news_accounts:
            news_sources_tweets = get_latest_tweets(screen_name, one_hour_before, api)
            if not news_sources_tweets:
                continue
            for tweet in news_sources_tweets:
                text = tweet.text
                if is_retweet(tweet):
                    text = tweet.retweeted_status.text

                headlines.append(text)
                saver.send(tweet_tuple(tweet=tweet, is_headline=True, event_id=None))

    # Represent each headline as a set of lowercased terms
    # (a term may span several tokens, hence the join).
    headlines_preprocessed = []
    for headline in headlines:
        doc = set()
        for term in tokenizer.tokenize(headline):
            term = ' '.join([t.lower_ for t in term])
            doc.add(term)
        headlines_preprocessed.append(doc)

    keywords = detect_keywords(headlines_preprocessed, threshold=2)

    events = []
    keyword_sets = []

    with session.begin():
        # take n_threads first keyword sets (top score first)
        # for each keyword set, take first 3 keywords (random order)
        for kwd in keywords[:n_threads]:
            tmp = list(kwd[0])[:3]

            event = Event(keyword1=tmp[0],
                          keyword2=tmp[1],
                          keyword3=tmp[2] if len(tmp) == 3 else None)
            session.add(event)
            events.append(event)
            keyword_sets.append(' '.join(tmp))

    # Re-read the events so their database-assigned IDs are populated.
    for event in events:
        session.refresh(event)

    keyword_sets = [(kw, event.id) for kw, event in zip(keyword_sets, events)]

    # collect tweets for each keyword set for one hour
    total_time = 1 * hour

    logger.info(f"Collecting tweets for {total_time / 60} minutes.")
    collect_tweets(keyword_sets, limit=total_time)

    return keywords, headlines
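
A minimal invocation sketch: main() returns the detected keyword sets and the raw headlines, so a driver could log or inspect them. The module guard below is an assumed convention, not part of the original; logger is the same project-level logger used inside main().

if __name__ == '__main__':
    detected_keywords, collected_headlines = main()
    logger.info("Detected %d keyword sets from %d headlines.",
                len(detected_keywords), len(collected_headlines))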