Exemplo n.º 1
0
def main(file_type, filename, db_name):
    """Import tweets parsed from a file into the SQLite ``tweets`` table.

    The database is created with ``twitter_utils.create_db`` when ``db_name``
    does not exist on disk yet. Only the id, creation time and text of each
    tweet are stored; every other column is written as NULL and
    ``is_retweet`` is always False. Progress is reported on stderr.

    Args:
        file_type: Forwarded to ``load`` together with ``filename``.
        filename: Forwarded to ``load``; identifies the file to parse.
        db_name: Path of the SQLite database file.
    """
    if not os.path.exists(db_name):
        twitter_utils.create_db(db_name)

    insert_sql = (
        "INSERT INTO tweets (twitter_id, user_id, is_retweet, created_at, text, in_reply_to_status_id, coordinates, geo, place, source) "
        "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
    )

    conn = sqlite3.connect(db_name)
    # try/finally so the connection is released even when parsing or an
    # insert fails with something other than IntegrityError.
    try:
        tweets = load(file_type, filename)

        sys.stderr.write("parsed %s tweets. now inserting... " % len(tweets))

        for tweet in tweets:
            # created_at is expected to expose timetuple() (e.g. a datetime);
            # it is stored as an integer Unix timestamp in local time.
            created_at = int(time.mktime(tweet["created_at"].timetuple()))
            try:
                conn.execute(
                    insert_sql,
                    (
                        tweet["id"],
                        None,
                        False,
                        created_at,
                        tweet["text"],
                        None,
                        None,
                        None,
                        None,
                        None,
                    ),
                )
            except sqlite3.IntegrityError:
                # Deliberate best-effort: rows rejected by a constraint
                # (presumably tweets already stored -- schema not visible
                # here) are skipped so the import can be re-run.
                pass

        conn.commit()
    finally:
        conn.close()

    sys.stderr.write("done!\n")
Exemplo n.º 2
0
def main(screen_name, db_name):
    """Download a user's timeline page by page into the SQLite ``tweets`` table.

    The database is created with ``twitter_utils.create_db`` when ``db_name``
    does not exist on disk yet. Fetching starts after the largest
    ``twitter_id`` already stored (``since_id`` is None for an empty table)
    and stops when a page comes back empty or shorter than ``API_PAGE_SIZE``,
    or when the API keeps failing. Progress is reported on stderr.

    For a retweet (a tweet carrying ``retweeted_status``) the embedded
    original tweet is what gets stored, with ``is_retweet`` set to True.

    Args:
        screen_name: Forwarded to ``user_timeline`` to select the account.
        db_name: Path of the SQLite database file.
    """
    max_retries = 5
    delay_seconds = 25

    insert_sql = (
        "INSERT INTO tweets (twitter_id, user_id, is_retweet, created_at, text, in_reply_to_status_id, coordinates, geo, place, source) "
        "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
    )

    def json_or_none(tweet, key):
        # Optional structured fields are stored as JSON text, or NULL when
        # the field is missing or empty.
        return simplejson.dumps(tweet[key]) if tweet.get(key) else None

    if not os.path.exists(db_name):
        twitter_utils.create_db(db_name)

    conn = sqlite3.connect(db_name)
    # try/finally so the connection is released on any unexpected error.
    try:
        c = conn.execute("SELECT max(twitter_id) FROM tweets")
        since_id = c.fetchall()[0][0]

        page = 1
        fail_count = 0
        while True:
            sys.stderr.write("since_id=%s, page=%s, fail_count=%s...\n" % (since_id, page, fail_count))

            try:
                tweets = user_timeline(screen_name, page, since_id=since_id)
            except urllib2.HTTPError:
                if fail_count >= max_retries:
                    sys.stderr.write("fail whale keeps showing up. stopped retrying...")
                    break
                fail_count += 1
                # Back off before retrying the same page; the wait grows
                # with each consecutive failure instead of hammering the API.
                time.sleep(delay_seconds * fail_count)
                continue
            fail_count = 0

            if not tweets:
                break

            saved_tweets = 0

            for tweet in tweets:
                is_retweet = 'retweeted_status' in tweet
                if is_retweet:
                    tweet = tweet['retweeted_status']

                # created_at is expected to expose timetuple(); it is stored
                # as an integer Unix timestamp in local time.
                row = (
                    tweet['id'],
                    tweet['user']['id'],
                    is_retweet,
                    int(time.mktime(tweet['created_at'].timetuple())),
                    tweet['text'],
                    tweet['in_reply_to_status_id'],
                    json_or_none(tweet, 'coordinates'),
                    json_or_none(tweet, 'geo'),
                    json_or_none(tweet, 'place'),
                    tweet.get('source'),
                )

                try:
                    conn.execute(insert_sql, row)
                    saved_tweets += 1
                except sqlite3.IntegrityError:
                    # Deliberate best-effort: rows rejected by a constraint
                    # (presumably already-stored tweets) are skipped.
                    pass

            # One commit per page rather than one per row.
            conn.commit()

            sys.stderr.write("saved %s tweets.\n" % saved_tweets)

            if len(tweets) < API_PAGE_SIZE:
                break

            # Pause between successful pages to stay gentle on the API.
            time.sleep(delay_seconds)

            page += 1
    finally:
        conn.close()