Пример #1
0
def save_tweets(filename, tweets, user_id):
    """Save tweets from dict to file.

    Write a header line ``<user_id><TAB>#tweets`` followed by the tweets,
    sorted by tweet id, to a UTF-8 encoded file, one per line:
        <tweet id (number)> <tweet text>
    Tweet text is:
        <date> <<user>> [RT @<user>: ]<text>

    Nothing is written (and no file is created) when ``tweets`` is empty.
    If the file cannot be opened the error is reported through ``err``
    and the function returns without raising.

    Args:
        filename: A string representing the file name to save tweets to.
        tweets: A dict mapping tweet-ids (int) to tweet text (str).
        user_id: Numeric id written on the first line of the file.
    """
    # Local import so the function does not depend on a module-level one.
    import io

    if not tweets:
        return

    try:
        # io.open behaves identically on Python 2 and 3 and guarantees the
        # UTF-8 encoding promised above, independent of the locale.
        archive = io.open(filename, "w", encoding="utf-8")
    except IOError as e:
        err("Cannot save tweets: %s" % str(e))
        return

    # The context manager closes the file even when a write fails.
    with archive:
        # first line is for user_id and #tweets
        archive.write(u"%d\t#tweets\n" % user_id)
        for tweet_id in sorted(tweets):
            text = tweets[tweet_id]
            if isinstance(text, bytes):
                # Decode already-encoded text so it is never written as a
                # b'...' literal.
                text = text.decode("utf-8")
            archive.write(u"%i %s\n" % (tweet_id, text))
Пример #2
0
def save_tweets(filename, tweets, user_id):
    """Save tweets from dict to file.

    The file is UTF-8 encoded. Its first line is ``<user_id><TAB>#tweets``;
    every following line holds one tweet, in ascending tweet-id order:
        <tweet id (number)> <tweet text>
    Tweet text is:
        <date> <<user>> [RT @<user>: ]<text>

    An empty ``tweets`` dict is a no-op (no file is created). A failure
    to open the file is reported through ``err`` and swallowed.

    Args:
        filename: A string representing the file name to save tweets to.
        tweets: A dict mapping tweet-ids (int) to tweet text (str).
        user_id: Numeric id of the user, written in the header line.
    """
    import io  # local import keeps this function self-contained

    if not tweets:
        return

    def as_text(value):
        # Byte strings are decoded so they are not rendered as b'...'.
        return value.decode("utf-8") if isinstance(value, bytes) else value

    try:
        # Explicit encoding: the output must be UTF-8 whatever the locale.
        archive = io.open(filename, "w", encoding="utf-8")
    except IOError as e:
        err("Cannot save tweets: %s" % str(e))
        return

    try:
        # first line is for user_id and #tweets
        archive.write(u"%d\t#tweets\n" % user_id)
        for k in sorted(tweets):
            archive.write(u"%i %s\n" % (k, as_text(tweets[k])))
    finally:
        # Always release the handle, even if a write raised.
        archive.close()
Пример #3
0
def get_auths_data():
    """Load the stored credential entries from ``./auth_users_follow``.

    Every non-blank line of the file is stripped and split on single
    spaces; the resulting list of fields becomes one entry of the result.
    The number of entries is reported through ``err``.

    Returns:
        A list with one list of strings per non-blank line of the file.
    """
    auth_users = []
    # The context manager closes the file as soon as it has been read.
    with open('./auth_users_follow', 'r') as auth_file:
        for line in auth_file:
            entry = line.strip()
            if not entry:
                # A blank line would otherwise become a [''] entry that
                # has no second field for callers to index.
                continue
            auth_users.append(entry.split(' '))
    err('we have %d authed users' % len(auth_users))
    return auth_users
Пример #4
0
def get_auths_data():
    """Read credential entries from the file ``./auth_users_follow``.

    Each non-empty line (after stripping surrounding whitespace) is split
    on single spaces into a list of fields. The total number of entries
    is logged through ``err``.

    Returns:
        A list of field lists, one per non-empty line, in file order.
    """
    with open("./auth_users_follow", "r") as auth_file:
        # Stripping first and filtering keeps blank lines from turning
        # into [''] entries; the `with` block guarantees the file is closed.
        stripped = (line.strip() for line in auth_file)
        auth_users = [entry.split(" ") for entry in stripped if entry]
    err("we have %d authed users" % len(auth_users))
    return auth_users
Пример #5
0
def follow(twitter, screen_name, followers=True):
    """Get the entire list of followers/following for a user.

    Pages through ``follow_portion`` starting at cursor -1 until the
    returned cursor is 0, accumulating the de-duplicated ids.

    Args:
        twitter: API client handed through to ``follow_portion``.
        screen_name: Screen name of the user to inspect.
        followers: If true collect followers, otherwise the users being
            followed.

    Returns:
        A list of the unique ids collected. When a request fails, the ids
        gathered before the failure are returned (best effort).
    """
    user_ids = []
    cursor = -1
    what = "follow%s" % ("ers" if followers else "ing")
    while True:
        try:
            portion, cursor = follow_portion(twitter, screen_name, cursor,
                                             followers)
        except Exception as e:
            # Best effort: stop paging and keep what was collected.
            # Catching Exception (not a bare except) lets KeyboardInterrupt
            # and SystemExit propagate to the caller, and the failure is
            # reported instead of being silently dropped.
            err("Fail: %s - giving up on %s %s"
                % (str(e)[:500], screen_name, what))
            break
        known = len(user_ids)
        user_ids = list(set(user_ids + portion))
        err("Browsing %s %s, new: %i"
            % (screen_name, what, len(user_ids) - known))
        if cursor == 0:
            # A zero cursor marks the last page.
            break
    return user_ids
Пример #6
0
def follow(twitter, screen_name, followers=True):
    """Get the entire list of followers/following for a user.

    Repeatedly calls ``follow_portion`` (first cursor -1) and merges each
    returned portion into a set of ids until the returned cursor is 0.

    Args:
        twitter: API client passed on to ``follow_portion``.
        screen_name: Screen name of the user whose relations are listed.
        followers: True for followers, False for the users followed.

    Returns:
        A list of unique ids (in no particular order). If fetching a page
        fails, paging stops and the ids collected so far are returned.
    """
    what = "follow%s" % ("ers" if followers else "ing")
    unique_ids = set()
    cursor = -1
    while True:
        try:
            portion, cursor = follow_portion(twitter, screen_name, cursor,
                                             followers)
        except Exception as e:
            # Deliberately best-effort, but no longer silent, and no longer
            # a bare except: Ctrl-C / SystemExit must reach the caller.
            err("Fail: %s - giving up on %s %s"
                % (str(e)[:500], screen_name, what))
            break
        before = len(unique_ids)
        # Updating one set avoids rebuilding list and set on every page.
        unique_ids.update(portion)
        err("Browsing %s %s, new: %i"
            % (screen_name, what, len(unique_ids) - before))
        if cursor == 0:
            break
    return list(unique_ids)
def main():
    """Run the follower lottery for the Twitter user named on the command line.

    Reads the API credentials as ``KEY=VALUE`` lines from
    ``~/.raspjamming.lottery.twitter.auth``, fetches the followers of the
    given user, prints them, then starts the ``Lottery`` and serves HTTP
    on 127.0.0.1:5000. The lottery is simulated unless ``-r``/``--run``
    is given.

    Raises:
        SystemExit: With status 1 when interrupted while the followers
            are being retrieved.
    """
    parser = ArgumentParser()
    parser.add_argument("twitter_user")
    # nargs="?" lets "-r" be used as a plain flag (as its help text
    # implies) while still accepting the former "-r VALUE" form.
    parser.add_argument("-r", "--run", nargs="?", const=True, default=None,
                        help="do real lottery run")
    args = parser.parse_args()

    simulateRun = not args.run

    # Variables that contains the user credentials to access Twitter API
    AUTH_DATA = {
        'ACCESS_TOKEN': '',
        'ACCESS_SECRET': '',
        'CONSUMER_KEY': '',
        'CONSUMER_SECRET': ''
    }
    TWITTER_AUTH_FILE = expanduser("~") + '/.raspjamming.lottery.twitter.auth'
    with open(TWITTER_AUTH_FILE) as authFile:
        for line in authFile:
            # partition() keeps '=' characters inside the value and makes
            # lines without any '=' detectable instead of raising.
            key, separator, value = line.partition('=')
            key = key.strip()
            if not key or not separator:
                continue
            AUTH_DATA[key] = value.strip()

    oauth = OAuth(AUTH_DATA['ACCESS_TOKEN'], AUTH_DATA['ACCESS_SECRET'],
                  AUTH_DATA['CONSUMER_KEY'], AUTH_DATA['CONSUMER_SECRET'])
    twitter = Twitter(auth=oauth)
    user_ids, users = [], {}
    try:
        # Retrieve followers list
        user = args.twitter_user
        print("Twitter user for lottery: " + user)
        user_ids = follow(twitter, user, True)
        users = lookup(twitter, user_ids)
    except KeyboardInterrupt:
        err()
        err("Interrupted.")
        raise SystemExit(1)

    print("Found users:")
    for uid in user_ids:
        # Ids that could not be resolved to a name are not listed.
        if uid in users:
            print(str(uid) + "\t" + users[uid])

    lottery = Lottery(user_ids, users, simulateRun)
    srv = HTTPServer(('127.0.0.1', 5000), HTTPRequestHandler)
    lottery.subscribe(shutdown_http_server)
    lottery.subscribe(HTTPRequestHandler.set_winner)
    lottery.run()
    srv.serve_forever()
Пример #8
0
def main(args=sys.argv[1:]):
    """Command-line entry point: list the followers (or following) of users.

    For every user named in ``args`` the numeric user id and the number
    of related users are printed on one tab-separated line, followed by
    one line per related user whose screen name could be resolved. The
    totals go to stderr through ``err``.

    Args:
        args: Command-line arguments without the program name.

    Raises:
        SystemExit: With status 1 on invalid options, when no user is
            given, when OAuth is requested but no credentials are
            available, or when interrupted while fetching.
    """
    options = {
        'oauth': False,
        'followers': True,
        'api-rate': False,
        'show_id': False
    }
    try:
        parse_args(args, options)
    except GetoptError as e:
        err("I can't do that, %s." % e)
        raise SystemExit(1)

    # exit if no user is given, except if asking for API rate
    if not options['extra_args'] and not options['api-rate']:
        print(__doc__)
        raise SystemExit(1)

    # authenticate using OAuth with a randomly chosen stored credential
    if options['oauth']:
        auths = get_auths_data()
        if not auths:
            # random.randint(0, -1) would fail with an obscure ValueError.
            err("No OAuth credentials available.")
            raise SystemExit(1)
        random_index = random.randint(0, len(auths) - 1)
        err('Using the number %d oauth user' % random_index)
        chosen = auths[random_index]
        auth = OAuth(chosen[0], chosen[1], CONSUMER_KEY, CONSUMER_SECRET)
    else:
        auth = NoAuth()

    twitter = Twitter(auth=auth, api_version='1.1', domain='api.twitter.com')

    if options['api-rate']:
        rate_limit_status(twitter)
        return

    # obtain list of followers (or following) for every given user
    for user in options['extra_args']:
        user_ids, users = [], {}
        user_id = twitter.users.lookup(screen_name=user)[0]['id']
        try:
            user_ids = follow(twitter, user, options['followers'])
            users = lookup(twitter, user_ids)
        except KeyboardInterrupt:
            err()
            err("Interrupted.")
            raise SystemExit(1)
        print(''.join(['%d' % user_id, '\t', '%d' % len(user_ids)]))

        for uid in user_ids:
            if uid not in users:
                # ids without a resolved screen name are skipped
                continue
            # NOTE(review): on Python 3 encode() yields bytes, which cannot
            # be concatenated with str below - confirm target interpreter.
            name = users[uid].encode("utf-8")
            if options['show_id']:
                print('following' + '\t' + str(uid) + '\t' + name)
            else:
                print(name)

        # print total on stderr to separate from user list on stdout
        if options['followers']:
            err("Total followers for %s: %i" % (user, len(user_ids)))
        else:
            err("Total users %s is following: %i" % (user, len(user_ids)))
Пример #9
0
def lookup(twitter, user_ids):
    """Resolve an entire list of user ids to screen names.

    The ids are resolved through ``lookup_portion`` in batches of at most
    100. A failed batch is retried after waiting through ``fail.wait``;
    on a 400 response the wait lasts until the reported rate-limit reset
    (plus 5 seconds).

    Args:
        twitter: API client, also used to query the rate-limit status.
        user_ids: Sequence of user ids to resolve.

    Returns:
        A dict updated with the result of every resolved batch.
    """
    batch_size = 100
    resolved = {}
    for start in range(0, len(user_ids), batch_size):
        fail = Fail()
        done = False
        while not done:
            try:
                batch = lookup_portion(
                    twitter, user_ids[start:start + batch_size])
            except TwitterError as exc:
                if exc.e.code == 400:
                    err("Fail: %i API rate limit exceeded" % exc.e.code)
                    status = twitter.account.rate_limit_status()
                    reset_at = status["reset_time_in_seconds"]
                    reset_text = time.asctime(time.localtime(reset_at))
                    # 5 extra seconds so we do not wake before the reset
                    pause = int(status["reset_time_in_seconds"]
                                - time.time()) + 5
                    err("Hourly limit of %i requests reached, next reset on "
                        "%s: going to sleep for %i secs"
                        % (status["hourly_limit"], reset_text, pause))
                    fail.wait(pause)
                    continue
                if exc.e.code == 502:
                    err("Fail: %i Service currently unavailable, retrying..."
                        % exc.e.code)
                else:
                    err("Fail: %s\nRetrying..." % str(exc)[:500])
                fail.wait(3)
            except urllib2.URLError as exc:
                err("Fail: urllib2.URLError %s - Retrying..." % str(exc))
                fail.wait(3)
            except httplib.error as exc:
                err("Fail: httplib.error %s - Retrying..." % str(exc))
                fail.wait(3)
            except KeyError as exc:
                err("Fail: KeyError %s - Retrying..." % str(exc))
                fail.wait(3)
            else:
                resolved.update(batch)
                err("Resolving user ids to screen names: %i/%i"
                    % (len(resolved), len(user_ids)))
                done = True
    return resolved
Пример #10
0
def archive_loop(archiver):
    """Generic loop and handling for all kinds of archiving.

    Mostly copied from Mike Verdone's twitter.archiver.

    Calls ``archiver.query()`` repeatedly until ``archiver.more()`` is
    false (``archiver.success()`` is then called) or an API error that is
    not retried occurs. Handling of ``TwitterError`` by status code:
        401, 400, 404: log and stop.
        429: wait until the reset time reported by
            ``archiver.rate_limit_status()`` (plus 5 seconds), then retry.
        502 and anything else: log, ``fail.wait(3)``, retry.
    ``urllib2.URLError``, ``httplib.error`` and ``KeyError`` are logged
    and retried after ``fail.wait(3)``.

    Args:
        archiver: Object providing ``query()``, ``more()``, ``success()``,
            ``rate_limit_status()`` and the counters ``query_count``,
            ``new`` and ``dup``.
    """
    fail = Fail()
    # download one API call at a time until done while handling errors
    while True:
        try:
            archiver.query()
        except TwitterError as e:
            if e.e.code == 401:
                err("Fail: %i Unauthorized (tweets of that user are protected)"
                    % e.e.code)
                break
            elif e.e.code == 400:
                err("Fail: %i Bad Request" % e.e.code)
                break
            elif e.e.code == 404:
                err("Fail: %i Profile does not exist" % e.e.code)
                break
            elif e.e.code == 429:
                err("Fail: %i Too Many Requests" % e.e.code)
                (reset_unix, limit) = archiver.rate_limit_status()
                reset_str = time.asctime(time.localtime(reset_unix))
                delay = int(reset_unix - time.time()) + 5  # avoid race
                err("Limit of %i requests reached, next reset on %s: "
                    "going to sleep for %i secs" % (limit, reset_str, delay))
                fail.wait(delay)
                continue
            elif e.e.code == 502:
                err("Fail: %i Service currently unavailable, retrying..."
                    % e.e.code)
            else:
                err("Fail: %s\nRetrying..." % str(e)[:500])
            fail.wait(3)
        except urllib2.URLError as e:
            err("Fail: urllib2.URLError %s - Retrying..." % str(e))
            fail.wait(3)
        except httplib.error as e:
            err("Fail: httplib.error %s - Retrying..." % str(e))
            fail.wait(3)
        except KeyError as e:
            err("Fail: KeyError %s - Retrying..." % str(e))
            fail.wait(3)
        else:
            err('Browsing.  This batch: %d.  Cumulative new: %d.  Cumulative duplicate: %d'
                % (archiver.query_count, archiver.new, archiver.dup))
            if not archiver.more():
                archiver.success()
                break
            # A successful call starts a fresh failure counter.
            fail = Fail()
Пример #11
0
def main(args=sys.argv[1:]):
    """Command-line entry point: list the followers (or following) of users.

    For each user in ``args`` this prints ``<user id><TAB><count>`` and
    then one line per related user whose screen name was resolved
    (prefixed with ``following<TAB><id><TAB>`` when ``show_id`` is set).
    Totals are reported on stderr through ``err``.

    Args:
        args: Command-line arguments without the program name.

    Raises:
        SystemExit: With status 1 on invalid options, when no user is
            given, when OAuth is requested but no stored credentials
            exist, or when interrupted while fetching.
    """
    options = {"oauth": False, "followers": True, "api-rate": False, "show_id": False}
    try:
        parse_args(args, options)
    except GetoptError as e:
        err("I can't do that, %s." % e)
        raise SystemExit(1)

    # exit if no user is given, except if asking for API rate
    if not options["extra_args"] and not options["api-rate"]:
        print(__doc__)
        raise SystemExit(1)

    # authenticate using OAuth with a randomly chosen stored credential
    if options["oauth"]:
        auths = get_auths_data()
        if not auths:
            # Without this guard random.randint(0, -1) raises ValueError.
            err("No OAuth credentials available.")
            raise SystemExit(1)
        random_index = random.randint(0, len(auths) - 1)
        err("Using the number %d oauth user" % random_index)
        credential = auths[random_index]
        auth = OAuth(credential[0], credential[1], CONSUMER_KEY, CONSUMER_SECRET)
    else:
        auth = NoAuth()

    twitter = Twitter(auth=auth, api_version="1.1", domain="api.twitter.com")

    if options["api-rate"]:
        rate_limit_status(twitter)
        return

    # obtain list of followers (or following) for every given user
    for user in options["extra_args"]:
        user_ids, users = [], {}
        user_id = twitter.users.lookup(screen_name=user)[0]["id"]
        try:
            user_ids = follow(twitter, user, options["followers"])
            users = lookup(twitter, user_ids)
        except KeyboardInterrupt:
            err()
            err("Interrupted.")
            raise SystemExit(1)
        print("".join(["%d" % user_id, "\t", "%d" % len(user_ids)]))

        for uid in user_ids:
            if uid not in users:
                # unresolved ids are silently left out of the listing
                continue
            # NOTE(review): encode() returns bytes on Python 3, which
            # cannot be added to str below - confirm target interpreter.
            screen_name = users[uid].encode("utf-8")
            if options["show_id"]:
                print("following" + "\t" + str(uid) + "\t" + screen_name)
            else:
                print(screen_name)

        # print total on stderr to separate from user list on stdout
        if options["followers"]:
            err("Total followers for %s: %i" % (user, len(user_ids)))
        else:
            err("Total users %s is following: %i" % (user, len(user_ids)))
Пример #12
0
def main(args=sys.argv[1:]):
    """Command-line entry point of the tweet archiver.

    Parses ``args`` into the option dict, builds a Twitter client (OAuth
    with a randomly chosen stored credential, or unauthenticated) and, for
    every requested user, loads the previously saved tweets from
    ``<save-dir>/<user>`` (``-favorites`` appended for favorites), fetches
    the statuses, saves the merged result and prints the totals.

    Args:
        args: Command-line arguments without the program name. A single
            ``-`` as user argument makes the user names be read from
            stdin, one per line.

    Raises:
        SystemExit: With status 1 on invalid options, when OAuth is
            requested but no stored credentials exist, or when
            interrupted while statuses are being fetched.
    """
    options = {
        'oauth': False,
        'save-dir': ".",
        'api-rate': False,
        'timeline': "",
        'mentions': "",
        'dms': "",
        'favorites': False,
        'follow-redirects': False,
        'redirect-sites': None,
        'isoformat': False,
    }
    try:
        parse_args(args, options)
    except GetoptError as e:
        err("I can't do that, %s." % e)
        raise SystemExit(1)

    # exit if no user given
    # except if asking for API rate, or archive of timeline or mentions
    if not options['extra_args'] and not (
            options['api-rate'] or options['timeline'] or options['mentions']
            or options['dms']):
        print(__doc__)
        return

    # authenticate using OAuth with a randomly chosen stored credential
    if options['oauth']:
        auths = get_auths_data()
        if not auths:
            # random.randint(0, -1) would fail with an obscure ValueError.
            err("No OAuth credentials available.")
            raise SystemExit(1)
        random_index = random.randint(0, len(auths) - 1)
        print('Using the number %d oauth user' % random_index)
        auth = OAuth(auths[random_index][0], auths[random_index][1],
                     CONSUMER_KEY, CONSUMER_SECRET)
    else:
        auth = NoAuth()

    twitter = Twitter(auth=auth, api_version='1.1', domain='api.twitter.com')

    if options['api-rate']:
        rate_limit_status(twitter)
        return

    # select the module-level text formatter
    global format_text
    if options['follow-redirects'] or options['redirect-sites']:
        if options['redirect-sites']:
            hosts = parse_host_list(options['redirect-sites'])
        else:
            hosts = None
        format_text = functools.partial(expand_format_text, hosts)
    else:
        format_text = direct_format_text

    # read users from command-line or stdin
    users = options['extra_args']
    if len(users) == 1 and users[0] == "-":
        # Blank lines are dropped so no lookup is made for an empty name.
        stripped = (line.strip() for line in sys.stdin)
        users = [name for name in stripped if name]

    # save tweets for every user
    total, total_new = 0, 0
    for user in users:
        filename = options['save-dir'] + os.sep + user
        user_id = twitter.users.lookup(screen_name=user)[0]['id']
        if options['favorites']:
            filename = filename + "-favorites"
        print("* Archiving %s tweets in %s" % (user, filename))

        tweets = {}
        try:
            tweets = load_tweets(filename)
        except Exception as e:
            # A missing or unreadable archive simply means starting fresh.
            err("Error when loading saved tweets: %s - continuing without" %
                str(e))

        before = len(tweets)
        try:
            statuses(twitter,
                     user,
                     tweets,
                     options['mentions'],
                     options['favorites'],
                     isoformat=options['isoformat'])
        except KeyboardInterrupt:
            err()
            err("Interrupted")
            raise SystemExit(1)

        save_tweets(filename, tweets, user_id)
        total += len(tweets)
        new = len(tweets) - before
        total_new += new
        print("Total tweets for %s: %i (%i new)" % (user, len(tweets), new))

    print("Total: %i tweets (%i new) for %i users" %
          (total, total_new, len(users)))
Пример #13
0
def statuses(twitter,
             screen_name,
             tweets,
             mentions=False,
             favorites=False,
             received_dms=None,
             isoformat=False):
    """Get all the statuses for a screen name.

    Fetches portions through ``statuses_portion`` and merges them into
    ``tweets`` (updated in place), walking backwards through the ids. The
    loop ends when a portion adds fewer than 190 new tweets or on a 401 or
    404 response; other failures are retried after ``fail.wait``.
    """
    oldest = None
    fail = Fail()
    # fetch one portion per iteration; every handler either stops the
    # loop or waits and goes around again
    while True:
        try:
            batch = statuses_portion(twitter, screen_name, oldest, mentions,
                                     favorites, received_dms, isoformat)
        except TwitterError as exc:
            if exc.e.code == 401:
                err("Fail: %i Unauthorized (tweets of that user are protected)"
                    % exc.e.code)
                break
            if exc.e.code == 400:
                err("Fail: %i API rate limit exceeded" % exc.e.code)
                limits = twitter.account.rate_limit_status()
                reset_at = limits['reset_time_in_seconds']
                reset_text = time.asctime(time.localtime(reset_at))
                # 5 extra seconds so we do not wake before the reset
                pause = int(limits['reset_time_in_seconds']
                            - time.time()) + 5
                err("Hourly limit of %i requests reached, next reset on %s: "
                    "going to sleep for %i secs"
                    % (limits['hourly_limit'], reset_text, pause))
                fail.wait(pause)
                continue
            if exc.e.code == 404:
                err("Fail: %i This profile does not exist" % exc.e.code)
                break
            if exc.e.code == 502:
                err("Fail: %i Service currently unavailable, retrying..."
                    % exc.e.code)
            else:
                err("Fail: %s\nRetrying..." % str(exc)[:500])
            fail.wait(3)
            continue
        except urllib2.URLError as exc:
            err("Fail: urllib2.URLError %s - Retrying..." % str(exc))
            fail.wait(3)
            continue
        except httplib.error as exc:
            err("Fail: httplib.error %s - Retrying..." % str(exc))
            fail.wait(3)
            continue
        except KeyError as exc:
            err("Fail: KeyError %s - Retrying..." % str(exc))
            fail.wait(3)
            continue

        # success: merge the portion and decide whether to go on
        count_before = len(tweets)
        tweets.update(batch)
        gained = len(tweets) - count_before
        err("Browsing %s statuses, new tweets: %i"
            % (screen_name or "home", gained))
        if gained < 190:
            break
        oldest = min(batch.keys()) - 1  # continue below the lowest id seen
        fail = Fail()
Пример #14
0
def statuses(twitter, screen_name, tweets, mentions=False, favorites=False,
             received_dms=None, isoformat=False):
    """Get all the statuses for a screen name.

    ``tweets`` is updated in place with every portion returned by
    ``statuses_portion``. Paging goes towards older ids and stops once a
    portion contributes fewer than 190 new tweets, or on a 401/404
    response. Other errors are logged and retried after ``fail.wait``.
    """
    max_id = None
    fail = Fail()
    # request portion after portion, lowering max_id each time
    while True:
        try:
            portion = statuses_portion(twitter, screen_name, max_id,
                                       mentions, favorites, received_dms,
                                       isoformat)
        except TwitterError as error:
            if error.e.code == 401:
                err("Fail: %i Unauthorized (tweets of that user are protected)"
                    % error.e.code)
                break
            elif error.e.code == 400:
                err("Fail: %i API rate limit exceeded" % error.e.code)
                rate = twitter.account.rate_limit_status()
                reset_epoch = rate['reset_time_in_seconds']
                reset_label = time.asctime(time.localtime(reset_epoch))
                # wake up slightly after the reset to avoid a race
                seconds = int(rate['reset_time_in_seconds']
                              - time.time()) + 5
                err("Hourly limit of %i requests reached, next reset on %s: "
                    "going to sleep for %i secs"
                    % (rate['hourly_limit'], reset_label, seconds))
                fail.wait(seconds)
                continue
            elif error.e.code == 404:
                err("Fail: %i This profile does not exist" % error.e.code)
                break
            elif error.e.code == 502:
                err("Fail: %i Service currently unavailable, retrying..."
                    % error.e.code)
            else:
                err("Fail: %s\nRetrying..." % str(error)[:500])
            fail.wait(3)
        except urllib2.URLError as error:
            err("Fail: urllib2.URLError %s - Retrying..." % str(error))
            fail.wait(3)
        except httplib.error as error:
            err("Fail: httplib.error %s - Retrying..." % str(error))
            fail.wait(3)
        except KeyError as error:
            err("Fail: KeyError %s - Retrying..." % str(error))
            fail.wait(3)
        else:
            previous_total = len(tweets)
            tweets.update(portion)
            added = len(tweets) - previous_total
            label = screen_name if screen_name else "home"
            err("Browsing %s statuses, new tweets: %i" % (label, added))
            if added < 190:
                break
            # next request starts just below the smallest id received
            max_id = min(portion.keys()) - 1
            fail = Fail()
Пример #15
0
def main(args=sys.argv[1:]):
    """Command-line entry point: list the followers (or following) of users.

    For every user in ``args`` one line per related user with a resolved
    screen name is printed (prefixed by ``<id><TAB>`` when ``show_id`` is
    set); the total is reported on stderr through ``err``.

    Args:
        args: Command-line arguments without the program name.

    Raises:
        SystemExit: With status 1 on invalid options, when no user is
            given, or when interrupted while fetching.
    """
    options = {
        'oauth': False,
        'followers': True,
        'api-rate': False,
        'show_id': False
    }
    try:
        parse_args(args, options)
    except GetoptError as e:
        err("I can't do that, %s." % e)
        raise SystemExit(1)

    # exit if no user is given, except if asking for API rate
    if not options['extra_args'] and not options['api-rate']:
        print(__doc__)
        raise SystemExit(1)

    # authenticate using OAuth, asking for token if necessary
    if options['oauth']:
        oauth_filename = (os.getenv("HOME", "") + os.sep
                          + ".twitter-follow_oauth")
        if not os.path.exists(oauth_filename):
            oauth_dance("Twitter-Follow", CONSUMER_KEY, CONSUMER_SECRET,
                        oauth_filename)
        oauth_token, oauth_token_secret = read_token_file(oauth_filename)
        auth = OAuth(oauth_token, oauth_token_secret, CONSUMER_KEY,
                     CONSUMER_SECRET)
    else:
        auth = NoAuth()

    twitter = Twitter(auth=auth, api_version='1.1', domain='api.twitter.com')

    if options['api-rate']:
        rate_limit_status(twitter)
        return

    # obtain list of followers (or following) for every given user
    for user in options['extra_args']:
        user_ids, users = [], {}
        try:
            user_ids = follow(twitter, user, options['followers'])
            users = lookup(twitter, user_ids)
        except KeyboardInterrupt:
            err()
            err("Interrupted.")
            raise SystemExit(1)

        for uid in user_ids:
            if uid not in users:
                # ids without a resolved screen name are skipped
                continue
            # NOTE(review): on Python 3 encode() yields bytes, which cannot
            # be concatenated with str below - confirm target interpreter.
            name = users[uid].encode("utf-8")
            if options['show_id']:
                print(str(uid) + "\t" + name)
            else:
                print(name)

        # print total on stderr to separate from user list on stdout
        if options['followers']:
            err("Total followers for %s: %i" % (user, len(user_ids)))
        else:
            # Replaces a dangling expression `e` that raised NameError.
            err("Total users %s is following: %i" % (user, len(user_ids)))
Пример #16
0
def main(args=sys.argv[1:]):
    """Command-line entry point of the tweet archiver.

    After option parsing and authentication (OAuth with a randomly picked
    stored credential, or none) the tweets of every requested user are
    archived: the saved tweets are loaded from ``<save-dir>/<user>``
    (``-favorites`` appended for favorites), new statuses are fetched and
    merged, the result is saved and per-user and overall totals printed.

    Args:
        args: Command-line arguments without the program name. A single
            ``-`` as user argument makes the user names be read from
            stdin, one per line.

    Raises:
        SystemExit: With status 1 on invalid options, when OAuth is
            requested but no stored credentials exist, or when
            interrupted while statuses are being fetched.
    """
    options = {
        'oauth': False,
        'save-dir': ".",
        'api-rate': False,
        'timeline': "",
        'mentions': "",
        'dms': "",
        'favorites': False,
        'follow-redirects': False,
        'redirect-sites': None,
        'isoformat': False,
    }
    try:
        parse_args(args, options)
    except GetoptError as e:
        err("I can't do that, %s." % e)
        raise SystemExit(1)

    # exit if no user given
    # except if asking for API rate, or archive of timeline or mentions
    if not options['extra_args'] and not (options['api-rate'] or
                                          options['timeline'] or
                                          options['mentions'] or
                                          options['dms']):
        print(__doc__)
        return

    # authenticate using OAuth with a randomly chosen stored credential
    if options['oauth']:
        auths = get_auths_data()
        if not auths:
            # Without this guard random.randint(0, -1) raises ValueError.
            err("No OAuth credentials available.")
            raise SystemExit(1)
        random_index = random.randint(0, len(auths) - 1)
        print('Using the number %d oauth user' % random_index)
        auth = OAuth(auths[random_index][0], auths[random_index][1], CONSUMER_KEY,
                     CONSUMER_SECRET)
    else:
        auth = NoAuth()

    twitter = Twitter(auth=auth, api_version='1.1', domain='api.twitter.com')

    if options['api-rate']:
        rate_limit_status(twitter)
        return

    # choose the module-level text formatter
    global format_text
    if options['follow-redirects'] or options['redirect-sites']:
        if options['redirect-sites']:
            hosts = parse_host_list(options['redirect-sites'])
        else:
            hosts = None
        format_text = functools.partial(expand_format_text, hosts)
    else:
        format_text = direct_format_text

    # read users from command-line or stdin
    users = options['extra_args']
    if len(users) == 1 and users[0] == "-":
        # Skip blank input lines: an empty name cannot be looked up.
        users = [line.strip() for line in sys.stdin if line.strip()]

    # save tweets for every user
    total, total_new = 0, 0
    for user in users:
        filename = options['save-dir'] + os.sep + user
        user_id = twitter.users.lookup(screen_name=user)[0]['id']
        if options['favorites']:
            filename = filename + "-favorites"
        print("* Archiving %s tweets in %s" % (user, filename))

        tweets = {}
        try:
            tweets = load_tweets(filename)
        except Exception as e:
            # Loading is best effort; archiving continues from scratch.
            err("Error when loading saved tweets: %s - continuing without"
                % str(e))

        before = len(tweets)
        try:
            statuses(twitter, user, tweets, options['mentions'],
                     options['favorites'], isoformat=options['isoformat'])
        except KeyboardInterrupt:
            err()
            err("Interrupted")
            raise SystemExit(1)

        save_tweets(filename, tweets, user_id)
        total += len(tweets)
        new = len(tweets) - before
        total_new += new
        print("Total tweets for %s: %i (%i new)" % (user, len(tweets), new))

    print("Total: %i tweets (%i new) for %i users"
          % (total, total_new, len(users)))
def follow(twitter, screen_name, followers=True):
    """Return every follower (or following) id of ``screen_name``.

    Pages through ``follow_portion`` starting at cursor -1 until it hands
    back cursor 0, merging each page into a de-duplicated list.

    Args:
        twitter: API client handed to ``follow_portion``; it is also asked
            for ``application.rate_limit_status()`` after a 429 response.
        screen_name: The user whose relations are listed.
        followers: True to list followers, False to list following.

    Returns:
        A list of unique ids in no particular order.  It only holds what
        was gathered so far when a 401 response ends the walk early.
    """
    suffix = "ers" if followers else "ing"
    what = "follow%s" % suffix
    gathered = []
    cursor = -1
    fail = Fail()
    while True:
        try:
            page, cursor = follow_portion(twitter, screen_name, cursor,
                                          followers)
        except TwitterError as exc:
            code = exc.e.code
            if code == 401:
                # Protected account: retrying cannot help, so stop here.
                reason = "follow%s of that user are protected" % suffix
                err("Fail: %i Unauthorized (%s)" % (code, reason))
                break
            if code == 429:
                # Rate limited: sleep until the window resets, then retry
                # the same cursor.
                err("Fail: %i API rate limit exceeded" % code)
                rls = twitter.application.rate_limit_status()
                reset_at = rls.rate_limit_reset
                reset_text = time.asctime(time.localtime(reset_at))
                pause = int(reset_at - time.time()) + 5  # avoid race
                err("Interval limit of %i requests reached, next reset on %s: "
                    "going to sleep for %i secs" %
                    (rls.rate_limit_limit, reset_text, pause))
                fail.wait(pause)
                continue
            if code == 502:
                err("Fail: %i Service currently unavailable, retrying..." %
                    code)
            else:
                err("Fail: %s\nRetrying..." % str(exc)[:500])
            fail.wait(3)
            continue
        except urllib2.URLError as exc:
            err("Fail: urllib2.URLError %s - Retrying..." % str(exc))
            fail.wait(3)
            continue
        except httplib.error as exc:
            err("Fail: httplib.error %s - Retrying..." % str(exc))
            fail.wait(3)
            continue
        except KeyError as exc:
            err("Fail: KeyError %s - Retrying..." % str(exc))
            fail.wait(3)
            continue

        # Only reached when the page was fetched without an exception.
        before = len(gathered)
        gathered = list(set(gathered + page))
        err("Browsing %s %s, new: %i"
            % (screen_name, what, len(gathered) - before))
        if cursor == 0:
            break
        # A successful page starts a fresh failure tracker for the next one.
        fail = Fail()
    return gathered
Пример #18
0
def lookup(twitter, user_ids):
    """Resolve an entire list of user ids to screen names.

    The ids are handed to ``lookup_portion`` in batches of 100.  A failed
    batch is retried after ``fail.wait``; a 429 (rate limit) response makes
    the function sleep until the limit window resets before retrying.

    Args:
        twitter: API client passed to ``lookup_portion``; it is also asked
            for ``application.rate_limit_status()`` after a 429 response.
        user_ids: A list of user ids to resolve.

    Returns:
        A dict accumulated from every ``lookup_portion`` result (user id ->
        screen name, going by the function's purpose and how callers index
        it).
    """
    users = {}
    api_limit = 100
    for start in range(0, len(user_ids), api_limit):
        # One bounded slice per batch, reused across retries, instead of
        # copying the whole tail of the list on every attempt.
        batch = user_ids[start:start + api_limit]
        fail = Fail()
        while True:
            try:
                portion = lookup_portion(twitter, batch)
            except TwitterError as e:
                if e.e.code == 429:
                    # API v1.1 reports rate limiting as HTTP 429 and exposes
                    # the reset time through application/rate_limit_status
                    # (the old 400 + account/rate_limit_status protocol
                    # belongs to the retired v1 API).  Same handling as
                    # follow().
                    err("Fail: %i API rate limit exceeded" % e.e.code)
                    rls = twitter.application.rate_limit_status()
                    reset_at = rls.rate_limit_reset
                    reset = time.asctime(time.localtime(reset_at))
                    delay = int(reset_at - time.time()) + 5  # avoid race
                    err("Interval limit of %i requests reached, next reset "
                        "on %s: going to sleep for %i secs"
                        % (rls.rate_limit_limit, reset, delay))
                    fail.wait(delay)
                    continue
                elif e.e.code == 502:
                    err("Fail: %i Service currently unavailable, retrying..."
                        % e.e.code)
                else:
                    err("Fail: %s\nRetrying..." % str(e)[:500])
                fail.wait(3)
            except urllib2.URLError as e:
                err("Fail: urllib2.URLError %s - Retrying..." % str(e))
                fail.wait(3)
            except httplib.error as e:
                err("Fail: httplib.error %s - Retrying..." % str(e))
                fail.wait(3)
            except KeyError as e:
                err("Fail: KeyError %s - Retrying..." % str(e))
                fail.wait(3)
            else:
                users.update(portion)
                err("Resolving user ids to screen names: %i/%i"
                    % (len(users), len(user_ids)))
                break
    return users
Пример #19
0
def main(args=sys.argv[1:]):
    options = {
        'oauth': False,
        'followers': True,
        'api-rate': False,
        'show_id': False
    }
    try:
        parse_args(args, options)
    except GetoptError as e:
        err("I can't do that, %s." % e)
        raise SystemExit(1)

    # exit if no user or given, except if asking for API rate
    if not options['extra_args'] and not options['api-rate']:
        print(__doc__)
        raise SystemExit(1)

    # authenticate using OAuth, asking for token if necessary
    if options['oauth']:
        oauth_filename = (os.getenv("HOME", "") + os.sep +
                          ".twitter-follow_oauth")
        if not os.path.exists(oauth_filename):
            oauth_dance("Twitter-Follow", CONSUMER_KEY, CONSUMER_SECRET,
                        oauth_filename)
        oauth_token, oauth_token_secret = read_token_file(oauth_filename)
        auth = OAuth(oauth_token, oauth_token_secret, CONSUMER_KEY,
                     CONSUMER_SECRET)
    else:
        auth = NoAuth()

    twitter = Twitter(auth=auth, api_version='1.1', domain='api.twitter.com')

    if options['api-rate']:
        rate_limit_status(twitter)
        return

    # obtain list of followers (or following) for every given user
    for user in options['extra_args']:
        user_ids, users = [], {}
        try:
            user_ids = follow(twitter, user, options['followers'])
            users = lookup(twitter, user_ids)
        except KeyboardInterrupt as e:
            err()
            err("Interrupted.")
            raise SystemExit(1)

        for uid in user_ids:
            if options['show_id']:
                try:
                    print(str(uid) + "\t" + users[uid].encode("utf-8"))
                except KeyError:
                    pass

            else:
                try:
                    print(users[uid].encode("utf-8"))
                except KeyError:
                    pass

        # print total on stderr to separate from user list on stdout
        if options['followers']:
            err("Total followers for %s: %i" % (user, len(user_ids)))
        else:
            e