Example no. 1
from time import sleep

from tweepy import Cursor, TweepError

# api, TwitterUser, is_chn, is_in_no_chn and save_non_chn are module-level project objects

def save_user_followers(user):
    try:
        c = Cursor(api.followers, user.user_id)
    except TweepError as e:
        print("tweepy breaks!")
        print(e)
        return
    pages = c.pages()  # build the iterator once; calling c.pages() again restarts paging
    n_pages = 0
    while True:
        try:
            print('taking a rest before moving to the next page')
            sleep(10)
            page = next(pages)
            n_pages += 1
            print("starting a new page of user", user.scrn_name, 'page', n_pages)
        except TweepError as e:
            print("tweepy breaks!")
            print(e)
            continue
        except StopIteration:
            print("Move to next unscanned")
            break

        for tweepy_user in page:
            print("follower -----", tweepy_user.screen_name, "----- found......")
            if TwitterUser.get_by_id(tweepy_user.id) or \
                    is_in_no_chn(tweepy_user.id):
                print('ALREADY in DB, skip')
                continue
            try:
                # keep public accounts, or protected accounts we already follow
                if not tweepy_user.protected or tweepy_user.following:
                    if is_chn(tweepy_user):
                        print("and speaks Chinese! Saving....")
                        TwitterUser.save_tweepy_user(tweepy_user)
                    else:
                        save_non_chn(tweepy_user.id)
                        print("pity, s/he is not a Chinese speaker, next...")
                        continue
            except TweepError as e:
                print("tweepy breaks!")
                print(e)
            try:
                print("remaining hits:", api.rate_limit_status()['remaining_hits'])
            except TweepError as e:
                print("tweepy breaks!")
                print(e)
    user.update_scanned()
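
For comparison, tweepy's Cursor can also be consumed as a plain iterator, which sidesteps the restarted-iterator pitfall entirely. A minimal sketch, assuming the same module-level api object; error handling is left to the caller:

from time import sleep

from tweepy import Cursor

def iter_followers(user_id, pause=10):
    # pages() yields one list of followers per API call
    for page in Cursor(api.followers, user_id=user_id).pages():
        for follower in page:
            yield follower
        sleep(pause)  # rest between pages, as save_user_followers does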
Example no. 2
# same imports and module-level api object as in Example no. 1

def get_follower_ids(tweepy_obj):
    ids_list = []
    try:
        c = Cursor(api.followers_ids, tweepy_obj.id)
    except TweepError as e:
        print('tweepy breaks!')
        print(e)
        return ids_list
    pages = c.pages()  # build the iterator once
    while True:
        try:
            print('new page...')
            page = next(pages)
            sleep(2)
        except TweepError as e:
            print("tweepy breaks!")
            print(e)
            continue
        except StopIteration:
            print('done with', tweepy_obj.id)
            break
        ids_list.extend(page)

    try:
        print("remaining hits:", api.rate_limit_status()['remaining_hits'])
    except TweepError as e:
        print("tweepy breaks!")
        print(e)
    return ids_list
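
Instead of a fixed sleep(2) per page, tweepy 3.x can wait out rate-limit windows on its own. A sketch of the same ID crawl under that assumption; "some_user" is a placeholder screen name:

import tweepy

api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

ids_list = []
for page in tweepy.Cursor(api.followers_ids, screen_name="some_user").pages():
    ids_list.extend(page)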
Example no. 3
from collections import Counter
from argparse import ArgumentParser

from tweepy import Cursor
from make_client import make_client  # project helper, also used in Example no. 5
# get_hashtags is a project helper; a sketch follows this example


def main():
    hashtags = Counter()
    args = ArgumentParser()
    args.add_argument("username", help="username of the profile to parse")
    args.add_argument("-c", "--count", type=int, help="Number of tweets")
    args = args.parse_args()

    client = make_client()
    curr = Cursor(client.user_timeline,
                  screen_name=args.username,
                  count=args.count)

    for page in curr.pages(10):
        for tweet in page:
            hashtags.update(get_hashtags(tweet._json))

    print(hashtags.most_common(20))
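
Example no. 3 calls a get_hashtags helper that is not shown. A plausible sketch, assuming it reads the entities field of the raw tweet JSON:

def get_hashtags(tweet_json):
    # each entry under entities -> hashtags carries the tag in its 'text' field
    entities = tweet_json.get('entities', {})
    return [h['text'].lower() for h in entities.get('hashtags', [])]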
Example no. 4

import tweepy
from tweepy import Cursor
import time

# placeholder credentials; never hardcode real keys in source
consumer_key = 'YOUR_CONSUMER_KEY'
consumer_secret = 'YOUR_CONSUMER_SECRET'
access_token = 'YOUR_ACCESS_TOKEN'
access_token_secret = 'YOUR_ACCESS_TOKEN_SECRET'

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

api = tweepy.API(auth)

rep_cursor = Cursor(api.followers, id="repcurrie")

for page in rep_cursor.pages():
    try:
        print(len(page))
        for user in page:
            # read the attribute directly instead of parsing str(user)
            print(user.screen_name)
        time.sleep(5)
    except Exception as e:
        print("failed:", e)
        time.sleep(10)

print("done pulling")
Example no. 5
import json
from sys import argv
from tweepy import Cursor
from make_client import make_client

client = make_client()
username = argv[1]

with open("tweets.json", "w+") as fout:
    curr = Cursor(client.user_timeline, screen_name=username, count=150)
    for page in curr.pages(10):
        for tweet in page:
            fout.write(json.dumps(tweet._json) + "\n")  # newline-delimited JSON, one tweet per line
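
Because each tweet is written as one JSON object per line, the dump can be read back line by line without parsing one giant document:

import json

with open("tweets.json") as fin:
    tweets = [json.loads(line) for line in fin]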
Example no. 6

import json
import logging
import os

from tweepy import Cursor
from slugify import slugify  # assuming the python-slugify package

# api and the underscore-prefixed helpers are module-level project objects

def pull_user_timeline(input_line, is_geo_enabled=True):
    user, event, start_date, end_date, extended = _unpack(input_line)
    user = slugify(user)
    logging.info('Pulling tweets for %s...' % user)
    if extended:
        cursor = Cursor(api.user_timeline,
                        id=user,
                        count=200,
                        tweet_mode='extended')
    else:
        cursor = Cursor(api.user_timeline, id=user, count=200)
    filename = _generate_filename(user, None, None)
    geofilename = _generate_geofilename(user, None, None)
    _ensure_folder_exists(filename)
    _ensure_folder_exists(geofilename)
    before_start_date = False
    before_end_date = False
    non_accessible = True
    geo_count = 0
    with open(filename, 'w') as f, open(geofilename, 'w') as gf:
        for n_page, page in enumerate(_twitter_errors_handler(cursor.pages())):
            non_accessible = False

            logging.info('Parsing page %d...' % n_page)
            logging.info('Tweets downloaded: %d' % len(page))
            for tweet in page:
                json.dump(tweet._json, f)
                f.write("\n")

                if start_date and not before_start_date:
                    before_start_date = tweet.created_at < start_date
                if end_date and not before_end_date:
                    before_end_date = tweet.created_at < end_date
                if not is_geo_enabled:
                    continue
                feature, is_geo = _tweet_to_feature(tweet._json)
                if is_geo:
                    geo_count += 1
                assert feature
                json.dump(feature, gf)
                gf.write("\n")
            logging.info('Parsing page %d...Done' % n_page)

    status = _generate_status(before_start_date, before_end_date,
                              non_accessible)
    logging.info('Status for %s is %s' % (user, status))

    if start_date or end_date:
        classified_filename = _generate_filename(user, event, status)
        classified_geofilename = _generate_geofilename(user, event, status)
    else:
        classified_filename = _generate_filename(user, event, 'no_date')
        classified_geofilename = _generate_geofilename(user, event, 'no_date')
    _ensure_folder_exists(classified_filename)
    os.rename(filename, classified_filename)
    if geo_count:
        _ensure_folder_exists(classified_geofilename)
        with open(geofilename, 'r') as inp, open(classified_geofilename,
                                                 'w') as out:
            out.write('{"type": "FeatureCollection", "features":[')
            first = True
            for l in inp:
                if not first:
                    out.write(',')
                else:
                    first = False
                out.write(l)

            out.write("]}")
        os.remove(geofilename)
        logging.info('%d geo features at %s' % (geo_count, user))
    else:
        classified_geofilename = None
        logging.info('No geo features at %s' % user)
    logging.info('Pulling tweets for %s...Done' % user)
    return classified_filename, classified_geofilename
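
pull_user_timeline leans on project helpers (_unpack, _generate_filename, _twitter_errors_handler, _tweet_to_feature, and friends) that are not shown. As one illustration of the contract the loop depends on, a hypothetical _tweet_to_feature might wrap the tweet's GeoJSON coordinates field, always returning a feature plus a flag for whether real coordinates were present:

def _tweet_to_feature(tweet_json):
    # hypothetical sketch: 'coordinates' holds a GeoJSON Point when the tweet is geotagged
    geometry = tweet_json.get('coordinates')
    feature = {
        'type': 'Feature',
        'geometry': geometry,
        'properties': {
            'id': tweet_json['id_str'],
            'text': tweet_json.get('text', ''),
        },
    }
    return feature, geometry is not None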
Example no. 7
import time

from tweepy import Cursor, TweepError

# api and members (the list of accounts to scan) are defined elsewhere in the project

# open a file for storing retweet counts
out = open("retweetsby.txt", 'w')

# store error messages
error_log = open('error_log.txt', 'w')

# retrieve tweets member by member
for member in members:
    print(member)
    try:
        c = Cursor(api.user_timeline, id=member)
        pages = c.pages()  # build the iterator once
        tweets = []
        time_is_right = True
        while time_is_right:
            try:
                page = next(pages)
                tweets.extend(page)
                month = page[-1].created_at.month
                year = page[-1].created_at.year

                # stop once the timeline pages back before 2012
                if year < 2012:
                    time_is_right = False
            except StopIteration:
                break
        time.sleep(1)
    # on API errors, log the member and move on
    except TweepError:
        error_log.write('%s\n' % member)
        continue

    source = member
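
The loop above only stops once a whole page has crossed into 2011, so tweets can still include statuses from before the cutoff. A hedged follow-up filter, assuming the 2012 boundary used above:

from datetime import datetime

cutoff = datetime(2012, 1, 1)
tweets = [t for t in tweets if t.created_at >= cutoff]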