def run(self):
    """Process user identifiers from ``self.queue`` until a ``None`` sentinel.

    For every identifier a ``TwitterUser`` is built (by user id when
    ``self.gets_user_id`` is set, otherwise by screen name) and
    ``populate_tweets_from_api`` is called with ``<out_dir>/json`` as the
    JSON output directory.  Lists, friends and followers are then populated
    according to ``self.populate_lists`` / ``self.populate_friends`` /
    ``self.populate_followers``.  When ``self.to_pickle`` or any of those
    flags is set, the user object is pickled (with its tweets cleared) to
    ``<out_dir>/obj/<identifier>``.

    Users that come back with no tweets are pickled as-is and skipped: no
    further population is attempted and no "finished" line is printed.

    Any exception raised while handling one identifier is reported on stdout
    (stack trace followed by the exception itself) and the loop carries on
    with the next queue item.
    """
    print('Worker started')
    # do some initialization here

    while True:
        data = self.queue.get(True)
        try:
            if data is None:
                print('ALL FINISHED!!!! %s' % (self.conn_number,))
                break

            print('Starting: %s' % (data,))
            if self.gets_user_id:
                user = TwitterUser(self.api_hook, user_id=data)
            else:
                user = TwitterUser(self.api_hook, screen_name=data)

            user.populate_tweets_from_api(
                json_output_directory=os.path.join(self.out_dir, "json"))

            # str() so that integer user ids are usable as a file name.
            pickle_path = os.path.join(self.out_dir, "obj", str(data))
            should_pickle = (self.to_pickle or self.populate_lists
                             or self.populate_friends
                             or self.populate_followers)

            if len(user.tweets) == 0:
                if should_pickle:
                    print('pickling and dumping: %s' % (user.screen_name,))
                    # Context manager closes the handle even if dump() fails.
                    with open(pickle_path, "wb") as pickle_file:
                        pickle.dump(user, pickle_file)
                # Nothing more to collect for a user without tweets.
                continue

            if self.populate_lists:
                user.populate_lists_member_of()

            if self.populate_friends:
                print('populating friends, %s' % (user.screen_name,))
                user.populate_friends()

            if self.populate_followers:
                print('populating followers, %s' % (user.screen_name,))
                user.populate_followers()

            if should_pickle:
                # Pickle and dump user; tweets are dropped first so the
                # pickle only carries the user-level data.
                print('pickling and dumping (no tweets): %s'
                      % (user.screen_name,))
                user.tweets = []
                with open(pickle_path, "wb") as pickle_file:
                    pickle.dump(user, pickle_file)
        except Exception:
            # Best-effort worker: report the failure and keep consuming.
            print('FAILED:: %s' % (data,))
            exc_type, exc_value, exc_traceback = sys.exc_info()
            print("*** print_tb:")
            traceback.print_tb(exc_traceback, limit=30, file=sys.stdout)
            print("*** print_exception:")
            traceback.print_exception(exc_type, exc_value, exc_traceback,
                                      limit=30, file=sys.stdout)

        print('finished collecting data for: %s' % (data,))
示例#2
0
# Output directory is taken from the second command-line argument; os.mkdir
# raises if it already exists, so earlier results are never overwritten.
out_dir = sys.argv[2]
os.mkdir(out_dir)

#user_sns = [line.strip() for line in open(sys.argv[3]).readlines()]
user_sns = ['Neuro_Skeptic']

print('num users: %d' % len(user_sns))

# One tab-separated summary row per user.  The ``with`` block guarantees the
# file is flushed and closed even if an API call fails part-way through.
with codecs.open("output_fil.tsv", "w", "utf8") as of:
    for i, screen_name in enumerate(user_sns):
        # creates a Twitter User object to fill with information from the API
        # (``handles`` is defined outside this snippet; presumably one API
        # hook per user -- confirm against the full script)
        user = TwitterUser(handles[i], screen_name=screen_name)

        # os.path.join keeps the JSON inside out_dir whether or not the
        # command-line argument ends with a path separator.
        user.populate_tweets_from_api(
            json_output_filename=os.path.join(out_dir, screen_name + ".json"),
            sleep_var=False)
        user.populate_followers()

        # Count tweets whose ``retweeted`` / ``geocode_info`` fields are set.
        rts = sum(1 for t in user.tweets if t.retweeted is not None)
        gt = sum(1 for t in user.tweets if t.geocode_info is not None)

        of.write(
            tab_stringify_newline([
                user.screen_name, gt, rts,
                len(user.tweets), user.earliest_tweet_time,
                user.latest_tweet_time,
                user.name, user.n_total_tweets, user.creation_date,
                user.followers_count, user.following_count
            ]))
access_token = "YOUR_ACCESS_TOKEN_HERE"
access_token_secret = "YOUR_ACCESS_TOKEN_SECRET_HERE"


# Open a "hook" (connection) to the API from the consumer key/secret and the
# access token/secret.
api_hook = TwitterAPIHook(
    consumer_key,
    consumer_secret,
    access_token=access_token,
    access_token_secret=access_token_secret,
)

# Create the Twitter user object that the API calls below fill in.
user = TwitterUser(api_hook, screen_name=username_to_collect_data_for)


# populate_tweets_from_api goes to the Twitter API, collects the user's data
# and writes it to the file <username>.json.  sleep_var=False tells the
# function not to worry about rate limits (only one user is collected here,
# so it does not really matter).  If the is_gzip argument is removed, the
# output file will be gzipped.
print('populating users tweets!')
tweets_json_name = username_to_collect_data_for + ".json"
user.populate_tweets_from_api(
    json_output_filename=tweets_json_name,
    sleep_var=False,
    is_gzip=False,
)
tweet_total = len(user.tweets)
print('user had {n_tweets} tweets'.format(n_tweets=tweet_total))

# Next, collect the user's followers.
print('populating user followers!')
user.populate_followers(sleep_var=False)
follower_total = len(user.follower_ids)
print('user had {n_followers} followers!'.format(n_followers=follower_total))




# Collect tweets and followers for each screen name in user_sns and write one
# tab-separated summary row per user to output_fil.tsv.
#
# The output directory comes from the second command-line argument; os.mkdir
# raises if that directory already exists.
out_dir = sys.argv[2]
os.mkdir(out_dir)

#user_sns = [line.strip() for line in open(sys.argv[3]).readlines()]
user_sns = ['Neuro_Skeptic']

print 'num users: ', len(user_sns)

# NOTE(review): this handle is never closed in the visible code.
of = codecs.open("output_fil.tsv","w","utf8")
for i in range(len(user_sns)):
    #creates a Twitter User object to fill with information from the API
    # (``handles`` is defined outside this snippet; presumably one API hook
    # per user -- confirm against the full script)
    user = TwitterUser(handles[i], screen_name=user_sns[i])
    # The JSON path is plain string concatenation, so out_dir must end with a
    # path separator for the file to land inside the directory.
    user.populate_tweets_from_api(json_output_filename=out_dir+user_sns[i]+".json",
                                  sleep_var=False)
    user.populate_followers()
    # rts / gt count the tweets whose ``retweeted`` / ``geocode_info``
    # attribute is not None.
    rts = 0
    gt = 0
    for t in user.tweets:
        if t.retweeted is not None:
            rts+=1
        if t.geocode_info is not None:
            gt +=1

    # NOTE(review): the list passed to tab_stringify_newline is not closed in
    # the visible source; the remaining columns are missing from this view.
    of.write(tab_stringify_newline([user.screen_name,
                                 gt,
                                 rts,
                                len(user.tweets),
                                user.earliest_tweet_time,
                                user.latest_tweet_time,
                                user.name,
    def run(self):
        """Worker loop: process queue items until a ``None`` sentinel arrives.

        A queue item is either a bare user identifier (string or integer) or
        a sequence carrying extra per-user state:

        * ``[identifier]``
        * ``[identifier, snow_sample_number]`` when ``self.step_count`` is set
        * ``[identifier, since_tweet_id]`` when ``self.gets_since_tweet_id``
          is set (and ``self.step_count`` is not)
        * ``[identifier, snow_sample_number, since_tweet_id]``

        Any other shape raises ``ValueError`` for that item, which is
        reported like every other per-item failure.

        If both ``<out_dir>/obj/<identifier>`` and
        ``<out_dir>/json/<identifier>.json.gz`` exist and ``self.add_to_file``
        is false, the user is loaded from those files.  Otherwise a
        ``TwitterUser`` is created and populated according to the
        ``self.populate_*`` flags, and pickled (with tweets cleared) when
        ``self.save_user_data`` is set together with ``self.always_pickle``
        or one of the lists/friends/followers flags.

        When a ``snow_sample_number`` below ``self.step_count`` was supplied,
        every identifier returned by ``self.add_users_to_queue_function(user)``
        is queued with the sample number incremented by one.  Finally
        ``self.post_process_function(user)`` is called if it is set.

        ``KeyboardInterrupt`` ends the loop; any other exception is printed
        to stdout (stack trace plus the exception) and the loop continues.
        """
        print('Worker started')
        # do some initialization here

        # ``unicode`` and ``long`` only exist on Python 2.
        try:
            scalar_types = (str, unicode, int, long)
        except NameError:
            scalar_types = (str, int)

        while True:
            data = self.queue.get(True)

            # Per-item state: reset for every item so values supplied with a
            # previous queue entry never leak into this one.
            snow_sample_number = None
            since_tweet_id = None

            try:
                if data is None:
                    print('ALL FINISHED!!!!')
                    break

                # Type check first: len() is undefined for integers.
                if isinstance(data, scalar_types):
                    user_identifier = data
                elif len(data) == 1:
                    (user_identifier,) = data
                elif len(data) == 3:
                    user_identifier, snow_sample_number, since_tweet_id = data
                elif len(data) == 2 and self.step_count:
                    user_identifier, snow_sample_number = data
                elif len(data) == 2 and self.gets_since_tweet_id:
                    user_identifier, since_tweet_id = data
                else:
                    # Refuse to fall through with an identifier left over
                    # from an earlier iteration.
                    raise ValueError(
                        'unrecognised queue item: %r' % (data,))

                user_identifier = str(user_identifier)

                print('Starting: %s' % (data,))

                pickle_filename = os.path.join(self.out_dir, "obj",
                                               user_identifier)
                json_filename = os.path.join(self.out_dir, "json",
                                             user_identifier + ".json.gz")

                # Get the user's data
                if os.path.exists(pickle_filename) and os.path.exists(
                        json_filename) and not self.add_to_file:
                    print('\tgot existing data for: %s' % (data,))
                    # NOTE(review): pickle.load runs arbitrary code from the
                    # file; acceptable only because this worker wrote it.
                    with open(pickle_filename, "rb") as pickle_file:
                        user = pickle.load(pickle_file)
                    user.populate_tweets_from_file(json_filename)
                else:
                    if self.gets_user_id:
                        user = TwitterUser(self.api_hook,
                                           user_id=user_identifier)
                    else:
                        user = TwitterUser(self.api_hook,
                                           screen_name=user_identifier)

                    print('populating tweets %s' % (user_identifier,))

                    if self.populate_tweets:
                        if self.save_user_tweets:
                            print('saving tweets to: %s' % (json_filename,))
                            of_name, tweet_count = user.populate_tweets_from_api(
                                json_output_filename=json_filename,
                                since_id=since_tweet_id,
                                populate_object_with_tweets=False)
                        else:
                            of_name, tweet_count = user.populate_tweets_from_api(
                                since_id=since_tweet_id,
                                populate_object_with_tweets=False)

                        if self.tweet_count_file:
                            self.tweet_count_file.write(
                                str(user_identifier) + "\t" +
                                str(tweet_count) + "\n")

                    if self.populate_lists:
                        print('populating lists %s' % (user.screen_name,))
                        user.populate_lists_member_of()

                    if self.populate_friends:
                        print('populating friends, %s' % (user.screen_name,))
                        user.populate_friends()

                    if self.populate_followers:
                        print('populating followers, %s'
                              % (user.screen_name,))
                        user.populate_followers()

                    if self.save_user_data and \
                        (self.always_pickle or self.populate_lists
                         or self.populate_friends or self.populate_followers):
                        # Pickle and dump user (tweets dropped first).
                        user.tweets = []
                        with open(pickle_filename, "wb") as pickle_file:
                            pickle.dump(user, pickle_file)

                # now add to queue if necessary
                if snow_sample_number is not None and snow_sample_number < self.step_count:
                    # Separate loop name so the current identifier is not
                    # overwritten by the users being queued.
                    for next_identifier in self.add_users_to_queue_function(
                            user):
                        self.queue.put(
                            [str(next_identifier), snow_sample_number + 1])

                if self.post_process_function:
                    self.post_process_function(user)

            except KeyboardInterrupt as e:
                print(e)
                break
            except Exception:
                # Best-effort worker: report the failure and keep consuming.
                print('FAILED:: %s' % (data,))
                exc_type, exc_value, exc_traceback = sys.exc_info()
                print("*** print_tb:")
                traceback.print_tb(exc_traceback, limit=30, file=sys.stdout)
                print("*** print_exception:")
                traceback.print_exception(exc_type, exc_value, exc_traceback,
                                          limit=30, file=sys.stdout)