def run(self): print('Worker started') # do some initialization here while True: data = self.queue.get(True) try: if data is None: print('ALL FINISHED!!!!', self.conn_number) break print('Starting: ', data) if self.gets_user_id: user = TwitterUser(self.api_hook, user_id=data) else: user = TwitterUser(self.api_hook, screen_name=data) user.populate_tweets_from_api(json_output_directory=os.path.join(self.out_dir,"json")) if len(user.tweets) == 0: if self.to_pickle or self.populate_lists or self.populate_friends or self.populate_followers: print 'pickling and dumping: ', user.screen_name pickle.dump(user, open(os.path.join(self.out_dir,"obj",data), "wb")) continue if self.populate_lists: user.populate_lists_member_of() if self.populate_friends: print 'populating friends, ', user.screen_name user.populate_friends() if self.populate_followers: print 'populating followers, ', user.screen_name user.populate_followers() if self.to_pickle or self.populate_lists or self.populate_friends or self.populate_followers: # Pickle and dump user print 'pickling and dumping (no tweets): ', user.screen_name user.tweets = [] pickle.dump(user, open(os.path.join(self.out_dir,"obj",data), "wb")) except Exception: print('FAILED:: ', data) exc_type, exc_value, exc_traceback = sys.exc_info() print("*** print_tb:") traceback.print_tb(exc_traceback, limit=30, file=sys.stdout) print("*** print_exception:") print('finished collecting data for: ', data)
out_dir = sys.argv[2] os.mkdir(out_dir) #user_sns = [line.strip() for line in open(sys.argv[3]).readlines()] user_sns = ['Neuro_Skeptic'] print 'num users: ', len(user_sns) of = codecs.open("output_fil.tsv", "w", "utf8") for i in range(len(user_sns)): #creates a Twitter User object to fill with information from the API user = TwitterUser(handles[i], screen_name=user_sns[i]) user.populate_tweets_from_api(json_output_filename=out_dir + user_sns[i] + ".json", sleep_var=False) user.populate_followers() rts = 0 gt = 0 for t in user.tweets: if t.retweeted is not None: rts += 1 if t.geocode_info is not None: gt += 1 of.write( tab_stringify_newline([ user.screen_name, gt, rts, len(user.tweets), user.earliest_tweet_time, user.latest_tweet_time, user.name, user.n_total_tweets, user.creation_date, user.followers_count, user.following_count ]))
access_token = "YOUR_ACCESS_TOKEN_HERE" access_token_secret = "YOUR_ACCESS_TOKEN_SECRET_HERE" ## get a "hook", or connection, to the API using your consumer key/secret and access token/secret api_hook = TwitterAPIHook(consumer_key,consumer_secret, access_token=access_token,access_token_secret=access_token_secret) #creates a Twitter User object to fill with information from the API user = TwitterUser(api_hook, screen_name=username_to_collect_data_for) # we call populate_tweets_from_api,which goes to the Twitter API # and collects the user's data it is outputted to the file username_you_put.json # the sleep_var param tells the function it shouldn't worry # about rate limits (we're only collecting for one user, so it doesn't really matter # If you remove the is_gzip argument, the output file will be gzipped print 'populating users tweets!' user.populate_tweets_from_api(json_output_filename=username_to_collect_data_for+".json", sleep_var=False, is_gzip=False) print 'user had {n_tweets} tweets'.format(n_tweets=len(user.tweets)) # we now will collect the user's followers print 'populating user followers!' user.populate_followers(sleep_var=False) print 'user had {n_followers} followers!'.format(n_followers=len(user.follower_ids))
out_dir = sys.argv[2] os.mkdir(out_dir) #user_sns = [line.strip() for line in open(sys.argv[3]).readlines()] user_sns = ['Neuro_Skeptic'] print 'num users: ', len(user_sns) of = codecs.open("output_fil.tsv","w","utf8") for i in range(len(user_sns)): #creates a Twitter User object to fill with information from the API user = TwitterUser(handles[i], screen_name=user_sns[i]) user.populate_tweets_from_api(json_output_filename=out_dir+user_sns[i]+".json", sleep_var=False) user.populate_followers() rts = 0 gt = 0 for t in user.tweets: if t.retweeted is not None: rts+=1 if t.geocode_info is not None: gt +=1 of.write(tab_stringify_newline([user.screen_name, gt, rts, len(user.tweets), user.earliest_tweet_time, user.latest_tweet_time, user.name,
def run(self): print('Worker started') # do some initialization here snow_sample_number = None since_tweet_id = None while True: data = self.queue.get(True) try: if data is None: print 'ALL FINISHED!!!!' break if len(data) == 1 or type(data) is str or type( data) is unicode or type(data) is int: user_identifier = data elif len(data) == 3: user_identifier, snow_sample_number, since_tweet_id = data elif len(data) == 2: if self.step_count: user_identifier, snow_sample_number = data elif self.gets_since_tweet_id: user_identifier, since_tweet_id = data user_identifier = str(user_identifier) print 'Starting: ', data pickle_filename = os.path.join(self.out_dir, "obj", user_identifier) json_filename = os.path.join(self.out_dir, "json", user_identifier + ".json.gz") # Get the user's data if os.path.exists(pickle_filename) and os.path.exists( json_filename) and not self.add_to_file: print '\tgot existing data for: ', data user = pickle.load(open(pickle_filename, "rb")) user.populate_tweets_from_file(json_filename) else: if self.gets_user_id: user = TwitterUser(self.api_hook, user_id=user_identifier) else: user = TwitterUser(self.api_hook, screen_name=user_identifier) print 'populating tweets', user_identifier if self.populate_tweets: if self.save_user_tweets: print 'saving tweets to: ', json_filename of_name, tweet_count = user.populate_tweets_from_api( json_output_filename=json_filename, since_id=since_tweet_id, populate_object_with_tweets=False) else: of_name, tweet_count = user.populate_tweets_from_api( since_id=since_tweet_id, populate_object_with_tweets=False) if self.tweet_count_file: self.tweet_count_file.write( str(user_identifier) + "\t" + str(tweet_count) + "\n") if self.populate_lists: print 'populating lists', user.screen_name user.populate_lists_member_of() if self.populate_friends: print 'populating friends, ', user.screen_name user.populate_friends() if self.populate_followers: print 'populating followers, ', user.screen_name user.populate_followers() if self.save_user_data and \ (self.always_pickle or self.populate_lists or self.populate_friends or self.populate_followers): # Pickle and dump user #print 'pickling and dumping (no tweets): ', user.screen_name user.tweets = [] pickle.dump(user, open(pickle_filename, "wb")) # now add to queue if necessary if snow_sample_number is not None and snow_sample_number < self.step_count: for user_identifier in self.add_users_to_queue_function( user): self.queue.put( [str(user_identifier), snow_sample_number + 1]) if self.post_process_function: self.post_process_function(user) except KeyboardInterrupt as e: print e break except Exception: print('FAILED:: ', data) exc_type, exc_value, exc_traceback = sys.exc_info() print("*** print_tb:") traceback.print_tb(exc_traceback, limit=30, file=sys.stdout) print("*** print_exception:")