def collect_replies(self):
    """Collect replies for all tweets from the query, using twarc.

    For each parent tweet in ``self.tweets`` this fetches up to 30
    replies (non-recursive) and stores them in ``self.dict`` keyed by
    the parent tweet's id.

    :return: None — results are accumulated on ``self.dict``.
    """
    twarc = Twarc(self.ak, self.aks, self.at, self.ats)
    reply_count = 0

    # Loop through all parent tweets from the query.
    for tweet in self.tweets:
        replies = []
        # Iterator over replies from twarc (direct replies only).
        reps = twarc.replies(self.tweepy_to_twarc(tweet), recursive=False)
        # The first item yielded is the parent tweet itself, so discard it.
        next(reps)

        # Collect at most 30 replies for this tweet.
        while len(replies) < 30:
            try:
                replies.append(next(reps))
            except StopIteration:
                break
            except Exception as e:
                # BUG FIX: the original looped again without making any
                # progress here, so a persistent error from next(reps)
                # (e.g. a dead connection) spun forever. Log and stop
                # collecting for this tweet instead.
                print('error: ', e)
                break

        self.dict[tweet.id] = replies  # map parent tweet id -> its replies
        reply_count += len(replies)

    print(reply_count, ' replies were collected')
# Dump one POI's timeline (and the replies to recent tweets) as JSON lines.
# NOTE(review): `name`, `t` (twarc client), `time_range`, `json` and
# `datetime` are assumed to be provided by the surrounding script — confirm
# against the caller.
print(name)
file_name = r"C:\\Users\\ravik\\OneDrive\\Desktop\\UsertimelineReplies\\" + str(name) + ".json"
max_poi_tweet = 0
with open(file_name, "a", encoding='utf-8') as file:
    for tweet in t.timeline(screen_name=name):
        # Skip retweets entirely.
        if 'retweeted_status' in tweet:
            print("Its a retweet")
            continue
        # Cap the number of original tweets collected per user.
        if max_poi_tweet > 3000:
            break
        json.dump(tweet, file, ensure_ascii=False)
        file.write("\n")
        max_poi_tweet += 1
        max_replies = 0
        # Only fetch replies for tweets inside the configured time window.
        if datetime.strptime(tweet['created_at'], "%a %b %d %H:%M:%S %z %Y").date() >= time_range:
            for reply in t.replies(tweet):
                # BUG FIX: the original tested `tweet` here, which is always
                # false at this point (retweets were skipped above), so
                # retweeted *replies* were never filtered out.
                if 'retweeted_status' in reply:
                    print("Its a retweet")
                    continue
                json.dump(reply, file, ensure_ascii=False)
                file.write("\n")
                max_replies += 1
                print("{} tweet {} reply number {}".format(name, max_poi_tweet, max_replies))
                # Cap replies collected per tweet.
                if max_replies > 21:
                    break
        else:
            print("{} tweet {} Date didnt satisfy".format(name, max_poi_tweet))
        # time.sleep(10)
def f(file):
    """Attach replies and reply-sentiment scores to every tweet in a CSV.

    Reads ``base_dir/file``, resolves each tweet id from its permalink,
    fetches replies via twarc (rotating through several API credentials to
    spread out rate limits), stores the joined reply texts back on the
    dataframe, scores each reply with VADER, writes the annotated CSV and
    hands the scored replies to ``classification_tweet``.

    :param file: CSV file name, relative to the configured input path.
    """
    # BUG FIX: this function was Python 2 (`print file`) while the rest of
    # the file is Python 3 — converted prints to calls, the removed `.ix`
    # indexer to `.loc`, and dropped `.encode('utf-8')` (joining bytes with
    # a str delimiter raises TypeError on py3).
    print(file)
    today = datetime.date.today()
    margin = datetime.timedelta(days=7)  # only fetch replies for tweets <= 7 days old
    analyzer = SentimentIntensityAnalyzer()
    # Credential objects rotated to spread requests over several API keys.
    acess_changer = [c8, c9, c10, c7, c1, c2, c3, c4]
    base_dir = c_p.input_csv_path
    output_dir = base_dir + 'reply/'
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    # NOTE(review): `error_bad_lines` is deprecated (removed in pandas 2.0);
    # kept for compatibility with the pandas version this project pins.
    data = pd.read_csv(base_dir + file, low_memory=False, error_bad_lines=False)
    data['reply'] = ''
    reply_texts = []  # every reply text seen, across all tweets (was `reply`,
                      # which the sentiment loop below then shadowed)
    acess_changer_counter = 0
    max_tweet = 35    # rotate credentials every 35 tweets
    config_key = 0

    for index, tweet in enumerate(data['text']):
        print(file + " " + str(index))
        t_id = data.loc[index, 'permalink'].split('/')[-1:][0]
        tweet_date = data.loc[index, 'date'].split()[0]
        # Switch to the next credential set every `max_tweet` tweets.
        if acess_changer_counter % max_tweet == 0:
            access_point = acess_changer[config_key % len(acess_changer)]
            config_key += 1
            t = Twarc(access_point.consumer_key,
                      access_point.consumer_secret,
                      access_point.access_token,
                      access_point.access_token_secret)
        acess_changer_counter += 1
        print(access_point)

        tweet_r = t.tweet(t_id)
        if len(tweet_r) > 0:
            reply_tweets = []
            req_format_date = tuple(map(int, tweet_date.split('-')))
            # Only fetch replies for tweets newer than `today - margin`.
            if today - margin <= datetime.date(req_format_date[0],
                                               req_format_date[1],
                                               req_format_date[2]):
                for reply_tweet in t.replies(tweet_r):
                    reply_texts.append(reply_tweet['text'])
                    reply_tweets.append(reply_tweet['text'])
            # delimiter for replies is '==<>=='
            data.loc[index, 'reply'] = '==<>=='.join(reply_tweets)

    # Score every collected reply with VADER.
    df_reply_comment = pd.DataFrame({'replies': reply_texts})
    for index_comment, reply in enumerate(df_reply_comment['replies']):
        score = analyzer.polarity_scores(str(reply))
        df_reply_comment.loc[index_comment, 'positive'] = score['pos']
        df_reply_comment.loc[index_comment, 'negative'] = score['neg']
        df_reply_comment.loc[index_comment, 'neutral'] = score['neu']
        df_reply_comment.loc[index_comment, 'compound'] = score['compound']

    # df_reply_comment.to_csv(output_dir+file[:-4]+'_replies_sentiment.csv')
    data.to_csv(output_dir + file[:-4] + '_comments.csv')
    status, paths_reply_label = classification_tweet(df_reply_comment, output_dir, file[:-4])
tweet_id = sys.argv[1] # Main t = Twarc( consumer_key=consumer_key, consumer_secret=consumer_secret, access_token=access_token, access_token_secret=access_token_secret, ) tweet = t.tweet(tweet_id) if not tweet: raise RuntimeError(f"tweet with id {tweet_id} does not exist") # replies is a generator object replies = t.replies(tweet, True) # List to hold dict of relevant photo data from each of the replies photo_data = [] for reply in replies: # Photos will be in a list stored at reply['extended_entities']['media'] print("Processing next reply") ee = reply.get("extended_entities") if ee is None: continue m = ee.get("media") if m is None: continue