def search(self, keywords, method, clean=False, uniq=True, loc="egypt", lang="ar"):
    if len(keywords) == 0:
        return []
    keyword = " OR ".join(keywords)
    print keyword
    try:
        # Note: the Search API caps results per request, so count=1000 is a ceiling.
        tweets = self.api.search(keyword, count=1000, lang=lang, locale=loc)
        if uniq:
            # Utils.uniq drops duplicates from the sorted result list.
            tweets = list(Utils.uniq(sorted(tweets)))
        if method is not None:
            # Callback mode: hand each result to the caller-supplied method.
            for tweet in tweets:
                method(Tweet(tweet.id, tweet.text, language=lang, searchKeyword=keyword))
            return []
        return [Tweet(tweet.id, tweet.text, language=lang, searchKeyword=keyword)
                for tweet in tweets]
    except tweepy.TweepError as e:
        print e
        # Error code 88 means the rate limit was hit; rotate to the next key set.
        if isinstance(e.message[0], dict) and e.message[0]['code'] == 88:
            print "changed keys"
            self.shiftAuthKeys()
        return []
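# Usage sketch (assumption: `client` is an instance of the wrapper class this
# method belongs to, already authenticated against the Twitter API):
#
#   def handle(tweet):
#       print tweet
#
#   client.search(["cairo", "tahrir"], method=handle)  # callback mode
#   results = client.search(["cairo"], method=None)    # list mode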
def streamloop(self, keywords, method, loc="egypt", lang="ar"):
    print "streamloop mode"
    lastID = 0
    keyword = " OR ".join(keywords)
    print keyword
    while True:
        try:
            tweets = self.api.search(keyword, count=1000, lang=lang, locale=loc)
            # Process oldest-first so lastID ends up at the newest tweet seen.
            tweets = sorted(tweets, key=lambda x: x.id)
            for tweet in tweets:
                if tweet.id > lastID:
                    method(Tweet(tweet.id, tweet.text, language=lang, searchKeyword=keyword))
                    lastID = tweet.id
            time.sleep(self.streamSleep)
        except tweepy.TweepError as e:
            print e
            # Rotate credentials on rate-limit errors (code 88), as in search().
            if isinstance(e.message[0], dict) and e.message[0]['code'] == 88:
                self.shiftAuthKeys()
def readTsvFile(name):
    # Expected columns: id <TAB> date <TAB> user <TAB> text
    file_to_read = open(name, 'r')
    list_of_tweets = []
    for line in file_to_read:
        split_tabs = line.split('\t')
        if len(split_tabs) >= 4:
            # [:-6] drops the last six characters of the date field
            # (presumably a timezone suffix).
            list_of_tweets.append(Tweet(split_tabs[0], split_tabs[3],
                                        _date=split_tabs[1][:-6],
                                        _user=split_tabs[2]))
    file_to_read.close()
    return list_of_tweets
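# Usage sketch (assumption: "tweets.tsv" is a hypothetical file laid out in
# the id / date / user / text column order this parser expects):
#
#   for tweet in readTsvFile("tweets.tsv"):
#       print tweet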
def classifyTweets(tweetFile, history, tag, sSaveFile, offset=3):
    stopWords = getStopWords()
    tweets = []
    for line in IO.readData_by_line(tweetFile):
        tweet = Tweet.Tweet()
        tweet.setTweet(line)
        if tweet.containsTag("#" + tag):
            # Label each tweet with the history value `offset` days after it.
            stamp = tweet.date + timedelta(days=offset)
            if stamp.date() in history:
                tweet.label = history[stamp.date()]
                tweet.removeStopWords(stopWords)
                tweets.append(tweet)
    print len(tweets)
    tweetFile.close()
    IO.writeTweets(sSaveFile, tweets, ['label', 'trimmedMessage'])
def countAllTweets(sFile):
    twitterFile = open(sFile)
    my_dict = dict()
    for line in IO.readData_by_line(twitterFile):
        tweet = Tweet.Tweet()
        tweet.setTweet(line)
        # Bucket the count by date and hour.
        tag = tweet.getDate() + "_" + tweet.getHour()
        my_dict[tag] = my_dict.get(tag, 0) + 1
    twitterFile.close()
    return my_dict
def on_data(self, data):
    # Parse the raw JSON payload once instead of once per field.
    status = json.loads(data)
    text = status['text']
    id = status['id']
    lang = status['lang']
    country = None
    if status['place'] is not None:
        country = status['place']['name']
    tweet = Tweet(id, text, country=country, language=lang)
    self.method(tweet)
    StdOutListener.counter += 1
    # Returning False tells tweepy to disconnect the stream.
    return self.run
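# Usage sketch for wiring this listener into tweepy's streaming API. The
# credential placeholders and the listener's constructor arguments are
# assumptions; only on_data() above is taken from this module.
#
#   def handle(tweet):
#       print tweet
#
#   auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
#   auth.set_access_token(ACCESS_TOKEN, ACCESS_SECRET)
#   stream = tweepy.Stream(auth, StdOutListener(method=handle))
#   stream.filter(track=["egypt"], languages=["ar"])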
def countTweetTags(sFile, method="byDay"):
    twitterFile = open(sFile)
    my_dict = NestedDict()
    # Each inner list groups a hashtag with its aliases; the first entry
    # is used as the column header.
    searchedTags = [['#ibm'], ['#aapl'], ['#msft', '#microsoft'], ['#facebook']]
    tagHeaders = [tagList[0] for tagList in searchedTags]
    for line in IO.readData_by_line(twitterFile):
        tweet = Tweet.Tweet()
        tweet.setTweet(line)
        stamp = tweet.getDate()
        tags = findHashTags(tweet.message, searchedTags)
        for tag in tags:
            if tag in my_dict[stamp]:
                my_dict[stamp][tag] += 1
            else:
                my_dict[stamp][tag] = 1
    twitterFile.close()
    return tagHeaders, my_dict
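# Usage sketch (assumption: "data/scrapeCompanies.txt" is in the line format
# that Tweet.setTweet() expects):
#
#   headers, counts = countTweetTags("data/scrapeCompanies.txt")
#   for stamp in counts:
#       for tag in counts[stamp]:
#           print stamp, tag, counts[stamp][tag]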
def main():
    # Get emotions
    arrEmo = getEmotions()
    # Analyse tweet emotion
    data = open("data/scrapeCompanies.txt")
    tweet = Tweet.Tweet()
    # Read every tweet
    for line in readData_by_line(data):
        tweet.setTweet(line)
        # Check every emotion word; stop at the first match
        value = 0
        for emo in arrEmo:
            word = emo[0]
            if word in tweet.tweet:
                # Update value by emotion polarity
                value = 1 if emo[1] == "1" else -1
            if value != 0:
                break
        tweet.label = value
        print tweet.label, " ", tweet.tweet
    data.close()