def search(self,
               keywords,
               method,
               clean=False,
               uniq=True,
               loc="egypt",
               lang="ar"):
        """Run one search for tweets matching any of ``keywords``.

        The keywords are joined with ``" OR "`` into a single query that is
        sent through ``self.api.search``.

        Args:
            keywords: sequence of search terms. An empty (or ``None``)
                value short-circuits and returns ``[]``.
            method: optional callback. When not ``None`` it is called once
                per result with a ``Tweet`` and nothing is collected.
            clean: accepted for interface compatibility; not used here.
            uniq: when exactly ``True``, results are passed through
                ``Utils.uniq(sorted(...))`` before being processed.
            loc: forwarded to the API as ``locale``.
            lang: forwarded to the API as ``lang`` and stored on each
                ``Tweet`` as ``language``.

        Returns:
            A list of ``Tweet`` objects when ``method`` is ``None``;
            ``None`` when ``method`` handled the results; ``[]`` when there
            are no keywords or the API raised ``tweepy.TweepError``.
        """
        if not keywords:
            return []

        keyword = " OR ".join(keywords)
        print(keyword)
        try:
            tweets = self.api.search(keyword,
                                     count=1000,
                                     lang=lang,
                                     locale=loc)
            if uniq is True:
                tweets = list(Utils.uniq(sorted(tweets)))

            # Lazily wrap each raw result; consumed inside this try block so
            # a TweepError raised while iterating is still handled below.
            wrapped = (Tweet(tweet.id,
                             tweet.text,
                             language=lang,
                             searchKeyword=keyword)
                       for tweet in tweets)

            if method is not None:
                for item in wrapped:
                    method(item)
                return None

            return list(wrapped)

        except tweepy.TweepError as e:
            print(e)
            # The error payload is expected to be a list of dicts carrying a
            # 'code' entry; anything else (plain string, empty message) is
            # simply treated as "not a rate-limit error" instead of crashing.
            details = getattr(e, "message", None)
            first = None
            if isinstance(details, (list, tuple)) and details:
                first = details[0]
            rate_limited = (
                (isinstance(first, dict) and first.get('code') == 88)
                or getattr(e, "api_code", None) == 88)
            if rate_limited:
                # Code 88 is treated as "rate limit hit": rotate credentials.
                print("changedkeys")
                self.shiftAuthKeys()
            return []
    def streamloop(self, keywords, method, loc="egypt", lang="ar"):
        """Poll the search API forever and feed unseen tweets to ``method``.

        The keywords are joined with ``" OR "`` into one query. Each round
        the results are ordered by ``id`` and only tweets whose id is
        greater than the highest id already delivered are passed on, so a
        tweet is handed to ``method`` at most once.

        Args:
            keywords: sequence of search terms.
            method: callback invoked with a ``Tweet`` for every new result.
            loc: forwarded to the API as ``locale``.
            lang: forwarded to the API as ``lang`` and stored on each
                ``Tweet`` as ``language``.

        This method never returns; it sleeps ``self.streamSleep`` seconds
        between rounds, including after a failed round so that a persistent
        API error does not turn into a busy loop.
        """
        print("streamloop mode")
        lastID = 0
        keyword = " OR ".join(keywords)
        print(keyword)

        while True:
            try:
                tweets = self.api.search(keyword,
                                         count=1000,
                                         lang=lang,
                                         locale=loc)

                for tweet in sorted(tweets, key=lambda x: x.id):
                    if tweet.id > lastID:
                        method(
                            Tweet(tweet.id,
                                  tweet.text,
                                  language=lang,
                                  searchKeyword=keyword))
                        lastID = tweet.id
            except tweepy.TweepError as e:
                print(e)
                # Extract the API error code defensively: the payload is
                # expected to be a list of dicts, but may be a plain string.
                details = getattr(e, "message", None)
                first = None
                if isinstance(details, (list, tuple)) and details:
                    first = details[0]
                rate_limited = (
                    (isinstance(first, dict) and first.get('code') == 88)
                    or getattr(e, "api_code", None) == 88)
                if rate_limited:
                    # Code 88 is treated as "rate limit hit": rotate keys.
                    self.shiftAuthKeys()
            time.sleep(self.streamSleep)
# Example #3
# 0
 def readTsvFile(name):
     """Read a tab-separated file and build a ``Tweet`` per usable line.

     A line is used only when it splits into at least four tab-separated
     fields. Field 0 and field 3 are passed positionally to ``Tweet``;
     field 1 with its last six characters removed is passed as ``_date``
     and field 2 as ``_user``. Lines with fewer fields are skipped.

     Args:
         name: path of the file to read.

     Returns:
         List of ``Tweet`` objects in file order.
     """
     list_of_tweets = []
     # Context manager guarantees the handle is closed, even on error.
     with open(name, 'r') as file_to_read:
         for line in file_to_read:
             split_tabs = line.split('\t')
             if len(split_tabs) >= 4:
                 list_of_tweets.append(
                     Tweet(split_tabs[0],
                           split_tabs[3],
                           _date=split_tabs[1][:-6],
                           _user=split_tabs[2]))
     return list_of_tweets
# Example #4
# 0
def classifyTweets(tweetFile, history, tag, sSaveFile, offset=3):
    """Label tweets that carry ``#tag`` and write them to ``sSaveFile``.

    Every line read from ``tweetFile`` is loaded into a ``Tweet``. A tweet
    is kept only if it contains the hashtag and its date shifted forward by
    ``offset`` days is a key of ``history``; the matching ``history`` value
    becomes the tweet's ``label`` and stop words are removed from it.

    The number of kept tweets is printed, ``tweetFile`` is closed, and the
    ``label`` and ``trimmedMessage`` fields are written via
    ``IO.writeTweets``.
    """
    stopWords = getStopWords()
    labelled = []
    for rawLine in IO.readData_by_line(tweetFile):
        entry = Tweet.Tweet()
        entry.setTweet(rawLine)

        # Guard clauses: skip anything that cannot be labelled.
        if not entry.containsTag("#" + tag):
            continue
        shiftedDay = (entry.date + timedelta(days=offset)).date()
        if shiftedDay not in history:
            continue

        entry.label = history[shiftedDay]
        entry.removeStopWords(stopWords)
        labelled.append(entry)

    print(len(labelled))
    tweetFile.close()
    IO.writeTweets(sSaveFile, labelled, ['label', 'trimmedMessage'])
# Example #5
# 0
def countAllTweets(sFile):
    """Count tweets in ``sFile`` per ``<date>_<hour>`` bucket.

    Each line yielded by ``IO.readData_by_line`` is loaded into a
    ``Tweet``; the bucket key is ``tweet.getDate() + "_" + tweet.getHour()``.

    Args:
        sFile: path of the tweet file to read.

    Returns:
        dict mapping each bucket key to the number of tweets seen for it.
    """
    my_dict = dict()
    # The with-block closes the file even if parsing a line raises.
    with open(sFile) as twitterFile:
        for line in IO.readData_by_line(twitterFile):
            tweet = Tweet.Tweet()
            tweet.setTweet(line)

            tag = tweet.getDate() + "_" + tweet.getHour()
            my_dict[tag] = my_dict.get(tag, 0) + 1
    return my_dict
    def on_data(self, data):
        """Handle one raw message from the stream.

        ``data`` is a JSON string. When it decodes to an object that has a
        ``text`` entry, a ``Tweet`` is built from its ``id``, ``text``,
        ``lang`` and (if a ``place`` is present) the place ``name`` as
        country, passed to ``self.method``, and ``StdOutListener.counter``
        is incremented. Messages without ``text`` are ignored instead of
        raising ``KeyError``.

        Returns:
            ``True`` while ``self.run`` is truthy, otherwise ``False``.
        """
        # Decode once; the original re-parsed the same string per field.
        payload = json.loads(data)

        if isinstance(payload, dict) and 'text' in payload:
            place = payload.get('place')
            country = place['name'] if place is not None else None

            tweet = Tweet(payload['id'],
                          payload['text'],
                          country=country,
                          language=payload.get('lang'))
            self.method(tweet)
            StdOutListener.counter += 1

        return bool(self.run)
# Example #7
# 0
def countTweetTags(sFile, method="byDay"):
    """Count occurrences of a fixed set of hashtags per tweet date.

    Each line yielded by ``IO.readData_by_line`` is loaded into a
    ``Tweet``; ``findHashTags`` is asked which of the searched tags occur
    in ``tweet.message`` and the counter at ``[tweet.getDate()][tag]`` is
    incremented for each one returned.

    Args:
        sFile: path of the tweet file to read.
        method: accepted for interface compatibility; not used here.

    Returns:
        ``(tagHeaders, my_dict)`` where ``tagHeaders`` holds the first tag
        of every searched group and ``my_dict`` is the ``NestedDict`` of
        counts keyed by date, then tag.
    """
    searchedTags = [['#ibm'], ['#aapl'], ['#msft', '#microsoft'], ['#facebook']]
    tagHeaders = [tagList[0] for tagList in searchedTags]

    my_dict = NestedDict()
    # The with-block closes the file even if parsing a line raises.
    with open(sFile) as twitterFile:
        for line in IO.readData_by_line(twitterFile):
            tweet = Tweet.Tweet()
            tweet.setTweet(line)

            stamp = tweet.getDate()
            for tag in findHashTags(tweet.message, searchedTags):
                if tag in my_dict[stamp]:
                    my_dict[stamp][tag] += 1
                else:
                    my_dict[stamp][tag] = 1
    return tagHeaders, my_dict
# Example #8
# 0
def main():
    """Label every tweet in ``data/scrapeCompanies.txt`` and print it.

    For each line the tweet text is scanned against the entries returned by
    ``getEmotions()`` in order. The first entry whose word (``emo[0]``)
    occurs in the tweet decides the label: ``1`` when ``emo[1] == "1"``,
    otherwise ``-1``. Tweets matching no entry get label ``0``. The label
    and the tweet text are printed, separated by three spaces.
    """
    #Get Emotions
    arrEmo = getEmotions()
    #Analyse Tweet Emotion
    tweet = Tweet.Tweet()
    # The with-block ensures the input file is closed when done or on error.
    with open("data/scrapeCompanies.txt") as data:
        #Read every tweet
        for line in readData_by_line(data):
            tweet.setTweet(line)
            #Check every emotion; the first hit wins
            value = 0
            for emo in arrEmo:
                if emo[0] in tweet.tweet:
                    value = 1 if emo[1] == "1" else -1
                    break
            tweet.label = value
            # Same output as the print statement `label, " ", tweet`.
            print("%s   %s" % (tweet.label, tweet.tweet))