def get_unique_tweets(self, data_dict):
    """Score a previously unseen, non-retweet tweet and publish the result.

    The tweet text is ASCII-encoded (dropping other characters) and
    lower-cased.  Nothing further happens when ``data_dict`` contains a
    ``'retweeted_status'`` key or when ``URL.match`` finds no match at the
    start of the text.  Otherwise the matched string is compared against
    ``self.key_list``:

    * if ``self.key_list`` is empty, the match is only recorded in it;
    * if the match is a substring of any recorded entry, it is ignored;
    * otherwise ``data_dict['text']`` is overwritten with the match, the
      match is recorded, the compound sentiment of the text is computed,
      stored on the ``self.<company>_price`` / ``self.<company>_text``
      attributes for every company whose keywords occur in the text, two
      events are sent through ``p.trigger`` and the three prices are reset
      to 0.

    A ``TypeError`` raised anywhere in that processing is written to
    ``sys.stderr`` and passed to ``self.log_error``; it is not re-raised.

    :param data_dict: tweet payload; must provide a ``'text'`` entry whose
        value supports ``.encode('ascii', 'ignore')``.  It is mutated in
        place when the tweet is accepted.
    :return: None
    """
    # TODO: Implement filter to check if Tweet text starts with 'RT'
    try:
        text = data_dict['text'].encode('ascii', 'ignore').lower()

        # Check for 'retweeted_status' in metadata field to determine
        # if tweet is a retweet (1st check)
        if 'retweeted_status' in data_dict:
            return

        # URL is defined elsewhere in the module -- presumably a compiled
        # regular expression; only its match()/group() API is relied on.
        url_match = URL.match(text)
        if not url_match:
            return
        match_group = url_match.group()

        if not self.key_list:
            # NOTE(review): the very first match only seeds key_list and is
            # never scored or published -- confirm this is intended.
            self.key_list.append(match_group)
            return

        # Substring containment, not equality: a match that is part of an
        # already recorded entry counts as a duplicate.
        if any(match_group in item for item in self.key_list):
            return

        data_dict['text'] = match_group
        print("Inserted text: " + data_dict['text'] + '\n')
        self.key_list.append(match_group)

        sid = SentimentIntensityAnalyzer()
        ss = sid.polarity_scores(text)
        print(ss['compound'])
        score = ss['compound']
        if score < 0:
            # Negative sentiment is weighted four times as heavily.
            score += (3 * score)

        # A company's price/text is only replaced while its current price
        # is non-negative and at least one of its keywords is in the text.
        if any(w in text for w in GOOGLE) and self.google_price >= 0:
            self.google_price = score
            self.google_text = text
        if any(w in text for w in MICROSOFT) and self.microsoft_price >= 0:
            self.microsoft_price = score
            self.microsoft_text = text
        if any(w in text for w in FACEBOOK) and self.facebook_price >= 0:
            self.facebook_price = score
            self.facebook_text = text

        p.trigger('test_channel', 'my_event',
                  {'google': self.google_price,
                   'microsoft': self.microsoft_price,
                   'facebook': self.facebook_price})
        p.trigger('tweet_channel', 'my_event',
                  {'google_text': self.google_text,
                   'microsoft_text': self.microsoft_text,
                   'facebook_text': self.facebook_text})

        # Prices are reset after every publish; the *_text attributes keep
        # their last value.
        self.google_price = 0
        self.microsoft_price = 0
        self.facebook_price = 0
    except TypeError as e:
        sys.stderr.write(str(e) + '\n')
        self.log_error(str(e))
def get_unique_tweets(self, data_dict):
    """Store a non-retweet tweet, de-duplicating those that match ``URL``.

    The tweet text is ASCII-encoded (dropping other characters) and
    lower-cased.  Tweets whose ``data_dict`` contains a
    ``'retweeted_status'`` key are ignored.  For every other tweet the
    current size of ``self.stream_filter.collection`` is printed, then:

    * if ``URL.match`` finds nothing at the start of the text, the tweet is
      inserted into the collection unchanged;
    * if ``self.key_list`` is empty, the match is only recorded in it;
    * if the match is a substring of any recorded entry, it is ignored;
    * otherwise ``data_dict['text']`` is overwritten with the match, the
      match is recorded, the tweet is inserted into the collection and,
      when ``self.wtf is True``, the text is appended to ``self.filename``
      as one JSON value per line.

    A ``TypeError`` raised anywhere in that processing is written to
    ``sys.stderr`` and passed to ``self.log_error``; it is not re-raised.

    :param data_dict: tweet payload; must provide a ``'text'`` entry whose
        value supports ``.encode('ascii', 'ignore')``.  It is mutated in
        place when a URL-matching tweet is accepted.
    :return: None
    """
    # TODO: Implement filter to check if Tweet text starts with 'RT'
    try:
        text = data_dict['text'].encode('ascii', 'ignore').lower()

        # Check for 'retweeted_status' in metadata field to determine
        # if tweet is a retweet (1st check)
        if 'retweeted_status' in data_dict:
            return

        print("Number of tweets in collection: " +
              str(self.stream_filter.collection.count()))

        # URL is defined elsewhere in the module -- presumably a compiled
        # regular expression; only its match()/group() API is relied on.
        url_match = URL.match(text)
        if not url_match:
            # No match: the tweet is stored as-is, without de-duplication.
            print("Inserted text: " + text)
            self.stream_filter.collection.insert(data_dict)
            return
        match_group = url_match.group()

        if not self.key_list:
            # NOTE(review): the very first match only seeds key_list and is
            # never inserted -- confirm this is intended.
            self.key_list.append(match_group)
            return

        # Substring containment, not equality: a match that is part of an
        # already recorded entry counts as a duplicate.
        if any(match_group in item for item in self.key_list):
            return

        data_dict['text'] = match_group
        print("Inserted text: " + data_dict['text'] + '\n')
        self.key_list.append(match_group)
        self.stream_filter.collection.insert(data_dict)

        # Only the literal True enables the file dump (identity check kept
        # from the original).
        if self.wtf is True:
            # Append mode creates the file when it does not exist yet, so
            # no separate "exists?" branch is needed.
            with open(self.filename, 'a') as outfile:
                json.dump(data_dict['text'], outfile)
                outfile.write('\n')
    except TypeError as e:
        sys.stderr.write(str(e) + '\n')
        self.log_error(str(e))