def convertTweetJSONToMessage(tweet, **twitter_stream_settings):
    """Build a Message from a tweet dict and attach a phrase-count vector.

    Reads ``tweet['created_at']``, ``tweet['user']['screen_name']``,
    ``tweet['id']`` and ``tweet['text']``. The keyword settings must contain
    ``min_phrase_length`` and ``max_phrase_length``; both are passed straight
    to ``getPhrases``.

    Returns the Message, whose ``vector`` maps each phrase produced by
    ``getPhrases`` to the number of times it was produced.
    """
    created = getDateTimeObjectFromTweetTimestamp(tweet['created_at'])
    author = tweet['user']['screen_name']
    message = Message(author, tweet['id'], tweet['text'], created)

    message.vector = Vector()
    phrase_counts = message.vector

    words = getWordsFromRawEnglishMessage(tweet['text'])
    shortest = twitter_stream_settings['min_phrase_length']
    longest = twitter_stream_settings['max_phrase_length']
    for phrase in getPhrases(words, shortest, longest):
        if phrase in phrase_counts:
            phrase_counts[phrase] += 1
        else:
            # First occurrence: start at zero, then count it.
            phrase_counts[phrase] = 0
            phrase_counts[phrase] += 1
    return message
def _tweetIterator(self):
    """Group the phrases of every tweet in the gzipped file by author.

    Tweets are read from ``self.fileName + '.gz'`` through
    ``TwitterIterators.iterateFromFile``. For each tweet, spaces inside
    every phrase are replaced with ``unique_string`` and the phrases are
    joined with single spaces; the result is appended to whatever text the
    same screen name has accumulated so far. Tweets that yield no phrases
    are skipped.

    Returns ``dict.iteritems()`` over ``screen_name -> accumulated text``
    (Python 2 dict API).
    """
    text_by_user = {}
    for tweet in TwitterIterators.iterateFromFile(self.fileName + '.gz'):
        screen_name = tweet['user']['screen_name']
        words = getWordsFromRawEnglishMessage(tweet['text'])
        shortest = self.stream_settings['min_phrase_length']
        longest = self.stream_settings['max_phrase_length']

        tokens = []
        for phrase in getPhrases(words, shortest, longest):
            tokens.append(phrase.replace(' ', unique_string))
        if not tokens:
            continue

        joined = ' '.join(tokens)
        if screen_name in text_by_user:
            text_by_user[screen_name] += ' ' + joined
        else:
            text_by_user[screen_name] = joined
    return text_by_user.iteritems()
def convertTweetJSONToMessage(tweet, **twitter_stream_settings):
    """Convert a tweet dict into a Message carrying phrase frequencies.

    The Message is constructed from the tweet's screen name, id, text and
    the datetime obtained from ``tweet['created_at']``. Its ``vector`` is a
    fresh ``Vector`` in which every phrase returned by ``getPhrases`` is
    counted once per occurrence.

    ``twitter_stream_settings`` must supply ``min_phrase_length`` and
    ``max_phrase_length``.
    """
    timestamp = getDateTimeObjectFromTweetTimestamp(tweet['created_at'])
    message = Message(
        tweet['user']['screen_name'],
        tweet['id'],
        tweet['text'],
        timestamp,
    )
    message.vector = Vector()

    phrases = getPhrases(
        getWordsFromRawEnglishMessage(tweet['text']),
        twitter_stream_settings['min_phrase_length'],
        twitter_stream_settings['max_phrase_length'],
    )
    for phrase in phrases:
        unseen = phrase not in message.vector
        if unseen:
            message.vector[phrase] = 0
        message.vector[phrase] = message.vector[phrase] + 1
    return message
def _tweetWithTimestampIterator(self):
    """Merge each author's tweets into one tweet-shaped record.

    Tweets are read from ``self.fileName + '.gz'``. For every screen name a
    dict is kept with the keys ``user`` (``{'screen_name': ...}``), ``id``,
    ``created_at`` and ``text``. ``id`` and ``created_at`` are overwritten
    on every tweet, so they end up holding the values of the last tweet
    read for that author. ``text`` starts as a single space and grows by
    ``' '`` plus the space-joined phrases of each tweet, with spaces inside
    a phrase replaced by ``unique_string``.

    Returns ``iteritems()`` over ``screen_name -> record`` (Python 2 API).
    """
    records = defaultdict(dict)
    for tweet in TwitterIterators.iterateFromFile(self.fileName + '.gz'):
        screen_name = tweet['user']['screen_name']
        record = records[screen_name]
        record['user'] = {'screen_name': screen_name}
        record['id'] = tweet['id']
        record['created_at'] = tweet['created_at']
        # Seed the text once; later tweets only append to it.
        record.setdefault('text', ' ')

        words = getWordsFromRawEnglishMessage(tweet['text'])
        shortest = self.stream_settings['min_phrase_length']
        longest = self.stream_settings['max_phrase_length']
        tokens = []
        for phrase in getPhrases(words, shortest, longest):
            tokens.append(phrase.replace(' ', unique_string))

        if tokens:
            record['text'] += ' ' + ' '.join(tokens)
    return records.iteritems()
def _tweetIterator(self):
    """Yield-ready mapping of screen name to that user's combined phrases.

    Every tweet in ``self.fileName + '.gz'`` is split into phrases via
    ``getPhrases``; spaces within a phrase are swapped for
    ``unique_string`` and the phrases of one tweet are joined with a single
    space. Text from successive tweets by the same screen name is
    concatenated with a single space in between. A tweet with no phrases
    contributes nothing and does not create an entry.

    Returns the result of ``iteritems()`` on the mapping (Python 2 API).
    """
    combined = {}
    tweets = TwitterIterators.iterateFromFile(self.fileName + '.gz')
    for tweet in tweets:
        owner = tweet['user']['screen_name']
        raw_phrases = getPhrases(
            getWordsFromRawEnglishMessage(tweet['text']),
            self.stream_settings['min_phrase_length'],
            self.stream_settings['max_phrase_length'],
        )
        encoded = [item.replace(' ', unique_string) for item in raw_phrases]
        if encoded:
            addition = ' '.join(encoded)
            # Stored values are always strings, so None means "no entry yet".
            earlier = combined.get(owner)
            if earlier is None:
                combined[owner] = addition
            else:
                combined[owner] = earlier + ' ' + addition
    return combined.iteritems()
def _tweetWithTimestampIterator(self):
    """Collapse the tweets in the gzipped file into one record per author.

    Each record is a dict shaped like a tweet: ``user`` holds
    ``{'screen_name': ...}``, ``id`` and ``created_at`` are taken from the
    tweet currently being processed (so the last tweet read wins), and
    ``text`` accumulates the phrases of all the author's tweets. ``text``
    is initialised to a single space; each tweet with phrases appends a
    space followed by its phrases joined by spaces, where spaces inside a
    phrase have been replaced with ``unique_string``.

    Returns ``iteritems()`` over the per-author records (Python 2 API).
    """
    per_user = defaultdict(dict)
    source = TwitterIterators.iterateFromFile(self.fileName + '.gz')
    for tweet in source:
        name = tweet['user']['screen_name']
        entry = per_user[name]
        entry['user'] = {'screen_name': name}
        entry['id'] = tweet['id']
        entry['created_at'] = tweet['created_at']
        if 'text' not in entry:
            entry['text'] = ' '

        encoded = [
            item.replace(' ', unique_string)
            for item in getPhrases(
                getWordsFromRawEnglishMessage(tweet['text']),
                self.stream_settings['min_phrase_length'],
                self.stream_settings['max_phrase_length'],
            )
        ]
        if not encoded:
            continue
        entry['text'] = entry['text'] + ' ' + ' '.join(encoded)
    return per_user.iteritems()