def retrieveTweets(self, ID, Q, geoCode):
    '''Fetch tweets newer than the stored since_id and run POS analysis.

    The since_id returned by ``mongoInt.retrieveSinceID(ID)`` is passed to
    ``twitterInt.retrieveTweets`` together with ``Q`` and ``geoCode``. The
    size of the batch is reported through
    ``mongoInt.collectionFeedFrequency``, every tweet gets a
    ``created_time`` key computed from its ``created_at`` string, and the
    batch is handed to ``self.posAnalysis``.

    This variant does not insert the tweets into MongoDB itself.

    Args:
        ID: identifier forwarded to the ``mongoInt`` helpers.
        Q: query forwarded to ``twitterInt.retrieveTweets``.
        geoCode: geo filter forwarded to ``twitterInt.retrieveTweets``.

    Returns:
        The number of tweets returned by the twitter call.

    Raises:
        KeyError: if a tweet has no ``created_at`` key.
        ValueError: if ``created_at`` does not match the expected format.
    '''
    since_id = mongoInt.retrieveSinceID(ID)
    logger.debug('retrieve tweets')
    logger.debug(since_id)
    logger.debug('retrieve tweets123456')

    twits = twitterInt.retrieveTweets(Q, geoCode, since_id)
    mongoInt.collectionFeedFrequency(len(twits), ID)

    # Explicit loop instead of map(): the update is a pure side effect, and
    # a lazy map() (Python 3) would silently never execute it.
    # NOTE(review): assumes timegm is calendar.timegm, i.e. the parsed
    # struct_time is interpreted as UTC, matching the literal "+0000".
    for tw in twits:
        tw['created_time'] = timegm(
            time.strptime(tw['created_at'], "%a %b %d %H:%M:%S +0000 %Y"))

    logger.info('tweets fetched are %s', twits)

    # The result is not consumed yet; the call is kept because posAnalysis
    # may have side effects that callers rely on.
    self.posAnalysis(twits)

    # TODO: the count should eventually be read back from MongoDB instead
    # of being returned from here.
    return len(twits)
def retrieveTweets(self, ID, Q, geoCode):
    '''Fetch tweets, add a ``created_time`` key and store them in MongoDB.

    Tweets come from ``twitterInt.retrieveTweets(Q, geoCode)``. A non-empty
    batch is passed to ``mongoInt.insertFeedData``; for an empty batch
    ``mongoInt.createCollection(ID)`` is called instead and a warning is
    logged when that call returns a falsy value.

    Args:
        ID: identifier forwarded to the ``mongoInt`` helpers.
        Q: query forwarded to ``twitterInt.retrieveTweets``.
        geoCode: geo filter forwarded to ``twitterInt.retrieveTweets``.

    Returns:
        The tweets returned by the twitter call, each updated in place with
        ``created_time``.

    Raises:
        KeyError: if a tweet has no ``created_at`` key.
        ValueError: if ``created_at`` does not match the expected format.
    '''
    logger.debug('retrieve tweets')
    twits = twitterInt.retrieveTweets(Q, geoCode)

    # time.strptime already returns the struct_time that timegm consumes.
    # The former extra time.gmtime() wrapper was a bug: gmtime expects a
    # number of seconds, so it raised TypeError on the first tweet.
    # NOTE(review): assumes timegm is calendar.timegm (UTC interpretation).
    for tw in twits:
        tw['created_time'] = timegm(
            time.strptime(tw['created_at'], "%a %b %d %H:%M:%S +0000 %Y"))

    logger.debug(
        'storing tweets of twitter of both location based on keyword mongoDb')

    if twits:
        mongoInt.insertFeedData(ID, twits)
    elif not mongoInt.createCollection(ID):
        # Nothing to insert: make sure the collection exists anyway.
        logger.warning('unable to create collection in mongodb')

    # TODO: callers should eventually read the tweets back from MongoDB
    # instead of relying on this return value.
    return twits
def retrieveTweets(self, ID, Q, geoCode):
    '''Fetch new tweets, merge them with stored ones and classify the feed.

    Steps, in order:
      1. Fetch tweets via ``twitterInt.retrieveTweets`` using the since_id
         returned by ``mongoInt.retrieveSinceID(ID)`` and report the batch
         size through ``mongoInt.collectionFeedFrequency``.
      2. Replace every retweet by its ``retweeted_status`` payload, flagged
         with ``alreadyRetweeted = True``.
      3. Add a ``created_time`` key computed from ``created_at``.
      4. Append the batch to the list returned by
         ``mongoInt.retrieveParentIdTrue(ID)`` and pass the combined list to
         ``self.topicModelLSI``; when that result is not ``0`` it is passed
         on to ``self.updateRatio``.

    Args:
        ID: identifier forwarded to the ``mongoInt`` helpers and classifier.
        Q: query forwarded to the twitter call and the topic model.
        geoCode: geo filter forwarded to ``twitterInt.retrieveTweets``.

    Returns:
        ``[]`` when the twitter call returned no tweets, otherwise whatever
        ``self.runClassifier(ID)`` returns.

    Raises:
        KeyError: if a tweet has no ``created_at`` key.
        ValueError: if ``created_at`` does not match the expected format.
    '''
    since_id = mongoInt.retrieveSinceID(ID)
    logger.debug('retrieve tweets')
    logger.debug(since_id)
    logger.debug('retrieve tweets123456')

    fetched = twitterInt.retrieveTweets(Q, geoCode, since_id)
    mongoInt.collectionFeedFrequency(len(fetched), ID)

    twits = []
    for tweet in fetched:
        if 'retweeted_status' in tweet:
            # Work on the original tweet rather than the retweet wrapper.
            tweet = tweet['retweeted_status']
            tweet['alreadyRetweeted'] = True
            # Logged instead of printed: a bare print of unicode text can
            # raise UnicodeEncodeError when stdout is not UTF-8 capable.
            logger.debug('unwrapped retweet: %s', tweet.get('text'))
        # NOTE(review): assumes timegm is calendar.timegm, i.e. the parsed
        # struct_time is interpreted as UTC, matching the literal "+0000".
        tweet['created_time'] = timegm(
            time.strptime(tweet['created_at'], "%a %b %d %H:%M:%S +0000 %Y"))
        twits.append(tweet)

    logger.info('tweets fetched for %s are %s', ID, len(twits))
    if not twits:
        return []

    uniqueTweetsFromDB = mongoInt.retrieveParentIdTrue(ID)
    # Routine diagnostics, so they are logged at debug level, not error.
    logger.debug('existing uniqueTweetsFromDB :%s', len(uniqueTweetsFromDB))
    logger.debug('total uniqe feeds %s', uniqueTweetsFromDB)
    uniqueTweetsFromDB.extend(twits)
    logger.debug('total combined tweets :%s', len(uniqueTweetsFromDB))

    similarTweet = self.topicModelLSI(uniqueTweetsFromDB, Q)
    if similarTweet != 0:
        self.updateRatio(ID, similarTweet, uniqueTweetsFromDB, Q)
    return self.runClassifier(ID)
def retrieveTweets(self, ID, Q, geoCode):
    '''Fetch tweets for a query and pass them to MongoDB storage.

    The tweets come from ``twitterInt.retrieveTweets(Q, geoCode)`` and are
    handed unchanged to ``mongoInt.insertFeedData(ID, ...)``.

    Args:
        ID: identifier forwarded to ``mongoInt.insertFeedData``.
        Q: query forwarded to ``twitterInt.retrieveTweets``.
        geoCode: geo filter forwarded to ``twitterInt.retrieveTweets``.

    Returns:
        The tweets exactly as returned by the twitter call.
    '''
    logger.debug('retrieve tweets')
    fetched = twitterInt.retrieveTweets(Q, geoCode)

    logger.debug('storing tweets of twitter of both location baseed on keyworad mongoDb')

    # Accumulates whatever insertFeedData returns (presumably a count of
    # stored documents); the total is not read afterwards.
    stored_total = 0
    stored_total += mongoInt.insertFeedData(ID, fetched)

    # TODO: callers should eventually read the tweets back from MongoDB
    # instead of relying on this return value.
    return fetched
def retrieveTweets(self, ID, Q, geoCode):
    '''Fetch new tweets, merge them with stored ones and update ratios.

    Steps, in order:
      1. Fetch tweets via ``twitterInt.retrieveTweets`` using the since_id
         returned by ``mongoInt.retrieveSinceID(ID)`` and report the batch
         size through ``mongoInt.collectionFeedFrequency``.
      2. Replace every retweet by its ``retweeted_status`` payload, flagged
         with ``alreadyRetweeted = True``.
      3. Add a ``created_time`` key computed from ``created_at``.
      4. Append the batch to the list returned by
         ``mongoInt.retrieveParentIdTrue(ID)`` and pass the combined list to
         ``self.topicModelLSI``; when that result is not ``0`` it is passed
         on to ``self.updateRatio``.

    Args:
        ID: identifier forwarded to the ``mongoInt`` helpers.
        Q: query forwarded to the twitter call and the topic model.
        geoCode: geo filter forwarded to ``twitterInt.retrieveTweets``.

    Returns:
        The length of the combined (stored + new) tweet list, or ``0`` when
        the twitter call returned no tweets.

    Raises:
        KeyError: if a tweet has no ``created_at`` key.
        ValueError: if ``created_at`` does not match the expected format.
    '''
    since_id = mongoInt.retrieveSinceID(ID)
    logger.debug('retrieve tweets')
    logger.debug(since_id)
    logger.debug('retrieve tweets123456')

    fetched = twitterInt.retrieveTweets(Q, geoCode, since_id)
    mongoInt.collectionFeedFrequency(len(fetched), ID)

    twits = []
    for tweet in fetched:
        if 'retweeted_status' in tweet:
            # Work on the original tweet rather than the retweet wrapper.
            tweet = tweet['retweeted_status']
            tweet['alreadyRetweeted'] = True
            # Logged instead of printed: a bare print of unicode text can
            # raise UnicodeEncodeError when stdout is not UTF-8 capable.
            logger.debug('unwrapped retweet: %s', tweet.get('text'))
        # NOTE(review): assumes timegm is calendar.timegm, i.e. the parsed
        # struct_time is interpreted as UTC, matching the literal "+0000".
        tweet['created_time'] = timegm(
            time.strptime(tweet['created_at'], "%a %b %d %H:%M:%S +0000 %Y"))
        twits.append(tweet)

    logger.info('tweets fetched are chellaaa %s', len(twits))

    # Guard clause: without new tweets there is nothing to merge. The
    # combined list was previously left unbound on this path, so the code
    # below could not run.
    if not twits:
        return 0

    uniqueTweetsFromDB = mongoInt.retrieveParentIdTrue(ID)
    logger.debug('existing uniqueTweetsFromDB :%s', len(uniqueTweetsFromDB))
    uniqueTweetsFromDB.extend(twits)
    logger.debug('total combined tweets :%s', len(uniqueTweetsFromDB))

    similarTweet = self.topicModelLSI(uniqueTweetsFromDB, Q)
    if similarTweet != 0:
        self.updateRatio(ID, similarTweet, uniqueTweetsFromDB, Q)
    return len(uniqueTweetsFromDB)
def retrieveTweets(self, ID, Q, geoCode):
    '''Fetch tweets, add a ``created_time`` key and store them in MongoDB.

    Tweets come from ``twitterInt.retrieveTweets(Q, geoCode)``. Each tweet
    gets ``created_time`` set to the decimal string of the epoch seconds
    parsed from ``created_at``. A non-empty batch is passed to
    ``mongoInt.insertFeedData``; for an empty batch
    ``mongoInt.createCollection(ID)`` is called instead and a warning is
    logged when that call returns a falsy value.

    Args:
        ID: identifier forwarded to the ``mongoInt`` helpers.
        Q: query forwarded to ``twitterInt.retrieveTweets``.
        geoCode: geo filter forwarded to ``twitterInt.retrieveTweets``.

    Returns:
        The tweets returned by the twitter call, each updated in place with
        ``created_time``.

    Raises:
        KeyError: if a tweet has no ``created_at`` key.
        ValueError: if ``created_at`` does not match the expected format.
    '''
    # Imported locally so this block does not depend on a module-level name
    # it did not use before.
    from calendar import timegm

    logger.debug('retrieve tweets')
    twits = twitterInt.retrieveTweets(Q, geoCode)

    # The format string pins the offset to "+0000", so the parsed time is
    # UTC. time.mktime would treat it as local time and shift the result by
    # the host's UTC offset; calendar.timegm interprets it as UTC.
    # The value stays a string, as before.
    for tw in twits:
        parsed = time.strptime(tw['created_at'], "%a %b %d %H:%M:%S +0000 %Y")
        tw['created_time'] = str(timegm(parsed))

    logger.debug('storing tweets of twitter of both location based on keyword mongoDb')

    if twits:
        mongoInt.insertFeedData(ID, twits)
    elif not mongoInt.createCollection(ID):
        # Nothing to insert: make sure the collection exists anyway.
        logger.warning('unable to create collection in mongodb')

    # TODO: callers should eventually read the tweets back from MongoDB
    # instead of relying on this return value.
    return twits