def processItem(self, tweet):
    """Classify ``tweet`` per campaign and save one document per campaign.

    The brand classifiers are run against the tweet through
    ``self.getBrandClassifiersByCampaign``; the result is iterated as
    ``(cid, matches)`` pairs (presumably campaign id -> brand/product
    matches -- confirm against that helper).  For every pair the tweet is
    updated with its topics, the matches, the gender derived from the
    display name and the follow-account mention counts for that campaign,
    and then written to the ``tweets_<cid>`` collection through
    ``MongoManager.saveDocument``.

    Note that the same ``tweet`` object is mutated on every iteration, so
    after the call it carries the state of the last campaign processed.

    Args:
        tweet: tweet wrapper object exposing the getters/setters used
            below (``getDisplayName``, ``getUserMentions``,
            ``getDictionary``, ...).

    Returns:
        Always ``None``.  According to the original author this is
        deliberate: nothing is returned so that tweets do not pile up in
        the final list and exhaust memory.
    """
    # NOTE: a call to MongoManager.getActiveAccounts(max_age=10s) used to live
    # here; the original author disabled it, unsure whether it is needed.
    follow_accounts = MongoManager.getFollowAccountsbyCampaign(
        max_age=timedelta(seconds=10))

    # TODO: this lookup should also be cached inside ClassifierManager.
    brand_classifiers = ClassifierManager.getBrandClassifiers()

    # TODO: tweets that match no classifier but were written by a user the
    # brand follows still need to be added as well.
    matches_by_campaign = self.getBrandClassifiersByCampaign(
        tweet, brand_classifiers, follow_accounts)

    # Topic classifiers are fetched lazily: only once, and only if at least
    # one campaign produced a match.
    topic_classifiers = None

    for cid, campaign_matches in matches_by_campaign.items():
        if topic_classifiers is None:
            topic_classifiers = ClassifierManager.getTopicClassifiers()

        topics = self.getTopicClassifiers(tweet, cid, topic_classifiers)
        tweet.setExtractedTopics(topics)
        tweet.setExtractedInfo(campaign_matches)

        display_name = tweet.getDisplayName()
        tweet.setGender(GenderClassifier.extractGender(display_name))

        # Mention counts are campaign-specific, so start from a clean slate
        # before flagging the followed accounts this tweet mentions.
        tweet.resetFollowAccountsMentionCount()
        mentioned_users = tweet.getUserMentions()
        for account in follow_accounts:
            if account not in mentioned_users:
                continue
            for account_info in follow_accounts[account]:
                if account_info['cid'] == cid:
                    tweet.setFollowAccountsMentionCount(account, 1)

        MongoManager.saveDocument("tweets_%s" % cid, tweet.getDictionary())

    # Intentionally return nothing (see docstring): keeps processed tweets
    # from accumulating downstream.
    return None