Пример #1
0
 def start(self, **kwargs):
     """Summarize the tweets of every active campaign in an endless loop.

     Keyword arguments:
         campaign: optional campaign object. When truthy, only the campaign
             whose getId() matches is summarized.
         regenerate: when truthy, the first pass clears each campaign's
             summarization and restarts from the hour of the first tweet
             returned by a query sorted on "x_created_at" (or from the
             current hour when the campaign has no tweets). Later passes
             resume from the last summarized date.

     This method never returns: after each pass over all accounts it prints
     a message and sleeps 20 seconds before starting again.
     """
     only_campaign = kwargs.get('campaign', None)
     regenerate_all = kwargs.get('regenerate', False)
     while True:
         end = self.getCurrentSummarizationEnd()
         for account in MongoManager.getActiveAccounts(max_age=timedelta(hours=1)):
             for campaign in account.getActiveCampaigns():
                 # The index is ensured for every campaign, even the ones
                 # skipped by the only_campaign filter just below.
                 MongoManager.ensureIndex('summarized_tweets_%s' % campaign.getId(), [("start", 1)])
                 if only_campaign and only_campaign.getId() != campaign.getId():
                     continue
                 if not regenerate_all:
                     window_start = self.getLastSummarizedDate(campaign)
                 else:
                     self.clearSummarization(campaign)
                     first_tweets = MongoManager.findTweets('tweets_%s' % campaign.getId(), sort=("x_created_at", 1), limit=1)
                     origin = first_tweets[0]['x_created_at'] if first_tweets.count() else datetime.now()
                     # Align the starting point to the beginning of its hour.
                     window_start = origin.replace(minute=0, second=0, microsecond=0)
                 # Advance in steps of one day, never going past `end`.
                 while window_start < end:
                     next_start = window_start + timedelta(days=1)
                     self.summarize(campaign, window_start, min(end, next_start), timedelta(hours=1), None)
                     window_start = next_start
         pprint("sleeping 20 seconds")
         # Regeneration only applies to the first pass.
         regenerate_all = False
         time.sleep(20)
Пример #2
0
 def getAllFeedURLs(self):
     """Collect every forum URL of every active campaign.

     Iterates the active accounts returned by MongoManager and, for each of
     their active campaigns, the values returned by getForums().

     Returns:
         A list of (account, campaign, url) tuples in iteration order.
     """
     feeds = []
     for account in MongoManager.getActiveAccounts():
         for campaign in account.getActiveCampaigns():
             feeds.extend((account, campaign, forum_url)
                          for forum_url in campaign.getForums())
     return feeds
Пример #3
0
 def getAllFeedURLs(self):
     """Return (account, campaign, url) for each forum of each active campaign.

     Accounts come from MongoManager.getActiveAccounts(); campaigns from each
     account's getActiveCampaigns(); URLs from each campaign's getForums().
     The resulting list keeps that nesting order.
     """
     return [
         (account, campaign, forum_url)
         for account in MongoManager.getActiveAccounts()
         for campaign in account.getActiveCampaigns()
         for forum_url in campaign.getForums()
     ]
Пример #4
0
 def generateGnipRulesFromMongo(self):
     """Build one rule dict per Facebook fanpage of every active campaign.

     Returns:
         A list of dicts of the form {"value": <fanpage>, "tag": None}, in
         account / campaign / fanpage iteration order.
     """
     active_accounts = MongoManager.getActiveAccounts()
     fanpage_rules = []
     for account in active_accounts:
         for campaign in account.getActiveCampaigns():
             # Every rule is emitted with tag None; no
             # "account/campaign/fanpage" tag is attached.
             fanpage_rules.extend(
                 {"value": fanpage, "tag": None}
                 for fanpage in campaign.getFacebookFanpages()
             )
     return fanpage_rules
Пример #5
0
 def getAllHistoryFeedURLs(self):
     """Collect forum URLs that are not yet in the campaign's fetched history.

     For each active campaign of each active account, a forum URL is kept
     only when it is not contained in campaign.getHistoryFetchedForums().

     Returns:
         A list of (account, campaign, url) tuples in iteration order.
     """
     pending = []
     for account in MongoManager.getActiveAccounts():
         for campaign in account.getActiveCampaigns():
             # Read once per campaign, before walking its forums.
             already_fetched = campaign.getHistoryFetchedForums()
             pending.extend(
                 (account, campaign, forum_url)
                 for forum_url in campaign.getForums()
                 if forum_url not in already_fetched
             )
     return pending
Пример #6
0
 def getAllHistoryFeedURLs(self):
     """Return (account, campaign, url) for forums missing from the history.

     A forum URL is skipped when it appears in the value returned by the
     campaign's getHistoryFetchedForums(); every other URL from getForums()
     is reported, for every active campaign of every active account.
     """
     missing = []
     for account in MongoManager.getActiveAccounts():
         for campaign in account.getActiveCampaigns():
             fetched = campaign.getHistoryFetchedForums()
             for forum_url in campaign.getForums():
                 if forum_url in fetched:
                     continue
                 missing.append((account, campaign, forum_url))
     return missing
Пример #7
0
 def getBrandClassifiers(cls):
     """Build a brand classifier for every rule of every active account.

     An instance of `cls` is created to call getAccountRules() on each
     active account (fetched with max_age of 10 seconds); once all rules
     are gathered, generateBrandClassifier() is applied to each of them.

     Returns:
         A list with one classifier per rule, in rule order.
     """
     # TODO: a buffer/cache based on max_age is still missing here.
     builder = cls()
     gathered_rules = []
     for account in MongoManager.getActiveAccounts(max_age=timedelta(seconds=10)):
         gathered_rules.extend(builder.getAccountRules(account))
     return [builder.generateBrandClassifier(rule) for rule in gathered_rules]
Пример #8
0
 def getFanpageToCampaignsDict(cls):
     """Return a dict mapping each Facebook fanpage to its campaigns.

     The mapping is cached on the class in `cached_fanpage_to_campaigns`
     together with its fetch time. It is rebuilt when
     `fanpage_to_campaigns_max_age` is falsy, when nothing is cached yet, or
     when the cached entry is older than `fanpage_to_campaigns_max_age`.

     Returns:
         dict of fanpage -> list of campaigns that list that fanpage.
     """
     must_refetch = True
     if cls.fanpage_to_campaigns_max_age and cls.cached_fanpage_to_campaigns:
         cache_age = datetime.now() - cls.cached_fanpage_to_campaigns['fetch_time']
         must_refetch = cache_age > cls.fanpage_to_campaigns_max_age
     if must_refetch:
         print("refetching fanpages to campagins dict")
         campaigns_by_fanpage = {}
         for account in MongoManager.getActiveAccounts():
             for campaign in account.getActiveCampaigns():
                 for fanpage in campaign.getFacebookFanpages():
                     campaigns_by_fanpage.setdefault(fanpage, []).append(campaign)
         cls.cached_fanpage_to_campaigns = {'data': campaigns_by_fanpage, 'fetch_time': datetime.now()}
     return cls.cached_fanpage_to_campaigns['data']
Пример #9
0
 def getTopicClassifiers(cls):
     """Return the topic classifiers of every active campaign.

     Campaigns whose getTopics() result is empty are left out. Accounts are
     fetched with a max_age of 10 seconds.

     Returns:
         dict of campaign id -> dict of topic id -> classifier produced by
         generateTopicClassifier() on an instance of `cls`.
     """
     # TODO: a buffer/cache based on max_age is still missing here.
     builder = cls()
     classifiers = {}
     for account in MongoManager.getActiveAccounts(max_age=timedelta(seconds=10)):
         for campaign in account.getActiveCampaigns():
             topics = campaign.getTopics()
             if not topics:
                 continue
             by_topic = {}
             classifiers[campaign.getId()] = by_topic
             for topic in topics:
                 # Open question carried over from the original author:
                 # should topic['_id'] be set to topic.getId() here?
                 classifier = builder.generateTopicClassifier(topic)
                 by_topic[topic.getId()] = classifier
     return classifiers
Пример #10
0
 def start(self, **kwargs):
     """Run the tweet summarization loop; this method never returns.

     Keyword arguments:
         campaign: optional campaign object; when truthy, campaigns whose
             getId() differs from it are skipped.
         regenerate: when truthy, on the first pass each campaign's
             summarization is cleared and rebuilt starting at the hour of
             the first tweet returned by a query sorted on "x_created_at",
             or at the current hour if no tweet is found.

     Each pass summarizes every selected campaign from its starting date up
     to getCurrentSummarizationEnd(), at most one day per summarize() call,
     then prints a message and sleeps 20 seconds.
     """
     only_campaign = kwargs.get('campaign', None)
     regenerate_all = kwargs.get('regenerate', False)
     while True:
         end = self.getCurrentSummarizationEnd()
         accounts = MongoManager.getActiveAccounts(max_age=timedelta(hours=1))
         for account in accounts:
             for campaign in account.getActiveCampaigns():
                 # Ensured before filtering, so it also runs for campaigns
                 # that are skipped below.
                 summary_collection = 'summarized_tweets_%s' % campaign.getId()
                 MongoManager.ensureIndex(summary_collection, [("start", 1)])

                 if only_campaign and only_campaign.getId() != campaign.getId():
                     continue

                 if regenerate_all:
                     self.clearSummarization(campaign)
                     tweets_collection = 'tweets_%s' % campaign.getId()
                     cursor = MongoManager.findTweets(
                         tweets_collection,
                         sort=("x_created_at", 1),
                         limit=1)
                     if cursor.count():
                         reference = cursor[0]['x_created_at']
                     else:
                         reference = datetime.now()
                     # Truncate to the start of the hour.
                     current = reference.replace(
                         minute=0, second=0, microsecond=0)
                 else:
                     current = self.getLastSummarizedDate(campaign)

                 while current < end:
                     day_later = current + timedelta(days=1)
                     chunk_end = min(end, day_later)
                     self.summarize(campaign, current, chunk_end,
                                    timedelta(hours=1), None)
                     current = day_later
         pprint("sleeping 20 seconds")
         # Only the first pass regenerates from scratch.
         regenerate_all = False
         time.sleep(20)
Пример #11
0
 def generateGnipRulesFromMongo(self):
     """Build the rule dicts for all active accounts, campaigns and polls.

     Every rule is a dict {"value": <rule text>, "tag": <description>}.
     For each brand of each active campaign the following are emitted:

       * follow accounts (when the brand has any): three rules joining the
         sorted accounts with " OR " -- as is, prefixed with "from:" and
         prefixed with "to:" (the latter two with "@" removed);
       * brand identification rules: "[M]" is replaced by each brand search
         keyword and the result is wrapped in double quotes. If the rule
         contains "[P]", one rule is emitted per search keyword of every
         product for which isUsingBrandIdRules() is true; otherwise one
         rule per brand keyword;
       * product identification rules: same substitutions, one rule per
         (brand keyword, product keyword) pair.

     For each active poll of each account, one rule joining the sorted
     search hashtags with " OR " is emitted.

     Returns:
         The list of rule dicts in generation order.
     """
     rules = []
     for acc in MongoManager.getActiveAccounts():
         for camp in acc.getActiveCampaigns():
             for brand in camp.getBrands():
                 followed = sorted(brand.getFollowAccounts())
                 if followed:
                     handles = [name.replace("@", "") for name in followed]
                     follow_variants = (
                         (" OR ".join(followed), "mention"),
                         (" OR ".join(["from:%s" % h for h in handles]), "from"),
                         (" OR ".join(["to:%s" % h for h in handles]), "to"),
                     )
                     for rule_text, variant in follow_variants:
                         rules.append({
                             "value": rule_text,
                             "tag": "%s/%s/%s/follow accounts - %s" % (
                                 acc.getName(), camp.getName(),
                                 brand.getName(), variant),
                         })

                 # BRAND RULES
                 for raw_brand_rule in brand.getIdentificationRules():
                     # Normalize placeholders to upper case: [m]->[M], [p]->[P].
                     brand_rule = raw_brand_rule.replace("[m]", "[M]")
                     brand_rule = brand_rule.replace("[p]", "[P]")
                     # After normalization any product placeholder is "[P]".
                     has_product_slot = "[P]" in brand_rule
                     for brand_keyword in brand.getSearchKeywords():
                         quoted = '"' + brand_rule.replace("[M]", brand_keyword) + '"'
                         if not has_product_slot:
                             rules.append({
                                 "value": quoted,
                                 "tag": "%s/%s/%s: %s" % (
                                     acc.getName(), camp.getName(),
                                     brand.getName(), brand_rule),
                             })
                             continue
                         for product in brand.getProducts():
                             if not product.isUsingBrandIdRules():
                                 continue
                             for product_keyword in product.getSearchKeywords():
                                 rules.append({
                                     "value": quoted.replace("[P]", product_keyword),
                                     "tag": "%s/%s/%s/%s: %s" % (
                                         acc.getName(), camp.getName(),
                                         brand.getName(), product.getName(),
                                         brand_rule),
                                 })

                 # PRODUCT RULES
                 for product in brand.getProducts():
                     for raw_product_rule in product.getIdentificationRules():
                         product_rule = raw_product_rule.replace("[m]", "[M]")
                         product_rule = product_rule.replace("[p]", "[P]")
                         for brand_keyword in brand.getSearchKeywords():
                             quoted = '"' + product_rule.replace("[M]", brand_keyword) + '"'
                             for product_keyword in product.getSearchKeywords():
                                 rules.append({
                                     "value": quoted.replace("[P]", product_keyword),
                                     "tag": "%s/%s/%s/%s: %s" % (
                                         acc.getName(), camp.getName(),
                                         brand.getName(), product.getName(),
                                         product_rule),
                                 })

         # NOTE(review): unlike follow accounts, polls are not guarded against
         # an empty hashtag list, so an empty "value" can be emitted -- confirm
         # whether that is intended.
         for poll in acc.getActivePolls():
             hashtags = sorted(poll.getSearchHashtags())
             rules.append({
                 "value": " OR ".join(hashtags),
                 "tag": "%s/poll %s" % (acc.getName(), poll.getName()),
             })
     return rules