Пример #1
0
 def analyze(self, tag_weights=None):
     """Select the best email for this domain and build its tags.

     Every email in ``self.emails`` is ranked with
     ``self.emailranker.rank_email``. Each email ranked ``1`` becomes
     ``self.best_email`` (a later match replaces an earlier one), and the
     domain document and tags are rebuilt from ``self.responses``.

     Args:
         tag_weights: Optional weights forwarded unchanged to
             ``build_tags``.

     Returns:
         The tuple ``(domain, best_email, emails, tags)``. ``best_email``
         and ``tags`` are only assigned here when some email ranks ``1``;
         otherwise whatever values they already hold are returned.
     """
     for candidate in self.emails:
         rank = self.emailranker.rank_email(candidate)
         if rank != 1:
             # Only top-ranked emails trigger the document/tag rebuild.
             continue
         self.best_email = candidate
         self.domain_doc = build_doc(self.responses)
         self.tags = build_tags(self.domain_doc, tag_weights)
         summary = (self.domain, self.emails, self.tags)
         log.info(summary)
     return self.domain, self.best_email, self.emails, self.tags
Пример #2
0
def get_url_response(url):
    """Fetch *url* with ``requests.get`` using a 3-second timeout.

    Args:
        url: Address passed to ``requests.get``.

    Returns:
        The response object on success, or ``None`` when the request raised
        a ``requests`` exception or a ``UnicodeError``.
    """
    # Values are passed as logging arguments rather than pre-formatted: the
    # message is only rendered if the record is actually emitted, and a
    # tuple-valued ``url`` can no longer break the ``%`` operator.
    log.info("Processing %s", url)
    try:
        return requests.get(url, timeout=3)
    except (requests.exceptions.RequestException, UnicodeError) as e:
        log.debug("%s failed: %s", url, e)
        # NOTE(review): "done processing" is only logged on the failure
        # path (kept as in the original) -- confirm whether it was meant to
        # be logged for every request.
        log.debug("done processing")
        return None
Пример #3
0
 def write(self):
     """Save every model in ``self.data``, then call ``self.empty_data()``.

     Each model is saved independently: a failure is logged and the
     remaining models are still attempted. ``self.empty_data()`` runs
     afterwards regardless of failures (presumably clearing the batch --
     its body is not visible here), so failed records are not retried by
     this method.
     """
     log.info("WRITING BATCH TO DB!!!!")
     for model in self.data:
         try:
             model.save()
         except Exception as e:
             # Deliberate best-effort: one bad record must not abort the
             # batch. Logged via ``exception`` so the failure is reported at
             # error level with its traceback instead of being buried at
             # INFO.
             log.exception(
                 "FAILED TO WRITE RECORD: %s WITH EXCEPTION: %s", model, e)
     self.empty_data()
Пример #4
0
 def add_or_update_seed(seed):
     """Save *seed*, or refresh the stored seeds that share its URL.

     When no stored ``Seed`` has ``seed.url``, *seed* itself is saved.
     Otherwise every stored match gets its ``modified_count`` incremented
     and its ``crawled``, ``weighted_terms`` and ``search_term`` copied from
     *seed*; *seed* itself is not saved in that case.

     Args:
         seed: Object providing ``url``, ``crawled``, ``weighted_terms``,
             ``search_term`` and ``save()``.
     """
     matches = Seed.objects.filter(url=seed.url)
     if not matches:
         log.info("new Seed {} being saved to DB".format(seed))
         seed.save()
         return

     log.info("Seed or Seeds {} found, updating....".format(seed.url))
     for stored in matches:
         # F() makes the increment a database-side expression.
         stored.modified_count = F('modified_count') + 1
         stored.crawled = seed.crawled
         stored.weighted_terms = seed.weighted_terms
         stored.search_term = seed.search_term
         stored.save()
Пример #5
0
 def add_email(self, email, url, seed=None):
     """Record *email*, or bump its counter when it is already stored.

     Blacklisted addresses are ignored. For a known address the stored
     record's ``modified_count`` is incremented; otherwise a new ``Email``
     is built and handed to the writer.

     Args:
         email: Address to record; stored lower-cased.
         url: Stored as ``from_url`` on a newly created record.
         seed: Optional value stored as ``seed_url`` on a new record.
     """
     if self.blacklist.is_blacklisted(email):
         return

     # Records are stored lower-cased, so the lookup must use the same
     # normalized form; looking up the raw address would miss the stored
     # row for any mixed-case input and queue a duplicate.
     normalized = email.lower()
     try:
         existing = Email.objects.get(email_address=normalized)
     except Email.MultipleObjectsReturned:
         # Duplicates already exist for this address; do not add another.
         log.info("Email %s found multiple times, skipping....", email)
         return
     except Email.DoesNotExist:
         pass
     else:
         log.info("Email %s found, updating....", email)
         # F() makes the increment a database-side expression.
         existing.modified_count = F('modified_count') + 1
         existing.save()
         return

     email_model = Email(seed_url=seed,
                         email_address=normalized,
                         from_url=url,
                         tier=self.get_email_tier(email))
     self.__writer.add_data(email_model)
Пример #6
0
 def add_blogger(self, blogger):
     """Save *blogger*, or bump the counter of the stored match.

     Stored bloggers are looked up by ``blogger.email_address``. A single
     match has its ``modified_count`` incremented and saved; several
     matches are only logged; no match means *blogger* itself is saved.

     Args:
         blogger: Object providing ``email_address`` and ``save()``.
     """
     already_found = "blogger {} already found. proceeding..."
     try:
         stored = Blogger.objects.get(email_address=blogger.email_address)
         log.info(already_found.format(blogger))
         # F() makes the increment a database-side expression.
         stored.modified_count = F('modified_count') + 1
         stored.save()
     except Blogger.DoesNotExist:
         log.info("new blogger {} being saved to DB".format(blogger))
         blogger.save()
     except Blogger.MultipleObjectsReturned:
         log.info(already_found.format(blogger))
Пример #7
0
 def add_data(self, model_data):
     """Add *model_data* to ``self.data`` and run the write check.

     Args:
         model_data: Object passed to ``self.data.add``.
     """
     # Passed as a logging argument rather than pre-formatted with ``%``:
     # rendering is skipped when INFO is disabled, and a tuple-valued
     # argument cannot break the format operator.
     log.info("adding %s to persistence", model_data)
     self.data.add(model_data)
     self.check_should_write()