Exemplo n.º 1
0
 def _find_keyword_in_url(self, links, domain):
     """Return the normalized links that pass the keyword check.

     Each link is normalized with ``self._normalize_elems``; the domain name
     (as returned by ``Utility.find_domain_name``) is then removed from the
     link before it is handed to ``self._is_contain_keyword``, so the check
     is made against the remainder of the link only.

     Args:
         links: Collection of links accepted by ``self._normalize_elems``.
         domain: Domain whose name is stripped from every link before the
             keyword check.

     Returns:
         A list of the normalized links for which
         ``self._is_contain_keyword`` returned a truthy value, in their
         original order.
     """
     # Normalize every link first
     normalized_links = self._normalize_elems(links)
     # The domain name is the same for every link, so look it up only once
     domain_name = Utility.find_domain_name(domain)
     # Keep only the links that still contain a keyword without the domain name.
     # A list (not a lazy filter object) is returned so callers can index,
     # measure and re-iterate the result on both Python 2 and Python 3.
     return [
         link for link in normalized_links
         if self._is_contain_keyword(link.replace(domain_name, ''))
     ]
Exemplo n.º 2
0
 def sort_email(self, emails, domain):
     """Rank candidate emails by score and return the best ones.

     Args:
         emails: Iterable of candidate email address strings.
         domain: Domain the candidates were collected for.

     Returns:
         A list of at most ``cfg.max_email`` addresses, ordered from the
         highest to the lowest ``self.email_scoring`` result. Candidates with
         equal scores keep their input order.
     """
     # If this is not a .go.id domain, drop every candidate address that
     # contains .go.id (the check is per address, not against the whole list)
     if '.go.id' not in domain:
         emails = [email for email in emails if '.go.id' not in email]
     domain_name = Utility.find_domain_name(domain)
     # email_scoring is called with an (email, domain_name) pair.
     # A key function with reverse=True gives a stable descending sort and
     # scores each candidate once; it also works on Python 3, where
     # list.sort(cmp=...) no longer exists.
     ranked = sorted(
         ((email, domain_name) for email in emails),
         key=self.email_scoring,
         reverse=True,
     )
     return [email for email, _ in ranked[:cfg.max_email]]