Exemplo n.º 1
0
 def __init__(self):
     """Set up the scraper registry, field/keyword lists and collaborators."""
     # Field name -> list of scraper classes registered for that field.
     # Classes (not instances) are stored here.
     self._scrapers = dict(
         name=[OrgNameScraper],
         address=[OrgAddressScraper],
         types=[OrgTypeScraper],
         phone_numbers=[USPhoneNumberScraper, IndianPhoneNumberScraper],
         emails=[EmailScraper],
         contacts=[ContactScraper],
         organization_url=[OrgUrlScraper],
         partners=[OrgPartnersScraper],
         facebook=[OrgFacebookScraper],
         twitter=[OrgTwitterScraper],
         keywords=[KeywordScraper],
         page_rank_info=[PageRankScraper],
     )

     # Subset of the field names above.
     # NOTE(review): presumably the fields that may hold several values --
     # confirm against the code that consumes this list.
     self._multiple = [
         'types',
         'phone_numbers',
         'emails',
         'partners',
         'contacts',
     ]

     # Topic words/phrases.
     # NOTE(review): presumably a page must mention one of these to be
     # kept -- confirm against the code that consumes this list.
     self._required_words = [
         'prostitution',
         'sex trafficking',
         'child labor',
         'child labour',
         'slavery',
         'human trafficking',
         'brothel',
         'child trafficking',
         'anti trafficking',
         'social justice',
     ]

     # Character class matching any single ASCII punctuation character;
     # escaping keeps characters such as ']' and '\' literal inside [...].
     escaped_punctuation = re.escape(string.punctuation)
     self._punctuation = re.compile('[%s]' % escaped_punctuation)

     # The DAO class itself is stored here; no instance is created.
     # NOTE(review): confirm callers expect the class rather than an object.
     self.org_dao = OrganizationDAO
     self.url_frontier = URLFrontier()
Exemplo n.º 2
0
 def __init__(self):
     """Create the URL frontier and one instance of each DAO."""
     # (attribute name, factory) pairs; each factory is called with no
     # arguments, top to bottom, and the result stored on the instance.
     collaborators = (
         ('frontier', URLFrontier),
         ('contact_dao', ContactDAO),
         ('org_dao', OrganizationDAO),
         ('pub_dao', PublicationDAO),
         ('url_dao', URLMetadataDAO),
     )
     for attr_name, factory in collaborators:
         setattr(self, attr_name, factory())