Exemplo n.º 1
0
 def module_run(self, domains):
     """Harvest hostnames for each domain by scraping Yahoo search results.

     For every domain the search is repeated with a ``-domain:<host>``
     exclusion for each hostname already collected. The loop for a domain
     ends when a results page yields nothing new and contains no ``>Next<``
     marker, or when the server answers with a non-200 status. Every new
     hostname is printed with ``self.output`` and stored with
     ``self.add_hosts``.

     Result links from which ``urlparse`` cannot extract a hostname are
     skipped, so ``None`` is never recorded or fed back into the query.

     :param domains: iterable of domain names to search for.
     """
     base_url = 'https://search.yahoo.com/search'
     # page size; also used to compute the offset of the first result ('b')
     nr = 100
     for domain in domains:
         self.heading(domain, level=0)
         base_query = 'domain:' + domain
         hosts = []
         # control variables
         new = True
         page = 0
         # execute search engine queries and scrape results storing hostnames in a list
         # loop until no new hostnames are found
         while new:
             # build query based on results of previous results
             exclusions = ''.join(' -domain:%s' % (host, ) for host in hosts)
             full_query = base_query + exclusions
             payload = {'pz': nr, 'b': (page * nr) + 1, 'p': full_query}
             # yahoo does not appear to have a max url length
             self.verbose('URL: %s?%s' %
                          (base_url, encode_payload(payload)))
             # send query to search engine
             resp = self.request(base_url, method='POST', payload=payload)
             if resp.status_code != 200:
                 self.alert(
                     'Yahoo has encountered an error. Please submit an issue for debugging.'
                 )
                 break
             tree = fromstring(resp.text)
             links = tree.xpath('//a[@class=" ac-algo ac-21th"]/@href')
             # create a unique collection of hostnames; urlparse() reports
             # None when a link carries no hostname, which must not be kept
             sites = set(urlparse(link).hostname for link in links)
             sites.discard(None)
             new = False
             # add hostname to list if not already exists
             for site in sites:
                 if site not in hosts:
                     hosts.append(site)
                     new = True
                     self.output(site)
                     self.add_hosts(site)
             if not new:
                 # exit if all hostnames have been found
                 if '>Next<' not in resp.text:
                     break
                 page += 1
                 self.verbose(
                     'No New Subdomains Found on the Current Page. Jumping to Result %d.'
                     % ((page * nr) + 1))
                 # keep looping: the next page may still hold new hostnames
                 new = True
             # sleep script to avoid lock-out
             self.verbose('Sleeping to avoid lockout...')
             time.sleep(random.randint(5, 15))
Exemplo n.º 2
0
 def get_company_id(self, company_name):
     """Resolve ``company_name`` to a Jigsaw company ID.

     A single search request is sent (offset 0, page size 50) whose search
     term is the company name joined with the ``keywords`` option. Only
     companies reporting more than zero active contacts are considered.

     :param company_name: name of the company to search for.
     :returns: the ID of the only candidate; otherwise the text typed at the
         prompt, falling back to the first candidate's ID on empty input.
         ``None`` when the search has no hits or no candidate has contacts.
     """
     self.heading(company_name, level=0)
     keywords = self.options['keywords']
     # join only the non-empty search terms
     search_terms = ' '.join(filter(None, [company_name, keywords]))
     url = 'https://www.jigsaw.com/rest/searchCompany.json'
     # only the first page of results is requested; paging is not implemented
     payload = {
         'token': self.api_key,
         'name': search_terms,
         'offset': 0,
         'pageSize': 50
     }
     self.verbose('Query: %s?%s' % (url, encode_payload(payload)))
     result = self.request(url, payload=payload, redirect=False).json
     if result['totalHits'] == 0:
         self.output('No company matches found.')
         return

     # candidates are (id, name, active contact count, location) tuples
     candidates = []
     for entry in result['companies']:
         if not (entry['activeContacts'] > 0):
             continue
         place = '%s, %s, %s' % (
             entry['city'], entry['state'], entry['country'])
         candidates.append(
             (entry['companyId'], entry['name'],
              entry['activeContacts'], place))

     if not candidates:
         self.output(
             'No contacts available for companies matching \'%s\'.' %
             (company_name))
         return

     first_id, first_name, first_contacts = candidates[0][:3]
     if len(candidates) == 1:
         self.output('Unique company match found: [%s - %s (%s contacts)]' %
                     (first_name, first_id, first_contacts))
         return first_id

     # pad every ID to the widest one so the listing lines up
     id_width = max(len(str(row[0])) for row in candidates)
     for cid, name, contacts, place in candidates:
         self.output('[%s] %s - %s (%s contacts)' % (
             str(cid).ljust(id_width), name, place, contacts))
     answer = raw_input('Enter Company ID from list [%s - %s]: ' %
                        (first_name, first_id))
     # empty input selects the first listed company
     return answer or first_id
Exemplo n.º 3
0
 def module_run(self, domains):
     """Collect hostnames for each domain from Yahoo search result pages.

     The query for a domain is re-issued with a ``-domain:<host>`` exclusion
     for every hostname found so far. Searching a domain stops when a page
     brings nothing new and has no ``>Next<`` marker, or when the response
     status is not 200. New hostnames are reported through ``self.output``
     and saved through ``self.add_hosts``.

     Links for which ``urlparse`` reports no hostname are ignored, so
     ``None`` is never stored or turned into a ``-domain:None`` exclusion.

     :param domains: iterable of domain names to search for.
     """
     base_url = 'https://search.yahoo.com/search'
     for domain in domains:
         self.heading(domain, level=0)
         base_query = 'domain:' + domain
         hosts = []
         # control variables
         new = True
         page = 0
         nr = 100
         # execute search engine queries and scrape results storing hostnames in a list
         # loop until no new hostnames are found
         while new:
             # build query based on results of previous results
             full_query = base_query + ''.join(' -domain:%s' % (host,) for host in hosts)
             payload = {'pz':nr, 'b':(page*nr)+1, 'p':full_query}
             # yahoo does not appear to have a max url length
             self.verbose('URL: %s?%s' % (base_url, encode_payload(payload)))
             # send query to search engine
             resp = self.request(base_url, method='POST', payload=payload)
             if resp.status_code != 200:
                 self.alert('Yahoo has encountered an error. Please submit an issue for debugging.')
                 break
             tree = fromstring(resp.text)
             hrefs = tree.xpath('//a[@class=" ac-algo ac-21th"]/@href')
             # unique hostnames only; drop the None that urlparse() gives
             # for links without a hostname
             sites = set(urlparse(href).hostname for href in hrefs) - set([None])
             new = False
             # add hostname to list if not already exists
             for site in sites:
                 if site not in hosts:
                     hosts.append(site)
                     new = True
                     self.output(site)
                     self.add_hosts(site)
             if not new:
                 # exit if all hostnames have been found
                 if '>Next<' not in resp.text:
                     break
                 page += 1
                 self.verbose('No New Subdomains Found on the Current Page. Jumping to Result %d.' % ((page*nr)+1))
                 # another page exists, so keep the loop alive
                 new = True
             # sleep script to avoid lock-out
             self.verbose('Sleeping to avoid lockout...')
             time.sleep(random.randint(5,15))
Exemplo n.º 4
0
 def module_run(self, domains):
     """Enumerate subdomains of each domain through Netcraft's searchdns site.

     A first request is made to obtain the JavaScript verification challenge
     cookie; when that cookie is present, a response cookie holding the
     SHA-1 hex digest of the URL-unquoted challenge is added to the jar.
     Each domain is then queried page by page. Every new hostname is
     printed with ``self.output`` and stored with ``self.add_hosts``.

     Paging continues only while the page content supplies both a ``last=``
     and a ``from=`` value. Every domain starts from a fresh payload, so
     paging values from the previous domain are never sent again.

     :param domains: iterable of domain names to search for.
     """
     url = "http://searchdns.netcraft.com/"
     pattern = r'<td align\="left">\s*<a href="http://(.*?)/"'
     base_payload = {"restriction": "site+ends+with", "host": "test.com"}
     # answer challenge cookie
     cookiejar = CookieJar()
     resp = self.request(url, payload=base_payload, cookiejar=cookiejar)
     cookiejar = resp.cookiejar
     for cookie in cookiejar:
         if cookie.name == "netcraft_js_verification_challenge":
             challenge = cookie.value
             response = hashlib.sha1(urllib.unquote(challenge)).hexdigest()
             cookiejar.set_cookie(
                 self.make_cookie("netcraft_js_verification_response", "%s" % response, ".netcraft.com")
             )
             break
     for domain in domains:
         self.heading(domain, level=0)
         # fresh payload per domain: reusing one dict would carry the
         # "last"/"from" paging values of the previous domain into this query
         payload = dict(base_payload, host=domain)
         subs = []
         # execute search engine queries and scrape results storing subdomains in a list
         # loop until no Next Page is available
         while True:
             self.verbose("URL: %s?%s" % (url, encode_payload(payload)))
             resp = self.request(url, payload=payload, cookiejar=cookiejar)
             content = resp.text
             # add subdomain to list if not already exists
             for site in set(re.findall(pattern, content)):
                 if site not in subs:
                     subs.append(site)
                     self.output("%s" % (site))
                     self.add_hosts(site)
             # verifies if there's more pages to look while grabbing the correct
             # values for our payload...
             paging = {}
             for key, value in re.findall(r"(\blast\=\b|\bfrom\=\b)(.*?)&", content):
                 # map by parameter name, keeping the first occurrence of each
                 paging.setdefault(key.rstrip("="), value)
             if "last" not in paging or "from" not in paging:
                 # without both values there is no next page to request
                 break
             payload["last"] = paging["last"]
             payload["from"] = paging["from"]
             self.verbose("Next page available! Requesting again...")
             # sleep script to avoid lock-out
             self.verbose("Sleeping to Avoid Lock-out...")
             time.sleep(random.randint(5, 15))
         if not subs:
             self.output("No results found.")
Exemplo n.º 5
0
 def module_run(self, domains):
     """Harvest subdomains for each domain from Netcraft's searchdns pages.

     The method first requests the site once to receive the JavaScript
     verification challenge cookie and, if it is present, stores the
     matching response cookie (SHA-1 hex digest of the URL-unquoted
     challenge) in the cookie jar. It then walks the result pages of every
     domain, reporting each new hostname through ``self.output`` and saving
     it through ``self.add_hosts``.

     A next page is requested only when the content provides both a
     ``last=`` and a ``from=`` value. The request payload is rebuilt for
     every domain so paging values never leak from one domain to the next.

     :param domains: iterable of domain names to search for.
     """
     url = 'http://searchdns.netcraft.com/'
     pattern = r'<td align\="left">\s*<a href="http://(.*?)/"'
     paging_pattern = r'(\blast\=\b|\bfrom\=\b)(.*?)&'
     # answer challenge cookie
     cookiejar = CookieJar()
     challenge_payload = {'restriction': 'site+ends+with', 'host': 'test.com'}
     resp = self.request(url, payload=challenge_payload, cookiejar=cookiejar)
     cookiejar = resp.cookiejar
     for cookie in cookiejar:
         if cookie.name == 'netcraft_js_verification_challenge':
             challenge = cookie.value
             response = hashlib.sha1(urllib.unquote(challenge)).hexdigest()
             cookiejar.set_cookie(
                 self.make_cookie('netcraft_js_verification_response',
                                  '%s' % response, '.netcraft.com'))
             break
     for domain in domains:
         self.heading(domain, level=0)
         # a new dict per domain keeps stale 'last'/'from' values from the
         # previous domain out of this domain's first query
         payload = {'restriction': 'site+ends+with', 'host': domain}
         subs = []
         # execute search engine queries and scrape results storing subdomains in a list
         # loop until no Next Page is available
         while True:
             self.verbose('URL: %s?%s' % (url, encode_payload(payload)))
             resp = self.request(url, payload=payload, cookiejar=cookiejar)
             content = resp.text
             # create a unique collection of the hostnames on this page
             sites = set(re.findall(pattern, content))
             # add subdomain to list if not already exists
             for site in sites:
                 if site not in subs:
                     subs.append(site)
                     self.output('%s' % (site))
                     self.add_hosts(site)
             # verifies if there's more pages to look while grabbing the correct
             # values for our payload...
             next_page = {}
             for name, value in re.findall(paging_pattern, content):
                 # first occurrence wins; strip the trailing '=' from the name
                 next_page.setdefault(name.rstrip('='), value)
             if 'last' not in next_page or 'from' not in next_page:
                 # both values are required to address the next page
                 break
             payload['last'] = next_page['last']
             payload['from'] = next_page['from']
             self.verbose('Next page available! Requesting again...')
             # sleep script to avoid lock-out
             self.verbose('Sleeping to Avoid Lock-out...')
             time.sleep(random.randint(5, 15))
         if not subs:
             self.output('No results found.')
Exemplo n.º 6
0
 def get_company_id(self, company_name):
     """Find the Jigsaw company ID that corresponds to ``company_name``.

     One search request is issued (offset 0, page size 50) using the company
     name and the ``keywords`` option as the search term. Companies without
     active contacts are discarded.

     :param company_name: name of the company to search for.
     :returns: ``None`` when there are no hits or no company with contacts;
         the ID of the sole remaining company; or, when several remain, the
         value entered at the prompt (the first company's ID if left blank).
     """
     self.heading(company_name, level=0)
     keywords = self.options['keywords']
     offset, page_size = 0, 50
     # skip whichever of the two search terms is empty
     name_query = ' '.join(term for term in (company_name, keywords) if term)
     url = 'https://www.jigsaw.com/rest/searchCompany.json'
     # a single request only: results beyond the first page are not fetched
     payload = {'token': self.api_key, 'name': name_query, 'offset': offset, 'pageSize': page_size}
     self.verbose('Query: %s?%s' % (url, encode_payload(payload)))
     resp = self.request(url, payload=payload, redirect=False)
     data = resp.json
     if data['totalHits'] == 0:
         self.output('No company matches found.')
         return
     # rows are (company id, name, active contacts, location)
     rows = []
     for item in data['companies']:
         if item['activeContacts'] > 0:
             where = '%s, %s, %s' % (item['city'], item['state'], item['country'])
             rows.append((item['companyId'], item['name'], item['activeContacts'], where))
     total = len(rows)
     if total == 0:
         self.output('No contacts available for companies matching \'%s\'.' % (company_name))
         return
     default_row = rows[0]
     if total == 1:
         self.output('Unique company match found: [%s - %s (%s contacts)]' % (default_row[1], default_row[0], default_row[2]))
         return default_row[0]
     # left-justify IDs to the length of the longest one
     pad = max(len(str(row[0])) for row in rows)
     for row in rows:
         self.output('[%s] %s - %s (%s contacts)' % (str(row[0]).ljust(pad), row[1], row[3], row[2]))
     selection = raw_input('Enter Company ID from list [%s - %s]: ' % (default_row[1], default_row[0]))
     # blank input means "take the first company in the list"
     if selection:
         return selection
     return default_row[0]