def get_domains_by_ip(self, ip):
    """Collect the domains Bing lists for *ip* using its ``ip:`` search operator.

    A first request reads the reported result total, then the result pages
    are fetched one by one and every linked host name is normalised with
    ``Domain.get_domain``.

    :param ip: IP address interpolated into the Bing ``ip:`` query.
    :return: list of unique, non-empty values returned by
        ``Domain.get_domain`` (order is not defined).
    """
    print_status('Start get domain by ip through bing...')
    # NOTE(review): the huge ``first`` offset presumably makes Bing answer
    # with its last page so the reported total is final -- confirm.
    url = 'http://cn.bing.com/search?q=ip:%s&first=999999991&FORM=PERE' % ip
    html = request(url, 'GET')
    # Compiled with re.X, so the padding spaces inside the pattern are ignored.
    domain_regx = r''' <h2><a\shref="https?://([^"]*?)"\starget="_blank"\sh="ID=[^"]*?">[^<]*?</a></h2> '''
    domain_list = re.findall(domain_regx, html, re.X)
    total_page_regx = r'''<span\sclass="sb_count">\d*?\s-\s\d*?\s[^\(]*?\([^\s]*?\s(\d*?)\s[^\)]*?\)</span>'''
    result = re.search(total_page_regx, html)
    try:
        total_num = int(result.group(1).replace(',', ''))
    except (AttributeError, ValueError):
        # No counter in the page (result is None) or an empty/non-numeric
        # capture: fall back to a single page worth of results.
        total_num = 9

    # Ceiling division kept in integers so the page count is an int on both
    # Python 2 and Python 3 (``/`` would yield a float on Python 3).
    page_count, remainder = divmod(total_num, 9)
    if remainder > 0:
        page_count += 1
    print_status('Total pages: %s, Total domains:%s' % (page_count, total_num))

    # Iterate over pages, not over the domain total: ``n`` is a page index
    # and ``first=<n>1`` yields the offsets 1, 11, 21, ...
    for n in range(page_count):
        print_status('Get page %s domains...' % (n + 1))
        url = 'http://cn.bing.com/search?q=ip:%s&first=%s1&FORM=PERE3' % (ip, n)
        html = request(url, 'GET')
        domain_list.extend(re.findall(domain_regx, html, re.X))
        # Pause between result pages.
        time.sleep(20)

    # Normalise each hit once, drop empty results and de-duplicate.
    normalized = (Domain.get_domain(domain) for domain in domain_list)
    return list(set(domain for domain in normalized if domain))
# NOTE(review): an identically named definition follows this one; if both
# live in the same scope the later one wins -- confirm and drop one.
def get_subdomain_by_links(self, domain, level=4):
    """Look up sub-domains of *domain* through the i.links.cn form.

    :param domain: host name or URL; normalised with ``Domain.get_domain``
        before it is submitted.
    :param level: controls the ``b3``/``b4`` form fields, which are switched
        on when ``level`` is at least 3 / 4 (``b2`` is always on).
        NOTE(review): these presumably select the sub-domain depth on the
        remote side -- confirm.
    :return: list of unique host names scraped from the response, always
        including the normalised *domain* itself (order is not defined).
    """
    domain = Domain.get_domain(domain)
    url = 'http://i.links.cn/subdomain/'
    data = {
        'domain': domain,
        'b2': 1,
        'b3': 1 if level >= 3 else 0,
        'b4': 1 if level >= 4 else 0,
    }
    html = request(url, 'POST', data=data)
    # Raw string: ``\s`` is a regex escape, not a Python string escape.
    regex = r'''<a\shref="http://[^"]*?"\srel=nofollow\starget=_blank>http://([^"]*?)</a></div>'''
    try:
        result = re.findall(regex, html)
    except TypeError:
        # The response was not text (e.g. None); carry on with no matches
        # so the queried domain is still returned.
        result = []
    # Pause after the request.
    time.sleep(3)
    result.append(domain)
    return list(set(result))
def get_subdomain_by_links(self, domain, level=4):
    """Look up sub-domains of *domain* through the i.links.cn form.

    :param domain: host name or URL; normalised with ``Domain.get_domain``
        before it is submitted.
    :param level: controls the ``b3``/``b4`` form fields, which are switched
        on when ``level`` is at least 3 / 4 (``b2`` is always on).
        NOTE(review): these presumably select the sub-domain depth on the
        remote side -- confirm.
    :return: list of unique host names scraped from the response, always
        including the normalised *domain* itself (order is not defined).
    """
    domain = Domain.get_domain(domain)
    url = 'http://i.links.cn/subdomain/'
    data = {
        'domain': domain,
        'b2': 1,
        'b3': 1 if level >= 3 else 0,
        'b4': 1 if level >= 4 else 0,
    }
    html = request(url, 'POST', data=data)
    # Raw string: ``\s`` is a regex escape, not a Python string escape.
    regex = r'''<a\shref="http://[^"]*?"\srel=nofollow\starget=_blank>http://([^"]*?)</a></div>'''
    try:
        result = re.findall(regex, html)
    except TypeError:
        # The response was not text (e.g. None); carry on with no matches
        # so the queried domain is still returned.
        result = []
    # Pause after the request.
    time.sleep(3)
    result.append(domain)
    return list(set(result))
# NOTE(review): an identically named definition follows this one; if both
# live in the same scope the later one wins -- confirm and drop one.
def _get_text_list(self, config, value):
    """Fetch a URL built from *config* and return every regex match in it.

    :param config: indexable pair; ``config[0]`` is a ``%``-format URL
        template and ``config[1]`` the pattern to search for.
    :param value: value interpolated into the URL template.
    :return: whatever ``re.findall`` yields for the pattern, applied
        case-insensitively, with dot-matches-newline and verbose mode.
    """
    target = config[0] % value
    pattern = config[1]
    flags = re.I | re.S | re.X
    return re.findall(pattern, request(target), flags)
def _get_text_list(self, config, value):
    """Download the page described by *config* and extract matching text.

    :param config: indexable pair; item 0 is a ``%``-format URL template,
        item 1 is the regular expression run against the response.
    :param value: substituted into the URL template.
    :return: the ``re.findall`` result for the pattern, matched with the
        IGNORECASE, DOTALL and VERBOSE flags.
    """
    page_url = config[0] % value
    pattern = config[1]
    page_text = request(page_url)
    matches = re.findall(pattern, page_text, flags=re.I | re.S | re.X)
    return matches