def fetch_alexa_cn(self):
    """Collect subdomains of self.domain from the alexa.cn API.

    Requires a signature tuple from self.get_sign_alexa_cn(); appends every
    valid subdomain found in the response to self.subset.
    """
    sign = self.get_sign_alexa_cn()
    if sign is None:
        raise Exception("sign_fetch_is_failed")
    domain, sig, keyt = sign
    # Leftmost label of the target domain, used to trim noisy prefixes below.
    prefix = self.domain.split('.')[0]
    api_url = 'http://www.alexa.cn/api_150710.php'
    form = {
        'url': domain,
        'sig': sig,
        'keyt': keyt,
    }
    body = http_request_post(api_url, payload=form).text
    # The last '*'-separated field holds '__'-joined "name:value" entries.
    for entry in body.split('*')[-1].split('__'):
        name = entry.split(':')[0]
        if name == 'OTHER':
            # Sentinel entry: everything after it is not a subdomain.
            break
        candidate = ''.join((name.split(prefix)[0], domain))
        if is_domain(candidate):
            self.subset.append(candidate)
def baidu_site(key_domain='', sub_domain='', command=''):
    '''Scrape Baidu web search ("site:" queries) for subdomains.

    key_domain -- target domain (a full URL is reduced to its hostname);
                  used to build a "site:key_domain" query.
    sub_domain -- optional subdomain excluded via "-inurl:(...)".
    command    -- optional raw query that overrides the site: query.

    Returns the list of domains newly found by this call, [] when Baidu
    reports no results, or None when an exception occurs (all errors are
    swallowed).
    '''
    headers = requests_headers()
    proxies = requests_proxies()
    # Reduce a full URL to its bare hostname.
    if '://' in key_domain:
        key_domain = urlparse.urlparse(key_domain).hostname
    check = []  # domains discovered during this call only
    baidu_url = 'https://www.baidu.com/s?ie=UTF-8&wd=site:{}'.format(
        key_domain)
    if command:
        # An explicit query string takes precedence over the site: query.
        baidu_url = 'https://www.baidu.com/s?ie=UTF-8&wd={}'.format(command)
    try:
        r = requests.get(url=baidu_url, headers=headers, timeout=10,
                         proxies=proxies, verify=False).text
        # 'class="tip_head"' marks Baidu's "no results" page.
        if 'class="tip_head"' not in r:
            for page in xrange(0, 21):  # hard upper limit on pages fetched
                pn = page * 50  # Baidu result offset (50 results per page)
                if key_domain:
                    newurl = 'https://www.baidu.com/s?ie=UTF-8&wd=site:{}&pn={}&rn=50&tn=baiduadv'.format(
                        key_domain, pn)
                if sub_domain:
                    # Exclude an already-known subdomain from the results.
                    newurl = 'https://www.baidu.com/s?ie=UTF-8&wd=site:{} -inurl:({})&pn={}&rn=50&tn=baiduadv'.format(
                        key_domain, sub_domain, pn)  # -site:(weibo.com)
                if command:
                    newurl = 'https://www.baidu.com/s?ie=UTF-8&wd={}&pn={}&rn=50&tn=baiduadv'.format(
                        command, pn)
                keys = requests.get(url=newurl, headers=headers,
                                    proxies=proxies, timeout=10,
                                    verify=False).content
                # Capture the text preceding key_domain in each result link.
                flags = re.findall(
                    r'style=\"text-decoration:none;\">(.*?)%s.*?<\/a><div class=\"c-tools\"'
                    % key_domain, keys)
                # Count of pager links; a low count suggests the last page.
                check_flag = keys.count('class="n"')
                for flag in flags:
                    # Strip scheme and Baidu's highlight markup from the hit.
                    domain_handle = flag.replace('https://', '').replace(
                        'http://', '').replace('<b>', '').replace('</b>', '')
                    if domain_handle != '':  # skip truncated "..." entries
                        domain_flag = domain_handle + key_domain
                        if domain_flag not in check and is_domain(domain_flag):
                            if domain_flag not in baidu_domainss:
                                check.append(domain_flag)
                                print '[+] Get baidu site: > ' + domain_flag
                                # baidu_domainss: module-level accumulator
                                # shared across calls (defined elsewhere).
                                baidu_domainss.append(domain_flag)
                # Stop once past page 2 with (almost) no pager links left.
                if check_flag < 2 and page > 2:
                    return check
        else:
            print '[!] baidu site:domain no result'
            return []
    except Exception, e:
        # NOTE(review): every error (network, parsing) is silently swallowed
        # and the function falls through, returning None.
        pass
def run(self): try: timestemp = time.time() url = "{0}?0.{1}&callback=&k={2}&page=1&order=default&sort=desc&action=moreson&_={3}&verify={4}".format( self.url, timestemp, self.domain, timestemp, self.verify) result = json.loads(req.get(url).content) if result.get('status') == '1': for item in result.get('data'): if is_domain(item.get('domain')): self.subset.append(item.get('domain')) elif result.get('status') == 3: logging.info("chaxun.la api block you ip...") logging.info( "input you verify_code in http://subdomain.chaxun.la/wuyun.org/" ) # print('get verify_code():', self.verify) # self.verify_code() # self.run() self.subset = list(set(self.subset)) print "[-] {0} found {1} domains".format(self.engine_name, len(self.subset)) return self.subset except Exception as e: logging.info(str(e)) print "[-] {0} found {1} domains".format(self.engine_name, len(self.subset)) return self.subset
def run(self):
    """Query the chaxun.la subdomain API for self.domain.

    Appends valid subdomains to self.domain_name (deduplicated on success)
    and always returns the tuple
    (self.domain_name, self.smiliar_domain_name, self.email).
    """
    try:
        timestemp = time.time()
        # Timestamp acts as a cache-buster; self.verify is a captcha token.
        url = "{0}?0.{1}&callback=&k={2}&page=1&order=default&sort=desc&action=moreson&_={3}&verify={4}".format(
            self.url, timestemp, self.domain, timestemp, self.verify)
        #response = req.get(url,proxies=self.proxy).content
        # no proxy needed for this class
        response = req.get(url).content
        result = json.loads(response)
        # NOTE(review): status is compared as the string '1' here but as the
        # int 3 below — verify both against the actual API payloads.
        if result.get('status') == '1':
            for item in result.get('data'):
                if is_domain(item.get('domain')):
                    self.domain_name.append(item.get('domain'))
        elif result.get('status') == 3:
            # Status 3: the API blocked our IP until a captcha is solved.
            logger.warning("chaxun.la api block our ip...")
            logger.info("input you verify_code")
            # print('get verify_code():', self.verify)
            # self.verify_code()
            # self.run()
        self.domain_name = list(set(self.domain_name))
    except Exception as e:
        logger.error("Error in {0}: {1}".format(__file__.split('/')[-1], e))
    finally:
        # NOTE(review): returning from finally masks any exception not caught
        # above (e.g. KeyboardInterrupt) — confirm this is intentional.
        logger.info("{0} found {1} domains".format(self.engine_name,
                                                   len(self.domain_name)))
        return self.domain_name,self.smiliar_domain_name,self.email
def fetch_chinaz(self):
    """Scrape subdomains of self.domain from alexa.chinaz.com."""
    page = http_request_get(
        'http://alexa.chinaz.com/?domain={0}'.format(self.domain)).content
    # Each subdomain sits between '">\r\n<li>' and '</li>' in the markup.
    for candidate in re.findall(r'(?<="\>\r\n<li>).*?(?=</li>)', page):
        if is_domain(candidate):
            self.subset.append(candidate)
def github_site(subdoamin, key_domain): headers = requests_headers() proxies = requests_proxies() if '://' in key_domain: key_domain = urlparse.urlparse(url).hostname github_domains = [] session = requests.Session() headers['Cookie'] = github_cookie try: # check_login = '******' # req_check = session.get(url=check_login,headers=headers,proxies=proxies,timeout=10,verify=False).content # if github_account in req_check: # print '[*] Github site:domain login check Success' headers['Host'] = 'github.com' headers[ 'Referer'] = 'https://github.com/search?utf8=%E2%9C%93&q=*&type=Code' github_url = 'https://github.com/search?q={}&type=Code&utf8=%E2%9C%93'.format( subdoamin) req = session.get(url=github_url, headers=headers, proxies=proxies, timeout=10, verify=False).content if 'blankslate' not in req: #if 'code results' in req: for page in xrange(1, 100): newurl = 'https://github.com/search?p={}&q={}&type=Code&s=&utf8=%E2%9C%93'.format( page, subdoamin) req_new = session.get(url=newurl, headers=headers, proxies=proxies, timeout=10, verify=False).content req_new = req_new.replace('</em>', '').replace('<em>', '').replace( '</span>', '') url_regexs = [] url_regex_url,url_regex_host,url_regex_x,url_regex_a,url_regex_b,url_regex_c,url_regex_b_a,url_regex_c_a,url_regex_d = [],[],[],[],[],[],[],[],[] try: url_regex_url = re.findall(r'//([\s\S]*?)%s' % key_domain, req_new) except: pass try: url_regex_host = re.findall( r'"([\s\S]*?)%s' % key_domain, req_new) except: pass try: url_regex_x = re.findall(r''([\s\S]*?)%s' % key_domain, req_new) except: pass try: url_regex_a = re.findall(r'/([\s\S]*?)%s' % key_domain, req_new) except: pass try: url_regex_b = re.findall( r'\[<span .*?>([\s\S]*?)%s' % key_domain, req_new) except: pass try: url_regex_b_a = re.findall(r'\[([\s\S]*?)%s' % key_domain, req_new) except: pass try: url_regex_c_a = re.findall(r'\(([\s\S]*?)%s' % key_domain, req_new) except: pass try: url_regex_c = re.findall( r'\(<span .*?>([\s\S]*?)%s' % key_domain, req_new) 
except: pass try: url_regex_d = re.findall( r'<span .*?>([\s\S]*?)%s' % key_domain, req_new) except: pass url_regexs = url_regex_url + url_regex_host + url_regex_x + url_regex_a + url_regex_b + url_regex_c + url_regex_b_a + url_regex_c_a + url_regex_d for sub in url_regexs: if sub not in github_domains and sub_filter not in sub and sub != '.' and filter_list( module=sub, filter_list=github_sub_filter ) and sub[-1:] != '-' and sub[-1:] != '_': sub.replace(' ', '') if sub[-1:] == '.': subs = sub + key_domain else: subs = sub + '.' + key_domain if is_domain(subs) and subs not in github_domainss: print '[+] Get github site:domain > ' + subs github_domainss.append(subs) github_domains.append(subs) if 'next_page disabled' in req_new: return github_domains else: print '[!] github site:domain no result' pass # else: # print '[!] Github login check Error' # print '[*] Please try again' # pass except Exception, e: # print traceback.format_exc() pass