Exemplo n.º 1
0
 def parse_post(self, response):
     """Build a ``CrawlerItem`` from the response to a POST request.

     The item records the HTTP status, page title (``None`` when the page
     has no ``<title>`` text), response body and the URL stored by the
     requester in ``response.meta['current_url']``.  The two list fields
     are left empty here.

     Args:
         response: The response object; must expose ``status``, ``text``,
             ``xpath()`` and a ``meta`` dict containing ``'current_url'``.

     Yields:
         A single populated ``CrawlerItem``.
     """
     # POST request
     item = CrawlerItem()
     item['code'] = response.status
     item['resources_file_list'] = []
     item['sub_domains_list'] = []
     title_list = response.xpath('//title/text()').extract()
     item['title'] = title_list[0] if title_list else None
     item['content'] = response.text
     item['current_url'] = response.meta['current_url']

     # Store the detection results with an assignment.  A comparison
     # (``==``) in this position would be evaluated and discarded, leaving
     # both flags permanently False.
     item['like_admin_login_url'] = bool(
         like_admin_login_content(item['content']))
     webshell_check = check_url_has_webshell_content(
         item['current_url'], item['content'], item['code'], item['title'])
     item['like_webshell_url'] = bool(webshell_check['y1'])
     yield item
Exemplo n.º 2
0
    def parse_get(self, response):
        """Build a ``CrawlerItem`` for a GET response and schedule follow-ups.

        For a 200 response the title, body and outgoing links are taken from
        ``response`` itself; for any other status the page is fetched again
        through ``get_request`` (with ``self.cookie``) and that result is
        used instead.  After the item is yielded, every collected link is
        examined:

        * links judged to be sub-domains of the main target domain are
          recorded in ``item['sub_domains_list']``;
        * links whose hostname differs from ``self.domain``, or which (or
          whose templates) are already in ``self.collected_urls``, are
          skipped;
        * links containing ``"^"`` are treated as ``<url>^<body>`` and
          requested with POST, handled by ``self.parse_post``;
        * links matching ``RESOURCE_FILE_PATTERN`` are recorded in
          ``item['resources_file_list']`` and not requested;
        * links that look like logout/exit URLs are not requested;
        * every other link is requested with ``self.parse_get``.

        Args:
            response: The response object; must expose ``status``, ``url``,
                ``text`` and ``xpath()``.

        Yields:
            The populated ``CrawlerItem`` first, then one ``SplashRequest``
            per link that should be crawled.
        """
        item = CrawlerItem()
        item['code'] = response.status
        item['current_url'] = response.url
        item['resources_file_list'] = []
        item['sub_domains_list'] = []

        if response.status == 200:
            urls = collect_urls_from_html(response.text, response.url)
            title_list = response.xpath('//title/text()').extract()
            item['title'] = title_list[0] if title_list else None
            item['content'] = response.text
        else:
            # Non-200 response: fetch the page again through get_request
            # and use its title/content instead of the Splash response.
            fallback = get_request(response.url, cookie=self.cookie)
            item['title'] = fallback['title']
            item['content'] = fallback['content']
            urls = collect_urls_from_html(fallback['content'], response.url)

        # Store the detection results with an assignment.  A comparison
        # (``==``) in this position would be evaluated and discarded, leaving
        # both flags permanently False.
        item['like_admin_login_url'] = bool(
            like_admin_login_content(item['content']))
        webshell_check = check_url_has_webshell_content(
            item['current_url'], item['content'], item['code'], item['title'])
        item['like_webshell_url'] = bool(webshell_check['y1'])

        # NOTE(review): the item is yielded before the loop below appends to
        # item['sub_domains_list'] / item['resources_file_list'].  Whether a
        # consumer sees those additions depends on when it reads the item --
        # confirm against the item pipeline before relying on either list.
        yield item

        url_main_target_domain = get_url_belong_main_target_domain(
            self.start_url)

        # Compiled once; URLs matching it are never requested (presumably to
        # keep the crawler's session alive -- confirm).
        logoff_pattern = re.compile(
            r"(logout)|(logoff)|(exit)|(signout)|(signoff)", re.I)

        for url in urls:
            url_templet_list = get_url_templet_list(url)
            url_http_domain = get_http_domain_from_url(url)
            parsed_url = urlparse(url)

            if url_is_sub_domain_to_http_domain(
                    url,
                    parsed_url.scheme + "://" + url_main_target_domain
            ) and url_http_domain not in item['sub_domains_list']:
                item['sub_domains_list'].append(url_http_domain)

            # Only follow links on the spider's own host that have not been
            # collected yet, either literally or via one of their templates.
            if parsed_url.hostname != self.domain:
                continue
            if url in self.collected_urls:
                continue
            if any(templet in self.collected_urls
                   for templet in url_templet_list):
                continue

            self.add_url_templet_to_collected_urls(url)

            if "^" in url:
                # POST-type URL: "<request url>^<request body>"
                post_url, post_data = url.split("^")[:2]
                yield SplashRequest(post_url,
                                    callback=self.parse_post,
                                    endpoint='execute',
                                    magic_response=True,
                                    meta={
                                        'handle_httpstatus_all': True,
                                        'current_url': url
                                    },
                                    args={
                                        'lua_source': self.lua_script,
                                        'http_method': 'POST',
                                        'body': post_data
                                    })
                continue

            # GET-type URL
            if re.match(RESOURCE_FILE_PATTERN, url):
                item['resources_file_list'].append(url)
            elif not logoff_pattern.search(url):
                yield SplashRequest(url,
                                    self.parse_get,
                                    endpoint='execute',
                                    magic_response=True,
                                    meta={'handle_httpstatus_all': True},
                                    args={'lua_source': self.lua_script})