Example #1
    def setUp(self):
        self.plugin = bruterPlugin()
        self.pluginArgs = []

        self.plugin.serviceConnector.setSocksProxySettings(
            config.socksHost, config.socksPort)
        self.plugin.setPluginArguments(self.pluginArgs)
        self.plugin.processPluginArguments()
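
For context, here is a minimal sketch of the test-case shell this setUp might live in. It assumes bruterPlugin and config come from the project's own modules; the class name BruterPluginTest is hypothetical, not taken from the source.

    import unittest

    class BruterPluginTest(unittest.TestCase):  # hypothetical name
        def setUp(self):
            self.plugin = bruterPlugin()
            self.pluginArgs = []
            # Route the plugin's traffic through the local SOCKS proxy (Tor).
            self.plugin.serviceConnector.setSocksProxySettings(
                config.socksHost, config.socksPort)
            self.plugin.setPluginArguments(self.pluginArgs)
            self.plugin.processPluginArguments()

    if __name__ == '__main__':
        unittest.main()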
Example #2
    def parse(self, response):
        if response.url in self.visitedLinks:
            return
        else:
            self.visitedLinks.append(response.url)
        if response.status == 401:
            # The resource requires HTTP authentication; optionally brute-force it.
            bruter = bruterPlugin(torNodes=[])
            if '.onion' in response.url and self.bruterOnProtectedResource:
                print "[+] HTTP protected resource found in hidden service. As you've indicated, we're going to start an HTTP dictionary attack."
                bruter.httpBruterOnHiddenService(response.url,
                                                 dictFile=self.dictFile)
            elif '.onion' not in response.url and self.bruterOnProtectedResource:
                print "[+] HTTP protected resource found in clear web site. As you've indicated, we're going to start an HTTP dictionary attack."
                bruter.httpBruterOnSite(response.url, dictFile=self.dictFile)

        item = HiddenSitePage()
        if self.contents:
            onion = response.url

            onion = onion.replace(self.localTunnel, self.onionSite)
            onion = onion.replace('http://', '')
            onion = onion.replace('https://', '')
            onion = onion.replace(':', '')

            indexResource = onion.rfind('/')
            dirStructure = onion[:indexResource]
            resource = onion[indexResource:].replace('/', '')
            try:
                if not os.path.exists(config.deepWebCrawlerOutdir +
                                      dirStructure):
                    os.makedirs(config.deepWebCrawlerOutdir + dirStructure)
                if resource == '':
                    # A directory URL: store the body as its index page.
                    with open(config.deepWebCrawlerOutdir + dirStructure +
                              "/index.html", 'wb') as out:
                        out.write(response.body)
                else:
                    with open(config.deepWebCrawlerOutdir + dirStructure +
                              resource, 'wb') as out:
                        out.write(response.body)
            except (OSError, IOError):
                # The URL may map to a name the filesystem rejects; skip it
                # instead of aborting the crawl.
                pass

        # Only textual responses are worth parsing further.
        if 'text/' not in response.headers.get('Content-Type', ''):
            return
        item['body'] = response.body

        selector = Selector(response)
        titles = selector.xpath('//title/text()').extract()
        item['title'] = titles[0] if titles else 'No title'
        item['url'] = response.url

        headers = ''
        for header, value in response.headers.iteritems():
            headers = str(header) + " : " + str(value) + "\n" + headers
        item['headers'] = headers
        if self.images:
            item['imagesSrc'] = selector.xpath(self.crawlRulesImages).extract()
        if self.forms:
            if len(selector.xpath('//form').extract()) > 0:
                browser = mechanize.Browser()
                # Configure the browser before opening the URL so the
                # cookie jar and handlers apply to the request.
                cj = cookielib.LWPCookieJar()
                browser.set_cookiejar(cj)
                browser.set_handle_equiv(True)
                browser.set_handle_gzip(True)
                browser.set_handle_redirect(True)
                browser.set_handle_referer(True)
                browser.set_handle_robots(False)
                # Spoof a common browser User-Agent.
                browser.addheaders = [(
                    'User-agent',
                    'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1'
                )]
                browser.open(response.url)
                try:
                    pageForms = {}
                    formId = 0
                    for form in browser.forms():
                        if form.name is None:
                            form.name = "form_" + str(formId)
                        controls = []
                        for control in form.controls:
                            controlName = control.name
                            controlType = control.type
                            controlValue = control.value
                            if controlName is None:
                                controlName = ""
                            if controlType is None:
                                controlType = ""
                            if controlValue is None:
                                controlValue = ""
                            controls.append(
                                (str(controlName), str(controlType),
                                 str(controlValue)))
                        formId = formId + 1
                        pageForms[form.name] = controls
                    item['forms'] = pageForms
                except Exception:
                    # mechanize can choke on malformed forms; skip them.
                    pass

        if 'item' in response.meta:
            item['pageParent'] = response.meta.get('item')

        # Follow extracted links with no depth limit.
        if self.links and self.deepLinks is None:
            linksFound = response.xpath(self.crawlRulesLinks).extract()
            for url in linksFound:
                if len(self.userAgents) > 0:
                    userAgent = random.choice(self.userAgents)
                    newRequest = Request(urljoin(response.url, url),
                                         callback=self.parse,
                                         errback=lambda _: item,
                                         meta=dict(item=item))
                    newRequest.headers.setdefault('User-Agent', userAgent)
                    yield newRequest
                else:
                    yield Request(urljoin(response.url, url),
                                  callback=self.parse,
                                  errback=lambda _: item,
                                  meta=dict(item=item))

        # Follow extracted links while the deepLinks counter allows it.
        if self.links and self.deepLinks is not None:
            linksFound = response.xpath(self.crawlRulesLinks).extract()
            for url in linksFound:
                if self.deepLinks > 0:
                    if len(self.userAgents) > 0:
                        userAgent = random.choice(self.userAgents)
                        newRequest = Request(urljoin(response.url, url),
                                             callback=self.parse,
                                             errback=lambda _: item,
                                             meta=dict(item=item))
                        newRequest.headers.setdefault('User-Agent', userAgent)
                        yield newRequest
                    else:
                        yield Request(urljoin(response.url, url),
                                      callback=self.parse,
                                      errback=lambda _: item,
                                      meta=dict(item=item))
                    self.deepLinks -= 1
                else:
                    break
        yield item
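
For context, a minimal sketch of the Scrapy spider shell around this parse() method. The attribute names mirror the snippet; the class name, constructor arguments, and default values are assumptions, not taken from the project.

    import scrapy

    class HiddenServiceSpider(scrapy.Spider):  # hypothetical name
        name = 'hiddenServiceSpider'

        def __init__(self, site, dictFile=None, *args, **kwargs):
            super(HiddenServiceSpider, self).__init__(*args, **kwargs)
            self.start_urls = [site]
            self.visitedLinks = []          # URLs already parsed
            self.dictFile = dictFile
            self.bruterOnProtectedResource = dictFile is not None
            self.contents = True            # save bodies to disk
            self.images = True              # collect image sources
            self.forms = True               # enumerate HTML forms
            self.links = True               # follow extracted links
            self.deepLinks = None           # None means no depth limit
            self.userAgents = []            # optional User-Agent pool
            self.localTunnel = ''           # local tunnel prefix to strip
            self.onionSite = ''             # onion address it maps back to
            self.crawlRulesLinks = '//a/@href'
            self.crawlRulesImages = '//img/@src'

        # The parse() method shown above goes here.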