def setStartLinks(self, links):
    # Normalize the user-supplied start links before crawling.
    new_links = []
    for link in links:
        if self.isAbsolute(link):
            # If the link's domain starts with the base domain, it is internal.
            if self.getDomain(link).startswith(self.domain):
                new_links.append(link.strip())
        else:
            #if self.verbose: print 'normalizing %s' % link
            # Relative link: prepend the full domain and normalize it.
            full_domain = parseurls.getDomain(self.url)
            newlink = '%s/%s' % (full_domain, link)
            newlink = parseurls.normalize('', newlink)
            new_links.append(newlink)
    self.startlinks = new_links
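# Usage sketch (illustrative values, not from the original code): with
# self.domain = 'example.com' and self.url = 'http://example.com', a mixed
# list of links would be normalized as follows:
#
#   crawler.setStartLinks(['http://example.com/admin/', 'login.php',
#                          'http://other.org/x'])
#   # crawler.startlinks -> ['http://example.com/admin/',
#   #                        'http://example.com/login.php']
#   # the off-domain absolute link is silently discarded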
def buildMap(bnde, resources):
    # Build the sitemap tree rooted at the base node from the resource URLs.
    bnode = simplenode(bnde, parent=None)
    bnode.setFPath()
    for res in resources:
        # What happens when the URL has no protocol? This assumes '//' is
        # present, i.e. that the earlier normalization step already ran.
        resource = res.split('//')[1]
        dirlist = resource.split('/')[1:]
        agrega(bnode, dirlist)
    # Domain without scheme or 'www.' prefix, used as the image filename.
    dom = parseurls.getDomain(bnode.getUrl()).split('://')[1].replace('www.', '')
    try:
        RenderTreeGraph(bnode).to_picture(dom + '.jpg')
    except Exception:
        print 'Cannot write sitemap image file'
    return bnode
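# Usage sketch (hypothetical values): buildMap takes the base URL and the flat
# list of discovered resources, grows one tree branch per path segment, and
# tries to render '<domain>.jpg' with anytree's RenderTreeGraph (which relies
# on Graphviz being installed):
#
#   root = buildMap('http://example.com',
#                   ['http://example.com/css/main.css',
#                    'http://example.com/js/app.js'])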
def scan(site):
    req = rutils(not opts.skipcerts, opts.redirects, opts.cookies, opts.useragent,
                 opts.tor, opts.timeout, opts.proxy)
    # Get the target domain.
    domain = getDomain(site)
    ######################### Report ################################
    reportex = reportmgr(domain, domain, opts.output)
    ######################### Execution parameters ##################
    ejecucion = ['Scan date: ' + strftime("%Y-%m-%d", gmtime()),
                 'Startpage: ' + site,
                 'Site IP: ' + req.getSiteIP(site),
                 'Depth: ' + str(opts.depth),
                 'Delay: ' + str(opts.time),
                 'MaxFiles: ' + str(opts.maxfiles),
                 'Run External Tools: ' + str(opts.runexternaltools),
                 'Excluded dirs: ' + ','.join(opts.exclude),
                 'Start links: ' + ','.join(opts.startlinks),
                 'Bruteforce: ' + str(opts.bruteforce),
                 'Wordlist: ' + str(opts.wordlist),
                 'Bruteforce extensions: ' + ','.join(opts.extensions),
                 'Config file: ' + str(opts.cfgfile),
                 'Allow Redirects: ' + str(req.redirects()),
                 'Verify Certs: ' + str(req.verifyCert()),
                 'Cookies: ' + cgi.escape(str(req.cookies())),
                 'Useragent: ' + str(req.userAgent()),
                 'Tor: ' + str(req.useTor()),
                 'Proxies: ' + str(req.getProxys()),
                 'Timeout: ' + str(req.getTimeout()),
                 'IP used: ' + str(req.getIP()).rstrip()]
    if opts.save:
        print 'Saving startpage'
        req.savePage(site)
    # Execution parameters.
    if opts.color:
        try:
            print(Fore.BLUE + "Execution\n" + Style.RESET_ALL + '\n'.join(ejecucion))
        except Exception:
            print '\nExecution', '\n'.join(ejecucion)
    else:
        print '\nExecution', '\n'.join(ejecucion)
    reportex.fromList(['execution'] + ["Crawleet by truerandom"] + ejecucion, False, True)
    # Headers.
    headersinfo = headers.headersAnalysis(req, parseurls.getDomain(site))
    if opts.color:
        try:
            print(Fore.BLUE + "\nHeaders\n" + Style.RESET_ALL + '\n'.join(headersinfo))
        except Exception:
            print '\nHeaders', '\n'.join(headersinfo)
    else:
        print '\nHeaders', '\n'.join(headersinfo)
    reportex.fromList(['headers'] + headersinfo)
    # HTTP methods.
    metodos = req.getMethods(parseurls.getDomain(site)).keys()
    if opts.color:
        try:
            print(Fore.BLUE + "\nHTTP methods\n" + Style.RESET_ALL + '\n'.join(metodos))
        except Exception:
            print '\nHTTP methods', '\n'.join(metodos)
    else:
        print '\nHTTP methods', '\n'.join(metodos)
    reportex.fromList(['http methods'] + metodos)
    # Crawling.
    crawly = ClassyCrawler(req, reportex, site, opts.depth, opts.time, opts.bruteforce,
                           opts.backups, opts.wordlist, opts.runexternaltools,
                           opts.cfgfile, opts.datadir, opts.extensions, opts.verbose,
                           opts.exclude, opts.maxfiles, opts.color)
    # Register any additional start links supplied for the crawl.
    crawly.setStartLinks(opts.startlinks)
    crawly.crawl()
    # DNS records / subdomain enumeration.
    dnsmod = dnsenum()
    subdominios = dnsmod.getResults(getDomain(site))
    if opts.color:
        try:
            print(Fore.BLUE + '\n' + '\n'.join(subdominios) + Style.RESET_ALL)
        except Exception:
            print '\nSubdomains\n', '\n'.join(subdominios)
    else:
        print '\nSubdomains\n', '\n'.join(subdominios)
    reportex.fromList(subdominios)
    # Close the report.
    reportex.finish()
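# Invocation sketch: scan() assumes the module-level opts object has already
# been populated by the option parser, e.g.:
#
#   scan('http://example.com')
#
# This prints the execution parameters, header analysis, allowed HTTP methods,
# crawl results and subdomain enumeration, then writes the report to
# opts.output.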
def crawl(self):
    startpage = self.url
    # Visited and pending resources, keyed by URL.
    self.visited = collections.OrderedDict()
    self.tovisit = collections.OrderedDict()
    # The start page is the parent node of the crawl tree.
    self.tovisit[startpage.strip()] = nodoresultado(startpage.strip(), '', 0)
    # Quick patch for adding the startup links: they hang off the start page.
    node_res = self.tovisit[startpage.strip()]
    self.addLinks(self.startlinks, 1, node_res)
    while len(self.tovisit) > 0 and len(self.visited) < self.maxfiles:
        if self.verbose:
            if self.color:
                try:
                    print (Fore.GREEN + "\nVisited elems: " + Fore.BLUE + str(len(self.visited)) + Style.RESET_ALL)
                except Exception:
                    print "Visited elems: ", len(self.visited)
            else:
                print "Visited elems: ", len(self.visited)
        # tovisit maps 'url' -> result node; take the oldest entry (FIFO).
        elem = self.tovisit.items()[0][1]
        actualpage = elem.getUrl()
        nivel = elem.getNivel()
        # Remove the element from the pending queue.
        del self.tovisit[actualpage]
        if self.color:
            try:
                print (Fore.GREEN + "\nResource: " + Fore.BLUE + actualpage + Style.RESET_ALL)
                print (Fore.GREEN + "Current level: " + Fore.BLUE + str(nivel) + Style.RESET_ALL)
                print (Fore.GREEN + "Remaining elems: " + Fore.BLUE + str(len(self.tovisit)) + Style.RESET_ALL)
            except Exception:
                print "\nResource: ", actualpage
                print 'current level: ', nivel
                print 'remaining elements: ', len(self.tovisit)
        else:
            print "\nResource: ", actualpage
            print 'current level: ', nivel
            print 'remaining elements: ', len(self.tovisit)
        # Delay between requests.
        time.sleep(self.delay)
        # Issue a HEAD request first.
        actreq = self.req.getHeadRequest(actualpage)
        # Decide from the headers whether the resource is HTML.
        status = self.isHTML(actreq)
        self.visited[actualpage] = elem
        if status is not None and status[0]:
            # Check the resource name for known vulnerable paths.
            self.vulndetector.fromFilename(actualpage)
            # Fingerprint software from the response headers.
            self.swdetector.fromHeaders(actreq.headers, actualpage)
            try:
                elem.setStatus(actreq.status_code)
            except Exception:
                # Server-side error.
                status[1] = 500
            # Fetch the source code if the status code is < 400.
            if status[1] < 400:
                try:
                    actualcode = self.req.getHTMLCode(actualpage).text
                except Exception as e:
                    print('crawler@crawl problem with %s' % actualpage)
                    print(e)
                    actualcode = None
                if actualcode is not None:
                    # Detect software from the page source.
                    self.swdetector.fromCode(actualcode, actualpage)
                    # Extract internal and external links; the current URL is
                    # passed so relative child links resolve against it.
                    links = self.getLinks(actualcode, actualpage)
                    intlinks = links[0]
                    elem.setLinks(intlinks)
                    # Extract the forms.
                    formularios = self.getForms(actualcode)
                    elem.setForms(formularios)
                    # Record this resource as visited.
                    self.visitedresources.append(elem)
                    if elem.hasForms():
                        print "Has forms"
                    # Check whether directory listing is enabled.
                    dirlisting = self.directoryListing(actualpage)
                    if dirlisting:
                        print "Directory listing enabled"
                        actualdir = self.getPreffix(actualpage)
                        if self.verbose:
                            print 'dir found ', actualdir
                        if actualdir not in self.directories:
                            self.directories.append(actualdir)
                            intlinks.append(actualdir)
                    # Backup-name bruteforce.
                    if self.backups:
                        # Should the parent of these nodes be the current node or its parent?
                        bkplinks = self.bforcer.thisFile(actualpage)
                        if bkplinks is not None and len(bkplinks) > 0:
                            self.addLinks(bkplinks, nivel, elem)
                    if self.bruteforce:
                        blinks = self.bruteForce(actualpage)
                        if blinks is not None and len(blinks) > 0:
                            if nivel + 1 < self.depth:
                                self.addLinks(blinks, nivel, elem)
                    # Add the internal links if the next level is below the depth limit.
                    if nivel + 1 < self.depth:
                        self.addLinks(intlinks, nivel, elem)
                else:
                    print "Something wrong with ", actualpage
            # Got a 400 or 500.
            else:
                print "Broken link: ", actualpage
                if actualpage not in self.brokenlist:
                    self.brokenlist.append(actualpage)
                self.swdetector.fromFilename(actualpage)
        # Not an HTML resource: treat it as a plain file.
        else:
            print "File found: ", actualpage
            # Detect software from the filename.
            print "Detecting from filename -> ", actualpage
            self.swdetector.fromFilename(actualpage)
            self.flist.append(elem)
            # TODO: optimize this duplicated directory-listing check.
            dirlisting = self.directoryListing(actualpage)
            if dirlisting:
                print "Directory Listing enabled"
                if self.verbose:
                    print 'current level ', nivel
                actualdir = self.getPreffix(actualpage)
                if actualdir not in self.directories:
                    self.directories.append(actualdir)
                if nivel + 1 < self.depth:
                    self.addLinks([actualdir], nivel, elem)
            if self.backups:
                # Should the parent of these nodes be the current node or its parent?
                bkplinks = self.bforcer.thisFile(actualpage)
                if bkplinks is not None and len(bkplinks) > 0:
                    self.addLinks(bkplinks, nivel, elem)
            if self.bruteforce:
                blinks = self.bruteForce(actualpage)
                if blinks is not None and len(blinks) > 0:
                    if nivel + 1 < self.depth:
                        self.addLinks(blinks, nivel, elem)
    ####################### END OF CRAWLING #########################
    ####################### CONSOLE OUTPUT ##########################
    ####################### Resources ###############################
    if self.color:
        try:
            print (Fore.BLUE + "\n" + "*" * 100 + "\nResources\n" + "*" * 100 + "\n" + Style.RESET_ALL)
        except Exception:
            print "*" * 100 + "\nResources\n", "*" * 100, "\n"
    else:
        print "*" * 100 + "\nResources\n", "*" * 100, "\n"
    for res in self.visitedresources:
        print "Url: ", res.url
        if res.hasForms():
            for fx in res.getForms():
                if fx.action is not None:
                    print '\tForm: ', fx.action
    ####################### Broken links ############################
    if len(self.brokenlist) > 0:
        if self.color:
            try:
                print (Fore.BLUE + "\nBroken Links: \n" + Style.RESET_ALL + "\n".join(self.brokenlist))
            except Exception:
                print "\nBroken Links: \n", "\n".join(self.brokenlist)
        else:
            print "\nBroken Links: \n", "\n".join(self.brokenlist)
    ####################### Files found #############################
    if len(self.flist) > 0:
        if self.color:
            try:
                print (Fore.BLUE + "\nFiles found: \n" + Style.RESET_ALL)
            except Exception:
                print "\nFiles found:\n"
        else:
            print "\nFiles found:\n"
        for f in self.flist:
            print f.getUrl()
    ####################### Bruteforced files #######################
    if len(self.bforcer.found_resources) > 0:
        if self.color:
            try:
                print (Fore.BLUE + "\nBruteforced files: \n" + Style.RESET_ALL + "\n".join(self.bforcer.found_resources))
            except Exception:
                print "\nBruteforced files: \n", "\n".join(self.bforcer.found_resources)
        else:
            print "\nBruteforced files: \n", "\n".join(self.bforcer.found_resources)
    ####################### External links ##########################
    if len(self.extlinks) > 0:
        if self.color:
            try:
                print (Fore.BLUE + "\nExternal links: \n" + Style.RESET_ALL + "\n".join(self.extlinks))
            except Exception:
                print "\nExternal links:\n", "\n".join(self.extlinks)
        else:
            print "\nExternal links:\n", "\n".join(self.extlinks)
    ####################### Directory listing #######################
    if len(self.directories) > 0:
        if self.color:
            try:
                print (Fore.BLUE + "\nDir Listing: \n" + Style.RESET_ALL + "\n".join(sorted(set(self.directories))))
            except Exception:
                print "\nDirectory Listing:\n", "\n".join(sorted(set(self.directories)))
        else:
            print "\nDirectory Listing:\n", "\n".join(sorted(set(self.directories)))
    ####################### Root node ###############################
    try:
        nraiz = self.visitedresources[0]
    except Exception as e:
        print "no visited elements: %s " % e
    ####################### Detection module results ################
    for res in self.swdetector.results():
        if self.color:
            try:
                print (Fore.BLUE + res[0] + "\n" + Style.RESET_ALL + "\n".join(res[1:]))
            except Exception:
                print '\n', '\n'.join(res)
        else:
            print '\n', '\n'.join(res)
    ####################### POST DETECTION ##########################
    self.swdetector.postCrawling()
    ####################### External results I ######################
    extresults = []
    if self.runexternaltools:
        # Collect the output of the external tools.
        print "running external tools"
        extresults = self.swdetector.runExtTools()
    ######################### SCORE #################################
    self.puntuacion += len(self.directories)
    self.puntuacion += self.swdetector.getPuntuation()
    ######################### PRIORITY ##############################
    priority = self.getPriority()
    #################################################################
    ################### REPORT GENERATION ###########################
    #################################################################
    # Statistics.
    estadisticas = ['Score: ' + str(self.puntuacion),
                    'Priority: ' + str(priority).rstrip(),
                    'Resources: ' + str(len(self.visitedresources)),
                    'Broken Links: ' + str(len(self.brokenlist)),
                    'Files found: ' + str(len(self.flist)),
                    'External links: ' + str(len(self.extlinks)),
                    'Directory listing: ' + str(len(self.directories))]
    # Results of the detection modules.
    detectionres = []
    for res in self.swdetector.results():
        detectionres.append(res)
        # Add the detection counts to the statistics.
        estadisticas.append(res[0] + ': ' + str(len(res[1:])))
    self.reportex.fromList(['statistics'] + estadisticas)
    ######################### DETAILS ###############################
    if len(self.directories) > 0:
        self.reportex.fromList(['directory listing'] + sorted(self.directories), True)
    ######################### Files #################################
    filelist = []
    for f in self.flist:
        filelist.append(f.getUrl())
    if len(filelist) > 0:
        self.reportex.fromList(['files'] + sorted(filelist), True)
    if len(self.bforcer.found_resources) > 0:
        self.reportex.fromList(['Bruteforced files'] + sorted(self.bforcer.found_resources), True)
    if len(self.brokenlist) > 0:
        self.reportex.fromList(['broken links'] + sorted(self.brokenlist))
    if len(self.extlinks) > 0:
        self.reportex.fromList(['external links'] + sorted(self.extlinks), True)
    # Report the findings of the detection modules.
    for detected in detectionres:
        self.reportex.fromList(detected)
    ######################### RESOURCES #############################
    self.reportex.fromResources(self.visitedresources)
    print "\nScore: ", self.puntuacion
    ######################### Forms #################################
    unida = parseurls.getDomain(self.url)
    if self.url.endswith('/'):
        unida += '/'
    listforms = []   # unique form objects to report
    addedforms = []  # form paths already seen, for deduplication
    for res in self.visitedresources:
        actresurl = res.getUrl()
        if res.hasForms():
            for f in res.getForms():
                actaction = f.getAction()
                actpath = parseurls.normalize(actresurl, actaction)
                f.setPath(actpath)
                if actpath not in addedforms:
                    addedforms.append(actpath)
                    listforms.append(f)
    # listforms is a list of form objects.
    if self.color:
        try:
            print (Fore.BLUE + 'FORMS' + Style.RESET_ALL)
        except Exception:
            print '\n', '*' * 40, 'FORMS', '*' * 40
    else:
        print '\n', '*' * 40, 'FORMS', '*' * 40
    for form in listforms:
        print form
    if len(listforms) > 0:
        self.reportex.fromForms(listforms)
    ######################### VULNERABILITIES #######################
    # Report the findings of the vulnerability-detection modules.
    for detected in self.vulndetector.results():
        self.reportex.fromList(detected)
    ######################### EXTERNAL RESULTS ######################
    if self.color:
        try:
            print (Fore.BLUE + "External Results" + Style.RESET_ALL)
        except Exception:
            print "External Results"
    else:
        print "External Results"
    for extres in extresults:
        print extres
        # External result: ahref=False, extres=True.
        self.reportex.fromList(extres.splitlines(), False, True)
    ################### XML AND SITEMAP GENERATION ##################
    smapobj = site_mapper.parseResources(self.domain, unida, self.visitedresources + self.flist, listforms)
    # getMap() returns the sitemap lines.
    print '\n'.join(smapobj.getMap())
    self.reportex.sitemap(smapobj)
    self.reportex.sitemapXML(smapobj)
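# Design note on the crawl loop in crawl() above: tovisit and visited are
# OrderedDicts keyed by URL, which gives a FIFO breadth-first frontier with
# O(1) duplicate checks. A minimal standalone sketch of the same pattern
# (fetch_links is a hypothetical helper, not part of this tool):
#
#   frontier = collections.OrderedDict()
#   seen = collections.OrderedDict()
#   frontier[start_url] = 0                  # url -> depth
#   while frontier and len(seen) < max_files:
#       url, depth = frontier.items()[0]
#       del frontier[url]
#       seen[url] = depth
#       if depth + 1 < max_depth:
#           for link in fetch_links(url):    # hypothetical helper
#               if link not in seen and link not in frontier:
#                   frontier[link] = depth + 1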