import requests

# crawl and common_ext (a list of common extensions such as '', '.html',
# '.php') are assumed to be project-local helpers, along with MemoryCookieJar,
# PageCollection, login, and the crawling and guesser modules used below; a
# sketch of the is_live() helper follows guess().


def guess(session, base_url, word_list, pages):
    # Probe for unlinked pages by pairing each wordlist entry with each
    # common extension, crawling anything that responds.
    if not base_url.endswith('/'):
        base_url += '/'
    # Crawl the base URL itself first, if it is reachable.
    baselive, baseurl = is_live(session, base_url)
    if baselive:
        crawl(session, base_url, pages)
    for word in word_list:
        for ext in common_ext:
            live, url = is_live(session, base_url + word + ext)
            if live:
                crawl(session, url, pages)
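# is_live() is not defined in this section. From its call sites it must return
# a (live, final_url) tuple. A minimal sketch of its assumed shape: a GET probe
# via requests, treating any status below 400 as live.
def is_live(session, url):
    try:
        # Short timeout so a dead host doesn't stall the whole scan.
        resp = session.get(url, timeout=5, allow_redirects=True)
    except requests.RequestException:
        # Connection error, DNS failure, timeout, etc.
        return False, url
    # resp.url reflects any redirects that were followed.
    return resp.status_code < 400, resp.url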
def discover(args):
    print('DISCOVER')

    # Set up a session whose cookies we can enumerate afterwards.
    print('Creating Session...')
    session = requests.Session()
    session.cookies = MemoryCookieJar()
    if args.auth is not None:
        login(session, args.auth)

    pages = PageCollection()

    # First, discover pages reachable by following links.
    print('Crawling for links')
    crawling.crawl(session, args.url, pages)

    # Then guess additional, unlinked pages from the wordlist.
    print('Trying to guess additional pages...')
    with open(args.word_file, 'r') as wf:
        word_list = [line.strip('\n') for line in wf]
    guesser.guess(session, args.url, word_list, pages)

    # Report every page, its inputs, and the cookies collected along the way.
    found = pages.pages
    print('{n} accessible pages discovered:'.format(n=len(found)))
    for key in sorted(found):
        page = found[key]
        print('\t' + page.url)
        print('\t\tForm Inputs:')
        for i in page.form_inputs:
            print('\t\t\t' + str(dict(i)))
        print('\t\tURL Inputs:')
        for i in page.url_inputs:
            print('\t\t\t' + str(i))
    print('{n} cookies found:'.format(n=len(session.cookies.memory)))
    for cookie in session.cookies.memory:
        print('\t' + str(cookie))

    return list(found.values())
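# A minimal sketch of driving discover() from the command line. The argument
# names (url, word_file, --auth) are inferred from the attributes discover()
# reads off args; the exact CLI is an assumption, not part of this file.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(
        description='Discover accessible pages on a target site')
    parser.add_argument('url', help='base URL to scan')
    parser.add_argument('word_file', help='wordlist for guessing unlinked pages')
    parser.add_argument('--auth', default=None,
                        help='credentials handed to login(), if any')
    discover(parser.parse_args())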