def tor_web_crawler(index, link, ip_address):
    """Capture one Tor traffic trace for a webpage and save it as a pcap.

    Loads *link* in a Tor Browser, records packets to/from *ip_address*
    with tcpdump (bounded to 60 s by ``timeout``), and saves a screenshot
    of the loaded page next to the trace.

    :param index: sequence number of this trace for the link; used in the
        pcap and screenshot file names
    :param link: webpage address from which traffic is to be collected
    :param ip_address: IP address of the machine whose traffic is captured
    :return: None
    """
    # Extract the registered domain so traces are grouped per site.
    url = link
    lnk = tldextract.extract(url)
    domain_name = lnk.domain + '.' + lnk.suffix
    # print('Processing trace for domain name crawl : ', domain_name)

    # interface = 'enp0s31f6'
    # interface = 'any'
    interface = 'eth0'

    cap = DesiredCapabilities().FIREFOX
    cap["marionette"] = True  # optional

    # Compute output paths before launching the browser: the original code
    # assigned PP/SS inside the try block, so a failed driver launch caused
    # a NameError when they were used later.
    PP = PACP_PATH + '/' + domain_name      # pcap files for this domain
    SS = SCREEN_SHOT + '/' + domain_name    # screenshots for this domain

    if not os.path.isdir(PP):
        print('Creating directory for saving capture files (pcap) ...')
        os.makedirs(PP)
    if not os.path.isdir(SS):
        print('Creating directory for saving screenshots ...')
        os.makedirs(SS)

    # Launch the browser and load the page; retry once after a crash.
    # (The original retry only slept and reused the dead driver object.)
    try:
        driver = TorBrowserDriver(TBB_PATH)
        driver.get(url)
    except wde as e:
        print('Browser crashed:')
        print(e)
        print('Trying again in 10 seconds ...')
        time.sleep(10)
        driver = TorBrowserDriver(TBB_PATH)
        driver.get(url)
        print('Success!\n')

    try:
        # Run tcpdump with a list argv (no shell): the domain name comes
        # from an external URL, so building a shell string risked injection.
        pcap_file = PP + '/' + domain_name + '_' + str(index) + '.pcap'
        command = ['sudo', 'timeout', '60', 'tcpdump',
                   '-i', str(interface),
                   '-n', 'host', str(ip_address),
                   '-w', pcap_file]
        print('Capture trace ...')
        capture = subprocess.Popen(command)
        capture.wait()
        print('Traffic trace captured and saved successfully.')

        # Save the screenshot for later visual verification of the page.
        driver.save_screenshot(
            SS + '/' + domain_name + '-' + str(index) + '.png')
        print('Screen shot of the webpage saved successfully.')
    finally:
        # Always release the browser, even if capture/screenshot fails —
        # the original leaked the driver on any exception here.
        driver.quit()
#!/usr/bin/env python3
"""Take screenshots of a list of onion pages through Tor Browser."""
from tbselenium.tbdriver import TorBrowserDriver
import time
from sys import argv, exit

# Path to the Tor Browser bundle used by tbselenium.
tbpath = "tor-browser_en-US"

# Input file: one page address per line; default or first CLI argument.
# (Renamed from `file`, which shadowed the builtin.)
pages_file = "good-onions.txt" if len(argv) == 1 else argv[1]

with open(pages_file, "r") as f:
    pages = f.read().split("\n")

driver = TorBrowserDriver(tbpath)
driver.set_page_load_timeout(90)
try:
    for page in pages:
        if not page:
            continue
        try:
            driver.load_url("http://{}".format(page))
            driver.save_screenshot('shots/{}.png'.format(page))
        except Exception as e:
            # Best-effort crawl: report the failure and move on.
            print("Failed", page)
finally:
    # Guarantee browser shutdown even if the loop is interrupted
    # (e.g. Ctrl-C previously left a Tor Browser process behind).
    driver.quit()
exit(0)