Пример #1
0
def tor_web_crawler(index, link, ip_address):
    """
    Load a page in Tor Browser, capture traffic with tcpdump and save a screenshot.

    The capture is written to ``PACP_PATH/<domain>/<domain>_<index>.pcap`` and
    the screenshot to ``SCREEN_SHOT/<domain>/<domain>-<index>.png``, where
    ``<domain>`` is the registered domain plus suffix extracted from ``link``.
    The capture runs on interface ``eth0`` and is bounded to 60 seconds by the
    ``timeout`` command; both values are fixed inside this function.

    If starting the browser or loading the page raises ``wde``, the browser is
    restarted once after a 10 second pause. A second failure propagates to the
    caller. The browser is always quit before the function returns or raises.

    :param index: current trace number of the link (used in the file names)
    :param link: webpage address from where traffic is to be collected
    :param ip_address: ip-address used in the tcpdump ``host`` filter
    :return: None
    """

    # Extracting domain name for saving trace separately
    url = link
    parts = tldextract.extract(url)
    domain_name = parts.domain + '.' + parts.suffix

    interface = 'eth0'
    # Kept from the original: this mutates the shared FIREFOX capabilities
    # dict, which the driver may read even though `cap` is not passed on.
    cap = DesiredCapabilities().FIREFOX
    cap["marionette"] = True  # optional

    # Output locations are computed before the browser starts so they are
    # always defined, whatever happens during start-up.
    pcap_dir = os.path.join(PACP_PATH, domain_name)
    shot_dir = os.path.join(SCREEN_SHOT, domain_name)
    pcap_file = os.path.join(pcap_dir, domain_name + '_' + str(index) + '.pcap')
    shot_file = os.path.join(shot_dir, domain_name + '-' + str(index) + '.png')

    driver = None
    try:
        try:
            driver = TorBrowserDriver(TBB_PATH)
            driver.get(url)
        except wde as e:
            # `wde` is defined outside this block (presumably selenium's
            # WebDriverException -- confirm against the file's imports).
            print('Browser crashed:')
            print(e)
            print('Trying again in 10 seconds ...')
            if driver is not None:
                # Discard the broken session; a failure to quit a browser
                # that already crashed is not actionable.
                try:
                    driver.quit()
                except wde:
                    pass
                driver = None
            time.sleep(10)
            driver = TorBrowserDriver(TBB_PATH)
            driver.get(url)
            print('Success!\n')

        if not os.path.isdir(pcap_dir):
            print('Creating directory for saving capture files (pcap) ...')
            os.makedirs(pcap_dir)

        if not os.path.isdir(shot_dir):
            print('Creating directory for saving screenshots ...')
            os.makedirs(shot_dir)

        # NOTE(review): the capture starts only after driver.get() has
        # returned, so traffic of the initial page load is not recorded --
        # confirm this ordering is intended.
        # The command is passed as an argument list (no shell) so that the
        # URL-derived file name cannot be interpreted by a shell.
        command = [
            'sudo', 'timeout', '60',
            'tcpdump', '-i', str(interface),
            '-n', 'host', str(ip_address),
            '-w', pcap_file,
        ]
        print('Capture trace ...')
        capture = subprocess.Popen(command)
        # The exit status is deliberately not checked: `timeout` ending the
        # capture after 60 seconds is the normal way this command finishes.
        capture.wait()
        print('Traffic trace captured and saved successfully.')

        # save the screenshot
        driver.save_screenshot(shot_file)
        print('Screen shot of the webpage saved successfully.')
    finally:
        # Never leave a Tor Browser process behind, even on errors.
        if driver is not None:
            driver.quit()
Пример #2
0
#!/usr/bin/env python3
import os
import time
from sys import argv, exit

from tbselenium.tbdriver import TorBrowserDriver

# Script: visit every page listed in a text file with Tor Browser and save a
# screenshot of each one as a PNG in the "shots" directory.

# Tor Browser bundle directory handed to TorBrowserDriver.
tbpath = "tor-browser_en-US"
# Directory that receives one PNG per page.
shots_dir = "shots"

# The page list comes from the first command-line argument when one is given.
file = argv[1] if len(argv) > 1 else "good-onions.txt"

with open(file, "r") as f:
    # One page per line; strip surrounding whitespace (including the "\r" of
    # CRLF files) and drop blank lines.
    pages = [line.strip() for line in f if line.strip()]

# save_screenshot() reports a write failure only through its return value,
# so make sure the target directory exists up front.
os.makedirs(shots_dir, exist_ok=True)

driver = TorBrowserDriver(tbpath)
try:
    driver.set_page_load_timeout(90)

    for page in pages:
        try:
            driver.load_url("http://{}".format(page))
            # A "/" in the page would point into a sub-directory that does
            # not exist; flatten it so the screenshot can be written.
            shot = os.path.join(
                shots_dir, "{}.png".format(page.replace("/", "_")))
            if not driver.save_screenshot(shot):
                print("Failed", page, "(could not write {})".format(shot))
        except Exception as e:
            # Best effort: report the reason and carry on with the next page.
            print("Failed", page, e)
finally:
    # Always shut the browser down, including on Ctrl-C.
    driver.quit()

exit(0)