示例#1
0
def launch_tb_with_custom_stem(tbb_dir):
    """Start tor through Stem on free ports and browse through it.

    A virtual display is started, a tor process is launched from the tor
    binary under ``tbb_dir`` with a fresh temporary data directory, and a
    ``TorBrowserDriver`` configured with ``cm.USE_STEM`` loads
    check.torproject.org. Two page elements are printed, followed by the
    tor circuits reported by the controller.

    Args:
        tbb_dir: Path of the Tor Browser Bundle directory.
    """
    xvfb_display = start_xvfb()
    tor_process = None
    try:
        socks_port = free_port()
        control_port = free_port()
        # NOTE(review): this temporary directory is never removed.
        tor_data_dir = tempfile.mkdtemp()
        tor_binary = join(tbb_dir, cm.DEFAULT_TOR_BINARY_PATH)
        print("SOCKS port: %s, Control port: %s" % (socks_port, control_port))

        torrc = {
            'ControlPort': str(control_port),
            'SOCKSPort': str(socks_port),
            'DataDirectory': tor_data_dir,
        }
        tor_process = launch_tbb_tor_with_stem(tbb_path=tbb_dir,
                                               torrc=torrc,
                                               tor_binary=tor_binary)
        with Controller.from_port(port=control_port) as controller:
            controller.authenticate()
            with TorBrowserDriver(tbb_dir,
                                  socks_port=socks_port,
                                  control_port=control_port,
                                  tor_cfg=cm.USE_STEM) as driver:
                driver.load_url("https://check.torproject.org",
                                wait_on_page=3)
                print(driver.find_element_by("h1.on").text)
                print(driver.find_element_by(".content > p").text)
            print_tor_circuits(controller)
    finally:
        # Release the display and the tor process even when launching tor,
        # connecting the controller or driving the browser fails.
        try:
            stop_xvfb(xvfb_display)
        finally:
            if tor_process is not None:
                tor_process.kill()
示例#2
0
def run():
    """Set up Tor, Tor Browser and a crawl job from the CLI options, then crawl.

    Steps, in order: parse the arguments, create the crawl directories, read
    the URL slice, attach the crawl log file, build the ``TorController`` and
    the ``TorBrowserWrapper``, build the ``Crawler`` and the ``CrawlJob``,
    start Xvfb and crawl from inside ``cm.CRAWL_DIR``.

    The function does not return normally: it exits with status 0 once the
    crawl finishes and with status -1 on a keyboard interrupt.
    ``post_crawl`` and ``stop_xvfb`` run in either case.
    """
    args, config = parse_arguments()

    # Directory layout and the slice of URLs to visit.
    build_crawl_dirs(args.url_file)
    url_list = parse_url_list(args.url_file, args.start, args.stop)

    # Send crawl logging to the default log file as well.
    add_log_file_handler(wl_log, cm.DEFAULT_CRAWL_LOG)

    # Tor controller, driven by the "torrc" section of the configuration.
    torrc_settings = ut.get_dict_subconfig(config, args.config, "torrc")
    tor_controller = TorController(cm.TBB_DIR,
                                   torrc_dict=torrc_settings,
                                   pollute=False)

    # Browser wrapper, driven by the "ffpref" section of the configuration.
    firefox_prefs = ut.get_dict_subconfig(config, args.config, "ffpref")
    socks_port = int(torrc_settings['socksport'])
    browser = TorBrowserWrapper(cm.TBB_DIR,
                                tbb_logfile_path=cm.DEFAULT_FF_LOG,
                                tor_cfg=USE_RUNNING_TOR,
                                pref_dict=firefox_prefs,
                                socks_port=socks_port)

    crawler = crawler_mod.Crawler(browser, tor_controller, args.screenshots,
                                  args.device)

    # Crawl job, driven by the "job" section of the configuration.
    job_settings = ut.get_dict_subconfig(config, args.config, "job")
    crawl_job = crawler_mod.CrawlJob(job_settings, url_list)

    # Virtual display geometry: defaults unless overridden on the CLI.
    if not args.virtual_display:
        xvfb_h = cm.DEFAULT_XVFB_WIN_H
        xvfb_w = cm.DEFAULT_XVFB_WIN_W
    else:
        # NOTE(review): the first "x"-separated field is used as the height
        # and the second as the width - confirm the expected option format.
        dims = args.virtual_display.split('x')
        xvfb_h = int(dims[0])
        xvfb_w = int(dims[1])
    xvfb_display = start_xvfb(xvfb_w, xvfb_h)

    chdir(cm.CRAWL_DIR)
    try:
        crawler.crawl(crawl_job)
    except KeyboardInterrupt:
        wl_log.warning("Keyboard interrupt! Quitting...")
        sys.exit(-1)
    finally:
        # Post-processing and display teardown happen on every exit path.
        post_crawl()
        stop_xvfb(xvfb_display)

    sys.exit(0)
示例#3
0
def pytest_sessionfinish(session, exitstatus):
    """Pytest hook: stop the shared Xvfb display and tor process, if present.

    Both handles are looked up in ``test_conf``; a missing or falsy entry is
    skipped.
    """
    display = test_conf.get("xvfb_display")
    tor = test_conf.get("tor_process")

    if display:
        stop_xvfb(display)
    if tor:
        tor.kill()
def pytest_sessionfinish(session, exitstatus):
    """Pytest hook: tear down the resources recorded in ``test_conf``.

    Stops the Xvfb display, kills the tor process and removes the temporary
    data directory. Every entry is optional: a missing or falsy value is
    skipped, so teardown does not raise ``KeyError`` when setup did not get
    as far as storing ``temp_data_dir``.
    """
    xvfb_display = test_conf.get("xvfb_display")
    tor_process = test_conf.get("tor_process")
    temp_data_dir = test_conf.get("temp_data_dir")
    if xvfb_display:
        stop_xvfb(xvfb_display)

    if tor_process:
        tor_process.kill()

    if temp_data_dir:
        rmtree(temp_data_dir, ignore_errors=True)
示例#5
0
def pytest_sessionfinish(session, exitstatus):
    """Pytest hook: tear down the resources recorded in ``test_conf``.

    Stops the Xvfb display, kills the tor process and removes the temporary
    data directory. Every entry is optional: a missing or falsy value is
    skipped, so teardown does not raise ``KeyError`` when setup did not get
    as far as storing ``temp_data_dir``.
    """
    xvfb_display = test_conf.get("xvfb_display")
    tor_process = test_conf.get("tor_process")
    temp_data_dir = test_conf.get("temp_data_dir")
    if xvfb_display:
        stop_xvfb(xvfb_display)

    if tor_process:
        tor_process.kill()

    if temp_data_dir:
        rmtree(temp_data_dir, ignore_errors=True)
示例#6
0
def headless_visit(tbb_dir):
    """Load check.torproject.org on a virtual display and save a screenshot.

    The screenshot is written as ``headless_screenshot.png`` next to this
    file.

    Args:
        tbb_dir: Path of the Tor Browser Bundle directory.
    """
    out_img = join(dirname(realpath(__file__)), "headless_screenshot.png")
    # start a virtual display
    xvfb_display = start_xvfb()
    try:
        with TorBrowserDriver(tbb_dir) as driver:
            driver.load_url("https://check.torproject.org")
            driver.get_screenshot_as_file(out_img)
            print("Screenshot is saved as %s" % out_img)
    finally:
        # Stop the display even when the browser session raises.
        stop_xvfb(xvfb_display)
示例#7
0
def _record_load_times(driver, out_img, column):
    """Load every URL of ``load_table`` and store its load time in *column*."""
    for row in load_table:
        # A monotonic clock keeps the measurement immune to wall-clock
        # adjustments that happen while the page is loading.
        started = time.perf_counter_ns()
        driver.load_url(row[URLS])
        finished = time.perf_counter_ns()

        driver.get_screenshot_as_file(out_img)
        print("Screenshot is saved as %s" % out_img)

        elapsed_time = (finished - started) / 1000000000
        print("Load time: ", str(elapsed_time) + "s")
        row[column] = elapsed_time


def headless_visit(tbb_dir):
    """Time page loads without a bridge and with each supported bridge type.

    Every row of the module-level ``load_table`` is visited first with a
    plain ``TorBrowserDriver`` (times stored in the ``VANILLA`` column) and
    then once per entry of ``BRIDGE_TYPE`` ("obfs4" fills column 2,
    "meek-azure" column 3). A screenshot is saved after each load, always to
    the same ``headless_screenshot.png`` next to this file. ``write_csv`` is
    called once all measurements are done.

    Args:
        tbb_dir: Path of the Tor Browser Bundle directory.
    """
    out_img = join(dirname(realpath(__file__)), "headless_screenshot.png")
    bridge_columns = {"obfs4": 2, "meek-azure": 3}

    # start a virtual display
    xvfb_display = start_xvfb()
    try:
        with TorBrowserDriver(tbb_dir) as driver:
            _record_load_times(driver, out_img, VANILLA)

        for bridge in BRIDGE_TYPE:
            # Resolve the column per bridge so that an unknown type can never
            # reuse the column of the previous one, and stop before a browser
            # is launched for a bridge type that has no column.
            col = bridge_columns.get(bridge)
            if col is None:
                break

            with TorBrowserDriver(tbb_dir,
                                  default_bridge_type=bridge) as bdriver:
                print(bridge + "..........")
                _record_load_times(bdriver, out_img, col)

        print("About to print..........")
        write_csv()
    finally:
        # Stop the display even when a browser session or the export fails.
        stop_xvfb(xvfb_display)
示例#8
0
 def close(self):
     """Release the resources held by this crawler, in a fixed order.

     Only resources whose attribute is listed by ``dir(self)`` are touched:
     ``tb_driver`` is quit, ``virtual_framebuffer`` is handed to
     ``stop_xvfb``, ``cell_log`` is closed and ``tor_process`` is killed.
     Progress is reported through ``self.logger``.
     """
     def is_set(attribute):
         # Membership is evaluated afresh for every resource.
         return attribute in dir(self)

     self.logger.info("Beginning Crawler exit process...")

     if is_set("tb_driver"):
         self.logger.info("Closing Tor Browser...")
         self.tb_driver.quit()

     if is_set("virtual_framebuffer"):
         self.logger.info("Closing the virtual framebuffer...")
         try:
             stop_xvfb(self.virtual_framebuffer)
         except KeyError:
             # pyvirtualdisplay raises KeyError while closing a virtual
             # framebuffer when $DISPLAY is not set; that is ignored here.
             pass

     if is_set("cell_log"):
         self.logger.info("Closing the Tor cell stream...")
         self.cell_log.close()

     if is_set("tor_process"):
         self.logger.info("Killing the tor process...")
         self.tor_process.kill()

     self.logger.info("Crawler exit completed.")
    def close(self):
        """Shut down whichever crawler resources exist on this instance.

        The steps run in this order, each one only when its attribute is
        listed by ``dir(self)``: quit ``tb_driver``, stop
        ``virtual_framebuffer``, close ``cell_log``, kill ``tor_process``.
        Each step is announced through ``self.logger``.
        """
        def stop_display():
            # pyvirtualdisplay raises KeyError while closing a virtual
            # framebuffer when $DISPLAY is not set; that is ignored here.
            try:
                stop_xvfb(self.virtual_framebuffer)
            except KeyError:
                pass

        teardown_steps = (
            ("tb_driver", "Closing Tor Browser...",
             lambda: self.tb_driver.quit()),
            ("virtual_framebuffer", "Closing the virtual framebuffer...",
             stop_display),
            ("cell_log", "Closing the Tor cell stream...",
             lambda: self.cell_log.close()),
            ("tor_process", "Killing the tor process...",
             lambda: self.tor_process.kill()),
        )

        self.logger.info("Beginning Crawler exit process...")
        for attribute, message, release in teardown_steps:
            if attribute in dir(self):
                self.logger.info(message)
                release()
        self.logger.info("Crawler exit completed.")
示例#10
0
def main():
    """Take a screenshot of a page with Tor Browser on a virtual display.

    Command line: ``tbb_path [output_dir] [url]``. ``output_dir`` falls back
    to the module-level ``workdir`` and ``url`` to
    https://check.torproject.org. The image is written as ``screenshot.png``
    inside ``output_dir``.

    Returns:
        1 when no URL is available; otherwise None.
    """
    desc = "Take a screenshot using TorBrowserDriver"
    default_url = "https://check.torproject.org"
    parser = ArgumentParser(description=desc)
    parser.add_argument('tbb_path')
    # A positional argument only honours its default when it is optional.
    parser.add_argument('output_dir', nargs='?', default=workdir)
    parser.add_argument('url', nargs='?', default=default_url)
    args = parser.parse_args()
    out_img = realpath(join(args.output_dir, "screenshot.png"))

    # Check the URL that was actually requested, not the constant default.
    if not args.url:
        print("ERROR: cannot detect main URL")
        return 1

    xvfb_display = start_xvfb()
    try:
        with TorBrowserDriver(args.tbb_path, headless=True) as driver:
            # Visit the URL given on the command line (or its default).
            visit_and_screenshot(driver, args.url, out_img)
    finally:
        # Stop the display even when the browser session raises.
        stop_xvfb(xvfb_display)
def pullpage():
    """Fetch a fixed page with Firefox on a virtual display.

    The page is loaded, refreshed once, and a screenshot is written to
    ``/GMDelight/GMDelight/static/headless_screenshot.png``.

    Returns:
        The page source as a string.
    """
    target = 'https://whatismyipaddress.com/'

    xvfb_display = start_xvfb()
    try:
        driver = webdriver.Firefox()
        try:
            driver.get(target)
            driver.refresh()
            driver.get_screenshot_as_file(
                '/GMDelight/GMDelight/static/headless_screenshot.png')
            return str(driver.page_source)
        finally:
            # Quit the browser even when loading or the screenshot fails.
            driver.quit()
    finally:
        # Stop the display last, on every exit path.
        stop_xvfb(xvfb_display)
示例#12
0
def flight_search():
    """Command-line entry point of the headless flight searcher.

    Parses the options, checks the configuration and connectivity, prepares
    a browser through ``Driver.prepare_driver`` and runs the search function
    of the selected company ('iberia' or 'ryanair'). Results are printed
    and, with ``--file``, stored through ``save_flight_to_text`` and
    ``save_flight_database``.

    Raises:
        selenium.common.exceptions.ElementNotInteractableException:
            re-raised after an explanatory message has been printed.
    """
    parser = argparse.ArgumentParser(description="A headless flight searcher")
    group = parser.add_mutually_exclusive_group()
    group.add_argument("-c",
                       "--nocookies",
                       help="disables cookies",
                       action='store_true')
    group.add_argument("-t",
                       "--tor",
                       help="search using TOR",
                       action='store_true')
    parser.add_argument(
        "-f",
        "--file",
        help="store (timestamp,depart hour,lowest price) in a generated "
        "archive in 'flight_data/'",
        action='store_true')
    parser.add_argument("-v",
                        "--verbosity",
                        help="program verbosity",
                        action='store_true')
    parser.add_argument("-hd",
                        "--headless",
                        help="use headless browser",
                        action='store_true')
    # NOTE(review): "company" is a required positional, so this default is
    # never applied.
    parser.add_argument("company",
                        help="select company of the flight",
                        default="iberia")
    parser.add_argument("fromc",
                        help="from (recommended city name or airport code)")
    parser.add_argument("to", help="to (airport code)")
    parser.add_argument("date", help="date (MM/DD/YYYY)")
    args = parser.parse_args()

    v = args.verbosity

    if not set_conf():
        return

    if not internet_on():
        print('Error: There is no Internet Connection')
        return

    if v:
        print('Init of search with %s: %s to %s, %s' %
              (args.company, args.fromc, args.to, args.date))

    comp = args.company.lower()
    prev_wd = os.getcwd()

    # Bind the handles before the try block: if prepare_driver() itself
    # raises, the cleanup below must not fail on unbound names and hide the
    # original error.
    driver = display = tor_process = None
    try:
        # prepare the driver
        driver, display, tor_process = Driver.prepare_driver(
            args.nocookies, args.tor, v, args.headless)
        if driver is None:
            return
        # select the company search function. This if/elif will grow.
        if comp == 'iberia':
            results = Iberia.search(driver, args.fromc, args.to, args.date,
                                    args.nocookies, args.tor, v)
        elif comp == 'ryanair':
            results = Ryanair.search(driver, args.fromc, args.to, args.date,
                                     args.nocookies, args.tor, v)
        else:
            print('Error: Company %s not supported' % (comp))
            return

        if not results:
            print('No results for %s %s - %s ON DATE %s.' %
                  (args.company.upper(), args.fromc.upper(), args.to.upper(),
                   args.date))
            return

        print('%s SEARCH RESULTS OF %s - %s ON DATE %s:' %
              (args.company.upper(), args.fromc.upper(), args.to.upper(),
               args.date))
        pprint.pprint(results)
        if args.file:
            save_flight_to_text(results, prev_wd, args, v)
            save_flight_database(results, prev_wd, args, v)
    except selenium.common.exceptions.ElementNotInteractableException:
        print('Error: Website currently blocked or different from usual')
        raise
    finally:
        if driver:
            driver.close()
            if args.tor:
                if args.headless:
                    stop_xvfb(display)
                tor_process.kill()
from tbselenium.utils import start_xvfb, stop_xvfb
from tbselenium.tbdriver import TorBrowserDriver
from os.path import join, dirname, realpath

# Screenshot check.torproject.org with Tor Browser on a virtual display and
# save the image next to this file.
out_img = join(dirname(realpath(__file__)), "headless_screenshot.png")
xvfb_display = start_xvfb()
try:
    with TorBrowserDriver(
            '/home/manivannan/pythonexamle/selenium_example/tor-browser_en-US'
    ) as driver:
        driver.load_url("https://check.torproject.org")
        driver.get_screenshot_as_file(out_img)
        print("Screenshot is saved as %s" % out_img)
finally:
    # Stop the display even when the browser session raises.
    stop_xvfb(xvfb_display)
if __name__ == '__main__':
    main()
"""
"""
xvfb_display = start_xvfb()
driver = webdriver.Firefox()
#driver.implicitly_wait(10)
#driver.get("http://www.python.org")
bh="https://www.bulq.com/lots/search/?last_activated_at=2021-05-22T23:40:37.275Z&page=1"
driver.get(bh)
print("page_source")
print(driver.page_source)
#assert "Python" in driver.title
stop_xvfb(xvfb_display)
"""
"""
out_img = join(dirname(realpath(__file__)), "headless_screenshot.png")
xvfb_display = start_xvfb()
with TorBrowserDriver('/home/manivannan/pythonexamle/selenium_example/tor-browser_en-US') as driver:
    driver.load_url("https://check.torproject.org")
    driver.get_screenshot_as_file(out_img)
    print("Screenshot is saved as %s" % out_img)

stop_xvfb(xvfb_display)
"""
"""
with TorBrowserDriver('/etc/tor') as driver:
    driver.load_url("https://check.torproject.org")
    driver.get_screenshot_as_file(out_img)
    print("Screenshot is saved as %s" % out_img)
"""
示例#15
0
def launch_tor_with_custom_stem(datalist, browser):
    """Visit every entry of *datalist* while capturing its traffic.

    A tor proxy is set up first (up to ``cm.TRYCNT`` attempts) and a virtual
    display is started. For each entry a browser is created (again up to
    ``cm.TRYCNT`` attempts), ``tcpdump`` is started on ``cm.netInterface``,
    and ``https://<entry[0]>`` is loaded under a ``cm.VISITPAGE_TIMEOUT``
    timeout. Per-entry failures are logged and the loop moves on to the next
    entry. Failures outside the per-entry handling are printed and logged;
    the function does not re-raise them.

    Args:
        datalist: Sequence of entries. ``entry[0]`` is appended to
            ``"https://"`` and ``entry[2]`` is a numeric flag: it is passed
            to ``TBBSetup``, a screenshot is taken when it is 0 or 2, and the
            longer ``cm.PAUSE_BETWEEN_SITES`` pause is used when it is 3.
            NOTE(review): the meaning of the flag values is not visible
            here - confirm against the caller.
        browser: One of ``'TBB'``, ``'FF'`` or ``'CR'``; selects the setup
            function used to create the driver.
    """
    print("length of data: ", len(datalist))
    tor_binary = join(cm.TorProxypath, cm.DEFAULT_TOR_BINARY_PATH)
    tor_process, controller = 0, 0
    # Bound before the try block so that the final cleanup can tell whether
    # the display was ever started (it is not when tor setup fails).
    xvfb_display = None
    try:
        TRYTOR_CNT = cm.TRYCNT
        while TRYTOR_CNT > 0 and tor_process == 0 and controller == 0:
            print("try to setup tor:", str(TRYTOR_CNT))
            tor_process, controller = TorSetup(tor_binary)
            TRYTOR_CNT -= 1
        if tor_process == 0:
            raise TorSetupError
        print("finish tor proxy setup...")
        xvfb_display = start_xvfb()  # virtual display
        for ele in datalist:
            t = getTime()
            savepath, out_img = SetOutputPath(ele, t)
            p = 0
            try:
                driver, TRYCNT = 0, cm.TRYCNT
                while driver == 0 and TRYCNT != 0:
                    print("try to setup tbb:", str(TRYCNT))
                    # Only the Tor Browser setup function takes arguments.
                    args = (cm.driverpath, controller,
                            ele[2]) if browser == 'TBB' else ()
                    options = {
                        'TBB': TBBSetup,
                        'FF': FFSetup,
                        'CR': ChromeSetup
                    }
                    driver = options[browser](*args)
                    TRYCNT -= 1
                if driver == 0:
                    raise TBBSetupError

                cmd = "tcpdump -i %s tcp and not port ssh -w %s" % (
                    cm.netInterface, savepath)
                print('cmd = ', cmd)
                cmd = cmd.split(' ')
                p = subprocess.Popen(cmd)
                try:
                    timeout(cm.VISITPAGE_TIMEOUT)
                    driver.get('https://' + ele[0])
                    cancel_timeout()
                    time.sleep(cm.DURATION_VISIT_PAGE)
                    p.terminate()
                    if (ele[2] == 0 or ele[2] == 2):
                        driver.get_screenshot_as_file(out_img)
                    writeLog(str(t) + "," + ele[0] + "," + str(ele[2]))
                    print("Finish tcpdump sleep...")
                except TimeExceededError:
                    writeLog("Error crawling," + ele[0] + "," + str(ele[2]) +
                             "\n" + str("Page visit Timeout"))
                finally:
                    cancel_timeout()
            except TBBSetupError:
                print("[crawl.py error]: unable to setup TBB")
                writeLog("[crawl.py error]: unable to setup TBB")
            except Exception as e:
                with open(cm.ErrorFilePath, 'a+') as fw:
                    fw.write(ele[0] + "," + str(e) + "\n")
                writeLog("Error crawling," + ele[0] + "," + str(ele[2]) +
                         "\n" + str(e))
            finally:
                # NOTE(review): returncode is only refreshed by poll()/wait(),
                # neither of which is called here, so this test is true
                # whenever tcpdump was started and terminate() is sent again.
                if p != 0 and p.returncode != 0:
                    try:
                        p.terminate()
                    except Exception as e:
                        writeLog("[crawl.py] tcpdump terminate error: " +
                                 str(e))
                if controller != 0:
                    cleanupStream(controller, str(ele[2]), ele[0])
                if driver != 0:
                    try:
                        # Bound the time spent waiting for the browser to
                        # quit.
                        timeout(30)
                        driver.quit()
                        cancel_timeout()
                    except Exception as e:
                        cancel_timeout()
                        writeLog("[crawl.py] driver quit error: " + str(e))
                if ele[2] != 3:
                    time.sleep(cm.PAUSE_BETWEEN_INSTANCES)
                else:
                    time.sleep(cm.PAUSE_BETWEEN_SITES)
                RemoveTmpFile()
                RemoveProcess()
    except TorSetupError:
        print("[crawl.py] unable to set up tor proxy")
        writeLog("[crawl.py] unable to set up tor proxy")
    except Exception as e:
        print("[crawl.py]launch_tor_with_custom_stem Error")
        print("Error:", str(e))
        writeLog("[crawl.py]launch_tor_with_custom_stem Error : " + str(e))
    finally:
        if tor_process != 0:
            tor_process.kill()
        # The display only exists once tor setup has succeeded; stopping an
        # unbound name here used to raise UnboundLocalError out of cleanup.
        if xvfb_display is not None:
            stop_xvfb(xvfb_display)