def launch_tb_with_custom_stem(tbb_dir):
    """Launch Tor via Stem, drive Tor Browser through it and print its circuits.

    A virtual display is started, a tor process is launched with freshly
    picked SOCKS/control ports and a temporary data directory, and Tor
    Browser loads https://check.torproject.org through that tor process.

    Args:
        tbb_dir: Path of the Tor Browser Bundle directory.

    The virtual display, the tor process and the temporary data directory
    are released even when the browser or the controller raises.
    """
    import shutil  # local import: only needed for the data-dir cleanup below

    xvfb_display = start_xvfb()
    tor_data_dir = None
    try:
        socks_port = free_port()
        control_port = free_port()
        tor_data_dir = tempfile.mkdtemp()
        tor_binary = join(tbb_dir, cm.DEFAULT_TOR_BINARY_PATH)
        print("SOCKS port: %s, Control port: %s" % (socks_port, control_port))

        torrc = {
            'ControlPort': str(control_port),
            'SOCKSPort': str(socks_port),
            'DataDirectory': tor_data_dir,
        }
        tor_process = launch_tbb_tor_with_stem(tbb_path=tbb_dir, torrc=torrc,
                                               tor_binary=tor_binary)
        try:
            with Controller.from_port(port=control_port) as controller:
                controller.authenticate()
                with TorBrowserDriver(tbb_dir, socks_port=socks_port,
                                      control_port=control_port,
                                      tor_cfg=cm.USE_STEM) as driver:
                    driver.load_url("https://check.torproject.org",
                                    wait_on_page=3)
                    print(driver.find_element_by("h1.on").text)
                    print(driver.find_element_by(".content > p").text)
                # The controller must still be open to list the circuits.
                print_tor_circuits(controller)
        finally:
            tor_process.kill()
    finally:
        stop_xvfb(xvfb_display)
        if tor_data_dir is not None:
            # The data directory is throwaway state created just for this run.
            shutil.rmtree(tor_data_dir, ignore_errors=True)
def run():
    """Set up and run a crawl, then exit the process.

    Parses the command line, builds the crawl directories, configures the
    logger, the Tor controller and the browser wrapper, starts a virtual
    display and runs the crawl job.

    Exit status:
        0 when the crawl finishes, -1 on keyboard interrupt. Any other
        exception raised by the crawl propagates after cleanup, so a crash
        is no longer reported as a successful exit.
    """
    # Parse arguments
    args, config = parse_arguments()

    # Build dirs
    build_crawl_dirs(args.url_file)

    # Read URLs
    url_list = parse_url_list(args.url_file, args.start, args.stop)

    # Configure logger
    add_log_file_handler(wl_log, cm.DEFAULT_CRAWL_LOG)

    # Configure controller
    torrc_config = ut.get_dict_subconfig(config, args.config, "torrc")
    controller = TorController(cm.TBB_DIR,
                               torrc_dict=torrc_config,
                               pollute=False)

    # Configure browser
    ffprefs = ut.get_dict_subconfig(config, args.config, "ffpref")
    driver = TorBrowserWrapper(cm.TBB_DIR,
                               tbb_logfile_path=cm.DEFAULT_FF_LOG,
                               tor_cfg=USE_RUNNING_TOR,
                               pref_dict=ffprefs,
                               socks_port=int(torrc_config['socksport']))

    # Instantiate crawler
    crawler = crawler_mod.Crawler(driver, controller,
                                  args.screenshots, args.device)

    # Configure crawl
    job_config = ut.get_dict_subconfig(config, args.config, "job")
    job = crawler_mod.CrawlJob(job_config, url_list)

    # Setup headless display
    if args.virtual_display:
        # NOTE(review): the first "x"-separated field is used as the height
        # and the second as the width -- confirm against the CLI help text.
        dimensions = args.virtual_display.split('x')
        xvfb_h = int(dimensions[0])
        xvfb_w = int(dimensions[1])
    else:
        xvfb_h = cm.DEFAULT_XVFB_WIN_H
        xvfb_w = cm.DEFAULT_XVFB_WIN_W
    xvfb_display = start_xvfb(xvfb_w, xvfb_h)

    # Run the crawl
    chdir(cm.CRAWL_DIR)
    exit_code = 0
    try:
        crawler.crawl(job)
    except KeyboardInterrupt:
        wl_log.warning("Keyboard interrupt! Quitting...")
        exit_code = -1
    finally:
        # Exiting from inside ``finally`` would discard the in-flight
        # exception and the -1 status, so only cleanup happens here.
        try:
            # Post crawl
            post_crawl()
        finally:
            # Close display
            stop_xvfb(xvfb_display)

    # die
    sys.exit(exit_code)
def pytest_sessionfinish(session, exitstatus):
    """Pytest hook: stop the session-wide Xvfb display and tor process, if any.

    Both handles are looked up in ``test_conf``; a missing or falsy entry is
    skipped.
    """
    display, tor = (
        test_conf.get(key) for key in ("xvfb_display", "tor_process")
    )
    if display:
        stop_xvfb(display)
    if tor:
        tor.kill()
def pytest_sessionfinish(session, exitstatus):
    """Pytest hook: release the session-wide display, tor process and temp dir.

    All three resources are read from ``test_conf`` with ``.get`` so a
    session whose setup never got as far as creating one of them does not
    fail here with a ``KeyError``. Each step runs even if an earlier one
    raises.
    """
    xvfb_display = test_conf.get("xvfb_display")
    tor_process = test_conf.get("tor_process")
    temp_data_dir = test_conf.get("temp_data_dir")
    try:
        if xvfb_display:
            stop_xvfb(xvfb_display)
    finally:
        try:
            if tor_process:
                tor_process.kill()
        finally:
            if temp_data_dir:
                rmtree(temp_data_dir, ignore_errors=True)
def headless_visit(tbb_dir):
    """Load check.torproject.org in Tor Browser on a virtual display.

    A screenshot is written to ``headless_screenshot.png`` next to this
    file.

    Args:
        tbb_dir: Path of the Tor Browser Bundle directory.
    """
    out_img = join(dirname(realpath(__file__)), "headless_screenshot.png")
    # start a virtual display
    xvfb_display = start_xvfb()
    try:
        with TorBrowserDriver(tbb_dir) as driver:
            driver.load_url("https://check.torproject.org")
            driver.get_screenshot_as_file(out_img)
            print("Screenshot is saved as %s" % out_img)
    finally:
        # Stop the display even when the browser fails to start or load.
        stop_xvfb(xvfb_display)
def headless_visit(tbb_dir):
    """Time page loads without a bridge and with each configured bridge.

    Every URL in ``load_table`` is loaded first with a plain Tor Browser
    (timings stored in the ``VANILLA`` column) and then once per entry of
    ``BRIDGE_TYPE`` (timings stored in that bridge's column). A screenshot
    of each page overwrites ``headless_screenshot.png`` next to this file,
    and ``write_csv()`` is called once all measurements are done.

    Args:
        tbb_dir: Path of the Tor Browser Bundle directory.
    """
    out_img = join(dirname(realpath(__file__)), "headless_screenshot.png")
    # load_table column that receives the timings of each supported bridge.
    bridge_columns = {"obfs4": 2, "meek-azure": 3}

    def time_page_loads(driver, column):
        """Load every URL of load_table with *driver*; store seconds in *column*."""
        for row in load_table:
            # A monotonic clock is used so wall-clock adjustments cannot
            # distort the measured interval.
            start_time = time.perf_counter_ns()
            driver.load_url(row[URLS])
            end_time = time.perf_counter_ns()
            driver.get_screenshot_as_file(out_img)
            print("Screenshot is saved as %s" % out_img)
            elapsed_time = (end_time - start_time) / 1000000000
            print("Load time: ", str(elapsed_time) + "s")
            row[column] = elapsed_time

    # start a virtual display
    xvfb_display = start_xvfb()
    try:
        with TorBrowserDriver(tbb_dir) as driver:
            time_page_loads(driver, VANILLA)

        for bridge in BRIDGE_TYPE:
            # Resolve the column per bridge so an unknown bridge can never
            # reuse (and overwrite) the previous bridge's column, and stop
            # before a browser is launched for it.
            column = bridge_columns.get(bridge)
            if column is None:
                break
            with TorBrowserDriver(tbb_dir,
                                  default_bridge_type=bridge) as bdriver:
                print(bridge + "..........")
                time_page_loads(bdriver, column)

        print("About to print..........")
        write_csv()
    finally:
        stop_xvfb(xvfb_display)
def close(self):
    """Release every resource the crawler holds.

    Quits Tor Browser, stops the virtual framebuffer, closes the Tor cell
    stream and kills the tor process -- in that order, and only for the
    attributes that exist on this instance.

    Every step is attempted even if an earlier one fails, so a browser that
    refuses to quit does not leave the framebuffer or tor running.

    Raises:
        Exception: the first error raised by a cleanup step, re-raised after
            all remaining steps have been attempted.
    """
    self.logger.info("Beginning Crawler exit process...")

    def stop_framebuffer():
        # A bug in pyvirtualdisplay triggers a KeyError exception when
        # closing a virtual framebuffer if the $DISPLAY environment variable
        # is not set.
        try:
            stop_xvfb(self.virtual_framebuffer)
        except KeyError:
            pass

    steps = (
        ("tb_driver", "Closing Tor Browser...",
         lambda: self.tb_driver.quit()),
        ("virtual_framebuffer", "Closing the virtual framebuffer...",
         stop_framebuffer),
        ("cell_log", "Closing the Tor cell stream...",
         lambda: self.cell_log.close()),
        ("tor_process", "Killing the tor process...",
         lambda: self.tor_process.kill()),
    )

    first_error = None
    for attribute, message, action in steps:
        if not hasattr(self, attribute):
            continue
        self.logger.info(message)
        try:
            action()
        except Exception as error:
            self.logger.exception("Cleanup step failed: %s", message)
            if first_error is None:
                first_error = error

    if first_error is not None:
        raise first_error

    self.logger.info("Crawler exit completed.")
def main():
    """Take a screenshot of a URL with Tor Browser on a virtual display.

    Command line: ``tbb_path [output_dir] [url]``. ``output_dir`` falls back
    to the module-level ``workdir`` and ``url`` to
    https://check.torproject.org. The image is written to
    ``<output_dir>/screenshot.png``.

    Returns:
        1 when no URL is available, otherwise None.
    """
    desc = "Take a screenshot using TorBrowserDriver"
    default_url = "https://check.torproject.org"
    parser = ArgumentParser(description=desc)
    parser.add_argument('tbb_path')
    # nargs='?' is required for a positional's default to ever be used.
    parser.add_argument('output_dir', nargs='?', default=workdir)
    parser.add_argument('url', nargs='?', default=default_url)
    args = parser.parse_args()
    out_img = realpath(join(args.output_dir, "screenshot.png"))

    if not args.url:
        print("ERROR: cannot detect main URL")
        return 1

    xvfb_display = start_xvfb()
    try:
        with TorBrowserDriver(args.tbb_path, headless=True) as driver:
            # Visit the URL given on the command line, not the default.
            visit_and_screenshot(driver, args.url, out_img)
    finally:
        stop_xvfb(xvfb_display)
def pullpage(target='https://whatismyipaddress.com/'):
    """Fetch a page with Firefox on a virtual display and return its source.

    The page is loaded, refreshed once, and a screenshot is written to
    ``/GMDelight/GMDelight/static/headless_screenshot.png``.

    Args:
        target: URL to load. Defaults to the address the function previously
            had hard-coded.

    Returns:
        The page source as a string.
    """
    xvfb_display = start_xvfb()
    try:
        driver = webdriver.Firefox()
        try:
            driver.get(target)
            driver.refresh()
            driver.get_screenshot_as_file(
                '/GMDelight/GMDelight/static/headless_screenshot.png')
            return str(driver.page_source)
        finally:
            # Quit the browser even when loading or the screenshot fails.
            driver.quit()
    finally:
        stop_xvfb(xvfb_display)
def flight_search():
    """Command-line entry point: search flights of one company and print them.

    Parses the command line, checks configuration and connectivity, prepares
    a browser driver, runs the company-specific search and prints the
    results. With ``--file`` the results are also saved through
    ``save_flight_to_text`` and ``save_flight_database``.

    Returns:
        None in every case; failures are reported on stdout.

    Raises:
        selenium.common.exceptions.ElementNotInteractableException: re-raised
            after printing a hint that the website is blocked or changed.
    """
    parser = argparse.ArgumentParser(description="A headless flight searcher")
    group = parser.add_mutually_exclusive_group()
    group.add_argument("-c", "--nocookies", help="disables cookies",
                       action='store_true')
    group.add_argument("-t", "--tor", help="search using TOR",
                       action='store_true')
    parser.add_argument(
        "-f", "--file",
        help="store (timestamp,depart hour,lowest price) in a generated "
             "archive in 'flight_data/'",
        action='store_true')
    parser.add_argument("-v", "--verbosity", help="program verbosity",
                        action='store_true')
    parser.add_argument("-hd", "--headless", help="use headless browser",
                        action='store_true')
    parser.add_argument("company", help="select company of the flight",
                        default="iberia")
    parser.add_argument("fromc",
                        help="from (recommended city name or airport code)")
    parser.add_argument("to", help="to (airport code)")
    parser.add_argument("date", help="date (MM/DD/YYYY)")
    args = parser.parse_args()
    v = args.verbosity

    if not set_conf():
        return
    if not internet_on():
        print('Error: There is no Internet Connection')
        return
    if v:
        print('Init of search with %s: %s to %s, %s'
              % (args.company, args.fromc, args.to, args.date))

    comp = args.company.lower()
    prev_wd = os.getcwd()

    # Pre-bind the handles so the ``finally`` clause is safe even when
    # prepare_driver itself raises.
    driver = display = tor_process = None
    try:
        # prepare the driver
        driver, display, tor_process = Driver.prepare_driver(
            args.nocookies, args.tor, v, args.headless)
        if driver is None:
            return

        # select the company search function. This table will grow.
        searchers = {
            'iberia': Iberia.search,
            'ryanair': Ryanair.search,
        }
        search = searchers.get(comp)
        if search is None:
            print('Error: Company %s not supported' % (comp))
            return
        results = search(driver, args.fromc, args.to, args.date,
                         args.nocookies, args.tor, v)

        if not results:
            print('No results for %s %s - %s ON DATE %s.'
                  % (args.company.upper(), args.fromc.upper(),
                     args.to.upper(), args.date))
            return

        print('%s SEARCH RESULTS OF %s - %s ON DATE %s:'
              % (args.company.upper(), args.fromc.upper(),
                 args.to.upper(), args.date))
        pprint.pprint(results)
        if args.file:
            save_flight_to_text(results, prev_wd, args, v)
            save_flight_database(results, prev_wd, args, v)
    except selenium.common.exceptions.ElementNotInteractableException:
        print('Error: Website currently blocked or different from usual')
        raise
    finally:
        if driver:
            driver.close()
        if args.tor:
            # NOTE(review): the virtual display is only stopped for
            # tor + headless runs, as in the original -- confirm that
            # prepare_driver starts no display in other modes.
            if args.headless and display is not None:
                stop_xvfb(display)
            if tor_process is not None:
                tor_process.kill()
from os.path import dirname, join, realpath

from tbselenium.tbdriver import TorBrowserDriver
from tbselenium.utils import start_xvfb, stop_xvfb

# Load check.torproject.org in Tor Browser on a virtual display and save a
# screenshot next to this script.
out_img = join(dirname(realpath(__file__)), "headless_screenshot.png")

xvfb_display = start_xvfb()
try:
    with TorBrowserDriver(
            '/home/manivannan/pythonexamle/selenium_example/tor-browser_en-US'
    ) as driver:
        driver.load_url("https://check.torproject.org")
        driver.get_screenshot_as_file(out_img)
        print("Screenshot is saved as %s" % out_img)
finally:
    # Stop the virtual display even when the browser fails.
    stop_xvfb(xvfb_display)
if __name__ == '__main__': main() """ """ xvfb_display = start_xvfb() driver = webdriver.Firefox() #driver.implicitly_wait(10) #driver.get("http://www.python.org") bh="https://www.bulq.com/lots/search/?last_activated_at=2021-05-22T23:40:37.275Z&page=1" driver.get(bh) print("page_source") print(driver.page_source) #assert "Python" in driver.title stop_xvfb(xvfb_display) """ """ out_img = join(dirname(realpath(__file__)), "headless_screenshot.png") xvfb_display = start_xvfb() with TorBrowserDriver('/home/manivannan/pythonexamle/selenium_example/tor-browser_en-US') as driver: driver.load_url("https://check.torproject.org") driver.get_screenshot_as_file(out_img) print("Screenshot is saved as %s" % out_img) stop_xvfb(xvfb_display) """ """ with TorBrowserDriver('/etc/tor') as driver: driver.load_url("https://check.torproject.org") driver.get_screenshot_as_file(out_img) print("Screenshot is saved as %s" % out_img) """
def launch_tor_with_custom_stem(datalist, browser):
    """Visit every entry of *datalist* in a browser while capturing traffic.

    A tor process is set up (retried up to ``cm.TRYCNT`` times) and a
    virtual display is started. For each entry a browser is launched, a
    ``tcpdump`` capture is written to the entry's output path, the page
    ``https://<entry[0]>`` is visited under a timeout and, for some entry
    types, a screenshot is saved. Errors for one entry are logged and the
    loop continues with the next one.

    Args:
        datalist: Sequence of entries; ``entry[0]`` is used as the host name
            and ``entry[2]`` as a numeric type flag.
            NOTE(review): the meaning of the flag values (0, 2, 3) is not
            visible in this function -- confirm against the caller.
        browser: One of ``'TBB'``, ``'FF'`` or ``'CR'``, selecting the driver
            setup function.

    The tor process and the virtual display are released on every exit
    path; the display is only stopped if it was actually started.
    """
    print("length of data: ", len(datalist))
    tor_binary = join(cm.TorProxypath, cm.DEFAULT_TOR_BINARY_PATH)
    # 0 is the "not set up" sentinel used throughout this function.
    tor_process, controller = 0, 0
    # Pre-bound so the outer ``finally`` is safe when Tor setup fails before
    # the display is started.
    xvfb_display = None
    try:
        TRYTOR_CNT = cm.TRYCNT
        while TRYTOR_CNT > 0 and tor_process == 0 and controller == 0:
            print("try to setup tor:", str(TRYTOR_CNT))
            tor_process, controller = TorSetup(tor_binary)
            TRYTOR_CNT -= 1
        if tor_process == 0:
            raise TorSetupError
        print("finish tor proxy setup...")

        xvfb_display = start_xvfb()  # virtual display

        for ele in datalist:
            t = getTime()
            savepath, out_img = SetOutputPath(ele, t)
            p = 0
            driver = 0
            try:
                options = {
                    'TBB': TBBSetup,
                    'FF': FFSetup,
                    'CR': ChromeSetup
                }
                TRYCNT = cm.TRYCNT
                while driver == 0 and TRYCNT != 0:
                    print("try to setup tbb:", str(TRYCNT))
                    args = (cm.driverpath, controller,
                            ele[2]) if browser == 'TBB' else ()
                    driver = options[browser](*args)
                    TRYCNT -= 1
                if driver == 0:
                    raise TBBSetupError

                # Build the argument list directly so an output path that
                # contains spaces is not split into several arguments.
                cmd = ["tcpdump", "-i", str(cm.netInterface), "tcp", "and",
                       "not", "port", "ssh", "-w", str(savepath)]
                print('cmd = ', " ".join(cmd))
                p = subprocess.Popen(cmd)
                try:
                    timeout(cm.VISITPAGE_TIMEOUT)
                    driver.get('https://' + ele[0])
                    cancel_timeout()
                    time.sleep(cm.DURATION_VISIT_PAGE)
                    p.terminate()
                    if (ele[2] == 0 or ele[2] == 2):
                        driver.get_screenshot_as_file(out_img)
                    writeLog(str(t) + "," + ele[0] + "," + str(ele[2]))
                    print("Finish tcpdump sleep...")
                except TimeExceededError:
                    writeLog("Error crawling," + ele[0] + "," + str(ele[2]) +
                             "\n" + str("Page visit Timeout"))
                finally:
                    cancel_timeout()
            except TBBSetupError:
                print("[crawl.py error]: unable to setup TBB")
                writeLog("[crawl.py error]: unable to setup TBB")
            except Exception as e:
                with open(cm.ErrorFilePath, 'a+') as fw:
                    fw.write(ele[0] + "," + str(e) + "\n")
                writeLog("Error crawling," + ele[0] + "," + str(ele[2]) +
                         "\n" + str(e))
            finally:
                if p != 0:
                    try:
                        # poll() is None while tcpdump is still running.
                        if p.poll() is None:
                            p.terminate()
                        # Reap the child so it does not linger as a zombie.
                        p.wait(timeout=5)
                    except Exception as e:
                        writeLog("[crawl.py] tcpdump terminate error: " +
                                 str(e))
                if controller != 0:
                    cleanupStream(controller, str(ele[2]), ele[0])
                if driver != 0:
                    try:
                        timeout(30)
                        driver.quit()
                        cancel_timeout()
                    except Exception as e:
                        cancel_timeout()
                        writeLog("[crawl.py] driver quit error: " + str(e))
                if ele[2] != 3:
                    time.sleep(cm.PAUSE_BETWEEN_INSTANCES)
                else:
                    time.sleep(cm.PAUSE_BETWEEN_SITES)
                RemoveTmpFile()
                RemoveProcess()
    except TorSetupError:
        print("[crawl.py] unable to set up tor proxy")
        writeLog("[crawl.py] unable to set up tor proxy")
    except Exception as e:
        print("[crawl.py]launch_tor_with_custom_stem Error")
        print("Error:", str(e))
        writeLog("[crawl.py]launch_tor_with_custom_stem Error : " + str(e))
    finally:
        if tor_process != 0:
            tor_process.kill()
        if xvfb_display is not None:
            stop_xvfb(xvfb_display)