def isolate(self):
    """Namespace this minicluster and move every service onto random ports.

    Returns:
        The generated namespace prefix (three random letters plus '-').
    """
    # Random 3-letter namespace so container names do not collide when
    # several miniclusters share one docker daemon.
    # TODO: Use this namespace.
    suffix = ''.join(random.choice(string.ascii_lowercase) for _ in range(3))
    self._namespace = suffix + '-'
    self.cli = Client(base_url="unix://var/run/docker.sock",
                      namespace=self._namespace)

    # Now we need to randomize the ports.
    # TODO: Fix race condition between the find_free_port() and the process
    # that will actually bind to that port.
    for key in ("local_master_port",
                "local_zk_port",
                "local_cassandra_cql_port",
                "local_cassandra_thrift_port"):
        self.config[key] = utils.find_free_port()

    self.mesos_agent_ports = utils.randomize_ports(self.mesos_agent_ports)
    self.resmgr_ports = utils.randomize_ports(self.resmgr_ports)
    self.hostmgr_ports = utils.randomize_ports(self.hostmgr_ports)
    self.jobmgr_ports = utils.randomize_ports(self.jobmgr_ports)
    self.aurorabridge_ports = utils.randomize_ports(self.aurorabridge_ports)
    self.apiserver_ports = utils.randomize_ports(self.apiserver_ports)
    self.archiver_ports = utils.randomize_ports(self.archiver_ports)
    self.placement_ports = utils.randomize_ports(self.placement_ports)
    self.mockcqos_ports = utils.randomize_ports(self.mockcqos_ports)

    # TODO: Save those to local disk, or print them to stdout.
    return self._namespace
def __init__(self,
             take_ownership=True,  # Tor dies when the Crawler does
             torrc_config=None,
             tor_log="/var/log/tor/tor.log",
             tor_cell_log="/var/log/tor/tor_cell_seq.log",
             control_port=9051,
             socks_port=9050,
             run_in_xvfb=True,
             tbb_path=join("/opt", "tbb", "tor-browser_en-US"),
             tb_log_path=join(_log_dir, "firefox.log"),
             tb_tor_cfg=USE_RUNNING_TOR,
             page_load_timeout=20,
             wait_on_page=5,
             wait_after_closing_circuits=0,
             restart_on_sketchy_exception=True,
             additional_control_fields=None,
             db_handler=None):
    """Launch tor, (optionally) Xvfb and Tor Browser for a crawl.

    FIX: `torrc_config` and `additional_control_fields` used mutable dict
    defaults, so port settings written into them leaked between Crawler
    instances. FIX: `self.run_in_xvfb` was only assigned when the flag was
    True, leaving the attribute missing otherwise.
    """
    if torrc_config is None:
        torrc_config = {"CookieAuth": "1"}
    if additional_control_fields is None:
        additional_control_fields = {}
    self.logger = setup_logging(_log_dir, "crawler")
    self.torrc_config = torrc_config
    # Pick free ports, avoiding a collision between the two.
    self.socks_port = find_free_port(socks_port, control_port)
    self.torrc_config.update({"SocksPort": str(self.socks_port)})
    self.control_port = find_free_port(control_port, self.socks_port)
    self.torrc_config.update({"ControlPort": str(self.control_port)})
    self.torrc_config.update({"Log": "INFO file {}".format(tor_log)})
    self.logger.info("Starting tor process with config "
                     "{torrc_config}.".format(**locals()))
    self.tor_process = launch_tor_with_config(config=self.torrc_config,
                                              take_ownership=take_ownership)
    self.authenticate_to_tor_controlport()
    self.logger.info("Opening cell log stream...")
    self.cell_log = open(tor_cell_log, "rb")
    # Record the flag unconditionally so later code can always test it.
    self.run_in_xvfb = run_in_xvfb
    if run_in_xvfb:
        self.logger.info("Starting Xvfb...")
        self.virtual_framebuffer = start_xvfb()
    self.logger.info("Starting Tor Browser...")
    self.tb_driver = TorBrowserDriver(tbb_path=tbb_path,
                                      tor_cfg=tb_tor_cfg,
                                      tbb_logfile_path=tb_log_path,
                                      socks_port=self.socks_port,
                                      control_port=self.control_port)
    self.wait_after_closing_circuits = wait_after_closing_circuits
    self.page_load_timeout = page_load_timeout
    self.tb_driver.set_page_load_timeout(page_load_timeout)
    self.wait_on_page = wait_on_page
    self.restart_on_sketchy_exception = restart_on_sketchy_exception
    self.control_data = self.get_control_data(page_load_timeout,
                                              wait_on_page,
                                              wait_after_closing_circuits,
                                              additional_control_fields)
    self.db_handler = db_handler
    if db_handler:
        self.crawlid = self.db_handler.add_crawl(self.control_data)
def main():
    """Sender-side launcher.

    Allocates free ports for the Producer/Consumer1/Collector pipeline,
    pushes the collectors' sending ports to the second machine over zmq,
    then runs all stages as parallel subprocesses and waits for them.
    Exits with status 1 if the handshake with the second machine fails.
    """
    videoPath = str(sys.argv[1])
    print("Your ip is: {}".format(utils.get_ip()))
    commands = []
    Collector_Receiving_Ports = []
    Collector_Sending_Ports = []
    # Generate needed random free ports
    producerPort = str(utils.get_ip()) + ":" + str(utils.find_free_port())
    for i in range(math.ceil(utils.N / 2)):
        Collector_Receiving_Ports.append(
            str(utils.get_ip()) + ":" + str(utils.find_free_port()))
        Collector_Sending_Ports.append(
            str(utils.get_ip()) + ":" + str(utils.find_free_port()))
    # Send Collector Ports to second computer
    try:
        ipPortConnecton = str(utils.SENDER) + ":" + utils.CONNECTION_PORT
        senderSocket, senderContext = utils.configure_port(
            ipPortConnecton, zmq.PUSH, "bind")
        data = pickle.dumps(Collector_Sending_Ports)
        senderSocket.send(data)
        print("Ports data has been sent...")
    # FIX: was a bare `except:` that printed and fell through; the function
    # then crashed later with NameError on `senderSocket.close()`.  Catch the
    # zmq error specifically and abort, since nothing works without the
    # handshake.
    except zmq.ZMQError:
        print("Machine 1 (Sender) ERROR IN SENDING CONNECTION DATA, " +
              "Try Changing the CONNECTION_PORT in utils.py file")
        sys.exit(1)
    # Generate needed Processes
    # Generate Producer
    commands.append('python Producer.py {} {}'.format(videoPath,
                                                      producerPort))
    # Generate N Consumers1
    for i in range(utils.N):
        commands.append('python Consumer1.py {} {}'.format(
            producerPort, Collector_Receiving_Ports[math.floor(i / 2)]))
    # Generate N / 2 Collector
    for i in range(math.ceil(utils.N / 2)):
        commands.append('python Collector.py {} {}'.format(
            Collector_Receiving_Ports[i], Collector_Sending_Ports[i]))
    # Run in parallel
    processes = [Popen(cmd, shell=True) for cmd in commands]
    for p in processes:
        p.wait()
    senderSocket.close()
    senderContext.destroy()
async def srs(loop, port=None):
    """Run one single-shot aiohttp server instance.

    Serves "/" until the handler signals the closing event, then cleans up
    and schedules the next instance on the port the handler chose.  If the
    requested port is taken, retries on a fresh free port.
    """
    # create app handler and closing event
    closing_time = asyncio.Event()
    closing_task = asyncio.create_task(closing_time.wait())
    closers.append(closing_time)
    handler = ClosingHandler(closing_time)

    # add handler to new application runner
    app = web.Application()
    app.add_routes([web.get("/", handler.default)])
    runner = web.AppRunner(app)
    await runner.setup()

    # start app on a specific tcp port
    if port is None:  # FIX: compare to None with `is`, not `==`
        port = find_free_port()
    site = web.TCPSite(runner, "0.0.0.0", port)
    logging.info(f"{site.name} starting")
    try:
        await site.start()
    except OSError:
        # FIX: logging.warn is deprecated; use logging.warning.
        logging.warning(f"port {port} already in use, trying a different one")
        # FIX: release the runner that failed to bind, and return after the
        # retry — the original fell through and awaited the closing event of
        # a site that never started.
        await runner.cleanup()
        return await srs(loop)

    # wait for closing event
    await closing_task
    logging.info(f"{site.name} closing")
    await runner.cleanup()
    loop.create_task(srs(loop, handler.next_port))
async def default(self, request):
    """Handle GET /: trigger shutdown of this instance and report ports.

    The JSON body carries the port currently serving, the freshly chosen
    port for the next instance, and a random flag slice index.
    """
    # Signal the owning server loop to shut this instance down, then pick
    # where the next instance will listen.
    self.exit_event.set()
    self.next_port = find_free_port()
    payload = {
        "now": request.url.port,
        "next": self.next_port,
        "flag-slice": random_flag_index(),
    }
    return web.json_response(payload)
def test_run_container(channel, img, gpu):
    """End-to-end container test: build the emulator image, launch it, wait
    for its docker healthcheck to report healthy, then verify that adb can
    connect from outside the container and that boot completed.
    """
    import time  # local import: only used for the health-poll delay

    with TempDir() as tmp:
        args = Arguments(
            channel,
            img,
            tmp,
            None,
            False,
            "",
            gpu,
            True,
            False,
            False,
            "us-docker.pkg.dev/android-emulator-268719/images",
            False,
            False,
        )
        emu_docker.accept_licenses(args)
        devices = emu_docker.create_docker_image(args)
        assert devices
        for device in devices:
            port = find_free_port()
            # Launch this thing.
            container = device.launch({"5555/tcp": port})
            # Now we are going to inspect this thing.
            api_client = device.get_api_client()
            status = api_client.inspect_container(container.id)
            state = status["State"]
            assert state["Status"] == "running"
            # Acceptable states:
            # starting --> We are still launching
            # healthy --> Yay, we booted! Good to go..
            health = state["Health"]["Status"]
            while health == "starting":
                # FIX: the original polled in a tight busy-loop; sleep
                # briefly between inspections while the emulator boots.
                time.sleep(1)
                health = api_client.inspect_container(
                    container.id)["State"]["Health"]["Status"]
            assert health == "healthy"
            # Good, good.. From an internal perspective things look great.
            # Can we connect with adb from outside the container?
            adb = find_adb()
            # Erase knowledge of existing devices.
            subprocess.check_output([adb, "kill-server"])
            name = "localhost:{}".format(port)
            subprocess.check_output([adb, "connect", name])
            # Boot complete should be true..
            res = subprocess.check_output(
                [adb, "-s", name, "shell", "getprop", "dev.bootcomplete"])
            assert "1" in str(res)
            api_client.stop(container.id)
def __init__(self, zone, id, tracker, build_indexes):
    """Configure and start a kraken agent container for the given zone."""
    self.zone = zone
    self.id = id
    self.tracker = tracker
    self.build_indexes = build_indexes
    # One free port per exposed service (allocation order preserved).
    self.torrent_client_port, self.registry_port, self.port = (
        find_free_port(), find_free_port(), find_free_port())
    self.config_file = 'test-{zone}.yaml'.format(zone=zone)
    self.name = 'kraken-agent-{id}-{zone}'.format(id=id, zone=zone)
    index_addrs = [bi.addr for bi in self.build_indexes]
    populate_config_template('agent',
                             self.config_file,
                             trackers=yaml_list([self.tracker.addr]),
                             build_indexes=yaml_list(index_addrs))
    self.volumes = create_volumes('agent', self.name)
    self.start()
def __init__(self, zone, id, tracker, build_indexes,
             with_docker_socket=False):
    """Configure and start a kraken agent container for the given zone,
    optionally mounting the host docker socket into it."""
    self.zone = zone
    self.id = id
    self.tracker = tracker
    self.build_indexes = build_indexes
    # One free port per exposed service (allocation order preserved).
    self.torrent_client_port, self.registry_port, self.port = (
        find_free_port(), find_free_port(), find_free_port())
    self.config_file = 'test-{zone}.yaml'.format(zone=zone)
    self.name = 'kraken-agent-{id}-{zone}'.format(id=id, zone=zone)
    self.with_docker_socket = with_docker_socket
    index_addrs = [bi.addr for bi in self.build_indexes]
    populate_config_template('agent',
                             self.config_file,
                             trackers=yaml_list([self.tracker.addr]),
                             build_indexes=yaml_list(index_addrs))
    if self.with_docker_socket:
        # Besides mounting the docker socket we also skip the local cache
        # volume: otherwise the process would run as root and create local
        # cache files that are hard to clean up outside of the container.
        self.volumes = create_volumes('agent', self.name, local_cache=False)
        self.volumes['/var/run/docker.sock'] = {
            'bind': '/var/run/docker.sock',
            'mode': 'rw',
        }
    else:
        self.volumes = create_volumes('agent', self.name)
    self.start()
def __init__(self, zone, origin_cluster):
    """Configure and start a kraken tracker container for the given zone."""
    self.zone = zone
    self.origin_cluster = origin_cluster
    self.port = find_free_port()
    self.config_file = 'test-{zone}.yaml'.format(zone=zone)
    self.name = 'kraken-tracker-{zone}'.format(zone=zone)
    origin_addrs = [o.addr for o in self.origin_cluster.origins]
    populate_config_template('tracker',
                             self.config_file,
                             origins=yaml_list(origin_addrs))
    self.volumes = create_volumes('tracker', self.name)
    self.start()
def __init__(self, zone, origin_cluster, build_indexes):
    """Configure and start a kraken proxy container for the given zone."""
    self.zone = zone
    self.origin_cluster = origin_cluster
    self.build_indexes = build_indexes
    self.port = find_free_port()
    self.config_file = 'test-{zone}.yaml'.format(zone=zone)
    self.name = 'kraken-proxy-{zone}'.format(zone=zone)
    index_addrs = [bi.addr for bi in self.build_indexes]
    origin_addrs = [o.addr for o in self.origin_cluster.origins]
    populate_config_template('proxy',
                             self.config_file,
                             build_indexes=yaml_list(index_addrs),
                             origins=yaml_list(origin_addrs))
    self.volumes = create_volumes('proxy', self.name)
    self.start()
def test_run_container(channel, img):
    """End-to-end container test: build the emulator image, launch it, wait
    for the docker healthcheck to report healthy, then verify adb can
    connect from outside the container and that boot completed.
    """
    import time  # local import: only used for the health-poll delay

    # FIX: `not "linux" in x` -> idiomatic `"linux" not in x`.
    assert "linux" not in sys.platform
    assert docker.from_env().ping()
    with TempDir() as tmp:
        args = Arguments(channel, img, tmp, None, False, "")
        device = emu_docker.create_docker_image(args)
        port = find_free_port()
        # Launch this thing.
        device.launch(device.identity, port)
        # Now we are going to inspect this thing.
        api_client = device.get_api_client()
        status = api_client.inspect_container(device.container.id)
        state = status["State"]
        assert state["Status"] == "running"
        # Acceptable states:
        # starting --> We are still launching
        # healthy --> Yay, we booted! Good to go..
        health = state["Health"]["Status"]
        while health == "starting":
            # FIX: sleep between inspections instead of busy-spinning.
            time.sleep(1)
            health = api_client.inspect_container(
                device.container.id)["State"]["Health"]["Status"]
        assert health == "healthy"
        # Good, good.. From an internal perspective things look great.
        # Can we connect with adb from outside the container?
        adb = find_adb()
        # Erase knowledge of existing devices.
        subprocess.check_output([adb, "kill-server"])
        name = "localhost:{}".format(port)
        subprocess.check_output([adb, "connect", name])
        # Boot complete should be true..
        res = subprocess.check_output(
            [adb, "-s", name, "shell", "getprop", "dev.bootcomplete"])
        assert "1" in str(res)
        api_client.stop(device.container.id)
def __init__(self,
             take_ownership=True,  # Tor dies when the Sorter does
             torrc_config=None,
             socks_port=9050,
             page_load_timeout=20,
             max_tasks=10,
             db_handler=None):
    """Start tor and build the async HTTP machinery the Sorter uses.

    FIX: `torrc_config` used a mutable dict default, so the SocksPort
    written into it leaked between Sorter instances. FIX: the SOCKS
    connector was built with the *requested* `socks_port` even though tor
    was launched on whatever port `find_free_port` actually returned.
    """
    if torrc_config is None:
        torrc_config = {"ControlPort": "9051", "CookieAuth": "1"}
    self.logger = setup_logging(_log_dir, "sorter")
    self.db_handler = db_handler
    self.logger.info("Opening event loop for Sorter...")
    self.loop = asyncio.get_event_loop()
    self.max_tasks = max_tasks
    self.logger.info("Creating Sorter queue...")
    self.q = asyncio.Queue()
    # Start tor and create an aiohttp tor connector
    self.torrc_config = torrc_config
    self.socks_port = str(find_free_port(socks_port))
    self.torrc_config.update({"SocksPort": self.socks_port})
    self.logger.info("Starting tor process with config "
                     "{self.torrc_config}.".format(**locals()))
    self.tor_process = launch_tor_with_config(config=self.torrc_config,
                                              take_ownership=take_ownership)
    # FIX: point the proxy at the port tor actually bound, not at the
    # originally requested one (they differ when socks_port was taken).
    onion_proxy = aiosocks.Socks5Addr('127.0.0.1', int(self.socks_port))
    conn = SocksConnector(proxy=onion_proxy, remote_resolve=True)
    # aiohttp's ClientSession does connection pooling and HTTP keep-alives
    # for us
    self.logger.info("Creating aiohttp ClientSession with our event loop "
                     "and tor proxy connector...")
    self.session = aiohttp.ClientSession(loop=self.loop, connector=conn)
    # Pretend we're Tor Browser in order to get rejected by less sites/WAFs
    u = "Mozilla/5.0 (Windows NT 6.1; rv:45.0) Gecko/20100101 Firefox/45.0"
    self.headers = {'user-agent': u}
    self.page_load_timeout = page_load_timeout
def main():
    """Receiver-side launcher.

    Receives the collectors' sending ports from the first machine over zmq,
    then spawns N Consumer2 workers plus a Final_Collector as parallel
    subprocesses and waits for them to finish.
    """
    outputPath = str(sys.argv[1])
    print("Your ip is: {}".format(utils.get_ip()))
    # Allocate a random free endpoint for the final collector.
    finalCollectorPort = str(utils.get_ip()) + ":" + \
        str(utils.find_free_port())
    # Receive the collector ports from the first computer.
    # NOTE: error handling around this handshake was deliberately disabled
    # in the original source; failures propagate to the caller.
    ipPortConnecton = str(utils.SENDER) + ":" + utils.CONNECTION_PORT
    recieverSocket, recieverContext = utils.configure_port(
        ipPortConnecton, zmq.PULL, "connect")
    Collector_Sending_Ports = pickle.loads(recieverSocket.recv())
    print("Port has been recieved from the sender's collector")
    # Build the command lines: N Consumer2 workers, two per collector port,
    # followed by the final collector.
    commands = [
        'python Consumer2.py {} {}'.format(
            Collector_Sending_Ports[int(math.floor(i / 2))],
            finalCollectorPort)
        for i in range(utils.N)
    ]
    commands.append('python Final_Collector.py {} {}'.format(
        outputPath, finalCollectorPort))
    # Run everything in parallel and wait for completion.
    processes = [Popen(cmd, shell=True) for cmd in commands]
    for p in processes:
        p.wait()
    recieverSocket.close()
    recieverContext.destroy()
def main():
    """Entry point for the ibis test/benchmark runner.

    Parses the command line, prepares a conda environment, then runs any
    combination of the "build", "test" and "benchmark" tasks against an
    omniscidb server.  Exits with status 1 on any failure.
    """
    omniscript_path = os.path.dirname(__file__)
    omnisci_server = None
    # FIX: initialize so the `finally` clause cannot raise NameError when an
    # error occurs before the environment is created.
    conda_env = None
    args = None
    port_default_value = -1  # sentinel: "pick a free port for me"

    parser = argparse.ArgumentParser(
        description="Run internal tests from ibis project")
    required = parser._action_groups.pop()
    optional = parser.add_argument_group("optional arguments")
    omnisci = parser.add_argument_group("omnisci")
    benchmark = parser.add_argument_group("benchmark")
    mysql = parser.add_argument_group("mysql")
    commits = parser.add_argument_group("commits")

    possible_tasks = ["build", "test", "benchmark"]
    benchmarks = ["ny_taxi", "santander", "census", "plasticc"]

    # Task
    required.add_argument(
        "-task",
        dest="task",
        required=True,
        help=f"Task for execute {possible_tasks}. Use , separator for multiple tasks",
    )
    # Environment
    required.add_argument("-en", "--env_name", dest="env_name",
                          help="Conda env name.")
    optional.add_argument(
        "-ec",
        "--env_check",
        dest="env_check",
        default=False,
        type=str_arg_to_bool,
        help="Check if env exists. If it exists don't recreate.",
    )
    optional.add_argument(
        "-s",
        "--save_env",
        dest="save_env",
        default=False,
        type=str_arg_to_bool,
        help="Save conda env after executing.",
    )
    optional.add_argument(
        "-r",
        "--report_path",
        dest="report_path",
        default=os.path.join(omniscript_path, ".."),
        help="Path to report file.",
    )
    optional.add_argument(
        "-ci",
        "--ci_requirements",
        dest="ci_requirements",
        default=os.path.join(omniscript_path, "ci_requirements.yml"),
        help="File with ci requirements for conda env.",
    )
    optional.add_argument(
        "-py",
        "--python_version",
        dest="python_version",
        default="3.7",
        # FIX: help text was a copy-paste of the -ci option's help.
        help="Python version to use for conda env.",
    )
    # Ibis
    required.add_argument(
        "-i",
        "--ibis_path",
        dest="ibis_path",
        required=True,
        help="Path to ibis directory.",
    )
    # Ibis tests
    optional.add_argument(
        "-expression",
        dest="expression",
        default=" ",
        help="Run tests which match the given substring test names and their parent "
        "classes. Example: 'test_other', while 'not test_method' matches those "
        "that don't contain 'test_method' in their names.",
    )
    # Omnisci server parameters
    omnisci.add_argument(
        "-executable",
        dest="executable",
        required=True,
        help="Path to omnisci_server executable.",
    )
    omnisci.add_argument(
        "--omnisci_cwd",
        dest="omnisci_cwd",
        help="Path to omnisci working directory. "
        "By default parent directory of executable location is used. "
        "Data directory is used in this location.",
    )
    omnisci.add_argument(
        "-port",
        dest="port",
        default=port_default_value,
        type=int,
        help="TCP port number to run omnisci_server on.",
    )
    omnisci.add_argument(
        "-http_port",
        dest="http_port",
        default=port_default_value,
        type=int,
        help="HTTP port number to run omnisci_server on.",
    )
    omnisci.add_argument(
        "-calcite_port",
        dest="calcite_port",
        default=port_default_value,
        type=int,
        help="Calcite port number to run omnisci_server on.",
    )
    omnisci.add_argument(
        "-user",
        dest="user",
        default="admin",
        help="User name to use on omniscidb server.",
    )
    omnisci.add_argument(
        "-password",
        dest="password",
        default="HyperInteractive",
        help="User password to use on omniscidb server.",
    )
    omnisci.add_argument(
        "-database_name",
        dest="database_name",
        default="agent_test_ibis",
        help="Database name to use in omniscidb server.",
    )
    omnisci.add_argument(
        "-table",
        dest="table",
        default="benchmark_table",
        # FIX: doubled word "name name" in help.
        help="Table name to use in omniscidb server.",
    )
    omnisci.add_argument(
        "-ipc_conn",
        dest="ipc_connection",
        default=True,
        type=str_arg_to_bool,
        # FIX: help text was a copy-paste of the -table option's help.
        help="Use IPC connection to the omniscidb server.",
    )
    # Benchmark parameters
    benchmark.add_argument(
        "-bench_name",
        dest="bench_name",
        choices=benchmarks,
        help="Benchmark name.",
    )
    benchmark.add_argument(
        "-data_file",
        dest="data_file",
        help="A datafile that should be loaded.",
    )
    benchmark.add_argument(
        "-dfiles_num",
        dest="dfiles_num",
        default=1,
        type=int,
        help="Number of datafiles to input into database for processing.",
    )
    benchmark.add_argument(
        "-iterations",
        dest="iterations",
        default=1,
        type=int,
        help="Number of iterations to run every query. Best result is selected.",
    )
    benchmark.add_argument("-dnd", default=False, type=str_arg_to_bool,
                           help="Do not delete old table.")
    benchmark.add_argument(
        "-dni",
        default=False,
        type=str_arg_to_bool,
        help="Do not create new table and import any data from CSV files.",
    )
    benchmark.add_argument(
        "-validation",
        dest="validation",
        default=False,
        type=str_arg_to_bool,
        help="validate queries results (by comparison with Pandas queries results).",
    )
    benchmark.add_argument(
        "-optimizer",
        choices=["intel", "stock"],
        dest="optimizer",
        default="intel",
        help="Which optimizer is used",
    )
    benchmark.add_argument(
        "-no_ibis",
        default=False,
        type=str_arg_to_bool,
        help="Do not run Ibis benchmark, run only Pandas (or Modin) version",
    )
    benchmark.add_argument(
        "-pandas_mode",
        choices=["Pandas", "Modin_on_ray", "Modin_on_dask", "Modin_on_python"],
        default="Pandas",
        help="Specifies which version of Pandas to use: "
        "plain Pandas, Modin running on Ray or on Dask",
    )
    benchmark.add_argument(
        "-ray_tmpdir",
        default="/tmp",
        help="Location where to keep Ray plasma store. "
        "It should have enough space to keep -ray_memory",
    )
    benchmark.add_argument(
        "-ray_memory",
        default=200 * 1024 * 1024 * 1024,
        help="Size of memory to allocate for Ray plasma store",
    )
    benchmark.add_argument(
        "-no_ml",
        default=False,
        type=str_arg_to_bool,
        help="Do not run machine learning benchmark, only ETL part",
    )
    optional.add_argument(
        "-gpu_memory",
        dest="gpu_memory",
        type=int,
        help="specify the memory of your gpu, default 16. "
        "(This controls the lines to be used. Also work for CPU version. )",
        default=16,
    )
    # MySQL database parameters
    mysql.add_argument(
        "-db_server",
        dest="db_server",
        default="localhost",
        help="Host name of MySQL server.",
    )
    mysql.add_argument(
        "-db_port",
        dest="db_port",
        default=3306,
        type=int,
        help="Port number of MySQL server.",
    )
    mysql.add_argument(
        "-db_user",
        dest="db_user",
        help="Username to use to connect to MySQL database. "
        "If user name is specified, script attempts to store results in MySQL "
        "database using other -db-* parameters.",
    )
    mysql.add_argument(
        "-db_pass",
        dest="db_pass",
        default="omniscidb",
        help="Password to use to connect to MySQL database.",
    )
    mysql.add_argument(
        "-db_name",
        dest="db_name",
        default="omniscidb",
        help="MySQL database to use to store benchmark results.",
    )
    optional.add_argument(
        "-db_table_etl",
        dest="db_table_etl",
        help="Table to use to store ETL results for this benchmark.",
    )
    optional.add_argument(
        "-db_table_ml",
        dest="db_table_ml",
        help="Table to use to store ML results for this benchmark.",
    )
    # Additional information
    commits.add_argument(
        "-commit_omnisci",
        dest="commit_omnisci",
        default="1234567890123456789012345678901234567890",
        help="Omnisci commit hash to use for tests.",
    )
    commits.add_argument(
        "-commit_ibis",
        dest="commit_ibis",
        default="1234567890123456789012345678901234567890",
        help="Ibis commit hash to use for tests.",
    )

    try:
        args = parser.parse_args()

        os.environ["IBIS_TEST_OMNISCIDB_DATABASE"] = args.database_name
        os.environ["IBIS_TEST_DATA_DB"] = args.database_name
        os.environ["IBIS_TEST_OMNISCIDB_PORT"] = str(args.port)
        os.environ["PYTHONIOENCODING"] = "UTF-8"
        os.environ["PYTHONUNBUFFERED"] = "1"

        if args.port == port_default_value:
            args.port = find_free_port()
        if args.http_port == port_default_value:
            args.http_port = find_free_port()
        if args.calcite_port == port_default_value:
            args.calcite_port = find_free_port()

        required_tasks = args.task.split(",")
        tasks = {task: task in required_tasks for task in possible_tasks}
        if not any(tasks.values()):
            print(
                f"Only {list(tasks.keys())} are supported, {required_tasks} cannot find possible tasks"
            )
            sys.exit(1)

        # FIX: the whitelist contained the typo "3,6", so "-py 3.6" always
        # failed this validation.
        if args.python_version not in ["3.7", "3.6"]:
            print(
                f"Only 3.7 and 3.6 python versions are supported, {args.python_version} is not supported"
            )
            sys.exit(1)

        ibis_requirements = os.path.join(
            args.ibis_path, "ci",
            f"requirements-{args.python_version}-dev.yml")
        requirements_file = "requirements.yml"

        conda_env = CondaEnvironment(args.env_name)
        print("PREPARING ENVIRONMENT")
        combinate_requirements(ibis_requirements, args.ci_requirements,
                               requirements_file)
        conda_env.create(args.env_check, requirements_file=requirements_file)

        if tasks["build"]:
            install_ibis_cmdline = [
                "python3", os.path.join("setup.py"), "install"
            ]
            print("IBIS INSTALLATION")
            conda_env.run(install_ibis_cmdline, cwd=args.ibis_path,
                          print_output=False)

        if tasks["test"]:
            ibis_data_script = os.path.join(args.ibis_path, "ci",
                                            "datamgr.py")
            dataset_download_cmdline = [
                "python3", ibis_data_script, "download"
            ]
            dataset_import_cmdline = [
                "python3",
                ibis_data_script,
                "omniscidb",
                "-P",
                str(args.port),
                "--database",
                args.database_name,
            ]
            report_file_name = (
                f"report-{args.commit_ibis[:8]}-{args.commit_omnisci[:8]}.html"
            )
            if not os.path.isdir(args.report_path):
                os.makedirs(args.report_path)
            report_file_path = os.path.join(args.report_path,
                                            report_file_name)

            ibis_tests_cmdline = [
                "pytest",
                "-m",
                "omniscidb",
                "--disable-pytest-warnings",
                "-k",
                args.expression,
                f"--html={report_file_path}",
            ]

            print("STARTING OMNISCI SERVER")
            omnisci_server = OmnisciServer(
                omnisci_executable=args.executable,
                omnisci_port=args.port,
                http_port=args.http_port,
                calcite_port=args.calcite_port,
                database_name=args.database_name,
                omnisci_cwd=args.omnisci_cwd,
                user=args.user,
                password=args.password,
            )
            omnisci_server.launch()

            print("PREPARING DATA")
            conda_env.run(dataset_download_cmdline)
            conda_env.run(dataset_import_cmdline)

            print("RUNNING TESTS")
            conda_env.run(ibis_tests_cmdline, cwd=args.ibis_path)

        if tasks["benchmark"]:
            if not args.data_file:
                # FIX: was an f-string with no placeholders.
                print(
                    "Parameter --data_file was received empty, but it is required for benchmarks"
                )
                sys.exit(1)

            benchmark_script_path = os.path.join(omniscript_path,
                                                 "run_ibis_benchmark.py")
            benchmark_cmd = ["python3", benchmark_script_path]

            possible_benchmark_args = [
                "bench_name",
                "data_file",
                "dfiles_num",
                "iterations",
                "dnd",
                "dni",
                "validation",
                "optimizer",
                "no_ibis",
                "pandas_mode",
                "ray_tmpdir",
                "ray_memory",
                "no_ml",
                "gpu_memory",
                "db_server",
                "db_port",
                "db_user",
                "db_pass",
                "db_name",
                "db_table_etl",
                "db_table_ml",
                "executable",
                "omnisci_cwd",
                "port",
                "http_port",
                "calcite_port",
                "user",
                "password",
                "ipc_connection",
                "database_name",
                "table",
                "commit_omnisci",
                "commit_ibis",
            ]
            args_dict = vars(args)
            args_dict["data_file"] = f"'{args_dict['data_file']}'"
            # Forward every recognized option that carries a value to the
            # benchmark script.
            for arg_name in list(parser._option_string_actions.keys()):
                try:
                    pure_arg = re.sub(r"^--*", "", arg_name)
                    if pure_arg in possible_benchmark_args:
                        arg_value = args_dict[pure_arg]
                        if arg_value:
                            benchmark_cmd.extend([arg_name, str(arg_value)])
                except KeyError:
                    pass

            print(benchmark_cmd)
            conda_env.run(benchmark_cmd)
    except Exception:
        traceback.print_exc(file=sys.stdout)
        sys.exit(1)
    finally:
        if omnisci_server:
            omnisci_server.terminate()
        if conda_env and args and args.save_env is False:
            conda_env.remove()
def __init__(
        self,
        take_ownership=True,  # Tor dies when the Crawler does
        torrc_config=None,
        tor_log="/var/log/tor/tor.log",
        tor_cell_log="/var/log/tor/tor_cell_seq.log",
        control_port=9051,
        socks_port=9050,
        run_in_xvfb=True,
        tbb_path=join("/opt", "tbb", "tor-browser_en-US"),
        tb_log_path=join(_log_dir, "firefox.log"),
        tb_tor_cfg=USE_RUNNING_TOR,
        page_load_timeout=20,
        wait_on_page=5,
        wait_after_closing_circuits=0,
        restart_on_sketchy_exception=True,
        additional_control_fields=None,
        db_handler=None):
    """Launch tor, (optionally) Xvfb and Tor Browser for a crawl.

    FIX: `torrc_config` and `additional_control_fields` used mutable dict
    defaults, so port settings written into them leaked between Crawler
    instances. FIX: `self.run_in_xvfb` was only assigned when the flag was
    True, leaving the attribute missing otherwise.
    """
    if torrc_config is None:
        torrc_config = {"CookieAuth": "1"}
    if additional_control_fields is None:
        additional_control_fields = {}
    self.logger = setup_logging(_log_dir, "crawler")
    # Set stem logging level to INFO - "high level library activity"
    stem.util.log.get_logger().setLevel(stem.util.log.Runlevel.INFO)
    self.torrc_config = torrc_config
    # Pick free ports, avoiding a collision between the two.
    self.socks_port = find_free_port(socks_port, control_port)
    self.torrc_config.update({"SocksPort": str(self.socks_port)})
    self.control_port = find_free_port(control_port, self.socks_port)
    self.torrc_config.update({"ControlPort": str(self.control_port)})
    self.torrc_config.update({"Log": "INFO file {}".format(tor_log)})
    self.logger.info("Starting tor process with config "
                     "{torrc_config}.".format(**locals()))
    self.tor_process = launch_tor_with_config(
        config=self.torrc_config, take_ownership=take_ownership)
    self.authenticate_to_tor_controlport()
    self.logger.info("Opening cell log stream...")
    self.cell_log = open(tor_cell_log, "rb")
    # Record the flag unconditionally so later code can always test it.
    self.run_in_xvfb = run_in_xvfb
    if run_in_xvfb:
        self.logger.info("Starting Xvfb...")
        self.virtual_framebuffer = start_xvfb()
    self.logger.info("Starting Tor Browser...")
    self.tb_driver = TorBrowserDriver(tbb_path=tbb_path,
                                      tor_cfg=tb_tor_cfg,
                                      tbb_logfile_path=tb_log_path,
                                      socks_port=self.socks_port,
                                      control_port=self.control_port)
    self.wait_after_closing_circuits = wait_after_closing_circuits
    self.page_load_timeout = page_load_timeout
    self.tb_driver.set_page_load_timeout(page_load_timeout)
    self.wait_on_page = wait_on_page
    self.restart_on_sketchy_exception = restart_on_sketchy_exception
    self.control_data = self.get_control_data(page_load_timeout,
                                              wait_on_page,
                                              wait_after_closing_circuits,
                                              additional_control_fields)
    self.db_handler = db_handler
    if db_handler:
        self.crawlid = self.db_handler.add_crawl(self.control_data)
def main():
    """Entry point for a single ibis/Pandas benchmark run.

    Parses the command line, optionally starts an omniscidb server, runs the
    selected benchmark for the requested number of iterations, and (when
    -db_user is given) stores the ETL/ML results in a MySQL database.
    Exits with status 1 on any failure.
    """
    omniscript_path = os.path.dirname(__file__)
    args = None
    omnisci_server = None
    # FIX: initialize before `try` so the `finally` clause cannot raise
    # NameError when argument parsing itself fails.
    omnisci_server_worker = None
    port_default_value = -1  # sentinel: "pick a free port for me"

    benchmarks = ["ny_taxi", "santander", "census", "plasticc"]

    parser = argparse.ArgumentParser(
        description="Run internal tests from ibis project")
    optional = parser._action_groups.pop()
    required = parser.add_argument_group("required arguments")
    parser._action_groups.append(optional)

    required.add_argument(
        "-bench_name",
        dest="bench_name",
        choices=benchmarks,
        help="Benchmark name.",
        required=True,
    )
    required.add_argument(
        "-data_file",
        dest="data_file",
        help="A datafile that should be loaded.",
        required=True,
    )
    optional.add_argument(
        "-dfiles_num",
        dest="dfiles_num",
        default=1,
        type=int,
        help="Number of datafiles to input into database for processing.",
    )
    optional.add_argument(
        "-iterations",
        dest="iterations",
        default=1,
        type=int,
        help="Number of iterations to run every query. Best result is selected.",
    )
    optional.add_argument("-dnd", default=False, type=str_arg_to_bool,
                          help="Do not delete old table.")
    optional.add_argument(
        "-dni",
        default=False,
        type=str_arg_to_bool,
        help="Do not create new table and import any data from CSV files.",
    )
    optional.add_argument(
        "-validation",
        dest="validation",
        default=False,
        type=str_arg_to_bool,
        help="validate queries results (by comparison with Pandas queries results).",
    )
    optional.add_argument(
        "-optimizer",
        choices=["intel", "stock"],
        dest="optimizer",
        default="intel",
        help="Which optimizer is used",
    )
    optional.add_argument(
        "-no_ibis",
        default=False,
        type=str_arg_to_bool,
        help="Do not run Ibis benchmark, run only Pandas (or Modin) version",
    )
    optional.add_argument(
        "-pandas_mode",
        choices=["Pandas", "Modin_on_ray", "Modin_on_dask", "Modin_on_python"],
        default="Pandas",
        help="Specifies which version of Pandas to use: plain Pandas, Modin running on Ray or on Dask",
    )
    optional.add_argument(
        "-ray_tmpdir",
        default="/tmp",
        help="Location where to keep Ray plasma store. It should have enough space to keep -ray_memory",
    )
    optional.add_argument(
        "-ray_memory",
        default=200 * 1024 * 1024 * 1024,
        help="Size of memory to allocate for Ray plasma store",
    )
    optional.add_argument(
        "-no_ml",
        default=False,
        type=str_arg_to_bool,
        help="Do not run machine learning benchmark, only ETL part",
    )
    optional.add_argument(
        "-gpu_memory",
        dest="gpu_memory",
        type=int,
        help="specify the memory of your gpu, default 16. (This controls the lines to be used. Also work for CPU version. )",
        default=16,
    )
    # MySQL database parameters
    optional.add_argument(
        "-db_server",
        dest="db_server",
        default="localhost",
        help="Host name of MySQL server.",
    )
    optional.add_argument(
        "-db_port",
        dest="db_port",
        default=3306,
        type=int,
        help="Port number of MySQL server.",
    )
    optional.add_argument(
        "-db_user",
        dest="db_user",
        help="Username to use to connect to MySQL database. "
        "If user name is specified, script attempts to store results in MySQL "
        "database using other -db-* parameters.",
    )
    optional.add_argument(
        "-db_pass",
        dest="db_pass",
        default="omniscidb",
        help="Password to use to connect to MySQL database.",
    )
    optional.add_argument(
        "-db_name",
        dest="db_name",
        default="omniscidb",
        help="MySQL database to use to store benchmark results.",
    )
    optional.add_argument(
        "-db_table_etl",
        dest="db_table_etl",
        help="Table to use to store ETL results for this benchmark.",
    )
    optional.add_argument(
        "-db_table_ml",
        dest="db_table_ml",
        help="Table to use to store ML results for this benchmark.",
    )
    # Omnisci server parameters
    optional.add_argument(
        "-executable",
        dest="executable",
        help="Path to omnisci_server executable.",
    )
    optional.add_argument(
        "-omnisci_cwd",
        dest="omnisci_cwd",
        help="Path to omnisci working directory. "
        "By default parent directory of executable location is used. "
        "Data directory is used in this location.",
    )
    optional.add_argument(
        "-port",
        dest="port",
        default=port_default_value,
        type=int,
        help="TCP port number to run omnisci_server on.",
    )
    optional.add_argument(
        "-http_port",
        dest="http_port",
        default=port_default_value,
        type=int,
        help="HTTP port number to run omnisci_server on.",
    )
    optional.add_argument(
        "-calcite_port",
        dest="calcite_port",
        default=port_default_value,
        type=int,
        help="Calcite port number to run omnisci_server on.",
    )
    optional.add_argument(
        "-user",
        dest="user",
        default="admin",
        help="User name to use on omniscidb server.",
    )
    optional.add_argument(
        "-password",
        dest="password",
        default="HyperInteractive",
        help="User password to use on omniscidb server.",
    )
    optional.add_argument(
        "-database_name",
        dest="database_name",
        default="omnisci",
        help="Database name to use in omniscidb server.",
    )
    optional.add_argument(
        "-table",
        dest="table",
        default="benchmark_table",
        # FIX: doubled word "name name" in help.
        help="Table name to use in omniscidb server.",
    )
    optional.add_argument(
        "-ipc_conn",
        dest="ipc_connection",
        default=True,
        type=str_arg_to_bool,
        # FIX: help text was a copy-paste of the -table option's help.
        help="Use IPC connection to the omniscidb server.",
    )
    # Additional information
    optional.add_argument(
        "-commit_omnisci",
        dest="commit_omnisci",
        default="1234567890123456789012345678901234567890",
        help="Omnisci commit hash to use for benchmark.",
    )
    optional.add_argument(
        "-commit_ibis",
        dest="commit_ibis",
        default="1234567890123456789012345678901234567890",
        help="Ibis commit hash to use for benchmark.",
    )

    try:
        os.environ["PYTHONIOENCODING"] = "UTF-8"
        os.environ["PYTHONUNBUFFERED"] = "1"
        args = parser.parse_args()

        if args.port == port_default_value:
            args.port = find_free_port()
        if args.http_port == port_default_value:
            args.http_port = find_free_port()
        if args.calcite_port == port_default_value:
            args.calcite_port = find_free_port()

        # `choices=benchmarks` guarantees exactly one branch matches.
        if args.bench_name == "ny_taxi":
            from taxi import run_benchmark
        elif args.bench_name == "santander":
            from santander import run_benchmark
        elif args.bench_name == "census":
            from census import run_benchmark
        elif args.bench_name == "plasticc":
            from plasticc import run_benchmark

        parameters = {
            "data_file": args.data_file,
            "dfiles_num": args.dfiles_num,
            "no_ml": args.no_ml,
            "no_ibis": args.no_ibis,
            "optimizer": args.optimizer,
            "pandas_mode": args.pandas_mode,
            "ray_tmpdir": args.ray_tmpdir,
            "ray_memory": args.ray_memory,
            "gpu_memory": args.gpu_memory,
        }

        if not args.no_ibis:
            if args.executable is None:
                # FIX: message referred to a nonexistent "-e/--executable"
                # flag; the actual option is -executable.
                parser.error(
                    "Omnisci executable should be specified with -executable for Ibis part"
                )
            omnisci_server = OmnisciServer(
                omnisci_executable=args.executable,
                omnisci_port=args.port,
                http_port=args.http_port,
                calcite_port=args.calcite_port,
                database_name=args.database_name,
                user=args.user,
                password=args.password,
            )
            parameters["database_name"] = args.database_name
            parameters["table"] = args.table
            parameters["dnd"] = args.dnd
            parameters["dni"] = args.dni
            parameters["validation"] = args.validation

        etl_results = []
        ml_results = []
        print(parameters)
        run_id = int(round(time.time()))
        for iter_num in range(1, args.iterations + 1):
            print(f"Iteration #{iter_num}")
            if not args.no_ibis:
                omnisci_server_worker = OmnisciServerWorker(omnisci_server)
                parameters["omnisci_server_worker"] = omnisci_server_worker
                parameters["ipc_connection"] = args.ipc_connection
                omnisci_server.launch()

            result = run_benchmark(parameters)

            if not args.no_ibis:
                omnisci_server.terminate()

            # Tag every backend result with the iteration and run id.
            for backend_res in result["ETL"]:
                if backend_res:
                    backend_res["Iteration"] = iter_num
                    backend_res["run_id"] = run_id
                    etl_results.append(backend_res)
            for backend_res in result["ML"]:
                if backend_res:
                    backend_res["Iteration"] = iter_num
                    backend_res["run_id"] = run_id
                    ml_results.append(backend_res)

            # Reporting to MySQL database
            if args.db_user is not None:
                if iter_num == 1:
                    db = mysql.connector.connect(
                        host=args.db_server,
                        port=args.db_port,
                        user=args.db_user,
                        passwd=args.db_pass,
                        db=args.db_name,
                    )
                    reporting_init_fields = {
                        "OmnisciCommitHash": args.commit_omnisci,
                        "IbisCommitHash": args.commit_ibis,
                    }

                    reporting_fields_benchmark_etl = {
                        x: "VARCHAR(500) NOT NULL" for x in etl_results[0]
                    }
                    # FIX: the original used `is not 1` / `is not 0`, which
                    # compares ints by identity (a SyntaxWarning on modern
                    # CPython and unreliable in general); use != instead.
                    if len(etl_results) != 1:
                        reporting_fields_benchmark_etl.update({
                            x: "VARCHAR(500) NOT NULL" for x in etl_results[1]
                        })
                    db_reporter_etl = DbReport(
                        db,
                        args.db_table_etl,
                        reporting_fields_benchmark_etl,
                        reporting_init_fields,
                    )

                    if len(ml_results) != 0:
                        reporting_fields_benchmark_ml = {
                            x: "VARCHAR(500) NOT NULL" for x in ml_results[0]
                        }
                        if len(ml_results) != 1:
                            reporting_fields_benchmark_ml.update({
                                x: "VARCHAR(500) NOT NULL"
                                for x in ml_results[1]
                            })
                        db_reporter_ml = DbReport(
                            db,
                            args.db_table_ml,
                            reporting_fields_benchmark_ml,
                            reporting_init_fields,
                        )

                for result_etl in etl_results:
                    db_reporter_etl.submit(result_etl)
                if len(ml_results) != 0:
                    for result_ml in ml_results:
                        db_reporter_ml.submit(result_ml)
    except Exception:
        traceback.print_exc(file=sys.stdout)
        sys.exit(1)
    finally:
        if omnisci_server_worker:
            omnisci_server_worker.terminate()
def __init__(self, name):
    """Record this component's name and allocate its network endpoints."""
    self.name = name
    # Docker bridge address the peers will reach us on.
    self.hostname = get_docker_bridge()
    # Keep the original allocation order: reservation first, then the
    # free peer port.
    self.port_rez = PortReservation()
    self.peer_port = find_free_port()
def __init__(self, zone):
    """Bind a free port for the given zone and bring the component up."""
    self.zone, self.port = zone, find_free_port()
    self.start()