def test_args_default_resource_limits(test_microvm_with_initrd):
    """
    Test the default resource limits are correctly set by the jailer.

    @type: security
    """
    test_microvm = test_microvm_with_initrd
    test_microvm.spawn()

    # Get firecracker's PID.
    pid = int(test_microvm.jailer_clone_pid)
    assert pid != 0

    # Fetch firecracker process limits for number of open fds.
    (soft, hard) = resource.prlimit(pid, resource.RLIMIT_NOFILE)
    # Check that the default limit was set.
    assert soft == 2048
    assert hard == 2048

    # Fetch firecracker process limits for maximum file size.
    (soft, hard) = resource.prlimit(pid, resource.RLIMIT_FSIZE)
    # Check that no limit was set.
    assert soft == -1
    assert hard == -1
def __enter__(self):
    if self._has_entered:
        raise RuntimeError(
            "You can not use the same AsyncEvaluator in two different contexts."
        )
    self._has_entered = True

    self._input = self._queue_manager.Queue()
    self._output = self._queue_manager.Queue()

    log.debug(
        f"Process {self._main_process.pid} starting {self._n_jobs} subprocesses."
    )
    for _ in range(self._n_jobs):
        mp_process = multiprocessing.Process(
            target=evaluator_daemon,
            args=(self._input, self._output, AsyncEvaluator.defaults),
            daemon=True,
        )
        mp_process.start()
        subprocess = psutil.Process(mp_process.pid)
        self._processes.append(subprocess)
        if resource and AsyncEvaluator.memory_limit_mb:
            limit = AsyncEvaluator.memory_limit_mb * (2**20)
            resource.prlimit(subprocess.pid, resource.RLIMIT_AS, (limit, limit))
    self._log_memory_usage()
    return self
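Capping RLIMIT_AS on a worker, as above, makes oversized allocations in that process fail with MemoryError instead of inviting the system OOM killer. A minimal self-limiting illustration of the same mechanism (the cap and allocation sizes are illustrative, not taken from the snippet above):

import resource

# Cap this process's address space at 1 GiB (pid 0 means "this process"),
# then try to allocate well past the cap.
limit = 1 * (2**30)
resource.prlimit(0, resource.RLIMIT_AS, (limit, limit))
try:
    buf = bytearray(2 * (2**30))  # mmap fails under the address-space cap
except MemoryError:
    print("allocation refused by RLIMIT_AS")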
def proc_rlimit(
        proc: "Process",
        res: int,
        new_limits: Optional[Tuple[int, int]] = None) -> Tuple[int, int]:
    if new_limits is None:
        return resource.prlimit(  # pylint: disable=no-member
            # pytype: disable=missing-parameter
            proc.pid, res)
    else:
        return resource.prlimit(proc.pid, res, new_limits)  # pylint: disable=no-member
def check_limits(pid, no_file, fsize):
    """Verify resource limits against expected values."""
    # Fetch firecracker process limits for number of open fds.
    (soft, hard) = resource.prlimit(pid, resource.RLIMIT_NOFILE)
    assert soft == no_file
    assert hard == no_file

    # Fetch firecracker process limits for maximum file size.
    (soft, hard) = resource.prlimit(pid, resource.RLIMIT_FSIZE)
    assert soft == fsize
    assert hard == fsize
def test_sigxfsz_handler(test_microvm_with_api):
    """
    Test intercepting and handling SIGXFSZ.

    @type: functional
    """
    microvm = test_microvm_with_api
    microvm.spawn()

    # We don't need to monitor the memory for this test.
    microvm.memory_monitor = None

    # We need to use the Sync file engine type. If we use io_uring we will
    # not get a SIGXFSZ; we'll instead get an errno 27 (File too large) as
    # the completed entry status code.
    microvm.basic_config(rootfs_io_engine="Sync")

    # Configure metrics based on a file.
    metrics_path = os.path.join(microvm.path, 'metrics_fifo')
    utils.run_cmd("touch {}".format(metrics_path))
    response = microvm.metrics.put(
        metrics_path=microvm.create_jailed_resource(metrics_path)
    )
    assert microvm.api_session.is_status_no_content(response.status_code)

    microvm.start()

    metrics_jail_path = os.path.join(microvm.jailer.chroot_path(), metrics_path)
    metrics_fd = open(metrics_jail_path, encoding='utf-8')
    line_metrics = metrics_fd.readlines()
    assert len(line_metrics) == 1

    firecracker_pid = int(microvm.jailer_clone_pid)
    size = os.path.getsize(metrics_jail_path)

    # SIGXFSZ is triggered because the rootfs is bigger than three times the
    # size of the metrics file. Since the metrics file is flushed twice, the
    # limit has to be larger than that so the SIGXFSZ metric itself can
    # still be logged.
    res.prlimit(firecracker_pid, res.RLIMIT_FSIZE,
                (size * 3, res.RLIM_INFINITY))

    while True:
        try:
            utils.run_cmd("ps -p {}".format(firecracker_pid))
            sleep(1)
        except ChildProcessError:
            break

    microvm.expect_kill_by_signal = True
    msg = 'Shutting down VM after intercepting signal 25, code 0'
    microvm.check_log_message(msg)
    metric_line = json.loads(metrics_fd.readlines()[0])
    assert metric_line["signals"]["sigxfsz"] == 1
def proc_rlimit(
        pid: int,
        res: int,
        new_limits: Optional[Tuple[int, int]] = None) -> Tuple[int, int]:
    if pid == 0:
        # prlimit() treats pid=0 specially.
        # psutil doesn't, so we don't either.
        raise ProcessLookupError

    if new_limits is None:
        return resource.prlimit(  # pytype: disable=missing-parameter
            # pylint: disable=no-member
            pid, res)
    else:
        return resource.prlimit(pid, res, new_limits)  # pylint: disable=no-member
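Both proc_rlimit wrappers above dispatch between the two call forms of resource.prlimit (Linux-only, added in Python 3.4). A minimal sketch of those forms against the current process; note that at the syscall level pid 0 also means "this process", which is exactly the special case the wrapper above refuses to forward:

import os
import resource

# Query form: two arguments return the target's current (soft, hard) pair.
soft, hard = resource.prlimit(os.getpid(), resource.RLIMIT_NOFILE)

# Set form: a (soft, hard) third argument installs new limits and returns
# the previous ones. Lowering a limit needs no privileges; raising a hard
# limit requires CAP_SYS_RESOURCE.
previous = resource.prlimit(os.getpid(), resource.RLIMIT_NOFILE,
                            (min(soft, 1024), hard))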
def limit_resources(timeout, pid=None):
    """Apply resource limit given by ``--memout`` and timeout arguments."""
    if pid:
        setlimit = lambda *args: resource.prlimit(pid, *args)  # noqa: E731
    else:
        setlimit = lambda *args: resource.setrlimit(*args)  # noqa: E731

    if options.args().memout:
        setlimit(resource.RLIMIT_AS,
                 (options.args().memout * 1024 * 1024, resource.RLIM_INFINITY))
    if timeout:
        timeout = math.ceil(timeout)
        setlimit(resource.RLIMIT_CPU, (timeout, timeout))
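The RLIMIT_CPU pair set here makes the kernel deliver SIGXCPU at the soft limit (and SIGKILL at the hard limit), terminating the target once it burns that much CPU time. A small self-contained demo of the effect, assuming Linux; the spin loop and one-second limit are illustrative:

import resource
import subprocess

# Spin in a child, then cap its CPU time to one second via prlimit.
child = subprocess.Popen(["python3", "-c", "while True: pass"])
resource.prlimit(child.pid, resource.RLIMIT_CPU, (1, 1))
child.wait()
# The default SIGXCPU action terminates the child, so wait() reports a
# signal death via a negative return code.
print("terminated by signal", -child.returncode)  # usually SIGXCPU (24)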
def run_rnasubopt(ifname, deltaenergy, number=None):
    errs = []
    rna_args = ['nice', args.rnasubopt, '-e', str(deltaenergy), '-i', ifname]
    safety_args = ['nice', args.trivialsafety]
    if number is not None:
        safety_args.append('-num')
        safety_args.append(str(number))
    # print(' '.join(rna_args), '|', ' '.join(safety_args))

    rna = psutil.Popen(rna_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    safety = psutil.Popen(safety_args, stdin=rna.stdout,
                          stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    rna.stdout.close()

    if args.timeout > 0:
        resource.prlimit(
            rna.pid, resource.RLIMIT_CPU,
            (int(args.timeout * 60 * 60), int(args.timeout * 60 * 60)))

    folddata = None
    try:
        folddata = json.loads(safety.stdout.read().decode('utf-8'))
    except json.decoder.JSONDecodeError as e:
        errs.append('{}: Failed to decode trivial safety output: {}'.format(
            ifname, e))

    rnaerr = rna.stderr.read()
    if rnaerr is not None and len(rnaerr) > 0:
        errs.append('RNAsubopt for {} returned errors:\n{}'.format(
            ifname, rnaerr))
    safetyerr = safety.stderr.read()
    if safetyerr is not None and len(safetyerr) > 0:
        errs.append('Trivial safety for {} returned errors:\n{}'.format(
            ifname, safetyerr))

    rpid, rstatus, rres = os.wait4(rna.pid, 0)
    # print('RNA: Status: {}, user (s): {:5.1f}, sys (s): {:5.1f}, maxrss (kB): {:6d}'.format(
    #     rstatus, rres.ru_utime, rres.ru_stime, rres.ru_maxrss))
    spid, sstatus, sres = os.wait4(safety.pid, 0)
    # print('Safety: Status: {}, user (s): {:5.1f}, sys (s): {:5.1f}, maxrss (kB): {:6d}'.format(
    #     sstatus, sres.ru_utime, sres.ru_stime, sres.ru_maxrss))

    if os.WIFSIGNALED(rstatus) and not (
            number is not None and os.WTERMSIG(rstatus) == signal.SIGPIPE):
        errs.append('{}: RNAsubopt was terminated with signal {}'.format(
            ifname, os.WTERMSIG(rstatus)))
        return (None, errs)
    if os.WIFSIGNALED(sstatus):
        errs.append('{}: Trivialsafety was terminated with signal {}'.format(
            ifname, os.WTERMSIG(sstatus)))
        return (None, errs)
    if folddata is None:
        errs.append('{}: folddata was None for unknown reason'.format(ifname))
        return (None, errs)

    folddata['Command'] = '{} | {}'.format(
        ' '.join(rna_args), ' '.join(safety_args))
    folddata['Resources'] = {
        'RNAsuboptUser': rres.ru_utime,
        'RNAsuboptSys': rres.ru_stime,
        'RNAsuboptRSS': rres.ru_maxrss,
        'TrivialSafetyUser': sres.ru_utime,
        'TrivialSafetySys': sres.ru_stime,
        'TrivialSafetyRSS': sres.ru_maxrss,
    }
    return (folddata, errs)
def test_api_machine_config(test_microvm_with_api):
    """
    Test /machine_config PUT/PATCH scenarios that unit tests can't cover.

    @type: functional
    """
    test_microvm = test_microvm_with_api
    test_microvm.spawn()

    # Test invalid vcpu count < 0.
    response = test_microvm.machine_cfg.put(vcpu_count='-2')
    assert test_microvm.api_session.is_status_bad_request(response.status_code)

    # Test invalid type for ht_enabled flag.
    response = test_microvm.machine_cfg.put(ht_enabled='random_string')
    assert test_microvm.api_session.is_status_bad_request(response.status_code)

    # Test invalid CPU template.
    response = test_microvm.machine_cfg.put(cpu_template='random_string')
    assert test_microvm.api_session.is_status_bad_request(response.status_code)

    response = test_microvm.machine_cfg.patch(track_dirty_pages=True)
    assert test_microvm.api_session.is_status_bad_request(response.status_code)

    response = test_microvm.machine_cfg.patch(cpu_template='C3')
    if platform.machine() == "x86_64":
        assert test_microvm.api_session.is_status_no_content(
            response.status_code)
    else:
        assert test_microvm.api_session.is_status_bad_request(
            response.status_code)
        assert "CPU templates are not supported on aarch64" in response.text

    # Test invalid mem_size_mib < 0.
    response = test_microvm.machine_cfg.put(mem_size_mib='-2')
    assert test_microvm.api_session.is_status_bad_request(response.status_code)

    # Test invalid mem_size_mib > usize::MAX.
    bad_size = 1 << 64
    response = test_microvm.machine_cfg.put(mem_size_mib=bad_size)
    fail_msg = "error occurred when deserializing the json body of a " \
               "request: invalid type"
    assert test_microvm.api_session.is_status_bad_request(response.status_code)
    assert fail_msg in response.text

    # Test mem_size_mib of valid type, but too large.
    test_microvm.basic_config()
    firecracker_pid = int(test_microvm.jailer_clone_pid)
    resource.prlimit(firecracker_pid, resource.RLIMIT_AS,
                     (MEM_LIMIT, resource.RLIM_INFINITY))
    bad_size = (1 << 64) - 1
    response = test_microvm.machine_cfg.patch(mem_size_mib=bad_size)
    assert test_microvm.api_session.is_status_no_content(response.status_code)
    response = test_microvm.actions.put(action_type='InstanceStart')
    fail_msg = "Invalid Memory Configuration: MmapRegion(Mmap(Os { code: " \
               "12, kind: Other, message: Out of memory }))"
    assert test_microvm.api_session.is_status_bad_request(response.status_code)
    assert fail_msg in response.text

    # Test invalid mem_size_mib = 0.
    response = test_microvm.machine_cfg.patch(mem_size_mib=0)
    assert test_microvm.api_session.is_status_bad_request(response.status_code)
    assert "The memory size (MiB) is invalid." in response.text

    # Test valid mem_size_mib.
    response = test_microvm.machine_cfg.patch(mem_size_mib=256)
    assert test_microvm.api_session.is_status_no_content(response.status_code)
    response = test_microvm.actions.put(action_type='InstanceStart')
    if utils.get_cpu_vendor() != utils.CpuVendor.INTEL:
        # We shouldn't be able to apply Intel templates on AMD hosts.
        fail_msg = "Internal error while starting microVM: Error configuring" \
                   " the vcpu for boot: Cpuid error: InvalidVendor"
        assert test_microvm.api_session.is_status_bad_request(
            response.status_code)
        assert fail_msg in response.text
    else:
        assert test_microvm.api_session.is_status_no_content(
            response.status_code)

    # Validate full vm configuration after patching machine config.
    response = test_microvm.full_cfg.get()
    assert test_microvm.api_session.is_status_ok(response.status_code)
    assert response.json()['machine-config']['vcpu_count'] == 2
    assert response.json()['machine-config']['mem_size_mib'] == 256
async def start(self):
    exe_dir = path.join(conf.get('home'), 'bin')
    exe_path = path.join(exe_dir, 'jamovi-engine')

    env = os.environ.copy()
    env['R_HOME'] = conf.get('r_home', env.get('R_HOME', ''))
    env['R_LIBS'] = conf.get('r_libs', env.get('R_LIBS', ''))
    env['FONTCONFIG_PATH'] = conf.get('fontconfig_path',
                                      env.get('FONTCONFIG_PATH', ''))
    env['JAMOVI_MODULES_PATH'] = conf.get(
        'modules_path', env.get('JAMOVI_MODULES_PATH', ''))

    con = '--con={}'.format(self._conn_path)
    pth = '--path={}'.format(self._data_path)

    try:
        if platform.uname().system == 'Windows':
            si = subprocess.STARTUPINFO()
            # makes the engine windows visible in debug mode (on windows)
            if not conf.get('debug', False):
                si.dwFlags |= subprocess.STARTF_USESHOWWINDOW
            self._process = subprocess.Popen(
                [exe_path, con, pth],
                startupinfo=si,
                stdout=None,  # stdouts seem to break things on windows
                stderr=None,
                env=env)
        else:
            # stdin=PIPE, because the engines use the closing of
            # stdin to terminate themselves.
            self._process = await create_subprocess_exec(
                exe_path, con, pth,
                stdout=None,
                stderr=None,
                stdin=subprocess.PIPE,
                env=env)

        mem_limit = conf.get('memory_limit_engine', None)
        if mem_limit:
            if platform.uname().system == 'Linux':
                import resource
                try:
                    limit = int(mem_limit) * 1024 * 1024  # MiB
                    resource.prlimit(self._process.pid,
                                     resource.RLIMIT_AS,
                                     (limit, limit))
                except ValueError:
                    raise ValueError('memory_limit_engine: bad value')
            else:
                raise ValueError(
                    'memory_limit_engine is unavailable on systems other than Linux')

        if self._monitor is not None:
            self._monitor.monitor(self._process)

        self._socket = nanomsg.Socket(nanomsg.PAIR)
        self._socket._set_recv_timeout(500)
        self._socket.bind(self._conn_path)

        # need a separate thread for nanomsg :/
        self._thread = threading.Thread(target=self._run)
        self._thread.start()

    except Exception as e:
        log.exception(e)
        self._parent._notify_engine_event({
            'type': 'error',
            'message': 'Engine process could not be started',
            'cause': str(e),
        })
def run(args):
    # Set a relatively low cap on max open sessions, so we can saturate it
    # in a reasonable amount of time.
    args.max_open_sessions = 100
    args.max_open_sessions_hard = args.max_open_sessions + 20

    # Chunk often, so that new fds are regularly requested.
    args.ledger_chunk_bytes = "500B"

    with infra.network.network(args.nodes, args.binary_dir, args.debug_nodes,
                               args.perf_nodes, pdb=args.pdb) as network:
        check = infra.checker.Checker()
        network.start_and_join(args)

        primary, _ = network.find_nodes()
        primary_pid = primary.remote.remote.proc.pid
        initial_fds = psutil.Process(primary_pid).num_fds()
        assert (
            initial_fds < args.max_open_sessions
        ), f"Initial number of file descriptors has already reached session limit: {initial_fds} >= {args.max_open_sessions}"

        num_fds = initial_fds
        LOG.success(f"{primary_pid} has {num_fds} open file descriptors")

        def create_connections_until_exhaustion(target, continue_to_hard_cap=False):
            with contextlib.ExitStack() as es:
                clients = []
                LOG.success(f"Creating {target} clients")
                consecutive_failures = 0
                for i in range(target):
                    logs = []
                    try:
                        clients.append(
                            es.enter_context(
                                primary.client("user0", connection_timeout=1)))
                        r = clients[-1].post(
                            "/app/log/private",
                            {"id": 42, "msg": "foo"},
                            log_capture=logs,
                        )
                        if r.status_code == http.HTTPStatus.OK:
                            check(r, result=True)
                            consecutive_failures = 0
                        elif r.status_code == http.HTTPStatus.SERVICE_UNAVAILABLE:
                            if continue_to_hard_cap:
                                consecutive_failures = 0
                                continue
                            raise RuntimeError(r.body.text())
                        else:
                            flush_info(logs)
                            raise ValueError(
                                f"Unexpected response status code: {r.status_code}")
                    except (CCFConnectionException, RuntimeError) as e:
                        flush_info(logs)
                        LOG.warning(f"Hit exception at client {i}: {e}")
                        clients.pop(-1)
                        if consecutive_failures < 5:
                            # Maybe got unlucky and tried to create a session
                            # while many files were open - keep trying.
                            consecutive_failures += 1
                            continue
                        else:
                            # Ok, you've really hit a wall; stop trying to
                            # create clients.
                            break
                else:
                    raise AllConnectionsCreatedException(
                        f"Successfully created {target} clients without exception - expected this to exhaust available connections"
                    )

                num_fds = psutil.Process(primary_pid).num_fds()
                LOG.success(
                    f"{primary_pid} has {num_fds}/{max_fds} open file descriptors")

                # Submit many requests, and at least enough to trigger
                # additional snapshots.
                more_requests = max(len(clients) * 3, args.snapshot_tx_interval * 2)
                LOG.info(
                    f"Submitting an additional {more_requests} requests from existing clients")
                for _ in range(more_requests):
                    client = random.choice(clients)
                    logs = []
                    try:
                        client.post(
                            "/app/log/private",
                            {"id": 42, "msg": "foo"},
                            timeout=1,
                            log_capture=logs,
                        )
                    except Exception as e:
                        flush_info(logs)
                        LOG.error(e)
                        raise e

                time.sleep(1)
                num_fds = psutil.Process(primary_pid).num_fds()
                LOG.success(
                    f"{primary_pid} has {num_fds}/{max_fds} open file descriptors")

                LOG.info("Disconnecting clients")
                clients = []

            time.sleep(1)
            num_fds = psutil.Process(primary_pid).num_fds()
            LOG.success(
                f"{primary_pid} has {num_fds}/{max_fds} open file descriptors")
            return num_fds

        # For initial safe tests, we have many more fds than the maximum
        # sessions, so file operations should still succeed even when the
        # network is saturated.
        max_fds = args.max_open_sessions + (initial_fds * 2)
        resource.prlimit(primary_pid, resource.RLIMIT_NOFILE, (max_fds, max_fds))
        LOG.success(
            f"Setting max fds to safe initial value {max_fds} on {primary_pid}")

        nb_conn = (max_fds - num_fds) * 2
        num_fds = create_connections_until_exhaustion(nb_conn)

        to_create = max_fds - num_fds + 1
        num_fds = create_connections_until_exhaustion(to_create)

        try:
            create_connections_until_exhaustion(to_create, True)
        except AllConnectionsCreatedException:
            # This is fine! The soft cap means this test no longer reaches
            # the hard cap. It gets HTTP errors but then _closes_ sockets,
            # fast enough that we never hit the hard cap.
            pass

        # Now set a low fd limit, so network sessions completely exhaust
        # them - expect this to cause failures.
        max_fds = args.max_open_sessions // 2
        resource.prlimit(primary_pid, resource.RLIMIT_NOFILE, (max_fds, max_fds))
        LOG.success(
            f"Setting max fds to dangerously low {max_fds} on {primary_pid}")

        try:
            num_fds = create_connections_until_exhaustion(to_create)
        except Exception as e:
            LOG.warning(
                f"Node with only {max_fds} fds crashed when allowed to create {args.max_open_sessions} sessions, as expected")
            LOG.warning(e)
            network.ignore_errors_on_shutdown()
        else:
            raise RuntimeError("Expected a fatal crash and saw none!")
def run(args):
    hosts = ["localhost"] * (4 if args.consensus == "pbft" else 1)

    with infra.network.network(hosts, args.binary_dir, args.debug_nodes,
                               args.perf_nodes, pdb=args.pdb) as network:
        check = infra.checker.Checker()
        network.start_and_join(args)
        primary, _ = network.find_nodes()

        primary_pid = primary.remote.remote.proc.pid
        num_fds = psutil.Process(primary_pid).num_fds()
        max_fds = num_fds + 150
        LOG.success(f"{primary_pid} has {num_fds} open file descriptors")

        resource.prlimit(primary_pid, resource.RLIMIT_NOFILE, (max_fds, max_fds))
        LOG.success(f"set max fds to {max_fds} on {primary_pid}")

        nb_conn = (max_fds - num_fds) * 2
        clients = []

        with contextlib.ExitStack() as es:
            LOG.success(f"Creating {nb_conn} clients")
            for i in range(nb_conn):
                try:
                    clients.append(es.enter_context(primary.client("user0")))
                    LOG.info(f"Created client {i}")
                except OSError:
                    LOG.error(f"Failed to create client {i}")

            # Creating clients may not actually create connections/fds.
            # Send messages until we run out of fds.
            for i, c in enumerate(clients):
                if psutil.Process(primary_pid).num_fds() >= max_fds:
                    LOG.warning(f"Reached fd limit at client {i}")
                    break
                LOG.info(f"Sending as client {i}")
                check(c.post("/app/log/private", {"id": 42, "msg": "foo"}),
                      result=True)

            try:
                clients[-1].post("/app/log/private", {"id": 42, "msg": "foo"})
            except Exception:
                pass
            else:
                assert False, "Expected error due to fd limit"

            num_fds = psutil.Process(primary_pid).num_fds()
            LOG.success(
                f"{primary_pid} has {num_fds}/{max_fds} open file descriptors")
            LOG.info("Disconnecting clients")
            clients = []

        time.sleep(1)
        num_fds = psutil.Process(primary_pid).num_fds()
        LOG.success(f"{primary_pid} has {num_fds}/{max_fds} open file descriptors")

        with contextlib.ExitStack() as es:
            to_create = max_fds - num_fds + 1
            LOG.success(f"Creating {to_create} clients")
            for i in range(to_create):
                clients.append(es.enter_context(primary.client("user0")))
                LOG.info(f"Created client {i}")

            for i, c in enumerate(clients):
                if psutil.Process(primary_pid).num_fds() >= max_fds:
                    LOG.warning(f"Reached fd limit at client {i}")
                    break
                LOG.info(f"Sending as client {i}")
                check(c.post("/app/log/private", {"id": 42, "msg": "foo"}),
                      result=True)

            try:
                clients[-1].post("/app/log/private", {"id": 42, "msg": "foo"})
            except Exception:
                pass
            else:
                assert False, "Expected error due to fd limit"

            num_fds = psutil.Process(primary_pid).num_fds()
            LOG.success(
                f"{primary_pid} has {num_fds}/{max_fds} open file descriptors")
            LOG.info("Disconnecting clients")
            clients = []

        time.sleep(1)
        num_fds = psutil.Process(primary_pid).num_fds()
        LOG.success(f"{primary_pid} has {num_fds}/{max_fds} open file descriptors")
def run(args):
    # Set a relatively low cap on max open sessions, so we can saturate it
    # in a reasonable amount of time.
    args.max_open_sessions = 40
    args.max_open_sessions_hard = args.max_open_sessions + 5

    # Listen on additional RPC interfaces with even lower session caps.
    for i, node_spec in enumerate(args.nodes):
        additional_args = []
        caps = interface_caps(i)
        for address, cap in caps.items():
            additional_args.append(f"--rpc-interface={address},,{cap}")
        node_spec.additional_raw_node_args = additional_args

    # Chunk often, so that new fds are regularly requested.
    args.ledger_chunk_bytes = "500B"

    with infra.network.network(
        args.nodes, args.binary_dir, args.debug_nodes, args.perf_nodes, pdb=args.pdb
    ) as network:
        check = infra.checker.Checker()
        network.start_and_join(args)
        primary, _ = network.find_nodes()

        caps = interface_caps(primary.local_node_id)

        primary_pid = primary.remote.remote.proc.pid

        initial_fds = psutil.Process(primary_pid).num_fds()
        assert (
            initial_fds < args.max_open_sessions
        ), f"Initial number of file descriptors has already reached session limit: {initial_fds} >= {args.max_open_sessions}"

        num_fds = initial_fds
        LOG.success(f"{primary_pid} has {num_fds} open file descriptors")

        initial_metrics = get_session_metrics(primary)
        assert initial_metrics["active"] <= initial_metrics["peak"], initial_metrics
        main_session_metrics = initial_metrics["interfaces"][
            f"{primary.rpc_host}:{primary.rpc_port}"
        ]
        assert (
            main_session_metrics["soft_cap"] == args.max_open_sessions
        ), initial_metrics
        assert (
            main_session_metrics["hard_cap"] == args.max_open_sessions_hard
        ), initial_metrics

        max_fds = args.max_open_sessions + (initial_fds * 2)

        def create_connections_until_exhaustion(
            target, continue_to_hard_cap=False, client_fn=primary.client
        ):
            with contextlib.ExitStack() as es:
                clients = []
                LOG.success(f"Creating {target} clients")
                consecutive_failures = 0
                i = 1
                healthy_clients = []
                while i <= target:
                    logs = []
                    try:
                        clients.append(
                            es.enter_context(
                                client_fn(
                                    identity="user0",
                                    connection_timeout=1,
                                    limits=httpx.Limits(
                                        max_connections=1,
                                        max_keepalive_connections=1,
                                        keepalive_expiry=30,
                                    ),
                                )
                            )
                        )
                        r = clients[-1].post(
                            "/app/log/private",
                            {"id": 42, "msg": "foo"},
                            log_capture=logs,
                        )
                        if r.status_code == http.HTTPStatus.OK:
                            check(r, result=True)
                            consecutive_failures = 0
                            i += 1
                            healthy_clients.append(clients[-1])
                        elif r.status_code == http.HTTPStatus.SERVICE_UNAVAILABLE:
                            if continue_to_hard_cap:
                                consecutive_failures = 0
                                i += 1
                                continue
                            raise RuntimeError(r.body.text())
                        else:
                            flush_info(logs)
                            raise ValueError(
                                f"Unexpected response status code: {r.status_code}"
                            )
                    except (CCFConnectionException, RuntimeError) as e:
                        flush_info(logs)
                        LOG.warning(f"Hit exception at client {i}/{target}: {e}")
                        clients.pop(-1)
                        if consecutive_failures < 5:
                            # Maybe got unlucky and tried to create a session
                            # while many files were open - keep trying.
                            consecutive_failures += 1
                            continue
                        else:
                            # Ok, you've really hit a wall; stop trying to
                            # create clients.
                            break
                else:
                    raise AllConnectionsCreatedException(
                        f"Successfully created {target} clients without exception - expected this to exhaust available connections"
                    )

                num_fds = psutil.Process(primary_pid).num_fds()
                LOG.success(
                    f"{primary_pid} has {num_fds}/{max_fds} open file descriptors"
                )
                r = clients[0].get("/node/metrics")
                assert r.status_code == http.HTTPStatus.OK, r.status_code
                peak_metrics = r.body.json()["sessions"]
                assert peak_metrics["active"] <= peak_metrics["peak"], peak_metrics
                assert peak_metrics["active"] == len(healthy_clients), (
                    peak_metrics,
                    len(healthy_clients),
                )

                # Submit many requests, and at least enough to trigger
                # additional snapshots.
                more_requests = max(len(clients) * 3, args.snapshot_tx_interval * 2)
                LOG.info(
                    f"Submitting an additional {more_requests} requests from existing clients"
                )
                for _ in range(more_requests):
                    client = random.choice(healthy_clients)
                    logs = []
                    try:
                        client.post(
                            "/app/log/private",
                            {"id": 42, "msg": "foo"},
                            timeout=1,
                            log_capture=logs,
                        )
                    except Exception as e:
                        flush_info(logs)
                        LOG.error(e)
                        raise e

                time.sleep(1)
                num_fds = psutil.Process(primary_pid).num_fds()
                LOG.success(
                    f"{primary_pid} has {num_fds}/{max_fds} open file descriptors"
                )

                LOG.info("Disconnecting clients")
                clients = []

            time.sleep(1)
            num_fds = psutil.Process(primary_pid).num_fds()
            LOG.success(f"{primary_pid} has {num_fds}/{max_fds} open file descriptors")
            return num_fds

        # For initial safe tests, we have many more fds than the maximum
        # sessions, so file operations should still succeed even when the
        # network is saturated.
        resource.prlimit(primary_pid, resource.RLIMIT_NOFILE, (max_fds, max_fds))
        LOG.success(f"Setting max fds to safe initial value {max_fds} on {primary_pid}")

        nb_conn = (max_fds - num_fds) * 2
        num_fds = create_connections_until_exhaustion(nb_conn)

        to_create = max_fds - num_fds + 1
        num_fds = create_connections_until_exhaustion(to_create)

        # Check that lower caps are enforced on each interface.
        for i, (address, cap) in enumerate(caps.items()):
            create_connections_until_exhaustion(
                cap + 1,
                client_fn=functools.partial(primary.client, interface_idx=i + 1),
            )

        try:
            create_connections_until_exhaustion(to_create, True)
        except AllConnectionsCreatedException:
            # This is fine! The soft cap means this test no longer reaches
            # the hard cap. It gets HTTP errors but then _closes_ sockets,
            # fast enough that we never hit the hard cap.
            pass

        final_metrics = get_session_metrics(primary)
        assert final_metrics["active"] <= final_metrics["peak"], final_metrics
        assert final_metrics["peak"] > initial_metrics["peak"], (
            initial_metrics,
            final_metrics,
        )
        assert final_metrics["peak"] >= args.max_open_sessions, final_metrics
        assert final_metrics["peak"] < args.max_open_sessions_hard, final_metrics

        # Now set a low fd limit, so network sessions completely exhaust
        # them - expect this to cause failures.
        max_fds = args.max_open_sessions // 2
        resource.prlimit(primary_pid, resource.RLIMIT_NOFILE, (max_fds, max_fds))
        LOG.success(f"Setting max fds to dangerously low {max_fds} on {primary_pid}")

        try:
            num_fds = create_connections_until_exhaustion(to_create)
        except Exception as e:
            LOG.warning(
                f"Node with only {max_fds} fds crashed when allowed to create {args.max_open_sessions} sessions, as expected"
            )
            LOG.warning(e)
            network.ignore_errors_on_shutdown()
        else:
            LOG.warning("Expected a fatal crash and saw none!")
import resource
import os
import sys

FD_LIMIT = int(sys.argv[1])

# Lower this process's own fd limit; reducing a limit needs no privileges.
resource.prlimit(os.getpid(), resource.RLIMIT_NOFILE, (FD_LIMIT, FD_LIMIT))

try:
    fptr = open("hello.txt", "r")
    print(fptr.readlines())
    fptr.close()
except OSError:
    # open() fails with EMFILE once the fd limit is exhausted.
    pass
async def start(self):
    self._at_startup = True

    self._process_stopping = threading.Event()
    self._process_abandoned = threading.Event()

    if self._socket is not None:
        try:
            self._socket.close()
        except Exception as e:
            log.exception(e)
        self._socket = None

    self._conn_path = f'{self._conn_root}-{self._parent._next_conn_index}'
    self._parent._next_conn_index += 1

    bin_dir = 'bin' if platform.system() != 'Darwin' else 'MacOS'
    exe_dir = path.join(self._config.get('home'), bin_dir)
    exe_path = path.join(exe_dir, 'jamovi-engine')

    env = os.environ.copy()
    env['R_HOME'] = self._config.get('r_home', env.get('R_HOME', ''))
    env['R_LIBS'] = self._config.get('r_libs', env.get('R_LIBS', ''))
    env['FONTCONFIG_PATH'] = self._config.get('fontconfig_path',
                                              env.get('FONTCONFIG_PATH', ''))
    env['JAMOVI_MODULES_PATH'] = self._config.get(
        'modules_path', env.get('JAMOVI_MODULES_PATH', ''))

    if platform.uname().system == 'Linux':
        # plotting under linux sometimes doesn't work without this
        env['LC_ALL'] = 'en_US.UTF-8'
        # https://github.com/jamovi/jamovi/issues/801
        # https://github.com/jamovi/jamovi/issues/831

    con = '--con={}'.format(self._conn_path)
    pth = '--path={}'.format(self._data_path)

    try:
        if platform.uname().system == 'Windows':
            si = subprocess.STARTUPINFO()
            # makes the engine windows visible in debug mode (on windows)
            if not self._config.get('debug', False):
                si.dwFlags |= subprocess.STARTF_USESHOWWINDOW
            self._process = subprocess.Popen(
                [exe_path, con, pth],
                startupinfo=si,
                stdout=None,  # stdouts seem to break things on windows
                stderr=None,
                env=env)
        else:
            # stdin=PIPE, because the engines use the closing of
            # stdin to terminate themselves.
            self._process = await create_subprocess_exec(
                exe_path, con, pth,
                stdout=None,
                stderr=None,
                stdin=subprocess.PIPE,
                env=env)

        mem_limit = self._config.get('memory_limit_engine', None)
        if mem_limit:
            if platform.uname().system == 'Linux':
                import resource
                try:
                    limit = int(mem_limit) * 1024 * 1024  # MiB
                    resource.prlimit(self._process.pid,
                                     resource.RLIMIT_AS,
                                     (limit, limit))
                except ValueError:
                    raise ValueError('memory_limit_engine: bad value')
            else:
                raise ValueError(
                    'memory_limit_engine is unavailable on systems other than Linux')

        if self._monitor is not None:
            self._monitor.monitor(self._process)

        self._socket = nanomsg.Socket(nanomsg.PAIR)
        self._socket._set_recv_timeout(500)
        self._socket.bind(self._conn_path)

        # need a separate thread for nanomsg :/
        self._thread = threading.Thread(target=self._run_loop, args=(
            self._socket,
            self._process,
            self._process_stopping,
            self._process_abandoned))
        self._thread.start()

        self._stopped.clear()
        self._running.set()

    except Exception as e:
        log.exception(e)
        self._parent._notify_engine_event({
            'type': 'error',
            'message': 'Engine process could not be started',
            'cause': str(e),
        })
def run(args):
    hosts = ["localhost"]

    with infra.ccf.network(hosts, args.binary_dir, args.debug_nodes,
                           args.perf_nodes, pdb=args.pdb) as network:
        check = infra.checker.Checker()
        network.start_and_join(args)
        primary, others = network.find_nodes()

        primary_pid = primary.remote.remote.proc.pid
        num_fds = psutil.Process(primary_pid).num_fds()
        max_fds = num_fds + 50
        LOG.info(f"{primary_pid} has {num_fds} open file descriptors")

        resource.prlimit(primary_pid, resource.RLIMIT_NOFILE, (max_fds, max_fds))
        LOG.info(f"set max fds to {max_fds} on {primary_pid}")

        nb_conn = (max_fds - num_fds) * 2
        clients = []

        with contextlib.ExitStack() as es:
            for i in range(nb_conn):
                try:
                    clients.append(
                        es.enter_context(primary.user_client(format="json")))
                    LOG.info(f"Connected client {i}")
                except OSError:
                    LOG.error(f"Failed to connect client {i}")

            c = clients[int(random.random() * len(clients))]
            check(c.rpc("LOG_record", {"id": 42, "msg": "foo"}), result=True)

            assert (
                len(clients) >= max_fds - num_fds - 1
            ), f"{len(clients)}, expected at least {max_fds - num_fds - 1}"

            num_fds = psutil.Process(primary_pid).num_fds()
            LOG.info(f"{primary_pid} has {num_fds} open file descriptors")
            LOG.info("Disconnecting clients")

        time.sleep(1)
        num_fds = psutil.Process(primary_pid).num_fds()
        LOG.info(f"{primary_pid} has {num_fds} open file descriptors")
        clients = []

        with contextlib.ExitStack() as es:
            for i in range(max_fds - num_fds):
                clients.append(
                    es.enter_context(primary.user_client(format="json")))
                LOG.info(f"Connected client {i}")

            c = clients[int(random.random() * len(clients))]
            check(c.rpc("LOG_record", {"id": 42, "msg": "foo"}), result=True)

            assert (
                len(clients) >= max_fds - num_fds - 1
            ), f"{len(clients)}, expected at least {max_fds - num_fds - 1}"

            num_fds = psutil.Process(primary_pid).num_fds()
            LOG.info(f"{primary_pid} has {num_fds} open file descriptors")
            LOG.info("Disconnecting clients")

        time.sleep(1)
        num_fds = psutil.Process(primary_pid).num_fds()
        LOG.info(f"{primary_pid} has {num_fds} open file descriptors")
def execute(logger, args, env=None, cwd=None, timeout=0.1,
            collect_all_stdout=False, filter_func=None,
            enforce_limitations=False, cpu_time_limit=450,
            memory_limit=1000000000):
    cmd = args[0]
    logger.debug('Execute:\n{0}{1}{2}'.format(
        cmd,
        '' if len(args) == 1 else ' ',
        ' '.join('"{0}"'.format(arg) for arg in args[1:])))

    if enforce_limitations:
        soft_time, hard_time = resource.getrlimit(resource.RLIMIT_CPU)
        soft_mem, hard_mem = resource.getrlimit(resource.RLIMIT_AS)
        logger.debug(
            'Got the following limitations: CPU time = {}s, memory = {}B'.format(
                cpu_time_limit, memory_limit))

    p = subprocess.Popen(args, env=env, stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE, cwd=cwd)

    if enforce_limitations:
        resource.prlimit(p.pid, resource.RLIMIT_CPU, (cpu_time_limit, hard_time))
        resource.prlimit(p.pid, resource.RLIMIT_AS, (memory_limit, hard_mem))

    out_q, err_q = (StreamQueue(p.stdout, 'STDOUT', collect_all_stdout),
                    StreamQueue(p.stderr, 'STDERR', True))

    for stream_q in (out_q, err_q):
        stream_q.start()

    # Print to logs everything that is printed to STDOUT and STDERR each
    # timeout seconds. A last try is required to print the last messages
    # queued before the command finishes.
    last_try = True
    while not out_q.finished or not err_q.finished or last_try:
        if out_q.traceback:
            raise RuntimeError(
                'STDOUT reader thread failed with the following traceback:\n{0}'
                .format(out_q.traceback))
        if err_q.traceback:
            raise RuntimeError(
                'STDERR reader thread failed with the following traceback:\n{0}'
                .format(err_q.traceback))
        last_try = not out_q.finished or not err_q.finished
        time.sleep(timeout)

        for stream_q in (out_q, err_q):
            output = []
            while True:
                line = stream_q.get()
                if line is None:
                    break
                output.append(line)
            if output:
                m = '"{0}" outputted to {1}:\n{2}'.format(
                    cmd, stream_q.stream_name, '\n'.join(output))
                if stream_q is out_q:
                    logger.debug(m)
                else:
                    logger.warning(m)

    for stream_q in (out_q, err_q):
        stream_q.join()

    if p.poll():
        logger.error('"{0}" exited with "{1}"'.format(cmd, p.poll()))
        with open('problem desc.txt', 'a', encoding='utf-8') as fp:
            out = filter(filter_func, err_q.output) if filter_func else err_q.output
            fp.write('\n'.join(out))
        sys.exit(1)
    elif collect_all_stdout:
        return out_q.output
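execute() applies the limits only after Popen returns, so the child runs briefly unrestricted. A sketch of a race-free alternative under the same assumptions (Linux, CPython), using setrlimit in a preexec_fn that runs in the forked child before exec; the command and limit values are illustrative:

import resource
import subprocess

CPU_TIME_LIMIT = 450        # seconds, mirroring execute()'s default
MEMORY_LIMIT = 1000000000   # bytes

def _apply_limits():
    # Runs in the child between fork() and exec(), so the target program
    # never executes a single instruction without the limits in place.
    resource.setrlimit(resource.RLIMIT_CPU, (CPU_TIME_LIMIT, CPU_TIME_LIMIT))
    resource.setrlimit(resource.RLIMIT_AS, (MEMORY_LIMIT, MEMORY_LIMIT))

p = subprocess.Popen(["sleep", "1"], preexec_fn=_apply_limits)
p.wait()

The trade-off is that the stdlib documents preexec_fn as unsafe in multithreaded programs, which is one reason to prefer the post-spawn prlimit approach used above.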
    # ... tail of fight(), truncated in the original snippet
    return nbr_a, nbr_d


@chrono(1)
def fight_n(nbr_a, nbr_d, nbr):
    sum_a, sum_d = [], []
    for _ in range(nbr):
        a, d = fight(nbr_a, nbr_d)
        sum_a.append(a)
        sum_d.append(d)
    return statistics.mean(sum_a), statistics.mean(sum_d)


@parallel
def a_fight(nbr_a, nbr_d, q):
    q.put(fight(nbr_a, nbr_d))


@chrono(1)
def a_fight_n(nbr_a, nbr_d, nbr):
    q = mp.Queue(nbr)
    for _ in range(nbr):
        a_fight(nbr_a, nbr_d, q)
    return statistics.mean(q.get()[0] for _ in range(nbr))


if __name__ == '__main__':
    # resource.prlimit() requires at least a pid and a resource; the original
    # called it with no arguments, which raises TypeError. Querying the
    # current process's fd limits (pid 0 means "this process") is an assumed
    # intent.
    print(resource.prlimit(0, resource.RLIMIT_NOFILE))
    print(a_fight_n(300, 100, 500))
    print(fight_n(300, 100, 500))