def _wait_until_alive(self, client: EdenClient) -> None:
    def is_alive() -> Optional[bool]:
        if client.getDaemonInfo().status == fb303_status.ALIVE:
            return True
        return None

    poll_until(is_alive, timeout=60)

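# For reference, a minimal sketch of the poll_until(check, timeout=..., interval=...)
# contract that all of these helpers rely on: the check callable is invoked
# repeatedly until it returns a non-None value (which poll_until returns) or the
# timeout expires. This stand-in is hypothetical (hence the _sketch suffix) and
# only illustrates the behavior the tests depend on; it is not the real library
# helper.
import time
from typing import Callable, Optional, TypeVar

T = TypeVar("T")


def _poll_until_sketch(
    check: Callable[[], Optional[T]], timeout: float, interval: float = 0.2
) -> T:
    deadline = time.monotonic() + timeout
    while True:
        result = check()
        if result is not None:
            # Return the check's value; test_local_store_stats below relies on
            # poll_until returning it (see gc_run_succeeded).
            return result
        if time.monotonic() >= deadline:
            raise TimeoutError(f"condition not met within {timeout} seconds")
        time.sleep(interval)
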
def _wait_for_mount_running(
    self, client: EdenClient, path: Optional[Path] = None
) -> None:
    mount_path = path if path is not None else Path(self.mount)

    def mount_running() -> Optional[bool]:
        if self.eden.get_mount_state(mount_path, client) == MountState.RUNNING:
            return True
        return None

    poll_until(mount_running, timeout=60)

def test_stop_during_takeover(self) -> None:
    # Block the graceful restart
    with self.eden.get_thrift_client_legacy() as client:
        client.injectFault(
            FaultDefinition(
                keyClass="takeover", keyValueRegex="server_shutdown", block=True
            )
        )

        self.eden.wait_for_is_healthy()

        # Run a graceful restart.
        # This won't succeed until we unblock the shutdown.
        p = Process(target=self.eden.graceful_restart)
        p.start()

        # Wait for the state to be shutting down
        def state_shutting_down() -> Optional[bool]:
            if not p.is_alive():
                raise Exception(
                    "eden restart --graceful command finished while "
                    "graceful restart was still blocked"
                )
            if client.getDaemonInfo().status is fb303_status.STOPPING:
                return True
            return None

        poll_until(state_shutting_down, timeout=60)

        # A normal restart should be rejected while a graceful restart
        # is in progress
        self.assert_restart_fails_with_in_progress_graceful_restart(client)

        # A normal shutdown should be rejected while a graceful restart
        # is in progress
        self.assert_shutdown_fails_with_in_progress_graceful_restart(client)

        # SIGKILL should not kill the process while a graceful restart is in
        # progress
        self.assert_sigkill_fails_with_in_progress_graceful_restart(client)

        # Unblock the server shutdown and wait for the graceful restart to
        # complete.
        client.unblockFault(
            UnblockFaultArg(keyClass="takeover", keyValueRegex="server_shutdown")
        )
        p.join()

def test_mount_init_state(self) -> None:
    self.eden.run_cmd("unmount", self.mount)
    self.assertEqual({self.mount: "NOT_RUNNING"}, self.eden.list_cmd_simple())

    with self.eden.get_thrift_client_legacy() as client:
        fault = FaultDefinition(keyClass="mount", keyValueRegex=".*", block=True)
        client.injectFault(fault)

        # Run the "eden mount" CLI command.
        # This won't succeed until we unblock the mount.
        mount_cmd, edenfsctl_env = self.eden.get_edenfsctl_cmd_env(
            "mount", self.mount
        )
        mount_proc = subprocess.Popen(mount_cmd, env=edenfsctl_env)

        # Wait for the new mount to be reported by edenfs
        def mount_started() -> Optional[bool]:
            if self.eden.get_mount_state(Path(self.mount), client) is not None:
                return True
            if mount_proc.poll() is not None:
                raise Exception(
                    f"eden mount command finished (with status "
                    f"{mount_proc.returncode}) while mounting was "
                    f"still blocked"
                )
            return None

        poll_until(mount_started, timeout=30)
        self.assertEqual({self.mount: "INITIALIZING"}, self.eden.list_cmd_simple())

        # Most thrift calls to access the mount should be disallowed while it is
        # still initializing.
        self._assert_thrift_calls_fail_during_mount_init(client)

        # Unblock mounting and wait for the mount to transition to running
        client.unblockFault(UnblockFaultArg(keyClass="mount", keyValueRegex=".*"))
        self._wait_for_mount_running(client)
        self.assertEqual({self.mount: "RUNNING"}, self.eden.list_cmd_simple())
        mount_proc.wait()

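# The inject-then-unblock fault pattern above recurs throughout these tests. A
# hedged sketch of a reusable wrapper follows, assuming only the injectFault /
# unblockFault thrift calls already used here; the blocked_fault name and the
# contextmanager packaging are hypothetical, not part of the test harness:
from contextlib import contextmanager


@contextmanager
def blocked_fault(client, key_class: str, key_value_regex: str = ".*"):
    # Block any matching server-side operation until the with-block exits.
    client.injectFault(
        FaultDefinition(keyClass=key_class, keyValueRegex=key_value_regex, block=True)
    )
    try:
        yield
    finally:
        # Always release blocked operations, even if the test body fails,
        # so a failed assertion cannot wedge the daemon for later tests.
        client.unblockFault(
            UnblockFaultArg(keyClass=key_class, keyValueRegex=key_value_regex)
        )
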
def _run_test(
    self, invalidate_fn: Callable[[], None], timeout: float = 15
) -> None:
    self.update_validity_interval("20ms")

    # Run the function which will invalidate the state directory
    invalidate_fn()

    # EdenFS should exit on its own
    optional_edenfs = self.eden._process
    assert optional_edenfs is not None
    edenfs = optional_edenfs

    def edenfs_exited() -> Optional[bool]:
        returncode = edenfs.poll()
        if returncode is None:
            return None
        return True

    poll_until(edenfs_exited, timeout=timeout)

def _wait_until_initializing(self, num_mounts: int = 1) -> None:
    """Wait until EdenFS is initializing mount points.

    This is primarily intended to be used to wait until the mount points are
    initializing when starting EdenFS with --fault_injection_block_mounts.
    """

    def is_initializing() -> Optional[bool]:
        try:
            with self.eden.get_thrift_client_legacy() as client:
                # Return successfully when listMounts() reports the number of
                # mounts that we expect.
                mounts = client.listMounts()
                if len(mounts) == num_mounts:
                    return True
            edenfs_process = self.eden._process
            assert edenfs_process is not None
            # poll() returns None while the process is still running; compare
            # against None explicitly so an exit status of 0 is also caught.
            if edenfs_process.poll() is not None:
                self.fail("eden exited before becoming healthy")
            return None
        except (EdenNotRunningError, TException):
            return None

    poll_until(is_initializing, timeout=60)

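# A usage sketch for _wait_until_initializing, assuming a start() method on the
# test harness that accepts extra daemon arguments (a hypothetical signature,
# shown only to illustrate the pairing with --fault_injection_block_mounts
# mentioned in the docstring):
#
#     self.eden.shutdown()
#     self.eden.start(extra_args=["--fault_injection_block_mounts"])
#     self._wait_until_initializing(num_mounts=1)
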
def poll_until_inactive(self, timeout: float) -> None:
    def check_inactive() -> typing.Optional[bool]:
        return True if self.query_active_state() == "inactive" else None

    poll_until(check_inactive, timeout=timeout)

def test_mount_state_during_unmount_with_in_progress_checkout(self) -> None:
    mounts = self.eden.run_cmd("list")
    self.assertEqual(f"{self.mount}\n", mounts)

    self.backing_repo.write_file("foo/bar.txt", "new contents")
    new_commit = self.backing_repo.commit("Update foo/bar.txt")

    with self.eden.get_thrift_client() as client:
        client.injectFault(
            FaultDefinition(keyClass="inodeCheckout", keyValueRegex=".*", block=True)
        )

        # Run a checkout
        p1 = Process(target=self.repo.update, args=(new_commit,))
        p1.start()

        hg_parent = self.hg("log", "-r.", "-T{node}")

        # Ensure the checkout has started
        def checkout_in_progress() -> Optional[bool]:
            try:
                client.getScmStatusV2(
                    GetScmStatusParams(
                        mountPoint=bytes(self.mount, encoding="utf-8"),
                        commit=bytes(hg_parent, encoding="utf-8"),
                        listIgnored=False,
                    )
                )
            except EdenError as ex:
                if ex.errorType == EdenErrorType.CHECKOUT_IN_PROGRESS:
                    return True
                else:
                    raise ex
            return None

        util.poll_until(checkout_in_progress, timeout=30)

        p2 = Process(target=self.eden.unmount, args=(self.mount,))
        p2.start()

        # Wait for the state to be shutting down
        def state_shutting_down() -> Optional[bool]:
            mounts = self.eden.run_cmd("list")
            print(mounts)
            if mounts.find("SHUTTING_DOWN") != -1:
                return True
            if mounts.find("(not mounted)") != -1:
                self.fail(
                    "mount should not list status as not mounted while "
                    "checkout is in progress"
                )
            return None

        util.poll_until(state_shutting_down, timeout=30)

        # Unblock the checkout and wait for it to complete.
        client.unblockFault(
            UnblockFaultArg(keyClass="inodeCheckout", keyValueRegex=".*")
        )

        # Join the checkout before the unmount because the unmount call
        # won't finish until the checkout has finished.
        p1.join()
        p2.join()

def test_local_store_stats(self) -> None:
    # Update the config to tell the local store to update its stats frequently
    # and also to check if it needs to reload the config file frequently.
    initial_config = """\
[config]
reload-interval = "100ms"

[store]
stats-interval = "100ms"
"""
    self.eden.user_rc_path.write_text(initial_config)

    counter_regex = r"local_store\..*"
    with self.get_thrift_client() as client:
        # Make sure that EdenFS picks up our updated config,
        # since we wrote it out after EdenFS started.
        client.reloadConfig()

        # Get the local store counters.
        # Assert that they exist and are greater than 0.
        # (Since we include memtable sizes in the values these are currently
        # always reported as taking up at least a small amount of space.)
        initial_counters = client.getRegexCounters(counter_regex)
        self.assertGreater(initial_counters.get("local_store.blob.size"), 0)
        self.assertGreater(initial_counters.get("local_store.blobmeta.size"), 0)
        self.assertGreater(initial_counters.get("local_store.tree.size"), 0)
        self.assertGreater(
            initial_counters.get("local_store.hgcommit2tree.size"), 0
        )
        self.assertGreater(initial_counters.get("local_store.hgproxyhash.size"), 0)
        self.assertGreater(
            initial_counters.get("local_store.ephemeral.total_size"), 0
        )
        self.assertGreater(
            initial_counters.get("local_store.persistent.total_size"), 0
        )

        # Make sure the counters are less than 500MB, just as a sanity check
        self.assertLess(
            initial_counters.get("local_store.ephemeral.total_size"), 500_000_000
        )
        self.assertLess(
            initial_counters.get("local_store.persistent.total_size"), 500_000_000
        )

        # Read back several files
        self.assertEqual((self.mount_path / "a/dir/foo.txt").read_text(), "foo\n")
        self.assertEqual((self.mount_path / "a/dir/bar.txt").read_text(), "bar\n")
        self.assertEqual(
            (self.mount_path / "a/another_dir/hello.txt").read_text(), "hola\n"
        )

        # The tree store size should be larger now after reading these files.
        # The counters won't be updated until the store.stats-interval expires,
        # so wait for this to happen.
        def tree_size_incremented() -> Optional[bool]:
            tree_size = client.getCounter("local_store.tree.size")
            initial_tree_size = initial_counters.get("local_store.tree.size")
            assert initial_tree_size is not None
            if tree_size > initial_tree_size:
                return True
            return None

        poll_until(tree_size_incremented, timeout=10, interval=0.1)

        # EdenFS should not import blobs to the local store
        self.assertEqual(
            initial_counters.get("local_store.blob.size"),
            client.getCounter("local_store.blob.size"),
        )

        # Update the config file with a very small GC limit that will force
        # GC to be triggered
        self.eden.user_rc_path.write_text(
            initial_config
            + """
blob-size-limit = "1"
blobmeta-size-limit = "1"
tree-size-limit = "1"
hgcommit2tree-size-limit = "1"
"""
        )

        # Wait until a GC run has completed.
        def gc_run_succeeded() -> Optional[Dict[str, int]]:
            counters = client.getRegexCounters(counter_regex)
            if counters.get("local_store.auto_gc.last_run_succeeded") is not None:
                return counters
            return None

        counters = poll_until(gc_run_succeeded, timeout=30, interval=0.05)

        # Check the local_store.auto_gc counters
        self.assertEqual(counters.get("local_store.auto_gc.last_run_succeeded"), 1)
        self.assertGreater(counters.get("local_store.auto_gc.success"), 0)
        self.assertEqual(counters.get("local_store.auto_gc.failure", 0), 0)
        self.assertGreaterEqual(
            counters.get("local_store.auto_gc.last_duration_ms"), 0
        )

        # Run "eden stats local-store" and check the output
        stats_output = self.eden.run_cmd("stats", "local-store")
        print(stats_output)
        m = re.search(r"Successful Auto-GC Runs:\s+(\d+)", stats_output)
        self.assertIsNotNone(m)
        assert m is not None  # make the type checker happy
        self.assertGreater(int(m.group(1)), 0)
        self.assertRegex(stats_output, r"Last Auto-GC Result:\s+Success")
        self.assertRegex(stats_output, r"Failed Auto-GC Runs:\s+0")
        self.assertRegex(stats_output, r"Total Ephemeral Size:")
        self.assertRegex(stats_output, r"Total Persistent Size:")