async def test_keys_missing(test_environment: TestEnvironment) -> None:
    env: TestEnvironment = test_environment
    not_in_keychain_plots: List[Path] = get_test_plots("not_in_keychain")
    dir_not_in_keychain: TestDirectory = TestDirectory(
        env.root_path / "plots" / "not_in_keychain", not_in_keychain_plots
    )
    expected_result = PlotRefreshResult()
    # The plots in the "not_in_keychain" directory have infinity G1 elements as farmer/pool key,
    # so for now they should be treated as plots with missing keys
    add_plot_directory(env.root_path, str(dir_not_in_keychain.path))
    expected_result.loaded = []
    expected_result.removed = []
    expected_result.processed = len(dir_not_in_keychain)
    expected_result.remaining = 0
    for i in range(2):
        await env.refresh_tester.run(expected_result)
        assert len(env.refresh_tester.plot_manager.no_key_filenames) == len(dir_not_in_keychain)
        for path in env.refresh_tester.plot_manager.no_key_filenames:
            assert path in dir_not_in_keychain.plots
    # Delete one of the plots and make sure it gets dropped from the no-key filenames list
    drop_plot = dir_not_in_keychain.path_list()[0]
    dir_not_in_keychain.drop(drop_plot)
    drop_plot.unlink()
    assert drop_plot in env.refresh_tester.plot_manager.no_key_filenames
    expected_result.processed -= 1
    await env.refresh_tester.run(expected_result)
    assert drop_plot not in env.refresh_tester.plot_manager.no_key_filenames
    # Now add the missing keys to the plot manager's key lists and make sure the plots are getting loaded
    env.refresh_tester.plot_manager.farmer_public_keys.append(G1Element())
    env.refresh_tester.plot_manager.pool_public_keys.append(G1Element())
    expected_result.loaded = dir_not_in_keychain.plot_info_list()  # type: ignore[assignment]
    expected_result.processed = len(dir_not_in_keychain)
    await env.refresh_tester.run(expected_result)
    # And make sure they are dropped from the list of plots with missing keys
    assert len(env.refresh_tester.plot_manager.no_key_filenames) == 0
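# For context: the test above appends default-constructed `G1Element()`s to the key
# lists because the "not_in_keychain" plots embed infinity G1 elements as farmer/pool
# keys, and blspy's default constructor yields exactly that identity (infinity) point.
# A minimal sketch under that assumption:
from blspy import G1Element

inf = G1Element()  # default constructor: the point at infinity
assert bytes(inf)[0] == 0xC0  # compressed infinity encoding sets the top two bits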
def refresh_callback(self, event: PlotRefreshEvents, refresh_result: PlotRefreshResult):
    if event != PlotRefreshEvents.done:
        # Only validate the final results for this test
        return
    for name in ["loaded", "removed", "processed", "remaining"]:
        try:
            actual_value = refresh_result.__getattribute__(name)
            if type(actual_value) == list:
                expected_list = self.expected_result.__getattribute__(name)
                if len(expected_list) != len(actual_value):
                    return
                values_found = 0
                for value in actual_value:
                    if type(value) == PlotInfo:
                        for plot_info in expected_list:
                            if plot_info.prover.get_filename() == value.prover.get_filename():
                                values_found += 1
                                continue
                    else:
                        if value in expected_list:
                            values_found += 1
                            continue
                if values_found != len(expected_list):
                    log.error(f"{name} invalid: values_found {values_found} expected {len(expected_list)}")
                    return
            else:
                expected_value = self.expected_result.__getattribute__(name)
                if actual_value != expected_value:
                    log.error(f"{name} invalid: actual {actual_value} expected {expected_value}")
                    return
        except AttributeError as error:
            log.error(f"{error}")
            return
    self.expected_result_matched = True
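# A hedged sketch of the `run()` driver that pairs with `refresh_callback` above
# (assumed shape; the real `PlotRefreshTester` may differ): it arms the expected
# result, triggers a refresh, and waits until the callback reported a match.
async def run(self, expected_result: PlotRefreshResult) -> None:
    self.expected_result = expected_result
    self.expected_result_matched = False
    self.plot_manager.trigger_refresh()
    # Wait for the refresh task to settle, then require that the `done` event
    # reported exactly the expected result.
    await time_out_assert(5, self.plot_manager.needs_refresh, value=False)
    assert self.expected_result_matched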
async def test_plot_info_caching(test_environment):
    env: TestEnvironment = test_environment
    expected_result = PlotRefreshResult()
    add_plot_directory(env.root_path, str(env.dir_1.path))
    expected_result.loaded = env.dir_1.plot_info_list()
    expected_result.removed = []
    expected_result.processed = len(env.dir_1)
    expected_result.remaining = 0
    await env.refresh_tester.run(expected_result)
    assert env.refresh_tester.plot_manager.cache.path().exists()
    unlink(env.refresh_tester.plot_manager.cache.path())
    # Should not write the cache again on shutdown because it didn't change
    assert not env.refresh_tester.plot_manager.cache.path().exists()
    env.refresh_tester.plot_manager.stop_refreshing()
    assert not env.refresh_tester.plot_manager.cache.path().exists()
    # Manually trigger `save_cache` and make sure it creates a new cache file
    env.refresh_tester.plot_manager.cache.save()
    assert env.refresh_tester.plot_manager.cache.path().exists()
    refresh_tester: PlotRefreshTester = PlotRefreshTester(env.root_path)
    plot_manager = refresh_tester.plot_manager
    plot_manager.cache.load()
    assert len(plot_manager.cache) == len(env.refresh_tester.plot_manager.cache)
    await refresh_tester.run(expected_result)
    for path, plot_info in env.refresh_tester.plot_manager.plots.items():
        assert path in plot_manager.plots
        assert plot_manager.plots[path].prover.get_filename() == plot_info.prover.get_filename()
        assert plot_manager.plots[path].prover.get_id() == plot_info.prover.get_id()
        assert plot_manager.plots[path].prover.get_memo() == plot_info.prover.get_memo()
        assert plot_manager.plots[path].prover.get_size() == plot_info.prover.get_size()
        assert plot_manager.plots[path].pool_public_key == plot_info.pool_public_key
        assert plot_manager.plots[path].pool_contract_puzzle_hash == plot_info.pool_contract_puzzle_hash
        assert plot_manager.plots[path].plot_public_key == plot_info.plot_public_key
        assert plot_manager.plots[path].file_size == plot_info.file_size
        assert plot_manager.plots[path].time_modified == plot_info.time_modified
    assert env.refresh_tester.plot_manager.plot_filename_paths == plot_manager.plot_filename_paths
    assert env.refresh_tester.plot_manager.failed_to_open_filenames == plot_manager.failed_to_open_filenames
    assert env.refresh_tester.plot_manager.no_key_filenames == plot_manager.no_key_filenames
    plot_manager.stop_refreshing()
    # Modify the content of the plot_manager.dat
    with open(plot_manager.cache.path(), "r+b") as file:
        file.write(b"\xff\xff")  # Sets Cache.version to 65535
    # Make sure it just loads the plots normally if it fails to load the cache
    refresh_tester = PlotRefreshTester(env.root_path)
    plot_manager = refresh_tester.plot_manager
    plot_manager.cache.load()
    assert len(plot_manager.cache) == 0
    plot_manager.set_public_keys(bt.plot_manager.farmer_public_keys, bt.plot_manager.pool_public_keys)
    await refresh_tester.run(expected_result)
    assert len(plot_manager.plots) == len(env.refresh_tester.plot_manager.plots)
    plot_manager.stop_refreshing()
def _refresh_task(self):
    while self._refreshing_enabled:
        while not self.needs_refresh() and self._refreshing_enabled:
            time.sleep(1)

        plot_filenames: Dict[Path, List[Path]] = get_plot_filenames(self.root_path)
        plot_directories: Set[Path] = set(plot_filenames.keys())
        plot_paths: List[Path] = []
        for paths in plot_filenames.values():
            plot_paths += paths

        total_result: PlotRefreshResult = PlotRefreshResult()
        while self.needs_refresh() and self._refreshing_enabled:
            batch_result: PlotRefreshResult = self.refresh_batch(plot_paths, plot_directories)
            total_result += batch_result
            self._refresh_callback(batch_result)
            if batch_result.remaining_files == 0:
                break
            batch_sleep = self.refresh_parameter.batch_sleep_milliseconds
            self.log.debug(f"refresh_plots: Sleep {batch_sleep} milliseconds")
            time.sleep(float(batch_sleep) / 1000.0)

        # Cleanup unused cache
        available_ids = set([plot_info.prover.get_id() for plot_info in self.plots.values()])
        invalid_cache_keys = [plot_id for plot_id in self.cache.keys() if plot_id not in available_ids]
        self.cache.remove(invalid_cache_keys)
        self.log.debug(f"_refresh_task: cached entries removed: {len(invalid_cache_keys)}")

        if self.cache.changed():
            self.cache.save()

        self.last_refresh_time = time.time()

        self.log.debug(
            f"_refresh_task: total_result.loaded_plots {total_result.loaded_plots}, "
            f"total_result.removed_plots {total_result.removed_plots}, "
            f"total_result.loaded_size {total_result.loaded_size / (1024 ** 4):.2f} TiB, "
            f"total_duration {total_result.duration:.2f} seconds"
        )
def _refresh_task(self):
    while self._refreshing_enabled:
        while not self.needs_refresh() and self._refreshing_enabled:
            time.sleep(1)

        total_result: PlotRefreshResult = PlotRefreshResult()
        while self.needs_refresh() and self._refreshing_enabled:
            batch_result: PlotRefreshResult = self.refresh_batch()
            total_result += batch_result
            self._refresh_callback(batch_result)
            if batch_result.remaining_files == 0:
                self.last_refresh_time = time.time()
                break
            batch_sleep = self.refresh_parameter.batch_sleep_milliseconds
            self.log.debug(f"refresh_plots: Sleep {batch_sleep} milliseconds")
            time.sleep(float(batch_sleep) / 1000.0)

        self.log.debug(
            f"_refresh_task: total_result.loaded_plots {total_result.loaded_plots}, "
            f"total_result.removed_plots {total_result.removed_plots}, "
            f"total_result.loaded_size {total_result.loaded_size / (1024 ** 4):.2f} TiB, "
            f"total_duration {total_result.duration:.2f} seconds"
        )
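# Both `_refresh_task` variants above aggregate with `total_result += batch_result`.
# A minimal counters-based `PlotRefreshResult` supporting that (field names taken
# from the code above; the `__iadd__` semantics are assumed):
from dataclasses import dataclass


@dataclass
class PlotRefreshResult:
    loaded_plots: int = 0
    loaded_size: int = 0
    removed_plots: int = 0
    processed_files: int = 0
    remaining_files: int = 0
    duration: float = 0.0

    def __iadd__(self, other: "PlotRefreshResult") -> "PlotRefreshResult":
        # Sum the per-batch counters; `remaining_files` is a snapshot of what is
        # left after the batch, so the latest value wins (assumed).
        self.loaded_plots += other.loaded_plots
        self.loaded_size += other.loaded_size
        self.removed_plots += other.removed_plots
        self.processed_files += other.processed_files
        self.remaining_files = other.remaining_files
        self.duration += other.duration
        return self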
def refresh_batch(self, plot_paths: List[Path], plot_directories: Set[Path]) -> PlotRefreshResult:
    start_time: float = time.time()
    result: PlotRefreshResult = PlotRefreshResult(processed=len(plot_paths))
    counter_lock = threading.Lock()

    log.debug(f"refresh_batch: {len(plot_paths)} files in directories {plot_directories}")

    if self.match_str is not None:
        log.info(f'Only loading plots that contain "{self.match_str}" in the file or directory name')

    def process_file(file_path: Path) -> Optional[PlotInfo]:
        if not self._refreshing_enabled:
            return None
        filename_str = str(file_path)
        if self.match_str is not None and self.match_str not in filename_str:
            return None
        if (
            file_path in self.failed_to_open_filenames
            and (time.time() - self.failed_to_open_filenames[file_path])
            < self.refresh_parameter.retry_invalid_seconds
        ):
            # Try once every `refresh_parameter.retry_invalid_seconds` seconds to open the file
            return None
        if file_path in self.plots:
            return self.plots[file_path]
        entry: Optional[Tuple[str, Set[str]]] = self.plot_filename_paths.get(file_path.name)
        if entry is not None:
            loaded_parent, duplicates = entry
            if str(file_path.parent) in duplicates:
                log.debug(f"Skip duplicated plot {str(file_path)}")
                return None
        try:
            if not file_path.exists():
                return None

            prover = DiskProver(str(file_path))

            log.debug(f"process_file {str(file_path)}")

            expected_size = _expected_plot_size(prover.get_size()) * UI_ACTUAL_SPACE_CONSTANT_FACTOR
            stat_info = file_path.stat()

            # TODO: consider checking if the file was just written to (which would mean that the file is still
            # being copied). A segfault might happen in this edge case.

            if prover.get_size() >= 30 and stat_info.st_size < 0.98 * expected_size:
                log.warning(
                    f"Not farming plot {file_path}. Size is {stat_info.st_size / (1024 ** 3)} GiB, but expected"
                    f" at least: {expected_size / (1024 ** 3)} GiB. We assume the file is being copied."
                )
                return None

            cache_entry = self.cache.get(prover.get_id())
            if cache_entry is None:
                (
                    pool_public_key_or_puzzle_hash,
                    farmer_public_key,
                    local_master_sk,
                ) = parse_plot_info(prover.get_memo())

                # Only use plots that have the correct keys associated with them
                if farmer_public_key not in self.farmer_public_keys:
                    log.warning(f"Plot {file_path} has a farmer public key that is not in the farmer's pk list.")
                    self.no_key_filenames.add(file_path)
                    if not self.open_no_key_filenames:
                        return None

                pool_public_key: Optional[G1Element] = None
                pool_contract_puzzle_hash: Optional[bytes32] = None
                if isinstance(pool_public_key_or_puzzle_hash, G1Element):
                    pool_public_key = pool_public_key_or_puzzle_hash
                else:
                    assert isinstance(pool_public_key_or_puzzle_hash, bytes32)
                    pool_contract_puzzle_hash = pool_public_key_or_puzzle_hash

                if pool_public_key is not None and pool_public_key not in self.pool_public_keys:
                    log.warning(f"Plot {file_path} has a pool public key that is not in the farmer's pool pk list.")
                    self.no_key_filenames.add(file_path)
                    if not self.open_no_key_filenames:
                        return None

                # If a plot is in `no_key_filenames` the keys were missing in earlier refresh cycles. We can
                # remove the current plot from that list if it's in there since we passed the key checks above.
                if file_path in self.no_key_filenames:
                    self.no_key_filenames.remove(file_path)

                local_sk = master_sk_to_local_sk(local_master_sk)

                plot_public_key: G1Element = ProofOfSpace.generate_plot_public_key(
                    local_sk.get_g1(), farmer_public_key, pool_contract_puzzle_hash is not None
                )

                cache_entry = CacheEntry(pool_public_key, pool_contract_puzzle_hash, plot_public_key)
                self.cache.update(prover.get_id(), cache_entry)

            with self.plot_filename_paths_lock:
                paths: Optional[Tuple[str, Set[str]]] = self.plot_filename_paths.get(file_path.name)
                if paths is None:
                    paths = (str(Path(prover.get_filename()).parent), set())
                    self.plot_filename_paths[file_path.name] = paths
                else:
                    paths[1].add(str(Path(prover.get_filename()).parent))
                    log.warning(f"Have multiple copies of the plot {file_path.name} in {[paths[0], *paths[1]]}.")
                    return None

            new_plot_info: PlotInfo = PlotInfo(
                prover,
                cache_entry.pool_public_key,
                cache_entry.pool_contract_puzzle_hash,
                cache_entry.plot_public_key,
                stat_info.st_size,
                stat_info.st_mtime,
            )

            with counter_lock:
                result.loaded.append(new_plot_info)

            if file_path in self.failed_to_open_filenames:
                del self.failed_to_open_filenames[file_path]

        except Exception as e:
            tb = traceback.format_exc()
            log.error(f"Failed to open file {file_path}. {e} {tb}")
            self.failed_to_open_filenames[file_path] = int(time.time())
            return None
        log.info(f"Found plot {file_path} of size {new_plot_info.prover.get_size()}")

        if self.show_memo:
            plot_memo: bytes32
            if pool_contract_puzzle_hash is None:
                plot_memo = stream_plot_info_pk(pool_public_key, farmer_public_key, local_master_sk)
            else:
                plot_memo = stream_plot_info_ph(pool_contract_puzzle_hash, farmer_public_key, local_master_sk)
            plot_memo_str: str = plot_memo.hex()
            log.info(f"Memo: {plot_memo_str}")

        return new_plot_info

    with self, ThreadPoolExecutor() as executor:
        plots_refreshed: Dict[Path, PlotInfo] = {}
        for new_plot in executor.map(process_file, plot_paths):
            if new_plot is not None:
                plots_refreshed[Path(new_plot.prover.get_filename())] = new_plot
        self.plots.update(plots_refreshed)

    result.duration = time.time() - start_time

    self.log.debug(
        f"refresh_batch: loaded {len(result.loaded)}, "
        f"removed {len(result.removed)}, processed {result.processed}, "
        f"remaining {result.remaining}, batch_size {self.refresh_parameter.batch_size}, "
        f"duration: {result.duration:.2f} seconds"
    )
    return result
def _refresh_task(self):
    while self._refreshing_enabled:
        try:
            while not self.needs_refresh() and self._refreshing_enabled:
                time.sleep(1)

            if not self._refreshing_enabled:
                return

            plot_filenames: Dict[Path, List[Path]] = get_plot_filenames(self.root_path)
            plot_directories: Set[Path] = set(plot_filenames.keys())
            plot_paths: List[Path] = []
            for paths in plot_filenames.values():
                plot_paths += paths

            total_result: PlotRefreshResult = PlotRefreshResult()
            total_size = len(plot_paths)

            self._refresh_callback(PlotRefreshEvents.started, PlotRefreshResult(remaining=total_size))

            # First drop all plots we have in plot_filename_paths but are no longer in the filesystem or set in config
            def plot_removed(test_path: Path):
                return not test_path.exists() or test_path.parent not in plot_directories

            for path in list(self.failed_to_open_filenames.keys()):
                if plot_removed(path):
                    del self.failed_to_open_filenames[path]

            for path in self.no_key_filenames.copy():
                if plot_removed(path):
                    self.no_key_filenames.remove(path)

            filenames_to_remove: List[str] = []
            for plot_filename, paths_entry in self.plot_filename_paths.items():
                loaded_path, duplicated_paths = paths_entry
                loaded_plot = Path(loaded_path) / Path(plot_filename)
                if plot_removed(loaded_plot):
                    filenames_to_remove.append(plot_filename)
                    if loaded_plot in self.plots:
                        del self.plots[loaded_plot]
                    total_result.removed.append(loaded_plot)
                    # No need to check the duplicates here since we drop the whole entry
                    continue

                paths_to_remove: List[str] = []
                for path in duplicated_paths:
                    loaded_plot = Path(path) / Path(plot_filename)
                    if plot_removed(loaded_plot):
                        paths_to_remove.append(path)
                        total_result.removed.append(loaded_plot)
                for path in paths_to_remove:
                    duplicated_paths.remove(path)

            for filename in filenames_to_remove:
                del self.plot_filename_paths[filename]

            for remaining, batch in list_to_batches(plot_paths, self.refresh_parameter.batch_size):
                batch_result: PlotRefreshResult = self.refresh_batch(batch, plot_directories)
                if not self._refreshing_enabled:
                    self.log.debug("refresh_plots: Aborted")
                    break
                # Set the remaining files since `refresh_batch()` doesn't know them but we want to report them
                batch_result.remaining = remaining
                total_result.loaded += batch_result.loaded
                total_result.processed += batch_result.processed
                total_result.duration += batch_result.duration

                self._refresh_callback(PlotRefreshEvents.batch_processed, batch_result)
                if remaining == 0:
                    break
                batch_sleep = self.refresh_parameter.batch_sleep_milliseconds
                self.log.debug(f"refresh_plots: Sleep {batch_sleep} milliseconds")
                time.sleep(float(batch_sleep) / 1000.0)

            if self._refreshing_enabled:
                self._refresh_callback(PlotRefreshEvents.done, total_result)

            # Cleanup unused cache
            available_ids = set([plot_info.prover.get_id() for plot_info in self.plots.values()])
            invalid_cache_keys = [plot_id for plot_id in self.cache.keys() if plot_id not in available_ids]
            self.cache.remove(invalid_cache_keys)
            self.log.debug(f"_refresh_task: cached entries removed: {len(invalid_cache_keys)}")

            if self.cache.changed():
                self.cache.save()

            self.last_refresh_time = time.time()

            self.log.debug(
                f"_refresh_task: total_result.loaded {len(total_result.loaded)}, "
                f"total_result.removed {len(total_result.removed)}, "
                f"total_duration {total_result.duration:.2f} seconds"
            )
        except Exception as e:
            log.error(f"_refresh_callback raised: {e} with the traceback: {traceback.format_exc()}")
            self.reset()
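# The batching loop above consumes `(remaining, batch)` pairs from `list_to_batches`.
# A sketch consistent with that call site (assumed; not necessarily the project's
# exact helper):
from typing import Iterator, List, Tuple, TypeVar

T = TypeVar("T")


def list_to_batches(list_to_split: List[T], batch_size: int) -> Iterator[Tuple[int, List[T]]]:
    if batch_size <= 0:
        raise ValueError("batch_size must be positive")
    total_size = len(list_to_split)
    if total_size == 0:
        yield 0, []
        return
    for batch_start in range(0, total_size, batch_size):
        batch_end = min(batch_start + batch_size, total_size)
        # `remaining` counts the entries still unprocessed after this batch
        yield total_size - batch_end, list_to_split[batch_start:batch_end]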
async def test_callback_event_raises(test_environment, event_to_raise: PlotRefreshEvents):
    last_event_fired: Optional[PlotRefreshEvents] = None

    def raising_callback(event: PlotRefreshEvents, _: PlotRefreshResult):
        nonlocal last_event_fired
        last_event_fired = event
        if event == event_to_raise:
            raise Exception(f"run_raise_in_callback {event_to_raise}")

    env: TestEnvironment = test_environment
    expected_result = PlotRefreshResult()
    # Load dir_1
    add_plot_directory(env.root_path, str(env.dir_1.path))
    expected_result.loaded = env.dir_1.plot_info_list()  # type: ignore[assignment]
    expected_result.removed = []
    expected_result.processed = len(env.dir_1)
    expected_result.remaining = 0
    await env.refresh_tester.run(expected_result)
    # Load dir_2
    add_plot_directory(env.root_path, str(env.dir_2.path))
    expected_result.loaded = env.dir_2.plot_info_list()  # type: ignore[assignment]
    expected_result.removed = []
    expected_result.processed = len(env.dir_1) + len(env.dir_2)
    expected_result.remaining = 0
    await env.refresh_tester.run(expected_result)
    # Now raise the exception in the callback
    default_callback = env.refresh_tester.plot_manager._refresh_callback
    env.refresh_tester.plot_manager.set_refresh_callback(raising_callback)
    env.refresh_tester.plot_manager.start_refreshing()
    env.refresh_tester.plot_manager.trigger_refresh()
    await time_out_assert(5, env.refresh_tester.plot_manager.needs_refresh, value=False)
    # And make sure the follow-up events aren't fired
    assert last_event_fired == event_to_raise
    # The exception should trigger `PlotManager.reset()` and clear the plots
    assert len(env.refresh_tester.plot_manager.plots) == 0
    assert len(env.refresh_tester.plot_manager.plot_filename_paths) == 0
    assert len(env.refresh_tester.plot_manager.failed_to_open_filenames) == 0
    assert len(env.refresh_tester.plot_manager.no_key_filenames) == 0
    # The next run with the default callback restored should lead to re-loading of all plots
    env.refresh_tester.plot_manager.set_refresh_callback(default_callback)
    expected_result.loaded = env.dir_1.plot_info_list() + env.dir_2.plot_info_list()  # type: ignore[assignment]
    expected_result.removed = []
    expected_result.processed = len(env.dir_1) + len(env.dir_2)
    expected_result.remaining = 0
    await env.refresh_tester.run(expected_result)
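# The `event_to_raise` argument above is presumably supplied by pytest
# parametrization over the refresh events (assumed; the decorator is not part
# of this snippet), e.g.:
#
#   @pytest.mark.parametrize("event_to_raise", list(PlotRefreshEvents))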
async def test_invalid_plots(test_environment):
    env: TestEnvironment = test_environment
    expected_result = PlotRefreshResult()
    # Test re-trying if processing a plot failed
    # First create a backup of the plot
    retry_test_plot = env.dir_1.path_list()[0].resolve()
    retry_test_plot_save = Path(env.dir_1.path / ".backup").resolve()
    copy(retry_test_plot, retry_test_plot_save)
    # Invalidate the plot
    with open(retry_test_plot, "r+b") as file:
        file.write(bytes(100))
    # Add it and validate it fails to load
    add_plot_directory(env.root_path, str(env.dir_1.path))
    expected_result.loaded = env.dir_1.plot_info_list()[1:]
    expected_result.removed = []
    expected_result.processed = len(env.dir_1)
    expected_result.remaining = 0
    await env.refresh_tester.run(expected_result)
    assert len(env.refresh_tester.plot_manager.failed_to_open_filenames) == 1
    assert retry_test_plot in env.refresh_tester.plot_manager.failed_to_open_filenames
    # Give it a non-.plot ending and make sure it gets removed from the invalid list on the next refresh
    retry_test_plot_unload = Path(env.dir_1.path / ".unload").resolve()
    move(retry_test_plot, retry_test_plot_unload)
    expected_result.processed -= 1
    expected_result.loaded = []
    await env.refresh_tester.run(expected_result)
    assert len(env.refresh_tester.plot_manager.failed_to_open_filenames) == 0
    assert retry_test_plot not in env.refresh_tester.plot_manager.failed_to_open_filenames
    # Recover the name and make sure it reappears in the invalid list
    move(retry_test_plot_unload, retry_test_plot)
    expected_result.processed += 1
    await env.refresh_tester.run(expected_result)
    assert len(env.refresh_tester.plot_manager.failed_to_open_filenames) == 1
    assert retry_test_plot in env.refresh_tester.plot_manager.failed_to_open_filenames
    # Make sure the file stays in `failed_to_open_filenames` and doesn't get loaded in the next refresh cycle
    expected_result.loaded = []
    expected_result.processed = len(env.dir_1)
    await env.refresh_tester.run(expected_result)
    assert len(env.refresh_tester.plot_manager.failed_to_open_filenames) == 1
    assert retry_test_plot in env.refresh_tester.plot_manager.failed_to_open_filenames
    # Now decrease the re-try timeout, restore the valid plot file and make sure it properly loads now
    env.refresh_tester.plot_manager.refresh_parameter.retry_invalid_seconds = 0
    move(retry_test_plot_save, retry_test_plot)
    expected_result.loaded = env.dir_1.plot_info_list()[0:1]
    expected_result.processed = len(env.dir_1)
    await env.refresh_tester.run(expected_result)
    assert len(env.refresh_tester.plot_manager.failed_to_open_filenames) == 0
    assert retry_test_plot not in env.refresh_tester.plot_manager.failed_to_open_filenames
async def test_plot_refreshing(test_environment):
    env: TestEnvironment = test_environment
    expected_result = PlotRefreshResult()
    dir_duplicates: TestDirectory = TestDirectory(get_plot_dir().resolve() / "duplicates", env.dir_1.plots)

    async def run_test_case(
        *,
        trigger: Callable,
        test_path: Path,
        expect_loaded: List[MockPlotInfo],
        expect_removed: List[Path],
        expect_processed: int,
        expect_duplicates: int,
        expected_directories: int,
        expect_total_plots: int,
    ):
        expected_result.loaded = expect_loaded
        expected_result.removed = expect_removed
        expected_result.processed = expect_processed
        trigger(env.root_path, str(test_path))
        assert len(get_plot_directories(env.root_path)) == expected_directories
        await env.refresh_tester.run(expected_result)
        assert len(env.refresh_tester.plot_manager.plots) == expect_total_plots
        assert len(env.refresh_tester.plot_manager.cache) == expect_total_plots
        assert len(env.refresh_tester.plot_manager.get_duplicates()) == expect_duplicates
        assert len(env.refresh_tester.plot_manager.failed_to_open_filenames) == 0

    # Add dir_1
    await run_test_case(
        trigger=add_plot_directory,
        test_path=env.dir_1.path,
        expect_loaded=env.dir_1.plot_info_list(),
        expect_removed=[],
        expect_processed=len(env.dir_1),
        expect_duplicates=0,
        expected_directories=1,
        expect_total_plots=len(env.dir_1),
    )
    # Add dir_2
    await run_test_case(
        trigger=add_plot_directory,
        test_path=env.dir_2.path,
        expect_loaded=env.dir_2.plot_info_list(),
        expect_removed=[],
        expect_processed=len(env.dir_1) + len(env.dir_2),
        expect_duplicates=0,
        expected_directories=2,
        expect_total_plots=len(env.dir_1) + len(env.dir_2),
    )
    # Add dir_duplicates
    await run_test_case(
        trigger=add_plot_directory,
        test_path=dir_duplicates.path,
        expect_loaded=[],
        expect_removed=[],
        expect_processed=len(env.dir_1) + len(env.dir_2) + len(dir_duplicates),
        expect_duplicates=len(dir_duplicates),
        expected_directories=3,
        expect_total_plots=len(env.dir_1) + len(env.dir_2),
    )
    for item in dir_duplicates.path.iterdir():
        assert item.is_file() and item in env.refresh_tester.plot_manager.get_duplicates()

    # Drop the duplicated plot we remove in the next test case from the test directory upfront so that the numbers
    # match the expected below
    drop_path = dir_duplicates.plots[0]
    dir_duplicates.drop(drop_path)
    # Delete one duplicated plot
    await run_test_case(
        trigger=trigger_remove_plot,
        test_path=drop_path,
        expect_loaded=[],
        expect_removed=[drop_path],
        expect_processed=len(env.dir_1) + len(env.dir_2) + len(dir_duplicates),
        expect_duplicates=len(dir_duplicates),
        expected_directories=3,
        expect_total_plots=len(env.dir_1) + len(env.dir_2),
    )
    # Drop the plot we remove in the next test case from the test directory upfront so that the numbers match
    # the expected below
    drop_path = env.dir_1.plots[0]
    env.dir_1.drop(drop_path)
    # Delete one original plot (its duplicate was already removed above)
    await run_test_case(
        trigger=trigger_remove_plot,
        test_path=drop_path,
        expect_loaded=[],
        expect_removed=[drop_path],
        expect_processed=len(env.dir_1) + len(env.dir_2) + len(dir_duplicates),
        expect_duplicates=len(dir_duplicates),
        expected_directories=3,
        expect_total_plots=len(env.dir_1) + len(env.dir_2),
    )
    # Remove directory with the duplicates
    await run_test_case(
        trigger=remove_plot_directory,
        test_path=dir_duplicates.path,
        expect_loaded=[],
        expect_removed=dir_duplicates.path_list(),
        expect_processed=len(env.dir_1) + len(env.dir_2),
        expect_duplicates=0,
        expected_directories=2,
        expect_total_plots=len(env.dir_1) + len(env.dir_2),
    )
    for item in dir_duplicates.path.iterdir():
        assert item.is_file() and item not in env.refresh_tester.plot_manager.get_duplicates()

    # Re-add the directory with the duplicates for other tests
    await run_test_case(
        trigger=add_plot_directory,
        test_path=dir_duplicates.path,
        expect_loaded=[],
        expect_removed=[],
        expect_processed=len(env.dir_1) + len(env.dir_2) + len(dir_duplicates),
        expect_duplicates=len(dir_duplicates),
        expected_directories=3,
        expect_total_plots=len(env.dir_1) + len(env.dir_2),
    )
    # Remove dir_1 from which the duplicated plots are loaded. This removes the duplicates of dir_1
    # and in the same run loads them from dir_duplicates.
    await run_test_case(
        trigger=remove_plot_directory,
        test_path=env.dir_1.path,
        expect_loaded=dir_duplicates.plot_info_list(),
        expect_removed=env.dir_1.path_list(),
        expect_processed=len(env.dir_2) + len(dir_duplicates),
        expect_duplicates=0,
        expected_directories=2,
        expect_total_plots=len(env.dir_2) + len(dir_duplicates),
    )
    # Re-add the directory. Now the plot seen as duplicate is from dir_1, not from dir_duplicates like before
    await run_test_case(
        trigger=add_plot_directory,
        test_path=env.dir_1.path,
        expect_loaded=[],
        expect_removed=[],
        expect_processed=len(env.dir_1) + len(env.dir_2) + len(dir_duplicates),
        expect_duplicates=len(dir_duplicates),
        expected_directories=3,
        expect_total_plots=len(env.dir_1) + len(env.dir_2),
    )
    # Drop the duplicated plot we remove in the next test case from the test directory upfront so that the numbers
    # match the expected below
    drop_path = env.dir_1.plots[2]
    env.dir_1.drop(drop_path)
    # Remove the duplicated plot
    await run_test_case(
        trigger=trigger_remove_plot,
        test_path=drop_path,
        expect_loaded=[],
        expect_removed=[drop_path],
        expect_processed=len(env.dir_1) + len(env.dir_2) + len(dir_duplicates),
        expect_duplicates=len(env.dir_1),
        expected_directories=3,
        expect_total_plots=len(env.dir_2) + len(dir_duplicates),
    )
    # Remove dir_duplicates, this drops the duplicates and loads all plots from dir_1
    await run_test_case(
        trigger=remove_plot_directory,
        test_path=dir_duplicates.path,
        expect_loaded=env.dir_1.plot_info_list(),
        expect_removed=dir_duplicates.path_list(),
        expect_processed=len(env.dir_1) + len(env.dir_2),
        expect_duplicates=0,
        expected_directories=2,
        expect_total_plots=len(env.dir_1) + len(env.dir_2),
    )
    # Remove dir_2
    await run_test_case(
        trigger=remove_plot_directory,
        test_path=env.dir_2.path,
        expect_loaded=[],
        expect_removed=env.dir_2.path_list(),
        expect_processed=len(env.dir_1),
        expect_duplicates=0,
        expected_directories=1,
        expect_total_plots=len(env.dir_1),
    )
    # Remove dir_1
    await run_test_case(
        trigger=remove_plot_directory,
        test_path=env.dir_1.path,
        expect_loaded=[],
        expect_removed=env.dir_1.path_list(),
        expect_processed=0,
        expect_duplicates=0,
        expected_directories=0,
        expect_total_plots=0,
    )
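# `get_duplicates()` is asserted against throughout the test above. A minimal
# sketch consistent with how `plot_filename_paths` maps a filename to its loaded
# parent plus the set of duplicate parents (assumed implementation):
def get_duplicates(self) -> List[Path]:
    result: List[Path] = []
    for plot_filename, paths_entry in self.plot_filename_paths.items():
        _, duplicated_paths = paths_entry
        # Each duplicate parent directory contributes one duplicate path for
        # this filename; the loaded parent itself is not a duplicate.
        for path_str in duplicated_paths:
            result.append(Path(path_str) / plot_filename)
    return result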
async def test1(self, simulation):
    test_rpc_port = uint16(21522)
    test_rpc_port_2 = uint16(21523)
    harvester, farmer_api = simulation

    def stop_node_cb():
        pass

    def stop_node_cb_2():
        pass

    config = bt.config
    hostname = config["self_hostname"]
    daemon_port = config["daemon_port"]

    farmer_rpc_api = FarmerRpcApi(farmer_api.farmer)
    harvester_rpc_api = HarvesterRpcApi(harvester)

    rpc_cleanup = await start_rpc_server(
        farmer_rpc_api,
        hostname,
        daemon_port,
        test_rpc_port,
        stop_node_cb,
        bt.root_path,
        config,
        connect_to_daemon=False,
    )
    rpc_cleanup_2 = await start_rpc_server(
        harvester_rpc_api,
        hostname,
        daemon_port,
        test_rpc_port_2,
        stop_node_cb_2,
        bt.root_path,
        config,
        connect_to_daemon=False,
    )

    try:
        client = await FarmerRpcClient.create(self_hostname, test_rpc_port, bt.root_path, config)
        client_2 = await HarvesterRpcClient.create(self_hostname, test_rpc_port_2, bt.root_path, config)

        async def have_connections():
            return len(await client.get_connections()) > 0

        await time_out_assert(15, have_connections, True)

        assert (await client.get_signage_point(std_hash(b"2"))) is None
        assert len(await client.get_signage_points()) == 0

        async def have_signage_points():
            return len(await client.get_signage_points()) > 0

        sp = farmer_protocol.NewSignagePoint(
            std_hash(b"1"), std_hash(b"2"), std_hash(b"3"), uint64(1), uint64(1000000), uint8(2)
        )
        await farmer_api.new_signage_point(sp)

        await time_out_assert(5, have_signage_points, True)
        assert (await client.get_signage_point(std_hash(b"2"))) is not None

        async def have_plots():
            return len((await client_2.get_plots())["plots"]) > 0

        await time_out_assert(5, have_plots, True)

        res = await client_2.get_plots()
        num_plots = len(res["plots"])
        assert num_plots > 0
        plot_dir = get_plot_dir() / "subdir"
        plot_dir.mkdir(parents=True, exist_ok=True)
        plot_dir_sub = get_plot_dir() / "subdir" / "subsubdir"
        plot_dir_sub.mkdir(parents=True, exist_ok=True)
        plotter = DiskPlotter()
        filename = "test_farmer_harvester_rpc_plot.plot"
        filename_2 = "test_farmer_harvester_rpc_plot2.plot"
        plotter.create_plot_disk(
            str(plot_dir),
            str(plot_dir),
            str(plot_dir),
            filename,
            18,
            stream_plot_info_pk(bt.pool_pk, bt.farmer_pk, AugSchemeMPL.key_gen(bytes([4] * 32))),
            token_bytes(32),
            128,
            0,
            2000,
            0,
            False,
        )

        # Making a plot with a puzzle hash encoded into it instead of pk
        plot_id_2 = token_bytes(32)
        plotter.create_plot_disk(
            str(plot_dir),
            str(plot_dir),
            str(plot_dir),
            filename_2,
            18,
            stream_plot_info_ph(std_hash(b"random ph"), bt.farmer_pk, AugSchemeMPL.key_gen(bytes([5] * 32))),
            plot_id_2,
            128,
            0,
            2000,
            0,
            False,
        )

        # Making the same plot, in a different dir. This should not be farmed
        plotter.create_plot_disk(
            str(plot_dir_sub),
            str(plot_dir_sub),
            str(plot_dir_sub),
            filename_2,
            18,
            stream_plot_info_ph(std_hash(b"random ph"), bt.farmer_pk, AugSchemeMPL.key_gen(bytes([5] * 32))),
            plot_id_2,
            128,
            0,
            2000,
            0,
            False,
        )

        res_2 = await client_2.get_plots()
        assert len(res_2["plots"]) == num_plots

        # Test farmer get_harvesters
        async def test_get_harvesters():
            farmer_res = await client.get_harvesters()
            if len(list(farmer_res["harvesters"])) != 1:
                return False
            if len(list(farmer_res["harvesters"][0]["plots"])) != num_plots:
                return False
            return True

        await time_out_assert(30, test_get_harvesters)

        expected_result: PlotRefreshResult = PlotRefreshResult()

        def test_refresh_callback(refresh_result: PlotRefreshResult):
            assert refresh_result.loaded_plots == expected_result.loaded_plots
            assert refresh_result.removed_plots == expected_result.removed_plots
            assert refresh_result.processed_files == expected_result.processed_files
            assert refresh_result.remaining_files == expected_result.remaining_files

        harvester.plot_manager.set_refresh_callback(test_refresh_callback)

        async def test_case(
            trigger, expect_loaded, expect_removed, expect_processed, expected_directories, expect_total_plots
        ):
            expected_result.loaded_plots = expect_loaded
            expected_result.removed_plots = expect_removed
            expected_result.processed_files = expect_processed
            await trigger
            harvester.plot_manager.trigger_refresh()
            assert len(await client_2.get_plot_directories()) == expected_directories
            await time_out_assert(5, harvester.plot_manager.needs_refresh, value=False)
            result = await client_2.get_plots()
            assert len(result["plots"]) == expect_total_plots
            assert len(harvester.plot_manager.cache) == expect_total_plots
            assert len(harvester.plot_manager.failed_to_open_filenames) == 0

        # Add plot_dir with two new plots
        await test_case(
            client_2.add_plot_directory(str(plot_dir)),
            expect_loaded=2,
            expect_removed=0,
            expect_processed=2,
            expected_directories=2,
            expect_total_plots=num_plots + 2,
        )
        # Add plot_dir_sub with one duplicate
        await test_case(
            client_2.add_plot_directory(str(plot_dir_sub)),
            expect_loaded=0,
            expect_removed=0,
            expect_processed=1,
            expected_directories=3,
            expect_total_plots=num_plots + 2,
        )
        # Delete one plot
        await test_case(
            client_2.delete_plot(str(plot_dir / filename)),
            expect_loaded=0,
            expect_removed=1,
            expect_processed=0,
            expected_directories=3,
            expect_total_plots=num_plots + 1,
        )
        # Remove directory with the duplicate
        await test_case(
            client_2.remove_plot_directory(str(plot_dir_sub)),
            expect_loaded=0,
            expect_removed=1,
            expect_processed=0,
            expected_directories=2,
            expect_total_plots=num_plots + 1,
        )
        # Re-add the directory with the duplicate for other tests
        await test_case(
            client_2.add_plot_directory(str(plot_dir_sub)),
            expect_loaded=0,
            expect_removed=0,
            expect_processed=1,
            expected_directories=3,
            expect_total_plots=num_plots + 1,
        )
        # Remove the directory which has the duplicated plot loaded. This removes the duplicated plot from plot_dir
        # and in the same run loads the plot from plot_dir_sub which is no longer seen as a duplicate.
        await test_case(
            client_2.remove_plot_directory(str(plot_dir)),
            expect_loaded=1,
            expect_removed=1,
            expect_processed=1,
            expected_directories=2,
            expect_total_plots=num_plots + 1,
        )
        # Re-add the directory; now the plot seen as duplicate is from plot_dir, not from plot_dir_sub like before
        await test_case(
            client_2.add_plot_directory(str(plot_dir)),
            expect_loaded=0,
            expect_removed=0,
            expect_processed=1,
            expected_directories=3,
            expect_total_plots=num_plots + 1,
        )
        # Remove the duplicated plot
        await test_case(
            client_2.delete_plot(str(plot_dir / filename_2)),
            expect_loaded=0,
            expect_removed=1,
            expect_processed=0,
            expected_directories=3,
            expect_total_plots=num_plots + 1,
        )
        # Remove the directory with the loaded plot which is no longer a duplicate
        await test_case(
            client_2.remove_plot_directory(str(plot_dir_sub)),
            expect_loaded=0,
            expect_removed=1,
            expect_processed=0,
            expected_directories=2,
            expect_total_plots=num_plots,
        )
        # Remove the directory which contains all other plots
        await test_case(
            client_2.remove_plot_directory(str(get_plot_dir())),
            expect_loaded=0,
            expect_removed=20,
            expect_processed=0,
            expected_directories=1,
            expect_total_plots=0,
        )
        # Recover the plots to test caching
        # First make sure the cache gets written if required and new plots are loaded
        await test_case(
            client_2.add_plot_directory(str(get_plot_dir())),
            expect_loaded=20,
            expect_removed=0,
            expect_processed=20,
            expected_directories=2,
            expect_total_plots=20,
        )
        assert harvester.plot_manager.cache.path().exists()
        unlink(harvester.plot_manager.cache.path())
        # Should not write the cache again on shutdown because it didn't change
        assert not harvester.plot_manager.cache.path().exists()
        harvester.plot_manager.stop_refreshing()
        assert not harvester.plot_manager.cache.path().exists()
        # Manually trigger `save_cache` and make sure it creates a new cache file
        harvester.plot_manager.cache.save()
        assert harvester.plot_manager.cache.path().exists()

        expected_result.loaded_plots = 20
        expected_result.removed_plots = 0
        expected_result.processed_files = 20
        expected_result.remaining_files = 0
        plot_manager: PlotManager = PlotManager(harvester.root_path, test_refresh_callback)
        plot_manager.start_refreshing()
        assert len(harvester.plot_manager.cache) == len(plot_manager.cache)
        await time_out_assert(5, plot_manager.needs_refresh, value=False)
        for path, plot_info in harvester.plot_manager.plots.items():
            assert path in plot_manager.plots
            assert plot_manager.plots[path].prover.get_filename() == plot_info.prover.get_filename()
            assert plot_manager.plots[path].prover.get_id() == plot_info.prover.get_id()
            assert plot_manager.plots[path].prover.get_memo() == plot_info.prover.get_memo()
            assert plot_manager.plots[path].prover.get_size() == plot_info.prover.get_size()
            assert plot_manager.plots[path].pool_public_key == plot_info.pool_public_key
            assert plot_manager.plots[path].pool_contract_puzzle_hash == plot_info.pool_contract_puzzle_hash
            assert plot_manager.plots[path].plot_public_key == plot_info.plot_public_key
            assert plot_manager.plots[path].file_size == plot_info.file_size
            assert plot_manager.plots[path].time_modified == plot_info.time_modified

        assert harvester.plot_manager.plot_filename_paths == plot_manager.plot_filename_paths
        assert harvester.plot_manager.failed_to_open_filenames == plot_manager.failed_to_open_filenames
        assert harvester.plot_manager.no_key_filenames == plot_manager.no_key_filenames

        plot_manager.stop_refreshing()
        # Modify the content of the plot_manager.dat
        with open(harvester.plot_manager.cache.path(), "r+b") as file:
            file.write(b"\xff\xff")  # Sets Cache.version to 65535

        # Make sure it just loads the plots normally if it fails to load the cache
        plot_manager = PlotManager(harvester.root_path, test_refresh_callback)
        plot_manager.cache.load()
        assert len(plot_manager.cache) == 0
        plot_manager.set_public_keys(
            harvester.plot_manager.farmer_public_keys, harvester.plot_manager.pool_public_keys
        )
        expected_result.loaded_plots = 20
        expected_result.removed_plots = 0
        expected_result.processed_files = 20
        expected_result.remaining_files = 0
        plot_manager.start_refreshing()
        await time_out_assert(5, plot_manager.needs_refresh, value=False)
        assert len(plot_manager.plots) == len(harvester.plot_manager.plots)
        plot_manager.stop_refreshing()

        # Test re-trying if processing a plot failed
        # First save the plot
        retry_test_plot = Path(plot_dir_sub / filename_2).resolve()
        retry_test_plot_save = Path(plot_dir_sub / "save").resolve()
        copy(retry_test_plot, retry_test_plot_save)
        # Invalidate the plot
        with open(plot_dir_sub / filename_2, "r+b") as file:
            file.write(bytes(100))
        # Add it and validate it fails to load
        await harvester.add_plot_directory(str(plot_dir_sub))
        expected_result.loaded_plots = 0
        expected_result.removed_plots = 0
        expected_result.processed_files = 1
        expected_result.remaining_files = 0
        harvester.plot_manager.start_refreshing()
        await time_out_assert(5, harvester.plot_manager.needs_refresh, value=False)
        assert retry_test_plot in harvester.plot_manager.failed_to_open_filenames
        # Make sure the file stays in `failed_to_open_filenames` and doesn't get loaded or processed in the next
        # update round
        expected_result.loaded_plots = 0
        expected_result.processed_files = 0
        harvester.plot_manager.trigger_refresh()
        await time_out_assert(5, harvester.plot_manager.needs_refresh, value=False)
        assert retry_test_plot in harvester.plot_manager.failed_to_open_filenames
        # Now decrease the re-try timeout, restore the valid plot file and make sure it properly loads now
        harvester.plot_manager.refresh_parameter.retry_invalid_seconds = 0
        move(retry_test_plot_save, retry_test_plot)
        expected_result.loaded_plots = 1
        expected_result.processed_files = 1
        harvester.plot_manager.trigger_refresh()
        await time_out_assert(5, harvester.plot_manager.needs_refresh, value=False)
        assert retry_test_plot not in harvester.plot_manager.failed_to_open_filenames

        targets_1 = await client.get_reward_targets(False)
        assert "have_pool_sk" not in targets_1
        assert "have_farmer_sk" not in targets_1
        targets_2 = await client.get_reward_targets(True)
        assert targets_2["have_pool_sk"] and targets_2["have_farmer_sk"]

        new_ph: bytes32 = create_puzzlehash_for_pk(master_sk_to_wallet_sk(bt.farmer_master_sk, uint32(10)).get_g1())
        new_ph_2: bytes32 = create_puzzlehash_for_pk(
            master_sk_to_wallet_sk(bt.pool_master_sk, uint32(472)).get_g1()
        )

        await client.set_reward_targets(encode_puzzle_hash(new_ph, "xch"), encode_puzzle_hash(new_ph_2, "xch"))
        targets_3 = await client.get_reward_targets(True)
        assert decode_puzzle_hash(targets_3["farmer_target"]) == new_ph
        assert decode_puzzle_hash(targets_3["pool_target"]) == new_ph_2
        assert targets_3["have_pool_sk"] and targets_3["have_farmer_sk"]

        new_ph_3: bytes32 = create_puzzlehash_for_pk(
            master_sk_to_wallet_sk(bt.pool_master_sk, uint32(1888)).get_g1()
        )
        await client.set_reward_targets(None, encode_puzzle_hash(new_ph_3, "xch"))
        targets_4 = await client.get_reward_targets(True)
        assert decode_puzzle_hash(targets_4["farmer_target"]) == new_ph
        assert decode_puzzle_hash(targets_4["pool_target"]) == new_ph_3
        assert not targets_4["have_pool_sk"] and targets_4["have_farmer_sk"]

        root_path = farmer_api.farmer._root_path
        config = load_config(root_path, "config.yaml")
        assert config["farmer"]["xch_target_address"] == encode_puzzle_hash(new_ph, "xch")
        assert config["pool"]["xch_target_address"] == encode_puzzle_hash(new_ph_3, "xch")

        new_ph_3_encoded = encode_puzzle_hash(new_ph_3, "xch")
        added_char = new_ph_3_encoded + "a"
        with pytest.raises(ValueError):
            await client.set_reward_targets(None, added_char)

        replaced_char = new_ph_3_encoded[0:-1] + "a"
        with pytest.raises(ValueError):
            await client.set_reward_targets(None, replaced_char)

        assert len((await client.get_pool_state())["pool_state"]) == 0
        all_sks = farmer_api.farmer.local_keychain.get_all_private_keys()
        auth_sk = master_sk_to_pooling_authentication_sk(all_sks[0][0], 2, 1)
        pool_list = [
            {
                "launcher_id": "ae4ef3b9bfe68949691281a015a9c16630fc8f66d48c19ca548fb80768791afa",
                "authentication_public_key": bytes(auth_sk.get_g1()).hex(),
                "owner_public_key": "84c3fcf9d5581c1ddc702cb0f3b4a06043303b334dd993ab42b2c320ebfa98e5ce558448615b3f69638ba92cf7f43da5",
                "payout_instructions": "c2b08e41d766da4116e388357ed957d04ad754623a915f3fd65188a8746cf3e8",
                "pool_url": "localhost",
                "p2_singleton_puzzle_hash": "16e4bac26558d315cded63d4c5860e98deb447cc59146dd4de06ce7394b14f17",
                "target_puzzle_hash": "344587cf06a39db471d2cc027504e8688a0a67cce961253500c956c73603fd58",
            }
        ]
        config["pool"]["pool_list"] = pool_list
        save_config(root_path, "config.yaml", config)
        await farmer_api.farmer.update_pool_state()

        pool_state = (await client.get_pool_state())["pool_state"]
        assert len(pool_state) == 1
        assert (
            pool_state[0]["pool_config"]["payout_instructions"]
            == "c2b08e41d766da4116e388357ed957d04ad754623a915f3fd65188a8746cf3e8"
        )
        await client.set_payout_instructions(hexstr_to_bytes(pool_state[0]["pool_config"]["launcher_id"]), "1234vy")
        await farmer_api.farmer.update_pool_state()
        pool_state = (await client.get_pool_state())["pool_state"]
        assert pool_state[0]["pool_config"]["payout_instructions"] == "1234vy"
    finally:
        # Checks that the RPC manages to stop the node
        client.close()
        client_2.close()
        await client.await_closed()
        await client_2.await_closed()
        await rpc_cleanup()
        await rpc_cleanup_2()
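# These tests poll via `time_out_assert`. A hedged sketch of such a helper
# (assumed shape; the project's helper also accepts the expected value
# positionally, e.g. `time_out_assert(15, have_connections, True)`):
import asyncio
import time


async def time_out_assert(timeout: float, function, value=True, **kwargs):
    start = time.monotonic()
    while time.monotonic() - start < timeout:
        result = function(**kwargs)
        if asyncio.iscoroutine(result):
            result = await result
        if result == value:
            return
        await asyncio.sleep(0.05)
    assert False, f"Timed out after {timeout}s waiting for {getattr(function, '__name__', function)}"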
def refresh_batch(self) -> PlotRefreshResult:
    start_time: float = time.time()
    plot_filenames: Dict[Path, List[Path]] = get_plot_filenames(self.root_path)
    all_filenames: List[Path] = []
    for paths in plot_filenames.values():
        all_filenames += paths

    result: PlotRefreshResult = PlotRefreshResult()
    counter_lock = threading.Lock()

    log.debug(f"refresh_batch: {len(all_filenames)} files in directories {get_plot_directories(self.root_path)}")

    if self.match_str is not None:
        log.info(f'Only loading plots that contain "{self.match_str}" in the file or directory name')

    def process_file(file_path: Path) -> Dict:
        new_provers: Dict[Path, PlotInfo] = {}
        filename_str = str(file_path)
        if self.match_str is not None and self.match_str not in filename_str:
            return new_provers
        if file_path.exists():
            if (
                file_path in self.failed_to_open_filenames
                and (time.time() - self.failed_to_open_filenames[file_path]) < 1200
            ):
                # Try once every 20 minutes to open the file
                return new_provers
            if file_path in self.plots:
                try:
                    stat_info = file_path.stat()
                except Exception as e:
                    log.error(f"Failed to open file {file_path}. {e}")
                    return new_provers
                if stat_info.st_mtime == self.plots[file_path].time_modified:
                    new_provers[file_path] = self.plots[file_path]
                    return new_provers
            entry: Optional[Tuple[str, Set[str]]] = self.plot_filename_paths.get(file_path.name)
            if entry is not None:
                loaded_parent, duplicates = entry
                if str(file_path.parent) in duplicates:
                    log.debug(f"Skip duplicated plot {str(file_path)}")
                    return new_provers
            try:
                with counter_lock:
                    if result.processed_files >= self.refresh_parameter.batch_size:
                        result.remaining_files += 1
                        return new_provers
                    result.processed_files += 1

                prover = DiskProver(str(file_path))

                log.debug(f"process_file {str(file_path)}")

                expected_size = _expected_plot_size(prover.get_size()) * UI_ACTUAL_SPACE_CONSTANT_FACTOR
                stat_info = file_path.stat()

                # TODO: consider checking if the file was just written to (which would mean that the file is still
                # being copied). A segfault might happen in this edge case.

                if prover.get_size() >= 30 and stat_info.st_size < 0.98 * expected_size:
                    log.warning(
                        f"Not farming plot {file_path}. Size is {stat_info.st_size / (1024 ** 3)} GiB, but expected"
                        f" at least: {expected_size / (1024 ** 3)} GiB. We assume the file is being copied."
                    )
                    return new_provers

                (
                    pool_public_key_or_puzzle_hash,
                    farmer_public_key,
                    local_master_sk,
                ) = parse_plot_info(prover.get_memo())

                # Only use plots that have the correct keys associated with them
                if self.farmer_public_keys is not None and farmer_public_key not in self.farmer_public_keys:
                    log.warning(f"Plot {file_path} has a farmer public key that is not in the farmer's pk list.")
                    self.no_key_filenames.add(file_path)
                    if not self.open_no_key_filenames:
                        return new_provers

                if isinstance(pool_public_key_or_puzzle_hash, G1Element):
                    pool_public_key = pool_public_key_or_puzzle_hash
                    pool_contract_puzzle_hash = None
                else:
                    assert isinstance(pool_public_key_or_puzzle_hash, bytes32)
                    pool_public_key = None
                    pool_contract_puzzle_hash = pool_public_key_or_puzzle_hash

                if (
                    self.pool_public_keys is not None
                    and pool_public_key is not None
                    and pool_public_key not in self.pool_public_keys
                ):
                    log.warning(f"Plot {file_path} has a pool public key that is not in the farmer's pool pk list.")
                    self.no_key_filenames.add(file_path)
                    if not self.open_no_key_filenames:
                        return new_provers

                stat_info = file_path.stat()
                local_sk = master_sk_to_local_sk(local_master_sk)

                plot_public_key: G1Element = ProofOfSpace.generate_plot_public_key(
                    local_sk.get_g1(), farmer_public_key, pool_contract_puzzle_hash is not None
                )

                with self.plot_filename_paths_lock:
                    if file_path.name not in self.plot_filename_paths:
                        self.plot_filename_paths[file_path.name] = (str(Path(prover.get_filename()).parent), set())
                    else:
                        self.plot_filename_paths[file_path.name][1].add(str(Path(prover.get_filename()).parent))
                    if len(self.plot_filename_paths[file_path.name][1]) > 0:
                        log.warning(
                            f"Have multiple copies of the plot {file_path} in "
                            f"{self.plot_filename_paths[file_path.name][1]}."
                        )
                        return new_provers

                new_provers[file_path] = PlotInfo(
                    prover,
                    pool_public_key,
                    pool_contract_puzzle_hash,
                    plot_public_key,
                    stat_info.st_size,
                    stat_info.st_mtime,
                )

                with counter_lock:
                    result.loaded_plots += 1
                    result.loaded_size += stat_info.st_size
            except Exception as e:
                tb = traceback.format_exc()
                log.error(f"Failed to open file {file_path}. {e} {tb}")
                self.failed_to_open_filenames[file_path] = int(time.time())
                return new_provers
            log.info(f"Found plot {file_path} of size {new_provers[file_path].prover.get_size()}")

            if self.show_memo:
                plot_memo: bytes32
                if pool_contract_puzzle_hash is None:
                    plot_memo = stream_plot_info_pk(pool_public_key, farmer_public_key, local_master_sk)
                else:
                    plot_memo = stream_plot_info_ph(pool_contract_puzzle_hash, farmer_public_key, local_master_sk)
                plot_memo_str: str = plot_memo.hex()
                log.info(f"Memo: {plot_memo_str}")

            return new_provers
        return new_provers

    def reduce_function(x: Dict, y: Dict) -> Dict:
        return {**x, **y}

    with self, ThreadPoolExecutor() as executor:
        # First drop all plots we have in plot_filename_paths but are no longer in the filesystem or set in config
        def plot_removed(test_path: Path):
            return not test_path.exists() or test_path.parent not in plot_filenames

        with self.plot_filename_paths_lock:
            filenames_to_remove: List[str] = []
            for plot_filename, paths_entry in self.plot_filename_paths.items():
                loaded_path, duplicated_paths = paths_entry
                if plot_removed(Path(loaded_path) / Path(plot_filename)):
                    filenames_to_remove.append(plot_filename)
                    result.removed_plots += 1
                    # No need to check the duplicates here since we drop the whole entry
                    continue

                paths_to_remove: List[str] = []
                for path in duplicated_paths:
                    if plot_removed(Path(path) / Path(plot_filename)):
                        paths_to_remove.append(path)
                        result.removed_plots += 1
                for path in paths_to_remove:
                    duplicated_paths.remove(path)

            for filename in filenames_to_remove:
                del self.plot_filename_paths[filename]

        initial_value: Dict[Path, PlotInfo] = {}
        self.plots = reduce(reduce_function, executor.map(process_file, all_filenames), initial_value)

    result.duration = time.time() - start_time

    self.log.debug(
        f"refresh_batch: loaded_plots {result.loaded_plots}, "
        f"loaded_size {result.loaded_size / (1024 ** 4):.2f} TiB, "
        f"removed_plots {result.removed_plots}, processed_plots {result.processed_files}, "
        f"remaining_plots {result.remaining_files}, batch_size {self.refresh_parameter.batch_size}, "
        f"duration: {result.duration:.2f} seconds"
    )
    return result
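# The `reduce_function` dict merge above means results from later batches win on
# key collisions; an illustrative example (not project code):
from functools import reduce

merged = reduce(lambda x, y: {**x, **y}, [{"a": 1}, {"b": 2}, {"a": 3}], {})
assert merged == {"a": 3, "b": 2}  # later dicts override earlier keys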