def unminify(
    files: List[str],
    output_dir: Path,
    execution: str = "mp",
    parallelism: int = -1,
    cache_dir: Path = None,
):
    """Unminify the given files, writing one output per input into `output_dir`.

    Args:
        files: paths of the files to process. If this is a single entry that
            points to a directory, every `*.json.gz` file directly inside it
            is processed instead (in sorted order).
        output_dir: directory receiving the outputs; each output keeps the
            base name of its input. Created if missing (parents are not).
        execution: execution mode forwarded to `get_executor`.
        parallelism: forwarded to `get_executor` as `task_parallelism`.
        cache_dir: passed to every `unminify_file` call; defaults to
            `output_dir / "wet_cache"`.

    Inputs whose output file already exists are skipped, so an interrupted
    run can be restarted. Returns None in every case.
    """
    if len(files) == 1 and Path(files[0]).is_dir():
        folder = Path(files[0])
        files = [str(f) for f in sorted(folder.glob("*.json.gz"))]
        print(f"Found {len(files)} files under {folder}/*.json.gz")

    assert len(files) > 0, "No files given."
    output_dir.mkdir(exist_ok=True)
    if cache_dir is None:
        cache_dir = output_dir / "wet_cache"

    # Pair each input with its output and check existence exactly once per
    # pair: filtering the two lists separately could leave them misaligned if
    # an output file appears between the two passes.
    pending = [(f, output_dir / Path(f).name) for f in files]
    pending = [(f, o) for f, o in pending if not o.exists()]
    if not pending:
        return
    files = [f for f, _ in pending]
    outputs = [o for _, o in pending]

    ex = get_executor(
        "unminify",
        output_dir / "logs",
        execution,
        timeout_hour=8,
        cpus=1,
        task_parallelism=parallelism,
        mem_gb=32,
    )
    # The same cache directory is shared by every task.
    ex(unminify_file, files, outputs, itertools.repeat(cache_dir))
def get_executor(
    self, name: str, timeout_hour: int = 1, mem_gb: int = 1, cpus: int = 1
) -> Executor:
    """Return an executor for a named step, configured from this object.

    The job name is `name` joined by underscores with `self.config_name` and
    every entry of `self.experiments`. Logs go under `self.output_dir / "logs"`;
    the execution mode and task parallelism are taken from `self`, while the
    resource limits come from the arguments.
    """
    name_parts = [name, self.config_name]
    name_parts.extend(self.experiments)
    job_name = "_".join(name_parts)
    log_dir = self.output_dir / "logs"
    return execution.get_executor(
        job_name,
        log_dir,
        self.execution,
        timeout_hour=timeout_hour,
        mem_gb=mem_gb,
        cpus=cpus,
        task_parallelism=self.task_parallelism,
    )
def minify(
    files: List[Path], output_dir: Path, execution: str = "mp", parallelism: int = -1
):
    """Minify the given files, writing one output per input into `output_dir`.

    `files` is first passed through `_expand_files`. The base name of every
    resulting file is recorded, one per line, in `output_dir / "files.txt"`,
    and each file is then handed to `minify_file` together with an output path
    of the same base name inside `output_dir`.

    `execution` and `parallelism` are forwarded to `get_executor`.
    """
    files = _expand_files(files)
    output_dir.mkdir(exist_ok=True)

    # Keep a manifest of the processed file names next to the outputs.
    manifest = output_dir / "files.txt"
    with open(manifest, "w") as listing:
        listing.writelines(f"{src.name}\n" for src in files)

    outputs = [output_dir / src.name for src in files]
    run = get_executor(
        "minify",
        output_dir / "logs",
        execution,
        timeout_hour=2,
        cpus=1,
        task_parallelism=parallelism,
    )
    run(minify_file, files, outputs)