def save_dataset(cls, dataset: Dataset, *, module=None, module_data=None) -> None:
    """
    Save a dataset to local storage.

    :param dataset: dataset to save
    :param module: dataset module; defaults to ``dataset.module`` when omitted
    :param module_data: zipped module bytes, used instead of ``module`` when given
    """
    if module is None:
        module = dataset.module
    dataset_path = DatasetPath(dataset.dataset_id)
    dataset_path.makedirs()
    cls.__save_json(dataset_path.metadata_file, dataset.metadata)
    cls.__save_json(dataset_path.dataset_config_file, dataset.dataset_config)
    if module_data is not None:
        # A zipped module was supplied: unpack it into the temp dir, then move
        # it into the dataset's module directory.
        ZipUtils.extract_data(module_data, GflConf.temp_dir)
        ModuleUtils.migrate_module(
            PathUtils.join(GflConf.temp_dir, dataset.dataset_id),
            dataset_path.module_name,
            dataset_path.module_dir)
    else:
        # module already defaults to dataset.module above, so no further
        # fallback branch is needed.
        ModuleUtils.submit_module(module, dataset_path.module_name, dataset_path.module_dir)
def save_job(cls, job: Job, *, module=None, module_data=None) -> None:
    """
    Save a job to local storage.

    :param job: job to save
    :param module: job module; defaults to ``job.module`` when omitted
    :param module_data: zipped module bytes, used instead of ``module`` when given
    """
    if module is None:
        module = job.module
    job_path = JobPath(job.job_id)
    job_path.makedirs()
    cls.__save_json(job_path.metadata_file, job.metadata)
    cls.__save_json(job_path.job_config_file, job.job_config)
    cls.__save_json(job_path.train_config_file, job.train_config)
    cls.__save_json(job_path.aggregate_config_file, job.aggregate_config)
    if module_data is not None:
        # A zipped module was supplied: unpack it into the temp dir, then move
        # it into the job's module directory.
        ZipUtils.extract_data(module_data, GflConf.temp_dir)
        ModuleUtils.migrate_module(
            PathUtils.join(GflConf.temp_dir, job.job_id),
            job_path.module_name,
            job_path.module_dir)
    else:
        ModuleUtils.submit_module(module, job_path.module_name, job_path.module_dir)
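# Usage sketch for the two save paths above ("Lfs" stands in for the enclosing
# storage-manager class, and the Job/Dataset constructors are not shown in this
# file, so treat the names as illustrative):
#
#     Lfs.save_job(job)                          # module taken from job.module
#     Lfs.save_job(job, module=my_module)        # explicit module object
#     Lfs.save_job(job, module_data=zip_bytes)   # zipped module received from a peer
#
# save_dataset accepts the same keyword pair for datasets.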
def save_job_zip(cls, job_id: str, job: File) -> None:
    """
    Save the zip file of a job.

    :param job_id: job ID
    :param job: zip file wrapper, carrying either an IPFS hash or raw bytes
    """
    job_path = JobPath(job_id)
    if job.ipfs_hash is not None and job.ipfs_hash != "":
        # Prefer the IPFS copy when a hash is present.
        file_obj = Ipfs.get(job.ipfs_hash)
    else:
        file_obj = job.file
    ZipUtils.extract(file_obj, job_path.root_dir)
def save_dataset_zip(cls, dataset_id: str, dataset: File) -> None:
    """
    Save the zip file of a dataset.

    :param dataset_id: dataset ID
    :param dataset: zip file wrapper, carrying either an IPFS hash or raw bytes
    """
    dataset_path = DatasetPath(dataset_id)
    if dataset.ipfs_hash is not None and dataset.ipfs_hash != "":
        # Prefer the IPFS copy when a hash is present.
        file_obj = Ipfs.get(dataset.ipfs_hash)
    else:
        file_obj = dataset.file
    ZipUtils.extract(file_obj, dataset_path.root_dir)
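# save_job_zip / save_dataset_zip above rely on File exposing two alternative
# payloads. A minimal sketch of that shape, as assumed here (the real class is
# defined elsewhere in gfl and may carry more fields):
#
#     @dataclass
#     class File:
#         ipfs_hash: Optional[str] = None   # set when the zip was published to IPFS
#         file: Optional[BytesIO] = None    # in-memory zip bytes otherwise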
def load_dataset_zip(cls, dataset_id: str) -> File:
    """
    Load a dataset as a zip file.

    :param dataset_id: dataset ID
    """
    dataset_path = DatasetPath(dataset_id)
    file_obj = BytesIO()
    ZipUtils.compress(dataset_path.metadata_file, file_obj)
    if GflConf.get_property("ipfs.enabled"):
        # Publish the zip to IPFS and ship only the hash.
        file_obj.seek(0)
        ipfs_hash = Ipfs.put(file_obj.read())
        return File(ipfs_hash=ipfs_hash, file=None)
    else:
        return File(ipfs_hash=None, file=file_obj)
def load_job_zip(cls, job_id: str) -> File:
    """
    Load a job as a zip file.

    :param job_id: job ID
    """
    job_path = JobPath(job_id)
    file_obj = BytesIO()
    ZipUtils.compress([job_path.metadata_file, job_path.config_dir], file_obj)
    if GflConf.get_property("ipfs.enabled"):
        # Publish the zip to IPFS and ship only the hash.
        file_obj.seek(0)
        ipfs_hash = Ipfs.put(file_obj.read())
        return File(ipfs_hash=ipfs_hash, file=None)
    else:
        return File(ipfs_hash=None, file=file_obj)
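# Round-trip sketch pairing the load_*/save_*_zip methods across two nodes
# ("Lfs" again stands in for the enclosing class; the transport between the
# two calls is outside this file):
#
#     packed = Lfs.load_job_zip(job_id)    # sender: zip + optional IPFS publish
#     # ... send `packed` (hash or raw bytes) to the peer ...
#     Lfs.save_job_zip(job_id, packed)     # receiver: fetch/extract into JobPath
#
# With ipfs.enabled only the hash travels; otherwise the zip bytes themselves do.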
def pack_dataset(dataset: Dataset):
    # Serialize the dataset header as JSON bytes and zip its module directory;
    # returns (json_bytes, module_zip_bytes).
    data = {
        "dataset_id": dataset.dataset_id,
        "metadata": dataset.metadata.to_dict(),
        "dataset_config": dataset.dataset_config.to_dict()
    }
    module_path = ModuleUtils.get_module_path(dataset.module)
    module_zip_data = ZipUtils.get_compress_data(module_path, dataset.dataset_id)
    return json.dumps(data).encode("utf-8"), module_zip_data
def pack_job(job: Job):
    # Serialize the job header as JSON bytes and zip its module directory;
    # returns (json_bytes, module_zip_bytes).
    data = {
        "job_id": job.job_id,
        "metadata": job.metadata.to_dict(),
        "job_config": job.job_config.to_dict(),
        "train_config": job.train_config.to_dict(),
        "aggregate_config": job.aggregate_config.to_dict()
    }
    module_path = ModuleUtils.get_module_path(job.module)
    module_zip_data = ZipUtils.get_compress_data(module_path, job.job_id)
    return json.dumps(data).encode("utf-8"), module_zip_data
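# A minimal unpack sketch mirroring pack_job, under the assumption that the
# receiver feeds the two payloads back into save_job (this helper is
# illustrative and does not exist in gfl):
#
#     def unpack_job(data: bytes, module_zip_data: bytes) -> dict:
#         header = json.loads(data.decode("utf-8"))   # job_id + config dicts
#         # save_job(..., module_data=module_zip_data) unpacks the module zip
#         # via ZipUtils.extract_data and migrates it into the job directory.
#         return header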
from gfl.utils import ZipUtils

if __name__ == "__main__":
    # Smoke test: zip this directory's __init__.py, then extract the archive
    # into ./t2.
    data = ZipUtils.get_compress_data("__init__.py", "")
    ZipUtils.extract_data(data, "t2")