def initialize(self, resources: Resources, configs: Config):
    """Initialize the writer and open its output file.

    Runs the parent initialization, checks that the required configuration
    values are present, prepares the destination through
    ``utils_io.ensure_dir`` and opens ``configs.output_path`` for writing.
    The handle is stored in ``self._tbf_out`` and is still open when this
    method returns.

    Args:
        resources: Shared resources, forwarded to the parent initializer.
        configs: Writer configuration. Both ``output_path`` and
            ``system_name`` must be set.

    Raises:
        AssertionError: If ``output_path`` or ``system_name`` is ``None``.
    """
    super().initialize(resources, configs)

    assert configs.output_path is not None, \
        "`output_path` must be set for the writer."
    assert configs.system_name is not None, \
        "`system_name` must be set for the writer."

    # NOTE(review): ensure_dir is expected to create the missing directory
    # for this path -- confirm against utils_io.
    utils_io.ensure_dir(configs.output_path)

    # Pin the encoding so the output does not depend on the platform's
    # locale default.
    self._tbf_out = open(configs.output_path, "w", encoding="utf-8")
def initialize(self, resources: Resources, configs: Config):
    """Initialize the writer and open the two index files.

    After the parent initialization, two files are opened for writing
    inside ``self.configs.output_dir``: one named by ``self.pack_idx``
    (handle kept in ``self.pack_idx_out``) and one named by
    ``self.multi_idx`` (handle kept in ``self.multi_idx_out``). Both
    handles are still open when this method returns.

    Args:
        resources: Shared resources, forwarded to the parent initializer.
        configs: Writer configuration, forwarded to the parent initializer.
            The output directory is read from ``self.configs`` afterwards
            (presumably populated by the parent ``initialize`` -- confirm).
    """
    # pylint: disable=attribute-defined-outside-init
    super().initialize(resources, configs)

    pack_index = os.path.join(self.configs.output_dir, self.pack_idx)
    ensure_dir(pack_index)
    # Explicit UTF-8 keeps the index files independent of the platform's
    # locale default encoding.
    self.pack_idx_out = open(pack_index, "w", encoding="utf-8")

    multi_index = os.path.join(self.configs.output_dir, self.multi_idx)
    ensure_dir(multi_index)
    self.multi_idx_out = open(multi_index, "w", encoding="utf-8")
def write_pack(input_pack: BasePack, output_dir: str, sub_path: str,
               indent: Optional[int] = None, zip_pack: bool = False,
               overwrite: bool = False, drop_record: bool = False) -> str:
    """
    Write a pack to a path.

    The pack is serialized and stored at ``<output_dir>/<sub_path>.json``,
    or at ``<output_dir>/<sub_path>.json.gz`` when ``zip_pack`` is set.

    Args:
        input_pack: A Pack to be written.
        output_dir: The output directory.
        sub_path: The file name for this pack, without the extension.
        indent: Indentation used to pretty-print the JSON. ``None`` (or 0)
            leaves the serialized string untouched.
        zip_pack: Whether to gzip the output JSON.
        overwrite: Whether to overwrite the file if it already exists.
        drop_record: Whether to drop the creation records in the
            serialization.

    Returns:
        The path of the output file. The path is also returned when the
        file already exists and ``overwrite`` is False; in that case
        nothing is written.
    """
    output_path = os.path.join(output_dir, sub_path) + '.json'
    if zip_pack:
        # Resolve the final file name before the existence check, so that
        # the check looks at the file that would actually be written.
        output_path = output_path + '.gz'

    if not overwrite and os.path.exists(output_path):
        logging.info("Will not overwrite existing path %s", output_path)
        return output_path

    ensure_dir(output_path)

    out_str: str = input_pack.serialize(drop_record)
    if indent:
        # Round-trip through the json module to apply the indentation.
        out_str = json.dumps(json.loads(out_str), indent=indent)

    logging.info("Writing a pack to %s", output_path)

    # The encoding is explicit so the result does not depend on the
    # platform's locale default.
    if zip_pack:
        with gzip.open(output_path, 'wt', encoding='utf-8') as out:
            out.write(out_str)
    else:
        with open(output_path, 'w', encoding='utf-8') as out:
            out.write(out_str)

    return output_path
def write_pack(
    input_pack: BasePack,
    output_dir: str,
    sub_path: str,
    indent: Optional[int] = None,
    zip_pack: bool = False,
    overwrite: bool = False,
    drop_record: bool = False,
    serialize_method: str = "jsonpickle",
) -> str:
    """
    Write a pack to a path.

    The destination is ``os.path.join(output_dir, sub_path)``; this function
    does not append any extension to it.

    Args:
        input_pack: A Pack to be written.
        output_dir: The output directory.
        sub_path: The file name for this pack.
        indent: Indentation setting, forwarded to the pack's ``serialize``.
        zip_pack: Whether to zip the output, forwarded to ``serialize``.
        overwrite: Whether to overwrite the file if it already exists.
        drop_record: Whether to drop the creation records in the
            serialization.
        serialize_method: The method used to serialize the data.
            Current available options are "jsonpickle" and "pickle".
            Default is "jsonpickle".

    Returns:
        The path of the output file. The path is also returned when the
        file already exists and ``overwrite`` is False; in that case
        nothing is written.
    """
    output_path = os.path.join(output_dir, sub_path)

    # Guard clause: leave an existing file alone unless asked to replace it.
    if not overwrite and os.path.exists(output_path):
        logging.info("Will not overwrite existing path %s", output_path)
        return output_path

    ensure_dir(output_path)

    # Only announce the write when one actually happens.
    logging.info("Writing a pack to %s", output_path)
    input_pack.serialize(
        output_path,
        zip_pack=zip_pack,
        drop_record=drop_record,
        serialize_method=serialize_method,
        indent=indent,
    )
    return output_path
def initialize(self, resources: Resources, configs: Config):
    """Initialize the writer, open the index files and pick the suffix.

    After the parent initialization, an index file named by
    ``self.pack_idx`` and one named by ``self.multi_idx`` are opened for
    writing (UTF-8) under ``self.configs.output_dir``; the handles are kept
    in ``self.pack_idx_out`` and ``self.multi_idx_out``. ``self._suffix``
    is then set to ``.json`` when the serialize method is "jsonpickle" and
    to ``.pickle`` otherwise, with ``.gz`` appended when ``zip_pack`` is on.

    Args:
        resources: Shared resources, forwarded to the parent initializer.
        configs: Writer configuration, forwarded to the parent initializer.

    Raises:
        ProcessorConfigError: If ``output_dir`` is not configured.
    """
    # pylint: disable=attribute-defined-outside-init,consider-using-with
    super().initialize(resources, configs)

    target_dir = self.configs.output_dir
    if target_dir is None:
        raise ProcessorConfigError(
            "`output_dir` is not specified for the writer.")

    # Index of the single packs.
    pack_index_path = os.path.join(target_dir, self.pack_idx)
    ensure_dir(pack_index_path)
    self.pack_idx_out = open(pack_index_path, "w", encoding="utf-8")

    # Index of the multi packs.
    multi_index_path = os.path.join(target_dir, self.multi_idx)
    ensure_dir(multi_index_path)
    self.multi_idx_out = open(multi_index_path, "w", encoding="utf-8")

    # Base extension follows the serialization method; compression adds
    # a trailing ".gz".
    if self.configs.serialize_method == "jsonpickle":
        extension = ".json"
    else:
        extension = ".pickle"
    if self.configs.zip_pack:
        extension += ".gz"
    self._suffix = extension