def _process(self, input_pack: DataPack):
    sub_path = self.sub_output_path(input_pack)
    if sub_path == '':
        raise ValueError(
            "No concrete path provided from sub_output_path.")
    maybe_create_dir(self.configs.output_dir)
    write_pack(input_pack, self.configs.output_dir, sub_path,
               self.configs.indent, self.configs.zip_pack,
               self.configs.overwrite)
def _process(self, input_pack: DataPack):
    sub_path = self.sub_output_path(input_pack)
    if sub_path is not None and not sub_path == "":
        # The sub path could be empty, in which case we skip writing the file.
        maybe_create_dir(self.configs.output_dir)
        write_pack(
            input_pack,
            self.configs.output_dir,
            sub_path,
            self.configs.indent,
            self.configs.zip_pack,
            self.configs.overwrite,
            self.configs.drop_record,
        )
def _process(self, input_pack: PackType):
    sub_path = self.sub_output_path(input_pack)
    if sub_path == '':
        raise ValueError(
            "No concrete path provided from sub_output_path.")
    maybe_create_dir(self.root_output_dir)
    p = os.path.join(self.root_output_dir, sub_path)

    if self.zip_pack:
        with gzip.open(p + '.gz', 'wt') as out:
            out.write(input_pack.serialize())
    else:
        with open(p, 'w') as out:
            out.write(input_pack.serialize())
def maybe_download(urls, path, filenames=None, extract=False):
    r"""Downloads a set of files.

    Args:
        urls: A (list of) URLs to download files from.
        path (str): The destination path to save the files.
        filenames: A (list of) strings of the file names. If given, must
            have the same length as :attr:`urls`. If `None`, filenames
            are extracted from :attr:`urls`.
        extract (bool): Whether to extract compressed files.

    Returns:
        A list of paths to the downloaded files.
    """
    maybe_create_dir(path)

    if not isinstance(urls, (list, tuple)):
        is_list = False
        urls = [urls]
    else:
        is_list = True

    if filenames is not None:
        if not isinstance(filenames, (list, tuple)):
            filenames = [filenames]
        if len(urls) != len(filenames):
            raise ValueError(
                '`filenames` must have the same number of elements as '
                '`urls`.')

    result = []
    for i, url in enumerate(urls):
        if filenames is not None:
            filename = filenames[i]
        elif 'drive.google.com' in url:
            filename = _extract_google_drive_file_id(url)
        else:
            filename = url.split('/')[-1]
            # If downloading from GitHub, remove the suffix ?raw=true
            # from the local filename.
            if filename.endswith("?raw=true"):
                filename = filename[:-9]

        filepath = os.path.join(path, filename)
        result.append(filepath)

        # if not tf.gfile.Exists(filepath):
        if not os.path.exists(filepath):
            if 'drive.google.com' in url:
                filepath = _download_from_google_drive(url, filename, path)
            else:
                filepath = _download(url, filename, path)

            if extract:
                logging.info('Extract %s', filepath)
                if tarfile.is_tarfile(filepath):
                    with tarfile.open(filepath, 'r') as tfile:
                        tfile.extractall(path)
                elif zipfile.is_zipfile(filepath):
                    with zipfile.ZipFile(filepath) as zfile:
                        zfile.extractall(path)
                else:
                    logging.info("Unknown compression type. Only .tar.gz, "
                                 ".tar.bz2, .tar, and .zip are supported.")

    if not is_list:
        return result[0]
    return result
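# A minimal usage sketch for maybe_download. The URLs, filenames, and the
# "./data" destination below are hypothetical and only illustrate the call
# signature documented above:
#
#     # Single URL; the local filename is inferred from the URL and the
#     # archive is extracted into the destination directory.
#     train_path = maybe_download(
#         "https://example.com/datasets/train.zip", "./data", extract=True)
#
#     # Multiple URLs with explicit local filenames; returns a list of
#     # local paths, one per URL.
#     paths = maybe_download(
#         ["https://example.com/a.txt", "https://example.com/b.txt"],
#         "./data", filenames=["a.txt", "b.txt"])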