def add_extra_filename_values(filename_format, rows, urls, dry_run):
    """Extend `rows` with values for special formatting fields.

    Every row is updated in place; the function returns nothing.

    Parameters
    ----------
    filename_format
        Format specification handed to `get_fmt_names`.  The field names it
        reports decide which extra values are added: any name starting with
        "_url" triggers the URL parts, any name starting with
        "_url_filename" triggers the file-name parts.
    rows : sequence of dict
        Rows to extend.  Paired positionally with `urls`.
    urls : sequence
        One URL per row.  Must support ``len()`` when file names are
        requested.
    dry_run : bool
        If true, no file names are requested.  The "_url_filename" fields
        are instead filled from the parts of the placeholder "BASE.EXT",
        each value suffixed with the row's index.

    Raises
    ------
    ValueError
        If `get_url_filename` yields nothing for a URL.
    """
    file_fields = list(get_fmt_names(filename_format))

    if any(i.startswith("_url") for i in file_fields):
        for row, url in zip(rows, urls):
            row.update(get_url_parts(url))

    if not any(i.startswith("_url_filename") for i in file_fields):
        return

    if dry_run:
        # Don't waste time making requests.
        dummy = get_file_parts("BASE.EXT", "_url_filename")
        for idx, row in enumerate(rows):
            row.update({k: v + str(idx) for k, v in dummy.items()})
        return

    pbar = ui.get_progressbar(total=len(urls),
                              label="Requesting names", unit=" Files")
    try:
        for row, url in zip(rows, urls):
            # If we run into any issues here, we're just going to raise an
            # exception and then abort inside dlplugin.  It'd be good to
            # disentangle this from `extract` so that we could yield an
            # individual error, drop the row, and keep going.
            filename = get_url_filename(url)
            if not filename:
                raise ValueError(
                    "{} does not contain a filename".format(url))
            row.update(get_file_parts(filename, "_url_filename"))
            pbar.update(1, increment=True)
    finally:
        # Finish the progress bar even when a request fails, so an aborted
        # run does not leave it dangling.
        pbar.finish()
def add_extra_filename_values(filename_format, rows, urls, dry_run):
    """Extend `rows` with values for special formatting fields.

    Every row is updated in place; the function returns nothing.

    Parameters
    ----------
    filename_format
        Format specification handed to `get_fmt_names`.  The field names it
        reports decide which extra values are added: any name starting with
        "_url" triggers the URL parts, any name starting with
        "_url_filename" triggers the file-name parts.
    rows : sequence of dict
        Rows to extend.  Paired positionally with `urls`.
    urls : sequence
        One URL per row.  Must support ``len()`` when file names are
        requested.
    dry_run : bool
        If true, no file names are requested.  The "_url_filename" fields
        are instead filled from the parts of the placeholder "BASE.EXT",
        each value suffixed with the row's index.

    Raises
    ------
    ValueError
        If `get_url_filename` yields nothing for a URL.
    """
    file_fields = list(get_fmt_names(filename_format))

    if any(i.startswith("_url") for i in file_fields):
        for row, url in zip(rows, urls):
            row.update(get_url_parts(url))

    if not any(i.startswith("_url_filename") for i in file_fields):
        return

    if dry_run:
        # Don't waste time making requests.
        dummy = get_file_parts("BASE.EXT", "_url_filename")
        for idx, row in enumerate(rows):
            row.update({k: v + str(idx) for k, v in dummy.items()})
        return

    num_urls = len(urls)
    log_progress(lgr.info, "addurls_requestnames",
                 "Requesting file names for %d URLs", num_urls,
                 label="Requesting names", total=num_urls,
                 unit=" Files")
    try:
        for row, url in zip(rows, urls):
            # If we run into any issues here, we're just going to raise an
            # exception and then abort inside dlplugin.  It'd be good to
            # disentangle this from `extract` so that we could yield an
            # individual error, drop the row, and keep going.
            filename = get_url_filename(url)
            if not filename:
                raise ValueError(
                    "{} does not contain a filename".format(url))
            row.update(get_file_parts(filename, "_url_filename"))
            # The message reads "<filename> returned for <url>", so the
            # file name has to be the first argument.
            log_progress(lgr.info, "addurls_requestnames",
                         "%s returned for %s", filename, url,
                         update=1, increment=True)
    finally:
        # Emit the closing progress message even when a request fails.
        # NOTE(review): presumably this call (no `update`/`total`) is what
        # ends the "addurls_requestnames" progress report -- confirm against
        # `log_progress`.
        log_progress(lgr.info, "addurls_requestnames",
                     "Finished requesting file names")