def _sha1_of_file(path, chunk_size=1 << 20):
    """Return the hexadecimal SHA-1 digest of the file at ``path``, read in chunks."""
    digest = hashlib.sha1()
    with open(path, "rb") as source:
        for chunk in iter(lambda: source.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()


def _recorded_checksum(yaml_path):
    """Return ``checksum_sha1`` of the last document in ``yaml_path``, or None if it has no documents."""
    recorded = None
    with open(yaml_path) as f:
        for data in yaml.safe_load_all(f):
            recorded = data["checksum_sha1"]
    return recorded


def main(output, datasets, checksum):
    """Prepare a YAML metadata file for each dataset path.

    For every entry in ``datasets`` the documents returned by
    ``prepare_dataset`` are written to ``<output>/<dataset name>.yaml``.
    A dataset whose YAML file already exists is skipped; when ``checksum`` is
    truthy it is skipped only if the ``checksum_sha1`` recorded in that YAML
    equals the SHA-1 of the dataset file, otherwise it is prepared again.

    Args:
        output: Directory in which the ``.yaml`` files are written.
        datasets: Iterable of dataset paths (a directory, or an ``.xml`` /
            ``.zip`` file).
        checksum: When truthy, compare the recorded SHA-1 with the dataset
            file before deciding to skip an existing output.

    Raises:
        RuntimeError: If a dataset is not a directory and its suffix is
            neither ``.xml`` nor ``.zip``.
        OSError: If a dataset path cannot be stat'ed (e.g. it does not exist).
    """
    logging.basicConfig(format="%(asctime)s %(levelname)s %(message)s", level=logging.INFO)

    for dataset in datasets:
        # Fail fast on a missing/inaccessible path; the stat fields themselves
        # are not needed.
        os.stat(dataset)

        path = Path(dataset)
        if path.is_dir():
            # NOTE(review): joining a path onto itself yields "<dir>/<dir>" for
            # a relative path and the same path for an absolute one. Kept as-is
            # because the intended target inside the directory is not visible
            # here -- confirm.
            path = Path(path.joinpath(path))
        elif path.suffix not in [".xml", ".zip"]:
            raise RuntimeError("want xml or zipped archive")

        logging.info("Processing %s", path)
        output_path = Path(output)
        yaml_path = output_path.joinpath(path.name + ".yaml")
        logging.info("Output %s", yaml_path)

        if yaml_path.exists():
            logging.info("Output already exists %s", yaml_path)
            if not checksum:
                logging.info("Dataset preparation already done...SKIPPING")
                continue

            logging.info("Running checksum comparison")
            recorded = _recorded_checksum(yaml_path)
            # An empty YAML file records no checksum, so it can never count as
            # "already prepared"; the source is hashed once, only when needed.
            if recorded is not None and recorded == _sha1_of_file(path):
                logging.info("Dataset preparation already done...SKIPPING")
                continue

        documents = prepare_dataset(path)
        if documents:
            logging.info("Writing %s dataset(s) into %s", len(documents), yaml_path)
            with open(yaml_path, "w") as stream:
                yaml.safe_dump_all(documents, stream)
        else:
            logging.info("No datasets discovered. Bye!")
def dump_all(documents, stream=None, **kwargs):
    """Serializes several objects as a multi-document YAML stream.

    The documents are handed to yaml.safe_dump_all with flow style disabled
    and an indent of two.

    Args:
      documents: An iterable of YAML serializable Python objects, one per
        document.
      stream: The stream that receives the output, or None to get the output
        back as a string.
      **kwargs: Additional keyword arguments forwarded to the dump call.

    Returns:
      The YAML text if stream is None.
    """
    serialize = yaml.safe_dump_all
    return serialize(
        documents,
        stream=stream,
        default_flow_style=False,
        indent=2,
        **kwargs
    )
def dump_all(documents, stream=None, **kwargs):
    # type: (Iterable[Any], Optional[IO[AnyStr]], Any) -> str
    """Writes a sequence of objects out as separate YAML documents.

    Output is produced by yaml.safe_dump_all with flow style turned off and
    an indent of two.

    Args:
      documents: An iterable of YAML serializable Python objects to dump.
      stream: The destination stream, or None to have the data returned as a
        string.
      **kwargs: Any further arguments to pass through to the dump method.

    Returns:
      The string representation of the YAML data if stream is None.
    """
    # NOTE(review): the type comment promises str, while the docstring only
    # promises a string when stream is None -- confirm whether Optional[str]
    # is the intended return type.
    dumped = yaml.safe_dump_all(
        documents,
        stream=stream,
        default_flow_style=False,
        indent=2,
        **kwargs
    )
    return dumped
def dump_all(
    documents,  # type: typing.Iterable[typing.Any]
    stream=None,  # type: typing.Optional[typing.IO[typing.AnyStr]]
    **kwargs  # type: typing.Any
):
    # type: (...) -> str
    """Emits each of the given objects as its own YAML document.

    Delegates to yaml.safe_dump_all, always requesting non-flow style and an
    indent of two.

    Args:
      documents: An iterable of YAML serializable Python objects to dump.
      stream: Where to write the data; pass None to receive it as a string
        instead.
      **kwargs: Remaining arguments, forwarded unchanged to the dump method.

    Returns:
      The string representation of the YAML data if stream is None.
    """
    return yaml.safe_dump_all(
        documents,
        stream,
        default_flow_style=False,
        indent=2,
        **kwargs
    )