Пример #1
0
def _sha1_of_file(path):
    """Return the hexadecimal SHA-1 digest of the file at *path*.

    The file is read in fixed-size chunks inside a ``with`` block so the
    handle is always closed and large archives are not loaded into memory
    in one piece.
    """
    digest = hashlib.sha1()
    with open(path, "rb") as stream:
        for chunk in iter(lambda: stream.read(1024 * 1024), b""):
            digest.update(chunk)
    return digest.hexdigest()


def _recorded_sha1(yaml_path):
    """Return the ``checksum_sha1`` of the last document in *yaml_path*.

    Returns None when the file contains no YAML documents at all.
    """
    recorded = None
    with open(yaml_path) as stream:
        for data in yaml.safe_load_all(stream):
            recorded = data["checksum_sha1"]
    return recorded


def main(output, datasets, checksum):
    """Prepare each dataset and write its metadata documents as YAML.

    For every entry in *datasets* the output file is
    ``<output>/<dataset name>.yaml``. When that file already exists the
    dataset is skipped, unless *checksum* is true and the SHA-1 recorded in
    the existing YAML differs from the SHA-1 of the dataset file, in which
    case the dataset is prepared again.

    Args:
        output: Directory in which the ``.yaml`` files are written.
        datasets: Iterable of dataset paths (directories, ``.xml`` files or
            ``.zip`` archives).
        checksum: When true, compare the recorded ``checksum_sha1`` with the
            dataset's current SHA-1 before deciding to skip.

    Raises:
        OSError: If a dataset path cannot be stat'ed (e.g. does not exist).
        RuntimeError: If a dataset is a file that is neither ``.xml`` nor
            ``.zip``.
    """
    logging.basicConfig(format="%(asctime)s %(levelname)s %(message)s",
                        level=logging.INFO)
    output_path = Path(output)

    for dataset in datasets:
        # Fail early if the dataset is missing or inaccessible; the stat
        # fields themselves are not needed.
        os.stat(dataset)
        path = Path(dataset)
        if path.is_dir():
            # NOTE(review): joining the directory path onto itself looks
            # unintended; kept as-is -- confirm which file inside the
            # directory was meant.
            path = Path(path.joinpath(path))
        elif path.suffix not in [".xml", ".zip"]:
            raise RuntimeError("want xml or zipped archive")
        logging.info("Processing %s", path)
        yaml_path = output_path.joinpath(path.name + ".yaml")
        logging.info("Output %s", yaml_path)
        if os.path.exists(yaml_path):
            logging.info("Output already exists %s", yaml_path)
            if not checksum:
                logging.info("Dataset preparation already done...SKIPPING")
                continue
            logging.info("Running checksum comparison")
            recorded = _recorded_sha1(yaml_path)
            # An existing YAML without any document has nothing to compare
            # against, so the dataset is prepared again instead of failing.
            if recorded is not None and recorded == _sha1_of_file(path):
                logging.info("Dataset preparation already done...SKIPPING")
                continue

        documents = prepare_dataset(path)
        if documents:
            logging.info("Writing %s dataset(s) into %s", len(documents),
                         yaml_path)
            with open(yaml_path, "w") as stream:
                yaml.safe_dump_all(documents, stream)
        else:
            logging.info("No datasets discovered. Bye!")
Пример #2
0
def dump_all(documents, stream=None, **kwargs):
  """Serializes several documents as one multi-document YAML stream.

  Thin wrapper around ``yaml.safe_dump_all`` that always passes
  ``default_flow_style=False`` and ``indent=2``.

  Args:
    documents: An iterable of YAML serializable Python objects to dump.
    stream: Destination stream, or None to get the YAML text back instead.
    **kwargs: Extra keyword arguments forwarded to ``yaml.safe_dump_all``.

  Returns:
    The result of ``yaml.safe_dump_all``: the YAML text if stream is None.
  """
  serialized = yaml.safe_dump_all(
      documents,
      stream=stream,
      default_flow_style=False,
      indent=2,
      **kwargs)
  return serialized
Пример #3
0
def dump_all(documents, stream=None, **kwargs):
  # type: (Iterable[Any], Optional[IO[AnyStr]], Any) -> Optional[str]
  """Dumps multiple YAML documents to the stream.

  Thin wrapper around ``yaml.safe_dump_all`` that always passes
  ``default_flow_style=False`` and ``indent=2``.

  Args:
    documents: An iterable of YAML serializable Python objects to dump.
    stream: The stream to write the data to or None to return it as a string.
    **kwargs: Other arguments forwarded to ``yaml.safe_dump_all``.

  Returns:
    The string representation of the YAML data if stream is None; otherwise
    None, because the data is written to the stream instead.
  """
  return yaml.safe_dump_all(
      documents, stream=stream, default_flow_style=False, indent=2, **kwargs)
Пример #4
0
def dump_all(documents, stream=None, **kwargs):
    # type: (typing.Iterable[typing.Any], typing.Optional[typing.IO[typing.AnyStr]], typing.Any) -> typing.Optional[str]  # pylint: disable=line-too-long
    """Dumps multiple YAML documents to the stream.

    Thin wrapper around ``yaml.safe_dump_all`` that always passes
    ``default_flow_style=False`` and ``indent=2``.

    Args:
        documents: An iterable of YAML serializable Python objects to dump.
        stream: The stream to write the data to or None to return it as a
            string.
        **kwargs: Other arguments forwarded to ``yaml.safe_dump_all``.

    Returns:
        The string representation of the YAML data if stream is None;
        otherwise None, because the data is written to the stream instead.
    """
    return yaml.safe_dump_all(documents,
                              stream=stream,
                              default_flow_style=False,
                              indent=2,
                              **kwargs)