Exemplo n.º 1
0
    def __init__(self,
                 archive_format=ZIP,
                 prefix=None,
                 description=None,
                 predicate=None,
                 client_id=None):
        """Initializes the generator and picks the streaming archive backend.

        Args:
          archive_format: Either CollectionArchiveGenerator.ZIP or
              CollectionArchiveGenerator.TAR_GZ. Defaults to ZIP.
          prefix: Name of the folder inside the archive that will contain all
              the generated data. Must be non-empty.
          description: String describing the archive's contents. It will be
              included into the auto-generated MANIFEST file. Falls back to
              'Files archive collection' when not given (or empty).
          predicate: Optional callable taking one argument. If given, only the
              files matching the predicate will be archived, all others will
              be skipped. When omitted, a predicate accepting everything is
              installed.
          client_id: The client_id to use when exporting a flow results
              collection.

        Raises:
          ValueError: if archive_format is neither ZIP nor TAR_GZ, or if
              prefix is None or empty.
        """
        super(CollectionArchiveGenerator, self).__init__()

        # Choose the streaming backend first; an unknown format is rejected
        # before any other argument is examined.
        if archive_format == self.ZIP:
            generator = utils.StreamingZipGenerator(
                compression=zipfile.ZIP_DEFLATED)
        elif archive_format == self.TAR_GZ:
            generator = utils.StreamingTarGenerator()
        else:
            raise ValueError("Unknown archive format: %s" % archive_format)
        self.archive_generator = generator

        # Any falsy prefix (None, empty string) is refused, not just None.
        if not prefix:
            raise ValueError("Prefix can't be None.")
        self.prefix = prefix

        if description:
            self.description = description
        else:
            self.description = "Files archive collection"

        # Bookkeeping counters and lists, all starting out empty.
        self.total_files = 0
        self.archived_files = 0
        self.ignored_files = []
        self.failed_files = []

        if not predicate:
            # Default filter: accept every file.
            def predicate(_):
                return True

        self.predicate = predicate
        self.client_id = client_id
Exemplo n.º 2
0
    def _GenerateContent(self, start_urns, prefix, age, token=None):
        """Walks the tree under start_urns and streams it as a ZIP archive.

        The traversal proceeds one level at a time: children of the current
        set of folders are listed and opened, stream objects are written to
        the archive, and objects whose behaviours include "Container" become
        the folders of the next level.

        Args:
          start_urns: Iterable of URNs whose children should be archived.
          prefix: Passed through to self._StreamFds.
          age: Age to open the stream objects with. When it differs from
              aff4.NEWEST_TIME the streams are reopened with this age before
              being written.
          token: Passed to aff4.FACTORY.MultiOpen and self._StreamFds.

        Yields:
          Every chunk produced by self._StreamFds, followed by the value
          returned by the archive generator's Close().
        """
        zip_stream = utils.StreamingZipGenerator(
            compression=zipfile.ZIP_DEFLATED)
        pending_folders = set(start_urns)

        while pending_folders:
            # Gather the children of every folder at the current level.
            # NOTE(review): no token is passed to MultiListChildren, unlike
            # the MultiOpen calls below -- confirm this is intended.
            child_urns = set()
            for _, children in aff4.FACTORY.MultiListChildren(
                    pending_folders):
                child_urns.update(children)

            # Split the children into streams to archive and folders to
            # descend into on the next iteration.
            streams = set()
            pending_folders = set()
            for child in aff4.FACTORY.MultiOpen(child_urns, token=token):
                if isinstance(child, aff4.AFF4Stream):
                    streams.add(child)
                elif "Container" in child.behaviours:
                    pending_folders.add(child.urn)

            if not streams:
                continue

            if age != aff4.NEWEST_TIME:
                # We need to reopen the files with the actual age
                # requested. We can't do this in the call above since
                # indexes are stored with the latest timestamp of an object
                # only so adding the age above will potentially ignore
                # some of the indexes.
                stream_urns = [stream.urn for stream in streams]
                streams = list(
                    aff4.FACTORY.MultiOpen(stream_urns, age=age, token=token))

            for chunk in self._StreamFds(
                    zip_stream, prefix, streams, token=token):
                yield chunk

        # Closing the generator produces the final part of the archive.
        yield zip_stream.Close()
Exemplo n.º 3
0
 def Start(self):
     """Prepares per-run state: a deflate ZIP generator and empty counters.

     Returns:
       An empty list.
     """
     zip_generator = utils.StreamingZipGenerator(
         compression=zipfile.ZIP_DEFLATED)
     self.archive_generator = zip_generator
     self.export_counts = dict()
     return list()