def _GenerateContent(self, start_urns, prefix, age, token=None):
  archive_generator = utils.StreamingZipGenerator(
      compression=zipfile.ZIP_DEFLATED)

  folders_urns = set(start_urns)
  while folders_urns:
    next_urns = set()
    for _, children in aff4.FACTORY.MultiListChildren(folders_urns):
      for urn in children:
        next_urns.add(urn)

    download_fds = set()
    folders_urns = set()
    for fd in aff4.FACTORY.MultiOpen(next_urns, token=token):
      if isinstance(fd, aff4.AFF4Stream):
        download_fds.add(fd)
      elif "Container" in fd.behaviours:
        folders_urns.add(fd.urn)

    if download_fds:
      if age != aff4.NEWEST_TIME:
        urns = [fd.urn for fd in download_fds]
        # We need to reopen the files with the actual age requested. We can't
        # do this in the call above since indexes are stored with the latest
        # timestamp of an object only, so adding the age above would
        # potentially ignore some of the indexes.
        download_fds = list(
            aff4.FACTORY.MultiOpen(urns, age=age, token=token))

      for chunk in self._StreamFds(
          archive_generator, prefix, download_fds, token=token):
        yield chunk

  yield archive_generator.Close()
def _GenerateContent(self, client_id, start_paths, timestamp, path_prefix):
  client_paths = []
  for start_path in start_paths:
    path_type, components = rdf_objects.ParseCategorizedPath(start_path)
    for pi in data_store.REL_DB.ListDescendantPathInfos(
        client_id, path_type, components):
      if pi.directory:
        continue
      client_paths.append(db.ClientPath.FromPathInfo(client_id, pi))

  archive_generator = utils.StreamingZipGenerator(
      compression=zipfile.ZIP_DEFLATED)
  for chunk in file_store.StreamFilesChunks(
      client_paths, max_timestamp=timestamp):
    if chunk.chunk_index == 0:
      content_path = os.path.join(path_prefix, chunk.client_path.vfs_path)
      # TODO(user): Export meaningful file metadata.
      st = os.stat_result(
          (0o644, 0, 0, 0, 0, 0, chunk.total_size, 0, 0, 0))
      yield archive_generator.WriteFileHeader(content_path, st=st)

    yield archive_generator.WriteFileChunk(chunk.data)

    if chunk.chunk_index == chunk.total_chunks - 1:
      yield archive_generator.WriteFileFooter()

  yield archive_generator.Close()
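# Note on the os.stat_result construction above: the 10-tuple follows the
# order (st_mode, st_ino, st_dev, st_nlink, st_uid, st_gid, st_size,
# st_atime, st_mtime, st_ctime), so the placeholder stat carries only a
# 0o644 mode and the file size. A minimal standalone sketch of that fact
# (the literal size 1234 is illustrative only):
import os

st = os.stat_result((0o644, 0, 0, 0, 0, 0, 1234, 0, 0, 0))
assert st.st_mode == 0o644
assert st.st_size == 1234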
def _GenerateArchive(
    self,
    args: ApiGetCollectedHuntTimelinesArgs,
) -> Iterator[bytes]:
  zipgen = utils.StreamingZipGenerator()
  yield from self._GenerateHuntTimelines(args, zipgen)
  yield zipgen.Close()
def _Generate(
    self,
    hunt_id: Text,
    format: ApiGetCollectedTimelineArgs.Format,
) -> Iterator[bytes]:
  zipgen = utils.StreamingZipGenerator()
  yield from self._GenerateTimelines(hunt_id, format, zipgen)
  yield zipgen.Close()
def _Generate(
    self,
    hunt_id: Text,
    fmt: rdf_structs.EnumNamedValue,
) -> Iterator[bytes]:
  zipgen = utils.StreamingZipGenerator()
  yield from self._GenerateTimelines(hunt_id, fmt, zipgen)
  yield zipgen.Close()
def testHierarchy(self):
  archiver = utils.StreamingZipGenerator()
  output = io.BytesIO()

  output.write(archiver.WriteFileHeader("foo/bar/baz"))
  output.write(archiver.WriteFileChunk(b"quux"))
  output.write(archiver.WriteFileFooter())
  output.write(archiver.Close())

  with zipfile.ZipFile(output, mode="r") as zipdesc:
    self.assertEqual(zipdesc.read("foo/bar/baz"), b"quux")
def testWriteFromFD(self):
  filedesc = io.BytesIO(b"foobarbaz" * 1024 * 1024)

  archiver = utils.StreamingZipGenerator()
  output = io.BytesIO()

  for chunk in archiver.WriteFromFD(filedesc, "quux"):
    output.write(chunk)
  output.write(archiver.Close())

  with zipfile.ZipFile(output, mode="r") as zipdesc:
    self.assertEqual(zipdesc.read("quux"), filedesc.getvalue())
def testCompression(self):
  archiver = utils.StreamingZipGenerator(zipfile.ZIP_DEFLATED)
  output = io.BytesIO()

  output.write(archiver.WriteFileHeader("foo"))
  output.write(archiver.WriteFileChunk(b"quux"))
  output.write(archiver.WriteFileChunk(b"norf"))
  output.write(archiver.WriteFileFooter())
  output.write(archiver.Close())

  with zipfile.ZipFile(output, mode="r") as zipdesc:
    self.assertEqual(zipdesc.read("foo"), b"quuxnorf")
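# A minimal end-to-end sketch, assuming only the StreamingZipGenerator API
# exercised by the tests above (WriteFileHeader / WriteFileChunk /
# WriteFileFooter / Close). The import path and the archive_files helper
# below are illustrative assumptions, not part of the original code.
import io
import zipfile

from grr_response_core.lib import utils  # assumed location of StreamingZipGenerator


def archive_files(files, chunk_size=1024 * 1024):
  """Yields zip-formatted byte chunks for a {name: file-like object} mapping."""
  archiver = utils.StreamingZipGenerator(compression=zipfile.ZIP_DEFLATED)
  for name, filedesc in files.items():
    # Each archive member is a header, one or more data chunks, and a footer.
    yield archiver.WriteFileHeader(name)
    while True:
      data = filedesc.read(chunk_size)
      if not data:
        break
      yield archiver.WriteFileChunk(data)
    yield archiver.WriteFileFooter()
  # The archive is only complete once Close() emits the central directory.
  yield archiver.Close()


if __name__ == "__main__":
  files = {"foo/bar.txt": io.BytesIO(b"hello"), "baz.txt": io.BytesIO(b"world")}
  with open("example.zip", "wb") as out:
    for chunk in archive_files(files):
      out.write(chunk)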
def __init__(self,
             archive_format=ZIP,
             prefix=None,
             description=None,
             predicate=None,
             client_id=None):
  """CollectionArchiveGenerator constructor.

  Args:
    archive_format: May be ArchiveCollectionGenerator.ZIP or
      ArchiveCollectionGenerator.TAR_GZ. Defaults to ZIP.
    prefix: Name of the folder inside the archive that will contain all the
      generated data.
    description: String describing archive's contents. It will be included
      into the auto-generated MANIFEST file. Defaults to
      'Files archive collection'.
    predicate: If not None, only the files matching the predicate will be
      archived, all others will be skipped. The predicate receives a
      db.ClientPath as input.
    client_id: The client_id to use when exporting a flow results collection.

  Raises:
    ValueError: if prefix is None.
  """
  super(Aff4CollectionArchiveGenerator, self).__init__()

  if archive_format == self.ZIP:
    self.archive_generator = utils.StreamingZipGenerator(
        compression=zipfile.ZIP_DEFLATED)
  elif archive_format == self.TAR_GZ:
    self.archive_generator = utils.StreamingTarGenerator()
  else:
    raise ValueError("Unknown archive format: %s" % archive_format)

  if not prefix:
    raise ValueError("Prefix can't be None.")
  self.prefix = prefix

  self.description = description or "Files archive collection"

  self.total_files = 0
  self.archived_files = 0
  self.ignored_files = []
  self.failed_files = []

  self.predicate = predicate or (lambda _: True)
  self.client_id = client_id
def __init__(self, flow: rdf_flow_objects.Flow,
             archive_format: ArchiveFormat):
  self.flow = flow
  self.archive_format = archive_format

  if archive_format == ArchiveFormat.ZIP:
    self.archive_generator = utils.StreamingZipGenerator(
        compression=zipfile.ZIP_DEFLATED)
    extension = "zip"
  elif archive_format == ArchiveFormat.TAR_GZ:
    self.archive_generator = utils.StreamingTarGenerator()
    extension = "tar.gz"
  else:
    raise ValueError(f"Unknown archive format: {archive_format}")

  self.prefix = "{}_{}_{}".format(
      flow.client_id.replace(".", "_"), flow.flow_id, flow.flow_class_name)
  self.filename = f"{self.prefix}.{extension}"

  self.num_archived_files = 0
def _Generate(self, hunt_id: Text) -> Iterator[bytes]:
  zipgen = utils.StreamingZipGenerator()
  yield from self._GenerateTimelines(hunt_id, zipgen)
  yield zipgen.Close()
def Start(self):
  self.archive_generator = utils.StreamingZipGenerator(
      compression=zipfile.ZIP_DEFLATED)
  self.export_counts = {}
  return []
def _Generate(self, hunt_id):
  zipgen = utils.StreamingZipGenerator()
  yield from self._GenerateTimelines(hunt_id, zipgen)
  yield zipgen.Close()