  def testStreamsTwoFilesWithTwoChunksInEach(self):
    client_path_1 = db.ClientPath.OS(self.client_id, ("foo", "bar"))
    self._WriteFile(client_path_1, (0, 2))

    client_path_2 = db.ClientPath.OS(self.client_id_other, ("foo", "bar"))
    self._WriteFile(client_path_2, (2, 4))

    chunks = list(
        file_store.StreamFilesChunks([client_path_1, client_path_2]))
    self.assertLen(chunks, 4)

    self.assertEqual(chunks[0].client_path, client_path_1)
    self.assertEqual(chunks[0].data, self.blob_data[0])
    self.assertEqual(chunks[0].chunk_index, 0)
    self.assertEqual(chunks[0].total_chunks, 2)
    self.assertEqual(chunks[0].offset, 0)
    self.assertEqual(chunks[0].total_size, self.blob_size * 2)

    self.assertEqual(chunks[1].client_path, client_path_1)
    self.assertEqual(chunks[1].data, self.blob_data[1])
    self.assertEqual(chunks[1].chunk_index, 1)
    self.assertEqual(chunks[1].total_chunks, 2)
    self.assertEqual(chunks[1].offset, self.blob_size)
    self.assertEqual(chunks[1].total_size, self.blob_size * 2)

    self.assertEqual(chunks[2].client_path, client_path_2)
    self.assertEqual(chunks[2].data, self.blob_data[2])
    self.assertEqual(chunks[2].chunk_index, 0)
    self.assertEqual(chunks[2].total_chunks, 2)
    self.assertEqual(chunks[2].offset, 0)
    self.assertEqual(chunks[2].total_size, self.blob_size * 2)

    self.assertEqual(chunks[3].client_path, client_path_2)
    self.assertEqual(chunks[3].data, self.blob_data[3])
    self.assertEqual(chunks[3].chunk_index, 1)
    self.assertEqual(chunks[3].total_chunks, 2)
    self.assertEqual(chunks[3].offset, self.blob_size)
    self.assertEqual(chunks[3].total_size, self.blob_size * 2)
  def testIgnoresFileWithoutChunks(self):
    client_path_1 = db.ClientPath.OS(self.client_id, ("foo", "bar"))
    self._WriteFile(client_path_1, None)

    client_path_2 = db.ClientPath.OS(self.client_id_other, ("foo", "bar"))
    self._WriteFile(client_path_2, (2, 4))

    chunks = list(file_store.StreamFilesChunks([client_path_1, client_path_2]))
    self.assertLen(chunks, 2)

    self.assertEqual(chunks[0].client_path, client_path_2)
    self.assertEqual(chunks[0].data, self.blob_data[2])
    self.assertEqual(chunks[0].chunk_index, 0)
    self.assertEqual(chunks[0].total_chunks, 2)
    self.assertEqual(chunks[0].offset, 0)
    self.assertEqual(chunks[0].total_size, self.blob_size * 2)

    self.assertEqual(chunks[1].client_path, client_path_2)
    self.assertEqual(chunks[1].data, self.blob_data[3])
    self.assertEqual(chunks[1].chunk_index, 1)
    self.assertEqual(chunks[1].total_chunks, 2)
    self.assertEqual(chunks[1].offset, self.blob_size)
    self.assertEqual(chunks[1].total_size, self.blob_size * 2)
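# A minimal consumption sketch (not part of the original module) of the
# pattern the tests above exercise, assuming only the API surface visible in
# this excerpt: StreamFilesChunks yields chunks grouped per client path and in
# offset order, so a caller can rebuild each file's content by keying on
# chunk.client_path. The helper name ReadFileContents is hypothetical.
def ReadFileContents(client_paths):
  """Returns a dict mapping each client path to its reassembled content."""
  data_by_path = {}
  for chunk in file_store.StreamFilesChunks(client_paths):
    data_by_path.setdefault(chunk.client_path, []).append(chunk.data)
  return {path: b"".join(parts) for path, parts in data_by_path.items()}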
  def _BatchConvert(self, metadata_value_pairs):
    """Convert given batch of metadata value pairs."""
    filtered_pairs = self._RemoveRegistryKeys(metadata_value_pairs)
    for fp_batch in collection.Batch(filtered_pairs, self._BATCH_SIZE):

      if self.options.export_files_contents:
        client_paths = set()
        for metadata, stat_entry in fp_batch:
          # TODO(user): Deprecate client_urn in ExportedMetadata in favor of
          # client_id (to be added).
          client_paths.add(
              db.ClientPath.FromPathSpec(metadata.client_urn.Basename(),
                                         stat_entry.pathspec))

        # Stream the contents of all files in the batch in one pass and group
        # the resulting chunks by client path.
        data_by_path = {}
        for chunk in file_store.StreamFilesChunks(
            client_paths, max_size=self.MAX_CONTENT_SIZE):
          data_by_path.setdefault(chunk.client_path, []).append(chunk.data)

      for metadata, stat_entry in fp_batch:
        result = self._CreateExportedFile(metadata, stat_entry)
        clientpath = db.ClientPath.FromPathSpec(metadata.client_urn.Basename(),
                                                stat_entry.pathspec)

        if self.options.export_files_contents:
          try:
            data = data_by_path[clientpath]
            result.content = b"".join(data)[:self.MAX_CONTENT_SIZE]
            result.content_sha256 = hashlib.sha256(result.content).hexdigest()
          except KeyError:
            pass

        yield result
  def Generate(self, items, token=None):
    """Generates archive from a given collection.

    Iterates the collection and generates an archive by yielding contents
    of every referenced AFF4Stream.

    Args:
      items: Iterable of rdf_client_fs.StatEntry objects.
      token: User's ACLToken.

    Yields:
      Binary chunks comprising the generated archive.
    """
    del token  # unused, to be removed with AFF4 code

    client_ids = set()
    for item_batch in collection.Batch(items, self.BATCH_SIZE):

      fds_to_write = {}
      for item in item_batch:
        try:
          urn = flow_export.CollectionItemToAff4Path(item, self.client_id)
          client_path = flow_export.CollectionItemToClientPath(
              item, self.client_id)
        except flow_export.ItemNotExportableError:
          continue

        fd = file_store.OpenFile(client_path)
        self.total_files += 1

        if not self.predicate(client_path):
          self.ignored_files.append(utils.SmartUnicode(urn))
          continue

        client_ids.add(client_path.client_id)

        # content_path = os.path.join(self.prefix, *urn_components)
        self.archived_files += 1

        # Make sure size of the original file is passed. It's required
        # when output_writer is StreamingTarWriter.
        st = os.stat_result((0o644, 0, 0, 0, 0, 0, fd.size, 0, 0, 0))
        fds_to_write[fd] = (client_path, urn, st)

      if fds_to_write:
        for fd, (client_path, urn, st) in iteritems(fds_to_write):
          try:
            for i, chunk in enumerate(
                file_store.StreamFilesChunks([client_path])):
              if i == 0:
                target_path = os.path.join(self.prefix, urn.Path()[1:])
                yield self.archive_generator.WriteFileHeader(
                    target_path, st=st)

              yield self.archive_generator.WriteFileChunk(chunk.data)

            yield self.archive_generator.WriteFileFooter()
          except Exception as exception:  # pylint: disable=broad-except
            logging.exception(exception)

            self.archived_files -= 1
            self.failed_files.append(unicode(urn))

        if self.archive_generator.is_file_write_in_progress:
          yield self.archive_generator.WriteFileFooter()

    if client_ids:
      for client_id, client_info in iteritems(
          data_store.REL_DB.MultiReadClientFullInfo(client_ids)):
        client = api_client.ApiClient().InitFromClientInfo(client_info)
        for chunk in self._GenerateClientInfo(client_id, client):
          yield chunk

    for chunk in self._GenerateDescription():
      yield chunk

    yield self.archive_generator.Close()
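# A minimal usage sketch (not part of the original module): spooling the
# archive produced by Generate() to a local file. Assumes `generator` is an
# already-configured instance of the archive generator class above and
# `stat_entries` is an iterable of rdf_client_fs.StatEntry objects; the
# function name WriteArchiveToDisk is hypothetical.
def WriteArchiveToDisk(generator, stat_entries, output_path):
  """Writes every yielded archive chunk to output_path."""
  with open(output_path, "wb") as out:
    for chunk in generator.Generate(stat_entries):
      out.write(chunk)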