def _GetHWInfos(client_list, batch_size=10000, token=None):
  """Opens the given clients in batches and returns hardware information."""
  # This function returns a dict mapping each client_id to a set of hardware
  # serial numbers reported by this client.
  hw_infos = {}

  logging.info("%d clients to process.", len(client_list))

  c = 0

  for batch in utils.Grouper(client_list, batch_size):
    logging.info("Processing batch: %d-%d", c, c + batch_size)
    c += len(batch)

    client_objs = aff4.FACTORY.MultiOpen(batch, age=aff4.ALL_TIMES, token=token)

    for client in client_objs:
      hwi = client.GetValuesForAttribute(client.Schema.HARDWARE_INFO)

      hw_infos[client.urn] = set(["%s" % x.serial_number for x in hwi])

  return hw_infos
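All snippets in this section split their input into fixed-size batches with utils.Grouper(iterable, size). GRR's actual implementation is not reproduced here; the sketch below (with the hypothetical name _GrouperSketch) only illustrates the assumed semantics: it yields lists of at most `size` items, ending with a possibly shorter final batch.

# Minimal sketch of the assumed batching semantics of utils.Grouper.
# _GrouperSketch is an illustrative name, not a GRR API.
import itertools


def _GrouperSketch(iterable, size):
  """Yields successive lists of at most `size` items from `iterable`."""
  iterator = iter(iterable)
  while True:
    batch = list(itertools.islice(iterator, size))
    if not batch:
      return
    yield batch


# Example: list(_GrouperSketch(range(5), 2)) == [[0, 1], [2, 3], [4]]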
def _GenerateConvertedValues(self, converter, grr_messages):
  """Generates converted values using given converter from given messages.

  Groups values into batches of size BATCH_SIZE and applies the converter
  to each batch.

  Args:
    converter: ExportConverter instance.
    grr_messages: An iterable (a generator is assumed) with GRRMessage values.

  Yields:
    Values generated by the converter.

  Raises:
    ValueError: If any of the GrrMessage objects doesn't have "source" set.
  """
  for batch in utils.Grouper(grr_messages, self.BATCH_SIZE):
    metadata_items = self._GetMetadataForClients([gm.source for gm in batch])
    batch_with_metadata = zip(metadata_items, [gm.payload for gm in batch])

    for result in converter.BatchConvert(batch_with_metadata,
                                         token=self.token):
      yield result
def Start(self):
  """Retrieve all the clients for the AbstractClientStatsCollectors."""
  self.stats = aff4.FACTORY.Create(
      self.FILESTORE_STATS_URN,
      aff4_stats.FilestoreStats,
      mode="w",
      token=self.token)

  self._CreateConsumers()
  hashes = aff4.FACTORY.Open(
      self.HASH_PATH, token=self.token).ListChildren(limit=10**8)

  try:
    for urns in utils.Grouper(hashes, self.OPEN_FILES_LIMIT):
      for fd in aff4.FACTORY.MultiOpen(
          urns, mode="r", token=self.token, age=aff4.NEWEST_TIME):
        for consumer in self.consumers:
          consumer.ProcessFile(fd)
      self.HeartBeat()
  finally:
    for consumer in self.consumers:
      consumer.Save(self.stats)

    self.stats.Close()
def Execute(self, thread_count):
  """Runs the migration with a given thread count."""
  blob_urns = list(aff4.FACTORY.ListChildren("aff4:/blobs"))

  sys.stdout.write("Blobs to migrate: {}\n".format(len(blob_urns)))
  sys.stdout.write("Threads to use: {}\n".format(thread_count))

  self._total_count = len(blob_urns)
  self._migrated_count = 0
  self._start_time = rdfvalue.RDFDatetime.Now()

  batches = utils.Grouper(blob_urns, _BLOB_BATCH_SIZE)

  self._Progress()
  tp = pool.ThreadPool(processes=thread_count)
  tp.map(self._MigrateBatch, list(batches))
  self._Progress()

  if self._migrated_count == self._total_count:
    message = "\nMigration has been finished (migrated {} blobs).\n".format(
        self._migrated_count)
    sys.stdout.write(message)
  else:
    message = "Not all blobs have been migrated ({}/{})".format(
        self._migrated_count, self._total_count)
    raise AssertionError(message)
def AddFileWithUnknownHash(blob_ids):
  """Add a new file consisting of given blob IDs."""
  blob_refs = []
  offset = 0
  sha256 = hashlib.sha256()
  for blob_ids_batch in utils.Grouper(blob_ids, _BLOBS_READ_BATCH_SIZE):
    unique_ids = set(blob_ids_batch)
    data = data_store.REL_DB.ReadBlobs(unique_ids)
    for k, v in iteritems(data):
      if v is None:
        raise BlobNotFound("Couldn't find one of referenced blobs: %s" % k)

    for blob_id in blob_ids_batch:
      blob_data = data[blob_id]
      blob_refs.append(
          rdf_objects.BlobReference(
              offset=offset,
              size=len(blob_data),
              blob_id=blob_id,
          ))
      offset += len(blob_data)

      sha256.update(blob_data)

  hash_id = rdf_objects.SHA256HashID.FromBytes(sha256.digest())
  data_store.REL_DB.WriteHashBlobReferences({hash_id: blob_refs})

  return hash_id
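AddFileWithUnknownHash hashes the file incrementally: the SHA-256 state is updated blob by blob as batches are read, so the full file content never has to be held in memory. The following self-contained sketch isolates that pattern; all names are illustrative and only hashlib is a real dependency.

# Illustrative sketch of incremental hashing over an iterable of byte blobs.
import hashlib


def _HashBlobsSketch(blobs):
  """Returns (total_size, sha256_digest) for an iterable of byte blobs."""
  sha256 = hashlib.sha256()
  offset = 0
  for blob_data in blobs:
    sha256.update(blob_data)  # Incrementally fold each blob into the hash.
    offset += len(blob_data)
  return offset, sha256.digest()


# Example: equivalent to hashing the concatenation in one go.
# _HashBlobsSketch([b"foo", b"bar", b"baz"])
#   == (9, hashlib.sha256(b"foobarbaz").digest())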
def MigrateClients(self, client_urns):
  """Migrates entire VFS of given client list to the relational data store."""
  self._start_time = rdfvalue.RDFDatetime.Now()

  self._client_urns_to_migrate = client_urns
  self._client_urns_migrated = []
  self._client_urns_failed = []

  to_migrate_count = len(self._client_urns_to_migrate)
  sys.stdout.write("Clients to migrate: {}\n".format(to_migrate_count))

  batches = utils.Grouper(client_urns, self.client_batch_size)
  tp = pool.ThreadPool(processes=self.thread_count)
  tp.map(self.MigrateClientBatch, list(batches))

  migrated_count = len(self._client_urns_migrated)
  sys.stdout.write("Migrated clients: {}\n".format(migrated_count))

  if to_migrate_count == migrated_count:
    sys.stdout.write("All clients migrated successfully!\n")
  else:
    message = "Not all clients have been migrated ({}/{})".format(
        migrated_count, to_migrate_count)
    raise RuntimeError(message)
def Stop(self, reason=None):
  super(GenericHunt, self).Stop(reason=reason)

  started_flows = grr_collections.RDFUrnCollection(
      self.started_flows_collection_urn)

  num_terminated_flows = 0
  self.Log("Hunt stop. Terminating all the started flows.")

  # Delete hunt flow states.
  for flows_batch in utils.Grouper(started_flows,
                                   self.__class__.STOP_BATCH_SIZE):
    with queue_manager.QueueManager(token=self.token) as manager:
      manager.MultiDestroyFlowStates(flows_batch)

    with data_store.DB.GetMutationPool() as mutation_pool:
      for f in flows_batch:
        flow.GRRFlow.MarkForTermination(
            f, reason="Parent hunt stopped.", mutation_pool=mutation_pool)

    num_terminated_flows += len(flows_batch)

  # Delete the hunt's requests and responses to ensure no more processing is
  # going to occur.
  with queue_manager.QueueManager(token=self.token) as manager:
    manager.DestroyFlowStates(self.session_id)

  self.Log("%d flows terminated.", num_terminated_flows)
def ProcessSingleTypeExportedValues(self, original_value_type,
                                    exported_values):
  first_value = next(exported_values, None)
  if not first_value:
    return

  yield self.archive_generator.WriteFileHeader(
      "%s/%s/from_%s.yaml" % (self.path_prefix,
                              first_value.__class__.__name__,
                              original_value_type.__name__))
  yield self.archive_generator.WriteFileChunk(_SerializeToYaml(first_value))

  counter = 1
  for batch in utils.Grouper(exported_values, self.ROW_BATCH):
    counter += len(batch)

    # TODO(hanuszczak): YAML is supposed to be a unicode file format so we
    # should use `StringIO` here instead. However, because PyYAML dumps to
    # `bytes` instead of `unicode` we have to use `BytesIO`. It should be
    # investigated whether there is a way to adjust behaviour of PyYAML.
    buf = io.BytesIO()
    for value in batch:
      buf.write(b"\n")
      buf.write(_SerializeToYaml(value))

    yield self.archive_generator.WriteFileChunk(buf.getvalue())

  yield self.archive_generator.WriteFileFooter()

  counts_for_original_type = self.export_counts.setdefault(
      original_value_type.__name__, dict())
  counts_for_original_type[first_value.__class__.__name__] = counter
def ProcessOneHunt(self, exceptions_by_hunt):
  """Reads results for one hunt and processes them."""
  hunt_results_urn, results = (
      hunts_results.HuntResultQueue.ClaimNotificationsForCollection(
          token=self.token, lease_time=self.lifetime))
  logging.debug("Found %d results for hunt %s", len(results),
                hunt_results_urn)
  if not results:
    return 0

  hunt_urn = rdfvalue.RDFURN(hunt_results_urn.Dirname())
  batch_size = self.BATCH_SIZE
  metadata_urn = hunt_urn.Add("ResultsMetadata")
  exceptions_by_plugin = {}
  num_processed_for_hunt = 0
  collection_obj = implementation.GRRHunt.ResultCollectionForHID(hunt_urn)
  try:
    with aff4.FACTORY.OpenWithLock(
        metadata_urn, lease_time=600, token=self.token) as metadata_obj:
      all_plugins, used_plugins = self.LoadPlugins(metadata_obj)
      num_processed = int(
          metadata_obj.Get(metadata_obj.Schema.NUM_PROCESSED_RESULTS))
      for batch in utils.Grouper(results, batch_size):
        results = list(
            collection_obj.MultiResolve(
                [r.value.ResultRecord() for r in batch]))
        self.RunPlugins(hunt_urn, used_plugins, results, exceptions_by_plugin)

        hunts_results.HuntResultQueue.DeleteNotifications(
            batch, token=self.token)
        num_processed += len(batch)
        num_processed_for_hunt += len(batch)
        self.HeartBeat()
        metadata_obj.Set(
            metadata_obj.Schema.NUM_PROCESSED_RESULTS(num_processed))
        metadata_obj.UpdateLease(600)
        if self.CheckIfRunningTooLong():
          logging.warning("Run too long, stopping.")
          break

      metadata_obj.Set(metadata_obj.Schema.OUTPUT_PLUGINS(all_plugins))
      metadata_obj.Set(
          metadata_obj.Schema.NUM_PROCESSED_RESULTS(num_processed))
  except aff4.LockError:
    logging.warn(
        "ProcessHuntResultCollectionsCronFlow: "
        "Could not get lock on hunt metadata %s.", metadata_urn)
    return 0

  if exceptions_by_plugin:
    for plugin, exceptions in iteritems(exceptions_by_plugin):
      exceptions_by_hunt.setdefault(hunt_urn, {}).setdefault(
          plugin, []).extend(exceptions)

  logging.debug("Processed %d results.", num_processed_for_hunt)
  return len(results)
def ProcessSingleTypeExportedValues(self, original_value_type,
                                    exported_values):
  first_value = next(exported_values, None)
  if not first_value:
    return

  if not isinstance(first_value, rdf_structs.RDFProtoStruct):
    raise ValueError("The SQLite plugin only supports export-protos")

  yield self.archive_generator.WriteFileHeader(
      "%s/%s_from_%s.sql" % (self.path_prefix,
                             first_value.__class__.__name__,
                             original_value_type.__name__))

  table_name = "%s.from_%s" % (first_value.__class__.__name__,
                               original_value_type.__name__)
  schema = self._GetSqliteSchema(first_value.__class__)

  # We will buffer the sql statements into an in-memory sql database before
  # dumping them to the zip archive. We rely on the PySQLite library for
  # string escaping.
  db_connection = sqlite3.connect(":memory:")
  db_cursor = db_connection.cursor()

  yield self.archive_generator.WriteFileChunk("BEGIN TRANSACTION;\n")

  with db_connection:
    buf = io.StringIO()
    buf.write(u"CREATE TABLE \"%s\" (\n  " % table_name)
    column_types = [(k, v.sqlite_type) for k, v in iteritems(schema)]
    buf.write(u",\n  ".join([u"\"%s\" %s" % (k, v) for k, v in column_types]))
    buf.write(u"\n);")
    db_cursor.execute(buf.getvalue())

    yield self.archive_generator.WriteFileChunk(buf.getvalue() + u"\n")

    self._InsertValueIntoDb(table_name, schema, first_value, db_cursor)

  for sql in self._FlushAllRows(db_connection, table_name):
    yield sql

  counter = 1
  for batch in utils.Grouper(exported_values, self.ROW_BATCH):
    counter += len(batch)

    with db_connection:
      for value in batch:
        self._InsertValueIntoDb(table_name, schema, value, db_cursor)

    for sql in self._FlushAllRows(db_connection, table_name):
      yield sql

  db_connection.close()

  yield self.archive_generator.WriteFileChunk("COMMIT;\n")
  yield self.archive_generator.WriteFileFooter()

  counts_for_original_type = self.export_counts.setdefault(
      original_value_type.__name__, dict())
  counts_for_original_type[first_value.__class__.__name__] = counter
def CleanVacuousVersions(clients=None, dry_run=True):
  """A script to remove no-op client versions.

  This script removes versions of a client when they are identical to the
  previous one, in the sense that no versioned attributes were changed since
  the previous client version.

  Args:
    clients: A list of ClientURNs; if empty, cleans all clients.
    dry_run: Whether this is a dry run.
  """

  if not clients:
    index = client_index.CreateClientIndex()
    clients = index.LookupClients(["."])
  clients.sort()

  with data_store.DB.GetMutationPool() as pool:

    logging.info("checking %d clients", len(clients))
    for batch in utils.Grouper(clients, 10000):
      # TODO(amoser): This only works on datastores that use the Bigtable
      # scheme.
      client_infos = data_store.DB.MultiResolvePrefix(
          batch, ["aff4:", "aff4:"], data_store.DB.ALL_TIMESTAMPS)

      for client, type_list in client_infos:
        cleared = 0
        kept = 0
        updates = []
        for a, _, ts in type_list:
          if ts != 0:
            updates.append((ts, a))
        updates = sorted(updates)
        dirty = True
        for ts, a in updates:
          if a == "aff4:type":
            if dirty:
              kept += 1
              dirty = False
            else:
              cleared += 1
              if not dry_run:
                pool.DeleteAttributes(client, ["aff4:type"], start=ts, end=ts)
                if pool.Size() > 1000:
                  pool.Flush()
          else:
            dirty = True

        logging.info("%s: kept %d and cleared %d", client, kept, cleared)
def GetInput(self):
  """Yields client objects that have checked in within max_age."""
  client_list = GetAllClients(token=self.token)
  logging.debug("Got %d clients", len(client_list))

  for client_group in utils.Grouper(client_list, self.client_chunksize):
    for fd in aff4.FACTORY.MultiOpen(
        client_group,
        mode="r",
        aff4_type=aff4_grr.VFSGRRClient,
        token=self.token):
      if isinstance(fd, aff4_grr.VFSGRRClient):
        # Skip clients older than max_age.
        oldest_time = (time.time() - self.max_age) * 1e6
        if fd.Get(aff4_grr.VFSGRRClient.SchemaCls.PING) >= oldest_time:
          yield fd
def Run(self):
  self.start = 0
  self.end = int(1e6 * (time.time() - self.MAX_AGE))

  client_urns = export_utils.GetAllClients(token=self.token)

  for batch in utils.Grouper(client_urns, 10000):
    with data_store.DB.GetMutationPool() as mutation_pool:
      for client_urn in batch:
        mutation_pool.DeleteAttributes(
            client_urn.Add("stats"), [u"aff4:stats"],
            start=self.start,
            end=self.end)
    self.HeartBeat()
def _IterateAllClients():
  """Fetches client data from the relational db."""
  all_client_ids = data_store.REL_DB.ReadAllClientIDs()
  for batch in utils.Grouper(all_client_ids, CLIENT_READ_BATCH_SIZE):
    client_map = data_store.REL_DB.MultiReadClientFullInfo(batch)
    fs_client_ids = [
        cid for (cid, client) in iteritems(client_map)
        if client.metadata.fleetspeak_enabled
    ]
    last_contact_times = _GetLastContactFromFleetspeak(fs_client_ids)
    for cid, last_contact in iteritems(last_contact_times):
      client_map[cid].metadata.ping = last_contact
    for client in itervalues(client_map):
      yield client
def _MultiStream(cls, fds):
  """Effectively streams data from multiple opened BlobImage objects.

  Args:
    fds: A list of opened AFF4Stream (or AFF4Stream descendants) objects.

  Yields:
    Tuples (fd, chunk, exception) where chunk is a binary blob of data and fd
    is an object from the fds argument. If one or more chunks are missing,
    exception is a MissingBlobsError object and chunk is None. _MultiStream
    does its best to skip the file entirely if one of its chunks is missing,
    but in case of very large files it's still possible to yield a truncated
    file.
  """
  broken_fds = set()
  missing_blobs_fd_pairs = []
  for chunk_fd_pairs in utils.Grouper(
      cls._GenerateChunkIds(fds), cls.MULTI_STREAM_CHUNKS_READ_AHEAD):
    chunk_fds = list(map(operator.itemgetter(0), chunk_fd_pairs))
    results_map = data_store.DB.ReadBlobs(chunk_fds, token=fds[0].token)

    for chunk_id, fd in chunk_fd_pairs:
      if chunk_id not in results_map or results_map[chunk_id] is None:
        missing_blobs_fd_pairs.append((chunk_id, fd))
        broken_fds.add(fd)

    for chunk, fd in chunk_fd_pairs:
      if fd in broken_fds:
        continue

      yield fd, results_map[chunk], None

  if missing_blobs_fd_pairs:
    missing_blobs_by_fd = {}
    for chunk_id, fd in missing_blobs_fd_pairs:
      missing_blobs_by_fd.setdefault(fd, []).append(chunk_id)

    for fd, missing_blobs in iteritems(missing_blobs_by_fd):
      e = MissingBlobsError(
          "%d missing blobs (multi-stream)" % len(missing_blobs),
          missing_chunks=missing_blobs)
      yield fd, None, e
def _IterateAllLegacyClients(token):
  """Fetches client data from the legacy db."""
  root_children = aff4.FACTORY.Open(
      aff4.ROOT_URN, token=token).OpenChildren(mode="r")
  for batch in utils.Grouper(root_children, CLIENT_READ_BATCH_SIZE):
    fs_client_map = {}
    non_fs_clients = []
    for child in batch:
      if not isinstance(child, aff4_grr.VFSGRRClient):
        continue
      if child.Get(child.Schema.FLEETSPEAK_ENABLED):
        fs_client_map[child.urn.Basename()] = child
      else:
        non_fs_clients.append(child)
    last_contact_times = _GetLastContactFromFleetspeak(viewkeys(fs_client_map))
    for client in non_fs_clients:
      yield client.Get(client.Schema.PING), client
    for cid, client in iteritems(fs_client_map):
      last_contact = last_contact_times.get(cid, client.Get(client.Schema.PING))
      yield last_contact, client
def Convert(self, values, start_index=0, end_index=None):
  """Converts given collection to exported values.

  This method uses a threadpool to do the conversion in parallel. It blocks
  until everything is converted.

  Args:
    values: Iterable object with values to convert.
    start_index: Start from this index in the collection.
    end_index: Finish processing at the (end_index - 1) element of the
      collection. If None, work till the end of the collection.

  Returns:
    Nothing. ConvertBatch() should handle the results.
  """
  if not values:
    return

  try:
    total_batch_count = len(values) // self.batch_size
  except TypeError:
    total_batch_count = -1

  pool = ThreadPool.Factory(self.threadpool_prefix, self.threadpool_size)
  val_iterator = itertools.islice(values, start_index, end_index)

  pool.Start()
  try:
    for batch_index, batch in enumerate(
        utils.Grouper(val_iterator, self.batch_size)):
      logging.debug("Processing batch %d out of %d", batch_index,
                    total_batch_count)

      pool.AddTask(
          target=self.ConvertBatch,
          args=(batch,),
          name="batch_%d" % batch_index,
          inline=False)
  finally:
    pool.Stop()
def MigrateClient(self, client_urn):
  """Migrates entire VFS of given client to the relational data store."""
  vfs = ListVfs(client_urn)

  path_infos = []

  for vfs_urn in vfs:
    _, vfs_path = vfs_urn.Split(2)
    path_type, components = rdf_objects.ParseCategorizedPath(vfs_path)

    path_info = rdf_objects.PathInfo(path_type=path_type, components=components)
    path_infos.append(path_info)

  data_store.REL_DB.InitPathInfos(client_urn.Basename(), path_infos)

  for vfs_group in utils.Grouper(vfs, self.vfs_group_size):
    stat_entries = dict()
    hash_entries = dict()

    for fd in aff4.FACTORY.MultiOpen(vfs_group, age=aff4.ALL_TIMES):
      _, vfs_path = fd.urn.Split(2)
      path_type, components = rdf_objects.ParseCategorizedPath(vfs_path)
      path_info = rdf_objects.PathInfo(path_type=path_type,
                                       components=components)

      for stat_entry in fd.GetValuesForAttribute(fd.Schema.STAT):
        stat_path_info = path_info.Copy()
        stat_path_info.timestamp = stat_entry.age
        stat_entries[stat_path_info] = stat_entry

      for hash_entry in fd.GetValuesForAttribute(fd.Schema.HASH):
        hash_path_info = path_info.Copy()
        hash_path_info.timestamp = hash_entry.age
        hash_entries[hash_path_info] = hash_entry

    data_store.REL_DB.MultiWritePathHistory(client_urn.Basename(),
                                            stat_entries, hash_entries)
def Execute(self, thread_count):
  """Runs the migration procedure.

  Args:
    thread_count: The number of threads to execute the migration with.

  Raises:
    AssertionError: If not all clients have been migrated.
    ValueError: If the relational database backend is not available.
  """
  if not data_store.RelationalDBWriteEnabled():
    raise ValueError("No relational database available.")

  sys.stdout.write("Collecting clients...\n")
  client_urns = _GetClientUrns()

  sys.stdout.write("Clients to migrate: {}\n".format(len(client_urns)))
  sys.stdout.write("Threads to use: {}\n".format(thread_count))

  self._total_count = len(client_urns)
  self._migrated_count = 0
  self._start_time = rdfvalue.RDFDatetime.Now()

  batches = utils.Grouper(client_urns, _CLIENT_BATCH_SIZE)

  self._Progress()
  tp = pool.ThreadPool(processes=thread_count)
  tp.map(self._MigrateBatch, list(batches))
  self._Progress()

  if self._migrated_count == self._total_count:
    message = "\nMigration has been finished (migrated {} clients).\n".format(
        self._migrated_count)
    sys.stdout.write(message)
  else:
    message = "Not all clients have been migrated ({}/{})".format(
        self._migrated_count, self._total_count)
    raise AssertionError(message)
def ProcessSingleTypeExportedValues(self, original_value_type,
                                    exported_values):
  first_value = next(exported_values, None)
  if not first_value:
    return

  yield self.archive_generator.WriteFileHeader(
      "%s/%s/from_%s.csv" % (self.path_prefix,
                             first_value.__class__.__name__,
                             original_value_type.__name__))

  writer = utils.CsvWriter()
  # Write the CSV header based on the first value's class and write the first
  # value itself. All other values are guaranteed to have the same class
  # (see the ProcessSingleTypeExportedValues definition).
  writer.WriteRow(self._GetCSVHeader(first_value.__class__))
  writer.WriteRow(self._GetCSVRow(first_value))
  chunk = writer.Content().encode("utf-8")
  yield self.archive_generator.WriteFileChunk(chunk)

  # Counter starts from 1, as 1 value has already been written.
  counter = 1
  for batch in utils.Grouper(exported_values, self.ROW_BATCH):
    counter += len(batch)

    writer = utils.CsvWriter()
    for value in batch:
      writer.WriteRow(self._GetCSVRow(value))
    chunk = writer.Content().encode("utf-8")
    yield self.archive_generator.WriteFileChunk(chunk)

  yield self.archive_generator.WriteFileFooter()

  self.export_counts.setdefault(
      original_value_type.__name__,
      dict())[first_value.__class__.__name__] = counter
def CleanAff4Clients(self):
  """Cleans up old client data from aff4."""

  inactive_client_ttl = config.CONFIG["DataRetention.inactive_client_ttl"]
  if not inactive_client_ttl:
    self.Log("TTL not set - nothing to do...")
    return

  exception_label = config.CONFIG[
      "DataRetention.inactive_client_ttl_exception_label"]

  index = client_index.CreateClientIndex(token=self.token)

  client_urns = index.LookupClients(["."])

  deadline = rdfvalue.RDFDatetime.Now() - inactive_client_ttl
  deletion_count = 0

  for client_group in utils.Grouper(client_urns, 1000):
    inactive_client_urns = []
    for client in aff4.FACTORY.MultiOpen(
        client_group,
        mode="r",
        aff4_type=aff4_grr.VFSGRRClient,
        token=self.token):
      if exception_label in client.GetLabelsNames():
        continue

      if client.Get(client.Schema.LAST) < deadline:
        inactive_client_urns.append(client.urn)

    aff4.FACTORY.MultiDelete(inactive_client_urns, token=self.token)
    deletion_count += len(inactive_client_urns)
    self.HeartBeat()

  self.Log("Deleted %d inactive clients." % deletion_count)
def Handle(self, args, token=None):
  if args.count:
    end = args.offset + args.count
    # Read <count> clients ahead in case some of them fail to open / verify.
    batch_size = end + args.count
  else:
    end = sys.maxsize
    batch_size = end

  keywords = shlex.split(args.query)
  api_clients = []

  if data_store.RelationalDBReadEnabled():
    index = client_index.ClientIndex()

    # TODO(amoser): We could move the label verification into the database
    # making this method more efficient. Label restrictions should be on
    # small subsets though so this might not be worth it.
    all_client_ids = set()
    for label in self.labels_whitelist:
      label_filter = ["label:" + label] + keywords
      all_client_ids.update(index.LookupClients(label_filter))

    index = 0
    for cid_batch in utils.Grouper(sorted(all_client_ids), batch_size):
      client_infos = data_store.REL_DB.MultiReadClientFullInfo(cid_batch)

      for _, client_info in sorted(iteritems(client_infos)):
        if not self._VerifyLabels(client_info.labels):
          continue
        if index >= args.offset and index < end:
          api_clients.append(ApiClient().InitFromClientInfo(client_info))
        index += 1
        if index >= end:
          UpdateClientsFromFleetspeak(api_clients)
          return ApiSearchClientsResult(items=api_clients)

  else:
    index = client_index.CreateClientIndex(token=token)

    all_urns = set()
    for label in self.labels_whitelist:
      label_filter = ["label:" + label] + keywords
      all_urns.update(index.LookupClients(label_filter))

    all_objs = aff4.FACTORY.MultiOpen(
        all_urns, aff4_type=aff4_grr.VFSGRRClient, token=token)

    index = 0
    for client_obj in sorted(all_objs):
      if not self._CheckClientLabels(client_obj):
        continue
      if index >= args.offset and index < end:
        api_clients.append(ApiClient().InitFromAff4Object(client_obj))
      index += 1
      if index >= end:
        break

  UpdateClientsFromFleetspeak(api_clients)
  return ApiSearchClientsResult(items=api_clients)
def Generate(self, collection, token=None):
  """Generates an archive from a given collection.

  Iterates the collection and generates an archive by yielding contents of
  every referenced AFF4Stream.

  Args:
    collection: Iterable with items that point to aff4 paths.
    token: User's ACLToken.

  Yields:
    Binary chunks comprising the generated archive.
  """
  clients = set()
  for fd_urn_batch in utils.Grouper(self._ItemsToUrns(collection),
                                    self.BATCH_SIZE):
    fds_to_write = {}
    for fd in aff4.FACTORY.MultiOpen(fd_urn_batch, token=token):
      self.total_files += 1

      if not self.predicate(fd):
        self.ignored_files.append(utils.SmartUnicode(fd.urn))
        continue

      # Any file-like object with data in AFF4 should inherit AFF4Stream.
      if isinstance(fd, aff4.AFF4Stream):
        urn_components = fd.urn.Split()
        clients.add(rdf_client.ClientURN(urn_components[0]))

        content_path = os.path.join(self.prefix, *urn_components)
        self.archived_files += 1

        # Make sure the size of the original file is passed. It's required
        # when output_writer is StreamingTarWriter.
        st = os.stat_result((0o644, 0, 0, 0, 0, 0, fd.size, 0, 0, 0))
        fds_to_write[fd] = (content_path, st)

    if fds_to_write:
      prev_fd = None
      for fd, chunk, exception in aff4.AFF4Stream.MultiStream(fds_to_write):
        if exception:
          logging.exception(exception)

          self.archived_files -= 1
          self.failed_files.append(utils.SmartUnicode(fd.urn))
          continue

        if prev_fd != fd:
          if prev_fd:
            yield self.archive_generator.WriteFileFooter()
          prev_fd = fd

          content_path, st = fds_to_write[fd]
          yield self.archive_generator.WriteFileHeader(content_path, st=st)

        yield self.archive_generator.WriteFileChunk(chunk)

      if self.archive_generator.is_file_write_in_progress:
        yield self.archive_generator.WriteFileFooter()

  if clients:
    for client_urn_batch in utils.Grouper(clients, self.BATCH_SIZE):
      for fd in aff4.FACTORY.MultiOpen(
          client_urn_batch, aff4_type=aff4_grr.VFSGRRClient, token=token):
        for chunk in self._GenerateClientInfo(fd):
          yield chunk

  for chunk in self._GenerateDescription():
    yield chunk

  yield self.archive_generator.Close()
def _MigrateVfsUrns(self, vfs_urns):
  """Migrates history of given list of VFS URNs."""
  for group in utils.Grouper(vfs_urns, self.history_vfs_group_size):
    self._MigrateVfsUrnGroup(group)
def MigrateClients(self, client_urns):
  """Migrates entire VFS of given client list to the relational data store."""
  batches = utils.Grouper(client_urns, self.client_batch_size)
  tp = pool.ThreadPool(processes=self.thread_count)
  tp.map(self.MigrateClientBatch, list(batches))