Example #1
    def testVariableHuntSchedulesAllFlowsOnStart(self):
        client_ids = self.SetupClients(10)

        hunt_obj = rdf_hunt_objects.Hunt(client_rate=0)
        hunt_obj.args.hunt_type = hunt_obj.args.HuntType.VARIABLE

        for index, pair in enumerate(collection.Batch(client_ids, 2)):
            hunt_obj.args.variable.flow_groups.append(
                rdf_hunt_objects.VariableHuntFlowGroup(
                    client_ids=pair,
                    flow_name=compatibility.GetName(transfer.GetFile),
                    flow_args=transfer.GetFileArgs(pathspec=rdf_paths.PathSpec(
                        path="/tmp/evil_%d.txt" % index,
                        pathtype=rdf_paths.PathSpec.PathType.OS,
                    ))))

        data_store.REL_DB.WriteHuntObject(hunt_obj)
        hunt.StartHunt(hunt_obj.hunt_id)

        hunt_counters = data_store.REL_DB.ReadHuntCounters(hunt_obj.hunt_id)
        self.assertEqual(hunt_counters.num_clients, 10)

        all_flows = data_store.REL_DB.ReadHuntFlows(hunt_obj.hunt_id, 0,
                                                    sys.maxsize)
        self.assertCountEqual(client_ids, [f.client_id for f in all_flows])

        for index, pair in enumerate(collection.Batch(client_ids, 2)):
            for client_id in pair:
                all_flows = data_store.REL_DB.ReadAllFlowObjects(client_id)
                self.assertLen(all_flows, 1)

                self.assertEqual(all_flows[0].flow_class_name,
                                 compatibility.GetName(transfer.GetFile))
                self.assertEqual(all_flows[0].args.pathspec.path,
                                 "/tmp/evil_%d.txt" % index)
Example #2
    def Start(self):
        """Retrieve all the clients for the AbstractClientStatsCollectors."""
        self.stats = aff4.FACTORY.Create(self.FILESTORE_STATS_URN,
                                         aff4_stats.FilestoreStats,
                                         mode="w",
                                         token=self.token)

        self._CreateConsumers()
        hashes = aff4.FACTORY.Open(self.HASH_PATH,
                                   token=self.token).ListChildren(limit=10**8)

        try:
            for urns in collection.Batch(hashes, self.OPEN_FILES_LIMIT):
                for fd in aff4.FACTORY.MultiOpen(urns,
                                                 mode="r",
                                                 token=self.token,
                                                 age=aff4.NEWEST_TIME):

                    for consumer in self.consumers:
                        consumer.ProcessFile(fd)
                self.HeartBeat()

        finally:
            for consumer in self.consumers:
                consumer.Save(self.stats)
            self.stats.Close()
Example #3
  def Parse(self, cmd, args, stdout, stderr, return_val, knowledge_base):
    """Parse the yum output."""
    _ = stderr, args, knowledge_base  # Unused.
    self.CheckReturn(cmd, return_val)

    # `yum list installed` output is divided into lines. First line should be
    # always equal to "Installed Packages". The following lines are triplets,
    # but if one of the triplet columns does not fit, the rest of the row is
    # carried over to the next line. Thus, instead of processing the output line
    # by line, we split it into individual items (they cannot contain any space)
    # and chunk them to triplets.

    items = stdout.decode("utf-8").split()
    if not (items[0] == "Installed" and items[1] == "Packages"):
      message = ("`yum list installed` output does not start with \"Installed "
                 "Packages\"")
      raise AssertionError(message)
    items = items[2:]

    packages = []
    for name_arch, version, source in collection.Batch(items, 3):
      name, arch = name_arch.split(".")

      packages.append(
          rdf_client.SoftwarePackage.Installed(
              name=name, publisher=source, version=version, architecture=arch))

    if packages:
      yield rdf_client.SoftwarePackages(packages=packages)
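The triplet chunking described in the comment above can be illustrated with a small, self-contained snippet; the package tokens below are made up, and collection.Batch is assumed to yield consecutive fixed-size groups as sketched under Example #1:

items = [
    "acl.x86_64", "2.2.51-14.el7", "@base",
    "acpid.x86_64", "2.0.19-9.el7", "@base",
]
for name_arch, version, source in collection.Batch(items, 3):
    name, arch = name_arch.split(".")
    print(name, arch, version, source)
# acl x86_64 2.2.51-14.el7 @base
# acpid x86_64 2.0.19-9.el7 @base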
Example #4
    def Execute(self, thread_count, urns=None):
        """Runs the migration with a given thread count."""

        if urns is None:
            blob_urns = list(aff4.FACTORY.ListChildren("aff4:/blobs"))
        else:
            blob_urns = [rdfvalue.RDFURN(urn) for urn in urns]

        sys.stdout.write("Blobs to migrate: {}\n".format(len(blob_urns)))
        sys.stdout.write("Threads to use: {}\n".format(thread_count))

        self._total_count = len(blob_urns)
        self._migrated_count = 0
        self._start_time = rdfvalue.RDFDatetime.Now()

        batches = collection.Batch(blob_urns, _BLOB_BATCH_SIZE)

        self._Progress()
        tp = pool.ThreadPool(processes=thread_count)
        tp.map(self._MigrateBatch, list(batches))
        self._Progress()

        if self._migrated_count == self._total_count:
            message = "\nMigration has been finished (migrated {} blobs).\n".format(
                self._migrated_count)
            sys.stdout.write(message)
        else:
            message = "Not all blobs have been migrated ({}/{})".format(
                self._migrated_count, self._total_count)
            raise AssertionError(message)
Example #5
File: system.py Project: costaafm/grr
    def ProcessClients(self, responses):
        """Does the work."""
        del responses

        end = rdfvalue.RDFDatetime.Now() - db.CLIENT_STATS_RETENTION
        client_urns = export_utils.GetAllClients(token=self.token)

        for batch in collection.Batch(client_urns, 10000):
            with data_store.DB.GetMutationPool() as mutation_pool:
                for client_urn in batch:
                    mutation_pool.DeleteAttributes(
                        client_urn.Add("stats"), [u"aff4:stats"],
                        start=0,
                        end=end.AsMicrosecondsSinceEpoch())
            self.HeartBeat()

        if data_store.RelationalDBEnabled():
            total_deleted_count = 0
            for deleted_count in data_store.REL_DB.DeleteOldClientStats(
                    yield_after_count=_STATS_DELETION_BATCH_SIZE,
                    retention_time=end):
                self.HeartBeat()
                total_deleted_count += deleted_count
            self.Log("Deleted %d ClientStats that expired before %s",
                     total_deleted_count, end)
Example #6
    def ProcessSingleTypeExportedValues(self, original_value_type,
                                        exported_values):
        first_value = next(exported_values, None)
        if not first_value:
            return

        yield self.archive_generator.WriteFileHeader(
            "%s/%s/from_%s.yaml" %
            (self.path_prefix, first_value.__class__.__name__,
             original_value_type.__name__))

        serialized_value_bytes = _SerializeToYaml(first_value).encode("utf-8")
        yield self.archive_generator.WriteFileChunk(serialized_value_bytes)
        counter = 1
        for batch in collection.Batch(exported_values, self.ROW_BATCH):
            counter += len(batch)

            buf = io.StringIO()
            for value in batch:
                buf.write("\n")
                buf.write(_SerializeToYaml(value))

            contents = buf.getvalue()
            yield self.archive_generator.WriteFileChunk(
                contents.encode("utf-8"))
        yield self.archive_generator.WriteFileFooter()

        counts_for_original_type = self.export_counts.setdefault(
            original_value_type.__name__, dict())
        counts_for_original_type[first_value.__class__.__name__] = counter
Example #7
def AddFileWithUnknownHash(blob_ids):
  """Add a new file consisting of given blob IDs."""

  blob_refs = []
  offset = 0
  sha256 = hashlib.sha256()
  for blob_ids_batch in collection.Batch(blob_ids, _BLOBS_READ_BATCH_SIZE):
    unique_ids = set(blob_ids_batch)
    data = data_store.BLOBS.ReadBlobs(unique_ids)
    for k, v in iteritems(data):
      if v is None:
        raise BlobNotFound("Couldn't find one of referenced blobs: %s" % k)

    for blob_id in blob_ids_batch:
      blob_data = data[blob_id]
      blob_refs.append(
          rdf_objects.BlobReference(
              offset=offset,
              size=len(blob_data),
              blob_id=blob_id,
          ))
      offset += len(blob_data)

      sha256.update(blob_data)

  hash_id = rdf_objects.SHA256HashID.FromBytes(sha256.digest())
  data_store.REL_DB.WriteHashBlobReferences({hash_id: blob_refs})

  return hash_id
Example #8
    def _GenerateConvertedValues(self, converter, grr_messages):
        """Generates converted values using given converter from given messages.

    Groups values in batches of BATCH_SIZE size and applies the converter
    to each batch.

    Args:
      converter: ExportConverter instance.
      grr_messages: An iterable (a generator is assumed) with GRRMessage values.

    Yields:
      Values generated by the converter.

    Raises:
      ValueError: if any of the GrrMessage objects doesn't have "source" set.
    """
        for batch in collection.Batch(grr_messages, self.BATCH_SIZE):
            metadata_items = self._GetMetadataForClients(
                [gm.source for gm in batch])
            batch_with_metadata = zip(metadata_items,
                                      [gm.payload for gm in batch])

            for result in converter.BatchConvert(batch_with_metadata,
                                                 token=self.token):
                yield result
Example #9
    def MigrateClients(self, client_urns):
        """Migrates entire VFS of given client list to the relational data store."""
        self._start_time = rdfvalue.RDFDatetime.Now()

        self._client_urns_to_migrate = client_urns
        self._client_urns_migrated = []
        self._client_urns_failed = []

        to_migrate_count = len(self._client_urns_to_migrate)
        sys.stdout.write("Clients to migrate: {}\n".format(to_migrate_count))

        batches = collection.Batch(client_urns, self.client_batch_size)

        tp = pool.ThreadPool(processes=self.thread_count)
        tp.map(self.MigrateClientBatch, list(batches))

        migrated_count = len(self._client_urns_migrated)
        sys.stdout.write("Migrated clients: {}\n".format(migrated_count))

        if to_migrate_count == migrated_count:
            sys.stdout.write("All clients migrated successfully!\n")
        else:
            message = "Not all clients have been migrated ({}/{})".format(
                migrated_count, to_migrate_count)
            raise RuntimeError(message)
Example #10
  def ProcessSingleTypeExportedValues(self, original_value_type,
                                      exported_values):
    first_value = next(exported_values, None)
    if not first_value:
      return

    yield self.archive_generator.WriteFileHeader(
        "%s/%s/from_%s.yaml" % (self.path_prefix,
                                first_value.__class__.__name__,
                                original_value_type.__name__))
    yield self.archive_generator.WriteFileChunk(_SerializeToYaml(first_value))
    counter = 1
    for batch in collection.Batch(exported_values, self.ROW_BATCH):
      counter += len(batch)
      # TODO(hanuszczak): YAML is supposed to be a unicode file format so we
      # should use `StringIO` here instead. However, because PyYAML dumps to
      # `bytes` instead of `unicode` we have to use `BytesIO`. It should be
      # investigated whether there is a way to adjust behaviour of PyYAML.
      buf = io.BytesIO()
      for value in batch:
        buf.write(b"\n")
        buf.write(_SerializeToYaml(value))

      yield self.archive_generator.WriteFileChunk(buf.getvalue())
    yield self.archive_generator.WriteFileFooter()

    counts_for_original_type = self.export_counts.setdefault(
        original_value_type.__name__, dict())
    counts_for_original_type[first_value.__class__.__name__] = counter
Example #11
  def _StopLegacy(self, reason=None):
    super(GenericHunt, self).Stop(reason=reason)

    started_flows = grr_collections.RDFUrnCollection(
        self.started_flows_collection_urn)

    num_terminated_flows = 0
    self.Log("Hunt stop. Terminating all the started flows.")

    # Delete hunt flows states.
    for flows_batch in collection.Batch(started_flows,
                                        self.__class__.STOP_BATCH_SIZE):
      with queue_manager.QueueManager(token=self.token) as manager:
        manager.MultiDestroyFlowStates(flows_batch)

      with data_store.DB.GetMutationPool() as mutation_pool:
        for f in flows_batch:
          flow.GRRFlow.MarkForTermination(
              f, reason="Parent hunt stopped.", mutation_pool=mutation_pool)

      num_terminated_flows += len(flows_batch)

    # Delete hunt's requests and responses to ensure no more
    # processing is going to occur.
    with queue_manager.QueueManager(token=self.token) as manager:
      manager.DestroyFlowStates(self.session_id)

    self.Log("%d flows terminated.", num_terminated_flows)
Example #12
def _GetHWInfos(client_list, batch_size=10000, token=None):
    """Opens the given clients in batches and returns hardware information."""

    # This function returns a dict mapping each client_id to a set of reported
    # hardware serial numbers reported by this client.
    hw_infos = {}

    logging.info("%d clients to process.", len(client_list))

    c = 0

    for batch in collection.Batch(client_list, batch_size):
        logging.info("Processing batch: %d-%d", c, c + batch_size)
        c += len(batch)

        client_objs = aff4.FACTORY.MultiOpen(batch,
                                             age=aff4.ALL_TIMES,
                                             token=token)

        for client in client_objs:
            hwi = client.GetValuesForAttribute(client.Schema.HARDWARE_INFO)

            hw_infos[client.urn] = set(["%s" % x.serial_number for x in hwi])

    return hw_infos
Example #13
    def ProcessSingleTypeExportedValues(self, original_value_type,
                                        exported_values):
        first_value = next(exported_values, None)
        if not first_value:
            return

        if not isinstance(first_value, rdf_structs.RDFProtoStruct):
            raise ValueError("The SQLite plugin only supports export-protos")
        yield self.archive_generator.WriteFileHeader(
            "%s/%s_from_%s.sql" %
            (self.path_prefix, first_value.__class__.__name__,
             original_value_type.__name__))
        table_name = "%s.from_%s" % (first_value.__class__.__name__,
                                     original_value_type.__name__)
        schema = self._GetSqliteSchema(first_value.__class__)

        # We will buffer the sql statements into an in-memory sql database before
        # dumping them to the zip archive. We rely on the PySQLite library for
        # string escaping.
        db_connection = sqlite3.connect(":memory:")
        db_cursor = db_connection.cursor()

        yield self.archive_generator.WriteFileChunk(
            "BEGIN TRANSACTION;\n".encode("utf-8"))

        with db_connection:
            buf = io.StringIO()
            buf.write(u"CREATE TABLE \"%s\" (\n  " % table_name)
            column_types = [(k, v.sqlite_type) for k, v in iteritems(schema)]
            buf.write(u",\n  ".join(
                [u"\"%s\" %s" % (k, v) for k, v in column_types]))
            buf.write(u"\n);")
            db_cursor.execute(buf.getvalue())

            chunk = (buf.getvalue() + "\n").encode("utf-8")
            yield self.archive_generator.WriteFileChunk(chunk)

            self._InsertValueIntoDb(table_name, schema, first_value, db_cursor)

        for sql in self._FlushAllRows(db_connection, table_name):
            yield sql
        counter = 1
        for batch in collection.Batch(exported_values, self.ROW_BATCH):
            counter += len(batch)
            with db_connection:
                for value in batch:
                    self._InsertValueIntoDb(table_name, schema, value,
                                            db_cursor)
            for sql in self._FlushAllRows(db_connection, table_name):
                yield sql

        db_connection.close()
        yield self.archive_generator.WriteFileChunk(
            "COMMIT;\n".encode("utf-8"))
        yield self.archive_generator.WriteFileFooter()

        counts_for_original_type = self.export_counts.setdefault(
            original_value_type.__name__, dict())
        counts_for_original_type[first_value.__class__.__name__] = counter
Example #14
File: file_store.py Project: ehossam/grr
def StreamFilesChunks(client_paths, max_timestamp=None):
    """Streams contents of given files.

  Args:
    client_paths: db.ClientPath objects describing paths to files.
    max_timestamp: If specified, then for every requested file will open the
      last collected version of the file with a timestamp equal or lower than
      max_timestamp. If not specified, will simply open a latest version for
      each file.

  Yields:
    StreamedFileChunk objects for every file read. Chunks will be returned
    sequentially, their order will correspond to the client_paths order.
    Files having no content will simply be ignored.
  """

    path_infos_by_cp = (
        data_store.REL_DB.ReadLatestPathInfosWithHashBlobReferences(
            client_paths, max_timestamp=max_timestamp))

    hash_ids_by_cp = {
        cp: rdf_objects.SHA256HashID.FromBytes(pi.hash_entry.sha256.AsBytes())
        for cp, pi in iteritems(path_infos_by_cp) if pi
    }

    blob_refs_by_hash_id = data_store.REL_DB.ReadHashBlobReferences(
        hash_ids_by_cp.values())

    all_chunks = []
    for cp in client_paths:
        try:
            hash_id = hash_ids_by_cp[cp]
        except KeyError:
            continue

        try:
            blob_refs = blob_refs_by_hash_id[hash_id]
        except KeyError:
            continue

        num_blobs = len(blob_refs)
        total_size = 0
        for ref in blob_refs:
            total_size += ref.size

        for i, ref in enumerate(blob_refs):
            all_chunks.append(
                (cp, ref.blob_id, i, num_blobs, ref.offset, total_size))

    for batch in collection.Batch(all_chunks, STREAM_CHUNKS_READ_AHEAD):
        blobs = data_store.BLOBS.ReadBlobs([
            blob_id for cp, blob_id, i, num_blobs, offset, total_size in batch
        ])
        for cp, blob_id, i, num_blobs, offset, total_size in batch:
            yield StreamedFileChunk(cp, blobs[blob_id], i, num_blobs, offset,
                                    total_size)
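The read-ahead batching used above reduces to a generic pattern: plan every chunk first, then bulk-fetch blobs one batch at a time while yielding chunks in the original order. A minimal sketch under that assumption follows; the names are illustrative, not GRR APIs, and read_blobs stands in for any bulk reader returning a dict keyed by blob ID:

def StreamPlannedChunks(chunk_plan, read_blobs, batch_size=500):
    """chunk_plan: iterable of (path, blob_id, index); read_blobs: bulk reader."""
    for batch in collection.Batch(chunk_plan, batch_size):
        # One bulk read per batch keeps the number of datastore round trips low.
        blobs = read_blobs([blob_id for _, blob_id, _ in batch])
        for path, blob_id, index in batch:
            yield path, index, blobs[blob_id]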
Example #15
    def Generate(self, items, token=None):
        """Generates archive from a given collection.

    Iterates the collection and generates an archive by yielding contents
    of every referenced AFF4Stream.

    Args:
      items: Iterable of rdf_client_fs.StatEntry objects
      token: User's ACLToken.

    Yields:
      Binary chunks comprising the generated archive.
    """

        del token  # unused, to be removed with AFF4 code

        client_ids = set()
        for item_batch in collection.Batch(items, self.BATCH_SIZE):

            client_paths = set()
            for item in item_batch:
                try:
                    client_path = flow_export.CollectionItemToClientPath(
                        item, self.client_id)
                except flow_export.ItemNotExportableError:
                    continue

                if not self.predicate(client_path):
                    self.ignored_files.add(client_path)
                    self.processed_files.add(client_path)
                    continue

                client_ids.add(client_path.client_id)
                client_paths.add(client_path)

            for chunk in file_store.StreamFilesChunks(client_paths):
                self.processed_files.add(chunk.client_path)
                for output in self._WriteFileChunk(chunk=chunk):
                    yield output

            self.processed_files |= client_paths - (self.ignored_files
                                                    | self.archived_files)

        if client_ids:
            for client_id, client_info in iteritems(
                    data_store.REL_DB.MultiReadClientFullInfo(client_ids)):
                client = api_client.ApiClient().InitFromClientInfo(client_info)
                for chunk in self._GenerateClientInfo(client_id, client):
                    yield chunk

        for chunk in self._GenerateDescription():
            yield chunk

        yield self.archive_generator.Close()
Example #16
File: file.py Project: avmi/grr
    def _BatchConvert(self, metadata_value_pairs):
        registry_pairs, file_pairs, match_pairs = self._SeparateTypes(
            metadata_value_pairs)
        for fp_batch in collection.Batch(file_pairs, self._BATCH_SIZE):

            if self.options.export_files_contents:
                pathspec_by_client_path = {}
                for metadata, ff_result in fp_batch:
                    # TODO(user): Deprecate client_urn in ExportedMetadata in favor of
                    # client_id (to be added).
                    client_path = db.ClientPath.FromPathSpec(
                        metadata.client_urn.Basename(),
                        ff_result.stat_entry.pathspec)
                    pathspec_by_client_path[
                        client_path] = ff_result.stat_entry.pathspec

                data_by_pathspec = {}
                for chunk in file_store.StreamFilesChunks(
                        pathspec_by_client_path,
                        max_size=self.MAX_CONTENT_SIZE):
                    pathspec = pathspec_by_client_path[chunk.client_path]
                    data_by_pathspec.setdefault(pathspec.CollapsePath(),
                                                []).append(chunk.data)

            for metadata, ff_result in fp_batch:
                result = self._CreateExportedFile(metadata,
                                                  ff_result.stat_entry)

                # FileFinderResult has hashes in "hash_entry" attribute which is not
                # passed to ConvertValuesWithMetadata call. We have to process these
                # explicitly here.
                self.ParseFileHash(ff_result.hash_entry, result)

                if self.options.export_files_contents:
                    try:
                        data = data_by_pathspec[
                            ff_result.stat_entry.pathspec.CollapsePath()]
                        result.content = b"".join(data)[:self.MAX_CONTENT_SIZE]
                        result.content_sha256 = hashlib.sha256(
                            result.content).hexdigest()
                    except KeyError:
                        pass

                yield result

        # Now export the registry keys
        for result in export.ConvertValuesWithMetadata(registry_pairs,
                                                       options=self.options):
            yield result

        # Now export the grep matches.
        for result in export.ConvertValuesWithMetadata(match_pairs,
                                                       options=self.options):
            yield result
Example #17
def CleanVacuousVersions(clients=None, dry_run=True):
    """A script to remove no-op client versions.

  This script removes a client version when it is identical to the previous
  one, in the sense that no versioned attributes were changed since the
  previous client version.

  Args:
    clients: A list of ClientURN, if empty cleans all clients.
    dry_run: whether this is a dry run
  """

    if not clients:
        index = client_index.CreateClientIndex()
        clients = index.LookupClients(["."])
    clients.sort()
    with data_store.DB.GetMutationPool() as pool:

        logging.info("checking %d clients", len(clients))
        for batch in collection.Batch(clients, 10000):
            # TODO(amoser): This only works on datastores that use the Bigtable
            # scheme.
            client_infos = data_store.DB.MultiResolvePrefix(
                batch, ["aff4:", "aff4:"], data_store.DB.ALL_TIMESTAMPS)

            for client, type_list in client_infos:
                cleared = 0
                kept = 0
                updates = []
                for a, _, ts in type_list:
                    if ts != 0:
                        updates.append((ts, a))
                updates = sorted(updates)
                dirty = True
                for ts, a in updates:
                    if a == "aff4:type":
                        if dirty:
                            kept += 1
                            dirty = False
                        else:
                            cleared += 1
                            if not dry_run:
                                pool.DeleteAttributes(client, ["aff4:type"],
                                                      start=ts,
                                                      end=ts)
                                if pool.Size() > 1000:
                                    pool.Flush()
                    else:
                        dirty = True
                logging.info("%s: kept %d and cleared %d", client, kept,
                             cleared)
Example #18
    def Run(self):
        self.start = 0
        self.end = int(1e6 * (time.time() - self.MAX_AGE))

        client_urns = export_utils.GetAllClients(token=self.token)

        for batch in collection.Batch(client_urns, 10000):
            with data_store.DB.GetMutationPool() as mutation_pool:
                for client_urn in batch:
                    mutation_pool.DeleteAttributes(client_urn.Add("stats"),
                                                   [u"aff4:stats"],
                                                   start=self.start,
                                                   end=self.end)
            self.HeartBeat()
Example #19
def _IterateAllClients():
    """Fetches client data from the relational db."""
    all_client_ids = data_store.REL_DB.ReadAllClientIDs()
    for batch in collection.Batch(all_client_ids, CLIENT_READ_BATCH_SIZE):
        client_map = data_store.REL_DB.MultiReadClientFullInfo(batch)
        fs_client_ids = [
            cid for (cid, client) in iteritems(client_map)
            if client.metadata.fleetspeak_enabled
        ]
        last_contact_times = _GetLastContactFromFleetspeak(fs_client_ids)
        for cid, last_contact in iteritems(last_contact_times):
            client_map[cid].metadata.ping = last_contact
        for client in itervalues(client_map):
            yield client
Example #20
    def GetInput(self):
        """Yield client urns."""
        client_list = GetAllClients(token=self.token)
        logging.debug("Got %d clients", len(client_list))
        for client_group in collection.Batch(client_list,
                                             self.client_chunksize):
            for fd in aff4.FACTORY.MultiOpen(client_group,
                                             mode="r",
                                             aff4_type=aff4_grr.VFSGRRClient,
                                             token=self.token):
                if isinstance(fd, aff4_grr.VFSGRRClient):
                    # Skip if older than max_age
                    oldest_time = (time.time() - self.max_age) * 1e6
                if fd.Get(aff4_grr.VFSGRRClient.SchemaCls.PING) >= oldest_time:
                    yield fd
Example #21
File: system.py Project: costaafm/grr
    def Start(self):
        """Retrieve all the clients for the AbstractClientStatsCollectors."""
        try:

            self.stats = {}

            self.BeginProcessing()

            processed_count = 0

            if data_store.RelationalDBEnabled():
                for client_info in _IterateAllClients(
                        recency_window=self.recency_window):
                    self.ProcessClientFullInfo(client_info)
                    processed_count += 1

                    if processed_count % _CLIENT_READ_BATCH_SIZE == 0:
                        self.Log("Processed %d clients.", processed_count)
                        self.HeartBeat()

                if processed_count != 0:
                    self.Log("Processed %d clients.", processed_count)

            else:
                root_children = aff4.FACTORY.Open(
                    aff4.ROOT_URN, token=self.token).OpenChildren(mode="r")
                for batch in collection.Batch(root_children,
                                              _CLIENT_READ_BATCH_SIZE):
                    for child in batch:
                        if not isinstance(child, aff4_grr.VFSGRRClient):
                            continue

                        last_ping = child.Get(child.Schema.PING)

                        self.ProcessLegacyClient(last_ping, child)
                        processed_count += 1
                        # This flow is not dead: we don't want to run out of lease time.
                        self.HeartBeat()

            self.FinishProcessing()
            for fd in itervalues(self.stats):
                fd.Close()

            logging.info("%s: processed %d clients.", self.__class__.__name__,
                         processed_count)
        except Exception as e:  # pylint: disable=broad-except
            logging.exception("Error while calculating stats: %s", e)
            raise
Example #22
    def _MultiStream(cls, fds):
        """Effectively streams data from multiple opened BlobImage objects.

    Args:
      fds: A list of opened AFF4Stream (or AFF4Stream descendants) objects.

    Yields:
      Tuples (chunk, fd, exception) where chunk is a binary blob of data and fd
      is an object from the fds argument.

      If one or more chunks are missing, exception is a MissingBlobsError object
      and chunk is None. _MultiStream does its best to skip the file entirely if
      one of its chunks is missing, but in case of very large files it's still
      possible to yield a truncated file.
    """

        broken_fds = set()
        missing_blobs_fd_pairs = []
        for chunk_fd_pairs in collection.Batch(
                cls._GenerateChunkIds(fds),
                cls.MULTI_STREAM_CHUNKS_READ_AHEAD):
            chunk_fds = list(map(operator.itemgetter(0), chunk_fd_pairs))
            results_map = data_store.BLOBS.ReadBlobs(chunk_fds)

            for chunk_id, fd in chunk_fd_pairs:
                if chunk_id not in results_map or results_map[chunk_id] is None:
                    missing_blobs_fd_pairs.append((chunk_id, fd))
                    broken_fds.add(fd)

            for chunk, fd in chunk_fd_pairs:
                if fd in broken_fds:
                    continue

                yield fd, results_map[chunk], None

        if missing_blobs_fd_pairs:
            missing_blobs_by_fd = {}
            for chunk_id, fd in missing_blobs_fd_pairs:
                missing_blobs_by_fd.setdefault(fd, []).append(chunk_id)

            for fd, missing_blobs in iteritems(missing_blobs_by_fd):
                e = MissingBlobsError("%d missing blobs (multi-stream)" %
                                      len(missing_blobs),
                                      missing_chunks=missing_blobs)
                yield fd, None, e
Example #23
    def Generate(
        self, mappings: Iterator[flow_base.ClientPathArchiveMapping]
    ) -> Iterator[bytes]:
        """Generates archive from a given set of client path mappings.

    Iterates the mappings and generates an archive by yielding contents
    of every referenced file.

    Args:
      mappings: A set of mappings defining the archive structure.

    Yields:
      Chunks of bytes of the generated archive.
    """
        processed_files = {}
        missing_files = set()
        for mappings_batch in collection.Batch(mappings, self.BATCH_SIZE):

            archive_paths_by_id = {}
            for mapping in mappings_batch:
                archive_paths_by_id[
                    mapping.client_path.path_id] = mapping.archive_path

            processed_in_batch = set()
            for chunk in file_store.StreamFilesChunks(
                [m.client_path for m in mappings_batch]):
                processed_in_batch.add(chunk.client_path.path_id)
                processed_files[
                    chunk.client_path.vfs_path] = archive_paths_by_id[
                        chunk.client_path.path_id]
                for output in self._WriteFileChunk(chunk, archive_paths_by_id):
                    yield output

            for mapping in mappings_batch:
                if mapping.client_path.path_id in processed_in_batch:
                    continue

                missing_files.add(mapping.client_path.vfs_path)

        for chunk in self._GenerateDescription(processed_files, missing_files):
            yield chunk

        yield self.archive_generator.Close()
Example #24
def _IterateAllLegacyClients(token):
  """Fetches client data from the legacy db."""
  root_children = aff4.FACTORY.Open(
      aff4.ROOT_URN, token=token).OpenChildren(mode="r")
  for batch in collection.Batch(root_children, CLIENT_READ_BATCH_SIZE):
    fs_client_map = {}
    non_fs_clients = []
    for child in batch:
      if not isinstance(child, aff4_grr.VFSGRRClient):
        continue
      if child.Get(child.Schema.FLEETSPEAK_ENABLED):
        fs_client_map[child.urn.Basename()] = child
      else:
        non_fs_clients.append(child)
    last_contact_times = _GetLastContactFromFleetspeak(viewkeys(fs_client_map))
    for client in non_fs_clients:
      yield client.Get(client.Schema.PING), client
    for cid, client in iteritems(fs_client_map):
      last_contact = last_contact_times.get(cid, client.Get(client.Schema.PING))
      yield last_contact, client
Example #25
  def Convert(self, values, start_index=0, end_index=None):
    """Converts given collection to exported values.

    This method uses a threadpool to do the conversion in parallel. It
    blocks for up to one hour until everything is converted.

    Args:
      values: Iterable object with values to convert.
      start_index: Start from this index in the collection.
      end_index: Finish processing on the (index - 1) element of the collection.
        If None, work till the end of the collection.

    Returns:
      Nothing. ConvertedBatch() should handle the results.
    """
    if not values:
      return

    try:
      total_batch_count = len(values) // self.batch_size
    except TypeError:
      total_batch_count = -1

    pool = ThreadPool.Factory(self.threadpool_prefix, self.threadpool_size)
    val_iterator = itertools.islice(values, start_index, end_index)

    pool.Start()
    try:
      for batch_index, batch in enumerate(
          collection.Batch(val_iterator, self.batch_size)):
        logging.debug("Processing batch %d out of %d", batch_index,
                      total_batch_count)

        pool.AddTask(
            target=self.ConvertBatch,
            args=(batch,),
            name="batch_%d" % batch_index,
            inline=False)

    finally:
      pool.Stop(join_timeout=3600)
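The same batch-plus-threadpool pattern can be expressed with only the Python standard library. The sketch below is illustrative and does not use GRR's ThreadPool wrapper; convert_batch stands in for the real per-batch callback, and collection.Batch is assumed available as in the examples above:

import itertools
from multiprocessing.pool import ThreadPool

def ConvertInParallel(values, convert_batch, batch_size=1000, threads=10,
                      start_index=0, end_index=None):
    """Applies convert_batch to fixed-size batches of values using a thread pool."""
    val_iterator = itertools.islice(values, start_index, end_index)
    pool = ThreadPool(processes=threads)
    try:
        # map() consumes the batch generator and dispatches batches to workers.
        pool.map(convert_batch, collection.Batch(val_iterator, batch_size))
    finally:
        pool.close()
        pool.join()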
Example #26
    def Run(self):
        if not fleetspeak_connector.CONN or not fleetspeak_connector.CONN.outgoing:
            # Nothing to do if Fleetspeak is not enabled.
            self.Log("Fleetspeak has not been initialized. Will do nothing.")
            return

        if not data_store.RelationalDBWriteEnabled():
            raise NotImplementedError(
                "Cronjob does not support the legacy datastore.")

        age_threshold = config.CONFIG["Server.fleetspeak_last_ping_threshold"]
        max_last_ping = rdfvalue.RDFDatetime.Now() - age_threshold
        last_pings = data_store.REL_DB.ReadClientLastPings(
            max_last_ping=max_last_ping, fleetspeak_enabled=True)

        num_clients_updated = 0
        batch_size = config.CONFIG["Server.fleetspeak_list_clients_batch_size"]
        for client_ids in collection.Batch(iterkeys(last_pings), batch_size):
            fs_ids = [
                fleetspeak_utils.GRRIDToFleetspeakID(i) for i in client_ids
            ]
            request_start = rdfvalue.RDFDatetime.Now()
            fs_result = fleetspeak_connector.CONN.outgoing.ListClients(
                admin_pb2.ListClientsRequest(client_ids=fs_ids))
            latency = rdfvalue.RDFDatetime.Now() - request_start
            logging.info("Fleetspeak ListClients() took %s.", latency)
            stats_collector_instance.Get().RecordEvent(
                "fleetspeak_last_ping_latency_millis", latency.milliseconds)

            for fs_client in fs_result.clients:
                grr_id = fleetspeak_utils.FleetspeakIDToGRRID(
                    fs_client.client_id)
                new_last_ping = fleetspeak_utils.TSToRDFDatetime(
                    fs_client.last_contact_time)
                if last_pings[grr_id] is None or last_pings[
                        grr_id] < new_last_ping:
                    data_store.REL_DB.WriteClientMetadata(
                        grr_id, last_ping=new_last_ping)
                    num_clients_updated += 1

            self.Log("Updated timestamps for %d clients.", num_clients_updated)
Example #27
def _IterateAllClients(recency_window=None):
  """Fetches client data from the relational db.

  Args:
    recency_window: An rdfvalue.Duration specifying a window of last-ping
      timestamps to consider. Clients that haven't communicated with GRR servers
      longer than the given period will be skipped. If recency_window is None,
      all clients will be iterated.

  Yields:
    Batches (lists) of ClientFullInfo objects.
  """
  if recency_window is None:
    min_last_ping = None
  else:
    min_last_ping = rdfvalue.RDFDatetime.Now() - recency_window
  client_ids = data_store.REL_DB.ReadAllClientIDs(min_last_ping=min_last_ping)
  for client_id_batch in collection.Batch(client_ids, CLIENT_READ_BATCH_SIZE):
    client_info_dict = data_store.REL_DB.MultiReadClientFullInfo(
        client_id_batch)
    yield list(itervalues(client_info_dict))
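Because this variant yields whole batches rather than individual clients, a caller presumably consumes it roughly as in the following sketch; ProcessClientFullInfo is an illustrative stand-in for whatever per-client work is needed:

for client_batch in _IterateAllClients(recency_window=None):
    for client_full_info in client_batch:
        ProcessClientFullInfo(client_full_info)  # Illustrative per-client handler.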
Example #28
    def Handle(self, args, context=None):
        if args.count:
            end = args.offset + args.count
            # Read <count> clients ahead in case some of them fail to open / verify.
            batch_size = end + args.count
        else:
            end = db.MAX_COUNT
            batch_size = end

        keywords = compatibility.ShlexSplit(args.query)
        api_clients = []

        index = client_index.ClientIndex()

        # TODO(amoser): We could move the label verification into the
        # database making this method more efficient. Label restrictions
        # should be on small subsets though so this might not be worth
        # it.
        all_client_ids = set()
        for label in self.allow_labels:
            label_filter = ["label:" + label] + keywords
            all_client_ids.update(index.LookupClients(label_filter))

        index = 0
        for cid_batch in collection.Batch(sorted(all_client_ids), batch_size):
            client_infos = data_store.REL_DB.MultiReadClientFullInfo(cid_batch)

            for _, client_info in sorted(client_infos.items()):
                if not self._VerifyLabels(client_info.labels):
                    continue
                if index >= args.offset and index < end:
                    api_clients.append(
                        ApiClient().InitFromClientInfo(client_info))
                index += 1
                if index >= end:
                    UpdateClientsFromFleetspeak(api_clients)
                    return ApiSearchClientsResult(items=api_clients)

        UpdateClientsFromFleetspeak(api_clients)
        return ApiSearchClientsResult(items=api_clients)
Example #29
    def ProcessSingleTypeExportedValues(self, original_value_type,
                                        exported_values):
        first_value = next(exported_values, None)
        if not first_value:
            return

        yield self.archive_generator.WriteFileHeader(
            "%s/%s/from_%s.csv" %
            (self.path_prefix, first_value.__class__.__name__,
             original_value_type.__name__))

        writer = csv.Writer()
        # Write the CSV header based on first value class and write
        # the first value itself. All other values are guaranteed
        # to have the same class (see ProcessSingleTypeExportedValues definition).
        writer.WriteRow(self._GetCSVHeader(first_value.__class__))
        writer.WriteRow(self._GetCSVRow(first_value))

        chunk = writer.Content().encode("utf-8")
        yield self.archive_generator.WriteFileChunk(chunk)

        # Counter starts from 1, as 1 value has already been written.
        counter = 1
        for batch in collection.Batch(exported_values, self.ROW_BATCH):
            counter += len(batch)

            writer = csv.Writer()
            for value in batch:
                writer.WriteRow(self._GetCSVRow(value))

            chunk = writer.Content().encode("utf-8")
            yield self.archive_generator.WriteFileChunk(chunk)

        yield self.archive_generator.WriteFileFooter()

        self.export_counts.setdefault(
            original_value_type.__name__,
            dict())[first_value.__class__.__name__] = counter
Example #30
    def Execute(self, thread_count):
        """Runs the migration procedure.

    Args:
      thread_count: A number of threads to execute the migration with.

    Raises:
      AssertionError: If not all clients have been migrated.
      ValueError: If the relational database backend is not available.
    """
        if not data_store.RelationalDBWriteEnabled():
            raise ValueError("No relational database available.")

        sys.stdout.write("Collecting clients...\n")
        client_urns = _GetClientUrns()

        sys.stdout.write("Clients to migrate: {}\n".format(len(client_urns)))
        sys.stdout.write("Threads to use: {}\n".format(thread_count))

        self._total_count = len(client_urns)
        self._migrated_count = 0
        self._start_time = rdfvalue.RDFDatetime.Now()

        batches = collection.Batch(client_urns, _CLIENT_BATCH_SIZE)

        self._Progress()
        tp = pool.ThreadPool(processes=thread_count)
        tp.map(self._MigrateBatch, list(batches))
        self._Progress()

        if self._migrated_count == self._total_count:
            message = "\nMigration has been finished (migrated {} clients).\n".format(
                self._migrated_count)
            sys.stdout.write(message)
        else:
            message = "Not all clients have been migrated ({}/{})".format(
                self._migrated_count, self._total_count)
            raise AssertionError(message)