Example #1
    def ReadPathInfo(self,
                     client_id,
                     path_type,
                     components,
                     timestamp=None,
                     cursor=None):
        """Retrieves a path info record for a given path."""
        if timestamp is None:
            path_infos = self.ReadPathInfos(client_id, path_type, [components])

            path_info = path_infos[components]
            if path_info is None:
                raise db.UnknownPathError(client_id=client_id,
                                          path_type=path_type,
                                          components=components)

            return path_info

        query = """
    SELECT directory, UNIX_TIMESTAMP(p.timestamp),
           stat_entry, UNIX_TIMESTAMP(last_stat_entry_timestamp),
           hash_entry, UNIX_TIMESTAMP(last_hash_entry_timestamp)
      FROM client_paths as p
 LEFT JOIN (SELECT client_id, path_type, path_id, stat_entry
              FROM client_path_stat_entries
             WHERE client_id = %(client_id)s
               AND path_type = %(path_type)s
               AND path_id = %(path_id)s
               AND UNIX_TIMESTAMP(timestamp) <= %(timestamp)s
          ORDER BY timestamp DESC
             LIMIT 1) AS s
        ON p.client_id = s.client_id
       AND p.path_type = s.path_type
       AND p.path_id = s.path_id
 LEFT JOIN (SELECT client_id, path_type, path_id, hash_entry
              FROM client_path_hash_entries
             WHERE client_id = %(client_id)s
               AND path_type = %(path_type)s
               AND path_id = %(path_id)s
               AND UNIX_TIMESTAMP(timestamp) <= %(timestamp)s
          ORDER BY timestamp DESC
             LIMIT 1) AS h
        ON p.client_id = h.client_id
       AND p.path_type = h.path_type
       AND p.path_id = h.path_id
     WHERE p.client_id = %(client_id)s
       AND p.path_type = %(path_type)s
       AND p.path_id = %(path_id)s
    """
        values = {
            "client_id": db_utils.ClientIDToInt(client_id),
            "path_type": int(path_type),
            "path_id": rdf_objects.PathID.FromComponents(components).AsBytes(),
            "timestamp": mysql_utils.RDFDatetimeToTimestamp(timestamp),
        }

        cursor.execute(query, values)
        row = cursor.fetchone()
        if row is None:
            raise db.UnknownPathError(client_id=client_id,
                                      path_type=path_type,
                                      components=components)

        # pyformat: disable
        (directory, timestamp, stat_entry_bytes, last_stat_entry_timestamp,
         hash_entry_bytes, last_hash_entry_timestamp) = row
        # pyformat: enable

        if stat_entry_bytes is not None:
            stat_entry = rdf_client_fs.StatEntry.FromSerializedString(
                stat_entry_bytes)
        else:
            stat_entry = None

        if hash_entry_bytes is not None:
            hash_entry = rdf_crypto.Hash.FromSerializedString(hash_entry_bytes)
        else:
            hash_entry = None

        datetime = mysql_utils.TimestampToRDFDatetime
        return rdf_objects.PathInfo(
            path_type=path_type,
            components=components,
            timestamp=datetime(timestamp),
            last_stat_entry_timestamp=datetime(last_stat_entry_timestamp),
            last_hash_entry_timestamp=datetime(last_hash_entry_timestamp),
            directory=directory,
            stat_entry=stat_entry,
            hash_entry=hash_entry)
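
A minimal usage sketch (hypothetical client ID and path; `mysql_db` stands in for the instantiated MySQL datastore, assuming the class's transaction machinery supplies the `cursor` argument):

    path_info = mysql_db.ReadPathInfo(
        client_id="C.0123456789abcdef",                 # hypothetical client
        path_type=rdf_objects.PathInfo.PathType.OS,
        components=("usr", "local", "bin"))
    # With timestamp=None this delegates to ReadPathInfos and returns the
    # latest record; with a timestamp it runs the point-in-time query above.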
Example #2
  def ReadLatestPathInfosWithHashBlobReferences(self,
                                                client_paths,
                                                max_timestamp=None,
                                                cursor=None):
    """Returns PathInfos that have corresponding HashBlobReferences."""
    path_infos = {client_path: None for client_path in client_paths}

    path_id_components = {}
    for client_path in client_paths:
      path_id_components[client_path.path_id] = client_path.components

    params = []
    query = """
    SELECT t.client_id, t.path_type, t.path_id, UNIX_TIMESTAMP(t.timestamp),
           s.stat_entry, h.hash_entry
      FROM (SELECT h.client_id, h.path_type, h.path_id,
                   MAX(h.timestamp) AS timestamp
              FROM client_path_hash_entries AS h
        INNER JOIN hash_blob_references AS b
                ON b.hash_id = h.sha256
             WHERE {conditions}
          GROUP BY client_id, path_type, path_id) AS t
 LEFT JOIN client_path_stat_entries AS s
        ON s.client_id = t.client_id
       AND s.path_type = t.path_type
       AND s.path_id = t.path_id
       AND s.timestamp = t.timestamp
 LEFT JOIN client_path_hash_entries AS h
        ON h.client_id = t.client_id
       AND h.path_type = t.path_type
       AND h.path_id = t.path_id
       AND h.timestamp = t.timestamp
    """

    path_conditions = []

    for client_path in client_paths:
      path_conditions.append("""
      (client_id = %s AND path_type = %s AND path_id = %s)
      """)
      params.append(db_utils.ClientIDToInt(client_path.client_id))
      params.append(int(client_path.path_type))
      params.append(client_path.path_id.AsBytes())

    conditions = " OR ".join(path_conditions)
    if max_timestamp is not None:
      conditions = "({}) AND UNIX_TIMESTAMP(timestamp) <= %s".format(conditions)
      params.append(mysql_utils.RDFDatetimeToTimestamp(max_timestamp))

    cursor.execute(query.format(conditions=conditions), params)
    for row in cursor.fetchall():
      # pyformat: disable
      (client_id, path_type, path_id_bytes, timestamp,
       stat_entry_bytes, hash_entry_bytes) = row
      # pyformat: enable

      path_id = rdf_objects.PathID.FromBytes(path_id_bytes)
      components = path_id_components[path_id]

      if stat_entry_bytes is not None:
        stat_entry = rdf_client_fs.StatEntry.FromSerializedString(
            stat_entry_bytes)
      else:
        stat_entry = None

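      # Unlike stat_entry, hash_entry is never NULL here: every row of `t`
      # originates from client_path_hash_entries, and the LEFT JOIN re-matches
      # that exact (path, timestamp) row.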
      hash_entry = rdf_crypto.Hash.FromSerializedString(hash_entry_bytes)

      client_path = db.ClientPath(
          client_id=db_utils.IntToClientID(client_id),
          path_type=path_type,
          components=components)

      path_info = rdf_objects.PathInfo(
          path_type=path_type,
          components=components,
          stat_entry=stat_entry,
          hash_entry=hash_entry,
          timestamp=mysql_utils.TimestampToRDFDatetime(timestamp))

      path_infos[client_path] = path_info

    return path_infos
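
A minimal call sketch (hypothetical client ID; `mysql_db` again denotes the datastore instance):

    client_path = db.ClientPath(
        client_id="C.0123456789abcdef",                 # hypothetical client
        path_type=rdf_objects.PathInfo.PathType.OS,
        components=("usr", "bin", "ls"))
    results = mysql_db.ReadLatestPathInfosWithHashBlobReferences([client_path])
    # Maps each requested ClientPath to its newest PathInfo whose hash has
    # blob references, or to None if no such entry exists.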
Example #3
    def ReadPathInfos(self,
                      client_id,
                      path_type,
                      components_list,
                      cursor=None):
        """Retrieves path info records for given paths."""

        if not components_list:
            return {}

        path_ids = list(map(rdf_objects.PathID.FromComponents,
                            components_list))

        path_infos = {components: None for components in components_list}

        query = """
    SELECT path, directory, UNIX_TIMESTAMP(client_paths.timestamp),
           stat_entry, UNIX_TIMESTAMP(last_stat_entry_timestamp),
           hash_entry, UNIX_TIMESTAMP(last_hash_entry_timestamp)
      FROM client_paths
 LEFT JOIN client_path_stat_entries ON
           (client_paths.client_id = client_path_stat_entries.client_id AND
            client_paths.path_type = client_path_stat_entries.path_type AND
            client_paths.path_id = client_path_stat_entries.path_id AND
            client_paths.last_stat_entry_timestamp = client_path_stat_entries.timestamp)
 LEFT JOIN client_path_hash_entries ON
           (client_paths.client_id = client_path_hash_entries.client_id AND
            client_paths.path_type = client_path_hash_entries.path_type AND
            client_paths.path_id = client_path_hash_entries.path_id AND
            client_paths.last_hash_entry_timestamp = client_path_hash_entries.timestamp)
     WHERE client_paths.client_id = %(client_id)s
       AND client_paths.path_type = %(path_type)s
       AND client_paths.path_id IN %(path_ids)s
    """
        values = {
            "client_id": db_utils.ClientIDToInt(client_id),
            "path_type": int(path_type),
            "path_ids": [path_id.AsBytes() for path_id in path_ids]
        }

        cursor.execute(query, values)
        for row in cursor.fetchall():
            # pyformat: disable
            (path, directory, timestamp, stat_entry_bytes,
             last_stat_entry_timestamp, hash_entry_bytes,
             last_hash_entry_timestamp) = row
            # pyformat: enable
            components = mysql_utils.PathToComponents(path)

            if stat_entry_bytes is not None:
                stat_entry = rdf_client_fs.StatEntry.FromSerializedString(
                    stat_entry_bytes)
            else:
                stat_entry = None

            if hash_entry_bytes is not None:
                hash_entry = rdf_crypto.Hash.FromSerializedString(
                    hash_entry_bytes)
            else:
                hash_entry = None

            datetime = mysql_utils.TimestampToRDFDatetime
            path_info = rdf_objects.PathInfo(
                path_type=path_type,
                components=components,
                timestamp=datetime(timestamp),
                last_stat_entry_timestamp=datetime(last_stat_entry_timestamp),
                last_hash_entry_timestamp=datetime(last_hash_entry_timestamp),
                directory=directory,
                stat_entry=stat_entry,
                hash_entry=hash_entry)

            path_infos[components] = path_info

        return path_infos
Example #4
  def ReadPathInfos(self, client_id, path_type, components_list, cursor=None):
    """Retrieves path info records for given paths."""

    if not components_list:
      return {}

    path_ids = list(map(rdf_objects.PathID.FromComponents, components_list))

    path_infos = {components: None for components in components_list}

    query = """
    SELECT path, directory, UNIX_TIMESTAMP(client_paths.timestamp),
           stat_entry, UNIX_TIMESTAMP(last_stat_entry_timestamp),
           hash_entry, UNIX_TIMESTAMP(last_hash_entry_timestamp)
      FROM client_paths
 LEFT JOIN client_path_stat_entries ON
           (client_paths.client_id = client_path_stat_entries.client_id AND
            client_paths.path_type = client_path_stat_entries.path_type AND
            client_paths.path_id = client_path_stat_entries.path_id AND
            client_paths.last_stat_entry_timestamp = client_path_stat_entries.timestamp)
 LEFT JOIN client_path_hash_entries ON
           (client_paths.client_id = client_path_hash_entries.client_id AND
            client_paths.path_type = client_path_hash_entries.path_type AND
            client_paths.path_id = client_path_hash_entries.path_id AND
            client_paths.last_hash_entry_timestamp = client_path_hash_entries.timestamp)
     WHERE client_paths.client_id = %s
       AND client_paths.path_type = %s
       AND client_paths.path_id IN ({})
    """.format(", ".join(["%s"] * len(path_ids)))
    # NOTE: passing tuples as cursor.execute arguments is broken in
    # mysqldbclient==1.3.10
    # (see https://github.com/PyMySQL/mysqlclient-python/issues/145)
    # and is considered unmaintained.
    values = [
        db_utils.ClientIDToInt(client_id),
        int(path_type),
    ] + [path_id.AsBytes() for path_id in path_ids]

    cursor.execute(query, values)
    for row in cursor.fetchall():
      # pyformat: disable
      (path, directory, timestamp,
       stat_entry_bytes, last_stat_entry_timestamp,
       hash_entry_bytes, last_hash_entry_timestamp) = row
      # pyformat: enable
      components = mysql_utils.PathToComponents(path)

      if stat_entry_bytes is not None:
        stat_entry = rdf_client_fs.StatEntry.FromSerializedBytes(
            stat_entry_bytes)
      else:
        stat_entry = None

      if hash_entry_bytes is not None:
        hash_entry = rdf_crypto.Hash.FromSerializedBytes(hash_entry_bytes)
      else:
        hash_entry = None

      datetime = mysql_utils.TimestampToRDFDatetime
      path_info = rdf_objects.PathInfo(
          path_type=path_type,
          components=components,
          timestamp=datetime(timestamp),
          last_stat_entry_timestamp=datetime(last_stat_entry_timestamp),
          last_hash_entry_timestamp=datetime(last_hash_entry_timestamp),
          directory=directory,
          stat_entry=stat_entry,
          hash_entry=hash_entry)

      path_infos[components] = path_info

    return path_infos
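
The manual `IN` expansion works around the tuple-parameter issue referenced in the NOTE; a minimal sketch of the placeholder construction (hypothetical path IDs):

    path_ids = [b"\x00" * 32, b"\xff" * 32]            # hypothetical path IDs
    placeholders = ", ".join(["%s"] * len(path_ids))   # -> "%s, %s"
    query = "SELECT ... WHERE path_id IN ({})".format(placeholders)
    # Flattening path_ids into the parameter list binds each ID to its own
    # %s placeholder, so no tuple is ever passed to cursor.execute().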
Example #5
  def ReadPathInfosHistories(
      self,
      client_id: Text,
      path_type: rdf_objects.PathInfo.PathType,
      components_list: Iterable[Sequence[Text]],
      cutoff: Optional[rdfvalue.RDFDatetime] = None,
      cursor: Optional[MySQLdb.cursors.Cursor] = None
  ) -> Dict[Sequence[Text], Sequence[rdf_objects.PathInfo]]:
    """Reads a collection of hash and stat entries for given paths."""
    # MySQL does not handle empty `IN` clauses well, so we guard against that.
    if not components_list:
      return {}

    path_infos = {components: [] for components in components_list}

    path_id_components = {}
    for components in components_list:
      path_id = rdf_objects.PathID.FromComponents(components)
      path_id_components[path_id] = components

    params = {
        "client_id": db_utils.ClientIDToInt(client_id),
        "path_type": int(path_type),
    }
    for path_id in path_id_components:
      params["path_id_%s" % path_id.AsHexString()] = path_id.AsBytes()

    path_id_placeholders = ", ".join([
        "%(path_id_{})s".format(path_id.AsHexString())
        for path_id in path_id_components
    ])

    if cutoff is not None:
      stat_entry_timestamp_condition = """
      AND s.timestamp <= FROM_UNIXTIME(%(cutoff)s)
      """
      hash_entry_timestamp_condition = """
      AND h.timestamp <= FROM_UNIXTIME(%(cutoff)s)
      """
      params["cutoff"] = mysql_utils.RDFDatetimeToTimestamp(cutoff)
    else:
      stat_entry_timestamp_condition = ""
      hash_entry_timestamp_condition = ""

    # MySQL does not support full outer joins, so we emulate them with a union.
    query = """
    SELECT s.path_id, s.stat_entry, UNIX_TIMESTAMP(s.timestamp),
           h.path_id, h.hash_entry, UNIX_TIMESTAMP(h.timestamp)
      FROM client_path_stat_entries AS s
 LEFT JOIN client_path_hash_entries AS h
        ON s.client_id = h.client_id
       AND s.path_type = h.path_type
       AND s.path_id = h.path_id
       AND s.timestamp = h.timestamp
     WHERE s.client_id = %(client_id)s
       AND s.path_type = %(path_type)s
       AND s.path_id IN ({path_id_placeholders})
       {stat_entry_timestamp_condition}
     UNION
    SELECT s.path_id, s.stat_entry, UNIX_TIMESTAMP(s.timestamp),
           h.path_id, h.hash_entry, UNIX_TIMESTAMP(h.timestamp)
      FROM client_path_hash_entries AS h
 LEFT JOIN client_path_stat_entries AS s
        ON h.client_id = s.client_id
       AND h.path_type = s.path_type
       AND h.path_id = s.path_id
       AND h.timestamp = s.timestamp
     WHERE h.client_id = %(client_id)s
       AND h.path_type = %(path_type)s
       AND h.path_id IN ({path_id_placeholders})
       {hash_entry_timestamp_condition}
    """.format(
        stat_entry_timestamp_condition=stat_entry_timestamp_condition,
        hash_entry_timestamp_condition=hash_entry_timestamp_condition,
        path_id_placeholders=path_id_placeholders)

    cursor.execute(query, params)
    for row in cursor.fetchall():
      # pyformat: disable
      (stat_entry_path_id_bytes, stat_entry_bytes, stat_entry_timestamp,
       hash_entry_path_id_bytes, hash_entry_bytes, hash_entry_timestamp) = row
      # pyformat: enable

      path_id_bytes = stat_entry_path_id_bytes or hash_entry_path_id_bytes
      path_id = rdf_objects.PathID.FromSerializedBytes(path_id_bytes)
      components = path_id_components[path_id]

      timestamp = stat_entry_timestamp or hash_entry_timestamp

      if stat_entry_bytes is not None:
        stat_entry = rdf_client_fs.StatEntry.FromSerializedBytes(
            stat_entry_bytes)
      else:
        stat_entry = None

      if hash_entry_bytes is not None:
        hash_entry = rdf_crypto.Hash.FromSerializedBytes(hash_entry_bytes)
      else:
        hash_entry = None

      path_info = rdf_objects.PathInfo(
          path_type=path_type,
          components=components,
          stat_entry=stat_entry,
          hash_entry=hash_entry,
          timestamp=mysql_utils.TimestampToRDFDatetime(timestamp))

      path_infos[components].append(path_info)

    for components in components_list:
      path_infos[components].sort(key=lambda path_info: path_info.timestamp)

    return path_infos
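
The UNION construction is the standard emulation of a full outer join; a generic sketch with hypothetical tables t1/t2 joined on key k:

    full_outer_join = """
    SELECT t1.k, t2.k, t1.v, t2.v FROM t1 LEFT JOIN t2 ON t1.k = t2.k
     UNION
    SELECT t1.k, t2.k, t1.v, t2.v FROM t2 LEFT JOIN t1 ON t2.k = t1.k
    """
    # The first branch keeps rows present only in t1, the second keeps rows
    # present only in t2, and UNION (which deduplicates) merges the overlap.
    # Selecting the key from both sides keeps it available for unmatched rows,
    # which is why the query above reads both s.path_id and h.path_id and the
    # Python code coalesces them with `or`.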
Example #6
    def AddFile(self, fd):
        """Adds a file to the hash file store.

    We take a file in the client space:
      aff4:/C.123123123/fs/os/usr/local/blah

    Hash it, update the hash in the original file if it's different from the
    one calculated on the client, and copy the original AFF4 object to

      aff4:/files/hash/generic/sha256/123123123 (canonical reference)

    We then create symlinks for all other hash types:

      aff4:/files/hash/generic/sha1/345345345
      aff4:/files/hash/generic/md5/456456456
      aff4:/files/hash/pecoff/md5/aaaaaaaa (only for PEs)
      aff4:/files/hash/pecoff/sha1/bbbbbbbb (only for PEs)

    When present in PE files, the signing data (revision, cert_type,
    certificate) is added to the original object.

    This can't be done simply in the FileStore.Write() method with fixed hash
    buffer sizes because the authenticode hashes need to track hashing of
    different-sized regions based on the signature information.

    Args:
      fd: File open for reading.

    Raises:
      IOError: If there was an error writing the file.
    """
        hashes = self._HashFile(fd)

        # The empty file is very common; we don't keep back references for it
        # in the DB since they would just take up too much space.
        empty_hash = ("e3b0c44298fc1c149afbf4c8996fb924"
                      "27ae41e4649b934ca495991b7852b855")
        if hashes.sha256 == empty_hash:
            return

        # Update the hashes field now that we have calculated them all.
        fd.Set(fd.Schema.HASH, hashes)
        fd.Flush()

        if data_store.RelationalDBWriteEnabled():
            client_id, vfs_path = fd.urn.Split(2)
            path_type, components = rdf_objects.ParseCategorizedPath(vfs_path)
            path_info = rdf_objects.PathInfo(path_type=path_type,
                                             components=components,
                                             hash_entry=hashes)
            data_store.REL_DB.WritePathInfos(client_id, [path_info])

        # sha256 is the canonical location.
        canonical_urn = self.PATH.Add("generic/sha256").Add(str(hashes.sha256))
        if not list(aff4.FACTORY.Stat(canonical_urn)):
            aff4.FACTORY.Copy(fd.urn, canonical_urn)
            # Remove the STAT entry; it makes no sense to copy it between clients.
            with aff4.FACTORY.Open(canonical_urn, mode="rw",
                                   token=self.token) as new_fd:
                new_fd.Set(new_fd.Schema.STAT(None))

        self._AddToIndex(canonical_urn, fd.urn)

        for hash_type, hash_digest in hashes.ListSetFields():
            # Determine fingerprint type.
            hash_type = hash_type.name
            # No need to create a symlink for sha256, it's the canonical location.
            if hash_type == "sha256":
                continue
            hash_digest = str(hash_digest)
            fingerprint_type = "generic"
            if hash_type.startswith("pecoff_"):
                fingerprint_type = "pecoff"
                hash_type = hash_type[len("pecoff_"):]
            if hash_type not in self.HASH_TYPES[fingerprint_type]:
                continue

            file_store_urn = self.PATH.Add(fingerprint_type).Add(
                hash_type).Add(hash_digest)

            with aff4.FACTORY.Create(file_store_urn,
                                     aff4.AFF4Symlink,
                                     token=self.token) as symlink:
                symlink.Set(symlink.Schema.SYMLINK_TARGET, canonical_urn)

        # We do not want to be externally written here.
        return None
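
The constant is the well-known SHA-256 digest of empty input and can be verified directly:

    import hashlib

    EMPTY_SHA256 = ("e3b0c44298fc1c149afbf4c8996fb924"
                    "27ae41e4649b934ca495991b7852b855")
    assert hashlib.sha256(b"").hexdigest() == EMPTY_SHA256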
Example #7
    def CreateClientObject(self, vfs_fixture):
        """Make a new client object."""

        # First remove the old fixture just in case its still there.
        aff4.FACTORY.Delete(self.client_id, token=self.token)

        # Create the fixture at a fixed time.
        with test_lib.FakeTime(self.age):
            for path, (aff4_type, attributes) in vfs_fixture:
                path %= self.args

                if data_store.AFF4Enabled():
                    aff4_object = aff4.FACTORY.Create(self.client_id.Add(path),
                                                      aff4_type,
                                                      mode="rw",
                                                      token=self.token)

                path_info = None

                if data_store.RelationalDBWriteEnabled():
                    data_store.REL_DB.WriteClientMetadata(
                        self.client_id.Basename(), fleetspeak_enabled=False)

                    components = [
                        component for component in path.split("/") if component
                    ]
                    if (len(components) > 1 and components[0] == "fs"
                            and components[1] in ["os", "tsk"]):
                        path_info = rdf_objects.PathInfo()
                        if components[1] == "os":
                            path_info.path_type = rdf_objects.PathInfo.PathType.OS
                        else:
                            path_info.path_type = rdf_objects.PathInfo.PathType.TSK
                        path_info.components = components[2:]
                        if aff4_type in [
                                aff4_grr.VFSFile, aff4_grr.VFSMemoryFile
                        ]:
                            path_info.directory = False
                        elif aff4_type == aff4_standard.VFSDirectory:
                            path_info.directory = True
                        else:
                            raise ValueError("Incorrect AFF4 type: %s" %
                                             aff4_type)

                for attribute_name, value in iteritems(attributes):
                    attribute = aff4.Attribute.PREDICATES[attribute_name]
                    if isinstance(value, (bytes, Text)):
                        # Interpolate the value
                        value %= self.args

                    # Is this supposed to be an RDFValue array?
                    if issubclass(attribute.attribute_type,
                                  rdf_protodict.RDFValueArray):
                        rdfvalue_object = attribute()
                        for item in value:
                            new_object = rdfvalue_object.rdf_type.FromTextFormat(
                                utils.SmartStr(item))
                            rdfvalue_object.Append(new_object)

                    # It is a text serialized protobuf.
                    elif issubclass(attribute.attribute_type,
                                    rdf_structs.RDFProtoStruct):
                        # Use the alternate constructor - we always write protobufs in
                        # textual form:
                        rdfvalue_object = attribute.attribute_type.FromTextFormat(
                            utils.SmartStr(value))

                    elif issubclass(attribute.attribute_type,
                                    rdfvalue.RDFInteger):
                        rdfvalue_object = attribute(int(value))
                    else:
                        rdfvalue_object = attribute(value)

                    if data_store.AFF4Enabled():
                        # If we don't already have a pathspec, try and get one from the
                        # stat.
                        if aff4_object.Get(
                                aff4_object.Schema.PATHSPEC) is None:
                            # If the attribute was a stat, it has a pathspec nested in it.
                            # We should add that pathspec as an attribute.
                            if attribute.attribute_type == rdf_client_fs.StatEntry:
                                stat_object = attribute.attribute_type.FromTextFormat(
                                    utils.SmartStr(value))
                                if stat_object.pathspec:
                                    pathspec_attribute = aff4.Attribute(
                                        "aff4:pathspec", rdf_paths.PathSpec,
                                        "The pathspec used to retrieve "
                                        "this object from the client.",
                                        "pathspec")
                                    aff4_object.AddAttribute(
                                        pathspec_attribute,
                                        stat_object.pathspec)

                    if attribute == "aff4:content":
                        # Compute the content up front: it's also needed for
                        # the relational path_info branch below, even when
                        # AFF4 is disabled.
                        content = rdfvalue_object.AsBytes()
                        if data_store.AFF4Enabled():
                            # For AFF4MemoryStreams we need to call Write() instead of
                            # directly setting the contents.
                            aff4_object.Write(content)

                        if path_info is not None:
                            blob_id = rdf_objects.BlobID.FromBlobData(content)
                            data_store.BLOBS.WriteBlobs({blob_id: content})
                            hash_id = file_store.AddFileWithUnknownHash(
                                db.ClientPath.FromPathInfo(
                                    self.client_id.Basename(), path_info),
                                [blob_id])
                            path_info.hash_entry.num_bytes = len(content)
                            path_info.hash_entry.sha256 = hash_id.AsBytes()
                    elif data_store.AFF4Enabled():
                        aff4_object.AddAttribute(attribute, rdfvalue_object)

                    if (isinstance(rdfvalue_object, rdf_client_fs.StatEntry)
                            and rdfvalue_object.pathspec.pathtype != "UNSET"):
                        if data_store.RelationalDBWriteEnabled():
                            client_id = self.client_id.Basename()
                            path_info = rdf_objects.PathInfo.FromStatEntry(
                                rdfvalue_object)
                            data_store.REL_DB.WritePathInfos(
                                client_id, [path_info])

                if data_store.AFF4Enabled():
                    # Populate the KB from the client attributes.
                    if aff4_type == aff4_grr.VFSGRRClient:
                        kb = rdf_client.KnowledgeBase()
                        artifact.SetCoreGRRKnowledgeBaseValues(kb, aff4_object)
                        aff4_object.Set(aff4_object.Schema.KNOWLEDGE_BASE, kb)

                    # Make sure we do not actually close the object here - we only want to
                    # sync back its attributes, not run any finalization code.
                    aff4_object.Flush()
                    if aff4_type == aff4_grr.VFSGRRClient:
                        index = client_index.CreateClientIndex(
                            token=self.token)
                        index.AddClient(aff4_object)

                if path_info is not None:
                    data_store.REL_DB.WritePathInfos(
                        client_id=self.client_id.Basename(),
                        path_infos=[path_info])
Example #8
    def testGetAncestorsRoot(self):
        path_info = rdf_objects.PathInfo(components=["foo"])

        results = list(path_info.GetAncestors())
        self.assertLen(results, 1)
        self.assertEqual(results[0].components, [])
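
The test pins down only the single-component case; a sketch of the general property it suggests, namely that every proper prefix of `components` (including the empty root) is an ancestor:

    path_info = rdf_objects.PathInfo(components=["usr", "local", "bin"])
    ancestor_components = sorted(
        tuple(ancestor.components) for ancestor in path_info.GetAncestors())
    # -> [(), ("usr",), ("usr", "local")]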
Example #9
 def testUpdateFromValidatesType(self):
     with self.assertRaises(TypeError):
         rdf_objects.PathInfo(
             components=["usr", "local", "bin"]).UpdateFrom("/usr/local/bin")
Example #10
 def testValidateDoubleDotComponent(self):
     with self.assertRaisesRegex(ValueError, "Incorrect"):
         rdf_objects.PathInfo(components=["..", "foo", "bar"])
Example #11
 def testGetAncestorsEmpty(self):
     path_info = rdf_objects.PathInfo(components=[], directory=True)
     self.assertEqual(list(path_info.GetAncestors()), [])
Example #12
 def testValidateEmptyComponent(self):
     with self.assertRaisesRegex(ValueError, "Empty"):
         rdf_objects.PathInfo(components=["foo", "", "bar"])
Example #13
  def testWritePathInfosValidatesPathType(self):
    path = ["usr", "local"]
    client_id = "C.bbbbbbbbbbbbbbbb"

    with self.assertRaises(ValueError):
      self.db.WritePathInfos(client_id, [rdf_objects.PathInfo(components=path)])
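
A hedged sketch of the construction the test implies would pass validation, with an explicit path type set (hypothetical, reusing the test's fixtures):

    path_info = rdf_objects.PathInfo(
        path_type=rdf_objects.PathInfo.PathType.OS, components=path)
    self.db.WritePathInfos(client_id, [path_info])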
Example #14
File: vfs.py Project: costaafm/grr
    def Handle(self, args, token=None):
        ValidateVfsPath(args.file_path)

        # Directories are not really "files", so they cannot be stored in the
        # database, but they can still be queried, so we need to return
        # something. Sometimes the path contains a trailing slash, so we need
        # to take care of that.
        #
        # TODO(hanuszczak): Require VFS paths to be normalized so that trailing
        # slash is either forbidden or mandatory.
        if args.file_path.endswith("/"):
            args.file_path = args.file_path[:-1]
        if args.file_path in ["fs", "registry", "temp", "fs/os", "fs/tsk"]:
            api_file = ApiFile(name=args.file_path,
                               path=args.file_path,
                               is_directory=True,
                               details=_GenerateApiFileDetails([]))
            return ApiGetFileDetailsResult(file=api_file)

        path_type, components = rdf_objects.ParseCategorizedPath(
            args.file_path)

        # TODO(hanuszczak): The tests passed even without support for timestamp
        # filtering. The test suite should probably be improved in that regard.
        client_id = str(args.client_id)
        path_infos = data_store.REL_DB.ReadPathInfoHistory(
            client_id, path_type, components)
        path_infos.reverse()
        if args.timestamp:
            path_infos = [
                pi for pi in path_infos if pi.timestamp <= args.timestamp
            ]

        if not path_infos:
            # TODO(user): As soon as we get rid of AFF4 - raise here. At the
            # moment we just return a directory-like stub instead to mimic the
            # AFF4Volume behavior.
            #
            # raise FileNotFoundError("No file matching the path %s at timestamp %s" %
            #                         (args.file_path, args.timestamp))
            pi = rdf_objects.PathInfo(path_type=path_type,
                                      components=components,
                                      directory=True)
            api_file = ApiFile(name=components[-1],
                               path=args.file_path,
                               is_directory=True,
                               details=_GenerateApiFileDetails([pi]))
            return ApiGetFileDetailsResult(file=api_file)

        last_path_info = path_infos[0]

        last_collection_pi = file_store.GetLastCollectionPathInfo(
            db.ClientPath.FromPathInfo(client_id, last_path_info),
            max_timestamp=args.timestamp)

        file_obj = ApiFile(
            name=components[-1],
            path=rdf_objects.ToCategorizedPath(path_type, components),
            stat=last_path_info.stat_entry,
            hash=last_path_info.hash_entry,
            details=_GenerateApiFileDetails(path_infos),
            is_directory=stat.S_ISDIR(last_path_info.stat_entry.st_mode),
            age=last_path_info.timestamp,
        )

        if last_collection_pi:
            file_obj.last_collected = last_collection_pi.timestamp
            file_obj.last_collected_size = last_collection_pi.hash_entry.num_bytes

        return ApiGetFileDetailsResult(file=file_obj)
Example #15
    def ListDescendentPathInfos(self,
                                client_id,
                                path_type,
                                components,
                                timestamp=None,
                                max_depth=None,
                                cursor=None):
        """Lists path info records that correspond to descendants of given path."""
        path_infos = []

        query = ""

        path = mysql_utils.ComponentsToPath(components)
        values = {
            "client_id": db_utils.ClientIDToInt(client_id),
            "path_type": int(path_type),
            "path": db_utils.EscapeWildcards(path),
        }

        query += """
    SELECT path, directory, UNIX_TIMESTAMP(p.timestamp),
           stat_entry, UNIX_TIMESTAMP(last_stat_entry_timestamp),
           hash_entry, UNIX_TIMESTAMP(last_hash_entry_timestamp)
      FROM client_paths AS p
    """
        if timestamp is None:
            query += """
      LEFT JOIN client_path_stat_entries AS s ON
                (p.client_id = s.client_id AND
                 p.path_type = s.path_type AND
                 p.path_id = s.path_id AND
                 p.last_stat_entry_timestamp = s.timestamp)
      LEFT JOIN client_path_hash_entries AS h ON
                (p.client_id = h.client_id AND
                 p.path_type = h.path_type AND
                 p.path_id = h.path_id AND
                 p.last_hash_entry_timestamp = h.timestamp)
      """
            only_explicit = False
        else:
            query += """
      LEFT JOIN (SELECT sr.client_id, sr.path_type, sr.path_id, sr.stat_entry
                   FROM client_path_stat_entries AS sr
             INNER JOIN (SELECT client_id, path_type, path_id,
                                MAX(timestamp) AS max_timestamp
                           FROM client_path_stat_entries
                          WHERE UNIX_TIMESTAMP(timestamp) <= %(timestamp)s
                       GROUP BY client_id, path_type, path_id) AS st
                     ON sr.client_id = st.client_id
                    AND sr.path_type = st.path_type
                    AND sr.path_id = st.path_id
                    AND sr.timestamp = st.max_timestamp) AS s
             ON (p.client_id = s.client_id AND
                 p.path_type = s.path_type AND
                 p.path_id = s.path_id)
      LEFT JOIN (SELECT hr.client_id, hr.path_type, hr.path_id, hr.hash_entry
                   FROM client_path_hash_entries AS hr
             INNER JOIN (SELECT client_id, path_type, path_id,
                                MAX(timestamp) AS max_timestamp
                           FROM client_path_hash_entries
                          WHERE UNIX_TIMESTAMP(timestamp) <= %(timestamp)s
                       GROUP BY client_id, path_type, path_id) AS ht
                     ON hr.client_id = ht.client_id
                    AND hr.path_type = ht.path_type
                    AND hr.path_id = ht.path_id
                    AND hr.timestamp = ht.max_timestamp) AS h
             ON (p.client_id = h.client_id AND
                 p.path_type = h.path_type AND
                 p.path_id = h.path_id)
      """
            values["timestamp"] = mysql_utils.RDFDatetimeToTimestamp(timestamp)
            only_explicit = True

        query += """
    WHERE p.client_id = %(client_id)s
      AND p.path_type = %(path_type)s
      AND path LIKE concat(%(path)s, '/%%')
    """

        if max_depth is not None:
            query += """
      AND depth <= %(depth)s
      """
            values["depth"] = len(components) + max_depth

        cursor.execute(query, values)
        for row in cursor.fetchall():
            # pyformat: disable
            (path, directory, timestamp, stat_entry_bytes,
             last_stat_entry_timestamp, hash_entry_bytes,
             last_hash_entry_timestamp) = row
            # pyformat: enable

            components = mysql_utils.PathToComponents(path)

            if stat_entry_bytes is not None:
                stat_entry = rdf_client_fs.StatEntry.FromSerializedString(
                    stat_entry_bytes)
            else:
                stat_entry = None

            if hash_entry_bytes is not None:
                hash_entry = rdf_crypto.Hash.FromSerializedString(
                    hash_entry_bytes)
            else:
                hash_entry = None

            datetime = mysql_utils.TimestampToRDFDatetime
            path_info = rdf_objects.PathInfo(
                path_type=path_type,
                components=components,
                timestamp=datetime(timestamp),
                last_stat_entry_timestamp=datetime(last_stat_entry_timestamp),
                last_hash_entry_timestamp=datetime(last_hash_entry_timestamp),
                directory=directory,
                stat_entry=stat_entry,
                hash_entry=hash_entry)

            path_infos.append(path_info)

        path_infos.sort(key=lambda _: tuple(_.components))

        # For a specific timestamp, we return information only about explicit
        # paths (paths that have an associated stat or hash entry, or that
        # have an explicit descendant).
        if not only_explicit:
            return path_infos

        explicit_path_infos = []
        has_explicit_ancestor = set()

        # The list is sorted by path components, so by traversing it in
        # reverse order we make sure that deeper paths are processed first.
        for path_info in reversed(path_infos):
            components = tuple(path_info.components)

            if (path_info.HasField("stat_entry")
                    or path_info.HasField("hash_entry")
                    or components in has_explicit_ancestor):
                explicit_path_infos.append(path_info)
                has_explicit_ancestor.add(components[:-1])

        # Since we collected the explicit paths in reverse order, we need to
        # reverse the list again to conform to the interface.
        return list(reversed(explicit_path_infos))
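
The explicit-path filtering can be read in isolation; a minimal sketch with plain tuples, where the boolean stands in for the stat/hash-entry check (hypothetical data):

    entries = [(("a",), False), (("a", "b"), True)]    # sorted by components
    explicit = []
    has_explicit_descendant = set()
    for components, is_explicit in reversed(entries):  # deepest paths first
        if is_explicit or components in has_explicit_descendant:
            explicit.append(components)
            has_explicit_descendant.add(components[:-1])
    explicit.reverse()
    # -> [("a",), ("a", "b")]: the implicit parent is kept because its child
    # is explicit.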
Example #16
 def testUpdateFromValidatesComponents(self):
     with self.assertRaises(ValueError):
         rdf_objects.PathInfo(
             components=["usr", "local", "bin"]).UpdateFrom(
                 rdf_objects.PathInfo(
                     components=["usr", "local", "bin", "protoc"]))
Example #17
    def ReadPathInfosHistories(self,
                               client_id,
                               path_type,
                               components_list,
                               cursor=None):
        """Reads a collection of hash and stat entries for given paths."""
        # MySQL does not handle empty `IN` clauses well, so we guard against that.
        if not components_list:
            return {}

        path_infos = {components: [] for components in components_list}

        path_id_components = {}
        for components in components_list:
            path_id = rdf_objects.PathID.FromComponents(components)
            path_id_components[path_id] = components

        # MySQL does not support full outer joins, so we emulate them with a union.
        query = """
    SELECT s.path_id, s.stat_entry, UNIX_TIMESTAMP(s.timestamp),
           h.path_id, h.hash_entry, UNIX_TIMESTAMP(h.timestamp)
      FROM client_path_stat_entries AS s
 LEFT JOIN client_path_hash_entries AS h
        ON s.client_id = h.client_id
       AND s.path_type = h.path_type
       AND s.path_id = h.path_id
       AND s.timestamp = h.timestamp
     WHERE s.client_id = %(client_id)s
       AND s.path_type = %(path_type)s
       AND s.path_id IN %(path_ids)s
     UNION
    SELECT s.path_id, s.stat_entry, UNIX_TIMESTAMP(s.timestamp),
           h.path_id, h.hash_entry, UNIX_TIMESTAMP(h.timestamp)
      FROM client_path_hash_entries AS h
 LEFT JOIN client_path_stat_entries AS s
        ON h.client_id = s.client_id
       AND h.path_type = s.path_type
       AND h.path_id = s.path_id
       AND h.timestamp = s.timestamp
     WHERE h.client_id = %(client_id)s
       AND h.path_type = %(path_type)s
       AND h.path_id IN %(path_ids)s
    """

        params = {
            "client_id": db_utils.ClientIDToInt(client_id),
            "path_type": int(path_type),
            "path_ids": [path_id.AsBytes() for path_id in path_id_components]
        }

        cursor.execute(query, params)
        for row in cursor.fetchall():
            # pyformat: disable
            (stat_entry_path_id_bytes, stat_entry_bytes, stat_entry_timestamp,
             hash_entry_path_id_bytes, hash_entry_bytes,
             hash_entry_timestamp) = row
            # pyformat: enable

            path_id_bytes = stat_entry_path_id_bytes or hash_entry_path_id_bytes
            path_id = rdf_objects.PathID.FromBytes(path_id_bytes)
            components = path_id_components[path_id]

            timestamp = stat_entry_timestamp or hash_entry_timestamp

            if stat_entry_bytes is not None:
                stat_entry = rdf_client_fs.StatEntry.FromSerializedString(
                    stat_entry_bytes)
            else:
                stat_entry = None

            if hash_entry_bytes is not None:
                hash_entry = rdf_crypto.Hash.FromSerializedString(
                    hash_entry_bytes)
            else:
                hash_entry = None

            path_info = rdf_objects.PathInfo(
                path_type=path_type,
                components=components,
                stat_entry=stat_entry,
                hash_entry=hash_entry,
                timestamp=mysql_utils.TimestampToRDFDatetime(timestamp))

            path_infos[components].append(path_info)

        for components in components_list:
            path_infos[components].sort(
                key=lambda path_info: path_info.timestamp)

        return path_infos
Example #18
    def CreateClientObject(self, vfs_fixture):
        """Make a new client object."""

        # Constructing a client snapshot from the legacy fixture is hard, so
        # we use a serialized string instead.
        data_store.REL_DB.WriteClientMetadata(self.client_id,
                                              fleetspeak_enabled=False)

        snapshot = rdf_objects.ClientSnapshot.FromSerializedBytes(
            binascii.unhexlify(SERIALIZED_CLIENT))
        snapshot.client_id = self.client_id
        snapshot.knowledge_base.fqdn = "Host%s" % self.client_id
        # The client version number may affect flow behavior, so it's
        # important to keep it current in order for flow tests to exercise
        # the most recent logic.
        snapshot.startup_info.client_info.client_version = config.CONFIG[
            "Source.version_numeric"]

        data_store.REL_DB.WriteClientSnapshot(snapshot)
        client_index.ClientIndex().AddClient(snapshot)

        for path, (typ, attributes) in vfs_fixture:
            path %= self.args

            path_info = None

            components = [
                component for component in path.split("/") if component
            ]
            if (len(components) > 1 and components[0] == "fs"
                    and components[1] in ["os", "tsk", "ntfs"]):
                path_info = rdf_objects.PathInfo()
                if components[1] == "os":
                    path_info.path_type = rdf_objects.PathInfo.PathType.OS
                elif components[1] == "ntfs":
                    path_info.path_type = rdf_objects.PathInfo.PathType.NTFS
                else:
                    path_info.path_type = rdf_objects.PathInfo.PathType.TSK
                path_info.components = components[2:]
                if typ == "File":
                    path_info.directory = False
                elif typ == "Directory":
                    path_info.directory = True
                else:
                    raise ValueError("Incorrect object type: %s" % typ)

            for attribute_name in attributes:
                if attribute_name not in ["stat", "content"]:
                    raise ValueError("Unknown attribute: " + attribute_name)

            stat = attributes.get("stat", None)
            if stat:
                stat_entry = rdf_client_fs.StatEntry.FromTextFormat(stat %
                                                                    self.args)
                if stat_entry.pathspec.pathtype != "UNSET":
                    path_info = rdf_objects.PathInfo.FromStatEntry(stat_entry)

            content = attributes.get("content", None)
            if content:
                blob_id = rdf_objects.BlobID.FromBlobData(content)
                data_store.BLOBS.WriteBlobs({blob_id: content})
                blob_ref = rdf_objects.BlobReference(offset=0,
                                                     size=len(content),
                                                     blob_id=blob_id)
                hash_id = file_store.AddFileWithUnknownHash(
                    db.ClientPath.FromPathInfo(self.client_id, path_info),
                    [blob_ref])
                path_info.hash_entry.num_bytes = len(content)
                path_info.hash_entry.sha256 = hash_id.AsBytes()

            if path_info is not None:
                data_store.REL_DB.WritePathInfos(client_id=self.client_id,
                                                 path_infos=[path_info])