def ListDescendentPathInfos(self,
                            client_id,
                            path_type,
                            components,
                            timestamp=None,
                            max_depth=None,
                            cursor=None):
  """Lists path info records that correspond to descendants of given path.

  Args:
    client_id: A client id string identifying the client whose paths to list.
    path_type: A path type; converted with `int()` for the query.
    components: A tuple of path components identifying the root path whose
      descendants should be listed.
    timestamp: If set, stat and hash entries are resolved as of this point in
      time and only "explicit" paths are returned; otherwise the latest
      entries (referenced by `last_*_timestamp`) are used.
    max_depth: If set, restricts results to descendants at most this many
      levels below the given path.
    cursor: A MySQL cursor used to execute the query (expected to be
      injected by the caller/decorator).

  Returns:
    A list of rdf_objects.PathInfo sorted by path components. When
    `timestamp` is given, the list contains only paths that have an
    associated stat or hash entry, or that have such a descendant.
  """
  path_infos = []
  query = ""

  path = mysql_utils.ComponentsToPath(components)

  values = {
      "client_id": db_utils.ClientIDToInt(client_id),
      "path_type": int(path_type),
      # Escaped so that characters in the path cannot act as SQL LIKE
      # wildcards in the `path LIKE concat(...)` condition below.
      "path": db_utils.EscapeWildcards(path),
  }

  query += """
  SELECT path, directory, UNIX_TIMESTAMP(p.timestamp),
         stat_entry, UNIX_TIMESTAMP(last_stat_entry_timestamp),
         hash_entry, UNIX_TIMESTAMP(last_hash_entry_timestamp)
    FROM client_paths AS p
  """

  if timestamp is None:
    # No point-in-time requested: join directly against the stat/hash rows
    # whose timestamps match the `last_*_timestamp` columns, i.e. the
    # latest recorded entries for each path.
    query += """
    LEFT JOIN client_path_stat_entries AS s ON
        (p.client_id = s.client_id AND
         p.path_type = s.path_type AND
         p.path_id = s.path_id AND
         p.last_stat_entry_timestamp = s.timestamp)
    LEFT JOIN client_path_hash_entries AS h ON
        (p.client_id = h.client_id AND
         p.path_type = h.path_type AND
         p.path_id = h.path_id AND
         p.last_hash_entry_timestamp = h.timestamp)
    """
    only_explicit = False
  else:
    # Point-in-time requested: for every path pick the newest stat entry and
    # the newest hash entry whose timestamps do not exceed the requested
    # timestamp (classic greatest-per-group subquery).
    query += """
    LEFT JOIN (SELECT sr.client_id, sr.path_type, sr.path_id, sr.stat_entry
                 FROM client_path_stat_entries AS sr
           INNER JOIN (SELECT client_id, path_type, path_id,
                              MAX(timestamp) AS max_timestamp
                         FROM client_path_stat_entries
                        WHERE UNIX_TIMESTAMP(timestamp) <= %(timestamp)s
                     GROUP BY client_id, path_type, path_id) AS st
                   ON sr.client_id = st.client_id AND
                      sr.path_type = st.path_type AND
                      sr.path_id = st.path_id AND
                      sr.timestamp = st.max_timestamp) AS s ON
        (p.client_id = s.client_id AND
         p.path_type = s.path_type AND
         p.path_id = s.path_id)
    LEFT JOIN (SELECT hr.client_id, hr.path_type, hr.path_id, hr.hash_entry
                 FROM client_path_hash_entries AS hr
           INNER JOIN (SELECT client_id, path_type, path_id,
                              MAX(timestamp) AS max_timestamp
                         FROM client_path_hash_entries
                        WHERE UNIX_TIMESTAMP(timestamp) <= %(timestamp)s
                     GROUP BY client_id, path_type, path_id) AS ht
                   ON hr.client_id = ht.client_id AND
                      hr.path_type = ht.path_type AND
                      hr.path_id = ht.path_id AND
                      hr.timestamp = ht.max_timestamp) AS h ON
        (p.client_id = h.client_id AND
         p.path_type = h.path_type AND
         p.path_id = h.path_id)
    """
    values["timestamp"] = mysql_utils.RDFDatetimeToTimestamp(timestamp)
    only_explicit = True

  # Select strict descendants only: paths starting with the given path
  # followed by a '/' separator (the root path itself is excluded).
  query += """
  WHERE p.client_id = %(client_id)s
    AND p.path_type = %(path_type)s
    AND path LIKE concat(%(path)s, '/%%')
  """

  if max_depth is not None:
    query += """
    AND depth <= %(depth)s
    """
    # `depth` in the table is absolute (number of components from the root),
    # so the cutoff is the root path's own depth plus the relative maximum.
    values["depth"] = len(components) + max_depth

  cursor.execute(query, values)

  for row in cursor.fetchall():
    # pyformat: disable
    (path, directory, timestamp,
     stat_entry_bytes, last_stat_entry_timestamp,
     hash_entry_bytes, last_hash_entry_timestamp) = row
    # pyformat: enable

    components = mysql_utils.PathToComponents(path)

    if stat_entry_bytes is not None:
      stat_entry = rdf_client_fs.StatEntry.FromSerializedString(
          stat_entry_bytes)
    else:
      stat_entry = None

    if hash_entry_bytes is not None:
      hash_entry = rdf_crypto.Hash.FromSerializedString(hash_entry_bytes)
    else:
      hash_entry = None

    # Shorthand for converting UNIX_TIMESTAMP() results (possibly NULL)
    # back into RDFDatetime values.
    datetime = mysql_utils.TimestampToRDFDatetime
    path_info = rdf_objects.PathInfo(
        path_type=path_type,
        components=components,
        timestamp=datetime(timestamp),
        last_stat_entry_timestamp=datetime(last_stat_entry_timestamp),
        last_hash_entry_timestamp=datetime(last_hash_entry_timestamp),
        directory=directory,
        stat_entry=stat_entry,
        hash_entry=hash_entry)

    path_infos.append(path_info)

  path_infos.sort(key=lambda _: tuple(_.components))

  # For specific timestamp, we return information only about explicit paths
  # (paths that have associated stat or hash entry or have an ancestor that
  # is explicit).
  if not only_explicit:
    return path_infos

  explicit_path_infos = []
  has_explicit_ancestor = set()

  # This list is sorted according to the keys component, so by traversing it
  # in the reverse order we make sure that we process deeper paths first.
  for path_info in reversed(path_infos):
    components = tuple(path_info.components)

    if (path_info.HasField("stat_entry") or
        path_info.HasField("hash_entry") or
        components in has_explicit_ancestor):
      explicit_path_infos.append(path_info)
      # Mark the parent so that it is kept too when we reach it (we iterate
      # deepest-first, so the parent has not been processed yet).
      has_explicit_ancestor.add(components[:-1])

  # Since we collected explicit paths in reverse order, we need to reverse it
  # again to conform to the interface.
  return list(reversed(explicit_path_infos))
def _MultiWritePathInfos(self, path_infos, cursor=None):
  """Writes a collection of path info records for specified clients.

  The records are first bulk-loaded into a temporary `client_path_infos`
  table and then fanned out from there into `client_paths`,
  `client_path_stat_entries` and `client_path_hash_entries`, so that all
  inserted rows share a single server-side timestamp.

  Args:
    path_infos: A mapping from client id strings to iterables of
      rdf_objects.PathInfo to write for that client.
    cursor: A MySQL cursor used to execute the statements.
  """
  path_info_count = 0
  path_info_values = []

  parent_path_info_count = 0
  parent_path_info_values = []

  has_stat_entries = False
  has_hash_entries = False

  for client_id, client_path_infos in iteritems(path_infos):
    for path_info in client_path_infos:
      path = mysql_utils.ComponentsToPath(path_info.components)

      # Flat value list matching the 9 columns of the temporary-table
      # INSERT below (client_id, path_type, path_id, path, directory,
      # depth, stat_entry, hash_entry, sha256).
      path_info_values.append(db_utils.ClientIDToInt(client_id))
      path_info_values.append(int(path_info.path_type))
      path_info_values.append(path_info.GetPathID().AsBytes())
      path_info_values.append(path)
      path_info_values.append(bool(path_info.directory))
      path_info_values.append(len(path_info.components))

      if path_info.HasField("stat_entry"):
        path_info_values.append(path_info.stat_entry.SerializeToString())
        has_stat_entries = True
      else:
        path_info_values.append(None)

      if path_info.HasField("hash_entry"):
        path_info_values.append(path_info.hash_entry.SerializeToString())
        path_info_values.append(path_info.hash_entry.sha256.AsBytes())
        has_hash_entries = True
      else:
        path_info_values.append(None)
        path_info_values.append(None)

      path_info_count += 1

      # TODO(hanuszczak): Implement a trie in order to avoid inserting
      # duplicated records.
      for parent_path_info in path_info.GetAncestors():
        path = mysql_utils.ComponentsToPath(parent_path_info.components)

        # Ancestors only get a `client_paths` row (marked as a directory);
        # they carry no stat or hash payload.
        parent_path_info_values.append(db_utils.ClientIDToInt(client_id))
        parent_path_info_values.append(int(parent_path_info.path_type))
        parent_path_info_values.append(
            parent_path_info.GetPathID().AsBytes())
        parent_path_info_values.append(path)
        parent_path_info_values.append(len(parent_path_info.components))

        parent_path_info_count += 1

  # The temporary table exists only for the duration of this `with` block;
  # its `timestamp` column defaults to now(6), giving all staged rows one
  # consistent write time.
  with mysql_utils.TemporaryTable(
      cursor=cursor,
      name="client_path_infos",
      columns=[
          ("client_id", "BIGINT UNSIGNED NOT NULL"),
          ("path_type", "INT UNSIGNED NOT NULL"),
          ("path_id", "BINARY(32) NOT NULL"),
          ("path", "TEXT NOT NULL"),
          ("directory", "BOOLEAN NOT NULL"),
          ("depth", "INT NOT NULL"),
          ("stat_entry", "MEDIUMBLOB NULL"),
          ("hash_entry", "MEDIUMBLOB NULL"),
          ("sha256", "BINARY(32) NULL"),
          ("timestamp", "TIMESTAMP(6) NOT NULL DEFAULT now(6)"),
      ]):
    if path_info_count > 0:
      query = """
        INSERT INTO client_path_infos(client_id, path_type, path_id,
                                      path, directory, depth,
                                      stat_entry, hash_entry, sha256)
        VALUES {}
      """.format(mysql_utils.Placeholders(num=9, values=path_info_count))
      cursor.execute(query, path_info_values)

      # Upsert into client_paths: an existing row keeps directory=TRUE if
      # either the old or the new value says so, and its timestamp is
      # refreshed.
      cursor.execute("""
        INSERT INTO client_paths(client_id, path_type, path_id, path,
                                 directory, depth)
        SELECT client_id, path_type, path_id, path, directory, depth
          FROM client_path_infos
        ON DUPLICATE KEY UPDATE
          client_paths.directory = (client_paths.directory OR VALUES(client_paths.directory)),
          client_paths.timestamp = now(6)
      """)

    if parent_path_info_count > 0:
      # Ancestors are always directories.
      # NOTE(review): this update uses now() while the statement above uses
      # now(6) — the sub-second precision differs; confirm this is intended.
      placeholders = ["(%s, %s, %s, %s, TRUE, %s)"] * parent_path_info_count
      cursor.execute(
          """
        INSERT INTO client_paths(client_id, path_type, path_id, path,
                                 directory, depth)
        VALUES {}
        ON DUPLICATE KEY UPDATE
          directory = TRUE,
          timestamp = now()
      """.format(", ".join(placeholders)), parent_path_info_values)

    if has_stat_entries:
      cursor.execute("""
        INSERT INTO client_path_stat_entries(client_id, path_type, path_id,
                                             stat_entry, timestamp)
        SELECT client_id, path_type, path_id, stat_entry, timestamp
          FROM client_path_infos
         WHERE stat_entry IS NOT NULL
      """)

      # Point `last_stat_entry_timestamp` at the rows just inserted.
      cursor.execute("""
        UPDATE client_paths, client_path_infos
           SET client_paths.last_stat_entry_timestamp = client_path_infos.timestamp
         WHERE client_paths.client_id = client_path_infos.client_id
           AND client_paths.path_type = client_path_infos.path_type
           AND client_paths.path_id = client_path_infos.path_id
           AND client_path_infos.stat_entry IS NOT NULL
      """)

    if has_hash_entries:
      cursor.execute("""
        INSERT INTO client_path_hash_entries(client_id, path_type, path_id,
                                             hash_entry, sha256, timestamp)
        SELECT client_id, path_type, path_id, hash_entry, sha256, timestamp
          FROM client_path_infos
         WHERE hash_entry IS NOT NULL
      """)

      # Point `last_hash_entry_timestamp` at the rows just inserted.
      cursor.execute("""
        UPDATE client_paths, client_path_infos
           SET client_paths.last_hash_entry_timestamp = client_path_infos.timestamp
         WHERE client_paths.client_id = client_path_infos.client_id
           AND client_paths.path_type = client_path_infos.path_type
           AND client_paths.path_id = client_path_infos.path_id
           AND client_path_infos.hash_entry IS NOT NULL
      """)
def _MultiWritePathInfos(self, path_infos, connection=None):
  """Writes a collection of path info records for specified clients.

  The records are staged in an explicitly created temporary table
  `client_path_infos` and fanned out from there into `client_paths`,
  `client_path_stat_entries` and `client_path_hash_entries`.

  Args:
    path_infos: A mapping from client id strings to iterables of
      rdf_objects.PathInfo to write for that client.
    connection: A MySQL connection; cursors are opened (and closed) on it
      locally so that the temporary table can be dropped reliably.
  """
  path_info_count = 0
  path_info_values = []

  parent_path_info_count = 0
  parent_path_info_values = []

  has_stat_entries = False
  has_hash_entries = False

  for client_id, client_path_infos in iteritems(path_infos):
    for path_info in client_path_infos:
      path = mysql_utils.ComponentsToPath(path_info.components)

      # Flat value list matching the 9 columns of the temporary-table
      # INSERT below (client_id, path_type, path_id, path, directory,
      # depth, stat_entry, hash_entry, sha256).
      path_info_values.append(db_utils.ClientIDToInt(client_id))
      path_info_values.append(int(path_info.path_type))
      path_info_values.append(path_info.GetPathID().AsBytes())
      path_info_values.append(path)
      path_info_values.append(bool(path_info.directory))
      path_info_values.append(len(path_info.components))

      if path_info.HasField("stat_entry"):
        path_info_values.append(path_info.stat_entry.SerializeToString())
        has_stat_entries = True
      else:
        path_info_values.append(None)

      if path_info.HasField("hash_entry"):
        path_info_values.append(path_info.hash_entry.SerializeToString())
        path_info_values.append(path_info.hash_entry.sha256.AsBytes())
        has_hash_entries = True
      else:
        path_info_values.append(None)
        path_info_values.append(None)

      path_info_count += 1

      # TODO(hanuszczak): Implement a trie in order to avoid inserting
      # duplicated records.
      for parent_path_info in path_info.GetAncestors():
        path = mysql_utils.ComponentsToPath(parent_path_info.components)

        # Ancestors only get a `client_paths` row (marked as a directory);
        # they carry no stat or hash payload.
        parent_path_info_values.append(db_utils.ClientIDToInt(client_id))
        parent_path_info_values.append(int(parent_path_info.path_type))
        parent_path_info_values.append(
            parent_path_info.GetPathID().AsBytes())
        parent_path_info_values.append(path)
        parent_path_info_values.append(len(parent_path_info.components))

        parent_path_info_count += 1

  try:
    with contextlib.closing(connection.cursor()) as cursor:
      # The `timestamp` column defaults to now(6), giving all staged rows
      # one consistent write time.
      cursor.execute("""
      CREATE TEMPORARY TABLE client_path_infos(
        client_id BIGINT UNSIGNED NOT NULL,
        path_type INT UNSIGNED NOT NULL,
        path_id BINARY(32) NOT NULL,
        path TEXT NOT NULL,
        directory BOOLEAN NOT NULL,
        depth INT NOT NULL,
        stat_entry MEDIUMBLOB NULL,
        hash_entry MEDIUMBLOB NULL,
        sha256 BINARY(32) NULL,
        timestamp TIMESTAMP(6) NOT NULL DEFAULT now(6)
      )""")

      if path_info_count > 0:
        cursor.execute(
            """
          INSERT INTO client_path_infos(client_id, path_type, path_id,
                                        path, directory, depth,
                                        stat_entry, hash_entry, sha256)
          VALUES {}
        """.format(mysql_utils.Placeholders(num=9, values=path_info_count)),
            path_info_values)

        # Upsert into client_paths: an existing row keeps directory=TRUE if
        # either the old or the new value says so, and its timestamp is
        # refreshed.
        cursor.execute("""
          INSERT INTO client_paths(client_id, path_type, path_id, path,
                                   directory, depth)
          SELECT client_id, path_type, path_id, path, directory, depth
            FROM client_path_infos
          ON DUPLICATE KEY UPDATE
            client_paths.directory = client_paths.directory OR VALUES(client_paths.directory),
            client_paths.timestamp = now(6)
        """)

      if parent_path_info_count > 0:
        # Ancestors are always directories.
        # NOTE(review): this update uses now() while the statement above
        # uses now(6) — sub-second precision differs; confirm intended.
        placeholders = ["(%s, %s, %s, %s, TRUE, %s)"
                       ] * parent_path_info_count
        cursor.execute(
            """
          INSERT INTO client_paths(client_id, path_type, path_id, path,
                                   directory, depth)
          VALUES {}
          ON DUPLICATE KEY UPDATE
            directory = TRUE,
            timestamp = now()
        """.format(", ".join(placeholders)), parent_path_info_values)

      if has_stat_entries:
        cursor.execute("""
          INSERT INTO client_path_stat_entries(client_id, path_type, path_id,
                                               stat_entry, timestamp)
          SELECT client_id, path_type, path_id, stat_entry, timestamp
            FROM client_path_infos
           WHERE stat_entry IS NOT NULL
        """)

        # Point `last_stat_entry_timestamp` at the rows just inserted.
        cursor.execute("""
          UPDATE client_paths, client_path_infos
             SET client_paths.last_stat_entry_timestamp = client_path_infos.timestamp
           WHERE client_paths.client_id = client_path_infos.client_id
             AND client_paths.path_type = client_path_infos.path_type
             AND client_paths.path_id = client_path_infos.path_id
             AND client_path_infos.stat_entry IS NOT NULL
        """)

      if has_hash_entries:
        cursor.execute("""
          INSERT INTO client_path_hash_entries(client_id, path_type, path_id,
                                               hash_entry, sha256, timestamp)
          SELECT client_id, path_type, path_id, hash_entry, sha256, timestamp
            FROM client_path_infos
           WHERE hash_entry IS NOT NULL
        """)

        # Point `last_hash_entry_timestamp` at the rows just inserted.
        cursor.execute("""
          UPDATE client_paths, client_path_infos
             SET client_paths.last_hash_entry_timestamp = client_path_infos.timestamp
           WHERE client_paths.client_id = client_path_infos.client_id
             AND client_paths.path_type = client_path_infos.path_type
             AND client_paths.path_id = client_path_infos.path_id
             AND client_path_infos.hash_entry IS NOT NULL
        """)
  finally:
    # Drop the temporary table in a separate cursor. This ensures that
    # even if the previous cursor.execute fails mid-way leaving the
    # temporary table created (as table creation can't be rolled back), the
    # table would still be correctly dropped.
    #
    # This is important since connections are reused in the MySQL connection
    # pool.
    with contextlib.closing(connection.cursor()) as cursor:
      cursor.execute("DROP TEMPORARY TABLE IF EXISTS client_path_infos")
def _MultiWritePathInfos(self, path_infos, cursor=None):
  """Writes a collection of path info records for specified clients.

  Unlike the temporary-table variants, this implementation batches rows in
  Python and writes them with `executemany`, using a single client-side
  timestamp (`now`) for all rows of the call.

  Args:
    path_infos: A mapping from client id strings to iterables of
      rdf_objects.PathInfo to write for that client.
    cursor: A MySQL cursor used to execute the statements.
  """
  # One timestamp for every row written in this call, so that
  # client_paths.last_*_timestamp can later be matched exactly against the
  # stat/hash entry rows.
  now = rdfvalue.RDFDatetime.Now()

  path_info_values = []
  parent_path_info_values = []

  # `*_keys` are flat (client_id, path_type, path_id) triples used to build
  # the WHERE clause of the `last_*_timestamp` updates below.
  stat_entry_keys = []
  stat_entry_values = []

  hash_entry_keys = []
  hash_entry_values = []

  for client_id, client_path_infos in iteritems(path_infos):
    for path_info in client_path_infos:
      path = mysql_utils.ComponentsToPath(path_info.components)

      key = (
          db_utils.ClientIDToInt(client_id),
          int(path_info.path_type),
          path_info.GetPathID().AsBytes(),
      )

      path_info_values.append(key + (mysql_utils.RDFDatetimeToTimestamp(now),
                                     path, bool(path_info.directory),
                                     len(path_info.components)))

      if path_info.HasField("stat_entry"):
        stat_entry_keys.extend(key)
        stat_entry_values.append(
            key + (mysql_utils.RDFDatetimeToTimestamp(now),
                   path_info.stat_entry.SerializeToBytes()))

      if path_info.HasField("hash_entry"):
        hash_entry_keys.extend(key)
        hash_entry_values.append(
            key + (mysql_utils.RDFDatetimeToTimestamp(now),
                   path_info.hash_entry.SerializeToBytes(),
                   path_info.hash_entry.sha256.AsBytes()))

      # TODO(hanuszczak): Implement a trie in order to avoid inserting
      # duplicated records.
      for parent_path_info in path_info.GetAncestors():
        path = mysql_utils.ComponentsToPath(parent_path_info.components)

        # Ancestors only get a `client_paths` row (marked as a directory);
        # they carry no stat or hash payload.
        parent_path_info_values.append((
            db_utils.ClientIDToInt(client_id),
            int(parent_path_info.path_type),
            parent_path_info.GetPathID().AsBytes(),
            path,
            len(parent_path_info.components),
        ))

  if path_info_values:
    # Upsert: an existing row keeps directory=TRUE if either the old or the
    # new value says so, and its timestamp is refreshed.
    query = """
      INSERT INTO client_paths(client_id, path_type, path_id, timestamp,
                               path, directory, depth)
      VALUES (%s, %s, %s, FROM_UNIXTIME(%s), %s, %s, %s)
      ON DUPLICATE KEY UPDATE
        timestamp = VALUES(timestamp),
        directory = directory OR VALUES(directory)
    """
    cursor.executemany(query, path_info_values)

  if parent_path_info_values:
    # Ancestors are always directories.
    query = """
      INSERT INTO client_paths(client_id, path_type, path_id, path,
                               directory, depth)
      VALUES (%s, %s, %s, %s, TRUE, %s)
      ON DUPLICATE KEY UPDATE
        directory = TRUE,
        timestamp = NOW(6)
    """
    cursor.executemany(query, parent_path_info_values)

  if stat_entry_values:
    query = """
      INSERT INTO client_path_stat_entries(client_id, path_type, path_id,
                                           timestamp, stat_entry)
      VALUES (%s, %s, %s, FROM_UNIXTIME(%s), %s)
    """
    cursor.executemany(query, stat_entry_values)

    # Point `last_stat_entry_timestamp` at the rows just inserted, matching
    # each affected (client_id, path_type, path_id) triple explicitly.
    condition = "(client_id = %s AND path_type = %s AND path_id = %s)"
    query = """
      UPDATE client_paths
         SET last_stat_entry_timestamp = FROM_UNIXTIME(%s)
       WHERE {}
    """.format(" OR ".join([condition] * len(stat_entry_values)))
    params = [mysql_utils.RDFDatetimeToTimestamp(now)] + stat_entry_keys
    cursor.execute(query, params)

  if hash_entry_values:
    query = """
      INSERT INTO client_path_hash_entries(client_id, path_type, path_id,
                                           timestamp, hash_entry, sha256)
      VALUES (%s, %s, %s, FROM_UNIXTIME(%s), %s, %s)
    """
    cursor.executemany(query, hash_entry_values)

    # Point `last_hash_entry_timestamp` at the rows just inserted.
    condition = "(client_id = %s AND path_type = %s AND path_id = %s)"
    query = """
      UPDATE client_paths
         SET last_hash_entry_timestamp = FROM_UNIXTIME(%s)
       WHERE {}
    """.format(" OR ".join([condition] * len(hash_entry_values)))
    params = [mysql_utils.RDFDatetimeToTimestamp(now)] + hash_entry_keys
    cursor.execute(query, params)
def WritePathInfos(
    self,
    client_id: str,
    path_infos: Sequence[rdf_objects.PathInfo],
    cursor: Optional[MySQLdb.cursors.Cursor] = None,
) -> None:
  """Writes a collection of path_info records for a client.

  Rows are batched in Python and written with `executemany`, all stamped
  with a single client-side timestamp so that
  `client_paths.last_*_timestamp` can be matched exactly against the
  stat/hash entry rows inserted here.

  Args:
    client_id: A client id string identifying the client to write for.
    path_infos: A sequence of rdf_objects.PathInfo records to write.
    cursor: A MySQL cursor used to execute the statements.

  Raises:
    db.UnknownClientError: If the client does not exist (surfaced as a
      foreign-key IntegrityError from the first insert).
  """
  # One timestamp for every row written in this call.
  now = rdfvalue.RDFDatetime.Now()

  int_client_id = db_utils.ClientIDToInt(client_id)

  path_info_values = []
  parent_path_info_values = []

  # `*_keys` are flat (client_id, path_type, path_id) triples used to build
  # the WHERE clause of the `last_*_timestamp` updates below.
  stat_entry_keys = []
  stat_entry_values = []

  hash_entry_keys = []
  hash_entry_values = []

  for path_info in path_infos:
    path = mysql_utils.ComponentsToPath(path_info.components)

    key = (
        int_client_id,
        int(path_info.path_type),
        path_info.GetPathID().AsBytes(),
    )

    path_info_values.append(key + (mysql_utils.RDFDatetimeToTimestamp(now),
                                   path, bool(path_info.directory),
                                   len(path_info.components)))

    if path_info.HasField("stat_entry"):
      stat_entry_keys.extend(key)
      stat_entry_values.append(
          key + (mysql_utils.RDFDatetimeToTimestamp(now),
                 path_info.stat_entry.SerializeToBytes()))

    if path_info.HasField("hash_entry"):
      hash_entry_keys.extend(key)
      hash_entry_values.append(
          key + (mysql_utils.RDFDatetimeToTimestamp(now),
                 path_info.hash_entry.SerializeToBytes(),
                 path_info.hash_entry.sha256.AsBytes()))

    # TODO(hanuszczak): Implement a trie in order to avoid inserting
    # duplicated records.
    for parent_path_info in path_info.GetAncestors():
      path = mysql_utils.ComponentsToPath(parent_path_info.components)

      # Ancestors only get a `client_paths` row (marked as a directory);
      # they carry no stat or hash payload.
      parent_path_info_values.append((
          int_client_id,
          int(parent_path_info.path_type),
          parent_path_info.GetPathID().AsBytes(),
          path,
          len(parent_path_info.components),
      ))

  if path_info_values:
    # Upsert: an existing row keeps directory=TRUE if either the old or the
    # new value says so, and its timestamp is refreshed.
    query = """
      INSERT INTO client_paths(client_id, path_type, path_id, timestamp,
                               path, directory, depth)
      VALUES (%s, %s, %s, FROM_UNIXTIME(%s), %s, %s, %s)
      ON DUPLICATE KEY UPDATE
        timestamp = VALUES(timestamp),
        directory = directory OR VALUES(directory)
    """

    try:
      cursor.executemany(query, path_info_values)
    except MySQLdb.IntegrityError as error:
      # A foreign-key violation here means the client row does not exist.
      raise db.UnknownClientError(client_id=client_id, cause=error)

  if parent_path_info_values:
    # Ancestors are always directories.
    query = """
      INSERT INTO client_paths(client_id, path_type, path_id, path,
                               directory, depth)
      VALUES (%s, %s, %s, %s, TRUE, %s)
      ON DUPLICATE KEY UPDATE
        directory = TRUE,
        timestamp = NOW(6)
    """
    cursor.executemany(query, parent_path_info_values)

  if stat_entry_values:
    query = """
      INSERT INTO client_path_stat_entries(client_id, path_type, path_id,
                                           timestamp, stat_entry)
      VALUES (%s, %s, %s, FROM_UNIXTIME(%s), %s)
    """
    cursor.executemany(query, stat_entry_values)

    # Point `last_stat_entry_timestamp` at the rows just inserted. FORCE
    # INDEX (PRIMARY) steers the optimizer to primary-key lookups for the
    # long OR-chain of key triples.
    condition = "(client_id = %s AND path_type = %s AND path_id = %s)"
    query = """
      UPDATE client_paths FORCE INDEX (PRIMARY)
         SET last_stat_entry_timestamp = FROM_UNIXTIME(%s)
       WHERE {}
    """.format(" OR ".join([condition] * len(stat_entry_values)))
    params = [mysql_utils.RDFDatetimeToTimestamp(now)] + stat_entry_keys
    cursor.execute(query, params)

  if hash_entry_values:
    query = """
      INSERT INTO client_path_hash_entries(client_id, path_type, path_id,
                                           timestamp, hash_entry, sha256)
      VALUES (%s, %s, %s, FROM_UNIXTIME(%s), %s, %s)
    """
    cursor.executemany(query, hash_entry_values)

    # Point `last_hash_entry_timestamp` at the rows just inserted.
    condition = "(client_id = %s AND path_type = %s AND path_id = %s)"
    query = """
      UPDATE client_paths FORCE INDEX (PRIMARY)
         SET last_hash_entry_timestamp = FROM_UNIXTIME(%s)
       WHERE {}
    """.format(" OR ".join([condition] * len(hash_entry_values)))
    params = [mysql_utils.RDFDatetimeToTimestamp(now)] + hash_entry_keys
    cursor.execute(query, params)