def _ResponseToClientsFullInfo(self, response): """Creates a ClientFullInfo object from a database response.""" c_full_info = None prev_cid = None for row in response: (cid, fs, crt, ping, clk, ip, foreman, first, last_client_ts, last_crash_ts, last_startup_ts, client_obj, client_startup_obj, last_startup_obj, label_owner, label_name) = row if cid != prev_cid: if c_full_info: yield db_utils.IntToClientID(prev_cid), c_full_info metadata = rdf_objects.ClientMetadata( certificate=crt, fleetspeak_enabled=fs, first_seen=mysql_utils.TimestampToRDFDatetime(first), ping=mysql_utils.TimestampToRDFDatetime(ping), clock=mysql_utils.TimestampToRDFDatetime(clk), ip=mysql_utils.StringToRDFProto(rdf_client_network.NetworkAddress, ip), last_foreman_time=mysql_utils.TimestampToRDFDatetime(foreman), startup_info_timestamp=mysql_utils.TimestampToRDFDatetime( last_startup_ts), last_crash_timestamp=mysql_utils.TimestampToRDFDatetime( last_crash_ts)) if client_obj is not None: l_snapshot = rdf_objects.ClientSnapshot.FromSerializedString( client_obj) l_snapshot.timestamp = mysql_utils.TimestampToRDFDatetime( last_client_ts) l_snapshot.startup_info = rdf_client.StartupInfo.FromSerializedString( client_startup_obj) l_snapshot.startup_info.timestamp = l_snapshot.timestamp else: l_snapshot = rdf_objects.ClientSnapshot( client_id=db_utils.IntToClientID(cid)) if last_startup_obj is not None: startup_info = rdf_client.StartupInfo.FromSerializedString( last_startup_obj) startup_info.timestamp = mysql_utils.TimestampToRDFDatetime( last_startup_ts) else: startup_info = None prev_cid = cid c_full_info = rdf_objects.ClientFullInfo( metadata=metadata, labels=[], last_snapshot=l_snapshot, last_startup_info=startup_info) if label_owner and label_name: c_full_info.labels.append( rdf_objects.ClientLabel(name=label_name, owner=label_owner)) if c_full_info: yield db_utils.IntToClientID(prev_cid), c_full_info
def MultiReadClientMetadata(self, client_ids, cursor=None):
    """Reads ClientMetadata records for a list of clients.

    Args:
      client_ids: Iterable of client ids to look up.
      cursor: Database cursor used to run the query. It defaults to None but
        is required whenever `client_ids` is non-empty (presumably it is
        supplied by a wrapper that is not visible in this block).

    Returns:
      A dict mapping client id to ClientMetadata for every row returned by
      the query. An empty `client_ids` yields an empty dict without
      touching the database.
    """
    ids = [db_utils.ClientIDToInt(client_id) for client_id in client_ids]
    if not ids:
        # "IN ()" is a syntax error in MySQL, so there is nothing to query.
        return {}

    query = ("SELECT client_id, fleetspeak_enabled, certificate, "
             "UNIX_TIMESTAMP(last_ping), "
             "UNIX_TIMESTAMP(last_clock), last_ip, "
             "UNIX_TIMESTAMP(last_foreman), UNIX_TIMESTAMP(first_seen), "
             "UNIX_TIMESTAMP(last_crash_timestamp), "
             "UNIX_TIMESTAMP(last_startup_timestamp) FROM "
             "clients WHERE client_id IN ({})").format(
                 ", ".join(["%s"] * len(ids)))
    cursor.execute(query, ids)

    ret = {}
    row = cursor.fetchone()
    while row:
        cid, fs, crt, ping, clk, ip, foreman, first, lct, lst = row
        ret[db_utils.IntToClientID(cid)] = rdf_objects.ClientMetadata(
            certificate=crt,
            fleetspeak_enabled=fs,
            first_seen=mysql_utils.TimestampToRDFDatetime(first),
            ping=mysql_utils.TimestampToRDFDatetime(ping),
            clock=mysql_utils.TimestampToRDFDatetime(clk),
            ip=mysql_utils.StringToRDFProto(
                rdf_client_network.NetworkAddress, ip),
            last_foreman_time=mysql_utils.TimestampToRDFDatetime(foreman),
            startup_info_timestamp=mysql_utils.TimestampToRDFDatetime(lst),
            last_crash_timestamp=mysql_utils.TimestampToRDFDatetime(lct))
        row = cursor.fetchone()
    return ret
def MultiReadClientSnapshot(self, client_ids, cursor=None):
    """Reads the latest client snapshots for a list of clients.

    Args:
      client_ids: Iterable of client ids to look up. It is materialized
        once, so one-shot iterables are accepted.
      cursor: Database cursor used to run the query. It defaults to None but
        is required whenever `client_ids` is non-empty (presumably it is
        supplied by a wrapper that is not visible in this block).

    Returns:
      A dict with one entry per requested client id. The value is the
      ClientSnapshot built from the matching row, or None when the query
      returned no row for that client.
    """
    client_ids = list(client_ids)
    ret = {cid: None for cid in client_ids}
    if not client_ids:
        # "IN ()" is a syntax error in MySQL, so there is nothing to query.
        return ret

    int_ids = [db_utils.ClientIDToInt(cid) for cid in client_ids]
    query = (
        "SELECT h.client_id, h.client_snapshot, UNIX_TIMESTAMP(h.timestamp),"
        "       s.startup_info "
        "FROM clients as c FORCE INDEX (PRIMARY), "
        "client_snapshot_history as h FORCE INDEX (PRIMARY), "
        "client_startup_history as s FORCE INDEX (PRIMARY) "
        "WHERE h.client_id = c.client_id "
        "AND s.client_id = c.client_id "
        "AND h.timestamp = c.last_snapshot_timestamp "
        "AND s.timestamp = c.last_startup_timestamp "
        "AND c.client_id IN ({})").format(", ".join(["%s"] * len(int_ids)))
    cursor.execute(query, int_ids)

    row = cursor.fetchone()
    while row:
        cid, snapshot, timestamp, startup_info = row
        client_obj = mysql_utils.StringToRDFProto(
            rdf_objects.ClientSnapshot, snapshot)
        client_obj.startup_info = mysql_utils.StringToRDFProto(
            rdf_client.StartupInfo, startup_info)
        client_obj.timestamp = mysql_utils.TimestampToRDFDatetime(timestamp)
        ret[db_utils.IntToClientID(cid)] = client_obj
        row = cursor.fetchone()
    return ret
def ReadClientLastPings(self,
                        min_last_ping=None,
                        max_last_ping=None,
                        fleetspeak_enabled=None,
                        cursor=None):
    """Reads the last-ping timestamp of every client matching the filters.

    Args:
      min_last_ping: If not None, only clients whose last ping is at or after
        this time are returned.
      max_last_ping: If not None, only clients whose last ping is at or before
        this time, or that have no last ping recorded, are returned.
      fleetspeak_enabled: If True, only clients with the flag set are
        returned; if False, only clients with the flag unset or NULL; if
        None, the flag is not filtered on.
      cursor: Database cursor used to run the query.

    Returns:
      A dict mapping client id to the converted last-ping timestamp.
    """
    conditions = []
    params = []

    if min_last_ping is not None:
        conditions.append("last_ping >= FROM_UNIXTIME(%s) ")
        params.append(mysql_utils.RDFDatetimeToTimestamp(min_last_ping))

    if max_last_ping is not None:
        conditions.append(
            "(last_ping IS NULL OR last_ping <= FROM_UNIXTIME(%s))")
        params.append(mysql_utils.RDFDatetimeToTimestamp(max_last_ping))

    if fleetspeak_enabled is not None:
        conditions.append(
            "fleetspeak_enabled IS TRUE" if fleetspeak_enabled else
            "(fleetspeak_enabled IS NULL OR fleetspeak_enabled IS FALSE)")

    sql = "SELECT client_id, UNIX_TIMESTAMP(last_ping) FROM clients "
    if conditions:
        sql += "WHERE " + "AND ".join(conditions)

    cursor.execute(sql, params)
    return {
        db_utils.IntToClientID(int_client_id):
            mysql_utils.TimestampToRDFDatetime(last_ping)
        for int_client_id, last_ping in cursor.fetchall()
    }
def MultiReadClientLabels(self, client_ids, cursor=None):
    """Reads the user labels for a list of clients.

    Args:
      client_ids: Iterable of client ids to look up. It is materialized
        once, so one-shot iterables are accepted.
      cursor: Database cursor used to run the query. It defaults to None but
        is required whenever `client_ids` is non-empty (presumably it is
        supplied by a wrapper that is not visible in this block).

    Returns:
      A dict with one entry per requested client id. Each value is that
      client's list of ClientLabel objects sorted by (owner, name), and is
      empty when the client has no labels.
    """
    client_ids = list(client_ids)
    ret = {client_id: [] for client_id in client_ids}
    if not client_ids:
        # "IN ()" is a syntax error in MySQL, so there is nothing to query.
        return ret

    int_ids = [db_utils.ClientIDToInt(cid) for cid in client_ids]
    query = ("SELECT client_id, owner_username, label "
             "FROM client_labels "
             "WHERE client_id IN ({})").format(
                 ", ".join(["%s"] * len(int_ids)))
    cursor.execute(query, int_ids)

    for client_id, owner, label in cursor.fetchall():
        ret[db_utils.IntToClientID(client_id)].append(
            rdf_objects.ClientLabel(name=label, owner=owner))

    # The query has no ORDER BY, so sort here to return a stable order.
    for labels in ret.values():
        labels.sort(key=lambda label: (label.owner, label.name))
    return ret
def ReadClientLastPings(self,
                        min_last_ping=None,
                        max_last_ping=None,
                        fleetspeak_enabled=None,
                        batch_size=db.CLIENT_IDS_BATCH_SIZE):
    """Yields batches of last-ping timestamps for clients in the DB.

    Batches are fetched through `_ReadClientLastPings`; each call is given
    the client id returned by the previous call as its starting point.

    Args:
      min_last_ping: Forwarded unchanged to `_ReadClientLastPings`.
      max_last_ping: Forwarded unchanged to `_ReadClientLastPings`.
      fleetspeak_enabled: Forwarded unchanged to `_ReadClientLastPings`.
      batch_size: Number of clients requested per batch.

    Yields:
      Non-empty dicts of last pings, one per fetched batch.
    """
    resume_after = db_utils.IntToClientID(0)
    more_expected = True
    while more_expected:
        resume_after, batch = self._ReadClientLastPings(
            resume_after,
            batch_size,
            min_last_ping=min_last_ping,
            max_last_ping=max_last_ping,
            fleetspeak_enabled=fleetspeak_enabled)
        if batch:
            yield batch
        # A short batch means the table has been exhausted.
        more_expected = len(batch) >= batch_size
def _ReadClientLastPings(self,
                         last_client_id,
                         count,
                         min_last_ping=None,
                         max_last_ping=None,
                         fleetspeak_enabled=None,
                         cursor=None):
    """Reads one batch of last-ping timestamps, ordered by client id.

    Args:
      last_client_id: Only clients with an id strictly greater than this one
        are returned.
      count: Maximum number of clients to return (used as the SQL LIMIT).
      min_last_ping: If not None, only clients whose last ping is at or after
        this time are returned.
      max_last_ping: If not None, only clients whose last ping is at or before
        this time, or that have no last ping recorded, are returned.
      fleetspeak_enabled: If True, only clients with the flag set are
        returned; if False, only clients with the flag unset or NULL; if
        None, the flag is not filtered on.
      cursor: Database cursor used to run the query.

    Returns:
      A tuple (last client id in the batch, dict of client id to last ping).
      The first element is None when the batch is empty.
    """
    conditions = ["client_id > %s"]
    params = [db_utils.ClientIDToInt(last_client_id)]

    if min_last_ping is not None:
        conditions.append("last_ping >= FROM_UNIXTIME(%s) ")
        params.append(mysql_utils.RDFDatetimeToTimestamp(min_last_ping))

    if max_last_ping is not None:
        conditions.append(
            "(last_ping IS NULL OR last_ping <= FROM_UNIXTIME(%s))")
        params.append(mysql_utils.RDFDatetimeToTimestamp(max_last_ping))

    if fleetspeak_enabled is not None:
        conditions.append(
            "fleetspeak_enabled IS TRUE" if fleetspeak_enabled else
            "(fleetspeak_enabled IS NULL OR fleetspeak_enabled IS FALSE)")

    sql = (" SELECT client_id, UNIX_TIMESTAMP(last_ping) FROM clients "
           "WHERE {} ORDER BY client_id LIMIT %s").format(
               " AND ".join(conditions))
    cursor.execute(sql, params + [count])

    entries = [(db_utils.IntToClientID(int_client_id),
                mysql_utils.TimestampToRDFDatetime(last_ping))
               for int_client_id, last_ping in cursor.fetchall()]
    # Rows arrive ordered by client id, so the final entry is the resume point.
    final_client_id = entries[-1][0] if entries else None
    return final_client_id, dict(entries)
def ReadHuntOutputPluginLogEntries(self,
                                   hunt_id,
                                   output_plugin_id,
                                   offset,
                                   count,
                                   with_type=None,
                                   cursor=None):
    """Reads a page of output plugin log entries for a hunt.

    Args:
      hunt_id: Id of the hunt whose entries are read.
      output_plugin_id: Id of the output plugin whose entries are read.
      offset: Number of matching entries to skip (SQL OFFSET).
      count: Maximum number of entries to return (SQL LIMIT).
      with_type: If not None, only entries of this log entry type are
        returned; the value is converted with int() before being sent.
      cursor: Database cursor used to run the query.

    Returns:
      A list of FlowOutputPluginLogEntry objects ordered by log id.
    """
    sql_parts = [
        "SELECT client_id, flow_id, log_entry_type, message, "
        "UNIX_TIMESTAMP(timestamp) "
        "FROM flow_output_plugin_log_entries "
        "FORCE INDEX (flow_output_plugin_log_entries_by_hunt) "
        "WHERE hunt_id = %s AND output_plugin_id = %s "
    ]
    params = [
        db_utils.HuntIDToInt(hunt_id),
        db_utils.OutputPluginIDToInt(output_plugin_id),
    ]

    if with_type is not None:
        sql_parts.append("AND log_entry_type = %s ")
        params.append(int(with_type))

    sql_parts.append("ORDER BY log_id ASC LIMIT %s OFFSET %s")
    params.extend([count, offset])

    cursor.execute("".join(sql_parts), params)

    return [
        rdf_flow_objects.FlowOutputPluginLogEntry(
            hunt_id=hunt_id,
            client_id=db_utils.IntToClientID(client_id_int),
            flow_id=db_utils.IntToFlowID(flow_id_int),
            output_plugin_id=output_plugin_id,
            log_entry_type=entry_type,
            message=message,
            timestamp=mysql_utils.TimestampToRDFDatetime(timestamp))
        for (client_id_int, flow_id_int, entry_type, message,
             timestamp) in cursor.fetchall()
    ]
def ListClientsForKeywords(self, keywords, start_time=None, cursor=None):
    """Lists the clients associated with keywords.

    Args:
      keywords: Iterable of keywords to look up; duplicates are collapsed.
      start_time: If set, only keyword associations with a timestamp at or
        after this time are considered.
      cursor: Database cursor used to run the query. It defaults to None but
        is required whenever `keywords` is non-empty (presumably it is
        supplied by a wrapper that is not visible in this block).

    Returns:
      A dict mapping every distinct keyword to a list of client ids. The
      list is empty for keywords without matches. No keywords yield an empty
      dict without touching the database.
    """
    keywords = set(keywords)
    result = {kw: [] for kw in keywords}
    if not keywords:
        # "IN ()" is a syntax error in MySQL, so there is nothing to query.
        return result

    hash_to_kw = {mysql_utils.Hash(kw): kw for kw in keywords}
    args = list(hash_to_kw)

    # Placeholders are derived from the args actually bound, so the two can
    # never disagree even if two keywords were to share a hash.
    query = (" SELECT keyword_hash, client_id FROM client_keywords "
             "FORCE INDEX (client_index_by_keyword_hash) "
             "WHERE keyword_hash IN ({}) ").format(
                 ", ".join(["%s"] * len(args)))

    if start_time:
        query += " AND timestamp >= FROM_UNIXTIME(%s)"
        args.append(mysql_utils.RDFDatetimeToTimestamp(start_time))

    cursor.execute(query, args)

    for kw_hash, cid in cursor.fetchall():
        result[hash_to_kw[kw_hash]].append(db_utils.IntToClientID(cid))
    return result
def ReadHuntLogEntries(self,
                       hunt_id,
                       offset,
                       count,
                       with_substring=None,
                       cursor=None):
    """Reads a page of log entries of a given hunt.

    Only rows whose flow id equals the hunt id are selected.

    Args:
      hunt_id: Id of the hunt whose log entries are read.
      offset: Number of matching entries to skip (SQL OFFSET).
      count: Maximum number of entries to return (SQL LIMIT).
      with_substring: If not None, only entries whose message contains this
        substring are returned; wildcards in it are escaped before use.
      cursor: Database cursor used to run the query.

    Returns:
      A list of FlowLogEntry objects ordered by timestamp.
    """
    sql_parts = [
        "SELECT client_id, flow_id, message, UNIX_TIMESTAMP(timestamp) "
        "FROM flow_log_entries "
        "FORCE INDEX(flow_log_entries_by_hunt) "
        "WHERE hunt_id = %s AND flow_id = hunt_id "
    ]
    params = [db_utils.HuntIDToInt(hunt_id)]

    if with_substring is not None:
        sql_parts.append("AND message LIKE %s ")
        params.append("%" + db_utils.EscapeWildcards(with_substring) + "%")

    sql_parts.append("ORDER BY timestamp ASC LIMIT %s OFFSET %s")
    params.extend([count, offset])

    cursor.execute("".join(sql_parts), params)

    return [
        rdf_flow_objects.FlowLogEntry(
            client_id=db_utils.IntToClientID(client_id_int),
            flow_id=db_utils.IntToFlowID(flow_id_int),
            hunt_id=hunt_id,
            message=message,
            timestamp=mysql_utils.TimestampToRDFDatetime(timestamp))
        for client_id_int, flow_id_int, message, timestamp
        in cursor.fetchall()
    ]
def ReadLatestPathInfosWithHashBlobReferences(self,
                                              client_paths,
                                              max_timestamp=None,
                                              cursor=None):
    """Returns PathInfos that have corresponding HashBlobReferences.

    For every requested path, the newest hash entry whose SHA-256 has a row
    in `hash_blob_references` is selected, and the stat entry recorded at
    the same timestamp (if any) is attached to it.

    Args:
      client_paths: Iterable of client path objects exposing `client_id`,
        `path_type`, `path_id` and `components`. It is materialized once,
        so one-shot iterables are accepted.
      max_timestamp: If not None, hash entries newer than this time are
        ignored.
      cursor: Database cursor used to run the query. It defaults to None but
        is required whenever `client_paths` is non-empty (presumably it is
        supplied by a wrapper that is not visible in this block).

    Returns:
      A dict with one entry per requested client path. The value is the
      PathInfo built from the matching row, or None when the query returned
      no row for that path.
    """
    client_paths = list(client_paths)
    path_infos = {client_path: None for client_path in client_paths}
    if not client_paths:
        # Without any path condition the WHERE clause would be empty, which
        # is a SQL syntax error; there is nothing to look up anyway.
        return path_infos

    # Rows only carry the path id, so remember how to map it back.
    path_id_components = {
        client_path.path_id: client_path.components
        for client_path in client_paths
    }

    query = """
    SELECT t.client_id, t.path_type, t.path_id, UNIX_TIMESTAMP(t.timestamp),
           s.stat_entry, h.hash_entry
      FROM (SELECT h.client_id, h.path_type, h.path_id,
                   MAX(h.timestamp) AS timestamp
              FROM client_path_hash_entries AS h
        INNER JOIN hash_blob_references AS b
                ON b.hash_id = h.sha256
             WHERE {conditions}
          GROUP BY client_id, path_type, path_id) AS t
 LEFT JOIN client_path_stat_entries AS s
        ON s.client_id = t.client_id
       AND s.path_type = t.path_type
       AND s.path_id = t.path_id
       AND s.timestamp = t.timestamp
 LEFT JOIN client_path_hash_entries AS h
        ON h.client_id = t.client_id
       AND h.path_type = t.path_type
       AND h.path_id = t.path_id
       AND h.timestamp = t.timestamp
    """

    params = []
    path_conditions = []
    for client_path in client_paths:
        path_conditions.append(
            "(client_id = %s AND path_type = %s AND path_id = %s)")
        params.append(db_utils.ClientIDToInt(client_path.client_id))
        params.append(int(client_path.path_type))
        params.append(client_path.path_id.AsBytes())

    conditions = " OR ".join(path_conditions)
    if max_timestamp is not None:
        conditions = "({}) AND UNIX_TIMESTAMP(timestamp) <= %s".format(
            conditions)
        params.append(mysql_utils.RDFDatetimeToTimestamp(max_timestamp))

    cursor.execute(query.format(conditions=conditions), params)

    for row in cursor.fetchall():
        (client_id, path_type, path_id_bytes, timestamp, stat_entry_bytes,
         hash_entry_bytes) = row

        path_id = rdf_objects.PathID.FromBytes(path_id_bytes)
        components = path_id_components[path_id]

        # The stat entry comes from a LEFT JOIN and may be absent.
        if stat_entry_bytes is not None:
            stat_entry = rdf_client_fs.StatEntry.FromSerializedString(
                stat_entry_bytes)
        else:
            stat_entry = None

        hash_entry = rdf_crypto.Hash.FromSerializedString(hash_entry_bytes)

        client_path = db.ClientPath(
            client_id=db_utils.IntToClientID(client_id),
            path_type=path_type,
            components=components)

        path_infos[client_path] = rdf_objects.PathInfo(
            path_type=path_type,
            components=components,
            stat_entry=stat_entry,
            hash_entry=hash_entry,
            timestamp=mysql_utils.TimestampToRDFDatetime(timestamp))

    return path_infos
def ReadHuntClientResourcesStats(self, hunt_id, cursor=None):
    """Reads and aggregates client resource usage statistics for a hunt.

    Two queries are run against top-level flows of the hunt (rows with the
    given parent hunt id and no parent flow): one computing counts, sums,
    sums of squares and histogram bin counts, and one fetching the ten flows
    with the highest combined CPU time.

    Args:
      hunt_id: Id of the hunt to compute statistics for.
      cursor: Database cursor used to run the queries.

    Returns:
      A ClientResourcesStats object with running stats and histograms for
      user CPU, system CPU and network bytes sent, plus the worst
      performers.
    """
    hunt_id_int = db_utils.HuntIDToInt(hunt_id)

    cpu_bins = rdf_stats.ClientResourcesStats.CPU_STATS_BINS
    network_bins = rdf_stats.ClientResourcesStats.NETWORK_STATS_BINS
    # CPU times are stored in microseconds while the bins are in seconds.
    scaled_cpu_bins = [int(1000000 * b) for b in cpu_bins]

    aggregate_sql = "".join([
        " SELECT COUNT(*), SUM(user_cpu_time_used_micros), "
        "SUM((user_cpu_time_used_micros) * (user_cpu_time_used_micros)), "
        "SUM(system_cpu_time_used_micros), "
        "SUM((system_cpu_time_used_micros) * (system_cpu_time_used_micros)), "
        "SUM(network_bytes_sent), "
        "SUM(network_bytes_sent * network_bytes_sent), ",
        self._BinsToQuery(scaled_cpu_bins, "(user_cpu_time_used_micros)"),
        ",",
        self._BinsToQuery(scaled_cpu_bins, "(system_cpu_time_used_micros)"),
        ",",
        self._BinsToQuery(network_bins, "network_bytes_sent"),
        " FROM flows ",
        "FORCE INDEX(flows_by_hunt) ",
        "WHERE parent_hunt_id = %s AND parent_flow_id IS NULL",
    ])

    cursor.execute(aggregate_sql, [hunt_id_int])
    aggregates = cursor.fetchone()

    (count, user_sum, user_sq_sum, system_sum, system_sq_sum, network_sum,
     network_sq_sum) = aggregates[:7]

    stats = rdf_stats.ClientResourcesStats(
        user_cpu_stats=rdf_stats.RunningStats(
            num=count,
            sum=db_utils.MicrosToSeconds(int(user_sum or 0)),
            # Squared microseconds -> squared seconds.
            sum_sq=int(user_sq_sum or 0) / 1e12,
        ),
        system_cpu_stats=rdf_stats.RunningStats(
            num=count,
            sum=db_utils.MicrosToSeconds(int(system_sum or 0)),
            sum_sq=int(system_sq_sum or 0) / 1e12,
        ),
        network_bytes_sent_stats=rdf_stats.RunningStats(
            num=count,
            sum=float(network_sum or 0),
            sum_sq=float(network_sq_sum or 0),
        ),
    )

    # The bin counts follow the seven aggregate columns, in the same order
    # in which the bin expressions were added to the query.
    histogram_layout = (
        ("user_cpu_stats", cpu_bins),
        ("system_cpu_stats", cpu_bins),
        ("network_bytes_sent_stats", network_bins),
    )
    column = 7
    for field_name, bin_limits in histogram_layout:
        getattr(stats, field_name).histogram = rdf_stats.StatsHistogram()
        for bin_count, bin_limit in zip(aggregates[column:], bin_limits):
            getattr(stats, field_name).histogram.bins.append(
                rdf_stats.StatsHistogramBin(
                    range_max_value=bin_limit, num=bin_count))
        column += len(bin_limits)

    worst_sql = (
        " SELECT client_id, flow_id, user_cpu_time_used_micros, "
        "system_cpu_time_used_micros, network_bytes_sent FROM flows "
        "FORCE INDEX(flows_by_hunt) "
        "WHERE parent_hunt_id = %s AND parent_flow_id IS NULL AND "
        "(user_cpu_time_used_micros > 0 OR system_cpu_time_used_micros > 0 OR "
        "network_bytes_sent > 0) "
        "ORDER BY (user_cpu_time_used_micros + system_cpu_time_used_micros) "
        "DESC LIMIT 10 ")

    cursor.execute(worst_sql, [hunt_id_int])

    for cid, fid, user_micros, system_micros, bytes_sent in cursor.fetchall():
        client_id = db_utils.IntToClientID(cid)
        flow_id = db_utils.IntToFlowID(fid)
        cpu_usage = rdf_client_stats.CpuSeconds(
            user_cpu_time=db_utils.MicrosToSeconds(user_micros),
            system_cpu_time=db_utils.MicrosToSeconds(system_micros),
        )
        stats.worst_performers.append(
            rdf_client_stats.ClientResources(
                client_id=client_id,
                session_id=rdfvalue.RDFURN(client_id).Add(flow_id),
                cpu_usage=cpu_usage,
                network_bytes_sent=bytes_sent))

    return stats
def ReadHuntResults(self,
                    hunt_id,
                    offset,
                    count,
                    with_tag=None,
                    with_type=None,
                    with_substring=None,
                    with_timestamp=None,
                    cursor=None):
    """Reads a page of results of a given hunt.

    Each optional filter is applied only when its argument is truthy.

    Args:
      hunt_id: Id of the hunt whose results are read.
      offset: Number of matching results to skip (SQL OFFSET).
      count: Maximum number of results to return (SQL LIMIT).
      with_tag: Only results with exactly this tag are returned.
      with_type: Only results with exactly this type name are returned.
      with_substring: Only results whose payload contains this substring are
        returned; wildcards in it are escaped before use.
      with_timestamp: Only results with exactly this timestamp are returned.
      cursor: Database cursor used to run the query.

    Returns:
      A list of FlowResult objects ordered by timestamp. Payloads whose type
      is not registered in `rdfvalue.RDFValue.classes` are wrapped in
      SerializedValueOfUnrecognizedType.
    """
    sql_parts = [
        "SELECT client_id, flow_id, hunt_id, payload, type, "
        "UNIX_TIMESTAMP(timestamp), tag "
        "FROM flow_results "
        "FORCE INDEX(flow_results_hunt_id_flow_id_timestamp) "
        "WHERE hunt_id = %s "
    ]
    params = [db_utils.HuntIDToInt(hunt_id)]

    if with_tag:
        sql_parts.append("AND tag = %s ")
        params.append(with_tag)

    if with_type:
        sql_parts.append("AND type = %s ")
        params.append(with_type)

    if with_substring:
        sql_parts.append("AND payload LIKE %s ")
        params.append("%" + db_utils.EscapeWildcards(with_substring) + "%")

    if with_timestamp:
        sql_parts.append("AND timestamp = FROM_UNIXTIME(%s) ")
        params.append(mysql_utils.RDFDatetimeToTimestamp(with_timestamp))

    sql_parts.append("ORDER BY timestamp ASC LIMIT %s OFFSET %s")
    params.extend([count, offset])

    cursor.execute("".join(sql_parts), params)

    results = []
    for row in cursor.fetchall():
        # The selected hunt_id column is not needed: the caller's hunt_id is
        # used for the returned objects.
        (client_id_int, flow_id_int, _, serialized_payload, payload_type,
         timestamp, tag) = row

        if payload_type not in rdfvalue.RDFValue.classes:
            payload = rdf_objects.SerializedValueOfUnrecognizedType(
                type_name=payload_type, value=serialized_payload)
        else:
            payload_cls = rdfvalue.RDFValue.classes[payload_type]
            payload = payload_cls.FromSerializedBytes(serialized_payload)

        flow_result = rdf_flow_objects.FlowResult(
            client_id=db_utils.IntToClientID(client_id_int),
            flow_id=db_utils.IntToFlowID(flow_id_int),
            hunt_id=hunt_id,
            payload=payload,
            timestamp=mysql_utils.TimestampToRDFDatetime(timestamp))
        if tag is not None:
            flow_result.tag = tag

        results.append(flow_result)

    return results