def testDoesNotFetchDuplicates(self):
    """Check that two processes sharing one executable yield one result."""
    binary_path = os.path.join(self.base_path, "test_img.dd")
    created = long(1333718907.167083 * 1e6)

    # Different pids and command lines, but the same executable path.
    first = rdf_client.Process(
        pid=2,
        ppid=1,
        cmdline=["test_img.dd"],
        exe=binary_path,
        ctime=created)
    second = rdf_client.Process(
        pid=3,
        ppid=1,
        cmdline=["test_img.dd", "--arg"],
        exe=binary_path,
        ctime=created)

    client_mock = action_mocks.ListProcessesMock([first, second])
    client_id = self.SetupClient(0)
    session_id = flow_test_lib.TestFlowHelper(
        flow_processes.ListProcesses.__name__,
        client_mock,
        client_id=client_id,
        fetch_binaries=True,
        token=self.token)

    results = flow.GRRFlow.ResultCollectionForFID(session_id)
    self.assertEqual(len(results), 1)
def testWhenFetchingIgnoresMissingFiles(self):
    """Check that a process with a missing executable yields no result."""
    created = long(1333718907.167083 * 1e6)

    existing = rdf_client.Process(
        pid=2,
        ppid=1,
        cmdline=["test_img.dd"],
        exe=os.path.join(self.base_path, "test_img.dd"),
        ctime=created)
    missing = rdf_client.Process(
        pid=2,
        ppid=1,
        cmdline=["file_that_does_not_exist"],
        exe=os.path.join(self.base_path, "file_that_does_not_exist"),
        ctime=created)

    client_mock = action_mocks.ListProcessesMock([existing, missing])
    client_id = self.SetupClient(0)
    # Flow error checking is switched off for this run.
    session_id = flow_test_lib.TestFlowHelper(
        flow_processes.ListProcesses.__name__,
        client_mock,
        client_id=client_id,
        fetch_binaries=True,
        token=self.token,
        check_flow_errors=False)

    binaries = list(flow.GRRFlow.ResultCollectionForFID(session_id))
    self.assertEqual(len(binaries), 1)
    self.assertEqual(binaries[0].pathspec.path, existing.exe)
def testWhenFetchingFiltersOutProcessesWithoutExeAndConnectionState(self):
    """Check that no binaries are returned when no process qualifies."""
    client_id = self.SetupClient(0)
    created = long(1333718907.167083 * 1e6)

    # No executable path at all.
    without_exe = rdf_client.Process(
        pid=2, ppid=1, cmdline=["test_img.dd"], ctime=created)
    # Has an executable, but its connection is not in the requested state.
    established = rdf_client.Process(
        pid=2,
        ppid=1,
        cmdline=["cmd.exe"],
        exe="c:\\windows\\cmd.exe",
        ctime=created,
        connections=rdf_client.NetworkConnection(
            family="INET", state="ESTABLISHED"))

    client_mock = action_mocks.ListProcessesMock([without_exe, established])
    session_id = flow_test_lib.TestFlowHelper(
        flow_processes.ListProcesses.__name__,
        client_mock,
        fetch_binaries=True,
        client_id=client_id,
        connection_states=["LISTEN"],
        token=self.token)

    # Nothing should have matched.
    results = flow.GRRFlow.ResultCollectionForFID(session_id)
    self.assertEqual(len(results), 0)
def testExportCommandIsNotShownForNonFileResults(self):
    """Check that Process-only hunt results show no export command text."""
    values = [rdf_client.Process(pid=pid) for pid in (1, 42423)]
    hunt_urn = self.CreateGenericHuntWithCollection(values=values)

    results_url = "/#/hunts/%s/results" % hunt_urn.Basename()
    self.Open(results_url)

    # Wait for the results view to render before checking for absence.
    self.WaitUntil(self.IsElementPresent,
                   "css=grr-hunt-results:contains('Value')")
    self.WaitUntilNot(self.IsTextPresent, "Show export command")
def testFindNoRunningLogserver(self):
    """Check when the CIS-SERVICE-LOGSERVER-RUNNING anomaly is reported."""
    check_id = "CIS-SERVICE-LOGSERVER-RUNNING"
    symptom = "Missing attribute: Logging software is not running."
    context = "RAW"
    findings = ["Expected state was not found"]

    # Try it without rsyslog.
    host_data = self.GenHostData()
    results = self.RunChecks(host_data)
    self.assertCheckDetectedAnom(check_id, results, symptom, findings)

    # Now rsyslog is running.
    rsyslog = rdf_client.Process(name="rsyslogd", pid=1236)
    host_data["ListProcessesGrr"][context].append(rsyslog)
    results = self.RunChecks(host_data)
    self.assertCheckUndetected(check_id, results)

    # Check with some problematic real-world data, from fresh host data.
    host_data = self.GenHostData()

    # A non-logger process whose command line merely mentions rsyslogd.conf.
    # We expect an anomaly to be raised.
    agent = rdf_client.Process(
        name="python",
        pid=10554,
        ppid=1,
        exe="/usr/bin/python",
        cmdline=[
            "/usr/bin/python", "-E", "/usr/sbin/foo_agent",
            "/etc/foo/conf.d/rsyslogd.conf", "/etc/foo/foobar.conf"
        ])
    host_data["ListProcessesGrr"][context].append(agent)
    results = self.RunChecks(host_data)
    self.assertCheckDetectedAnom(check_id, results, symptom, findings)

    # Now a logging service process is added. We expect no anomaly this time.
    logger_proc = rdf_client.Process(
        name="rsyslogd",
        pid=10200,
        ppid=1,
        exe="/sbin/rsyslogd",
        cmdline=["/sbin/rsyslogd", "-i", "/var/run/rsyslogd.pid", "-m", "0"])
    host_data["ListProcessesGrr"][context].append(logger_proc)
    results = self.RunChecks(host_data)
    self.assertCheckUndetected(check_id, results)

    # Yet another non-logger process. There should still be no anomaly.
    unrelated = rdf_client.Process(
        name="foobar",
        pid=31337,
        ppid=1,
        exe="/usr/local/bin/foobar",
        cmdline=["/usr/local/bin/foobar", "--test", "args"])
    host_data["ListProcessesGrr"][context].append(unrelated)
    results = self.RunChecks(host_data)
    self.assertCheckUndetected(check_id, results)
def testDoesNotShowGenerateArchiveButtonForNonExportableRDFValues(self):
    """Check that Process-only results show no archive download hint."""
    values = [rdf_client.Process(pid=pid) for pid in (1, 42423)]
    self.CreateGenericHuntWithCollection(values=values)

    # Navigate: start page -> hunts list -> the hunt -> its Results tab.
    self.Open("/")
    navigation = (
        "css=a[grrtarget=hunts]",
        "css=td:contains('GenericHunt')",
        "css=li[heading=Results]",
    )
    for selector in navigation:
        self.Click(selector)

    # Make sure the results are rendered before checking for absence.
    self.WaitUntil(self.IsTextPresent, "42423")
    self.WaitUntilNot(
        self.IsTextPresent,
        "Files referenced in this collection can be downloaded")
def testDoesNotShowPerFileDownloadButtonForNonExportableRDFValues(self):
    """Check that Process-only results show no per-result download button."""
    values = [rdf_client.Process(pid=pid) for pid in (1, 42423)]
    self.CreateGenericHuntWithCollection(values=values)

    # Navigate: start page -> hunts list -> the hunt -> its Results tab.
    self.Open("/")
    navigation = (
        "css=a[grrtarget=hunts]",
        "css=td:contains('GenericHunt')",
        "css=li[heading=Results]",
    )
    for selector in navigation:
        self.Click(selector)

    # Make sure the results are rendered before checking for absence.
    self.WaitUntil(self.IsTextPresent, "42423")
    download_button = (
        "css=grr-results-collection button:has(span.glyphicon-download)")
    self.WaitUntilNot(self.IsElementPresent, download_button)
def testProcessListingWithFilter(self):
    """Test that the ListProcesses flow works with filter."""
    client_id = self.SetupClient(0)
    created = long(1333718907.167083 * 1e6)

    # Two processes with an executable path and two without one.
    client_mock = action_mocks.ListProcessesMock([
        rdf_client.Process(
            pid=2,
            ppid=1,
            cmdline=["cmd.exe"],
            exe="c:\\windows\\cmd.exe",
            ctime=created),
        rdf_client.Process(
            pid=3,
            ppid=1,
            cmdline=["cmd2.exe"],
            exe="c:\\windows\\cmd2.exe",
            ctime=created),
        rdf_client.Process(
            pid=4, ppid=1, cmdline=["missing_exe.exe"], ctime=created),
        rdf_client.Process(
            pid=5, ppid=1, cmdline=["missing2_exe.exe"], ctime=created),
    ])

    flow_urn = flow.GRRFlow.StartFlow(
        client_id=client_id,
        flow_name=flow_processes.ListProcesses.__name__,
        filename_regex=r".*cmd2.exe",
        token=self.token)
    session_id = flow_test_lib.TestFlowHelper(
        flow_urn, client_mock, client_id=client_id, token=self.token)

    # Expect one result that matches the regex.
    processes = flow.GRRFlow.ResultCollectionForFID(session_id)
    self.assertEqual(len(processes), 1)
    self.assertEqual(processes[0].ctime, long(1333718907167083))
    self.assertEqual(processes[0].cmdline, ["cmd2.exe"])

    # Expect the flow log to mention two skipped results. This is a real
    # assertion (rather than an early return plus RuntimeError) so a miss is
    # reported as a test failure and later checks cannot be skipped silently.
    logs = flow.GRRFlow.LogCollectionForFID(flow_urn)
    self.assertTrue(
        any("Skipped 2" in log.log_message for log in logs),
        "Skipped process not mentioned in logs")
def testProcessListingOnly(self):
    """Test that the ListProcesses flow works."""
    client_id = self.SetupClient(0)

    mocked_process = rdf_client.Process(
        pid=2,
        ppid=1,
        cmdline=["cmd.exe"],
        exe="c:\\windows\\cmd.exe",
        ctime=long(1333718907.167083 * 1e6))
    client_mock = action_mocks.ListProcessesMock([mocked_process])

    flow_urn = flow.GRRFlow.StartFlow(
        client_id=client_id,
        flow_name=flow_processes.ListProcesses.__name__,
        token=self.token)
    session_id = flow_test_lib.TestFlowHelper(
        flow_urn, client_mock, client_id=client_id, token=self.token)

    # Check the output collection.
    results = flow.GRRFlow.ResultCollectionForFID(session_id)
    self.assertEqual(len(results), 1)
    self.assertEqual(results[0].ctime, long(1333718907167083))
    self.assertEqual(results[0].cmdline, ["cmd.exe"])
def testExportedFilenamesAndManifestForValuesOfMultipleTypes(self):
    """Check archive member names and the MANIFEST for two value types."""
    stat_entry = rdf_client.StatEntry(
        pathspec=rdf_paths.PathSpec(path="/foo/bar", pathtype="OS"))
    values_by_type = {
        rdf_client.StatEntry: [stat_entry],
        rdf_client.Process: [rdf_client.Process(pid=42)],
    }
    zip_fd, prefix = self.ProcessValuesToZip(values_by_type)

    manifest_name = "%s/MANIFEST" % prefix
    expected_names = {
        manifest_name,
        "%s/ExportedFile_from_StatEntry.sql" % prefix,
        "%s/ExportedProcess_from_Process.sql" % prefix,
    }
    self.assertEqual(set(zip_fd.namelist()), expected_names)

    # NOTE(review): yaml.load is called without an explicit Loader; the input
    # is the archive returned by ProcessValuesToZip above.
    parsed_manifest = yaml.load(zip_fd.read(manifest_name))
    expected_manifest = {
        "export_stats": {
            "StatEntry": {
                "ExportedFile": 1
            },
            "Process": {
                "ExportedProcess": 1
            }
        }
    }
    self.assertEqual(parsed_manifest, expected_manifest)
def testListResultsForListProcessesFlow(self):
    """Check that the API lists the single ListProcesses flow result."""
    expected = rdf_client.Process(
        pid=2,
        ppid=1,
        cmdline=["cmd.exe"],
        exe="c:\\windows\\cmd.exe",
        ctime=long(1333718907.167083 * 1e6),
        RSS_size=42)

    client_urn = self.SetupClient(0)
    client_mock = action_mocks.ListProcessesMock([expected])

    flow_urn = flow.StartFlow(
        client_id=client_urn,
        flow_name=processes.ListProcesses.__name__,
        token=self.token)
    flow_test_lib.TestFlowHelper(
        flow_urn, client_mock, client_id=client_urn, token=self.token)

    # Read the results back through the API object for this client and flow.
    api_client = self.api.Client(client_id=client_urn.Basename())
    result_flow = api_client.Flow(flow_urn.Basename())
    results = list(result_flow.ListResults())

    self.assertEqual(len(results), 1)
    self.assertEqual(expected.AsPrimitiveProto(), results[0].payload)
def GenProcessData(self, processes, **kwargs):
    """Build host data holding one Process entry per given tuple.

    Args:
      processes: An iterable of (name, pid, cmdline) tuples.
      **kwargs: Passed through unchanged to SetKnowledgeBase.

    Returns:
      The host data from SetKnowledgeBase, with the "ListProcessesGrr" entry
      set to the artifact data built from the processes.
    """
    host_data = self.SetKnowledgeBase(**kwargs)
    process_rdfs = [
        rdf_client.Process(name=name, pid=pid, cmdline=cmdline)
        for (name, pid, cmdline) in processes
    ]
    # ListProcessesGrr is a flow artifact, so it has to be stored as raw.
    host_data["ListProcessesGrr"] = self.SetArtifactData(raw=process_rdfs)
    return host_data
def ParseProcess(self, item):
    """Convert one parsed item into an rdf_client.Process.

    Args:
      item: A dict-like object. The "_EPROCESS" -> "Cybox" sub-dict supplies
        "Name", "PID" and "Parent_PID"; "thread_count" and
        "process_create_time" -> "epoch" are read from the item itself.

    Returns:
      An rdf_client.Process. Keys missing from the item are passed as None.
    """
    eprocess = item.get("_EPROCESS", {})
    cybox = eprocess.get("Cybox", {})
    fields = {
        "exe": cybox.get("Name"),
        "pid": cybox.get("PID"),
        "ppid": cybox.get("Parent_PID"),
        "num_threads": item.get("thread_count"),
        "ctime": item.get("process_create_time", {}).get("epoch"),
    }
    return rdf_client.Process(**fields)
def GenHostData(self):
    """Create host data with x11, xorg and sshd processes and listeners.

    Returns:
      The host data from SetKnowledgeBase, with the "ListProcessesGrr" entry
      set to the artifact data for the three processes.
    """
    host_data = self.SetKnowledgeBase()

    # Listeners on port 6000: two loopback and two non-loopback addresses.
    loop4 = self.AddListener("127.0.0.1", 6000)
    loop6 = self.AddListener("::1", 6000, "INET6")
    ext4 = self.AddListener("10.1.1.1", 6000)
    ext6 = self.AddListener("fc00::1", 6000, "INET6")
    loopback = (loop4, loop6)
    everything = (loop4, loop6, ext4, ext6)

    # Each process receives its own fresh list of connections.
    process_specs = (
        ("x11", 1233, loopback),
        ("xorg", 1234, everything),
        ("sshd", 1235, everything),
    )
    processes = [
        rdf_client.Process(name=name, pid=pid, connections=list(listeners))
        for name, pid, listeners in process_specs
    ]

    # Note: ListProcessesGrr is a flow artifact, hence it needs to be of
    # raw context.
    host_data["ListProcessesGrr"] = self.SetArtifactData(raw=processes)
    return host_data
def testProcessListingFilterConnectionState(self):
    """Check that only processes in the requested connection states return."""
    client_id = self.SetupClient(0)
    created = long(1333718907.167083 * 1e6)

    closed = rdf_client.Process(
        pid=2,
        ppid=1,
        cmdline=["cmd.exe"],
        exe="c:\\windows\\cmd.exe",
        ctime=created,
        connections=rdf_client.NetworkConnection(
            family="INET", state="CLOSED"))
    listening = rdf_client.Process(
        pid=3,
        ppid=1,
        cmdline=["cmd2.exe"],
        exe="c:\\windows\\cmd2.exe",
        ctime=created,
        connections=rdf_client.NetworkConnection(
            family="INET", state="LISTEN"))
    established = rdf_client.Process(
        pid=4,
        ppid=1,
        cmdline=["missing_exe.exe"],
        ctime=created,
        connections=rdf_client.NetworkConnection(
            family="INET", state="ESTABLISHED"))

    client_mock = action_mocks.ListProcessesMock(
        [closed, listening, established])

    flow_urn = flow.GRRFlow.StartFlow(
        client_id=client_id,
        flow_name=flow_processes.ListProcesses.__name__,
        connection_states=["ESTABLISHED", "LISTEN"],
        token=self.token)
    session_id = flow_test_lib.TestFlowHelper(
        flow_urn, client_mock, client_id=client_id, token=self.token)

    results = flow.GRRFlow.ResultCollectionForFID(session_id)
    self.assertEqual(len(results), 2)

    # Collect the state of the first connection of every returned process.
    seen_states = {str(result.connections[0].state) for result in results}
    self.assertItemsEqual(seen_states, ["ESTABLISHED", "LISTEN"])
def testEmailPluginSendsEmailPerEveyBatchOfResponses(self):
    """Check that one batch of responses produces exactly one email."""
    plugin_args = email_plugin.EmailOutputPluginArgs(
        email_address=self.email_address)
    self.ProcessResponses(
        plugin_args=plugin_args, responses=[rdf_client.Process(pid=42)])

    self.assertEqual(len(self.email_messages), 1)

    msg = self.email_messages[0]
    self.assertEqual(msg["address"], self.email_address)
    # assertIn reports both operands on failure, unlike assertTrue(x in y).
    self.assertIn("got a new result in %s" % self.results_urn, msg["title"])
    self.assertIn(utils.SmartStr(self.client_id), msg["message"])
    self.assertIn(utils.SmartStr(self.hostname), msg["message"])
def testProcessListingOnlyFleetspeak(self):
    """Test that the ListProcesses flow works with Fleetspeak."""
    mocked_process = rdf_client.Process(
        pid=2,
        ppid=1,
        cmdline=["cmd.exe"],
        exe=r"c:\windows\cmd.exe",
        ctime=long(1333718907167083))
    client_mock = action_mocks.ListProcessesMock([mocked_process])
    client_mock.mock_task_queue = []

    def _QueueForClientMock(fs_msg):
        """Unpack a Fleetspeak message and queue it on the client mock."""
        pb_msg = jobs_pb2.GrrMessage()
        fs_msg.data.Unpack(pb_msg)
        serialized = pb_msg.SerializeToString()
        client_mock.mock_task_queue.append(
            rdf_flows.GrrMessage.FromSerializedString(serialized))

    fake_service_client = _FakeGRPCServiceClient(
        "GRR", send_callback=_QueueForClientMock)

    fleetspeak_connector.Reset()
    fleetspeak_connector.Init(service_client=fake_service_client)

    outgoing = fake_service_client.outgoing
    # Wrap InsertMessage so calls still go through but are recorded.
    with mock.patch.object(
            outgoing, "InsertMessage", wraps=outgoing.InsertMessage):
        flow_urn = flow.GRRFlow.StartFlow(
            client_id=self.client_id,
            flow_name=flow_processes.ListProcesses.__name__,
            token=self.token)
        session_id = flow_test_lib.TestFlowHelper(
            flow_urn, client_mock, client_id=self.client_id, token=self.token)

        fleetspeak_connector.CONN.outgoing.InsertMessage.assert_called()

    # Check the output collection.
    results = flow.GRRFlow.ResultCollectionForFID(session_id)
    self.assertEqual(len(results), 1)
    (result,) = results
    self.assertEqual(result.ctime, long(1333718907167083))
    self.assertEqual(result.cmdline, ["cmd.exe"])
def ProcessIterator(pids, process_regex_string, ignore_grr_process,
                    error_list):
    """Yields all (psutil) processes that match certain criteria.

    Args:
      pids: A list of pids. If non-empty, only processes with those pids are
        considered; otherwise all processes from psutil.process_iter() are.
      process_regex_string: If given, only processes whose name matches the
        regex (via search) are returned.
      ignore_grr_process: If True, the current process itself is not returned.
      error_list: Errors raised while opening a requested pid are appended to
        this list. Type is repeated YaraProcessError.

    Yields:
      psutil.Process objects matching all criteria.
    """
    requested_pids = set(pids)
    # -1 never equals a real pid, so nothing is skipped in that case.
    own_pid = psutil.Process().pid if ignore_grr_process else -1
    name_regex = (
        re.compile(process_regex_string) if process_regex_string else None)

    if not requested_pids:
        candidates = psutil.process_iter()
    else:
        candidates = []
        for pid in requested_pids:
            try:
                candidates.append(psutil.Process(pid=pid))
            except Exception as e:  # pylint: disable=broad-except
                # Record the failure for this pid and carry on with the rest.
                error_list.Append(
                    rdf_yara.YaraProcessError(
                        process=rdf_client.Process(pid=pid), error=str(e)))

    for candidate in candidates:
        # The name filter is evaluated before the pid comparison.
        name_ok = name_regex is None or name_regex.search(candidate.name())
        if name_ok and candidate.pid != own_pid:
            yield candidate
def testYamlPluginWithValuesOfMultipleTypes(self):
    """Check archive names, MANIFEST and YAML content for two value types."""
    stat_entry = rdf_client.StatEntry(
        pathspec=rdf_paths.PathSpec(path="/foo/bar", pathtype="OS"))
    values_by_type = {
        rdf_client.StatEntry: [stat_entry],
        rdf_client.Process: [rdf_client.Process(pid=42)],
    }
    zip_fd, prefix = self.ProcessValuesToZip(values_by_type)

    manifest_name = "%s/MANIFEST" % prefix
    files_name = "%s/ExportedFile/from_StatEntry.yaml" % prefix
    processes_name = "%s/ExportedProcess/from_Process.yaml" % prefix

    self.assertEqual(
        set(zip_fd.namelist()), {manifest_name, files_name, processes_name})

    # NOTE(review): yaml.load is called without an explicit Loader; the input
    # is the archive returned by ProcessValuesToZip above.
    parsed_manifest = yaml.load(zip_fd.read(manifest_name))
    expected_manifest = {
        "export_stats": {
            "StatEntry": {
                "ExportedFile": 1
            },
            "Process": {
                "ExportedProcess": 1
            }
        }
    }
    self.assertEqual(parsed_manifest, expected_manifest)

    exported_files = yaml.load(zip_fd.read(files_name))
    self.assertEqual(len(exported_files), 1)
    exported_file = exported_files[0]

    # Only the client_urn is filled in by the plugin. Doing lookups for
    # all the clients metadata is possible but expensive. It doesn't seem to
    # be worth it.
    self.assertEqual(exported_file["metadata"]["client_urn"],
                     str(self.client_id))
    self.assertEqual(exported_file["metadata"]["source_urn"],
                     str(self.results_urn))
    self.assertEqual(exported_file["urn"],
                     self.client_id.Add("/fs/os/foo/bar"))

    exported_processes = yaml.load(zip_fd.read(processes_name))
    self.assertEqual(len(exported_processes), 1)
    # The pid is compared against its string form.
    self.assertEqual(exported_processes[0]["pid"], "42")
def testBigQueryPluginWithValuesOfMultipleTypes(self):
    """Check that responses of two types end up in two output streams."""
    output = self.ProcessResponses(
        plugin_args=bigquery_plugin.BigQueryOutputPluginArgs(),
        responses=[
            rdf_client.StatEntry(
                pathspec=rdf_paths.PathSpec(
                    path="/中国新闻网新闻中", pathtype="OS")),
            rdf_client.Process(pid=42)
        ],
        process_responses_separately=True)

    # Should have two separate output streams for the two types.
    self.assertEqual(len(output), 2)

    expected_job_ids = (
        "C-1000000000000000_Results_ExportedFile_1445995873",
        "C-1000000000000000_Results_ExportedProcess_1445995873",
    )
    for name, stream, _, job_id in output:
        # assertIn shows the unexpected job id on failure.
        self.assertIn(job_id, expected_job_ids)
        self._parseOutput(name, stream)
def testEmailPluginStopsSendingEmailsAfterLimitIsReached(self):
    """Check that no more than emails_limit emails are sent.

    Eleven responses are processed separately with a limit of ten emails.
    Exactly ten emails are expected, and only the last one should carry the
    notice that sending will be disabled.
    """
    emails_limit = 10
    responses = [rdf_client.Process(pid=i) for i in range(11)]
    self.ProcessResponses(
        plugin_args=email_plugin.EmailOutputPluginArgs(
            email_address=self.email_address, emails_limit=emails_limit),
        responses=responses,
        process_responses_separately=True)

    self.assertEqual(len(self.email_messages), emails_limit)

    for msg in self.email_messages:
        self.assertEqual(msg["address"], self.email_address)
        self.assertIn("got a new result in %s" % self.results_urn,
                      msg["title"])
        self.assertIn(utils.SmartStr(self.client_id), msg["message"])
        self.assertIn(utils.SmartStr(self.hostname), msg["message"])

    limit_notice = "sending of emails will be disabled now"

    # Every email before the last one must not carry the notice. The check
    # looks at the message body: testing `in msg` would only inspect the
    # message's keys, and the last email is excluded because it is expected
    # to contain the notice.
    for msg in self.email_messages[:emails_limit - 1]:
        self.assertNotIn(limit_notice, msg["message"])

    self.assertIn(limit_notice,
                  self.email_messages[emails_limit - 1]["message"])
def testExportedRowsForValuesOfMultipleTypes(self):
    """Check the rows produced by running the exported SQL scripts."""
    stat_entry = rdf_client.StatEntry(
        pathspec=rdf_paths.PathSpec(path="/foo/bar", pathtype="OS"))
    values_by_type = {
        rdf_client.StatEntry: [stat_entry],
        rdf_client.Process: [rdf_client.Process(pid=42)],
    }
    zip_fd, prefix = self.ProcessValuesToZip(values_by_type)

    # Read both scripts from the archive, then run them in the same order.
    with self.db_connection:
        scripts = [
            zip_fd.read("%s/ExportedFile_from_StatEntry.sql" % prefix),
            zip_fd.read("%s/ExportedProcess_from_Process.sql" % prefix),
        ]
        for script in scripts:
            self.db_cursor.executescript(script)

    self.db_cursor.execute(
        "SELECT \"metadata.client_urn\", \"metadata.source_urn\", urn "
        "FROM \"ExportedFile.from_StatEntry\";")
    file_rows = self.db_cursor.fetchall()
    self.assertEqual(len(file_rows), 1)
    file_row = file_rows[0]
    # Columns: client URN, source URN, URN.
    self.assertEqual(file_row[0], str(self.client_id))
    self.assertEqual(file_row[1], str(self.results_urn))
    self.assertEqual(file_row[2], self.client_id.Add("/fs/os/foo/bar"))

    self.db_cursor.execute(
        "SELECT \"metadata.client_urn\", \"metadata.source_urn\", pid "
        "FROM \"ExportedProcess.from_Process\";")
    process_rows = self.db_cursor.fetchall()
    self.assertEqual(len(process_rows), 1)
    process_row = process_rows[0]
    # Columns: client URN, source URN, PID.
    self.assertEqual(process_row[0], str(self.client_id))
    self.assertEqual(process_row[1], str(self.results_urn))
    self.assertEqual(process_row[2], 42)
def testFetchesAndStoresBinary(self):
    """Check that the binary of a listed process is returned as a result."""
    binary_path = os.path.join(self.base_path, "test_img.dd")
    process = rdf_client.Process(
        pid=2,
        ppid=1,
        cmdline=["test_img.dd"],
        exe=binary_path,
        ctime=long(1333718907.167083 * 1e6))

    client_mock = action_mocks.ListProcessesMock([process])
    client_id = self.SetupClient(0)
    session_id = flow_test_lib.TestFlowHelper(
        flow_processes.ListProcesses.__name__,
        client_mock,
        client_id=client_id,
        fetch_binaries=True,
        token=self.token)

    binaries = list(flow.GRRFlow.ResultCollectionForFID(session_id))
    self.assertEqual(len(binaries), 1)

    fetched = binaries[0]
    self.assertEqual(fetched.pathspec.path, process.exe)
    # The reported size must match the size of the file on disk.
    self.assertEqual(fetched.st_size, os.stat(process.exe).st_size)
def testBigQueryPluginFallbackToAFF4(self):
    """Check the AFF4 data and schema files when BigQuery uploads fail."""
    plugin_args = bigquery_plugin.BigQueryOutputPluginArgs()
    responses = [
        rdf_client.StatEntry(
            pathspec=rdf_paths.PathSpec(
                path="/中国新闻网新闻中", pathtype="OS")),
        rdf_client.Process(pid=42),
        rdf_client.Process(pid=43),
        rdf_client.SoftwarePackage(name="test.deb")
    ]

    plugin = bigquery_plugin.BigQueryOutputPlugin(
        source_urn=self.results_urn,
        output_base_urn=self.base_urn,
        args=plugin_args,
        token=self.token)
    plugin.InitializeState()

    messages = [
        rdf_flows.GrrMessage(source=self.client_id, payload=response)
        for response in responses
    ]
    output_names = ("ExportedFile", "ExportedProcess",
                    "AutoExportedSoftwarePackage")

    def _ProcessWithFailingUploads(fake_time, expected_insert_calls):
        """Feed all messages to the plugin while every InsertData raises."""
        with test_lib.FakeTime(fake_time):
            with mock.patch.object(bigquery,
                                   "GetBigQueryClient") as mock_bigquery:
                mock_bigquery.return_value.configure_mock(**{
                    "InsertData.side_effect":
                        bigquery.BigQueryJobUploadError()
                })
                with test_lib.ConfigOverrider(
                    {"BigQuery.max_upload_failures": 2}):
                    for message in messages:
                        plugin.ProcessResponses([message])
                    plugin.Flush()

                    self.assertEqual(
                        mock_bigquery.return_value.InsertData.call_count,
                        expected_insert_calls)

    def _OpenOutput(output_name, extension):
        """Open the AFF4 object written for output_name at 1445995873."""
        filename = "C-1000000000000000_Results_%s_1445995873.%s" % (
            output_name, extension)
        return aff4.FACTORY.Open(
            self.base_urn.Add(filename), token=self.token)

    # We have 3 output types but a limit of 2 upload failures, so we
    # shouldn't try the third one.
    _ProcessWithFailingUploads(1445995873, 2)

    # We should have written a data file and a schema file for each type.
    for output_name in output_names:
        schema_fd = _OpenOutput(output_name, "schema")
        data_fd = _OpenOutput(output_name, "data")
        actual_fd = gzip.GzipFile(None, "r", 9, data_fd)

        if output_name == "ExportedFile":
            self.CompareSchemaToKnownGood(json.load(schema_fd))
            self.assertEqual(
                json.load(actual_fd)["urn"],
                self.client_id.Add("/fs/os/中国新闻网新闻中"))
        elif output_name == "ExportedProcess":
            self.assertEqual(json.load(schema_fd)[1]["name"], "pid")
            expected_pids = ["42", "43"]
            # One JSON document per line, in the order they were processed.
            for index, line in enumerate(actual_fd):
                self.assertEqual(
                    json.loads(line)["pid"], expected_pids[index])
        else:
            self.assertEqual(json.load(schema_fd)[1]["name"], "name")
            self.assertEqual(json.load(actual_fd)["name"], "test.deb")

    # Process the same messages to make sure we're re-using the filehandles.
    # We shouldn't call InsertData at all because we have passed max
    # failures already.
    _ProcessWithFailingUploads(1445995878, 0)

    # The data files keep their first timestamp and now hold twice the lines.
    expected_line_counts = {
        "ExportedFile": 2,
        "ExportedProcess": 4,
        "AutoExportedSoftwarePackage": 2
    }
    for output_name in output_names:
        data_fd = _OpenOutput(output_name, "data")
        actual_fd = gzip.GzipFile(None, "r", 9, data_fd)
        self.assertEqual(
            sum(1 for line in actual_fd), expected_line_counts[output_name])
def Parse(self, cmd, args, stdout, stderr, return_val, time_taken,
          knowledge_base):
    """Parse the ps output.

    Note that cmdline consumes every field up to the end of line and as it is
    string, we can't perfectly see what the arguments on the command line
    really were. We just assume a space is the arg separator. It's imperfect,
    but it's better than nothing. Obviously, if cmd/cmdline is specified, it
    must be the last column of output.

    This is a generator, so nothing (including the return code check) runs
    until the result is iterated.

    Args:
      cmd: A string containing the base command that was run.
      args: A list of strings containing the commandline args for the command.
      stdout: A string containing the stdout of the command run.
      stderr: A string containing the stderr of the command run. (Unused)
      return_val: The return code following command execution.
      time_taken: The time taken to run the process. (Unused)
      knowledge_base: An RDF KnowledgeBase. (Unused)

    Yields:
      RDF Process objects, one per line that could be parsed. Lines that raise
      ValueError during conversion are logged and skipped.
    """
    _ = stderr, time_taken, knowledge_base  # Unused.
    self.CheckReturn(cmd, return_val)

    if not stdout:
        # We have nothing to process so bail out. (Handles an input of None.)
        return

    # Maps a ps column name to (Process attribute name, converter).
    rdf_convert_table = {
        "pid": ("pid", int),
        "tgid": ("pid", int),
        "ppid": ("ppid", int),
        "comm": ("name", str),
        "ucomm": ("name", str),
        "ruid": ("real_uid", int),
        "uid": ("effective_uid", int),
        "euid": ("effective_uid", int),
        "suid": ("saved_uid", int),
        "svuid": ("saved_uid", int),
        "user": ("username", str),
        "euser": ("username", str),
        "uname": ("username", str),
        "rgid": ("real_gid", int),
        "gid": ("effective_gid", int),
        "egid": ("effective_gid", int),
        "sgid": ("saved_gid", int),
        "svgid": ("saved_gid", int),
        "tty": ("terminal", str),
        "tt": ("terminal", str),
        "tname": ("terminal", str),
        "stat": ("status", str),
        "nice": ("nice", int),
        "ni": ("nice", int),
        "thcount": ("num_threads", int),
        "nlwp": ("num_threads", int),
        "pcpu": ("cpu_percent", float),
        "%cpu": ("cpu_percent", float),
        "c": ("cpu_percent", float),
        "rss": ("RSS_size", long),
        "rssize": ("RSS_size", long),
        "rsz": ("RSS_size", long),
        "vsz": ("VMS_size", long),
        "vsize": ("VMS_size", long),
        "pmem": ("memory_percent", float),
        "%mem": ("memory_percent", float),
        "args": ("cmdline", self._SplitCmd),
        "command": ("cmdline", self._SplitCmd),
        "cmd": ("cmdline", self._SplitCmd)
    }

    expected_fields = self._FindPsOutputFormat(cmd, args)

    # If we made it here, we expect we can now parse the output and we know
    # its expected format.
    lines = stdout.splitlines()
    if self._HasHeaders(args):
        # Ignore the headers.
        lines = lines[1:]

    for line in lines:
        try:
            # The "len() - 1" allows us to group any extra fields into
            # the last field. e.g. cmdline.
            entries = line.split(None, len(expected_fields) - 1)
            # Create an empty process for us to fill in as best we can.
            process = rdf_client.Process()
            for name, value in zip(expected_fields, entries):
                if name not in rdf_convert_table:
                    # If the field is not something we can process, skip it.
                    continue
                rdf_name, method = rdf_convert_table[name]
                setattr(process, rdf_name, method(value))
            # Approximate the 'comm' from the cmdline if it wasn't detailed.
            # i.e. the basename of the first arg of the commandline.
            if not process.name and process.cmdline:
                process.name = os.path.basename(process.cmdline[0])
        except ValueError:
            # logging.warn is a deprecated alias of logging.warning.
            logging.warning("Unparsable line found for %s %s:\n"
                            " %s", cmd, args, line)
        else:
            # Yield outside the try so only the parsing above is guarded.
            yield process