Example #1
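Exports ten StatEntry responses of the same type and checks that the plugin produces a single gzip-compressed, newline-delimited JSON stream whose rows match the stored reference data.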
    def testBigQueryPluginWithValuesOfSameType(self):
        responses = []
        for i in range(10):
            responses.append(
                rdf_client.StatEntry(
                    aff4path=self.client_id.Add("/fs/os/foo/bar").Add(str(i)),
                    pathspec=rdf_paths.PathSpec(path="/foo/bar"),
                    st_mode=33184,  # octal = 100640 => u=rw,g=r,o= => -rw-r-----
                    st_ino=1063090,
                    st_dev=64512L,
                    st_nlink=1 + i,
                    st_uid=139592,
                    st_gid=5000,
                    st_size=0,
                    st_atime=1336469177,
                    st_mtime=1336129892,
                    st_ctime=1336129892))

        output = self.ProcessResponses(
            plugin_args=bigquery_plugin.BigQueryOutputPluginArgs(),
            responses=responses)

        self.assertEqual(len(output), 1)
        _, stream, schema, job_id = output[0]

        self.assertEqual(job_id,
                         "C-1000000000000000_Results_ExportedFile_1445995873")

        self.CompareSchemaToKnownGood(schema)

        actual_fd = gzip.GzipFile(
            None, "r",
            bigquery_plugin.BigQueryOutputPlugin.GZIP_COMPRESSION_LEVEL,
            stream)

        # Compare to our stored data.
        expected_fd = open(
            os.path.join(config_lib.CONFIG["Test.data_dir"], "bigquery",
                         "ExportedFile.json"))

        # BigQuery expects a newline-separated list of JSON dicts, but this
        # isn't valid JSON so we can't just load the whole thing and compare.
        counter = 0
        for actual, expected in zip(actual_fd, expected_fd):
            self.assertEqual(json.loads(actual), json.loads(expected))
            counter += 1

        self.assertEqual(counter, 10)
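
The line-by-line comparison above can be factored into a small reusable helper. A minimal standalone sketch, assuming a hypothetical assert_ndjson_equal function (not part of GRR) that takes the gzip-compressed output stream and the path of an uncompressed reference file:

import gzip
import json


def assert_ndjson_equal(compressed_stream, expected_path):
    # Hypothetical helper, not GRR code: compare a gzip-compressed,
    # newline-delimited JSON stream against a reference file record by
    # record, since the concatenated dicts are not one valid JSON document.
    count = 0
    with open(expected_path, "rb") as expected_fd:
        actual_fd = gzip.GzipFile(fileobj=compressed_stream, mode="r")
        for actual, expected in zip(actual_fd, expected_fd):
            assert json.loads(actual) == json.loads(expected)
            count += 1
    return count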
Example #2
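Lowers BigQuery.max_file_post_size so that the plugin flushes its output file before all ten records are written, then checks that the records end up spread consistently across the two resulting files.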
  def testBigQueryPluginWithEarlyFlush(self):
    responses = []
    for i in range(10):
      responses.append(
          rdf_client.StatEntry(
              pathspec=rdf_paths.PathSpec(
                  path="/foo/bar/%d" % i, pathtype="OS"),
              st_mode=33184,  # octal = 100640 => u=rw,g=r,o= => -rw-r-----
              st_ino=1063090,
              st_dev=64512L,
              st_nlink=1 + i,
              st_uid=139592,
              st_gid=5000,
              st_size=0,
              st_atime=1336469177,
              st_mtime=1336129892,
              st_ctime=1336129892))

    # Force an early flush. This max file size value has been chosen to force a
    # flush at least once, but not for all 10 records.
    with test_lib.ConfigOverrider({"BigQuery.max_file_post_size": 800}):
      output = self.ProcessResponses(
          plugin_args=bigquery_plugin.BigQueryOutputPluginArgs(),
          responses=responses)

    self.assertEqual(len(output), 2)
    # Check that the output is still consistent.
    actual_fds = []

    for _, stream, _, _ in output:
      actual_fds.append(gzip.GzipFile(None, "r", 9, stream))

    # Compare to our stored data.
    # TODO(user): there needs to be a better way to regenerate these files
    # when they change than breaking into the debugger.
    expected_fd = open(
        os.path.join(config.CONFIG["Test.data_dir"], "bigquery",
                     "ExportedFile.json"), "rb")

    # Check that the same entries we expect are spread across the two files.
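    # Note that expected_fd keeps its read position across the passes of zip()
    # below, so the actual files together must reproduce all ten reference
    # lines exactly once.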
    counter = 0
    for actual_fd in actual_fds:
      for actual, expected in zip(actual_fd, expected_fd):
        self.assertEqual(json.loads(actual), json.loads(expected))
        counter += 1

    self.assertEqual(counter, 10)
Example #3
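Processes responses of two different types (a StatEntry and a Process) and expects a separate output stream, schema and job id for each exported type.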
  def testBigQueryPluginWithValuesOfMultipleTypes(self):
    output = self.ProcessResponses(
        plugin_args=bigquery_plugin.BigQueryOutputPluginArgs(),
        responses=[
            rdf_client.StatEntry(pathspec=rdf_paths.PathSpec(
                path="/中国新闻网新闻中", pathtype="OS")), rdf_client.Process(pid=42)
        ],
        process_responses_separately=True)

    # Should have two separate output streams for the two types.
    self.assertEqual(len(output), 2)

    for name, stream, _, job_id in output:
      self.assertTrue(job_id in [
          "C-1000000000000000_Results_ExportedFile_1445995873",
          "C-1000000000000000_Results_ExportedProcess_1445995873"
      ])
      self._parseOutput(name, stream)
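
The _parseOutput helper is defined elsewhere in the test class and is not shown in this snippet. A rough sketch of the shape such a helper could take, stated as an assumption rather than the actual GRR implementation:

  def _parseOutput(self, name, stream):
    # Hypothetical sketch: decompress the stream and check that every line
    # parses as JSON and carries a field appropriate for its exported type.
    actual_fd = gzip.GzipFile(None, "r", 9, stream)
    for line in actual_fd:
      row = json.loads(line)
      if name == "ExportedFile":
        self.assertIn("urn", row)
      else:
        self.assertEqual(row["pid"], "42")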
Example #4
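Simulates repeated BigQuery upload failures (InsertData raising BigQueryJobUploadError) and verifies that the plugin falls back to writing schema and data files to AFF4, stops retrying once BigQuery.max_upload_failures is reached, and reuses its file handles when the same messages are processed again.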
    def testBigQueryPluginFallbackToAFF4(self):
        plugin_args = bigquery_plugin.BigQueryOutputPluginArgs()
        responses = [
            rdf_client.StatEntry(
                aff4path=self.client_id.Add("/fs/os/中国新闻网新闻中"),
                pathspec=rdf_paths.PathSpec(path="/中国新闻网新闻中")),
            rdf_client.Process(pid=42),
            rdf_client.Process(pid=43),
            rdf_client.SoftwarePackage(name="test.deb")
        ]

        plugin = bigquery_plugin.BigQueryOutputPlugin(
            source_urn=self.results_urn,
            output_base_urn=self.base_urn,
            args=plugin_args,
            token=self.token)

        plugin.Initialize()

        messages = []
        for response in responses:
            messages.append(
                rdf_flows.GrrMessage(source=self.client_id, payload=response))

        with test_lib.FakeTime(1445995873):
            with mock.patch.object(bigquery,
                                   "GetBigQueryClient") as mock_bigquery:
                mock_bigquery.return_value.configure_mock(**{
                    "InsertData.side_effect":
                    bigquery.BigQueryJobUploadError()
                })
                with test_lib.ConfigOverrider(
                    {"BigQuery.max_upload_failures": 2}):
                    for message in messages:
                        plugin.ProcessResponses([message])
                    plugin.Flush()

                    # We have 3 output types but a limit of 2 upload failures, so we
                    # shouldn't try the third one.
                    self.assertEqual(
                        mock_bigquery.return_value.InsertData.call_count, 2)

        # We should have written a data file and a schema file for each type.
        for output_name in [
                "ExportedFile", "ExportedProcess",
                "AutoExportedSoftwarePackage"
        ]:
            schema_fd = aff4.FACTORY.Open(self.base_urn.Add(
                "C-1000000000000000_Results_%s_1445995873.schema" %
                output_name),
                                          token=self.token)
            data_fd = aff4.FACTORY.Open(self.base_urn.Add(
                "C-1000000000000000_Results_%s_1445995873.data" % output_name),
                                        token=self.token)
            actual_fd = gzip.GzipFile(None, "r", 9, data_fd)

            if output_name == "ExportedFile":
                self.CompareSchemaToKnownGood(json.load(schema_fd))
                self.assertEqual(
                    json.load(actual_fd)["urn"],
                    self.client_id.Add("/fs/os/中国新闻网新闻中"))
            elif output_name == "ExportedProcess":
                self.assertEqual(json.load(schema_fd)[1]["name"], "pid")
                expected_pids = ["42", "43"]
                for i, line in enumerate(actual_fd):
                    self.assertEqual(json.loads(line)["pid"], expected_pids[i])
            else:
                self.assertEqual(json.load(schema_fd)[1]["name"], "name")
                self.assertEqual(json.load(actual_fd)["name"], "test.deb")

        # Process the same messages to make sure we're re-using the filehandles.
        with test_lib.FakeTime(1445995878):
            with mock.patch.object(bigquery,
                                   "GetBigQueryClient") as mock_bigquery:
                mock_bigquery.return_value.configure_mock(**{
                    "InsertData.side_effect":
                    bigquery.BigQueryJobUploadError()
                })
                with test_lib.ConfigOverrider(
                    {"BigQuery.max_upload_failures": 2}):
                    for message in messages:
                        plugin.ProcessResponses([message])
                    plugin.Flush()

                    # We shouldn't call InsertData at all because we have
                    # already passed the maximum number of failures.
                    self.assertEqual(
                        mock_bigquery.return_value.InsertData.call_count, 0)

        expected_line_counts = {
            "ExportedFile": 2,
            "ExportedProcess": 4,
            "AutoExportedSoftwarePackage": 2
        }
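        # The same message batch is processed in both FakeTime blocks above, so
        # the two Process responses yield four lines while the single StatEntry
        # and SoftwarePackage responses yield two lines each.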
        for output_name in [
                "ExportedFile", "ExportedProcess",
                "AutoExportedSoftwarePackage"
        ]:
            data_fd = aff4.FACTORY.Open(self.base_urn.Add(
                "C-1000000000000000_Results_%s_1445995873.data" % output_name),
                                        token=self.token)
            actual_fd = gzip.GzipFile(None, "r", 9, data_fd)
            self.assertEqual(sum(1 for line in actual_fd),
                             expected_line_counts[output_name])