def _GetArgsFromRequest(self, request, method_metadata, route_args):
  """Builds args struct out of HTTP request."""
  format_mode = GetRequestFormatMode(request, method_metadata)

  if request.method in ["GET", "HEAD"]:
    if method_metadata.args_type:
      unprocessed_request = request.args
      if hasattr(unprocessed_request, "dict"):
        unprocessed_request = unprocessed_request.dict()

      args = method_metadata.args_type()
      for type_info in args.type_infos:
        try:
          if type_info.name in route_args:
            self._SetField(args, type_info, route_args[type_info.name])
          elif type_info.name in unprocessed_request:
            self._SetField(args, type_info,
                           unprocessed_request[type_info.name])
        except Exception as e:  # pylint: disable=broad-except
          raise InvalidRequestArgumentsInRouteError(e)
    else:
      args = None
  elif request.method in ["POST", "DELETE", "PATCH"]:
    try:
      if request.content_type and request.content_type.startswith(
          "multipart/form-data;"):
        payload = json.Parse(request.form["_params_"].decode("utf-8"))
        args = method_metadata.args_type()
        args.FromDict(payload)

        for name, fd in request.files.items():
          args.Set(name, fd.read())
      elif format_mode == JsonMode.PROTO3_JSON_MODE:
        # NOTE: Arguments rdfvalue has to be a protobuf-based RDFValue.
        args_proto = method_metadata.args_type().protobuf()
        json_format.Parse(request.get_data(as_text=True) or "{}", args_proto)
        args = method_metadata.args_type.FromSerializedBytes(
            args_proto.SerializeToString())
      else:
        json_data = request.get_data(as_text=True) or "{}"
        payload = json.Parse(json_data)
        args = method_metadata.args_type()
        if payload:
          args.FromDict(payload)
    except Exception as e:  # pylint: disable=broad-except
      logging.exception("Error while parsing POST request %s (%s): %s",
                        request.path, request.method, e)
      raise PostRequestParsingError(e)

    for type_info in args.type_infos:
      if type_info.name in route_args:
        try:
          self._SetField(args, type_info, route_args[type_info.name])
        except Exception as e:  # pylint: disable=broad-except
          raise InvalidRequestArgumentsInRouteError(e)
  else:
    raise UnsupportedHttpMethod("Unsupported method: %s." % request.method)

  return args
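# Illustrative sketch only, not part of the handler above: the shape of a
# multipart request that the "multipart/form-data;" branch would parse. The
# route, field names, and payload are hypothetical; werkzeug is assumed
# because the requests handled in this module are werkzeug request objects.
def _ExampleMultipartRequest():
  import io

  from werkzeug.test import EnvironBuilder
  from werkzeug.wrappers import Request

  builder = EnvironBuilder(
      method="POST",
      path="/api/example",  # Hypothetical route.
      data={
          # JSON-encoded arguments travel in the "_params_" form field.
          "_params_": '{"name": "blob"}',
          # Binary payloads travel as file parts; the handler copies each one
          # into the args field of the same name via args.Set(name, fd.read()).
          "blob": (io.BytesIO(b"\x00\x01"), "blob.bin"),
      })
  return Request(builder.get_environ())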
def testBigQueryPluginWithEarlyFlush(self):
  responses = []
  for i in range(10):
    responses.append(
        rdf_client_fs.StatEntry(
            pathspec=rdf_paths.PathSpec(path="/foo/bar/%d" % i, pathtype="OS"),
            st_mode=33184,  # octal = 100640 => u=rw,g=r,o= => -rw-r-----
            st_ino=1063090,
            st_dev=64512,
            st_nlink=1 + i,
            st_uid=139592,
            st_gid=5000,
            st_size=0,
            st_atime=1336469177,
            st_mtime=1336129892,
            st_ctime=1336129892,
            st_btime=1338111338))

  sizes = [37, 687, 722, 755, 788, 821, 684, 719, 752, 785]

  def GetSize(unused_path):
    return sizes.pop(0)

  # Force an early flush. Gzip output is non-deterministic since our metadata
  # is a dict with unpredictable order, so we make up the file sizes such that
  # there is exactly one flush during processing.
  with test_lib.ConfigOverrider({"BigQuery.max_file_post_size": 800}):
    with utils.Stubber(os.path, "getsize", GetSize):
      output = self.ProcessResponses(
          plugin_args=bigquery_plugin.BigQueryOutputPluginArgs(),
          responses=responses)

  self.assertLen(output, 2)

  # Check that the output is still consistent.
  actual_fds = []
  for _, stream, _, _ in output:
    actual_fds.append(gzip.GzipFile(None, "r", 9, stream))

  # Compare to our stored data.
  # TODO(user): there needs to be a better way to generate these files on
  # change than breaking into the debugger.
  expected_fd = open(
      os.path.join(config.CONFIG["Test.data_dir"], "bigquery",
                   "ExportedFile.jsonlines"), "rb")

  # Check that the same entries we expect are spread across the two files.
  counter = 0
  for actual_fd in actual_fds:
    for actual, expected in zip(actual_fd, expected_fd):
      actual = actual.decode("utf-8")
      expected = expected.decode("utf-8")
      self.assertEqual(json.Parse(actual), json.Parse(expected))
      counter += 1

  self.assertEqual(counter, 10)
def testBigQueryPluginWithValuesOfSameType(self):
  responses = []
  for i in range(10):
    responses.append(
        rdf_client_fs.StatEntry(
            pathspec=rdf_paths.PathSpec(path="/foo/bar/%d" % i, pathtype="OS"),
            st_mode=33184,  # octal = 100640 => u=rw,g=r,o= => -rw-r-----
            st_ino=1063090,
            st_dev=64512,
            st_nlink=1 + i,
            st_uid=139592,
            st_gid=5000,
            st_size=0,
            st_atime=1336469177,
            st_mtime=1336129892,
            st_ctime=1336129892,
            st_btime=1338111338))

  output = self.ProcessResponses(
      plugin_args=bigquery_plugin.BigQueryOutputPluginArgs(),
      responses=responses)

  self.assertLen(output, 1)
  _, stream, schema, job_id = output[0]

  self.assertEqual(job_id,
                   "C-1000000000000000_Results_ExportedFile_1445995873")
  self.CompareSchemaToKnownGood(schema)

  actual_fd = gzip.GzipFile(
      None, "r", bigquery_plugin.BigQueryOutputPlugin.GZIP_COMPRESSION_LEVEL,
      stream)

  # Compare to our stored data.
  expected_fd = open(
      os.path.join(config.CONFIG["Test.data_dir"], "bigquery",
                   "ExportedFile.jsonlines"), "rb")

  # BigQuery expects a newline-separated list of JSON dicts, but this isn't
  # valid JSON so we can't just load the whole thing and compare.
  counter = 0
  for actual, expected in zip(actual_fd, expected_fd):
    actual = actual.decode("utf-8")
    expected = expected.decode("utf-8")
    self.assertEqual(json.Parse(actual), json.Parse(expected))
    counter += 1

  self.assertEqual(counter, 10)
def _parseOutput(self, name, stream):
  content_fd = gzip.GzipFile(None, "r", 9, stream)
  counter = 0

  for item in content_fd:
    counter += 1
    row = json.Parse(item.decode("utf-8"))

    if name == "ExportedFile":
      self.assertEqual(row["metadata"]["client_urn"], self.client_id)
      self.assertEqual(row["metadata"]["hostname"], "Host-0")
      self.assertEqual(row["metadata"]["mac_address"],
                       "aabbccddee00\nbbccddeeff00")
      self.assertEqual(row["metadata"]["source_urn"], self.results_urn)
      self.assertEqual(row["urn"], self.client_id.Add("/fs/os/中国新闻网新闻中"))
    else:
      self.assertEqual(row["metadata"]["client_urn"], self.client_id)
      self.assertEqual(row["metadata"]["hostname"], "Host-0")
      self.assertEqual(row["metadata"]["mac_address"],
                       "aabbccddee00\nbbccddeeff00")
      self.assertEqual(row["metadata"]["source_urn"], self.results_urn)
      self.assertEqual(row["pid"], "42")

  self.assertEqual(counter, 1)
def _parseOutput(self, name, stream):
  content_fd = gzip.GzipFile(None, "r", 9, stream)
  counter = 0

  # The source id is converted to a URN then to a JSON string.
  source_urn = str(rdfvalue.RDFURN(self.source_id))

  for item in content_fd:
    counter += 1
    row = json.Parse(item.decode("utf-8"))

    if name == "ExportedFile":
      self.assertEqual(row["metadata"]["client_urn"],
                       "aff4:/%s" % self.client_id)
      self.assertEqual(row["metadata"]["hostname"], "Host-0.example.com")
      self.assertEqual(row["metadata"]["mac_address"],
                       "aabbccddee00\nbbccddeeff00")
      self.assertEqual(row["metadata"]["source_urn"], source_urn)
      self.assertEqual(row["urn"],
                       "aff4:/%s/fs/os/中国新闻网新闻中" % self.client_id)
    else:
      self.assertEqual(row["metadata"]["client_urn"],
                       "aff4:/%s" % self.client_id)
      self.assertEqual(row["metadata"]["hostname"], "Host-0.example.com")
      self.assertEqual(row["metadata"]["mac_address"],
                       "aabbccddee00\nbbccddeeff00")
      self.assertEqual(row["metadata"]["source_urn"], source_urn)
      self.assertEqual(row["pid"], "42")

  self.assertEqual(counter, 1)
def _FormatResultAsJson(self, result, format_mode=None):
  if result is None:
    return dict(status="OK")

  if format_mode == JsonMode.PROTO3_JSON_MODE:
    json_data = json_format.MessageToJson(
        result.AsPrimitiveProto(), float_precision=8)
    if compatibility.PY2:
      json_data = json_data.decode("utf-8")
    return json.Parse(json_data)
  elif format_mode == JsonMode.GRR_ROOT_TYPES_STRIPPED_JSON_MODE:
    result_dict = {}
    for field, value in result.ListSetFields():
      if isinstance(field,
                    (rdf_structs.ProtoDynamicEmbedded,
                     rdf_structs.ProtoEmbedded, rdf_structs.ProtoList)):
        result_dict[field.name] = api_value_renderers.RenderValue(value)
      else:
        result_dict[field.name] = api_value_renderers.RenderValue(
            value)["value"]
    return result_dict
  elif format_mode == JsonMode.GRR_TYPE_STRIPPED_JSON_MODE:
    rendered_data = api_value_renderers.RenderValue(result)
    return api_value_renderers.StripTypeInfo(rendered_data)
  elif format_mode == JsonMode.GRR_JSON_MODE:
    return api_value_renderers.RenderValue(result)
  else:
    raise ValueError("Invalid format_mode: %s" % format_mode)
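# Minimal sketch (hypothetical message) of the PROTO3_JSON_MODE branch above:
# protobuf's canonical JSON printer produces a string, which is then re-parsed
# into plain Python structures for the HTTP layer. Reuses the json_format and
# json modules already imported in this file.
def _ExampleProto3JsonRoundtrip():
  from google.protobuf import struct_pb2

  message = struct_pb2.Struct()
  message.update({"status": "OK"})
  return json.Parse(json_format.MessageToJson(message))  # -> {"status": "OK"}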
def _ParseJSON(self, json_str):
  """Parses response JSON."""
  precondition.AssertType(json_str, Text)

  xssi_prefix = ")]}'\n"
  if json_str.startswith(xssi_prefix):
    json_str = json_str[len(xssi_prefix):]

  return json.Parse(json_str)
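# Usage sketch (made-up payload): API responses are prefixed with ")]}'\n" so
# that the body is not directly evaluable JavaScript; _ParseJSON strips that
# prefix before parsing and leaves unprefixed strings untouched.
#
#   self._ParseJSON(")]}'\n{\"count\": 3}")  # -> {"count": 3}
#   self._ParseJSON('{"count": 3}')          # -> {"count": 3}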
def testUnicode(self):
  parsed = json.Parse("""{
    "gęsi (🦆)": ["zbożowa", "krótkodzioba", "białoczelna"],
    "grzebiące (🐔)": ["jarząbek", "głuszec", "bażant"]
  }""")

  expected = {
      "gęsi (🦆)": ["zbożowa", "krótkodzioba", "białoczelna"],
      "grzebiące (🐔)": ["jarząbek", "głuszec", "bażant"],
  }

  self.assertEqual(parsed, expected)
def _ParseEvents(self, patched):
  request = patched.call_args[KWARGS]['data']

  # Elasticsearch bulk requests are newline-delimited pairs, where the first
  # line is the index command and the second is the actual document to index.
  split_requests = [json.Parse(line) for line in request.split('\n')]
  update_pairs = [(split_requests[i], split_requests[i + 1])
                  for i in range(0, len(split_requests), 2)]

  return update_pairs
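# Illustrative sketch (hypothetical index and document values, not part of the
# test suite) of the two-line bulk structure _ParseEvents unpacks: an index
# command followed by the document to index, as in the Elasticsearch _bulk API.
def _ExampleBulkPairs(self):
  request = ('{"index": {"_index": "grr-events"}}\n'
             '{"client_id": "C.1", "message": "example event"}')
  split_requests = [json.Parse(line) for line in request.split('\n')]
  # -> [({'index': {'_index': 'grr-events'}},
  #      {'client_id': 'C.1', 'message': 'example event'})]
  return list(zip(split_requests[0::2], split_requests[1::2]))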
def testComplexDict(self):
  parsed = json.Parse("""{
    "foo.bar": {
      "quux": [108, 1337],
      "thud": ["blargh", "norf"]
    },
    "foo.baz": [3.14, 1.62]
  }""")

  expected = {
      "foo.bar": {
          "quux": [108, 1337],
          "thud": ["blargh", "norf"],
      },
      "foo.baz": [3.14, 1.62],
  }

  self.assertEqual(parsed, expected)
def testBinaryDataExportDisabled(self):
  response = rdf_client_fs.BlobImageChunkDescriptor()
  response.digest = b"\x00\xff\x00\xff\x00"

  args = bigquery_plugin.BigQueryOutputPluginArgs()
  args.base64_bytes_export = False

  output = self.ProcessResponses(plugin_args=args, responses=[response])
  self.assertLen(output, 1)
  _, filedesc, _, _ = output[0]

  with gzip.GzipFile(mode="r", fileobj=filedesc) as filedesc:
    content = json.Parse(filedesc.read().decode("utf-8"))

  self.assertNotIn("digest", content)
def testMissingTimestampSerialization(self):
  response = rdf_client_fs.StatEntry()
  response.pathspec.pathtype = rdf_paths.PathSpec.PathType.OS
  response.pathspec.path = "/foo/bar"
  response.st_mtime = None

  args = bigquery_plugin.BigQueryOutputPluginArgs()
  output = self.ProcessResponses(plugin_args=args, responses=[response])

  self.assertLen(output, 1)
  _, filedesc, _, _ = output[0]

  with gzip.GzipFile(mode="r", fileobj=filedesc) as filedesc:
    content = json.Parse(filedesc.read().decode("utf-8"))

  self.assertIsNone(content["st_mtime"])
def GetBigQueryClient(service_account_json=None,
                      project_id=None,
                      dataset_id=None):
  """Create a BigQueryClient."""
  service_account_data = (
      service_account_json or config.CONFIG["BigQuery.service_acct_json"])
  project_id = project_id or config.CONFIG["BigQuery.project_id"]
  dataset_id = dataset_id or config.CONFIG["BigQuery.dataset_id"]

  if not (service_account_data and project_id and dataset_id):
    raise RuntimeError("BigQuery.service_acct_json, "
                       "BigQuery.project_id and BigQuery.dataset_id "
                       "must be defined.")

  creds = ServiceAccountCredentials.from_json_keyfile_dict(
      json.Parse(service_account_data), scopes=BIGQUERY_SCOPE)
  http_obj = httplib2.Http()
  http_obj = creds.authorize(http_obj)
  service = discovery.build("bigquery", "v2", http=http_obj)

  return BigQueryClient(
      project_id=project_id, bq_service=service, dataset_id=dataset_id)
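# Hypothetical usage sketch: with "BigQuery.service_acct_json",
# "BigQuery.project_id" and "BigQuery.dataset_id" set in the config, no
# arguments are needed; explicit arguments override the config. The key
# material and names below are made up and elided.
#
#   client = GetBigQueryClient()
#   client = GetBigQueryClient(
#       service_account_json='{"type": "service_account", ...}',
#       project_id="my-gcp-project",
#       dataset_id="grr_exports")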
def _GetResponseContent(self, response):
  content = response.get_data(as_text=True)
  # Strip the XSSI-protection prefix that the API prepends to JSON responses.
  if content.startswith(")]}'\n"):
    content = content[5:]

  return json.Parse(content)
def testSimpleList(self):
  parsed = json.Parse("""[4, 8, 15, 16, 23, 42]""")
  expected = [4, 8, 15, 16, 23, 42]
  self.assertEqual(parsed, expected)
def testSimpleDict(self):
  parsed = json.Parse("""{ "foo": "bar", "quux": 42 }""")
  expected = {"foo": "bar", "quux": 42}
  self.assertEqual(parsed, expected)
def _ParseEvents(self, patched):
  request = patched.call_args[KWARGS]['data']
  # Events are posted as a double-newline-separated list of JSON documents.
  return [json.Parse(part) for part in request.split('\n\n')]