def get_data_from_request(event: dict, capture_body: bool = False, capture_headers: bool = True) -> dict:
    """
    Capture request context data from an API Gateway event.

    :param event: the Lambda event payload
    :param capture_body: when true the (decoded) body is included; otherwise
        a present body is replaced by the ``"[REDACTED]"`` placeholder
    :param capture_headers: when true, ``event["headers"]`` is copied as-is
    :return: dict with optional ``headers``, ``method``, ``body`` and ``url``
        keys. Only ``headers`` can be present if the event carries no HTTP
        method, i.e. it did not come from API Gateway.
    """
    request_data = {}
    if capture_headers and "headers" in event:
        request_data["headers"] = event["headers"]

    # The method lives in a different place depending on the payload format.
    http_method = nested_key(event, "requestContext", "httpMethod")
    if not http_method:
        http_method = nested_key(event, "requestContext", "http", "method")
    if not http_method:
        # Not API Gateway
        return request_data
    request_data["method"] = http_method

    if http_method in constants.HTTP_WITH_BODY and "body" in event:
        raw_body = event["body"]
        if not capture_body:
            # Value is only used for the None check below; it is never emitted.
            captured = raw_body
        elif event.get("isBase64Encoded"):
            captured = base64.b64decode(raw_body)
        else:
            try:
                captured = json.loads(raw_body)
            except Exception:
                # Best effort: a body that is not valid JSON is kept verbatim.
                captured = raw_body
        if captured is not None:
            request_data["body"] = captured if capture_body else "[REDACTED]"

    request_data["url"] = get_url_dict(event)
    return request_data
def autofill_resource_context(self):
    """
    Automatically fill ``destination.service.resource`` based on other fields.

    Nothing happens when the context is empty, when a resource is already
    set, or when the span is neither a leaf nor carries one of the
    ``destination``/``db``/``message``/``http`` context keys. Otherwise the
    resource is derived (in order of preference) from the db instance, the
    message queue name, the HTTP URL, or the span subtype/type alone.
    """
    context = self.context
    if not context:
        return
    if nested_key(context, "destination", "service", "resource"):
        # An explicitly provided resource always wins.
        return
    if not (self.leaf or any(key in context for key in ("destination", "db", "message", "http"))):
        return

    type_info = self.subtype or self.type
    db_instance = nested_key(context, "db", "instance")
    queue_name = nested_key(context, "message", "queue", "name")
    http_url = nested_key(context, "http", "url")

    if db_instance:
        derived = f"{type_info}/{db_instance}"
    elif queue_name:
        derived = f"{type_info}/{queue_name}"
    elif http_url:
        derived = url_to_destination_resource(http_url)
    else:
        derived = type_info

    service = context.setdefault("destination", {}).setdefault("service", {})
    service["resource"] = derived
    # set fields that are deprecated, but still required by APM Server API
    service.setdefault("name", "")
    service.setdefault("type", "")
def __enter__(self):
    """
    Transaction setup

    Inspects ``self.event`` to classify the trigger (``self.source`` becomes
    one of ``"api"``, ``"s3"``, ``"sns"``, ``"sqs"`` or ``"other"``), derives
    the transaction type and name from it, begins the transaction on
    ``self.client`` and finally records request/metadata context.

    Side effects: clears the module-level ``COLD_START`` flag and sets
    ``self.source``, ``self.httpmethod`` and ``self.transaction``.
    """
    global COLD_START

    # An event may carry `"headers": null`; `.get("headers", {})` would then
    # hand None to TraceParent.from_headers, so normalise it to an empty dict.
    trace_parent = TraceParent.from_headers(self.event.get("headers") or {})

    cold_start = COLD_START
    COLD_START = False

    self.source = "other"
    transaction_type = "request"
    transaction_name = os.environ.get("AWS_LAMBDA_FUNCTION_NAME", self.name)

    # The HTTP method lives in a different place depending on the payload format.
    self.httpmethod = nested_key(self.event, "requestContext", "httpMethod") or nested_key(
        self.event, "requestContext", "http", "method"
    )

    if self.httpmethod:  # API Gateway
        self.source = "api"
        if os.environ.get("AWS_LAMBDA_FUNCTION_NAME"):
            transaction_name = "{} {}".format(self.httpmethod, os.environ["AWS_LAMBDA_FUNCTION_NAME"])
        else:
            transaction_name = self.name
    elif "Records" in self.event and len(self.event["Records"]) == 1:
        # Only single-record events are classified; batches stay "other".
        record = self.event["Records"][0]
        if record.get("eventSource") == "aws:s3":  # S3
            self.source = "s3"
            transaction_name = "{} {}".format(record["eventName"], record["s3"]["bucket"]["name"])
        elif record.get("EventSource") == "aws:sns":  # SNS (note the capitalised key)
            self.source = "sns"
            transaction_type = "messaging"
            transaction_name = "RECEIVE {}".format(record["Sns"]["TopicArn"].split(":")[5])
        elif record.get("eventSource") == "aws:sqs":  # SQS
            self.source = "sqs"
            transaction_type = "messaging"
            transaction_name = "RECEIVE {}".format(record["eventSourceARN"].split(":")[5])

    self.transaction = self.client.begin_transaction(transaction_type, trace_parent=trace_parent)
    elasticapm.set_transaction_name(transaction_name, override=False)

    if self.source == "api":
        # The request data is handed over as a callable rather than a
        # pre-computed dict.
        elasticapm.set_context(
            lambda: get_data_from_request(
                self.event,
                capture_body=self.client.config.capture_body in ("transactions", "all"),
                capture_headers=self.client.config.capture_headers,
            ),
            "request",
        )
    self.set_metadata_and_context(cold_start)
def is_same_kind(self, other_span: SpanType) -> bool:
    """
    Tell whether two spans are of the same kind for compression purposes.

    Spans are of the same kind when type, subtype and a non-empty
    destination.service.resource all match.

    :param other_span: another span object
    :return: bool
    """
    own_resource = nested_key(self.context, "destination", "service", "resource")
    if not (self.type == other_span.type and self.subtype == other_span.subtype):
        return False
    if not own_resource:
        # Spans without a resource are never considered the same kind.
        return False
    other_resource = nested_key(other_span.context, "destination", "service", "resource")
    return bool(own_resource == other_resource)
def get_url_dict(event: dict) -> dict:
    """
    Reconstruct URL from API Gateway

    :param event: the Lambda event payload
    :return: dict with ``full``, ``protocol``, ``hostname`` and ``pathname``
        keys, plus ``port`` and ``search`` when they can be determined. A
        non-empty ``search`` always starts with ``?``.
    """
    # `"headers": null` must not break the lookups below.
    headers = event.get("headers") or {}
    protocol = headers.get("X-Forwarded-Proto", headers.get("x-forwarded-proto", "https"))
    host = headers.get("Host", headers.get("host", ""))
    stage = "/" + (nested_key(event, "requestContext", "stage") or "")
    # Without "path", fall back to "rawPath" with the stage prefix stripped.
    path = event.get("path", event.get("rawPath", "").split(stage)[-1])
    port = headers.get("X-Forwarded-Port", headers.get("x-forwarded-port"))

    query = ""
    if "rawQueryString" in event:
        # rawQueryString arrives without the leading "?"; add it so the
        # query does not run straight into the path in the full URL.
        raw_query = event["rawQueryString"] or ""
        if raw_query and not raw_query.startswith("?"):
            raw_query = "?" + raw_query
        query = raw_query
    elif event.get("queryStringParameters"):
        # Pairs must be separated by "&"; plain concatenation fuses them.
        query = "?" + "&".join(
            "{}={}".format(key, value) for key, value in event["queryStringParameters"].items()
        )

    url = protocol + "://" + host + stage + path + query

    url_dict = {
        "full": encoding.keyword_field(url),
        "protocol": protocol,
        "hostname": encoding.keyword_field(host),
        "pathname": encoding.keyword_field(stage + path),
    }
    if port:
        url_dict["port"] = port
    if query:
        url_dict["search"] = encoding.keyword_field(query)
    return url_dict
def to_dict(self) -> dict:
    """
    Serialize this span into a plain dict.

    Composite spans compressed with the ``same_kind`` strategy that have a
    destination resource are named ``"Calls to <resource>"``; all other
    spans keep their own name. Labels are stored under ``context["tags"]``
    and the destination resource is auto-filled before the context is
    emitted.
    """
    composite = self.composite
    if (
        composite
        and composite["compression_strategy"] == "same_kind"
        and nested_key(self.context, "destination", "service", "resource")
    ):
        span_name = "Calls to " + self.context["destination"]["service"]["resource"]
    else:
        span_name = self.name

    # use either the explicitly set parent_span_id, or the id of the parent, or finally the transaction id
    parent_id = self.parent_span_id
    if not parent_id:
        parent_id = self.parent.id if self.parent else self.transaction.id

    payload = {
        "id": self.id,
        "transaction_id": self.transaction.id,
        "trace_id": self.transaction.trace_parent.trace_id,
        "parent_id": parent_id,
        "name": encoding.keyword_field(span_name),
        "type": encoding.keyword_field(self.type),
        "subtype": encoding.keyword_field(self.subtype),
        "action": encoding.keyword_field(self.action),
        "timestamp": int(self.timestamp * 1000000),  # microseconds
        "duration": self.duration * 1000,  # milliseconds
        "outcome": self.outcome,
    }

    sample_rate = self.transaction.sample_rate
    if sample_rate is not None:
        payload["sample_rate"] = float(sample_rate)
    if self.sync is not None:
        payload["sync"] = self.sync

    if self.labels:
        # Labels travel inside the context, so make sure one exists.
        if self.context is None:
            self.context = {}
        self.context["tags"] = self.labels
    if self.context:
        self.autofill_resource_context()
        payload["context"] = self.context

    if self.frames:
        payload["stacktrace"] = self.frames
    if composite:
        payload["composite"] = {
            "compression_strategy": composite["compression_strategy"],
            "sum": composite["sum"] * 1000,
            "count": composite["count"],
        }
    return payload
def to_dict(self) -> dict:
    """
    Serialize this span into a plain dict.

    Composite spans compressed with the ``same_kind`` strategy that have a
    destination resource are named ``"Calls to <resource>"``; all other
    spans keep their own name.

    Side effects: labels are written to ``self.context["tags"]`` (creating
    the context dict if needed), and a non-empty context gets its
    ``destination.service`` fields auto-filled.

    :return: dict representation of the span
    """
    if (
        self.composite
        and self.composite["compression_strategy"] == "same_kind"
        and nested_key(self.context, "destination", "service", "resource")
    ):
        name = "Calls to " + self.context["destination"]["service"]["resource"]
    else:
        name = self.name
    result = {
        "id": self.id,
        "transaction_id": self.transaction.id,
        "trace_id": self.transaction.trace_parent.trace_id,
        # use either the explicitly set parent_span_id, or the id of the parent, or finally the transaction id
        "parent_id": self.parent_span_id or (self.parent.id if self.parent else self.transaction.id),
        "name": encoding.keyword_field(name),
        "type": encoding.keyword_field(self.type),
        "subtype": encoding.keyword_field(self.subtype),
        "action": encoding.keyword_field(self.action),
        "timestamp": int(self.timestamp * 1000000),  # microseconds
        "duration": self.duration * 1000,  # milliseconds
        "outcome": self.outcome,
    }
    if self.transaction.sample_rate is not None:
        result["sample_rate"] = float(self.transaction.sample_rate)
    if self.sync is not None:
        result["sync"] = self.sync
    if self.labels:
        if self.context is None:
            self.context = {}
        self.context["tags"] = self.labels
    if self.context:
        # Resource derivation lives in autofill_resource_context(); call it
        # instead of keeping a second inline copy of the same logic here.
        self.autofill_resource_context()
        result["context"] = self.context
    if self.frames:
        result["stacktrace"] = self.frames
    if self.composite:
        result["composite"] = {
            "compression_strategy": self.composite["compression_strategy"],
            "sum": self.composite["sum"] * 1000,
            "count": self.composite["count"],
        }
    return result
def set_metadata_and_context(self, coldstart: bool) -> None:
    """
    Process the metadata and context fields for this request

    Builds the ``cloud``, ``service``, ``faas`` and (for SQS/SNS) ``message``
    contexts from ``self.event`` according to ``self.source``, stores them
    via ``elasticapm.set_context`` and adds service/cloud metadata derived
    from the Lambda environment to the client's transport.

    :param coldstart: whether this invocation is a cold start; stored as
        ``faas.coldstart``
    """
    metadata = {}
    cloud_context = {"origin": {"provider": "aws"}}
    service_context = {}
    message_context = {}
    faas = {
        "coldstart": coldstart,
        "trigger": {"type": "other"},
        "execution": self.context.aws_request_id,
    }

    if self.source == "api":
        request_context = self.event["requestContext"]
        faas["trigger"]["type"] = "http"
        faas["trigger"]["request_id"] = request_context["requestId"]
        # Prefer "resourcePath"; otherwise strip the stage from the http path.
        path = request_context.get("resourcePath") or request_context["http"]["path"].split(
            request_context["stage"]
        )[-1]
        service_context["origin"] = {
            "name": "{} {}/{}".format(self.httpmethod, request_context["stage"], path),
            "id": request_context["apiId"],
            "version": self.event.get("version", "1.0"),
        }
        cloud_context["origin"] = {
            "service": {"name": "api gateway"},
            "account": {"id": request_context["accountId"]},
            "provider": "aws",
        }
    elif self.source == "sqs":
        record = self.event["Records"][0]
        queue_arn = record["eventSourceARN"]
        queue_arn_parts = queue_arn.split(":")
        faas["trigger"]["type"] = "pubsub"
        faas["trigger"]["request_id"] = record["messageId"]
        service_context["origin"] = {"name": queue_arn_parts[5], "id": queue_arn}
        cloud_context["origin"] = {
            "service": {"name": "sqs"},
            "region": record["awsRegion"],
            "account": {"id": queue_arn_parts[4]},
            "provider": "aws",
        }
        message_context["queue"] = service_context["origin"]["name"]
        if "SentTimestamp" in record["attributes"]:
            # Age is "now" minus the SentTimestamp attribute, both in ms.
            message_context["age"] = {
                "ms": int((time.time() * 1000) - int(record["attributes"]["SentTimestamp"]))
            }
        if self.client.config.capture_body in ("transactions", "all") and "body" in record:
            message_context["body"] = record["body"]
        if self.client.config.capture_headers and record.get("messageAttributes"):
            message_context["headers"] = record["messageAttributes"]
    elif self.source == "sns":
        record = self.event["Records"][0]
        sns = record["Sns"]
        topic_arn = sns["TopicArn"]
        topic_arn_parts = topic_arn.split(":")
        faas["trigger"]["type"] = "pubsub"
        faas["trigger"]["request_id"] = topic_arn
        service_context["origin"] = {
            "name": topic_arn_parts[5],
            "id": topic_arn,
            "version": record["EventVersion"],
        }
        # Same shape as the API Gateway and SQS branches: the originating
        # service name and account id belong to the *cloud* origin.
        cloud_context["origin"] = {
            "service": {"name": "sns"},
            "region": topic_arn_parts[3],
            "account": {"id": topic_arn_parts[4]},
            "provider": "aws",
        }
        message_context["queue"] = service_context["origin"]["name"]
        if "Timestamp" in sns:
            # The timestamp format ends in a literal "Z" (UTC), so compare it
            # against an aware UTC "now" instead of naive local time.
            sent_at = datetime.datetime.strptime(sns["Timestamp"], r"%Y-%m-%dT%H:%M:%S.%fZ").replace(
                tzinfo=datetime.timezone.utc
            )
            now = datetime.datetime.now(datetime.timezone.utc)
            message_context["age"] = {"ms": int((now - sent_at).total_seconds() * 1000)}
        if self.client.config.capture_body in ("transactions", "all") and "Message" in sns:
            message_context["body"] = sns["Message"]
        if self.client.config.capture_headers and sns.get("MessageAttributes"):
            message_context["headers"] = sns["MessageAttributes"]
    elif self.source == "s3":
        record = self.event["Records"][0]
        faas["trigger"]["type"] = "datasource"
        faas["trigger"]["request_id"] = record["responseElements"]["x-amz-request-id"]
        service_context["origin"] = {
            "name": record["s3"]["bucket"]["name"],
            "id": record["s3"]["bucket"]["arn"],
            "version": record["eventVersion"],
        }
        cloud_context["origin"] = {
            "service": {"name": "s3"},
            "region": record["awsRegion"],
            "provider": "aws",
        }

    # Keep at most the first seven colon-separated ARN segments.
    arn = self.context.invoked_function_arn
    arn_parts = arn.split(":")
    if len(arn_parts) > 7:
        arn = ":".join(arn_parts[:7])

    metadata["service"] = {
        "name": os.environ.get("AWS_LAMBDA_FUNCTION_NAME"),
        "framework": {"name": "AWS Lambda"},
        "runtime": {
            "name": os.environ.get("AWS_EXECUTION_ENV"),
            "version": platform.python_version(),
        },
        "id": arn,
        "version": os.environ.get("AWS_LAMBDA_FUNCTION_VERSION"),
        "node": {"configured_name": os.environ.get("AWS_LAMBDA_LOG_STREAM_NAME")},
    }
    # This is the one piece of metadata that requires deep merging. We add it manually
    # here to avoid having to deep merge in _transport.add_metadata()
    if self.client._transport._metadata:
        node_name = nested_key(self.client._transport._metadata, "service", "node", "name")
        if node_name:
            metadata["service"]["node"]["name"] = node_name

    metadata["cloud"] = {
        "provider": "aws",
        "region": os.environ.get("AWS_REGION"),
        "service": {"name": "lambda"},
        "account": {"id": arn_parts[4]},
    }

    elasticapm.set_context(cloud_context, "cloud")
    elasticapm.set_context(service_context, "service")
    # faas doesn't actually belong in context, but we handle this in to_dict
    elasticapm.set_context(faas, "faas")
    if message_context:
        # Store the message context itself, not the service context, under "message".
        elasticapm.set_context(message_context, "message")
    self.client._transport.add_metadata(metadata)
def test_nested_key(data, key, expected):
    """
    Check that ``nested_key`` resolves the dotted path ``key`` inside ``data``.

    When ``expected`` is ``None`` the result must be ``None`` itself (identity
    check), otherwise it must compare equal to ``expected``.
    """
    path = key.split(".")
    actual = nested_key(data, *path)
    if expected is not None:
        assert actual == expected
    else:
        assert actual is None