def index():
    request_id = log_helper.extract_request_id(request.headers)
    if not request_id:
        return Response(f"Header {log_helper.REQUEST_ID_HEADER_NAME} not found", 400)

    type_header = request.headers.get(log_helper.TYPE_HEADER_NAME)
    if type_header is None:
        return Response(f"Header {log_helper.TYPE_HEADER_NAME} not found", 400)

    message_type = log_helper.parse_message_type(type_header)
    index_name = log_helper.build_index_name(request.headers)
    body = request.get_json(force=True)

    # max size is configurable with env var or defaults to constant
    max_payload_bytes = log_helper.get_max_payload_bytes(MAX_PAYLOAD_BYTES)

    body_length = request.headers.get(log_helper.LENGTH_HEADER_NAME)
    if body_length and int(body_length) > int(max_payload_bytes):
        too_large_message = (
            "body too large for "
            + index_name
            + "/"
            + (log_helper.DOC_TYPE_NAME if log_helper.DOC_TYPE_NAME is not None else "_doc")
            + "/"
            + request_id
            + " adding "
            + message_type
        )
        print(too_large_message)
        sys.stdout.flush()
        return too_large_message

    if not isinstance(body, dict):
        body = json.loads(body)

    try:
        # now process and update the doc
        added_content = process_and_update_elastic_doc(
            es, message_type, body, request_id, request.headers, index_name
        )
        return jsonify(added_content)
    except Exception:
        traceback.print_exc()
        sys.stdout.flush()
        return Response("problem logging request", 500)
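# Illustration only -- not called by the handler above. A hedged sketch of what
# log_helper.get_max_payload_bytes might look like: the "MAX_PAYLOAD_BYTES" env var
# name and the string-or-int return value are assumptions, not taken from log_helper.
import os


def _example_get_max_payload_bytes(default_value):
    # an environment variable overrides the compiled-in default constant; index()
    # wraps the result in int() before comparing, so returning the raw env string is fine
    return os.getenv("MAX_PAYLOAD_BYTES", default_value)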
def process_and_update_elastic_doc(
    elastic_object, message_type, message_body, request_id, headers, index_name
):
    added_content = []

    if message_type == "unknown":
        print("UNKNOWN REQUEST TYPE FOR " + request_id + " - NOT PROCESSING")
        sys.stdout.flush()

    # first do any needed transformations
    new_content_part = process_content(message_type, message_body, headers)

    # set metadata to go just in this part (request or response) and not top-level
    log_helper.field_from_header(
        content=new_content_part, header_name="ce-time", headers=headers
    )
    log_helper.field_from_header(
        content=new_content_part, header_name="ce-source", headers=headers
    )

    doc_body = {message_type: new_content_part}
    log_helper.set_metadata(doc_body, headers, message_type, request_id)

    # req or res might be batches of instances so split out into individual docs
    if "instance" in new_content_part:
        if log_helper.is_reference_data(headers):
            index_name = log_helper.build_index_name(headers, prefix="reference", suffix=False)
            # ignore payload for reference data
            doc_body[message_type].pop("payload", None)

        if isinstance(new_content_part["instance"], list) and new_content_part["dataType"] != "json":
            # if we've a list then this is a batch
            # we assume the first dimension is always batch
            bulk_upsert_doc_to_elastic(
                elastic_object,
                message_type,
                doc_body,
                doc_body[message_type].copy(),
                request_id,
                index_name,
            )
        else:
            # not a batch so don't batch elements either
            if "elements" in new_content_part and isinstance(new_content_part["elements"], list):
                new_content_part["elements"] = new_content_part["elements"][0]
            item_request_id = build_request_id_batched(request_id, 1, 0)
            added_content.append(
                upsert_doc_to_elastic(
                    elastic_object, message_type, doc_body, item_request_id, index_name
                )
            )
    elif message_type == "feedback":
        item_request_id = build_request_id_batched(request_id, 1, 0)
        upsert_doc_to_elastic(
            elastic_object, message_type, doc_body, item_request_id, index_name
        )
    elif "data" in new_content_part and message_type == "outlier":
        # outlier payloads are batched: write one doc per is_outlier entry
        no_items_in_batch = len(doc_body[message_type]["data"]["is_outlier"])
        for index, item in enumerate(doc_body[message_type]["data"]["is_outlier"]):
            item_body = doc_body.copy()
            item_body[message_type]["data"]["is_outlier"] = item
            if (
                "feature_score" in item_body[message_type]["data"]
                and item_body[message_type]["data"]["feature_score"] is not None
                and len(item_body[message_type]["data"]["feature_score"]) == no_items_in_batch
            ):
                item_body[message_type]["data"]["feature_score"] = item_body[
                    message_type
                ]["data"]["feature_score"][index]
            if (
                "instance_score" in item_body[message_type]["data"]
                and item_body[message_type]["data"]["instance_score"] is not None
                and len(item_body[message_type]["data"]["instance_score"]) == no_items_in_batch
            ):
                item_body[message_type]["data"]["instance_score"] = item_body[
                    message_type
                ]["data"]["instance_score"][index]
            item_request_id = build_request_id_batched(
                request_id, no_items_in_batch, index
            )
            upsert_doc_to_elastic(
                elastic_object, message_type, item_body, item_request_id, index_name
            )
    elif "data" in new_content_part and message_type == "drift":
        item_body = doc_body.copy()
        namespace = log_helper.get_header(log_helper.NAMESPACE_HEADER_NAME, headers)
        inferenceservice_name = log_helper.get_header(log_helper.INFERENCESERVICE_HEADER_NAME, headers)
        endpoint_name = log_helper.get_header(log_helper.ENDPOINT_HEADER_NAME, headers)
        serving_engine = log_helper.serving_engine(headers)

        item_body[message_type]["data"]["is_drift"] = bool(item_body[message_type]["data"]["is_drift"])
        item_body[message_type]["data"]["drift_type"] = "batch"

        # a list of distances means per-feature (rather than batch-level) drift
        if (
            "distance" in item_body[message_type]["data"]
            and item_body[message_type]["data"]["distance"] is not None
            and isinstance(item_body[message_type]["data"]["distance"], list)
        ):
            content_dist = np.array(item_body[message_type]["data"]["distance"])
            x = np.expand_dims(content_dist, axis=0)
            item_body[message_type]["data"]["drift_type"] = "feature"
            elements = createElelmentsArray(
                x, None, namespace, serving_engine, inferenceservice_name, endpoint_name, "request", True
            )
            if isinstance(elements, list):
                elements = elements[0]
            item_body[message_type]["data"]["feature_distance"] = elements
            del item_body[message_type]["data"]["distance"]

        # likewise for per-feature p-values
        if (
            "p_val" in item_body[message_type]["data"]
            and item_body[message_type]["data"]["p_val"] is not None
            and isinstance(item_body[message_type]["data"]["p_val"], list)
        ):
            content_dist = np.array(item_body[message_type]["data"]["p_val"])
            x = np.expand_dims(content_dist, axis=0)
            item_body[message_type]["data"]["drift_type"] = "feature"
            elements = createElelmentsArray(
                x, None, namespace, serving_engine, inferenceservice_name, endpoint_name, "request", True
            )
            if isinstance(elements, list):
                elements = elements[0]
            item_body[message_type]["data"]["feature_p_val"] = elements
            del item_body[message_type]["data"]["p_val"]

        # derive the detector name from the cloudevents source, if it uses a known prefix
        detector_name = None
        ce_source = item_body[message_type]["ce-source"]
        if ce_source.startswith("io.seldon.serving."):
            detector_name = ce_source[len("io.seldon.serving."):]
        elif ce_source.startswith("org.kubeflow.serving."):
            detector_name = ce_source[len("org.kubeflow.serving."):]

        index_name = log_helper.build_index_name(headers, message_type, False, detector_name)

        upsert_doc_to_elastic(
            elastic_object, message_type, item_body, request_id, index_name
        )
    else:
        print("unexpected data format")
        print(new_content_part)

    return added_content
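# Illustration only -- a hedged sketch of the per-item id helper used above. The real
# build_request_id_batched is defined elsewhere in this module; the "-item-" suffix
# format here is an assumption for illustration, not the confirmed naming scheme.
def _example_build_request_id_batched(request_id, no_items_in_batch, item_index):
    # single-item payloads keep the original request id; items split out of a batch
    # get a per-item suffix so each one becomes its own Elasticsearch document
    if no_items_in_batch == 1:
        return request_id
    return request_id + "-item-" + str(item_index)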