def __init__(self, path, storage=None):
	from datetime import datetime

	self._descriptor_path = os.path.join(path, "descriptor.json")
	with open(self._descriptor_path, "r") as f:
		self.descriptor_json = f.read()
	self._descriptor = json.loads(self.descriptor_json)

	self._powerlog_path = os.path.join(path, "power.log")
	with open(self._powerlog_path, "r") as f:
		self._log = f.read()

	api_key_str = self._descriptor["event"]["headers"]["X-Api-Key"]
	self._api_key = APIKey.objects.get_or_create(api_key=api_key_str, defaults={
		"full_name": "Test Client",
		"email": "*****@*****.**",
		"website": "https://example.org",
	})[0]

	auth_token_str = self._descriptor["event"]["headers"]["Authorization"].split()[1]
	self._auth_token = AuthToken.objects.get_or_create(
		key=auth_token_str,
		creation_apikey=self._api_key
	)[0]
	self._api_key.tokens.add(self._auth_token)

	timestamp_str = self._descriptor["upload_metadata"]["match_start"][0:16]
	self._timestamp = datetime.strptime(timestamp_str, "%Y-%m-%dT%H:%M")
	self._shortid = self._descriptor["shortid"]

	if storage:
		key = _generate_upload_key(self._timestamp, self._shortid)
		os.makedirs(os.path.dirname(storage.path(key)), exist_ok=True)
		with storage.open(key, mode="w") as log_file:
			log_file.write(self._log)

	self._reason = None
	self._delete_was_called = False
def __init__(self, path, storage=None):
	self._descriptor_path = os.path.join(path, "descriptor.json")
	with open(self._descriptor_path, "r") as f:
		self._descriptor = json.load(f)

	self._powerlog_path = os.path.join(path, "power.log")
	with open(self._powerlog_path, "r") as f:
		self._log = f.read()

	api_key_str = self._descriptor["gateway_headers"]["X-Api-Key"]
	self._api_key = APIKey.objects.get_or_create(api_key=api_key_str, defaults={
		"full_name": "Test Client",
		"email": "*****@*****.**",
		"website": "https://example.org",
	})[0]

	auth_token_str = self._descriptor["gateway_headers"]["Authorization"].split()[1]
	self._auth_token = AuthToken.objects.get_or_create(
		key=auth_token_str,
		creation_apikey=self._api_key
	)[0]
	self._api_key.tokens.add(self._auth_token)

	timestamp_str = self._descriptor["upload_metadata"]["match_start"][0:16]
	self._timestamp = datetime.strptime(timestamp_str, "%Y-%m-%dT%H:%M")
	self._shortid = self._descriptor["shortid"]

	if storage:
		key = _generate_upload_key(self._timestamp, self._shortid)
		os.makedirs(os.path.dirname(storage.path(key)), exist_ok=True)
		with storage.open(key, mode="w") as log_file:
			log_file.write(self._log)

	self._upload_event_log_bucket = None
	self._upload_event_log_key = None
	self._reason = None
	self._delete_was_called = False
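# Usage sketch (illustrative, not part of the original source): the constructors above
# read a fixture directory containing descriptor.json and power.log and register the
# matching APIKey/AuthToken rows. Assuming they belong to a test helper class -- named
# MockRawUpload here purely as a hypothetical -- a test could build one like this. The
# fixture path and the use of Django's default_storage are assumptions for illustration.
def _example_build_mock_raw_upload():
	from django.core.files.storage import default_storage

	# The directory is expected to hold descriptor.json and power.log side by side.
	raw_upload = MockRawUpload("tests/fixtures/uploads/example", storage=default_storage)
	return raw_upload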
def process_raw_upload(raw_upload, reprocess=False, log_group_name="", log_stream_name=""):
	"""
	Generic processing logic for raw log files.
	"""
	from ..games.serializers import UploadEventSerializer

	logger = logging.getLogger("hsreplaynet.lambdas.process_raw_upload")

	obj, created = UploadEvent.objects.get_or_create(
		shortid=raw_upload.shortid,
		defaults={"status": UploadEventStatus.PENDING}
	)
	logger.debug("UploadEvent Created: %r", created)

	if not created and not reprocess:
		# This can occur two ways:
		# 1) The client sends the PUT request twice
		# 2) Re-enabling processing queues an upload to the stream and the S3 event fires
		logger.info("Invocation is an instance of double_put. Exiting Early.")
		influx_metric("raw_log_double_put", {
			"count": 1,
			"shortid": raw_upload.shortid,
			"key": raw_upload.log_key
		})
		return

	obj.log_group_name = log_group_name
	obj.log_stream_name = log_stream_name

	descriptor = raw_upload.descriptor
	new_log_key = _generate_upload_key(raw_upload.timestamp, raw_upload.shortid)
	new_bucket = settings.AWS_STORAGE_BUCKET_NAME

	# Move power.log to the other bucket if it's needed
	raw_upload.prepare_upload_event_log_location(new_bucket, new_log_key)

	upload_metadata = descriptor["upload_metadata"]
	event = descriptor["event"]
	headers = {k.lower(): v for k, v in event.get("headers", {}).items()}
	user_agent = headers.get("user-agent", "")
	logger.debug("User Agent: %r", user_agent)

	obj.file = new_log_key
	obj.descriptor_data = raw_upload.descriptor_json
	obj.upload_ip = event.get("requestContext", {}).get("identity", {}).get("sourceIp", "")
	obj.canary = "canary" in upload_metadata and upload_metadata["canary"]
	obj.user_agent = user_agent[:100]
	obj.status = UploadEventStatus.VALIDATING

	try:
		if not obj.user_agent:
			raise ValidationError("Missing User-Agent header")
		header = headers.get("authorization", "")
		token = auth_token_from_header(header)
		if not token:
			msg = "Malformed or Invalid Authorization Header: %r" % (header)
			logger.error(msg)
			raise ValidationError(msg)
		obj.token = token

		api_key = headers.get("x-api-key", "")
		if not api_key:
			raise ValidationError("Missing X-Api-Key header. Please contact us for an API key.")
		obj.api_key = LegacyAPIKey.objects.get(api_key=api_key)
	except (ValidationError, LegacyAPIKey.DoesNotExist) as e:
		logger.error("Exception: %r", e)
		obj.status = UploadEventStatus.VALIDATION_ERROR
		obj.error = e
		obj.save()
		logger.info("All state successfully saved to UploadEvent with id: %r", obj.id)

		# If we get here, now everything is in the DB.
		# Clear out the raw upload so it doesn't clog up the pipeline.
		raw_upload.delete()
		logger.info("Deleting objects from S3 succeeded.")
		logger.info("Validation Error will be raised and we will not proceed to processing")
		raise
	else:
		if "test_data" in upload_metadata or obj.token.test_data:
			logger.debug("Upload Event Is TEST DATA")

		if obj.token.test_data:
			# When token.test_data = True, then all UploadEvents are test_data = True
			obj.test_data = True

		# Clients whose user agent is on the blacklist are too old to be supported
		is_unsupported_client = obj.user_agent.startswith(settings.UPLOAD_USER_AGENT_BLACKLIST)
		if is_unsupported_client:
			logger.info("No UA provided. Marking as unsupported (client too old).")
			influx_metric("upload_from_unsupported_client", {
				"count": 1,
				"shortid": raw_upload.shortid,
				"api_key": obj.api_key.full_name
			})
			obj.status = UploadEventStatus.UNSUPPORTED_CLIENT

		obj.save()
		logger.debug("Saved: UploadEvent.id = %r", obj.id)

		# If we get here, now everything is in the DB.
		raw_upload.delete()
		logger.debug("Deleting objects from S3 succeeded")

		if is_unsupported_client:
			# Wait until after we have deleted the raw_upload to exit
			# But do not start processing if it's an unsupported client
			logger.info("Exiting Without Processing - Unsupported Client")
			return

		serializer = UploadEventSerializer(obj, data=upload_metadata)
		if serializer.is_valid():
			logger.debug("UploadEvent passed serializer validation")
			obj.status = UploadEventStatus.PROCESSING
			serializer.save()

			logger.debug("Starting GameReplay processing for UploadEvent")
			obj.process()
		else:
			obj.error = serializer.errors
			logger.info("UploadEvent failed validation with errors: %r", obj.error)
			obj.status = UploadEventStatus.VALIDATION_ERROR
			obj.save()

	logger.debug("Done")
def process_raw_upload(raw_upload):
	"""A method for processing a raw upload in S3.

	This will usually be invoked by process_s3_create_handler, but it can also be
	invoked when a raw upload is queued for reprocessing via SNS.
	"""
	logger = logging.getLogger("hsreplaynet.lambdas.process_raw_upload")
	logger.info("Starting processing for RawUpload: %s", str(raw_upload))

	descriptor = raw_upload.descriptor
	new_key = _generate_upload_key(raw_upload.timestamp, raw_upload.shortid)
	new_bucket = settings.AWS_STORAGE_BUCKET_NAME

	# First we copy the log to the proper location
	copy_source = "%s/%s" % (raw_upload.bucket, raw_upload.log_key)
	logger.info("*** COPY RAW LOG TO NEW LOCATION ***")
	logger.info("SOURCE: %s" % copy_source)
	logger.info("DESTINATION: %s/%s" % (new_bucket, new_key))
	aws.S3.copy_object(
		Bucket=new_bucket,
		Key=new_key,
		CopySource=copy_source,
	)

	# Then we build the request and send it to DRF
	# If "file" is a string, DRF will interpret it as an S3 key
	upload_metadata = descriptor["upload_metadata"]
	upload_metadata["shortid"] = descriptor["shortid"]
	upload_metadata["file"] = new_key
	upload_metadata["type"] = int(UploadEventType.POWER_LOG)

	gateway_headers = descriptor["gateway_headers"]
	headers = {
		"HTTP_X_FORWARDED_FOR": descriptor["source_ip"],
		"HTTP_AUTHORIZATION": gateway_headers["Authorization"],
		"HTTP_X_API_KEY": gateway_headers["X-Api-Key"],
		"format": "json",
	}
	path = descriptor["event"]["path"]
	request = emulate_api_request(path, upload_metadata, headers)

	try:
		result = create_upload_event_from_request(request)
	except Exception as e:
		logger.info("Create Upload Event Failed!!")
		# If DRF fails, delete the copy of the log so we don't leave orphans around.
		aws.S3.delete_object(Bucket=new_bucket, Key=new_key)
		# Now move the failed upload into the failed location for easier inspection.
		raw_upload.make_failed(str(e))
		logger.info("RawUpload has been marked failed: %s", str(raw_upload))
		raise
	else:
		logger.info("Create Upload Event Success - RawUpload will be deleted.")
		# If DRF returns success, then we delete the raw_upload
		raw_upload.delete()

	logger.info("Processing RawUpload Complete.")
	return result
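# Illustrative sketch (assumptions noted): the docstring above says this function is
# normally driven by an S3 "ObjectCreated" event handler. A minimal handler might look
# like the following, assuming a RawUpload(bucket, key) constructor; that signature and
# the handler name are assumptions for illustration, not the project's confirmed API.
def example_s3_create_handler(event, context):
	from urllib.parse import unquote_plus

	for record in event["Records"]:
		bucket = record["s3"]["bucket"]["name"]
		# S3 event notification keys are URL-encoded (spaces arrive as "+").
		key = unquote_plus(record["s3"]["object"]["key"])
		raw_upload = RawUpload(bucket, key)
		process_raw_upload(raw_upload)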
def process_raw_upload(raw_upload, reprocess=False, log_group_name="", log_stream_name=""):
	"""
	Generic processing logic for raw log files.
	"""
	logger = logging.getLogger("hsreplaynet.lambdas.process_raw_upload")

	obj, created = UploadEvent.objects.get_or_create(
		shortid=raw_upload.shortid,
		defaults={"status": UploadEventStatus.PENDING}
	)
	logger.debug("UploadEvent Created: %r", created)

	if not created and not reprocess:
		# This can occur two ways:
		# 1) The client sends the PUT request twice
		# 2) Re-enabling processing queues an upload to the stream and the S3 event fires
		logger.info("Invocation is an instance of double_put. Exiting Early.")
		influx_metric("raw_log_double_put", {
			"count": 1,
			"shortid": raw_upload.shortid,
			"key": raw_upload.log_key
		})
		return

	obj.log_group_name = log_group_name
	obj.log_stream_name = log_stream_name

	descriptor = raw_upload.descriptor
	new_log_key = _generate_upload_key(raw_upload.timestamp, raw_upload.shortid)
	new_bucket = settings.AWS_STORAGE_BUCKET_NAME

	# Move power.log/descriptor.json to the other bucket if it's needed
	raw_upload.prepare_upload_event_log_location(new_bucket, new_log_key)

	upload_metadata = descriptor["upload_metadata"]
	gateway_headers = descriptor["gateway_headers"]

	if "User-Agent" in gateway_headers:
		logger.debug("User Agent: %s", gateway_headers["User-Agent"])
	else:
		logger.debug("User Agent: UNKNOWN")

	obj.file = new_log_key
	obj.descriptor_data = json.dumps(descriptor)
	obj.upload_ip = descriptor["source_ip"]
	obj.canary = "canary" in upload_metadata and upload_metadata["canary"]
	obj.user_agent = gateway_headers.get("User-Agent", "")[:100]
	obj.status = UploadEventStatus.VALIDATING

	try:
		header = gateway_headers.get("Authorization", "")
		token = AuthToken.get_token_from_header(header)
		if not token:
			msg = "Malformed or Invalid Authorization Header: %r" % (header)
			logger.error(msg)
			raise ValidationError(msg)
		obj.token = token

		api_key = gateway_headers.get("X-Api-Key", "")
		if not api_key:
			raise ValidationError("Missing X-Api-Key header. Please contact us for an API key.")
		obj.api_key = APIKey.objects.get(api_key=api_key)
	except (ValidationError, APIKey.DoesNotExist) as e:
		logger.error("Exception: %r", e)
		obj.status = UploadEventStatus.VALIDATION_ERROR
		obj.error = e
		obj.save()
		logger.info("All state successfully saved to UploadEvent with id: %r", obj.id)

		# If we get here, now everything is in the DB.
		# Clear out the raw upload so it doesn't clog up the pipeline.
		raw_upload.delete()
		logger.info("Deleting objects from S3 succeeded.")
		logger.info("Validation Error will be raised and we will not proceed to processing")
		raise
	else:
		if "test_data" in upload_metadata or obj.token.test_data:
			logger.debug("Upload Event Is TEST DATA")

		if obj.token.test_data:
			# When token.test_data = True, then all UploadEvents are test_data = True
			obj.test_data = True

		# Only old clients released during beta do not include a user agent
		is_unsupported_client = not obj.user_agent
		if is_unsupported_client:
			logger.info("No UA provided. Marking as unsupported (client too old).")
			influx_metric("upload_from_unsupported_client", {
				"count": 1,
				"shortid": raw_upload.shortid,
				"api_key": obj.api_key.full_name
			})
			obj.status = UploadEventStatus.UNSUPPORTED_CLIENT

		obj.save()
		logger.debug("Saved: UploadEvent.id = %r", obj.id)

		# If we get here, now everything is in the DB.
		raw_upload.delete()
		logger.debug("Deleting objects from S3 succeeded")

		if is_unsupported_client:
			# Wait until after we have deleted the raw_upload to exit
			# But do not start processing if it's an unsupported client
			logger.info("Exiting Without Processing - Unsupported Client")
			return

		serializer = UploadEventSerializer(obj, data=upload_metadata)
		if serializer.is_valid():
			logger.debug("UploadEvent passed serializer validation")
			obj.status = UploadEventStatus.PROCESSING
			serializer.save()

			logger.debug("Starting GameReplay processing for UploadEvent")
			obj.process()
		else:
			obj.error = serializer.errors
			logger.info("UploadEvent failed validation with errors: %r", obj.error)
			obj.status = UploadEventStatus.VALIDATION_ERROR
			obj.save()

	logger.debug("Done")
def process_raw_upload(raw_upload, reprocess=False, log_group_name="", log_stream_name=""):
	"""
	Generic processing logic for raw log files.
	"""
	logger = logging.getLogger("hsreplaynet.lambdas.process_raw_upload")

	obj, created = UploadEvent.objects.get_or_create(
		shortid=raw_upload.shortid,
		defaults={"status": UploadEventStatus.PENDING}
	)
	logger.info("The created flag for this upload event is: %r", created)

	if not created and not reprocess:
		# This can occur two ways:
		# 1) The client sends the PUT request twice
		# 2) Re-enabling processing queues an upload to the stream and the S3 event fires
		logger.info("Invocation is an instance of double_put. Exiting Early.")
		influx_metric("raw_log_double_put", {
			"count": 1,
			"shortid": raw_upload.shortid,
			"key": raw_upload.log_key
		})
		return

	obj.log_group_name = log_group_name
	obj.log_stream_name = log_stream_name

	descriptor = raw_upload.descriptor
	new_log_key = _generate_upload_key(raw_upload.timestamp, raw_upload.shortid)
	new_descriptor_key = _generate_upload_key(
		raw_upload.timestamp, raw_upload.shortid, "descriptor.json"
	)
	new_bucket = settings.AWS_STORAGE_BUCKET_NAME

	# Move power.log/descriptor.json to the other bucket if it's needed
	raw_upload.prepare_upload_event_log_location(new_bucket, new_log_key, new_descriptor_key)

	upload_metadata = descriptor["upload_metadata"]
	gateway_headers = descriptor["gateway_headers"]

	if "User-Agent" in gateway_headers:
		logger.info("The uploading user agent is: %s", gateway_headers["User-Agent"])
	else:
		logger.info("A User-Agent header was not provided.")

	obj.file = new_log_key
	obj.descriptor = new_descriptor_key
	obj.upload_ip = descriptor["source_ip"]
	obj.canary = "canary" in upload_metadata and upload_metadata["canary"]
	obj.user_agent = gateway_headers.get("User-Agent", "")[:100]
	obj.status = UploadEventStatus.VALIDATING

	try:
		header = gateway_headers["Authorization"]
		token = AuthToken.get_token_from_header(header)
		if not token:
			msg = "Malformed or Invalid Authorization Header: %r" % (header)
			logger.error(msg)
			raise Exception(msg)
		obj.token = token
		obj.api_key = APIKey.objects.get(api_key=gateway_headers["X-Api-Key"])
	except Exception as e:
		logger.error("Exception: %r", e)
		obj.status = UploadEventStatus.VALIDATION_ERROR
		obj.error = e
		obj.save()
		logger.info("All state successfully saved to UploadEvent with id: %r", obj.id)

		# If we get here, now everything is in the DB.
		# Clear out the raw upload so it doesn't clog up the pipeline.
		raw_upload.delete()
		logger.info("Deleting objects from S3 succeeded.")
		logger.info("Validation Error will be raised and we will not proceed to processing")
		raise
	else:
		if "test_data" in upload_metadata or obj.token.test_data:
			logger.info("Upload Event Is TEST DATA")

		if obj.token.test_data:
			# When token.test_data = True, then all UploadEvents are test_data = True
			obj.test_data = True

		# Only old clients released during beta do not include a user agent
		is_unsupported_client = not obj.user_agent
		if is_unsupported_client:
			logger.info("No UA provided. Marking as unsupported (client too old).")
			influx_metric("upload_from_unsupported_client", {
				"count": 1,
				"shortid": raw_upload.shortid,
				"api_key": obj.api_key.full_name
			})
			obj.status = UploadEventStatus.UNSUPPORTED_CLIENT

		obj.save()
		logger.info("All state successfully saved to UploadEvent with id: %r", obj.id)

		# If we get here, now everything is in the DB.
		raw_upload.delete()
		logger.info("Deleting objects from S3 succeeded")

		if is_unsupported_client:
			# Wait until after we have deleted the raw_upload to exit
			# But do not start processing if it's an unsupported client
			logger.info("Exiting Without Processing - Unsupported Client")
			return

		serializer = UploadEventSerializer(obj, data=upload_metadata)
		if serializer.is_valid():
			logger.info("UploadEvent passed serializer validation")
			obj.status = UploadEventStatus.PROCESSING
			serializer.save()

			logger.info("Starting GameReplay processing for UploadEvent")
			obj.process()
		else:
			obj.error = serializer.errors
			logger.info("UploadEvent failed validation with errors: %r", obj.error)
			obj.status = UploadEventStatus.VALIDATION_ERROR
			obj.save()

	logger.info("RawUpload event processing is complete")
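# Reprocessing sketch (illustrative): the reprocess flag on process_raw_upload exists so
# an UploadEvent that already has a database row can be pushed through the pipeline again
# without tripping the double_put guard. The log_group_name/log_stream_name values would
# normally come from the Lambda context object; the wrapper name here is a hypothetical.
def example_reprocess(raw_upload, context):
	process_raw_upload(
		raw_upload,
		reprocess=True,
		log_group_name=context.log_group_name,
		log_stream_name=context.log_stream_name,
	)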