Example No. 1
def emit_deletion_event(message_body, stats):
    job_id = message_body["JobId"]
    event_data = {
        "Statistics": stats,
        "Object": message_body["Object"],
    }
    emit_event(job_id, "ObjectUpdated", event_data, get_emitter_id())
Example No. 2
def process_job(job):
    job_id = job["Id"]
    state = {
        k: job[k]
        for k in [
            "AthenaConcurrencyLimit", "DeletionTasksMaxNumber",
            "ForgetQueueWaitSeconds", "Id", "QueryExecutionWaitSeconds",
            "QueryQueueWaitSeconds"
        ]
    }

    try:
        client.start_execution(stateMachineArn=state_machine_arn,
                               name=job_id,
                               input=json.dumps(state, cls=DecimalEncoder))
    except client.exceptions.ExecutionAlreadyExists:
        logger.warning("Execution %s already exists", job_id)
    except (ClientError, ValueError) as e:
        emit_event(
            job_id,
            "Exception",
            {
                "Error": "ExecutionFailure",
                "Cause": "Unable to start StepFunction execution: {}".format(str(e)),
            },
            "StreamProcessor",
        )
Example No. 3
def emit_failure_event(message_body, err_message, event_name):
    json_body = json.loads(message_body)
    job_id = json_body.get("JobId")
    if not job_id:
        raise ValueError("Message missing Job ID")
    event_data = {
        "Error": err_message,
        "Message": json_body,
    }
    emit_event(job_id, event_name, event_data, get_emitter_id())
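A hedged usage sketch: the function expects a raw JSON string (for example an SQS message body), so a call might look like the following; the error message and event name here are hypothetical:

import json

message = json.dumps({"JobId": "job123"})  # raw body as received from the queue
emit_failure_event(message, "Access denied", "ObjectUpdateFailed")  # hypothetical values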
Example No. 4
def test_it_provides_defaults(mock_table):
    emit_event("job123", "event_name", "data")
    mock_table.put_item.assert_called_with(
        Item={
            "Id": "job123",
            "Sk": mock.ANY,
            "Type": "JobEvent",
            "EventName": "event_name",
            "EventData": "data",
            "EmitterId": "1234",
            "CreatedAt": mock.ANY,
            "Expires": mock.ANY,
        })
Example No. 5
def test_it_writes_events_to_ddb(mock_table):
    emit_event("job123", "event_name", "data", "emitter123", 123)
    mock_table.put_item.assert_called_with(
        Item={
            "Id": "job123",
            "Sk": "123000#1234",  # gets converted to microseconds
            "Type": "JobEvent",
            "EventName": "event_name",
            "EventData": "data",
            "EmitterId": "emitter123",
            "CreatedAt": 123,
            "Expires": mock.ANY,
        })
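The two tests above pin down the rough shape of emit_event itself. The following is a reconstruction from the assertions, not the project's actual code: the defaults match test 4, and the "123000#1234" sort key in test 5 implies created_at seconds are scaled to a millisecond prefix followed by a short unique suffix (mocked as "1234"):

import time
import boto3
from uuid import uuid4

table = boto3.resource("dynamodb").Table("Jobs")  # hypothetical table name

def get_emitter_id():
    return str(uuid4())  # assumption: unique per invocation, mocked as "1234" above

def emit_event(job_id, event_name, event_data, emitter_id=None, created_at=None):
    if emitter_id is None:
        emitter_id = get_emitter_id()
    if created_at is None:
        created_at = time.time()
    table.put_item(
        Item={
            "Id": job_id,
            # Millisecond prefix plus a short unique suffix, per test 5
            "Sk": "{}#{}".format(round(created_at * 1000), str(uuid4())[:4]),
            "Type": "JobEvent",
            "EventName": event_name,
            "EventData": event_data,
            "EmitterId": emitter_id,
            "CreatedAt": round(created_at),
            "Expires": round(created_at) + 7 * 24 * 3600,  # hypothetical TTL
        }
    )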
Example No. 6
def handler(event, context):
    records = event["Records"]
    new_jobs = [
        deserialize_item(r["dynamodb"]["NewImage"]) for r in records
        if is_record_type(r, "Job") and is_operation(r, "INSERT")
    ]
    events = [
        deserialize_item(r["dynamodb"]["NewImage"]) for r in records
        if is_record_type(r, "JobEvent") and is_operation(r, "INSERT")
    ]
    grouped_events = groupby(sorted(events, key=itemgetter("Id")),
                             key=itemgetter("Id"))

    for job in new_jobs:
        process_job(job)

    for job_id, group in grouped_events:
        group = list(group)
        update_stats(job_id, group)
        updated_job = update_status(job_id, group)
        # Perform cleanup if required
        if (updated_job and updated_job.get("JobStatus")
                == "FORGET_COMPLETED_CLEANUP_IN_PROGRESS"):
            try:
                clear_deletion_queue(updated_job)
                emit_event(job_id, "CleanupSucceeded", utc_timestamp(),
                           "StreamProcessor")
            except Exception as e:
                emit_event(
                    job_id,
                    "CleanupFailed",
                    {"Error": "Unable to clear deletion queue: {}".format(str(e))},
                    "StreamProcessor",
                )
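The helpers used by this handler are defined elsewhere. A minimal sketch, assuming standard DynamoDB Streams record shapes (an eventName of INSERT/MODIFY/REMOVE plus a typed NewImage) and a "Type" attribute on every item:

from boto3.dynamodb.types import TypeDeserializer

deserializer = TypeDeserializer()

def deserialize_item(item):
    # Turn a typed image such as {"Id": {"S": "job123"}} into plain Python values
    return {k: deserializer.deserialize(v) for k, v in item.items()}

def is_operation(record, operation):
    return record.get("eventName") == operation

def is_record_type(record, record_type):
    image = record["dynamodb"].get("NewImage", {})
    return image.get("Type", {}).get("S") == record_type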
Example No. 7
def handler(event, context):
    records = event["Records"]
    new_jobs = get_records(records, "Job", "INSERT")
    deleted_jobs = get_records(records, "Job", "REMOVE", new_image=False)
    events = get_records(records, "JobEvent", "INSERT")
    grouped_events = groupby(sorted(events, key=itemgetter("Id")),
                             key=itemgetter("Id"))
    for job in new_jobs:
        process_job(job)

    for job in deleted_jobs:
        cleanup_manifests(job)

    for job_id, group in grouped_events:
        group = list(group)
        update_stats(job_id, group)
        updated_job = update_status(job_id, group)
        # Perform cleanup if required
        if (updated_job and updated_job.get("JobStatus")
                == "FORGET_COMPLETED_CLEANUP_IN_PROGRESS"):
            try:
                clear_deletion_queue(updated_job)
                emit_event(job_id, "CleanupSucceeded", utc_timestamp(),
                           "StreamProcessor")
            except Exception as e:
                emit_event(
                    job_id,
                    "CleanupFailed",
                    {"Error": "Unable to clear deletion queue: {}".format(str(e))},
                    "StreamProcessor",
                )
        elif updated_job and updated_job.get("JobStatus") in skip_cleanup_states:
            emit_event(job_id, "CleanupSkipped", utc_timestamp(),
                       "StreamProcessor")
Example No. 8
def handler(event, context):
    job_id = event["JobId"]
    event_name = event["EventName"]
    event_data = event["EventData"]
    emitter_id = event.get("EmitterId", str(uuid4()))
    emit_event(job_id, event_name, event_data, emitter_id)
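A hedged invocation sketch for this Lambda; the payload keys mirror the lookups above, and the values are hypothetical:

handler(
    {
        "JobId": "job123",
        "EventName": "QuerySucceeded",    # hypothetical event name
        "EventData": {"QueryId": "q-1"},  # hypothetical payload
        # "EmitterId" omitted: a fresh uuid4 is used, per the .get default
    },
    None,  # Lambda context is unused here
)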