Example #1
def handler(event, context):
    job_id = event["ExecutionName"]
    deletion_items = get_deletion_queue()
    manifests_partitions = []
    data_mappers = get_data_mappers()
    total_queries = 0
    for data_mapper in data_mappers:
        query_executor = data_mapper["QueryExecutor"]
        if query_executor == "athena":
            queries = generate_athena_queries(data_mapper, deletion_items, job_id)
            if len(queries) > 0:
                manifests_partitions.append([job_id, data_mapper["DataMapperId"]])
        else:
            raise NotImplementedError(
                "Unsupported data mapper query executor: '{}'".format(query_executor)
            )

        batch_sqs_msgs(queue, queries)
        total_queries += len(queries)
    write_partitions(manifests_partitions)
    return {
        "GeneratedQueries": total_queries,
        "DeletionQueueSize": len(deletion_items),
        "Manifests": [
            "s3://{}/{}".format(
                manifests_bucket_name,
                MANIFEST_KEY.format(
                    job_id=partition_tuple[0], data_mapper_id=partition_tuple[1]
                ),
            )
            for partition_tuple in manifests_partitions
        ],
    }
Example #2
def handler(event, context):
    query_id = event["QueryId"]
    results = paginate(
        athena, athena.get_query_results, ["ResultSet.Rows"], QueryExecutionId=query_id
    )
    rows = list(results)
    header_row = rows.pop(0)
    path_field_index = next((index
                             for (index, d) in enumerate(header_row["Data"])
                             if d["VarCharValue"] == "$path"), None)

    paths = [row["Data"][path_field_index]["VarCharValue"] for row in rows]
    messages = []
    for p in paths:
        msg = {
            "JobId": event["JobId"],
            "Object": p,
            "Columns": event["Columns"],
            "RoleArn": event.get("RoleArn", None),
            "DeleteOldVersions": event.get("DeleteOldVersions", True),
        }
        messages.append({k: v for k, v in msg.items() if v is not None})

    batch_sqs_msgs(queue, messages)

    return paths
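
The paginate helper used above is not shown in these examples. A minimal sketch, assuming it wraps a boto3 paginator and resolves a dotted key path such as "ResultSet.Rows" into each response page (both assumptions inferred only from the call sites, not from the project itself), could look like this:

import functools


def paginate(client, method, keys, **kwargs):
    # Hypothetical sketch of the paginate() helper called in the handlers above.
    # Assumes `method` is a boto3 client method and `keys` is a dotted path (or a
    # list of dotted paths) into each response page, e.g. "ResultSet.Rows".
    paginator = client.get_paginator(method.__name__)
    if isinstance(keys, str):
        keys = [keys]
    for page in paginator.paginate(**kwargs):
        for key in keys:
            # Resolve "ResultSet.Rows" -> page["ResultSet"]["Rows"]
            items = functools.reduce(lambda d, k: d[k], key.split("."), page)
            for item in items:
                yield item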
Example #3
def handler(event, context):
    query_id = event["QueryId"]
    results = paginate(
        athena, athena.get_query_results, ["ResultSet.Rows"], QueryExecutionId=query_id
    )
    rows = list(results)
    header_row = rows.pop(0)
    path_field_index = next(
        (index for (index, d) in enumerate(header_row["Data"])
         if d["VarCharValue"] == "$path"),
        None,
    )

    paths = [row["Data"][path_field_index]["VarCharValue"] for row in rows]
    messages = []
    for p in paths:
        msg = {
            "AllFiles": event["AllFiles"],
            "JobId": event["JobId"],
            "Object": p,
            "QueryBucket": event["Bucket"],
            "QueryKey": event["Key"],
            "RoleArn": event.get("RoleArn", None),
            "DeleteOldVersions": event.get("DeleteOldVersions", True),
            "Format": event.get("Format"),
        }
        messages.append({k: v for k, v in msg.items() if v is not None})
    # Split messages into fixed-size batches before sending them to SQS
    batched_msgs = [
        messages[i:i + NUM_OF_MESSAGES_IN_BATCH]
        for i in range(0, len(messages), NUM_OF_MESSAGES_IN_BATCH)
    ]
    for batch in batched_msgs:
        batch_sqs_msgs(queue, batch)

    return None
def test_it_sets_message_group_id_where_queue_is_fifo():
    queue = MagicMock()
    queue.attributes = {"FifoQueue": True}
    msgs = [1]
    batch_sqs_msgs(queue, msgs)
    for call in queue.send_messages.call_args_list:
        args, kwargs = call
        for msg in kwargs["Entries"]:
            assert "MessageGroupId" in msg
def test_it_passes_through_queue_args():
    queue = MagicMock()
    queue.attributes = {}
    msgs = [1]
    batch_sqs_msgs(queue, msgs, DelaySeconds=60)
    queue.send_messages.assert_any_call(Entries=[{
        "DelaySeconds": 60,
        "Id": ANY,
        "MessageBody": ANY,
    }])
def handler(event, context):
    deletion_items = get_deletion_queue(event["ExecutionName"])
    for data_mapper in get_data_mappers():
        query_executor = data_mapper["QueryExecutor"]
        if query_executor == "athena":
            queries = generate_athena_queries(data_mapper, deletion_items)
        else:
            raise NotImplementedError(
                "Unsupported data mapper query executor: '{}'".format(query_executor)
            )

        batch_sqs_msgs(queue, queries)
def test_it_batches_msgs():
    queue = MagicMock()
    queue.attributes = {}
    msgs = list(range(0, 15))
    batch_sqs_msgs(queue, msgs)
    queue.send_messages.assert_any_call(Entries=[{
        "Id": ANY,
        "MessageBody": json.dumps(x),
    } for x in range(0, 10)])
    queue.send_messages.assert_any_call(Entries=[{
        "Id": ANY,
        "MessageBody": json.dumps(x),
    } for x in range(10, 15)])
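
The tests above pin down the expected behaviour of batch_sqs_msgs: messages are JSON-encoded, sent in batches of up to 10 entries, extra keyword arguments are copied onto every entry, and FIFO queues get a MessageGroupId. A minimal sketch consistent with those tests (not necessarily the project's actual implementation, and assuming queue is a boto3 SQS Queue resource) might be:

import json
import uuid


def batch_sqs_msgs(queue, messages, **kwargs):
    # Sketch of batch_sqs_msgs as implied by the tests above; `queue` is assumed
    # to be a boto3 SQS Queue resource exposing .attributes and .send_messages().
    chunks = [messages[i:i + 10] for i in range(0, len(messages), 10)]
    for chunk in chunks:
        entries = []
        for message in chunk:
            entry = {
                "Id": str(uuid.uuid4()),
                "MessageBody": json.dumps(message),
                **kwargs,
            }
            # FIFO queues require a MessageGroupId on every entry
            if queue.attributes.get("FifoQueue", False):
                entry["MessageGroupId"] = str(uuid.uuid4())
            entries.append(entry)
        queue.send_messages(Entries=entries)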