def handler(event, context):
    job_id = event["ExecutionName"]
    deletion_items = get_deletion_queue()
    manifests_partitions = []
    data_mappers = get_data_mappers()
    total_queries = 0
    for data_mapper in data_mappers:
        query_executor = data_mapper["QueryExecutor"]
        if query_executor == "athena":
            queries = generate_athena_queries(data_mapper, deletion_items, job_id)
            # Only record a manifest partition for mappers that produced queries
            if len(queries) > 0:
                manifests_partitions.append([job_id, data_mapper["DataMapperId"]])
        else:
            raise NotImplementedError(
                "Unsupported data mapper query executor: '{}'".format(query_executor)
            )
        batch_sqs_msgs(queue, queries)
        total_queries += len(queries)
    write_partitions(manifests_partitions)
    return {
        "GeneratedQueries": total_queries,
        "DeletionQueueSize": len(deletion_items),
        "Manifests": [
            "s3://{}/{}".format(
                manifests_bucket_name,
                MANIFEST_KEY.format(
                    job_id=partition_tuple[0], data_mapper_id=partition_tuple[1]
                ),
            )
            for partition_tuple in manifests_partitions
        ],
    }
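# The handlers in this section rely on module-level wiring that isn't shown.
# A minimal sketch of what that setup could look like — the environment
# variable names and the MANIFEST_KEY template are assumptions, not the
# project's actual values:
import os

import boto3

sqs = boto3.resource("sqs")
queue = sqs.Queue(os.getenv("QueueUrl"))  # assumed env var name
athena = boto3.client("athena")
manifests_bucket_name = os.getenv("ManifestsBucket")  # assumed env var name
MANIFEST_KEY = "manifests/{job_id}/{data_mapper_id}.json"  # assumed template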
def handler(event, context):
    query_id = event["QueryId"]
    results = paginate(
        athena, athena.get_query_results, ["ResultSet.Rows"], QueryExecutionId=query_id
    )
    rows = [result for result in results]
    # The first row of an Athena result set holds the column headers
    header_row = rows.pop(0)
    path_field_index = next(
        (
            index
            for (index, d) in enumerate(header_row["Data"])
            if d["VarCharValue"] == "$path"
        ),
        None,
    )
    paths = [row["Data"][path_field_index]["VarCharValue"] for row in rows]
    messages = []
    for p in paths:
        msg = {
            "JobId": event["JobId"],
            "Object": p,
            "Columns": event["Columns"],
            "RoleArn": event.get("RoleArn", None),
            "DeleteOldVersions": event.get("DeleteOldVersions", True),
        }
        # Drop keys whose value is None so optional fields are omitted entirely
        messages.append({k: v for k, v in msg.items() if v is not None})
    batch_sqs_msgs(queue, messages)
    return paths
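# `paginate` above is a utility that isn't shown here. A plausible sketch,
# assuming it looks up the boto3 paginator via the bound method's name and
# yields each element matched by a JMESPath expression built from the given
# result keys — the exact signature is an assumption:
def paginate(client, method, iter_keys, **kwargs):
    # boto3 client methods expose their operation name via __name__
    paginator = client.get_paginator(method.__name__)
    page_iterator = paginator.paginate(**kwargs)
    # e.g. ["ResultSet.Rows"] becomes the JMESPath expression "ResultSet.Rows[]"
    expression = ".".join(iter_keys) + "[]"
    yield from page_iterator.search(expression)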
def handler(event, context):
    query_id = event["QueryId"]
    results = paginate(
        athena, athena.get_query_results, ["ResultSet.Rows"], QueryExecutionId=query_id
    )
    rows = [result for result in results]
    header_row = rows.pop(0)
    path_field_index = next(
        (
            index
            for (index, d) in enumerate(header_row["Data"])
            if d["VarCharValue"] == "$path"
        ),
        None,
    )
    paths = [row["Data"][path_field_index]["VarCharValue"] for row in rows]
    messages = []
    for p in paths:
        msg = {
            "AllFiles": event["AllFiles"],
            "JobId": event["JobId"],
            "Object": p,
            "QueryBucket": event["Bucket"],
            "QueryKey": event["Key"],
            "RoleArn": event.get("RoleArn", None),
            "DeleteOldVersions": event.get("DeleteOldVersions", True),
            "Format": event.get("Format"),
        }
        messages.append({k: v for k, v in msg.items() if v is not None})
    # Chunk the messages so each send stays within the configured batch size
    batched_msgs = [
        messages[i : i + NUM_OF_MESSAGES_IN_BATCH]
        for i in range(0, len(messages), NUM_OF_MESSAGES_IN_BATCH)
    ]
    for batch in batched_msgs:
        batch_sqs_msgs(queue, batch)
    return None
def test_it_sets_message_group_id_where_queue_is_fifo():
    queue = MagicMock()
    queue.attributes = {"FifoQueue": True}
    msgs = [1]
    batch_sqs_msgs(queue, msgs)
    for call in queue.send_messages.call_args_list:
        args, kwargs = call
        for msg in kwargs["Entries"]:
            assert "MessageGroupId" in msg
def test_it_passes_through_queue_args():
    queue = MagicMock()
    queue.attributes = {}
    msgs = [1]
    batch_sqs_msgs(queue, msgs, DelaySeconds=60)
    queue.send_messages.assert_any_call(
        Entries=[
            {
                "DelaySeconds": 60,
                "Id": ANY,
                "MessageBody": ANY,
            }
        ]
    )
def handler(event, context):
    deletion_items = get_deletion_queue(event["ExecutionName"])
    for data_mapper in get_data_mappers():
        query_executor = data_mapper["QueryExecutor"]
        if query_executor == "athena":
            queries = generate_athena_queries(data_mapper, deletion_items)
        else:
            raise NotImplementedError(
                "Unsupported data mapper query executor: '{}'".format(query_executor)
            )
        batch_sqs_msgs(queue, queries)
def test_it_batches_msgs():
    queue = MagicMock()
    queue.attributes = {}
    msgs = list(range(0, 15))
    batch_sqs_msgs(queue, msgs)
    queue.send_messages.assert_any_call(
        Entries=[
            {
                "Id": ANY,
                "MessageBody": json.dumps(x),
            }
            for x in range(0, 10)
        ]
    )
    queue.send_messages.assert_any_call(
        Entries=[
            {
                "Id": ANY,
                "MessageBody": json.dumps(x),
            }
            for x in range(10, 15)
        ]
    )
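# The tests above pin down the expected behaviour of batch_sqs_msgs: chunks of
# at most 10 entries (the SQS SendMessageBatch limit), JSON-serialised bodies,
# extra kwargs copied onto every entry, and a MessageGroupId when the queue is
# FIFO. A minimal sketch satisfying those tests — the uuid-based entry IDs are
# an assumption:
import json
from uuid import uuid4

MAX_BATCH_SIZE = 10  # SQS allows at most 10 messages per SendMessageBatch call


def batch_sqs_msgs(queue, messages, **kwargs):
    chunks = [
        messages[i : i + MAX_BATCH_SIZE]
        for i in range(0, len(messages), MAX_BATCH_SIZE)
    ]
    for chunk in chunks:
        entries = []
        for m in chunk:
            entry = {
                "Id": str(uuid4()),
                "MessageBody": json.dumps(m),
                **kwargs,
            }
            # FIFO queues require a MessageGroupId on every message
            if queue.attributes.get("FifoQueue", False):
                entry["MessageGroupId"] = str(uuid4())
            entries.append(entry)
        queue.send_messages(Entries=entries)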