Example #1
# Pytest-style test; mock_sts and mock_boto are assumed fixtures from the
# surrounding suite that patch the module's STS client and boto3 import.
from unittest.mock import ANY

from boto_utils import get_session


def test_it_assumes_role_for_session_where_given(mock_sts, mock_boto):
    mock_sts.assume_role.return_value = {
        "Credentials": {
            "AccessKeyId": "a",
            "SecretAccessKey": "b",
            "SessionToken": "c",
        }
    }
    get_session(assume_role_arn="arn:aws:iam::accountid:role/rolename")
    mock_sts.assume_role.assert_called_with(RoleArn="arn:aws:iam::accountid:role/rolename", RoleSessionName=ANY)
    mock_boto.session.Session.assert_called_with(
        aws_access_key_id="a",
        aws_secret_access_key="b",
        aws_session_token="c",
    )
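For orientation, here is a minimal get_session consistent with both tests on this page (the one above and Example #4 below): with no ARN it returns a plain boto3 session; otherwise it trades the role ARN for temporary STS credentials. This is a sketch inferred from the assertions, not the library's actual code, and the "s3f2" session name is a placeholder (the test only asserts RoleSessionName=ANY).

import boto3


def get_session(assume_role_arn=None):
    """Return a boto3 session, assuming an IAM role first when an ARN is given."""
    if not assume_role_arn:
        return boto3.session.Session()
    sts = boto3.client("sts")
    resp = sts.assume_role(
        RoleArn=assume_role_arn,
        RoleSessionName="s3f2",  # placeholder; the test accepts any session name
    )
    creds = resp["Credentials"]
    return boto3.session.Session(
        aws_access_key_id=creds["AccessKeyId"],
        aws_secret_access_key=creds["SecretAccessKey"],
        aws_session_token=creds["SessionToken"],
    )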
Example #2
# Assumed module context: the helpers (get_session, parse_s3_url, revert_last,
# purge_queue), a module-level logger, and deletion_queue_url are defined
# elsewhere in the source module.
from collections import Counter
from os import getenv


def _aggregate_stats(events):
    stats = Counter()

    for event in events:
        event_name = event["EventName"]
        event_data = event.get("EventData", {})
        if event_name in ["QuerySucceeded", "QueryFailed"]:
            stats += Counter({
                "TotalQueryCount": 1,
                "TotalQuerySucceededCount": 1 if event_name == "QuerySucceeded" else 0,
                "TotalQueryFailedCount": 1 if event_name == "QueryFailed" else 0,
                "TotalQueryScannedInBytes": event_data.get("Statistics", {}).get("DataScannedInBytes", 0),
                "TotalQueryTimeInMillis": event_data.get("Statistics", {}).get("EngineExecutionTimeInMillis", 0),
            })
        if event_name in ["ObjectUpdated", "ObjectUpdateFailed", "ObjectRollbackFailed"]:
            stats += Counter({
                "TotalObjectUpdatedCount": 1 if event_name == "ObjectUpdated" else 0,
                "TotalObjectUpdateFailedCount": 1 if event_name == "ObjectUpdateFailed" else 0,
                "TotalObjectRollbackFailedCount": 1 if event_name == "ObjectRollbackFailed" else 0,
            })
            if event_name == "ObjectUpdateFailed":
                # TODO: this can only work when the DeleteOldVersions flag is True for all involved data mappers
                try:
                    data_access_role_arn = event_data["Message"].get(
                        "RoleArn", getenv("DataAccessRoleArn"))
                    session = get_session(data_access_role_arn)
                    client = session.client("s3")
                    s3_object = event_data["Message"]["Object"]
                    input_bucket, input_key = parse_s3_url(s3_object)
                    revert_last(client, input_bucket, input_key)
                except Exception:
                    logger.exception("Unable to revert last")
                    try:
                        purge_queue(deletion_queue_url)
                    except Exception:
                        logger.exception("Unable to purge queue")

    return stats
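A quick usage sketch with hand-made events shaped like the fields the function reads above; the values are illustrative only.

events = [
    {
        "EventName": "QuerySucceeded",
        "EventData": {
            "Statistics": {
                "DataScannedInBytes": 1024,
                "EngineExecutionTimeInMillis": 50,
            }
        },
    },
    {"EventName": "ObjectUpdated", "EventData": {}},
]
stats = _aggregate_stats(events)
# -> TotalQueryCount=1, TotalQuerySucceededCount=1,
#    TotalQueryScannedInBytes=1024, TotalQueryTimeInMillis=50,
#    TotalObjectUpdatedCount=1
#    (Counter arithmetic drops the zero-valued counters)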
Example #3
# Assumed module context: json, operator.itemgetter, s3fs, pyarrow as pa,
# ArrowException and ClientError, plus the module's own helpers (get_queue,
# validate_message, get_session, parse_s3_url, validate_bucket_versioning,
# load_parquet, delete_matches_from_file, save, verify_object_versions_integrity,
# delete_old_versions, emit_deletion_event, handle_error,
# rollback_object_version) are imported elsewhere in the module.
def execute(queue_url, message_body, receipt_handle):
    logger.info("Message received")
    queue = get_queue(queue_url)
    msg = queue.Message(receipt_handle)
    try:
        # Parse and validate incoming message
        validate_message(message_body)
        body = json.loads(message_body)
        session = get_session(body.get("RoleArn"))
        client = session.client("s3")
        cols, object_path, job_id = itemgetter('Columns', 'Object', 'JobId')(body)
        input_bucket, input_key = parse_s3_url(object_path)
        validate_bucket_versioning(client, input_bucket)
        creds = session.get_credentials().get_frozen_credentials()
        s3 = s3fs.S3FileSystem(key=creds.access_key,
                               secret=creds.secret_key,
                               token=creds.token,
                               default_cache_type='none',
                               requester_pays=True,
                               default_fill_cache=False,
                               version_aware=True)
        # Download the object in-memory and convert to PyArrow NativeFile
        logger.info("Downloading and opening %s object in-memory", object_path)
        with s3.open(object_path, "rb") as f:
            source_version = f.version_id
            logger.info("Using object version %s as source", source_version)
            infile = load_parquet(f)
            # Write new file in-memory
            logger.info("Generating new parquet file without matches")
            out_sink, stats = delete_matches_from_file(infile, cols)
        if stats["DeletedRows"] == 0:
            raise ValueError(
                "The object {} was processed successfully but no rows required deletion"
                .format(object_path))
        with pa.BufferReader(out_sink.getvalue()) as output_buf:
            new_version = save(s3, client, output_buf, input_bucket, input_key,
                               source_version)
            logger.info("New object version: %s", new_version)
            verify_object_versions_integrity(client, input_bucket, input_key,
                                             source_version, new_version)
        if body.get("DeleteOldVersions"):
            logger.info("Deleting object %s versions older than version %s",
                        input_key, new_version)
            delete_old_versions(client, input_bucket, input_key, new_version)
        msg.delete()
        emit_deletion_event(body, stats)
    except (KeyError, ArrowException) as e:
        err_message = "Parquet processing error: {}".format(str(e))
        handle_error(msg, message_body, err_message)
    except IOError as e:
        err_message = "Unable to retrieve object: {}".format(str(e))
        handle_error(msg, message_body, err_message)
    except MemoryError as e:
        err_message = "Insufficient memory to work on object: {}".format(
            str(e))
        handle_error(msg, message_body, err_message)
    except ClientError as e:
        err_message = "ClientError: {}".format(str(e))
        if e.operation_name == "PutObjectAcl":
            err_message += ". Redacted object uploaded successfully but unable to restore WRITE ACL"
        if e.operation_name == "ListObjectVersions":
            err_message += ". Could not verify redacted object version integrity"
        handle_error(msg, message_body, err_message)
    except ValueError as e:
        err_message = "Unprocessable message: {}".format(str(e))
        handle_error(msg, message_body, err_message)
    except DeleteOldVersionsError as e:
        err_message = "Unable to delete previous versions: {}".format(str(e))
        handle_error(msg, message_body, err_message)
    except IntegrityCheckFailedError as e:
        err_description, client, bucket, key, version_id = e.args
        err_message = "Object version integrity check failed: {}".format(
            err_description)
        handle_error(msg, message_body, err_message)
        rollback_object_version(
            client,
            bucket,
            key,
            version_id,
            on_error=lambda err: handle_error(None, "{}", err,
                                              "ObjectRollbackFailed", False))
    except Exception as e:
        err_message = "Unknown error during message processing: {}".format(
            str(e))
        handle_error(msg, message_body, err_message)
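For context, a hedged sketch of how execute might be driven from an SQS poller; the queue URL and receive parameters here are illustrative, not part of the original module.

import boto3

sqs = boto3.client("sqs")
queue_url = "https://sqs.eu-west-1.amazonaws.com/123456789012/example-queue"  # hypothetical

resp = sqs.receive_message(QueueUrl=queue_url, MaxNumberOfMessages=1,
                           WaitTimeSeconds=20)
for message in resp.get("Messages", []):
    execute(queue_url, message["Body"], message["ReceiptHandle"])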
Example #4
def test_it_returns_default_session(mock_sts):
    resp = get_session()
    mock_sts.assume_role.assert_not_called()
    assert isinstance(resp, Session)
Example #5
import configparser

import boto_utils
from aws_requests_auth.aws_auth import AWSRequestsAuth
from elasticsearch import Elasticsearch, RequestsHttpConnection

config = configparser.ConfigParser()
config.read('../../config.ini')
aws_config = config['AWS']

session = boto_utils.get_session()

credentials = session.get_credentials().get_frozen_credentials()

awsauth = AWSRequestsAuth(aws_access_key=credentials.access_key,
                          aws_secret_access_key=credentials.secret_key,
                          aws_token=credentials.token,
                          aws_host=aws_config['es_host'],
                          aws_region=session.region_name,
                          aws_service='es')

# use the requests connection_class and pass in our custom auth class
es = Elasticsearch(
    hosts=[{'host': aws_config['es_host'], 'port': 443}],
    http_auth=awsauth,
    use_ssl=True,
    verify_certs=True,
    connection_class=RequestsHttpConnection,
)
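How the pieces fit: boto_utils.get_session supplies (possibly assumed-role) credentials, AWSRequestsAuth uses them to SigV4-sign each request for the es service, and RequestsHttpConnection lets the Elasticsearch client send its HTTP traffic through that signing handler over HTTPS on port 443.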