Example #1
    def trigger_page_out(self, config_data, write_cuboid_key, resource):
        """
        Method to trigger a page-out by queueing the cuboid data in an SQS message and invoking the page-out lambda function

        Args:
            config_data (dict): Dictionary of configuration dictionaries
            write_cuboid_key (str): Unique write-cuboid key to be flushed to S3
            resource (spdb.project.resource.BossResource): resource for the given write cuboid key

        Returns:
            None
        """
        # Put page out job on the queue
        sqs = boto3.client('sqs', region_name=get_region())

        msg_data = {"config": config_data,
                    "write_cuboid_key": write_cuboid_key,
                    "lambda-name": "s3_flush",
                    "resource": resource.to_dict()}

        response = sqs.send_message(QueueUrl=self.config["s3_flush_queue"],
                                    MessageBody=json.dumps(msg_data))

        if response['ResponseMetadata']['HTTPStatusCode'] != 200:
            raise SpdbError("Error sending SQS message to trigger page out operation.",
                            ErrorCodes.SPDB_ERROR)

        # Trigger lambda to handle it
        client = boto3.client('lambda', region_name=get_region())

        response = client.invoke(
            FunctionName=self.config["page_out_lambda_function"],
            InvocationType='Event',
            Payload=json.dumps(msg_data).encode())
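For context, whatever drains the flush queue receives this same payload as the SQS message body. A minimal sketch of reading one message back follows; the queue URL and region are placeholders, and the field names come from msg_data above.

import json

import boto3

sqs = boto3.client('sqs', region_name='us-east-1')  # placeholder region
queue_url = 'https://sqs.us-east-1.amazonaws.com/123456789012/example-s3-flush'  # placeholder

# Long-poll for a single page-out message and inspect its payload
resp = sqs.receive_message(QueueUrl=queue_url, MaxNumberOfMessages=1, WaitTimeSeconds=10)
for msg in resp.get('Messages', []):
    body = json.loads(msg['Body'])
    print(body['write_cuboid_key'], body['lambda-name'])
    # Delete the message once it has been handled
    sqs.delete_message(QueueUrl=queue_url, ReceiptHandle=msg['ReceiptHandle'])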
Example #2
    def verify_queue(self):
        client = boto3.client('sqs', region_name=get_region())
        https_status_code, new_message_num = self.check_queue_count(client)

        if new_message_num is None:
            self.log.error("sqs_watcherd get_queue_attributes failed. Response HTTPSStatusCode: " + str(
                https_status_code))
            return

        self.old_message_num = self.message_num
        self.message_num = new_message_num

        if ((self.message_num != 0) and (self.message_num == self.old_message_num)):
            client = boto3.client('lambda', region_name=get_region())
            lambdas_to_invoke = min(self.message_num, MAX_LAMBDAS_TO_INVOKE)
            self.log.info("kicking off {} lambdas".format(lambdas_to_invoke))
            for i in range(lambdas_to_invoke):
                response = client.invoke(
                    FunctionName=self.lambda_data["config"]["object_store_config"]["page_out_lambda_function"],
                    InvocationType='Event',
                    Payload=json.dumps(self.lambda_data).encode())
                if response['ResponseMetadata']['HTTPStatusCode'] != 202:
                    self.log.error("sqs_watcherd invoke_lambda failed. Response HTTPSStatusCode: " + str(
                        response['ResponseMetadata']['HTTPStatusCode']))
            return lambdas_to_invoke
Example #3
    def verify_queue(self):
        client = boto3.client('sqs', region_name=get_region())
        https_status_code, new_message_num = self.check_queue_count(client)

        if new_message_num is None:
            self.log.error(
                "sqs_watcherd get_queue_attributes failed. Response HTTPSStatusCode: "
                + str(https_status_code))
            return

        self.old_message_num = self.message_num
        self.message_num = new_message_num

        if ((self.message_num != 0)
                and (self.message_num == self.old_message_num)):
            client = boto3.client('lambda', region_name=get_region())
            lambdas_to_invoke = min(self.message_num, MAX_LAMBDAS_TO_INVOKE)
            self.log.info("kicking off {} lambdas".format(lambdas_to_invoke))
            for i in range(lambdas_to_invoke):
                response = client.invoke(
                    FunctionName=self.lambda_data["config"]
                    ["object_store_config"]["page_out_lambda_function"],
                    InvocationType='Event',
                    Payload=json.dumps(self.lambda_data).encode())
                if response['ResponseMetadata']['HTTPStatusCode'] != 202:
                    self.log.error(
                        "sqs_watcherd invoke_lambda failed. Response HTTPSStatusCode: "
                        + str(response['ResponseMetadata']['HTTPStatusCode']))
            return lambdas_to_invoke
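check_queue_count() is not shown in this listing; a plausible sketch of such a method on SqsWatcher, based on the get_queue_attributes call used in the tests below, might look like the following. The self.queue_url attribute is an assumption.

    def check_queue_count(self, client):
        """Hypothetical sketch: return (HTTP status code, approximate message count) for the watched queue."""
        response = client.get_queue_attributes(
            QueueUrl=self.queue_url,  # assumed attribute holding the flush queue URL
            AttributeNames=['ApproximateNumberOfMessages'])
        status = response['ResponseMetadata']['HTTPStatusCode']
        if status != 200:
            return status, None
        return status, int(response['Attributes']['ApproximateNumberOfMessages'])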
Example #4
 def __init__(self, pid_file_name, pid_dir="/var/run"):
     super().__init__(pid_file_name, pid_dir)
     self.config = BossConfig()
     self.host = self.config['system']['fqdn']
     self.dead_letter_queue = self.config['aws']['s3-flush-deadletter-queue']
     self.sns_write_locked = self.config['aws']['sns-write-locked']
     self.sqs_client = boto3.client('sqs', region_name=get_region())
     self.sns_client = boto3.client('sns', region_name=get_region())
     self._sp = None
Example #5
    def test_sqs_watcher_send_message(self):
        """Inject message into queue and test that SqsWatcher kicks off a lambda and writes cuboid to s3."""
        # Generate random data
        cube1 = Cube.create_cube(self.resource, [512, 512, 16])
        cube1.random()
        cube1.morton_id = 0

        sp = SpatialDB(self.kvio_config, self.state_config, self.object_store_config)

        base_write_cuboid_key = "WRITE-CUBOID&{}&{}".format(self.resource.get_lookup_key(), 0)
        morton_idx = ndlib.XYZMorton([0, 0, 0])
        t = 0
        write_cuboid_key = sp.kvio.insert_cube_in_write_buffer(base_write_cuboid_key, t, morton_idx,
                                                               cube1.to_blosc_by_time_index(t))

        # Put page out job on the queue
        sqs = boto3.client('sqs', region_name=get_region())

        msg_data = {"config": self.config_data,
                    "write_cuboid_key": write_cuboid_key,
                    "lambda-name": "s3_flush",
                    "resource": self.resource.to_dict()}

        response = sqs.send_message(QueueUrl=self.object_store_config["s3_flush_queue"],
                                    MessageBody=json.dumps(msg_data))
        assert response['ResponseMetadata']['HTTPStatusCode'] == 200

        watcher = SqsWatcher(self.lambda_data)
        #  verify_queue() needs to be run multiple times to verify that the queue is not changing;
        #  only then does it send off a lambda message.
        time.sleep(5)
        watcher.verify_queue()
        time.sleep(5)
        lambdas_invoked = watcher.verify_queue()
        if lambdas_invoked < 1:
            time.sleep(5)
            watcher.verify_queue()
        time.sleep(15)

        client = boto3.client('sqs', region_name=get_region())
        response = client.get_queue_attributes(
            QueueUrl=self.object_store_config["s3_flush_queue"],
            AttributeNames=[
                'ApproximateNumberOfMessages', 'ApproximateNumberOfMessagesNotVisible'
            ]
        )
        https_status_code = response['ResponseMetadata']['HTTPStatusCode']
        queue_count = int(response['Attributes']['ApproximateNumberOfMessages'])
        # test that the queue count is now 0
        assert queue_count == 0

        s3 = boto3.client('s3', region_name=get_region())
        objects_list = s3.list_objects(Bucket=self.object_store_config['cuboid_bucket'])
        # tests that bucket has some Contents.
        assert "Contents" in objects_list.keys()
Example #6
def handler(event, context):
    id_index_table = event['id_index_table']
    s3_index_table = event['s3_index_table']
    id_count_table = event['id_count_table']
    cuboid_bucket = event['cuboid_bucket']

    write_id_index_status = event['write_id_index_status']

    id_index_new_chunk_threshold = event['id_index_new_chunk_threshold']

    obj_ind = ObjectIndices(s3_index_table, id_index_table, id_count_table,
                            cuboid_bucket, get_region())

    try:
        for obj_id in event['id_group']:
            obj_ind.write_id_index(id_index_new_chunk_threshold,
                                   event['cuboid_object_key'], obj_id,
                                   event['version'])
        write_id_index_status['done'] = True
    except botocore.exceptions.ClientError as ex:
        # Probably had a throttle or a ConditionCheckFailed.
        print('ClientError caught: {}'.format(ex))
        if int(write_id_index_status['retries_left']) < 1:
            if get_class_name(ex.__class__) in DO_NOT_WRAP_THESE_EXCEPTIONS:
                raise
            msg = '{}: {}'.format(type(ex), ex)
            raise DynamoClientError(msg) from ex
        event['result'] = str(ex)
        prep_for_retry(write_id_index_status)

    return event
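prep_for_retry() is referenced above but not shown in this listing; a hypothetical sketch consistent with how write_id_index_status is used (a retries_left counter and a done flag) could be:

def prep_for_retry(write_id_index_status):
    """Hypothetical helper: flag the status dict so the caller retries the index write."""
    write_id_index_status['done'] = False
    write_id_index_status['retries_left'] = int(write_id_index_status['retries_left']) - 1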
Example #7
    def put_objects(self, key_list, cube_list, version=0):
        """

        Args:
            key_list (list(str)): A list of object keys to put into the object store
            cube_list (list(bytes)): A list of blosc compressed cuboid data
            version (int): The ID of the version node - Default to 0 until fully implemented, but will eliminate
                           need to do a migration

        Returns:
            None

        """
        s3 = boto3.client('s3', region_name=get_region())

        for key, cube in zip(key_list, cube_list):
            # Append version to key
            key = "{}&{}".format(key, version)

            response = s3.put_object(
                Body=cube,
                Key=key,
                Bucket=self.config["cuboid_bucket"],
            )
            if response['ResponseMetadata']['HTTPStatusCode'] != 200:
                raise SpdbError("Error writing cuboid to S3.",
                                ErrorCodes.OBJECT_STORE_ERROR)
Example #8
    def get_objects(self, key_list, version=0):
        """ Method to get multiple objects serially in a loop

        Args:
            key_list (list(str)): A list of object keys to retrieve from the object store
            version (int): The ID of the version node - Default to 0 until fully implemented, but will eliminate
                           need to do a migration

        Returns:
            (list(bytes)): A list of blosc compressed cuboid data

        """
        s3 = boto3.client('s3', region_name=get_region())

        results = []

        for key in key_list:
            # Append version to key
            key = "{}&{}".format(key, version)

            response = s3.get_object(
                Key=key,
                Bucket=self.config["cuboid_bucket"],
            )
            if response['ResponseMetadata']['HTTPStatusCode'] != 200:
                raise SpdbError("Error reading cuboid from S3.",
                                ErrorCodes.OBJECT_STORE_ERROR)

            results.append(response['Body'].read())

        return results
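A minimal round-trip through put_objects() and get_objects() might look like the following sketch. The configuration values and keys are placeholders, the blosc-compressed payload is stood in for by plain bytes, and AWSObjectStore is assumed to be imported from the project.

# Placeholder configuration; Example #19 below lists the keys the constructor expects
config = {
    'cuboid_bucket': 'example-cuboid-bucket',
    's3_flush_queue': 'https://sqs.us-east-1.amazonaws.com/123456789012/example-s3-flush',
    'page_in_lambda_function': 'example-page-in',
    'page_out_lambda_function': 'example-page-out',
    's3_index_table': 'example-s3-index',
    'id_index_table': 'example-id-index',
    'id_count_table': 'example-id-count',
}

store = AWSObjectStore(config)
keys = ['example&1&1&1&0&0&12']
cubes = [b'example-blosc-compressed-bytes']

store.put_objects(keys, cubes)        # each key is written as "<key>&0"
round_trip = store.get_objects(keys)  # reads the same versioned keys back
assert round_trip == cubes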
Example #9
def handler(event, context):
    """
    Write all ids in a cuboid to the S3 cuboid index so we can quickly
    retrieve the ids contained within a cuboid.

    Args:
        event (dict): Input parameters.  See comment at top for expected contents.
        context (Context): Contains runtime info about the lambda.

    Returns:
        (dict): see Output description at top of file.
    """

    id_index_table = event['config']['object_store_config']['id_index_table']
    s3_index_table = event['config']['object_store_config']['s3_index_table']
    id_count_table = event['config']['object_store_config']['id_count_table']
    cuboid_bucket = event['config']['object_store_config']['cuboid_bucket']

    obj_ind = ObjectIndices(
        s3_index_table, id_index_table, id_count_table, cuboid_bucket, get_region())
    ids_list = obj_ind.write_s3_index(
        event['cuboid_object_key'], event['version'])

    return { 
        'config': event['config'],
        'id_index_step_fcn': event['id_index_step_fcn'],
        'fanout_id_writers_step_fcn': event['fanout_id_writers_step_fcn'],
        'cuboid_object_key': event['cuboid_object_key'],
        'version': event['version'],
        'max_write_id_index_lambdas': event['max_write_id_index_lambdas'],
        'num_ids': len(ids_list),
        'finished': False
    }
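For illustration, the event this handler expects can be assembled directly and passed in; the table, bucket, and state-machine names below are placeholders.

event = {
    'config': {
        'object_store_config': {
            'id_index_table': 'example-id-index',
            's3_index_table': 'example-s3-index',
            'id_count_table': 'example-id-count',
            'cuboid_bucket': 'example-cuboid-bucket',
        }
    },
    'id_index_step_fcn': 'arn:aws:states:us-east-1:123456789012:stateMachine:ExampleIdIndex',
    'fanout_id_writers_step_fcn': 'arn:aws:states:us-east-1:123456789012:stateMachine:ExampleFanout',
    'cuboid_object_key': 'example&1&1&1&0&0&12',
    'version': 0,
    'max_write_id_index_lambdas': 10,
}

result = handler(event, None)  # the context argument is unused here
print(result['num_ids'], result['finished'])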
Example #10
    def get_single_object(self, key, version=0):
        """ Method to get a single object. Used in the lambda page-in function and non-parallelized version

        Args:
            key (str): A cached-cuboid key to retrieve from the object store
            version (int): The ID of the version node - Default to 0 until fully implemented, but will eliminate
                           need to do a migration

        Returns:
            (bytes): The blosc compressed cuboid data

        """
        s3 = boto3.client('s3', region_name=get_region())

        # Append version to key
        key = "{}&{}".format(key, version)

        response = s3.get_object(
            Key=key,
            Bucket=self.config["cuboid_bucket"],
        )
        if response['ResponseMetadata']['HTTPStatusCode'] != 200:
            raise SpdbError("Error reading cuboid from S3.",
                            ErrorCodes.OBJECT_STORE_ERROR)

        return response['Body'].read()
Example #11
    def remove_sqs_event_source_from_lambda(self, queue_arn, lambda_name):
        """
        Removes an SQS event trigger from the given lambda.

        Args:
            queue_arn (str): Arn of the SQS queue that is the trigger source.
            lambda_name (str): Lambda function name.
        """
        client = boto3.client('lambda', region_name=get_region())
        try:
            resp = client.list_event_source_mappings(EventSourceArn=queue_arn,
                                                     FunctionName=lambda_name)
        except Exception as ex:
            log.error(
                f"Couldn't list event source mappings for {lambda_name}: {ex}")
            return
        for evt in resp['EventSourceMappings']:
            try:
                client.delete_event_source_mapping(UUID=evt['UUID'])
            except client.exceptions.ResourceNotFoundException:
                pass
            except Exception as ex:
                log.error(
                    f"Couldn't remove event source mapping {queue_arn} from {lambda_name}: {ex}"
                )
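The inverse operation, attaching an SQS queue as a lambda trigger, goes through create_event_source_mapping; a sketch with a placeholder queue ARN and function name:

import boto3

client = boto3.client('lambda', region_name='us-east-1')  # placeholder region
client.create_event_source_mapping(
    EventSourceArn='arn:aws:sqs:us-east-1:123456789012:example-queue',
    FunctionName='example-function',
    BatchSize=1,   # number of messages handed to each invocation
    Enabled=True)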
Example #12
    def _delete_cuboid_bucket(self, bucket_name):
        """Method to delete the S3 bucket for cuboid storage"""
        s3 = boto3.resource('s3', region_name=get_region())
        bucket = s3.Bucket(bucket_name)
        for obj in bucket.objects.all():
            obj.delete()

        # Delete bucket
        bucket.delete()
        return bucket
Example #13
    def tearDownClass(cls):
        try:
            cls.setup_helper.delete_flush_queue(cls.object_store_config["s3_flush_queue"])
        except:
            pass

        lambda_client = boto3.client('lambda', region_name=get_region())
        try:
            lambda_client.delete_function(FunctionName=cls.test_lambda)
        except botocore.exceptions.ClientError:
            pass
Example #14
    def _delete_index_table(self, table_name):
        """Method to delete the S3 index table"""

        endpoint_url = None
        if 'LOCAL_DYNAMODB_URL' in os.environ:
            endpoint_url = os.environ['LOCAL_DYNAMODB_URL']

        client = boto3.client('dynamodb',
                              region_name=get_region(),
                              endpoint_url=endpoint_url)
        client.delete_table(TableName=table_name)
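If the caller needs to block until DynamoDB finishes removing the table, boto3's built-in waiter can follow the delete call; a sketch with a placeholder table name and region:

import boto3

client = boto3.client('dynamodb', region_name='us-east-1')  # placeholder region
client.delete_table(TableName='example-s3-index')
# Polls DescribeTable until the table no longer exists
client.get_waiter('table_not_exists').wait(TableName='example-s3-index')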
Example #15
    def tearDownClass(cls):
        try:
            cls.setup_helper.delete_flush_queue(
                cls.object_store_config["s3_flush_queue"])
        except:
            pass

        lambda_client = boto3.client('lambda', region_name=get_region())
        try:
            lambda_client.delete_function(FunctionName=cls.test_lambda)
        except botocore.exceptions.ClientError:
            pass
Example #16
    def test_add_cuboid_to_index(self):
        """Test method to compute final object key and add to S3"""
        dummy_key = "SLDKFJDSHG&1&1&1&0&0&12"
        os = AWSObjectStore(self.object_store_config)
        os.add_cuboid_to_index(dummy_key)

        # Get item
        dynamodb = boto3.client('dynamodb', region_name=get_region())
        response = dynamodb.get_item(
            TableName=self.object_store_config['s3_index_table'],
            Key={'object-key': {'S': dummy_key},
                 'version-node': {'N': "0"}},
            ReturnConsumedCapacity='NONE'
        )

        assert response['Item']['object-key']['S'] == dummy_key
        assert response['Item']['version-node']['N'] == "0"
        assert response['Item']['ingest-job-hash']['S'] == '1'
        assert response['Item']['ingest-job-range']['S'] == '1&1&0&0'
Example #17
    def cuboids_exist(self, key_list, cache_miss_key_idx=None, version=0):
        """
        Method to check if cuboids exist in S3 by checking the S3 Index table.

        Currently versioning is not implemented, so a version of "0" is simply used

        Args:
            key_list (list(str)): A list of cached-cuboid keys to check for existence in the object store
            cache_miss_key_idx (list(int)): A list of ints indexing the keys in key_list that should be checked
            version (int): The ID of the version node - Default to 0 until fully implemented, but will eliminate
                           need to do a migration

        Returns:
            (list(int)), (list(int)): A tuple of 2 lists.  The first contains the indices into key_list of keys
            found in S3; the second contains the indices into key_list of keys not in S3

        """
        if not cache_miss_key_idx:
            cache_miss_key_idx = range(0, len(key_list))

        object_keys = self.cached_cuboid_to_object_keys(key_list)

        # TODO: Should use batch read to speed up
        dynamodb = boto3.client('dynamodb', region_name=get_region())

        s3_key_index = []
        zero_key_index = []
        for idx, key in enumerate(object_keys):
            if idx not in cache_miss_key_idx:
                continue
            response = dynamodb.get_item(
                TableName=self.config['s3_index_table'],
                Key={'object-key': {'S': key}, 'version-node': {'N': "{}".format(version)}},
                ConsistentRead=True,
                ReturnConsumedCapacity='NONE')

            if "Item" not in response:
                # Item not in S3
                zero_key_index.append(idx)
            else:
                s3_key_index.append(idx)

        return s3_key_index, zero_key_index
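The TODO above could be addressed with DynamoDB's batch read API; a sketch of checking up to 100 index keys per request (table name and keys are placeholders, and production code would also need to retry any UnprocessedKeys):

import boto3

dynamodb = boto3.client('dynamodb', region_name='us-east-1')  # placeholder region
table = 'example-s3-index'
object_keys = ['example&1&1&1&0&0&12', 'example&1&1&1&0&0&13']
version = 0

keys = [{'object-key': {'S': k}, 'version-node': {'N': str(version)}} for k in object_keys]
found = set()
# BatchGetItem accepts at most 100 keys per request
for start in range(0, len(keys), 100):
    resp = dynamodb.batch_get_item(
        RequestItems={table: {'Keys': keys[start:start + 100], 'ConsistentRead': True}})
    for item in resp.get('Responses', {}).get(table, []):
        found.add(item['object-key']['S'])

missing = [k for k in object_keys if k not in found]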
Example #18
    def _create_index_table(self, table_name, schema_file):
        """Method to create the S3 index table"""

        # Load json spec
        with open(schema_file) as handle:
            json_str = handle.read()
            table_params = json.loads(json_str)

        endpoint_url = None
        if 'LOCAL_DYNAMODB_URL' in os.environ:
            endpoint_url = os.environ['LOCAL_DYNAMODB_URL']

        # Create table
        client = boto3.client('dynamodb',
                              region_name=get_region(),
                              endpoint_url=endpoint_url)
        _ = client.create_table(TableName=table_name, **table_params)

        return client.get_waiter('table_exists')
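The returned waiter is what lets callers block until the new table is usable; for example (inside the same setup helper, with placeholder names):

waiter = self._create_index_table('example-s3-index', 'schema.json')
waiter.wait(TableName='example-s3-index')  # polls DescribeTable until the table is ACTIVE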
Example #19
    def __init__(self, conf):
        """
        A class implementing the object store for cuboid storage on AWS (S3 and DynamoDB)

        Args:
            conf(dict): Dictionary containing configuration details for the object store


        Params in the conf dictionary:
            s3_flush_queue: URL for the SQS queue tracking flush tasks
            cuboid_bucket: Bucket for storage of cuboid objects in S3
            page_in_lambda_function: name of lambda function for page in operation (e.g. page_in.handler)
            page_out_lambda_function: name of lambda function for page out operation (e.g. page_out.handler)
            s3_index_table: name of the dynamoDB table for storing the s3 cuboid index
            id_index_table: name of DynamoDB table that maps object ids to cuboid object keys
            id_count_table: name of DynamoDB table that reserves objects ids for channels
        """
        # call the base class constructor
        ObjectStore.__init__(self, conf)
        self.obj_ind = ObjectIndices(
            conf['s3_index_table'], conf['id_index_table'], conf['id_count_table'], get_region())
Example #20
    def wait_table_delete(self, table_name):
        """Poll dynamodb at a 2s interval until the table deletes."""
        endpoint_url = None
        if 'LOCAL_DYNAMODB_URL' in os.environ:
            endpoint_url = os.environ['LOCAL_DYNAMODB_URL']

        client = boto3.client('dynamodb',
                              region_name=get_region(),
                              endpoint_url=endpoint_url)
        cnt = 0
        while True:
            time.sleep(2)
            cnt += 1
            if cnt > 50:
                # Give up waiting.
                return
            try:
                print('-', end='', flush=True)
                resp = client.describe_table(TableName=table_name)
            except:
                # Exception thrown when table doesn't exist.
                return
Example #21
    def page_in_objects(self, key_list, page_in_chan, kv_config, state_config):
        # TODO Update parent class once tested
        """
        Method to page in objects from S3 to the Cache Database via Lambda invocation directly

        Args:
            key_list (list(str)): A list of cached-cuboid keys to retrieve from the object store
            page_in_chan (str): Redis channel used for sending status of page in operations
            kv_config (dict): Configuration information for the key-value engine interface
            state_config (dict): Configuration information for the state database interface

        Returns:
            key_list (list(str)): A list of object keys

        """
        # Convert cuboid-cached keys into object keys
        object_keys = self.cached_cuboid_to_object_keys(key_list)

        # Trigger lambda for all keys
        client = boto3.client('lambda', region_name=get_region())

        params = {"page_in_channel": page_in_chan,
                  "kv_config": kv_config,
                  "state_config": state_config,
                  "lambda-name": "page_in_lambda_function",
                  "object_store_config": self.config}

        # TODO: Make concurrent
        for key in object_keys:
            params["object_key"] = key

            response = client.invoke(
                FunctionName=self.config["page_in_lambda_function"],
                InvocationType='Event',
                Payload=json.dumps(params).encode())

        return object_keys
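The 'Make concurrent' TODO could be handled with a thread pool, since botocore clients are safe to share across threads; a sketch with placeholder function name, keys, and params:

import json
from concurrent.futures import ThreadPoolExecutor

import boto3

client = boto3.client('lambda', region_name='us-east-1')  # placeholder region

def invoke_page_in(key, params, function_name='example-page-in'):
    payload = dict(params, object_key=key)  # copy so threads do not share one dict
    return client.invoke(FunctionName=function_name,
                         InvocationType='Event',
                         Payload=json.dumps(payload).encode())

params = {"page_in_channel": "example-channel", "kv_config": {}, "state_config": {},
          "lambda-name": "page_in_lambda_function", "object_store_config": {}}
object_keys = ['example&1&1&1&0&0&12', 'example&1&1&1&0&0&13']

with ThreadPoolExecutor(max_workers=8) as pool:
    responses = list(pool.map(lambda k: invoke_page_in(k, params), object_keys))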
Example #22
    def wait_table_create(self, table_name):
        """Poll dynamodb at a 2s interval until the table creates."""
        endpoint_url = None
        if 'LOCAL_DYNAMODB_URL' in os.environ:
            endpoint_url = os.environ['LOCAL_DYNAMODB_URL']

        client = boto3.client('dynamodb',
                              region_name=get_region(),
                              endpoint_url=endpoint_url)
        cnt = 0
        while True:
            time.sleep(2)
            cnt += 1
            if cnt > 50:
                # Give up waiting.
                return
            try:
                print('-', end='', flush=True)
                resp = client.describe_table(TableName=table_name)
                if resp['Table']['TableStatus'] == 'ACTIVE':
                    return
            except:
                # May get an exception if table doesn't currently exist.
                pass
Example #23
    def add_cuboid_to_index(self, object_key, version=0, ingest_job=0):
        """
        Method to add a cuboid's object_key to the S3 index table

        Currently versioning is not implemented, so a version of "0" is simply used

        Args:
            object_key (str): An object-keys for a cuboid to add to the index
            version (int): The ID of the version node - Default to 0 until fully implemented, but will eliminate
                           need to do a migration
            ingest_job (int): Id of the ingest job that added this cuboid. Defaults to 0 (e.g., if the cuboid was added via the cutout service).

        Returns:
            None
        """
        dynamodb = boto3.client('dynamodb', region_name=get_region())

        # Get lookup key and resolution from object key
        parts = self.get_object_key_parts(object_key)

        # range key is exp&ch&res&task
        ingest_job_range = "{}&{}&{}&{}".format(parts.experiment_id, parts.channel_id, parts.resolution, ingest_job)

        try:
            dynamodb.put_item(
                TableName=self.config['s3_index_table'],
                Item={'object-key': {'S': object_key},
                      'version-node': {'N': "{}".format(version)},
                      'ingest-job-hash': {'S': "{}".format(parts.collection_id)},
                      'ingest-job-range': {'S': ingest_job_range}},
                ReturnConsumedCapacity='NONE',
                ReturnItemCollectionMetrics='NONE',
            )
        except:
            raise SpdbError("Error adding object-key to index.",
                            ErrorCodes.SPDB_ERROR)
Example #24
 def _create_upload_queue(self, queue_name):
     """Method to create a test sqs for uploading tiles for the ingest"""
     client = boto3.client('sqs', region_name=get_region())
     response = client.create_queue(QueueName=queue_name)
     url = response['QueueUrl']
     return url
Example #25
    def setUpClass(cls):
        """ get_some_resource() is slow, to avoid calling it for each test use setUpClass()
            and store the result as class variable
        """

        # Suppress ResourceWarning messages about unclosed connections.
        warnings.simplefilter('ignore')

        cls.setUpParams(cls)

        lambda_client = boto3.client('lambda', region_name=get_region())
        cls.test_lambda = 'IntTest-{}'.format(cls.domain).replace('.', '-')
        try:
            lambda_client.delete_function(FunctionName=cls.test_lambda)
        except botocore.exceptions.ClientError:
            pass

        resp = lambda_client.get_function(
            FunctionName=cls.object_store_config['page_out_lambda_function'])
        lambda_cfg = resp['Configuration']
        vpc_cfg = lambda_cfg['VpcConfig']
        # VpcId is not a valid field when creating a lambda fcn.
        del vpc_cfg['VpcId']

        temp_file = tempfile.NamedTemporaryFile()
        temp_name = temp_file.name + '.zip'
        temp_file.close()
        with ZipFile(temp_name, mode='w') as zip:
            t = time.localtime()
            lambda_file = ZipInfo('lambda_function.py',
                                  date_time=(t.tm_year, t.tm_mon, t.tm_mday,
                                             t.tm_hour, t.tm_min, t.tm_sec))
            # Set file permissions.
            lambda_file.external_attr = 0o777 << 16
            code = 'def handler(event, context):\n    return\n'
            zip.writestr(lambda_file, code)

        with open(temp_name, 'rb') as zip2:
            lambda_bytes = zip2.read()

        lambda_client.create_function(FunctionName=cls.test_lambda,
                                      VpcConfig=vpc_cfg,
                                      Role=lambda_cfg['Role'],
                                      Runtime=lambda_cfg['Runtime'],
                                      Handler='lambda_function.handler',
                                      MemorySize=128,
                                      Code={'ZipFile': lambda_bytes})

        # Set page out function to the test lambda.
        cls.object_store_config['page_out_lambda_function'] = cls.test_lambda

        print('standby for queue creation (slow ~30s)')
        try:
            cls.object_store_config[
                "s3_flush_queue"] = cls.setup_helper.create_flush_queue(
                    cls.s3_flush_queue_name)
        except ClientError:
            try:
                cls.setup_helper.delete_flush_queue(
                    cls.object_store_config["s3_flush_queue"])
            except:
                pass
            time.sleep(61)
            cls.object_store_config[
                "s3_flush_queue"] = cls.setup_helper.create_flush_queue(
                    cls.s3_flush_queue_name)

        print('done')
Example #26
    def post(self, request, collection, experiment, channel):
        """View to kick off a channel's downsample process

        Args:
            request: DRF Request object
            collection (str): Unique Collection identifier, indicating which collection you want to access
            experiment (str): Experiment identifier, indicating which experiment you want to access
            channel (str): Channel identifier, indicating which channel you want to access

        Returns:
            (HttpResponse)

        """
        # Process request and validate
        try:
            request_args = {
                "service": "downsample",
                "collection_name": collection,
                "experiment_name": experiment,
                "channel_name": channel
            }
            req = BossRequest(request, request_args)
        except BossError as err:
            return err.to_http()

        # Convert to Resource
        resource = project.BossResourceDjango(req)

        channel = resource.get_channel()
        if channel.downsample_status.upper() == "IN_PROGRESS":
            return BossHTTPError("Channel is currently being downsampled. Invalid Request.", ErrorCodes.INVALID_STATE)
        elif channel.downsample_status.upper() == "DOWNSAMPLED" and \
             not request.user.is_staff:
            return BossHTTPError("Channel is already downsampled. Invalid Request.", ErrorCodes.INVALID_STATE)

        if request.user.is_staff:
            # DP HACK: allow admin users to override the coordinate frame
            frame = request.data
        else:
            frame = {}

        # Call Step Function
        boss_config = bossutils.configuration.BossConfig()
        experiment = resource.get_experiment()
        coord_frame = resource.get_coord_frame()
        lookup_key = resource.get_lookup_key()
        col_id, exp_id, ch_id = lookup_key.split("&")

        def get_frame(idx):
            return int(frame.get(idx, getattr(coord_frame, idx)))

        args = {
            'collection_id': int(col_id),
            'experiment_id': int(exp_id),
            'channel_id': int(ch_id),
            'annotation_channel': not channel.is_image(),
            'data_type': resource.get_data_type(),

            's3_bucket': boss_config["aws"]["cuboid_bucket"],
            's3_index': boss_config["aws"]["s3-index-table"],

            'x_start': get_frame('x_start'),
            'y_start': get_frame('y_start'),
            'z_start': get_frame('z_start'),

            'x_stop': get_frame('x_stop'),
            'y_stop': get_frame('y_stop'),
            'z_stop': get_frame('z_stop'),

            'resolution': int(channel.base_resolution),
            'resolution_max': int(experiment.num_hierarchy_levels),
            'res_lt_max': int(channel.base_resolution) + 1 < int(experiment.num_hierarchy_levels),

            'type': experiment.hierarchy_method,
            'iso_resolution': int(resource.get_isotropic_level()),

            # This step function executes: boss-tools/activities/resolution_hierarchy.py
            'downsample_volume_lambda': boss_config['lambda']['downsample_volume'],

            'aws_region': get_region(),

        }

        session = bossutils.aws.get_session()
        downsample_sfn = boss_config['sfn']['downsample_sfn']
        arn = bossutils.aws.sfn_execute(session, downsample_sfn, dict(args))

        # Change Status and Save ARN
        channel_obj = Channel.objects.get(name=channel.name, experiment=int(exp_id))
        channel_obj.downsample_status = "IN_PROGRESS"
        channel_obj.downsample_arn = arn
        channel_obj.save()

        return HttpResponse(status=201)
Example #27
File: setup.py Project: jhuapl-boss/boss
 def _delete_upload_queue(self, queue_url):
     """Method to delete a test sqs for uploading tiles for the ingest"""
     client = boto3.client('sqs', region_name=get_region())
     client.delete_queue(QueueUrl=queue_url)
Example #28
File: setup.py Project: jhuapl-boss/boss
 def _create_upload_queue(self, queue_name):
     """Method to create a test sqs for uploading tiles for the ingest"""
     client = boto3.client('sqs', region_name=get_region())
     response = client.create_queue(QueueName=queue_name)
     url = response['QueueUrl']
     return url
Example #29
 def _create_flush_queue(self, queue_name):
     """Method to create a test sqs for flushing cubes"""
     client = boto3.client('sqs', region_name=get_region())
     response = client.create_queue(QueueName=queue_name)
     url = response['QueueUrl']
     return url
Example #30
 def _create_cuboid_bucket(self, bucket_name):
     """Method to create the S3 bucket for cuboid storage"""
     client = boto3.client('s3', region_name=get_region())
     _ = client.create_bucket(ACL='private', Bucket=bucket_name)
     return client.get_waiter('bucket_exists')
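As with the DynamoDB helpers, the returned waiter lets the caller block until S3 reports the bucket; for example (inside the same setup helper, with a placeholder bucket name):

waiter = self._create_cuboid_bucket('example-cuboid-bucket')
waiter.wait(Bucket='example-cuboid-bucket')  # polls HeadBucket until the bucket exists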
Example #31
 def _delete_flush_queue(self, queue_url):
     """Method to delete a test sqs for flushing cubes"""
     client = boto3.client('sqs', region_name=get_region())
     client.delete_queue(QueueUrl=queue_url)
Example #32
    def setUpClass(cls):
        """ get_some_resource() is slow, to avoid calling it for each test use setUpClass()
            and store the result as class variable
        """

        # Suppress ResourceWarning messages about unclosed connections.
        warnings.simplefilter('ignore')

        cls.setUpParams(cls)

        lambda_client = boto3.client('lambda', region_name=get_region())
        cls.test_lambda = 'IntTest-{}'.format(cls.domain).replace('.', '-')
        try:
            lambda_client.delete_function(FunctionName=cls.test_lambda)
        except botocore.exceptions.ClientError:
            pass

        resp = lambda_client.get_function(FunctionName=cls.object_store_config['page_out_lambda_function'])
        lambda_cfg = resp['Configuration']
        vpc_cfg = lambda_cfg['VpcConfig']
        # VpcId is not a valid field when creating a lambda fcn.
        del vpc_cfg['VpcId']

        temp_file = tempfile.NamedTemporaryFile()
        temp_name = temp_file.name + '.zip'
        temp_file.close()
        with ZipFile(temp_name, mode='w') as zip:
            t = time.localtime()
            lambda_file = ZipInfo('lambda_function.py', date_time=(
                t.tm_year, t.tm_mon, t.tm_mday, t.tm_hour, t.tm_min, t.tm_sec))
            # Set file permissions.
            lambda_file.external_attr = 0o777 << 16
            code = 'def handler(event, context):\n    return\n'
            zip.writestr(lambda_file, code)

        with open(temp_name, 'rb') as zip2:
            lambda_bytes = zip2.read()

        lambda_client.create_function(
            FunctionName=cls.test_lambda,
            VpcConfig=vpc_cfg,
            Role=lambda_cfg['Role'],
            Runtime=lambda_cfg['Runtime'],
            Handler='lambda_function.handler',
            MemorySize=128,
            Code={'ZipFile': lambda_bytes})

        # Set page out function to the test lambda.
        cls.object_store_config['page_out_lambda_function'] = cls.test_lambda

        print('standby for queue creation (slow ~30s)')
        try:
            cls.object_store_config["s3_flush_queue"] = cls.setup_helper.create_flush_queue(cls.s3_flush_queue_name)
        except ClientError:
            try:
                cls.setup_helper.delete_flush_queue(cls.object_store_config["s3_flush_queue"])
            except:
                pass
            time.sleep(61)
            cls.object_store_config["s3_flush_queue"] = cls.setup_helper.create_flush_queue(cls.s3_flush_queue_name)

        print('done')
Example #33
File: views.py Project: jhuapl-boss/boss
    def post(self, request, collection, experiment, channel):
        """View to kick off a channel's downsample process

        Args:
            request: DRF Request object
            collection (str): Unique Collection identifier, indicating which collection you want to access
            experiment (str): Experiment identifier, indicating which experiment you want to access
            channel (str): Channel identifier, indicating which channel you want to access

        Returns:
            (HttpResponse)

        """
        # Process request and validate
        try:
            request_args = {
                "service": "downsample",
                "collection_name": collection,
                "experiment_name": experiment,
                "channel_name": channel
            }
            req = BossRequest(request, request_args)
        except BossError as err:
            return err.to_http()

        # Convert to Resource
        resource = project.BossResourceDjango(req)

        channel = resource.get_channel()
        if channel.downsample_status.upper() == "IN_PROGRESS":
            return BossHTTPError("Channel is currently being downsampled. Invalid Request.", ErrorCodes.INVALID_STATE)
        elif channel.downsample_status.upper() == "DOWNSAMPLED" and \
             not request.user.is_staff:
            return BossHTTPError("Channel is already downsampled. Invalid Request.", ErrorCodes.INVALID_STATE)

        if request.user.is_staff:
            # DP HACK: allow admin users to override the coordinate frame
            frame = request.data
        else:
            frame = {}

        # Call Step Function
        boss_config = bossutils.configuration.BossConfig()
        experiment = resource.get_experiment()
        coord_frame = resource.get_coord_frame()
        lookup_key = resource.get_lookup_key()
        col_id, exp_id, ch_id = lookup_key.split("&")

        def get_frame(idx):
            return int(frame.get(idx, getattr(coord_frame, idx)))

        args = {
            'collection_id': int(col_id),
            'experiment_id': int(exp_id),
            'channel_id': int(ch_id),
            'annotation_channel': not channel.is_image(),
            'data_type': resource.get_data_type(),

            's3_bucket': boss_config["aws"]["cuboid_bucket"],
            's3_index': boss_config["aws"]["s3-index-table"],

            'x_start': get_frame('x_start'),
            'y_start': get_frame('y_start'),
            'z_start': get_frame('z_start'),

            'x_stop': get_frame('x_stop'),
            'y_stop': get_frame('y_stop'),
            'z_stop': get_frame('z_stop'),

            'resolution': int(channel.base_resolution),
            'resolution_max': int(experiment.num_hierarchy_levels),
            'res_lt_max': int(channel.base_resolution) + 1 < int(experiment.num_hierarchy_levels),

            'type': experiment.hierarchy_method,
            'iso_resolution': int(resource.get_isotropic_level()),

            # This step function executes: boss-tools/activities/resolution_hierarchy.py
            'downsample_volume_lambda': boss_config['lambda']['downsample_volume'],

            'aws_region': get_region(),

        }

        session = bossutils.aws.get_session()
        downsample_sfn = boss_config['sfn']['downsample_sfn']
        arn = bossutils.aws.sfn_execute(session, downsample_sfn, dict(args))

        # Change Status and Save ARN
        channel_obj = Channel.objects.get(name=channel.name, experiment=int(exp_id))
        channel_obj.downsample_status = "IN_PROGRESS"
        channel_obj.downsample_arn = arn
        channel_obj.save()

        return HttpResponse(status=201)
Example #34
    def test_sqs_watcher_send_message(self):
        """Inject message into queue and test that SqsWatcher kicks off a lambda and writes cuboid to s3."""
        # Generate random data
        cube1 = Cube.create_cube(self.resource, [512, 512, 16])
        cube1.random()
        cube1.morton_id = 0

        sp = SpatialDB(self.kvio_config, self.state_config,
                       self.object_store_config)

        base_write_cuboid_key = "WRITE-CUBOID&{}&{}".format(
            self.resource.get_lookup_key(), 0)
        morton_idx = ndlib.XYZMorton([0, 0, 0])
        t = 0
        write_cuboid_key = sp.kvio.insert_cube_in_write_buffer(
            base_write_cuboid_key, t, morton_idx,
            cube1.to_blosc_by_time_index(t))

        # Put page out job on the queue
        sqs = boto3.client('sqs', region_name=get_region())

        msg_data = {
            "config": self.config_data,
            "write_cuboid_key": write_cuboid_key,
            "lambda-name": "s3_flush",
            "resource": self.resource.to_dict()
        }

        response = sqs.send_message(
            QueueUrl=self.object_store_config["s3_flush_queue"],
            MessageBody=json.dumps(msg_data))
        assert response['ResponseMetadata']['HTTPStatusCode'] == 200

        watcher = SqsWatcher(self.lambda_data)
        #  verify_queue() needs to be run multiple times to verify that the queue is not changing;
        #  only then does it send off a lambda message.
        time.sleep(5)
        watcher.verify_queue()
        time.sleep(5)
        lambdas_invoked = watcher.verify_queue()
        if lambdas_invoked < 1:
            time.sleep(5)
            watcher.verify_queue()
        time.sleep(15)

        client = boto3.client('sqs', region_name=get_region())
        response = client.get_queue_attributes(
            QueueUrl=self.object_store_config["s3_flush_queue"],
            AttributeNames=[
                'ApproximateNumberOfMessages',
                'ApproximateNumberOfMessagesNotVisible'
            ])
        https_status_code = response['ResponseMetadata']['HTTPStatusCode']
        queue_count = int(
            response['Attributes']['ApproximateNumberOfMessages'])
        # test that the queue count is now 0
        assert queue_count == 0

        s3 = boto3.client('s3', region_name=get_region())
        objects_list = s3.list_objects(
            Bucket=self.object_store_config['cuboid_bucket'])
        # tests that bucket has some Contents.
        assert "Contents" in objects_list.keys()
Example #35
 def _delete_upload_queue(self, queue_url):
     """Method to delete a test sqs for uploading tiles for the ingest"""
     client = boto3.client('sqs', region_name=get_region())
     client.delete_queue(QueueUrl=queue_url)
Example #36
    def post(self, request, collection, experiment, channel):
        """View to kick off a channel's downsample process

        Args:
            request: DRF Request object
            collection (str): Unique Collection identifier, indicating which collection you want to access
            experiment (str): Experiment identifier, indicating which experiment you want to access
            channel (str): Channel identifier, indicating which channel you want to access

        Returns:
            (HttpResponse)

        """
        # Process request and validate
        try:
            request_args = {
                "service": "downsample",
                "collection_name": collection,
                "experiment_name": experiment,
                "channel_name": channel
            }
            req = BossRequest(request, request_args)
        except BossError as err:
            return err.to_http()

        # Convert to Resource
        resource = project.BossResourceDjango(req)

        channel = resource.get_channel()
        if channel.downsample_status.upper() == "IN_PROGRESS":
            return BossHTTPError(
                "Channel is currently being downsampled. Invalid Request.",
                ErrorCodes.INVALID_STATE)
        elif channel.downsample_status.upper() == "DOWNSAMPLED" and \
             not request.user.is_staff:
            return BossHTTPError(
                "Channel is already downsampled. Invalid Request.",
                ErrorCodes.INVALID_STATE)

        session = bossutils.aws.get_session()

        # Make sure only one Channel is downsampled at a time
        channel_objs = Channel.objects.filter(downsample_status='IN_PROGRESS')
        for channel_obj in channel_objs:
            # Verify that the channel is still being downsampled
            status = bossutils.aws.sfn_status(session,
                                              channel_obj.downsample_arn)
            if status == 'RUNNING':
                return BossHTTPError(
                    "Another Channel is currently being downsampled. Invalid Request.",
                    ErrorCodes.INVALID_STATE)

        if request.user.is_staff:
            # DP HACK: allow admin users to override the coordinate frame
            frame = request.data
        else:
            frame = {}

        # Call Step Function
        boss_config = bossutils.configuration.BossConfig()
        experiment = resource.get_experiment()
        coord_frame = resource.get_coord_frame()
        lookup_key = resource.get_lookup_key()
        col_id, exp_id, ch_id = lookup_key.split("&")

        def get_frame(idx):
            return int(frame.get(idx, getattr(coord_frame, idx)))

        args = {
            'collection_id': int(col_id),
            'experiment_id': int(exp_id),
            'channel_id': int(ch_id),
            'annotation_channel': not channel.is_image(),
            'data_type': resource.get_data_type(),

            's3_bucket': boss_config["aws"]["cuboid_bucket"],
            's3_index': boss_config["aws"]["s3-index-table"],

            'x_start': get_frame('x_start'),
            'y_start': get_frame('y_start'),
            'z_start': get_frame('z_start'),

            'x_stop': get_frame('x_stop'),
            'y_stop': get_frame('y_stop'),
            'z_stop': get_frame('z_stop'),

            'resolution': int(channel.base_resolution),
            'resolution_max': int(experiment.num_hierarchy_levels),
            'res_lt_max': int(channel.base_resolution) + 1 < int(experiment.num_hierarchy_levels),

            'type': experiment.hierarchy_method,
            'iso_resolution': int(resource.get_isotropic_level()),

            # This step function executes: boss-tools/activities/resolution_hierarchy.py
            'downsample_volume_lambda': boss_config['lambda']['downsample_volume'],

            'aws_region': get_region(),
        }

        # Check that only administrators are triggering extra large downsamples
        if (not request.user.is_staff) and \
           ((args['x_stop'] - args['x_start']) * \
            (args['y_stop'] - args['y_start']) * \
            (args['z_stop'] - args['z_start']) > settings.DOWNSAMPLE_MAX_SIZE):
            return BossHTTPError(
                "Large downsamples require admin permissions to trigger. Invalid Request.",
                ErrorCodes.INVALID_STATE)

        # Add metrics to CloudWatch
        def get_cubes(axis, dim):
            extent = args['{}_stop'.format(axis)] - args['{}_start'.format(axis)]
            return -(-extent // dim)  # ceil div

        cost = (
            get_cubes('x', 512) * get_cubes('y', 512) * get_cubes('z', 16) / 4  # Number of cubes for a downsampled volume
            * 0.75  # Assume the frame is only 75% filled
            * 2     # 1 for invoking a lambda, 1 for the time it takes the lambda to run
            * 1.33  # Add 33% overhead for all other non-base resolution downsamples
        )

        dimensions = [
            {'Name': 'User', 'Value': request.user.username},
            {'Name': 'Resource', 'Value': '{}/{}/{}'.format(collection, experiment.name, channel.name)},
            {'Name': 'Stack', 'Value': boss_config['system']['fqdn']},
        ]

        client = session.client('cloudwatch')
        client.put_metric_data(
            Namespace="BOSS/Downsample",
            MetricData=[
                {'MetricName': 'InvokeCount', 'Dimensions': dimensions, 'Value': 1.0, 'Unit': 'Count'},
                {'MetricName': 'ComputeCost', 'Dimensions': dimensions, 'Value': cost, 'Unit': 'Count'},
            ])

        # Start downsample
        downsample_sfn = boss_config['sfn']['downsample_sfn']
        arn = bossutils.aws.sfn_execute(session, downsample_sfn, dict(args))

        # Change Status and Save ARN
        channel_obj = Channel.objects.get(name=channel.name,
                                          experiment=int(exp_id))
        channel_obj.downsample_status = "IN_PROGRESS"
        channel_obj.downsample_arn = arn
        channel_obj.save()

        return HttpResponse(status=201)
Example #37
def start(request, resource):
    """Main code to start a downsample

    Args:
        request: DRF Request object
        resource (BossResourceDjango): The channel to downsample

    Returns:
        (HttpResponse)
    """

    channel = resource.get_channel()
    chan_status = channel.downsample_status.upper()
    if chan_status == Channel.DownsampleStatus.IN_PROGRESS:
        return BossHTTPError(
            "Channel is currently being downsampled. Invalid Request.",
            ErrorCodes.INVALID_STATE)
    elif chan_status == Channel.DownsampleStatus.QUEUED:
        return BossHTTPError(
            "Channel is already waiting to be downsampled. Invalid Request.",
            ErrorCodes.INVALID_STATE)
    elif chan_status == Channel.DownsampleStatus.DOWNSAMPLED and not request.user.is_staff:
        return BossHTTPError(
            "Channel is already downsampled. Invalid Request.",
            ErrorCodes.INVALID_STATE)

    if request.user.is_staff:
        # DP HACK: allow admin users to override the coordinate frame
        frame = request.data
    else:
        frame = {}

    boss_config = BossConfig()
    collection = resource.get_collection()
    experiment = resource.get_experiment()
    coord_frame = resource.get_coord_frame()
    lookup_key = resource.get_lookup_key()
    col_id, exp_id, ch_id = lookup_key.split("&")

    def get_frame(idx):
        return int(frame.get(idx, getattr(coord_frame, idx)))

    downsample_sfn = boss_config['sfn']['downsample_sfn']
    db_host = boss_config['aws']['db']

    args = {
        'lookup_key': lookup_key,
        'collection_id': int(col_id),
        'experiment_id': int(exp_id),
        'channel_id': int(ch_id),
        'annotation_channel': not channel.is_image(),
        'data_type': resource.get_data_type(),

        's3_bucket': boss_config["aws"]["cuboid_bucket"],
        's3_index': boss_config["aws"]["s3-index-table"],

        'x_start': get_frame('x_start'),
        'y_start': get_frame('y_start'),
        'z_start': get_frame('z_start'),

        'x_stop': get_frame('x_stop'),
        'y_stop': get_frame('y_stop'),
        'z_stop': get_frame('z_stop'),

        'resolution': int(channel.base_resolution),
        'resolution_max': int(experiment.num_hierarchy_levels),
        'res_lt_max': int(channel.base_resolution) + 1 < int(experiment.num_hierarchy_levels),

        'type': experiment.hierarchy_method,
        'iso_resolution': int(resource.get_isotropic_level()),

        # This step function executes: boss-tools/activities/resolution_hierarchy.py
        'downsample_volume_lambda': boss_config['lambda']['downsample_volume'],

        # Need to pass step function's ARN to itself, so it can start another
        # instance of itself after finishing a downsample.
        'sfn_arn': downsample_sfn,
        'db_host': db_host,
        'aws_region': get_region(),
    }

    # Check that only administrators are triggering extra large downsamples
    if ((not request.user.is_staff) and
       ((args['x_stop'] - args['x_start']) *\
        (args['y_stop'] - args['y_start']) *\
        (args['z_stop'] - args['z_start']) > settings.DOWNSAMPLE_MAX_SIZE)):
        return BossHTTPError(
            "Large downsamples require admin permissions to trigger. Invalid Request.",
            ErrorCodes.INVALID_STATE)

    session = get_session()

    downsample_sqs = boss_config['aws']['downsample-queue']

    try:
        enqueue_job(session, args, downsample_sqs)
    except BossError as be:
        return BossHTTPError(be.message, be.error_code)

    compute_usage_metrics(session, args, boss_config['system']['fqdn'],
                          request.user.username or "public", collection.name,
                          experiment.name, channel.name)

    region = get_region()
    account_id = get_account_id()
    downsample_sfn_arn = f'arn:aws:states:{region}:{account_id}:stateMachine:{downsample_sfn}'
    if not check_for_running_sfn(session, downsample_sfn_arn):
        bossutils.aws.sfn_run(session, downsample_sfn_arn, {
            'queue_url': downsample_sqs,
            'sfn_arn': downsample_sfn_arn,
        })

    return HttpResponse(status=201)
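check_for_running_sfn() is not shown in this listing; a plausible sketch using the Step Functions ListExecutions API (the session argument mirrors the boto3 session used above) might be:

def check_for_running_sfn(session, sfn_arn):
    """Hypothetical sketch: return True if the state machine already has a RUNNING execution."""
    client = session.client('stepfunctions')
    resp = client.list_executions(stateMachineArn=sfn_arn,
                                  statusFilter='RUNNING',
                                  maxResults=1)
    return len(resp['executions']) > 0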