Exemplo n.º 1
0
    def invoke_ingest_lambda(self, ingest_job, num_invokes=1):
        """Method to trigger extra lambda functions to make sure all the ingest jobs that are actually fully populated
        kick through

        Args:
            ingest_job: Ingest job object
            num_invokes(int): number of invocations to fire

        Returns:

        """
        bosskey = ingest_job.collection + CONNECTER + ingest_job.experiment + CONNECTER + ingest_job.channel
        lookup_key = (LookUpKey.get_lookup_key(bosskey)).lookup_key
        [col_id, exp_id, ch_id] = lookup_key.split('&')
        project_info = [col_id, exp_id, ch_id]
        fake_chunk_key = (BossBackend(self.config)).encode_chunk_key(
            16, project_info, ingest_job.resolution, 0, 0, 0, 0)

        event = {
            "ingest_job": ingest_job.id,
            "chunk_key": fake_chunk_key,
            "lambda-name": "ingest"
        }

        # Invoke Ingest lambda functions
        lambda_client = boto3.client('lambda',
                                     region_name=bossutils.aws.get_region())
        for _ in range(0, num_invokes):
            lambda_client.invoke(FunctionName=INGEST_LAMBDA,
                                 InvocationType='Event',
                                 Payload=json.dumps(event).encode())
Exemplo n.º 2
0
    def invoke_ingest_lambda(self, ingest_job, num_invokes=1):
        """Method to trigger extra lambda functions to make sure all the ingest jobs that are actually fully populated
        kick through

        Args:
            ingest_job: Ingest job object
            num_invokes(int): number of invocations to fire

        Returns:

        """
        bosskey = ingest_job.collection + CONNECTOR + ingest_job.experiment + CONNECTOR + ingest_job.channel
        lookup_key = (LookUpKey.get_lookup_key(bosskey)).lookup_key
        [col_id, exp_id, ch_id] = lookup_key.split('&')
        project_info = [col_id, exp_id, ch_id]
        fake_chunk_key = (BossBackend(self.config)).encode_chunk_key(16, project_info,
                                                                     ingest_job.resolution,
                                                                     0, 0, 0, 0)

        event = {"ingest_job": ingest_job.id,
                 "chunk_key": fake_chunk_key,
                 "function-name": INGEST_LAMBDA,
                 "lambda-name": "ingest"}

        # Invoke Ingest lambda functions
        lambda_client = boto3.client('lambda', region_name=bossutils.aws.get_region())
        for _ in range(0, num_invokes):
            lambda_client.invoke(FunctionName=INGEST_LAMBDA,
                                 InvocationType='Event',
                                 Payload=json.dumps(event).encode())
Exemplo n.º 3
0
    def _generate_upload_queue_args(self, ingest_job):
        """
        Generate dictionary to include in messages placed in the tile upload queue.
        
        Args:
            ingest_job (IngestJob):

        Returns:
            (dict)

        Raises:
            (BossError): If ingest_job.ingest_type invalid.
        """

        bosskey = ingest_job.collection + CONNECTOR + ingest_job.experiment + CONNECTOR + ingest_job.channel
        lookup_key = (LookUpKey.get_lookup_key(bosskey)).lookup_key
        [col_id, exp_id, ch_id] = lookup_key.split('&')

        args = {
            'job_id': ingest_job.id,
            'upload_queue': ingest_job.upload_queue,
            'ingest_queue': ingest_job.ingest_queue,

            'resolution': ingest_job.resolution,
            'project_info': lookup_key.split(CONNECTOR),
            'ingest_type': ingest_job.ingest_type,

            't_start': ingest_job.t_start,
            't_stop': ingest_job.t_stop,
            't_tile_size': 1,

            'x_start': ingest_job.x_start,
            'x_stop': ingest_job.x_stop,
            'x_tile_size': ingest_job.tile_size_x,

            'y_start': ingest_job.y_start,
            'y_stop': ingest_job.y_stop,
            'y_tile_size': ingest_job.tile_size_y,

            'z_start': ingest_job.z_start,
            'z_stop': ingest_job.z_stop,
            'z_tile_size': 1
        }


        if ingest_job.ingest_type == IngestJob.TILE_INGEST:
            # Always the Boss cuboid z size for tile jobs.
            args['z_chunk_size'] = 16
        elif ingest_job.ingest_type == IngestJob.VOLUMETRIC_INGEST:
            # tile_size_* holds the chunk size dimensions for volumetric jobs.
            args['z_chunk_size'] = ingest_job.tile_size_z
        else:
            raise BossError(
                "Ingest job's ingest_type has invalid value: {}".format(
                    self.job.ingest_type), ErrorCodes.UNABLE_TO_VALIDATE)
        return args
Exemplo n.º 4
0
    def _generate_upload_queue_args(self, ingest_job):
        """
        Generate dictionary to include in messages placed in the tile upload queue.
        
        Args:
            ingest_job (IngestJob):

        Returns:
            (dict)

        Raises:
            (BossError): If ingest_job.ingest_type invalid.
        """

        bosskey = ingest_job.collection + CONNECTOR + ingest_job.experiment + CONNECTOR + ingest_job.channel
        lookup_key = (LookUpKey.get_lookup_key(bosskey)).lookup_key
        [col_id, exp_id, ch_id] = lookup_key.split('&')

        args = {
            'job_id': ingest_job.id,
            'upload_queue': ingest_job.upload_queue,
            'ingest_queue': ingest_job.ingest_queue,

            'resolution': ingest_job.resolution,
            'project_info': lookup_key.split(CONNECTOR),
            'ingest_type': ingest_job.ingest_type,

            't_start': ingest_job.t_start,
            't_stop': ingest_job.t_stop,
            't_tile_size': 1,

            'x_start': ingest_job.x_start,
            'x_stop': ingest_job.x_stop,
            'x_tile_size': ingest_job.tile_size_x,

            'y_start': ingest_job.y_start,
            'y_stop': ingest_job.y_stop,
            'y_tile_size': ingest_job.tile_size_y,

            'z_start': ingest_job.z_start,
            'z_stop': ingest_job.z_stop,
            'z_tile_size': 1
        }


        if ingest_job.ingest_type == IngestJob.TILE_INGEST:
            # Always the Boss cuboid z size for tile jobs.
            args['z_chunk_size'] = 16
        elif ingest_job.ingest_type == IngestJob.VOLUMETRIC_INGEST:
            # tile_size_* holds the chunk size dimensions for volumetric jobs.
            args['z_chunk_size'] = ingest_job.tile_size_z
        else:
            raise BossError(
                "Ingest job's ingest_type has invalid value: {}".format(
                    self.job.ingest_type), ErrorCodes.UNABLE_TO_VALIDATE)
        return args
Exemplo n.º 5
0
    def populate_upload_queue(self):
        """Execute the populate_upload_queue Step Function

        Returns:
            string: ARN of the StepFunction Execution started

        Raises:
            BossError : if there is no valid ingest job
        """

        if self.job is None:
            raise BossError(
                "Unable to generate upload tasks for the ingest service. Please specify a ingest job",
                ErrorCodes.UNABLE_TO_VALIDATE)

        ingest_job = self.job

        bosskey = ingest_job.collection + CONNECTER + ingest_job.experiment + CONNECTER + ingest_job.channel
        lookup_key = (LookUpKey.get_lookup_key(bosskey)).lookup_key
        [col_id, exp_id, ch_id] = lookup_key.split('&')
        project_info = [col_id, exp_id, ch_id]

        # TODO DP ???: create IngestJob method that creates the StepFunction arguments?
        args = {
            'upload_sfn': config['sfn']['upload_sfn'],
            'job_id': ingest_job.id,
            'upload_queue': ingest_job.upload_queue,
            'ingest_queue': ingest_job.ingest_queue,
            'resolution': ingest_job.resolution,
            'project_info': lookup_key.split(CONNECTER),
            't_start': ingest_job.t_start,
            't_stop': ingest_job.t_stop,
            't_tile_size': 1,
            'x_start': ingest_job.x_start,
            'x_stop': ingest_job.x_stop,
            'x_tile_size': ingest_job.tile_size_x,
            'y_start': ingest_job.y_start,
            'y_stop': ingest_job.y_stop,
            'y_tile_size': ingest_job.tile_size_y,
            'z_start': ingest_job.z_start,
            'z_stop': ingest_job.z_stop,
            'z_tile_size': 16,
        }

        session = bossutils.aws.get_session()
        populate_sfn = config['sfn']['populate_upload_queue']
        arn = bossutils.aws.sfn_execute(session, populate_sfn, args)

        return arn
Exemplo n.º 6
0
    def generate_upload_tasks(self, job_id=None):
        """
        Generate upload tasks for the ingest job. This creates once task for each tile that has to be uploaded in the
        ingest queue

        Args:
            job_id: Job id of the ingest queue. If not included this takes the current ingest job

        Returns:
            None
        Raises:
            BossError : if there is no valid ingest job

        """

        if job_id is None and self.job is None:
            raise BossError(
                "Unable to generate upload tasks for the ingest service. Please specify a ingest job",
                ErrorCodes.UNABLE_TO_VALIDATE)
        elif job_id:
            # Using the job id to get the job
            try:
                ingest_job = IngestJob.objects.get(id=job_id)
            except IngestJob.DoesNotExist:
                raise BossError(
                    "Ingest job with id {} does not exist".format(job_id),
                    ErrorCodes.RESOURCE_NOT_FOUND)
        else:
            ingest_job = self.job

        # Generate upload tasks for the ingest job
        # Get the project information
        bosskey = ingest_job.collection + CONNECTER + ingest_job.experiment + CONNECTER + ingest_job.channel
        lookup_key = (LookUpKey.get_lookup_key(bosskey)).lookup_key
        [col_id, exp_id, ch_id] = lookup_key.split('&')
        project_info = [col_id, exp_id, ch_id]

        # Batch messages and write to file
        base_file_name = 'tasks_' + lookup_key + '_' + str(ingest_job.id)
        self.file_index = 0

        # open file
        f = io.StringIO()
        header = {
            'job_id': ingest_job.id,
            'upload_queue_url': ingest_job.upload_queue,
            'ingest_queue_url': ingest_job.ingest_queue
        }
        f.write(json.dumps(header))
        f.write('\n')
        num_msg_per_file = 0

        for time_step in range(ingest_job.t_start, ingest_job.t_stop, 1):
            # For each time step, compute the chunks and tile keys

            for z in range(ingest_job.z_start, ingest_job.z_stop, 16):
                for y in range(ingest_job.y_start, ingest_job.y_stop,
                               ingest_job.tile_size_y):
                    for x in range(ingest_job.x_start, ingest_job.x_stop,
                                   ingest_job.tile_size_x):

                        # compute the chunk indices
                        chunk_x = int(x / ingest_job.tile_size_x)
                        chunk_y = int(y / ingest_job.tile_size_y)
                        chunk_z = int(z / 16)

                        # Compute the number of tiles in the chunk
                        if ingest_job.z_stop - z >= 16:
                            num_of_tiles = 16
                        else:
                            num_of_tiles = ingest_job.z_stop - z

                        # Generate the chunk key
                        chunk_key = (BossBackend(
                            self.config)).encode_chunk_key(
                                num_of_tiles, project_info,
                                ingest_job.resolution, chunk_x, chunk_y,
                                chunk_z, time_step)

                        self.num_of_chunks += 1

                        # get the tiles keys for this chunk
                        for tile in range(z, z + num_of_tiles):
                            # get the tile key
                            tile_key = (BossBackend(
                                self.config)).encode_tile_key(
                                    project_info, ingest_job.resolution,
                                    chunk_x, chunk_y, tile, time_step)
                            self.count_of_tiles += 1

                            # Generate the upload task msg
                            msg = chunk_key + ',' + tile_key + '\n'
                            f.write(msg)
                            num_msg_per_file += 1

                            # if there are 10 messages in the batch send it to the upload queue.
                            if num_msg_per_file == MAX_NUM_MSG_PER_FILE:
                                fname = base_file_name + '_' + str(
                                    self.file_index + 1) + '.txt'
                                self.upload_task_file(fname, f.getvalue())
                                self.file_index += 1
                                f.close()
                                # status = self.send_upload_message_batch(batch_msg)

                                fname = base_file_name + '_' + str(
                                    self.file_index + 1) + '.txt'
                                f = io.StringIO()
                                header = {
                                    'job_id': ingest_job.id,
                                    'upload_queue_url':
                                    ingest_job.upload_queue,
                                    'ingest_queue_url': ingest_job.ingest_queue
                                }
                                f.write(json.dumps(header))
                                f.write('\n')
                                num_msg_per_file = 0

        # Edge case: the last batch size maybe smaller than 10
        if num_msg_per_file != 0:
            fname = base_file_name + '_' + str(self.file_index + 1) + '.txt'
            self.upload_task_file(fname, f.getvalue())
            f.close()
            self.file_index += 1
            num_msg_per_file = 0

        # Update status
        self.job.tile_count = self.count_of_tiles
        self.job.save()
Exemplo n.º 7
0
    def generate_upload_tasks(self, job_id=None):
        """

        Args:
            job_id:

        Returns:

        """

        if job_id is None and self.job is None:
            raise BossError(
                "Unable to generate upload tasks for the ingest service. Please specify a ingest job",
                ErrorCodes.UNABLE_TO_VALIDATE)
        elif job_id:
            # Using the job id to get the job
            try:
                ingest_job = IngestJob.objects.get(id=job_id)
            except IngestJob.DoesNotExist:
                raise BossError(
                    "Ingest job with id {} does not exist".format(job_id),
                    ErrorCodes.RESOURCE_NOT_FOUND)
        else:
            ingest_job = self.job

        # Generate upload tasks for the ingest job
        # Get the project information
        bosskey = ingest_job.collection + CONNECTER + ingest_job.experiment + CONNECTER + ingest_job.channel_layer
        lookup_key = (LookUpKey.get_lookup_key(bosskey)).lookup_key
        [col_id, exp_id, ch_id] = lookup_key.split('&')
        project_info = [col_id, exp_id, ch_id]

        for time_step in range(ingest_job.t_start, ingest_job.t_stop, 1):
            # For each time step, compute the chunks and tile keys

            for z in range(ingest_job.z_start, ingest_job.z_stop, 16):
                for y in range(ingest_job.y_start, ingest_job.y_stop,
                               ingest_job.tile_size_y):
                    for x in range(ingest_job.x_start, ingest_job.x_stop,
                                   ingest_job.tile_size_x):

                        # compute the chunk indices
                        chunk_x = int(x / ingest_job.tile_size_x)
                        chunk_y = int(y / ingest_job.tile_size_y)
                        chunk_z = int(z / 16)

                        # Compute the number of tiles in the chunk
                        if ingest_job.z_stop - z >= 16:
                            num_of_tiles = 16
                        else:
                            num_of_tiles = ingest_job.z_stop - z

                        # Generate the chunk key
                        chunk_key = (BossBackend(
                            self.config)).encode_chunk_key(
                                num_of_tiles, project_info,
                                ingest_job.resolution, chunk_x, chunk_y,
                                chunk_z, time_step)
                        # get the tiles keys for this chunk
                        for tile in range(0, num_of_tiles):
                            # get the tile key
                            tile_key = (BossBackend(
                                self.config)).encode_tile_key(
                                    project_info, ingest_job.resolution,
                                    chunk_x, chunk_y, tile, time_step)

                            # Generate the upload task msg
                            msg = self.create_upload_task_message(
                                ingest_job.id, chunk_key, tile_key,
                                ingest_job.upload_queue,
                                ingest_job.ingest_queue)

                            # Upload the message
                            self.send_upload_task_message(msg)
Exemplo n.º 8
0
    def test_upload_tile_index_table(self):
        """"""
        ingest_mgmr = IngestManager()
        ingest_mgmr.validate_config_file(self.example_config_data)
        ingest_mgmr.validate_properties()
        ingest_mgmr.owner = self.user.pk
        ingest_job = ingest_mgmr.create_ingest_job()
        assert (ingest_job.id is not None)

        # Get the chunks in this job
        # Get the project information
        bosskey = ingest_job.collection + '&' + ingest_job.experiment + '&' + ingest_job.channel_layer
        lookup_key = (LookUpKey.get_lookup_key(bosskey)).lookup_key
        [col_id, exp_id, ch_id] = lookup_key.split('&')
        project_info = [col_id, exp_id, ch_id]
        proj_name = ingest_job.collection + '&' + ingest_job.experiment
        tile_index_db = BossTileIndexDB(proj_name)
        tilebucket = TileBucket(str(col_id) + '&' + str(exp_id))

        for time_step in range(ingest_job.t_start, ingest_job.t_stop, 1):
            # For each time step, compute the chunks and tile keys

            for z in range(ingest_job.z_start, ingest_job.z_stop, 16):
                for y in range(ingest_job.y_start, ingest_job.y_stop,
                               ingest_job.tile_size_y):
                    for x in range(ingest_job.x_start, ingest_job.x_stop,
                                   ingest_job.tile_size_x):

                        # compute the chunk indices
                        chunk_x = int(x / ingest_job.tile_size_x)
                        chunk_y = int(y / ingest_job.tile_size_y)
                        chunk_z = int(z / 16)

                        # Compute the number of tiles in the chunk
                        if ingest_job.z_stop - z >= 16:
                            num_of_tiles = 16
                        else:
                            num_of_tiles = ingest_job.z_stop - z

                        # Generate the chunk key
                        chunk_key = (BossBackend(
                            ingest_mgmr.config)).encode_chunk_key(
                                num_of_tiles, project_info,
                                ingest_job.resolution, chunk_x, chunk_y,
                                chunk_z, time_step)
                        # Upload the chunk to the tile index db
                        tile_index_db.createCuboidEntry(
                            chunk_key, ingest_job.id)
                        key_map = {}
                        for tile in range(0, num_of_tiles):
                            # get the object key and upload it
                            #tile_key = tilebucket.encodeObjectKey(ch_id, ingest_job.resolution,
                            #                              chunk_x, chunk_y, tile, time_step)
                            tile_key = 'fakekey' + str(tile)
                            tile_index_db.markTileAsUploaded(
                                chunk_key, tile_key)

                        # for each chunk key, delete entries from the tile_bucket

        # Check if data has been uploaded
        chunks = list(tile_index_db.getTaskItems(ingest_job.id))
        assert (len(chunks) != 0)

        ingest_mgmr.delete_tiles(ingest_job)
        chunks = list(tile_index_db.getTaskItems(ingest_job.id))
        assert (len(chunks) == 0)