def invoke_ingest_lambda(self, ingest_job, num_invokes=1):
    """Fire extra async invocations of the ingest lambda for a job.

    Used to make sure chunks that are already fully populated still get
    processed (kick through) if their original lambda never completed.

    Args:
        ingest_job: Ingest job object (provides collection/experiment/channel,
            resolution, and id)
        num_invokes (int): number of asynchronous lambda invocations to fire

    Returns:
        None
    """
    # Build the boss key (collection + experiment + channel) and resolve it
    # to the internal lookup key, whose parts are '&'-separated ids.
    bosskey = ingest_job.collection + CONNECTER + ingest_job.experiment + CONNECTER + ingest_job.channel
    lookup_key = (LookUpKey.get_lookup_key(bosskey)).lookup_key
    [col_id, exp_id, ch_id] = lookup_key.split('&')
    project_info = [col_id, exp_id, ch_id]

    # The lambda payload requires a chunk key; a placeholder key at the
    # origin (x=0, y=0, z=0, t=0) with 16 tiles is used for every invocation.
    fake_chunk_key = (BossBackend(self.config)).encode_chunk_key(
        16, project_info, ingest_job.resolution, 0, 0, 0, 0)

    event = {
        "ingest_job": ingest_job.id,
        "chunk_key": fake_chunk_key,
        "lambda-name": "ingest"
    }

    # Invoke Ingest lambda functions asynchronously ('Event' invocation type:
    # fire-and-forget, no response payload is awaited).
    lambda_client = boto3.client('lambda', region_name=bossutils.aws.get_region())
    for _ in range(0, num_invokes):
        lambda_client.invoke(FunctionName=INGEST_LAMBDA,
                             InvocationType='Event',
                             Payload=json.dumps(event).encode())
def invoke_ingest_lambda(self, ingest_job, num_invokes=1):
    """Fire extra async ingest-lambda invocations for the given job.

    Ensures chunks that are actually fully populated get pushed through
    even if their original lambda invocation did not complete.

    Args:
        ingest_job: Ingest job object
        num_invokes (int): number of asynchronous invocations to fire

    Returns:
        None
    """
    # Resolve the boss key into the internal '&'-separated id triple.
    boss_key = CONNECTOR.join([ingest_job.collection, ingest_job.experiment, ingest_job.channel])
    resolved = LookUpKey.get_lookup_key(boss_key).lookup_key
    col_id, exp_id, ch_id = resolved.split('&')
    project_info = [col_id, exp_id, ch_id]

    # Placeholder chunk key at the origin (0, 0, 0, t=0) with 16 tiles —
    # the lambda payload schema requires one.
    placeholder_chunk_key = BossBackend(self.config).encode_chunk_key(
        16, project_info, ingest_job.resolution, 0, 0, 0, 0)

    payload = json.dumps({
        "ingest_job": ingest_job.id,
        "chunk_key": placeholder_chunk_key,
        "function-name": INGEST_LAMBDA,
        "lambda-name": "ingest"
    }).encode()

    # Fire-and-forget ('Event') invocations; no response is awaited.
    client = boto3.client('lambda', region_name=bossutils.aws.get_region())
    for _ in range(num_invokes):
        client.invoke(FunctionName=INGEST_LAMBDA,
                      InvocationType='Event',
                      Payload=payload)
def _generate_upload_queue_args(self, ingest_job):
    """
    Generate dictionary to include in messages placed in the tile upload queue.

    Args:
        ingest_job (IngestJob): job whose extents/queues populate the message.

    Returns:
        (dict): message arguments (job id, queues, extents, tile/chunk sizes).

    Raises:
        (BossError): If ingest_job.ingest_type invalid.
    """
    # Resolve the boss key (collection&experiment&channel) to the internal
    # lookup key; its '&'-separated parts become project_info.
    bosskey = ingest_job.collection + CONNECTOR + ingest_job.experiment + CONNECTOR + ingest_job.channel
    lookup_key = (LookUpKey.get_lookup_key(bosskey)).lookup_key

    args = {
        'job_id': ingest_job.id,
        'upload_queue': ingest_job.upload_queue,
        'ingest_queue': ingest_job.ingest_queue,
        'resolution': ingest_job.resolution,
        'project_info': lookup_key.split(CONNECTOR),
        'ingest_type': ingest_job.ingest_type,
        't_start': ingest_job.t_start,
        't_stop': ingest_job.t_stop,
        't_tile_size': 1,
        'x_start': ingest_job.x_start,
        'x_stop': ingest_job.x_stop,
        'x_tile_size': ingest_job.tile_size_x,
        'y_start': ingest_job.y_start,
        'y_stop': ingest_job.y_stop,
        'y_tile_size': ingest_job.tile_size_y,
        'z_start': ingest_job.z_start,
        'z_stop': ingest_job.z_stop,
        'z_tile_size': 1
    }

    if ingest_job.ingest_type == IngestJob.TILE_INGEST:
        # Always the Boss cuboid z size for tile jobs.
        args['z_chunk_size'] = 16
    elif ingest_job.ingest_type == IngestJob.VOLUMETRIC_INGEST:
        # tile_size_* holds the chunk size dimensions for volumetric jobs.
        args['z_chunk_size'] = ingest_job.tile_size_z
    else:
        # Fix: previously formatted self.job.ingest_type, which reports the
        # wrong job (or fails) when ingest_job is not self.job.
        raise BossError(
            "Ingest job's ingest_type has invalid value: {}".format(
                ingest_job.ingest_type), ErrorCodes.UNABLE_TO_VALIDATE)

    return args
def populate_upload_queue(self):
    """Execute the populate_upload_queue Step Function.

    Returns:
        string: ARN of the StepFunction Execution started

    Raises:
        BossError: if there is no valid ingest job
    """
    # Guard clause: a current job must be set on the service.
    if self.job is None:
        raise BossError(
            "Unable to generate upload tasks for the ingest service. Please specify a ingest job",
            ErrorCodes.UNABLE_TO_VALIDATE)

    job = self.job

    # Resolve the boss key into the internal '&'-separated id triple.
    bosskey = job.collection + CONNECTER + job.experiment + CONNECTER + job.channel
    lookup_key = LookUpKey.get_lookup_key(bosskey).lookup_key
    col_id, exp_id, ch_id = lookup_key.split('&')
    project_info = [col_id, exp_id, ch_id]

    # TODO DP ???: create IngestJob method that creates the StepFunction arguments?
    args = dict(
        upload_sfn=config['sfn']['upload_sfn'],
        job_id=job.id,
        upload_queue=job.upload_queue,
        ingest_queue=job.ingest_queue,
        resolution=job.resolution,
        project_info=lookup_key.split(CONNECTER),
        t_start=job.t_start,
        t_stop=job.t_stop,
        t_tile_size=1,
        x_start=job.x_start,
        x_stop=job.x_stop,
        x_tile_size=job.tile_size_x,
        y_start=job.y_start,
        y_stop=job.y_stop,
        y_tile_size=job.tile_size_y,
        z_start=job.z_start,
        z_stop=job.z_stop,
        z_tile_size=16,
    )

    # Kick off the populate step function and hand back the execution ARN.
    session = bossutils.aws.get_session()
    return bossutils.aws.sfn_execute(session, config['sfn']['populate_upload_queue'], args)
def generate_upload_tasks(self, job_id=None):
    """Generate upload tasks for the ingest job.

    Creates one task per tile to be uploaded, batching tasks into files of
    MAX_NUM_MSG_PER_FILE messages (each file starts with a JSON header line)
    and handing each file to self.upload_task_file.

    Args:
        job_id: Job id of the ingest queue. If not included this takes the
            current ingest job

    Returns:
        None

    Raises:
        BossError : if there is no valid ingest job
    """
    # Resolve which job to operate on: explicit job_id wins, otherwise the
    # service's current job; neither present is an error.
    if job_id is None and self.job is None:
        raise BossError(
            "Unable to generate upload tasks for the ingest service. Please specify a ingest job",
            ErrorCodes.UNABLE_TO_VALIDATE)
    elif job_id:
        # Using the job id to get the job
        try:
            ingest_job = IngestJob.objects.get(id=job_id)
        except IngestJob.DoesNotExist:
            raise BossError(
                "Ingest job with id {} does not exist".format(job_id),
                ErrorCodes.RESOURCE_NOT_FOUND)
    else:
        ingest_job = self.job

    # Generate upload tasks for the ingest job
    # Get the project information: boss key -> '&'-separated internal ids.
    bosskey = ingest_job.collection + CONNECTER + ingest_job.experiment + CONNECTER + ingest_job.channel
    lookup_key = (LookUpKey.get_lookup_key(bosskey)).lookup_key
    [col_id, exp_id, ch_id] = lookup_key.split('&')
    project_info = [col_id, exp_id, ch_id]

    # Batch messages and write to file; files are named
    # tasks_<lookup_key>_<job_id>_<index>.txt
    base_file_name = 'tasks_' + lookup_key + '_' + str(ingest_job.id)
    self.file_index = 0

    # Open the first in-memory batch file and write its JSON header line.
    f = io.StringIO()
    header = {
        'job_id': ingest_job.id,
        'upload_queue_url': ingest_job.upload_queue,
        'ingest_queue_url': ingest_job.ingest_queue
    }
    f.write(json.dumps(header))
    f.write('\n')
    num_msg_per_file = 0

    for time_step in range(ingest_job.t_start, ingest_job.t_stop, 1):
        # For each time step, compute the chunks and tile keys.
        # z advances by 16 (chunk depth); x/y advance by one tile.
        for z in range(ingest_job.z_start, ingest_job.z_stop, 16):
            for y in range(ingest_job.y_start, ingest_job.y_stop, ingest_job.tile_size_y):
                for x in range(ingest_job.x_start, ingest_job.x_stop, ingest_job.tile_size_x):
                    # compute the chunk indices (tile coordinates -> chunk units)
                    chunk_x = int(x / ingest_job.tile_size_x)
                    chunk_y = int(y / ingest_job.tile_size_y)
                    chunk_z = int(z / 16)

                    # Compute the number of tiles in the chunk; the last
                    # z-chunk may hold fewer than 16.
                    if ingest_job.z_stop - z >= 16:
                        num_of_tiles = 16
                    else:
                        num_of_tiles = ingest_job.z_stop - z

                    # Generate the chunk key
                    chunk_key = (BossBackend(
                        self.config)).encode_chunk_key(
                            num_of_tiles, project_info,
                            ingest_job.resolution,
                            chunk_x, chunk_y, chunk_z, time_step)
                    self.num_of_chunks += 1

                    # get the tiles keys for this chunk
                    # NOTE(review): the tile index here is the absolute z
                    # slice (range(z, z + num_of_tiles)), unlike the older
                    # variant which used a chunk-relative 0-based index.
                    for tile in range(z, z + num_of_tiles):
                        # get the tile key
                        tile_key = (BossBackend(
                            self.config)).encode_tile_key(
                                project_info,
                                ingest_job.resolution,
                                chunk_x, chunk_y, tile, time_step)
                        self.count_of_tiles += 1

                        # Generate the upload task msg: "<chunk_key>,<tile_key>"
                        msg = chunk_key + ',' + tile_key + '\n'
                        f.write(msg)
                        num_msg_per_file += 1

                        # if there are MAX_NUM_MSG_PER_FILE messages in the
                        # batch, upload the file and start a fresh one.
                        if num_msg_per_file == MAX_NUM_MSG_PER_FILE:
                            fname = base_file_name + '_' + str(
                                self.file_index + 1) + '.txt'
                            self.upload_task_file(fname, f.getvalue())
                            self.file_index += 1
                            f.close()
                            # status = self.send_upload_message_batch(batch_msg)

                            # Start the next batch with a fresh header line.
                            fname = base_file_name + '_' + str(
                                self.file_index + 1) + '.txt'
                            f = io.StringIO()
                            header = {
                                'job_id': ingest_job.id,
                                'upload_queue_url': ingest_job.upload_queue,
                                'ingest_queue_url': ingest_job.ingest_queue
                            }
                            f.write(json.dumps(header))
                            f.write('\n')
                            num_msg_per_file = 0

    # Edge case: the last batch may be smaller than MAX_NUM_MSG_PER_FILE;
    # flush whatever remains.
    if num_msg_per_file != 0:
        fname = base_file_name + '_' + str(self.file_index + 1) + '.txt'
        self.upload_task_file(fname, f.getvalue())
        f.close()
        self.file_index += 1
        num_msg_per_file = 0

    # Update status: record the total tile count on the job.
    self.job.tile_count = self.count_of_tiles
    self.job.save()
def generate_upload_tasks(self, job_id=None):
    """Create and enqueue one upload-task message per tile of an ingest job.

    Args:
        job_id: Id of the ingest job; when omitted the service's current
            job is used.

    Returns:
        None

    Raises:
        BossError: if no job id is given and no current job is set, or the
            given job id does not exist.
    """
    # Resolve which job to operate on.
    if job_id is None and self.job is None:
        raise BossError(
            "Unable to generate upload tasks for the ingest service. Please specify a ingest job",
            ErrorCodes.UNABLE_TO_VALIDATE)

    if job_id:
        # Look the job up by id.
        try:
            ingest_job = IngestJob.objects.get(id=job_id)
        except IngestJob.DoesNotExist:
            raise BossError(
                "Ingest job with id {} does not exist".format(job_id),
                ErrorCodes.RESOURCE_NOT_FOUND)
    else:
        ingest_job = self.job

    # Resolve the boss key into the internal '&'-separated id triple.
    bosskey = ingest_job.collection + CONNECTER + ingest_job.experiment + CONNECTER + ingest_job.channel_layer
    lookup_key = LookUpKey.get_lookup_key(bosskey).lookup_key
    col_id, exp_id, ch_id = lookup_key.split('&')
    project_info = [col_id, exp_id, ch_id]

    # Walk every tile in the job's extents: time steps, then z in chunk
    # strides of 16, then y/x in single-tile strides.
    for t in range(ingest_job.t_start, ingest_job.t_stop):
        for z in range(ingest_job.z_start, ingest_job.z_stop, 16):
            for y in range(ingest_job.y_start, ingest_job.y_stop, ingest_job.tile_size_y):
                for x in range(ingest_job.x_start, ingest_job.x_stop, ingest_job.tile_size_x):
                    # Convert tile coordinates to chunk indices.
                    chunk_x = int(x / ingest_job.tile_size_x)
                    chunk_y = int(y / ingest_job.tile_size_y)
                    chunk_z = int(z / 16)

                    # A chunk holds at most 16 z-slices; the last chunk in z
                    # may hold fewer.
                    num_of_tiles = min(16, ingest_job.z_stop - z)

                    chunk_key = BossBackend(self.config).encode_chunk_key(
                        num_of_tiles, project_info, ingest_job.resolution,
                        chunk_x, chunk_y, chunk_z, t)

                    # One upload task per tile in this chunk.
                    for tile in range(num_of_tiles):
                        tile_key = BossBackend(self.config).encode_tile_key(
                            project_info, ingest_job.resolution,
                            chunk_x, chunk_y, tile, t)

                        # Build and enqueue the upload task message.
                        msg = self.create_upload_task_message(
                            ingest_job.id, chunk_key, tile_key,
                            ingest_job.upload_queue, ingest_job.ingest_queue)
                        self.send_upload_task_message(msg)
def test_upload_tile_index_table(self):
    """Mark every tile of a freshly created job as uploaded in the tile
    index DB, then verify delete_tiles removes all of its index entries."""
    # Build a valid ingest job from the example config fixture.
    ingest_mgmr = IngestManager()
    ingest_mgmr.validate_config_file(self.example_config_data)
    ingest_mgmr.validate_properties()
    ingest_mgmr.owner = self.user.pk
    ingest_job = ingest_mgmr.create_ingest_job()
    assert (ingest_job.id is not None)

    # Get the chunks in this job
    # Get the project information: boss key -> '&'-separated internal ids.
    bosskey = ingest_job.collection + '&' + ingest_job.experiment + '&' + ingest_job.channel_layer
    lookup_key = (LookUpKey.get_lookup_key(bosskey)).lookup_key
    [col_id, exp_id, ch_id] = lookup_key.split('&')
    project_info = [col_id, exp_id, ch_id]

    proj_name = ingest_job.collection + '&' + ingest_job.experiment
    tile_index_db = BossTileIndexDB(proj_name)
    tilebucket = TileBucket(str(col_id) + '&' + str(exp_id))

    for time_step in range(ingest_job.t_start, ingest_job.t_stop, 1):
        # For each time step, compute the chunks and tile keys.
        # z advances by 16 (chunk depth); x/y advance by one tile.
        for z in range(ingest_job.z_start, ingest_job.z_stop, 16):
            for y in range(ingest_job.y_start, ingest_job.y_stop, ingest_job.tile_size_y):
                for x in range(ingest_job.x_start, ingest_job.x_stop, ingest_job.tile_size_x):
                    # compute the chunk indices (tile coords -> chunk units)
                    chunk_x = int(x / ingest_job.tile_size_x)
                    chunk_y = int(y / ingest_job.tile_size_y)
                    chunk_z = int(z / 16)

                    # Compute the number of tiles in the chunk; the last
                    # z-chunk may hold fewer than 16.
                    if ingest_job.z_stop - z >= 16:
                        num_of_tiles = 16
                    else:
                        num_of_tiles = ingest_job.z_stop - z

                    # Generate the chunk key
                    chunk_key = (BossBackend(
                        ingest_mgmr.config)).encode_chunk_key(
                            num_of_tiles, project_info,
                            ingest_job.resolution,
                            chunk_x, chunk_y, chunk_z, time_step)

                    # Register the chunk, then mark each of its tiles as
                    # uploaded in the tile index db (fake keys suffice here).
                    tile_index_db.createCuboidEntry(
                        chunk_key, ingest_job.id)
                    key_map = {}
                    for tile in range(0, num_of_tiles):
                        # get the object key and upload it
                        #tile_key = tilebucket.encodeObjectKey(ch_id, ingest_job.resolution,
                        #                                      chunk_x, chunk_y, tile, time_step)
                        tile_key = 'fakekey' + str(tile)
                        tile_index_db.markTileAsUploaded(
                            chunk_key, tile_key)

    # for each chunk key, delete entries from the tile_bucket

    # Check if data has been uploaded: the index must now have entries...
    chunks = list(tile_index_db.getTaskItems(ingest_job.id))
    assert (len(chunks) != 0)

    # ...and delete_tiles must remove every entry for the job.
    ingest_mgmr.delete_tiles(ingest_job)
    chunks = list(tile_index_db.getTaskItems(ingest_job.id))
    assert (len(chunks) == 0)