def compute_task(self, channel_name, tuple_type, subtuple, compute_plan_key):
    """Celery entry point: prepare materials, run the tuple, then clean up.

    Returns a dict describing the worker/queue that handled the task, the
    compute plan key, and (on success) the task result under 'result'.
    On failure the task is rescheduled via Celery's retry mechanism.
    """
    # Identify which worker/queue picked this task up; fall back to
    # org-derived defaults when the Celery request metadata is unavailable.
    try:
        worker_id = self.request.hostname.split('@')[1]
        queue_name = self.request.delivery_info['routing_key']
    except Exception:
        worker_id = f"{settings.ORG_NAME}.worker"
        queue_name = f"{settings.ORG_NAME}"

    task_report = {
        'worker': worker_id,
        'queue': queue_name,
        'compute_plan_key': compute_plan_key,
    }

    try:
        prepare_materials(channel_name, subtuple, tuple_type)
        task_report['result'] = do_task(channel_name, subtuple, tuple_type)
    except Exception as err:
        # Reschedule the task with the configured backoff and retry budget.
        raise self.retry(
            exc=err,
            countdown=int(getattr(settings, 'CELERY_TASK_RETRY_DELAY_SECONDS')),
            max_retries=int(getattr(settings, 'CELERY_TASK_MAX_RETRIES')))
    finally:
        # Best-effort cleanup of the working directory; never let a cleanup
        # failure mask the task outcome.
        if settings.TASK['CLEAN_EXECUTION_ENVIRONMENT']:
            try:
                workdir = get_subtuple_directory(subtuple['key'])
                if os.path.exists(workdir):
                    remove_subtuple_materials(workdir)
            except Exception as cleanup_err:
                logger.exception(cleanup_err)

    return task_report
def prepare_materials(channel_name, subtuple, tuple_type):
    """Stage every input the task needs on disk (algo, opener, data, models).

    Idempotent: any leftover directory from a previous attempt is removed
    before the materials are fetched again.
    """
    logger.info(
        f'Prepare materials for task [{tuple_type}:{subtuple["key"]}]: Started.'
    )

    # On retry a stale directory may remain from the previous attempt — wipe it.
    stale_dir = get_subtuple_directory(subtuple['key'])
    if os.path.exists(stale_dir):
        remove_subtuple_materials(stale_dir)

    # Fresh folder tree for this task.
    workdir = build_subtuple_folders(subtuple)

    # Metrics are only needed when evaluating.
    if tuple_type == TESTTUPLE_TYPE:
        prepare_objective(channel_name, workdir, subtuple)

    # A testtuple runs the algo of the traintuple it evaluates.
    if tuple_type == TESTTUPLE_TYPE:
        algo_tuple_type = subtuple['traintuple_type']
    else:
        algo_tuple_type = tuple_type
    prepare_algo(channel_name, workdir, algo_tuple_type, subtuple)

    # Opener and data samples apply to the tuple types that read datasets.
    if tuple_type in (TESTTUPLE_TYPE, TRAINTUPLE_TYPE, COMPOSITE_TRAINTUPLE_TYPE):
        prepare_opener(workdir, subtuple)
        prepare_data_sample(workdir, subtuple)

    # Fetch any input models the task depends on.
    prepare_models(channel_name, workdir, tuple_type, subtuple)

    logger.info(
        f'Prepare materials for task [{tuple_type}:{subtuple["key"]}]: Success. {list_files(workdir)}'
    )
def build_subtuple_folders(subtuple):
    """Create the working directory for ``subtuple['key']`` with its
    standard subfolders, and return the directory path."""
    root = get_subtuple_directory(subtuple['key'])
    create_directory(root)

    subfolders = (
        'opener',
        'data',
        'model',
        'output_model',
        'pred',
        'perf',
        'metrics',
        'export',
    )
    for name in subfolders:
        create_directory(path.join(root, name))

    return root
def do_task(channel_name, subtuple, tuple_type):
    """Run the compute job for a prepared tuple and return its result dict.

    Assumes prepare_materials() has already staged everything under the
    subtuple directory. For testtuples, an additional metrics container is
    run and the measured performance is merged into the result under
    'global_perf'.
    """
    subtuple_directory = get_subtuple_directory(subtuple['key'])

    # compute plan / federated learning variables
    compute_plan_key = None
    rank = None
    compute_plan_tag = None

    if 'compute_plan_key' in subtuple and subtuple['compute_plan_key']:
        compute_plan_key = subtuple['compute_plan_key']
        rank = int(subtuple['rank'])
        # Fetch the plan from the ledger to recover its tag.
        compute_plan = get_object_from_ledger(channel_name, compute_plan_key, 'queryComputePlan')
        compute_plan_tag = compute_plan['tag']

    # Volumes mounted in every container vs. those specific to the compute step.
    common_volumes, compute_volumes = prepare_volumes(
        subtuple_directory, tuple_type, compute_plan_key, compute_plan_tag)

    # Add node index to environment variable for the compute
    node_index = os.getenv('NODE_INDEX')
    if node_index:
        environment = {'NODE_INDEX': node_index}
    else:
        environment = {}

    # Use tag to transfer (or not) performances and models
    tag = subtuple.get("tag")
    if tuple_type == TESTTUPLE_TYPE:
        if tag and TAG_VALUE_FOR_TRANSFER_BUCKET in tag:
            environment['TESTTUPLE_TAG'] = TAG_VALUE_FOR_TRANSFER_BUCKET

    # e.g. "traintuple-abcdef01-train"; underscores are invalid in job names.
    job_name = f'{tuple_type.replace("_", "-")}-{subtuple["key"][0:8]}-{TUPLE_COMMANDS[tuple_type]}'.lower(
    )

    command = generate_command(tuple_type, subtuple, rank)

    # train or predict
    # NOTE(review): images are only removed when the tuple is not part of a
    # compute plan and image caching is disabled.
    compute_job(subtuple_key=subtuple["key"],
                compute_plan_key=compute_plan_key,
                dockerfile_path=subtuple_directory,
                image_name=get_algo_image_name(subtuple['algo']['key']),
                job_name=job_name,
                volumes={
                    **common_volumes,
                    **compute_volumes
                },
                command=command,
                remove_image=compute_plan_key is None and not settings.TASK['CACHE_DOCKER_IMAGES'],
                remove_container=settings.TASK['CLEAN_EXECUTION_ENVIRONMENT'],
                capture_logs=settings.TASK['CAPTURE_LOGS'],
                environment=environment)

    # Handle model and result from tuple
    models = save_models(subtuple_directory, tuple_type, subtuple['key'])  # Can be empty if testtuple
    result = extract_result_from_models(tuple_type, models)  # Can be empty if testtuple

    # Evaluation
    if tuple_type == TESTTUPLE_TYPE:
        # We set pred folder to ro during evaluation
        pred_path = path.join(subtuple_directory, 'pred')
        common_volumes[pred_path]['mode'] = 'ro'

        # eval: run the metrics container against the predictions.
        compute_job(
            subtuple_key=subtuple["key"],
            compute_plan_key=compute_plan_key,
            dockerfile_path=f'{subtuple_directory}/metrics',
            image_name=f'substra/metrics_{subtuple["objective"]["key"][0:8]}'.
            lower(),
            job_name=
            f'{tuple_type.replace("_", "-")}-{subtuple["key"][0:8]}-eval'.
            lower(),
            volumes=common_volumes,
            command=f'--output-perf-path {OUTPUT_PERF_PATH}',
            remove_image=compute_plan_key is None and not settings.TASK['CACHE_DOCKER_IMAGES'],
            remove_container=settings.TASK['CLEAN_EXECUTION_ENVIRONMENT'],
            capture_logs=settings.TASK['CAPTURE_LOGS'],
            environment=environment)

        pred_path = path.join(subtuple_directory, 'pred')
        export_path = path.join(subtuple_directory, 'export')
        perf_path = path.join(subtuple_directory, 'perf')

        # load performance written by the metrics container
        # (assumes perf.json has an 'all' key — produced by the metrics image)
        with open(path.join(perf_path, 'perf.json'), 'r') as perf_file:
            perf = json.load(perf_file)
        result['global_perf'] = perf['all']

        # Optionally export predictions/perf to the external bucket.
        if tag and TAG_VALUE_FOR_TRANSFER_BUCKET in tag:
            transfer_to_bucket(subtuple['key'], [pred_path, perf_path, export_path])

    return result