def __init__(self, name, active=True, batch_args=None, submission_args=None):
    '''
    Parameters
    ----------
    name: str
        name of the step
    active: bool, optional
        whether the step should be processed
    batch_args: tmlib.workflow.args.BatchArguments, optional
        batch arguments
    submission_args: tmlib.workflow.args.SubmissionArguments, optional
        submission arguments

    Raises
    ------
    WorkflowDescriptionError
        when a provided argument is not a valid argument for the given step
    '''
    self.name = str(name)
    self.fullname, self.help = get_step_information(name)
    self.active = active
    BatchArgs, SubmissionArgs = get_step_args(name)
    if batch_args is None:
        self.batch_args = BatchArgs()
    else:
        self.batch_args = batch_args
    if submission_args is None:
        self.submission_args = SubmissionArgs()
    else:
        self.submission_args = submission_args
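
# Usage sketch (hypothetical): constructs a step description with default
# arguments and one with explicit argument instances. The step name
# 'metaextract' is only an example of a registered step, and get_step_args()
# is assumed to be importable as in the constructor above.
example_step = WorkflowStepDescription('metaextract')
BatchArgs, SubmissionArgs = get_step_args('metaextract')
example_step_custom = WorkflowStepDescription(
    'metaextract', active=False,
    batch_args=BatchArgs(), submission_args=SubmissionArgs()
)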
def __init__(self, type, name, mode='sequential', active=True, steps=None):
    '''
    Parameters
    ----------
    type: str
        name of the workflow type
    name: str
        name of the stage
    mode: str, optional
        mode of workflow stage submission, i.e. whether steps are submitted
        simultaneously or one after another
        (options: ``{"sequential", "parallel"}``)
    active: bool, optional
        whether the stage should be processed
    steps: List[dict], optional
        description of steps in form of key-value pairs

    Raises
    ------
    TypeError
        when `name` or `steps` have the wrong type
    ValueError
        when `mode` is neither ``"sequential"`` nor ``"parallel"``
    '''
    self.type = type
    self.dependencies = get_workflow_dependencies(self.type)
    self.name = str(name)
    self.mode = mode
    self.active = active
    if self.mode not in {'parallel', 'sequential'}:
        raise ValueError(
            'Attribute "mode" must be either "parallel" or "sequential"'
        )
    self.steps = list()
    if steps is not None:
        for step in steps:
            BatchArgs, SubmissionArgs = get_step_args(step['name'])
            batch_arg_values = {
                a['name']: a['value'] for a in step['batch_args']
            }
            batch_args = BatchArgs(**batch_arg_values)
            submission_arg_values = {
                a['name']: a['value'] for a in step['submission_args']
            }
            submission_args = SubmissionArgs(**submission_arg_values)
            # NOTE: not every step has extra arguments
            self.add_step(
                WorkflowStepDescription(
                    step['name'], step.get('active', True),
                    batch_args, submission_args
                )
            )
    else:
        for name in self.dependencies.STEPS_PER_STAGE[self.name]:
            self.add_step(
                WorkflowStepDescription(name, True)
            )
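
# Usage sketch (hypothetical): builds a stage description from the same
# key-value structure the constructor expects for `steps`. The workflow type
# 'canonical', stage name 'image_conversion', step name 'metaextract' and the
# argument names/values are placeholders, not guaranteed to exist as such.
example_stage = WorkflowStageDescription(
    'canonical', 'image_conversion', mode='sequential', active=True,
    steps=[
        {
            'name': 'metaextract',
            'active': True,
            'batch_args': [{'name': 'batch_size', 'value': 10}],
            'submission_args': [{'name': 'memory', 'value': 2000}]
        }
    ]
)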
def run_jobs(experiment_id):
    '''Runs one or more jobs of the current project with pipeline and module
    descriptions provided by the UI.

    This requires the pipeline and module descriptions to be saved to *pipe*
    and *handles* files, respectively.
    '''
    logger.info(
        'submit jobs for jterator project of experiment %d', experiment_id
    )
    data = json.loads(request.data)
    job_ids = map(int, data['job_ids'])
    project = yaml.load(data['project'])
    pipeline_description = PipelineDescription(
        **project['pipe']['description']
    )
    handles_descriptions = {
        h['name']: HandleDescriptions(**h['description'])
        for h in project['handles']
    }
    jt = ImageAnalysisPipelineEngine(
        experiment_id,
        pipeline_description=pipeline_description,
        handles_descriptions=handles_descriptions,
    )

    # 1. Delete figures and logs from previous submission
    # since they are not tracked per submission.
    jt.remove_previous_pipeline_output()
    # TODO: remove figure files of previous runs!!

    # 2. Build job descriptions
    channel_names = [
        ch.name for ch in jt.project.pipe.description.input.channels
    ]
    job_descriptions = list()
    with tm.utils.ExperimentSession(experiment_id) as session:
        sites = session.query(tm.Site.id).\
            order_by(tm.Site.id).\
            all()
        for j in job_ids:
            site_id = sites[j].id
            image_file_count = session.query(tm.ChannelImageFile.id).\
                join(tm.Channel).\
                filter(tm.Channel.name.in_(channel_names)).\
                filter(tm.ChannelImageFile.site_id == site_id).\
                count()
            if image_file_count == 0:
                raise MalformedRequestError(
                    'No images found for job ID {j}.'.format(j=j)
                )
            job_descriptions.append({'site_id': site_id, 'plot': True})

    with tm.utils.MainSession() as session:
        submission = tm.Submission(
            experiment_id=experiment_id, program='jtui',
            user_id=current_identity.id
        )
        session.add(submission)
        session.flush()
        SubmitArgs = get_step_args('jterator')[1]
        submit_args = SubmitArgs()
        job_collection = jt.create_debug_run_phase(submission.id)
        jobs = jt.create_debug_run_jobs(
            user_name=current_identity.name,
            batches=job_descriptions,
            job_collection=job_collection,
            verbosity=2,
            duration=submit_args.duration,
            memory=submit_args.memory,
            cores=submit_args.cores
        )

    # 3. Store jobs in session
    gc3pie.store_task(jobs)
    # session.remove(data['previousSubmissionId'])
    gc3pie.submit_task(jobs)
    return jsonify(submission_id=jobs.submission_id)
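
# Request body sketch (hypothetical values): the shape follows directly from
# the parsing code in run_jobs() above. 'project' is a YAML string whose
# 'pipe' and 'handles' entries provide the keyword arguments for
# PipelineDescription and HandleDescriptions; the empty dicts below are
# placeholders for real descriptions, and the module name is invented.
# Uses the yaml module already imported by this handler.
example_payload = {
    'job_ids': ['1', '2'],
    'project': yaml.safe_dump({
        'pipe': {'description': {}},   # PipelineDescription kwargs go here
        'handles': [
            {'name': 'some_module', 'description': {}}   # HandleDescriptions kwargs
        ]
    })
}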
def __init__(cls, clsname, bases, attrs):
    super(_CliMeta, cls).__init__(clsname, bases, attrs)
    if '__abstract__' in vars(cls).keys():
        return
    pkg_name = '.'.join(cls.__module__.split('.')[:-1])
    pkg = importlib.import_module(pkg_name)
    cls.__doc__ = pkg.__description__
    cls.__logo__ = pkg.__logo__
    parser = argparse.ArgumentParser()
    parser.description = pkg.__description__
    parser.version = __version__
    # The parser for each step receives at least two arguments, which are
    # passed to the corresponding API class.
    parser.add_argument(
        'experiment_id', type=int,
        help='ID of the experiment that should be processed'
    )
    parser.add_argument(
        '--verbosity', '-v', action='count', default=0,
        help='increase logging verbosity'
    )
    # Extra arguments are added to the main parser as well because they
    # also need to be passed to the constructor of the API class.
    step_name = cls.__name__.lower()
    BatchArgs, SubmissionArgs = get_step_args(step_name)
    subparsers = parser.add_subparsers(dest='method', help='methods')
    subparsers.required = True
    # flags = collections.defaultdict(list)
    for attr_name in dir(cls):
        if attr_name.startswith('__'):
            continue
        attr_value = getattr(cls, attr_name)
        # The climethod decorator provides argument descriptions via
        # the "args" attribute of the decorated method.
        # These arguments are added to the method-specific subparser.
        if isinstance(attr_value, types.MethodType):
            if getattr(attr_value, 'is_climethod', False):
                method_parser = subparsers.add_parser(
                    attr_name, help=attr_value.help
                )
                method_parser.description = attr_value.help
                for arg in attr_value.args.iterargs():
                    arg.add_to_argparser(method_parser)
                    # if arg.flag is not None:
                    #     flags[attr_name].append(arg.flag)
                    # if arg.short_flag is not None:
                    #     flags[attr_name].append(arg.short_flag)

    # The "init" and "submit" methods require additional arguments that also
    # need to be accessible outside the scope of the command line interface.
    # Therefore, they are handled separately.
    # Each workflow step must implement BatchArguments and SubmissionArguments
    # and register them using the batch_args and submission_args decorator,
    # respectively. These arguments are added to the corresponding
    # method-specific subparser as a separate group to highlight that they
    # represent a different type of argument.
    def add_step_specific_method_args(step_name, method_name, args_class):
        method_parser = subparsers.choices[method_name]
        parser_group = method_parser.add_argument_group(
            'step-specific arguments'
        )
        for arg in args_class.iterargs():
            arg.add_to_argparser(parser_group)
            # if arg.flag is not None:
            #     flags[attr_name].append(arg.flag)
            # if arg.short_flag is not None:
            #     flags[attr_name].append(arg.short_flag)

    add_step_specific_method_args(step_name, 'init', BatchArgs)
    setattr(cls, '_batch_args_class', BatchArgs)
    add_step_specific_method_args(step_name, 'submit', SubmissionArgs)
    setattr(cls, '_submission_args_class', SubmissionArgs)
    api = get_step_api(step_name)
    setattr(cls, '_api_class', api)
    setattr(cls, '_parser', parser)
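
# Parsing sketch (hypothetical): 'Metaextract' stands for any concrete step
# CLI class built by _CliMeta, and '--batch-size' for whatever flag that
# step's BatchArguments register for the "init" group; neither name is
# guaranteed to exist. The main parser contributes `experiment_id` and
# `verbosity`, the subparser selection contributes `method`.
args = Metaextract._parser.parse_args(
    ['1', '-vv', 'init', '--batch-size', '10']
)
assert args.experiment_id == 1
assert args.verbosity == 2
assert args.method == 'init'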
def run_jobs(experiment_id):
    '''Runs one or more jobs of the current project with pipeline and module
    descriptions provided by the UI.

    This requires the pipeline and module descriptions to be saved to *pipe*
    and *handles* files, respectively.
    '''
    logger.info(
        'submit jobs for jterator project of experiment %d', experiment_id
    )
    data = json.loads(request.data)
    job_ids = map(int, data['job_ids'])
    project = yaml.load(data['project'])
    pipeline_description = PipelineDescription(**project['pipe']['description'])
    handles_descriptions = {
        h['name']: HandleDescriptions(**h['description'])
        for h in project['handles']
    }
    jt = ImageAnalysisPipelineEngine(
        experiment_id,
        pipeline_description=pipeline_description,
        handles_descriptions=handles_descriptions,
    )

    # 1. Delete figures and logs from previous submission
    # since they are not tracked per submission.
    jt.remove_previous_pipeline_output()
    # TODO: remove figure files of previous runs!!

    # 2. Build job descriptions
    channel_names = [
        ch.name for ch in jt.project.pipe.description.input.channels
    ]
    object_names = [
        ob.name for ob in jt.project.pipe.description.input.objects
    ]
    job_descriptions = list()
    with tm.utils.ExperimentSession(experiment_id) as session:
        sites = session.query(tm.Site.id).\
            order_by(tm.Site.id).\
            all()
        for j in job_ids:
            site_id = sites[j-1].id  # user-provided job IDs are 1-based
            image_file_count = 0
            image_file_count += session.query(tm.ChannelImageFile.id).\
                join(tm.Channel).\
                filter(tm.Channel.name.in_(channel_names)).\
                filter(tm.ChannelImageFile.site_id == site_id).\
                count()
            image_file_count += session.query(tm.ChannelImageFile.id).\
                join(tm.Site).\
                join(tm.Well).\
                join(tm.Plate).\
                join(tm.Experiment).\
                join(tm.MapobjectType).\
                filter(tm.MapobjectType.name.in_(object_names)).\
                filter(tm.ChannelImageFile.site_id == site_id).\
                count()
            if image_file_count == 0:
                raise MalformedRequestError(
                    'No images found for job ID {j}.'.format(j=j)
                )
            job_descriptions.append({'site_id': site_id, 'plot': True})

    with tm.utils.MainSession() as session:
        submission = tm.Submission(
            experiment_id=experiment_id, program='jtui',
            user_id=current_identity.id
        )
        session.add(submission)
        session.flush()
        SubmitArgs = get_step_args('jterator')[1]
        submit_args = SubmitArgs()
        job_collection = jt.create_debug_run_phase(submission.id)
        jobs = jt.create_debug_run_jobs(
            user_name=current_identity.name,
            batches=job_descriptions,
            job_collection=job_collection,
            verbosity=2,
            duration=submit_args.duration,
            memory=submit_args.memory,
            cores=submit_args.cores
        )

    # 3. Store jobs in session
    gc3pie.store_task(jobs)
    # session.remove(data['previousSubmissionId'])
    gc3pie.submit_task(jobs)
    return jsonify(submission_id=jobs.submission_id)