def payload4(task):
    """
    merge_task: run merge from N inputs.

    input: Makefile.all, *.fasta.{sfx list}, *1.{N}.fastq, *2.{N}.fastq,
           {N}reads.tgz, {N}maps.tgz
    output: bam file (+ results.tgz?)
    :param task:
    :return:
    """
    logger.debug("payload4: Start")

    # Check type of task
    task_type = task.task_type

    # Get user
    user = users_.get(task.owner_id)

    task.tag = "task." + commands.getoutput('uuidgen')
    tasks_.save(task)

    # Number of pieces to merge (defaults to 10)
    n = 10
    if task.params is not None:
        n = int(task.params)
        if n == 0:
            n = 10

    # Get containers
    input_cont = conts_.get(task.input)
    # TODO: do something with the output container?
    output_cont = conts_.get(task.output)

    # Create a fresh container for this job
    container = Container()
    container.guid = task.tag
    conts_.save(container)

    # Add input files to container
    files_template_list = task_type.ifiles_template.split(',')
    for item in input_cont.files:
        f = item.file
        for file_template in files_template_list:
            # TODO: change file template here
            m = re.match(file_template, f.lfn)
            if m is not None:
                # Register file in container
                fc.reg_file_in_cont(f, container, 'input')

    # Register additional output
    fc.reg_file_in_cont_byname(user, 'output.bam', container, 'output')
    fc.reg_file_in_cont_byname(user, 'myresults.bz2', container, 'output')

    # Prepare trf script
    script = task.task_type.trf_template
    # TODO: just for test - only emulate, not real jobs (overrides trf_template)
    pipeline_path_name = 'paleomix_bam'
    swdir = '/s/ls2/users/poyda/swp/' + pipeline_path_name + '/'
    script = "/bin/bash " + swdir + "runmerge.sh -t " + str(n)

    send_job_(task, container, script)
    return True
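# `send_job_` is called by every payload but defined outside this section.
# Below is a minimal sketch of its assumed contract, inferred from the call
# sites (task, container, script) and from check_running_tasks(), which polls
# jobs by task.tag: the Job class and the script/owner_id attributes are
# assumptions for illustration, not the actual implementation.
def send_job_(task, container, script):
    """Sketch: wrap `script` into a PanDA job bound to `container` and
    tagged with task.tag so check_running_tasks() can poll its status."""
    job = Job()                   # assumption: Job model exists alongside jobs_
    job.owner_id = task.owner_id  # assumption: jobs carry an owner
    job.tags = task.tag           # grounded: jobs_.find(tags=task.tag, ...)
    job.container = container    # grounded: job.container.files is read later
    job.script = script           # assumption: the trf script travels with the job
    job.status = 'sent'
    jobs_.save(job)
    # (actual PanDA submission is omitted here)
    return job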
def get_start_task(p):
    """
    Returns start task object
    :param p: Pipeline obj
    :return: Task obj
    """
    if not isinstance(p, Pipeline):
        raise Exception("Illegal pipeline class: not Pipeline")

    # Create start_task
    start_task_type = task_types_.first(method='start')
    start_task = new_task(start_task_type)
    start_task.owner_id = p.owner_id
    tasks_.save(start_task)

    # Update Pipeline obj
    set_current_task(p, start_task)
    return start_task
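# `new_task` and `set_current_task` are helpers not shown in this section.
# A minimal sketch of the assumed behaviour, matching how get_start_task and
# get_next_task use them; the 'defined' initial status is grounded in the
# daemon loop below, the rest is an assumption:
def new_task(task_type):
    """Sketch: create a Task of the given type in the 'defined' state."""
    t = Task()
    t.task_type = task_type
    t.status = 'defined'            # grounded: run() dispatches on 'defined'
    t.creation_time = datetime.utcnow()
    t.modification_time = t.creation_time
    return t

def set_current_task(p, task):
    """Sketch: point the pipeline at its new current task and persist it."""
    p.current_task_id = task.id     # grounded: get_next_task reads this field
    p.modification_time = datetime.utcnow()
    pipelines_.save(p)              # assumption: same store as the views use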
def check_running_tasks():
    """
    Checks PanDA job statuses for all running tasks
    :return:
    """
    # Get tasks in running state
    tasks = tasks_.find(status='running')
    for task in tasks:
        # Check if tag defined
        if task.tag is not None and task.tag != "":
            # Check failed PanDA jobs
            jobs = jobs_.find(tags=task.tag, status='failed')
            if jobs.count() > 0:
                task.status = 'failed'
                task.modification_time = datetime.utcnow()
                task.comment = "Failed task due to {n} failed jobs".format(n=jobs.count())
                tasks_.save(task)
                continue  # keep checking the remaining running tasks

            # Check cancelled PanDA jobs ('canceled' is the PanDA spelling)
            jobs = jobs_.find(tags=task.tag, status='canceled')
            if jobs.count() > 0:
                task.status = 'cancelled'
                task.modification_time = datetime.utcnow()
                tasks_.save(task)
                continue

            # Check finished PanDA jobs
            jobs = jobs_.find(tags=task.tag, status='finished')
            jobs_all = jobs_.find(tags=task.tag)
            if jobs.count() == jobs_all.count():
                # Register output files from the jobs into the task container
                cont = conts_.get(task.input)
                for job in jobs:
                    files_catalog = job.container.files
                    for f in files_catalog:
                        if f.type == 'output':
                            # Register file in container
                            fc.reg_file_in_cont(f.file, cont, 'intermediate')

                # Change task status
                task.status = 'finished'
                task.modification_time = datetime.utcnow()
                tasks_.save(task)
        else:
            # If tag is not defined there is nothing to poll: mark finished
            task.status = 'finished'
            task.modification_time = datetime.utcnow()
            tasks_.save(task)
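# fc.reg_file_in_cont and fc.reg_file_in_cont_byname are file-catalog helpers
# defined outside this section. Judging by usage (container.files yields link
# items carrying .file and .type), they attach a File to a Container under a
# role. A hypothetical sketch only - ContainerFile is an invented name, and
# the 'web.it_' guid scheme is borrowed from test data elsewhere in this code:
def reg_file_in_cont(f, cont, file_type):
    """Sketch: attach an existing File to a Container under a role."""
    link = ContainerFile()      # hypothetical association model
    link.file = f
    link.type = file_type       # 'input' | 'output' | 'intermediate'
    cont.files.append(link)
    conts_.save(cont)
    return link

def reg_file_in_cont_byname(user, lfn, cont, file_type):
    """Sketch: create a placeholder File entry for `lfn`, then register it."""
    f = File()                  # hypothetical: new catalog entry
    f.lfn = lfn
    f.guid = 'web.it_' + commands.getoutput('uuidgen')
    f.owner_id = user.id
    files_.save(f)
    return reg_file_in_cont(f, cont, file_type)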
def new_pipeline():
    form = NewPipelineForm(request.form)
    if request.method == 'POST':
        ifiles = request.form.getlist('iguids[]')
        current_user = g.user

        # Prepare pipeline
        pp = Pipeline()
        pp.status = 'running'
        pp.type_id = pipeline_types_.get(1).id
        pp.owner_id = current_user.id
        pipelines_.save(pp)

        # Prepare container
        pp_cont = Container()
        pp_cont.guid = 'pipeline.' + commands.getoutput('uuidgen')
        conts_.save(pp_cont)

        # Add guids to container
        for item in ifiles:
            if item != '':
                f = files_.first(guid=item)
                if f is not None:
                    # Register file in catalog
                    fc.reg_file_in_cont(f, pp_cont, 'input')
                else:
                    pp_cont.status = 'broken'
                    conts_.save(pp_cont)
                    # report the missing guid itself (f is None here)
                    return make_response(jsonify({'error': "GUID {} not found".format(item)}))

        # Set current task
        start_task = pclient.get_start_task(pp)
        start_task.input = pp_cont.id
        start_task.output = pp_cont.id
        tasks_.save(start_task)

        return redirect(url_for('pipelines.list_all'))
    return render_template('dashboard/pp/new.html', form=form)
def get_next_task(p):
    """
    Returns next task object
    :param p: Pipeline obj
    :return: Task obj
    """
    if not isinstance(p, Pipeline):
        raise Exception("Illegal pipeline class: not Pipeline")

    # Fetch current_task
    current_task_id = p.current_task_id
    current_task = tasks_.get(current_task_id)
    if not isinstance(current_task, Task):
        raise Exception("Unable to fetch current_task by id")
    current_task_type = current_task.task_type

    # Fetch pipeline_type
    pipeline_type = p.pipeline_type
    if not isinstance(pipeline_type, PipelineType):
        raise Exception("Illegal pipeline_type class: not PipelineType")

    # Fetch pipeline_catalog item
    pp_c = pipeline_catalog_.first(pipeline_type_id=pipeline_type.id,
                                   current_task_type_id=current_task_type.id)
    if not isinstance(pp_c, PipelineCatalog):
        raise Exception("Unable to fetch PipelineCatalog item")

    # Build next_task, inheriting IO containers and params from current_task
    next_task = new_task(pp_c.next_task_type)
    next_task.owner_id = p.owner_id
    next_task.input = current_task.input
    next_task.output = current_task.output
    next_task.params = current_task.params
    tasks_.save(next_task)

    # Update Pipeline obj
    set_current_task(p, next_task)
    return next_task
def new_pipeline_from_cont():
    form = RunForm(request.form)
    if request.method == 'POST':
        icont = conts_.first(guid=form.guid.data)
        if icont is None:
            raise WebpandaError("Container not found")
        current_user = g.user

        # Prepare pipeline
        pp = Pipeline()
        pp.status = 'running'
        pp.type_id = pipeline_types_.get(1).id
        pp.owner_id = current_user.id
        pipelines_.save(pp)

        # Prepare container
        pp_cont = Container()
        pp_cont.guid = 'pipeline.' + commands.getoutput('uuidgen')
        conts_.save(pp_cont)

        # Add files from the source container
        for item in icont.files:
            f = item.file
            # Register file in catalog
            fc.reg_file_in_cont(f, pp_cont, 'input')

        # Set current task
        start_task = pclient.get_start_task(pp)
        start_task.input = pp_cont.id
        start_task.output = pp_cont.id
        tasks_.save(start_task)

        return redirect(url_for('pipelines.list_all'))
    return render_template('dashboard/pp/new.html', form=form)
def run(task):
    try:
        method = task.task_type.method
        if task.status != 'sent':
            return False  # raise WebpandaError('Illegal task status to start')

        # Change task state to 'preparing'
        task.status = 'preparing'
        task.modification_time = datetime.utcnow()
        tasks_.save(task)

        # Custom payload
        if method == 'init_task':
            payload1(task)
        elif method == 'split_task':
            payload2(task)
        elif method == 'run1_task':
            payload3(task)
        elif method == 'merge_task':
            payload4(task)
        else:
            raise WebpandaError("Task payload error: method not found")

        # Change task state to 'running'
        task.status = 'running'
        task.modification_time = datetime.utcnow()
        tasks_.save(task)
        return True
    except WebpandaError as e:
        # Change task state to 'failed'
        task.status = 'failed'
        task.modification_time = datetime.utcnow()
        task.comment = e.msg
        tasks_.save(task)
        return False
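# The handler above reads e.msg, so WebpandaError evidently carries a msg
# attribute. A minimal compatible sketch; the real class lives in the
# webpanda core, so treat this as an assumption for illustration:
class WebpandaError(Exception):
    def __init__(self, msg):
        super(WebpandaError, self).__init__(msg)
        self.msg = msg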
def payload3(task):
    """
    run1_task: N parallel jobs.

    {N} = zero-padded sequence 01, 02, ..., N (not less than 2 placeholders).
    TODO: deal with {N}.fastq.bz2?

    input: Makefile.{N}, *.fasta.{sfx list}, *1.{N}.fastq, *2.{N}.fastq
    output: likely reads{N}.tgz, maps{N}.tgz
    :param task:
    :return:
    """
    logger.debug("payload3: Start")

    # Check type of task
    task_type = task.task_type
    # TODO: validate task_type.id here (3 or 6?)
    # if task_type.id != ...:
    #     raise WebpandaError("Illegal task_type.id")

    # Get user
    user = users_.get(task.owner_id)

    # Number of parallel jobs (defaults to 10)
    n = 10
    if task.params is not None:
        n = int(task.params)
        if n == 0:
            n = 10

    task.tag = "task." + commands.getoutput('uuidgen')
    tasks_.save(task)

    # Get containers
    input_cont = conts_.get(task.input)
    # TODO: do something with the output container?
    output_cont = conts_.get(task.output)

    for jobname in gen_sfx("a", n):
        # Create a container per job
        container = Container()
        container.guid = task.tag + "." + jobname
        conts_.save(container)

        # Add input files to container
        files_template_list = task_type.ifiles_template.split(',')
        for item in input_cont.files:
            f = item.file
            for file_template in files_template_list:
                # TODO: change file template here
                m = re.match(file_template, f.lfn)
                if m is not None:
                    # Register file in container
                    fc.reg_file_in_cont(f, container, 'input')

        # Register additional output
        fc.reg_file_in_cont_byname(user, jobname + '.reads.bz2', container, 'output')
        fc.reg_file_in_cont_byname(user, jobname + '.maps.bz2', container, 'output')

        # Prepare trf script
        script = task.task_type.trf_template
        # TODO: just for test - only emulate, not real jobs (overrides trf_template)
        pipeline_path_name = 'paleomix_bam'
        swdir = '/s/ls2/users/poyda/swp/' + pipeline_path_name + '/'
        script = "/bin/bash " + swdir + "run11.sh -t " + jobname

        send_job_(task, container, script)

    return True
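# `gen_sfx` is used by payload3 and payload2 but not defined in this section.
# A minimal sketch consistent with the payload3 docstring ("zero-padded
# sequence 01, 02, ..., N, not less than 2 placeholders") and with call sites
# such as gen_sfx('Makefile.a', rn, '.yaml'); the padding rule for n > 99 is
# an assumption:
def gen_sfx(prefix, n, sfx=''):
    """Sketch: yield n names like prefix + zero-padded index + sfx,
    e.g. gen_sfx('a', 3) -> 'a01', 'a02', 'a03'."""
    width = max(2, len(str(n)))     # at least two placeholders
    for i in range(1, n + 1):
        yield prefix + str(i).zfill(width) + sfx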
def payload2(task):
    """
    split_task: split input *.1.fastq and *.2.fastq into 'rn' pieces,
    then run PanDA /bin/bash split.sh
    :param task:
    :return:
    """
    logger.debug("payload2: Start")

    # Check type of task
    task_type = task.task_type
    if task_type.id != 1:
        raise WebpandaError("Illegal task_type.id")
    logger.debug("payload2: tasktype " + str(task_type.id))

    # Get user
    user = users_.get(task.owner_id)
    logger.debug("payload2: user " + str(user.id))

    # Get containers
    input_cont = conts_.get(task.input)
    # TODO: do something with the output container?
    output_cont = conts_.get(task.output)

    task.tag = "task." + commands.getoutput('uuidgen')
    tasks_.save(task)
    logger.debug("payload2: tag " + task.tag)

    # Get container
    container = Container()
    container.guid = task.tag + ".0"
    conts_.save(container)
    logger.debug("payload2: cont " + container.guid)

    script_add = ""
    rn = 0
    # Add input files to container
    files_template_list = task_type.ifiles_template.split(',')
    for item in input_cont.files:
        f = item.file
        # Derive the number of pieces from the first fastq file size
        if rn == 0:
            if f.lfn.endswith('fastq'):
                rn = getn(f.fsize)
            elif f.lfn.endswith('fastq.bz2'):
                rn = getn2(f.fsize)
        for file_template in files_template_list:
            # TODO: change file template here
            m = re.match(file_template, f.lfn)
            if m is not None:
                # Register file in container
                fc.reg_file_in_cont(f, container, 'input')
                if f.lfn.endswith('.fastq'):
                    for fi in gen_sfx(f.lfn[:-5] + 'a', rn, '.fastq'):
                        fc.reg_file_in_cont_byname(user, fi, container, 'output')
                if f.lfn.endswith('.fastq.bz2'):
                    for fi in gen_sfx(f.lfn[:-9] + 'a', rn, '.fastq'):
                        fc.reg_file_in_cont_byname(user, fi, container, 'output')
                if f.lfn.endswith('.fasta'):
                    fn = f.lfn + '.'
                    fc.reg_file_in_cont_byname(user, fn[:-6] + 'dict', container, 'output')
                    # itert: the validated file has null size
                    for sfx in ('amb', 'ann', 'bwt', 'fai', 'pac', 'sa', 'validated'):
                        fc.reg_file_in_cont_byname(user, fn + sfx, container, 'output')
                    script_add += "; echo 123 > ../{fname}".format(fname=fn + "validated")

    logger.debug("payload2: reg Makefile")
    # Register additional output
    for fi in gen_sfx('Makefile.a', rn, '.yaml'):
        fc.reg_file_in_cont_byname(user, fi, container, 'output')

    # Prepare trf script
    script = task.task_type.trf_template
    # TODO: just for test - only emulate, not real jobs (overrides trf_template)
    pipeline_path_name = 'paleomix_bam'
    swdir = '/s/ls2/users/poyda/swp/' + pipeline_path_name + '/'
    # run template generation in the background, then split
    script = "/bin/bash " + swdir + "genref.sh && /bin/bash " + swdir + "runtmplgen.sh -t 1>bam.out 2>bam.err & "
    script += "/bin/bash " + swdir + "split.sh -t " + str(rn)
    script += script_add

    # Save rn as task param
    task.params = str(rn)
    tasks_.save(task)
    logger.debug("payload2: script " + script)
    logger.debug("payload2: send_job " + container.guid)
    send_job_(task, container, script)
    return True
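# `getn` and `getn2` are not defined in this section; payload2 uses them to
# map a fastq (getn) or fastq.bz2 (getn2) file size to a split count. A
# hypothetical sketch: the 2 GB chunk size and the ~4x bz2 compression ratio
# are invented for illustration, not values from the source.
def getn(fsize, chunk=2 * 1024 ** 3):
    """Sketch: number of pieces for a plain fastq of fsize bytes."""
    return int(fsize // chunk) + 1

def getn2(fsize, ratio=4):
    """Sketch: same for bz2-compressed fastq, scaled by an assumed ratio."""
    return getn(fsize * ratio)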
def run():
    """
    Starts current defined task
    :return:
    """
    print "main started"

    # Fetch pipelines
    # TODO: add SQL filter on status if possible
    pipelines = pipelines_.all()
    for pipeline in pipelines:
        # Skip pipelines which are already done
        if pipeline.status in ['finished', 'failed', 'cancelled']:
            continue

        # Fetch task object
        current_task = pclient.get_current_task(pipeline)
        if current_task is None:
            raise WebpandaError('Illegal task ID')

        if current_task.status == 'failed':
            # TODO: what to do if failed?
            pipeline.status = 'failed'
            pipeline.modification_time = datetime.utcnow()
            pipelines_.save(pipeline)
            continue

        if current_task.status == 'cancelled':
            # TODO: what to do if cancelled, and by whom?
            # If by system - resubmit; if by user - nothing?
            pipeline.status = 'cancelled'
            pipeline.modification_time = datetime.utcnow()
            pipelines_.save(pipeline)
            continue

        if current_task.status == 'finished':
            # Get next_task
            current_task = pclient.get_next_task(pipeline)

        if current_task.status == 'defined':
            if current_task.task_type.method == 'start':
                # Do some general pipeline checks
                current_task.status = 'finished'
                current_task.modification_time = datetime.utcnow()
                tasks_.save(current_task)
                continue
            elif current_task.task_type.method == 'finish':
                current_task.status = 'finished'
                current_task.modification_time = datetime.utcnow()
                tasks_.save(current_task)
                # Process system finish task
                pipeline.status = 'finished'
                pipeline.modification_time = datetime.utcnow()
                pipelines_.save(pipeline)
                continue
            else:
                # Check the input: either all files are already present,
                # or they will never be complete.
                if not paleomix.has_input(current_task):
                    current_task.status = "failed"
                    current_task.modification_time = datetime.utcnow()
                    current_task.comment = "Input files check failed"
                    tasks_.save(current_task)
                    pipeline.status = 'failed'
                    pipeline.modification_time = datetime.utcnow()
                    pipelines_.save(pipeline)
                    continue

                # Run task if defined
                current_task.status = 'sent'
                tasks_.save(current_task)
                # TODO: run in async mode. The task is already fetched by id,
                # so there is no need to obtain it again; note that for an
                # async run all params must be serializable (BaseQuery is not).
                paleomix.run(current_task)
                continue
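# A minimal way to drive the daemon loop above, assuming run() and
# check_running_tasks() are importable from one module and are meant to be
# polled periodically; the 60-second interval is an assumption, not a value
# from the source.
if __name__ == '__main__':
    import time
    while True:
        run()                   # advance pipelines / dispatch defined tasks
        check_running_tasks()   # sync task states with PanDA job states
        time.sleep(60)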