def payload4(task):
    """Run merge from N inputs.

    input: Makefile.all, *.fasta.{sfx list}, *1.{N}.fastq, *2.{N}.fastq,
           {N}reads.tgz, {N}maps.tgz
    output: bam file + results.tgz?
    :param task:
    :return:
    """
    logger.debug("payload4: Start")

    #### Prepare
    # Check type of task
    task_type = task.task_type

    # Get user
    user = users_.get(task.owner_id)

    task.tag = "task." + commands.getoutput('uuidgen')
    tasks_.save(task)

    n = 10
    if task.params is not None:
        n = int(task.params)
        if n == 0:
            n = 10

    # Get containers
    input_cont = conts_.get(task.input)
    # TODO: do smth with output container?
    output_cont = conts_.get(task.output)

    # Get container
    container = Container()
    container.guid = task.tag
    conts_.save(container)

    # Add input files to container
    files_template_list = task_type.ifiles_template.split(',')
    for item in input_cont.files:
        f = item.file
        for file_template in files_template_list:
            # TODO: Change file template here
            m = re.match(file_template, f.lfn)
            if m is not None:
                # Register file in container
                fc.reg_file_in_cont(f, container, 'input')

    # Register additional output
    fc.reg_file_in_cont_byname(user, 'output.bam', container, 'output')
    fc.reg_file_in_cont_byname(user, 'myresults.bz2', container, 'output')

    # Prepare trf script
    script = task.task_type.trf_template
    # TODO: just for test - only emulate, not real jobs
    pipeline_path_name = 'paleomix_bam'
    swdir = '/s/ls2/users/poyda/swp/' + pipeline_path_name + '/'
    script = "/bin/bash " + swdir + "runmerge.sh -t " + str(n)

    send_job_(task, container, script)
    return True
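# fc.reg_file_in_cont_byname() is used throughout the payloads but is not
# defined in this section. A minimal sketch of what it plausibly does,
# inferred from the call sites: create a File record for the given lfn under
# the user's scope, then register it in the container. The body below is an
# assumption, not the real helper.
def _reg_file_in_cont_byname_sketch(user, lfn, container, ftype):
    f = File()
    f.scope = getScope(user.username)
    f.lfn = lfn
    f.guid = getGUID(f.scope, f.lfn)
    f.status = 'defined'
    files_.save(f)
    # Delegate to the regular catalog registration helper
    return fc.reg_file_in_cont(f, container, ftype)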
def check_running_tasks():
    """Check PanDA job statuses for all running tasks.

    :return:
    """
    # Get tasks in running state
    tasks = tasks_.find(status='running')
    for task in tasks:
        # Check if tag is defined
        if task.tag is not None and task.tag != "":
            # Check failed PanDA jobs
            jobs = jobs_.find(tags=task.tag, status='failed')
            if jobs.count() > 0:
                task.status = 'failed'
                task.modification_time = datetime.utcnow()
                task.comment = "Failed task due to {n} failed jobs".format(n=jobs.count())
                tasks_.save(task)
                return False

            # Check cancelled PanDA jobs
            jobs = jobs_.find(tags=task.tag, status='canceled')
            if jobs.count() > 0:
                task.status = 'cancelled'
                task.modification_time = datetime.utcnow()
                tasks_.save(task)
                return False

            # Check finished PanDA jobs
            jobs = jobs_.find(tags=task.tag, status='finished')
            jobs_all = jobs_.find(tags=task.tag)
            if jobs.count() == jobs_all.count():
                # Register files from jobs into task container
                cont = conts_.get(task.input)
                for job in jobs:
                    files_catalog = job.container.files
                    for f in files_catalog:
                        if f.type == 'output':
                            # Register file in container
                            fc.reg_file_in_cont(f.file, cont, 'intermediate')

                # Change task status
                task.status = 'finished'
                task.modification_time = datetime.utcnow()
                tasks_.save(task)
                return True
        else:
            # If tag is not defined
            task.status = 'finished'
            task.modification_time = datetime.utcnow()
            tasks_.save(task)
            return True
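# check_running_tasks() returns as soon as it updates one task, so it is
# presumably driven by a periodic scheduler that calls it repeatedly. Since
# job submission elsewhere in this code uses Celery (chord/async_send_job),
# a beat-style wrapper might look like this; the task name and the wrapper
# itself are illustrative assumptions, not part of the original source.
from celery import shared_task

@shared_task(name='webpanda.check_running_tasks')
def check_running_tasks_cron():
    try:
        check_running_tasks()
    except Exception as e:
        logger.error("check_running_tasks failed: %s" % str(e))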
def new_pipeline():
    form = NewPipelineForm(request.form)
    if request.method == 'POST':
        ifiles = request.form.getlist('iguids[]')
        current_user = g.user

        # Prepare pipeline
        pp = Pipeline()
        pp.status = 'running'
        pp.type_id = pipeline_types_.get(1).id
        pp.owner_id = current_user.id
        pipelines_.save(pp)

        # Prepare container
        pp_cont = Container()
        pp_cont.guid = 'pipeline.' + commands.getoutput('uuidgen')
        conts_.save(pp_cont)

        # Add guids to container
        for item in ifiles:
            if item != '':
                f = files_.first(guid=item)
                if f is not None:
                    # Register file in catalog
                    fc.reg_file_in_cont(f, pp_cont, 'input')
                else:
                    pp_cont.status = 'broken'
                    conts_.save(pp_cont)
                    return make_response(jsonify({'error': "GUID {} not found".format(item)}))

        # Set current task
        start_task = pclient.get_start_task(pp)
        start_task.input = pp_cont.id
        start_task.output = pp_cont.id
        tasks_.save(start_task)

        return redirect(url_for('pipelines.list_all'))
    return render_template('dashboard/pp/new.html', form=form)
def new_pipeline_from_cont():
    form = RunForm(request.form)
    if request.method == 'POST':
        icont = conts_.first(guid=form.guid.data)
        if icont is None:
            raise WebpandaError("Container not found")
        current_user = g.user

        # Prepare pipeline
        pp = Pipeline()
        pp.status = 'running'
        pp.type_id = pipeline_types_.get(1).id
        pp.owner_id = current_user.id
        pipelines_.save(pp)

        # Prepare container
        pp_cont = Container()
        pp_cont.guid = 'pipeline.' + commands.getoutput('uuidgen')
        conts_.save(pp_cont)

        # Add guids to container
        for item in icont.files:
            f = item.file
            # Register file in catalog
            fc.reg_file_in_cont(f, pp_cont, 'input')

        # Set current task
        start_task = pclient.get_start_task(pp)
        start_task.input = pp_cont.id
        start_task.output = pp_cont.id
        tasks_.save(start_task)

        return redirect(url_for('pipelines.list_all'))
    return render_template('dashboard/pp/new.html', form=form)
def payload2(task):
    """split_task

    Split input *.1.fastq and *.2.fastq into 'rn' pieces:
    run panda /bin/bash split.sh
    :param task:
    :return:
    """
    logger.debug("payload2: Start")

    #### Prepare
    # Check type of task
    task_type = task.task_type
    if task_type.id != 1:
        raise WebpandaError("Illegal task_type.id")
    logger.debug("payload2: tasktype " + str(task_type.id))

    # Get user
    user = users_.get(task.owner_id)
    logger.debug("payload2: user " + str(user.id))

    # Get containers
    input_cont = conts_.get(task.input)
    # TODO: do smth with output container?
    output_cont = conts_.get(task.output)

    task.tag = "task." + commands.getoutput('uuidgen')
    tasks_.save(task)
    logger.debug("payload2: tag " + task.tag)

    # Get container
    container = Container()
    container.guid = task.tag + ".0"
    conts_.save(container)
    logger.debug("payload2: cont " + container.guid)

    script_add = ""
    rn = 0
    # Add input files to container
    files_template_list = task_type.ifiles_template.split(',')
    for item in input_cont.files:
        f = item.file
        if rn == 0:
            if f.lfn.endswith('fastq'):
                rn = getn(f.fsize)
            elif f.lfn.endswith('fastq.bz2'):
                rn = getn2(f.fsize)
        for file_template in files_template_list:
            # TODO: Change file template here
            m = re.match(file_template, f.lfn)
            if m is not None:
                # Register file in container
                fc.reg_file_in_cont(f, container, 'input')
                if f.lfn.endswith('.fastq'):
                    for fi in gen_sfx(f.lfn[:-5] + 'a', rn, '.fastq'):
                        fc.reg_file_in_cont_byname(user, fi, container, 'output')
                if f.lfn.endswith('.fastq.bz2'):
                    for fi in gen_sfx(f.lfn[:-9] + 'a', rn, '.fastq'):
                        fc.reg_file_in_cont_byname(user, fi, container, 'output')
                if f.lfn.endswith('.fasta'):
                    fn = f.lfn + '.'
                    fc.reg_file_in_cont_byname(user, fn[:-6] + 'dict', container, 'output')
                    # itert: validated file has null size
                    for sfx in ('amb', 'ann', 'bwt', 'fai', 'pac', 'sa', 'validated'):
                        fc.reg_file_in_cont_byname(user, fn + sfx, container, 'output')
                    script_add += "; echo 123 > ../{fname}".format(fname=fn + "validated")

    logger.debug("payload2: reg Makefile")
    # Register additional output
    for fi in gen_sfx('Makefile.a', rn, '.yaml'):
        fc.reg_file_in_cont_byname(user, fi, container, 'output')

    #guids = ["web.it_4b7d4757-9ba4-4ed7-8bc0-6edb8bcc68d2",
    #         "web.it_3bc78e60-241b-418a-a631-2461d4ba1977",
    #         "web.it_1b88049e-463b-4b4f-8454-9587301a53e5",
    #         "web.it_a02271ea-8a9b-42f3-add2-ed6d0f9ff07e",
    #         "web.it_61bb7c80-e53c-4641-88b0-fbd16b0f3d56",
    #         "web.it_3930f596-25ea-49b0-8943-7a83c84c7940",
    #         "web.it_aa7b77a3-c765-464e-a4fa-29ce6dd50346",
    #         "web.it_211f2187-41f2-489f-ba63-73f004f21c66"
    #         ]
    #for guid in guids:
    #    fc.reg_file_in_cont(files_.first(guid=guid), container, 'input')

    # Prepare trf script
    script = task.task_type.trf_template
    # TODO: just for test add "1" - script1.sh - only emulate, not real jobs
    pipeline_path_name = 'paleomix_bam'
    swdir = '/s/ls2/users/poyda/swp/' + pipeline_path_name + '/'
    # genref.sh && runtmplgen.sh run in the background while split.sh runs
    script = "/bin/bash " + swdir + "genref.sh && /bin/bash " + swdir + "runtmplgen.sh -t 1>bam.out 2>bam.err & "
    script += "/bin/bash " + swdir + "split.sh -t " + str(rn)
    script += script_add

    # Save rn as task param
    task.params = str(rn)
    tasks_.save(task)

    logger.debug("payload2: script " + script)
    logger.debug("payload2: send_job " + container.guid)
    send_job_(task, container, script)
    return True
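# gen_sfx, getn and getn2 are used by payload2/payload3 but are not defined in
# this section. Hypothetical reconstructions, inferred from payload3's
# docstring ("{N} = sequence 0..01,0..02,...,N, not less than 2 placeholders")
# and from the fact that the split count is derived from the input file size.
# The padding rule and size thresholds below are assumptions.
def gen_sfx(prefix, n, sfx=''):
    # Yield prefix + zero-padded index + sfx, e.g. gen_sfx('a', 10) -> a01..a10
    width = max(2, len(str(n)))  # not less than 2 placeholders
    for i in range(1, n + 1):
        yield prefix + str(i).zfill(width) + sfx

def getn(fsize, chunk=512 * 1024 * 1024):
    # Split count for plain .fastq; chunk size is an assumed value
    return max(2, int(fsize) // chunk + 1)

def getn2(fsize, chunk=128 * 1024 * 1024):
    # Split count for .fastq.bz2 (compressed, so smaller chunks); assumed value
    return max(2, int(fsize) // chunk + 1)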
def send_job(jobid, siteid):
    _logger.debug('Jobid: ' + str(jobid))

    site = sites_.get(siteid)
    job = jobs_.get(int(jobid))
    cont = job.container
    files_catalog = cont.files

    fscope = getScope(job.owner.username)
    datasetName = '{}:{}'.format(fscope, cont.guid)

    distributive = job.distr.name
    release = job.distr.release

    # Prepare runScript
    parameters = job.distr.command
    parameters = parameters.replace("$COMMAND$", job.params)
    parameters = parameters.replace("$USERNAME$", job.owner.username)
    parameters = parameters.replace("$WORKINGGROUP$", job.owner.working_group)

    # Prepare metadata
    metadata = dict(user=job.owner.username)

    # Prepare PanDA object
    pandajob = JobSpec()
    pandajob.jobDefinitionID = int(time.time()) % 10000
    pandajob.jobName = cont.guid
    pandajob.transformation = client_config.DEFAULT_TRF
    pandajob.destinationDBlock = datasetName
    pandajob.destinationSE = site.se
    pandajob.currentPriority = 1000
    pandajob.prodSourceLabel = 'user'
    pandajob.computingSite = site.ce
    pandajob.cloud = 'RU'
    pandajob.VO = 'atlas'
    pandajob.prodDBlock = "%s:%s" % (fscope, pandajob.jobName)
    pandajob.coreCount = job.corecount
    pandajob.metadata = json.dumps(metadata)
    #pandajob.workingGroup = job.owner.working_group

    if site.encode_commands:
        # It requires script wrapper on cluster side
        pandajob.jobParameters = '%s %s %s "%s"' % (cont.guid, release, distributive, parameters)
    else:
        pandajob.jobParameters = parameters

    has_input = False
    for fcc in files_catalog:
        if fcc.type == 'input':
            f = fcc.file
            guid = f.guid
            fileIT = FileSpec()
            fileIT.lfn = f.lfn
            fileIT.dataset = pandajob.prodDBlock
            fileIT.prodDBlock = pandajob.prodDBlock
            fileIT.type = 'input'
            fileIT.scope = fscope
            fileIT.status = 'ready'
            fileIT.GUID = guid
            pandajob.addFile(fileIT)
            has_input = True
        if fcc.type == 'output':
            f = fcc.file
            fileOT = FileSpec()
            fileOT.lfn = f.lfn
            fileOT.destinationDBlock = pandajob.prodDBlock
            fileOT.destinationSE = pandajob.destinationSE
            fileOT.dataset = pandajob.prodDBlock
            fileOT.type = 'output'
            fileOT.scope = fscope
            fileOT.GUID = f.guid
            pandajob.addFile(fileOT)

            # Save replica meta
            fc.new_replica(f, site)

    if not has_input:
        # Add fake input
        fileIT = FileSpec()
        fileIT.lfn = "fake.input"
        fileIT.dataset = pandajob.prodDBlock
        fileIT.prodDBlock = pandajob.prodDBlock
        fileIT.type = 'input'
        fileIT.scope = fscope
        fileIT.status = 'ready'
        fileIT.GUID = "fake.guid"
        pandajob.addFile(fileIT)

    # Prepare log file
    fileOL = FileSpec()
    fileOL.lfn = "%s.log.tgz" % pandajob.jobName
    fileOL.destinationDBlock = pandajob.destinationDBlock
    fileOL.destinationSE = pandajob.destinationSE
    fileOL.dataset = '{}:logs'.format(fscope)
    fileOL.type = 'log'
    fileOL.scope = 'panda'
    pandajob.addFile(fileOL)

    # Save log meta
    log = File()
    log.scope = fscope
    log.lfn = fileOL.lfn
    log.guid = getGUID(log.scope, log.lfn)
    log.type = 'log'
    log.status = 'defined'
    files_.save(log)

    # Save replica meta
    fc.new_replica(log, site)

    # Register file in container
    fc.reg_file_in_cont(log, cont, 'log')

    # Submit job
    o = submitJobs([pandajob])
    x = o[0]
    try:
        # Update PandaID
        PandaID = int(x[0])
        job.pandaid = PandaID
        job.ce = site.ce
    except Exception:
        job.status = 'submit_error'
    jobs_.save(job)
    return 0
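# send_job() takes database ids rather than ORM objects, which is what makes
# it easy to dispatch asynchronously (async_send_job elsewhere in this code
# wraps it via Celery). A direct, synchronous call would look like the sketch
# below; the job/site lookups are illustrative only.
def _send_job_example():
    pending = jobs_.first(status='pending')
    target = sites_.first(ce=current_app.config['DEFAULT_CE'])
    if pending is not None and target is not None:
        send_job(pending.id, target.id)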
def new_job():
    """Creates new job"""
    g.user = request.oauth.user
    scope = getScope(request.oauth.user.username)
    js = request.json
    data = js['data']
    distr_id = data['sw_id']
    params = data['script']
    corecount = data['cores']

    site = sites_.first(ce=current_app.config['DEFAULT_CE'])
    distr = distrs_.get(distr_id)

    container = Container()
    guid = 'job.' + commands.getoutput('uuidgen')
    container.guid = guid
    container.status = 'open'
    conts_.save(container)

    # Process ftp files
    if 'ftp_dir' in data.keys():
        ftp_dir = data['ftp_dir']
        register_ftp_files(ftp_dir, scope, container.guid)

    # Process guid list
    if 'guids' in data.keys():
        guids = data['guids']
        for f in guids:
            if f != '':
                file_ = files_.first(guid=f)
                if file_ is not None:
                    # Register file in catalog
                    fc.reg_file_in_cont(file_, container, 'input')
                else:
                    raise WebpandaError('File with guid %s not found' % f)

    ofiles = ['results.tgz']

    # Starts cloneReplica tasks
    ftasks = prepareInputFiles(container.id, site.se)

    # Saves output files meta
    for lfn in ofiles:
        file = File()
        file.scope = scope
        file.guid = getGUID(scope, lfn)
        file.lfn = lfn
        file.status = 'defined'
        files_.save(file)
        # Register file in catalog
        fc.reg_file_in_cont(file, container, 'output')

    # Counts files
    allfiles = container.files
    nifiles = 0
    nofiles = 0
    for f in allfiles:
        if f.type == 'input':
            nifiles += 1
        if f.type == 'output':
            nofiles += 1

    # Defines job meta
    job = Job()
    job.pandaid = None
    job.status = 'pending'
    job.owner = request.oauth.user
    job.params = params
    job.distr = distr
    job.container = container
    job.creation_time = datetime.utcnow()
    job.modification_time = datetime.utcnow()
    job.ninputfiles = nifiles
    job.noutputfiles = nofiles
    job.corecount = corecount
    job.tags = data['tags'] if 'tags' in data.keys() else ""
    jobs_.save(job)

    # Async sendjob
    res = chord(ftasks)(async_send_job.s(jobid=job.id, siteid=site.id))

    return {'id': job.id, 'container_id': guid}
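# An illustrative JSON body for this endpoint, reconstructed from the fields
# read out of js['data'] above; all values are made up.
EXAMPLE_NEW_JOB_REQUEST = {
    "data": {
        "sw_id": 1,                       # distribution id (-> distrs_.get)
        "script": "echo test > out.txt",  # job script, stored in job.params
        "cores": 1,                       # job.corecount
        "ftp_dir": "batch_01",            # optional: pre-uploaded ftp dir
        "guids": [],                      # optional: input file guids
        "tags": "demo",                   # optional: job tags
    }
}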
def upload():
    form = request.form

    # Create a unique container guid for this particular batch of uploads.
    cguid = 'job.' + commands.getoutput('uuidgen')

    # Is the upload using Ajax, or a direct POST by the form?
    is_ajax = False
    if form.get("__ajax", None) == "true":
        is_ajax = True

    # Create new container
    container = Container()
    container.guid = cguid
    container.status = 'open'
    conts_.save(container)

    # Process files in request
    for upload in request.files.getlist("file"):
        # Define file params: keep only the basename of the uploaded file
        lfn = upload.filename.rsplit("/", 1)[-1]
        scope = getScope(g.user.username)
        guid = getGUID(scope, lfn)
        site = sites_.first(se=current_app.config['DEFAULT_SE'])

        # Target folder for these uploads.
        dir = '/' + os.path.join('system', scope, guid)
        target = site.datadir + dir
        try:
            os.makedirs(target)
        except OSError:
            if is_ajax:
                return ajax_response(False, "Couldn't create upload directory: %s" % target)
            else:
                return "Couldn't create upload directory: %s" % target

        replfn = os.path.join(dir, lfn)
        destination = os.path.join(target, lfn)
        upload.save(destination)

        if os.path.isfile(destination):
            # Check file existence in catalog
            adler = adler32(destination)
            md5 = md5sum(destination)
            size = fsize(destination)
            file_id = ddm_checkifexists(lfn, size, adler, md5)

            if file_id:
                # If file exists
                file = files_.get(file_id)
            else:
                # Otherwise create new
                file = File()
                file.scope = scope
                file.guid = guid
                file.type = 'input'
                file.lfn = lfn
                file.token = ''
                file.status = 'defined'
                files_.save(file)
                setFileMeta(file.id, destination)

            replica = Replica()
            replica.se = site.se
            replica.status = 'ready'
            replica.lfn = replfn
            replica.original = file
            replicas_.save(replica)

            # Register file in container
            fc.reg_file_in_cont(file, container, 'input')
        else:
            return ajax_response(False, "Couldn't save file: %s" % target)

    if is_ajax:
        return ajax_response(True, cguid)
    else:
        return redirect(url_for("jobs.jobs"))
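# The checksum/size helpers used above (adler32, md5sum, fsize) are imported
# from elsewhere; plausible implementations built on zlib/hashlib/os are
# sketched below. The real helpers may format their output differently.
# Chunked reads keep memory bounded on large uploads.
import hashlib
import zlib

def _adler32_sketch(path, blocksize=1024 * 1024):
    value = 1  # adler32 seed
    with open(path, 'rb') as fp:
        for chunk in iter(lambda: fp.read(blocksize), b''):
            value = zlib.adler32(chunk, value) & 0xffffffff
    return "%08x" % value

def _md5sum_sketch(path, blocksize=1024 * 1024):
    h = hashlib.md5()
    with open(path, 'rb') as fp:
        for chunk in iter(lambda: fp.read(blocksize), b''):
            h.update(chunk)
    return h.hexdigest()

def _fsize_sketch(path):
    return os.stat(path).st_size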
def file_save(container_guid, lfn):
    """
    POST: /pilot/file/<container_guid>/<lfn>/save
    Saves file from request, returns file guid
    :param container_guid: Guid of container
    :type container_guid: str
    :param lfn: Local FileName
    :type lfn: str
    :return: guid
    :rtype: json
    """
    site = sites_.first(se=current_app.config['DEFAULT_SE'])
    if ':' in container_guid:
        container_guid = container_guid.split(':')[-1]
    container = conts_.first(guid=container_guid)
    if container.status != 'open':
        raise WebpandaError('Unable to upload: Container is not open')

    # Look up the file in the container by lfn
    cc = container.files
    ff = None
    for c in cc:
        f = c.file
        if f.lfn == lfn:
            ff = f

    if not ff:
        ff = File()
        ff.scope = getScope(g.user.username)
        ff.lfn = lfn
        ff.guid = getGUID(ff.scope, ff.lfn)
        ff.status = 'defined'
        files_.save(ff)
        # Register file in container
        fc.reg_file_in_cont(ff, container, 'input')

    path = os.path.join(site.datadir, getScope(g.user.username), container.guid)
    replfn = '/' + os.path.join(getScope(g.user.username), container.guid, ff.lfn)
    destination = os.path.join(path, ff.lfn)

    # Check existing replicas on this SE
    for r in ff.replicas:
        if r.se == site.se:
            destination = site.datadir + r.lfn
            file_dir = '/'.join(destination.split('/')[:-1])
            if r.status == 'ready':
                if os.path.isfile(destination):
                    # Check fsize, md5 or adler
                    raise WebpandaError('Replica exists')
                else:
                    r.status = 'broken'
                    replicas_.save(r)
                    raise WebpandaError('Broken replica')
            elif r.status == 'defined':
                try:
                    os.makedirs(file_dir)
                except Exception:
                    pass
                f = open(destination, 'wb')
                f.write(request.data)
                f.close()
                # Update file info
                setFileMeta(ff.id, destination)
                r.status = 'ready'
                replicas_.save(r)
                return {'guid': ff.guid}
            else:
                raise WebpandaError('Replica status: %s' % r.status)

    # No replica on this SE yet: save the data and create one
    replica = Replica()
    if os.path.isfile(destination):
        raise WebpandaError('Unable to upload: File exists')
    try:
        os.makedirs(path)
    except Exception:
        _logger.debug('Path exists: %s' % path)
    f = open(destination, 'wb')
    f.write(request.data)
    f.close()

    # Update file info
    setFileMeta(ff.id, destination)

    # Create/change replica
    replica.se = site.se
    replica.status = 'ready'
    replica.lfn = replfn
    replica.token = ''
    replica.original = ff
    replicas_.save(replica)
    return {'guid': ff.guid}
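# Sketch of how a pilot might call this endpoint. The URL layout follows the
# docstring ("POST: /pilot/file/<container_guid>/<lfn>/save") and the raw
# request body matches the server-side request.data read above; the host and
# authentication details are placeholders, not taken from the source.
def _pilot_upload_example(host, container_guid, path):
    import requests
    lfn = os.path.basename(path)
    with open(path, 'rb') as fp:
        resp = requests.post(
            '%s/pilot/file/%s/%s/save' % (host, container_guid, lfn),
            data=fp.read(),  # raw body, read server-side via request.data
        )
    return resp.json()['guid']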
def upload_dir(user_id, cont_id, se_id, path):
    """Upload files from an external dir path into the defined container.

    :param user_id: id of User
    :param cont_id: id of Container
    :param se_id: id of SE
    :param path: dir path on SE
    :return:
    """
    user = users_.get(user_id)
    cont = conts_.get(cont_id)
    se = sites_.get(se_id)

    # Initialize SE connector
    print "=Initialize SE connector"
    _logger.debug("=Initialize SE connector")
    conn_factory = SEFactory()
    connector = conn_factory.getSE(se.plugin, None)

    # Fetch list of files
    print "=Fetch list of files"
    _logger.debug("=Fetch list of files")
    try:
        list_of_lfn = connector.ls(path, rel=False)
        # Drop empty items
        list_of_lfn = [item for item in list_of_lfn if item != ""]
    except Exception:
        raise WebpandaError("Unable to get list of files from SE: " + str(se_id))
    print "=" + str(len(list_of_lfn))
    _logger.debug("=" + str(len(list_of_lfn)))

    # Create list of File objs
    print "=Create list of File objs"
    _logger.debug("=Create list of File objs")
    list_of_obj = list()
    for item in list_of_lfn:
        list_of_obj.append(fc.new_file(user, item))
    print "=" + str(len(list_of_obj))
    _logger.debug("=" + str(len(list_of_obj)))

    # Iterate through file objects
    print "=IterateLoop:Start"
    _logger.debug("=IterateLoop:Start")
    for item in list_of_obj:
        # Add file to container
        print "=Add file to container"
        _logger.debug("=Add file to container")
        fc.reg_file_in_cont(item, cont, 'intermediate')

        # Copy file into system dir
        print "=Copy file into system dir"
        _logger.debug("=Copy file into system dir")
        connector.link(os.path.join(path, item.lfn), fc.get_file_dir(item), rel=False)

        # Calculate fsize, adler32, md5hash
        print "=Calculate fsize, adler32, md5hash"
        _logger.debug("=Calculate fsize, adler32, md5hash")
        item.fsize = connector.fsize(fc.get_file_path(item))
        item.md5sum = connector.md5sum(fc.get_file_path(item))
        item.checksum = connector.adler32(fc.get_file_path(item))
        fc.save(item)

        # Create Replica obj
        print "=Create Replica object"
        _logger.debug("=Create Replica object")
        r = fc.new_replica(item, se)
        r.status = 'ready'
        fc.save(r)

        # Update file status
        print "=Update file status"
        _logger.debug("=Update file status")
        item.status = 'ready'
        fc.save(item)
    print "=IterateLoop:Finish"
    _logger.debug("=IterateLoop:Finish")

    # Return container id
    print "=Return container id"
    _logger.debug("=Return container id")
    return cont_id
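# The SE connector interface implied by upload_dir(): SEFactory().getSE()
# presumably returns an object with at least these methods. This skeleton is
# inferred from the calls above only; the real plugin base class may differ.
class _SEConnectorSketch(object):
    def ls(self, path, rel=True):
        """List file lfns under path."""
        raise NotImplementedError

    def link(self, src, dst_dir, rel=True):
        """Link/copy a file into the system directory."""
        raise NotImplementedError

    def fsize(self, path):
        raise NotImplementedError

    def md5sum(self, path):
        raise NotImplementedError

    def adler32(self, path):
        raise NotImplementedError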
def registerLocalFile(arg, dirname, names, scope):
    """Register files from a local dir into a container.

    :param arg: Container guid
    :param dirname: Abs dir
    :param names: File names
    :param scope: Scope to upload files in
    :return:
    """
    site = sites_.first(se=client_config.DEFAULT_SE)
    _logger.debug(str(arg))
    cont = conts_.first(guid=arg)
    files = cont.files

    for name in names:
        fpath = os.path.join(dirname, name)
        fobj = None

        # Check in container
        for file in files:
            if file.lfn == name:
                fobj = file

        # Check in catalog
        if not fobj:
            destination = os.path.join(dirname, name)
            adler = adler32(destination)
            md5 = md5sum(destination)
            size = fsize(destination)
            file_id = ddm_checkifexists(name, size, adler, md5)
            if file_id:
                # If file exists
                fobj = files_.get(file_id)

        if not fobj:
            fobj = File()
            fobj.scope = scope
            fobj.lfn = name
            fobj.guid = getGUID(fobj.scope, fobj.lfn)
            fobj.type = 'input'
            fobj.status = 'defined'
            files_.save(fobj)
            setFileMeta(fobj.id, fpath)

        # Register file in catalog
        fc.reg_file_in_cont(fobj, cont, "input")

        # Ensure a ready replica exists on this SE
        replicas = fobj.replicas
        replica = None
        for r in replicas:
            if r.se == site.se and r.status == 'ready':
                replica = r
        if not replica:
            ldir = '/' + os.path.join('system', fobj.scope, fobj.guid)
            ddm_localmakedirs(ldir)
            ddm_localcp(fpath[len(site.datadir):], ldir)

            replica = Replica()
            replica.se = site.se
            replica.status = 'ready'
            replica.token = ''
            replica.lfn = os.path.join(ldir, fobj.lfn)
            replica.original = fobj
            replicas_.save(replica)
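# The (arg, dirname, names) prefix of registerLocalFile matches the Python 2
# os.path.walk visitor signature; the extra `scope` argument suggests it is
# bound at the call site. An illustrative invocation (the wrapper itself is
# an assumption, not part of the original source):
def _register_dir_example(cont_guid, datadir, scope):
    os.path.walk(
        datadir,
        lambda arg, dirname, names: registerLocalFile(arg, dirname, names, scope),
        cont_guid,  # passed through to the visitor as `arg`
    )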
def job():
    """New job form view

    :return: Response obj
    """
    form = NewJobForm(request.form)
    if request.method == 'POST':
        site = sites_.get(int(form.site.data))
        distr_name, distr_release = form.distr.data.split(':')
        distr = distrs_.first(name=distr_name, release=int(distr_release))

        container_guid = form.container.data
        try:
            container = conts_.first(guid=container_guid)
        except Exception as e:
            _logger.error(str(e))
            return make_response(jsonify({'error': 'Container not found'}), 404)

        if site.encode_commands:
            # By default the frontend encodes the job script parts with base64,
            # separated by ";". It requires a script wrapper on the cluster side.
            jparams = form.params.data
        else:
            # Set site.encode_commands to False if you want to send the command
            # string without base64 encoding
            jparams = ';'.join([b64decode(command) for command in form.params.data.split(';')])

        ifiles = request.form.getlist('ifiles[]')
        iguids = request.form.getlist('iguids[]')
        iconts = request.form.getlist('iconts[]')
        ofiles = ['{guid}.out.tgz'.format(guid=container.guid)]
        scope = getScope(g.user.username)

        # Process ftp files
        ftp_dir = form.ftpdir.data
        register_ftp_files(ftp_dir, scope, container.guid)

        # Process guid list
        for f in iguids:
            if f != '':
                file = files_.first(guid=f)
                if file is not None:
                    # Register file in container
                    fc.reg_file_in_cont(file, container, 'input')
                else:
                    return make_response(jsonify({'error': "GUID {} not found".format(f)}))

        # Process containers
        for c in iconts:
            if c != '':
                try:
                    form_cont = conts_.first(guid=c)
                except Exception as e:
                    _logger.error(str(e))
                    return make_response(jsonify({'error': 'Container in form not found'}), 404)
                for f in form_cont.files:
                    # Register file in catalog
                    fc.reg_file_in_cont(f.file, container, 'input')

        # Process urls
        for f in ifiles:
            if f != '':
                from_se, path, token = getUrlInfo(f)
                replfn = ':/'.join([from_se, path])
                # Check if used before
                file_id = ddm_checkexternalifexists('', replfn)
                if file_id:
                    file = files_.get(file_id)
                else:
                    lfn = path.split('/')[-1]
                    guid = getGUID(scope, lfn)
                    file = File()
                    file.scope = scope
                    file.guid = guid
                    file.type = 'input'
                    file.lfn = lfn
                    file.status = 'defined'
                    files_.save(file)

                    replica = Replica()
                    replica.se = from_se
                    replica.status = 'link'
                    # Separate url & token
                    replica.lfn = replfn
                    replica.token = token
                    replica.original = file
                    replicas_.save(replica)

                # Register file in container
                fc.reg_file_in_cont(file, container, 'input')

        # Starts cloneReplica tasks
        ftasks = prepareInputFiles(container.id, site.se)

        # Saves output files meta
        for lfn in ofiles:
            file = File()
            file.scope = scope
            file.guid = getGUID(scope, lfn)
            file.type = 'output'
            file.lfn = lfn
            file.status = 'defined'
            files_.save(file)
            # Register file in container
            fc.reg_file_in_cont(file, container, 'output')

        # Counts files
        allfiles = container.files
        nifiles = 0
        nofiles = 0
        for f in allfiles:
            if f.type == 'input':
                nifiles += 1
            if f.type == 'output':
                nofiles += 1

        # Defines job meta
        job = Job()
        job.pandaid = None
        job.status = 'pending'
        job.owner = g.user
        job.params = jparams
        job.distr = distr
        job.container = container
        job.creation_time = datetime.utcnow()
        job.modification_time = datetime.utcnow()
        job.ninputfiles = nifiles
        job.noutputfiles = nofiles
        job.corecount = form.corecount.data
        job.tags = form.tags.data if form.tags.data != "" else None
        jobs_.save(job)

        # Async sendjob
        res = chord(ftasks)(async_send_job.s(jobid=job.id, siteid=site.id))

        return redirect(url_for('jobs.jobs'))

    form.distr.choices = [("%s:%s" % (distr.name, distr.release), "%s: %s" % (distr.name, distr.version))
                          for distr in distrs_.find().order_by('name').order_by('version')]
    form.site.choices = [(site.id, "{ce}".format(ce=site.ce)) for site in sites_.find(active=1).order_by('ce')]
    return render_template("dashboard/jobs/new.html", form=form)
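# How a client presumably builds form.params.data for the view above: each
# command is base64-encoded and the parts are joined with ";" -- the mirror
# of the b64decode/';'.join logic in job(). The command list is illustrative.
from base64 import b64encode

commands_list = ['./configure', 'make', 'make check']
params = ';'.join([b64encode(c) for c in commands_list])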
def payload3(task):
    """run1 - N parallel jobs.

    {N} = sequence 0..01, 0..02, ..., N -- not less than 2 placeholders.
    TODO: deal with {N}.fastq.bz2?
    input: Makefile.{N}, *.fasta.{sfx list}, *1.{N}.fastq, *2.{N}.fastq
    output: likely reads{N}.tgz, maps{N}.tgz
    :param task:
    :return:
    """
    logger.debug("payload3: Start")

    #### Prepare
    # Check type of task
    task_type = task.task_type
    # if task_type.id != 3:  # or 6?
    #     raise WebpandaError("Illegal task_type.id")

    # Get user
    user = users_.get(task.owner_id)

    n = 10
    if task.params is not None:
        n = int(task.params)
        if n == 0:
            n = 10

    task.tag = "task." + commands.getoutput('uuidgen')
    tasks_.save(task)

    # Get containers
    input_cont = conts_.get(task.input)
    # TODO: do smth with output container?
    output_cont = conts_.get(task.output)

    for jobname in gen_sfx("a", n):
        # Get container
        container = Container()
        container.guid = task.tag + "." + jobname
        conts_.save(container)

        # Add input files to container
        files_template_list = task_type.ifiles_template.split(',')
        for item in input_cont.files:
            f = item.file
            for file_template in files_template_list:
                # TODO: Change file template here
                m = re.match(file_template, f.lfn)
                if m is not None:
                    # Register file in container
                    fc.reg_file_in_cont(f, container, 'input')

        # Register additional output
        fc.reg_file_in_cont_byname(user, jobname + '.reads.bz2', container, 'output')
        fc.reg_file_in_cont_byname(user, jobname + '.maps.bz2', container, 'output')

        # Prepare trf script
        script = task.task_type.trf_template
        # TODO: just for test - only emulate, not real jobs
        pipeline_path_name = 'paleomix_bam'
        swdir = '/s/ls2/users/poyda/swp/' + pipeline_path_name + '/'
        script = "/bin/bash " + swdir + "run11.sh -t " + jobname

        send_job_(task, container, script)

    return True