def get(self, run_id):
    """ Return the dag of the given run """
    pipeline = db.pipelines.find_one({'run_id': run_id},
                                     {'config': 1, 'file_registry': 1})
    file_registry = pipeline.get('file_registry', [])
    if file_registry:
        file_registry = json.loads(file_registry)
    conf_str = json.loads(pipeline['config'])
    config = Pipeline.load_cfg(conf_str)
    result_steps = config.get('config', {}).get('pipeline', {}).get('results', [])
    delete_steps = config.get('config', {}).get('pipeline', {}).get('delete', [])
    # 'finalize' and 'inputs' are bookkeeping steps, never reported in the DAG
    delete_steps.extend(['finalize', 'inputs'])
    # Only keep steps that produced at least one job with outputs
    steps = list(db.steps.find(
        {"run_id": run_id,
         "name": {"$nin": delete_steps},
         "jobs": {"$elemMatch": {"outputs": {"$exists": True}}}},
        {"name": 1, "jobs": 1, "outputs.output_dir": 1, "step_config": 1}))
    outputs = {}
    for step in steps:
        if step.get('step_config', {}):
            s = Step.load_step(step['step_config'])
            output_files = []
            for job_id, job in enumerate(step['jobs']):
                for key in job['outputs']:
                    # Only report keys declared as file outputs in the step config
                    if key in s.keys(key_groups='outputs', key_filter={'type': 'file'}):
                        for filename in job['outputs'][key]:
                            output = {'path': filename}
                            if not isinstance(filename, list):
                                # A file counts as archived if it is in the registry
                                output['archived'] = filename in file_registry
                            else:
                                output['archived'] = False
                            output_files.append(output)
            if output_files:
                outputs[step['name']] = defaultdict(list)
                outputs[step['name']]['archive'] = step['name'] in result_steps
                outputs[step['name']]['dir'] = step.get('outputs', {}).get('output_dir')
                outputs[step['name']]['files'] = copy.deepcopy(output_files)
    return outputs
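# Illustrative sketch (not part of the API): the shape of the mapping get()
# above returns, for a hypothetical run with a single 'align' step. The step
# name and file paths are made up; the keys mirror the code above.
def _example_dag_response():
    """ Example of the per-step output mapping built by get(). """
    return {
        'align': {
            'archive': True,                  # step listed under pipeline 'results'
            'dir': '/scratch/runs/42/align',  # step output directory
            'files': [
                {'path': '/scratch/runs/42/align/sample1.bam', 'archived': True},
                {'path': '/scratch/runs/42/align/sample2.bam', 'archived': False},
            ],
        },
    }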
def put(self):
    """ Queue the specified pipeline """
    data = request.get_json(force=True)
    config = data.get('config')
    user = auth_get_username(request.authorization, data.get('user'))
    errors = None  # Pipeline.validate_config(config, user)
    if not errors:
        config = Pipeline.load_cfg(config)
        # Register the run in the DB to obtain its run_id
        db_info = dbmodel.PipelineDb(config['name'], config,
                                     Pipeline.ordered_steps(config), user)
        config['run_id'] = db_info.run_id
        ut.pretty_print("Submitting pipeline %s (ID %d) for user %s"
                        % (config['label'], config['run_id'], user))
        return pm.add_pipeline(config, user)
    else:
        return errors, 400
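# Illustrative client-side sketch (assumptions: the resource above is mounted
# at '/pipelines', and host, credentials and file name are hypothetical).
# Shows the JSON body put() expects: a pipeline config plus the submitting user.
def _example_submit_pipeline():
    import requests  # local import: only needed for this sketch
    with open('pipeline.cfg') as fh:  # hypothetical config file
        payload = {'config': fh.read(), 'user': 'jdoe'}
    resp = requests.put('http://localhost:5000/pipelines',
                        json=payload, auth=('jdoe', 'secret'))
    # On success put() returns the result of pm.add_pipeline(...);
    # on validation failure it returns the errors with HTTP status 400.
    return resp.status_code, resp.json()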
def post(self, run_id):
    """ Push the selected output files into iRODS """
    data = request.get_json(force=True)
    runmeta = data.get('meta')
    selection = data.get('selection')
    user = auth_get_username(request.authorization, data.get('user'))
    # The run must belong to a known NPDI project / study pair
    npdis = dbmodel.get_npdi_projects()
    npdi = runmeta.get('Project NPDI ID', '')
    study_nickname = runmeta.get('Study nickname', 'Required field missing')
    if (npdi + study_nickname) not in npdis:
        return {'pipeline': {'Project': '%s (%s)' % (npdi, study_nickname)}}, 400
    run = db.pipelines.find_one({'run_id': run_id}, {'meta': 1, 'run_id': 1})
    steps_names = list(selection.keys())
    steps = list(db.steps.find(
        {"run_id": run_id,
         "name": {'$in': steps_names},
         "jobs": {"$elemMatch": {"outputs": {"$exists": True}}}},
        {"name": 1, "jobs": 1, "outputs.output_dir": 1, "step_config": 1}))
    outputs = {}
    for step in steps:
        if step.get('step_config', {}):
            s = Step.load_step(step['step_config'])
            output_files = {}
            for job_id, job in enumerate(step['jobs']):
                for key in job['outputs']:
                    # Only consider keys declared as file outputs in the step config
                    if key in s.keys(key_groups='outputs', key_filter={'type': 'file'}):
                        for i, filename in enumerate(job['outputs'][key]):
                            filemeta = {'step': step['name'], 'job_id': job_id}
                            ext = os.path.splitext(filename)[1][1:].upper()
                            # Copy the job metadata onto each file; list-valued
                            # metadata is aligned with the file index
                            for meta_key in job.get('meta', {}):
                                meta = job['meta'][meta_key]
                                if meta_key == 'sample_id':
                                    okey = 'Operational sample accession'
                                else:
                                    okey = meta_key
                                if isinstance(meta, list):
                                    filemeta[okey] = meta[i]
                                else:
                                    filemeta[okey] = meta
                            filemeta['File type'] = 'Processed data file'
                            filemeta['File format'] = ext
                            output_files[filename] = filemeta
            if output_files:
                outputs[step['name']] = output_files
    # Build the input file list and per-file metadata for the archiving pipeline
    input_files = []
    meta_data = []
    for step_name, step_selection in selection.items():
        for filepath in step_selection:
            input_files.append(filepath)
            filemeta = outputs[step_name][filepath]
            filemeta.update(runmeta)
            meta_data.append(filemeta)
    cfg = Pipeline.load_cfg(pipeline_specs['irods_lz'])
    cfg['config']['steps']['irods_mvtolz'] = {
        'input_files': input_files,
        'meta_data': meta_data
    }
    cfg['config']['steps']['irods_monitorlz'] = {'prun_id': run['run_id']}
    cfg['config']['pipeline']['project_name'] = run['meta']['project_name']
    cfg['config']['pipeline']['description'] = 'Archive data for run %s' % run['run_id']
    cfg['config']['pipeline']['output_dir'] = '/scratch/cgi/irods'
    # Register the run in the DB to obtain its run_id
    db_info = dbmodel.PipelineDb(cfg['name'], cfg,
                                 Pipeline.ordered_steps(cfg), user)
    cfg['run_id'] = db_info.run_id
    ut.pretty_print("Submitting pipeline %s (ID %d) for user %s"
                    % (cfg['label'], cfg['run_id'], user))
    return pm.add_pipeline(cfg, user)
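# Illustrative client-side sketch (assumptions: the resource above is mounted
# at '/pipelines/<run_id>/archive'; URL, credentials and values are
# hypothetical). Shows the payload post() expects: run-level iRODS metadata
# under 'meta' and a mapping of step name -> selected file paths under
# 'selection'.
def _example_archive_request(run_id=42):
    import requests  # local import: only needed for this sketch
    payload = {
        'meta': {
            'Project NPDI ID': 'NPDI-0001',
            'Study nickname': 'pilot',
        },
        'selection': {
            'align': ['/scratch/runs/42/align/sample1.bam'],
        },
        'user': 'jdoe',
    }
    resp = requests.post('http://localhost:5000/pipelines/%d/archive' % run_id,
                         json=payload, auth=('jdoe', 'secret'))
    return resp.status_code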