Пример #1
0
        def get(self, run_id):
            """
            Return the output files of the given run, grouped by step name.

            Steps listed under the pipeline config's ``delete`` key, as well
            as the ``finalize`` and ``inputs`` steps, are excluded.

            :param run_id: value matched against the ``run_id`` field of the
                pipeline and step documents.
            :return: dict mapping each step name to a dict with the keys
                ``archive`` (True when the step is listed in the config's
                ``results``), ``dir`` (the step's ``outputs.output_dir`` or
                None) and ``files`` (list of ``{'path', 'archived'}`` dicts).
                When no pipeline matches ``run_id`` an ``(error, 404)`` tuple
                is returned instead.
            """
            pipeline = db.pipelines.find_one(
                {'run_id': run_id}, {'config': 1, 'file_registry': 1})
            if pipeline is None:
                # find_one yields None when nothing matches; report that
                # explicitly instead of failing on pipeline.get below.
                return {'message': 'Run %s not found' % run_id}, 404

            file_registry = pipeline.get('file_registry', [])
            if file_registry:
                # The registry is stored as a JSON string.
                file_registry = json.loads(file_registry)

            config = Pipeline.load_cfg(json.loads(pipeline['config']))
            pipeline_cfg = config.get('config', {}).get('pipeline', {})
            result_steps = pipeline_cfg.get('results', [])
            # Build a new list so the loaded configuration is not mutated.
            excluded_steps = list(pipeline_cfg.get('delete', []))
            excluded_steps += ['finalize', 'inputs']

            steps = list(db.steps.find(
                {
                    "run_id": run_id,
                    "name": {"$nin": excluded_steps},
                    "jobs": {"$elemMatch": {"outputs": {"$exists": True}}},
                },
                {"name": 1, "jobs": 1, "outputs.output_dir": 1,
                 "step_config": 1}))

            outputs = {}
            for step in steps:
                if not step.get('step_config'):
                    continue

                s = Step.load_step(step['step_config'])
                # Looked up once per step rather than once per output key
                # (assumes the result does not vary between jobs).
                file_keys = s.keys(key_groups='outputs',
                                   key_filter={'type': 'file'})

                output_files = []
                for job in step['jobs']:
                    # The query only guarantees that at least one job has
                    # outputs, so individual jobs may lack the field.
                    job_outputs = job.get('outputs') or {}
                    for key, filenames in job_outputs.items():
                        if key not in file_keys:
                            continue
                        for filename in filenames:
                            # List-valued entries are never reported as
                            # archived.
                            archived = (not isinstance(filename, list)
                                        and filename in file_registry)
                            output_files.append({
                                'path': filename,
                                'archived': archived,
                            })

                if output_files:
                    outputs[step['name']] = {
                        'archive': step['name'] in result_steps,
                        'dir': step.get('outputs', {}).get('output_dir'),
                        'files': output_files,
                    }

            return outputs
Пример #2
0
        def put(self):
            """
            Queue the pipeline described by the request's JSON body.

            Reads ``config`` and ``user`` from the body, registers the loaded
            configuration through ``dbmodel.PipelineDb`` to obtain a run id,
            and passes the configuration to ``pm.add_pipeline``.

            :return: the result of ``pm.add_pipeline``, or ``(errors, 400)``
                when validation reports errors (validation is currently
                disabled, so ``errors`` is always None).
            """
            payload = request.get_json(force=True)
            config = payload.get('config')
            user = auth_get_username(request.authorization,
                                     payload.get('user'))

            errors = None  # Pipeline.validate_config(config, user)
            if errors:
                return errors, 400

            config = Pipeline.load_cfg(config)

            # Get id from DB
            name = config['name']
            ordered = Pipeline.ordered_steps(config)
            record = dbmodel.PipelineDb(name, config, ordered, user)
            config['run_id'] = record.run_id

            message = "Submitting pipeline %s (ID %d) for user %s" % (
                config['label'], config['run_id'], user)
            ut.pretty_print(message)
            return pm.add_pipeline(config, user)
Пример #3
0
        def put(self):
            """
            Submit a new pipeline run from the JSON request body.

            The body's ``config`` entry is loaded with ``Pipeline.load_cfg``,
            stored via ``dbmodel.PipelineDb`` (which supplies the run id) and
            then given to ``pm.add_pipeline`` together with the user name.

            :return: whatever ``pm.add_pipeline`` returns; ``(errors, 400)``
                if ``errors`` is set (it never is while validation stays
                commented out).
            """
            body = request.get_json(force=True)
            config = body.get('config')
            user = auth_get_username(request.authorization, body.get('user'))

            errors = None  # Pipeline.validate_config(config, user)
            if errors:
                return errors, 400

            config = Pipeline.load_cfg(config)

            # Get id from DB
            pipeline_name = config['name']
            step_order = Pipeline.ordered_steps(config)
            stored = dbmodel.PipelineDb(pipeline_name, config, step_order,
                                        user)
            config['run_id'] = stored.run_id

            summary = (config['label'], config['run_id'], user)
            ut.pretty_print(
                "Submitting pipeline %s (ID %d) for user %s" % summary)
            return pm.add_pipeline(config, user)
Пример #4
0
        def post(self, run_id):
            """
            Queue an ``irods_lz`` pipeline that pushes selected files to iRODS.

            JSON body keys read by this method:

            - ``meta``: dict of run-level metadata merged into every file's
              metadata; its ``Project NPDI ID`` and ``Study nickname`` values
              concatenated must be in ``dbmodel.get_npdi_projects()``.
            - ``selection``: dict mapping a step name to the file paths of
              that step to archive.
            - ``user``: passed to ``auth_get_username``.

            :param run_id: value matched against the ``run_id`` field of the
                pipeline and step documents.
            :return: the result of ``pm.add_pipeline`` on success. Otherwise
                an ``(error, status)`` tuple: 400 when the project is unknown
                or a selected path is not an output file of its step, 404
                when no pipeline matches ``run_id``.
            """
            data = request.get_json(force=True)

            # Tolerate absent/null entries; an empty ``meta`` is rejected by
            # the project check below.
            runmeta = data.get('meta') or {}
            selection = data.get('selection') or {}
            user = auth_get_username(request.authorization, data.get('user'))

            npdis = dbmodel.get_npdi_projects()
            npdi = runmeta.get('Project NPDI ID', '')
            study_nickname = runmeta.get('Study nickname',
                                         'Required field missing')
            if (npdi + study_nickname) not in npdis:
                return {'pipeline': {
                    'Project': '%s (%s)' % (npdi, study_nickname)
                }}, 400

            run = db.pipelines.find_one({'run_id': run_id},
                                        {'meta': 1, 'run_id': 1})
            if run is None:
                # find_one yields None when nothing matches.
                return {'pipeline': {'Run': '%s not found' % run_id}}, 404

            # list() keeps the query value a plain list on Python 2 and 3.
            steps_names = list(selection)
            steps = list(db.steps.find(
                {
                    "run_id": run_id,
                    "name": {'$in': steps_names},
                    "jobs": {"$elemMatch": {"outputs": {"$exists": True}}},
                },
                {"name": 1, "jobs": 1, "outputs.output_dir": 1,
                 "step_config": 1}))

            # outputs[step name][file path] -> metadata dict for that file
            outputs = {}
            for step in steps:
                if not step.get('step_config'):
                    continue

                s = Step.load_step(step['step_config'])
                # Looked up once per step rather than once per output key
                # (assumes the result does not vary between jobs).
                file_keys = s.keys(key_groups='outputs',
                                   key_filter={'type': 'file'})

                output_files = {}
                for job_id, job in enumerate(step['jobs']):
                    job_meta = job.get('meta') or {}
                    # The query only guarantees that at least one job has
                    # outputs, so individual jobs may lack the field.
                    job_outputs = job.get('outputs') or {}
                    for key, filenames in job_outputs.items():
                        if key not in file_keys:
                            continue
                        for i, filename in enumerate(filenames):
                            if isinstance(filename, list):
                                # List-valued entries cannot be used as a
                                # path or dict key; skip them.
                                continue

                            filemeta = {'step': step['name'],
                                        'job_id': job_id}
                            for meta_key, meta in job_meta.items():
                                if meta_key == 'sample_id':
                                    okey = 'Operational sample accession'
                                else:
                                    okey = meta_key
                                # List-valued metadata is indexed by the
                                # file's position in its output list.
                                if isinstance(meta, list):
                                    filemeta[okey] = meta[i]
                                else:
                                    filemeta[okey] = meta

                            ext = os.path.splitext(filename)[1][1:].upper()
                            filemeta['File type'] = 'Processed data file'
                            filemeta['File format'] = ext

                            output_files[filename] = filemeta

                if output_files:
                    outputs[step['name']] = output_files

            input_files = []
            meta_data = []
            for step_name, step_selection in selection.items():
                step_outputs = outputs.get(step_name, {})
                for filepath in step_selection:
                    if filepath not in step_outputs:
                        # Reject unknown paths as a client error.
                        return {'pipeline': {
                            'Selection': '%s is not an output file of step %s'
                                         % (filepath, step_name)
                        }}, 400

                    filemeta = step_outputs[filepath]
                    filemeta.update(runmeta)
                    input_files.append(filepath)
                    meta_data.append(filemeta)

            cfg = Pipeline.load_cfg(pipeline_specs['irods_lz'])
            cfg['config']['steps']['irods_mvtolz'] = {
                'input_files': input_files,
                'meta_data': meta_data
            }
            cfg['config']['steps']['irods_monitorlz'] = {
                'prun_id': run['run_id']
            }

            cfg['config']['pipeline']['project_name'] = run['meta']['project_name']
            cfg['config']['pipeline']['description'] = 'Archive data for run %s' % run['run_id']
            cfg['config']['pipeline']['output_dir'] = '/scratch/cgi/irods'

            # Get id from DB
            db_info = dbmodel.PipelineDb(cfg['name'], cfg,
                                         Pipeline.ordered_steps(cfg), user)
            cfg['run_id'] = db_info.run_id

            ut.pretty_print("Submitting pipeline %s (ID %d) for user %s" %
                            (cfg['label'], cfg['run_id'], user))
            return pm.add_pipeline(cfg, user)
Пример #5
0
        def post(self, run_id):
            """
            Queue an ``irods_lz`` pipeline that pushes selected files to iRODS.

            JSON body keys read by this method:

            - ``meta``: dict of run-level metadata merged into every file's
              metadata; its ``Project NPDI ID`` and ``Study nickname`` values
              concatenated must be in ``dbmodel.get_npdi_projects()``.
            - ``selection``: dict mapping a step name to the file paths of
              that step to archive.
            - ``user``: passed to ``auth_get_username``.

            :param run_id: value matched against the ``run_id`` field of the
                pipeline and step documents.
            :return: the result of ``pm.add_pipeline`` on success. Otherwise
                an ``(error, status)`` tuple: 400 when the project is unknown
                or a selected path is not an output file of its step, 404
                when no pipeline matches ``run_id``.
            """
            data = request.get_json(force=True)

            # Tolerate absent/null entries; an empty ``meta`` is rejected by
            # the project check below.
            runmeta = data.get('meta') or {}
            selection = data.get('selection') or {}
            user = auth_get_username(request.authorization, data.get('user'))

            npdis = dbmodel.get_npdi_projects()
            npdi = runmeta.get('Project NPDI ID', '')
            study_nickname = runmeta.get('Study nickname',
                                         'Required field missing')
            if (npdi + study_nickname) not in npdis:
                return {
                    'pipeline': {
                        'Project': '%s (%s)' % (npdi, study_nickname)
                    }
                }, 400

            run = db.pipelines.find_one({'run_id': run_id}, {
                'meta': 1,
                'run_id': 1
            })
            if run is None:
                # find_one yields None when nothing matches.
                return {'pipeline': {'Run': '%s not found' % run_id}}, 404

            # list() keeps the query value a plain list on Python 2 and 3.
            steps_names = list(selection)
            steps = list(
                db.steps.find(
                    {
                        "run_id": run_id,
                        "name": {
                            '$in': steps_names
                        },
                        "jobs": {
                            "$elemMatch": {
                                "outputs": {
                                    "$exists": True
                                }
                            }
                        }
                    }, {
                        "name": 1,
                        "jobs": 1,
                        "outputs.output_dir": 1,
                        "step_config": 1
                    }))

            # outputs[step name][file path] -> metadata dict for that file
            outputs = {}
            for step in steps:
                if not step.get('step_config'):
                    continue

                s = Step.load_step(step['step_config'])
                # Looked up once per step rather than once per output key
                # (assumes the result does not vary between jobs).
                file_keys = s.keys(key_groups='outputs',
                                   key_filter={'type': 'file'})

                output_files = {}
                for job_id, job in enumerate(step['jobs']):
                    job_meta = job.get('meta') or {}
                    # The query only guarantees that at least one job has
                    # outputs, so individual jobs may lack the field.
                    job_outputs = job.get('outputs') or {}
                    for key, filenames in job_outputs.items():
                        if key not in file_keys:
                            continue
                        for i, filename in enumerate(filenames):
                            if isinstance(filename, list):
                                # List-valued entries cannot be used as a
                                # path or dict key; skip them.
                                continue

                            filemeta = {
                                'step': step['name'],
                                'job_id': job_id
                            }
                            for meta_key, meta in job_meta.items():
                                if meta_key == 'sample_id':
                                    okey = 'Operational sample accession'
                                else:
                                    okey = meta_key
                                # List-valued metadata is indexed by the
                                # file's position in its output list.
                                if isinstance(meta, list):
                                    filemeta[okey] = meta[i]
                                else:
                                    filemeta[okey] = meta

                            ext = os.path.splitext(filename)[1][1:].upper()
                            filemeta['File type'] = 'Processed data file'
                            filemeta['File format'] = ext

                            output_files[filename] = filemeta

                if output_files:
                    outputs[step['name']] = output_files

            input_files = []
            meta_data = []
            for step_name, step_selection in selection.items():
                step_outputs = outputs.get(step_name, {})
                for filepath in step_selection:
                    if filepath not in step_outputs:
                        # Reject unknown paths as a client error.
                        return {
                            'pipeline': {
                                'Selection':
                                '%s is not an output file of step %s' %
                                (filepath, step_name)
                            }
                        }, 400

                    filemeta = step_outputs[filepath]
                    filemeta.update(runmeta)
                    input_files.append(filepath)
                    meta_data.append(filemeta)

            cfg = Pipeline.load_cfg(pipeline_specs['irods_lz'])
            cfg['config']['steps']['irods_mvtolz'] = {
                'input_files': input_files,
                'meta_data': meta_data
            }
            cfg['config']['steps']['irods_monitorlz'] = {
                'prun_id': run['run_id']
            }

            cfg['config']['pipeline']['project_name'] = run['meta'][
                'project_name']
            cfg['config']['pipeline'][
                'description'] = 'Archive data for run %s' % run['run_id']
            cfg['config']['pipeline']['output_dir'] = '/scratch/cgi/irods'

            # Get id from DB
            db_info = dbmodel.PipelineDb(cfg['name'], cfg,
                                         Pipeline.ordered_steps(cfg), user)
            cfg['run_id'] = db_info.run_id

            ut.pretty_print("Submitting pipeline %s (ID %d) for user %s" %
                            (cfg['label'], cfg['run_id'], user))
            return pm.add_pipeline(cfg, user)
Пример #6
0
        def get(self, run_id):
            """
            Return the output files of the given run, grouped by step name.

            Steps listed under the pipeline config's ``delete`` key, as well
            as the ``finalize`` and ``inputs`` steps, are excluded.

            :param run_id: value matched against the ``run_id`` field of the
                pipeline and step documents.
            :return: dict mapping each step name to a dict with the keys
                ``archive`` (True when the step is listed in the config's
                ``results``), ``dir`` (the step's ``outputs.output_dir`` or
                None) and ``files`` (list of ``{'path', 'archived'}`` dicts).
                When no pipeline matches ``run_id`` an ``(error, 404)`` tuple
                is returned instead.
            """
            pipeline = db.pipelines.find_one({'run_id': run_id}, {
                'config': 1,
                'file_registry': 1
            })
            if pipeline is None:
                # find_one yields None when nothing matches; report that
                # explicitly instead of failing on pipeline.get below.
                return {'message': 'Run %s not found' % run_id}, 404

            file_registry = pipeline.get('file_registry', [])
            if file_registry:
                # The registry is stored as a JSON string.
                file_registry = json.loads(file_registry)

            config = Pipeline.load_cfg(json.loads(pipeline['config']))
            pipeline_cfg = config.get('config', {}).get('pipeline', {})
            result_steps = pipeline_cfg.get('results', [])
            # Build a new list so the loaded configuration is not mutated.
            excluded_steps = list(pipeline_cfg.get('delete', []))
            excluded_steps += ['finalize', 'inputs']

            steps = list(
                db.steps.find(
                    {
                        "run_id": run_id,
                        "name": {
                            "$nin": excluded_steps
                        },
                        "jobs": {
                            "$elemMatch": {
                                "outputs": {
                                    "$exists": True
                                }
                            }
                        }
                    }, {
                        "name": 1,
                        "jobs": 1,
                        "outputs.output_dir": 1,
                        "step_config": 1
                    }))

            outputs = {}
            for step in steps:
                if not step.get('step_config'):
                    continue

                s = Step.load_step(step['step_config'])
                # Looked up once per step rather than once per output key
                # (assumes the result does not vary between jobs).
                file_keys = s.keys(key_groups='outputs',
                                   key_filter={'type': 'file'})

                output_files = []
                for job in step['jobs']:
                    # The query only guarantees that at least one job has
                    # outputs, so individual jobs may lack the field.
                    job_outputs = job.get('outputs') or {}
                    for key, filenames in job_outputs.items():
                        if key not in file_keys:
                            continue
                        for filename in filenames:
                            # List-valued entries are never reported as
                            # archived.
                            archived = (not isinstance(filename, list)
                                        and filename in file_registry)
                            output_files.append({
                                'path': filename,
                                'archived': archived
                            })

                if output_files:
                    outputs[step['name']] = {
                        'archive': step['name'] in result_steps,
                        'dir': step.get('outputs', {}).get('output_dir'),
                        'files': output_files
                    }

            return outputs