Example #1
0
def test_run_pipeline(mock_run_pipeline, config_filename, jira=None, version=None):
    config = file_utils.load_json(config_filename)

    cleanup_fake_outputs(jira, config['storages'])

    # Create fake results instead of running pipeline
    mock_run_pipeline.side_effect = create_fake_results 

    arglist = [jira, version, "--update", "--is_test_run"]
    args = workflows.arguments.get_args(arglist=arglist)
    workflows.run.main(args)

    check_fake_outputs(mock_run_pipeline.call_args_list)
    cleanup_fake_outputs(jira, config['storages'])
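The test above never executes the real pipeline; it swaps it out via side_effect. Below is a minimal sketch of what such a side_effect stand-in could look like; the body of create_fake_results is not shown in the example, so the signature and the output location here are illustrative assumptions only.

# Hypothetical sketch of a side_effect replacement for run_pipeline.
# The keyword arguments and the scratch output path are assumptions for illustration.
import os

def create_fake_results(*args, **kwargs):
    # Write a placeholder result file in a scratch location; the test's
    # cleanup_fake_outputs() would be expected to remove whatever the fake produces.
    out_dir = kwargs.get('out_dir', '/tmp/fake_pipeline_outputs')
    os.makedirs(out_dir, exist_ok=True)
    with open(os.path.join(out_dir, 'results.yaml'), 'w') as f:
        f.write('status: fake\n')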
Example #2
0
def main(version,
         aligner,
         check=False,
         override_contamination=False,
         screen=False,
         ignore_status=False,
         skip=None):

    config_path = os.path.join(os.environ['HEADNODE_AUTOMATION_DIR'],
                               'workflows/config/normal_config.json')
    config = file_utils.load_json(config_path)

    log.info('version: {}, aligner: {}'.format(version, aligner))

    analyses_to_run = get_analyses_to_run(version, aligner, check=check)

    log.info("Analyses to run {}".format(analyses_to_run))

    for skip_analysis in (skip or []):
        if not templates.JIRA_ID_RE.match(skip_analysis):
            raise Exception(
                'Invalid Jira ticket to be skipped: {}'.format(skip_analysis))

        log.info("Skipping analysis on {}".format(skip_analysis))
        for analysis_type, ticket_library in analyses_to_run.items():
            if skip_analysis in ticket_library:
                del analyses_to_run[analysis_type][skip_analysis]

    # If saltant is down, run analysis in screens
    if screen:
        log.info("Running analyses in screens")
        run_screens(analyses_to_run, version)
        return "Running in screens"

    for analysis_type, ticket_library in analyses_to_run.items():
        for ticket in ticket_library:
            library_id = ticket_library[ticket]
            if not check_running_analysis(ticket, analysis_type):
                log.info(f"Running {analysis_type} for {ticket}")
                saltant_utils.run_analysis(
                    analysis_type,
                    ticket,
                    version,
                    library_id,
                    aligner,
                    config,
                    override_contamination=override_contamination,
                )
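For reference, the loops above imply a concrete shape for analyses_to_run: a mapping from analysis type to a {jira_ticket: library_id} dictionary. A made-up illustration is below; the analysis types, tickets and library IDs are placeholders, not real data.

# Hypothetical shape of the value returned by get_analyses_to_run();
# keys and IDs are illustrative placeholders only.
analyses_to_run = {
    'align': {'SC-1234': 'A12345'},
    'hmmcopy': {'SC-1235': 'A12345'},
}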
Example #3
0
def main(analysis_name,
         jira_id,
         version,
         args,
         config_filename=None,
         **run_options):

    if config_filename is None:
        config_filename = default_config

    if not templates.JIRA_ID_RE.match(jira_id):
        raise Exception(f'Invalid SC ID: {jira_id}')

    config = file_utils.load_json(config_filename)

    job_subdir = jira_id + run_options['tag']

    run_options['job_subdir'] = job_subdir

    pipeline_dir = os.path.join(
        tantalus_api.get(
            "storage",
            name=config["storages"]["local_results"])["storage_directory"],
        job_subdir)

    scpipeline_dir = os.path.join('singlecelllogs', 'pipeline', job_subdir)
    tmp_dir = os.path.join('singlecelltemp', 'temp', job_subdir)

    log_utils.init_pl_dir(pipeline_dir, run_options['clean'])

    log_file = log_utils.init_log_files(pipeline_dir)
    log_utils.setup_sentinel(run_options['sisyphus_interactive'],
                             os.path.join(pipeline_dir, analysis_name))

    start_automation(
        analysis_name,
        jira_id,
        version,
        args,
        run_options,
        config,
        pipeline_dir,
        scpipeline_dir,
        tmp_dir,
        config['storages'],
        job_subdir,
    )
Example #4
0
def run_all(
    version,
    jira=None,
    no_download=False,
    config_filename=None,
    data_dir=None,
    runs_dir=None,
    results_dir=None,
    **run_options,
):
    config = file_utils.load_json(default_config)

    # get latest analyses with status ready
    analyses_ready = tantalus_api.list(
        "analysis",
        analysis_type__name="tenx",
        status="ready",
        last_updated__gte=str(datetime.datetime.now() -
                              datetime.timedelta(days=7)),
    )
    # get latest analyses with status error
    analyses_error = tantalus_api.list(
        "analysis",
        analysis_type__name="tenx",
        status="error",
        last_updated__gte=str(datetime.datetime.now() -
                              datetime.timedelta(days=7)),
    )

    for analysis in chain(analyses_ready, analyses_error):
        jira_ticket = analysis["jira_ticket"]

        run(
            analysis["id"],
            config["version"],
            jira=jira_ticket,
            no_download=no_download,
            config_filename=config_filename,
            data_dir=data_dir,
            runs_dir=runs_dir,
            results_dir=results_dir,
            **run_options,
        )
Example #5
0
def create_tickets_and_analyses(import_info):
    """
    Creates jira ticket and an align analysis on tantalus if new lanes were imported

    Args:
        import_info (dict): Contains keys dlp_library_id, gsc_library_id, lanes
    """
    # only create tickets and analyses when new lane is imported
    if any([lane["new"] for lane in import_info['lanes']]):
        # load config file
        config = load_json(
            os.path.join(
                os.path.dirname(os.path.dirname(os.path.realpath(__file__))),
                'workflows',
                'config',
                'normal_config.json',
            ))

        # create analysis jira ticket
        jira_ticket = create_jira_ticket_from_library(
            import_info["dlp_library_id"])

        # create align analysis objects
        create_qc_analyses_from_library(
            import_info["dlp_library_id"],
            jira_ticket,
            config["scp_version"],
            "align",
            aligner=config["default_aligner"],
        )

        # create analysis object on colossus
        create_colossus_analysis(
            import_info["dlp_library_id"],
            jira_ticket,
            config["scp_version"],
            config["default_aligner"],
        )
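The import_info dictionary described in the docstring above has a small, concrete shape; a made-up instance is shown below. The IDs are placeholders, and the per-lane fields beyond "new" are assumptions for illustration.

# Hypothetical import_info payload; top-level keys follow the docstring,
# values and lane fields are placeholders.
import_info = {
    'dlp_library_id': 'A12345',
    'gsc_library_id': 'PX0001',
    'lanes': [
        {'flowcell_id': 'EXAMPLEFLOWCELL', 'new': True},
        {'flowcell_id': 'OLDERFLOWCELL', 'new': False},
    ],
}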
Example #6
0
def create_tenx_analysis_from_library(jira, library):
    """Creates tenx analysis on Tantalus

    Args:
        jira (str): JIRA ID (e.g. SC-1234)
        library (str): Library Name

    Returns:
        Object: Tantalus Analysis 
    """
    # get config
    default_config = os.path.join(
        os.path.dirname(os.path.dirname(os.path.realpath(__file__))),
        'config',
        'normal_config_tenx.json',
    )
    config = file_utils.load_json(default_config)

    # init args
    args = {}
    args['library_id'] = library
    library_info = colossus_api.get('tenxlibrary', name=library)
    args['ref_genome'] = get_ref_genome(library_info, is_tenx=True)
    args['version'] = config["version"]

    # get list of storages
    storages = config["storages"]

    # create analysis
    analysis = TenXAnalysis(
        jira,
        config["version"],
        args,
        storages=storages,
        update=True,
    )

    return analysis.analysis
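A short usage sketch for the helper above; the import path and the concrete ticket and library names are assumptions made for illustration.

# Hypothetical usage: create (or fetch) the tenx analysis for a library and
# read fields off the returned Tantalus record. The module path is assumed.
from workflows.tenx_utils import create_tenx_analysis_from_library  # assumed location

analysis = create_tenx_analysis_from_library('SC-1234', 'EXAMPLE_TENX_LIBRARY')
print(analysis['id'], analysis['status'], analysis['jira_ticket'])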
Example #7
0
def run_single(
    analysis_id,
    version,
    jira=None,
    no_download=False,
    config_filename=None,
    data_dir=None,
    runs_dir=None,
    results_dir=None,
    **run_options,
):
    config = file_utils.load_json(default_config)

    run(
        analysis_id,
        config["version"],
        jira=jira,
        no_download=no_download,
        config_filename=config_filename,
        data_dir=data_dir,
        runs_dir=runs_dir,
        results_dir=results_dir,
        **run_options,
    )
Example #8
0
def main(
        analysis_id,
        config_filename=None,
        reset_status=False,
        **run_options
    ):

    if config_filename is None:
        config_filename = default_config

    analysis = workflows.analysis.base.Analysis.get_by_id(tantalus_api, analysis_id)

    if reset_status:
        analysis.set_error_status()

    if analysis.status == 'complete':
        raise Exception(f'analysis {analysis_id} already complete')

    if analysis.status == 'running':
        raise Exception(f'analysis {analysis_id} already running')

    jira_id = analysis.jira
    analysis_name = analysis.name

    if not templates.JIRA_ID_RE.match(jira_id):
        raise Exception(f'Invalid SC ID: {jira_id}')

    config = file_utils.load_json(config_filename)

    pipeline_dir = os.path.join(config['analysis_directory'], jira_id, analysis_name)

    scpipeline_dir = os.path.join('singlecelllogs', 'pipeline', jira_id)
    tmp_dir = os.path.join('singlecelltemp', 'temp', jira_id)

    log_utils.init_pl_dir(pipeline_dir, run_options['clean'])

    log_file = log_utils.init_log_files(pipeline_dir)
    log_utils.setup_sentinel(run_options['sisyphus_interactive'], os.path.join(pipeline_dir, analysis_name))

    storages = config['storages']

    start = time.time()

    if storages["working_inputs"] != storages["remote_inputs"]:
        log_utils.sentinel(
            'Transferring input datasets from {} to {}'.format(storages["remote_inputs"], storages["working_inputs"]),
            transfer_inputs,
            analysis.get_input_datasets(),
            analysis.get_input_results(),
            storages["remote_inputs"],
            storages["working_inputs"],
        )

    if run_options['inputs_yaml'] is None:
        inputs_yaml = os.path.join(pipeline_dir, 'inputs.yaml')
        log_utils.sentinel(
            'Generating inputs yaml',
            analysis.generate_inputs_yaml,
            storages,
            inputs_yaml,
        )
    else:
        inputs_yaml = run_options['inputs_yaml']

    try:
        analysis.set_run_status()

        dirs = [
            pipeline_dir,
            config['docker_path'],
            config['docker_sock_path'],
        ]
        # Pass all server storages to docker
        for storage_name in storages.values():
            storage = tantalus_api.get('storage', name=storage_name)
            if storage['storage_type'] == 'server':
                dirs.append(storage['storage_directory'])

        if run_options['saltant']:
            context_config_file = config['context_config_file']['saltant']
        else:
            context_config_file = config['context_config_file']['sisyphus']

        log_utils.sentinel(
            f'Running single_cell {analysis_name}',
            analysis.run_pipeline,
            scpipeline_dir=scpipeline_dir,
            tmp_dir=tmp_dir,
            inputs_yaml=inputs_yaml,
            context_config_file=context_config_file,
            docker_env_file=config['docker_env_file'],
            docker_server=config['docker_server'],
            dirs=dirs,
            storages=storages,
            run_options=run_options,
        )

    except Exception:
        analysis.set_error_status()
        raise Exception("pipeline failed")

    output_dataset_ids = log_utils.sentinel(
        'Creating {} output datasets'.format(analysis_name),
        analysis.create_output_datasets,
        storages,
        update=run_options['update'],
    )

    output_results_ids = log_utils.sentinel(
        'Creating {} output results'.format(analysis_name),
        analysis.create_output_results,
        storages,
        update=run_options['update'],
        skip_missing=run_options['skip_missing'],
    )

    if storages["working_inputs"] != storages["remote_inputs"] and output_dataset_ids != []:
        log_utils.sentinel(
            'Transferring input datasets from {} to {}'.format(storages["working_inputs"], storages["remote_inputs"]),
            transfer_inputs,
            output_dataset_ids,
            output_results_ids,
            storages["remote_inputs"],
            storages["working_inputs"],
        )

    log.info("Done!")
    log.info("------ %s hours ------" % ((time.time() - start) / 60 / 60))

    analysis.set_complete_status()

    comment_jira(jira_id, f'finished {analysis_name} analysis')
Example #9
0
def main(jira,
         version,
         library_id,
         aligner,
         analysis_type,
         load_only=False,
         gsc_lanes=None,
         brc_flowcell_ids=None,
         config_filename=None,
         **run_options):

    if load_only:
        load_ticket(jira)
        return "complete"

    if config_filename is None:
        config_filename = default_config

    if not templates.JIRA_ID_RE.match(jira):
        raise Exception(f'Invalid SC ID: {jira}')

    aligner_map = {'A': 'BWA_ALN_0_5_7', 'M': 'BWA_MEM_0_7_6A'}

    aligner = aligner_map[aligner]

    # Get reference genome
    library_info = colossus_api.get("library", pool_id=library_id)
    reference_genome = colossus_utils.get_ref_genome(library_info)

    if gsc_lanes is not None:
        gsc_lanes = gsc_lanes.split(',')

    if brc_flowcell_ids is not None:
        brc_flowcell_ids = brc_flowcell_ids.split(',')

    config = file_utils.load_json(config_filename)

    job_subdir = jira + run_options['tag']

    run_options['job_subdir'] = job_subdir

    pipeline_dir = os.path.join(
        tantalus_api.get(
            "storage",
            name=config["storages"]["local_results"])["storage_directory"],
        job_subdir)

    scpipeline_dir = os.path.join('singlecelllogs', 'pipeline', job_subdir)
    tmp_dir = os.path.join('singlecelltemp', 'temp', job_subdir)

    log_utils.init_pl_dir(pipeline_dir, run_options['clean'])

    log_file = log_utils.init_log_files(pipeline_dir)
    log_utils.setup_sentinel(run_options['sisyphus_interactive'],
                             os.path.join(pipeline_dir, analysis_type))

    # Create analysis information object on Colossus
    analysis_info = workflow.models.AnalysisInfo(jira)

    log.info('Library ID: {}'.format(library_id))

    if run_options["is_test_run"]:
        library_id += "TEST"

    args = {}
    args['aligner'] = aligner
    args['ref_genome'] = reference_genome
    args['library_id'] = library_id
    args['gsc_lanes'] = gsc_lanes
    args['brc_flowcell_ids'] = brc_flowcell_ids
    args['smoothing'] = run_options['smoothing']

    start_automation(
        jira,
        version,
        args,
        run_options,
        config,
        pipeline_dir,
        scpipeline_dir,
        tmp_dir,
        config['storages'],
        job_subdir,
        analysis_info,
        analysis_type,
    )
Example #10
0
import logging
import os

from dbclients.tantalus import TantalusApi  # assumed import path for the API clients below
from dbclients.colossus import ColossusApi  # assumed import path
from workflows.utils import saltant_utils, file_utils, tantalus_utils, colossus_utils
from workflows.utils.jira_utils import update_jira_dlp, add_attachment, comment_jira

log = logging.getLogger('sisyphus')
log.setLevel(logging.DEBUG)
stream_handler = logging.StreamHandler()
formatter = logging.Formatter(
    '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
# attach the formatter and handler so the logger actually emits formatted output
stream_handler.setFormatter(formatter)
log.addHandler(stream_handler)

tantalus_api = TantalusApi()
colossus_api = ColossusApi()

# load config file
config = file_utils.load_json(
    os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        'config',
        'normal_config.json',
    ))


def attach_qc_report(jira, library_id, storages):
    """ 
    Adds qc report to library jira ticket

    Arguments:
        jira {str} -- id of jira ticket e.g SC-1234
        library_id {str} -- library name
        storages {dict} -- dictionary of storages names for results and inputs
    """

    storage_client = tantalus_api.get_storage_client(
Example #11
0
def run(
    analysis_id,
    version,
    jira=None,
    no_download=False,
    config_filename=None,
    data_dir=None,
    runs_dir=None,
    results_dir=None,
    **run_options,
):

    if config_filename is None:
        config_filename = default_config

    config = file_utils.load_json(config_filename)
    storages = config["storages"]

    analysis = tantalus_api.get("analysis", id=analysis_id)

    if analysis["status"] in ("running", "complete"):
        raise Exception(f'analysis {analysis_id} already {analysis["status"]}')

    jira_ticket = analysis["jira_ticket"]
    library_id = analysis["args"]["library_id"]

    # get colossus library
    library = colossus_api.get(
        "tenxlibrary",
        name=library_id,
    )

    log.info("Running {}".format(jira_ticket))
    job_subdir = jira_ticket + run_options['tag']

    # init pipeline dir
    pipeline_dir = os.path.join(
        tantalus_api.get(
            "storage",
            name=config["storages"]["local_results"])["storage_directory"],
        job_subdir,
    )

    log_utils.init_pl_dir(pipeline_dir, run_options['clean'])

    log_file = log_utils.init_log_files(pipeline_dir)
    log_utils.setup_sentinel(run_options['sisyphus_interactive'],
                             os.path.join(pipeline_dir, "tenx"))

    # scRNA pipeline working directories
    if data_dir is None:
        data_dir = os.path.join("/datadrive", "data")
    if runs_dir is None:
        runs_dir = os.path.join("/datadrive", "runs", library_id)
    if results_dir is None:
        results_dir = os.path.join("/datadrive", "results", library_id)

    reference_dir = os.path.join("/datadrive", "reference")

    if run_options["testing"]:
        ref_genome = "test"

    elif run_options["ref_genome"]:
        ref_genome = run_options["ref_genome"]
        log.info("Default reference genome being overwritten; using {}".format(
            run_options["ref_genome"]))

    else:
        ref_genome = get_ref_genome(library, is_tenx=True)

    args = {}
    args['library_id'] = library_id
    args['ref_genome'] = ref_genome
    args['version'] = version

    analysis_info = TenXAnalysisInfo(
        jira_ticket,
        config['version'],
        run_options,
        library["id"],
    )

    if not no_download:
        download_data(storages["working_inputs"], data_dir, library_id)

    start_automation(
        jira_ticket,
        config['version'],
        args,
        run_options,
        analysis_info,
        data_dir,
        runs_dir,
        reference_dir,
        results_dir,
        storages,
        library["id"],
        analysis_id,
    )
Example #12
0
def run_new(
    library_id,
    version,
    jira=None,
    no_download=False,
    config_filename=None,
    data_dir=None,
    runs_dir=None,
    results_dir=None,
    **run_options,
):
    # get tenx library info
    library = colossus_api.get(
        "tenxlibrary",
        name=library_id,
    )
    sample = library["sample"]["sample_id"]
    library_ticket = library["jira_ticket"]

    # create jira ticket
    jira_ticket = create_analysis_jira_ticket(library_id, sample,
                                              library_ticket)

    # create colossus analysis
    colossus_analysis, _ = colossus_api.create(
        "tenxanalysis",
        fields={
            "version": "vm",
            "jira_ticket": jira_ticket,
            "run_status": "idle",
            "tenx_library": library["id"],
            "submission_date": str(datetime.date.today()),
            "tenxsequencing_set": [],
        },
        keys=["jira_ticket"],
    )

    # create tantalus analysis
    analysis = create_tenx_analysis_from_library(jira_ticket, library["name"])

    # check if an analysis with the same inputs has already been run under a different ticket
    if analysis["jira_ticket"] != jira_ticket:
        log.info(
            f"Analysis with same input datasets has already been run under {analysis['jira_ticket']}"
        )
        # remove jira ticket
        delete_ticket(jira_ticket)
        # remove colossus analysis
        colossus_api.delete("tenxanalysis", colossus_analysis["id"])
    else:
        # load config
        config = file_utils.load_json(default_config)

        run(
            analysis["id"],
            config["version"],
            jira=jira_ticket,
            no_download=no_download,
            config_filename=config_filename,
            data_dir=data_dir,
            runs_dir=runs_dir,
            results_dir=results_dir,
            **run_options,
        )
Example #13
0
def run_pipeline(
        version,
        run_options,
        micpipeline_dir,
        tmp_dir,
        inputs_yaml,
        context_config_file,
        docker_env_file,
        docker_server,
        output_dirs,
        cli_args=(),
        max_jobs='400',
        dirs=(),
):
    config_override = run_options.get('config_override')
    default_config = os.path.realpath(
        os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir,
                     os.pardir, 'config', 'normal_config.json'))
    config = file_utils.load_json(default_config)

    run_cmd = [
        'microscope_image_converter',
        '--input_yaml',
        inputs_yaml,
        '--tmpdir',
        tmp_dir,
        '--context_config',
        context_config_file,
        '--pipelinedir',
        micpipeline_dir,
        '--submit_config',
        config['submit_config'],
        '--sentinel_only',
        '--maxjobs',
        str(max_jobs),
    ]

    run_cmd.extend(cli_args)

    if config_override is not None:
        run_cmd += [
            '--config_override',
            f'\'{config_override}\'',
        ]

    for option_name, output_dir in output_dirs.items():
        run_cmd += [
            f'--{option_name}',
            output_dir,
        ]

    if run_options['saltant']:
        run_cmd += ['--loglevel', 'ERROR']
    else:
        run_cmd += ['--loglevel', 'DEBUG']

    if run_options['local_run']:
        run_cmd += ["--submit", "local"]
    else:
        run_cmd += [
            '--submit',
            'azurebatch',
            '--storage',
            'azureblob',
        ]

    # Append docker command to the beginning
    docker_cmd = [
        'docker',
        'run',
        '-w',
        '$PWD',
        '-v',
        '$PWD:$PWD',
        '-v',
        '$HOME:$HOME',
        '-v',
        '/var/run/docker.sock',
        '-v',
        '/usr/bin/docker',
        '--rm',
        '--env-file',
        docker_env_file,
    ]

    for d in dirs:
        docker_cmd.extend([
            '-v',
            '{d}:{d}'.format(d=d),
        ])

    docker_cmd.extend(
        ['-v', '{d}:{d}'.format(d=os.path.dirname(context_config_file))])

    docker_cmd.append(f'{docker_server}:{version}')
    run_cmd = docker_cmd + run_cmd

    if run_options['sc_config'] is not None:
        run_cmd += ['--config_file', run_options['sc_config']]
    if run_options['interactive']:
        run_cmd += ['--interactive']

    run_cmd_string = r' '.join(run_cmd)
    log.debug(run_cmd_string)

    if run_options.get("skip_pipeline"):
        log.info('skipping pipeline on request')
    else:
        subprocess.check_call(run_cmd_string, shell=True)
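The run_options and output_dirs dictionaries consumed above have a specific shape; this sketch shows one plausible call that supplies every key the function reads. All paths, the version tag and the docker server are placeholders, and skip_pipeline is set so only the assembled command is logged.

# Hypothetical invocation of run_pipeline; values are placeholders for illustration.
run_options = {
    'config_override': None,
    'saltant': False,       # DEBUG log level instead of ERROR
    'local_run': True,      # submit locally rather than to azurebatch
    'sc_config': None,
    'interactive': False,
    'skip_pipeline': True,  # log the command instead of running it
}
output_dirs = {'output_dir': '/data/mic/output'}  # option name is a placeholder

run_pipeline(
    version='v0.0.1',
    run_options=run_options,
    micpipeline_dir='/data/mic/pipeline',
    tmp_dir='/data/mic/tmp',
    inputs_yaml='/data/mic/inputs.yaml',
    context_config_file='/home/user/context_config.yaml',
    docker_env_file='/home/user/docker_env',
    docker_server='example.azurecr.io/microscope_image_converter',
    output_dirs=output_dirs,
)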
Example #14
0
def run_pseudobulk(jira_ticket,
                   version,
                   inputs_tag_name,
                   matched_normal_sample,
                   matched_normal_library,
                   config_filename=None,
                   **run_options):
    if config_filename is None:
        config_filename = default_config

    config = file_utils.load_json(config_filename)

    args = dict(
        inputs_tag_name=inputs_tag_name,
        matched_normal_sample=matched_normal_sample,
        matched_normal_library=matched_normal_library,
    )

    job_subdir = jira_ticket

    run_options['job_subdir'] = job_subdir

    pipeline_dir = os.path.join(
        tantalus_api.get(
            "storage",
            name=config["storages"]["local_results"])["storage_directory"],
        job_subdir)

    results_dir = os.path.join('singlecellresults', 'results', job_subdir)

    scpipeline_dir = os.path.join('singlecelllogs', 'pipeline', job_subdir)

    tmp_dir = os.path.join('singlecelltemp', 'temp', job_subdir)

    log_utils.init_pl_dir(pipeline_dir, run_options['clean'])

    storage_result_prefix = tantalus_api.get_storage_client(
        "singlecellresults").prefix
    destruct_output = os.path.join(storage_result_prefix, jira_ticket,
                                   "results", "destruct")
    lumpy_output = os.path.join(storage_result_prefix, jira_ticket, "results",
                                "lumpy")
    haps_output = os.path.join(storage_result_prefix, jira_ticket, "results",
                               "haps")
    variants_output = os.path.join(storage_result_prefix, jira_ticket,
                                   "results", "variants")

    log_file = log_utils.init_log_files(pipeline_dir)
    log_utils.setup_sentinel(run_options['sisyphus_interactive'], pipeline_dir)

    start_automation(
        jira_ticket,
        version,
        args,
        run_options,
        config,
        pipeline_dir,
        results_dir,
        scpipeline_dir,
        tmp_dir,
        config['storages'],
        job_subdir,
        destruct_output,
        lumpy_output,
        haps_output,
        variants_output,
    )
Example #15
0
def main(aligner):
    """
    Gets all qc (align, hmmcopy, annotation) analyses set to ready 
    and checks if requirements have been satisfied before triggering
    run on saltant.

    Kwargs:
        aligner (str): name of aligner 
    """

    # load config file
    config = file_utils.load_json(
        os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            'config',
            'normal_config.json',
        ))

    # map of type of analyses required before particular analysis can run
    # note: keep this order to avoid checking requirements more than once
    required_analyses_map = {
        'annotation': [
            'hmmcopy',
            'align',
        ],
        'hmmcopy': ['align'],
        'align': [],
    }

    # get colossus analysis information objects with status not complete
    analyses = colossus_api.list(
        "analysis_information",
        analysis_run__run_status_ne="complete",
        aligner=aligner if aligner else config["default_aligner"],
    )

    for analysis in analyses:
        # get library id
        library_id = analysis["library"]["pool_id"]
        log.info(f"{library_id}")

        # skip analysis if marked as complete
        status = analysis["analysis_run"]["run_status"]
        if status == "complete":
            continue

        # skip analyses older than this year
        # parse off ending time range
        last_updated_date = parser.parse(
            analysis["analysis_run"]["last_updated"][:-6])
        if last_updated_date < datetime(2020, 1, 1):
            continue

        jira_ticket = analysis["analysis_jira_ticket"]
        log.info(f"checking ticket {jira_ticket} library {library_id}")
        for analysis_type in required_analyses_map:
            log.info(f"checking requirements for {analysis_type}")
            # check if analysis exists on tantalus
            try:
                tantalus_analysis = tantalus_api.get(
                    'analysis',
                    jira_ticket=jira_ticket,
                    analysis_type__name=analysis_type,
                )
            except Exception:
                tantalus_analysis = None

            if tantalus_analysis is not None:
                # check if running or complete
                status = tantalus_analysis["status"]
                if status in ('running', 'complete'):
                    log.info(
                        f"skipping {analysis_type} for {jira_ticket} since status is {status}"
                    )

                    # update run status on colossus
                    if analysis_type == "annotation" and status == "complete":
                        analysis_run_id = analysis["analysis_run"]["id"]
                        analysis_run = colossus_api.get("analysis_run",
                                                        id=analysis_run_id)
                        colossus_api.update("analysis_run",
                                            id=analysis_run_id,
                                            run_status="complete")

                    continue

                log.info(
                    f"running {analysis_type} in library {library_id} with ticket {jira_ticket}"
                )
                # otherwise run analysis
                saltant_utils.run_analysis(
                    tantalus_analysis['id'],
                    analysis_type,
                    jira_ticket,
                    config["scp_version"],
                    library_id,
                    aligner if aligner else config["default_aligner"],
                    config,
                )
            else:
                # set boolean determining trigger of run
                is_ready_to_create = True
                # check if required completed analyses exist
                for required_analysis_type in required_analyses_map[
                        analysis_type]:
                    try:
                        required_analysis = tantalus_api.get(
                            'analysis',
                            jira_ticket=jira_ticket,
                            analysis_type__name=required_analysis_type,
                            status="complete",
                        )
                    except Exception:
                        log.error(
                            f"a completed {required_analysis_type} analysis is required to run before {analysis_type} runs for {jira_ticket}"
                        )
                        # set boolean as false since analysis cannot be created yet
                        is_ready_to_create = False
                        break

                # create analysis and trigger on saltant if analysis creation has met requirements
                if is_ready_to_create:
                    log.info(
                        f"creating {analysis_type} analysis for ticket {jira_ticket}"
                    )

                    try:
                        tantalus_utils.create_qc_analyses_from_library(
                            library_id,
                            jira_ticket,
                            config["scp_version"],
                            analysis_type,
                        )
                    except Exception as e:
                        log.error(
                            f"failed to create {analysis_type} analysis for ticket {jira_ticket}"
                        )
                        continue
                    tantalus_analysis = tantalus_api.get(
                        'analysis',
                        jira_ticket=jira_ticket,
                        analysis_type__name=analysis_type,
                    )

                    log.info(
                        f"running {analysis_type} in library {library_id} with ticket {jira_ticket}"
                    )
                    saltant_utils.run_analysis(
                        tantalus_analysis['id'],
                        analysis_type,
                        jira_ticket,
                        config["scp_version"],
                        library_id,
                        aligner if aligner else config["default_aligner"],
                        config,
                    )

    # get completed analyses that need montage loading
    analyses = colossus_api.list(
        "analysis_information",
        montage_status="Pending",
        analysis_run__run_status="complete",
    )

    for analysis in analyses:
        # get library id
        library_id = analysis["library"]["pool_id"]

        # skip analyses older than this year
        # parse off ending time range
        last_updated_date = parser.parse(
            analysis["analysis_run"]["last_updated"][:-6])
        if last_updated_date < datetime(2020, 1, 1):
            continue

        jira_ticket = analysis["analysis_jira_ticket"]
        update_jira_dlp(jira_ticket, "M")
        # upload qc report to jira ticket
        attach_qc_report(jira_ticket, library_id, config["storages"])

        # load analysis into montage
        load_ticket(jira_ticket)
Example #16
0
def main(version, library_id, config_filename=None, **run_options):

    if config_filename is None:
        config_filename = default_config

    log.info(config_filename)
    config = file_utils.load_json(config_filename)

    storages = config["storages"]

    library = colossus_api.get("tenxlibrary", name=library_id)
    sample = library["sample"]["sample_id"]
    library_ticket = library["jira_ticket"]

    # TODO: Move this to tenx automated scripts
    if len(library["analysis_set"]) == 0:
        jira = create_analysis_jira_ticket(library_id, sample, library_ticket)

    else:
        analysis_id = library["analysis_set"][0]
        analysis_object = colossus_api.get("analysis", id=analysis_id)
        jira = analysis_object["jira_ticket"]

    log.info("Running {}".format(jira))
    job_subdir = jira + run_options['tag']

    pipeline_dir = os.path.join(
        tantalus_api.get(
            "storage",
            name=config["storages"]["local_results"])["storage_directory"],
        job_subdir)

    log_utils.init_pl_dir(pipeline_dir, run_options['clean'])

    log_file = log_utils.init_log_files(pipeline_dir)
    log_utils.setup_sentinel(run_options['sisyphus_interactive'],
                             os.path.join(pipeline_dir, "tenx"))

    # scRNA pipeline working directories
    data_dir = os.path.join("/datadrive", "data", library_id)
    runs_dir = os.path.join("/datadrive", "runs", library_id)
    reference_dir = os.path.join("/datadrive", "reference")
    results_dir = os.path.join("/datadrive", "results", library_id)

    analysis_info = TenXAnalysisInfo(
        jira,
        version,
        library_id,
    )

    if run_options["testing"]:
        ref_genome = "test"

    else:
        ref_genome = get_ref_genome(library, is_tenx=True)

    args = {}
    args['library_id'] = library_id
    args['ref_genome'] = ref_genome
    args['version'] = version

    start_automation(jira, version, args, run_options, analysis_info, data_dir,
                     runs_dir, reference_dir, results_dir, storages)