def main(analysis_name, jira_id, version, args, config_filename=None, **run_options):
    if config_filename is None:
        config_filename = default_config

    # Validate the SC/JIRA ticket id before doing any work
    if not templates.JIRA_ID_RE.match(jira_id):
        raise Exception(f'Invalid SC ID: {jira_id}')

    config = file_utils.load_json(config_filename)

    job_subdir = jira_id + run_options['tag']
    run_options['job_subdir'] = job_subdir

    # Pipeline, log, and temp directories for this run
    pipeline_dir = os.path.join(
        tantalus_api.get(
            "storage",
            name=config["storages"]["local_results"])["storage_directory"],
        job_subdir)

    scpipeline_dir = os.path.join('singlecelllogs', 'pipeline', job_subdir)
    tmp_dir = os.path.join('singlecelltemp', 'temp', job_subdir)

    log_utils.init_pl_dir(pipeline_dir, run_options['clean'])

    log_file = log_utils.init_log_files(pipeline_dir)
    log_utils.setup_sentinel(
        run_options['sisyphus_interactive'],
        os.path.join(pipeline_dir, analysis_name))

    start_automation(
        analysis_name,
        jira_id,
        version,
        args,
        run_options,
        config,
        pipeline_dir,
        scpipeline_dir,
        tmp_dir,
        config['storages'],
        job_subdir,
    )
def main(analysis_id, config_filename=None, reset_status=False, **run_options):
    if config_filename is None:
        config_filename = default_config

    analysis = workflows.analysis.base.Analysis.get_by_id(tantalus_api, analysis_id)

    if reset_status:
        analysis.set_error_status()

    if analysis.status == 'complete':
        raise Exception(f'analysis {analysis_id} already complete')

    if analysis.status == 'running':
        raise Exception(f'analysis {analysis_id} already running')

    jira_id = analysis.jira
    analysis_name = analysis.name

    if not templates.JIRA_ID_RE.match(jira_id):
        raise Exception(f'Invalid SC ID: {jira_id}')

    config = file_utils.load_json(config_filename)

    pipeline_dir = os.path.join(config['analysis_directory'], jira_id, analysis_name)
    scpipeline_dir = os.path.join('singlecelllogs', 'pipeline', jira_id)
    tmp_dir = os.path.join('singlecelltemp', 'temp', jira_id)

    log_utils.init_pl_dir(pipeline_dir, run_options['clean'])

    log_file = log_utils.init_log_files(pipeline_dir)
    log_utils.setup_sentinel(
        run_options['sisyphus_interactive'],
        os.path.join(pipeline_dir, analysis_name))

    storages = config['storages']

    start = time.time()

    # Stage inputs on the working storage if it differs from the remote storage
    if storages["working_inputs"] != storages["remote_inputs"]:
        log_utils.sentinel(
            'Transferring input datasets from {} to {}'.format(
                storages["remote_inputs"], storages["working_inputs"]),
            transfer_inputs,
            analysis.get_input_datasets(),
            analysis.get_input_results(),
            storages["remote_inputs"],
            storages["working_inputs"],
        )

    if run_options['inputs_yaml'] is None:
        inputs_yaml = os.path.join(pipeline_dir, 'inputs.yaml')
        log_utils.sentinel(
            'Generating inputs yaml',
            analysis.generate_inputs_yaml,
            storages,
            inputs_yaml,
        )
    else:
        inputs_yaml = run_options['inputs_yaml']

    try:
        analysis.set_run_status()

        dirs = [
            pipeline_dir,
            config['docker_path'],
            config['docker_sock_path'],
        ]

        # Pass all server storages to docker
        for storage_name in storages.values():
            storage = tantalus_api.get('storage', name=storage_name)
            if storage['storage_type'] == 'server':
                dirs.append(storage['storage_directory'])

        if run_options['saltant']:
            context_config_file = config['context_config_file']['saltant']
        else:
            context_config_file = config['context_config_file']['sisyphus']

        log_utils.sentinel(
            f'Running single_cell {analysis_name}',
            analysis.run_pipeline,
            scpipeline_dir=scpipeline_dir,
            tmp_dir=tmp_dir,
            inputs_yaml=inputs_yaml,
            context_config_file=context_config_file,
            docker_env_file=config['docker_env_file'],
            docker_server=config['docker_server'],
            dirs=dirs,
            storages=storages,
            run_options=run_options,
        )

    except Exception:
        # Flag the failure in Tantalus before surfacing the error
        analysis.set_error_status()
        raise Exception("pipeline failed")

    output_dataset_ids = log_utils.sentinel(
        'Creating {} output datasets'.format(analysis_name),
        analysis.create_output_datasets,
        storages,
        update=run_options['update'],
    )

    output_results_ids = log_utils.sentinel(
        'Creating {} output results'.format(analysis_name),
        analysis.create_output_results,
        storages,
        update=run_options['update'],
        skip_missing=run_options['skip_missing'],
    )

    # Push newly created outputs back to the remote storage
    if storages["working_inputs"] != storages["remote_inputs"] and output_dataset_ids != []:
        log_utils.sentinel(
            'Transferring output datasets from {} to {}'.format(
                storages["working_inputs"], storages["remote_inputs"]),
            transfer_inputs,
            output_dataset_ids,
            output_results_ids,
            storages["remote_inputs"],
            storages["working_inputs"],
        )

    log.info("Done!")
    log.info("------ %s hours ------" % ((time.time() - start) / 60 / 60))

    analysis.set_complete_status()

    comment_jira(jira_id, f'finished {analysis_name} analysis')
def main(jira, version, library_id, aligner, analysis_type, load_only=False, gsc_lanes=None, brc_flowcell_ids=None, config_filename=None, **run_options):
    if load_only:
        load_ticket(jira)
        return "complete"

    if config_filename is None:
        config_filename = default_config

    if not templates.JIRA_ID_RE.match(jira):
        raise Exception(f'Invalid SC ID: {jira}')

    aligner_map = {'A': 'BWA_ALN_0_5_7', 'M': 'BWA_MEM_0_7_6A'}
    aligner = aligner_map[aligner]

    # Get reference genome
    library_info = colossus_api.get("library", pool_id=library_id)
    reference_genome = colossus_utils.get_ref_genome(library_info)

    if gsc_lanes is not None:
        gsc_lanes = gsc_lanes.split(',')

    if brc_flowcell_ids is not None:
        brc_flowcell_ids = brc_flowcell_ids.split(',')

    config = file_utils.load_json(config_filename)

    job_subdir = jira + run_options['tag']
    run_options['job_subdir'] = job_subdir

    pipeline_dir = os.path.join(
        tantalus_api.get(
            "storage",
            name=config["storages"]["local_results"])["storage_directory"],
        job_subdir)

    scpipeline_dir = os.path.join('singlecelllogs', 'pipeline', job_subdir)
    tmp_dir = os.path.join('singlecelltemp', 'temp', job_subdir)

    log_utils.init_pl_dir(pipeline_dir, run_options['clean'])

    log_file = log_utils.init_log_files(pipeline_dir)
    log_utils.setup_sentinel(
        run_options['sisyphus_interactive'],
        os.path.join(pipeline_dir, analysis_type))

    # Create analysis information object on Colossus
    analysis_info = workflow.models.AnalysisInfo(jira)

    log.info('Library ID: {}'.format(library_id))

    if run_options["is_test_run"]:
        library_id += "TEST"

    args = {}
    args['aligner'] = aligner
    args['ref_genome'] = reference_genome
    args['library_id'] = library_id
    args['gsc_lanes'] = gsc_lanes
    args['brc_flowcell_ids'] = brc_flowcell_ids
    args['smoothing'] = run_options['smoothing']

    start_automation(
        jira,
        version,
        args,
        run_options,
        config,
        pipeline_dir,
        scpipeline_dir,
        tmp_dir,
        config['storages'],
        job_subdir,
        analysis_info,
        analysis_type,
    )
def run(
        analysis_id,
        version,
        jira=None,
        no_download=False,
        config_filename=None,
        data_dir=None,
        runs_dir=None,
        results_dir=None,
        **run_options,
):
    if config_filename is None:
        config_filename = default_config

    config = file_utils.load_json(config_filename)
    storages = config["storages"]

    analysis = tantalus_api.get("analysis", id=analysis_id)
    if analysis["status"] in ("running", "complete"):
        raise Exception(f'analysis {analysis_id} already {analysis["status"]}')

    jira_ticket = analysis["jira_ticket"]
    library_id = analysis["args"]["library_id"]

    # get colossus library
    library = colossus_api.get(
        "tenxlibrary",
        name=library_id,
    )

    log.info("Running {}".format(jira_ticket))

    job_subdir = jira_ticket + run_options['tag']

    # init pipeline dir
    pipeline_dir = os.path.join(
        tantalus_api.get(
            "storage",
            name=config["storages"]["local_results"])["storage_directory"],
        job_subdir,
    )

    log_utils.init_pl_dir(pipeline_dir, run_options['clean'])

    log_file = log_utils.init_log_files(pipeline_dir)
    log_utils.setup_sentinel(
        run_options['sisyphus_interactive'],
        os.path.join(pipeline_dir, "tenx"))

    # scRNA pipeline working directories
    if data_dir is None:
        data_dir = os.path.join("/datadrive", "data")
    if runs_dir is None:
        runs_dir = os.path.join("/datadrive", "runs", library_id)
    if results_dir is None:
        results_dir = os.path.join("/datadrive", "results", library_id)
    reference_dir = os.path.join("/datadrive", "reference")

    if run_options["testing"]:
        ref_genome = "test"
    elif run_options["ref_genome"]:
        ref_genome = run_options["ref_genome"]
        log.info("Default reference genome being overwritten; using {}".format(
            run_options["ref_genome"]))
    else:
        ref_genome = get_ref_genome(library, is_tenx=True)

    args = {}
    args['library_id'] = library_id
    args['ref_genome'] = ref_genome
    args['version'] = version

    analysis_info = TenXAnalysisInfo(
        jira_ticket,
        config['version'],
        run_options,
        library["id"],
    )

    if not no_download:
        download_data(storages["working_inputs"], data_dir, library_id)

    start_automation(
        jira_ticket,
        config['version'],
        args,
        run_options,
        analysis_info,
        data_dir,
        runs_dir,
        reference_dir,
        results_dir,
        storages,
        library["id"],
        analysis_id,
    )
def run_pseudobulk(jira_ticket, version, inputs_tag_name, matched_normal_sample, matched_normal_library, config_filename=None, **run_options):
    if config_filename is None:
        config_filename = default_config

    config = file_utils.load_json(config_filename)

    args = dict(
        inputs_tag_name=inputs_tag_name,
        matched_normal_sample=matched_normal_sample,
        matched_normal_library=matched_normal_library,
    )

    job_subdir = jira_ticket
    run_options['job_subdir'] = job_subdir

    pipeline_dir = os.path.join(
        tantalus_api.get(
            "storage",
            name=config["storages"]["local_results"])["storage_directory"],
        job_subdir)

    results_dir = os.path.join('singlecellresults', 'results', job_subdir)
    scpipeline_dir = os.path.join('singlecelllogs', 'pipeline', job_subdir)
    tmp_dir = os.path.join('singlecelltemp', 'temp', job_subdir)

    log_utils.init_pl_dir(pipeline_dir, run_options['clean'])

    # Per-ticket output prefixes on the results storage
    storage_result_prefix = tantalus_api.get_storage_client("singlecellresults").prefix
    destruct_output = os.path.join(storage_result_prefix, jira_ticket, "results", "destruct")
    lumpy_output = os.path.join(storage_result_prefix, jira_ticket, "results", "lumpy")
    haps_output = os.path.join(storage_result_prefix, jira_ticket, "results", "haps")
    variants_output = os.path.join(storage_result_prefix, jira_ticket, "results", "variants")

    log_file = log_utils.init_log_files(pipeline_dir)
    log_utils.setup_sentinel(run_options['sisyphus_interactive'], pipeline_dir)

    start_automation(
        jira_ticket,
        version,
        args,
        run_options,
        config,
        pipeline_dir,
        results_dir,
        scpipeline_dir,
        tmp_dir,
        config['storages'],
        job_subdir,
        destruct_output,
        lumpy_output,
        haps_output,
        variants_output,
    )
def main(version, library_id, config_filename=None, **run_options):
    if config_filename is None:
        config_filename = default_config

    log.info(config_filename)
    config = file_utils.load_json(config_filename)
    storages = config["storages"]

    library = colossus_api.get("tenxlibrary", name=library_id)
    sample = library["sample"]["sample_id"]
    library_ticket = library["jira_ticket"]

    # TODO: Move this to tenx automated scripts
    if len(library["analysis_set"]) == 0:
        jira = create_analysis_jira_ticket(library_id, sample, library_ticket)
    else:
        analysis_id = library["analysis_set"][0]
        analysis_object = colossus_api.get("analysis", id=analysis_id)
        jira = analysis_object["jira_ticket"]

    log.info("Running {}".format(jira))

    job_subdir = jira + run_options['tag']

    pipeline_dir = os.path.join(
        tantalus_api.get(
            "storage",
            name=config["storages"]["local_results"])["storage_directory"],
        job_subdir)

    log_utils.init_pl_dir(pipeline_dir, run_options['clean'])

    log_file = log_utils.init_log_files(pipeline_dir)
    log_utils.setup_sentinel(
        run_options['sisyphus_interactive'],
        os.path.join(pipeline_dir, "tenx"))

    # scRNA pipeline working directories
    data_dir = os.path.join("/datadrive", "data", library_id)
    runs_dir = os.path.join("/datadrive", "runs", library_id)
    reference_dir = os.path.join("/datadrive", "reference")
    results_dir = os.path.join("/datadrive", "results", library_id)

    analysis_info = TenXAnalysisInfo(
        jira,
        version,
        library_id,
    )

    if run_options["testing"]:
        ref_genome = "test"
    else:
        ref_genome = get_ref_genome(library, is_tenx=True)

    args = {}
    args['library_id'] = library_id
    args['ref_genome'] = ref_genome
    args['version'] = version

    start_automation(
        jira,
        version,
        args,
        run_options,
        analysis_info,
        data_dir,
        runs_dir,
        reference_dir,
        results_dir,
        storages,
    )