def main(demux_fcid_dirs, restrict_to_projects=None, restrict_to_samples=None,
         force_update=False, workflow="NGI", already_parsed=False,
         config=None, config_file_path=None):
    """Build project objects from the given directories and create Charon entries.

    :param list demux_fcid_dirs: Directories to process; duplicates are dropped.
    :param list restrict_to_projects: Forwarded to
        setup_analysis_directory_structure (raw flowcell branch only). Optional.
    :param list restrict_to_samples: Forwarded to
        setup_analysis_directory_structure (raw flowcell branch only). Optional.
    :param bool force_update: When truthy, replaced by the return value of
        validate_force_update() and then passed on as ``force_overwrite``.
    :param str workflow: Forwarded to create_charon_entries_from_project.
    :param bool already_parsed: If True, every directory is rebuilt with
        recreate_project_from_filesystem; otherwise it is treated as a raw
        flowcell and passed to setup_analysis_directory_structure.
    :param dict config: Forwarded to setup_analysis_directory_structure.
    :param str config_file_path: Not used inside this function.
    :raises SystemExit: If no projects were collected.
    """
    if force_update:
        force_update = validate_force_update()
    if not restrict_to_projects:
        restrict_to_projects = []
    if not restrict_to_samples:
        restrict_to_samples = []
    demux_fcid_dirs_set = set(demux_fcid_dirs)
    # Sort/copy each raw demux FC into project/sample/fcid format -- "analysis-ready"
    projects_to_analyze = dict()
    if already_parsed:
        # Starting from Project/Sample/Libprep/Seqrun tree format
        for demux_fcid_dir in demux_fcid_dirs_set:
            p = recreate_project_from_filesystem(demux_fcid_dir)
            projects_to_analyze[p.name] = p
    else:
        # Raw illumina flowcell
        for demux_fcid_dir in demux_fcid_dirs_set:
            # The helper receives the accumulated dict and returns the updated one
            projects_to_analyze = setup_analysis_directory_structure(
                demux_fcid_dir, projects_to_analyze, restrict_to_projects,
                restrict_to_samples, create_files=False, config=config)
    if not projects_to_analyze:
        # The space before "or" keeps the flowcell list from running into the
        # rest of the sentence.
        sys.exit("Quitting: no projects found to process in flowcells {} "
                 "or there was an error gathering required "
                 "information.".format(",".join(demux_fcid_dirs_set)))
    # Don't need the dict functionality anymore; revert to a real list
    # (dict.values() is only a view on Python 3).
    projects_to_analyze = list(projects_to_analyze.values())
    for project in projects_to_analyze:
        try:
            create_charon_entries_from_project(project, workflow=workflow,
                                               force_overwrite=force_update)
        except Exception as e:
            # Best effort: report the failure and continue with the next project
            print(e, file=sys.stderr)
def test_create_charon_entries_from_project_update(
        self, mock_seqrun_ud, mock_seqrun_cr, mock_libprep_ud,
        mock_libprep_cr, mock_sample_ud, mock_sample_cr, mock_project_ud,
        mock_project_cr):
    """Expect one update call per entity when every create call raises.

    Each ``*_cr`` mock is set to raise ``CharonError`` with status code 400;
    with ``force_overwrite=True`` each ``*_ud`` mock must then have been
    called exactly once with the keyword arguments listed below.
    """
    # Not the neatest of tests but gets the job done...
    for failing_create in (mock_project_cr, mock_sample_cr,
                           mock_libprep_cr, mock_seqrun_cr):
        failing_create.side_effect = CharonError('Error', status_code=400)

    create_charon_entries_from_project(self.project_obj, force_overwrite=True)

    expected_project = {
        'best_practice_analysis': 'whole_genome_reseq',
        'name': 'S.One_20_02',
        'projectid': 'P100001',
        'sequencing_facility': 'NGI-S',
        'status': 'OPEN',
    }
    expected_sample = {
        'analysis_status': 'TO_ANALYZE',
        'projectid': 'P100001',
        'sampleid': 'P100001_101',
        'status': 'STALE',
    }
    expected_libprep = {
        'libprepid': 'A',
        'projectid': 'P100001',
        'qc': 'PASSED',
        'sampleid': 'P100001_101',
    }
    expected_seqrun = {
        'alignment_status': 'NOT_RUNNING',
        'libprepid': 'A',
        'mean_autosomal_coverage': 0,
        'projectid': 'P100001',
        'sampleid': 'P100001_101',
        'seqrunid': '201030_A00187_0332_AHFCFLDSXX',
        'total_reads': 0,
    }
    mock_project_ud.assert_called_once_with(**expected_project)
    mock_sample_ud.assert_called_once_with(**expected_sample)
    mock_libprep_ud.assert_called_once_with(**expected_libprep)
    mock_seqrun_ud.assert_called_once_with(**expected_seqrun)
def test_create_charon_entries_from_project(self, mock_seqrun, mock_libprep,
                                            mock_sample, mock_proj):
    """Expect one create call per entity with the default arguments.

    Runs create_charon_entries_from_project on ``self.project_obj`` and
    checks that each mock was called exactly once with the keyword
    arguments listed below.
    """
    create_charon_entries_from_project(self.project_obj)

    expected_project = {
        'best_practice_analysis': 'whole_genome_reseq',
        'name': 'S.One_20_02',
        'projectid': 'P100001',
        'sequencing_facility': 'NGI-S',
        'status': 'OPEN',
    }
    expected_sample = {
        'analysis_status': 'TO_ANALYZE',
        'projectid': 'P100001',
        'sampleid': 'P100001_101',
    }
    expected_libprep = {
        'libprepid': 'A',
        'projectid': 'P100001',
        'qc': 'PASSED',
        'sampleid': 'P100001_101',
    }
    expected_seqrun = {
        'alignment_status': 'NOT_RUNNING',
        'libprepid': 'A',
        'mean_autosomal_coverage': 0,
        'projectid': 'P100001',
        'sampleid': 'P100001_101',
        'seqrunid': '201030_A00187_0332_AHFCFLDSXX',
        'total_reads': 0,
    }
    mock_proj.assert_called_once_with(**expected_project)
    mock_sample.assert_called_once_with(**expected_sample)
    mock_libprep.assert_called_once_with(**expected_libprep)
    mock_seqrun.assert_called_once_with(**expected_seqrun)
def process_demultiplexed_flowcells(demux_fcid_dirs, restrict_to_projects=None,
                                    restrict_to_samples=None,
                                    restart_failed_jobs=False,
                                    restart_finished_jobs=False,
                                    restart_running_jobs=False,
                                    fallback_libprep=None,
                                    keep_existing_data=False, no_qc=False,
                                    quiet=False, manual=False, config=None,
                                    config_file_path=None,
                                    generate_bqsr_bam=False):
    """Organize demultiplexed Illumina flowcells into projects and start analysis.

    The flowcells are organized with organize_projects_from_flowcell. For
    every resulting project whose ``project_id`` matches
    ``UPPSALA_PROJECT_RE``, Charon entries are created with
    ``sequencing_facility="NGI-U"``. All projects are then handed to
    launch_analysis.

    :param list demux_fcid_dirs: The CASAVA-produced demux directory/directories.
    :param list restrict_to_projects: A list of projects; analysis will be restricted to these. Optional.
    :param list restrict_to_samples: A list of samples; analysis will be restricted to these. Optional.
    :param bool restart_failed_jobs: Restart jobs marked as "FAILED" in Charon.
    :param bool restart_finished_jobs: Restart jobs marked as "DONE" in Charon.
    :param bool restart_running_jobs: Restart jobs marked as running in Charon.
    :param str fallback_libprep: If libprep cannot be determined, use this value if supplied (default None).
    :param bool keep_existing_data: Keep existing analysis data when launching new jobs.
    :param bool no_qc: Forwarded unchanged to launch_analysis.
    :param bool quiet: Don't send notification emails; forwarded to organize_projects_from_flowcell.
    :param bool manual: This is being run from a user script; not read in this function body.
    :param dict config: The parsed NGI configuration file; optional.
    :param str config_file_path: The path to the NGI configuration file; optional, not read in this function body.
    :param bool generate_bqsr_bam: Forwarded unchanged to launch_analysis.
    """
    restrict_to_projects = restrict_to_projects or []
    restrict_to_samples = restrict_to_samples or []

    organized_projects = organize_projects_from_flowcell(
        demux_fcid_dirs=demux_fcid_dirs,
        restrict_to_projects=restrict_to_projects,
        restrict_to_samples=restrict_to_samples,
        fallback_libprep=fallback_libprep,
        quiet=quiet,
        config=config)

    for candidate in organized_projects:
        # Only projects matching the Uppsala pattern get Charon records here
        if not UPPSALA_PROJECT_RE.match(candidate.project_id):
            continue
        LOG.info('Creating Charon records for Uppsala project "{}" if they '
                 'are missing'.format(candidate))
        create_charon_entries_from_project(candidate,
                                           sequencing_facility="NGI-U")

    launch_analysis(organized_projects,
                    restart_failed_jobs,
                    restart_finished_jobs,
                    restart_running_jobs,
                    keep_existing_data=keep_existing_data,
                    no_qc=no_qc,
                    config=config,
                    generate_bqsr_bam=generate_bqsr_bam)
def process_demultiplexed_flowcells(demux_fcid_dirs, restrict_to_projects=None,
                                    restrict_to_samples=None,
                                    restart_failed_jobs=False,
                                    restart_finished_jobs=False,
                                    restart_running_jobs=False,
                                    config=None, config_file_path=None,
                                    quiet=False, manual=False):
    """Sort demultiplexed Illumina flowcells into projects and launch their analysis.

    :param list demux_fcid_dirs: The CASAVA-produced demux directory/directories.
    :param list restrict_to_projects: A list of projects; analysis will be restricted to these. Optional.
    :param list restrict_to_samples: A list of samples; analysis will be restricted to these. Optional.
    :param bool restart_failed_jobs: Restart jobs marked as "FAILED" in Charon.
    :param bool restart_finished_jobs: Restart jobs marked as "DONE" in Charon.
    :param bool restart_running_jobs: Forwarded positionally to launch_analysis.
    :param dict config: The parsed NGI configuration file; optional.
    :param str config_file_path: The path to the NGI configuration file; optional, not read in this function body.
    :param bool quiet: Forwarded to setup_analysis_directory_structure.
    :param bool manual: Not read in this function body.
    :raises RuntimeError: If no projects were collected from the flowcells.
    """
    if not restrict_to_projects:
        restrict_to_projects = []
    if not restrict_to_samples:
        restrict_to_samples = []
    demux_fcid_dirs_set = set(demux_fcid_dirs)
    # Sort/copy each raw demux FC into project/sample/fcid format -- "analysis-ready"
    projects_to_analyze = dict()
    for demux_fcid_dir in demux_fcid_dirs_set:
        demux_fcid_dir = os.path.abspath(demux_fcid_dir)
        # The helper receives the accumulated dict and returns the updated one
        projects_to_analyze = setup_analysis_directory_structure(
            demux_fcid_dir, projects_to_analyze, restrict_to_projects,
            restrict_to_samples, create_files=True, config=config,
            quiet=quiet)
    if not projects_to_analyze:
        if restrict_to_projects:
            error_message = (
                "No projects found to process: the specified flowcells "
                "({fcid_dirs}) do not contain the specified project(s) "
                "({restrict_to_projects}) or there was an error "
                "gathering required information.").format(
                    fcid_dirs=",".join(demux_fcid_dirs_set),
                    restrict_to_projects=",".join(restrict_to_projects))
        else:
            error_message = (
                "No projects found to process in flowcells {} "
                "or there was an error gathering required "
                "information.".format(",".join(demux_fcid_dirs_set)))
        raise RuntimeError(error_message)
    # Materialize the dict view so launch_analysis receives a real list
    # (dict.values() is not indexable on Python 3).
    projects_to_analyze = list(projects_to_analyze.values())
    for project in projects_to_analyze:
        if UPPSALA_PROJECT_RE.match(project.project_id):
            LOG.info('Creating Charon records for Uppsala project "{}" if they '
                     'are missing'.format(project))
            create_charon_entries_from_project(project,
                                               sequencing_facility="NGI-U")
    launch_analysis(projects_to_analyze, restart_failed_jobs,
                    restart_finished_jobs, restart_running_jobs,
                    config=config)
def main(demux_fcid_dirs, restrict_to_projects=None, restrict_to_samples=None,
         best_practice_analysis=None, sequencing_facility=None,
         already_parsed=False, force_update=False, delete_existing=False,
         force_create_project=False, config=None, config_file_path=None):
    """Build project objects from the given directories and create Charon entries.

    :param list demux_fcid_dirs: Directories to process; duplicates are dropped.
    :param list restrict_to_projects: Forwarded to
        setup_analysis_directory_structure (raw flowcell branch only). Optional.
    :param list restrict_to_samples: Forwarded to
        setup_analysis_directory_structure (raw flowcell branch only). Optional.
    :param best_practice_analysis: Forwarded to create_charon_entries_from_project.
    :param sequencing_facility: Forwarded to create_charon_entries_from_project.
    :param bool already_parsed: If True, every directory is rebuilt with
        recreate_project_from_filesystem; otherwise it is treated as a raw
        flowcell and passed to setup_analysis_directory_structure.
    :param bool force_update: When truthy, replaced by the return value of
        validate_force_update() and then passed on as ``force_overwrite``.
    :param bool delete_existing: When truthy, replaced by the return value of
        validate_delete_existing() before being passed on.
    :param bool force_create_project: Forwarded to
        recreate_project_from_filesystem (already-parsed branch only).
    :param dict config: Forwarded to setup_analysis_directory_structure.
    :param str config_file_path: Not used inside this function.
    :raises SystemExit: If no projects were collected.
    """
    if force_update:
        force_update = validate_force_update()
    if delete_existing:
        delete_existing = validate_delete_existing()
    if not restrict_to_projects:
        restrict_to_projects = []
    if not restrict_to_samples:
        restrict_to_samples = []
    demux_fcid_dirs_set = set(demux_fcid_dirs)
    projects_to_analyze = dict()
    if already_parsed:
        # Starting from Project/Sample/Libprep/Seqrun tree format
        for demux_fcid_dir in demux_fcid_dirs_set:
            p = recreate_project_from_filesystem(
                demux_fcid_dir, force_create_project=force_create_project)
            projects_to_analyze[p.name] = p
    else:
        # Raw illumina flowcell
        for demux_fcid_dir in demux_fcid_dirs_set:
            # The helper receives the accumulated dict and returns the updated one
            projects_to_analyze = setup_analysis_directory_structure(
                demux_fcid_dir, projects_to_analyze, restrict_to_projects,
                restrict_to_samples, config=config)
    if not projects_to_analyze:
        # The space before "or" keeps the flowcell list from running into the
        # rest of the sentence.
        sys.exit("Quitting: no projects found to process in flowcells {} "
                 "or there was an error gathering required "
                 "information.".format(",".join(demux_fcid_dirs_set)))
    for project in projects_to_analyze.values():
        try:
            create_charon_entries_from_project(
                project,
                best_practice_analysis=best_practice_analysis,
                sequencing_facility=sequencing_facility,
                force_overwrite=force_update,
                delete_existing=delete_existing)
        except Exception as e:
            # Best effort: report the failure and continue with the next project
            print(e, file=sys.stderr)
def test_create_charon_entries_from_project(self):
    """Create Charon entries for a project/sample/libprep/seqrun tree.

    The project is deleted through a CharonSession afterwards, whether or
    not the creation call raised.
    """
    # Assemble the NGI object hierarchy from the fixture identifiers
    project = NGIProject(name=self.p_name,
                         dirname=self.p_name,
                         project_id=self.p_id,
                         base_path=self.p_bp)
    sample = project.add_sample(name=self.s_id, dirname=self.s_id)
    libprep = sample.add_libprep(name=self.l_id, dirname=self.l_id)
    libprep.add_seqrun(name=self.sr_id, dirname=self.sr_id)

    try:
        # Push the hierarchy to the database
        create_charon_entries_from_project(project)
    finally:
        # Clean up the project record
        CharonSession().project_delete(project.project_id)
def test_create_charon_entries_from_project(self):
    """Create Charon entries for a project/sample/libprep/seqrun tree.

    The project is deleted through a CharonSession afterwards, whether or
    not the creation call raised.
    """
    # Assemble the NGI object hierarchy from the fixture identifiers
    project = NGIProject(name=self.p_name,
                         dirname=self.p_name,
                         project_id=self.p_id,
                         base_path=self.p_bp)
    sample = project.add_sample(name=self.s_id, dirname=self.s_id)
    libprep = sample.add_libprep(name=self.l_id, dirname=self.l_id)
    libprep.add_seqrun(name=self.sr_id, dirname=self.sr_id)

    try:
        # Push the hierarchy to the database
        create_charon_entries_from_project(project)
    finally:
        # Clean up the project record
        CharonSession().project_delete(project.project_id)
def process_demultiplexed_flowcells(demux_fcid_dirs, restrict_to_projects=None,
                                    restrict_to_samples=None,
                                    restart_failed_jobs=False,
                                    restart_finished_jobs=False,
                                    restart_running_jobs=False,
                                    fallback_libprep=None,
                                    keep_existing_data=False, no_qc=False,
                                    quiet=False, manual=False, config=None,
                                    config_file_path=None,
                                    generate_bqsr_bam=False):
    """Organize demultiplexed Illumina flowcells into projects and start analysis.

    The flowcells are organized with organize_projects_from_flowcell. For
    every resulting project whose ``project_id`` matches
    ``UPPSALA_PROJECT_RE``, Charon entries are created with
    ``sequencing_facility="NGI-U"``. All projects are then handed to
    launch_analysis.

    :param list demux_fcid_dirs: The CASAVA-produced demux directory/directories.
    :param list restrict_to_projects: A list of projects; analysis will be restricted to these. Optional.
    :param list restrict_to_samples: A list of samples; analysis will be restricted to these. Optional.
    :param bool restart_failed_jobs: Restart jobs marked as "FAILED" in Charon.
    :param bool restart_finished_jobs: Restart jobs marked as "DONE" in Charon.
    :param bool restart_running_jobs: Restart jobs marked as running in Charon.
    :param str fallback_libprep: If libprep cannot be determined, use this value if supplied (default None).
    :param bool keep_existing_data: Keep existing analysis data when launching new jobs.
    :param bool no_qc: Forwarded unchanged to launch_analysis.
    :param bool quiet: Don't send notification emails; forwarded to organize_projects_from_flowcell.
    :param bool manual: This is being run from a user script; not read in this function body.
    :param dict config: The parsed NGI configuration file; optional.
    :param str config_file_path: The path to the NGI configuration file; optional, not read in this function body.
    :param bool generate_bqsr_bam: Forwarded unchanged to launch_analysis.
    """
    restrict_to_projects = restrict_to_projects or []
    restrict_to_samples = restrict_to_samples or []

    organize_kwargs = dict(demux_fcid_dirs=demux_fcid_dirs,
                           restrict_to_projects=restrict_to_projects,
                           restrict_to_samples=restrict_to_samples,
                           fallback_libprep=fallback_libprep,
                           quiet=quiet,
                           config=config)
    organized_projects = organize_projects_from_flowcell(**organize_kwargs)

    for candidate in organized_projects:
        # Only projects matching the Uppsala pattern get Charon records here
        if not UPPSALA_PROJECT_RE.match(candidate.project_id):
            continue
        LOG.info('Creating Charon records for Uppsala project "{}" if they '
                 'are missing'.format(candidate))
        create_charon_entries_from_project(candidate,
                                           sequencing_facility="NGI-U")

    launch_analysis(organized_projects,
                    restart_failed_jobs,
                    restart_finished_jobs,
                    restart_running_jobs,
                    keep_existing_data=keep_existing_data,
                    no_qc=no_qc,
                    config=config,
                    generate_bqsr_bam=generate_bqsr_bam)
def main(demux_fcid_dirs, restrict_to_projects=None, restrict_to_samples=None,
         best_practice_analysis=None, sequencing_facility=None,
         already_parsed=False, force_update=False, delete_existing=False,
         force_create_project=False, config=None, config_file_path=None):
    """Build project objects from the given directories and create Charon entries.

    :param list demux_fcid_dirs: Directories to process; duplicates are dropped.
    :param list restrict_to_projects: Forwarded to
        setup_analysis_directory_structure (raw flowcell branch only). Optional.
    :param list restrict_to_samples: Forwarded to
        setup_analysis_directory_structure (raw flowcell branch only). Optional.
    :param best_practice_analysis: Forwarded to create_charon_entries_from_project.
    :param sequencing_facility: Forwarded to create_charon_entries_from_project.
    :param bool already_parsed: If True, every directory is rebuilt with
        recreate_project_from_filesystem; otherwise it is treated as a raw
        flowcell and passed to setup_analysis_directory_structure.
    :param bool force_update: When truthy, replaced by the return value of
        validate_force_update() and then passed on as ``force_overwrite``.
    :param bool delete_existing: When truthy, replaced by the return value of
        validate_delete_existing() before being passed on.
    :param bool force_create_project: Forwarded to
        recreate_project_from_filesystem (already-parsed branch only).
    :param dict config: Forwarded to setup_analysis_directory_structure.
    :param str config_file_path: Not used inside this function.
    :raises SystemExit: If no projects were collected.
    """
    if force_update:
        force_update = validate_force_update()
    if delete_existing:
        delete_existing = validate_delete_existing()
    if not restrict_to_projects:
        restrict_to_projects = []
    if not restrict_to_samples:
        restrict_to_samples = []
    demux_fcid_dirs_set = set(demux_fcid_dirs)
    projects_to_analyze = dict()
    if already_parsed:
        # Starting from Project/Sample/Libprep/Seqrun tree format
        for demux_fcid_dir in demux_fcid_dirs_set:
            p = recreate_project_from_filesystem(
                demux_fcid_dir, force_create_project=force_create_project)
            projects_to_analyze[p.name] = p
    else:
        # Raw illumina flowcell
        for demux_fcid_dir in demux_fcid_dirs_set:
            # The helper receives the accumulated dict and returns the updated one
            projects_to_analyze = setup_analysis_directory_structure(
                demux_fcid_dir, projects_to_analyze, restrict_to_projects,
                restrict_to_samples, config=config)
    if not projects_to_analyze:
        # The space before "or" keeps the flowcell list from running into the
        # rest of the sentence.
        sys.exit("Quitting: no projects found to process in flowcells {} "
                 "or there was an error gathering required "
                 "information.".format(",".join(demux_fcid_dirs_set)))
    for project in projects_to_analyze.values():
        try:
            create_charon_entries_from_project(
                project,
                best_practice_analysis=best_practice_analysis,
                sequencing_facility=sequencing_facility,
                force_overwrite=force_update,
                delete_existing=delete_existing)
        except Exception as e:
            # Best effort: report the failure and continue with the next project
            print(e, file=sys.stderr)
restrict_to_samples=args.restrict_to_samples) if not project.samples: LOG.info('No samples found for project {} (path {})'.format( project.project_id, qc_project_dir)) for sample in project: qc_ngi.launchers.analyze(project, sample, quiet=args.quiet) ## Organize Flowcell elif 'organize_fc_dirs' in args: organize_fc_dirs_list = list(set(args.organize_fc_dirs)) LOG.info("Organizing flowcell {} {}".format( inflector.plural("directory", len(organize_fc_dirs_list)), ", ".join(organize_fc_dirs_list))) projects_to_analyze = \ organize_projects_from_flowcell(demux_fcid_dirs=organize_fc_dirs_list, restrict_to_projects=args.restrict_to_projects, restrict_to_samples=args.restrict_to_samples, fallback_libprep=args.fallback_libprep, quiet=args.quiet) for project in projects_to_analyze: try: create_charon_entries_from_project( project=project, best_practice_analysis=args.best_practice_analysis, sequencing_facility=args.sequencing_facility, force_overwrite=args.force_update) except Exception as e: LOG.error(e.message) print(e, file=sys.stderr) LOG.info("Done with organization.")
def process_demultiplexed_flowcells(demux_fcid_dirs, restrict_to_projects=None,
                                    restrict_to_samples=None,
                                    restart_failed_jobs=False,
                                    restart_finished_jobs=False,
                                    restart_running_jobs=False,
                                    config=None, config_file_path=None,
                                    quiet=False, manual=False):
    """Sort demultiplexed Illumina flowcells into projects and launch their analysis.

    :param list demux_fcid_dirs: The CASAVA-produced demux directory/directories.
    :param list restrict_to_projects: A list of projects; analysis will be restricted to these. Optional.
    :param list restrict_to_samples: A list of samples; analysis will be restricted to these. Optional.
    :param bool restart_failed_jobs: Restart jobs marked as "FAILED" in Charon.
    :param bool restart_finished_jobs: Restart jobs marked as "DONE" in Charon.
    :param bool restart_running_jobs: Forwarded positionally to launch_analysis.
    :param dict config: The parsed NGI configuration file; optional.
    :param str config_file_path: The path to the NGI configuration file; optional, not read in this function body.
    :param bool quiet: Forwarded to setup_analysis_directory_structure.
    :param bool manual: Not read in this function body.
    :raises RuntimeError: If no projects were collected from the flowcells.
    """
    if not restrict_to_projects:
        restrict_to_projects = []
    if not restrict_to_samples:
        restrict_to_samples = []
    demux_fcid_dirs_set = set(demux_fcid_dirs)
    # Sort/copy each raw demux FC into project/sample/fcid format -- "analysis-ready"
    projects_to_analyze = dict()
    for demux_fcid_dir in demux_fcid_dirs_set:
        demux_fcid_dir = os.path.abspath(demux_fcid_dir)
        # The helper receives the accumulated dict and returns the updated one
        projects_to_analyze = setup_analysis_directory_structure(
            demux_fcid_dir, projects_to_analyze, restrict_to_projects,
            restrict_to_samples, create_files=True, config=config,
            quiet=quiet)
    if not projects_to_analyze:
        if restrict_to_projects:
            error_message = (
                "No projects found to process: the specified flowcells "
                "({fcid_dirs}) do not contain the specified project(s) "
                "({restrict_to_projects}) or there was an error "
                "gathering required information.").format(
                    fcid_dirs=",".join(demux_fcid_dirs_set),
                    restrict_to_projects=",".join(restrict_to_projects))
        else:
            error_message = (
                "No projects found to process in flowcells {} "
                "or there was an error gathering required "
                "information.".format(",".join(demux_fcid_dirs_set)))
        raise RuntimeError(error_message)
    # Materialize the dict view so launch_analysis receives a real list
    # (dict.values() is not indexable on Python 3).
    projects_to_analyze = list(projects_to_analyze.values())
    for project in projects_to_analyze:
        if UPPSALA_PROJECT_RE.match(project.project_id):
            LOG.info(
                'Creating Charon records for Uppsala project "{}" if they '
                'are missing'.format(project))
            create_charon_entries_from_project(project,
                                               sequencing_facility="NGI-U")
    launch_analysis(projects_to_analyze, restart_failed_jobs,
                    restart_finished_jobs, restart_running_jobs,
                    config=config)
inflector.plural("directory", len(qc_flowcell_dirs_list)), ", ".join(qc_flowcell_dirs_list) ) ) projects_to_analyze = organize_projects_from_flowcell( demux_fcid_dirs=qc_flowcell_dirs_list, restrict_to_projects=args.restrict_to_projects, restrict_to_samples=args.restrict_to_samples, fallback_libprep=args.fallback_libprep, quiet=args.quiet, ) for project in projects_to_analyze: try: create_charon_entries_from_project( project=project, best_practice_analysis=args.best_practice_analysis, sequencing_facility=args.sequencing_facility, force_overwrite=args.force_update, delete_existing=args.delete_existing, ) except Exception as e: print(e, file=sys.stderr) LOG.info("Done with organization.") for project in projects_to_analyze: for sample in project: qc_ngi.launchers.analyze(project, sample, quiet=args.quiet) ## QC Project elif "qc_project_dirs" in args: for qc_project_dir in args.qc_project_dirs: project = recreate_project_from_filesystem( project_dir=qc_project_dir, restrict_to_samples=args.restrict_to_samples