def test_setup_analysis_directory_structure(self, mock_id, mock_parse, mock_path, mock_makedir):
    """Run setup_analysis_directory_structure on a stubbed flowcell and check the project name.

    ``mock_parse`` supplies the flowcell description and ``mock_id`` the
    project id; ``mock_path`` and ``mock_makedir`` are accepted but are not
    referenced in the body.
    """
    flowcell_dir = '/ngi2016003/201103_A00187_0332_AHFCFLDSXX'
    project_name = 'S.One_20_01'
    # Key under which the resulting project is looked up in the returned mapping.
    project_key = '/lupus/ngi/staging/wildwest/ngi2016001/nobackup/NGI/DATA/P12345'

    mock_id.return_value = 'P12345'

    sample_entry = {'sample_name': 'one'}
    project_entry = dict(
        project_name='S.One_20_01',
        project_original_name='something',
        samples=[sample_entry],
    )
    mock_parse.return_value = dict(
        fc_dir=flowcell_dir,
        fc_full_id='201103_A00187_0332_AHFCFLDSXX',
        projects=[project_entry],
    )

    # Start from an empty project collection; no files are to be created.
    result = setup_analysis_directory_structure(flowcell_dir, {}, create_files=False)

    self.assertEqual(project_name, result[project_key].name)
def main(demux_fcid_dirs, restrict_to_projects=None, restrict_to_samples=None,
         force_update=False, workflow="NGI", already_parsed=False,
         config=None, config_file_path=None):
    """Create Charon entries for the projects found in the given flowcell directories.

    :param demux_fcid_dirs: Iterable of directory paths; duplicates are dropped.
    :param restrict_to_projects: Forwarded to setup_analysis_directory_structure;
        a falsy value is replaced by an empty list.
    :param restrict_to_samples: Forwarded to setup_analysis_directory_structure;
        a falsy value is replaced by an empty list.
    :param force_update: When truthy, it is replaced by the return value of
        validate_force_update() and then passed on as ``force_overwrite``.
    :param workflow: Forwarded to create_charon_entries_from_project.
    :param already_parsed: When true, each directory is handed to
        recreate_project_from_filesystem instead of
        setup_analysis_directory_structure.
    :param config: Forwarded to setup_analysis_directory_structure.
    :param config_file_path: Accepted but not used in this function.

    :raises SystemExit: If no projects were collected from the directories.
    """
    if force_update:
        force_update = validate_force_update()
    if not restrict_to_projects:
        restrict_to_projects = []
    if not restrict_to_samples:
        restrict_to_samples = []
    demux_fcid_dirs_set = set(demux_fcid_dirs)

    # Sort/copy each raw demux FC into project/sample/fcid format -- "analysis-ready"
    projects_to_analyze = dict()
    if already_parsed:
        # Starting from Project/Sample/Libprep/Seqrun tree format
        for demux_fcid_dir in demux_fcid_dirs_set:
            p = recreate_project_from_filesystem(demux_fcid_dir)
            projects_to_analyze[p.name] = p
    else:
        # Raw illumina flowcell
        for demux_fcid_dir in demux_fcid_dirs_set:
            # These will be a bunch of Project objects each containing Samples,
            # FCIDs, lists of fastq files
            projects_to_analyze = setup_analysis_directory_structure(
                demux_fcid_dir, projects_to_analyze, restrict_to_projects,
                restrict_to_samples, create_files=False, config=config)

    if not projects_to_analyze:
        # The space before "or" keeps the flowcell list from running into the
        # next word; sorting makes the message independent of set ordering.
        sys.exit("Quitting: no projects found to process in flowcells {} "
                 "or there was an error gathering required "
                 "information.".format(",".join(sorted(demux_fcid_dirs_set))))

    # Don't need the dict functionality anymore; only the Project objects matter.
    for project in projects_to_analyze.values():
        try:
            create_charon_entries_from_project(project, workflow=workflow,
                                               force_overwrite=force_update)
        except Exception as e:
            # Best effort: report the failure and carry on with the next project.
            print(e, file=sys.stderr)
def main(demux_fcid_dirs, restrict_to_projects=None, restrict_to_samples=None,
         best_practice_analysis=None, sequencing_facility=None,
         already_parsed=False, force_update=False, delete_existing=False,
         force_create_project=False, config=None, config_file_path=None):
    """Create Charon entries for the projects found in the given flowcell directories.

    :param demux_fcid_dirs: Iterable of directory paths; duplicates are dropped.
    :param restrict_to_projects: Forwarded to setup_analysis_directory_structure;
        a falsy value is replaced by an empty list.
    :param restrict_to_samples: Forwarded to setup_analysis_directory_structure;
        a falsy value is replaced by an empty list.
    :param best_practice_analysis: Forwarded to create_charon_entries_from_project.
    :param sequencing_facility: Forwarded to create_charon_entries_from_project.
    :param already_parsed: When true, each directory is handed to
        recreate_project_from_filesystem instead of
        setup_analysis_directory_structure.
    :param force_update: When truthy, it is replaced by the return value of
        validate_force_update() and then passed on as ``force_overwrite``.
    :param delete_existing: When truthy, it is replaced by the return value of
        validate_delete_existing() and then passed on unchanged in name.
    :param force_create_project: Forwarded to recreate_project_from_filesystem
        (only used when ``already_parsed`` is true).
    :param config: Forwarded to setup_analysis_directory_structure.
    :param config_file_path: Accepted but not used in this function.

    :raises SystemExit: If no projects were collected from the directories.
    """
    if force_update:
        force_update = validate_force_update()
    if delete_existing:
        delete_existing = validate_delete_existing()
    if not restrict_to_projects:
        restrict_to_projects = []
    if not restrict_to_samples:
        restrict_to_samples = []
    demux_fcid_dirs_set = set(demux_fcid_dirs)

    projects_to_analyze = dict()
    if already_parsed:
        # Starting from Project/Sample/Libprep/Seqrun tree format
        for demux_fcid_dir in demux_fcid_dirs_set:
            p = recreate_project_from_filesystem(
                demux_fcid_dir,
                force_create_project=force_create_project)
            projects_to_analyze[p.name] = p
    else:
        # Raw illumina flowcell
        for demux_fcid_dir in demux_fcid_dirs_set:
            projects_to_analyze = setup_analysis_directory_structure(
                demux_fcid_dir,
                projects_to_analyze,
                restrict_to_projects,
                restrict_to_samples,
                config=config)

    if not projects_to_analyze:
        # The space before "or" keeps the flowcell list from running into the
        # next word; sorting makes the message independent of set ordering.
        sys.exit("Quitting: no projects found to process in flowcells {} "
                 "or there was an error gathering required "
                 "information.".format(",".join(sorted(demux_fcid_dirs_set))))

    # Only the Project objects are needed from here on, not their keys.
    for project in projects_to_analyze.values():
        try:
            create_charon_entries_from_project(
                project,
                best_practice_analysis=best_practice_analysis,
                sequencing_facility=sequencing_facility,
                force_overwrite=force_update,
                delete_existing=delete_existing)
        except Exception as e:
            # Best effort: report the failure and carry on with the next project.
            print(e, file=sys.stderr)
def main(demux_fcid_dirs, restrict_to_projects=None, restrict_to_samples=None,
         best_practice_analysis=None, sequencing_facility=None,
         already_parsed=False, force_update=False, delete_existing=False,
         force_create_project=False, config=None, config_file_path=None):
    """Create Charon entries for the projects found in the given flowcell directories.

    :param demux_fcid_dirs: Iterable of directory paths; duplicates are dropped.
    :param restrict_to_projects: Forwarded to setup_analysis_directory_structure;
        a falsy value is replaced by an empty list.
    :param restrict_to_samples: Forwarded to setup_analysis_directory_structure;
        a falsy value is replaced by an empty list.
    :param best_practice_analysis: Forwarded to create_charon_entries_from_project.
    :param sequencing_facility: Forwarded to create_charon_entries_from_project.
    :param already_parsed: When true, each directory is handed to
        recreate_project_from_filesystem instead of
        setup_analysis_directory_structure.
    :param force_update: When truthy, it is replaced by the return value of
        validate_force_update() and then passed on as ``force_overwrite``.
    :param delete_existing: When truthy, it is replaced by the return value of
        validate_delete_existing() before being passed on.
    :param force_create_project: Forwarded to recreate_project_from_filesystem
        (only used when ``already_parsed`` is true).
    :param config: Forwarded to setup_analysis_directory_structure.
    :param config_file_path: Accepted but not used in this function.

    :raises SystemExit: If no projects were collected from the directories.
    """
    if force_update:
        force_update = validate_force_update()
    if delete_existing:
        delete_existing = validate_delete_existing()
    if not restrict_to_projects:
        restrict_to_projects = []
    if not restrict_to_samples:
        restrict_to_samples = []
    demux_fcid_dirs_set = set(demux_fcid_dirs)

    projects_to_analyze = dict()
    if already_parsed:
        # Starting from Project/Sample/Libprep/Seqrun tree format
        for demux_fcid_dir in demux_fcid_dirs_set:
            p = recreate_project_from_filesystem(demux_fcid_dir,
                                                 force_create_project=force_create_project)
            projects_to_analyze[p.name] = p
    else:
        # Raw illumina flowcell
        for demux_fcid_dir in demux_fcid_dirs_set:
            projects_to_analyze = setup_analysis_directory_structure(demux_fcid_dir,
                                                                     projects_to_analyze,
                                                                     restrict_to_projects,
                                                                     restrict_to_samples,
                                                                     config=config)

    if not projects_to_analyze:
        # The space before "or" keeps the flowcell list from running into the
        # next word; sorting makes the message independent of set ordering.
        sys.exit("Quitting: no projects found to process in flowcells {} "
                 "or there was an error gathering required "
                 "information.".format(",".join(sorted(demux_fcid_dirs_set))))

    # Only the Project objects are needed from here on, not their keys.
    for project in projects_to_analyze.values():
        try:
            create_charon_entries_from_project(project,
                                               best_practice_analysis=best_practice_analysis,
                                               sequencing_facility=sequencing_facility,
                                               force_overwrite=force_update,
                                               delete_existing=delete_existing)
        except Exception as e:
            # Best effort: report the failure and carry on with the next project.
            print(e, file=sys.stderr)
if args.delete_existing: args.delete_existing = validate_delete_existing() if not args.restrict_to_projects: args.restrict_to_projects = [] if not args.restrict_to_samples: args.restrict_to_samples = [] organize_fc_dirs_set = set(args.organize_fc_dirs) projects_to_analyze = dict() ## NOTE this bit of code not currently in use but could use later #if args.already_parsed: # Starting from Project/Sample/Libprep/Seqrun tree format # for organize_fc_dir in organize_fc_dirs_set: # p = recreate_project_from_filesystem(organize_fc_dir, # force_create_project=args.force_create_project) # projects_to_analyze[p.name] = p #else: # Raw illumina flowcell for organize_fc_dir in organize_fc_dirs_set: projects_to_analyze = setup_analysis_directory_structure(fc_dir=organize_fc_dir, projects_to_analyze=projects_to_analyze, restrict_to_projects=args.restrict_to_projects, restrict_to_samples=args.restrict_to_samples, fallback_libprep=args.fallback_libprep, quiet=args.quiet) if not projects_to_analyze: raise ValueError('No projects found to process in flowcells ' '"{}" or there was an error gathering required ' 'information.'.format(",".join(organize_fc_dirs_set))) else: projects_to_analyze = projects_to_analyze.values() for project in projects_to_analyze: try: create_charon_entries_from_project(project, best_practice_analysis=args.best_practice_analysis, sequencing_facility=args.sequencing_facility, force_overwrite=args.force_update, delete_existing=args.delete_existing)