def test_locate_project(self):
    """Exercise locate_project for missing and existing project directories.

    The config points the 'analysis' section at a fresh temporary directory
    (base_root / sthlm_root / top_dir). locate_project must raise ValueError
    while the project directory does not exist, and return the project path
    once it has been created, whether it is given the full path or only the
    project name.
    """
    import shutil

    project_name = 'temp_project'
    tmp_dir = tempfile.mkdtemp()
    # mkdtemp() never removes what it creates; clean up even if an
    # assertion below fails.
    self.addCleanup(shutil.rmtree, tmp_dir, ignore_errors=True)
    sthlm_root = 'sthlm_root'
    top_dir = 'top_dir'
    config = {
        'analysis': {
            'base_root': tmp_dir,
            'sthlm_root': sthlm_root,
            'top_dir': top_dir,
        }
    }
    with self.assertRaises(ValueError):
        # Should raise ValueError if project can't be found
        locate_project(project=project_name, config=config)
    tmp_project_path = os.path.join(tmp_dir, sthlm_root, top_dir,
                                    'DATA', project_name)
    with self.assertRaises(ValueError):
        # Should raise ValueError as path given doesn't exist
        locate_project(project=tmp_project_path, config=config)
    os.makedirs(tmp_project_path)
    # Should return the path passed in
    self.assertEqual(
        locate_project(project=tmp_project_path, config=config),
        tmp_project_path)
    # Should return the full path after searching project data dir
    self.assertEqual(
        locate_project(project=project_name, config=config),
        tmp_project_path)
def test_locate_project(self):
    """Exercise locate_project for missing and existing project directories.

    The config's analysis "top_dir" is a fresh temporary directory.
    locate_project must raise ValueError while <top_dir>/DATA/<project> does
    not exist, and return that path once it has been created, whether it is
    given the full path or only the project name.
    """
    import shutil

    project_name = "temp_project"
    tmp_dir = tempfile.mkdtemp()
    # mkdtemp() never removes what it creates; clean up even if an
    # assertion below fails.
    self.addCleanup(shutil.rmtree, tmp_dir, ignore_errors=True)
    config = {"analysis": {"top_dir": tmp_dir}}
    with self.assertRaises(ValueError):
        # Should raise ValueError if project can't be found
        locate_project(project=project_name, config=config)
    tmp_project_path = os.path.join(tmp_dir, "DATA", project_name)
    with self.assertRaises(ValueError):
        # Should raise ValueError as path given doesn't exist
        locate_project(project=tmp_project_path, config=config)
    os.makedirs(tmp_project_path)
    # Should return the path passed in
    self.assertEqual(
        locate_project(project=tmp_project_path, config=config),
        tmp_project_path)
    # Should return the full path after searching project data dir
    self.assertEqual(
        locate_project(project=project_name, config=config),
        tmp_project_path)
# Ask for confirmation before restarting RUNNING jobs. The answer has to be
# stored on the flag that was asked about; writing it to
# restart_finished_jobs left restart_running_jobs set regardless of the
# answer and overwrote the finished-jobs flag.
# NOTE(review): presumably validate_dangerous_user_thing returns whether the
# user confirmed -- verify against its definition.
if args.__dict__.get("restart_running_jobs"):
    args.restart_running_jobs = \
        validate_dangerous_user_thing(action=("restart RUNNING jobs, deleting "
                                              "previous analysis files"))
# Charon-specific arguments ('organize', 'analyze', 'qc')
if args.__dict__.get("force_update"):
    args.force_update = \
        validate_dangerous_user_thing("overwrite existing data in Charon")

# Finally execute corresponding functions
## Analyze Project
if 'analyze_project_dirs' in args:
    for analyze_project_dir in args.analyze_project_dirs:
        try:
            project_dir = locate_project(analyze_project_dir)
        except ValueError as e:
            # Log the problem and carry on with the remaining projects.
            LOG.error(e)
            continue
        project_obj = \
            recreate_project_from_filesystem(project_dir=project_dir,
                                             restrict_to_samples=args.restrict_to_samples)
        launchers.launch_analysis(
            [project_obj],
            restart_failed_jobs=args.restart_failed_jobs,
            restart_finished_jobs=args.restart_finished_jobs,
            restart_running_jobs=args.restart_running_jobs,
            no_qc=args.no_qc,
            quiet=args.quiet,
            manual=True)
args.restrict_to_samples, args.restart_failed_jobs, args.restart_finished_jobs, args.restart_running_jobs, keep_existing_data=args.keep_existing_data, no_qc=args.no_qc, quiet=args.quiet, manual=True, generate_bqsr_bam=args.generate_bqsr_bam, ) ## Analyze Project elif "analyze_project_dirs" in args: for analyze_project_dir in args.analyze_project_dirs: try: project_dir = locate_project(analyze_project_dir) except ValueError as e: LOG.error(e) continue project_obj = recreate_project_from_filesystem( project_dir=project_dir, restrict_to_samples=args.restrict_to_samples ) launchers.launch_analysis( [project_obj], restart_failed_jobs=args.restart_failed_jobs, restart_finished_jobs=args.restart_finished_jobs, restart_running_jobs=args.restart_running_jobs, keep_existing_data=args.keep_existing_data, no_qc=args.no_qc, quiet=args.quiet, manual=True,
def project_summarize(projects, verbosity=0):
    """Print a status report, built from Charon, for each given project.

    Every entry of ``projects`` is resolved with ``locate_project``; the
    basename of the resulting path is used to fetch the project from Charon.
    Entries that cannot be located (``ValueError``) or fetched
    (``CharonError``) are reported and skipped. All output goes through
    ``print_stderr``.

    :param projects: iterable of project identifiers accepted by
        ``locate_project``
    :param int verbosity: 0 prints per-status counts for samples, libpreps
        and seqruns; 1 additionally lists the ids under each status; any
        other non-negative int prints every project, sample, libprep and
        seqrun record. Invalid values fall back to 0.
    """
    # Exact type check: anything that is not a plain int (bool included)
    # falls back to the default verbosity.
    if type(verbosity) is not int or verbosity < 0:
        # The offending value is now actually interpolated into the message.
        print_stderr('Invalid verbosity level ("{}"); must be a positive '
                     'integer; falling back to 0'.format(verbosity))
        verbosity = 0
    update_charon_with_local_jobs_status(quiet=True)  # Don't send mails
    charon_session = CharonSession()

    # --- Gather project -> sample -> libprep -> seqrun records from Charon ---
    projects_list = []
    for project in projects:
        try:
            project_name = os.path.basename(locate_project(project))
        except ValueError as e:
            print_stderr("Skipping project: {}".format(e))
            continue
        print_stderr('Gathering information for project "{}"...'.format(project_name))
        try:
            charon_project = charon_session.project_get(project_name)
        except CharonError as e:
            # NOTE(review): the stray file=sys.stderr keyword was dropped so
            # this call matches every other print_stderr call in the
            # function -- confirm print_stderr targets stderr on its own.
            print_stderr('Project "{}" not found in Charon; skipping ({})'.format(project_name, e))
            continue
        project_id = charon_project['projectid']
        samples_list = []
        project_dict = {
            'name': charon_project['name'],
            'id': project_id,
            'status': charon_project['status'],
            'samples': samples_list,
        }
        for sample in charon_session.project_get_samples(project_id).get('samples', []):
            sample_id = sample['sampleid']
            libpreps_list = []
            samples_list.append({
                'id': sample_id,
                'analysis_status': sample['analysis_status'],
                'coverage': sample['total_autosomal_coverage'],
                'libpreps': libpreps_list,
            })
            for libprep in charon_session.sample_get_libpreps(project_id, sample_id).get('libpreps', []):
                libprep_id = libprep['libprepid']
                seqruns_list = []
                libpreps_list.append({
                    'id': libprep_id,
                    'qc': libprep['qc'],
                    'seqruns': seqruns_list,
                })
                for seqrun in charon_session.libprep_get_seqruns(project_id, sample_id, libprep_id).get('seqruns', []):
                    seqrun_dict = {
                        'id': seqrun['seqrunid'],
                        'alignment_status': seqrun['alignment_status'],
                        'coverage': seqrun['mean_autosomal_coverage'],
                    }
                    # total_reads is only recorded when present and truthy.
                    if seqrun.get('total_reads'):
                        seqrun_dict['total_reads'] = seqrun['total_reads']
                    seqruns_list.append(seqrun_dict)
        projects_list.append(project_dict)

    if verbosity in (0, 1):
        # --- Group item ids by status, per project ---
        projects_status_list = []
        for project_dict in projects_list:
            samples_by_status = collections.defaultdict(set)
            libpreps_by_status = collections.defaultdict(set)
            seqruns_by_status = collections.defaultdict(set)
            for sample_dict in project_dict.get('samples', []):
                sample_status = sample_dict['analysis_status']
                libpreps = sample_dict.get('libpreps')
                if libpreps:
                    if not any(libprep["seqruns"] for libprep in libpreps):
                        # Libpreps exist but none has a seqrun: the sample is
                        # filed under a synthetic status and its libpreps
                        # are not counted.
                        sample_status = "NO_SEQRUNS"
                    else:
                        for libprep_dict in libpreps:
                            libpreps_by_status[libprep_dict['qc']].add(libprep_dict['id'])
                            for seqrun_dict in libprep_dict.get('seqruns', []):
                                seqruns_by_status[seqrun_dict['alignment_status']].add(seqrun_dict['id'])
                else:
                    sample_status = "NO_LIBPREPS"
                samples_by_status[sample_status].add(sample_dict['id'])
            projects_status_list.append({
                'name': "{} ({})".format(project_dict['name'], project_dict['id']),
                'status': project_dict['status'],
                'samples_by_status': samples_by_status,
                'libpreps_by_status': libpreps_by_status,
                'seqruns_by_status': seqruns_by_status,
            })

        # --- Print the per-status counts ---
        print_items = (("Samples", "samples_by_status"),
                       ("Libpreps", "libpreps_by_status"),
                       ("Seqruns", "seqruns_by_status"),)
        for project_dict in projects_status_list:
            print_stderr("\nProject\n-------")
            print_stderr(" Name: {:>40}".format(project_dict['name']))
            print_stderr(" Status: {:>40}".format(project_dict['status']))
            for name, dict_key in print_items:
                status_dict = project_dict[dict_key]
                print_stderr("{}\n{}".format(name, "-" * len(name)))
                total_items = sum(map(len, status_dict.values()))
                # Sort by status name. items() works on Python 2 and 3,
                # unlike iteritems(). total_items cannot be zero inside the
                # loop: a status only exists once an id was added to it.
                for status, item_set in sorted(status_dict.items(),
                                               key=lambda key_value: key_value[0]):
                    num_items = len(item_set)
                    percent = (100.00 * num_items) / total_items
                    print_stderr(" Status: {:<20} ({:>3}/{:<3}) ({:>6.2f}%)".format(
                        status, num_items, total_items, percent))
                    if verbosity == 1:
                        for item in sorted(item_set):
                            print_stderr(" {}".format(item))
                # NOTE(review): the original indentation was lost; the blank
                # line is assumed to close each section -- confirm.
                print_stderr("")
    else:
        # Verbosity is 2+, maximum verbosity
        output_template = "{}{:<30}{:>{rspace}}"

        def emit(offset, label, value):
            # Indent by `offset` spaces; the value is right-aligned in a
            # field of (80 - offset) characters.
            print_stderr(output_template.format(" " * offset, label, value,
                                                rspace=80 - offset))

        for project_dict in projects_list:
            emit(0, "Project name:", project_dict['name'])
            emit(0, "Project ID:", project_dict['id'])
            emit(0, "Project status:", project_dict['status'])
            for sample_dict in project_dict['samples']:
                print_stderr("")
                emit(4, "Sample ID:", sample_dict['id'])
                emit(4, "Sample analysis status:", sample_dict['analysis_status'])
                emit(4, "Sample coverage:", sample_dict['coverage'])
                for libprep_dict in sample_dict['libpreps']:
                    print_stderr("")
                    emit(8, "Libprep ID:", libprep_dict['id'])
                    emit(8, "Libprep qc status:", libprep_dict['qc'])
                    for seqrun_dict in libprep_dict['seqruns']:
                        print_stderr("")
                        emit(12, "Seqrun ID:", seqrun_dict['id'])
                        emit(12, "Seqrun alignment status:", seqrun_dict['alignment_status'])
                        emit(12, "Seqrun mean auto. coverage:", seqrun_dict['coverage'])
                        if "total_reads" in seqrun_dict:
                            emit(12, "Seqrun total reads:", seqrun_dict['total_reads'])
            # NOTE(review): the original indentation was lost; the trailing
            # separator is assumed to follow each project -- confirm.
            print_stderr("\n")
def project_summarize(projects, verbosity=0):
    """Print a status report, built from Charon, for each given project.

    Every entry of ``projects`` is resolved with ``locate_project``; the
    basename of the resulting path is used to fetch the project from Charon.
    Entries that cannot be located (``ValueError``) or fetched
    (``CharonError``) are reported and skipped. All output goes through
    ``print_stderr``.

    :param projects: iterable of project identifiers accepted by
        ``locate_project``
    :param int verbosity: 0 prints per-status counts for samples, libpreps
        and seqruns; 1 additionally lists the ids under each status; any
        other non-negative int prints every project, sample, libprep and
        seqrun record. Invalid values fall back to 0.
    """
    # Exact type check: anything that is not a plain int (bool included)
    # falls back to the default verbosity.
    if type(verbosity) is not int or verbosity < 0:
        # The offending value is now actually interpolated into the message.
        print_stderr('Invalid verbosity level ("{}"); must be a positive '
                     'integer; falling back to 0'.format(verbosity))
        verbosity = 0
    update_charon_with_local_jobs_status(quiet=True)  # Don't send mails
    charon_session = CharonSession()
    projects_list = []
    for project in projects:
        try:
            project_name = os.path.basename(locate_project(project))
        except ValueError as e:
            print_stderr("Skipping project: {}".format(e))
            continue
        print_stderr(
            'Gathering information for project "{}"...'.format(project_name))
        try:
            charon_project = charon_session.project_get(project_name)
        except CharonError as e:
            # NOTE(review): the stray file=sys.stderr keyword was dropped so
            # this call matches every other print_stderr call in the
            # function -- confirm print_stderr targets stderr on its own.
            print_stderr(
                'Project "{}" not found in Charon; skipping ({})'.format(
                    project_name, e))
            continue
        projects_list.append(
            _gather_project_record(charon_session, charon_project))
    if verbosity in (0, 1):
        _print_status_summary(projects_list, list_items=(verbosity == 1))
    else:
        # Verbosity is 2+, maximum verbosity
        _print_full_report(projects_list)


def _gather_project_record(charon_session, charon_project):
    """Build the nested project/sample/libprep/seqrun dict for one project."""
    project_id = charon_project['projectid']
    samples_list = []
    project_dict = {
        'name': charon_project['name'],
        'id': project_id,
        'status': charon_project['status'],
        'samples': samples_list,
    }
    for sample in charon_session.project_get_samples(
            project_id).get('samples', []):
        sample_id = sample['sampleid']
        libpreps_list = []
        samples_list.append({
            'id': sample_id,
            'analysis_status': sample['analysis_status'],
            'coverage': sample['total_autosomal_coverage'],
            'libpreps': libpreps_list,
        })
        for libprep in charon_session.sample_get_libpreps(
                project_id, sample_id).get('libpreps', []):
            libprep_id = libprep['libprepid']
            seqruns_list = []
            libpreps_list.append({
                'id': libprep_id,
                'qc': libprep['qc'],
                'seqruns': seqruns_list,
            })
            for seqrun in charon_session.libprep_get_seqruns(
                    project_id, sample_id, libprep_id).get('seqruns', []):
                seqrun_dict = {
                    'id': seqrun['seqrunid'],
                    'alignment_status': seqrun['alignment_status'],
                    'coverage': seqrun['mean_autosomal_coverage'],
                }
                # total_reads is only recorded when present and truthy.
                if seqrun.get('total_reads'):
                    seqrun_dict['total_reads'] = seqrun['total_reads']
                seqruns_list.append(seqrun_dict)
    return project_dict


def _group_ids_by_status(project_dict):
    """Return (samples, libpreps, seqruns) status -> set-of-ids mappings."""
    samples_by_status = collections.defaultdict(set)
    libpreps_by_status = collections.defaultdict(set)
    seqruns_by_status = collections.defaultdict(set)
    for sample_dict in project_dict.get('samples', []):
        sample_status = sample_dict['analysis_status']
        libpreps = sample_dict.get('libpreps')
        if not libpreps:
            sample_status = "NO_LIBPREPS"
        elif not any(libprep["seqruns"] for libprep in libpreps):
            # Libpreps exist but none has a seqrun: the sample is filed
            # under a synthetic status and its libpreps are not counted.
            sample_status = "NO_SEQRUNS"
        else:
            for libprep_dict in libpreps:
                libpreps_by_status[libprep_dict['qc']].add(
                    libprep_dict['id'])
                for seqrun_dict in libprep_dict.get('seqruns', []):
                    seqruns_by_status[
                        seqrun_dict['alignment_status']].add(
                            seqrun_dict['id'])
        samples_by_status[sample_status].add(sample_dict['id'])
    return samples_by_status, libpreps_by_status, seqruns_by_status


def _print_status_summary(projects_list, list_items):
    """Print per-status counts per project; list the ids if ``list_items``."""
    for project_dict in projects_list:
        sections = zip(("Samples", "Libpreps", "Seqruns"),
                       _group_ids_by_status(project_dict))
        print_stderr("\nProject\n-------")
        print_stderr(" Name: {:>40}".format(
            "{} ({})".format(project_dict['name'], project_dict['id'])))
        print_stderr(" Status: {:>40}".format(project_dict['status']))
        for name, status_dict in sections:
            print_stderr("{}\n{}".format(name, "-" * len(name)))
            total_items = sum(map(len, status_dict.values()))
            # Sort by status name. items() works on Python 2 and 3, unlike
            # iteritems(). total_items cannot be zero inside the loop: a
            # status only exists once an id was added to it.
            for status, item_set in sorted(
                    status_dict.items(),
                    key=lambda key_value: key_value[0]):
                num_items = len(item_set)
                percent = (100.00 * num_items) / total_items
                print_stderr(
                    " Status: {:<20} ({:>3}/{:<3}) ({:>6.2f}%)".format(
                        status, num_items, total_items, percent))
                if list_items:
                    for item in sorted(item_set):
                        print_stderr(" {}".format(item))
            # NOTE(review): the original indentation was lost; the blank
            # line is assumed to close each section -- confirm.
            print_stderr("")


def _print_full_report(projects_list):
    """Print every project, sample, libprep and seqrun record, indented."""
    output_template = "{}{:<30}{:>{rspace}}"

    def emit(offset, label, value):
        # Indent by `offset` spaces; the value is right-aligned in a field
        # of (80 - offset) characters.
        print_stderr(
            output_template.format(" " * offset, label, value,
                                   rspace=80 - offset))

    for project_dict in projects_list:
        emit(0, "Project name:", project_dict['name'])
        emit(0, "Project ID:", project_dict['id'])
        emit(0, "Project status:", project_dict['status'])
        for sample_dict in project_dict['samples']:
            print_stderr("")
            emit(4, "Sample ID:", sample_dict['id'])
            emit(4, "Sample analysis status:",
                 sample_dict['analysis_status'])
            emit(4, "Sample coverage:", sample_dict['coverage'])
            for libprep_dict in sample_dict['libpreps']:
                print_stderr("")
                emit(8, "Libprep ID:", libprep_dict['id'])
                emit(8, "Libprep qc status:", libprep_dict['qc'])
                for seqrun_dict in libprep_dict['seqruns']:
                    print_stderr("")
                    emit(12, "Seqrun ID:", seqrun_dict['id'])
                    emit(12, "Seqrun alignment status:",
                         seqrun_dict['alignment_status'])
                    emit(12, "Seqrun mean auto. coverage:",
                         seqrun_dict['coverage'])
                    if "total_reads" in seqrun_dict:
                        emit(12, "Seqrun total reads:",
                             seqrun_dict['total_reads'])
        # NOTE(review): the original indentation was lost; the trailing
        # separator is assumed to follow each project -- confirm.
        print_stderr("\n")
args.restrict_to_projects, args.restrict_to_samples, args.restart_failed_jobs, args.restart_finished_jobs, args.restart_running_jobs, keep_existing_data=args.keep_existing_data, no_qc=args.no_qc, quiet=args.quiet, manual=True, generate_bqsr_bam=args.generate_bqsr_bam) ## Analyze Project elif 'analyze_project_dirs' in args: for analyze_project_dir in args.analyze_project_dirs: try: project_dir = locate_project(analyze_project_dir) except ValueError as e: LOG.error(e) continue project_obj = \ recreate_project_from_filesystem(project_dir=project_dir, restrict_to_samples=args.restrict_to_samples) launchers.launch_analysis([project_obj], restart_failed_jobs=args.restart_failed_jobs, restart_finished_jobs=args.restart_finished_jobs, restart_running_jobs=args.restart_running_jobs, keep_existing_data=args.keep_existing_data, no_qc=args.no_qc, quiet=args.quiet, manual=True, generate_bqsr_bam=args.generate_bqsr_bam)