def test_get_samplesheet(self):
    """Test that the _get_samplesheet method behaves as expected
    """
    # Create a few random files and folders and assert that they are not returned
    suffixes = [".csv", "", ""]
    for n in range(3):
        os.mkdir(os.path.join(self.rootdir, ''.join(random.choice(string.ascii_uppercase) for x in range(5))))
        fh, _ = tempfile.mkstemp(dir=self.rootdir, suffix=suffixes[n])
        os.close(fh)
    self.assertIsNone(sq.get_samplesheet(self.rootdir),
                      "Getting non-existing samplesheet did not return None")

    # Create a SampleSheet.csv and a [FCID].csv file and assert that they are
    # returned with a preference for the [FCID].csv file
    fcid = td.generate_fc_barcode()
    fcdir = os.path.join(self.rootdir, td.generate_run_id(fc_barcode=fcid))
    os.mkdir(fcdir)
    ss = [os.path.join(fcdir, "SampleSheet.csv"),
          os.path.join(fcdir, "{}.csv".format(fcid))]
    for s in ss:
        utils.touch_file(s)
        self.assertEqual(s, sq.get_samplesheet(fcdir),
                         "Did not get existing {}".format(os.path.basename(s)))
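
# For context, a minimal sketch of what a get_samplesheet helper satisfying the test
# above could look like. This is an assumption for illustration only -- the real
# implementation lives in the sq module and may differ. It assumes the flowcell id
# is the last underscore-separated token of the flowcell directory name and that a
# [FCID].csv file takes precedence over SampleSheet.csv.
import os


def get_samplesheet_sketch(fc_dir):
    """Return the samplesheet in fc_dir, preferring [FCID].csv over
    SampleSheet.csv, or None if neither exists (hypothetical sketch).
    """
    fcid = os.path.basename(fc_dir.rstrip(os.sep)).split("_")[-1]
    for name in ("{}.csv".format(fcid), "SampleSheet.csv"):
        candidate = os.path.join(fc_dir, name)
        if os.path.isfile(candidate):
            return candidate
    return None
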
def status_query(archive_dir, analysis_dir, flowcell, project, brief=False):
    """Get a status report of the progress of flowcells based on a snapshot
    of the file system
    """
    last_step = 14
    status = []
    # Process each flowcell in the archive directory
    for fcdir in bcbio.get_flowcelldirs(archive_dir, flowcell):
        fc_status = {}
        fc_status['flowcell'] = os.path.basename(fcdir)

        # Locate the samplesheet
        samplesheet = bcbio.get_samplesheet(fcdir)
        if samplesheet is None:
            print("\t***ERROR***: Could not locate samplesheet in flowcell directory. Skipping..")
            continue
        fc_status['samplesheet'] = samplesheet

        # Get a list of the projects in the samplesheet
        projects = bcbio.get_projects(samplesheet, project)
        if len(projects) == 0:
            print("\t***WARNING***: No projects matched your filter [{}] for flowcell. Skipping..".format(project))
            continue
        fc_status['projects'] = []

        # Iterate over the projects in the flowcell
        for proj in projects:
            proj = proj.replace("__", ".")
            proj_status = {}
            proj_status['project'] = proj

            # Locate the analysis directory for the project
            pdir = bcbio.get_project_analysis_dir(analysis_dir, proj)
            if not pdir:
                continue
            proj_status['project_dir'] = pdir
            proj_status['samples'] = []
            proj_status['no_finished_samples'] = 0

            # Iterate over the samples in the project
            samples = bcbio.get_project_samples(samplesheet, proj)
            for smpl in samples:
                smpl = smpl.replace("__", ".")
                sample_status = {}
                proj_status['samples'].append(sample_status)
                sample_status['sample_id'] = smpl

                # Locate the analysis directory for the sample
                sdir = bcbio.get_sample_analysis_dir(pdir, smpl)
                if not sdir:
                    continue
                sample_status['sample_dir'] = sdir

                # Match the flowcell we're processing to the sample flowcell directories
                sample_fc = [d for d in bcbio.get_flowcelldirs(sdir)
                             if d.split("_")[-1] == fcdir.split("_")[-1]]
                if len(sample_fc) == 0:
                    continue
                sample_fc = sample_fc[0]
                sample_status['sample_fc_dir'] = sample_fc

                # Check whether fastq_screen has been run and has finished
                fastq_screen = bcbio.get_fastq_screen_folder(sample_fc)
                if fastq_screen:
                    sample_status['fastq_screen'] = [fastq_screen,
                                                     bcbio.fastq_screen_finished(fastq_screen)]

                # Check that the pipeline has been started for the sample
                pipeline_start_indicator = bcbio.get_pipeline_indicator(sample_fc, [1])
                if len(pipeline_start_indicator) == 0:
                    continue
                pipeline_start_indicator = pipeline_start_indicator[0]
                most_recent, _ = bcbio.get_most_recent_indicator([pipeline_start_indicator])
                sample_status['pipeline_started'] = [pipeline_start_indicator, most_recent]

                # Record the most recent pipeline step indicator as the progress
                most_recent, ifile = bcbio.get_most_recent_indicator(
                    bcbio.get_pipeline_indicator(sample_fc))
                sample_status['pipeline_progress'] = [ifile, most_recent]

                # Record the timestamp of the last write to the sample pipeline log
                sample_log = bcbio.get_sample_pipeline_log(sample_fc, smpl)
                if not sample_log:
                    continue
                st = os.stat(sample_log)
                sample_status['pipeline_log'] = [sample_log,
                                                 datetime.datetime.fromtimestamp(st.st_mtime)]

                # Check for any slurm jobs belonging to the sample
                jobids = slurm.get_slurm_jobid(smpl)
                sample_status['slurm_job'] = []
                for jobid in jobids:
                    sample_status['slurm_job'].append([jobid, slurm.get_slurm_jobstatus(jobid)])

                # The sample is considered finished if the last pipeline step indicator
                # exists and fastq_screen has finished
                most_recent, ifile = bcbio.get_most_recent_indicator(
                    bcbio.get_pipeline_indicator(sample_fc, [last_step]))
                if ifile is not None and sample_status.get('fastq_screen', [None, False])[1]:
                    sample_status['finished'] = True
                    proj_status['no_finished_samples'] += 1

            # The project is considered finished if all its samples are finished
            if proj_status['no_finished_samples'] == len(samples):
                proj_status['finished'] = True
            fc_status['projects'].append(proj_status)
        status.append(fc_status)
    print_status(status, brief)
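
# status_query assembles a nested structure (flowcell -> projects -> samples) and
# hands it to print_status, which is not shown in this excerpt. Below is a minimal
# sketch of how such a reporter could walk the structure, assuming the key names
# used above; the actual print_status may format its output differently.
def print_status_sketch(status, brief=False):
    """Print an indented report of the status structure built by status_query
    (hypothetical sketch).
    """
    for fc in status:
        print("Flowcell: {}".format(fc['flowcell']))
        for proj in fc.get('projects', []):
            pstate = "finished" if proj.get('finished') else "in progress"
            print("  Project {} [{}]".format(proj['project'], pstate))
            if brief:
                continue
            for smpl in proj.get('samples', []):
                sstate = "finished" if smpl.get('finished') else "running"
                print("    Sample {}: {}".format(smpl['sample_id'], sstate))
                if 'pipeline_progress' in smpl:
                    ifile, timestamp = smpl['pipeline_progress']
                    print("      most recent step indicator: {} ({})".format(ifile, timestamp))
                for jobid, jobstatus in smpl.get('slurm_job', []):
                    print("      slurm job {}: {}".format(jobid, jobstatus))
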
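# As a usage illustration, status_query could be exposed through a small
# command-line wrapper along these lines. The option names and defaults here are
# hypothetical and not taken from the original module.
import argparse


def main():
    parser = argparse.ArgumentParser(
        description="Report pipeline progress for flowcells (illustrative wrapper)")
    parser.add_argument("archive_dir", help="root of the flowcell archive")
    parser.add_argument("analysis_dir", help="root of the analysis output")
    parser.add_argument("--flowcell", default=None, help="restrict the report to this flowcell")
    parser.add_argument("--project", default=None, help="only include projects matching this filter")
    parser.add_argument("--brief", action="store_true", help="only print per-project summaries")
    args = parser.parse_args()
    status_query(args.archive_dir, args.analysis_dir, args.flowcell,
                 args.project, brief=args.brief)


if __name__ == "__main__":
    main()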