def get_runObj(run):
    """Build the sequencer-specific Run object for a run folder.

    Looks for ``runParameters.xml`` (or ``RunParameters.xml``) inside ``run``,
    parses it and uses the flowcell/application name found there to decide
    which Run class to instantiate.

    :param run: run folder; it is joined with the parameters file name and
        passed on unchanged to the Run constructor
    :type run: string
    :rtype: Object
    :returns: a HiSeqX_Run, HiSeq_Run, MiSeq_Run, NextSeq_Run or NovaSeq_Run
        instance; None if the parameters file is missing, cannot be parsed
        (OSError) or the sequencer type is not recognised
    """
    if os.path.exists(os.path.join(run, 'runParameters.xml')):
        run_parameters_file = "runParameters.xml"
    elif os.path.exists(os.path.join(run, 'RunParameters.xml')):
        run_parameters_file = "RunParameters.xml"
    else:
        logger.error("Cannot find RunParameters.xml or runParameters.xml in the run folder for run {}".format(run))
        return None

    rppath = os.path.join(run, run_parameters_file)
    try:
        rp = RunParametersParser(rppath)
    except OSError:
        logger.warning("Problems parsing the runParameters.xml file at {}. "
                       "This is quite unexpected. please archive the run {} manually".format(rppath, run))
        return None

    # Do a case by case test because there are so many versions of
    # RunParameters that there is no real other way.
    run_parameters = rp.data['RunParameters']
    # Some files carry the name under "ApplicationName" instead of
    # "Application"; accept either before giving up with an empty string.
    runtype = run_parameters.get("Application",
                                 run_parameters.get("ApplicationName", ""))

    if "Setup" in run_parameters:
        # This is the HiSeq2500, MiSeq, and HiSeqX case
        try:
            # Works for recent control software
            runtype = run_parameters["Setup"]["Flowcell"]
        except KeyError:
            # Use this as second resource but print a warning in the logs
            logger.warning("Parsing runParameters to fecth instrument type, "
                           "not found Flowcell information in it. Using ApplicationName")
            # The "" default keeps the substring tests below from failing
            # when ApplicationName is absent as well.
            runtype = run_parameters["Setup"].get("ApplicationName", "")

    # "HiSeq X" has to be tested before "HiSeq", which is a substring of it.
    if "HiSeq X" in runtype:
        return HiSeqX_Run(run, CONFIG["analysis"]["HiSeqX"])
    elif "HiSeq" in runtype or "TruSeq" in runtype:
        return HiSeq_Run(run, CONFIG["analysis"]["HiSeq"])
    elif "MiSeq" in runtype:
        return MiSeq_Run(run, CONFIG["analysis"]["MiSeq"])
    elif "NextSeq" in runtype:
        return NextSeq_Run(run, CONFIG["analysis"]["NextSeq"])
    elif "NovaSeq" in runtype:
        return NovaSeq_Run(run, CONFIG["analysis"]["NovaSeq"])

    logger.warning("Unrecognized run type {}, cannot archive the run {}. "
                   "Someone as likely bought a new sequencer without telling "
                   "it to the bioinfo team".format(runtype, run))
    return None
def setUpClass(self):
    """Create the run folders, marker files and Run objects used by the tests.

    Directory tree created under a fresh temporary directory:

    tmp/
    |__ 141124_ST-COMPLETED1_01_AFCIDXX
    |   |__ RunInfo.xml
    |   |__ runParameters.xml
    |   |__ Demultiplexing
    |   |   |__ Stats
    |   |       |__ DemultiplexingStats.xml
    |   |__ RTAComplete.txt
    |__ 141124_ST-INPROGRESS1_02_AFCIDXX
    |   |__ RunInfo.xml
    |   |__ runParameters.xml
    |   |__ Demultiplexing
    |   |__ RTAComplete.txt
    |__ 141124_ST-RUNNING1_03_AFCIDXX
    |   |__ RunInfo.xml
    |   |__ runParameters.xml
    |__ 141124_ST-TOSTART1_04_FCIDXXX
    |   |__ RunInfo.xml
    |   |__ runParameters.xml
    |   |__ RTAComplete.txt
    |__ transfer.tsv

    Sets ``tmp_dir``, ``transfer_file``, ``running``, ``to_start``,
    ``in_progress``, ``completed`` and ``finished_runs`` on ``self``.
    """
    self.tmp_dir = os.path.join(tempfile.mkdtemp(), 'tmp')
    self.transfer_file = os.path.join(self.tmp_dir, 'transfer.tsv')

    running = os.path.join(self.tmp_dir, '141124_ST-RUNNING1_03_AFCIDXX')
    to_start = os.path.join(self.tmp_dir, '141124_ST-TOSTART1_04_FCIDXXX')
    in_progress = os.path.join(self.tmp_dir, '141124_ST-INPROGRESS1_02_AFCIDXX')
    completed = os.path.join(self.tmp_dir, '141124_ST-COMPLETED1_01_AFCIDXX')
    finished_runs = [to_start, in_progress, completed]

    # Create runs directory structure
    os.makedirs(self.tmp_dir)
    os.makedirs(running)
    os.makedirs(to_start)
    os.makedirs(os.path.join(in_progress, 'Demultiplexing'))
    os.makedirs(os.path.join(completed, 'Demultiplexing', 'Stats'))

    # Create files indicating that the run is finished
    for run in finished_runs:
        with open(os.path.join(run, 'RTAComplete.txt'), 'w'):
            pass

    # Create files indicating that the preprocessing is done
    with open(os.path.join(completed, 'Demultiplexing', 'Stats', 'DemultiplexingStats.xml'), 'w'):
        pass

    # Create transfer file and add the completed run
    with open(self.transfer_file, 'w') as f:
        tsv_writer = csv.writer(f, delimiter='\t')
        tsv_writer.writerow([os.path.basename(completed), str(datetime.now())])

    # Copy the sample RunInfo.xml and runParameters.xml files to every run
    # directory. The source paths are relative, so they are resolved against
    # the current working directory.
    for run in [running, to_start, in_progress, completed]:
        shutil.copy('data/RunInfo.xml', run)
        shutil.copy('data/runParameters.xml', run)

    # Create run objects
    # Jose : add tests for other sequencers
    self.running = HiSeqX_Run(running, CONFIG["analysis"]["HiSeqX"])
    self.to_start = Run(to_start, CONFIG["analysis"]["HiSeqX"])
    self.in_progress = Run(in_progress, CONFIG["analysis"]["HiSeqX"])
    self.completed = Run(completed, CONFIG["analysis"]["HiSeqX"])
    self.finished_runs = [self.to_start, self.in_progress, self.completed]
class TestTracker(unittest.TestCase):
    """analysis.py script tests.

    The fixtures are built once per class: four fake run folders, one per
    processing state, plus a transfer file that lists only the completed run.
    """

    @classmethod
    def setUpClass(cls):
        """Create the run folders, marker files and Run objects used by the tests.

        Directory tree created under a fresh temporary directory:

        tmp/
        |__ 141124_ST-COMPLETED1_01_AFCIDXX
        |   |__ RunInfo.xml
        |   |__ runParameters.xml
        |   |__ Demultiplexing
        |   |   |__ Stats
        |   |       |__ DemultiplexingStats.xml
        |   |__ RTAComplete.txt
        |__ 141124_ST-INPROGRESS1_02_AFCIDXX
        |   |__ RunInfo.xml
        |   |__ runParameters.xml
        |   |__ Demultiplexing
        |   |__ RTAComplete.txt
        |__ 141124_ST-RUNNING1_03_AFCIDXX
        |   |__ RunInfo.xml
        |   |__ runParameters.xml
        |__ 141124_ST-TOSTART1_04_FCIDXXX
        |   |__ RunInfo.xml
        |   |__ runParameters.xml
        |   |__ RTAComplete.txt
        |__ transfer.tsv
        """
        # Keep a handle on the mkdtemp() directory itself: removing only the
        # 'tmp' child in tearDownClass would leave the parent behind.
        cls._tmp_root = tempfile.mkdtemp()
        cls.tmp_dir = os.path.join(cls._tmp_root, 'tmp')
        cls.transfer_file = os.path.join(cls.tmp_dir, 'transfer.tsv')

        running = os.path.join(cls.tmp_dir, '141124_ST-RUNNING1_03_AFCIDXX')
        to_start = os.path.join(cls.tmp_dir, '141124_ST-TOSTART1_04_FCIDXXX')
        in_progress = os.path.join(cls.tmp_dir, '141124_ST-INPROGRESS1_02_AFCIDXX')
        completed = os.path.join(cls.tmp_dir, '141124_ST-COMPLETED1_01_AFCIDXX')
        finished_runs = [to_start, in_progress, completed]

        # Create runs directory structure
        os.makedirs(cls.tmp_dir)
        os.makedirs(running)
        os.makedirs(to_start)
        os.makedirs(os.path.join(in_progress, 'Demultiplexing'))
        os.makedirs(os.path.join(completed, 'Demultiplexing', 'Stats'))

        # Create files indicating that the run is finished
        for run in finished_runs:
            with open(os.path.join(run, 'RTAComplete.txt'), 'w'):
                pass

        # Create files indicating that the preprocessing is done
        with open(os.path.join(completed, 'Demultiplexing', 'Stats', 'DemultiplexingStats.xml'), 'w'):
            pass

        # Create transfer file and add the completed run
        with open(cls.transfer_file, 'w') as f:
            tsv_writer = csv.writer(f, delimiter='\t')
            tsv_writer.writerow([os.path.basename(completed), str(datetime.now())])

        # Copy the sample RunInfo.xml and runParameters.xml files to every
        # run directory. The source paths are relative, so they are resolved
        # against the current working directory.
        for run in [running, to_start, in_progress, completed]:
            shutil.copy('data/RunInfo.xml', run)
            shutil.copy('data/runParameters.xml', run)

        # Create run objects
        # Jose : add tests for other sequencers
        cls.running = HiSeqX_Run(running, CONFIG["analysis"]["HiSeqX"])
        cls.to_start = Run(to_start, CONFIG["analysis"]["HiSeqX"])
        cls.in_progress = Run(in_progress, CONFIG["analysis"]["HiSeqX"])
        cls.completed = Run(completed, CONFIG["analysis"]["HiSeqX"])
        cls.finished_runs = [cls.to_start, cls.in_progress, cls.completed]

    @classmethod
    def tearDownClass(cls):
        """Remove the whole temporary tree, including the mkdtemp() parent."""
        shutil.rmtree(cls._tmp_root)

    def test_1_is_finished(self):
        """ Is finished should be True only if "RTAComplete.txt" file is present...
        """
        self.assertFalse(self.running._is_sequencing_done())
        # Call the method on each run: asserting on the bound method object
        # itself would always pass, because a bound method is truthy.
        for run in self.finished_runs:
            self.assertTrue(run._is_sequencing_done())

    def test_2_processing_status(self):
        """ Status of the processing depends on the generated files
        """
        self.assertEqual('SEQUENCING', self.running.get_run_status())
        self.assertEqual('TO_START', self.to_start.get_run_status())
        self.assertEqual('IN_PROGRESS', self.in_progress.get_run_status())
        self.assertEqual('COMPLETED', self.completed.get_run_status())

    def test_3_is_transferred(self):
        """ is_transferred should rely on the info in transfer.tsv
        """
        self.assertTrue(self.completed.is_transferred(self.transfer_file))
        self.assertFalse(self.running.is_transferred(self.transfer_file))
        self.assertFalse(self.to_start.is_transferred(self.transfer_file))
        self.assertFalse(self.in_progress.is_transferred(self.transfer_file))
def get_runObj(run):
    """Tries to read runParameters.xml to parse the type of sequencer
    and then return the respective Run object (MiSeq, HiSeq..)

    :param run: run folder; it is joined with the parameters file name and
        handed unchanged to the Run constructor
    :type run: string
    :rtype: Object
    :returns: returns the sequencer type object (HiSeqX_Run, HiSeq_Run,
        MiSeq_Run, NextSeq_Run or NovaSeq_Run); None if the parameters file
        is missing, parsing it raises OSError, or the sequencer type is
        unknown
    """
    # Probe the two spellings in the same order as before: lower-case first.
    for file_name in ('runParameters.xml', 'RunParameters.xml'):
        rppath = os.path.join(run, file_name)
        if os.path.exists(rppath):
            break
    else:
        logger.error(
            'Cannot find RunParameters.xml or runParameters.xml in the run folder for run {}'
            .format(run))
        return None

    try:
        rp = RunParametersParser(rppath)
    except OSError:
        logger.warning(
            'Problems parsing the runParameters.xml file at {}. '
            'This is quite unexpected. please archive the run {} manually'.
            format(rppath, run))
        return None

    # Do a case by case test because there are so many versions of
    # RunParameters that there is no real other way
    parameters = rp.data['RunParameters']
    runtype = parameters.get('Application',
                             parameters.get('ApplicationName', ''))

    if 'Setup' in parameters:
        # This is the HiSeq2500, MiSeq, and HiSeqX case
        setup = parameters['Setup']
        try:
            # Works for recent control software
            runtype = setup['Flowcell']
        except KeyError:
            # Use this as second resource but print a warning in the logs
            logger.warning(
                'Parsing runParameters to fecth instrument type, '
                'not found Flowcell information in it. Using ApplicationName')
            # The '' default keeps the substring tests below from failing
            # when ApplicationName is missing too.
            runtype = setup.get('ApplicationName', '')

    # Order matters: 'HiSeq' is a substring of 'HiSeq X', so the more
    # specific name has to be checked first.
    if 'HiSeq X' in runtype:
        return HiSeqX_Run(run, CONFIG['analysis']['HiSeqX'])
    if 'HiSeq' in runtype or 'TruSeq' in runtype:
        return HiSeq_Run(run, CONFIG['analysis']['HiSeq'])
    if 'MiSeq' in runtype:
        return MiSeq_Run(run, CONFIG['analysis']['MiSeq'])
    if 'NextSeq' in runtype:
        return NextSeq_Run(run, CONFIG['analysis']['NextSeq'])
    if 'NovaSeq' in runtype:
        return NovaSeq_Run(run, CONFIG['analysis']['NovaSeq'])

    logger.warning(
        'Unrecognized run type {}, cannot archive the run {}. '
        'Someone as likely bought a new sequencer without telling '
        'it to the bioinfo team'.format(runtype, run))
    return None