def test_mixed_hcv_skipped():
    """ With both mixed HCV types skipped, only main and midi are active. """
    main_fastq = '2130A-HCV_S15_L001_R1_001.fastq.gz'
    midi_fastq = '2130AMIDI-MidHCV_S16_L001_R1_001.fastq.gz'
    runner = DummySession(skipped_types={PipelineType.MIXED_HCV_MAIN,
                                         PipelineType.MIXED_HCV_MIDI})
    watcher = FolderWatcher('/path/Data/Intensities/BaseCalls', runner=runner)
    group_watcher = SampleWatcher(SampleGroup('2130A', (main_fastq, midi_fastq)))
    watcher.sample_watchers.append(group_watcher)

    watcher.poll_runs()  # filter_quality is launched
    runner.finish_all_runs()  # filter_quality completes
    watcher.poll_runs()  # main and midi are launched

    expected_runs = {
        run_id: dict(id=run_id,
                     folder_watcher=watcher,
                     sample_watcher=group_watcher,
                     pipeline_type=pipeline_type)
        for run_id, pipeline_type in ((102, PipelineType.MAIN),
                                      (103, PipelineType.MIDI))}
    assert expected_runs == runner.active_runs
    assert {main_fastq, midi_fastq} == watcher.active_samples
    assert 2 == len(watcher.active_runs)
def test_hcv_mixed_hcv_running_on_singleton():
    """ A lone HCV sample keeps its mixed HCV and main runs active. """
    fastq_name = 'NEG1-HCV_S15_L001_R1_001.fastq.gz'
    runner = DummySession()
    watcher = FolderWatcher('/path/Data/Intensities/BaseCalls', runner=runner)
    group_watcher = SampleWatcher(SampleGroup('NEG1', (fastq_name, None)))
    watcher.sample_watchers.append(group_watcher)

    watcher.poll_runs()  # filter_quality is launched
    runner.finish_all_runs()  # filter_quality completes
    watcher.poll_runs()  # mixed HCV and main are launched for the one sample
    watcher.poll_runs()  # nothing has finished, so both are still active

    mixed_hcv_run = dict(id=102,
                         folder_watcher=watcher,
                         sample_watcher=group_watcher,
                         pipeline_type=PipelineType.MIXED_HCV_MAIN)
    main_run = dict(id=103,
                    folder_watcher=watcher,
                    sample_watcher=group_watcher,
                    pipeline_type=PipelineType.MAIN)
    assert {102: mixed_hcv_run, 103: main_run} == runner.active_runs
    assert {fastq_name} == watcher.active_samples
    assert 2 == len(watcher.active_runs)
def test_mixed_hcv_skipped_and_complete():
    """ The folder completes once every launched stage has finished. """
    skipped = {PipelineType.MIXED_HCV_MAIN,
               PipelineType.MIXED_HCV_MIDI,
               PipelineType.DENOVO_MAIN,
               PipelineType.DENOVO_MIDI,
               PipelineType.DENOVO_RESISTANCE}
    runner = DummySession(skipped_types=skipped)
    watcher = FolderWatcher('/path/Data/Intensities/BaseCalls', runner=runner)
    sample_group = SampleGroup('2130A',
                               ('2130A-HCV_S15_L001_R1_001.fastq.gz',
                                '2130AMIDI-MidHCV_S16_L001_R1_001.fastq.gz'),
                               ('HCV', 'MidHCV'))
    watcher.sample_watchers.append(SampleWatcher(sample_group))

    # Three launch-then-finish rounds: filter_quality, main and midi,
    # then resistance.
    for _ in range(3):
        watcher.poll_runs()
        runner.finish_all_runs()
    watcher.poll_runs()  # done

    assert not runner.active_runs
    assert not watcher.active_runs
    assert not watcher.active_samples
    assert watcher.is_complete
def test_main_failed():
    """ A failed main run leaves the folder incomplete until the rest finish. """
    runner = DummySession()
    watcher = FolderWatcher('/path/Data/Intensities/BaseCalls', runner=runner)
    sample_group = SampleGroup('1234A',
                               ('1234A-V3LOOP_R1_001.fastq.gz', None),
                               ('V3LOOP', None))
    watcher.sample_watchers.append(SampleWatcher(sample_group))

    watcher.poll_runs()  # filter_quality is launched
    runner.finish_all_runs()  # filter_quality completes
    watcher.poll_runs()  # the main runs are launched

    # Exactly two runs are expected here; the second is the mapping one.
    _denovo_run, mapping_run = runner.active_runs.values()
    runner.fail_run(mapping_run)
    watcher.poll_runs()  # the failure is noticed
    complete_after_failure = watcher.is_complete

    runner.finish_all_runs()
    watcher.poll_runs()
    complete_at_end = watcher.is_complete

    assert not complete_after_failure
    assert complete_at_end
def test_hcv_mixed_hcv_not_finished():
    """ The folder is incomplete while the mixed HCV runs are still active. """
    fastq_names = ('2130A-HCV_S15_L001_R1_001.fastq.gz',
                   '2130AMIDI-MidHCV_S16_L001_R1_001.fastq.gz')
    runner = DummySession()
    watcher = FolderWatcher('/path/Data/Intensities/BaseCalls', runner=runner)
    group_watcher = SampleWatcher(SampleGroup('2130A', fastq_names))
    watcher.sample_watchers.append(group_watcher)

    watcher.poll_runs()  # filter_quality is launched
    runner.finish_all_runs()  # filter_quality completes
    watcher.poll_runs()  # main, midi, and mixed HCV are launched
    for finished_id in (104, 105):  # main, then midi
        runner.finish_run(dict(id=finished_id))
    watcher.poll_runs()  # resistance is launched, mixed HCV keeps running
    runner.finish_run(dict(id=106))  # resistance
    watcher.poll_runs()  # resistance is done, mixed HCV keeps running

    still_running = {102: PipelineType.MIXED_HCV_MAIN,
                     103: PipelineType.MIXED_HCV_MIDI}
    expected_runs = {
        run_id: dict(id=run_id,
                     folder_watcher=watcher,
                     sample_watcher=group_watcher,
                     pipeline_type=pipeline_type)
        for run_id, pipeline_type in still_running.items()}
    assert expected_runs == runner.active_runs
    assert set(fastq_names) == watcher.active_samples
    assert 2 == len(watcher.active_runs)
    assert not watcher.is_complete
def test_denovo_main_finished():
    """ Finishing the main runs launches both kinds of resistance run. """
    runner = DummySession()
    watcher = FolderWatcher('/path/Data/Intensities/BaseCalls', runner=runner)
    sample_group = SampleGroup('1234A',
                               ('1234A-V3LOOP_R1_001.fastq.gz', None),
                               ('V3LOOP', None))
    group_watcher = SampleWatcher(sample_group)
    watcher.sample_watchers.append(group_watcher)

    watcher.poll_runs()  # filter_quality is launched
    runner.finish_all_runs()  # filter_quality completes
    watcher.poll_runs()  # the main runs are launched
    runner.finish_all_runs()  # the main runs complete
    watcher.poll_runs()  # the resistance runs are launched

    def expected_run(run_id, pipeline_type):
        return dict(id=run_id,
                    folder_watcher=watcher,
                    sample_watcher=group_watcher,
                    pipeline_type=pipeline_type)

    expected_runs = {
        104: expected_run(104, PipelineType.DENOVO_RESISTANCE),
        105: expected_run(105, PipelineType.RESISTANCE)}
    assert expected_runs == runner.active_runs
    assert 2 == len(watcher.active_runs)
def test_resistance_running():
    """ An unfinished resistance run stays active and blocks completion. """
    fastq_name = '1234A-V3LOOP_R1_001.fastq.gz'
    denovo_types = {PipelineType.DENOVO_MAIN,
                    PipelineType.DENOVO_MIDI,
                    PipelineType.DENOVO_RESISTANCE}
    runner = DummySession(skipped_types=denovo_types)
    watcher = FolderWatcher('/path/Data/Intensities/BaseCalls', runner=runner)
    group_watcher = SampleWatcher(
        SampleGroup('1234A', (fastq_name, None), ('V3LOOP', None)))
    watcher.sample_watchers.append(group_watcher)

    # Two launch-then-finish rounds: filter_quality, then main.
    for _ in range(2):
        watcher.poll_runs()
        runner.finish_all_runs()
    watcher.poll_runs()  # resistance is launched
    watcher.poll_runs()  # resistance has not finished yet

    resistance_run = dict(id=103,
                          folder_watcher=watcher,
                          sample_watcher=group_watcher,
                          pipeline_type=PipelineType.RESISTANCE)
    assert {103: resistance_run} == runner.active_runs
    assert {fastq_name} == watcher.active_samples
    assert not watcher.is_complete
def test_mid_hcv_complete():
    """ After the midi run finishes, only the main run and sample are active. """
    main_fastq = '2130A-HCV_S15_L001_R1_001.fastq.gz'
    midi_fastq = '2130AMIDI-MidHCV_S16_L001_R1_001.fastq.gz'
    runner = DummySession(skipped_types={PipelineType.MIXED_HCV_MAIN,
                                         PipelineType.MIXED_HCV_MIDI})
    watcher = FolderWatcher('/path/Data/Intensities/BaseCalls', runner=runner)
    group_watcher = SampleWatcher(SampleGroup('2130A', (main_fastq, midi_fastq)))
    watcher.sample_watchers.append(group_watcher)

    watcher.poll_runs()  # filter_quality is launched
    runner.finish_all_runs()  # filter_quality completes
    watcher.poll_runs()  # main and midi are launched
    runner.finish_run(dict(id=103))  # midi completes
    watcher.poll_runs()

    main_run = dict(id=102,
                    folder_watcher=watcher,
                    sample_watcher=group_watcher,
                    pipeline_type=PipelineType.MAIN)
    assert {102: main_run} == runner.active_runs
    assert 1 == len(watcher.active_runs)
    assert {main_fastq} == watcher.active_samples
def test_filter_quality_failed():
    """ A failed filter_quality run ends the folder with nothing active. """
    runner = DummySession()
    watcher = FolderWatcher('/path/Data/Intensities/BaseCalls', runner=runner)
    sample_group = SampleGroup('1234A', ('1234A-V3LOOP_R1_001.fastq.gz', None))
    watcher.sample_watchers.append(SampleWatcher(sample_group))

    watcher.poll_runs()  # filter_quality is launched
    [filter_quality_run] = runner.active_runs.values()  # the only active run
    runner.fail_run(filter_quality_run)
    watcher.poll_runs()  # the failure is noticed, so nothing else is launched

    assert {} == runner.active_runs
    assert watcher.is_complete
def test_filter_quality_running():
    """ Polling again while filter_quality runs leaves just that run active. """
    runner = DummySession()
    watcher = FolderWatcher('/path/Data/Intensities/BaseCalls', runner=runner)
    sample_group = SampleGroup('1234A', ('1234A-V3LOOP_R1_001.fastq.gz', None))
    watcher.sample_watchers.append(SampleWatcher(sample_group))

    watcher.poll_runs()  # filter_quality is launched
    watcher.poll_runs()  # filter_quality has not finished yet

    # The filter_quality run is recorded without a sample watcher.
    filter_quality_run = dict(id=101,
                              folder_watcher=watcher,
                              sample_watcher=None,
                              pipeline_type=PipelineType.FILTER_QUALITY)
    assert {101: filter_quality_run} == runner.active_runs
def test_folder_watcher_run_details():
    """ The run folder and run name are derived from the BaseCalls path. """
    watcher = FolderWatcher(
        '/path/140101_M01234_JUNK/Data/Intensities/BaseCalls')

    assert Path('/path/140101_M01234_JUNK') == watcher.run_folder
    assert '140101_M01234' == watcher.run_name
def test_filter_quality_finished():
    """ Once filter_quality finishes, the main run becomes the active run. """
    runner = DummySession()
    watcher = FolderWatcher('/path/Data/Intensities/BaseCalls', runner=runner)
    group_watcher = SampleWatcher(
        SampleGroup('1234A', ('1234A-V3LOOP_R1_001.fastq.gz', None)))
    watcher.sample_watchers.append(group_watcher)

    watcher.poll_runs()  # filter_quality is launched
    runner.finish_all_runs()  # filter_quality completes
    watcher.poll_runs()  # main is launched

    main_run = dict(id=102,
                    folder_watcher=watcher,
                    sample_watcher=group_watcher,
                    pipeline_type=PipelineType.MAIN)
    assert {102: main_run} == runner.active_runs
def test_filter_quality_running():
    """ A second poll does not change the single active filter_quality run. """
    kive_session = DummySession()
    run_watcher = FolderWatcher('/path/Data/Intensities/BaseCalls',
                                runner=kive_session)
    run_watcher.sample_watchers.append(
        SampleWatcher(
            SampleGroup('1234A', ('1234A-V3LOOP_R1_001.fastq.gz', None))))

    for _ in range(2):
        # The first poll launches filter_quality; the second finds it
        # still running.
        run_watcher.poll_runs()

    expected_runs = {}
    expected_runs[101] = dict(id=101,
                              folder_watcher=run_watcher,
                              sample_watcher=None,
                              pipeline_type=PipelineType.FILTER_QUALITY)
    assert expected_runs == kive_session.active_runs
def test_hcv_mixed_hcv_not_finished():
    """ Unfinished mixed HCV runs keep both samples active and the folder open. """
    main_fastq = '2130A-HCV_S15_L001_R1_001.fastq.gz'
    midi_fastq = '2130AMIDI-MidHCV_S16_L001_R1_001.fastq.gz'
    denovo_types = {PipelineType.DENOVO_MAIN,
                    PipelineType.DENOVO_MIDI,
                    PipelineType.DENOVO_RESISTANCE}
    runner = DummySession(skipped_types=denovo_types)
    watcher = FolderWatcher('/path/Data/Intensities/BaseCalls', runner=runner)
    group_watcher = SampleWatcher(
        SampleGroup('2130A', (main_fastq, midi_fastq), ('HCV', 'MidHCV')))
    watcher.sample_watchers.append(group_watcher)

    watcher.poll_runs()  # filter_quality is launched
    runner.finish_all_runs()  # filter_quality completes
    watcher.poll_runs()  # main, midi, and mixed HCV are launched
    runner.finish_run(dict(id=104))  # main completes
    runner.finish_run(dict(id=105))  # midi completes
    watcher.poll_runs()  # resistance is launched, mixed HCV keeps running
    runner.finish_run(dict(id=106))  # resistance completes
    watcher.poll_runs()  # only the mixed HCV runs remain

    def expected_run(run_id, pipeline_type):
        return dict(id=run_id,
                    folder_watcher=watcher,
                    sample_watcher=group_watcher,
                    pipeline_type=pipeline_type)

    expected_runs = {102: expected_run(102, PipelineType.MIXED_HCV_MAIN),
                     103: expected_run(103, PipelineType.MIXED_HCV_MIDI)}
    assert expected_runs == runner.active_runs
    assert {main_fastq, midi_fastq} == watcher.active_samples
    assert 2 == len(watcher.active_runs)
    assert not watcher.is_complete
def test_resistance_finished():
    """ Nothing stays active and the folder is complete after resistance. """
    runner = DummySession()
    watcher = FolderWatcher('/path/Data/Intensities/BaseCalls', runner=runner)
    sample_group = SampleGroup('1234A', ('1234A-V3LOOP_R1_001.fastq.gz', None))
    watcher.sample_watchers.append(SampleWatcher(sample_group))

    # Three launch-then-finish rounds: filter_quality, main, resistance.
    for _ in range(3):
        watcher.poll_runs()
        runner.finish_all_runs()
    watcher.poll_runs()  # the sample is finished

    assert not runner.active_runs
    assert not watcher.active_samples
    assert not watcher.active_runs
    assert watcher.is_complete
def add_sample_group(self, base_calls, sample_group):
    """ Register a sample group (main sample plus optional midi sample).

    When the folder has no watcher yet, first looks for marker files showing
    that it finished processing after the last folder scan.

    :param base_calls: path to the BaseCalls folder that holds the FASTQ
        files
    :param SampleGroup sample_group: the sample(s) to register
    :return: SampleWatcher for the sample group, or None when a
        doneprocessing or errorprocessing marker is found for the folder
    """
    attempt_count = 0
    while True:
        attempt_count += 1
        # noinspection PyBroadException
        try:
            self.check_session()
            watcher = self.folder_watchers.get(base_calls)
            if watcher is None:
                watcher = FolderWatcher(base_calls, self)

                # The folder may have finished since it was scanned.
                results_path = self.get_results_path(watcher)
                if (results_path / "doneprocessing").exists():
                    return None
                if (watcher.run_folder / "errorprocessing").exists():
                    return None

                self.create_batch(watcher)
                self.upload_filter_quality(watcher)
                shutil.rmtree(results_path, ignore_errors=True)
                # Only remember the watcher once its setup has succeeded.
                self.folder_watchers[base_calls] = watcher

            # Reuse the watcher if this group was already added.
            existing = next(
                (candidate
                 for candidate in watcher.sample_watchers
                 if candidate.sample_group == sample_group),
                None)
            if existing is not None:
                return existing

            new_watcher = SampleWatcher(sample_group)
            for forward_name in filter(None, sample_group.names):
                reverse_name = forward_name.replace('_R1_', '_R2_')
                reads = zip((forward_name, reverse_name),
                            ('forward', 'reverse'))
                for fastq_name, direction in reads:
                    with (base_calls / fastq_name).open('rb') as fastq_file:
                        description = (direction +
                                       ' read from MiSeq run ' +
                                       watcher.run_name)
                        dataset = self.find_or_upload_dataset(fastq_file,
                                                              fastq_name,
                                                              description)
                        new_watcher.fastq_datasets.append(dataset)
            watcher.sample_watchers.append(new_watcher)
            return new_watcher
        except Exception:
            if not self.retry:
                raise
            wait_for_retry(attempt_count)
def test_hcv_mixed_hcv_running_on_singleton():
    """ With no midi sample, mixed HCV main and main stay active together. """
    only_fastq = 'NEG1-HCV_S15_L001_R1_001.fastq.gz'
    kive_session = DummySession()
    run_watcher = FolderWatcher('/path/Data/Intensities/BaseCalls',
                                runner=kive_session)
    neg_watcher = SampleWatcher(SampleGroup('NEG1', (only_fastq, None)))
    run_watcher.sample_watchers.append(neg_watcher)

    run_watcher.poll_runs()  # filter_quality is launched
    kive_session.finish_all_runs()  # filter_quality completes
    run_watcher.poll_runs()  # mixed HCV and main are launched
    run_watcher.poll_runs()  # both are still running

    expected_runs = {
        run_id: dict(id=run_id,
                     folder_watcher=run_watcher,
                     sample_watcher=neg_watcher,
                     pipeline_type=pipeline_type)
        for run_id, pipeline_type in ((102, PipelineType.MIXED_HCV_MAIN),
                                      (103, PipelineType.MAIN))}
    assert expected_runs == kive_session.active_runs
    assert {only_fastq} == run_watcher.active_samples
    assert 2 == len(run_watcher.active_runs)
def test_filter_quality_finished():
    """ With denovo types skipped, only main runs after filter_quality. """
    denovo_types = {PipelineType.DENOVO_MAIN,
                    PipelineType.DENOVO_MIDI,
                    PipelineType.DENOVO_RESISTANCE}
    runner = DummySession(skipped_types=denovo_types)
    watcher = FolderWatcher('/path/Data/Intensities/BaseCalls', runner=runner)
    sample_group = SampleGroup('1234A',
                               ('1234A-V3LOOP_R1_001.fastq.gz', None),
                               ('V3LOOP', None))
    group_watcher = SampleWatcher(sample_group)
    watcher.sample_watchers.append(group_watcher)

    watcher.poll_runs()  # filter_quality is launched
    runner.finish_all_runs()  # filter_quality completes
    watcher.poll_runs()  # main is launched

    main_run = dict(id=102,
                    folder_watcher=watcher,
                    sample_watcher=group_watcher,
                    pipeline_type=PipelineType.MAIN)
    assert {102: main_run} == runner.active_runs
def test_resistance_finished():
    """ After resistance finishes, the folder is complete with nothing active. """
    kive_session = DummySession()
    run_watcher = FolderWatcher('/path/Data/Intensities/BaseCalls',
                                runner=kive_session)
    run_watcher.sample_watchers.append(
        SampleWatcher(
            SampleGroup('1234A', ('1234A-V3LOOP_R1_001.fastq.gz', None))))

    stages = ('filter_quality', 'main', 'resistance')
    for _stage in stages:
        run_watcher.poll_runs()  # launch the stage
        kive_session.finish_all_runs()  # and let it complete
    run_watcher.poll_runs()  # the sample is finished

    assert not kive_session.active_runs
    assert not run_watcher.active_samples
    assert not run_watcher.active_runs
    assert run_watcher.is_complete
def test_denovo_resistance_complete():
    """ Finishing only denovo resistance does not complete the folder. """
    runner = DummySession()
    watcher = FolderWatcher('/path/Data/Intensities/BaseCalls', runner=runner)
    sample_group = SampleGroup('1234A',
                               ('1234A-V3LOOP_R1_001.fastq.gz', None),
                               ('V3LOOP', None))
    group_watcher = SampleWatcher(sample_group)
    watcher.sample_watchers.append(group_watcher)

    # Two launch-then-finish rounds: filter_quality, then main.
    for _ in range(2):
        watcher.poll_runs()
        runner.finish_all_runs()
    watcher.poll_runs()  # the resistance runs are launched

    runner.finish_run(group_watcher.runs[PipelineType.DENOVO_RESISTANCE])
    watcher.poll_runs()  # denovo resistance is done
    watcher.poll_runs()  # main resistance has not finished yet

    assert not watcher.is_complete
def test_hcv_filter_quality_finished():
    """ An HCV main/midi pair launches six runs once filter_quality is done. """
    main_fastq = '2130A-HCV_S15_L001_R1_001.fastq.gz'
    midi_fastq = '2130AMIDI-MidHCV_S16_L001_R1_001.fastq.gz'
    runner = DummySession()
    watcher = FolderWatcher('/path/Data/Intensities/BaseCalls', runner=runner)
    group_watcher = SampleWatcher(
        SampleGroup('2130A', (main_fastq, midi_fastq), ('HCV', 'MidHCV')))
    watcher.sample_watchers.append(group_watcher)

    watcher.poll_runs()  # filter_quality is launched
    runner.finish_all_runs()  # filter_quality completes
    watcher.poll_runs()  # mixed HCV, denovo, and mapping runs are launched

    # Expected pipeline types, listed in order of run id starting at 102.
    launched_types = (PipelineType.MIXED_HCV_MAIN,
                      PipelineType.MIXED_HCV_MIDI,
                      PipelineType.DENOVO_MAIN,
                      PipelineType.DENOVO_MIDI,
                      PipelineType.MAIN,
                      PipelineType.MIDI)
    expected_runs = {
        run_id: dict(id=run_id,
                     folder_watcher=watcher,
                     sample_watcher=group_watcher,
                     pipeline_type=pipeline_type)
        for run_id, pipeline_type in enumerate(launched_types, 102)}
    assert expected_runs == runner.active_runs
    assert {main_fastq, midi_fastq} == watcher.active_samples
    assert 6 == len(watcher.active_runs)
def test_mixed_hcv_skipped_and_complete():
    """ With mixed HCV skipped, the folder completes after resistance. """
    kive_session = DummySession(
        skipped_types={PipelineType.MIXED_HCV_MAIN,
                       PipelineType.MIXED_HCV_MIDI})
    run_watcher = FolderWatcher('/path/Data/Intensities/BaseCalls',
                                runner=kive_session)
    fastq_names = ('2130A-HCV_S15_L001_R1_001.fastq.gz',
                   '2130AMIDI-MidHCV_S16_L001_R1_001.fastq.gz')
    run_watcher.sample_watchers.append(
        SampleWatcher(SampleGroup('2130A', fastq_names)))

    stages = ('filter_quality', 'main and midi', 'resistance')
    for _stage in stages:
        run_watcher.poll_runs()  # launch the stage
        kive_session.finish_all_runs()  # and let it complete
    run_watcher.poll_runs()  # done

    assert not kive_session.active_runs
    assert not run_watcher.active_runs
    assert not run_watcher.active_samples
    assert run_watcher.is_complete
def test_filter_quality_failed():
    """ When filter_quality fails, no run is left active and the folder is done. """
    kive_session = DummySession()
    run_watcher = FolderWatcher('/path/Data/Intensities/BaseCalls',
                                runner=kive_session)
    run_watcher.sample_watchers.append(
        SampleWatcher(
            SampleGroup('1234A', ('1234A-V3LOOP_R1_001.fastq.gz', None))))

    run_watcher.poll_runs()  # filter_quality is launched
    # Single-item unpacking also checks that exactly one run is active.
    (failed_run,) = kive_session.active_runs.values()
    kive_session.fail_run(failed_run)
    run_watcher.poll_runs()  # the failure is noticed, nothing new is launched

    assert {} == kive_session.active_runs
    assert run_watcher.is_complete
def test_resistance_running():
    """ While resistance runs, its sample is active and the folder is open. """
    fastq_name = '1234A-V3LOOP_R1_001.fastq.gz'
    kive_session = DummySession()
    run_watcher = FolderWatcher('/path/Data/Intensities/BaseCalls',
                                runner=kive_session)
    group_watcher = SampleWatcher(SampleGroup('1234A', (fastq_name, None)))
    run_watcher.sample_watchers.append(group_watcher)

    run_watcher.poll_runs()  # filter_quality is launched
    kive_session.finish_all_runs()  # filter_quality completes
    run_watcher.poll_runs()  # main is launched
    kive_session.finish_all_runs()  # main completes
    run_watcher.poll_runs()  # resistance is launched
    run_watcher.poll_runs()  # resistance has not finished yet

    expected_runs = {}
    expected_runs[103] = dict(id=103,
                              folder_watcher=run_watcher,
                              sample_watcher=group_watcher,
                              pipeline_type=PipelineType.RESISTANCE)
    assert expected_runs == kive_session.active_runs
    assert {fastq_name} == run_watcher.active_samples
    assert not run_watcher.is_complete
def test_main_running():
    """ Polling again while main runs leaves just the main run active. """
    runner = DummySession()
    watcher = FolderWatcher('/path/Data/Intensities/BaseCalls', runner=runner)
    sample_group = SampleGroup('1234A', ('1234A-V3LOOP_R1_001.fastq.gz', None))
    group_watcher = SampleWatcher(sample_group)
    watcher.sample_watchers.append(group_watcher)

    watcher.poll_runs()  # filter_quality is launched
    runner.finish_all_runs()  # filter_quality completes
    watcher.poll_runs()  # main is launched
    watcher.poll_runs()  # main has not finished yet

    main_run = dict(id=102,
                    folder_watcher=watcher,
                    sample_watcher=group_watcher,
                    pipeline_type=PipelineType.MAIN)
    assert {102: main_run} == runner.active_runs
def add_folder(self, base_calls):
    """ Create a FolderWatcher for a BaseCalls folder and remember it.

    Any watcher already stored under the same key is replaced.

    :param base_calls: path to the BaseCalls folder, also used as the key
        in self.folder_watchers
    :return: the newly created FolderWatcher
    """
    new_watcher = self.folder_watchers[base_calls] = FolderWatcher(base_calls,
                                                                   self)
    return new_watcher
def test_folder_watcher_repr_with_pathlib():
    """ repr() shows the folder as a plain string even when given a Path. """
    watcher = FolderWatcher(Path('/path/Data/Intensities/BaseCalls'))

    actual_repr = repr(watcher)

    assert "FolderWatcher('/path/Data/Intensities/BaseCalls')" == actual_repr
def run_pipeline(self,
                 folder_watcher: FolderWatcher,
                 pipeline_type: PipelineType,
                 sample_watcher: SampleWatcher):
    """ Find or launch the run for one pipeline type.

    :param folder_watcher: supplies the batch, run name, quality dataset,
        filter quality run, and cached bad cycles dataset
    :param pipeline_type: the kind of run to find or launch
    :param sample_watcher: supplies the FASTQ datasets and sample names;
        not read for the filter quality pipeline
    :return: the result of find_or_launch_run() or
        run_resistance_pipeline(), or None when the needed pipeline id is
        not configured
    """
    if pipeline_type == PipelineType.FILTER_QUALITY:
        return self.find_or_launch_run(
            self.config.micall_filter_quality_pipeline_id,
            dict(quality_csv=folder_watcher.quality_dataset),
            'MiCall filter quality on ' + folder_watcher.run_name,
            folder_watcher.batch)

    # Resistance runs are built from the matching main and midi runs.
    if pipeline_type == PipelineType.RESISTANCE:
        return self.run_resistance_pipeline(
            sample_watcher,
            folder_watcher,
            (PipelineType.MAIN, PipelineType.MIDI),
            'MiCall resistance')
    if pipeline_type == PipelineType.DENOVO_RESISTANCE:
        return self.run_resistance_pipeline(
            sample_watcher,
            folder_watcher,
            (PipelineType.DENOVO_MAIN, PipelineType.DENOVO_MIDI),
            'MiCall denovo resistance')

    mixed_hcv_types = (PipelineType.MIXED_HCV_MAIN,
                       PipelineType.MIXED_HCV_MIDI)
    if pipeline_type in mixed_hcv_types:
        if self.config.mixed_hcv_pipeline_id is None:
            return None
        # Main reads FASTQ datasets 0 and 1, midi reads datasets 2 and 3.
        position = 0 if pipeline_type == PipelineType.MIXED_HCV_MAIN else 1
        first = position * 2
        input_datasets = dict(fastq1=sample_watcher.fastq_datasets[first],
                              fastq2=sample_watcher.fastq_datasets[first + 1])
        sample_name = sample_watcher.sample_group.names[position]
        return self.find_or_launch_run(
            self.config.mixed_hcv_pipeline_id,
            input_datasets,
            'Mixed HCV on ' + trim_name(sample_name),
            folder_watcher.batch)

    # Everything left is a main-style run on the main or the midi sample.
    if pipeline_type in (PipelineType.MAIN, PipelineType.MIDI):
        group_position = 0 if pipeline_type == PipelineType.MAIN else 1
        run_name = 'MiCall main'
        pipeline_id = self.config.micall_main_pipeline_id
    else:
        if pipeline_type == PipelineType.DENOVO_MAIN:
            group_position = 0
        else:
            assert pipeline_type == PipelineType.DENOVO_MIDI
            group_position = 1
        run_name = 'MiCall denovo main'
        pipeline_id = self.config.denovo_main_pipeline_id
    if pipeline_id is None:
        return None

    first = group_position * 2
    fastq1, fastq2 = sample_watcher.fastq_datasets[first:first + 2]
    sample_name = sample_watcher.sample_group.names[group_position]
    run_name += ' on ' + trim_name(sample_name)
    sample_info = self.get_sample_info(pipeline_id,
                                       sample_watcher,
                                       group_position)

    if folder_watcher.bad_cycles_dataset is None:
        # Fetch the bad cycles dataset from the filter quality run, and
        # cache it on the folder watcher for later calls.
        filter_run_id = folder_watcher.filter_quality_run['id']
        dataset_list_url = f'{filter_run_id}/dataset_list/'
        run_datasets = self.kive_retry(
            lambda: self.session.endpoints.containerruns.get(
                dataset_list_url))
        # Unpacking insists on exactly one bad_cycles_csv entry.
        [bad_cycles_entry] = [entry
                              for entry in run_datasets
                              if entry['argument_name'] == 'bad_cycles_csv']
        folder_watcher.bad_cycles_dataset = self.kive_retry(
            lambda: self.session.get(bad_cycles_entry['dataset']).json())

    inputs = {'fastq1': fastq1,
              'fastq2': fastq2,
              'bad_cycles_csv': folder_watcher.bad_cycles_dataset}
    if sample_info is not None:
        inputs['sample_info_csv'] = sample_info
    return self.find_or_launch_run(pipeline_id,
                                   inputs,
                                   run_name,
                                   folder_watcher.batch)