def test_special_char_in_scan_name(self):
    """
    Tests whether XNAT source can download files with spaces in their names

    One input is matched for every entry in ``self.DATASETS``.  After the
    source node has been run inside a workflow, each output path must exist
    and its basename must equal the corresponding dataset name.
    """
    import shutil  # local import so the cleanup below is self-contained

    cache_dir = tempfile.mkdtemp()
    # Remove the temporary cache when the test finishes (pass or fail) so
    # repeated runs don't leave directories behind
    self.addCleanup(shutil.rmtree, cache_dir, ignore_errors=True)
    archive = XnatArchive(server=SERVER, cache_dir=cache_dir,
                          project_id=self.PROJECT)
    study = DummyStudy(
        'study', archive, LinearRunner('ad'),
        inputs=[
            DatasetMatch('source{}'.format(i), dicom_format, d)
            for i, d in enumerate(self.DATASETS, start=1)],
        subject_ids=[self.SUBJECT], visit_ids=[self.VISIT])
    source = archive.source([
        study.input('source{}'.format(i))
        for i in range(1, len(self.DATASETS) + 1)])
    source.inputs.subject_id = self.SUBJECT
    source.inputs.visit_id = self.VISIT
    workflow = pe.Workflow(self.TEST_NAME, base_dir=self.work_path)
    workflow.add_nodes([source])
    graph = workflow.run()
    # Pick the executed source node back out of the returned graph to get
    # at its outputs
    result = next(n.result for n in graph.nodes()
                  if n.name == source.name)
    for i, dname in enumerate(self.DATASETS, start=1):
        path = getattr(result.outputs,
                       'source{}{}'.format(i, PATH_SUFFIX))
        self.assertEqual(os.path.basename(path), dname)
        self.assertTrue(os.path.exists(path))
def test_dicom_match(self):
    """
    Checks the datasets selected by the DICOM tag match inputs

    Builds a ``TestMatchStudy`` from the ``DICOM_MATCH`` inputs and asserts
    the names of the first 'gre_phase' and 'gre_mag' data items.
    """
    import shutil  # local import so the cleanup below is self-contained

    cache_dir = tempfile.mkdtemp()
    # Remove the temporary cache when the test finishes (pass or fail) so
    # repeated runs don't leave directories behind
    self.addCleanup(shutil.rmtree, cache_dir, ignore_errors=True)
    study = test_dataset.TestMatchStudy(
        name='test_dicom',
        archive=XnatArchive(project_id='TEST001', server=SERVER,
                            cache_dir=cache_dir),
        runner=LinearRunner(self.work_dir),
        inputs=test_dataset.TestDicomTagMatch.DICOM_MATCH,
        subject_ids=['DATASET'],
        visit_ids=['DICOMTAGMATCH'])
    phase = study.data('gre_phase')[0]
    mag = study.data('gre_mag')[0]
    self.assertEqual(phase.name, 'gre_field_mapping_3mm_phase')
    self.assertEqual(mag.name, 'gre_field_mapping_3mm_mag')
def test_archive_roundtrip(self):
    """
    Sources four datasets from ``self.archive`` and sinks all of them
    except 'source2' back again, then checks the contents of the session
    directory.
    """
    source_names = ('source1', 'source2', 'source3', 'source4')
    study = DummyStudy(
        self.STUDY_NAME, self.archive, runner=LinearRunner('a_dir'),
        inputs=[DatasetMatch(name, nifti_gz_format, name)
                for name in source_names])
    # TODO: Should test out other file formats as well.
    source_files = [study.input(name) for name in source_names]
    sink_files = [study.bound_data_spec(name)
                  for name in ('sink1', 'sink3', 'sink4')]
    inputnode = pe.Node(IdentityInterface(['subject_id', 'visit_id']),
                        'inputnode')
    inputnode.inputs.subject_id = self.SUBJECT
    inputnode.inputs.visit_id = self.VISIT
    source = self.archive.source(source_files, study_name=self.STUDY_NAME)
    sink = self.archive.sink(sink_files, study_name=self.STUDY_NAME)
    sink.inputs.name = 'archive_sink'
    sink.inputs.desc = (
        "A test session created by archive roundtrip unittest")
    # Wire the ID fields into both nodes, then the source outputs into the
    # matching sink inputs
    workflow = pe.Workflow('source_sink_unit_test', base_dir=self.work_dir)
    workflow.add_nodes((source, sink))
    for node in (source, sink):
        for id_field in ('subject_id', 'visit_id'):
            workflow.connect(inputnode, id_field, node, id_field)
    for source_file in source_files:
        source_name = source_file.name
        if source_name.endswith('2'):
            continue  # names ending in '2' are deliberately not sunk
        sink_name = source_name.replace('source', 'sink')
        workflow.connect(source, source_name + PATH_SUFFIX,
                         sink, sink_name + PATH_SUFFIX)
    workflow.run()
    # Check local directory was created properly
    outputs = sorted(fname for fname in os.listdir(self.session_dir)
                     if fname != FIELDS_FNAME)
    expected = [self.STUDY_NAME + suffix
                for suffix in ('_sink1.nii.gz', '_sink3.nii.gz',
                               '_sink4.nii.gz')]
    expected += ['source1.nii.gz', 'source2.nii.gz',
                 'source3.nii.gz', 'source4.nii.gz']
    self.assertEqual(outputs, expected)
def test_summary(self):
    """
    Round-trips per-subject, per-visit and per-project summary datasets
    through ``self.archive``.

    'source1', 'source2' and 'source3' are sunk to one summary sink per
    frequency and the directory of each frequency is checked.  The summary
    datasets are then sourced again and re-sunk as 'resink1', 'resink2' and
    'resink3', and the session directory listing is checked.
    """
    summary_name = self.SUMMARY_STUDY_NAME
    source_names = ('source1', 'source2', 'source3')
    study = DummyStudy(
        summary_name, self.archive, LinearRunner('ad'),
        inputs=[DatasetMatch(n, nifti_gz_format, n) for n in source_names])
    # TODO: Should test out other file formats as well.
    source_files = [study.input(n) for n in source_names]
    inputnode = pe.Node(IdentityInterface(['subject_id', 'visit_id']),
                        'inputnode')
    inputnode.inputs.subject_id = self.SUBJECT
    inputnode.inputs.visit_id = self.VISIT
    source = self.archive.source(source_files)

    def make_sink(spec_name, frequency, name, desc):
        "Creates a sink of the given frequency for a single bound spec"
        specs = [study.bound_data_spec(spec_name)]
        node = self.archive.sink(specs, frequency=frequency,
                                 study_name=summary_name)
        node.inputs.name = name
        node.inputs.desc = desc
        return specs, node

    # Test subject sink
    subject_sink_files, subject_sink = make_sink(
        'subject_sink', 'per_subject', 'subject_summary',
        "Tests the sinking of subject-wide datasets")
    # Test visit sink
    visit_sink_files, visit_sink = make_sink(
        'visit_sink', 'per_visit', 'visit_summary',
        "Tests the sinking of visit-wide datasets")
    # Test project sink
    project_sink_files, project_sink = make_sink(
        'project_sink', 'per_project', 'project_summary',
        "Tests the sinking of project-wide datasets")
    # Create workflow connecting them together
    workflow = pe.Workflow('summary_unittest', base_dir=self.work_dir)
    workflow.add_nodes((source, subject_sink, visit_sink, project_sink))
    connections = [
        (inputnode, 'subject_id', source, 'subject_id'),
        (inputnode, 'visit_id', source, 'visit_id'),
        (inputnode, 'subject_id', subject_sink, 'subject_id'),
        (inputnode, 'visit_id', visit_sink, 'visit_id'),
        (source, 'source1' + PATH_SUFFIX,
         subject_sink, 'subject_sink' + PATH_SUFFIX),
        (source, 'source2' + PATH_SUFFIX,
         visit_sink, 'visit_sink' + PATH_SUFFIX),
        (source, 'source3' + PATH_SUFFIX,
         project_sink, 'project_sink' + PATH_SUFFIX)]
    for connection in connections:
        workflow.connect(*connection)
    workflow.run()
    # Check local summary directories were created properly
    summary_checks = (('per_subject', '_subject_sink.nii.gz'),
                      ('per_visit', '_visit_sink.nii.gz'),
                      ('per_project', '_project_sink.nii.gz'))
    for frequency, fname_suffix in summary_checks:
        summary_dir = self.get_session_dir(frequency=frequency)
        self.assertEqual(sorted(os.listdir(summary_dir)),
                         [summary_name + fname_suffix])
    # Reload the data from the summary directories
    reloadinputnode = pe.Node(
        IdentityInterface(['subject_id', 'visit_id']), 'reload_inputnode')
    reloadinputnode.inputs.subject_id = self.SUBJECT
    reloadinputnode.inputs.visit_id = self.VISIT
    reload_files = (source_files + subject_sink_files + visit_sink_files +
                    project_sink_files)
    reloadsource = self.archive.source(
        reload_files, name='reload_source', study_name=summary_name)
    reloadsink = self.archive.sink(
        [study.bound_data_spec(n)
         for n in ('resink1', 'resink2', 'resink3')],
        study_name=summary_name)
    reloadsink.inputs.name = 'reload_summary'
    reloadsink.inputs.desc = (
        "Tests the reloading of subject and project summary datasets")
    reloadworkflow = pe.Workflow('reload_summary_unittest',
                                 base_dir=self.work_dir)
    for node in (reloadsource, reloadsink):
        for id_field in ('subject_id', 'visit_id'):
            reloadworkflow.connect(reloadinputnode, id_field,
                                   node, id_field)
    resink_map = (('subject_sink', 'resink1'),
                  ('visit_sink', 'resink2'),
                  ('project_sink', 'resink3'))
    for sunk_name, resink_name in resink_map:
        reloadworkflow.connect(reloadsource, sunk_name + PATH_SUFFIX,
                               reloadsink, resink_name + PATH_SUFFIX)
    reloadworkflow.run()
    outputs = sorted(fname for fname in os.listdir(self.session_dir)
                     if fname != FIELDS_FNAME)
    expected = [summary_name + suffix
                for suffix in ('_resink1.nii.gz', '_resink2.nii.gz',
                               '_resink3.nii.gz')]
    expected += ['source1.nii.gz', 'source2.nii.gz',
                 'source3.nii.gz', 'source4.nii.gz']
    self.assertEqual(outputs, expected)
def runner(self):
    """Return a new ``LinearRunner`` constructed from ``self.work_dir``."""
    work_dir = self.work_dir
    return LinearRunner(work_dir)
def test_digest_check(self):
    """
    Tests check of downloaded digests to see if file needs to be
    redownloaded
    """
    source_name = 'source1'
    sink_name = 'sink1'
    study_name = 'digest_check_study'
    cache_dir = os.path.join(self.base_cache_path, 'digest-check-cache')
    source_fname = source_name + nifti_gz_format.extension
    source_target_path = os.path.join(self.session_cache(cache_dir),
                                      source_fname)
    md5_path = source_target_path + XnatArchive.MD5_SUFFIX

    def read_text(path):
        "Returns the full text contents of the file at 'path'"
        with open(path) as f:
            return f.read()

    def read_json(path):
        "Returns the decoded JSON contents of the file at 'path'"
        with open(path) as f:
            return json.load(f)

    # Start from an empty cache
    shutil.rmtree(cache_dir, ignore_errors=True)
    os.makedirs(cache_dir)
    archive = XnatArchive(project_id=self.PROJECT, server=SERVER,
                          cache_dir=cache_dir)
    study = DummyStudy(
        study_name, archive, LinearRunner('ad'),
        inputs=[DatasetMatch(source_name, nifti_gz_format, source_name)])
    source = archive.source([study.input(source_name)],
                            name='digest_check_source',
                            study_name=study_name)
    source.inputs.subject_id = self.SUBJECT
    source.inputs.visit_id = self.VISIT
    source.run()
    self.assertTrue(os.path.exists(md5_path))
    self.assertTrue(os.path.exists(source_target_path))
    digests = read_json(md5_path)
    # Stash the downloaded file in a new location and create a dummy
    # file instead
    stash_path = source_target_path + '.stash'
    shutil.move(source_target_path, stash_path)
    with open(source_target_path, 'w') as f:
        f.write('dummy')
    # Run the download, which shouldn't download as the digests are the
    # same
    source.run()
    self.assertEqual(read_text(source_target_path), 'dummy')
    # Replace the digest with a dummy
    os.remove(md5_path)
    digests[source_fname] = 'dummy_digest'
    with open(md5_path, 'w') as f:
        json.dump(digests, f)
    # Retry the download, which should now download since the digests
    # differ
    source.run()
    redownloaded = read_text(source_target_path)
    stashed = read_text(stash_path)
    self.assertEqual(redownloaded, stashed)
    # Resink the source file and check that the generated MD5 digest is
    # stored in identical format
    sink_archive = XnatArchive(project_id=self.DIGEST_SINK_PROJECT,
                               server=SERVER, cache_dir=cache_dir)
    sink = sink_archive.sink([study.bound_data_spec(sink_name)],
                             name='digest_check_sink',
                             study_name=study_name)
    sink.inputs.name = 'digest_check_sink'
    sink.inputs.desc = "Tests the generation of MD5 digests"
    sink.inputs.subject_id = self.DIGEST_SINK_SUBJECT
    sink.inputs.visit_id = self.VISIT
    sink.inputs.sink1_path = source_target_path
    sink_fname = (study_name + '_' + sink_name +
                  nifti_gz_format.extension)
    sink_session_dir = (
        self.session_cache(cache_dir,
                           project=self.DIGEST_SINK_PROJECT,
                           subject=(self.DIGEST_SINK_SUBJECT)) +
        XnatArchive.PROCESSED_SUFFIX)
    sink_target_path = os.path.join(sink_session_dir, sink_fname)
    sink_md5_path = sink_target_path + XnatArchive.MD5_SUFFIX
    sink.run()
    source_digests = read_json(md5_path)
    sink_digests = read_json(sink_md5_path)
    self.assertEqual(
        source_digests[source_fname],
        sink_digests[sink_fname],
        ("Source digest ({}) did not equal sink digest ({})".format(
            source_digests[source_fname], sink_digests[sink_fname])))
def test_delayed_download(self):
    """
    Tests handling of race conditions where separate processes attempt to
    cache the same dataset

    The test runs in three stages against a freshly cleared cache:

    1. A plain download, checking the output lands on the expected path.
    2. A download started while a stale '.download' directory already
       exists and is never updated.
    3. A download started while a child process is actively "downloading"
       (it touches the '.download' directory and then writes the target
       file itself), checking that the simulated file is the one left in
       place.
    """
    cache_dir = os.path.join(self.base_cache_path,
                             'delayed-download-cache')
    DATASET_NAME = 'source1'
    target_path = os.path.join(self.session_cache(cache_dir),
                               DATASET_NAME + nifti_gz_format.extension)
    tmp_dir = target_path + '.download'
    shutil.rmtree(cache_dir, ignore_errors=True)
    os.makedirs(cache_dir)
    archive = XnatArchive(server=SERVER, cache_dir=cache_dir,
                          project_id=self.PROJECT)
    study = DummyStudy(
        self.STUDY_NAME, archive, LinearRunner('ad'),
        inputs=[DatasetMatch(DATASET_NAME, nifti_gz_format,
                             DATASET_NAME)])
    source = archive.source([study.input(DATASET_NAME)],
                            name='delayed_source',
                            study_name='delayed_study')
    source.inputs.subject_id = self.SUBJECT
    source.inputs.visit_id = self.VISIT
    result1 = source.run()
    source1_path = result1.outputs.source1_path
    self.assertTrue(os.path.exists(source1_path))
    self.assertEqual(
        source1_path, target_path,
        "Output file path '{}' not equal to target path '{}'".format(
            source1_path, target_path))
    # Clear cache to start again
    shutil.rmtree(cache_dir, ignore_errors=True)
    # Create tmp_dir before running interface, this time should wait for 1
    # second, check to see that the session hasn't been created and then
    # clear it and redownload the dataset.
    os.makedirs(tmp_dir)
    source.inputs.race_cond_delay = 1
    result2 = source.run()
    source1_path = result2.outputs.source1_path
    # The stale download directory must not stop the dataset from being
    # fetched
    self.assertTrue(
        os.path.exists(source1_path),
        "Dataset was not redownloaded to '{}' after the stale download "
        "directory timed out".format(source1_path))
    # Clear cache to start again
    shutil.rmtree(cache_dir, ignore_errors=True)
    # This time a separate process creates tmp_dir and keeps modifying it,
    # so the source should keep waiting and end up using the file written
    # by that process instead of downloading its own copy.
    internal_dir = os.path.join(tmp_dir, 'internal')
    deleted_tmp_dir = tmp_dir + '.deleted'

    def simulate_download():
        "Simulates a download in a separate process"
        os.makedirs(internal_dir)
        time.sleep(5)
        # Modify a file in the temp dir to make the source download keep
        # waiting
        logger.info('Updating simulated download directory')
        with open(os.path.join(internal_dir, 'download'), 'a') as f:
            f.write('downloading')
        time.sleep(10)
        # Simulate the finalising of the download by copying the previously
        # downloaded file into place and deleting the temp dir.
        logger.info('Finalising simulated download')
        with open(target_path, 'a') as f:
            f.write('simulated')
        shutil.move(tmp_dir, deleted_tmp_dir)

    source.inputs.race_cond_delay = 10
    p = Process(target=simulate_download)
    p.start()  # Start the simulated download in separate process
    try:
        time.sleep(1)
        source.run()  # Run the local download
    finally:
        # Always reap the child, even if the local download raised, so a
        # failing test doesn't leave a stray process behind
        p.join()
    with open(os.path.join(deleted_tmp_dir, 'internal',
                           'download')) as f:
        d = f.read()
    self.assertEqual(d, 'downloading')
    with open(target_path) as f:
        d = f.read()
    self.assertEqual(d, 'simulated')
def test_summary(self):
    """
    Round-trips per-subject, per-visit and per-project summary datasets
    through an ``XnatArchive``.

    'source1', 'source2' and 'source3' are sunk to one summary sink per
    frequency; the result is checked both in the local cache directory and
    in the scans listed on XNAT.  The summary datasets are then sourced
    again and re-sunk as 'resink1', 'resink2' and 'resink3', which is
    checked in the same two places.
    """
    # Create working dirs
    # Create XnatSource node
    archive = XnatArchive(server=SERVER,
                          cache_dir=self.archive_cache_dir,
                          project_id=self.PROJECT)
    study = DummyStudy(
        self.SUMMARY_STUDY_NAME, archive, LinearRunner('ad'),
        inputs=[
            DatasetMatch('source1', nifti_gz_format, 'source1'),
            DatasetMatch('source2', nifti_gz_format, 'source2'),
            DatasetMatch('source3', nifti_gz_format, 'source3')])
    # TODO: Should test out other file formats as well.
    source_files = [
        study.input(n) for n in ('source1', 'source2', 'source3')]
    inputnode = pe.Node(IdentityInterface(['subject_id', 'visit_id']),
                        'inputnode')
    inputnode.inputs.subject_id = self.SUBJECT
    inputnode.inputs.visit_id = self.VISIT
    source = archive.source(source_files)
    # Test subject sink
    subject_sink_files = [study.bound_data_spec('subject_sink')]
    subject_sink = archive.sink(subject_sink_files,
                                frequency='per_subject',
                                study_name=self.SUMMARY_STUDY_NAME)
    subject_sink.inputs.name = 'subject_summary'
    subject_sink.inputs.desc = (
        "Tests the sinking of subject-wide datasets")
    # Test visit sink
    visit_sink_files = [study.bound_data_spec('visit_sink')]
    visit_sink = archive.sink(visit_sink_files,
                              frequency='per_visit',
                              study_name=self.SUMMARY_STUDY_NAME)
    visit_sink.inputs.name = 'visit_summary'
    visit_sink.inputs.desc = ("Tests the sinking of visit-wide datasets")
    # Test project sink
    project_sink_files = [study.bound_data_spec('project_sink')]
    project_sink = archive.sink(project_sink_files,
                                frequency='per_project',
                                study_name=self.SUMMARY_STUDY_NAME)
    project_sink.inputs.name = 'project_summary'
    project_sink.inputs.desc = (
        "Tests the sinking of project-wide datasets")
    # Create workflow connecting them together
    workflow = pe.Workflow('summary_unittest', base_dir=self.work_dir)
    workflow.add_nodes((source, subject_sink, visit_sink, project_sink))
    workflow.connect(inputnode, 'subject_id', source, 'subject_id')
    workflow.connect(inputnode, 'visit_id', source, 'visit_id')
    workflow.connect(inputnode, 'subject_id', subject_sink, 'subject_id')
    workflow.connect(inputnode, 'visit_id', visit_sink, 'visit_id')
    workflow.connect(source, 'source1' + PATH_SUFFIX,
                     subject_sink, 'subject_sink' + PATH_SUFFIX)
    workflow.connect(source, 'source2' + PATH_SUFFIX,
                     visit_sink, 'visit_sink' + PATH_SUFFIX)
    workflow.connect(source, 'source3' + PATH_SUFFIX,
                     project_sink, 'project_sink' + PATH_SUFFIX)
    workflow.run()
    with self._connect() as mbi_xnat:
        # Check subject summary directories were created properly in cache
        expected_subj_datasets = [
            self.SUMMARY_STUDY_NAME + '_subject_sink']
        subject_dir = os.path.join(
            self.archive_cache_dir, self.PROJECT,
            '_'.join((self.PROJECT, self.SUBJECT)),
            '_'.join((self.PROJECT, self.SUBJECT,
                      XnatArchive.SUMMARY_NAME)))
        self.assertEqual(
            filter_md5_fnames(os.listdir(subject_dir)),
            [d + nifti_gz_format.extension
             for d in expected_subj_datasets])
        # and on XNAT
        subject_dataset_names = mbi_xnat.projects[
            self.PROJECT].experiments['_'.join(
                (self.PROJECT, self.SUBJECT,
                 XnatArchive.SUMMARY_NAME))].scans.keys()
        # keys() may be a view rather than a list, which never compares
        # equal to a list, so normalise with sorted() as the other XNAT
        # checks do
        self.assertEqual(expected_subj_datasets,
                         sorted(subject_dataset_names))
        # Check visit summary directories were created properly in
        # cache
        expected_visit_datasets = [
            self.SUMMARY_STUDY_NAME + '_visit_sink']
        visit_dir = os.path.join(
            self.archive_cache_dir, self.PROJECT,
            self.PROJECT + '_' + XnatArchive.SUMMARY_NAME,
            (self.PROJECT + '_' + XnatArchive.SUMMARY_NAME + '_' +
             self.VISIT))
        self.assertEqual(
            filter_md5_fnames(os.listdir(visit_dir)),
            [d + nifti_gz_format.extension
             for d in expected_visit_datasets])
        # and on XNAT
        visit_dataset_names = mbi_xnat.projects[
            self.PROJECT].experiments[
                '{}_{}_{}'.format(self.PROJECT,
                                  XnatArchive.SUMMARY_NAME,
                                  self.VISIT)].scans.keys()
        self.assertEqual(expected_visit_datasets,
                         sorted(visit_dataset_names))
        # Check project summary directories were created properly in cache
        expected_proj_datasets = [
            self.SUMMARY_STUDY_NAME + '_project_sink']
        project_dir = os.path.join(
            self.archive_cache_dir, self.PROJECT,
            self.PROJECT + '_' + XnatArchive.SUMMARY_NAME,
            self.PROJECT + '_' + XnatArchive.SUMMARY_NAME + '_' +
            XnatArchive.SUMMARY_NAME)
        self.assertEqual(
            filter_md5_fnames(os.listdir(project_dir)),
            [d + nifti_gz_format.extension
             for d in expected_proj_datasets])
        # and on XNAT
        project_dataset_names = mbi_xnat.projects[
            self.PROJECT].experiments['{}_{sum}_{sum}'.format(
                self.PROJECT,
                sum=XnatArchive.SUMMARY_NAME)].scans.keys()
        self.assertEqual(expected_proj_datasets,
                         sorted(project_dataset_names))
    # Reload the data from the summary directories
    reloadinputnode = pe.Node(
        IdentityInterface(['subject_id', 'visit_id']), 'reload_inputnode')
    reloadinputnode.inputs.subject_id = self.SUBJECT
    reloadinputnode.inputs.visit_id = self.VISIT
    reloadsource = archive.source(
        (source_files + subject_sink_files + visit_sink_files +
         project_sink_files),
        name='reload_source',
        study_name=self.SUMMARY_STUDY_NAME)
    reloadsink = archive.sink(
        [study.bound_data_spec(n)
         for n in ('resink1', 'resink2', 'resink3')],
        study_name=self.SUMMARY_STUDY_NAME)
    reloadsink.inputs.name = 'reload_summary'
    reloadsink.inputs.desc = (
        "Tests the reloading of subject and project summary datasets")
    reloadworkflow = pe.Workflow('reload_summary_unittest',
                                 base_dir=self.work_dir)
    reloadworkflow.connect(reloadinputnode, 'subject_id',
                           reloadsource, 'subject_id')
    reloadworkflow.connect(reloadinputnode, 'visit_id',
                           reloadsource, 'visit_id')
    reloadworkflow.connect(reloadinputnode, 'subject_id',
                           reloadsink, 'subject_id')
    reloadworkflow.connect(reloadinputnode, 'visit_id',
                           reloadsink, 'visit_id')
    reloadworkflow.connect(reloadsource, 'subject_sink' + PATH_SUFFIX,
                           reloadsink, 'resink1' + PATH_SUFFIX)
    reloadworkflow.connect(reloadsource, 'visit_sink' + PATH_SUFFIX,
                           reloadsink, 'resink2' + PATH_SUFFIX)
    reloadworkflow.connect(reloadsource, 'project_sink' + PATH_SUFFIX,
                           reloadsink, 'resink3' + PATH_SUFFIX)
    reloadworkflow.run()
    # Check that the re-sunk datasets are present in the processed
    # session cache
    self.assertEqual(
        filter_md5_fnames(os.listdir(self.proc_session_cache())),
        [self.SUMMARY_STUDY_NAME + '_resink1.nii.gz',
         self.SUMMARY_STUDY_NAME + '_resink2.nii.gz',
         self.SUMMARY_STUDY_NAME + '_resink3.nii.gz'])
    # and on XNAT
    with self._connect() as mbi_xnat:
        resinked_dataset_names = mbi_xnat.projects[
            self.PROJECT].experiments[
                self.session_label() +
                XnatArchive.PROCESSED_SUFFIX].scans.keys()
        self.assertEqual(
            sorted(resinked_dataset_names),
            [self.SUMMARY_STUDY_NAME + '_resink1',
             self.SUMMARY_STUDY_NAME + '_resink2',
             self.SUMMARY_STUDY_NAME + '_resink3'])
def test_archive_roundtrip(self):
    """
    Sources four datasets through an ``XnatArchive`` and sinks all of them
    except 'source2' back again.

    The result is checked in the session cache, in the processed session
    cache and in the scans listed for the processed session on XNAT.
    """
    source_names = ('source1', 'source2', 'source3', 'source4')
    archive = XnatArchive(project_id=self.PROJECT, server=SERVER,
                          cache_dir=self.archive_cache_dir)
    study = DummyStudy(
        self.STUDY_NAME, archive, runner=LinearRunner('a_dir'),
        inputs=[DatasetMatch(name, nifti_gz_format, name)
                for name in source_names])
    # TODO: Should test out other file formats as well.
    source_files = [study.input(name) for name in source_names]
    sink_files = [study.bound_data_spec(name)
                  for name in ('sink1', 'sink3', 'sink4')]
    inputnode = pe.Node(IdentityInterface(['subject_id', 'visit_id']),
                        'inputnode')
    inputnode.inputs.subject_id = str(self.SUBJECT)
    inputnode.inputs.visit_id = str(self.VISIT)
    source = archive.source(source_files, study_name=self.STUDY_NAME)
    sink = archive.sink(sink_files, study_name=self.STUDY_NAME)
    sink.inputs.name = 'archive-roundtrip-unittest'
    sink.inputs.desc = (
        "A test session created by archive roundtrip unittest")
    # Wire the ID fields into both nodes, then the source outputs into the
    # matching sink inputs
    workflow = pe.Workflow('source-sink-unit-test',
                           base_dir=self.work_dir)
    workflow.add_nodes((source, sink))
    for node in (source, sink):
        for id_field in ('subject_id', 'visit_id'):
            workflow.connect(inputnode, id_field, node, id_field)
    for source_file in source_files:
        if source_file.name == 'source2':
            continue  # 'source2' is deliberately not sunk
        sink_name = source_file.name.replace('source', 'sink')
        workflow.connect(source, source_file.name + PATH_SUFFIX,
                         sink, sink_name + PATH_SUFFIX)
    workflow.run()
    # Check cache was created properly
    cached_fnames = filter_md5_fnames(os.listdir(self.session_cache()))
    self.assertEqual(cached_fnames,
                     ['source1.nii.gz', 'source2.nii.gz',
                      'source3.nii.gz', 'source4.nii.gz'])
    expected_sink_datasets = [
        self.STUDY_NAME + suffix
        for suffix in ('_sink1', '_sink3', '_sink4')]
    proc_fnames = filter_md5_fnames(os.listdir(self.proc_session_cache()))
    self.assertEqual(
        proc_fnames,
        [name + nifti_gz_format.extension
         for name in expected_sink_datasets])
    # and on XNAT
    with self._connect() as mbi_xnat:
        proc_session = mbi_xnat.experiments[
            self.session_label() + XnatArchive.PROCESSED_SUFFIX]
        dataset_names = proc_session.scans.keys()
        self.assertEqual(sorted(dataset_names), expected_sink_datasets)