def test_cache_download(self):
    repository = XnatRepository(
        project_id=self.project,
        server=SERVER,
        cache_dir=tempfile.mkdtemp())
    study = self.create_study(
        TestStudy, 'cache_download',
        inputs=[
            FilesetSelector('fileset1', text_format, 'fileset1'),
            FilesetSelector('fileset3', text_format, 'fileset3')],
        repository=repository)
    study.cache_inputs()
    for subject_id, visits in list(self.STRUCTURE.items()):
        subj_dir = op.join(
            repository.cache_dir, self.project,
            '{}_{}'.format(self.project, subject_id))
        for visit_id in visits:
            sess_dir = op.join(
                subj_dir,
                '{}_{}_{}'.format(self.project, subject_id, visit_id))
            for inpt in study.inputs:
                self.assertTrue(op.exists(
                    op.join(sess_dir, inpt.name + inpt.format.extension)))
def test_fields_roundtrip(self):
    repository = XnatRepository(
        server=SERVER, cache_dir=self.cache_dir, project_id=self.project)
    study = DummyStudy(
        self.STUDY_NAME, repository, processor=LinearProcessor('a_dir'),
        inputs=[FilesetSelector('source1', text_format, 'source1')])
    fields = ['field{}'.format(i) for i in range(1, 4)]
    sink = study.sink(outputs=fields, name='fields_sink')
    sink.inputs.field1_field = field1 = 1
    sink.inputs.field2_field = field2 = 2.0
    sink.inputs.field3_field = field3 = str('3')
    sink.inputs.subject_id = self.SUBJECT
    sink.inputs.visit_id = self.VISIT
    sink.inputs.desc = "Test sink of fields"
    sink.inputs.name = 'test_sink'
    sink.run()
    source = study.source(inputs=fields, name='fields_source')
    source.inputs.visit_id = self.VISIT
    source.inputs.subject_id = self.SUBJECT
    source.inputs.desc = "Test source of fields"
    source.inputs.name = 'test_source'
    results = source.run()
    self.assertEqual(results.outputs.field1_field, field1)
    self.assertEqual(results.outputs.field2_field, field2)
    self.assertEqual(results.outputs.field3_field, field3)
def test_cache_on_path_access(self):
    tmp_dir = tempfile.mkdtemp()
    repository = XnatRepository(
        project_id=self.project, server=SERVER, cache_dir=tmp_dir)
    tree = repository.tree(
        subject_ids=[self.SUBJECT], visit_ids=[self.VISIT])
    # Get a fileset
    fileset = next(next(next(tree.subjects).sessions).filesets)
    self.assertEqual(fileset._path, None)
    target_path = op.join(
        tmp_dir, self.project,
        '{}_{}'.format(self.project, self.SUBJECT),
        '{}_{}_{}'.format(self.project, self.SUBJECT, self.VISIT),
        fileset.fname)
    # This should implicitly download the fileset
    self.assertEqual(fileset.path, target_path)
    with open(target_path) as f:
        self.assertEqual(f.read(), self.INPUT_DATASETS[fileset.name])
def test_repository_roundtrip(self):
    # Create working dirs
    # Create XnatSource node
    repository = XnatRepository(
        project_id=self.project, server=SERVER, cache_dir=self.cache_dir)
    study = DummyStudy(
        self.STUDY_NAME, repository, processor=LinearProcessor('a_dir'),
        inputs=[
            FilesetSelector('source1', text_format, 'source1'),
            FilesetSelector('source2', text_format, 'source2'),
            FilesetSelector('source3', text_format, 'source3'),
            FilesetSelector('source4', text_format, 'source4')])
    # TODO: Should test out other file formats as well.
    source_files = ['source1', 'source2', 'source3', 'source4']
    sink_files = ['sink1', 'sink3', 'sink4']
    inputnode = pe.Node(
        IdentityInterface(['subject_id', 'visit_id']), 'inputnode')
    inputnode.inputs.subject_id = str(self.SUBJECT)
    inputnode.inputs.visit_id = str(self.VISIT)
    source = study.source(source_files)
    sink = study.sink(sink_files)
    sink.inputs.name = 'repository-roundtrip-unittest'
    sink.inputs.desc = (
        "A test session created by repository roundtrip unittest")
    # Create workflow connecting them together
    workflow = pe.Workflow('source-sink-unit-test', base_dir=self.work_dir)
    workflow.add_nodes((source, sink))
    workflow.connect(inputnode, 'subject_id', source, 'subject_id')
    workflow.connect(inputnode, 'visit_id', source, 'visit_id')
    workflow.connect(inputnode, 'subject_id', sink, 'subject_id')
    workflow.connect(inputnode, 'visit_id', sink, 'visit_id')
    for source_name in source_files:
        if source_name != 'source2':
            sink_name = source_name.replace('source', 'sink')
            workflow.connect(source, source_name + PATH_SUFFIX,
                             sink, sink_name + PATH_SUFFIX)
    workflow.run()
    # Check cache was created properly
    self.assertEqual(
        ls_with_md5_filter(self.session_cache()),
        ['source1.txt', 'source2.txt', 'source3.txt', 'source4.txt'])
    expected_sink_filesets = ['sink1', 'sink3', 'sink4']
    self.assertEqual(
        ls_with_md5_filter(self.session_cache(from_study=self.STUDY_NAME)),
        [d + text_format.extension for d in expected_sink_filesets])
    # Check the sunk filesets are also present on the XNAT server
    with self._connect() as login:
        fileset_names = list(login.experiments[self.session_label(
            from_study=self.STUDY_NAME)].scans.keys())
        self.assertEqual(sorted(fileset_names), expected_sink_filesets)
def test_dicom_match(self):
    study = test_fileset.TestMatchStudy(
        name='test_dicom',
        repository=XnatRepository(
            project_id=self.project, server=SERVER,
            cache_dir=tempfile.mkdtemp()),
        processor=LinearProcessor(self.work_dir),
        inputs=test_fileset.TestDicomTagMatch.DICOM_MATCH)
    phase = list(study.data('gre_phase'))[0]
    mag = list(study.data('gre_mag'))[0]
    self.assertEqual(phase.name, 'gre_field_mapping_3mm_phase')
    self.assertEqual(mag.name, 'gre_field_mapping_3mm_mag')
def test_connect_disconnect(self):
    repository = XnatRepository(
        project_id='dummy', server=SERVER, cache_dir=tempfile.mkdtemp())
    with repository:
        self._test_open(repository)
    self._test_closed(repository)
    # Nested connection contexts should keep the connection open until
    # the outermost context exits
    with repository:
        self._test_open(repository)
        with repository:
            self._test_open(repository)
        self._test_open(repository)
    self._test_closed(repository)
def setUp(self):
    self._clean_up()
    self._repository = XnatRepository(
        project_id=self.project, server=SERVER, cache_dir=self.cache_dir)
    self.BASE_CLASS.setUp(self)
    local_repository = DirectoryRepository(self.project_dir)
    tree = local_repository.tree()
    self._create_project()
    # Upload the contents of the local test repository into the newly
    # created XNAT project
    repo = XnatRepository(SERVER, self.project, '/tmp')
    with repo:
        for node in tree:
            for fileset in node.filesets:
                repo.put_fileset(fileset)
            for field in node.fields:
                repo.put_field(field)
def run_md(input_dir, dynamic=False, xnat_id=None):
    if xnat_id is not None:
        repository = XnatRepository(
            cache_dir=input_dir + '/motion_detection_cache')
        work_dir = input_dir
        project_id = xnat_id.split('_')[0]
        sub_id = xnat_id.split('_')[0] + '_' + xnat_id.split('_')[1]
        session_id = xnat_id.split('_')[2]
    else:
        repository = LocalRepository(input_dir)
        work_dir = os.path.join(input_dir, 'motion_detection_cache')
        project_id = 'work_dir'
        sub_id = 'work_sub_dir'
        session_id = 'work_session_dir'
    ref = 'Head_t1_mprage_sag_p2_iso'
    t1s = ['Head_MAN_SHIM_T1_fl3d_sag_p3_iso_magnitude']
    cls, inputs = create_motion_detection_class(
        'motion_mixin', ref, ref_type='t1', t1s=t1s, dynamic=dynamic)
    print(inputs)
    WORK_PATH = work_dir
    try:
        os.makedirs(WORK_PATH)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise
    study = cls(name='motion_detection', project_id=project_id,
                repository=repository, inputs=inputs)
    study.gather_outputs_pipeline().run(
        subject_ids=[sub_id], visit_ids=[session_id], work_dir=WORK_PATH)
try:
    os.makedirs(WORK_PATH)
except OSError as e:
    if e.errno != errno.EEXIST:
        raise

session_ids_path = os.path.join(
    os.path.dirname(os.path.realpath(__file__)),
    '..', 'resources', 'old_swi_coils_remaining.txt')
print(session_ids_path)
with open(session_ids_path) as f:
    ids = f.read().split()

PROJECT_ID = 'MRH017'
datasets = [DatasetMatch('coils', zip_format, 'swi_coils')]
visit_ids = visit_ids['MR01']

repository = XnatRepository(cache_dir='/scratch/dq13/xnat_cache3')
if args.cache_project:
    project = repository.project(PROJECT_ID, subject_ids=ids,
                                 visit_ids=visit_ids)
    with open(CACHE_PROJECT_PATH, 'wb') as f:
        pkl.dump(project, f)
else:
    with open(CACHE_PROJECT_PATH, 'rb') as f:
        project = pkl.load(f)

repository.cache(PROJECT_ID, datasets, subject_ids=ids,
                 visit_ids=visit_ids)
# File Handler
handler = logging.FileHandler(os.path.join(WORK_PATH, 'out.log'))
formatter = logging.Formatter("%(levelname)s - %(message)s")
handler.setFormatter(formatter)
logger.addHandler(handler)

inputs = [
    DatasetMatch('primary', dicom_format,
                 'R-L_MRtrix_60_directions_interleaved_B0_ep2d_diff_p2'),
    DatasetMatch('dwi_reference', dicom_format,
                 'L-R_MRtrix_60_directions_interleaved_B0_ep2d_diff_p2')]

study = DiffusionStudy(
    name=args.study_name,
    repository=XnatRepository(
        project_id='MRH060',
        server='https://mbi-xnat.erc.monash.edu.au',
        cache_dir=os.path.join(scratch_dir, 'xnat_cache-mnd')),
    runner=LinearRunner(
        work_dir=os.path.join(scratch_dir, 'xnat_working_dir-mnd')),
    inputs=inputs,
    subject_ids=args.subject,
    visit_ids=args.session,
    parameters={'preproc_pe_dir': 'RL'},
    switches={'preproc_denoise': True})

fods = study.data('wm_odf')
print(fods[0].path)
print('Done')
from nianalysis.file_format import dicom_format
from arcana.dataset import Dataset, DatasetMatch
from arcana.repository.xnat import XnatRepository

repository = XnatRepository()

repository.cache(
    'MRH032',
    [Dataset('t1_mprage_sag_p2_iso_1mm', format=dicom_format),
     Dataset('t2_tra_tse_320_4mm', format=dicom_format)],
    subject_ids=['MRH032_{:03}'.format(i) for i in range(1, 20)],
    visit_ids=['MR01', 'MR03'])
def test_summary(self):
    # Create working dirs
    # Create XnatSource node
    repository = XnatRepository(
        server=SERVER, cache_dir=self.cache_dir, project_id=self.project)
    study = DummyStudy(
        self.SUMMARY_STUDY_NAME, repository, LinearProcessor('ad'),
        inputs=[
            FilesetSelector('source1', text_format, 'source1'),
            FilesetSelector('source2', text_format, 'source2'),
            FilesetSelector('source3', text_format, 'source3')])
    # TODO: Should test out other file formats as well.
    source_files = ['source1', 'source2', 'source3']
    inputnode = pe.Node(
        IdentityInterface(['subject_id', 'visit_id']), 'inputnode')
    inputnode.inputs.subject_id = self.SUBJECT
    inputnode.inputs.visit_id = self.VISIT
    source = study.source(source_files)
    subject_sink_files = ['subject_sink']
    subject_sink = study.sink(subject_sink_files, frequency='per_subject')
    subject_sink.inputs.name = 'subject_summary'
    subject_sink.inputs.desc = (
        "Tests the sinking of subject-wide filesets")
    # Test visit sink
    visit_sink_files = ['visit_sink']
    visit_sink = study.sink(visit_sink_files, frequency='per_visit')
    visit_sink.inputs.name = 'visit_summary'
    visit_sink.inputs.desc = (
        "Tests the sinking of visit-wide filesets")
    # Test project sink
    project_sink_files = ['project_sink']
    project_sink = study.sink(project_sink_files, frequency='per_study')
    project_sink.inputs.name = 'project_summary'
    project_sink.inputs.desc = (
        "Tests the sinking of project-wide filesets")
    # Create workflow connecting them together
    workflow = pe.Workflow('summary_unittest', base_dir=self.work_dir)
    workflow.add_nodes((source, subject_sink, visit_sink, project_sink))
    workflow.connect(inputnode, 'subject_id', source, 'subject_id')
    workflow.connect(inputnode, 'visit_id', source, 'visit_id')
    workflow.connect(inputnode, 'subject_id', subject_sink, 'subject_id')
    workflow.connect(inputnode, 'visit_id', visit_sink, 'visit_id')
    workflow.connect(source, 'source1' + PATH_SUFFIX,
                     subject_sink, 'subject_sink' + PATH_SUFFIX)
    workflow.connect(source, 'source2' + PATH_SUFFIX,
                     visit_sink, 'visit_sink' + PATH_SUFFIX)
    workflow.connect(source, 'source3' + PATH_SUFFIX,
                     project_sink, 'project_sink' + PATH_SUFFIX)
    workflow.run()
    with self._connect() as login:
        # Check subject summary directories were created properly in cache
        expected_subj_filesets = ['subject_sink']
        subject_dir = self.session_cache(
            visit=XnatRepository.SUMMARY_NAME,
            from_study=self.SUMMARY_STUDY_NAME)
        self.assertEqual(
            ls_with_md5_filter(subject_dir),
            [d + text_format.extension for d in expected_subj_filesets])
        # and on XNAT
        subject_fileset_names = list(
            login.projects[self.project].experiments[self.session_label(
                visit=XnatRepository.SUMMARY_NAME,
                from_study=self.SUMMARY_STUDY_NAME)].scans.keys())
        self.assertEqual(expected_subj_filesets, subject_fileset_names)
        # Check visit summary directories were created properly in cache
        expected_visit_filesets = ['visit_sink']
        visit_dir = self.session_cache(
            subject=XnatRepository.SUMMARY_NAME,
            from_study=self.SUMMARY_STUDY_NAME)
        self.assertEqual(
            ls_with_md5_filter(visit_dir),
            [d + text_format.extension for d in expected_visit_filesets])
        # and on XNAT
        visit_fileset_names = list(
            login.projects[self.project].experiments[self.session_label(
                subject=XnatRepository.SUMMARY_NAME,
                from_study=self.SUMMARY_STUDY_NAME)].scans.keys())
        self.assertEqual(expected_visit_filesets, visit_fileset_names)
        # Check project summary directories were created properly in cache
        expected_proj_filesets = ['project_sink']
        project_dir = self.session_cache(
            subject=XnatRepository.SUMMARY_NAME,
            visit=XnatRepository.SUMMARY_NAME,
            from_study=self.SUMMARY_STUDY_NAME)
        self.assertEqual(
            ls_with_md5_filter(project_dir),
            [d + text_format.extension for d in expected_proj_filesets])
        # and on XNAT
        project_fileset_names = list(
            login.projects[self.project].experiments[self.session_label(
                subject=XnatRepository.SUMMARY_NAME,
                visit=XnatRepository.SUMMARY_NAME,
                from_study=self.SUMMARY_STUDY_NAME)].scans.keys())
        self.assertEqual(expected_proj_filesets, project_fileset_names)
    # Reload the data from the summary directories
    reloadinputnode = pe.Node(
        IdentityInterface(['subject_id', 'visit_id']), 'reload_inputnode')
    reloadinputnode.inputs.subject_id = self.SUBJECT
    reloadinputnode.inputs.visit_id = self.VISIT
    reloadsource = study.source(
        (source_files + subject_sink_files + visit_sink_files +
         project_sink_files),
        name='reload_source')
    reloadsink = study.sink(['resink1', 'resink2', 'resink3'])
    reloadsink.inputs.name = 'reload_summary'
    reloadsink.inputs.desc = (
        "Tests the reloading of subject and project summary filesets")
    reloadworkflow = pe.Workflow('reload_summary_unittest',
                                 base_dir=self.work_dir)
    reloadworkflow.connect(reloadinputnode, 'subject_id',
                           reloadsource, 'subject_id')
    reloadworkflow.connect(reloadinputnode, 'visit_id',
                           reloadsource, 'visit_id')
    reloadworkflow.connect(reloadinputnode, 'subject_id',
                           reloadsink, 'subject_id')
    reloadworkflow.connect(reloadinputnode, 'visit_id',
                           reloadsink, 'visit_id')
    reloadworkflow.connect(reloadsource, 'subject_sink' + PATH_SUFFIX,
                           reloadsink, 'resink1' + PATH_SUFFIX)
    reloadworkflow.connect(reloadsource, 'visit_sink' + PATH_SUFFIX,
                           reloadsink, 'resink2' + PATH_SUFFIX)
    reloadworkflow.connect(reloadsource, 'project_sink' + PATH_SUFFIX,
                           reloadsink, 'resink3' + PATH_SUFFIX)
    reloadworkflow.run()
    # Check that the filesets were resunk into the study session in cache
    self.assertEqual(
        ls_with_md5_filter(
            self.session_cache(from_study=self.SUMMARY_STUDY_NAME)),
        ['resink1.txt', 'resink2.txt', 'resink3.txt'])
    # and on XNAT
    with self._connect() as login:
        resinked_fileset_names = list(
            login.projects[self.project].experiments[self.session_label(
                from_study=self.SUMMARY_STUDY_NAME)].scans.keys())
        self.assertEqual(sorted(resinked_fileset_names),
                         ['resink1', 'resink2', 'resink3'])
def test_digest_check(self):
    """
    Tests check of downloaded digests to see if file needs to be
    redownloaded
    """
    cache_dir = op.join(self.work_dir, 'cache-digest-check')
    DATASET_NAME = 'source1'
    STUDY_NAME = 'digest_check_study'
    fileset_fpath = DATASET_NAME + text_format.extension
    source_target_path = op.join(self.session_cache(cache_dir),
                                 fileset_fpath)
    md5_path = source_target_path + XnatRepository.MD5_SUFFIX
    shutil.rmtree(cache_dir, ignore_errors=True)
    os.makedirs(cache_dir)
    source_repository = XnatRepository(
        project_id=self.project, server=SERVER, cache_dir=cache_dir)
    sink_repository = XnatRepository(
        project_id=self.digest_sink_project, server=SERVER,
        cache_dir=cache_dir)
    study = DummyStudy(
        STUDY_NAME, sink_repository, LinearProcessor('ad'),
        inputs=[FilesetSelector(DATASET_NAME, text_format, DATASET_NAME,
                                repository=source_repository)],
        subject_ids=['SUBJECT'], visit_ids=['VISIT'], fill_tree=True)
    source = study.source([DATASET_NAME], name='digest_check_source')
    source.inputs.subject_id = self.SUBJECT
    source.inputs.visit_id = self.VISIT
    source.run()
    self.assertTrue(op.exists(md5_path))
    self.assertTrue(op.exists(source_target_path))
    with open(md5_path) as f:
        digests = json.load(f)
    # Stash the downloaded file in a new location and create a dummy
    # file instead
    stash_path = source_target_path + '.stash'
    shutil.move(source_target_path, stash_path)
    with open(source_target_path, 'w') as f:
        f.write('dummy')
    # Run the download, which shouldn't download as the digests are the
    # same
    source.run()
    with open(source_target_path) as f:
        d = f.read()
    self.assertEqual(d, 'dummy')
    # Replace the digest with a dummy
    os.remove(md5_path)
    digests[fileset_fpath] = 'dummy_digest'
    with open(md5_path, 'w', **JSON_ENCODING) as f:
        json.dump(digests, f)
    # Retry the download, which should now download since the digests
    # differ
    source.run()
    with open(source_target_path) as f:
        d = f.read()
    with open(stash_path) as f:
        e = f.read()
    self.assertEqual(d, e)
    # Resink the source file and check that the generated MD5 digest is
    # stored in identical format
    DATASET_NAME = 'sink1'
    sink = study.sink([DATASET_NAME], name='digest_check_sink')
    sink.inputs.name = 'digest_check_sink'
    sink.inputs.desc = "Tests the generation of MD5 digests"
    sink.inputs.subject_id = self.SUBJECT
    sink.inputs.visit_id = self.VISIT
    sink.inputs.sink1_path = source_target_path
    sink_fpath = DATASET_NAME + text_format.extension
    sink_target_path = op.join(
        self.session_cache(cache_dir, project=self.digest_sink_project,
                           subject=self.SUBJECT, from_study=STUDY_NAME),
        sink_fpath)
    sink_md5_path = sink_target_path + XnatRepository.MD5_SUFFIX
    sink.run()
    with open(md5_path) as f:
        source_digests = json.load(f)
    with open(sink_md5_path) as f:
        sink_digests = json.load(f)
    self.assertEqual(
        source_digests[fileset_fpath], sink_digests[sink_fpath],
        "Source digest ({}) did not equal sink digest ({})".format(
            source_digests[fileset_fpath], sink_digests[sink_fpath]))
def test_delayed_download(self):
    """
    Tests handling of race conditions where separate processes attempt
    to cache the same fileset
    """
    cache_dir = op.join(self.work_dir, 'cache-delayed-download')
    DATASET_NAME = 'source1'
    target_path = op.join(self.session_cache(cache_dir),
                          DATASET_NAME + text_format.extension)
    tmp_dir = target_path + '.download'
    shutil.rmtree(cache_dir, ignore_errors=True)
    os.makedirs(cache_dir)
    repository = XnatRepository(
        server=SERVER, cache_dir=cache_dir, project_id=self.project)
    study = DummyStudy(
        self.STUDY_NAME, repository, LinearProcessor('ad'),
        inputs=[FilesetSelector(DATASET_NAME, text_format, DATASET_NAME)])
    source = study.source([study.input(DATASET_NAME)],
                          name='delayed_source')
    source.inputs.subject_id = self.SUBJECT
    source.inputs.visit_id = self.VISIT
    result1 = source.run()
    source1_path = result1.outputs.source1_path
    self.assertTrue(op.exists(source1_path))
    self.assertEqual(
        source1_path, target_path,
        "Output file path '{}' not equal to target path '{}'".format(
            source1_path, target_path))
    # Clear cache to start again
    shutil.rmtree(cache_dir, ignore_errors=True)
    # Create tmp_dir before running interface, this time should wait for 1
    # second, check to see that the session hasn't been created and then
    # clear it and redownload the fileset.
    os.makedirs(tmp_dir)
    source.inputs.race_cond_delay = 1
    result2 = source.run()
    source1_path = result2.outputs.source1_path
    # Clear cache to start again
    shutil.rmtree(cache_dir, ignore_errors=True)
    # This time simulate a concurrent download in a separate process, so
    # the source should keep waiting until that download completes and
    # then use the file it put in place.
    internal_dir = op.join(tmp_dir, 'internal')
    deleted_tmp_dir = tmp_dir + '.deleted'

    def simulate_download():
        "Simulates a download in a separate process"
        os.makedirs(internal_dir)
        time.sleep(5)
        # Modify a file in the temp dir to make the source download keep
        # waiting
        logger.info('Updating simulated download directory')
        with open(op.join(internal_dir, 'download'), 'a') as f:
            f.write('downloading')
        time.sleep(10)
        # Simulate the finalising of the download by copying the
        # previously downloaded file into place and deleting the temp
        # dir.
        logger.info('Finalising simulated download')
        with open(target_path, 'a') as f:
            f.write('simulated')
        shutil.move(tmp_dir, deleted_tmp_dir)

    source.inputs.race_cond_delay = 10
    p = Process(target=simulate_download)
    p.start()  # Start the simulated download in separate process
    time.sleep(1)
    source.run()  # Run the local download
    p.join()
    with open(op.join(deleted_tmp_dir, 'internal', 'download')) as f:
        d = f.read()
    self.assertEqual(d, 'downloading')
    with open(target_path) as f:
        d = f.read()
    self.assertEqual(d, 'simulated')
fMRI, inputs, output_files = create_fmri_study_class(
    'fMRI', args.hires_structural, args.fmri, args.fmri_order,
    args.fmri_echo_spacing, fm_mag=args.field_map_mag,
    fm_phase=args.field_map_phase, run_regression=args.run_regression)

CACHE_PATH = os.path.join(args.working_dir, 'xnat_cache')
WORK_PATH = os.path.join(args.working_dir, 'work_dir')
try:
    os.makedirs(WORK_PATH)
except OSError as e:
    if e.errno != errno.EEXIST:
        raise
try:
    os.makedirs(CACHE_PATH)
except OSError as e:
    if e.errno != errno.EEXIST:
        raise

repository = XnatRepository(
    server=args.xnat_server, project_id=args.project_id,
    user=args.xnat_username, password=args.xnat_password,
    cache_dir=CACHE_PATH)

study = fMRI(name='fMRI_preprocessing',
             processor=LinearProcessor(WORK_PATH),
             repository=repository, inputs=inputs,
             subject_ids=args.subject_ids, visit_ids=args.visit_ids)
study.data(output_files)
print('Done!')