Example #1
 def test_altered_workflow(self):
     """
     Tests whether data is regenerated if the pipeline workflows are altered
     """
     study_name = 'add_node'
     # Test vanilla study
     study = self.create_study(TestProvStudy,
                               study_name,
                               inputs=STUDY_INPUTS)
     self.assertEqual(
         study.data('derived_field2').value(*self.SESSION), 156.0)
     # Rerun results of altered study
     study = self.create_study(TestProvStudyAddNode,
                               study_name,
                               processor=SingleProc(self.work_dir,
                                                    reprocess=True),
                               inputs=STUDY_INPUTS)
     self.assertEqual(
         study.data('derived_field2').value(*self.SESSION), 1252.0)
     study_name = 'add_connect'
     # Test vanilla study
     study = self.create_study(TestProvStudy,
                               study_name,
                               inputs=STUDY_INPUTS)
     self.assertEqual(
         study.data('derived_field2').value(*self.SESSION), 156.0)
     # Rerun results of altered study
     study = self.create_study(TestProvStudyAddConnect,
                               study_name,
                               processor=SingleProc(self.work_dir,
                                                    reprocess=True),
                               inputs=STUDY_INPUTS)
     self.assertEqual(
         study.data('derived_field2').value(*self.SESSION), 170.0)
Example #2
    def test_process_dialation(self):
        study_name = 'process_dialation'
        new_value = -101
        study = self.create_study(TestDialationStudy,
                                  study_name,
                                  inputs=self.STUDY_INPUTS)
        study.data('derived_field5')

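        # Helper that checks the stored value of a derived field in every
        # subject/visit session against a dict of expected values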
        def values_equal(field_name, values):
            for subj_i in range(self.NUM_SUBJECTS):
                for vis_i in range(self.NUM_VISITS):
                    sess = study.tree.session(subj_i, vis_i)
                    field = sess.field(field_name, from_study=study_name)
                    self.assertEqual(field.value,
                                     values[(str(subj_i), str(vis_i))])

        # Test generated values
        values_equal('derived_field5', self.DEFAULT_FIELD5_VALUES)
        # Tag the field 1 values so we can detect if they get regenerated
        orig_field1_values = {}
        orig_field3_values = {}
        for vis_i in range(self.NUM_VISITS):
            for subj_i in range(self.NUM_SUBJECTS):
                sess = study.tree.session(subj_i, vis_i)
                field1 = sess.field('derived_field1', from_study=study_name)
                orig_field1_values[(str(subj_i), str(vis_i))] = field1.value
                change_value_w_prov(field1, new_value)
            field3 = study.tree.visit(vis_i).field('derived_field3',
                                                   from_study=study_name)
            orig_field3_values[str(vis_i)] = field3.value
        # Rerun analysis with new parameters
        study = self.create_study(TestDialationStudy,
                                  study_name,
                                  inputs=self.STUDY_INPUTS,
                                  processor=SingleProc(self.work_dir,
                                                       reprocess=True),
                                  parameters={'pipeline3_op': 'mul'})
        study.data('derived_field3', subject_id='0', visit_id='0')
        values_equal('derived_field1',
                     {k: new_value
                      for k in orig_field1_values})
        self.assertEqual(
            study.tree.visit('0').field('derived_field3',
                                        from_study=study_name).value, 10201)
        self.assertEqual(
            study.tree.visit('1').field('derived_field3',
                                        from_study=study_name).value,
            orig_field3_values['1'])
        study = self.create_study(TestDialationStudy,
                                  study_name,
                                  inputs=self.STUDY_INPUTS,
                                  processor=SingleProc(self.work_dir,
                                                       reprocess=True),
                                  parameters={'increment': 2})
        study.data('derived_field5', subject_id='0', visit_id='0')
        values_equal('derived_field1',
                     {k: v + 1
                      for k, v in orig_field1_values.items()})
Example #3
 def test_protect_manually(self):
     """Protect manually altered files and fields from overwrite"""
     analysis_name = 'manual_protect'
     protected_derived_field4_value = -99.0
     protected_derived_fileset1_value = -999.0
     # Test vanilla analysis
     analysis = self.create_analysis(
         TestProvAnalysis,
         analysis_name,
         inputs=STUDY_INPUTS)
     derived_fileset1_slice, derived_field4_slice = analysis.data(
         ('derived_fileset1', 'derived_field4'), derive=True)
     self.assertContentsEqual(derived_fileset1_slice, 154.0)
     self.assertEqual(derived_field4_slice.value(*self.SESSION), 155.0)
     # Rerun with new parameters
     analysis = self.create_analysis(
         TestProvAnalysis,
         analysis_name,
         inputs=STUDY_INPUTS,
         processor=SingleProc(self.work_dir, reprocess=True),
         parameters={'multiplier': 100.0})
     derived_fileset1_slice, derived_field4_slice = analysis.data(
         ('derived_fileset1', 'derived_field4'), derive=True)
     self.assertContentsEqual(derived_fileset1_slice, 1414.0)
     derived_field4 = derived_field4_slice.item(*self.SESSION)
     self.assertEqual(derived_field4.value, 1415.0)
     # Manually changing the value (or file contents) of a derivative
     # (without also altering the saved provenance record) means the new
     # value/file will be "protected" from reprocessing, and will need to
     # be deleted in order to be regenerated
     derived_field4.value = protected_derived_field4_value
     # Rerun with new parameters; derived_fileset1 now needs to be
     # reprocessed, but derived_field4 is protected
     analysis = self.create_analysis(
         TestProvAnalysis,
         analysis_name,
         processor=SingleProc(self.work_dir, reprocess=True),
         inputs=STUDY_INPUTS,
         parameters={'multiplier': 1000.0})
     # Check that a protected-output conflict error is raised if only one
     # of derived_field4/derived_fileset1 is protected
     self.assertRaises(
         ArcanaProtectedOutputConflictError,
         analysis.derive,
         ('derived_fileset1', 'derived_field4'))
     with open(derived_fileset1_slice.path(*self.SESSION), 'w') as f:
         f.write(str(protected_derived_fileset1_value))
     analysis.clear_caches()
     # Protect the output of derived_fileset1 as well and it should return
     # the protected values
     derived_fileset1_slice, derived_field4_slice = analysis.data(
         ('derived_fileset1', 'derived_field4'), derive=True)
     self.assertContentsEqual(derived_fileset1_slice,
                              protected_derived_fileset1_value)
     self.assertEqual(derived_field4_slice.value(*self.SESSION),
                      protected_derived_field4_value)
Example #4
 def test_fields_roundtrip(self):
     repository = XnatRepo(server=SERVER, cache_dir=self.cache_dir)
     dataset = repository.dataset(self.project)
     analysis = DummyAnalysis(
         self.STUDY_NAME,
         dataset=dataset,
         processor=SingleProc('a_dir'),
         inputs=[FilesetFilter('source1', 'source1', text_format)])
     fields = ['field{}'.format(i) for i in range(1, 4)]
     dummy_pipeline = analysis.dummy_pipeline()
     dummy_pipeline.cap()
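     # Sink the three field values into the repository for the test session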
     sink = pe.Node(RepositorySink(
         (analysis.bound_spec(f).slice for f in fields), dummy_pipeline),
                    name='fields_sink')
     sink.inputs.field1_field = field1 = 1
     sink.inputs.field2_field = field2 = 2.0
     sink.inputs.field3_field = field3 = str('3')
     sink.inputs.subject_id = self.SUBJECT
     sink.inputs.visit_id = self.VISIT
     sink.inputs.desc = "Test sink of fields"
     sink.inputs.name = 'test_sink'
     sink.run()
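     # Read the fields back from the repository with a source node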
     source = pe.Node(RepositorySource(
         analysis.bound_spec(f).slice for f in fields),
                      name='fields_source')
     source.inputs.visit_id = self.VISIT
     source.inputs.subject_id = self.SUBJECT
     source.inputs.desc = "Test source of fields"
     source.inputs.name = 'test_source'
     results = source.run()
     self.assertEqual(results.outputs.field1_field, field1)
     self.assertEqual(results.outputs.field2_field, field2)
     self.assertEqual(results.outputs.field3_field, field3)
Example #5
 def test_fields_roundtrip(self):
     STUDY_NAME = 'fields_roundtrip'
     study = DummyStudy(STUDY_NAME,
                        self.repository,
                        processor=SingleProc('a_dir'),
                        inputs=[])
     dummy_pipeline = study.dummy_pipeline()
     dummy_pipeline.cap()
     sink = pe.Node(RepositorySink(
         (study.bound_spec(f).collection
          for f in ['field1', 'field2', 'field3']), dummy_pipeline),
                    name='fields_sink')
     sink.inputs.field1_field = field1 = 1
     sink.inputs.field2_field = field2 = 2.0
     sink.inputs.field3_field = field3 = '3'
     sink.inputs.subject_id = self.SUBJECT
     sink.inputs.visit_id = self.VISIT
     sink.inputs.desc = "Test sink of fields"
     sink.inputs.name = 'test_sink'
     sink.run()
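     # Reload the fields from the repository to check they round-trip
     # unchanged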
     source = pe.Node(RepositorySource(
         study.bound_spec(f).collection
         for f in ['field1', 'field2', 'field3']),
                      name='fields_source')
     source.inputs.visit_id = self.VISIT
     source.inputs.subject_id = self.SUBJECT
     source.inputs.desc = "Test source of fields"
     source.inputs.name = 'test_source'
     results = source.run()
     self.assertEqual(results.outputs.field1_field, field1)
     self.assertEqual(results.outputs.field2_field, field2)
     self.assertEqual(results.outputs.field3_field, field3)
Example #6
 def test_input_change(self):
     analysis_name = 'input_change_analysis'
     analysis = self.create_analysis(
         TestProvAnalysis,
         analysis_name,
         inputs=STUDY_INPUTS)
     self.assertEqual(
         analysis.data('derived_field2',
                       derive=True).value(*self.SESSION), 156.0)
     # Change acquired file contents, which should cause the checksum check
     # to fail
     with open(analysis.data('acquired_fileset1',
                             derive=True).path(*self.SESSION), 'w') as f:
         f.write('99.9')
     # Should detect that the input has changed and throw an error
     self.assertRaises(
         ArcanaReprocessException,
         analysis.derive,
         'derived_field2')
     new_analysis = self.create_analysis(
         TestProvAnalysis,
         analysis_name,
         processor=SingleProc(self.work_dir, reprocess=True),
         inputs=STUDY_INPUTS)
     self.assertEqual(
         new_analysis.data('derived_field2',
                           derive=True).value(*self.SESSION), 1145.0)
Example #7
 def test_bids_fmri(self):
     analysis = BoldAnalysis('test_fmri',
                             repository=BidsRepo(
                                 op.join(self.BIDS_EXAMPLES_PATH, 'ds114')),
                             processor=SingleProc(self.work_dir),
                             environment=TEST_ENV,
                             bids_task='covertverbgeneration')
     analysis.pipeline('single_subject_melodic_pipeline')
Example #8
 def test_bids_dwi(self):
     analysis = DwiAnalysis('test_dwi',
                            repository=BidsRepo(
                                op.join(self.BIDS_EXAMPLES_PATH, 'ds114')),
                            processor=SingleProc(self.work_dir),
                            environment=TEST_ENV,
                            parameters={'preproc_pe_dir': 'RL'})
     analysis.pipeline('global_tracking_pipeline')
Example #9
 def test_bids_fmri(self):
     study = BoldStudy('test_fmri',
                       repository=self.repo,
                       processor=SingleProc(
                           self.work_dir,
                           prov_ignore=SingleProc.DEFAULT_PROV_IGNORE +
                           ['workflow/nodes/.*/requirements/.*/version']),
                       bids_task='covertverbgeneration')
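     # Derive the MELODIC ICA derivative for the selected BIDS task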
     study.data('melodic_ica')
Example #10
 def test_bids_dwi(self):
     study = DwiStudy(
         'test_dwi',
         repository=self.repo,
         processor=SingleProc(self.work_dir,
                              prov_ignore=SingleProc.DEFAULT_PROV_IGNORE +
                              ['workflow/nodes/.*/requirements/.*/version'],
                              reprocess=True),
         parameters={'preproc_pe_dir': 'RL'})
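     # Derive the diffusion tensor derivative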
     study.data('tensor')
Example #11
 def test_repository_roundtrip(self):
     analysis = DummyAnalysis(self.STUDY_NAME,
                              self.dataset,
                              processor=SingleProc('a_dir'),
                              inputs=[
                                  FilesetFilter('source1', 'source1',
                                                text_format),
                                  FilesetFilter('source2', 'source2',
                                                text_format),
                                  FilesetFilter('source3', 'source3',
                                                text_format),
                                  FilesetFilter('source4', 'source4',
                                                text_format)
                              ])
     # TODO: Should test out other file formats as well.
     source_files = ('source1', 'source2', 'source3', 'source4')
     sink_files = ('sink1', 'sink3', 'sink4')
     inputnode = pe.Node(IdentityInterface(['subject_id', 'visit_id']),
                         'inputnode')
     inputnode.inputs.subject_id = self.SUBJECT
     inputnode.inputs.visit_id = self.VISIT
     source = pe.Node(RepositorySource(
         analysis.bound_spec(f).slice for f in source_files),
                      name='source')
     dummy_pipeline = analysis.dummy_pipeline()
     dummy_pipeline.cap()
     sink = pe.Node(RepositorySink((analysis.bound_spec(f).slice
                                    for f in sink_files), dummy_pipeline),
                    name='sink')
     sink.inputs.name = 'repository_sink'
     sink.inputs.desc = (
         "A test session created by repository roundtrip unittest")
     # Create workflow connecting them together
     workflow = pe.Workflow('source_sink_unit_test', base_dir=self.work_dir)
     workflow.add_nodes((source, sink))
     workflow.connect(inputnode, 'subject_id', source, 'subject_id')
     workflow.connect(inputnode, 'visit_id', source, 'visit_id')
     workflow.connect(inputnode, 'subject_id', sink, 'subject_id')
     workflow.connect(inputnode, 'visit_id', sink, 'visit_id')
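     # Connect each source to its corresponding sink, skipping 'source2',
     # which has no matching sink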
     for source_name in source_files:
         if not source_name.endswith('2'):
             sink_name = source_name.replace('source', 'sink')
             workflow.connect(source, source_name + PATH_SUFFIX, sink,
                              sink_name + PATH_SUFFIX)
     workflow.run()
     # Check local directory was created properly
     outputs = [
         f for f in sorted(
             os.listdir(self.get_session_dir(
                 from_analysis=self.STUDY_NAME)))
         if f not in (LocalFileSystemRepo.FIELDS_FNAME,
                      LocalFileSystemRepo.PROV_DIR)
     ]
     self.assertEqual(outputs, ['sink1.txt', 'sink3.txt', 'sink4.txt'])
Example #12
 def test_dicom_match(self):
     analysis = test_data.TestMatchAnalysis(
         name='test_dicom',
         dataset=XnatRepo(server=SERVER,
                          cache_dir=tempfile.mkdtemp()).dataset(
                              self.project),
         processor=SingleProc(self.work_dir),
         inputs=test_data.TestDicomTagMatch.DICOM_MATCH)
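     # The DICOM tag matchers should select the 3mm field-mapping scans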
     phase = list(analysis.data('gre_phase', derive=True))[0]
     mag = list(analysis.data('gre_mag', derive=True))[0]
     self.assertEqual(phase.name, 'gre_field_mapping_3mm_phase')
     self.assertEqual(mag.name, 'gre_field_mapping_3mm_mag')
Example #13
 def test_dicom_match(self):
     study = test_data.TestMatchStudy(
         name='test_dicom',
         repository=XnatRepo(
             project_id=self.project,
             server=SERVER, cache_dir=tempfile.mkdtemp()),
         processor=SingleProc(self.work_dir),
         inputs=test_data.TestDicomTagMatch.DICOM_MATCH)
     phase = list(study.data('gre_phase'))[0]
     mag = list(study.data('gre_mag'))[0]
     self.assertEqual(phase.name, 'gre_field_mapping_3mm_phase')
     self.assertEqual(mag.name, 'gre_field_mapping_3mm_mag')
Example #14
 def test_altered_workflow(self):
     """
     Tests whether data is regenerated if the pipeline workflows are altered
     """
     analysis_name = 'add_node'
     # Test vanilla analysis
     analysis = self.create_analysis(
         TestProvAnalysis,
         analysis_name,
         inputs=STUDY_INPUTS)
     self.assertEqual(
         analysis.data('derived_field2', derive=True).value(*self.SESSION),
         156.0)
     # Rerun results of altered analysis
     analysis = self.create_analysis(
         TestProvAnalysisAddNode,
         analysis_name,
         processor=SingleProc(self.work_dir, reprocess=True),
         inputs=STUDY_INPUTS)
     self.assertEqual(
         analysis.data('derived_field2', derive=True).value(*self.SESSION),
         1252.0)
     analysis_name = 'add_connect'
     # Test vanilla analysis
     analysis = self.create_analysis(
         TestProvAnalysis,
         analysis_name,
         inputs=STUDY_INPUTS)
     self.assertEqual(
         analysis.data('derived_field2', derive=True).value(*self.SESSION),
         156.0)
     # Rerun results of altered analysis
     analysis = self.create_analysis(
         TestProvAnalysisAddConnect,
         analysis_name,
         processor=SingleProc(self.work_dir, reprocess=True),
         inputs=STUDY_INPUTS)
     self.assertEqual(
         analysis.data('derived_field2', derive=True).value(*self.SESSION),
         170.0)
Example #15
 def test_id_match(self):
     study = test_data.TestMatchStudy(
         name='test_dicom',
         repository=XnatRepo(
             project_id=self.project,
             server=SERVER, cache_dir=tempfile.mkdtemp()),
         processor=SingleProc(self.work_dir),
         inputs=[
             InputFilesets('gre_phase', valid_formats=dicom_format, id=7),
             InputFilesets('gre_mag', valid_formats=dicom_format, id=6)])
     phase = list(study.data('gre_phase'))[0]
     mag = list(study.data('gre_mag'))[0]
     self.assertEqual(phase.name, 'gre_field_mapping_3mm_phase')
     self.assertEqual(mag.name, 'gre_field_mapping_3mm_mag')
Example #16
 def test_dialation_protection(self):
     """
     Tests that derivatives are not re-derived unless they need to be
     """
     analysis_name = 'dialation_protection'
     analysis = self.create_analysis(
         TestDialationAnalysis,
         analysis_name,
         inputs=self.STUDY_INPUTS)
     field5 = analysis.data('derived_field5', derive=True)
     for item in field5:
         self.assertEqual(item.value,
                          self.DEFAULT_FIELD5_VALUES[(item.subject_id,
                                                      item.visit_id)])
     field1 = analysis.data('derived_field1', derive=True)
     field2 = analysis.data('derived_field2', derive=True)
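     # Manually change values of field 1 for visit '1'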
     field1.item(subject_id='0', visit_id='1').value = 1000000
     field1.item(subject_id='1', visit_id='1').value = 2000000
     # Manually change value of field 2
     field2.item(subject_id='0').value = -1000
     analysis = self.create_analysis(
         TestDialationAnalysis,
         analysis_name,
         processor=SingleProc(self.work_dir, reprocess=True),
         inputs=self.STUDY_INPUTS,
         parameters={
             'increment': 2})
     analysis.dataset.clear_cache()
     # Recalculate value of field5 with new field2 value
     field1, field2, field3, field4, field5 = analysis.data(
         ['derived_field1', 'derived_field2', 'derived_field3',
          'derived_field4', 'derived_field5'], derive=True)
     self.assertEqual(field1.value(subject_id='0', visit_id='0'), 2)
     self.assertEqual(field1.value(subject_id='0', visit_id='1'), 1000000)
     self.assertEqual(field1.value(subject_id='1', visit_id='0'), 12)
     self.assertEqual(field1.value(subject_id='1', visit_id='1'), 2000000)
     self.assertEqual(field2.value(subject_id='0'), -1000)
     self.assertEqual(field2.value(subject_id='1'), 2000012)
     self.assertEqual(field3.value(visit_id='0'), 14)
     self.assertEqual(field3.value(visit_id='1'), 3000000)
     self.assertEqual(field4.value(), 3000014)
     self.assertEqual(field5.value(subject_id='0', visit_id='0'),
                      -1000 + 14 + 3000014)
     self.assertEqual(field5.value(subject_id='0', visit_id='1'),
                      -1000 + 3000000 + 3000014)
     self.assertEqual(field5.value(subject_id='1', visit_id='0'),
                      2000012 + 14 + 3000014)
     self.assertEqual(field5.value(subject_id='1', visit_id='1'),
                      2000012 + 3000000 + 3000014)
Example #17
 def test_id_match(self):
     analysis = test_data.TestMatchAnalysis(
         name='test_dicom',
         dataset=XnatRepo(server=SERVER,
                          cache_dir=tempfile.mkdtemp()).dataset(
                              self.project),
         processor=SingleProc(self.work_dir),
         inputs=[
             FilesetFilter('gre_phase', valid_formats=dicom_format, id=7),
             FilesetFilter('gre_mag', valid_formats=dicom_format, id=6)
         ])
     phase = list(analysis.data('gre_phase', derive=True))[0]
     mag = list(analysis.data('gre_mag', derive=True))[0]
     self.assertEqual(phase.name, 'gre_field_mapping_3mm_phase')
     self.assertEqual(mag.name, 'gre_field_mapping_3mm_mag')
Example #18
 def test_dialation_protection(self):
     study_name = 'dialation_protection'
     study = self.create_study(TestDialationStudy,
                               study_name,
                               inputs=self.STUDY_INPUTS)
     field5 = study.data('derived_field5')
     for item in field5:
         self.assertEqual(
             item.value,
             self.DEFAULT_FIELD5_VALUES[(item.subject_id, item.visit_id)])
     field1 = study.data('derived_field1')
     field2 = study.data('derived_field2')
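     # Manually change values of field 1 for visit '1'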
     field1.item(subject_id='0', visit_id='1').value = 1000000
     field1.item(subject_id='1', visit_id='1').value = 2000000
     # Manually change value of field 2
     field2.item(subject_id='0').value = -1000
     study = self.create_study(TestDialationStudy,
                               study_name,
                               processor=SingleProc(self.work_dir,
                                                    reprocess=True),
                               inputs=self.STUDY_INPUTS,
                               parameters={'increment': 2})
     # Recalculate value of field5 with new field2 value
     field1, field2, field3, field4, field5 = study.data([
         'derived_field1', 'derived_field2', 'derived_field3',
         'derived_field4', 'derived_field5'
     ])
     self.assertEqual(field1.value(subject_id='0', visit_id='0'), 2)
     self.assertEqual(field1.value(subject_id='0', visit_id='1'), 1000000)
     self.assertEqual(field1.value(subject_id='1', visit_id='0'), 12)
     self.assertEqual(field1.value(subject_id='1', visit_id='1'), 2000000)
     self.assertEqual(field2.value(subject_id='0'), -1000)
     self.assertEqual(field2.value(subject_id='1'), 2000012)
     self.assertEqual(field3.value(visit_id='0'), 14)
     self.assertEqual(field3.value(visit_id='1'), 3000000)
     self.assertEqual(field4.value(), 3000014)
     self.assertEqual(field5.value(subject_id='0', visit_id='0'),
                      -1000 + 14 + 3000014)
     self.assertEqual(field5.value(subject_id='0', visit_id='1'),
                      -1000 + 3000000 + 3000014)
     self.assertEqual(field5.value(subject_id='1', visit_id='0'),
                      2000012 + 14 + 3000014)
     self.assertEqual(field5.value(subject_id='1', visit_id='1'),
                      2000012 + 3000000 + 3000014)
Example #19
    def test_summary(self):
        study = DummyStudy(self.SUMMARY_STUDY_NAME,
                           self.repository,
                           SingleProc('ad'),
                           inputs=[
                               InputFilesets('source1', 'source1',
                                             text_format),
                               InputFilesets('source2', 'source2',
                                             text_format),
                               InputFilesets('source3', 'source3', text_format)
                           ])
        # TODO: Should test out other file formats as well.
        source_files = ['source1', 'source2', 'source3']
        inputnode = pe.Node(IdentityInterface(['subject_id', 'visit_id']),
                            'inputnode')
        inputnode.inputs.subject_id = self.SUBJECT
        inputnode.inputs.visit_id = self.VISIT
        source = pe.Node(RepositorySource(
            study.bound_spec(f).collection for f in source_files),
                         name='source')
        # Test subject sink
        subject_sink_files = ['subject_sink']
        dummy_pipeline = study.dummy_pipeline()
        dummy_pipeline.cap()
        subject_sink = pe.Node(RepositorySink(
            (study.bound_spec(f).collection for f in subject_sink_files),
            dummy_pipeline),
                               name='subject_sink')
        subject_sink.inputs.name = 'subject_summary'
        subject_sink.inputs.desc = (
            "Tests the sinking of subject-wide filesets")
        # Test visit sink
        visit_sink_files = ['visit_sink']
        visit_sink = pe.Node(RepositorySink(
            (study.bound_spec(f).collection for f in visit_sink_files),
            dummy_pipeline),
                             name='visit_sink')
        visit_sink.inputs.name = 'visit_summary'
        visit_sink.inputs.desc = ("Tests the sinking of visit-wide filesets")
        # Test project sink
        study_sink_files = ['study_sink']
        study_sink = pe.Node(RepositorySink(
            (study.bound_spec(f).collection for f in study_sink_files),
            dummy_pipeline),
                             name='study_sink')

        study_sink.inputs.name = 'project_summary'
        study_sink.inputs.desc = ("Tests the sinking of project-wide filesets")
        # Create workflow connecting them together
        workflow = pe.Workflow('summary_unittest', base_dir=self.work_dir)
        workflow.add_nodes((source, subject_sink, visit_sink, study_sink))
        workflow.connect(inputnode, 'subject_id', source, 'subject_id')
        workflow.connect(inputnode, 'visit_id', source, 'visit_id')
        workflow.connect(inputnode, 'subject_id', subject_sink, 'subject_id')
        workflow.connect(inputnode, 'visit_id', visit_sink, 'visit_id')
        workflow.connect(source, 'source1' + PATH_SUFFIX, subject_sink,
                         'subject_sink' + PATH_SUFFIX)
        workflow.connect(source, 'source2' + PATH_SUFFIX, visit_sink,
                         'visit_sink' + PATH_SUFFIX)
        workflow.connect(source, 'source3' + PATH_SUFFIX, study_sink,
                         'study_sink' + PATH_SUFFIX)
        workflow.run()
        # Check local summary directories were created properly
        subject_dir = self.get_session_dir(frequency='per_subject',
                                           from_study=self.SUMMARY_STUDY_NAME)
        self.assertEqual(sorted(os.listdir(subject_dir)),
                         [BasicRepo.PROV_DIR, 'subject_sink.txt'])
        visit_dir = self.get_session_dir(frequency='per_visit',
                                         from_study=self.SUMMARY_STUDY_NAME)
        self.assertEqual(sorted(os.listdir(visit_dir)),
                         [BasicRepo.PROV_DIR, 'visit_sink.txt'])
        project_dir = self.get_session_dir(frequency='per_study',
                                           from_study=self.SUMMARY_STUDY_NAME)
        self.assertEqual(sorted(os.listdir(project_dir)),
                         [BasicRepo.PROV_DIR, 'study_sink.txt'])
        # Reload the data from the summary directories
        reloadinputnode = pe.Node(IdentityInterface(['subject_id',
                                                     'visit_id']),
                                  name='reload_inputnode')
        reloadinputnode.inputs.subject_id = self.SUBJECT
        reloadinputnode.inputs.visit_id = self.VISIT
        reloadsource_per_subject = pe.Node(RepositorySource(
            study.bound_spec(f).collection for f in subject_sink_files),
                                           name='reload_source_per_subject')
        reloadsource_per_visit = pe.Node(RepositorySource(
            study.bound_spec(f).collection for f in visit_sink_files),
                                         name='reload_source_per_visit')
        reloadsource_per_study = pe.Node(RepositorySource(
            study.bound_spec(f).collection for f in study_sink_files),
                                         name='reload_source_per_study')
        reloadsink = pe.Node(RepositorySink(
            (study.bound_spec(f).collection
             for f in ['resink1', 'resink2', 'resink3']), dummy_pipeline),
                             name='reload_sink')
        reloadsink.inputs.name = 'reload_summary'
        reloadsink.inputs.desc = (
            "Tests the reloading of subject and project summary filesets")
        reloadworkflow = pe.Workflow('reload_summary_unittest',
                                     base_dir=self.work_dir)
        for node in (reloadsource_per_subject, reloadsource_per_visit,
                     reloadsource_per_study, reloadsink):
            for iterator in ('subject_id', 'visit_id'):
                reloadworkflow.connect(reloadinputnode, iterator, node,
                                       iterator)
        reloadworkflow.connect(reloadsource_per_subject,
                               'subject_sink' + PATH_SUFFIX, reloadsink,
                               'resink1' + PATH_SUFFIX)
        reloadworkflow.connect(reloadsource_per_visit,
                               'visit_sink' + PATH_SUFFIX, reloadsink,
                               'resink2' + PATH_SUFFIX)
        reloadworkflow.connect(reloadsource_per_study,
                               'study_sink' + PATH_SUFFIX, reloadsink,
                               'resink3' + PATH_SUFFIX)
        reloadworkflow.run()
        outputs = [
            f for f in sorted(
                os.listdir(
                    self.get_session_dir(from_study=self.SUMMARY_STUDY_NAME)))
            if f not in (BasicRepo.FIELDS_FNAME, BasicRepo.PROV_DIR)
        ]
        self.assertEqual(outputs,
                         ['resink1.txt', 'resink2.txt', 'resink3.txt'])
Example #20
 def __init__(self,
              name,
              repository,
              processor,
              inputs,
              environment=None,
              parameters=None,
              subject_ids=None,
              visit_ids=None,
              enforce_inputs=True,
              fill_tree=False,
              clear_caches=True):
     try:
         # This works for PY3 as the metaclass inserts it itself if
         # it isn't provided
         metaclass = type(self).__dict__['__metaclass__']
         if not issubclass(metaclass, StudyMetaClass):
             raise KeyError
     except KeyError:
         raise ArcanaUsageError(
             "Need to have StudyMetaClass (or a sub-class) as "
             "the metaclass of all classes derived from Study")
     if isinstance(repository, basestring):
         repository = BasicRepo(repository, depth=None)
     if isinstance(processor, basestring):
         processor = SingleProc(processor)
     if environment is None:
         environment = StaticEnv()
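     # Store the core study attributes, binding the processor to this study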
     self._name = name
     self._repository = repository
     self._processor = processor.bind(self)
     self._environment = environment
     self._inputs = {}
     self._subject_ids = (tuple(subject_ids)
                          if subject_ids is not None else None)
     self._visit_ids = tuple(visit_ids) if visit_ids is not None else None
     self._fill_tree = fill_tree
     # Initialise caches for data collection and pipeline objects
     if clear_caches:
         self.clear_caches()
     # Set parameters
     if parameters is None:
         parameters = {}
     elif not isinstance(parameters, dict):
         # Convert list of parameters into dictionary
         parameters = {o.name: o for o in parameters}
     self._parameters = {}
     for param_name, param in list(parameters.items()):
         if not isinstance(param, Parameter):
             param = Parameter(param_name, param)
         try:
             param_spec = self._param_specs[param_name]
         except KeyError:
             raise ArcanaNameError(
                 param_name,
                 "Provided parameter '{}' is not present in the "
                 "allowable parameters for {} classes ('{}')".format(
                     param_name,
                     type(self).__name__,
                     "', '".join(self.param_spec_names())))
         param_spec.check_valid(param,
                                context=' {}(name={})'.format(
                                    type(self).__name__, name))
         self._parameters[param_name] = param
     # Convert inputs to a dictionary if passed in as a list/tuple
     if not isinstance(inputs, dict):
         inputs = {i.name: i for i in inputs}
     else:
         # Convert string patterns into Input objects
         for inpt_name, inpt in list(inputs.items()):
             if isinstance(inpt, basestring):
                 spec = self.data_spec(inpt_name)
                 if spec.is_fileset:
                     inpt = InputFilesets(inpt_name,
                                          pattern=inpt,
                                          is_regex=True)
                 else:
                     inpt = InputFields(inpt_name,
                                        pattern=inpt,
                                        dtype=spec.dtype,
                                        is_regex=True)
                 inputs[inpt_name] = inpt
     # Check validity of study inputs
     for inpt_name, inpt in inputs.items():
         try:
             spec = self.data_spec(inpt_name)
         except ArcanaNameError:
             raise ArcanaNameError(
                 inpt.name,
                 "Input name '{}' isn't in data specs of {} ('{}')".format(
                     inpt.name, self.__class__.__name__,
                     "', '".join(self._data_specs)))
         else:
             if spec.is_fileset:
                 if inpt.is_field:
                     raise ArcanaUsageError(
                         "Passed field ({}) as input to fileset spec"
                         " {}".format(inpt, spec))
             elif not inpt.is_field:
                 raise ArcanaUsageError(
                     "Passed fileset ({}) as input to field spec {}".format(
                         inpt, spec))
     # "Bind" input selectors to the current study object, and attempt to
     # match with data in the repository
     input_errors = []
     with self.repository:
         if not self.subject_ids:
             raise ArcanaUsageError(
                 "No subject IDs provided and destination repository "
                 "is empty")
         if not self.visit_ids:
             raise ArcanaUsageError(
                 "No visit IDs provided and destination repository "
                 "is empty")
         for inpt_name, inpt in list(inputs.items()):
             try:
                 try:
                     self._inputs[inpt_name] = bound_inpt = inpt.bind(
                         self, spec_name=inpt_name)
                 except ArcanaInputMissingMatchError as e:
                     if not inpt.drop_if_missing:
                         raise e
                 else:
                     spec = self.data_spec(inpt_name)
                     if spec.is_fileset:
                         if spec.derived:
                             try:
                                 spec.format.converter_from(
                                     bound_inpt.format)
                             except ArcanaNoConverterError as e:
                                 e.msg += (
                                     ", which is required to convert:\n" +
                                     "{} to\n{}.").format(
                                         e, bound_inpt, spec)
                                 raise e
                         else:
                             if bound_inpt.format not in spec.valid_formats:
                                 raise ArcanaUsageError(
                                     "Cannot pass {} as an input to {} as "
                                     "it is not in one of the valid formats"
                                     " ('{}')".format(
                                         bound_inpt, spec, "', '".join(
                                             f.name
                                             for f in spec.valid_formats)))
             except ArcanaInputError as e:
                 # Collate errors across all inputs into a single error
                 # message
                 input_errors.append(e)
     if input_errors:
         raise ArcanaInputError('\n'.join(str(e) for e in input_errors))
     # Check remaining specs are optional or have default values
     for spec in self.data_specs():
         if spec.name not in self.input_names:
             if not spec.derived and spec.default is None:
                 # Log a message or raise an error if no input has been
                 # provided for an acquired (non-derived) spec
                 msg = (
                     " input fileset '{}' was not provided to {}.".format(
                         spec.name, self))
                 if spec.optional:
                     logger.info('Optional' + msg)
                 else:
                     if enforce_inputs:
                         raise ArcanaMissingInputError(
                             'Non-optional' + msg + " Pipelines depending "
                             "on this fileset will not run")
Example #21
    def test_repository_roundtrip(self):

        # Create the XNAT repository, dataset and dummy analysis
        repository = XnatRepo(server=SERVER, cache_dir=self.cache_dir)
        dataset = repository.dataset(self.project)
        analysis = DummyAnalysis(self.STUDY_NAME,
                                 dataset=dataset,
                                 processor=SingleProc('a_dir'),
                                 inputs=[
                                     FilesetFilter('source1', 'source1',
                                                   text_format),
                                     FilesetFilter('source2', 'source2',
                                                   text_format),
                                     FilesetFilter('source3', 'source3',
                                                   text_format),
                                     FilesetFilter('source4', 'source4',
                                                   text_format)
                                 ])
        # TODO: Should test out other file formats as well.
        source_files = ['source1', 'source2', 'source3', 'source4']
        sink_files = ['sink1', 'sink3', 'sink4']
        inputnode = pe.Node(IdentityInterface(['subject_id', 'visit_id']),
                            'inputnode')
        inputnode.inputs.subject_id = str(self.SUBJECT)
        inputnode.inputs.visit_id = str(self.VISIT)
        source = pe.Node(RepositorySource(
            analysis.bound_spec(f).slice for f in source_files),
                         name='source')
        dummy_pipeline = analysis.dummy_pipeline()
        dummy_pipeline.cap()
        sink = pe.Node(RepositorySink((analysis.bound_spec(f).slice
                                       for f in sink_files), dummy_pipeline),
                       name='sink')
        sink.inputs.name = 'repository-roundtrip-unittest'
        sink.inputs.desc = (
            "A test session created by repository roundtrip unittest")
        # Create workflow connecting them together
        workflow = pe.Workflow('source-sink-unit-test', base_dir=self.work_dir)
        workflow.add_nodes((source, sink))
        workflow.connect(inputnode, 'subject_id', source, 'subject_id')
        workflow.connect(inputnode, 'visit_id', source, 'visit_id')
        workflow.connect(inputnode, 'subject_id', sink, 'subject_id')
        workflow.connect(inputnode, 'visit_id', sink, 'visit_id')
        for source_name in source_files:
            if source_name != 'source2':
                sink_name = source_name.replace('source', 'sink')
                workflow.connect(source, source_name + PATH_SUFFIX, sink,
                                 sink_name + PATH_SUFFIX)
        workflow.run()
        # Check cache was created properly
        self.assertEqual(filter_scans(os.listdir(self.session_cache())), [
            'source1-source1', 'source2-source2', 'source3-source3',
            'source4-source4'
        ])
        expected_sink_filesets = ['sink1', 'sink3', 'sink4']
        self.assertEqual(
            filter_scans(
                os.listdir(self.session_cache(from_analysis=self.STUDY_NAME))),
            [(e + '-' + e) for e in expected_sink_filesets])
        with self._connect() as login:
            fileset_names = filter_scans(login.experiments[self.session_label(
                from_analysis=self.STUDY_NAME)].scans.keys())
        self.assertEqual(fileset_names, expected_sink_filesets)
Example #22
 def test_checksums(self):
     """
     Tests the check of downloaded checksums to see whether a file needs
     to be re-downloaded
     """
     cache_dir = op.join(self.work_dir, 'cache-checksum-check')
     DATASET_NAME = 'source1'
     STUDY_NAME = 'checksum_check_analysis'
     fileset_fname = DATASET_NAME + text_format.extension
     source_target_path = op.join(self.session_cache(cache_dir),
                                  DATASET_NAME + '-' + DATASET_NAME)
     md5_path = source_target_path + XnatRepo.MD5_SUFFIX
     source_target_fpath = op.join(source_target_path, fileset_fname)
     shutil.rmtree(cache_dir, ignore_errors=True)
     os.makedirs(cache_dir)
     source_repository = XnatRepo(server=SERVER, cache_dir=cache_dir)
     source_dataset = source_repository.dataset(self.project)
     sink_repository = XnatRepo(server=SERVER, cache_dir=cache_dir)
     sink_dataset = sink_repository.dataset(self.checksum_sink_project,
                                            subject_ids=['SUBJECT'],
                                            visit_ids=['VISIT'],
                                            fill_tree=True)
     analysis = DummyAnalysis(STUDY_NAME,
                              dataset=sink_dataset,
                              processor=SingleProc('ad'),
                              inputs=[
                                  FilesetFilter(DATASET_NAME,
                                                DATASET_NAME,
                                                text_format,
                                                dataset=source_dataset)
                              ])
     source = pe.Node(RepositorySource(
         [analysis.bound_spec(DATASET_NAME).slice]),
                      name='checksum_check_source')
     source.inputs.subject_id = self.SUBJECT
     source.inputs.visit_id = self.VISIT
     source.run()
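     # Check that the fileset and its MD5 checksum file were downloaded
     # into the cache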
     self.assertTrue(op.exists(md5_path))
     self.assertTrue(op.exists(source_target_fpath))
     with open(md5_path) as f:
         checksums = json.load(f)
     # Stash the downloaded file in a new location and create a dummy
     # file instead
     stash_path = source_target_path + '.stash'
     shutil.move(source_target_path, stash_path)
     os.mkdir(source_target_path)
     with open(source_target_fpath, 'w') as f:
         f.write('dummy')
     # Rerun the source node, which shouldn't re-download the file since
     # the stored checksums match
     source.run()
     with open(source_target_fpath) as f:
         d = f.read()
     self.assertEqual(d, 'dummy')
     # Replace the checksum with a dummy
     os.remove(md5_path)
     checksums['.'] = 'dummy_checksum'
     with open(md5_path, 'w', **JSON_ENCODING) as f:
         json.dump(checksums, f, indent=2)
     # Retry the download, which should now download since the checksums
     # differ
     source.run()
     with open(source_target_fpath) as f:
         d = f.read()
     with open(op.join(stash_path, fileset_fname)) as f:
         e = f.read()
     self.assertEqual(d, e)
     # Resink the source file and check that the generated MD5 checksum is
     # stored in identical format
     DATASET_NAME = 'sink1'
     dummy_pipeline = analysis.dummy_pipeline()
     dummy_pipeline.cap()
     sink = pe.Node(RepositorySink(
         [analysis.bound_spec(DATASET_NAME).slice], dummy_pipeline),
                    name='checksum_check_sink')
     sink.inputs.name = 'checksum_check_sink'
     sink.inputs.desc = "Tests the generation of MD5 checksums"
     sink.inputs.subject_id = self.SUBJECT
     sink.inputs.visit_id = self.VISIT
     sink.inputs.sink1_path = source_target_fpath
     sink_target_path = op.join(
         self.session_cache(cache_dir,
                            project=self.checksum_sink_project,
                            subject=(self.SUBJECT),
                            from_analysis=STUDY_NAME),
         DATASET_NAME + '-' + DATASET_NAME)
     sink_md5_path = sink_target_path + XnatRepo.MD5_SUFFIX
     sink.run()
     with open(md5_path) as f:
         source_checksums = json.load(f)
     with open(sink_md5_path) as f:
         sink_checksums = json.load(f)
     self.assertEqual(
         source_checksums, sink_checksums,
         ("Source checksum ({}) did not equal sink checksum ({})".format(
             source_checksums, sink_checksums)))
Example #23
    def test_delayed_download(self):
        """
        Tests handling of race conditions where separate processes attempt to
        cache the same fileset
        """
        cache_dir = op.join(self.work_dir, 'cache-delayed-download')
        DATASET_NAME = 'source1'
        target_path = op.join(self.session_cache(cache_dir), DATASET_NAME,
                              DATASET_NAME + text_format.extension)
        tmp_dir = target_path + '.download'
        shutil.rmtree(cache_dir, ignore_errors=True)
        os.makedirs(cache_dir)
        repository = XnatRepo(server=SERVER, cache_dir=cache_dir)
        dataset = repository.dataset(self.project)
        analysis = DummyAnalysis(
            self.STUDY_NAME,
            dataset,
            SingleProc('ad'),
            inputs=[FilesetFilter(DATASET_NAME, DATASET_NAME, text_format)])
        source = pe.Node(RepositorySource(
            [analysis.bound_spec(DATASET_NAME).slice]),
                         name='delayed_source')
        source.inputs.subject_id = self.SUBJECT
        source.inputs.visit_id = self.VISIT
        result1 = source.run()
        source1_path = result1.outputs.source1_path
        self.assertTrue(op.exists(source1_path))
        self.assertEqual(
            source1_path, target_path,
            "Output file path '{}' not equal to target path '{}'".format(
                source1_path, target_path))
        # Clear cache to start again
        shutil.rmtree(cache_dir, ignore_errors=True)
        # Create tmp_dir before running the interface; this time the source
        # should wait for 1 second, check that the download hasn't
        # progressed, and then clear it and redownload the fileset
        os.makedirs(tmp_dir)
        source.inputs.race_cond_delay = 1
        result2 = source.run()
        source1_path = result2.outputs.source1_path
        # Clear cache to start again
        shutil.rmtree(cache_dir, ignore_errors=True)
        # This time simulate a concurrent download in a separate process;
        # the source should keep waiting while the simulated download is
        # active and then use the finalised file instead of redownloading
        internal_dir = op.join(tmp_dir, 'internal')
        deleted_tmp_dir = tmp_dir + '.deleted'

        def simulate_download():
            "Simulates a download in a separate process"
            os.makedirs(internal_dir)
            time.sleep(5)
            # Modify a file in the temp dir to make the source download keep
            # waiting
            logger.info('Updating simulated download directory')
            with open(op.join(internal_dir, 'download'), 'a') as f:
                f.write('downloading')
            time.sleep(10)
            # Simulate the finalising of the download by copying the previously
            # downloaded file into place and deleting the temp dir.
            logger.info('Finalising simulated download')
            with open(target_path, 'a') as f:
                f.write('simulated')
            shutil.move(tmp_dir, deleted_tmp_dir)

        source.inputs.race_cond_delay = 10
        p = Process(target=simulate_download)
        p.start()  # Start the simulated download in separate process
        time.sleep(1)
        source.run()  # Run the local download
        p.join()
        with open(op.join(deleted_tmp_dir, 'internal', 'download')) as f:
            d = f.read()
        self.assertEqual(d, 'downloading')
        with open(target_path) as f:
            d = f.read()
        self.assertEqual(d, 'simulated')
Example #24
 def processor(self):
     return SingleProc(self.work_dir)
Example #25
 def test_summary(self):
     # Create the XNAT repository and dummy analysis
     repository = XnatRepo(server=SERVER, cache_dir=self.cache_dir)
     analysis = DummyAnalysis(self.SUMMARY_STUDY_NAME,
                              repository.dataset(self.project),
                              SingleProc('ad'),
                              inputs=[
                                  FilesetFilter('source1', 'source1',
                                                text_format),
                                  FilesetFilter('source2', 'source2',
                                                text_format),
                                  FilesetFilter('source3', 'source3',
                                                text_format)
                              ])
     # TODO: Should test out other file formats as well.
     source_files = ['source1', 'source2', 'source3']
     inputnode = pe.Node(IdentityInterface(['subject_id', 'visit_id']),
                         'inputnode')
     inputnode.inputs.subject_id = self.SUBJECT
     inputnode.inputs.visit_id = self.VISIT
     source = pe.Node(RepositorySource(
         [analysis.bound_spec(f).slice for f in source_files]),
                      name='source')
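     # Test subject sink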
     subject_sink_files = ['subject_sink']
     dummy_pipeline = analysis.dummy_pipeline()
     dummy_pipeline.cap()
     subject_sink = pe.Node(RepositorySink(
         [analysis.bound_spec(f).slice for f in subject_sink_files],
         dummy_pipeline),
                            name='subject_sink')
     subject_sink.inputs.name = 'subject_summary'
     subject_sink.inputs.desc = (
         "Tests the sinking of subject-wide filesets")
     # Test visit sink
     visit_sink_files = ['visit_sink']
     visit_sink = pe.Node(RepositorySink(
         [analysis.bound_spec(f).slice for f in visit_sink_files],
         dummy_pipeline),
                          name='visit_sink')
     visit_sink.inputs.name = 'visit_summary'
     visit_sink.inputs.desc = ("Tests the sinking of visit-wide filesets")
     # Test project sink
     analysis_sink_files = ['analysis_sink']
     analysis_sink = pe.Node(RepositorySink(
         [analysis.bound_spec(f).slice for f in analysis_sink_files],
         dummy_pipeline),
                             name='analysis_sink')
     analysis_sink.inputs.name = 'project_summary'
     analysis_sink.inputs.desc = (
         "Tests the sinking of project-wide filesets")
     # Create workflow connecting them together
     workflow = pe.Workflow('summary_unittest', base_dir=self.work_dir)
     workflow.add_nodes((source, subject_sink, visit_sink, analysis_sink))
     workflow.connect(inputnode, 'subject_id', source, 'subject_id')
     workflow.connect(inputnode, 'visit_id', source, 'visit_id')
     workflow.connect(inputnode, 'subject_id', subject_sink, 'subject_id')
     workflow.connect(inputnode, 'visit_id', visit_sink, 'visit_id')
     workflow.connect(source, 'source1' + PATH_SUFFIX, subject_sink,
                      'subject_sink' + PATH_SUFFIX)
     workflow.connect(source, 'source2' + PATH_SUFFIX, visit_sink,
                      'visit_sink' + PATH_SUFFIX)
     workflow.connect(source, 'source3' + PATH_SUFFIX, analysis_sink,
                      'analysis_sink' + PATH_SUFFIX)
     workflow.run()
     analysis.clear_caches()  # Refresh the cached repository tree object
     with self._connect() as login:
         # Check subject summary directories were created properly in cache
         expected_subj_filesets = ['subject_sink']
         subject_dir = self.session_cache(
             visit=XnatRepo.SUMMARY_NAME,
             from_analysis=self.SUMMARY_STUDY_NAME)
         self.assertEqual(filter_scans(os.listdir(subject_dir)),
                          [(e + '-' + e) for e in expected_subj_filesets])
         # and on XNAT
         subject_fileset_names = filter_scans(
             login.projects[self.project].experiments[self.session_label(
                 visit=XnatRepo.SUMMARY_NAME,
                 from_analysis=self.SUMMARY_STUDY_NAME)].scans.keys())
         self.assertEqual(expected_subj_filesets, subject_fileset_names)
         # Check visit summary directories were created properly in
         # cache
         expected_visit_filesets = ['visit_sink']
         visit_dir = self.session_cache(
             subject=XnatRepo.SUMMARY_NAME,
             from_analysis=self.SUMMARY_STUDY_NAME)
         self.assertEqual(filter_scans(os.listdir(visit_dir)),
                          [(e + '-' + e) for e in expected_visit_filesets])
         # and on XNAT
         visit_fileset_names = filter_scans(
             login.projects[self.project].experiments[self.session_label(
                 subject=XnatRepo.SUMMARY_NAME,
                 from_analysis=self.SUMMARY_STUDY_NAME)].scans.keys())
         self.assertEqual(expected_visit_filesets, visit_fileset_names)
         # Check project summary directories were created properly in cache
         expected_proj_filesets = ['analysis_sink']
         project_dir = self.session_cache(
             subject=XnatRepo.SUMMARY_NAME,
             visit=XnatRepo.SUMMARY_NAME,
             from_analysis=self.SUMMARY_STUDY_NAME)
         self.assertEqual(filter_scans(os.listdir(project_dir)),
                          [(e + '-' + e) for e in expected_proj_filesets])
         # and on XNAT
         project_fileset_names = filter_scans(
             login.projects[self.project].experiments[self.session_label(
                 subject=XnatRepo.SUMMARY_NAME,
                 visit=XnatRepo.SUMMARY_NAME,
                 from_analysis=self.SUMMARY_STUDY_NAME)].scans.keys())
         self.assertEqual(expected_proj_filesets, project_fileset_names)
     # Reload the data from the summary directories
     reloadinputnode = pe.Node(
         IdentityInterface(['subject_id', 'visit_id']), 'reload_inputnode')
     reloadinputnode.inputs.subject_id = self.SUBJECT
     reloadinputnode.inputs.visit_id = self.VISIT
     reloadsource_per_subject = pe.Node(RepositorySource(
         analysis.bound_spec(f).slice for f in subject_sink_files),
                                        name='reload_source_per_subject')
     reloadsource_per_visit = pe.Node(RepositorySource(
         analysis.bound_spec(f).slice for f in visit_sink_files),
                                      name='reload_source_per_visit')
     reloadsource_per_dataset = pe.Node(RepositorySource(
         analysis.bound_spec(f).slice for f in analysis_sink_files),
                                        name='reload_source_per_dataset')
     reloadsink = pe.Node(RepositorySink(
         (analysis.bound_spec(f).slice
          for f in ['resink1', 'resink2', 'resink3']), dummy_pipeline),
                          name='reload_sink')
     reloadsink.inputs.name = 'reload_summary'
     reloadsink.inputs.desc = (
         "Tests the reloading of subject and project summary filesets")
     reloadworkflow = pe.Workflow('reload_summary_unittest',
                                  base_dir=self.work_dir)
     for node in (reloadsource_per_subject, reloadsource_per_visit,
                  reloadsource_per_dataset, reloadsink):
         for iterator in ('subject_id', 'visit_id'):
             reloadworkflow.connect(reloadinputnode, iterator, node,
                                    iterator)
     reloadworkflow.connect(reloadsource_per_subject,
                            'subject_sink' + PATH_SUFFIX, reloadsink,
                            'resink1' + PATH_SUFFIX)
     reloadworkflow.connect(reloadsource_per_visit,
                            'visit_sink' + PATH_SUFFIX, reloadsink,
                            'resink2' + PATH_SUFFIX)
     reloadworkflow.connect(reloadsource_per_dataset,
                            'analysis_sink' + PATH_SUFFIX, reloadsink,
                            'resink3' + PATH_SUFFIX)
     reloadworkflow.run()
     # Check that the resunk filesets appear in the local session cache
     self.assertEqual(
         filter_scans(
             os.listdir(
                 self.session_cache(
                     from_analysis=self.SUMMARY_STUDY_NAME))),
         ['resink1-resink1', 'resink2-resink2', 'resink3-resink3'])
     # and on XNAT
     with self._connect() as login:
         resinked_fileset_names = filter_scans(
             login.projects[self.project].experiments[self.session_label(
                 from_analysis=self.SUMMARY_STUDY_NAME)].scans.keys())
         self.assertEqual(sorted(resinked_fileset_names),
                          ['resink1', 'resink2', 'resink3'])