class Test_Results(unittest.TestCase):
    """Tests for the Results container: adding scores, detecting
    already-computed pipeline/subject pairs, and dataframe export."""

    def setUp(self):
        # Fresh Results store per test; 'test' suffix keeps the backing
        # file separate from any real result files.
        self.obj = Results(evaluation_class=DummyEvaluation,
                           paradigm_class=DummyParadigm,
                           suffix='test')

    def tearDown(self):
        # Drop the result file the store may have created on disk.
        fpath = self.obj.filepath
        if os.path.isfile(fpath):
            os.remove(fpath)

    def testCanAddSample(self):
        self.obj.add(to_result_input(['a'], [d1]), to_pipeline_dict(['a']))

    def testRecognizesAlreadyComputed(self):
        res_in = to_result_input(['a'], [d1])
        self.obj.add(res_in, to_pipeline_dict(['a']))
        remaining = self.obj.not_yet_computed(
            to_pipeline_dict(['a']), d1['dataset'], d1['subject'])
        self.assertTrue(len(remaining) == 0)

    def testCanAddMultiplePipelines(self):
        res_in = to_result_input(['a', 'b', 'c'], [d1, d1, d2])
        self.obj.add(res_in, to_pipeline_dict(['a', 'b', 'c']))

    def testCanAddMultipleValuesPerPipeline(self):
        res_in = to_result_input(['a', 'b'], [[d1, d2], [d2, d1]])
        self.obj.add(res_in, to_pipeline_dict(['a', 'b']))
        # Each (pipeline, dataset, subject) combination below was stored
        # above, so nothing should remain to be computed.
        remaining = self.obj.not_yet_computed(
            to_pipeline_dict(['a']), d1['dataset'], d1['subject'])
        self.assertTrue(len(remaining) == 0, remaining)
        remaining = self.obj.not_yet_computed(
            to_pipeline_dict(['b']), d2['dataset'], d2['subject'])
        self.assertTrue(len(remaining) == 0, remaining)
        remaining = self.obj.not_yet_computed(
            to_pipeline_dict(['b']), d1['dataset'], d1['subject'])
        self.assertTrue(len(remaining) == 0, remaining)

    def testCanExportToDataframe(self):
        # Two batches of three results -> 6 rows over pipelines a, b, c.
        res_in = to_result_input(['a', 'b', 'c'], [d1, d1, d2])
        self.obj.add(res_in, to_pipeline_dict(['a', 'b', 'c']))
        res_in = to_result_input(['a', 'b', 'c'], [d2, d2, d3])
        self.obj.add(res_in, to_pipeline_dict(['a', 'b', 'c']))
        frame = self.obj.to_dataframe()
        self.assertTrue(
            set(np.unique(frame['pipeline'])) == set(('a', 'b', 'c')),
            np.unique(frame['pipeline']))
        self.assertTrue(frame.shape[0] == 6, frame.shape[0])
def process(self, pipelines, overwrite=False, suffix=''):
    '''Runs tasks on all given datasets.

    Validates that ``pipelines`` is a dict of sklearn estimators, then
    evaluates every subject of every dataset that does not already have
    stored results, logging scores as they arrive.

    Parameters
    ----------
    pipelines : dict
        Mapping of pipeline name to a ``BaseEstimator`` instance.
    overwrite : bool
        Forwarded to ``Results``; whether to discard stored results.
    suffix : str
        Forwarded to ``Results``; suffix for the result file name.

    Returns
    -------
    Results
        The result store populated during this run.
    '''
    # Validate input: a dict whose values are all sklearn estimators.
    if not isinstance(pipelines, dict):
        raise ValueError("pipelines must be a dict")
    for pipeline in pipelines.values():
        if not isinstance(pipeline, BaseEstimator):
            raise ValueError("pipelines must only contains Pipelines "
                             "instance")

    results = Results(type(self), type(self.paradigm),
                      overwrite=overwrite, suffix=suffix)

    for dataset in self.datasets:
        log.info('Processing dataset: {}'.format(dataset.code))
        self.preprocess_data(dataset)

        for subject in dataset.subject_list:
            # Skip pipelines already scored for this subject/dataset.
            run_pipes = results.not_yet_computed(pipelines, dataset,
                                                 subject)
            if not run_pipes:
                continue
            try:
                res = self.evaluate(dataset, subject, run_pipes)
                for pipe in res:
                    for r in res[pipe]:
                        log.info('{} | {} | {} : Score %.3f'.format(
                            pipe, r['dataset'].code,
                            r['id']) % r['score'])
                results.add(res, pipelines=pipelines)
            except Exception as e:
                # Best-effort: log the failure and move on to the
                # next subject rather than aborting the whole run.
                log.error(e)
                log.debug(traceback.format_exc())
                log.warning('Skipping subject {}'.format(subject))
    return results