def test_not_found(self, datafiles):
    """A profile label missing from ``groups`` must raise ``KeyError``."""
    profiles = NeuProfiles(
        profiles_path=os.path.join(str(datafiles), 'valid'),
        profiles_filename='profiles.xlsx',
    )
    assert profiles.data_frame is None
    profiles.load()
    data_frame = profiles.data_frame

    # Mapping from profile label to the list of classes it belongs to;
    # there is no entry for the 'W' label.
    groups = {
        'A': [1],
        'B': [2],
        'C': [3],
        'A-B': [1, 2],
        'A-B-C': [1, 2, 3],
        'X': [],
    }

    with pytest.raises(KeyError) as error:
        utils.get_multilabel(data_frame, 'profile', groups)

    # W not found
    assert 'W' in str(error.value)
def test_pipe_spread_out_column(self, datafiles):
    """Spread two matrix columns, then keep only the ``DTI_FA`` features."""
    data_dir = os.path.join(str(datafiles), 'small_data')
    profiles = NeuProfiles(profiles_path=data_dir,
                           profiles_filename='profiles.xlsx')
    assert profiles.data_frame is None
    profiles.load()
    assert profiles.data_frame is not None

    spread_step = SpreadOutMatrixTransformer(columns=['DTI_FA', 'DTI_L1'])
    select_step = FeatureMatrixTransformer(matrix_columns=['DTI_FA'],
                                           columns=None)
    pipe = Pipeline([
        ('spread', spread_step),
        ('selecting', select_step),
    ])
    output = pipe.fit_transform(profiles.data_frame)

    # (column name, expected value for subject FIS_007), in sorted order.
    expected = [
        ('DTI_FA_1_0', 4),
        ('DTI_FA_2_0', 7),
        ('DTI_FA_2_1', 8),
    ]
    assert sorted(output.columns) == [name for name, _ in expected]
    for name, value in expected:
        assert output[name].FIS_007 == value
def test_pipe_params(self, datafiles):
    """Configure ``CombineMatrixTransformer`` through ``Pipeline.set_params``."""
    data_dir = os.path.join(str(datafiles), 'small_data')
    profiles = NeuProfiles(profiles_path=data_dir,
                           profiles_filename='profiles.xlsx')
    assert profiles.data_frame is None
    profiles.load()
    assert profiles.data_frame is not None

    # The transformer is built with defaults; its settings are injected
    # afterwards using the ``<step>__<param>`` naming.
    pipe = Pipeline([('combining', CombineMatrixTransformer())])
    params = {
        'combining__columns': ['DTI_FA', 'DTI_L1'],
        'combining__column_name': 'combined',
    }
    pipe.set_params(**params)

    output = pipe.fit_transform(profiles.data_frame)

    assert sorted(output.columns) == ['DTI_FA', 'DTI_L1', 'combined']
    expected_matrix = [
        [6, 7, 8],
        [9, 10, 11],
        [12, 13, 14],
    ]
    assert (output.combined.FIS_007 == expected_matrix).all()
def test_spread_out_simple(self, datafiles):
    """Spread ``DTI_L1`` into one column per cell, dropping the matrix column."""
    data_dir = os.path.join(str(datafiles), 'small_data')
    profiles = NeuProfiles(profiles_path=data_dir,
                           profiles_filename='profiles.xlsx')
    profiles.load()

    new_dataframe = profiles.spread_out_matrix(
        ['DTI_L1'], keep_matrix=False, symmetric=False)

    # (spread column name, expected value in the first row), sorted by name.
    expected = [
        ('DTI_L1_0_0', 11),
        ('DTI_L1_0_1', 12),
        ('DTI_L1_0_2', 13),
        ('DTI_L1_1_0', 14),
        ('DTI_L1_1_1', 15),
        ('DTI_L1_1_2', 16),
        ('DTI_L1_2_0', 17),
        ('DTI_L1_2_1', 18),
        ('DTI_L1_2_2', 19),
    ]
    expected_columns = ['DTI_FA'] + [name for name, _ in expected]
    assert expected_columns == sorted(new_dataframe.columns)
    for name, value in expected:
        assert new_dataframe[name][0] == value
def test_no_df_simple(self, datafiles):
    """Run ``CombineMatrixTransformer`` on raw values instead of a DataFrame."""
    data_dir = os.path.join(str(datafiles), 'small_data')
    profiles = NeuProfiles(profiles_path=data_dir,
                           profiles_filename='profiles.xlsx')
    assert profiles.data_frame is None
    profiles.load()
    assert profiles.data_frame is not None

    combiner = CombineMatrixTransformer(columns=['0', '1'],
                                        column_name='combined')
    pipe = Pipeline([('combining', combiner)])

    # ``.values`` strips the column labels, so columns are addressed
    # as '0' and '1' above.
    output = pipe.fit_transform(profiles.data_frame.values)

    expected_matrix = np.asarray([
        [6, 7, 8],
        [9, 10, 11],
        [12, 13, 14],
    ])
    assert (output[0][2] == expected_matrix).all()
def test_fail_duplicates_data(self, datafiles):
    """Loading the ``duplicated_data`` fixture must raise ``ValueError``."""
    profiles = NeuProfiles(
        profiles_path=os.path.join(str(datafiles), 'duplicated_data'),
        profiles_filename='profiles.xlsx',
    )

    with pytest.raises(ValueError) as error:
        profiles.load()

    expected_message = (
        "Already existing value at [FIS_007, DTI_FA]: "
        "Value from FIS_007_FA_factor.csv cannot be loaded."
    )
    assert str(error.value) == expected_message
def test_warning(self, datafiles, caplog):
    """At INFO level, the fourth captured record is the missing-index warning."""
    caplog.set_level(logging.INFO)
    profiles = NeuProfiles(
        profiles_path=os.path.join(str(datafiles), 'index_not_found'),
        profiles_filename='profiles.xlsx',
    )
    profiles.load()

    records = caplog.records
    assert len(records) == 4

    warning = records[3]
    assert warning.levelname == 'WARNING'
    expected_message = (
        "Index FIS_007 couldn't be found in main file (total 1 index(es))."
    )
    assert warning.message == expected_message
def test_fail_format(self, datafiles):
    """Loading with ``format_file='mat'`` must raise ``NotImplementedError``."""
    profiles = NeuProfiles(
        profiles_path=os.path.join(str(datafiles), 'duplicated_data'),
        profiles_filename='profiles.xlsx',
        format_file='mat',
    )

    with pytest.raises(NotImplementedError) as error:
        profiles.load()

    message = str(error.value)
    assert message == 'Format \'mat\' not supported'
def test_debug(self, datafiles, caplog):
    """At DEBUG level, the third captured record reports the skipped index."""
    caplog.set_level(logging.DEBUG)
    profiles = NeuProfiles(
        profiles_path=os.path.join(str(datafiles), 'index_not_found'),
        profiles_filename='profiles.xlsx',
    )
    profiles.load()

    records = caplog.records
    assert len(records) == 5

    debug_record = records[2]
    assert debug_record.levelname == 'DEBUG'
    expected_message = (
        "Index (id) FIS_007 couldn't be found, skipped "
        "(file FIS_007_FA_factor.csv)"
    )
    assert debug_record.message == expected_message
def test_fail_duplicates_id(self, datafiles):
    """Loading the ``duplicated_id`` fixture must raise ``ValueError``."""
    profiles = NeuProfiles(
        profiles_path=os.path.join(str(datafiles), 'duplicated_id'),
        profiles_filename='profiles.xlsx',
    )

    with pytest.raises(ValueError) as error:
        profiles.load()

    # The repr of the offending index differs between Python 2 (unicode
    # ``u''`` prefixes) and Python 3.
    if six.PY2:
        expected_message = (
            "Index has duplicate keys: "
            "Index([u'FIS_007', u'TTO_06'], dtype='object', name=u'ID')"
        )
    else:
        expected_message = (
            "Index has duplicate keys: "
            "Index(['FIS_007', 'TTO_06'], dtype='object', name='ID')"
        )
    assert str(error.value) == expected_message
def test_pipe_symmetric(self, datafiles):
    """Spread every column with ``symmetric=False`` and check all 18 cells."""
    data_dir = os.path.join(str(datafiles), 'small_data')
    profiles = NeuProfiles(profiles_path=data_dir,
                           profiles_filename='profiles.xlsx')
    assert profiles.data_frame is None
    profiles.load()
    assert profiles.data_frame is not None

    spreader = SpreadOutMatrixTransformer(
        columns=profiles.data_frame.columns, symmetric=False)
    pipe = Pipeline([('spread', spreader)])
    output = pipe.fit_transform(profiles.data_frame)

    # (column name, expected value for subject FIS_007), sorted by name.
    expected = [
        ('DTI_FA_0_0', 1),
        ('DTI_FA_0_1', 2),
        ('DTI_FA_0_2', 3),
        ('DTI_FA_1_0', 4),
        ('DTI_FA_1_1', 5),
        ('DTI_FA_1_2', 6),
        ('DTI_FA_2_0', 7),
        ('DTI_FA_2_1', 8),
        ('DTI_FA_2_2', 9),
        ('DTI_L1_0_0', 11),
        ('DTI_L1_0_1', 12),
        ('DTI_L1_0_2', 13),
        ('DTI_L1_1_0', 14),
        ('DTI_L1_1_1', 15),
        ('DTI_L1_1_2', 16),
        ('DTI_L1_2_0', 17),
        ('DTI_L1_2_1', 18),
        ('DTI_L1_2_2', 19),
    ]
    assert sorted(output.columns) == [name for name, _ in expected]
    for name, value in expected:
        assert output[name].FIS_007 == value
def test_without_columns(self, datafiles):
    """Without ``columns``, the combiner output has only the original columns."""
    data_dir = os.path.join(str(datafiles), 'small_data')
    profiles = NeuProfiles(profiles_path=data_dir,
                           profiles_filename='profiles.xlsx')
    assert profiles.data_frame is None
    profiles.load()
    assert profiles.data_frame is not None

    combiner = CombineMatrixTransformer(column_name='combined')
    pipe = Pipeline([('combining', combiner)])
    output = pipe.fit_transform(profiles.data_frame)

    # No 'combined' column is expected in the result.
    assert sorted(output.columns) == ['DTI_FA', 'DTI_L1']
def test_format_info(self, datafiles, caplog):
    """Check the INFO records emitted while loading the ``valid`` fixture."""
    caplog.set_level(logging.INFO)
    path = os.path.join(str(datafiles), 'valid')
    profiles = NeuProfiles(profiles_path=path,
                           profiles_filename='profiles.xlsx')
    assert profiles.data_frame is None
    profiles.load()

    records = caplog.records
    assert len(records) == 25

    # NOTE(review): the message checks are assumed to apply to every
    # record (i.e. to sit inside the loop) -- confirm against the
    # original layout.
    for record in records:
        assert record.levelname == 'INFO'
        message = record.message
        assert 'Reading content in ' in message
        assert path in message
        assert 'directory with ' in message
def test_threshold(self, datafiles):
    """Binarize both matrix columns with ``threshold=1`` and check corner cells.

    Before binarizing, the corner cells hold the raw fixture values.
    Afterwards ``DTI_FA[0][0]`` (raw value 1) becomes 0, while the other
    checked cells (raw values 9, 11 and 19) become 1.
    """
    path = os.path.join(str(datafiles), 'small_data')
    filename = 'profiles.xlsx'
    profiles = NeuProfiles(profiles_path=path, profiles_filename=filename)
    assert profiles.data_frame is None
    profiles.load()

    assert profiles.data_frame is not None
    assert isinstance(profiles.data_frame, pandas.DataFrame)
    assert profiles.data_frame.shape == (1, 2)
    assert profiles.data_frame.index.name == 'ID'
    assert sorted(profiles.data_frame.columns) == ['DTI_FA', 'DTI_L1']

    # Raw values (each check appears once; the original repeated the
    # same four assertions verbatim).
    assert profiles.data_frame.DTI_FA.FIS_007[0][0] == 1
    assert profiles.data_frame.DTI_FA.FIS_007[2][2] == 9
    assert profiles.data_frame.DTI_L1.FIS_007[0][0] == 11
    assert profiles.data_frame.DTI_L1.FIS_007[2][2] == 19

    profiles.binarize_matrix(['DTI_FA', 'DTI_L1'], threshold=1)

    # Binarized values.
    assert profiles.data_frame.DTI_FA.FIS_007[0][0] == 0
    assert profiles.data_frame.DTI_FA.FIS_007[2][2] == 1
    assert profiles.data_frame.DTI_L1.FIS_007[0][0] == 1
    assert profiles.data_frame.DTI_L1.FIS_007[2][2] == 1
def test_no_df_simple(self, datafiles):
    """Run ``SpreadOutMatrixTransformer`` on raw values instead of a DataFrame."""
    data_dir = os.path.join(str(datafiles), 'small_data')
    profiles = NeuProfiles(profiles_path=data_dir,
                           profiles_filename='profiles.xlsx')
    assert profiles.data_frame is None
    profiles.load()
    assert profiles.data_frame is not None

    spreader = SpreadOutMatrixTransformer(columns=['0', '1'])
    pipe = Pipeline([('spread', spreader)])
    output = pipe.fit_transform(profiles.data_frame.values)

    # Either ordering of the two source columns is accepted.
    accepted = (
        [[14., 17., 18., 4., 7., 8.]],
        [[4., 7., 8., 14., 17., 18.]],
    )
    assert output.tolist() in accepted
def test_inplace_false(self, datafiles):
    """With ``inplace=False`` the spread columns appear only in the result."""
    data_dir = os.path.join(str(datafiles), 'small_data')
    profiles = NeuProfiles(profiles_path=data_dir,
                           profiles_filename='profiles.xlsx')
    profiles.load()
    old_dataframe = profiles.data_frame

    new_dataframe = profiles.spread_out_matrix(
        ['DTI_L1'], keep_matrix=False, inplace=False, symmetric=False)

    spread_columns = [
        'DTI_L1_0_0', 'DTI_L1_0_1', 'DTI_L1_0_2',
        'DTI_L1_1_0', 'DTI_L1_1_1', 'DTI_L1_1_2',
        'DTI_L1_2_0', 'DTI_L1_2_1', 'DTI_L1_2_2',
    ]
    assert ['DTI_FA'] + spread_columns == sorted(new_dataframe.columns)

    # The frame held by ``profiles`` must not have gained the new columns.
    current_columns = profiles.data_frame.columns
    for new_column in spread_columns:
        assert new_column not in current_columns
    assert (old_dataframe == profiles.data_frame).all().all()
def test_no_df_simple(self, datafiles):
    """Spread then select features on raw values instead of a DataFrame."""
    data_dir = os.path.join(str(datafiles), 'small_data')
    profiles = NeuProfiles(profiles_path=data_dir,
                           profiles_filename='profiles.xlsx')
    assert profiles.data_frame is None
    profiles.load()
    assert profiles.data_frame is not None

    spread_step = SpreadOutMatrixTransformer(columns=['0', '1'])
    select_step = FeatureMatrixTransformer(matrix_columns=['0'],
                                           columns=None)
    pipe = Pipeline([
        ('spread', spread_step),
        ('selecting', select_step),
    ])
    output = pipe.fit_transform(profiles.data_frame.values)

    # Two values are accepted for the first output entry.
    first_value = int(output[0])
    assert first_value in [4, 14]
def test_pipe_column(self, datafiles):
    """Select the plain ``DTI_FA`` column through ``FeatureMatrixTransformer``."""
    data_dir = os.path.join(str(datafiles), 'small_data')
    profiles = NeuProfiles(profiles_path=data_dir,
                           profiles_filename='profiles.xlsx')
    assert profiles.data_frame is None
    profiles.load()
    assert profiles.data_frame is not None

    selector = FeatureMatrixTransformer(columns=['DTI_FA'],
                                        matrix_columns=None)
    pipe = Pipeline([('selecting', selector)])
    output = pipe.fit_transform(profiles.data_frame)

    # Only the requested column is present, and it matches the source.
    assert sorted(output.columns) == ['DTI_FA']
    assert (output.DTI_FA == profiles.data_frame.DTI_FA).all()
def test_simple(self, datafiles):
    """Check the multilabel matrix built from the ``profile`` column.

    Each profile label maps to a list of classes (1, 2, 3); the resulting
    frame is expected to have one indicator column per class and to share
    its index with the loaded profiles.
    """
    path = os.path.join(str(datafiles), 'valid')
    filename = 'profiles.xlsx'
    profiles = NeuProfiles(profiles_path=path, profiles_filename=filename)
    assert profiles.data_frame is None
    profiles.load()

    groups = {
        'A': [1],
        'B': [2],
        'C': [3],
        'A-B': [1, 2],
        'A-B-C': [1, 2, 3],
        'X': [],
        'W': [],
    }
    data_frame = profiles.data_frame
    df = utils.get_multilabel(data_frame, 'profile', groups)

    # ``DataFrame.as_matrix()`` is deprecated and was removed in
    # pandas 1.0; ``.values`` returns the same ndarray.
    values = df.values

    # 3 columns, 63 items
    assert values.shape == (63, 3)

    # (profile label, expected indicator row); the original checked 'C'
    # twice, which is collapsed to one entry here.
    expected_rows = [
        ('A', [1, 0, 0]),
        ('B', [0, 1, 0]),
        ('C', [0, 0, 1]),
        ('A-B', [1, 1, 0]),
        ('A-B-C', [1, 1, 1]),
        ('X', [0, 0, 0]),
    ]
    for label, row in expected_rows:
        assert (values[profiles.data_frame.profile == label] == row).all()

    assert (df.columns == [1, 2, 3]).all()
    assert (df.index == profiles.data_frame.index).all()
def test_default(self, datafiles):
    """Combine ``DTI_FA`` and ``DTI_L1`` into a new ``combined`` column.

    The expected ``combined`` matrix is the element-wise mean of the two
    source matrices in the fixture (e.g. (1 + 11) / 2 == 6 for the first
    cell and (9 + 19) / 2 == 14 for the last).
    """
    path = os.path.join(str(datafiles), 'small_data')
    filename = 'profiles.xlsx'
    profiles = NeuProfiles(profiles_path=path, profiles_filename=filename)
    assert profiles.data_frame is None
    profiles.load()

    assert profiles.data_frame is not None
    assert isinstance(profiles.data_frame, pandas.DataFrame)
    assert profiles.data_frame.shape == (1, 2)
    assert profiles.data_frame.index.name == 'ID'
    assert sorted(profiles.data_frame.columns) == ['DTI_FA', 'DTI_L1']

    # Source values (each check appears once; the original repeated the
    # same four assertions verbatim).
    assert profiles.data_frame.DTI_FA.FIS_007[0][0] == 1
    assert profiles.data_frame.DTI_FA.FIS_007[2][2] == 9
    assert profiles.data_frame.DTI_L1.FIS_007[0][0] == 11
    assert profiles.data_frame.DTI_L1.FIS_007[2][2] == 19

    profiles.combine_matrix(['DTI_FA', 'DTI_L1'], 'combined')

    assert sorted(profiles.data_frame.columns) == [
        'DTI_FA', 'DTI_L1', 'combined']
    assert (profiles.data_frame.combined.FIS_007 == [
        [6, 7, 8],
        [9, 10, 11],
        [12, 13, 14],
    ]).all()
def test_valid(self, datafiles):
    """Load the ``valid`` fixture and check frame shape and matrix shapes."""
    data_dir = os.path.join(str(datafiles), 'valid')
    profiles = NeuProfiles(profiles_path=data_dir,
                           profiles_filename='profiles.xlsx')
    assert profiles.data_frame is None
    profiles.load()

    frame = profiles.data_frame
    assert frame is not None
    assert isinstance(frame, pandas.DataFrame)
    assert frame.shape == (63, 8)
    assert frame.index.name == 'ID'
    assert 'profile' == frame.columns[0]

    matrix_columns = [
        'RAW', 'LS', 'DTI_L1', 'DTI_MD', 'DTI_RX', 'DTI_FA', 'FUNC',
    ]
    for column_name in matrix_columns:
        assert column_name in profiles.data_frame.columns
        # Inspect the first row that has a value in this column.
        column = profiles.data_frame[column_name]
        first_i = column.first_valid_index()
        assert profiles.data_frame[column_name][first_i].shape == (76, 76)
def test_extract_multiple_keeping(self, datafiles):
    """Spread two matrix columns with ``keep_matrix=True``."""
    data_dir = os.path.join(str(datafiles), 'small_data')
    profiles = NeuProfiles(profiles_path=data_dir,
                           profiles_filename='profiles.xlsx')
    profiles.load()

    new_dataframe = profiles.spread_out_matrix(
        ['DTI_L1', 'DTI_FA'], keep_matrix=True, symmetric=False)

    # (spread column name, expected value in the first row), sorted by name
    # within each source matrix.
    expected_fa = [
        ('DTI_FA_0_0', 1),
        ('DTI_FA_0_1', 2),
        ('DTI_FA_0_2', 3),
        ('DTI_FA_1_0', 4),
        ('DTI_FA_1_1', 5),
        ('DTI_FA_1_2', 6),
        ('DTI_FA_2_0', 7),
        ('DTI_FA_2_1', 8),
        ('DTI_FA_2_2', 9),
    ]
    expected_l1 = [
        ('DTI_L1_0_0', 11),
        ('DTI_L1_0_1', 12),
        ('DTI_L1_0_2', 13),
        ('DTI_L1_1_0', 14),
        ('DTI_L1_1_1', 15),
        ('DTI_L1_1_2', 16),
        ('DTI_L1_2_0', 17),
        ('DTI_L1_2_1', 18),
        ('DTI_L1_2_2', 19),
    ]

    # The matrix columns themselves are kept next to the spread ones.
    expected_columns = (
        ['DTI_FA'] + [name for name, _ in expected_fa] +
        ['DTI_L1'] + [name for name, _ in expected_l1]
    )
    assert expected_columns == sorted(new_dataframe.columns)

    for name, value in expected_l1 + expected_fa:
        assert new_dataframe[name][0] == value

    # The original matrices are still readable on the profiles frame.
    assert profiles.data_frame.DTI_FA.FIS_007[0][0] == 1
    assert profiles.data_frame.DTI_FA.FIS_007[2][2] == 9
    assert profiles.data_frame.DTI_L1.FIS_007[0][0] == 11
    assert profiles.data_frame.DTI_L1.FIS_007[2][2] == 19