def _assert_result(self, namefile: str, data: str, iterations: str, project_name: str,
                   result_means_filename: str) -> None:
    """Compare a generated means file with its stored reference, then delete it.

    The reference file name is built from ``namefile``, ``data`` and
    ``iterations`` and is looked up under ``data_test_dir``. The generated
    file is read from ``output_test_dir/project_name/result_means_filename``.

    :param namefile: name fragment inserted into the reference file name.
    :param data: data-set label inserted into the reference file name.
    :param iterations: iteration count inserted into the reference file name.
    :param project_name: sub-directory of ``output_test_dir`` holding the result.
    :param result_means_filename: name of the generated file to check.
    """
    means_test_filename = 'hi_{}_result__data-{}_it-{}.txt'.format(namefile, data, iterations)
    # Built once: the same path is both read and removed below.
    result_means_path = '{}/{}/{}'.format(output_test_dir, project_name, result_means_filename)

    original_means = pd.read_table(
        os.path.realpath('{}/{}'.format(data_test_dir, means_test_filename)))
    result_means = pd.read_table(result_means_path)

    # Name both files in the failure message so a failing run says which
    # comparison broke.
    self.assertTrue(
        dataframe_functions.dataframes_has_same_data(result_means, original_means),
        msg='failed comparing {} with {}'.format(means_test_filename, result_means_filename))

    # Only reached when the assertion passed (assuming a unittest-style
    # assertTrue that raises on failure), so a failing run keeps the file.
    self.remove_file(result_means_path)
def _assert_result(self, filename: str, data: str, iterations: int, project_name: str,
                   result_means_filename: str, debug_seed: int, threshold: float,
                   result_precision: int) -> None:
    """Check a generated statistical-analysis file against its reference and remove it.

    The reference file name encodes ``filename``, ``data``, ``iterations``,
    ``debug_seed``, the threshold (with its decimal point removed) and
    ``result_precision``; it is read from ``data_test_dir``. The generated
    file lives at ``output_test_dir/project_name/result_means_filename``.
    """
    # The threshold is embedded in the file name without its dot, e.g. 0.2 -> '02'.
    threshold_tag = str(threshold).replace('.', '')
    reference_name = (
        'statistical_analysis__{}_result__'
        'data-{}_it-{}_seed-{}_threshold-{}_precision-{}.txt'
    ).format(filename, data, iterations, debug_seed, threshold_tag, result_precision)

    reference_path = os.path.realpath('{}/{}'.format(data_test_dir, reference_name))
    produced_path = '{}/{}/{}'.format(output_test_dir, project_name, result_means_filename)

    expected_frame = pd.read_table(reference_path)
    produced_frame = pd.read_table(produced_path)

    failure_note = 'failed comparing {} with {}'.format(reference_name, result_means_filename)
    frames_match = dataframe_functions.dataframes_has_same_data(produced_frame, expected_frame)
    self.assertTrue(frames_match, msg=failure_note)

    self.remove_file(produced_path)
def test_different_unsorted_columns(self):
    """Frames with reordered columns AND one differing value must not compare as equal."""
    left = pd.DataFrame({'col2': [3, 4], 'col1': [1, 2]})
    # Same columns declared in the opposite order; the last 'col2' value differs (5 vs 4).
    right = pd.DataFrame({'col1': [1, 2], 'col2': [3, 5]})

    frames_match = dataframe_functions.dataframes_has_same_data(left, right)

    self.assertFalse(frames_match)
def test_equal_unsorted_columns(self):
    """Frames holding the same values must compare as equal even if columns are reordered."""
    left = pd.DataFrame({'col2': [3, 4], 'col1': [1, 2]})
    # Identical content, columns declared in the opposite order.
    right = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})

    frames_match = dataframe_functions.dataframes_has_same_data(left, right)

    self.assertTrue(frames_match)
def test_apply_threshold(self):
    """apply_threshold with threshold=0.2 on the generic counts fixture must match the stored result."""
    counts_path = '{}/cluster_counts_generic_cluster_counts.csv'.format(self.FIXTURES_SUBPATH)
    expected_path = '{}/cluster_counts_helper_threshold_results.csv'.format(self.FIXTURES_SUBPATH)

    cluster_counts = pd.read_csv(counts_path)
    expected_result = pd.read_csv(expected_path)

    # Every column except 'gene' is passed on as a cluster name.
    cluster_names = list(cluster_counts.columns.values)
    cluster_names.remove('gene')

    result = cluster_counts_helper.apply_threshold(cluster_counts, cluster_names, threshold=0.2)

    frames_match = dataframe_functions.dataframes_has_same_data(result, expected_result)
    self.assertTrue(frames_match)
def test_filter_by_multidatas(self):
    """filter_by_multidatas must keep exactly the interactions flagged in 'test_both_enabled'."""
    multidata_path = '{}/filter_interaction/filter_interaction_multidatas_multidata.csv'.format(
        data_test_dir)
    interaction_path = '{}/filter_interaction/filter_interaction_multidatas_interaction.csv'.format(
        data_test_dir)

    multidatas = pd.read_csv(multidata_path)
    interactions = pd.read_csv(interaction_path)

    result = interaction_filter.filter_by_multidatas(multidatas, interactions)

    # The fixture carries its own expectation: the 'test_both_enabled' column
    # is used as a row mask over the interactions.
    keep_mask = interactions['test_both_enabled']
    expected_result = interactions[keep_mask]

    frames_match = dataframe_functions.dataframes_has_same_data(result, expected_result)
    self.assertTrue(frames_match)
def test_get_involved_complex_from_protein_empty_result(self):
    """With no proteins, get_involved_complex_from_protein must give a result equal to an empty frame."""
    protein_path = '{}/helper_complex_protein.csv'.format(self.FIXTURES_SUBPATH)
    complex_path = '{}/helper_complex_complex.csv'.format(self.FIXTURES_SUBPATH)
    composition_path = '{}/helper_complex_complex_composition.csv'.format(self.FIXTURES_SUBPATH)

    proteins = pd.read_csv(protein_path)
    # Drop every row in place so the protein table is empty.
    proteins.drop(proteins.index, inplace=True)
    complexes = pd.read_csv(complex_path)
    complex_composition = pd.read_csv(composition_path)

    result = complex_helper.get_involved_complex_from_protein(
        proteins, complexes, complex_composition, drop_duplicates=False)

    empty_frame = pd.DataFrame()
    self.assertTrue(dataframe_functions.dataframes_has_same_data(result, empty_frame))
def _assert_result(self, namefile: str, data: str, project_name: str, result_means_filename: str,
                   threshold: float, result_precision: int,
                   ) -> None:
    """Compare a generated analysis file with its stored reference, then delete it.

    The reference file name encodes ``namefile``, ``data``, the threshold
    (with its decimal point removed) and ``result_precision``; it is read
    from ``data_test_dir``. The generated file is read from
    ``output_test_dir/project_name/result_means_filename``.

    :param namefile: name fragment inserted into the reference file name.
    :param data: data-set label inserted into the reference file name.
    :param project_name: sub-directory of ``output_test_dir`` holding the result.
    :param result_means_filename: name of the generated file to check.
    :param threshold: threshold value encoded in the reference file name.
    :param result_precision: precision value encoded in the reference file name.
    """
    # The threshold appears in the file name without its dot, e.g. 0.2 -> '02'.
    str_threshold = str(threshold).replace('.', '')
    test_filename = 'analysis__{}_result__data-{}_threshold-{}_precision-{}.txt'.format(
        namefile, data, str_threshold, result_precision)
    # Built once: the same path is both read and removed below.
    result_means_path = '{}/{}/{}'.format(output_test_dir, project_name, result_means_filename)

    original_means = pd.read_table('{}/{}'.format(data_test_dir, test_filename))
    result_means = pd.read_table(result_means_path)

    # Name both files in the failure message so a failing run says which
    # comparison broke.
    self.assertTrue(
        dataframe_functions.dataframes_has_same_data(result_means, original_means),
        msg='failed comparing {} with {}'.format(test_filename, result_means_filename))

    # Only reached when the assertion passed (assuming a unittest-style
    # assertTrue that raises on failure), so a failing run keeps the file.
    self.remove_file(result_means_path)
def test_get_involved_complex_composition_from_protein(self):
    """get_involved_complex_composition_from_protein on the fixtures must match the stored result."""
    protein_path = '{}/helper_complex_protein.csv'.format(self.FIXTURES_SUBPATH)
    composition_path = '{}/helper_complex_complex_composition.csv'.format(self.FIXTURES_SUBPATH)
    expected_path = '{}/helper_complex_result_drop_duplicates.csv'.format(self.FIXTURES_SUBPATH)

    # The first CSV column is used as the protein frame's index.
    proteins = pd.read_csv(protein_path, index_col=0)
    complex_composition = pd.read_csv(composition_path)
    result_expected = pd.read_csv(expected_path)

    result = complex_helper.get_involved_complex_composition_from_protein(
        proteins, complex_composition)

    frames_match = dataframe_functions.dataframes_has_same_data(result, result_expected)
    self.assertTrue(frames_match)
def test_filter_empty_cluster_counts(self):
    """filter_empty_cluster_counts on the generic counts fixture must match the stored result."""
    counts_path = '{}/cluster_counts_generic_cluster_counts.csv'.format(self.FIXTURES_SUBPATH)
    expected_path = '{}/cluster_counts_filter_empty_cluster_results.csv'.format(
        self.FIXTURES_SUBPATH)

    cluster_counts = pd.read_csv(counts_path)
    expected_result = pd.read_csv(expected_path)

    # Every column except 'gene' is passed on as a cluster name.
    cluster_names = list(cluster_counts.columns.values)
    cluster_names.remove('gene')

    result = cluster_counts_filter.filter_empty_cluster_counts(cluster_counts, cluster_names)

    frames_match = dataframe_functions.dataframes_has_same_data(result, expected_result)
    self.assertTrue(frames_match)
def test_merge_complex_cluster_counts(self):
    """merge_complex_counts on the composition fixture must match the stored result."""
    composition_path = (
        '{}/cluster_counts_helper_merge_complex_cluster_counts_complex_counts_composition.csv'
        .format(self.FIXTURES_SUBPATH))
    expected_path = '{}/cluster_counts_helper_merge_complex_cluster_counts_result.csv'.format(
        self.FIXTURES_SUBPATH)

    complex_counts_composition = pd.read_csv(composition_path)
    expected_result = pd.read_csv(expected_path)

    cluster_names = ['cluster_1', 'cluster_2', 'cluster_3']
    complex_column_names = ['complex_multidata_id']

    result = cluster_counts_helper.merge_complex_counts(
        cluster_names, complex_counts_composition, complex_column_names)

    # Cast the id column to int32 on both sides so that a value stored as 1
    # and one stored as 1.0000 compare as equal.
    for frame in (result, expected_result):
        frame[complex_column_names] = frame[complex_column_names].astype(dtype='int32')

    frames_match = dataframe_functions.dataframes_has_same_data(result, expected_result)
    self.assertTrue(frames_match)
def test_get_involved_complex_from_protein_empty_result(self):
    """With no proteins, the composition lookup must match an empty frame with the three expected columns."""
    protein_path = '{}/helper_complex_protein.csv'.format(self.FIXTURES_SUBPATH)
    composition_path = '{}/helper_complex_complex_composition.csv'.format(self.FIXTURES_SUBPATH)

    # The first CSV column is used as the protein frame's index.
    proteins = pd.read_csv(protein_path, index_col=0)
    # Drop every row in place so the protein table is empty.
    proteins.drop(proteins.index, inplace=True)
    complex_composition = pd.read_csv(composition_path)

    result = complex_helper.get_involved_complex_composition_from_protein(
        proteins, complex_composition)

    expected_columns = ['complex_multidata_id', 'protein_multidata_id', 'total_protein']
    empty_expected = pd.DataFrame(columns=expected_columns)

    self.assertTrue(dataframe_functions.dataframes_has_same_data(result, empty_expected))
def test_get_complex_involved_in_counts(self):
    """get_complex_involved_in_counts on the cluster-count fixtures must match the stored result."""
    counts_path = '{}/helper_cluster_counts.csv'.format(self.FIXTURES_SUBPATH)
    composition_path = '{}/helper_cluster_counts_complex_composition.csv'.format(
        self.FIXTURES_SUBPATH)
    complex_path = '{}/helper_cluster_counts_complex.csv'.format(self.FIXTURES_SUBPATH)
    expected_path = '{}/cluster_counts_helper_get_complex_involved_in_counts_result.csv'.format(
        self.FIXTURES_SUBPATH)

    multidatas_counts = pd.read_csv(counts_path)
    cluster_names = ['cluster_1', 'cluster_2', 'cluster_3']
    complex_composition = pd.read_csv(composition_path)
    complex_expanded = pd.read_csv(complex_path)
    result_expected = pd.read_csv(expected_path)

    result = cluster_counts_helper.get_complex_involved_in_counts(
        multidatas_counts, cluster_names, complex_composition, complex_expanded)

    frames_match = dataframe_functions.dataframes_has_same_data(result, result_expected)
    self.assertTrue(
        frames_match,
        'get_complex_involved_in_counts result did not match with expected')
def test_compare_empty(self):
    """Two freshly constructed empty DataFrames must be reported as having the same data."""
    first_empty = pd.DataFrame()
    second_empty = pd.DataFrame()

    frames_match = dataframe_functions.dataframes_has_same_data(first_empty, second_empty)

    self.assertTrue(frames_match)