def test_euclidean_perfect(self):
    # Comparing self.mpm1 against itself with the 'euclidean' method must
    # report a distance of zero for every sink.
    sink_ids = ['sink1', 'sink2', 'sink3', 'sink4', 'sink5', 'sink6']
    exp = pd.DataFrame([0.0] * len(sink_ids), index=sink_ids,
                       columns=['Euclidean distance'])

    obs = compare_sinks(self.mpm1, self.mpm1, 'euclidean')

    assert_data_frame_almost_equal(obs, exp)
def test_default_valid_multi_line(self):
    # Both fixture files are parsed with _blast7_to_data_frame and are
    # expected to yield the same twelve columns, three rows each.
    default_columns = ['qseqid', 'sseqid', 'pident', 'length', 'mismatch',
                       'gapopen', 'qstart', 'qend', 'sstart', 'send',
                       'evalue', 'bitscore']
    cases = [
        ('blast7_default_multi_line',
         [['query1', 'subject2', 70.00, 5.0, 0.0, 0.0, 7.0, 60.0, 3.0,
           100.0, 9e-05, 10.5],
          ['query1', 'subject2', 30.00, 8.0, 0.0, 0.0, 6.0, 15.0, 1.0,
           100.0, 0.053, 12.0],
          ['query1', 'subject2', 90.00, 2.0, 0.0, 0.0, 9.0, 35.0, 2.0,
           100.0, 0.002, 8.3]]),
        ('legacy9_multi_line',
         [['query1', 'subject1', 90.00, 7.0, 1.0, 0.0, 0.0, 8.0, 4.0,
           10.0, 1e-05, 15.5],
          ['query1', 'subject1', 70.00, 8.0, 0.0, 1.0, 0.0, 9.0, 5.0,
           7.0, 0.231, 7.8],
          ['query1', 'subject1', 90.00, 5.0, 1.0, 1.0, 0.0, 0.0, 2.0,
           10.0, 0.022, 13.0]]),
    ]

    for file_name, rows in cases:
        obs = _blast7_to_data_frame(get_data_path(file_name))
        exp = pd.DataFrame(rows, columns=default_columns)
        assert_data_frame_almost_equal(obs, exp)
def test_ancom_percentiles(self):
    # Checks the second element returned by ancom() when the `percentiles`
    # argument is not supplied.
    sample_ids = ['s1', 's2', 's3', 's4', 's5', 's6']
    table = pd.DataFrame(
        [[12, 11],
         [9, 11],
         [1, 11],
         [22, 100],
         [20, 53],
         [23, 1]],
        index=sample_ids, columns=['b1', 'b2'])
    grouping = pd.Series(['a', 'a', 'a', 'b', 'b', 'b'], index=sample_ids)

    # Expected column labels: every percentile for group 'a', then every
    # percentile for group 'b'.
    column_labels = []
    for group in ['a', 'b']:
        for pct in [0.0, 25.0, 50.0, 75.0, 100.0]:
            column_labels.append((pct, group))
    exp_columns = pd.MultiIndex.from_tuples(
        column_labels, names=['Percentile', 'Group'])

    # One row per (group, percentile) pair; transposed below so that the
    # features become the rows.
    exp_values = np.array([
        [1.0, 11.0],
        [5.0, 11.0],
        [9.0, 11.0],
        [10.5, 11.0],
        [12.0, 11.0],
        [20.0, 1.0],
        [21.0, 27.0],
        [22.0, 53.0],
        [22.5, 76.5],
        [23.0, 100.0],
    ])
    exp = pd.DataFrame(exp_values.T, columns=exp_columns,
                       index=['b1', 'b2'])

    obs = ancom(table, grouping)[1]
    assert_data_frame_almost_equal(obs, exp)
def test_init_default_parameters(self):
    # Constructing from a string alone should give the character values,
    # empty metadata, and a column-less positional metadata frame with one
    # row per character.
    raw = '.-ABCXYZ'
    obs = ExampleGrammaredSequence(raw)

    npt.assert_equal(obs.values, np.array(raw, dtype='c'))
    self.assertEqual(obs.metadata, {})

    exp_positional = pd.DataFrame(index=range(8))
    assert_data_frame_almost_equal(obs.positional_metadata, exp_positional)
def test_ancom_basic_counts_swapped(self):
    # Runs ancom() on the table8/cats8 fixtures with default arguments.
    obs = ancom(self.table8, self.cats8)

    w_stats = np.array([5, 5, 2, 2, 2, 2, 2])
    rejected = np.array([True, True, False, False, False, False, False],
                        dtype=bool)
    exp = pd.DataFrame({'W': w_stats, 'reject': rejected})

    assert_data_frame_almost_equal(obs, exp)
def test_to_data_frame_3x3(self):
    # The 3x3 fixture should convert to a frame labelled 'a', 'b', 'c' on
    # both axes.
    labels = ['a', 'b', 'c']
    exp = pd.DataFrame(
        [[0.0, 0.01, 4.2],
         [0.01, 0.0, 12.0],
         [4.2, 12.0, 0.0]],
        index=labels, columns=labels)

    obs = self.dm_3x3.to_data_frame()

    assert_data_frame_almost_equal(obs, exp)
def test_ancom_alpha(self):
    # Runs ancom() on the table1/cats1 fixtures with alpha=0.5.
    obs = ancom(self.table1, self.cats1, alpha=0.5)

    w_stats = np.array([6, 6, 4, 5, 5, 4, 2])
    rejected = np.array([True, True, False, True, True, False, False],
                        dtype=bool)
    exp = pd.DataFrame({'W': w_stats, 'reject': rejected})

    assert_data_frame_almost_equal(obs, exp)
def test_to_data_frame_3x3(self):
    # Convert the 3x3 fixture and compare against a hand-written frame
    # whose row and column labels are both 'a', 'b', 'c'.
    obs = self.dm_3x3.to_data_frame()

    exp_rows = [
        [0.0, 0.01, 4.2],
        [0.01, 0.0, 12.0],
        [4.2, 12.0, 0.0],
    ]
    exp = pd.DataFrame(exp_rows,
                       index=['a', 'b', 'c'],
                       columns=['a', 'b', 'c'])

    assert_data_frame_almost_equal(obs, exp)
def test_id_lookup(self):
    # The x and z matrices are given IDs that do not match y's; the lookup
    # passed to pwmantel() maps all of them onto a common set of IDs.
    self.minx_dm_extra.ids = ['a', 'b', 'c', 'foo']
    self.minz_dm_extra.ids = ['d', 'e', 'f', 'bar']
    lookup = {
        'a': '0', 'b': '1', 'c': '2', 'foo': 'foo',
        'd': '0', 'e': '1', 'f': '2', 'bar': 'bar',
        '0': '0', '1': '1', '2': '2',
    }

    dm_x = self.minx_dm_extra.filter(['b', 'a', 'foo', 'c'])
    dm_y = self.miny_dm.filter(['0', '2', '1'])
    dm_z = self.minz_dm_extra.filter(['bar', 'e', 'f', 'd'])

    # Snapshots taken before the call, used afterwards to detect mutation.
    snapshot_x = dm_x.copy()
    snapshot_y = dm_y.copy()
    snapshot_z = dm_z.copy()

    np.random.seed(0)
    obs = pwmantel((dm_x, dm_y, dm_z), alternative='greater', strict=False,
                   lookup=lookup)
    assert_data_frame_almost_equal(
        obs, self.exp_results_reordered_distance_matrices)

    # pwmantel() must leave its inputs untouched.
    for dm, snapshot in ((dm_x, snapshot_x),
                         (dm_y, snapshot_y),
                         (dm_z, snapshot_z)):
        self.assertEqual(dm, snapshot)
def test_scale_single_column(self):
    # _scale() applied to a one-column frame holding 1, 0, 2 is expected to
    # return 0.0, -1.0, 1.0 with the same labels.
    row_ids = ['A', 'B', 'C']
    col_ids = ['foo']
    unscaled = pd.DataFrame([[1], [0], [2]], index=row_ids, columns=col_ids)
    exp = pd.DataFrame([[0.0], [-1.0], [1.0]], index=row_ids,
                       columns=col_ids)

    assert_data_frame_almost_equal(_scale(unscaled), exp)
def test_ancom_no_signal(self):
    # Runs ancom() on the table3/cats3 fixtures with multiple-comparisons
    # correction disabled; every W is expected to be 0 and nothing rejected.
    obs = ancom(self.table3, self.cats3,
                multiple_comparisons_correction=None)

    n_features = 7
    exp = pd.DataFrame({
        'W': np.array([0] * n_features),
        'reject': np.array([False] * n_features, dtype=bool),
    })

    assert_data_frame_almost_equal(obs, exp)
def test_absolute_difference(self):
    # Two single-sink tables whose source columns are declared in different
    # orders; both sides are column-sorted before being compared.
    first = pd.DataFrame({
        'Unknown': {'sink1': 0.25},
        'Source1': {'sink1': 0.50},
        'Source2': {'sink1': 0.25},
    })
    second = pd.DataFrame({
        'Unknown': {'sink1': 0.1},
        'Source2': {'sink1': 0.8},
        'Source1': {'sink1': 0.1},
    })

    obs = compare_sinks(first, second, 'absolute_difference')

    # Expected values were worked out by hand.
    exp = pd.DataFrame([(0.4, 0.55, 0.15)], index=['sink1'],
                       columns=['Source1', 'Source2', 'Unknown'])

    assert_data_frame_almost_equal(obs.sort_index(axis=1),
                                   exp.sort_index(axis=1))
def test_permutative_f_scaled(self):
    # Runs ancom() with significance_test='permutative-anova' on a closed
    # 8x7 table, checking both the result and that the inputs are unchanged.
    sample_ids = ['s1', 's2', 's3', 's4', 's5', 's6', 's7', 's8']
    feature_ids = ['b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'b7']
    counts = [
        [12, 11, 10, 10, 10, 10, 10],
        [9, 11, 12, 10, 10, 10, 10],
        [1, 11, 10, 11, 10, 5, 9],
        [2, 11, 10, 11, 10, 5, 9],
        [221, 210, 9, 10, 10, 10, 10],
        [220, 210, 9, 10, 10, 10, 10],
        [200, 220, 10, 10, 13, 10, 10],
        [230, 210, 14, 10, 10, 10, 10],
    ]
    test_table = pd.DataFrame(closure(counts), index=sample_ids,
                              columns=feature_ids)
    test_cats = pd.Series([0, 0, 0, 0, 1, 1, 1, 1], index=sample_ids)

    np.random.seed(0)
    table_before = copy.deepcopy(test_table)
    cats_before = copy.deepcopy(test_cats)

    obs = ancom(test_table, test_cats,
                significance_test='permutative-anova')

    # The input table must not have been altered by the call.
    assert_data_frame_almost_equal(table_before, test_table)
    # The input grouping must not have been altered by the call.
    pdt.assert_series_equal(cats_before, test_cats)

    w_stats = np.array([5, 5, 2, 2, 2, 2, 2])
    rejected = np.array([True, True, False, False, False, False, False],
                        dtype=bool)
    exp = pd.DataFrame({'W': w_stats, 'reject': rejected},
                       index=feature_ids)
    assert_data_frame_almost_equal(obs, exp)
def test_bioenv_different_column_order(self):
    # Passing the columns in reverse order reorders the column subsets, so
    # the row labels of the result change; the numeric results themselves
    # (e.g. the correlation coefficients) are expected to stay the same.
    reversed_cols = self.cols[::-1]
    obs = bioenv(self.dm, self.df, columns=reversed_cols)

    exp = self.exp_results_different_column_order
    assert_data_frame_almost_equal(obs, exp)
def test_not_equal(self):
    # A collection of frames that must all differ from one another under
    # assert_data_frame_almost_equal.
    bar_values = ['a', 'b', 'cd', 'e']
    unequal_dfs = [
        self.df,

        # floating point difference too large to count as "almost equal"
        pd.DataFrame({'foo': [42, 42.001, np.nan, 0],
                      'bar': bar_values}),

        # one additional NaN
        pd.DataFrame({'foo': [42, np.nan, np.nan, 0],
                      'bar': bar_values}),

        # columns in a different order
        pd.DataFrame(self.df, columns=['foo', 'bar']),

        # index in a different order
        pd.DataFrame(self.df, index=np.arange(4)[::-1]),

        # index of a different type
        pd.DataFrame(self.df, index=np.arange(4).astype(float)),

        # assorted "empty" frames, none of which are equivalent
        pd.DataFrame(),
        pd.DataFrame(index=np.arange(10)),
        pd.DataFrame(columns=np.arange(10)),
        pd.DataFrame(index=np.arange(10), columns=np.arange(10)),
        pd.DataFrame(index=np.arange(9)),
        pd.DataFrame(columns=np.arange(9)),
        pd.DataFrame(index=np.arange(9), columns=np.arange(9)),
    ]

    # Sanity check: each frame is equal to itself.
    for frame in unequal_dfs:
        assert_data_frame_almost_equal(frame, frame)

    # Every ordered pair must fail the comparison. Permutations (rather
    # than combinations) are used so that both argument orders are tried.
    for left, right in itertools.permutations(unequal_dfs, 2):
        with self.assertRaises(AssertionError):
            assert_data_frame_almost_equal(left, right)
def test_ancom_percentiles_alt_categories(self):
    # Checks the second element returned by ancom() for a single feature
    # split across three groups, with the percentiles passed explicitly.
    sample_ids = ['s1', 's2', 's3', 's4', 's5', 's6']
    table = pd.DataFrame([[12], [9], [1], [22], [20], [23]],
                         index=sample_ids, columns=['b1'])
    grouping = pd.Series(['a', 'a', 'c', 'b', 'b', 'c'], index=sample_ids)

    percentiles = [0.0, 25.0, 50.0, 75.0, 100.0]

    # Expected column labels: all percentiles for 'a', then 'b', then 'c'.
    column_labels = []
    for group in ['a', 'b', 'c']:
        for pct in percentiles:
            column_labels.append((pct, group))
    exp_columns = pd.MultiIndex.from_tuples(
        column_labels, names=['Percentile', 'Group'])

    exp_values = np.array([
        # group a
        [9.0], [9.75], [10.5], [11.25], [12.0],
        # group b
        [20.0], [20.5], [21.0], [21.5], [22.0],
        # group c
        [1.0], [6.5], [12.0], [17.5], [23.0],
    ])
    exp = pd.DataFrame(exp_values.T, columns=exp_columns, index=['b1'])

    obs = ancom(table, grouping, percentiles=percentiles)[1]
    assert_data_frame_almost_equal(obs, exp)
def test_pearsonr(self):
    # Compare two single-sink tables with the 'pearson' method.
    first = pd.DataFrame({
        'Unknown': {'sink1': 0.25},
        'Source1': {'sink1': 0.50},
        'Source2': {'sink1': 0.25},
    })
    second = pd.DataFrame({
        'Unknown': {'sink1': 0.1},
        'Source1': {'sink1': 0.1},
        'Source2': {'sink1': 0.8},
    })

    obs = compare_sinks(first, second, 'pearson')

    # Expected values were obtained by calling scipy.stats.pearsonr directly.
    exp = pd.DataFrame([(-0.5, 2. / 3)], index=['sink1'],
                       columns=['Pearson r', 'p'])

    assert_data_frame_almost_equal(obs, exp)
def test_default_valid_single_line(self):
    # Both fixture files are parsed with _blast7_to_data_frame and are
    # expected to yield one row under the same twelve columns.
    default_columns = ['qseqid', 'sseqid', 'pident', 'length', 'mismatch',
                       'gapopen', 'qstart', 'qend', 'sstart', 'send',
                       'evalue', 'bitscore']
    cases = [
        ('blast7_default_single_line',
         ['query1', 'subject2', 100.00, 8.0, 0.0, 0.0, 1.0, 8.0, 3.0, 10.0,
          9e-05, 16.9]),
        ('legacy9_single_line',
         ['query1', 'subject1', 90.00, 7.0, 1.0, 0.0, 0.0, 8.0, 4.0, 10.0,
          1e-05, 15.5]),
    ]

    for file_name, row in cases:
        obs = _blast7_to_data_frame(get_data_path(file_name))
        exp = pd.DataFrame([row], columns=default_columns)
        assert_data_frame_almost_equal(obs, exp)
def test_euclidean(self):
    # Compare two single-sink tables with the 'euclidean' method.
    first = pd.DataFrame({
        'Unknown': {'sink1': 0.25},
        'Source1': {'sink1': 0.50},
        'Source2': {'sink1': 0.25},
    })
    second = pd.DataFrame({
        'Unknown': {'sink1': 0.1},
        'Source1': {'sink1': 0.1},
        'Source2': {'sink1': 0.8},
    })

    obs = compare_sinks(first, second, 'euclidean')

    # Expected value was obtained by calling
    # scipy.spatial.distance.euclidean directly.
    exp = pd.DataFrame([0.6964194], index=['sink1'],
                       columns=['Euclidean distance'])

    assert_data_frame_almost_equal(obs, exp)
def test_ancom_theta(self):
    # Runs ancom() on the table1/cats1 fixtures with theta=0.3 and checks
    # the first element of the returned pair.
    obs = ancom(self.table1, self.cats1, theta=0.3)

    w_stats = np.array([5, 5, 2, 2, 2, 2, 2])
    rejected = np.array([True, True, False, False, False, False, False],
                        dtype=bool)
    exp = pd.DataFrame({'W': w_stats,
                        'Reject null hypothesis': rejected})

    assert_data_frame_almost_equal(obs[0], exp)
def test_ancom_multiple_comparisons(self):
    # Runs ancom() on table1/cats1 with Holm-Bonferroni correction and
    # scipy's Mann-Whitney U test; every W is expected to be 0 and nothing
    # rejected.
    obs = ancom(self.table1, self.cats1,
                multiple_comparisons_correction='holm-bonferroni',
                significance_test=scipy.stats.mannwhitneyu)

    n_features = 7
    exp = pd.DataFrame({
        'W': np.array([0] * n_features),
        'reject': np.array([False] * n_features, dtype=bool),
    })

    assert_data_frame_almost_equal(obs, exp)
def test_custom_valid_single_line(self):
    # Parse the single-line custom-column fixture with
    # _blast7_to_data_frame and compare against the expected one-row frame.
    obs = _blast7_to_data_frame(get_data_path("blast7_custom_single_line"))

    exp_columns = ['qseqid', 'ppos', 'pident', 'length', 'sgi', 'bitscore',
                   'qend', 'qseq']
    exp_row = ['query1', 100.00, 100.00, 8.0, 0.0, 16.9, 8.0, 'PAAWWWWW']
    exp = pd.DataFrame([exp_row], columns=exp_columns)

    assert_data_frame_almost_equal(obs, exp)
def test_ancom_no_percentiles(self):
    # With an empty `percentiles` list, the second element returned by
    # ancom() is expected to be an empty frame.
    sample_ids = ['s1', 's2', 's3', 's4', 's5', 's6']
    table = pd.DataFrame([[12], [9], [1], [22], [20], [23]],
                         index=sample_ids, columns=['b1'])
    grouping = pd.Series(['a', 'a', 'a', 'b', 'b', 'b'], index=sample_ids)

    obs = ancom(table, grouping, percentiles=[])[1]

    assert_data_frame_almost_equal(obs, pd.DataFrame())
def test_minimal_compatible_input_with_labels(self):
    # Seed NumPy's global RNG before calling pwmantel(), then compare the
    # labelled result against the stored expectation.
    matrix_labels = ('minx', 'miny', 'minz')
    np.random.seed(0)

    obs = pwmantel(self.min_dms, alternative='greater',
                   labels=matrix_labels)

    exp = self.exp_results_minimal_with_labels
    assert_data_frame_almost_equal(obs, exp)
def test_minimal_compatible_input_with_labels(self):
    # pwmantel() is called on self.min_dms with explicit labels, after the
    # global NumPy RNG has been seeded with 0.
    np.random.seed(0)
    pwmantel_kwargs = {
        'alternative': 'greater',
        'labels': ('minx', 'miny', 'minz'),
    }
    obs = pwmantel(self.min_dms, **pwmantel_kwargs)

    assert_data_frame_almost_equal(
        obs, self.exp_results_minimal_with_labels)
def test_bioenv_different_column_order(self):
    # The columns are supplied in reverse. That reorders the column subsets
    # and therefore changes the row labels in the result, but the actual
    # results (e.g. the correlation coefficients) should not change.
    exp = self.exp_results_different_column_order

    obs = bioenv(self.dm, self.df, columns=self.cols[::-1])

    assert_data_frame_almost_equal(obs, exp)
def test_pearson_perfect(self):
    # Comparing self.mpm1 against itself with the 'pearson' method must
    # report r = 1.0 and p = 0.0 for every sink.
    sink_ids = ['sink1', 'sink2', 'sink3', 'sink4', 'sink5', 'sink6']
    exp = pd.DataFrame([(1.0, 0.0)] * len(sink_ids), index=sink_ids,
                       columns=['Pearson r', 'p'])

    obs = compare_sinks(self.mpm1, self.mpm1, 'pearson')

    assert_data_frame_almost_equal(obs, exp)
def test_filepaths_as_input(self):
    # pwmantel() is handed file paths rather than in-memory objects.
    dm_paths = [get_data_path(name) for name in ('dm.txt', 'dm2.txt')]

    np.random.seed(0)
    obs = pwmantel(dm_paths)

    assert_data_frame_almost_equal(obs, self.exp_results_dm_dm2)
def test_default_valid_single_line(self):
    # Parse the single-line fixture with _blast6_to_data_frame using
    # default_columns=True and compare against the expected one-row frame.
    path = get_data_path('blast6_default_single_line')
    obs = _blast6_to_data_frame(path, default_columns=True)

    exp_columns = ['qseqid', 'sseqid', 'pident', 'length', 'mismatch',
                   'gapopen', 'qstart', 'qend', 'sstart', 'send', 'evalue',
                   'bitscore']
    exp_row = ['query1', 'subject2', 75.0, 8.0, 2.0, 0.0, 1.0, 8.0, 2.0,
               9.0, 0.06, 11.5]
    exp = pd.DataFrame([exp_row], columns=exp_columns)

    assert_data_frame_almost_equal(obs, exp)
def test_bioenv_all_columns_implicit(self):
    # No `columns` argument is given, so every column of the data frame is
    # used implicitly. The reordered distance matrix is run second:
    # changing the order of its rows/columns should not change the results.
    for distance_matrix in (self.dm, self.dm_reordered):
        obs = bioenv(distance_matrix, self.df)
        assert_data_frame_almost_equal(obs, self.exp_results)
def test_custom_valid_single_line(self):
    # Parse the single-line fixture with _blast6_to_data_frame using an
    # explicit column list and expect a frame with those same columns.
    custom_columns = ['qacc', 'qseq', 'btop', 'sframe', 'ppos', 'positive',
                      'gaps']
    path = get_data_path('blast6_custom_single_line')

    # A copy is handed to the parser so the expectation below is built from
    # a list the parser never saw.
    obs = _blast6_to_data_frame(path, columns=list(custom_columns))

    exp_row = ['query1', 'PAAWWWWW', 8.0, 1.0, 100.00, 8.0, 0.0]
    exp = pd.DataFrame([exp_row], columns=custom_columns)

    assert_data_frame_almost_equal(obs, exp)
def test_ancom_letter_categories(self):
    # Runs ancom() on the table7/cats7 fixtures with multiple-comparisons
    # correction disabled.
    obs = ancom(self.table7, self.cats7,
                multiple_comparisons_correction=None)

    w_stats = np.array([5, 3, 3, 2, 2, 5, 2])
    rejected = np.array([True, False, False, False, False, True, False],
                        dtype=bool)
    exp = pd.DataFrame({'W': w_stats, 'reject': rejected})

    assert_data_frame_almost_equal(obs, exp)
def test_ancom_alpha(self):
    # Runs ancom() on table1/cats1 with alpha=0.5 and multiple-comparisons
    # correction disabled, checking the first element of the returned pair.
    obs = ancom(self.table1, self.cats1,
                multiple_comparisons_correction=None,
                alpha=0.5)

    w_stats = np.array([6, 6, 4, 5, 5, 4, 2])
    rejected = np.array([True, True, False, True, True, False, False],
                        dtype=bool)
    exp = pd.DataFrame({'W': w_stats,
                        'Reject null hypothesis': rejected})

    assert_data_frame_almost_equal(obs[0], exp)
def test_ancom_noncontiguous(self):
    # Runs ancom() on the table5/cats5 fixtures with multiple-comparisons
    # correction disabled.
    obs = ancom(self.table5, self.cats5,
                multiple_comparisons_correction=None)

    w_stats = np.array([6, 2, 2, 2, 2, 6, 2])
    rejected = np.array([True, False, False, False, False, True, False],
                        dtype=bool)
    exp = pd.DataFrame({'W': w_stats, 'reject': rejected})

    assert_data_frame_almost_equal(obs, exp)
def test_ancom_unbalanced(self):
    # Runs ancom() on table6/cats6 with multiple-comparisons correction
    # disabled, checking the first element of the returned pair.
    obs = ancom(self.table6, self.cats6,
                multiple_comparisons_correction=None)

    w_stats = np.array([5, 3, 3, 2, 2, 5, 2])
    rejected = np.array([True, False, False, False, False, True, False],
                        dtype=bool)
    exp = pd.DataFrame({'W': w_stats,
                        'Reject null hypothesis': rejected})

    assert_data_frame_almost_equal(obs[0], exp)
def test_ancom_alternative_test(self):
    # Runs ancom() on table1/cats1 using scipy's independent t-test as the
    # significance test, with multiple-comparisons correction disabled.
    obs = ancom(self.table1, self.cats1,
                multiple_comparisons_correction=None,
                significance_test=scipy.stats.ttest_ind)

    w_stats = np.array([5, 5, 2, 2, 2, 2, 2])
    rejected = np.array([True, True, False, False, False, False, False],
                        dtype=bool)
    exp = pd.DataFrame({'W': w_stats, 'reject': rejected})

    assert_data_frame_almost_equal(obs, exp)
def test_bioenv_all_columns_explicit(self):
    # Every column is named explicitly. The second data frame carries an
    # extra non-numeric column and has some rows and columns reordered; the
    # result should be the same because the same columns are requested in
    # the same order.
    for data_frame in (self.df, self.df_extra_column):
        obs = bioenv(self.dm, data_frame, columns=self.cols)
        assert_data_frame_almost_equal(obs, self.exp_results)
def test_init_nondefault_parameters(self):
    # Constructing with explicit metadata and positional metadata should
    # expose both unchanged alongside the character values.
    raw = '.-ABCXYZ'
    obs = ExampleGrammaredSequence(
        raw,
        metadata={'id': 'foo'},
        positional_metadata={'quality': range(8)})

    npt.assert_equal(obs.values, np.array(raw, dtype='c'))
    self.assertEqual(obs.metadata, {'id': 'foo'})

    exp_positional = pd.DataFrame({'quality': range(8)})
    assert_data_frame_almost_equal(obs.positional_metadata, exp_positional)
def test_custom_valid_mixed_nans(self):
    # Parse the mixed-NaN fixture with _blast7_to_data_frame; the expected
    # frame has NaN in both numeric and string columns.
    obs = _blast7_to_data_frame(get_data_path("blast7_custom_mixed_nans"))

    exp_columns = ['qgi', 'sgi', 'qlen', 'slen', 'qframe', 'sframe',
                   'qseqid', 'sseqid']
    exp_rows = [
        [0.0, np.nan, 8.0, 13.0, 1.0, 1.0, np.nan, 'subject2'],
        [np.nan, 0.0, 8.0, np.nan, 1.0, 1.0, 'query1', np.nan],
    ]
    exp = pd.DataFrame(exp_rows, columns=exp_columns)

    assert_data_frame_almost_equal(obs, exp)