def test_correlation_graphs(self):
    """Check the first and last node and edge of the correlation graph.

    The graph is built by ``correlation_graphs`` from the Pearson
    coefficients that ``correlation_coefficients`` computes on ``self.df``.
    The attributes of its boundary nodes and edges are compared against
    reference values.
    """
    coefficients = correlation_coefficients(
        self.df,
        max_rt_diff=5.0,
        coeff_thres=0.7,
        pvalue_thres=1.0,
        method="pearson",
        block=5000,
        ncpus=None,
    )
    graph = correlation_graphs(coefficients, self.df)

    # Pull out all four boundary elements before asserting anything.
    nodes = list(graph.nodes(data=True))
    first_node, last_node = nodes[0], nodes[-1]
    edges = list(graph.edges(data=True))
    first_edge, last_edge = edges[0], edges[-1]

    # order is different between python 2 and 3
    node_keys = ("mz", "intensity", "rt")
    edge_keys = ("rvalue", "pvalue", "mzdiff", "rtdiff")

    # Node entries are (name, attributes) pairs.
    np.testing.assert_almost_equal(
        [first_node[1][key] for key in node_keys],
        [168.989654, 520.0, 120.0])
    np.testing.assert_almost_equal(
        [last_node[1][key] for key in node_keys],
        [493.063765, 163.33, 192.5])

    # Edge entries are (source, target, attributes) triples.
    np.testing.assert_almost_equal(
        [first_edge[2][key] for key in edge_keys],
        [1.0, 0.0, 167.982378, 1.0])
    np.testing.assert_almost_equal(
        [last_edge[2][key] for key in edge_keys],
        [1.0, 0.0, 1.003355, 2.5])
def group_features(df, db_out, max_rt_diff=5.0, coeff_thres=0.7,
                   pvalue_thres=1.0, method="pearson", block=5000,
                   ncpus=None):
    """Group correlated features and store the groups in an SQLite table.

    Any existing ``groups`` table in ``db_out`` is dropped and recreated.
    A correlation graph is built from ``df``, split into weakly connected
    components, and every edge of every component is written as one row
    of ``groups``. Components are numbered from 1 in the order they are
    produced, and that number is stored as ``group_id``.

    Args:
        df: Feature data passed to ``statistics.correlation_coefficients``
            and ``statistics.correlation_graphs``.
        db_out: Database path handed to ``sqlite3.connect``.
        max_rt_diff: Forwarded to ``statistics.correlation_coefficients``.
        coeff_thres: Forwarded to ``statistics.correlation_coefficients``.
        pvalue_thres: Forwarded to ``statistics.correlation_coefficients``.
        method: Forwarded to ``statistics.correlation_coefficients``.
        block: Forwarded to ``statistics.correlation_coefficients``.
        ncpus: Forwarded to ``statistics.correlation_coefficients``.

    Returns:
        The graph returned by ``statistics.correlation_graphs``.
    """
    insert_sql = (
        "insert into groups (group_id, peak_id_a, peak_id_b, degree_a, "
        "degree_b, r_value, p_value, rt_diff, mz_diff) "
        "values (?,?,?,?,?,?,?,?,?)"
    )

    conn = sqlite3.connect(db_out)
    # Close the connection even if the correlation step or an insert fails;
    # otherwise the handle on the database file would leak.
    try:
        cursor = conn.cursor()
        cursor.execute("DROP TABLE IF EXISTS groups")
        cursor.execute(
            "CREATE TABLE groups ( "
            "group_id INTEGER DEFAULT NULL, "
            "peak_id_a TEXT DEFAULT NULL, "
            "peak_id_b TEXT DEFAULT NULL, "
            "degree_a INTEGER DEFAULT NULL, "
            "degree_b INTEGER DEFAULT NULL, "
            "r_value REAL DEFAULT NULL, "
            "p_value REAL DEFAULT NULL, "
            "rt_diff REAL DEFAULT NULL, "
            "mz_diff REAL DEFAULT NULL, "
            "PRIMARY KEY (peak_id_a, peak_id_b));"
        )

        df_coeffs = statistics.correlation_coefficients(
            df, max_rt_diff, coeff_thres, pvalue_thres, method, block, ncpus)
        graph = statistics.correlation_graphs(df_coeffs, df)
        sub_graphs = [graph.subgraph(c)
                      for c in nx.weakly_connected_components(graph)]

        for group_id, sub_graph in enumerate(sub_graphs, start=1):
            # Not stored in the output - placeholder.
            # NOTE(review): a subgraph view may share its attribute dict
            # with the parent graph - confirm before relying on this value.
            sub_graph.graph["groupid"] = group_id

            # Sort edges by (source, target) so rows are written in a
            # stable order.
            edges = sorted(sub_graph.edges(data=True),
                           key=lambda e: (e[0], e[1]))
            rows = [
                (group_id,
                 str(node_a), str(node_b),
                 sub_graph.degree(node_a), sub_graph.degree(node_b),
                 # r_value is stored rounded to two decimals.
                 round(float(attrs["rvalue"]), 2), float(attrs["pvalue"]),
                 float(attrs["rtdiff"]), float(attrs["mzdiff"]))
                for node_a, node_b, attrs in edges
            ]
            cursor.executemany(insert_sql, rows)

        conn.commit()
    finally:
        conn.close()
    return graph
def test_correlation_coefficients(self):
    """Check ``correlation_coefficients`` for Pearson and Spearman.

    With the strict settings both methods are expected to report the
    same five feature pairs with r = 1.0; only the p-value of the last
    pair differs between them. With the thresholds relaxed, only the
    shape of the result is checked.
    """
    columns = ["name_a", "name_b", "r_value", "p_value"]
    names_a = ["M169T120", "M169T120", "M337T121", "M215T170", "M492T190"]
    names_b = ["M337T121", "M505T122", "M505T122", "M231T174", "M493T192"]
    strict_kwargs = dict(max_rt_diff=5.0, coeff_thres=0.7, pvalue_thres=1.0,
                         block=5000, ncpus=None)

    # (method, expected p-value of the last pair)
    cases = (
        ("pearson", 5.85415087865495e-157),
        ("spearman", 0.0),
    )
    for method, last_p_value in cases:
        expected = pd.DataFrame(
            {
                "name_a": list(names_a),
                "name_b": list(names_b),
                "r_value": [np.float64(1.0) for _ in names_a],
                "p_value": [np.float64(0.0)] * 4
                           + [np.float64(last_p_value)],
            },
            columns=columns)
        observed = correlation_coefficients(
            self.df, method=method, **strict_kwargs)
        pd.testing.assert_frame_equal(observed, expected)

    # Relaxed thresholds: only the size of the result is verified.
    relaxed = correlation_coefficients(
        self.df,
        max_rt_diff=50000.0,
        coeff_thres=0.0,
        pvalue_thres=1.0,
        method="pearson",
        block=5000,
        ncpus=None,
    )
    self.assertEqual(relaxed.shape, (136, 4))