示例#1
0
    def testDataFrame(self):
        """Check that ``Graph.DataFrame`` builds graphs from pandas frames.

        Covers edge attributes taken from extra edge columns, vertex
        attributes taken from an optional vertex frame, and the effect of
        ``use_vids`` on how integer endpoints are interpreted.
        """
        # Edge frame only: the extra "weight" column becomes an edge attribute.
        edges = pd.DataFrame(
            [["C", "A", 0.4], ["A", "B", 0.1]],
            columns=[0, 1, "weight"],
        )
        g = Graph.DataFrame(edges, directed=False)
        self.assertEqual(g.es["weight"], [0.4, 0.1])

        # With a vertex frame: its first column supplies the vertex names and
        # the remaining columns become vertex attributes.
        vertices = pd.DataFrame(
            [["A", "blue"], ["B", "yellow"], ["C", "blue"]],
            columns=[0, "color"],
        )
        g = Graph.DataFrame(edges, directed=True, vertices=vertices)
        self.assertEqual(g.vs["name"], ["A", "B", "C"])
        self.assertEqual(g.vs["color"], ["blue", "yellow", "blue"])
        self.assertEqual(g.es["weight"], [0.4, 0.1])

        # Issue #347
        edges = pd.DataFrame({"source": [1, 2, 3], "target": [4, 5, 6]})
        vertices = pd.DataFrame(
            {
                "node": [1, 2, 3, 4, 5, 6],
                "label": ["1", "2", "3", "4", "5", "6"],
            }
        )[["node", "label"]]  # pin the column order: "node" must come first
        g = Graph.DataFrame(edges, directed=True, vertices=vertices)
        self.assertEqual(g.vs["name"], [1, 2, 3, 4, 5, 6])
        self.assertEqual(g.vs["label"], ["1", "2", "3", "4", "5", "6"])

        # Vertex ids: without use_vids the six distinct endpoint values are
        # expected to yield exactly six vertices.
        edges = pd.DataFrame({"source": [1, 2, 3], "target": [4, 5, 6]})
        g = Graph.DataFrame(edges)
        self.assertEqual(g.vcount(), 6)

        # With use_vids=True the endpoints are vertex ids, so the largest id
        # (6) implies seven vertices, 0 through 6.
        edges = pd.DataFrame({"source": [1, 2, 3], "target": [4, 5, 6]})
        g = Graph.DataFrame(edges, use_vids=True)
        self.assertEqual(g.vcount(), 7)
示例#2
0
def clustering(df_fingerprint_comparison, df_single_taxonomy):
    """
    Cluster the fingerprint-comparison network and pass the result on.

    Both arguments are handed to ``pd.read_pickle``, so they are pickle
    sources (e.g. file paths), not in-memory data frames.

    The fingerprint-comparison frame is turned into an undirected igraph
    graph, its components are computed, and the resulting clustering is
    pickled to ``output_data/clustered_similarity_network.txt``. The
    clustering, the graph's vertex data frame and the single-taxonomy frame
    are then passed to ``cluster_in_lists``.

    :param df_fingerprint_comparison: pickle source of the edge frame
        (per the original description: pairs of aglycons with their
        Tanimoto index).
    :param df_single_taxonomy: pickle source of the frame that holds only
        single entries in the taxonomy row.
    :return: None
    """
    # Function-scope import keeps this block self-contained.
    import os

    # NOTE(security): unpickling can execute arbitrary code; only pass
    # sources that are trusted.
    df_all_aglycons_with_single_taxonomy = pd.read_pickle(df_single_taxonomy)
    fpc = pd.read_pickle(df_fingerprint_comparison)

    fpc_graph = Graph.DataFrame(fpc, directed=False)
    fpc_cluster = fpc_graph.components()
    fpc_nodes = fpc_graph.get_vertex_dataframe()

    out_path = "output_data/clustered_similarity_network.txt"
    # Create the output directory on first use instead of failing with
    # FileNotFoundError when it does not exist yet.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    # Despite the .txt suffix the payload is a binary pickle, hence "wb".
    with open(out_path, "wb") as outfile:
        pickle.dump(fpc_cluster, outfile)

    cluster_in_lists(
        fpc_cluster,
        fpc_nodes,
        df_all_aglycons_with_single_taxonomy,
    )
示例#3
0
    def testDataFrame(self):
        """Check ``Graph.DataFrame`` construction and its input validation.

        Covers edge and vertex attributes, the effect of ``use_vids``, a
        round trip through ``get_edge_dataframe``/``get_vertex_dataframe``,
        and the errors expected for malformed edge or vertex frames.
        """

        def int_edges():
            # A fresh frame per case, exactly as if it were spelled out inline.
            return pd.DataFrame({"source": [1, 2, 3], "target": [4, 5, 6]})

        # Edge frame only: the extra "weight" column becomes an edge attribute.
        edges = pd.DataFrame(
            [["C", "A", 0.4], ["A", "B", 0.1]], columns=[0, 1, "weight"]
        )
        g = Graph.DataFrame(edges, directed=False)
        self.assertEqual(g.es["weight"], [0.4, 0.1])

        # With a vertex frame: its first column supplies the vertex names and
        # the remaining columns become vertex attributes.
        vertices = pd.DataFrame(
            [["A", "blue"], ["B", "yellow"], ["C", "blue"]],
            columns=[0, "color"],
        )
        g = Graph.DataFrame(edges, directed=True, vertices=vertices)
        self.assertEqual(g.vs["name"], ["A", "B", "C"])
        self.assertEqual(g.vs["color"], ["blue", "yellow", "blue"])
        self.assertEqual(g.es["weight"], [0.4, 0.1])

        # Issue #347
        edges = int_edges()
        vertices = pd.DataFrame(
            {
                "node": [1, 2, 3, 4, 5, 6],
                "label": ["1", "2", "3", "4", "5", "6"],
            }
        )[["node", "label"]]  # pin the column order: "node" must come first
        g = Graph.DataFrame(edges, directed=True, vertices=vertices)
        self.assertEqual(g.vs["name"], [1, 2, 3, 4, 5, 6])
        self.assertEqual(g.vs["label"], ["1", "2", "3", "4", "5", "6"])

        # Vertex ids: without use_vids the six distinct endpoint values are
        # expected to yield exactly six vertices.
        g = Graph.DataFrame(int_edges())
        self.assertEqual(g.vcount(), 6)

        # With use_vids=True the endpoints are vertex ids, so the largest id
        # (6) implies seven vertices, 0 through 6.
        g = Graph.DataFrame(int_edges(), use_vids=True)
        self.assertEqual(g.vcount(), 7)

        # Graph clone: exporting a graph to frames and rebuilding it from them
        # must reproduce the same frames.
        g = Graph.Full(n=100, directed=True, loops=True)
        g.vs["name"] = [f"v{i}" for i in range(g.vcount())]
        g.vs["x"] = [float(i) for i in range(g.vcount())]
        g.es["w"] = [1.0] * g.ecount()
        df_edges = g.get_edge_dataframe()
        df_vertices = g.get_vertex_dataframe()
        g_clone = Graph.DataFrame(
            df_edges,
            directed=g.is_directed(),
            vertices=df_vertices,
            use_vids=True,
        )
        self.assertTrue(df_edges.equals(g_clone.get_edge_dataframe()))
        self.assertTrue(df_vertices.equals(g_clone.get_vertex_dataframe()))

        # Invalid input. Fixtures are built outside each ``with`` so that only
        # an exception raised by Graph.DataFrame itself can satisfy the check.

        # Edge frame with a single column.
        edges = pd.DataFrame({"source": [1, 2, 3]})
        with self.assertRaisesRegex(ValueError, "two columns"):
            Graph.DataFrame(edges)

        # Vertex frame without any column.
        edges = int_edges()
        vertices = pd.DataFrame()
        with self.assertRaisesRegex(ValueError, "one column"):
            Graph.DataFrame(edges, vertices=vertices)

        # String endpoints while vertex ids are requested.
        edges = int_edges().astype(str)
        with self.assertRaisesRegex(TypeError, "integers"):
            Graph.DataFrame(edges, use_vids=True)

        # Negative endpoints while vertex ids are requested.
        edges = -int_edges()
        with self.assertRaisesRegex(ValueError, "negative"):
            Graph.DataFrame(edges, use_vids=True)

        # Vertex frame indexed by strings.
        edges = int_edges()
        vertices = pd.DataFrame({0: [1, 2, 3]}, index=["1", "2", "3"])
        with self.assertRaisesRegex(TypeError, "integers"):
            Graph.DataFrame(edges, vertices=vertices, use_vids=True)

        # Vertex frame index containing a negative value.
        edges = int_edges()
        vertices = pd.DataFrame({0: [1, 2, 3]}, index=[-1, 2, 3])
        with self.assertRaisesRegex(ValueError, "negative"):
            Graph.DataFrame(edges, vertices=vertices, use_vids=True)

        # Vertex frame index with a gap (1, 2, 4).
        edges = int_edges()
        vertices = pd.DataFrame({0: [1, 2, 3]}, index=[1, 2, 4])
        with self.assertRaisesRegex(ValueError, "sequence"):
            Graph.DataFrame(edges, vertices=vertices, use_vids=True)

        # Vertex frame with a MultiIndex.
        edges = int_edges()
        vertices = pd.DataFrame(
            {0: [1, 2, 3]},
            index=pd.MultiIndex.from_tuples([(1, 1), (2, 2), (3, 3)]),
        )
        with self.assertRaisesRegex(TypeError, "integers"):
            Graph.DataFrame(edges, vertices=vertices, use_vids=True)

        # Duplicate value in the first vertex column.
        edges = int_edges()
        vertices = pd.DataFrame({0: [1, 2, 2]})
        with self.assertRaisesRegex(ValueError, "unique"):
            Graph.DataFrame(edges, vertices=vertices)

        # Vertex frame that already carries a "name" column.
        edges = int_edges()
        vertices = pd.DataFrame({0: [1, 2, 3], "name": [1, 2, 2]})
        with self.assertRaisesRegex(ValueError, "already contains"):
            Graph.DataFrame(edges, vertices=vertices)

        # Edges use ids 1..6 but the vertex frame index only has 0..2.
        edges = int_edges()
        vertices = pd.DataFrame({0: [1, 2, 3]}, index=[0, 1, 2])
        with self.assertRaisesRegex(ValueError, "missing from"):
            Graph.DataFrame(edges, vertices=vertices, use_vids=True)