Пример #1
0
def test_edge_update_nested_lists():
    """Check that nested edge features can be modified in place.

    An edge receives a feature payload mixing nested dicts and lists; every
    nesting level is then updated through ``graph.edge_features`` and read
    back through the same accessor.
    """
    graph = GraphDocument(force_undirected=True)
    source = Document(text='Document0')
    target = Document(text='Document1')
    graph.add_single_edge(source, target)

    key = graph._get_edge_key(source.id, target.id)
    initial_features = {
        'hey': {
            'nested': True,
            'list': ['elem1', 'elem2', {'inlist': 'here'}],
        },
        'hoy': [0, 1],
    }
    graph.edge_features[key] = initial_features

    # Update one value per nesting shape: a dict entry, a list item, a dict
    # stored inside a list, and an item of a top-level list.
    graph.edge_features[key]['hey']['nested'] = False
    graph.edge_features[key]['hey']['list'][1] = True
    graph.edge_features[key]['hey']['list'][2]['inlist'] = 'not here'
    graph.edge_features[key]['hoy'][0] = 1

    # Each read goes through `graph.edge_features` again on purpose, so the
    # assertions observe what the graph actually holds.
    assert graph.edge_features[key]['hey']['nested'] is False
    assert graph.edge_features[key]['hey']['list'][1] is True
    assert graph.edge_features[key]['hey']['list'][2]['inlist'] == 'not here'
    assert graph.edge_features[key]['hoy'][0] == 1
Пример #2
0
def test_graph_add_multiple_nodes():
    """Adding four documents with ``add_nodes`` gives four nodes and no edges."""
    graph = GraphDocument()
    documents = [
        Document(text=label)
        for label in ('Document0', 'Document1', 'Document2', 'Document3')
    ]

    graph.add_nodes(documents)

    assert graph.num_nodes == 4
    assert graph.num_edges == 0
Пример #3
0
def test_add_single_edge_from_id_strings_non_existing_nodes():
    """Adding an edge by id between nodes never added raises ``AssertionError``."""
    graph = GraphDocument()
    source = Document(text='Document0')
    target = Document(text='Document1')
    edge_features = {'text': 'I connect Doc0 and Doc1'}

    # Neither document was added to the graph before referring to it by id.
    with pytest.raises(AssertionError):
        graph.add_single_edge(source.id, target.id, features=edge_features)
Пример #4
0
def test_undirected_graph():
    """Check the ``undirected`` flag on fresh and on copied graphs.

    A default graph reports ``undirected is False``; a graph built with
    ``force_undirected=True`` reports ``True``, and so do graphs rebuilt from
    it (or from its proto) without passing the flag again.
    """
    default_graph = GraphDocument()
    assert default_graph.undirected is False

    forced = GraphDocument(force_undirected=True)
    assert forced.undirected is True

    # Rebuild from the document itself, without forcing the flag.
    rebuilt_from_document = GraphDocument(forced)
    assert rebuilt_from_document.undirected is True

    # Rebuild from the underlying proto, again without forcing the flag.
    rebuilt_from_proto = GraphDocument(forced.proto)
    assert rebuilt_from_proto.undirected is True
Пример #5
0
def test_from_dgl_graph_without_edges():
    """A DGL graph with two nodes and no edges loads as a two-node graph."""
    from dgl import DGLGraph

    edgeless = DGLGraph()
    edgeless.add_nodes(2)

    loaded = GraphDocument.load_from_dgl_graph(edgeless)

    assert loaded.num_nodes == 2
Пример #6
0
def test_from_dgl_graph(graph):
    """Round-trip a graph through DGL and compare it with the original.

    The graph is exported with ``to_dgl_graph`` and loaded back with
    ``GraphDocument.load_from_dgl_graph``; node count, edge count and both
    coordinate arrays of the adjacency must match.

    :param graph: the graph under test (presumably supplied by a pytest
        fixture of the same name -- confirm against the test module)
    """
    dgl_graph = graph.to_dgl_graph()
    jina_graph = GraphDocument.load_from_dgl_graph(dgl_graph)
    assert graph.num_nodes == jina_graph.num_nodes
    assert graph.num_edges == jina_graph.num_edges
    # Compare rows AND columns: the original asserted `.col` twice, leaving
    # the row coordinates of the adjacency unchecked.
    assert (graph.adjacency.row == jina_graph.adjacency.row).all()
    assert (graph.adjacency.col == jina_graph.adjacency.col).all()
Пример #7
0
    def node_and_graph_encode(self, docs: DocumentArray, **kwargs):
        """
        Assign an embedding to every node and to the graph of each document.

        Each node embedding is a one-element array holding the node's out-degree plus its in-degree.
        The graph embedding is a one-element array holding the sum, over all nodes, of the node
        embedding multiplied by the `weight` feature of each of its outgoing and incoming edges.

        .. # noqa: DAR201
        :param docs: Array of GraphDocuments
        :param kwargs: additional keyword arguments; not used by this method
        """
        for doc in docs:
            graph = GraphDocument(doc)
            for node in graph.nodes:
                node.embedding = np.array(
                    [graph.get_out_degree(node) + graph.get_in_degree(node)])

            # Accumulator is named `total` so the builtin `sum` is not shadowed.
            total = 0
            for node in graph.nodes:
                node_embedding = node.embedding[0]
                # Edge features are looked up by '<source id>-<target id>'.
                for out_node in graph.get_outgoing_nodes(node):
                    node_embedding = (
                        node_embedding *
                        graph.edge_features[f'{node.id}-{out_node.id}']
                        ['weight'])
                for in_node in graph.get_incoming_nodes(node):
                    node_embedding = (
                        node_embedding *
                        graph.edge_features[f'{in_node.id}-{node.id}']
                        ['weight'])
                total += node_embedding
            graph.embedding = np.array([total])
Пример #8
0
def graph():
    """Build a four-node graph whose edges carry a ``weight`` feature.

    Edges as (source index, target index, weight):
    (0, 1, 1), (0, 2, 1), (2, 1, 10), (1, 3, 1), (2, 3, 1).

    :return: the populated ``GraphDocument``
    """
    result = GraphDocument()
    nodes = [
        Document(text=label)
        for label in ('Document0', 'Document1', 'Document2', 'Document3')
    ]

    weighted_edges = (
        (0, 1, 1),
        (0, 2, 1),
        (2, 1, 10),
        (1, 3, 1),
        (2, 3, 1),
    )
    for source, target, weight in weighted_edges:
        result.add_single_edge(nodes[source],
                               nodes[target],
                               features={'weight': weight})
    return result
Пример #9
0
def test_add_multiple_edges_from_string():
    """Add five edges in one ``add_edges`` call using node ids, then validate.

    The nodes are registered first with ``add_nodes``; sources, targets and
    per-edge features are passed as three parallel lists.
    """
    graph = GraphDocument()
    docs = [
        Document(text=label)
        for label in ('Document0', 'Document1', 'Document2', 'Document3')
    ]
    graph.add_nodes(docs)

    id0, id1, id2, id3 = (doc.id for doc in docs)
    sources = [id0, id0, id2, id1, id2]
    targets = [id1, id2, id1, id3, id3]
    features = [
        {'text': 'I connect Doc0 and Doc1'},
        {'text': 'I connect Doc0 and Doc2'},
        {'text': 'I connect Doc2 and Doc1'},
        {'text': 'I connect Doc1 and Doc3'},
        {'text': 'I connect Doc2 and Doc3'},
    ]
    graph.add_edges(sources, targets, edge_features=features)

    validate_graph(graph)
Пример #10
0
def graph():
    """Build a four-node graph whose edges carry a descriptive ``text`` feature.

    Edges are added with ``add_edge`` in this order (source -> target):
    0 -> 1, 0 -> 2, 2 -> 1, 1 -> 3, 2 -> 3.

    :return: the populated ``GraphDocument``
    """
    result = GraphDocument()
    nodes = [
        Document(text=label)
        for label in ('Document0', 'Document1', 'Document2', 'Document3')
    ]

    connections = (
        (0, 1, 'I connect Doc0 and Doc1'),
        (0, 2, 'I connect Doc0 and Doc2'),
        (2, 1, 'I connect Doc2 and Doc1'),
        (1, 3, 'I connect Doc1 and Doc3'),
        (2, 3, 'I connect Doc2 and Doc3'),
    )
    for source, target, description in connections:
        result.add_edge(nodes[source],
                        nodes[target],
                        features={'text': description})
    return result
Пример #11
0
def test_undirected_graph_to_dgl(graph):
    """Compare the DGL export of a graph with and without ``force_undirected``.

    The plain export must match the graph's node count, edge count and
    adjacency coordinates. The export of the forced-undirected copy is
    expected to hold twice as many edges, with rows and columns equal to the
    original coordinates followed by their swapped counterparts.

    :param graph: the graph under test (presumably supplied by a pytest
        fixture of the same name -- confirm against the test module)
    """
    plain_dgl = graph.to_dgl_graph()
    plain_coo = plain_dgl.adjacency_matrix(scipy_fmt='coo')

    assert plain_dgl.num_nodes() == graph.num_nodes
    assert plain_dgl.num_edges() == graph.num_edges
    assert (graph.adjacency.row == plain_coo.row).all()
    assert (graph.adjacency.col == plain_coo.col).all()

    undirected_graph = GraphDocument(graph, force_undirected=True)
    undirected_dgl = undirected_graph.to_dgl_graph()
    undirected_coo = undirected_dgl.adjacency_matrix(scipy_fmt='coo')

    assert undirected_dgl.num_nodes() == graph.num_nodes
    assert undirected_dgl.num_edges() == graph.num_edges * 2

    # Expected rows: every source followed by every target.
    expected_rows = np.concatenate(
        (undirected_graph.adjacency.row, undirected_graph.adjacency.col))
    assert (expected_rows == undirected_coo.row).all()

    # Expected columns: every target followed by every source.
    expected_cols = np.concatenate(
        (undirected_graph.adjacency.col, undirected_graph.adjacency.row))
    assert (expected_cols == undirected_coo.col).all()
Пример #12
0
def test_remove_single_node_from_string_non_existing():
    """Removing an id that was never added leaves the node count unchanged."""
    graph = GraphDocument()
    present = Document(text='Document0')
    absent = Document(text='Document1')

    graph.add_node(present)
    assert graph.num_nodes == 1

    # `absent` is not part of the graph; removing it by id must change nothing.
    graph.remove_single_node(absent.id)
    assert graph.num_nodes == 1
Пример #13
0
def test_remove_single_node_from_string():
    """Removing a node by id drops it from ``graph.nodes``."""
    graph = GraphDocument()
    docs = [
        Document(text=label)
        for label in ('Document0', 'Document1', 'Document2', 'Document3')
    ]
    for doc in docs:
        graph.add_single_node(doc)

    first = docs[0]
    assert len(graph.nodes) == 4
    assert first.id in graph.nodes

    graph.remove_single_node(first.id)

    assert len(graph.nodes) == 3
    assert first.id not in graph.nodes
Пример #14
0
 def validate_resp(resp):
     """Assert the embeddings carried by the single document of a response.

     The response must hold exactly one document; viewed as a graph, its
     embedding starts with 64 and its four nodes carry embeddings starting
     with 2, 3, 3 and 2, in that order.

     :param resp: response object exposing ``data.docs``
     """
     expected_node_values = (2, 3, 3, 2)

     assert len(resp.data.docs) == 1
     for doc in resp.data.docs:
         graph = GraphDocument(doc)
         assert graph.embedding[0] == 64
         assert len(graph.nodes) == 4
         # The length check above guarantees one expected value per node.
         for node, expected in zip(graph.nodes, expected_node_values):
             assert node.embedding[0] == expected
Пример #15
0
def test_add_remove_node_deprecated():
    """Exercise ``add_node`` and ``remove_node`` with two documents.

    The test name marks these methods as the deprecated API; they must still
    add and remove nodes.
    """
    graph = GraphDocument()
    nodes = [Document(id='1'), Document(id='2')]

    for node in nodes:
        graph.add_node(node)
    assert len(graph.nodes) == 2

    for node in nodes:
        graph.remove_node(node)
    assert len(graph.nodes) == 0
Пример #16
0
def test_add_remove_edge_deprecated():
    """Exercise ``add_edge`` and ``remove_edge`` on a single edge.

    The test name marks these methods as the deprecated API. Adding the edge
    brings in both nodes; removing it keeps the nodes and drops the edge.
    """
    graph = GraphDocument()
    source = Document(text='Document0')
    target = Document(text='Document1')
    edge_features = {'text': 'I connect Doc0 and Doc1'}

    graph.add_edge(source, target, features=edge_features)
    assert graph.num_nodes == 2
    assert graph.num_edges == 1

    graph.remove_edge(source, target)
    assert graph.num_nodes == 2
    assert graph.num_edges == 0
Пример #17
0
def test_remove_single_edge_from_string():
    """Removing one edge by node ids drops that edge and keeps every node."""
    graph = GraphDocument()
    docs = [
        Document(text=label)
        for label in ('Document0', 'Document1', 'Document2', 'Document3')
    ]
    graph.add_nodes(docs)

    # (source index, target index, edge description), in insertion order.
    connections = (
        (0, 1, 'I connect Doc0 and Doc1'),
        (0, 2, 'I connect Doc0 and Doc2'),
        (2, 1, 'I connect Doc2 and Doc1'),
        (1, 3, 'I connect Doc1 and Doc3'),
        (2, 3, 'I connect Doc2 and Doc3'),
    )
    for source, target, description in connections:
        graph.add_single_edge(docs[source].id,
                              docs[target].id,
                              features={'text': description})
    assert graph.num_nodes == 4
    assert graph.num_edges == 5

    graph.remove_single_edge(docs[0].id, docs[1].id)

    assert graph.num_nodes == 4
    assert graph.num_edges == 4
Пример #18
0
def test_add_single_edge_from_id_strings():
    """Add five edges one by one using node ids, then validate the graph."""
    graph = GraphDocument()
    docs = [
        Document(text=label)
        for label in ('Document0', 'Document1', 'Document2', 'Document3')
    ]
    graph.add_nodes(docs)

    # (source index, target index, edge description), in insertion order.
    connections = (
        (0, 1, 'I connect Doc0 and Doc1'),
        (0, 2, 'I connect Doc0 and Doc2'),
        (2, 1, 'I connect Doc2 and Doc1'),
        (1, 3, 'I connect Doc1 and Doc3'),
        (2, 3, 'I connect Doc2 and Doc3'),
    )
    for source, target, description in connections:
        graph.add_single_edge(docs[source].id,
                              docs[target].id,
                              features={'text': description})

    validate_graph(graph)
Пример #19
0
    dgl_undirected_adj_coo = dgl_undirected_graph.adjacency_matrix(
        scipy_fmt='coo')

    assert dgl_undirected_graph.num_nodes() == graph.num_nodes
    assert dgl_undirected_graph.num_edges() == graph.num_edges * 2
    assert (np.concatenate(
        (undirected_graph.adjacency.row,
         undirected_graph.adjacency.col)) == dgl_undirected_adj_coo.row).all()
    assert (np.concatenate(
        (undirected_graph.adjacency.col,
         undirected_graph.adjacency.row)) == dgl_undirected_adj_coo.col).all()


@pytest.mark.parametrize(
    'graph, expected_output',
    [
        (GraphDocument(force_undirected=True), 1),
        (GraphDocument(), 2),
    ],
)
def test_graph_edge_behaviour_creation(graph, expected_output):
    """Add the same pair of nodes as an edge in both directions.

    :param graph: graph under test, built with or without ``force_undirected``
    :param expected_output: number of edges expected afterwards (1 for the
        forced-undirected graph, 2 for the default one)
    """
    first = Document(text='Document0')
    second = Document(text='Document1')

    for source, target in ((first, second), (second, first)):
        graph.add_single_edge(source, target)

    assert graph.num_edges == expected_output


@pytest.mark.parametrize(
    'graph, expected_output',
    [(GraphDocument(force_undirected=True), 1), (GraphDocument(), 2)],
)
Пример #20
0
def test_graph_add_multiple_edges():
    """Add five edges in one ``add_edges`` call and inspect the result.

    Checks the node and edge counts, the order of ``graph.chunks``, and, for
    each of the first five pairs yielded by iterating the graph, the edge's
    ``text`` feature and the texts of both endpoints.
    """
    graph = GraphDocument()
    node_texts = ('Document0', 'Document1', 'Document2', 'Document3')
    doc0, doc1, doc2, doc3 = [Document(text=text) for text in node_texts]

    graph.add_edges(
        [doc0, doc0, doc2, doc1, doc2],
        [doc1, doc2, doc1, doc3, doc3],
        edge_features=[
            {'text': 'I connect Doc0 and Doc1'},
            {'text': 'I connect Doc0 and Doc2'},
            {'text': 'I connect Doc2 and Doc1'},
            {'text': 'I connect Doc1 and Doc3'},
            {'text': 'I connect Doc2 and Doc3'},
        ],
    )
    assert graph.num_nodes == 4
    assert graph.num_edges == 5

    # Chunks are expected in the order the documents first appeared.
    for position, text in enumerate(node_texts):
        assert graph.chunks[position].text == text

    # (edge text, source text, target text) for each yielded pair, by position.
    expected_edges = (
        ('I connect Doc0 and Doc1', 'Document0', 'Document1'),
        ('I connect Doc0 and Doc2', 'Document0', 'Document2'),
        ('I connect Doc2 and Doc1', 'Document2', 'Document1'),
        ('I connect Doc1 and Doc3', 'Document1', 'Document3'),
        ('I connect Doc2 and Doc3', 'Document2', 'Document3'),
    )
    edge_features = graph.edge_features
    for position, (source, target) in enumerate(graph):
        # Pairs beyond the listed ones are iterated but not checked.
        if position < len(expected_edges):
            edge_text, source_text, target_text = expected_edges[position]
            assert (edge_features[f'{source.id}-{target.id}']['text'] ==
                    edge_text)
            assert source.text == source_text
            assert target.text == target_text
Пример #21
0
def test_graph_document_from_graph(graph):
    """A ``GraphDocument`` built from another graph passes ``validate_graph``.

    :param graph: source graph (presumably supplied by a pytest fixture of
        the same name -- confirm against the test module)
    """
    validate_graph(GraphDocument(graph))
Пример #22
0
def test_graph_document_from_proto(graph):
    """A ``GraphDocument`` built from a graph's ``_pb_body`` passes ``validate_graph``.

    :param graph: source graph (presumably supplied by a pytest fixture of
        the same name -- confirm against the test module)
    """
    validate_graph(GraphDocument(graph._pb_body))
Пример #23
0
def test_validate_iteration_graph_without_edges():
    """Iterating an empty graph yields nothing."""
    empty_graph = GraphDocument()
    yielded = list(empty_graph)
    assert len(yielded) == 0