def test_add_nodes(add_text, add_citation):

    """
    OSP_Graph#add_nodes() should create one node per text, carrying the
    text's label, author, citation count, and score.
    """

    first = add_text(title='title1', surname='surname1')
    second = add_text(title='title2', surname='surname2')

    # 3 citations for the first text, 1 for the second.
    for text, total in ((first, 3), (second, 1)):
        for _ in range(total):
            add_citation(text=text)

    network = OSP_Graph()
    network.add_nodes()

    nodes = network.graph.node
    first_node = nodes[first.id]
    second_node = nodes[second.id]

    # Node metadata mirrors the prettified text fields.
    for key, field in (('label', 'title'), ('author', 'surname')):
        assert first_node[key] == first.pretty(field)
        assert second_node[key] == second.pretty(field)

    assert first_node['count'] == 3
    assert second_node['count'] == 1

    # NOTE(review): presumably a rank-based score scaled by the number of
    # texts -- confirm against OSP_Graph.add_nodes().
    assert first_node['score'] == 2 / 2
    assert second_node['score'] == 1 / 2
def test_ignore_docs_with_too_many_texts(add_text, add_doc, add_citation):
    """
    add_edges(max_texts=N) should skip documents that cite more than N texts.
    """

    small_doc = add_doc()
    large_doc = add_doc()

    kept = [add_text() for _ in range(2)]
    skipped = [add_text() for _ in range(3)]

    # 2 citations on the small doc, 3 citations on the large doc.
    for doc, texts in ((small_doc, kept), (large_doc, skipped)):
        for text in texts:
            add_citation(document=doc, text=text)

    network = OSP_Graph()
    network.add_edges(max_texts=2)

    # Texts from the doc within the limit are present.
    for text in kept:
        assert network.graph.has_node(text.id)

    # Texts from the doc over the limit are absent.
    for text in skipped:
        assert not network.graph.has_node(text.id)
def test_ignore_hidden_texts(add_text, add_doc, add_citation):

    """
    Texts created with display=False should not appear in the graph, even
    when they are cited alongside displayed texts.
    """

    doc = add_doc()

    shown = [add_text() for _ in range(2)]
    hidden = add_text(display=False)

    # All three texts are cited by the same document.
    for text in shown + [hidden]:
        add_citation(document=doc, text=text)

    network = OSP_Graph()
    network.add_edges()

    for text in shown:
        assert network.graph.has_node(text.id)

    # The hidden text is skipped.
    assert not network.graph.has_node(hidden.id)
def test_ignore_invalid_texts(add_text, add_doc, add_citation):

    """
    Texts created with valid=False should not appear in the graph, even
    when they are cited alongside valid texts.
    """

    doc = add_doc()

    accepted = [add_text() for _ in range(2)]
    rejected = add_text(valid=False)

    # All three texts are cited by the same document.
    for text in accepted + [rejected]:
        add_citation(document=doc, text=text)

    network = OSP_Graph()
    network.add_edges()

    for text in accepted:
        assert network.graph.has_node(text.id)

    # The invalid text is skipped.
    assert not network.graph.has_node(rejected.id)
def test_add_edges(add_text, add_doc, add_citation):

    """
    OSP_Graph#add_edges() should link texts cited by the same document and
    weight each edge by the number of documents the pair shares.
    """

    docs = [add_doc() for _ in range(3)]
    texts = [add_text() for _ in range(6)]

    # Each doc cites a sliding window of four texts:
    # texts 1-4 in d1, texts 2-5 in d2, texts 3-6 in d3.
    for offset, doc in enumerate(docs):
        for text in texts[offset:offset + 4]:
            add_citation(document=doc, text=text)

    network = OSP_Graph()
    network.add_edges()

    # Neighbouring texts share 1, 2, 3, 2, and 1 documents respectively.
    expected_weights = (1, 2, 3, 2, 1)

    for left, right, weight in zip(texts, texts[1:], expected_weights):
        assert network.graph.edge[left.id][right.id]["weight"] == weight
def test_add_edges(add_text, add_doc, add_citation):
    """
    OSP_Graph#add_edges() should build weighted co-citation edges from the
    citation table.
    """

    doc_a = add_doc()
    doc_b = add_doc()
    doc_c = add_doc()

    texts = [add_text() for _ in range(6)]

    # Which texts each document cites (overlapping windows of four).
    membership = (
        (doc_a, texts[0:4]),  # Texts 1-4.
        (doc_b, texts[1:5]),  # Texts 2-5.
        (doc_c, texts[2:6]),  # Texts 3-6.
    )

    for doc, cited in membership:
        for text in cited:
            add_citation(document=doc, text=text)

    network = OSP_Graph()
    network.add_edges()

    edges = network.graph.edge

    # (source index, target index, number of shared documents).
    expected = (
        (0, 1, 1),
        (1, 2, 2),
        (2, 3, 3),
        (3, 4, 2),
        (4, 5, 1),
    )

    for source, target, weight in expected:
        assert edges[texts[source].id][texts[target].id]['weight'] == weight
def test_trim_unconnected_components(add_text, add_doc, add_citation):

    """
    OSP_Graph#trim_unconnected_components() should keep only the largest
    connected subgraph and drop everything that isn't attached to it.
    """

    texts = [add_text() for _ in range(5)]
    largest, smaller = texts[:3], texts[3:]

    big_doc = add_doc()
    small_doc = add_doc()

    # One doc yields a 3-node component, the other a 2-node component.
    for doc, members in ((big_doc, largest), (small_doc, smaller)):
        for text in members:
            add_citation(document=doc, text=text)

    network = OSP_Graph()
    network.add_edges()
    network.trim_unconnected_components()

    # The largest component survives.
    for text in largest:
        assert network.graph.has_node(text.id)

    # The smaller component is removed.
    for text in smaller:
        assert not network.graph.has_node(text.id)
def test_trim_texts_by_count(add_text, add_doc, add_citation):

    """
    OSP_Graph#trim_texts_by_count() should drop every text whose total
    citation count is below the given minimum.
    """

    docs = [add_doc() for _ in range(4)]
    texts = [add_text() for _ in range(4)]

    # The texts receive 4, 3, 2, and 1 citations respectively, each from the
    # leading documents in the list.
    for text, total in zip(texts, (4, 3, 2, 1)):
        for doc in docs[:total]:
            add_citation(document=doc, text=text)

    network = OSP_Graph()
    network.add_edges()
    network.add_nodes()

    network.trim_texts_by_count(min_count=3)

    # Texts with at least 3 citations remain.
    for text in texts[:2]:
        assert network.graph.has_node(text.id)

    # Texts with fewer than 3 citations are gone.
    for text in texts[2:]:
        assert not network.graph.has_node(text.id)
def test_ignore_hidden_texts(add_text, add_doc, add_citation):
    """
    Citations that point at un-displayed texts should not add graph nodes.
    """

    doc = add_doc()

    visible_a = add_text()
    visible_b = add_text()
    hidden = add_text(display=False)

    # Cite every text from the single document.
    for text in (visible_a, visible_b, hidden):
        add_citation(document=doc, text=text)

    network = OSP_Graph()
    network.add_edges()

    graph = network.graph

    assert graph.has_node(visible_a.id)
    assert graph.has_node(visible_b.id)

    # The display=False text is left out.
    assert not graph.has_node(hidden.id)
def test_ignore_invalid_texts(add_text, add_doc, add_citation):
    """
    Citations that point at invalid texts should not add graph nodes.
    """

    doc = add_doc()

    valid_a = add_text()
    valid_b = add_text()
    invalid = add_text(valid=False)

    # Cite every text from the single document.
    for text in (valid_a, valid_b, invalid):
        add_citation(document=doc, text=text)

    network = OSP_Graph()
    network.add_edges()

    graph = network.graph

    assert graph.has_node(valid_a.id)
    assert graph.has_node(valid_b.id)

    # The valid=False text is left out.
    assert not graph.has_node(invalid.id)
def test_ignore_docs_with_too_many_texts(add_text, add_doc, add_citation):

    """
    Documents citing more texts than max_texts should contribute nothing
    to the graph.
    """

    within_limit = add_doc()
    over_limit = add_doc()

    texts = [add_text() for _ in range(5)]
    included, excluded = texts[:2], texts[2:]

    # 2 citations on the first doc.
    for text in included:
        add_citation(document=within_limit, text=text)

    # 3 citations on the second doc.
    for text in excluded:
        add_citation(document=over_limit, text=text)

    network = OSP_Graph()
    network.add_edges(max_texts=2)

    graph = network.graph

    for text in included:
        assert graph.has_node(text.id)

    # Texts on the second doc are ignored because it has too many texts.
    for text in excluded:
        assert not graph.has_node(text.id)
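# Example #12 -- separator residue from the source listing (the bare "0"
# that followed was presumably a vote/score counter).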
def test_trim_unconnected_components(add_text, add_doc, add_citation):
    """
    After OSP_Graph#trim_unconnected_components(), only nodes belonging to
    the largest connected subgraph should remain.
    """

    main_texts = [add_text() for _ in range(3)]
    stray_texts = [add_text() for _ in range(2)]

    main_doc = add_doc()
    stray_doc = add_doc()

    # 3-node component.
    for text in main_texts:
        add_citation(document=main_doc, text=text)

    # 2-node component.
    for text in stray_texts:
        add_citation(document=stray_doc, text=text)

    network = OSP_Graph()
    network.add_edges()
    network.trim_unconnected_components()

    graph = network.graph

    # Keep the largest component.
    for text in main_texts:
        assert graph.has_node(text.id)

    # Remove the smaller component.
    for text in stray_texts:
        assert not graph.has_node(text.id)