def test_add_nodes(add_text, add_citation):
    """
    OSP_Graph#add_nodes() should create a node for every cited text, carrying
    the text's label, author, citation count and score.
    """
    first = add_text(title='title1', surname='surname1')
    second = add_text(title='title2', surname='surname2')

    # Cite the first text three times and the second text once.
    for text, times in ((first, 3), (second, 1)):
        for _ in range(times):
            add_citation(text=text)

    network = OSP_Graph()
    network.add_nodes()

    # Look up both nodes before asserting anything about them.
    node_a = network.graph.node[first.id]
    node_b = network.graph.node[second.id]

    # Labels and authors come from the prettified text fields.
    assert node_a['label'] == first.pretty('title')
    assert node_b['label'] == second.pretty('title')

    assert node_a['author'] == first.pretty('surname')
    assert node_b['author'] == second.pretty('surname')

    # Counts mirror the number of citations registered above.
    assert node_a['count'] == 3
    assert node_b['count'] == 1

    # NOTE(review): scores look rank-based (2 of 2, 1 of 2) -- confirm against
    # the OSP_Graph implementation.
    assert node_a['score'] == 2/2
    assert node_b['score'] == 1/2
def test_ignore_docs_with_too_many_texts(add_text, add_doc, add_citation):
    """
    Documents citing more than `max_texts` texts should contribute nothing
    to the graph.
    """
    small_doc = add_doc()
    large_doc = add_doc()

    texts = [add_text() for _ in range(5)]
    kept, skipped = texts[:2], texts[2:]

    # 2 citations on the small document.
    for text in kept:
        add_citation(document=small_doc, text=text)

    # 3 citations on the large document.
    for text in skipped:
        add_citation(document=large_doc, text=text)

    network = OSP_Graph()
    network.add_edges(max_texts=2)

    # Texts from the document within the limit are present.
    for text in kept:
        assert network.graph.has_node(text.id)

    # Texts from the document that exceeds the limit are ignored.
    for text in skipped:
        assert not network.graph.has_node(text.id)
def test_ignore_hidden_texts(add_text, add_doc, add_citation):
    """
    Citations that point at texts created with display=False should not
    produce nodes.
    """
    doc = add_doc()

    shown_a = add_text()
    shown_b = add_text()
    hidden = add_text(display=False)

    # All three texts are cited by the same document.
    for text in (shown_a, shown_b, hidden):
        add_citation(document=doc, text=text)

    network = OSP_Graph()
    network.add_edges()

    for text in (shown_a, shown_b):
        assert network.graph.has_node(text.id)

    # The un-displayed text is left out.
    assert not network.graph.has_node(hidden.id)
def test_ignore_invalid_texts(add_text, add_doc, add_citation):
    """
    Citations that point at texts created with valid=False should not
    produce nodes.
    """
    doc = add_doc()

    good_a = add_text()
    good_b = add_text()
    invalid = add_text(valid=False)

    # All three texts are cited by the same document.
    for text in (good_a, good_b, invalid):
        add_citation(document=doc, text=text)

    network = OSP_Graph()
    network.add_edges()

    for text in (good_a, good_b):
        assert network.graph.has_node(text.id)

    # The invalid text is left out.
    assert not network.graph.has_node(invalid.id)
def test_add_edges(add_text, add_doc, add_citation):
    """
    OSP_Graph#add_edges() should register edges from the citation table.

    Three documents each cite a sliding window of four out of six texts. For
    every pair of consecutive texts, the expected edge weight equals the
    number of documents that cite both of them.
    """
    d1 = add_doc()
    d2 = add_doc()
    d3 = add_doc()

    t1 = add_text()
    t2 = add_text()
    t3 = add_text()
    t4 = add_text()
    t5 = add_text()
    t6 = add_text()

    # Texts 1-4 in d1.
    add_citation(document=d1, text=t1)
    add_citation(document=d1, text=t2)
    add_citation(document=d1, text=t3)
    add_citation(document=d1, text=t4)

    # Texts 2-5 in d2.
    add_citation(document=d2, text=t2)
    add_citation(document=d2, text=t3)
    add_citation(document=d2, text=t4)
    add_citation(document=d2, text=t5)

    # Texts 3-6 in d3.
    add_citation(document=d3, text=t3)
    add_citation(document=d3, text=t4)
    add_citation(document=d3, text=t5)
    add_citation(document=d3, text=t6)

    g = OSP_Graph()
    g.add_edges()

    # t1/t2 share only d1; t2/t3 share d1 and d2; t3/t4 share all three;
    # t4/t5 share d2 and d3; t5/t6 share only d3.
    assert g.graph.edge[t1.id][t2.id]['weight'] == 1
    assert g.graph.edge[t2.id][t3.id]['weight'] == 2
    assert g.graph.edge[t3.id][t4.id]['weight'] == 3
    assert g.graph.edge[t4.id][t5.id]['weight'] == 2
    assert g.graph.edge[t5.id][t6.id]['weight'] == 1
def test_trim_unconnected_components(add_text, add_doc, add_citation):
    """
    OSP_Graph#trim_unconnected_components() should keep only the largest
    connected subgraph and drop every node outside it.
    """
    texts = [add_text() for _ in range(5)]
    big_group, small_group = texts[:3], texts[3:]

    big_doc = add_doc()
    small_doc = add_doc()

    # 3-node component.
    for text in big_group:
        add_citation(document=big_doc, text=text)

    # 2-node component.
    for text in small_group:
        add_citation(document=small_doc, text=text)

    network = OSP_Graph()
    network.add_edges()
    network.trim_unconnected_components()

    # The largest component survives.
    for text in big_group:
        assert network.graph.has_node(text.id)

    # The smaller component is removed.
    for text in small_group:
        assert not network.graph.has_node(text.id)
def test_trim_texts_by_count(add_text, add_doc, add_citation):
    """
    OSP_Graph#trim_texts_by_count() should drop every text whose total
    citation count is below `min_count`.
    """
    docs = [add_doc() for _ in range(4)]
    texts = [add_text() for _ in range(4)]

    # The texts receive 4, 3, 2 and 1 citations respectively, each one drawn
    # from the leading documents in `docs`.
    for text, n_citations in zip(texts, (4, 3, 2, 1)):
        for doc in docs[:n_citations]:
            add_citation(document=doc, text=text)

    network = OSP_Graph()
    network.add_edges()
    network.add_nodes()
    network.trim_texts_by_count(min_count=3)

    popular, rare = texts[:2], texts[2:]

    # Texts with 4 and 3 citations meet the threshold.
    for text in popular:
        assert network.graph.has_node(text.id)

    # Texts with 2 and 1 citations fall below it.
    for text in rare:
        assert not network.graph.has_node(text.id)