def test_search_on_total_counts(params, add_text, add_citation):

    """
    If a search query is provided, filter the results on the query.
    """

    matched_a = add_text(**params('match one'))
    unmatched_a = add_text(**params('two'))
    matched_b = add_text(**params('match three'))
    unmatched_b = add_text(**params('four'))

    # Cite each text a descending number of times, so total counts
    # order them matched_a > unmatched_a > matched_b > unmatched_b.
    cited = [
        (matched_a, 4),
        (unmatched_a, 3),
        (matched_b, 2),
        (unmatched_b, 1),
    ]

    for text, count in cited:
        for _ in range(count):
            add_citation(text=text)

    Text_Index.es_insert()

    texts = Text_Index.materialize_ranking(query='match')

    # Only the two query-matching texts come back, count-ordered.
    assert len(texts['hits']) == 2
    assert texts['hits'][0]['_id'] == str(matched_a.id)
    assert texts['hits'][1]['_id'] == str(matched_b.id)
def assigned_with(text_id, size=1000):

    """
    Given a "seed" text, rank other texts assigned on the same syllabi.

    Args:
        text_id (int): The text id.
        size (int): Return at most N results.

    Returns:
        dict: Elasticsearch hits.
    """

    # Get syllabi that assign the text.
    doc_ids = Citation_Index.docs_with_text(text_id)

    # Rank texts assigned by those syllabi.
    ranks = Citation_Index.compute_ranking(dict(
        document_id=doc_ids
    ))

    # Omit the seed text. Pass a default so a seed text that is absent
    # from the ranking doesn't raise KeyError.
    ranks.pop(str(text_id), None)

    # Materialize the text metadata.
    texts = Text_Index.materialize_ranking(ranks, size=size)

    return texts
def test_sort_on_total_counts_by_default(add_text, add_citation):

    """
    By default return results sorted on the total citation count.
    """

    t1 = add_text()
    t2 = add_text()
    t3 = add_text()

    # Cite t1 3x, t2 2x, t3 1x.
    for count, text in zip((3, 2, 1), (t1, t2, t3)):
        for _ in range(count):
            add_citation(text=text)

    Text_Index.es_insert()

    texts = Text_Index.materialize_ranking()

    # Most-cited text ranks first.
    assert texts['hits'][0]['_id'] == str(t1.id)
    assert texts['hits'][1]['_id'] == str(t2.id)
    assert texts['hits'][2]['_id'] == str(t3.id)
def test_sort_on_filtered_counts(add_text, add_citation):

    """
    If a text -> count map is passed, sort on the filtered counts.
    """

    t1 = add_text()
    t2 = add_text()
    t3 = add_text()

    # Total counts favor t1 > t2 > t3.
    for text, count in ((t1, 30), (t2, 20), (t3, 10)):
        for _ in range(count):
            add_citation(text=text)

    Text_Index.es_insert()

    # The filtered counts invert the total-count ordering.
    texts = Text_Index.materialize_ranking(ranks={
        t1.id: 1,
        t2.id: 2,
        t3.id: 3,
    })

    assert texts['hits'][0]['_id'] == str(t3.id)
    assert texts['hits'][1]['_id'] == str(t2.id)
    assert texts['hits'][2]['_id'] == str(t1.id)
def test_sort_on_total_counts_by_default(add_text, add_citation):

    """
    By default return results sorted on the total citation count.
    """

    # Three texts with 3, 2, and 1 citations respectively.
    texts = [add_text() for _ in range(3)]

    for index, text in enumerate(texts):
        for _ in range(3 - index):
            add_citation(text=text)

    Text_Index.es_insert()

    ranked = Text_Index.materialize_ranking()

    # Hits come back in descending citation-count order.
    assert ranked['hits'][0]['_id'] == str(texts[0].id)
    assert ranked['hits'][1]['_id'] == str(texts[1].id)
    assert ranked['hits'][2]['_id'] == str(texts[2].id)
def test_search_on_total_counts(params, add_text, add_citation):

    """
    If a search query is provided, filter the results on the query.
    """

    t1 = add_text(**params('match one'))
    t2 = add_text(**params('two'))
    t3 = add_text(**params('match three'))
    t4 = add_text(**params('four'))

    # Descending citation counts: t1=4, t2=3, t3=2, t4=1.
    counts = {t1: 4, t2: 3, t3: 2, t4: 1}
    for text, count in counts.items():
        for _ in range(count):
            add_citation(text=text)

    Text_Index.es_insert()

    texts = Text_Index.materialize_ranking(query='match')

    # Only t1 and t3 match the query; they stay count-ordered.
    assert len(texts['hits']) == 2
    assert texts['hits'][0]['_id'] == str(t1.id)
    assert texts['hits'][1]['_id'] == str(t3.id)
def test_sort_on_filtered_counts(add_text, add_citation):

    """
    If a text -> count map is passed, sort on the filtered counts.
    """

    t1 = add_text()
    t2 = add_text()
    t3 = add_text()

    # By total counts alone, t1 would rank first (30 > 20 > 10).
    for _ in range(30):
        add_citation(text=t1)
    for _ in range(20):
        add_citation(text=t2)
    for _ in range(10):
        add_citation(text=t3)

    Text_Index.es_insert()

    # The explicit rank map reverses that ordering.
    filtered = {t1.id: 1, t2.id: 2, t3.id: 3}
    texts = Text_Index.materialize_ranking(ranks=filtered)

    expected = [str(t3.id), str(t2.id), str(t1.id)]
    assert texts['hits'][0]['_id'] == expected[0]
    assert texts['hits'][1]['_id'] == expected[1]
    assert texts['hits'][2]['_id'] == expected[2]
def assigned_with(text_id, size=200):

    """
    Given a "seed" text, rank other texts assigned on the same syllabi.

    Args:
        text_id (int): The text id.
        size (int): Return at most N results.

    Returns:
        dict: Elasticsearch hits.
    """

    # Get syllabi that assign the text.
    doc_ids = Citation_Index.docs_with_text(text_id)

    # Rank texts assigned by those syllabi.
    ranks = Citation_Index.compute_ranking(dict(
        document_id=doc_ids
    ))

    # Omit the seed text. Pass a default so a seed text that is absent
    # from the ranking doesn't raise KeyError.
    ranks.pop(str(text_id), None)

    # Materialize the text metadata.
    texts = Text_Index.materialize_ranking(ranks, size=size)

    return texts
def rank_texts(filters=None, query=None, size=1000, page=1):

    """
    Filter and rank texts.

    Args:
        filters (dict): Citation metadata filters.
        query (str): A text metadata search query.
        size (int): Return N results.
        page (int): 1-indexed page offset.

    Returns:
        dict: Elasticsearch hits.
    """

    # Use a None sentinel instead of a mutable {} default, which would
    # be shared across calls.
    if filters is None:
        filters = {}

    # Filter citation counts, if non-empty filters.
    if any(filters.values()):
        ranks = Citation_Index.compute_ranking(filters)
    else:
        ranks = None

    # Materialize the text metadata.
    texts = Text_Index.materialize_ranking(ranks, query, size, page)

    return texts
def test_paginate_results(add_text, add_citation):

    """
    When a page is provided, return the 1-indexed page.
    """

    # 9 texts - the first with 9 citations, second with 8, etc.
    texts = []
    for i in reversed(range(1, 10)):
        text = add_text()
        for j in range(i):
            add_citation(text=text)
        texts.append(text)

    Text_Index.es_insert()

    # Get first page by default.
    p1 = Text_Index.materialize_ranking(size=3)
    assert len(p1['hits']) == 3
    assert p1['hits'][0]['_id'] == str(texts[0].id)
    assert p1['hits'][1]['_id'] == str(texts[1].id)
    assert p1['hits'][2]['_id'] == str(texts[2].id)

    p2 = Text_Index.materialize_ranking(size=3, page=2)
    # Fixed copy-paste bug: originally re-asserted len(p1) here.
    assert len(p2['hits']) == 3
    assert p2['hits'][0]['_id'] == str(texts[3].id)
    assert p2['hits'][1]['_id'] == str(texts[4].id)
    assert p2['hits'][2]['_id'] == str(texts[5].id)

    p3 = Text_Index.materialize_ranking(size=3, page=3)
    # Fixed copy-paste bug: originally re-asserted len(p1) here.
    assert len(p3['hits']) == 3
    assert p3['hits'][0]['_id'] == str(texts[6].id)
    assert p3['hits'][1]['_id'] == str(texts[7].id)
    assert p3['hits'][2]['_id'] == str(texts[8].id)
def test_paginate_results(add_text, add_citation):

    """
    When a page is provided, return the 1-indexed page.
    """

    # 9 texts - the first with 9 citations, second with 8, etc.
    texts = []
    for i in reversed(range(1, 10)):
        text = add_text()
        for j in range(i):
            add_citation(text=text)
        texts.append(text)

    Text_Index.es_insert()

    # Get first page by default.
    p1 = Text_Index.materialize_ranking(size=3)
    assert len(p1['hits']) == 3
    assert p1['hits'][0]['_id'] == str(texts[0].id)
    assert p1['hits'][1]['_id'] == str(texts[1].id)
    assert p1['hits'][2]['_id'] == str(texts[2].id)

    p2 = Text_Index.materialize_ranking(size=3, page=2)
    # Fixed copy-paste bug: originally re-asserted len(p1) here.
    assert len(p2['hits']) == 3
    assert p2['hits'][0]['_id'] == str(texts[3].id)
    assert p2['hits'][1]['_id'] == str(texts[4].id)
    assert p2['hits'][2]['_id'] == str(texts[5].id)

    p3 = Text_Index.materialize_ranking(size=3, page=3)
    # Fixed copy-paste bug: originally re-asserted len(p1) here.
    assert len(p3['hits']) == 3
    assert p3['hits'][0]['_id'] == str(texts[6].id)
    assert p3['hits'][1]['_id'] == str(texts[7].id)
    assert p3['hits'][2]['_id'] == str(texts[8].id)