def test_sort_on_total_counts_by_default(add_text, add_citation):

    """
    Called with no arguments, materialize_ranking() should list the texts
    from most-cited to least-cited.
    """

    most, middle, least = add_text(), add_text(), add_text()

    # Cite the texts 3, 2, and 1 times respectively.
    for cited, times in ((most, 3), (middle, 2), (least, 1)):
        for _ in range(times):
            add_citation(text=cited)

    Text_Index.es_insert()

    ranking = Text_Index.materialize_ranking()

    for position, expected in enumerate((most, middle, least)):
        assert ranking['hits'][position]['_id'] == str(expected.id)
def test_size(add_text, add_citation):

    """
    Passing size=2 to rank_texts() should cap the result at the two
    most-cited texts.
    """

    first, second, third = add_text(), add_text(), add_text()

    # 3, 2, and 1 citations, so the expected order is first > second > third.
    for cited, times in ((first, 3), (second, 2), (third, 1)):
        for _ in range(times):
            add_citation(cited)

    Citation_Index.es_insert()
    Text_Index.es_insert()

    ranking = rank_texts(size=2)

    assert len(ranking['hits']) == 2

    for position, expected in enumerate((first, second)):
        assert ranking['hits'][position]['_id'] == str(expected.id)
def test_search_on_total_counts(params, add_text, add_citation):

    """
    With query='match', materialize_ranking() should return only the two
    texts built from 'match ...' params, ordered by citation count.
    """

    # Texts are created in this order; only the 1st and 3rd contain 'match'.
    labels = ('match one', 'two', 'match three', 'four')
    created = [add_text(**params(label)) for label in labels]

    # Cite them 4, 3, 2, and 1 times respectively.
    for cited, times in zip(created, (4, 3, 2, 1)):
        for _ in range(times):
            add_citation(text=cited)

    Text_Index.es_insert()

    ranking = Text_Index.materialize_ranking(query='match')

    assert len(ranking['hits']) == 2

    for position, expected in enumerate((created[0], created[2])):
        assert ranking['hits'][position]['_id'] == str(expected.id)
def test_sort_on_filtered_counts(add_text, add_citation):

    """
    When a `ranks` mapping (text id -> value) is supplied, the ordering
    should follow the supplied values instead of the total citation counts.
    """

    high, mid, low = add_text(), add_text(), add_text()

    # Total counts are 30 > 20 > 10 ...
    for cited, times in ((high, 30), (mid, 20), (low, 10)):
        for _ in range(times):
            add_citation(text=cited)

    Text_Index.es_insert()

    # ... but the supplied values run the other way (1 < 2 < 3).
    supplied = {high.id: 1, mid.id: 2, low.id: 3}

    ranking = Text_Index.materialize_ranking(ranks=supplied)

    # The text with the largest supplied value comes first.
    for position, expected in enumerate((low, mid, high)):
        assert ranking['hits'][position]['_id'] == str(expected.id)
def test_metadata_filters(add_text, add_citation):

    """
    rank_texts() with a corpus filter should return only the texts from
    that corpus, still ordered by citation count.
    """

    # Alternate the two corpora so each one holds two texts.
    corpora = ('corpus1', 'corpus2', 'corpus1', 'corpus2')
    created = [add_text(corpus=name) for name in corpora]

    # Cite them 4, 3, 2, and 1 times respectively.
    for cited, times in zip(created, (4, 3, 2, 1)):
        for _ in range(times):
            add_citation(cited)

    Citation_Index.es_insert()
    Text_Index.es_insert()

    ranking = rank_texts(filters={'corpus': 'corpus2'})

    assert len(ranking['hits']) == 2

    # Only the 2nd and 4th texts belong to 'corpus2'.
    for position, expected in enumerate((created[1], created[3])):
        assert ranking['hits'][position]['_id'] == str(expected.id)
def test_unfiltered(add_text, add_citation):

    """
    rank_texts() called without filters or a query should return every
    text, ordered by citation count.
    """

    first, second, third = add_text(), add_text(), add_text()

    # 3, 2, and 1 citations respectively.
    for cited, times in ((first, 3), (second, 2), (third, 1)):
        for _ in range(times):
            add_citation(cited)

    Citation_Index.es_insert()
    Text_Index.es_insert()

    ranking = rank_texts()

    assert len(ranking['hits']) == 3

    for position, expected in enumerate((first, second, third)):
        assert ranking['hits'][position]['_id'] == str(expected.id)
def test_metadata_filters(add_text, add_citation):

    """
    Filtering rank_texts() on corpus='corpus2' should drop the texts that
    belong to the other corpus and keep the rest in citation-count order.
    """

    in_one_a = add_text(corpus='corpus1')
    in_two_a = add_text(corpus='corpus2')
    in_one_b = add_text(corpus='corpus1')
    in_two_b = add_text(corpus='corpus2')

    # Citation counts descend in creation order: 4, 3, 2, 1.
    plan = ((in_one_a, 4), (in_two_a, 3), (in_one_b, 2), (in_two_b, 1))

    for cited, times in plan:
        for _ in range(times):
            add_citation(cited)

    Citation_Index.es_insert()
    Text_Index.es_insert()

    wanted = dict(corpus='corpus2')
    ranking = rank_texts(filters=wanted)

    assert len(ranking['hits']) == 2

    for position, expected in enumerate((in_two_a, in_two_b)):
        assert ranking['hits'][position]['_id'] == str(expected.id)
def test_search_filter(add_text, add_citation):

    """
    rank_texts() with query='match' should keep only the texts whose title
    contains the query term, ordered by citation count.
    """

    titles = ('match one', 'two', 'match three', 'four')
    created = [add_text(title=title) for title in titles]

    # Cite them 4, 3, 2, and 1 times respectively.
    for cited, times in zip(created, (4, 3, 2, 1)):
        for _ in range(times):
            add_citation(cited)

    Citation_Index.es_insert()
    Text_Index.es_insert()

    ranking = rank_texts(query='match')

    assert len(ranking['hits']) == 2

    # Only the 1st and 3rd titles contain 'match'.
    for position, expected in enumerate((created[0], created[2])):
        assert ranking['hits'][position]['_id'] == str(expected.id)
def test_size(add_text, add_doc, add_citation):

    """
    Passing size=2 to assigned_with() should cap the result at the two
    texts most often co-cited with the seed.
    """

    seed = add_text()
    often, sometimes, once = add_text(), add_text(), add_text()

    # Each document cites the seed plus one partner; the partners share
    # 3, 2, and 1 documents with the seed respectively.
    for partner, shared_docs in ((often, 3), (sometimes, 2), (once, 1)):
        for _ in range(shared_docs):
            doc = add_doc()
            add_citation(text=seed, document=doc)
            add_citation(text=partner, document=doc)

    Citation_Index.es_insert()
    Text_Index.es_insert()

    ranking = assigned_with(seed.id, size=2)

    assert len(ranking['hits']) == 2

    for position, expected in enumerate((often, sometimes)):
        assert ranking['hits'][position]['_id'] == str(expected.id)
def test_index_metadata(add_text, add_citation):

    """
    After Text_Index.es_insert(), the document stored in the 'text' index
    should carry the text's metadata fields.
    """

    metadata = dict(
        corpus='corpus',
        identifier='identifier',
        url='url',
        title='title',
        authors=['author1', 'author2'],
        publisher='publisher',
        date='date',
        journal='journal',
    )

    text = add_text(**metadata)

    # Cite the text.
    add_citation(text=text)

    Text_Index.es_insert()

    doc = config.es.get(index='text', id=text.id)

    # These fields are compared against the raw attributes.
    for field in ('corpus', 'identifier', 'url'):
        assert doc['_source'][field] == getattr(text, field)

    # These are compared against text.pretty(); note the indexed 'journal'
    # key is checked against the 'journal_title' pretty field.
    pretty_fields = (
        ('title', 'title'),
        ('authors', 'authors'),
        ('publisher', 'publisher'),
        ('date', 'date'),
        ('journal', 'journal_title'),
    )

    for key, pretty_name in pretty_fields:
        assert doc['_source'][key] == text.pretty(pretty_name)
def test_index_counts_and_ranks(add_text, add_citation):

    """
    Text_Index.es_insert() should store each text's citation count, its
    rank (ties share a rank), and its score.
    """

    t1, t2, t3, t4, t5, t6 = (
        add_text(), add_text(), add_text(),
        add_text(), add_text(), add_text(),
    )

    # Three tiers: one text with 9 citations, two with 3, three with 1.
    tiers = (((t1,), 9), ((t2, t3), 3), ((t4, t5, t6), 1))

    for members, times in tiers:
        for _ in range(times):
            for member in members:
                add_citation(text=member)

    Text_Index.es_insert()

    # (texts, expected count, expected rank, expected score)
    expectations = (
        ([t1], 9, 1, 3/3),
        ([t2, t3], 3, 2, 2/3),
        ([t4, t5, t6], 1, 4, 1/3),
    )

    for group, count, rank, score in expectations:
        for t in group:
            doc = config.es.get(index='text', id=t.id)
            assert doc['_source']['count'] == count
            assert doc['_source']['rank'] == rank
            assert doc['_source']['score'] == score
def test_index_counts_and_ranks(add_text, add_citation):

    """
    Indexed text documents should expose `count`, `rank`, and `score`
    values derived from the citation totals.
    """

    top = add_text()
    mid_a, mid_b = add_text(), add_text()
    low_a, low_b, low_c = add_text(), add_text(), add_text()

    for _ in range(9):
        add_citation(text=top)

    # The two middle texts are cited alternately, 3 times each.
    for _ in range(3):
        for cited in (mid_a, mid_b):
            add_citation(text=cited)

    # The three bottom texts get a single citation each.
    for _ in range(1):
        for cited in (low_a, low_b, low_c):
            add_citation(text=cited)

    Text_Index.es_insert()

    def indexed(text):
        """Fetch the stored `_source` for a text from the 'text' index."""
        return config.es.get(index='text', id=text.id)['_source']

    # Tied texts share a rank; the next rank skips past the tie (1, 2, 4).
    checks = (
        ((top,), 9, 1, 3 / 3),
        ((mid_a, mid_b), 3, 2, 2 / 3),
        ((low_a, low_b, low_c), 1, 4, 1 / 3),
    )

    for members, count, rank, score in checks:
        for member in members:
            source = indexed(member)
            assert source['count'] == count
            assert source['rank'] == rank
            assert source['score'] == score
def test_paginate_results(add_text, add_citation):

    """
    When a page is provided, return the 1-indexed page; when it is omitted,
    return the first page.
    """

    # 9 texts - the first with 9 citations, second with 8, etc.
    texts = []
    for count in reversed(range(1, 10)):
        text = add_text()
        for _ in range(count):
            add_citation(text=text)
        texts.append(text)

    Text_Index.es_insert()

    # (query arguments, texts expected on that page, in order). The first
    # query omits `page` on purpose to exercise the default.
    cases = (
        (dict(size=3), texts[0:3]),
        (dict(size=3, page=2), texts[3:6]),
        (dict(size=3, page=3), texts[6:9]),
    )

    for kwargs, expected in cases:

        page = Text_Index.materialize_ranking(**kwargs)

        # Check the length of *this* page. The earlier version re-checked
        # the first page's length for pages 2 and 3.
        assert len(page['hits']) == 3

        for hit, text in zip(page['hits'], expected):
            assert hit['_id'] == str(text.id)
def test_index_metadata(add_text, add_citation):

    """
    Text_Index.es_insert() should write the text's metadata into the
    'text' index document.
    """

    fields = {
        'corpus': 'corpus',
        'identifier': 'identifier',
        'url': 'url',
        'title': 'title',
        'authors': ['author1', 'author2'],
        'publisher': 'publisher',
        'date': 'date',
        'journal': 'journal',
    }

    text = add_text(**fields)

    # Cite the text.
    add_citation(text=text)

    Text_Index.es_insert()

    doc = config.es.get(index='text', id=text.id)

    # Raw attributes on the text.
    raw_expected = (
        ('corpus', text.corpus),
        ('identifier', text.identifier),
        ('url', text.url),
    )

    for key, value in raw_expected:
        assert doc['_source'][key] == value

    # Indexed key -> name passed to text.pretty().
    pretty_names = (
        ('title', 'title'),
        ('authors', 'authors'),
        ('publisher', 'publisher'),
        ('date', 'date'),
        ('journal', 'journal_title'),
    )

    for key, name in pretty_names:
        assert doc['_source'][key] == text.pretty(name)
def test_assigned_with(add_text, add_doc, add_citation):

    """
    Given a seed text, assigned_with() should rank the texts that are cited
    on the same documents as the seed, most frequent first.
    """

    seed = add_text()
    often, sometimes, once = add_text(), add_text(), add_text()

    # Every document cites the seed and exactly one partner text; the
    # partners appear alongside the seed on 3, 2, and 1 documents.
    for partner, shared_docs in ((often, 3), (sometimes, 2), (once, 1)):
        for _ in range(shared_docs):
            doc = add_doc()
            add_citation(text=seed, document=doc)
            add_citation(text=partner, document=doc)

    Citation_Index.es_insert()
    Text_Index.es_insert()

    ranking = assigned_with(seed.id)

    # The seed itself is not among the hits.
    assert len(ranking['hits']) == 3

    for position, expected in enumerate((often, sometimes, once)):
        assert ranking['hits'][position]['_id'] == str(expected.id)
def test_size(add_text, add_citation):

    """
    The 'page' argument to rank_texts() should select which slice of the
    ranking is returned.
    """

    # NOTE(review): this test exercises `page`, not `size` - the function
    # name looks like a copy-paste leftover; confirm before renaming.

    created = [add_text() for _ in range(4)]

    # Cite them 4, 3, 2, and 1 times respectively.
    for cited, times in zip(created, (4, 3, 2, 1)):
        for _ in range(times):
            add_citation(cited)

    Citation_Index.es_insert()
    Text_Index.es_insert()

    # With two texts per page, page 1 holds the top two, page 2 the rest.
    first_page = rank_texts(size=2, page=1)

    assert len(first_page['hits']) == 2

    for position, expected in enumerate(created[:2]):
        assert first_page['hits'][position]['_id'] == str(expected.id)

    second_page = rank_texts(size=2, page=2)

    assert len(second_page['hits']) == 2

    for position, expected in enumerate(created[2:]):
        assert second_page['hits'][position]['_id'] == str(expected.id)