def test_sort_on_total_counts_by_default(add_text, add_citation):
    """
    With no arguments, the ranking is ordered by total citation count, with
    the most-cited text first.
    """

    # Three texts, cited 3, 2 and 1 times in creation order.
    cited = [add_text() for _ in range(3)]

    for text, n_citations in zip(cited, (3, 2, 1)):
        for _ in range(n_citations):
            add_citation(text=text)

    Text_Index.es_insert()

    ranking = Text_Index.materialize_ranking()

    # Compare the three leading hits against the creation order.
    leading_ids = [hit['_id'] for hit in ranking['hits'][:3]]
    assert leading_ids == [str(text.id) for text in cited]
# Example #2
# 0
def test_size(add_text, add_citation):
    """
    Passing 'size' caps the number of hits returned in a page.
    """

    first, second, third = add_text(), add_text(), add_text()

    # Cite the texts 3, 2 and 1 times respectively.
    for text, n_citations in ((first, 3), (second, 2), (third, 1)):
        for _ in range(n_citations):
            add_citation(text)

    Citation_Index.es_insert()
    Text_Index.es_insert()

    page = rank_texts(size=2)

    # Exactly the two most-cited texts, in rank order.
    hit_ids = [hit['_id'] for hit in page['hits']]
    assert hit_ids == [str(first.id), str(second.id)]
def test_search_on_total_counts(params, add_text, add_citation):
    """
    A search query restricts the ranking to the texts that match it; the
    remaining hits keep their citation-count order.
    """

    phrases = ('match one', 'two', 'match three', 'four')
    texts = [add_text(**params(phrase)) for phrase in phrases]

    # Citation counts descend from 4 to 1 in creation order.
    for text, n_citations in zip(texts, (4, 3, 2, 1)):
        for _ in range(n_citations):
            add_citation(text=text)

    Text_Index.es_insert()

    ranking = Text_Index.materialize_ranking(query='match')

    # Only the two texts built from a 'match ...' phrase should come back.
    expected = [str(texts[0].id), str(texts[2].id)]
    assert [hit['_id'] for hit in ranking['hits']] == expected
def test_sort_on_filtered_counts(add_text, add_citation):
    """
    When an explicit text -> count mapping is supplied, hits are ordered by
    those counts rather than by the indexed totals.
    """

    texts = [add_text() for _ in range(3)]

    # Total counts favour the first text (30 > 20 > 10) ...
    for text, total in zip(texts, (30, 20, 10)):
        for _ in range(total):
            add_citation(text=text)

    Text_Index.es_insert()

    # ... while the supplied counts favour the last one (1 < 2 < 3).
    filtered = {text.id: n for n, text in enumerate(texts, start=1)}
    ranking = Text_Index.materialize_ranking(ranks=filtered)

    # The leading three hits should be in reverse creation order.
    leading_ids = [hit['_id'] for hit in ranking['hits'][:3]]
    assert leading_ids == [str(text.id) for text in reversed(texts)]
def test_metadata_filters(add_text, add_citation):

    """
    Only texts whose metadata matches the supplied filters are ranked.
    """

    corpora = ('corpus1', 'corpus2', 'corpus1', 'corpus2')
    texts = [add_text(corpus=name) for name in corpora]

    # Citation counts descend from 4 to 1 in creation order.
    for text, n_citations in zip(texts, (4, 3, 2, 1)):
        for _ in range(n_citations):
            add_citation(text)

    Citation_Index.es_insert()
    Text_Index.es_insert()

    ranking = rank_texts(filters={'corpus': 'corpus2'})

    # The two 'corpus2' texts, most cited first.
    in_corpus2 = [str(texts[1].id), str(texts[3].id)]
    assert [hit['_id'] for hit in ranking['hits']] == in_corpus2
def test_unfiltered(add_text, add_citation):

    """
    Without filters or a query, the overall ranking is returned.
    """

    most, middle, least = add_text(), add_text(), add_text()

    # Cite the texts 3, 2 and 1 times respectively.
    for text, n_citations in ((most, 3), (middle, 2), (least, 1)):
        for _ in range(n_citations):
            add_citation(text)

    Citation_Index.es_insert()
    Text_Index.es_insert()

    hits = rank_texts()['hits']

    assert len(hits) == 3
    assert [hit['_id'] for hit in hits] == [
        str(most.id),
        str(middle.id),
        str(least.id),
    ]
# Example #7
# 0
def test_metadata_filters(add_text, add_citation):
    """
    The ranking honours metadata filters: only texts from the requested
    corpus are returned.
    """

    # (corpus, number of citations) for each text, in creation order.
    plan = (
        ('corpus1', 4),
        ('corpus2', 3),
        ('corpus1', 2),
        ('corpus2', 1),
    )

    # Create every text first, then attach the citations.
    texts = [add_text(corpus=corpus) for corpus, _count in plan]

    for text, (_corpus, n_citations) in zip(texts, plan):
        for _ in range(n_citations):
            add_citation(text)

    Citation_Index.es_insert()
    Text_Index.es_insert()

    corpus_filter = dict(corpus='corpus2')
    hits = rank_texts(filters=corpus_filter)['hits']

    assert len(hits) == 2

    # Second and fourth texts are the 'corpus2' ones.
    assert [hit['_id'] for hit in hits] == [str(texts[1].id), str(texts[3].id)]
def test_search_on_total_counts(params, add_text, add_citation):

    """
    With a search query, only matching texts appear in the ranking.
    """

    def cite(text, times):
        # Attach `times` citations to `text`.
        for _ in range(times):
            add_citation(text=text)

    hit_a = add_text(**params('match one'))
    miss_a = add_text(**params('two'))
    hit_b = add_text(**params('match three'))
    miss_b = add_text(**params('four'))

    cite(hit_a, 4)
    cite(miss_a, 3)
    cite(hit_b, 2)
    cite(miss_b, 1)

    Text_Index.es_insert()

    hits = Text_Index.materialize_ranking(query='match')['hits']

    # Just the two 'match ...' texts, the more-cited one first.
    assert len(hits) == 2
    assert (hits[0]['_id'], hits[1]['_id']) == (str(hit_a.id), str(hit_b.id))
def test_sort_on_filtered_counts(add_text, add_citation):

    """
    A supplied text -> count map takes over the sort order.
    """

    def cite(text, times):
        # Attach `times` citations to `text`.
        for _ in range(times):
            add_citation(text=text)

    high, mid, low = add_text(), add_text(), add_text()

    # Total counts put `high` first and `low` last.
    cite(high, 30)
    cite(mid, 20)
    cite(low, 10)

    Text_Index.es_insert()

    # The supplied counts invert that order.
    custom_counts = {}
    custom_counts[high.id] = 1
    custom_counts[mid.id] = 2
    custom_counts[low.id] = 3

    hits = Text_Index.materialize_ranking(ranks=custom_counts)['hits']

    expected_order = (low, mid, high)
    for position, text in enumerate(expected_order):
        assert hits[position]['_id'] == str(text.id)
def test_search_filter(add_text, add_citation):

    """
    A free-text query narrows the ranking to matching texts.
    """

    titles = ('match one', 'two', 'match three', 'four')
    texts = [add_text(title=title) for title in titles]

    # Citation counts descend from 4 to 1 in creation order.
    for text, n_citations in zip(texts, (4, 3, 2, 1)):
        for _ in range(n_citations):
            add_citation(text)

    Citation_Index.es_insert()
    Text_Index.es_insert()

    ranking = rank_texts(query='match')

    # Only the titles containing 'match' survive, most cited first.
    matching = [str(texts[0].id), str(texts[2].id)]
    assert [hit['_id'] for hit in ranking['hits']] == matching
def test_size(add_text, add_citation):

    """
    The page length is governed by the 'size' argument.
    """

    def cite(text, times):
        # Attach `times` citations to `text`.
        for _ in range(times):
            add_citation(text)

    top, runner_up, last = add_text(), add_text(), add_text()

    cite(top, 3)
    cite(runner_up, 2)
    cite(last, 1)

    Citation_Index.es_insert()
    Text_Index.es_insert()

    hits = rank_texts(size=2)['hits']

    # Two hits requested, so the least-cited text is cut off.
    assert len(hits) == 2
    assert (hits[0]['_id'], hits[1]['_id']) == (str(top.id), str(runner_up.id))
def test_sort_on_total_counts_by_default(add_text, add_citation):

    """
    The default ranking orders texts by their total citation count.
    """

    def cite(text, times):
        # Attach `times` citations to `text`.
        for _ in range(times):
            add_citation(text=text)

    top, runner_up, last = add_text(), add_text(), add_text()

    cite(top, 3)
    cite(runner_up, 2)
    cite(last, 1)

    Text_Index.es_insert()

    hits = Text_Index.materialize_ranking()['hits']

    # Walk the expected order and check each position in turn.
    for position, text in enumerate((top, runner_up, last)):
        assert hits[position]['_id'] == str(text.id)
def test_size(add_text, add_doc, add_citation):

    """
    'size' limits how many co-assigned texts assigned_with() returns.
    """

    seed = add_text()

    # Texts that share 3, 2 and 1 documents with the seed, respectively.
    partners = [add_text() for _ in range(3)]

    for partner, shared_docs in zip(partners, (3, 2, 1)):
        for _ in range(shared_docs):
            document = add_doc()
            add_citation(text=seed, document=document)
            add_citation(text=partner, document=document)

    Citation_Index.es_insert()
    Text_Index.es_insert()

    ranking = assigned_with(seed.id, size=2)

    # Only the two partners with the most shared documents are kept.
    expected = [str(partner.id) for partner in partners[:2]]
    assert [hit['_id'] for hit in ranking['hits']] == expected
# Example #14
# 0
def test_index_metadata(add_text, add_citation):
    """
    After Text_Index.es_insert(), the indexed document exposes the text's
    metadata fields.
    """

    metadata = dict(
        corpus='corpus',
        identifier='identifier',
        url='url',
        title='title',
        authors=['author1', 'author2'],
        publisher='publisher',
        date='date',
        journal='journal',
    )
    text = add_text(**metadata)

    # Give the text a single citation.
    add_citation(text=text)

    Text_Index.es_insert()

    source = config.es.get(index='text', id=text.id)['_source']

    # These fields are compared against the raw attribute values.
    for field in ('corpus', 'identifier', 'url'):
        assert source[field] == getattr(text, field)

    # These are compared against text.pretty(); note that the indexed
    # 'journal' field is looked up under the 'journal_title' key.
    pretty_keys = (
        ('title', 'title'),
        ('authors', 'authors'),
        ('publisher', 'publisher'),
        ('date', 'date'),
        ('journal', 'journal_title'),
    )
    for field, pretty_key in pretty_keys:
        assert source[field] == text.pretty(pretty_key)
# Example #15
# 0
def test_unfiltered(add_text, add_citation):
    """
    With neither filters nor a query, rank_texts() returns the overall
    ranking.
    """

    def cite(text, times):
        # Attach `times` citations to `text`.
        for _ in range(times):
            add_citation(text)

    texts = [add_text() for _ in range(3)]

    # 3, 2 and 1 citations, in creation order.
    for times, text in zip((3, 2, 1), texts):
        cite(text, times)

    Citation_Index.es_insert()
    Text_Index.es_insert()

    ranking = rank_texts()

    # All three texts come back, most cited first.
    returned_ids = [hit['_id'] for hit in ranking['hits']]
    assert returned_ids == [str(text.id) for text in texts]
# Example #16
# 0
def test_search_filter(add_text, add_citation):
    """
    rank_texts() applies a free-text search query to the ranking.
    """

    def cite(text, times):
        # Attach `times` citations to `text`.
        for _ in range(times):
            add_citation(text)

    hit_a = add_text(title='match one')
    miss_a = add_text(title='two')
    hit_b = add_text(title='match three')
    miss_b = add_text(title='four')

    cite(hit_a, 4)
    cite(miss_a, 3)
    cite(hit_b, 2)
    cite(miss_b, 1)

    Citation_Index.es_insert()
    Text_Index.es_insert()

    hits = rank_texts(query='match')['hits']

    # Only the two titles containing 'match', the more-cited one first.
    assert len(hits) == 2
    assert (hits[0]['_id'], hits[1]['_id']) == (str(hit_a.id), str(hit_b.id))
def test_index_counts_and_ranks(add_text, add_citation):

    """
    Each indexed text carries its total citation count, its rank and a score.
    """

    # (texts, expected count, expected rank, expected score) per tier.
    # Tied texts share a rank, and the rank after a two-way tie is 4.
    tiers = (
        ([add_text()], 9, 1, 3/3),
        ([add_text(), add_text()], 3, 2, 2/3),
        ([add_text(), add_text(), add_text()], 1, 4, 1/3),
    )

    # Within a tier, citations are added round-robin across its texts.
    for members, count, _rank, _score in tiers:
        for _ in range(count):
            for text in members:
                add_citation(text=text)

    Text_Index.es_insert()

    for members, count, rank, score in tiers:
        for text in members:
            source = config.es.get(index='text', id=text.id)['_source']
            assert source['count'] == count
            assert source['rank'] == rank
            assert source['score'] == score
# Example #18
# 0
def test_index_counts_and_ranks(add_text, add_citation):
    """
    Text_Index.es_insert() stores the citation count, rank and score for
    every text.
    """

    def assert_indexed(text, count, rank, score):
        # Fetch the indexed document and compare its ranking fields.
        source = config.es.get(index='text', id=text.id)['_source']
        assert source['count'] == count
        assert source['rank'] == rank
        assert source['score'] == score

    single = add_text()
    pair = [add_text(), add_text()]
    triple = [add_text(), add_text(), add_text()]

    # One text with 9 citations ...
    for _ in range(9):
        add_citation(text=single)

    # ... two tied on 3 ...
    for _ in range(3):
        for text in pair:
            add_citation(text=text)

    # ... and three tied on 1.
    for _ in range(1):
        for text in triple:
            add_citation(text=text)

    Text_Index.es_insert()

    assert_indexed(single, 9, 1, 3 / 3)

    for text in pair:
        assert_indexed(text, 3, 2, 2 / 3)

    # Rank 3 is skipped because two texts share rank 2.
    for text in triple:
        assert_indexed(text, 1, 4, 1 / 3)
def test_paginate_results(add_text, add_citation):

    """
    When a page is provided, return the 1-indexed page.

    Nine texts are ranked and fetched three per page; each page must hold
    exactly three hits, in descending citation order.
    """

    # 9 texts - the first with 9 citations, second with 8, etc.

    texts = []
    for n_citations in reversed(range(1, 10)):

        text = add_text()

        for _ in range(n_citations):
            add_citation(text=text)

        texts.append(text)

    Text_Index.es_insert()

    # Get first page by default.
    p1 = Text_Index.materialize_ranking(size=3)

    assert len(p1['hits']) == 3
    assert p1['hits'][0]['_id'] == str(texts[0].id)
    assert p1['hits'][1]['_id'] == str(texts[1].id)
    assert p1['hits'][2]['_id'] == str(texts[2].id)

    p2 = Text_Index.materialize_ranking(size=3, page=2)

    # Check this page's own length, not the first page's.
    assert len(p2['hits']) == 3
    assert p2['hits'][0]['_id'] == str(texts[3].id)
    assert p2['hits'][1]['_id'] == str(texts[4].id)
    assert p2['hits'][2]['_id'] == str(texts[5].id)

    p3 = Text_Index.materialize_ranking(size=3, page=3)

    # Check this page's own length, not the first page's.
    assert len(p3['hits']) == 3
    assert p3['hits'][0]['_id'] == str(texts[6].id)
    assert p3['hits'][1]['_id'] == str(texts[7].id)
    assert p3['hits'][2]['_id'] == str(texts[8].id)
def test_paginate_results(add_text, add_citation):
    """
    When a page is provided, return the 1-indexed page.

    Nine texts are ranked and fetched three per page; each page must hold
    exactly three hits, in descending citation order.
    """

    page_size = 3

    # 9 texts - the first with 9 citations, second with 8, etc.
    texts = []
    for n_citations in reversed(range(1, 10)):
        text = add_text()

        for _ in range(n_citations):
            add_citation(text=text)

        texts.append(text)

    Text_Index.es_insert()

    # Get first page by default (no explicit 'page' argument).
    pages = [
        Text_Index.materialize_ranking(size=3),
        Text_Index.materialize_ranking(size=3, page=2),
        Text_Index.materialize_ranking(size=3, page=3),
    ]

    for page_index, page in enumerate(pages):
        hits = page['hits']

        # Each page's own length is checked; the length was previously only
        # ever asserted for the first page.
        assert len(hits) == page_size

        offset = page_index * page_size
        for position in range(page_size):
            assert hits[position]['_id'] == str(texts[offset + position].id)
def test_index_metadata(add_text, add_citation):

    """
    Indexing a text with Text_Index.es_insert() makes its metadata
    retrievable from the 'text' index.
    """

    text = add_text(
        corpus='corpus',
        identifier='identifier',
        url='url',
        title='title',
        authors=['author1', 'author2'],
        publisher='publisher',
        date='date',
        journal='journal',
    )

    # Give the text a single citation.
    add_citation(text=text)

    Text_Index.es_insert()

    indexed = config.es.get(index='text', id=text.id)
    source = indexed['_source']

    # Fields compared against the raw attribute values.
    assert source['corpus'] == text.corpus
    assert source['identifier'] == text.identifier
    assert source['url'] == text.url

    # Fields compared against text.pretty(); the indexed 'journal' field is
    # looked up under the 'journal_title' key.
    for field, pretty_key in (
        ('title', 'title'),
        ('authors', 'authors'),
        ('publisher', 'publisher'),
        ('date', 'date'),
        ('journal', 'journal_title'),
    ):
        assert source[field] == text.pretty(pretty_key)
def test_assigned_with(add_text, add_doc, add_citation):

    """
    assigned_with() takes a seed text and ranks the texts that are cited in
    the same documents as the seed.
    """

    seed = add_text()

    # Texts that share 3, 2 and 1 documents with the seed, respectively.
    partners = [add_text() for _ in range(3)]

    for partner, shared_docs in zip(partners, (3, 2, 1)):
        for _ in range(shared_docs):
            document = add_doc()
            add_citation(text=seed, document=document)
            add_citation(text=partner, document=document)

    Citation_Index.es_insert()
    Text_Index.es_insert()

    ranking = assigned_with(seed.id)

    # Exactly the three partners, ordered by shared documents; the seed
    # itself is therefore not among the hits.
    returned_ids = [hit['_id'] for hit in ranking['hits']]
    assert returned_ids == [str(partner.id) for partner in partners]
def test_size(add_text, add_citation):

    """
    The page offset is governed by the 'page' argument.
    """

    texts = [add_text() for _ in range(4)]

    # Citation counts descend from 4 to 1 in creation order.
    for text, n_citations in zip(texts, (4, 3, 2, 1)):
        for _ in range(n_citations):
            add_citation(text)

    Citation_Index.es_insert()
    Text_Index.es_insert()

    # Page 1 holds the two most-cited texts.
    first_page = rank_texts(size=2, page=1)
    first_ids = [hit['_id'] for hit in first_page['hits']]
    assert first_ids == [str(text.id) for text in texts[:2]]

    # Page 2 holds the remaining two.
    second_page = rank_texts(size=2, page=2)
    second_ids = [hit['_id'] for hit in second_page['hits']]
    assert second_ids == [str(text.id) for text in texts[2:]]
# Example #24
# 0
def test_size(add_text, add_citation):
    """
    'page' selects which slice of the ranking is returned.
    """

    def cite(text, times):
        # Attach `times` citations to `text`.
        for _ in range(times):
            add_citation(text)

    first, second, third, fourth = (
        add_text(),
        add_text(),
        add_text(),
        add_text(),
    )

    cite(first, 4)
    cite(second, 3)
    cite(third, 2)
    cite(fourth, 1)

    Citation_Index.es_insert()
    Text_Index.es_insert()

    # With two hits per page, page 1 is the top half of the ranking.
    hits = rank_texts(size=2, page=1)['hits']

    assert len(hits) == 2
    assert (hits[0]['_id'], hits[1]['_id']) == (str(first.id), str(second.id))

    # Page 2 is the bottom half.
    hits = rank_texts(size=2, page=2)['hits']

    assert len(hits) == 2
    assert (hits[0]['_id'], hits[1]['_id']) == (str(third.id), str(fourth.id))