def test_search_on_total_counts(params, add_text, add_citation):

    """
    When a query string is given, only the matching texts are returned.
    """

    # Create all four texts first, in the same order as the counts below.
    param_values = ('match one', 'two', 'match three', 'four')
    first, second, third, fourth = [
        add_text(**params(value)) for value in param_values
    ]

    # Citation counts descend: 4, 3, 2, 1.
    citation_plan = ((first, 4), (second, 3), (third, 2), (fourth, 1))
    for text, count in citation_plan:
        for _ in range(count):
            add_citation(text=text)

    Text_Index.es_insert()

    result = Text_Index.materialize_ranking(query='match')

    # Only the two texts built from 'match ...' values come back, with the
    # more-cited one first.
    assert len(result['hits']) == 2
    for position, expected in enumerate((first, third)):
        assert result['hits'][position]['_id'] == str(expected.id)
# Example #2
# 0
def assigned_with(text_id, size=1000):

    """
    Given a "seed" text, rank other texts assigned on the same syllabi.

    Args:
        text_id (int): The text id.
        size (int): Forwarded as the `size` argument of
            `Text_Index.materialize_ranking`.

    Returns:
        dict: Elasticsearch hits.
    """

    # Get syllabi that assign the text.
    doc_ids = Citation_Index.docs_with_text(text_id)

    # Rank texts assigned by those syllabi.
    ranks = Citation_Index.compute_ranking(dict(
        document_id=doc_ids
    ))

    # Omit the seed text. A default is supplied so that a seed which is
    # missing from the ranking does not raise KeyError.
    # NOTE(review): how materialize_ranking treats an empty ranking is not
    # visible from here - confirm that is the desired result in that case.
    ranks.pop(str(text_id), None)

    # Materialize the text metadata.
    return Text_Index.materialize_ranking(ranks, size=size)
def test_sort_on_total_counts_by_default(add_text, add_citation):

    """
    With no ranking supplied, hits are ordered by total citation count.
    """

    texts = [add_text() for _ in range(3)]

    # 3 citations for the first text, 2 for the second, 1 for the third.
    for text, count in zip(texts, (3, 2, 1)):
        for _ in range(count):
            add_citation(text=text)

    Text_Index.es_insert()

    ranking = Text_Index.materialize_ranking()

    # Most-cited text first.
    for position, text in enumerate(texts):
        assert ranking['hits'][position]['_id'] == str(text.id)
def test_sort_on_filtered_counts(add_text, add_citation):

    """
    When a text -> count map is supplied, hits are ordered by those counts
    rather than by the total citation counts.
    """

    texts = [add_text() for _ in range(3)]

    # Total counts descend (30, 20, 10) ...
    for text, total in zip(texts, (30, 20, 10)):
        for _ in range(total):
            add_citation(text=text)

    Text_Index.es_insert()

    # ... while the filtered counts ascend (1, 2, 3).
    filtered_counts = {
        text.id: count for count, text in enumerate(texts, start=1)
    }
    ranking = Text_Index.materialize_ranking(ranks=filtered_counts)

    # The order follows the filtered counts, i.e. the reverse of creation.
    for position, text in enumerate(reversed(texts)):
        assert ranking['hits'][position]['_id'] == str(text.id)
def test_sort_on_total_counts_by_default(add_text, add_citation):
    """
    Without explicit ranks, results are sorted on total citation counts.
    """

    most_cited, middle, least_cited = add_text(), add_text(), add_text()

    # (text, number of citations) - highest count first.
    plan = [(most_cited, 3), (middle, 2), (least_cited, 1)]
    for text, how_many in plan:
        remaining = how_many
        while remaining > 0:
            add_citation(text=text)
            remaining -= 1

    Text_Index.es_insert()

    result = Text_Index.materialize_ranking()

    expected_order = (most_cited, middle, least_cited)
    for index, text in enumerate(expected_order):
        assert result['hits'][index]['_id'] == str(text.id)
def test_search_on_total_counts(params, add_text, add_citation):
    """
    A search query restricts the ranked results to the matching texts.
    """

    matching_a = add_text(**params('match one'))
    other_a = add_text(**params('two'))
    matching_b = add_text(**params('match three'))
    other_b = add_text(**params('four'))

    # Citation counts in creation order: 4, 3, 2, 1.
    created = (matching_a, other_a, matching_b, other_b)
    for text, count in zip(created, (4, 3, 2, 1)):
        for _ in range(count):
            add_citation(text=text)

    Text_Index.es_insert()

    found = Text_Index.materialize_ranking(query='match')

    # Exactly the two 'match ...' texts, higher citation count first.
    assert len(found['hits']) == 2
    for index, text in enumerate((matching_a, matching_b)):
        assert found['hits'][index]['_id'] == str(text.id)
def test_sort_on_filtered_counts(add_text, add_citation):
    """
    An explicit text -> count map overrides the total counts for sorting.
    """

    first, second, third = add_text(), add_text(), add_text()

    # Total citation counts: 30, 20 and 10 respectively.
    for text, total in ((first, 30), (second, 20), (third, 10)):
        for _ in range(total):
            add_citation(text=text)

    Text_Index.es_insert()

    # The supplied counts invert the order implied by the totals.
    supplied = {}
    supplied[first.id] = 1
    supplied[second.id] = 2
    supplied[third.id] = 3

    result = Text_Index.materialize_ranking(ranks=supplied)

    for index, text in enumerate((third, second, first)):
        assert result['hits'][index]['_id'] == str(text.id)
def assigned_with(text_id, size=200):

    """
    Given a "seed" text, rank other texts assigned on the same syllabi.

    Args:
        text_id (int): The text id.
        size (int): Forwarded as the `size` argument of
            `Text_Index.materialize_ranking`.

    Returns:
        dict: Elasticsearch hits.
    """

    # Get syllabi that assign the text.
    doc_ids = Citation_Index.docs_with_text(text_id)

    # Rank texts assigned by those syllabi.
    ranks = Citation_Index.compute_ranking(dict(
        document_id=doc_ids
    ))

    # Omit the seed text. A default is supplied so that a seed which is
    # missing from the ranking does not raise KeyError.
    # NOTE(review): how materialize_ranking treats an empty ranking is not
    # visible from here - confirm that is the desired result in that case.
    ranks.pop(str(text_id), None)

    # Materialize the text metadata.
    return Text_Index.materialize_ranking(ranks, size=size)
def rank_texts(filters=None, query=None, size=1000, page=1):

    """
    Filter and rank texts.

    Args:
        filters (dict): Citation metadata filters. May be omitted or None,
            in which case no filtered ranking is computed.
        query (str): A text metadata search query.
        size (int): Return N results.
        page (int): 1-indexed page offset.

    Returns:
        dict: Elasticsearch hits.
    """

    # Filter citation counts only when at least one filter has a truthy
    # value. The `filters` check also covers None and an empty dict, and
    # avoids the shared mutable default the signature used to have.
    if filters and any(filters.values()):
        ranks = Citation_Index.compute_ranking(filters)

    else:
        ranks = None

    # Materialize the text metadata.
    return Text_Index.materialize_ranking(ranks, query, size, page)
def test_paginate_results(add_text, add_citation):

    """
    When a page is provided, return the 1-indexed page.
    """

    # 9 texts - the first with 9 citations, second with 8, etc.

    texts = []
    for count in reversed(range(1, 10)):

        text = add_text()

        for _ in range(count):
            add_citation(text=text)

        texts.append(text)

    Text_Index.es_insert()

    # Get first page by default.
    p1 = Text_Index.materialize_ranking(size=3)

    assert len(p1['hits']) == 3
    assert p1['hits'][0]['_id'] == str(texts[0].id)
    assert p1['hits'][1]['_id'] == str(texts[1].id)
    assert p1['hits'][2]['_id'] == str(texts[2].id)

    p2 = Text_Index.materialize_ranking(size=3, page=2)

    # Check the length of this page, not of the first page again.
    assert len(p2['hits']) == 3
    assert p2['hits'][0]['_id'] == str(texts[3].id)
    assert p2['hits'][1]['_id'] == str(texts[4].id)
    assert p2['hits'][2]['_id'] == str(texts[5].id)

    p3 = Text_Index.materialize_ranking(size=3, page=3)

    # Check the length of this page, not of the first page again.
    assert len(p3['hits']) == 3
    assert p3['hits'][0]['_id'] == str(texts[6].id)
    assert p3['hits'][1]['_id'] == str(texts[7].id)
    assert p3['hits'][2]['_id'] == str(texts[8].id)
def test_paginate_results(add_text, add_citation):
    """
    When a page is provided, return the 1-indexed page.
    """

    # 9 texts - the first with 9 citations, second with 8, etc.

    texts = []
    for count in reversed(range(1, 10)):

        text = add_text()

        for _ in range(count):
            add_citation(text=text)

        texts.append(text)

    Text_Index.es_insert()

    # Get first page by default.
    p1 = Text_Index.materialize_ranking(size=3)

    assert len(p1['hits']) == 3
    assert p1['hits'][0]['_id'] == str(texts[0].id)
    assert p1['hits'][1]['_id'] == str(texts[1].id)
    assert p1['hits'][2]['_id'] == str(texts[2].id)

    p2 = Text_Index.materialize_ranking(size=3, page=2)

    # Check the length of this page, not of the first page again.
    assert len(p2['hits']) == 3
    assert p2['hits'][0]['_id'] == str(texts[3].id)
    assert p2['hits'][1]['_id'] == str(texts[4].id)
    assert p2['hits'][2]['_id'] == str(texts[5].id)

    p3 = Text_Index.materialize_ranking(size=3, page=3)

    # Check the length of this page, not of the first page again.
    assert len(p3['hits']) == 3
    assert p3['hits'][0]['_id'] == str(texts[6].id)
    assert p3['hits'][1]['_id'] == str(texts[7].id)
    assert p3['hits'][2]['_id'] == str(texts[8].id)