def test_search_on_total_counts(params, add_text, add_citation):
    """
    If a search query is provided, filter the results on the query.
    """

    t1 = add_text(**params('match one'))
    t2 = add_text(**params('two'))
    t3 = add_text(**params('match three'))
    t4 = add_text(**params('four'))

    for i in range(4):
        add_citation(text=t1)

    for i in range(3):
        add_citation(text=t2)

    for i in range(2):
        add_citation(text=t3)

    for i in range(1):
        add_citation(text=t4)

    Text_Index.es_insert()

    texts = Text_Index.materialize_ranking(query='match')

    assert len(texts['hits']) == 2
    assert texts['hits'][0]['_id'] == str(t1.id)
    assert texts['hits'][1]['_id'] == str(t3.id)
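The fixtures used throughout this listing (params, add_text, add_citation, add_doc) come from the project's test setup; the sketch below is a purely illustrative, self-contained guess at what such pytest fixtures could look like. The StubText class and the field handling are assumptions, not the project's actual factories.

import itertools

import pytest

_ids = itertools.count(1)

class StubText:
    # In-memory stand-in for a text row (hypothetical).
    def __init__(self, **fields):
        self.id = next(_ids)
        self.__dict__.update(fields)

@pytest.fixture
def params():
    # Build add_text() keyword arguments from a title string.
    return lambda title: dict(title=title)

@pytest.fixture
def add_text():
    return lambda **fields: StubText(**fields)

@pytest.fixture
def add_citation():
    created = []
    def _add(text=None, document=None):
        created.append(dict(text=text, document=document))
        return created[-1]
    return _add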
def test_index_metadata(add_text, add_citation):
    """
    Text_Index.es_insert() should index texts.
    """

    text = add_text(
        corpus='corpus',
        identifier='identifier',
        url='url',
        title='title',
        authors=['author1', 'author2'],
        publisher='publisher',
        date='date',
        journal='journal',
    )

    # Cite the text.
    add_citation(text=text)

    Text_Index.es_insert()

    doc = config.es.get(
        index='text',
        id=text.id,
    )

    assert doc['_source']['corpus'] == text.corpus
    assert doc['_source']['identifier'] == text.identifier
    assert doc['_source']['url'] == text.url

    assert doc['_source']['title'] == text.pretty('title')
    assert doc['_source']['authors'] == text.pretty('authors')
    assert doc['_source']['publisher'] == text.pretty('publisher')
    assert doc['_source']['date'] == text.pretty('date')
    assert doc['_source']['journal'] == text.pretty('journal_title')
def test_search_filter(add_text, add_citation):
    """
    Free-text search query should be applied.
    """

    t1 = add_text(title='match one')
    t2 = add_text(title='two')
    t3 = add_text(title='match three')
    t4 = add_text(title='four')

    for i in range(4):
        add_citation(t1)

    for i in range(3):
        add_citation(t2)

    for i in range(2):
        add_citation(t3)

    for i in range(1):
        add_citation(t4)

    Citation_Index.es_insert()
    Text_Index.es_insert()

    texts = rank_texts(query='match')

    assert len(texts['hits']) == 2
    assert texts['hits'][0]['_id'] == str(t1.id)
    assert texts['hits'][1]['_id'] == str(t3.id)
def test_unfiltered(add_text, add_citation):
    """
    When no filters or query is passed, return the overall rankings.
    """

    t1 = add_text()
    t2 = add_text()
    t3 = add_text()

    for i in range(3):
        add_citation(t1)

    for i in range(2):
        add_citation(t2)

    for i in range(1):
        add_citation(t3)

    Citation_Index.es_insert()
    Text_Index.es_insert()

    texts = rank_texts()

    assert len(texts['hits']) == 3
    assert texts['hits'][0]['_id'] == str(t1.id)
    assert texts['hits'][1]['_id'] == str(t2.id)
    assert texts['hits'][2]['_id'] == str(t3.id)
def test_metadata_filters(add_text, add_citation):
    """
    Citation metadata filters should be applied.
    """

    t1 = add_text(corpus='corpus1')
    t2 = add_text(corpus='corpus2')
    t3 = add_text(corpus='corpus1')
    t4 = add_text(corpus='corpus2')

    for i in range(4):
        add_citation(t1)

    for i in range(3):
        add_citation(t2)

    for i in range(2):
        add_citation(t3)

    for i in range(1):
        add_citation(t4)

    Citation_Index.es_insert()
    Text_Index.es_insert()

    texts = rank_texts(filters=dict(corpus='corpus2'))

    assert len(texts['hits']) == 2
    assert texts['hits'][0]['_id'] == str(t2.id)
    assert texts['hits'][1]['_id'] == str(t4.id)
def test_sort_on_total_counts_by_default(add_text, add_citation):
    """
    By default return results sorted on the total citation count.
    """

    t1 = add_text()
    t2 = add_text()
    t3 = add_text()

    for i in range(3):
        add_citation(text=t1)

    for i in range(2):
        add_citation(text=t2)

    for i in range(1):
        add_citation(text=t3)

    Text_Index.es_insert()

    texts = Text_Index.materialize_ranking()

    assert texts['hits'][0]['_id'] == str(t1.id)
    assert texts['hits'][1]['_id'] == str(t2.id)
    assert texts['hits'][2]['_id'] == str(t3.id)
def test_size(add_text, add_citation):
    """
    The 'size' argument should control the page length.
    """

    t1 = add_text()
    t2 = add_text()
    t3 = add_text()

    for i in range(3):
        add_citation(t1)

    for i in range(2):
        add_citation(t2)

    for i in range(1):
        add_citation(t3)

    Citation_Index.es_insert()
    Text_Index.es_insert()

    texts = rank_texts(size=2)

    assert len(texts['hits']) == 2
    assert texts['hits'][0]['_id'] == str(t1.id)
    assert texts['hits'][1]['_id'] == str(t2.id)
def test_sort_on_filtered_counts(add_text, add_citation):
    """
    If a text -> count map is passed, sort on the filtered counts.
    """

    t1 = add_text()
    t2 = add_text()
    t3 = add_text()

    for i in range(30):
        add_citation(text=t1)

    for i in range(20):
        add_citation(text=t2)

    for i in range(10):
        add_citation(text=t3)

    Text_Index.es_insert()

    texts = Text_Index.materialize_ranking(ranks={
        t1.id: 1,
        t2.id: 2,
        t3.id: 3,
    })

    assert texts['hits'][0]['_id'] == str(t3.id)
    assert texts['hits'][1]['_id'] == str(t2.id)
    assert texts['hits'][2]['_id'] == str(t1.id)
def test_size(add_text, add_doc, add_citation):

    """
    The 'size' argument should control the page length.
    """

    t1 = add_text()

    t2 = add_text()
    t3 = add_text()
    t4 = add_text()

    for i in range(3):
        doc = add_doc()
        add_citation(text=t1, document=doc)
        add_citation(text=t2, document=doc)

    for i in range(2):
        doc = add_doc()
        add_citation(text=t1, document=doc)
        add_citation(text=t3, document=doc)

    for i in range(1):
        doc = add_doc()
        add_citation(text=t1, document=doc)
        add_citation(text=t4, document=doc)

    Citation_Index.es_insert()
    Text_Index.es_insert()

    texts = assigned_with(t1.id, size=2)

    assert len(texts['hits']) == 2
    assert texts['hits'][0]['_id'] == str(t2.id)
    assert texts['hits'][1]['_id'] == str(t3.id)
def isbn_to_text(in_file, out_file):
    """
    Link ISBNs -> text rankings.
    """

    isbns = pickle.load(in_file)

    cols = ['isbn', 'title', 'author', 'count']
    writer = csv.DictWriter(out_file, cols)
    writer.writeheader()

    ranks = Text_Index.rank_texts()

    # Sort by count, descending.
    ranks = sorted(
        ranks,
        key=lambda r: r['text'].count,
        reverse=True,
    )

    for i, text in enumerate(ranks):

        isbn = isbns.get(text['text'].identifier)

        writer.writerow(
            dict(
                isbn=isbn,
                title=text['text'].title,
                author=text['text'].authors[0],
                count=text['text'].count,
            ))

        if i % 10000 == 0:
            print(i)
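A minimal invocation sketch for isbn_to_text(), assuming the ISBN map was pickled in binary mode and the ranking should be written to a CSV file; both paths are illustrative.

with open('isbn_map.p', 'rb') as in_file, \
     open('isbn_ranks.csv', 'w', newline='') as out_file:
    isbn_to_text(in_file, out_file)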
def rank_texts(filters={}, query=None, size=1000, page=1):

    """
    Filter and rank texts.

    Args:
        filters (dict): Citation metadata filters.
        query (str): A text metadata search query.
        size (int): Return N results.
        page (int): 1-indexed page offset.

    Returns:
        dict: Elasticsearch hits.
    """

    # Filter the citation counts if any filters are set.
    if any(filters.values()):
        ranks = Citation_Index.compute_ranking(filters)

    else:
        ranks = None

    # Materialize the text metadata.
    texts = Text_Index.materialize_ranking(ranks, query, size, page)

    return texts
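A usage sketch for rank_texts(); the corpus value and query string are illustrative, and the hit fields mirror the ones asserted in the tests above.

results = rank_texts(
    filters=dict(corpus='corpus1'),  # citation metadata filters
    query='match',                   # free-text metadata query
    size=20,                         # results per page
    page=2,                          # 1-indexed page offset
)

for hit in results['hits']:
    print(hit['_id'], hit['_source']['title'])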
def assigned_with(text_id, size=200):

    """
    Given a "seed" text, rank other texts assigned on the same syllabi.

    Args:
        text_id (int): The text id.
        size (int): Return N results.

    Returns:
        dict: Elasticsearch hits.
    """

    # Get syllabi that assign the text.
    doc_ids = Citation_Index.docs_with_text(text_id)

    # Rank texts assigned by those syllabi.
    ranks = Citation_Index.compute_ranking(dict(
        document_id=doc_ids
    ))

    # Omit the seed text.
    ranks.pop(str(text_id))

    # Materialize the text metadata.
    texts = Text_Index.materialize_ranking(ranks, size=size)

    return texts
def ranks(out_file, depth):

    """
    Write the top N text ranks.
    """

    cols = [
        'count',
        'title',
        'author',
    ]

    writer = csv.DictWriter(out_file, cols)
    writer.writeheader()

    ranks = Text_Index.rank_texts()
    ranks = sorted(ranks, key=lambda x: x['rank'])

    for r in ranks[:depth]:

        text = r['text']

        writer.writerow(dict(
            count=text.count,
            title=text.title,
            author=text.authors[0],
        ))
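An invocation sketch for ranks(): dump the most-cited texts to a CSV file; the path and depth are illustrative.

with open('top_texts.csv', 'w', newline='') as out_file:
    ranks(out_file, depth=500)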
def assigned_with(text_id, size=1000):

    """
    Given a "seed" text, rank other texts assigned on the same syllabi.

    Args:
        text_id (int): The text id.
        size (int): Return N results.

    Returns:
        dict: Elasticsearch hits.
    """

    # Get syllabi that assign the text.
    doc_ids = Citation_Index.docs_with_text(text_id)

    # Rank texts assigned by those syllabi.
    ranks = Citation_Index.compute_ranking(dict(
        document_id=doc_ids
    ))

    # Omit the seed text.
    ranks.pop(str(text_id))

    # Materialize the text metadata.
    texts = Text_Index.materialize_ranking(ranks, size=size)

    return texts
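A usage sketch for assigned_with(): given the id of a "seed" text, list the texts most frequently co-assigned with it. The seed id is illustrative.

seed_id = 42  # hypothetical text id

co_assigned = assigned_with(seed_id, size=10)

for hit in co_assigned['hits']:
    print(hit['_id'], hit['_source']['title'])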
def test_join_citation_count(add_text, add_citation):
    """
    Text_Index.rank_texts() should join the citation count for each text.
    """

    t1 = add_text()
    t2 = add_text()
    t3 = add_text()

    for i in range(3):
        add_citation(t1)

    for i in range(2):
        add_citation(t2)

    for i in range(1):
        add_citation(t3)

    texts = Text_Index.rank_texts()

    assert texts[0]['text'] == t1
    assert texts[0]['text'].count == 3

    assert texts[1]['text'] == t2
    assert texts[1]['text'].count == 2

    assert texts[2]['text'] == t3
    assert texts[2]['text'].count == 1
def test_index_counts_and_ranks(add_text, add_citation):
    """
    Index total citation counts and ranks.
    """

    t1 = add_text()

    t2 = add_text()
    t3 = add_text()

    t4 = add_text()
    t5 = add_text()
    t6 = add_text()

    for i in range(9):
        add_citation(text=t1)

    for i in range(3):
        add_citation(text=t2)
        add_citation(text=t3)

    for i in range(1):
        add_citation(text=t4)
        add_citation(text=t5)
        add_citation(text=t6)

    Text_Index.es_insert()

    for t in [t1]:
        doc = config.es.get(index='text', id=t.id)
        assert doc['_source']['count'] == 9
        assert doc['_source']['rank'] == 1
        assert doc['_source']['score'] == 3 / 3

    for t in [t2, t3]:
        doc = config.es.get(index='text', id=t.id)
        assert doc['_source']['count'] == 3
        assert doc['_source']['rank'] == 2
        assert doc['_source']['score'] == 2 / 3

    for t in [t4, t5, t6]:
        doc = config.es.get(index='text', id=t.id)
        assert doc['_source']['count'] == 1
        assert doc['_source']['rank'] == 4
        assert doc['_source']['score'] == 1 / 3
def corpus_facets():
    """
    Materialize corpus facets with counts.

    Returns:
        dict: {label, value, count}
    """

    counts = Citation_Index.count_facets('corpus')
    return Text_Index.materialize_corpus_facets(counts)
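A usage sketch for corpus_facets(), assuming the materialized facets come back as {label, value, count} records as the docstring suggests.

for facet in corpus_facets():
    print(facet['label'], facet['value'], facet['count'])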
def test_paginate_results(add_text, add_citation):
    """
    When a page is provided, return the 1-indexed page.
    """

    # 9 texts - the first with 9 citations, second with 8, etc.

    texts = []
    for i in reversed(range(1, 10)):

        text = add_text()

        for j in range(i):
            add_citation(text=text)

        texts.append(text)

    Text_Index.es_insert()

    # Get first page by default.
    p1 = Text_Index.materialize_ranking(size=3)

    assert len(p1['hits']) == 3
    assert p1['hits'][0]['_id'] == str(texts[0].id)
    assert p1['hits'][1]['_id'] == str(texts[1].id)
    assert p1['hits'][2]['_id'] == str(texts[2].id)

    p2 = Text_Index.materialize_ranking(size=3, page=2)

    assert len(p2['hits']) == 3
    assert p2['hits'][0]['_id'] == str(texts[3].id)
    assert p2['hits'][1]['_id'] == str(texts[4].id)
    assert p2['hits'][2]['_id'] == str(texts[5].id)

    p3 = Text_Index.materialize_ranking(size=3, page=3)

    assert len(p3['hits']) == 3
    assert p3['hits'][0]['_id'] == str(texts[6].id)
    assert p3['hits'][1]['_id'] == str(texts[7].id)
    assert p3['hits'][2]['_id'] == str(texts[8].id)
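The size/page behaviour exercised above presumably maps onto a standard Elasticsearch offset; a sketch of the arithmetic (the helper name is hypothetical).

def page_offset(size, page):
    # 1-indexed page -> 0-based offset; e.g. size=3, page=2 -> offset 3,
    # which is why the second page starts at texts[3] in the test above.
    return (page - 1) * size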
def test_assigned_with(add_text, add_doc, add_citation):

    """
    Given a seed text, assigned_with() should pull a ranking for all texts that
    are co-assigned on a syllabus with the seed.
    """

    t1 = add_text()

    t2 = add_text()
    t3 = add_text()
    t4 = add_text()

    for i in range(3):
        doc = add_doc()
        add_citation(text=t1, document=doc)
        add_citation(text=t2, document=doc)

    for i in range(2):
        doc = add_doc()
        add_citation(text=t1, document=doc)
        add_citation(text=t3, document=doc)

    for i in range(1):
        doc = add_doc()
        add_citation(text=t1, document=doc)
        add_citation(text=t4, document=doc)

    Citation_Index.es_insert()
    Text_Index.es_insert()

    texts = assigned_with(t1.id)

    assert len(texts['hits']) == 3
    assert texts['hits'][0]['_id'] == str(t2.id)
    assert texts['hits'][1]['_id'] == str(t3.id)
    assert texts['hits'][2]['_id'] == str(t4.id)
def test_page(add_text, add_citation):
    """
    The 'page' argument should control the page offset.
    """

    t1 = add_text()
    t2 = add_text()
    t3 = add_text()
    t4 = add_text()

    for i in range(4):
        add_citation(t1)

    for i in range(3):
        add_citation(t2)

    for i in range(2):
        add_citation(t3)

    for i in range(1):
        add_citation(t4)

    Citation_Index.es_insert()
    Text_Index.es_insert()

    p1 = rank_texts(size=2, page=1)

    assert len(p1['hits']) == 2
    assert p1['hits'][0]['_id'] == str(t1.id)
    assert p1['hits'][1]['_id'] == str(t2.id)

    p2 = rank_texts(size=2, page=2)

    assert len(p2['hits']) == 2
    assert p2['hits'][0]['_id'] == str(t3.id)
    assert p2['hits'][1]['_id'] == str(t4.id)
def test_skip_uncited_texts(add_text, add_citation):
    """
    Texts without any citations should be excluded.
    """

    t1 = add_text()
    t2 = add_text()

    add_citation(text=t1)

    texts = Text_Index.rank_texts()

    assert texts == [
        dict(text=t1, rank=1, score=1),
        # Exclude t2.
    ]
    def add_nodes(self):
        """
        Register displayed texts.
        """

        for t in progress.bar(Text_Index.rank_texts()):

            text = t['text']

            self.graph.add_node(
                text.id,
                dict(
                    label=text.pretty('title'),
                    author=text.pretty('surname'),
                    count=text.count,
                    score=t['score'],
                ))
def test_only_consider_displayed_texts(add_text, add_citation):
    """
    Only rank texts that have been marked for display.
    """

    t1 = add_text(display=None)
    t2 = add_text(display=False)
    t3 = add_text(display=True)

    add_citation(text=t1)
    add_citation(text=t2)
    add_citation(text=t3)

    texts = Text_Index.rank_texts()

    assert texts == [
        dict(text=t3, rank=1, score=1),
    ]
def test_only_consider_valid_texts(add_text, add_citation):
    """
    Only rank texts that have passed validation.
    """

    t1 = add_text(valid=None)
    t2 = add_text(valid=False)
    t3 = add_text(valid=True)

    add_citation(text=t1)
    add_citation(text=t2)
    add_citation(text=t3)

    texts = Text_Index.rank_texts()

    assert texts == [
        dict(text=t3, rank=1, score=1),
    ]
def test_compute_metrics(add_text, add_citation):
    """
    Zip ranks and scores with the texts.
    """

    t1 = add_text()

    t2 = add_text()
    t3 = add_text()

    t4 = add_text()
    t5 = add_text()
    t6 = add_text()

    for i in range(9):
        add_citation(text=t1)

    for i in range(3):
        add_citation(text=t2)
        add_citation(text=t3)

    for i in range(1):
        add_citation(text=t4)
        add_citation(text=t5)
        add_citation(text=t6)

    texts = Text_Index.rank_texts()

    assert texts == [
        dict(text=t1, rank=1, score=3 / 3),
        dict(text=t2, rank=2, score=2 / 3),
        dict(text=t3, rank=2, score=2 / 3),
        dict(text=t4, rank=4, score=1 / 3),
        dict(text=t5, rank=4, score=1 / 3),
        dict(text=t6, rank=4, score=1 / 3),
    ]
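A self-contained sketch of a ranking that reproduces the values asserted above: standard competition ranking on citation count, with the score taken as the fraction of distinct count levels at or below a text's own count. This is a reconstruction from the test expectations, not necessarily the library's exact implementation.

def compute_metrics(counts):
    """
    counts: {text_id: citation_count}
    Returns: {text_id: (rank, score)}
    """
    distinct = sorted(set(counts.values()), reverse=True)

    metrics = {}
    for text_id, count in counts.items():
        # Competition ranking: tied counts share a rank, later ranks are skipped.
        rank = 1 + sum(1 for c in counts.values() if c > count)
        # Fraction of distinct count levels at or below this count.
        score = (len(distinct) - distinct.index(count)) / len(distinct)
        metrics[text_id] = (rank, score)

    return metrics

# Counts 9, 3, 3, 1, 1, 1 yield ranks 1, 2, 2, 4, 4, 4 and scores 1, 2/3, 1/3,
# matching the dicts asserted in test_compute_metrics above.
print(compute_metrics({'t1': 9, 't2': 3, 't3': 3, 't4': 1, 't5': 1, 't6': 1}))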