Пример #1
0
def ranks(out_file, depth):

    """
    Write a CSV of the first `depth` ranked texts to `out_file`.

    Entries come from `Text_Index.rank_texts()` and are ordered by ascending
    `rank`. Each row holds the text's count, title, and first author.
    """

    writer = csv.DictWriter(out_file, ['count', 'title', 'author'])
    writer.writeheader()

    ordered = list(Text_Index.rank_texts())
    ordered.sort(key=lambda entry: entry['rank'])

    for entry in ordered[:depth]:

        text = entry['text']

        writer.writerow({
            'count': text.count,
            'title': text.title,
            'author': text.authors[0],
        })
def test_join_citation_count(add_text, add_citation):

    """
    Each entry from Text_Index.rank_texts() should carry its text's
    citation count.

    Three texts are cited 3, 2, and 1 times; the result is expected to list
    them in that order, with `count` equal to the number of citations.
    """

    expected = [(add_text(), 3), (add_text(), 2), (add_text(), 1)]

    for text, total in expected:
        for _ in range(total):
            add_citation(text)

    ranked = Text_Index.rank_texts()

    for position, (text, total) in enumerate(expected):
        assert ranked[position]['text'] == text
        assert ranked[position]['text'].count == total
Пример #3
0
def test_join_citation_count(add_text, add_citation):
    """
    Text_Index.rank_texts() should attach the citation count to every text.

    The texts are created first, then cited 3, 2, and 1 times respectively;
    the ranked entries are checked position by position.
    """

    first = add_text()
    second = add_text()
    third = add_text()

    citations = ((first, 3), (second, 2), (third, 1))

    for text, total in citations:
        for _ in range(total):
            add_citation(text)

    ranked = Text_Index.rank_texts()

    for index in range(len(citations)):
        text, total = citations[index]
        assert ranked[index]['text'] == text
        assert ranked[index]['text'].count == total
Пример #4
0
def isbn_to_text(in_file, out_file):
    """
    Write a CSV pairing each ranked text with an ISBN.

    `in_file` is unpickled into a lookup that is queried by text identifier
    (presumably a dict of identifier -> ISBN; confirm against the producer).
    Rows are written to `out_file` in descending order of count, and the
    row index is printed every 10,000 rows (starting with row 0).

    NOTE(review): `pickle.load` can execute arbitrary code, so `in_file`
    must come from a trusted source.
    """

    isbns = pickle.load(in_file)

    writer = csv.DictWriter(out_file, ['isbn', 'title', 'author', 'count'])
    writer.writeheader()

    # Highest counts first.
    ordered = list(Text_Index.rank_texts())
    ordered.sort(key=lambda row: row['text'].count, reverse=True)

    for index, row in enumerate(ordered):

        text = row['text']

        writer.writerow({
            'isbn': isbns.get(text.identifier),
            'title': text.title,
            'author': text.authors[0],
            'count': text.count,
        })

        if index % 10000 == 0:
            print(index)
Пример #5
0
def test_skip_uncited_texts(add_text, add_citation):
    """
    A text with no citations should not appear in the ranking.
    """

    cited = add_text()
    uncited = add_text()  # Never cited, so it must be absent below.

    add_citation(text=cited)

    ranked = Text_Index.rank_texts()

    # Only the cited text is expected.
    assert ranked == [{'text': cited, 'rank': 1, 'score': 1}]
def test_skip_uncited_texts(add_text, add_citation):

    """
    Uncited texts should be left out of Text_Index.rank_texts().
    """

    with_citation, without_citation = add_text(), add_text()

    add_citation(text=with_citation)

    ranked = Text_Index.rank_texts()

    # `without_citation` is deliberately missing from the expectation.
    assert ranked == [
        {'text': with_citation, 'rank': 1, 'score': 1},
    ]
Пример #7
0
    def add_nodes(self):
        """
        Add one graph node for every entry of Text_Index.rank_texts().

        Each node is keyed by the text id and carries the text's
        `pretty('title')` as label, `pretty('surname')` as author, its
        count, and the entry's score.
        """

        ranked = Text_Index.rank_texts()

        for entry in progress.bar(ranked):

            text = entry['text']

            self.graph.add_node(text.id, {
                'label': text.pretty('title'),
                'author': text.pretty('surname'),
                'count': text.count,
                'score': entry['score'],
            })
Пример #8
0
def test_only_consider_displayed_texts(add_text, add_citation):
    """
    Texts whose `display` flag is None or False should not be ranked.
    """

    unset, hidden, shown = (
        add_text(display=flag) for flag in (None, False, True)
    )

    # Every text gets one citation, so only the flag can exclude it.
    for text in (unset, hidden, shown):
        add_citation(text=text)

    ranked = Text_Index.rank_texts()

    assert ranked == [{'text': shown, 'rank': 1, 'score': 1}]
Пример #9
0
def test_only_consider_valid_texts(add_text, add_citation):
    """
    Texts whose `valid` flag is None or False should not be ranked.
    """

    unchecked, invalid, valid = (
        add_text(valid=flag) for flag in (None, False, True)
    )

    # Every text gets one citation, so only the flag can exclude it.
    for text in (unchecked, invalid, valid):
        add_citation(text=text)

    ranked = Text_Index.rank_texts()

    assert ranked == [{'text': valid, 'rank': 1, 'score': 1}]
def test_only_consider_valid_texts(add_text, add_citation):

    """
    Only the text created with valid=True should be ranked.
    """

    candidates = [add_text(valid=flag) for flag in (None, False, True)]

    for candidate in candidates:
        add_citation(text=candidate)

    ranked = Text_Index.rank_texts()

    # The last candidate is the one created with valid=True.
    assert ranked == [
        {'text': candidates[2], 'rank': 1, 'score': 1},
    ]
def test_only_consider_displayed_texts(add_text, add_citation):

    """
    Only the text created with display=True should be ranked.
    """

    candidates = [add_text(display=flag) for flag in (None, False, True)]

    for candidate in candidates:
        add_citation(text=candidate)

    ranked = Text_Index.rank_texts()

    # The last candidate is the one created with display=True.
    assert ranked == [
        {'text': candidates[2], 'rank': 1, 'score': 1},
    ]
    def add_nodes(self):

        """
        Register a graph node for each entry of Text_Index.rank_texts().

        The node id is the text id; its attributes are the text's
        `pretty('title')` (label), `pretty('surname')` (author), count, and
        the entry's score.
        """

        for ranked in progress.bar(Text_Index.rank_texts()):

            text = ranked['text']

            self.graph.add_node(
                text.id,
                {
                    'label': text.pretty('title'),
                    'author': text.pretty('surname'),
                    'count': text.count,
                    'score': ranked['score'],
                },
            )
def test_compute_metrics(add_text, add_citation):

    """
    Ranked entries should pair each text with its rank and score.

    Six texts fall into three citation tiers (9, 3, and 1 citations); texts
    in the same tier are expected to share a rank and a score.
    """

    t1, t2, t3, t4, t5, t6 = (add_text() for _ in range(6))

    tiers = (
        (9, (t1,)),
        (3, (t2, t3)),
        (1, (t4, t5, t6)),
    )

    # Within a tier, citations are added round-robin across its texts.
    for total, group in tiers:
        for _ in range(total):
            for text in group:
                add_citation(text=text)

    ranked = Text_Index.rank_texts()

    expected = [

        {'text': t1, 'rank': 1, 'score': 3/3},

        {'text': t2, 'rank': 2, 'score': 2/3},
        {'text': t3, 'rank': 2, 'score': 2/3},

        {'text': t4, 'rank': 4, 'score': 1/3},
        {'text': t5, 'rank': 4, 'score': 1/3},
        {'text': t6, 'rank': 4, 'score': 1/3},

    ]

    assert ranked == expected
Пример #14
0
def test_compute_metrics(add_text, add_citation):
    """
    Text_Index.rank_texts() should return each text with a rank and score.

    One text gets 9 citations, two get 3 each, and three get 1 each; tied
    texts are expected to share the same rank and score.
    """

    top = add_text()
    mid_a, mid_b = add_text(), add_text()
    low_a, low_b, low_c = add_text(), add_text(), add_text()

    for _ in range(9):
        add_citation(text=top)

    for _ in range(3):
        for text in (mid_a, mid_b):
            add_citation(text=text)

    for _ in range(1):
        for text in (low_a, low_b, low_c):
            add_citation(text=text)

    ranked = Text_Index.rank_texts()

    assert ranked == [
        {'text': top, 'rank': 1, 'score': 3 / 3},
        {'text': mid_a, 'rank': 2, 'score': 2 / 3},
        {'text': mid_b, 'rank': 2, 'score': 2 / 3},
        {'text': low_a, 'rank': 4, 'score': 1 / 3},
        {'text': low_b, 'rank': 4, 'score': 1 / 3},
        {'text': low_c, 'rank': 4, 'score': 1 / 3},
    ]