示例#1
0
def main(json_file, genre_file):
    """Score each book in ``json_file`` against the genres in ``genre_file``.

    The genre/points file is loaded first and the keyword of every genre
    entry is collected into a set. Each book yielded by ``load_json`` then
    has its ``'description'`` scanned for those keywords, and the resulting
    hits are turned into a genre fit that is stored under the book's
    ``'title'``. A later book with the same title replaces the earlier entry.

    Args:
        json_file: Source of the book records, passed to ``load_json``.
        genre_file: Source of the genre points, passed to
            ``load_points_file``.
    """
    # Genre/points entries (loaded from CSV per the original note).
    genres = load_points_file(genre_file)
    keywords = {entry.keyword for entry in genres}

    results = {}
    # load_json is iterated lazily, one book record at a time.
    for book in load_json(json_file):
        fit = calc_genre_fit(
            genres,
            count_keyword_hits(keywords, book['description']),
        )
        results[book['title']] = fit

    # Printing is deferred until every book is scored so the output can be
    # sorted alphabetically.
    print_results(results)
示例#2
0
def test_calc_genre_fit():
    """Check the genre scores produced for TEST_TEXT against GENRES.

    Keyword hits expected in TEST_TEXT:
        {'ran': 1, 'boy': 1, 'dog': 3, 'barked': 1}

    Average points per genre over the keywords that were hit:
        canine -> 'dog': 6 + 'barked': 8 + 'ran': 4 = 18 / 3 = 6
        human  -> 'boy': 6 + 'ran': 4 = 10 / 2 = 5

    The asserted scores are consistent with "total hits x average points"
    (assumed formula -- confirm against calc_genre_fit):
        canine: 5 hits (3 + 1 + 1) x 6 = 30
        human:  2 hits (1 + 1) x 5 = 10
        bird:   no hits = 0

    Expected result: {'canine': 30, 'human': 10, 'bird': 0}
    """
    genre_keywords = {genre.keyword for genre in GENRES}
    keyword_hits = count_keyword_hits(genre_keywords, TEST_TEXT)

    scores = calc_genre_fit(GENRES, keyword_hits)

    assert scores['canine'] == 30
    assert scores['human'] == 10
    assert scores['bird'] == 0
示例#3
0
def test_count_keyword_missing_keywords():
    """An empty keyword list must produce a falsy (empty) result."""
    no_keywords = []
    hits = count_keyword_hits(no_keywords, TEST_TEXT)
    assert not hits
示例#4
0
def test_count_keyword_missing_text():
    """Searching an empty text must produce a falsy (empty) result."""
    empty_text = ""
    hits = count_keyword_hits(['test'], empty_text)
    assert not hits
示例#5
0
def test_count_keyword_hits_case_sensitive():
    """With case_sensitive=True, 'the' is not reported and 'a' counts 6."""
    wanted = ['the', 'a']
    hits = count_keyword_hits(wanted, TEST_TEXT, case_sensitive=True)

    # Lowercase 'the' is expected to have no case-sensitive match in TEST_TEXT.
    assert 'the' not in hits
    assert hits['a'] == 6
示例#6
0
def test_count_keyword_hits():
    """Default matching reports one 'boy' and three 'dog' hits in TEST_TEXT."""
    wanted = ['boy', 'dog']
    hits = count_keyword_hits(wanted, TEST_TEXT)

    assert hits['boy'] == 1
    assert hits['dog'] == 3