def main(json_file, genre_file):
    """Score every book in *json_file* against the genres in *genre_file*.

    The genre definitions are loaded once via ``load_points_file`` and the
    set of their keywords is derived from them. Each book yielded by
    ``load_json`` is then scored: keyword hits are counted in the book's
    ``'description'`` and passed to ``calc_genre_fit``. The scores are
    stored under the book's ``'title'``, so a later book with the same
    title replaces the earlier entry.

    Args:
        json_file: Source of the book records, handed to ``load_json``.
        genre_file: Source of the genre/points definitions (a CSV file
            according to the original comment), handed to
            ``load_points_file``.
    """
    genres = load_points_file(genre_file)
    keywords = {genre.keyword for genre in genres}

    fit_by_title = {}
    # load_json is consumed as a stream/iterable, one book at a time.
    for book in load_json(json_file):
        fit_by_title[book['title']] = calc_genre_fit(
            genres,
            count_keyword_hits(keywords, book['description']),
        )

    # Output is deferred until every book is scored; the original note
    # says this is so the results can be sorted alphabetically.
    print_results(fit_by_title)
def test_calc_genre_fit():
    """Check ``calc_genre_fit`` against the fixture text and genres.

    Expected keyword hits in TEST_TEXT:
        {'ran': 1, 'boy': 1, 'dog': 3, 'barked': 1}

    Average points over the unique keywords that were hit:
        canine -> 'dog': 6 + 'barked': 8 + 'ran': 4 = 18 / 3 = 6
        human  -> 'boy': 6 + 'ran': 4 = 10 / 2 = 5

    Expected result:
        {'canine': 30, 'human': 10, 'bird': 0}

    NOTE(review): the numbers are consistent with
    ``total hits * average`` (canine 5 x 6 = 30, human 2 x 5 = 10, and
    bird has no hits) -- confirm against calc_genre_fit.
    """
    expected_scores = (
        ('canine', 30),
        ('human', 10),
        ('bird', 0),
    )

    keyword_set = {genre.keyword for genre in GENRES}
    keyword_hits = count_keyword_hits(keyword_set, TEST_TEXT)
    fit = calc_genre_fit(GENRES, keyword_hits)

    # Checked in the same order as the expectations are listed above.
    for genre_name, score in expected_scores:
        assert fit[genre_name] == score