示例#1
0
def test_top_words():
    """Check the ranking returned by ``get_top_words(20)``.

    Each lowercase letter is stored in ``final_words`` with a pseudo-random
    count between 1 and 20. The generator is seeded first so the counts, and
    therefore the expected ranking below, are reproducible.
    """
    random.seed(1)  # fixed seed -> identical counts on every run
    parser = Parser(input_file="fake.txt", rules_file="fake.txt")
    for char in string.ascii_lowercase:
        parser.final_words[char] = random.randint(1, 20)
    ranked = parser.get_top_words(20)
    # Show the counter's own ranking next to ours to ease debugging a failure.
    print(parser.final_words.most_common(20))
    print(ranked)
    expected = [
        ("p", 20), ("b", 19), ("u", 19), ("f", 16), ("h", 16),
        ("l", 16), ("g", 15), ("r", 15), ("o", 14), ("i", 13),
        ("n", 13), ("w", 11), ("d", 9), ("s", 9), ("t", 8),
        ("j", 7), ("a", 5), ("e", 4), ("k", 4), ("v", 4),
    ]
    assert ranked == expected
示例#2
0
def test_get_root_word():
    """Every line of ``test_stem.txt`` should reduce to the root ``"jump"``."""
    parser = Parser(input_file="fake.txt", rules_file="fake.txt")
    with open(f"{FILE_PATH}/test_stem.txt") as stem_file:
        stems = [
            parser.get_root_word(entry)
            for entry in stem_file.read().splitlines()
        ]
    assert stems == ["jump", "jump", "jump"]
示例#3
0
def parse_file(input_file, rules_file, number_of_results,
               use_collections_counter):
    """Parse a file and print a two-column table of its top words.

    Args:
        input_file: forwarded to ``Parser`` as ``input_file``.
        rules_file: forwarded to ``Parser`` as ``rules_file``.
        number_of_results: number of rows requested from the ranking call.
        use_collections_counter: when truthy, the ranking comes from
            ``parser.final_words.most_common``; otherwise from
            ``parser.get_top_words``.
    """
    parser = Parser(input_file=input_file, rules_file=rules_file)
    parser.parse_file()

    if use_collections_counter:
        print("INFO: Using collections counter")
        top_words = parser.final_words.most_common(number_of_results)
    else:
        top_words = parser.get_top_words(number_of_results)

    print(f'| {"Word":<12} | Occurrences |')
    # Divider row: 14 dashes under the word column, 13 under the count column,
    # matching the padded widths of the header cells.
    print(f'|{"-" * 14}|{"-" * 13}|')
    for word, occurrences in top_words:
        print(f"| {word:<12} | {occurrences:>11} |")
示例#4
0
def test_parse_file():
    """Run ``parse_file`` on the sample input and verify the word counts."""
    input_path = f"{FILE_PATH}/test_input.txt"
    rules_path = f"{FILE_PATH}/test_stopwords.txt"
    parser = Parser(input_file=input_path, rules_file=rules_path)
    parser.parse_file()
    # Each of these words is expected to be counted exactly once.
    expected_words = [
        "there", "stori", "simpl", "man", "try", "to",
        "make", "hi", "wai", "in", "the", "univers",
    ]
    assert parser.final_words == Counter(expected_words)
示例#5
0
def test_parse_line():
    """Feed the first line of the sample input to ``parse_line`` and verify
    the resulting word counts."""
    parser = Parser(
        input_file="fake.txt",
        rules_file=f"{FILE_PATH}/test_stopwords.txt",
    )
    with open(f"{FILE_PATH}/test_input.txt") as sample:
        first_line = sample.read().splitlines()[0]
    parser.parse_line(first_line)
    # Each of these words is expected to be counted exactly once.
    expected_words = [
        "there", "stori", "simpl", "man", "try", "to",
        "make", "hi", "wai", "in", "the", "univers",
    ]
    assert parser.final_words == Counter(expected_words)
示例#6
0
def test_non_alpha_removed(test_input):
    """Apply ``remove_non_alpha`` to each item of ``test_input`` and compare
    the results with the expected cleaned words."""
    parser = Parser(input_file="fake.txt", rules_file="fake.txt")
    cleaned = [parser.remove_non_alpha(token) for token in test_input]
    expected = [
        "Theres", "a", "story", "about", "a",
        "simple", "man", "trying", "to", "make",
        "his", "way", "in", "the", "universe",
    ]
    assert cleaned == expected
示例#7
0
def test_stop_words_removed(test_input):
    """Keep only the items of ``test_input`` for which ``exclude_stop_words``
    returns a falsy value, and compare them with the expected survivors."""
    parser = Parser(
        input_file="fake.txt",
        rules_file=f"{FILE_PATH}/test_stopwords.txt",
    )
    kept = [
        token for token in test_input
        if not parser.exclude_stop_words(token)
    ]
    expected = [
        "There's", "story", "simple", "man", "trying", "to",
        "make", "his", "way", "in", "the", "universe",
    ]
    assert kept == expected
示例#8
0
def test_get_stop_words():
    """A parser built with the stop-word rules file should expose exactly the
    expected set in ``stop_words``."""
    rules_path = f"{FILE_PATH}/test_stopwords.txt"
    parser = Parser(input_file="fake.txt", rules_file=rules_path)
    expected = {"a", "about", "above", "after", "again"}
    assert parser.stop_words == expected
示例#9
0
def test_insert_top_word_beyond_max():
    """``insert_top_word(3, ranking, 2, "c", 2)`` must leave ``ranking`` as is.

    NOTE(review): the arguments look like (index, list, limit, word, count),
    so index 3 would lie beyond a limit of 2 -- confirm against ``Parser``.
    """
    parser = Parser(input_file="fake.txt", rules_file="fake.txt")
    ranking = [("a", 4), ("b", 3)]
    parser.insert_top_word(3, ranking, 2, "c", 2)
    unchanged = [("a", 4), ("b", 3)]
    assert ranking == unchanged
示例#10
0
def test_insert_top_word():
    """``insert_top_word(0, ranking, 5, "c", 5)`` must put ``("c", 5)`` at the
    front of ``ranking``, mutating the list in place.

    NOTE(review): the arguments look like (index, list, limit, word, count)
    -- confirm against ``Parser``.
    """
    parser = Parser(input_file="fake.txt", rules_file="fake.txt")
    ranking = [("a", 4), ("b", 3)]
    parser.insert_top_word(0, ranking, 5, "c", 5)
    expected = [("c", 5), ("a", 4), ("b", 3)]
    assert ranking == expected