def test_core_word_suffix_collision(self):
    """Check suffix detection and replacement for 'ABCPZL', where suffix rules collide."""
    analyzer = analyze.Analyzer()
    candidate = 'ABCPZL'

    # Rule collision: the replacement for the longer suffix is assumed to
    # take precedence.
    matched_suffix = analyzer.ends_in_suffix(candidate)
    self.assertEqual('PZL', matched_suffix)

    core = analyzer.get_resulting_word(candidate, matched_suffix)
    self.assertNotEqual('ABCA', core)
    self.assertEqual('ABCAZ', core)
def __init__(self, stop_resource='./resources/stopwords.txt'):
    """Load the stop words and set up the analyzer, output directory and cache.

    Args:
        stop_resource: Path to the stop-word file, joined onto the
            directory that contains this module. The file content is split
            on newline characters and every piece becomes one entry of
            ``self.words_to_skip``.

    Side effects:
        Creates ``resources/output/<today's ISO date>/`` under this
        module's directory (including missing parents) if it does not
        already exist.
    """
    script_dir = path.dirname(__file__)
    file_path = path.join(script_dir, stop_resource)
    with open(file_path, 'r') as stop_words_resource:
        # split('\n') is kept (rather than splitlines()) so the resulting
        # set is exactly what it was before, including the '' entry
        # produced by a trailing newline.
        self.words_to_skip = set(stop_words_resource.read().split('\n'))
    self.analyzer = analyze.Analyzer()

    write_dir_loc = path.join(
        script_dir, 'resources', 'output', datetime.now().date().isoformat()
    )
    # Path.mkdir() returns None, so its result is deliberately not stored;
    # only the path string is kept on the instance.
    pathlib.Path(write_dir_loc).mkdir(parents=True, exist_ok=True)
    self.write_directory = write_dir_loc
    self.cache = db.Db()
def test_possible_valid_root_word(self):
    """Parse sample5.txt with core_words=True and check both result entries.

    The expected result is two entries: ('E', 2) first, then ('CL', 2).
    """
    test_analyzer = analyze.Analyzer()
    # The context manager closes the file even if parsing raises; the
    # previous explicit close() at the end was skipped on any failure.
    with open('tests/resources/sample5.txt', 'r') as sample_file:
        contents = sample_file.read()
    sorted_list = test_analyzer.parse_file(contents, core_words=True)
    self.assertEqual(len(sorted_list), 2)

    first_val = sorted_list[0]
    self.assertEqual('E', first_val[0])
    self.assertEqual(2, first_val[1])

    # Since the algorithm assumes the rest of the words are valid, even
    # though there is a rule to replace 'L' with empty space (as in EL
    # becoming E), there is no C in the whole file, so the algorithm
    # assumes the 'L' in 'CL' is a false suffix and the actual word is CL.
    second_val = sorted_list[1]
    self.assertEqual('CL', second_val[0])
    self.assertEqual(2, second_val[1])
def test_core_word(self):
    """Expect get_resulting_word('ALZ', 'LZ') to produce 'A'."""
    analyzer = analyze.Analyzer()
    core = analyzer.get_resulting_word('ALZ', 'LZ')
    self.assertEqual('A', core)
def test_check_good_run(self):
    """Parse sample2.txt with default options and expect 25 result entries."""
    test_analyzer = analyze.Analyzer()
    # The context manager closes the file even if parsing raises; the
    # previous explicit close() at the end was skipped on any failure.
    with open('tests/resources/sample2.txt', 'r') as sample_file:
        contents = sample_file.read()
    sorted_list = test_analyzer.parse_file(contents)
    self.assertEqual(len(sorted_list), 25)
def test_word_punctuation(self):
    """Expect process_punctuation('DVQ,') to return 'DVQ'."""
    analyzer = analyze.Analyzer()
    cleaned = analyzer.process_punctuation('DVQ,')
    self.assertEqual(cleaned, 'DVQ')
def test_invalid_word_2(self):
    """Expect is_all_letters to reject a word containing '?' characters."""
    analyzer = analyze.Analyzer()
    # The literal below is test data and must stay exactly as written.
    garbled = 'KMDKâ??L'
    verdict = analyzer.is_all_letters(garbled)
    self.assertFalse(verdict)
def test_invalid_word(self):
    """Expect is_all_letters('abc?def') to be falsy."""
    analyzer = analyze.Analyzer()
    verdict = analyzer.is_all_letters('abc?def')
    self.assertFalse(verdict)
def test_valid_word(self):
    """Expect is_all_letters('abcd') to be truthy."""
    analyzer = analyze.Analyzer()
    verdict = analyzer.is_all_letters('abcd')
    self.assertTrue(verdict)
def test_casing_doesnt_matter(self):
    """Parse sample6.txt with default options and expect a single result entry.

    NOTE(review): presumably the fixture holds one word in several casings;
    confirm against tests/resources/sample6.txt.
    """
    test_analyzer = analyze.Analyzer()
    # The context manager closes the file even if parsing raises; the
    # previous explicit close() at the end was skipped on any failure.
    with open('tests/resources/sample6.txt', 'r') as sample_file:
        contents = sample_file.read()
    result = test_analyzer.parse_file(contents)
    self.assertEqual(len(result), 1)