def test_short_line_no_punc():
    """Check parse_line on the short, punctuation-free line 'Chapter 1'.

    The test expects the trigram dict to stay empty and the returned
    word pair to be two empty strings, even though a non-empty pair
    was passed in.
    """
    from trigrams import parse_line

    heading = 'Chapter 1\n'
    carried_in = ('no', 'punctuation')

    observed_dict, observed_pair = parse_line(heading, {}, carried_in)

    assert observed_dict == {}
    assert observed_pair == ('', '')
def test_parse_line():
    """Feed parse_line the sentinel line '__test__' and check the fixed reply.

    NOTE(review): the expected dict value ``("data")`` is a parenthesised
    plain string, not a one-element tuple -- confirm that this is what
    parse_line really returns for the sentinel input.
    """
    from trigrams import parse_line

    # An earlier variant built this line by decoding b'__test__\n' as UTF-8.
    sentinel = '__test__\n'
    expected_dict = {"not real": ("data")}
    expected_pair = ('fake', 'fake')

    observed_dict, observed_pair = parse_line(sentinel, {}, ('', ''))

    assert observed_dict == expected_dict
    assert observed_pair == expected_pair
def test_curse_string_end():
    """Check a line ending in a run of symbols followed by '?'.

    Starting from the pair ('who', 'was'), the test expects the symbol
    run '#(%(*#!@' to be kept as a single word and the final '?' to be
    recorded as its own token.
    """
    from trigrams import parse_line

    expected_dict = {
        'who was': ('that',),
        'was that': ("#(%(*#!@",),
        'that #(%(*#!@': ('?',),
    }
    expected_pair = ('#(%(*#!@', '?')

    observed_dict, observed_pair = parse_line(
        "that #(%(*#!@?\n", {}, ('who', 'was')
    )

    assert observed_dict == expected_dict
    assert observed_pair == expected_pair
def test_curse_string_middle():
    """Check a line with a run of symbols in the middle and a final '!'.

    Starting from the pair ('who', 'was'), the test expects the symbol
    run '#(%(*#!@' to be kept as a single word between 'that' and
    'idiot', with the closing '!' recorded as its own token.
    """
    from trigrams import parse_line

    expected_dict = {
        'who was': ('that',),
        'was that': ("#(%(*#!@",),
        "that #(%(*#!@": ('idiot',),
        "#(%(*#!@ idiot": ('!',),
    }
    expected_pair = ('idiot', '!')

    observed_dict, observed_pair = parse_line(
        "that #(%(*#!@ idiot!\n", {}, ('who', 'was')
    )

    assert observed_dict == expected_dict
    assert observed_pair == expected_pair
def test_internal_apostrophe():
    """Check that a word with an internal apostrophe stays in one piece.

    Starting from the pair ('die', '!'), the test expects "you're" to
    appear as a single word, the capitalised 'Surely' to be recorded in
    lower case, and the final '!' to be its own token.
    """
    from trigrams import parse_line

    expected_dict = {
        'die !': ('surely',),
        '! surely': ("you're",),
        "surely you're": ('joking',),
        "you're joking": ('!',),
    }
    expected_pair = ('joking', '!')

    observed_dict, observed_pair = parse_line(
        "Surely you're joking!\n", {}, ('die', '!')
    )

    assert observed_dict == expected_dict
    assert observed_pair == expected_pair
def test_short_line_punc():
    """Check a short line that ends with a full stop.

    Starting from the pair ('he', 'fell'), the test expects one dict
    entry per incoming word plus one for the final '.', and the
    returned pair to be ('died', '.').
    """
    from trigrams import parse_line

    expected_dict = {
        'he fell': ('and',),
        'fell and': ('he',),
        'and he': ('died',),
        'he died': ('.',),
    }
    expected_pair = ('died', '.')

    observed_dict, observed_pair = parse_line(
        'and he died.\n', {}, ('he', 'fell')
    )

    assert observed_dict == expected_dict
    assert observed_pair == expected_pair
def test_plural_possessive():
    """Check that a trailing possessive apostrophe stays on its word.

    Starting from the pair ('pets', '.'), the test expects "dogs'" to
    be recorded with its apostrophe attached, 'The' to be recorded in
    lower case, and the final '.' to be its own token.
    """
    from trigrams import parse_line

    expected_dict = {
        'pets .': ('the',),
        '. the': ("dogs'",),
        "the dogs'": ('kennel',),
        "dogs' kennel": ('stinks',),
        'kennel stinks': ('.',),
    }
    expected_pair = ('stinks', '.')

    observed_dict, observed_pair = parse_line(
        "The dogs' kennel stinks.\n", {}, ('pets', '.')
    )

    assert observed_dict == expected_dict
    assert observed_pair == expected_pair
def test_quotes_begin_end():
    """Check a line that is wrapped in double quotes.

    Starting from the pair ('.', '"'), the test expects the opening
    quote to be recorded as its own token and 'Surely' to be recorded
    in lower case.

    NOTE(review): the expected dict has no 'jest !' entry and the
    expected final pair is ('jest', '!'), so the closing quote after
    '!' is presumably dropped by parse_line -- confirm this is intended.
    """
    from trigrams import parse_line

    quoted_line = '''"Surely you jest!"\n'''
    expected_dict = {
        '. "': ('"',),
        '" "': ('surely',),
        '" surely': ('you',),
        'surely you': ('jest',),
        'you jest': ('!',),
    }
    expected_pair = ('jest', '!')

    observed_dict, observed_pair = parse_line(quoted_line, {}, ('.', '"'))

    assert observed_dict == expected_dict
    assert observed_pair == expected_pair