def soundex_convert(name_string):
    """Convert a name into its Soundex code by chaining the three FSTs.

    The transducers produced by ``letters_to_numbers``,
    ``truncate_to_three_digits`` and ``add_zero_padding`` are handed to
    ``compose`` in that order together with ``name_string``. The symbols
    of the first result are concatenated into a single string.
    """
    pipeline = (
        letters_to_numbers(),
        truncate_to_three_digits(),
        add_zero_padding(),
    )
    first_result = compose(name_string, *pipeline)[0]
    return ''.join(first_result)
def soundex_convert(name_string):
    """Convert a name into its Soundex code by chaining the three FSTs.

    ``name_string`` is split into a list of single characters, which is
    passed to ``compose`` along with the letters-to-numbers, truncation
    and zero-padding transducers (in that order). The symbols of the
    first result are concatenated into a single string.
    """
    symbols = list(name_string)
    to_digits = letters_to_numbers()
    truncate = truncate_to_three_digits()
    pad = add_zero_padding()
    results = compose(symbols, to_digits, truncate, pad)
    return ''.join(results[0])
def parse(self, word):
    """Parse a word morphologically.

    e.g.
        p = Parser()
        word = ['p','a','n','i','c','k','i','n','g']
        p.parse(word) ---> 'panic+present participle form'

    Two transducers are built and composed:

    1. An affix detector that replaces a word-final 'ing' with
       '+present participle form' and a word-final 'ed' with
       '+past form'.
    2. A k-deletion transducer that drops a 'k' which follows a 'c'
       after a vowel and directly precedes the '+', while the
       'l', 'i', 'c', 'k' path taken from the start state keeps its 'k'.

    The symbols of the first result returned by ``compose`` are joined
    into one string.
    """
    alphabet = string.ascii_lowercase

    # ---- Stage 1: detect the affixes 'ing' and 'ed' --------------------
    affix_fst = FST('morphology-parse')
    for state_name in ('start', 'i', 'n', 'g', 'e', 'd'):
        affix_fst.add_state(state_name)
    affix_fst.initial_state = 'start'
    affix_fst.set_final('d')
    affix_fst.set_final('g')

    for ch in alphabet:
        # 'i' and 'e' may open a suffix, so they are withheld at 'start'.
        if ch not in 'ie':
            affix_fst.add_arc('start', 'start', ch, ch)
        # The suffix did not continue: emit the withheld letters plus
        # the current one and fall back to 'start'.
        if ch != 'n':
            affix_fst.add_arc('i', 'start', ch, ('i', ch))
        if ch != 'd':
            affix_fst.add_arc('e', 'start', ch, ['e', ch])
        if ch != 'g':
            affix_fst.add_arc('n', 'start', ch, ['i', 'n', ch])

    suffix_arcs = (
        ('start', 'i', 'i', ''),
        ('i', 'n', 'n', ''),
        ('n', 'g', 'g', '+present participle form'),
        ('start', 'e', 'e', ''),
        ('e', 'd', 'd', '+past form'),
    )
    for src, dst, in_sym, out_sym in suffix_arcs:
        affix_fst.add_arc(src, dst, in_sym, out_sym)

    # ---- Stage 2: k-deletion -------------------------------------------
    kdel_fst = FST('morphology-parse')
    for state_name in ('start', 'vowel', 'consonant', 'c', 'k',
                       'lick_l', 'lick_i', 'lick_c', 'lick_k', 'parse'):
        kdel_fst.add_state(state_name)
    kdel_fst.initial_state = 'start'
    kdel_fst.set_final('parse')

    vowels = 'aeiou'
    for v in vowels:
        kdel_fst.add_arc('start', 'vowel', v, v)
        kdel_fst.add_arc('vowel', 'vowel', v, v)
        kdel_fst.add_arc('consonant', 'vowel', v, v)
        kdel_fst.add_arc('c', 'vowel', v, v)

    for ch in alphabet:
        # Once in 'parse' (after the '+'), every letter is copied through.
        kdel_fst.add_arc('parse', 'parse', ch, ch)
        if ch not in vowels:
            # 'c' after a vowel, 'l' at the start and 'k' after 'c' are
            # routed through dedicated states by the arcs added below.
            if ch != 'c':
                kdel_fst.add_arc('vowel', 'consonant', ch, ch)
            if ch != 'l':
                kdel_fst.add_arc('start', 'consonant', ch, ch)
            if ch != 'k':
                kdel_fst.add_arc('c', 'consonant', ch, ch)
            kdel_fst.add_arc('consonant', 'consonant', ch, ch)

    special_arcs = (
        ('vowel', 'c', 'c', 'c'),
        ('c', 'k', 'k', ''),              # the 'k' is deleted here
        ('start', 'lick_l', 'l', 'l'),
        ('lick_l', 'lick_i', 'i', 'i'),
        ('lick_i', 'lick_c', 'c', 'c'),
        ('lick_c', 'lick_k', 'k', 'k'),   # this path keeps its 'k'
        ('lick_k', 'parse', '+', '+'),
        ('k', 'parse', '+', '+'),
        ('consonant', 'parse', '+', '+'),
        ('parse', 'parse', ' ', ' '),
    )
    for src, dst, in_sym, out_sym in special_arcs:
        kdel_fst.add_arc(src, dst, in_sym, out_sym)

    first_result = compose(word, affix_fst, kdel_fst)[0]
    return "".join(first_result)
def parse(self, word): """Parse a word morphologically want, sync, panic, havoc, and lick e.g. -ed and -ing p = Parser() past form \ present participle form word = ['p', 'a', 'n', 'i', 'c', 'k','e','d'] p.parse(word) ---> 'panic+past form' """ f2=fst.FST('lexicon') for i in range(1,56): f2.add_state(str(i)) f2.initial_state = '1' f2.add_arc('1','2','w','w') f2.add_arc('2','3','a','a') f2.add_arc('3','4','n','n') f2.add_arc('4','5','t','t') f2.add_arc('5','6','e','^') f2.add_arc('6','7','d','d') f2.add_arc('7','8','','#') f2.add_arc('5','9','i','^') f2.add_arc('9','10','n','n') f2.add_arc('10','11','g','g') f2.add_arc('11','8','','#') f2.add_arc('1','12','s','s') f2.add_arc('12','13','y','y') f2.add_arc('13','14','n','n') f2.add_arc('14','15','c','c') f2.add_arc('15','16','e','^') f2.add_arc('16','17','d','d') f2.add_arc('17','18','','#') f2.add_arc('15','19','i','^') f2.add_arc('19','20','n','n') f2.add_arc('20','21','g','g') f2.add_arc('21','18','','#') f2.add_arc('1','22','p','p') f2.add_arc('22','23','a','a') f2.add_arc('23','24','n','n') f2.add_arc('24','25','i','i') f2.add_arc('25','26','c','c') f2.add_arc('26','27','k','^') f2.add_arc('27','28','e','e') f2.add_arc('28','29','d','d') f2.add_arc('29','30','','#') f2.add_arc('27','31','i','i') f2.add_arc('31','32','n','n') f2.add_arc('32','33','g','g') f2.add_arc('33','30','','#') f2.add_arc('1','34','h','h') f2.add_arc('34','35','a','a') f2.add_arc('35','36','v','v') f2.add_arc('36','37','o','o') f2.add_arc('37','38','c','c') f2.add_arc('38','39','k','^') f2.add_arc('39','40','e','e') f2.add_arc('40','41','d','d') f2.add_arc('41','42','','#') f2.add_arc('39','43','i','i') f2.add_arc('43','44','n','n') f2.add_arc('44','45','g','g') f2.add_arc('45','42','','#') f2.add_arc('1','46','l','l') f2.add_arc('46','47','i','i') f2.add_arc('47','48','c','c') f2.add_arc('48','49','k','k') f2.add_arc('49','50','e','^') f2.add_arc('50','51','d','d') f2.add_arc('51','52','','#') f2.add_arc('49','53','i','^') 
f2.add_arc('53','54','n','n') f2.add_arc('54','55','g','g') f2.add_arc('55','52','','#') f2.set_final('8') f2.set_final('18') f2.set_final('30') f2.set_final('42') f2.set_final('52') f3=fst.FST('rule') for i in range(1,53): f3.add_state(str(i)) f3.initial_state = '1' f3.add_arc('1','2','p','p') f3.add_arc('2','3','a','a') f3.add_arc('3','4','n','n') f3.add_arc('4','5','i','i') f3.add_arc('5','6','c','c') f3.add_arc('6','7','^','') f3.add_arc('7','8','e','') f3.add_arc('8','9','d','+past form') f3.add_arc('9','10','#','') f3.add_arc('7','11','i','') f3.add_arc('11','12','n','') f3.add_arc('12','13','g','+present participle') f3.add_arc('13','10','#','') f3.add_arc('1','14','h','h') f3.add_arc('14','15','a','a') f3.add_arc('15','16','v','v') f3.add_arc('16','17','o','o') f3.add_arc('17','18','c','c') f3.add_arc('18','19','^','') f3.add_arc('19','20','e','') f3.add_arc('20','21','d','+past form') f3.add_arc('21','22','#','') f3.add_arc('19','23','i','') f3.add_arc('23','24','n','') f3.add_arc('24','25','g','+present participle') f3.add_arc('25','22','#','') f3.add_arc('1','26','l','l') f3.add_arc('26','27','i','i') f3.add_arc('27','28','c','c') f3.add_arc('28','29','k','k') f3.add_arc('29','30','^','') f3.add_arc('30','31','d','+past form') f3.add_arc('31','32','#','') f3.add_arc('30','33','n','') f3.add_arc('33','34','g','+present participle') f3.add_arc('34','32','#','') f3.add_arc('1','35','s','s') f3.add_arc('35','36','y','y') f3.add_arc('36','37','n','n') f3.add_arc('37','38','c','c') f3.add_arc('38','39','^','') f3.add_arc('39','40','d','+past form') f3.add_arc('40','41','#','') f3.add_arc('38','42','n','') f3.add_arc('42','43','g','+present participle') f3.add_arc('43','41','#','') f3.add_arc('1','44','w','w') f3.add_arc('44','45','a','a') f3.add_arc('45','46','n','n') f3.add_arc('46','47','t','t') f3.add_arc('47','48','^','') f3.add_arc('48','49','d','+past form') f3.add_arc('49','50','#','') f3.add_arc('47','51','n','') f3.add_arc('51','52','g','+present 
participle') f3.add_arc('52','50','#','') f3.set_final('10') f3.set_final('22') f3.set_final('32') f3.set_final('41') f3.set_final('50') output=''.join(fsmutils.compose(word,f2,f3)[0]) return output '''output = ['p','a','n','i','c','+past form']