def getPhonScores(puns):
    """Return a dict mapping each pun string to its summed word score.

    Each pun is split on '#'; the piece after the final '#' is discarded.
    Pieces containing the marker "D I C T" are skipped.  Every remaining
    piece is stripped of surrounding whitespace and passed to
    ``BlickLoader.assessWord``; the returned values are summed per pun.
    """
    rater = BlickLoader()
    totals = {}
    for pun in puns:
        # Drop the trailing piece that follows the last '#'.
        segments = pun.split('#')[:-1]
        totals[pun] = sum(
            rater.assessWord(segment.strip())
            for segment in segments
            if "D I C T" not in segment
        )
    return totals
def train_wakeword_model(audio_train_loader, vocab_list, label_model,
                         beam_size=3, num_hypotheses=5,
                         query_by_string=False):
    """Build a wake-word model of (hypothesis, weight) pairs keyed by index.

    Args:
        audio_train_loader: iterable of training items; each item is passed
            to ``label_model``.  Ignored when ``query_by_string`` is true.
        vocab_list: label list handed to ``CTCBeamDecoder``.  It must contain
            ``'_'``, whose position is used as the blank id.
        label_model: callable producing the posteriors that are decoded.
        beam_size: beam width given to the decoder.
        num_hypotheses: maximum number of entries taken from the decoder
            output for each training item.
        query_by_string: when true, skip training and instead read the
            keywords from ``config["wakeword_model"]`` and weight each one
            with ``BlickLoader.assessWord``.

    Returns:
        dict keyed by a running integer index.  With ``query_by_string`` the
        value is a single ``(y_hat, w)`` tuple; otherwise it is a list of
        ``(y_hat, w)`` tuples, one per hypothesis.

    Raises:
        ValueError: if ``'_'`` is not in ``vocab_list`` (only when at least
            one training item is decoded).
    """
    wakeword_model = {}

    if query_by_string:
        # load ww model produced by MFA from config
        # NOTE: `config` is a module-level name that this function expects
        # to be in scope.
        keywords = config["wakeword_model"]
        # load blick
        b = BlickLoader()
        # Only the values are used; the index comes from enumerate.
        for i, y_hat in enumerate(keywords.values()):
            w = b.assessWord(y_hat)
            # only one hypothesis per keyword if using MFA
            wakeword_model[i] = (y_hat, w)
        return wakeword_model

    # train ww model from scratch
    decoder = None
    for i, batch in enumerate(audio_train_loader):
        posteriors_i = label_model(batch)
        if decoder is None:
            # The decoder does not depend on the batch, so build it once,
            # and only when there is something to decode.
            # decode using CTC, vocab_list is A (labels)
            decoder = CTCBeamDecoder(vocab_list,
                                     beam_width=beam_size,
                                     blank_id=vocab_list.index('_'))
        beam, beam_scores, _, _ = decoder.decode(posteriors_i)

        # Key by the running index (the batch object itself is not a usable
        # dict key) and create the list before appending to it.
        hypotheses = wakeword_model.setdefault(i, [])

        # Never index past what the decoder actually returned.
        # NOTE(review): beam/beam_scores are indexed along their first axis,
        # as in the original code -- confirm that axis enumerates hypotheses
        # rather than batch entries for the decoder in use.
        for j in range(min(num_hypotheses, len(beam))):
            y_hat = beam[j]  # hypothesis
            log_prob_post = beam_scores[j]
            w = log_prob_post**-1
            # for each keyword, append the tuple (hypothesis, weight)
            hypotheses.append((y_hat, w))

    return wakeword_model
if syl.nucleus in SHORT_VOWELS: return False if is_first and syl.onset and syl.onset[0] == "ZH": return False # if is_last and stress_lvl == 1 and len(syl.coda) == 0: # return False if syl.onset and syl.coda and syl.onset[0] == "S" and not syl.onset[ -1] == "T" and syl.nucleus in SHORT_VOWELS: if syl.coda[0] == syl.onset[-1]: return False if stress_lvl != 1 and syl.nucleus not in { "AH", "ER", "IH", "IY", "OW", "UW" }: return False return True blick_rater = BlickLoader() words = [] for i in range(100): word = getWord() score, rules = blick_rater.assessWord(word.replace(" ", " "), includeConstraints=True) score = exp(-score) words.append([word, score, rules]) for word, score, rules in sorted(words, key=itemgetter(1)): if score > 0.00001: print(word, score, rules) print()
from blick import BlickLoader

# Command-line front end: parse the options, build a BlickLoader with the
# requested debug flag and grammar type, and hand the input file to
# BlickLoader.assessFile.
parser = argparse.ArgumentParser(
    description='Add phonotactic probability to a file of phone strings.')
# type=open makes argparse reject a missing/unreadable file at parse time.
parser.add_argument('filename', type=open,
                    help='name of file of phone strings')
parser.add_argument('-d', '--debug', action='store_true', default=False,
                    help='whether or not debug mode should be activated')
parser.add_argument('-c', '--constraints', action='store_true', default=False,
                    help='whether or not constraints should be included '
                         'in the output')
# A list (not a set) keeps the order of choices stable in usage/help text.
parser.add_argument('-g', '--grammar', type=str,
                    choices=['HayesWhite', 'NoStress', 'default'],
                    default='default', help='type of grammar to be used')
args = parser.parse_args()
argdict = vars(args)

# Only the file's name is needed below; close the handle argparse opened
# so it is not left dangling while assessFile works on the path.
input_path = argdict['filename'].name
argdict['filename'].close()

b = BlickLoader(debug=argdict['debug'], grammarType=argdict['grammar'])
b.assessFile(input_path, includeConstraints=argdict['constraints'])