def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    Every test item is scored against every model; the word whose model
    gives the highest log likelihood becomes the guess for that item.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word
        and each value is its log likelihood; a word whose model could not
        score the item is left out of that item's dictionary
        guesses is a list of the best guess words, with None for an item
        that no model could score
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    for word_id, _ in enumerate(test_set.wordlist):
        X, lengths = test_set.get_item_Xlengths(word_id)
        log_likelihoods = {}
        for word, model in models.items():
            try:
                log_likelihoods[word] = model.score(X, lengths)
            except (ValueError, AttributeError):
                # Best effort: a model that cannot score this item is skipped.
                continue
        probabilities.append(log_likelihoods)
        # default=None keeps max() from raising when nothing was scored.
        guesses.append(
            max(log_likelihoods, key=log_likelihoods.get, default=None))
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word
        and each value is what score_model returns for that word's model
        guesses is a list of the best guess words, with None for an item
        whose dictionary is empty
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    for item in range(test_set.num_items):
        x_item, length_item = test_set.get_item_Xlengths(item)
        probabilities.append({
            word: score_model(model, x_item, length_item)
            for word, model in models.items()
        })
    # One pass per item: max() returns the first top-scoring word, which is
    # the same word the old "filter by max value, take element 0" scan chose,
    # without recomputing the maximum for every entry.
    guesses = [
        max(scores, key=scores.get, default=None) for scores in probabilities
    ]
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word
        and each value is its log likelihood (a large negative placeholder
        when the model raised while scoring)
        guesses is a list of the best guess words
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    # Placeholder score recorded for a model that fails on an item.
    failed_score = float(-100000000000000000)
    probabilities, guesses = [], []
    item_count = len(test_set.get_all_Xlengths())
    for item in range(item_count):
        X, lengths = test_set.get_item_Xlengths(item)
        scores = {}
        for word in models:
            try:
                scores[word] = models[word].score(X, lengths)
            except Exception:
                scores[word] = failed_score
        probabilities.append(scores)
        best_word, _ = max(scores.items(), key=lambda pair: pair[1])
        guesses.append(best_word)
    return (probabilities, guesses)
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as scores, guesses
        both lists are ordered by the test set word_id
        scores is a list of dictionaries where each key is a word and each
        value is its log likelihood; a word whose model raised while
        scoring is left out of that item's dictionary
        guesses is a list of the best guess words, with "" for an item
        where no model produced a score above negative infinity
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    scores = []
    guesses = []
    for i in range(test_set.num_items):
        X, lengths = test_set.get_item_Xlengths(i)
        item_scores = {}
        best_score = float("-inf")
        best_word = ""
        for word, model in models.items():
            try:
                score = model.score(X, lengths)
            except Exception:
                # Best effort: skip a model that cannot score this item.
                # Exception (not a bare except) lets KeyboardInterrupt and
                # SystemExit propagate.
                continue
            item_scores[word] = score
            if score > best_score:
                best_score = score
                best_word = word
        scores.append(item_scores)
        guesses.append(best_word)
    return scores, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word
        and each value is its log likelihood (negative infinity when the
        model raised while scoring)
        guesses is a list of the best guess words
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    for word_id, _ in enumerate(test_set.wordlist):
        X, lengths = test_set.get_item_Xlengths(word_id)
        log_likelihoods = {}
        for word, model in models.items():
            try:
                log_likelihoods[word] = model.score(X, lengths)
            except Exception:
                # A model that cannot score the item ranks last. Exception
                # (not a bare except) lets KeyboardInterrupt propagate.
                log_likelihoods[word] = float("-inf")
        probabilities.append(log_likelihoods)
        guesses.append(max(log_likelihoods, key=log_likelihoods.get))
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists follow the iteration order of test_set.get_all_Xlengths()
        probabilities is a list of dictionaries where each key is a word
        and each value is its log likelihood (negative infinity when the
        model raised while scoring)
        guesses is a list holding, per item, the word that get_best_word
        picks from that item's dictionary
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    for word_id, _ in test_set.get_all_Xlengths().items():
        X, lengths = test_set.get_item_Xlengths(word_id)
        log_likelihoods = {}
        for word, model in models.items():
            try:
                log_likelihoods[word] = model.score(X, lengths)
            except Exception:
                # A model that cannot score the item ranks last. Exception
                # (not a bare except) lets KeyboardInterrupt propagate.
                log_likelihoods[word] = float("-inf")
        probabilities.append(log_likelihoods)
        guesses.append(get_best_word(log_likelihoods))
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists follow the order of test_set.df.index
        probabilities is a list of dictionaries where each key is a word
        and each value is its log likelihood; a word whose model raised
        while scoring is left out of that item's dictionary
        guesses is a list of the best guess words, with None for an item
        that no model could score
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    for word_id in test_set.df.index:
        item_data = test_set.get_item_Xlengths(word_id)
        log_likelihoods = {}
        for word, model in models.items():
            try:
                log_likelihoods[word] = model.score(*item_data)
            except Exception:
                # Best effort: skip a model that cannot score this item.
                # Exception (not a bare except) lets KeyboardInterrupt
                # propagate.
                continue
        probabilities.append(log_likelihoods)
        # default=None keeps max() from raising when nothing was scored.
        guesses.append(
            max(log_likelihoods, key=log_likelihoods.get, default=None))
    return (probabilities, guesses)
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word
        and each value is its log likelihood; a word whose model raised
        while scoring is left out of that item's dictionary
        guesses is a list of the best guess words with every ASCII digit
        removed, or None for an item that no model could score
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    # NOTE(review): item ids are taken to be 0..n-1 - confirm against
    # SinglesData.
    for word_id in range(len(test_set.get_all_Xlengths())):
        X, lengths = test_set.get_item_Xlengths(word_id)
        word_probability = {}
        for word, model in models.items():
            try:
                word_probability[word] = model.score(X, lengths)
            except Exception:
                # Best effort: skip a model that cannot score this item.
                # Exception (not a bare except) lets KeyboardInterrupt
                # propagate.
                continue
        probabilities.append(word_probability)
        if not word_probability:
            # Nothing was scored, so there is no word to pick.
            guesses.append(None)
            continue
        guessed_word = max(word_probability, key=word_probability.get)
        # Drop the digits from the recognized word before reporting it.
        guesses.append(
            ''.join(c for c in guessed_word if c not in '0123456789'))
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Score every test item against every word model and pick the best.

    :param models: dict mapping each word to a model object exposing
        score(X, lengths)
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by item index 0..num_items-1
        probabilities is a list of dictionaries mapping each word to its
        score (negative infinity when the model raised while scoring)
        guesses is a list of the top-scoring words, with None for an item
        where no score exceeded negative infinity
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities, guesses = [], []
    for item in range(test_set.num_items):
        X, lengths = test_set.get_item_Xlengths(item)
        scores = {}
        best_word = None
        best_score = float("-inf")
        for word, model in models.items():
            try:
                score = model.score(X, lengths)
            except Exception:
                score = float("-inf")
            scores[word] = score
            # Strict comparison: the first word reaching the top score wins.
            if score > best_score:
                best_score = score
                best_word = word
        probabilities.append(scores)
        guesses.append(best_word)
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word
        and each value is its log likelihood (negative infinity when the
        model raised while scoring)
        guesses is a list of the best guess words
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    for word_id, _ in enumerate(test_set.wordlist):
        # Fetched once per item: the data does not depend on the model, and
        # a lookup failure should surface rather than be recorded as a
        # failed score for every word.
        X, lengths = test_set.get_item_Xlengths(word_id)
        log_likelihoods = {}
        for word, model in models.items():
            try:
                log_likelihoods[word] = model.score(X, lengths)
            except Exception:
                # A model that cannot score the item ranks last. Exception
                # (not a bare except) lets KeyboardInterrupt propagate.
                log_likelihoods[word] = float("-inf")
        probabilities.append(log_likelihoods)
        guesses.append(max(log_likelihoods, key=log_likelihoods.get))
    return (probabilities, guesses)
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word
        and each value is its log likelihood (negative infinity when the
        model raised while scoring)
        guesses is a list of the best guess words, with None for an item
        where no score exceeded negative infinity
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    for word_id in range(len(test_set.wordlist)):
        X, lengths = test_set.get_item_Xlengths(word_id)
        best_score, best_guess, scores = float('-inf'), None, {}
        for word, model in models.items():
            try:
                score = model.score(X, lengths)
            except Exception:
                # A model that cannot score the item ranks last. Exception
                # (not a bare except) lets KeyboardInterrupt propagate.
                score = float('-inf')
            scores[word] = score
            # Strict comparison: the first word reaching the top score wins.
            if best_score < score:
                best_score = score
                best_guess = word
        guesses.append(best_guess)
        probabilities.append(scores)
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists follow test_set.sentences_index: sentence by sentence,
        then word by word within each sentence
        probabilities is a list of dictionaries where each key is a word
        and each value is its log likelihood (negative infinity when the
        model raised while scoring)
        guesses is a list of the best guess words
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    # NOTE(review): this walks items sentence by sentence; presumably that
    # matches word_id order - confirm against SinglesData.
    for sentence in test_set.sentences_index:
        for test_word in test_set.sentences_index[sentence]:
            # Fetched once per item; the data does not depend on the model.
            X, lengths = test_set.get_item_Xlengths(test_word)
            log_likelihoods = {}
            for word, model in models.items():
                try:
                    log_likelihoods[word] = model.score(X, lengths)
                except Exception:
                    # Negative infinity, not a finite placeholder, so a
                    # failed model can never outrank a genuine score.
                    # Exception (not a bare except) lets KeyboardInterrupt
                    # propagate.
                    log_likelihoods[word] = float("-inf")
            probabilities.append(log_likelihoods)
            guesses.append(max(log_likelihoods, key=log_likelihoods.get))
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word
        and each value is its log likelihood (negative infinity when the
        model raised while scoring)
        guesses is a list of the best guess words
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    # NOTE(review): item ids are taken to be 0..n-1 - confirm against
    # SinglesData.
    item_count = len(test_set.get_all_Xlengths())
    for word_id in range(item_count):
        X, lengths = test_set.get_item_Xlengths(word_id)
        log_likelihoods = {}
        for word, model in models.items():
            try:
                log_likelihoods[word] = model.score(X, lengths)
            except Exception:
                # A model that cannot score the item ranks last. Exception
                # (not a bare except) lets KeyboardInterrupt propagate.
                log_likelihoods[word] = float('-inf')
        probabilities.append(log_likelihoods)
        guesses.append(max(log_likelihoods, key=log_likelihoods.get))
    return (probabilities, guesses)
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word
        and each value is its log likelihood (negative infinity when the
        model raised while scoring)
        guesses is a list of the best guess words
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    # NOTE(review): item ids are taken to be 0..n-1 - confirm against
    # SinglesData.
    for word_id in range(len(test_set.get_all_Xlengths())):
        X, lengths = test_set.get_item_Xlengths(word_id)
        likelihoods = {}
        for word, model in models.items():
            try:
                likelihoods[word] = model.score(X, lengths)
            except Exception:
                # A model that cannot score the item ranks last. Exception
                # (not a bare except) lets KeyboardInterrupt propagate.
                likelihoods[word] = float("-inf")
        probabilities.append(likelihoods)
        guesses.append(max(likelihoods, key=likelihoods.get))
    return (probabilities, guesses)
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word
        and each value is its log likelihood (negative infinity when the
        model raised while scoring)
        guesses is a list of the best guess words; when several words share
        the top score, the one that compares greatest as a string wins
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    # NOTE(review): item ids are taken to be 0..n-1 - confirm against
    # SinglesData.
    for word_id in range(len(test_set.get_all_sequences())):
        X, lengths = test_set.get_item_Xlengths(word_id)
        log_likelihoods = {}
        for word, model in models.items():
            try:
                log_likelihoods[word] = model.score(X, lengths)
            except Exception:
                # A model that cannot score the item ranks last. Exception
                # (not a bare except) lets KeyboardInterrupt propagate.
                log_likelihoods[word] = float('-inf')
        probabilities.append(log_likelihoods)
        # Compare (score, word) pairs so equal scores fall back to the word.
        _, guess = max(
            (logL, word) for word, logL in log_likelihoods.items())
        guesses.append(guess)
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists follow the iteration order of
        test_set.get_all_sequences()
        probabilities is a list of dictionaries where each key is a word
        and each value is its log likelihood, or None when the model raised
        while scoring
        guesses is a list of the best guess words, with None for an item
        that no model could score
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    for item in test_set.get_all_sequences():
        X, lengths = test_set.get_item_Xlengths(item)
        scores, guess, best_score = {}, None, None
        for word, model in models.items():
            try:
                score = model.score(X, lengths)
            except Exception:
                # Failed models are recorded as None and never compete.
                # Exception (not a bare except) lets KeyboardInterrupt
                # propagate.
                scores[word] = None
                continue
            scores[word] = score
            # The first scored word always becomes the initial best.
            if best_score is None or best_score < score:
                best_score, guess = score, word
        probabilities.append(scores)
        guesses.append(guess)
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word
        and each value is its log likelihood (negative infinity when the
        model raised while scoring)
        guesses is a list of the best guess words
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    def log_likelihood(model, X, lengths):
        # Score one model; a model that raises ranks last.
        try:
            return model.score(X, lengths)
        except Exception:
            return float("-inf")

    probabilities, guesses = [], []
    for index in range(test_set.num_items):
        X, lengths = test_set.get_item_Xlengths(item=index)
        scores = {
            word: log_likelihood(model, X, lengths)
            for word, model in models.items()
        }
        probabilities.append(scores)
        guesses.append(max(scores, key=lambda word: scores[word]))
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists follow the iteration order of
        test_set.get_all_sequences(); an item contributes nothing to either
        list when models is empty
        probabilities is a list of dictionaries where each key is a word
        and each value is its log likelihood (negative infinity when the
        model raised while scoring)
        guesses is a list of the best guess words
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    for item in test_set.get_all_sequences():
        # Fetched once per item: the data does not depend on the model, and
        # a lookup failure should surface rather than be recorded as a
        # failed score for every word.
        X, lengths = test_set.get_item_Xlengths(item)
        log_likelihoods = {}
        for word, model in models.items():
            try:
                log_likelihoods[word] = model.score(X, lengths)
            except Exception:
                # A model that cannot score the item ranks last. Exception
                # (not a bare except) lets KeyboardInterrupt propagate.
                log_likelihoods[word] = float('-inf')
        if log_likelihoods:
            probabilities.append(log_likelihoods)
            guesses.append(max(log_likelihoods, key=log_likelihoods.get))
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists follow the iteration order of
        test_set.get_all_sequences()
        probabilities is a list of dictionaries where each key is a word
        and each value is its log likelihood (negative infinity when
        scoring raised ValueError)
        guesses is a list of the best guess words
    """
    for category in (DeprecationWarning, RuntimeWarning):
        warnings.filterwarnings("ignore", category=category)
    probabilities, guesses = [], []
    for item in test_set.get_all_sequences():
        X, lengths = test_set.get_item_Xlengths(item)
        scores = {}
        for word in models:
            try:
                scores[word] = models[word].score(X, lengths)
            except ValueError:
                # Only ValueError is treated as "cannot score this item".
                scores[word] = float("-inf")
        probabilities.append(scores)
        guesses.append(max(scores, key=lambda candidate: scores[candidate]))
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word
        and each value is its log likelihood (negative infinity when the
        model raised while scoring)
        guesses is a list of the best guess words
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    for word_id in range(test_set.num_items):
        # Fetched once per item: the data does not depend on the model, and
        # a lookup failure should surface rather than be recorded as a
        # failed score for every word.
        X, lengths = test_set.get_item_Xlengths(word_id)
        word_probability = {}
        for word, model in models.items():
            try:
                word_probability[word] = model.score(X, lengths)
            except Exception:
                # A model that cannot score the item ranks last. Exception
                # (not a bare except) lets KeyboardInterrupt propagate.
                word_probability[word] = float("-inf")
        probabilities.append(word_probability)
        guesses.append(max(word_probability, key=word_probability.get))
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word
        and each value is its log likelihood (negative infinity when
        scoring raised ValueError)
        guesses is a list of the best guess words
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    for word_id in range(test_set.num_items):
        # Fetched once per item instead of once per model; the data does
        # not depend on which model is scoring it.
        item_data = test_set.get_item_Xlengths(word_id)
        log_likelihoods = {}
        for word, model in models.items():
            try:
                log_likelihoods[word] = model.score(*item_data)
            except ValueError:
                # Only ValueError is treated as "cannot score this item".
                log_likelihoods[word] = float("-inf")
        probabilities.append(log_likelihoods)
        guesses.append(max(log_likelihoods.items(), key=itemgetter(1))[0])
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word
        and each value is its log likelihood; a word whose model could not
        score the item is left out of that item's dictionary
        guesses is a list of the best guess words, with None for an item
        that no model could score
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    # NOTE(review): item ids are taken to be 0..n-1 - confirm against
    # SinglesData.
    for word_id in range(len(test_set.get_all_Xlengths())):
        X, lengths = test_set.get_item_Xlengths(word_id)
        word_probabilities = {}
        for word, model in models.items():
            try:
                word_probabilities[word] = model.score(X, lengths)
            except (ValueError, AttributeError):
                # Best effort: a model that cannot score this item is skipped.
                continue
        probabilities.append(word_probabilities)
        # max() picks the same first top-scoring word a stable descending
        # sort would put in front, without sorting every score; default=None
        # covers the case where nothing was scored.
        guesses.append(
            max(word_probabilities, key=word_probabilities.get, default=None))
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word
        and each value is its log likelihood (negative infinity when the
        model raised while scoring)
        guesses is a list of the best guess words, with None for an item
        where no score exceeded negative infinity
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    for word_id, _ in enumerate(test_set.wordlist):
        X, lengths = test_set.get_item_Xlengths(word_id)
        best_score, best_word = float("-inf"), None
        scores = {}
        for word, model in models.items():
            try:
                score = model.score(X, lengths)
            except Exception:
                # A model that cannot score the item ranks last. Exception
                # (not a bare except) lets KeyboardInterrupt propagate.
                scores[word] = float("-inf")
                continue
            scores[word] = score
            # Strict comparison: the first word reaching the top score wins.
            if score > best_score:
                best_score = score
                best_word = word
        probabilities.append(scores)
        guesses.append(best_word)
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists follow the iteration order of test_set.get_all_Xlengths()
        probabilities is a list of dictionaries where each key is a word
        and each value is its log likelihood (negative infinity when the
        model raised while scoring)
        guesses is a list of the best guess words
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    for item, _ in test_set.get_all_Xlengths().items():
        X, lengths = test_set.get_item_Xlengths(item)
        words_logL = {}
        for word, model in models.items():
            try:
                words_logL[word] = model.score(X, lengths)
            except Exception:
                # A model that cannot score the item ranks last. Exception
                # (not a bare except) lets KeyboardInterrupt propagate.
                words_logL[word] = float('-inf')
        probabilities.append(words_logL)
        guesses.append(max(words_logL, key=words_logL.get))
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists follow the iteration order of test_set.get_all_Xlengths()
        probabilities is a list of dictionaries where each key is a word
        and each value is its log likelihood (negative infinity when the
        model raised while scoring)
        guesses is a list of the best guess words
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    # For each word in the testing set ...
    for word_index, _ in test_set.get_all_Xlengths().items():
        X, lengths = test_set.get_item_Xlengths(word_index)
        word_log_l = {}
        # ... try the word on every model and record how well each matches.
        for word, model in models.items():
            try:
                word_log_l[word] = model.score(X, lengths)
            except Exception:
                # A model that cannot score the item ranks last. Exception
                # (not a bare except) lets KeyboardInterrupt propagate.
                word_log_l[word] = float("-inf")
        probabilities.append(word_log_l)
        guesses.append(max(word_log_l, key=word_log_l.get))
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists follow the iteration order of test_set.get_all_Xlengths()
        probabilities is a list of dictionaries where each key is a word
        and each value is its log likelihood (negative infinity when the
        model raised while scoring)
        guesses is a list of the best guess words
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []
    for word_id, _ in test_set.get_all_Xlengths().items():
        X, lengths = test_set.get_item_Xlengths(word_id)
        # key = word, value = log likelihood
        word_likelihoods = {}
        for word, model in models.items():
            try:
                word_likelihoods[word] = model.score(X, lengths)
            except Exception:
                # A model that cannot score the item ranks last. Exception
                # (not a bare except) lets KeyboardInterrupt propagate.
                word_likelihoods[word] = float('-inf')
        probabilities.append(word_likelihoods)
        guesses.append(max(word_likelihoods, key=word_likelihoods.get))
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of word models.

    Entries of ``models`` whose value is None are ignored; the remaining
    models are handed to get_word_probs together with each item's data.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list holding, per item, the word -> score
        mapping returned by get_word_probs
        guesses is a list of the top-scoring word of each mapping
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    # Identity test: None is a singleton, and "!= None" would go through a
    # model's own comparison methods.
    valid_models = {
        word: model for word, model in models.items() if model is not None
    }
    probabilities = [
        get_word_probs(valid_models, *test_set.get_item_Xlengths(word_id))
        for word_id, _ in enumerate(test_set.wordlist)
    ]
    guesses = [
        max(word_probs.keys(), key=lambda word: word_probs[word])
        for word_probs in probabilities
    ]
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood (``-inf`` when scoring failed)
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ... },
             {'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ... },
             ]
        guesses is a list of the best guess words ordered by the test set word_id
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2',...]
        A guess is ``None`` when no model scores strictly above ``-inf``.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for test in range(test_set.num_items):
        sequence, lengths = test_set.get_item_Xlengths(test)
        top_prob, top_word = float("-inf"), None
        probs = {}
        for word, model in models.items():
            try:
                probs[word] = model.score(sequence, lengths)
            except Exception:
                # Scoring failures rank last; KeyboardInterrupt/SystemExit
                # are deliberately not caught so the run can still be aborted.
                probs[word] = float("-inf")
            # Strict comparison: the first word reaching the best score wins.
            if probs[word] > top_prob:
                top_prob, top_word = probs[word], word
        probabilities.append(probs)
        guesses.append(top_word)

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    Each unknown item in the test set is scored with every model provided and
    the best-fitting word is chosen. Progress is reported through the root
    logger, which this function configures at DEBUG level.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood (``-inf`` when scoring failed)
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ... },
             {'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ... },
             ]
        guesses is a list of the best guess words ordered by the test set word_id
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2',...]
        The string "None" is used as the guess when there is nothing to
        choose from (``models`` is empty).
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    logging.basicConfig(level=logging.DEBUG)
    probabilities = []
    guesses = []

    for item in range(test_set.num_items):
        # Look the item up once and reuse it for both logging and scoring.
        item_xlengths = test_set.get_item_Xlengths(item)
        logging.debug("Recognizing sample {} with these sequences {}".format(item, item_xlengths))
        X, lengths = item_xlengths

        probs = {}
        for word, model in models.items():
            logging.debug(" Comparing to {}".format(word))
            try:
                score = model.score(X, lengths)
            except Exception as e:
                logging.warning("{} caught while scoring model for word {}: {}".format(type(e), word, e))
                # A model that cannot score this item ranks last.
                probs[word] = -math.inf
            else:
                logging.debug(" Got this score {}:{}".format(word, score))
                probs[word] = score
        probabilities.append(probs)

        if not probs:
            guesses.append("None")
            logging.debug(" No results found for item {}!".format(item))
        else:
            best_guess = max(probs, key=probs.get)
            guesses.append(best_guess)
            logging.debug(" Best option {}:{}".format(best_guess, probs[best_guess]))

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    Prints a one-line summary of how many score calculations failed with the
    known "rows of transmat_ must sum to 1.0" error; any other scoring
    exception is printed individually.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood (``-inf`` when scoring failed)
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ... },
             {'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ... },
             ]
        guesses is a list of the best guess words ordered by the test set word_id
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2',...]
        A guess is ``None`` when ``models`` is empty.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    transmat_fail = 0
    nb_scored = 0
    for test_item in range(test_set.num_items):
        # The item data does not depend on the model, so fetch it once.
        X, lengths = test_set.get_item_Xlengths(test_item)
        item_probs = {}
        for model_word, model in models.items():
            nb_scored += 1
            try:
                logL = model.score(X, lengths)
            except Exception as inst:
                if re.match("^rows of transmat_ must sum to 1.0", str(inst)):
                    # Known issue: counted and summarised instead of printed.
                    transmat_fail += 1
                else:
                    print("Exception {}\nSetting logL to -inf".format(inst))
                # Every failed score is recorded as -inf so that a value left
                # over from a previous model can never be stored by mistake.
                logL = float('-inf')
            item_probs[model_word] = logL
        probabilities.append(item_probs)
        # First word with the highest log likelihood; None without models.
        guesses.append(max(item_probs, key=item_probs.get) if item_probs else None)

    print('Got {} transmat failed for a total of {} score calculations'.format(transmat_fail, nb_scored))
    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood (``-inf`` when scoring raised)
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ... },
             {'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ... },
             ]
        guesses is a list of the best guess words ordered by the test set word_id
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2',...]
        A guess is ``None`` when no model scores strictly above ``-inf``.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    minus_infinity = float("-inf")
    probabilities, guesses = [], []

    for item_idx in range(test_set.num_items):
        features, seq_lengths = test_set.get_item_Xlengths(item_idx)

        # Phase 1: log likelihood of this item under every word model.
        scores = {}
        for candidate, hmm in models.items():
            try:
                log_l = hmm.score(features, seq_lengths)
            except Exception:
                log_l = minus_infinity
            scores[candidate] = log_l

        # Phase 2: first word whose score strictly beats everything before it.
        best_word, best_score = None, minus_infinity
        for candidate, log_l in scores.items():
            if log_l > best_score:
                best_word, best_score = candidate, log_l

        probabilities.append(scores)
        guesses.append(best_word)

    return probabilities, guesses
def recognize(models: dict, test_set: SinglesData):
    """Recognize test word sequences from a set of trained word models.

    :param models: dict of trained models
        {'SOMEWORD': GaussianHMM model object, 'SOMEOTHERWORD': GaussianHMM model object, ...}
    :param test_set: SinglesData object
    :return: (list, list) as probabilities, guesses
        both lists are ordered by the test set word_id
        probabilities is a list of dictionaries where each key is a word and
        each value is its log likelihood, or ``None`` when scoring failed
            [{'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ... },
             {'SOMEWORD': LogLvalue, 'SOMEOTHERWORD': LogLvalue, ... },
             ]
        guesses is a list of the best guess words ordered by the test set word_id
            ['WORDGUESS0', 'WORDGUESS1', 'WORDGUESS2',...]
        A guess is ``None`` when no model could score the item.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    probabilities = []
    guesses = []

    for item in test_set.get_all_sequences():
        X, length = test_set.get_item_Xlengths(item)
        scores = {}
        best_guess = None
        best_score = None
        for word, model in models.items():
            try:
                score = model.score(X, length)
            except Exception:
                # Keep an entry for the word, but it cannot become the guess.
                scores[word] = None
                continue
            scores[word] = score
            # Compare against None explicitly: a log likelihood of 0.0 is a
            # valid best score and must not be treated as "nothing seen yet".
            if best_score is None or best_score < score:
                best_score, best_guess = score, word
        probabilities.append(scores)
        guesses.append(best_guess)

    return probabilities, guesses