def load_transcription(transcription_file_name):
    """
    Load a tab-separated transcription file.

    Every non-blank line is expected to look like ``word<TAB>p1 p2 ... pn``:
    the first tab-separated field is the word and the second field holds the
    phones separated by single spaces. Blank (or whitespace-only) lines are
    skipped instead of crashing the loader.

    :param transcription_file_name: path of the transcription file to read.
    :return: a list of tuple:
        [
            (word: string, phones: list),
            (word: string, phones: list),
            ...,
            (word: string, phones: list),
        ]
    :raises IndexError: if a non-blank line has no tab-separated phone field.
    """
    transcription_list = []
    with open(transcription_file_name, "r") as transcription_file:
        # Iterating the file object streams it line by line through the
        # buffered reader, so no manual chunking with readlines() is needed.
        for line in transcription_file:
            line = line.strip()
            if not line:
                # e.g. a trailing empty line at the end of the file
                continue
            fields = line.split("\t")
            word = fields[0]
            phones = fields[1].split(" ")
            transcription_list.append((word, phones))
    logging.debug("transcription_list:")
    logging.debug(transcription_list)
    return transcription_list
def init_prob_matrix(self):
    """
    Allocate the grapheme/phoneme probability matrix, filled with zeros.

    The matrix has one row per entry of ``self.grapheme_dict`` and one column
    per entry of ``self.phoneme_dict`` and is stored on ``self.prob_matrix``
    as ``float32``.

    :return: the freshly created all-zero probability matrix.
    """
    n_rows = len(self.grapheme_dict)
    n_cols = len(self.phoneme_dict)
    zero_matrix = np.zeros((n_rows, n_cols), dtype=np.float32)
    self.prob_matrix = zero_matrix
    logging.debug("prob_matrix:")
    logging.debug(self.prob_matrix)
    return self.prob_matrix
def normalize_prob_matrix(self):
    """
    Normalize ``self.prob_matrix`` in place so that each row sums to 1.

    Probability matrix is a matrix with shape:
    (grapheme_count, phoneme_count).

    Rows whose sum is 0 are left untouched: dividing them would turn the
    whole row into NaN (0 / 0), and those NaN values would then leak into
    every later computation that reads the matrix.

    NOTE: the matrix is expected to have a floating point dtype (as created
    by ``init_prob_matrix``), because the division is written back in place.

    :return: the normalized probability matrix (same object as
        ``self.prob_matrix``).
    """
    # keepdims=True keeps the sums as a column vector so that it broadcasts
    # against every column of the matching row.
    row_sums = np.sum(self.prob_matrix, axis=1, keepdims=True)
    np.divide(
        self.prob_matrix,
        row_sums,
        out=self.prob_matrix,
        where=row_sums != 0,  # skip zero-sum rows; ``out`` keeps old values
    )
    logging.debug("prob_matrix:")
    logging.debug(self.prob_matrix)
    return self.prob_matrix
def load_grapheme_dict(transcription_list):
    """
    Build the grapheme-to-id mapping from every word in the transcription.

    Ids are assigned in sorted grapheme order so that the mapping is
    identical from one run to the next (plain set iteration order depends on
    string hash randomization and is therefore not reproducible).

    :param transcription_list: a list of (word, phones) tuples; only the
        word part is used here.
    :return: a dictionary of grapheme-id pair like:
        {"a": 0, "b": 1, "c": 2, ...,}
    """
    grapheme_set = set()
    for word, _ in transcription_list:
        # update() adds each character of the word in place, without
        # rebuilding the whole set for every word.
        grapheme_set.update(word)
    grapheme_dict = {
        grapheme: index for index, grapheme in enumerate(sorted(grapheme_set))
    }
    logging.debug("grapheme_dict:")
    logging.debug(grapheme_dict)
    return grapheme_dict
def load_phoneme_dict(transcription_list):
    """
    Build the phoneme-to-id mapping from every phone in the transcription.

    The special symbol "*" is always present and always receives the last
    id. Ids of the other phonemes are assigned in sorted order so that the
    mapping is identical from one run to the next (plain set iteration order
    depends on string hash randomization).

    If "*" already occurs in the data it is not counted twice; a duplicate
    entry would leave a gap in the ids and make the id of "*" equal to
    ``len(phoneme_dict)``, which is out of range for anything sized by the
    dictionary length.

    :param transcription_list: a list of (word, phones) tuples; only the
        phones part is used here.
    :return: a dictionary of phoneme-id pair like:
        {"b": 0, "ey1": 1, "iy2": 2, "s": 3, ..., "*": n}
    """
    phoneme_set = set()
    for _, phones in transcription_list:
        phoneme_set.update(phones)
    # "*" is appended explicitly below, so drop any copy found in the data.
    phoneme_set.discard("*")
    phoneme_list = sorted(phoneme_set)
    phoneme_list.append("*")
    phoneme_dict = {
        phoneme: index for index, phoneme in enumerate(phoneme_list)
    }
    logging.debug("phoneme_dict:")
    logging.debug(phoneme_dict)
    return phoneme_dict
def e_step(self):
    """
    Expectation step: choose the most probable alignment path for every
    word-phones pair of ``self.transcription_list``.

    For each pair, ``introduce_epsilon_phone_seq`` supplies the candidate
    (word, phones) variants, each candidate is scored with
    ``self.dynamic_time_wrapping`` and the path with the largest probability
    is kept. On equal probabilities the earliest candidate wins.

    :return: a list of align paths, one per transcription entry, like:
        [
            [("a", "ey1"), ("b", "b_iy1"), ("c", "s_iy0"), ],
            [("a", "ey1"), ("b", "b_iy1"), ],
            [("a", "ey1"), ("b", "b_iy1"), ("c", "s_iy0"), ("d", "d_iy0"), ],
        ]
    """
    best_paths = []
    for word, phones in self.transcription_list:
        pair_list = introduce_epsilon_phone_seq(word, phones)
        logging.debug("pair list:")
        logging.debug(pair_list)

        # Score every candidate variant of this word-phones pair.
        scored = []
        for graphemes, phone_seq in pair_list:
            path, score = self.dynamic_time_wrapping(graphemes, phone_seq)
            scored.append((path, score))

        # Highest probability first; the sort is stable, so ties keep their
        # original candidate order.
        ranked = sorted(scored, key=lambda entry: entry[1], reverse=True)
        best_paths.append(ranked[0][0])
    return best_paths
def reset_prob_matrix(self, align_paths):
    """
    Re-estimate the probability matrix from a set of alignment paths.

    Every (grapheme, phoneme) step found in the paths adds 1 to the matching
    cell of ``self.prob_matrix``; afterwards ``self.normalize_prob_matrix()``
    is called. The counts are added on top of whatever values the matrix
    currently holds - it is not zeroed here.

    :param align_paths: a list of step lists, like:
        [
            [("a", "ey1"), ("b", "b_iy1"), ..., ("c", "s_iy1"), ],
            [("a", "ey1"), ("b", "b_iy1"), ..., ("c", "s_iy1"), ],
            ...,
        ]
    :return: prob matrix
    """
    logging.debug("before reset prob matrix:")
    logging.debug(self.prob_matrix)

    # Walk all steps of all paths in order, as one flat stream.
    every_step = (step for path in align_paths for step in path)
    for step in every_step:
        row = self.get_grapheme_id(step[0])
        col = self.get_phoneme_id(step[1])
        self.prob_matrix[row][col] += 1

    self.normalize_prob_matrix()

    logging.debug("after reset prob matrix:")
    logging.debug(self.prob_matrix)
    return self.prob_matrix