Пример #1
0
def load_transcription(transcription_file_name):
    """
    Load word/phone transcriptions from a tab-separated text file.

    Every non-blank line is expected to look like ``word<TAB>ph1 ph2 ... phN``:
    the word and its phones are separated by a tab, the phones by single
    spaces. Leading/trailing whitespace of a line is stripped and blank lines
    are skipped.

    :param transcription_file_name: path of the transcription file to read.
    :return: a list of tuple:
        [
        (word: string, phones: list),
        (word: string, phones: list),
        ...,
        (word: string, phones: list),
        ]
    :raises IndexError: if a non-blank line contains no tab separator.
    """
    transcription_list = []
    with open(transcription_file_name, "r") as transcription_file:
        # Iterating the file object already reads it lazily, line by line,
        # so no manual chunking with readlines() is needed.
        for line in transcription_file:
            line = line.strip()
            if not line:
                # A blank line (e.g. an empty line at the end of the file)
                # carries no entry; skip it instead of failing on the split.
                continue
            fields = line.split("\t")
            transcription_list.append((fields[0], fields[1].split(" ")))
    logging.debug("transcription_list:")
    logging.debug(transcription_list)
    return transcription_list
Пример #2
0
 def init_prob_matrix(self):
     """
     Allocate the grapheme/phoneme probability matrix, filled with zeros.

     The matrix has one row per entry of ``self.grapheme_dict`` and one column
     per entry of ``self.phoneme_dict``; it is stored on ``self.prob_matrix``.
     :return: the newly created float32 matrix (also kept as ``self.prob_matrix``).
     """
     matrix_shape = (len(self.grapheme_dict), len(self.phoneme_dict))
     zero_matrix = np.zeros(shape=matrix_shape, dtype=np.float32)
     self.prob_matrix = zero_matrix
     logging.debug("prob_matrix:")
     logging.debug(self.prob_matrix)
     return self.prob_matrix
Пример #3
0
 def normalize_prob_matrix(self):
     """
     Normalize ``self.prob_matrix`` in place so that each row sums to 1.

     Probability matrix is a matrix with shape: (grapheme_count, phoneme_count).
     A row whose sum is 0 cannot be normalized; it is left unchanged rather
     than being turned into NaN by a division by zero.
     :return: the normalized probability matrix (the same array object as
         ``self.prob_matrix``).
     """
     # keepdims=True keeps the sums as a column vector, so they broadcast
     # across the columns of each row.
     row_sums = np.sum(self.prob_matrix, axis=1, keepdims=True)
     # Divide in place; ``where`` skips rows with a zero sum, leaving their
     # current values in ``out`` untouched.
     np.divide(
         self.prob_matrix,
         row_sums,
         out=self.prob_matrix,
         where=row_sums != 0,
     )
     logging.debug("prob_matrix:")
     logging.debug(self.prob_matrix)
     return self.prob_matrix
Пример #4
0
def load_grapheme_dict(transcription_list):
    """
    Build a grapheme-to-id mapping from every character used in the words.

    Ids are assigned in sorted grapheme order, so the mapping is identical on
    every run (the iteration order of a set of strings is not guaranteed to be
    the same from one interpreter run to the next).

    :param transcription_list: iterable of (word, phones) pairs; only the
        words are used here.
    :return: a dictionary of grapheme-id pair like: {"a": 0, "b": 1, "c": 2, ...,}
    """
    grapheme_set = set()
    for word, _ in transcription_list:
        # update() adds each character of the word without copying the set.
        grapheme_set.update(word)
    grapheme_dict = {
        grapheme: index for index, grapheme in enumerate(sorted(grapheme_set))
    }
    logging.debug("grapheme_dict:")
    logging.debug(grapheme_dict)
    return grapheme_dict
Пример #5
0
def load_phoneme_dict(transcription_list):
    """
    Build a phoneme-to-id mapping from every phone used in the transcriptions.

    Phonemes are numbered in sorted order so the mapping is identical on every
    run; the extra symbol "*" is always added and always receives the last id.

    :param transcription_list: iterable of (word, phones) pairs; only the
        phones are used here.
    :return: a dictionary of phoneme-id pair like:
        {"b": 0, "ey1": 1, "iy2": 2, "s": 3, ..., "*": N}
    """
    phoneme_set = set()
    for _, phones in transcription_list:
        # update() adds each phone without copying the set.
        phoneme_set.update(phones)
    # "*" is appended explicitly below. If it also occurs in the data, drop it
    # here so it is not listed twice: a duplicate would leave a gap in the ids
    # and make the largest id equal to len(phoneme_dict).
    # NOTE(review): "*" presumably stands for the empty/epsilon phoneme -- confirm.
    phoneme_set.discard("*")
    phoneme_list = sorted(phoneme_set)
    phoneme_list.append("*")
    phoneme_dict = {phoneme: index for index, phoneme in enumerate(phoneme_list)}
    logging.debug("phoneme_dict:")
    logging.debug(phoneme_dict)
    return phoneme_dict
Пример #6
0
 def e_step(self):
     """
     Expectation step: pick, for every word-phones pair, the alignment path
     with the highest probability.

     For each (word, phones) entry of ``self.transcription_list`` the candidate
     pairs produced by ``introduce_epsilon_phone_seq`` are each aligned with
     ``self.dynamic_time_wrapping``, which yields an (align_path, prob_value)
     pair; the path with the largest prob_value is kept.
     :return: a list of align paths, one per transcription entry, like:
         [
             [("a", "ey1"), ("b", "b_iy1"), ("c", "s_iy0"), ],
             [("a", "ey1"), ("b", "b_iy1"), ],
             [("a", "ey1"), ("b", "b_iy1"), ("c", "s_iy0"), ("d", "d_iy0"), ],
         ]
     """
     best_paths = []
     for word, phones in self.transcription_list:
         pair_list = introduce_epsilon_phone_seq(word, phones)
         logging.debug("pair list:")
         logging.debug(pair_list)
         # Score every candidate (grapheme sequence, phoneme sequence) pair.
         scored_candidates = []
         for grapheme_seq, phoneme_seq in pair_list:
             path, probability = self.dynamic_time_wrapping(grapheme_seq, phoneme_seq)
             scored_candidates.append((path, probability))
         # Highest probability first; the sort is stable, so ties keep their
         # original order.
         ranked = sorted(scored_candidates, key=lambda scored: scored[1], reverse=True)
         best_paths.append(ranked[0][0])
     return best_paths
Пример #7
0
 def reset_prob_matrix(self, align_paths):
     """
     Update the probability matrix from a set of alignment paths.

     For every step of every path, the cell of ``self.prob_matrix`` addressed
     by the step's grapheme id (row) and phoneme id (column) is incremented
     by 1; afterwards ``self.normalize_prob_matrix()`` is called.
     :param align_paths: a list of step lists, like:
         [
             [("a", "ey1"), ("b", "b_iy1"), ..., ("c", "s_iy1")],
             [("a", "ey1"), ("b", "b_iy1"), ..., ("c", "s_iy1")],
             ...,
         ]
     :return: prob matrix
     """
     # NOTE(review): the counts are added on top of whatever values the matrix
     # already holds; nothing here clears it first -- confirm this is intended.
     logging.debug("before reset prob matrix:")
     logging.debug(self.prob_matrix)
     for path in align_paths:
         for pair in path:
             row_index = self.get_grapheme_id(pair[0])
             col_index = self.get_phoneme_id(pair[1])
             matrix_row = self.prob_matrix[row_index]
             matrix_row[col_index] += 1
     self.normalize_prob_matrix()
     logging.debug("after reset prob matrix:")
     logging.debug(self.prob_matrix)
     return self.prob_matrix