示例#1
0
    def initialize_unit(self, unit_type='XIF'):
        """
        Load the unit list for ``unit_type`` and build one HMM per unit.

        The unit file is located at ``unit + unit_type``. Its first line is
        printed as a header; every following line is read as a
        comma-separated list of unit names. For each name a transition
        matrix, ``state_num - 2`` GMMs and two virtual states are created and
        wrapped in an ``LHMM``. The result is stored in ``self.__unit`` as
        ``{unit_name: [training_count, hmm]}`` with the count starting at 0.

        Empty names (blank lines, trailing or doubled commas) are skipped so
        that no HMM is registered under the empty string.

        :param unit_type: unit type; names both the unit file and the
            parameter sub-directory that is created for it
        :return: None
        :raises FileExistsError: if the unit file does not exist. The type is
            kept for backward compatibility with existing callers, although
            FileNotFoundError would describe the condition more accurately.
        """
        unit_type_path = unit + unit_type
        self.__unit_type = unit_type
        if not os.path.exists(unit_type_path):
            raise FileExistsError('Error: 基元文件%s不存在' % unit_type)
        # Create the parameter directory; tolerate it already being there
        # (avoids the check-then-create race between concurrent workers).
        try:
            os.mkdir(path_parameter + unit_type)
        except FileExistsError:
            pass

        state_num = self.__state_num
        with open(unit_type_path) as f:
            header = f.readline()
            print('使用基元:', header)
            print('载入基元中...')
            # An empty header means an empty file: nothing to load.
            if header:
                for line in f:
                    for name in line.strip('\n').split(','):
                        if not name:
                            continue
                        # State set: every state index maps to the unit name.
                        states = dict.fromkeys(range(state_num), name)
                        # Observation probability representation (GMM).
                        observations = ['GMM_probability']
                        # Transition matrix, built fresh for every unit so
                        # that no two HMMs share the same array.
                        A = np.zeros((state_num, state_num))
                        # The start state is virtual and may only move on to
                        # the next state.
                        A[0][1] = 1.
                        # Every inner state stays or advances with equal
                        # probability.
                        for j in range(1, state_num - 1):
                            A[j][j] = 0.5
                            A[j][j + 1] = 0.5
                        # One GMM per emitting (inner) state.
                        emitting = [
                            Clustering.GMM(self.__vector_size,
                                           self.__mix_level)
                            for _ in range(state_num - 2)
                        ]
                        # Virtual scorers for the start and end states.
                        virtual_gmm_1 = AcousticModel.VirtualState(0.)
                        virtual_gmm_2 = AcousticModel.VirtualState(0.)
                        gmm = [virtual_gmm_1] + emitting + [virtual_gmm_2]
                        # Build the HMM instance.
                        lhmm = LHMM(states,
                                    observations,
                                    None,
                                    A=A,
                                    profunc=gmm)
                        # Data structure: {unit: [training count, HMM], ...}
                        self.__unit[name] = [0, lhmm]
        print('基元载入完成 √')
示例#2
0
        def generate(_unit):
            """
            Build the HMM for one unit and register it in ``self.__unit``.

            ``self`` and ``new_log`` are not parameters; they are free
            variables resolved from the surrounding scope, which is not
            visible in this excerpt.

            :param _unit: unit name; used as the state label, as the name of
                the unit's parameter directory and as the key in
                ``self.__unit``
            :return: None
            """
            state_num = self.__state_num
            # State set: every state index maps to the unit name.
            states = dict.fromkeys(range(state_num), _unit)
            # Observation probability representation (GMM).
            observations = ['GMM_probability']
            # Transition matrix.
            A = np.zeros((state_num, state_num))
            # The start state is virtual and may only move on to the next
            # state.
            A[0][1] = 1.
            # Every inner state stays or advances with equal probability.
            for j in range(1, state_num - 1):
                A[j][j] = 0.5
                A[j][j + 1] = 0.5
            # Create the unit directory. Attempting the mkdir and tolerating
            # FileExistsError avoids the check-then-create race when several
            # workers initialise the same unit at once.
            unit_path = PARAMETERS_FILE_PATH + '/%s/%s' % (self.__unit_type,
                                                           _unit)
            try:
                os.mkdir(unit_path)
            except FileExistsError:
                pass
            # Open the unit's log, either fresh or in append mode.
            log = Log(self.__unit_type, _unit, console=self.__console)
            if new_log:
                log.generate()
            else:
                log.append()
            # One GMM per emitting (inner) state.
            emitting = [
                Clustering.GMM(self.__vector_size, self.__mix_level, log)
                for _ in range(state_num - 2)
            ]
            # Virtual scorers for the start and end states.
            virtual_gmm_1 = AcousticModel.VirtualState(0.)
            virtual_gmm_2 = AcousticModel.VirtualState(0.)
            gmm = [virtual_gmm_1] + emitting + [virtual_gmm_2]
            # Build the HMM instance.
            lhmm = LHMM(states,
                        observations,
                        log,
                        T=None,
                        A=A,
                        profunc=gmm,
                        pi=None)
            # Data structure: {unit: HMM, ...}
            self.__unit[_unit] = lhmm
示例#3
0
 def viterbi(self, complex_states, complex_transmat, complex_prob,
             complex_pi):
     """
     Viterbi segmentation over a composite (embedded) model.

     Delegates to ``LHMM.viterbi`` with this object's log and the four
     composite structures, with ``convert`` and ``show_mark_state`` enabled.

     :param complex_states: composite state matrix
     :param complex_transmat: composite state transition matrix
     :param complex_prob: composite observation matrix
     :param complex_pi: composite initial probability matrix
     :return: whatever ``LHMM.viterbi`` returns
     """
     # Fixed options for Viterbi forced alignment.
     options = {'convert': True, 'show_mark_state': True}
     alignment = LHMM.viterbi(self.log, complex_states, complex_transmat,
                              complex_prob, complex_pi, **options)
     return alignment
示例#4
0
    def multi_embedded_training_1(self, label, data, init, *args):
        """
        Embedded HMM training for one utterance (multi-process variant).

        For every unit in ``label`` an HMM is initialised, its parameters are
        loaded and its observation probabilities are computed for ``data``.
        The per-unit HMMs are then joined into one composite HMM, which is
        trained with ``baulm_welch``; afterwards each unit's result is
        handed to ``self.__save_acc``, progress is logged and the log is
        closed.

        :param label: audio transcription as a list of units [a, b, c, ...]
        :param data: audio data
        :param init: whether to initialise (forwarded as ``new_log``)
        :param args: extra arguments, by position: show_q, number of the
            audio currently processed, total number of audios, fix_code
        :return: None
        """
        observations = [data]
        frame_counts = [len(data)]

        # Prepare each unit's HMM and compute its observation densities.
        unit_hmms = []
        for name in label:
            model = self.init_unit(unit=name, new_log=init)
            self.init_parameter(name, hmm=model)
            unit_hmms.append(model)
            model.cal_observation_pro(observations, frame_counts)
            model.clear_data()

        # Assemble the embedded (composite) HMM.
        complex_states, complex_transmat, complex_prob, complex_pi = \
            self.embedded(label, unit_hmms, 0, 15)
        embed_hmm = LHMM(complex_states,
                         self.__state_num,
                         self.log,
                         transmat=complex_transmat,
                         probmat=[complex_prob],
                         pi=complex_pi,
                         hmm_list=unit_hmms,
                         fix_code=args[3])
        embed_hmm.add_data(observations)
        embed_hmm.add_T(frame_counts)
        embed_hmm.baulm_welch(show_q=args[0])

        # Store the result of every unit, in label order.
        for name, model in zip(label, unit_hmms):
            self.__save_acc(name, model)
        progress = '当前已处理音频:%d / %d' % (args[1], args[2])
        self.log.note(progress, cls='i')
        # Close the log.
        self.log.close()
示例#5
0
 def viterbi(self, label, data_size, data_index):
     """
     Viterbi segmentation of one labelled utterance.

     Builds the composite model through ``self.embedded`` and passes its
     five components to ``LHMM.viterbi``.

     :param label: transcription
     :param data_size: data length (forwarded as ``O_size``)
     :param data_index: data index
     :return: whatever ``LHMM.viterbi`` returns
     """
     composite = self.embedded(label, data_index, 31)
     states, observation, transitions, emissions, initial = composite
     # Viterbi forced alignment.
     alignment = LHMM.viterbi(states,
                              observation,
                              transitions,
                              emissions,
                              initial,
                              O_size=data_size,
                              matrix=False,
                              convert=True,
                              end_state_back=False)
     return alignment
示例#6
0
    def init_unit(self, unit, new_log=True, fix_code=0):
        """
        Initialise one unit and return its composite HMM.

        Creates the unit's directory together with one ``HMM`` and
        ``state_num - 2`` ``GMM_<id>`` sub-directories, opens a log for each
        of them, builds the GMMs and the two virtual states and wraps
        everything in an ``LHMM``.

        Every directory is created independently, so a directory that
        already exists no longer prevents the remaining ones from being
        created.

        :param unit: the unit to initialise
        :param new_log: if true the logs are started with ``generate()``
            (discarding previous logs), otherwise opened with ``append()``
        :param fix_code: switches parameter updates off, forwarded to
            ``LHMM``; 000=0 001=1 010=2 100=4...
        :return: the newly built ``LHMM`` instance
        """
        state_num = self.__state_num
        gmm_count = state_num - 2

        # State set: every state index maps to the unit name.
        states = dict.fromkeys(range(state_num), unit)
        # Transition matrix.
        transmat = np.zeros((state_num, state_num))
        # The start state is virtual and may only move on to the next state.
        transmat[0][1] = 1.
        for j in range(1, state_num - 1):
            transmat[j][j] = 0.5  # stay in the same state
            transmat[j][j + 1] = 0.5  # advance to the next state

        # Directory layout for this unit.
        unit_path = PARAMETERS_FILE_PATH + '/%s/%s' % (self.__unit_type, unit)
        log_hmm_path = unit_path + '/HMM'
        log_gmm_path = [
            unit_path + '/GMM_%d' % gmm_id for gmm_id in range(gmm_count)
        ]
        # Create each directory on its own: an existing one is skipped
        # without aborting the creation of the others.
        for directory in [unit_path, log_hmm_path] + log_gmm_path:
            try:
                os.mkdir(directory)
            except FileExistsError:
                pass

        # One log for the HMM and one per GMM.
        log_hmm = Log(self.__unit_type, log_hmm_path, console=self.__console)
        log_gmm = [
            Log(self.__unit_type, path=gmm_path, console=self.__console)
            for gmm_path in log_gmm_path
        ]
        for log in [log_hmm] + log_gmm:
            if new_log:
                log.generate()
            else:
                log.append()

        # One GMM per emitting (inner) state.
        gmm = [
            Clustering.GMM(gmm_log,
                           dimension=self.__vector_size,
                           mix_level=self.__mix_level,
                           gmm_id=gmm_id)
            for gmm_id, gmm_log in enumerate(log_gmm)
        ]
        # Virtual scorers for the start (1.) and end (0.) states.
        virtual_gmm_1 = AcousticModel.VirtualState(1.)
        virtual_gmm_2 = AcousticModel.VirtualState(0.)
        gmm.insert(0, virtual_gmm_1)
        gmm.append(virtual_gmm_2)

        # Build the HMM instance.
        lhmm = LHMM(states,
                    state_num,
                    log_hmm,
                    transmat=transmat,
                    profunc=gmm,
                    fix_code=fix_code)
        return lhmm