Example #1
    def ctc_cost2(self):
        '''
        Computes the mean CTC cost over one batch drawn from the dev set.
        '''
        #data = self.data
        data = DataSpeech(self.datapath)
        data.LoadDataList('dev')
        # draw one batch of 32 samples from the data generator
        x = next(data.data_genetator(32, self.AUDIO_LENGTH))
        [test_input_data, y, test_input_length, label_length], labels = x
        xx = [test_input_data, y, test_input_length, label_length]
        # the model output here is interpreted as the per-sample CTC cost
        y_pred2 = self._model.predict(xx)

        _mean = sum(y_pred2) / len(y_pred2)
        print(y_pred2, _mean)
        return _mean
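
For reference, ctc_cost2 unpacks each generator batch as [inputs, labels, input_length, label_length], y, the usual input layout for a Keras model whose CTC loss is computed in a lambda layer. Below is a minimal sketch of a generator with that contract, using the constants from Example #2; the name dummy_ctc_batch_gen and the // 8 downsampling factor are assumptions for illustration, not part of the project.

    import numpy as np

    def dummy_ctc_batch_gen(batch_size=32, audio_length=1600,
                            feature_length=200, label_max_len=64,
                            output_size=1417):
        '''Yields batches shaped like the ones ctc_cost2 unpacks.'''
        while True:
            # padded spectrogram features, one channel
            inputs = np.zeros((batch_size, audio_length, feature_length, 1))
            # padded label sequences of symbol ids
            labels = np.random.randint(0, output_size - 1,
                                       size=(batch_size, label_max_len))
            # valid frame count after the network's downsampling
            # (the // 8 factor is an assumption for this sketch)
            input_length = np.full((batch_size, 1), audio_length // 8)
            label_length = np.full((batch_size, 1), label_max_len)
            # dummy target: with a CTC lambda layer the model output is the loss
            y = np.zeros((batch_size, 1))
            yield [inputs, labels, input_length, label_length], y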
Example #2
    def __init__(self, datapath):
        '''
        Initialize.
        The default size of the pinyin output representation is 1417,
        i.e. 1416 pinyin plus 1 blank token.
        '''
        MS_OUTPUT_SIZE = 1417
        self.MS_OUTPUT_SIZE = MS_OUTPUT_SIZE  # size of each character vector in the network's final output
        #self.BATCH_SIZE = BATCH_SIZE  # batch size for one training run
        self.label_max_string_length = 64
        self.AUDIO_LENGTH = 1600
        self.AUDIO_FEATURE_LENGTH = 200

        self.datapath = datapath
        self.data = DataSpeech(datapath)

        self._model = self.CreateModel()
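
CreateModel itself is not shown in these examples. Purely as an illustration of how the constants above typically map onto the tensors of a Keras CTC model, here is a hypothetical skeleton; the layer stack is an assumption for this sketch, not the project's actual architecture.

    from keras.layers import Dense, GRU, Input, Reshape
    from keras.models import Model

    def create_model_sketch(audio_length=1600, feature_length=200,
                            output_size=1417):
        # spectrogram in, per-frame softmax over output_size symbols out
        # (by CTC convention the last index is the blank token)
        input_data = Input(shape=(audio_length, feature_length, 1))
        x = Reshape((audio_length, feature_length))(input_data)
        x = GRU(128, return_sequences=True)(x)
        y_pred = Dense(output_size, activation='softmax')(x)
        return Model(inputs=input_data, outputs=y_pred)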
Example #3
    def RecognizeSpeech(self, wavsignal, fs):
        '''
        The function ultimately used for speech recognition: recognizes the
        speech in one wav sequence. Note that there is still a bug here.
        '''

        #data = self.data
        data = DataSpeech('E:\\语音数据集')
        data.LoadDataList('dev')
        # get the input features
        #data_input = data.GetMfccFeature(wavsignal, fs)
        data_input = data.GetFrequencyFeature(wavsignal, fs)

        list_symbol_dic = data.list_symbol  # get the pinyin list

        # hard-coded reference transcript (pinyin with tone numbers)
        labels = [
            'dong1', 'bei3', 'jun1', 'de5', 'yi4', 'xie1', 'ai4', 'guo2',
            'jiang4', 'shi4', 'ma3', 'zhan4', 'shan1', 'li3', 'du4', 'tang2',
            'ju4', 'wu3', 'su1', 'bing3', 'ai4', 'deng4', 'tie3', 'mei2',
            'deng3', 'ye3', 'fen4', 'qi3', 'kang4', 'zhan4'
        ]
        #labels = [ list_symbol_dic[-1] ]
        #labels = [ list_symbol_dic[-1] ]
        #while(len(labels) < 32):
        #	labels.append(list_symbol_dic[-1])

        feat_out = []
        #print("sample number", n_start, filename)
        # map each pinyin symbol to its numeric id
        for i in labels:
            if ('' != i):
                n = data.SymbolToNum(i)
                feat_out.append(n)

        print(feat_out)
        labels = feat_out

        # wrap the single utterance into a batch (batch_size=2) via the generator
        x = next(
            self.data_gen(data_input=np.array(data_input),
                          data_labels=np.array(labels),
                          input_length=len(data_input),
                          labels_length=len(labels),
                          batch_size=2))

        [test_input_data, y, test_input_length, label_length], labels = x
        xx = [test_input_data, y, test_input_length, label_length]

        pred = self._model.predict(x=xx)

        print(pred)

        shape = pred[:, :].shape
        print(shape)

        print('test_input_data:', test_input_data)
        y_p = self.test_func([test_input_data])
        print(type(y_p))
        print('y_p:', y_p)

        # debug statistics over the output distribution; disabled, since
        # range(0, 0) never iterates
        for j in range(0, 0):
            mean = sum(y_p[0][0][j]) / len(y_p[0][0][j])
            print('max y_p:', max(y_p[0][0][j]), 'min y_p:', min(y_p[0][0][j]),
                  'mean y_p:', mean, 'mid y_p:', y_p[0][0][j][100])
            print('argmin:', np.argmin(y_p[0][0][j]), 'argmax:',
                  np.argmax(y_p[0][0][j]))
            count = 0
            for i in y_p[0][0][j]:
                if (i < mean):
                    count += 1
            print('count:', count)

        # sparsity check before and after the (currently disabled) densify step
        print(K.is_sparse(y_p))
        #y_p = K.to_dense(y_p)
        print(K.is_sparse(y_p))

        # collapse each frame's distribution to a single index; np.argmin looks
        # wrong here (greedy CTC decoding takes the argmax per frame) and is
        # likely the bug mentioned in the docstring
        _list = []
        for i in y_p:
            list_i = []
            for j in i:
                list_j = []
                for k in j:
                    list_j.append(np.argmin(k))
                list_i.append(list_j)
            _list.append(list_i)

        #y_p = np.array(_list, dtype = np.float)
        y_p = _list
        #print(y_p,type(y_p),y_p.shape)
        #y_p = tf.sparse_to_dense(y_p,(2,397),1417,0)
        print(test_input_length.T)
        test_input_length = test_input_length.reshape(2, 1)
        func_in_len = self.test_func_input_length([test_input_length])
        print(type(func_in_len))
        print(func_in_len)
        #in_len = np.ones(shape[0]) * shape[1]
        ctc_decoded = K.ctc_decode(y_p[0][0],
                                   input_length=tf.squeeze(
                                       func_in_len[0][0][0]))

        print(ctc_decoded)
        #ctc_decoded = ctc_decoded[0][0]
        #out = K.get_value(ctc_decoded)[:,:64]
        #pred = self._model.predict_on_batch([data_input, labels_num, input_length, label_length])
        return pred[0][0]
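
A note on the decode step above: K.ctc_decode expects the raw softmax output of shape (samples, time_steps, num_classes) plus a 1-D vector of valid frame counts, and in greedy mode performs the per-frame argmax itself, so the argmin list built above should not be fed to it. A corrected sketch of the decode call follows; the function name and its inputs are assumptions for illustration.

    from keras import backend as K

    def greedy_ctc_decode(y_pred, input_lengths):
        # y_pred: softmax output, shape (samples, time_steps, num_classes)
        # input_lengths: valid frame counts, shape (samples,)
        decoded, log_probs = K.ctc_decode(y_pred,
                                          input_length=input_lengths,
                                          greedy=True)
        # decoded[0] holds the label-id sequences, padded with -1
        return K.get_value(decoded[0]), K.get_value(log_probs)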