Пример #1
0
    def test(self):
        """Decode ten batches and print the reference text next to the recognised text.

        Each iteration asks ``utils.next_batch`` for one sample (the cursor
        starts at 20), runs the decoder with ``dropout=1.0`` and prints the
        first reference/decoded pair. The session is closed on exit, including
        when a batch raises, so no further graph calls can be made on this
        instance afterwards.
        """
        next_idx = 20

        try:
            # Build the densifying op once. Calling tf.sparse_tensor_to_dense
            # inside the loop would add a new node to the graph per batch.
            dense_decoded_op = tf.sparse_tensor_to_dense(self.decoded[0], default_value=-1)

            for _ in range(10):
                (next_idx,
                 self.audio_features,
                 self.audio_features_len,
                 self.sparse_labels,
                 wav_files) = utils.next_batch(
                    next_idx,
                    1,
                    n_input,
                    n_context,
                    self.text_labels,
                    self.wav_files,
                    self.word_num_map)

                print('读入语音文件: ', wav_files[0])
                print('开始识别语音数据......')

                # label_err is still fetched together with the decode, as
                # before, but this method does not report its value.
                dense_decoded, _label_err = self.sess.run(
                    [dense_decoded_op, self.label_err],
                    feed_dict=self.get_feed_dict(dropout=1.0))
                dense_labels = utils.trans_tuple_to_texts_ch(self.sparse_labels, self.words)

                # Only the first reference/decoded pair is printed.
                for orig, decoded_array in zip(dense_labels, dense_decoded):
                    # Convert the decoded id array to a string.
                    decoded_str = utils.trans_array_to_text_ch(decoded_array, self.words)
                    print('语音原始文本: {}'.format(orig))
                    print('识别出来的文本:  {}'.format(decoded_str))
                    break
        finally:
            # Release the session even when a batch fails.
            self.sess.close()
Пример #2
0
    def test(self):
        """Decode twenty batches and print the reference text next to the recognised text.

        Each iteration asks ``utils.next_batch`` for one sample (the cursor
        starts at 20), runs the decoder with ``dropout=1.0`` and prints the
        first reference/decoded pair. The session is closed on exit, including
        when a batch raises, so no further graph calls can be made on this
        instance afterwards.
        """
        next_idx = 20

        try:
            # Create the densifying op a single time; building it per batch
            # would keep adding nodes to the graph.
            to_dense = tf.sparse_tensor_to_dense(self.decoded[0], default_value=-1)

            for _ in range(20):
                batch = utils.next_batch(
                    next_idx,
                    1,
                    n_input,
                    n_context,
                    self.text_labels,
                    self.wav_files,
                    self.word_num_map)
                (next_idx,
                 self.audio_features,
                 self.audio_features_len,
                 self.sparse_labels,
                 wav_files) = batch

                print('读入语音文件: ', wav_files[0])
                print('开始识别语音数据......')

                # label_err is evaluated as in the original call but not shown.
                dense_decoded, _label_err = self.sess.run(
                    [to_dense, self.label_err],
                    feed_dict=self.get_feed_dict(dropout=1.0))
                dense_labels = utils.trans_tuple_to_texts_ch(self.sparse_labels, self.words)

                # Report the first pair only.
                for orig, decoded_array in zip(dense_labels, dense_decoded):
                    # Convert the decoded id array to a string.
                    decoded_str = utils.trans_array_to_text_ch(decoded_array, self.words)
                    print('语音的原始文本: {}'.format(orig))
                    print('识别出来的文本: {}'.format(decoded_str))
                    break
        finally:
            # Always release the session, also on failure.
            self.sess.close()
Пример #3
0
    def train(self):
        """Run the training loop for 120 epochs and checkpoint after each epoch.

        Epochs below ``self.startepo`` are skipped. Within an epoch, batches
        are drawn with ``utils.next_batch`` and one optimizer step is run per
        batch. Every 70th batch a merged summary is written, the running mean
        loss and the label error are printed, and the first reference/decoded
        pair of the batch is shown. After each epoch the model is saved with
        ``self.saver`` using the epoch number as ``global_step``.

        The session is closed when the method returns or raises.
        """
        epochs = 120
        section = '\n{0:=^40}\n'
        print(section.format('开始训练'))

        train_start = time.time()
        try:
            # Build the densifying op once; creating it at every sampling point
            # would grow the graph for the whole duration of training.
            dense_decoded_op = tf.sparse_tensor_to_dense(self.decoded[0], default_value=-1)

            for epoch in range(epochs):  # passes over the sample set
                epoch_start = time.time()
                if epoch < self.startepo:
                    continue
                print("第:", epoch, " 次迭代,一共要迭代 ", epochs, "次")

                n_batches_epoch = int(np.ceil(len(self.text_labels) / batch_size))
                print("在本次迭代中一共循环: ", n_batches_epoch, "每次取:", batch_size)
                train_cost = 0
                train_err = 0
                next_idx = 0
                for batch in range(n_batches_epoch):  # one draw of batch_size samples per step
                    # Fetch the next batch; get_feed_dict reads it from self.
                    (next_idx,
                     self.audio_features,
                     self.audio_features_len,
                     self.sparse_labels,
                     wav_files) = utils.next_batch(
                        next_idx,
                        batch_size,
                        n_input,
                        n_context,
                        self.text_labels,
                        self.wav_files,
                        self.word_num_map)

                    # Evaluate the loss and apply one optimizer step.
                    batch_cost, _ = self.sess.run([self.avg_loss, self.optimizer], feed_dict=self.get_feed_dict())
                    train_cost += batch_cost

                    if (batch + 1) % 70 == 0:
                        rs = self.sess.run(self.merged, feed_dict=self.get_feed_dict())
                        # Use a step that keeps increasing across epochs; the
                        # bare batch index restarts at 0 each epoch and would
                        # make summaries from different epochs share steps.
                        self.writer.add_summary(rs, epoch * n_batches_epoch + batch)
                        print('循环次数:', batch, '损失: ', train_cost / (batch + 1))

                        dense_decoded, train_err = self.sess.run(
                            [dense_decoded_op, self.label_err],
                            feed_dict=self.get_feed_dict(dropout=1.0))
                        dense_labels = utils.trans_tuple_to_texts_ch(self.sparse_labels, self.words)
                        print('错误率: ', train_err)

                        # Show only the first sample of the batch.
                        for orig, decoded_array in zip(dense_labels, dense_decoded):
                            # Convert to strings.
                            decoded_str = utils.trans_array_to_text_ch(decoded_array, self.words)
                            print('语音原始文本: {}'.format(orig))
                            print('识别出来的文本:  {}'.format(decoded_str))
                            break

                epoch_duration = time.time() - epoch_start

                # train_cost is the summed batch loss of the epoch; train_err
                # is the most recent sampled label error (0 if never sampled).
                log = '迭代次数 {}/{}, 训练损失: {:.3f}, 错误率: {:.3f}, time: {:.2f} sec'
                print(log.format(epoch, epochs, train_cost, train_err, epoch_duration))
                self.saver.save(self.sess, self.savedir + self.conf.get("FILE_DATA").savefile, global_step=epoch)

            train_duration = time.time() - train_start
            print('Training complete, total duration: {:.2f} min'.format(train_duration / 60))
        finally:
            # Release the session even if training is interrupted or fails.
            self.sess.close()
Пример #4
0
    def test_target_wav_file(self, wav_files, txt_labels):
        """Recognise the given wav file(s) and print the reference and decoded text.

        Features and label vectors are produced by
        ``utils.get_audio_mfcc_features``; the decoder is run with
        ``dropout=1.0`` and the first decoded row is converted to text.

        Args:
            wav_files: Sequence of wav file paths; element 0 is printed.
            txt_labels: Sequence of reference texts; element 0 is printed.

        The session is closed when the method returns or raises.
        """
        try:
            print('读入语音文件: ', wav_files[0])
            print('开始识别语音数据......')

            # The fourth return value (label lengths) is not needed here.
            self.audio_features, self.audio_features_len, text_vector, _ = utils.get_audio_mfcc_features(
                None, wav_files, n_input, n_context, self.word_num_map, txt_labels)
            self.sparse_labels = utils.sparse_tuple_from(text_vector)

            # label_err is fetched with the decode but not reported.
            d, _label_err = self.sess.run([self.decoded[0], self.label_err],
                                          feed_dict=self.get_feed_dict(dropout=1.0))
            dense_decoded = tf.sparse_tensor_to_dense(
                d, default_value=-1).eval(session=self.sess)
            decoded_str = utils.trans_array_to_text_ch(dense_decoded[0],
                                                       self.words)
            print('语音原始文本: {}'.format(txt_labels[0]))
            print('识别出来的文本:  {}'.format(decoded_str))
        finally:
            # Close the session on every exit path, not just on success.
            self.sess.close()
Пример #5
0
    def test_target_wav_file(self, wav_files, txt_labels):
        """Recognise the given wav file(s) and print the reference and decoded text.

        Features and label vectors come from
        ``utils.get_audio_mfcc_features``; the decoder is then run with
        ``dropout=1.0`` and the first decoded row is turned into text.

        Args:
            wav_files: Sequence of wav file paths; element 0 is printed.
            txt_labels: Sequence of reference texts; element 0 is printed.

        The session is closed when the method returns or raises.
        """
        try:
            print('读入语音文件: ', wav_files[0])
            print('开始识别语音数据......')

            features = utils.get_audio_mfcc_features(
                None,
                wav_files,
                n_input,
                n_context,
                self.word_num_map,
                txt_labels)
            # The trailing element (label lengths) is unused by this method.
            self.audio_features, self.audio_features_len, text_vector, _ = features
            self.sparse_labels = utils.sparse_tuple_from(text_vector)

            # label_err is evaluated together with the decode but not shown.
            d, _label_err = self.sess.run(
                [self.decoded[0], self.label_err],
                feed_dict=self.get_feed_dict(dropout=1.0))
            dense_decoded = tf.sparse_tensor_to_dense(d, default_value=-1).eval(session=self.sess)
            decoded_str = utils.trans_array_to_text_ch(dense_decoded[0], self.words)
            print('语音原始文本: {}'.format(txt_labels[0]))
            print('识别出来的文本:  {}'.format(decoded_str))
        finally:
            # Make sure the session is released on failure as well.
            self.sess.close()
Пример #6
0
    def train(self):
        """Run the training loop for 200 epochs and checkpoint after each epoch.

        Epochs below ``self.startepo`` are skipped. Within an epoch, batches
        are drawn with ``utils.next_batch(..., pick_deterministically=False)``
        and one optimizer step is run per batch. Every 100th batch the running
        mean loss and the label error are logged together with the first
        reference/decoded pair of the batch. After each epoch the model is
        saved with ``self.saver`` using ``epoch + 1`` as ``global_step``.

        All progress output goes through ``tf.logging.info``. The session is
        closed when the method returns or raises.
        """
        epochs = 200

        section = '\n{0:=^40}\n'
        tf.logging.info(section.format('开始训练'))

        train_start = time.time()
        try:
            # Build the densifying op once; creating it at every sampling point
            # would grow the graph for the whole duration of training.
            dense_decoded_op = tf.sparse_tensor_to_dense(
                self.decoded[0], default_value=-1)

            for epoch in range(epochs):  # passes over the sample set
                epoch_start = time.time()
                if epoch < self.startepo:
                    continue

                tf.logging.info("第" + str(epoch + 1) + "次迭代,一共要迭代" + str(epochs) +
                                "次")

                n_batches_epoch = int(np.ceil(len(self.text_labels) / batch_size))
                tf.logging.info("在本次迭代中一共循环" + str(n_batches_epoch) + ",每次取" +
                                str(batch_size))

                train_cost = 0
                train_err = 0
                next_idx = 0

                for batch in range(n_batches_epoch):  # one draw of batch_size samples per step
                    tf.logging.info(
                        '%d/%d:%s', batch + 1, n_batches_epoch,
                        datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

                    # Fetch the next batch; get_feed_dict reads it from self.
                    (next_idx,
                     self.audio_features,
                     self.audio_features_len,
                     self.sparse_labels,
                     wav_files) = utils.next_batch(
                        next_idx,
                        batch_size,
                        n_input,
                        n_context,
                        self.text_labels,
                        self.wav_files,
                        self.word_num_map,
                        pick_deterministically=False)

                    # Evaluate the loss and apply one optimizer step.
                    batch_cost, _ = self.sess.run([self.avg_loss, self.optimizer],
                                                  feed_dict=self.get_feed_dict())
                    train_cost += batch_cost

                    if (batch + 1) % 100 == 0:
                        # (TensorBoard summary writing is disabled here.)
                        tf.logging.info('循环次数:' + str(batch + 1) + '损失:' +
                                        str(train_cost / (batch + 1)))

                        dense_decoded, train_err = self.sess.run(
                            [dense_decoded_op, self.label_err],
                            feed_dict=self.get_feed_dict(dropout=1.0))
                        dense_labels = utils.trans_tuple_to_texts_ch(
                            self.sparse_labels, self.words)

                        tf.logging.info('错误率:' + str(train_err))

                        # Show only the first sample of the batch.
                        for orig, decoded_array in zip(dense_labels,
                                                       dense_decoded):
                            # Convert to strings.
                            # NOTE(review): .encode('utf-8') inside format()
                            # renders as b'...' on Python 3 - confirm which
                            # interpreter this file targets.
                            decoded_str = utils.trans_array_to_text_ch(
                                decoded_array, self.words)
                            tf.logging.info('语音原始文本:{}'.format(
                                orig.encode('utf-8')))
                            tf.logging.info('识别出来的文本:{}'.format(
                                decoded_str.encode('utf-8')))
                            break

                epoch_duration = time.time() - epoch_start

                # train_cost is the summed batch loss of the epoch; train_err
                # is the most recent sampled label error (0 if never sampled).
                log = '迭代次数 {}/{}, 训练损失:{:.3f}, 错误率:{:.3f}, time:{:.2f} sec'
                tf.logging.info(
                    log.format(epoch + 1, epochs, train_cost, train_err,
                               epoch_duration))
                self.saver.save(self.sess,
                                self.savedir + self.conf.get("FILE_DATA").savefile,
                                global_step=epoch + 1)

            train_duration = time.time() - train_start
            tf.logging.info('Training complete, total duration:{:.2f} min'.format(
                train_duration / 60))
        finally:
            # Release the session even if training is interrupted or fails.
            self.sess.close()
Пример #7
0
    def train(self):
        """Run the training loop for 120 epochs and checkpoint after each epoch.

        Epochs below ``self.startepo`` are skipped. Within an epoch, batches
        are drawn with ``utils.next_batch`` and one optimizer step is run per
        batch. Every 70th batch a merged summary is written, the running mean
        loss and the label error are printed, and the first reference/decoded
        pair of the batch is shown. After each epoch the model is saved with
        ``self.saver`` using the epoch number as ``global_step``.

        The session is closed when the method returns or raises.
        """
        epochs = 120

        section = '\n{0:=^40}\n'
        print(section.format('开始训练'))

        train_start = time.time()
        try:
            # Create the densifying op a single time up front; building it at
            # each sampling point would keep adding nodes to the graph.
            to_dense = tf.sparse_tensor_to_dense(self.decoded[0], default_value=-1)

            for epoch in range(epochs):  # passes over the sample set
                epoch_start = time.time()
                if epoch < self.startepo:
                    continue

                print("第:", epoch, " 次迭代,一共要迭代 ", epochs, "次")

                n_batches_epoch = int(np.ceil(len(self.text_labels) / batch_size))
                print("在本次迭代中一共循环: ", n_batches_epoch, "每次取:", batch_size)

                train_cost = 0
                train_err = 0
                next_idx = 0

                for batch in range(n_batches_epoch):  # one draw of batch_size samples per step
                    # Fetch the next batch; get_feed_dict reads it from self.
                    fetched = utils.next_batch(
                        next_idx,
                        batch_size,
                        n_input,
                        n_context,
                        self.text_labels,
                        self.wav_files,
                        self.word_num_map)
                    (next_idx,
                     self.audio_features,
                     self.audio_features_len,
                     self.sparse_labels,
                     wav_files) = fetched

                    # Evaluate the loss and apply one optimizer step.
                    batch_cost, _ = self.sess.run([self.avg_loss, self.optimizer], feed_dict=self.get_feed_dict())
                    train_cost += batch_cost

                    if (batch + 1) % 70 == 0:
                        rs = self.sess.run(self.merged, feed_dict=self.get_feed_dict())
                        # The batch index restarts at 0 every epoch, so offset
                        # it by the epoch to keep summary steps increasing.
                        self.writer.add_summary(rs, epoch * n_batches_epoch + batch)

                        print('循环次数:', batch, '损失: ', train_cost / (batch + 1))

                        dense_decoded, train_err = self.sess.run(
                            [to_dense, self.label_err],
                            feed_dict=self.get_feed_dict(dropout=1.0))
                        dense_labels = utils.trans_tuple_to_texts_ch(self.sparse_labels, self.words)

                        print('错误率: ', train_err)

                        # Show only the first sample of the batch.
                        for orig, decoded_array in zip(dense_labels, dense_decoded):
                            # Convert to strings.
                            decoded_str = utils.trans_array_to_text_ch(decoded_array, self.words)
                            print('语音原始文本: {}'.format(orig))
                            print('识别出来的文本:  {}'.format(decoded_str))
                            break

                epoch_duration = time.time() - epoch_start

                # train_cost is the summed batch loss of the epoch; train_err
                # is the most recent sampled label error (0 if never sampled).
                log = '迭代次数 {}/{}, 训练损失: {:.3f}, 错误率: {:.3f}, time: {:.2f} sec'
                print(log.format(epoch, epochs, train_cost, train_err, epoch_duration))
                self.saver.save(self.sess, self.savedir + self.conf.get("FILE_DATA").savefile, global_step=epoch)

            train_duration = time.time() - train_start
            print('Training complete, total duration: {:.2f} min'.format(train_duration / 60))
        finally:
            # Release the session even if training is interrupted or fails.
            self.sess.close()