        def iter_asr(set_name, set_train_mode, set_rr):
            assert set_name in ['train', 'dev', 'test']
            rr = set_rr
            rr = sort_reverse(rr, feat_len[set_name])
            rr_key = feat_iterator[set_name].get_key_by_index(rr)
            curr_feat_list = feat_iterator[set_name].get_feat_by_key(rr_key)
            curr_text_list = text_iterator[set_name].get_text_by_key(rr_key)

            curr_feat_mat, curr_feat_len, curr_text_mat, curr_text_len = batch_speech_text(
                opts['gpu'], curr_feat_list, curr_text_list)

            _loss, _acc = fn_batch_asr(model_asr,
                                       curr_feat_mat,
                                       curr_feat_len,
                                       curr_text_mat,
                                       curr_text_len,
                                       train_step=set_train_mode,
                                       coeff_loss=opts['coeff_pair'])
            _loss /= opts['coeff_pair']
            assert_nan(_loss)
            _count = len(rr)
            m_asr_loss[set_name] += _loss * _count
            m_asr_acc[set_name] += _acc * _count
            m_asr_count[set_name] += _count
            if tf_writer is not None:
                auto_writer_info_asr(set_name, _loss, _acc)
        def iter_tts(set_name, set_train_mode, set_rr):
            assert set_name in ['train', 'dev', 'test']
            rr = set_rr
            rr = sort_reverse(rr, text_len[set_name])
            rr_key = text_iterator[set_name].get_key_by_index(rr)
            curr_feat_list = feat_iterator[set_name].get_feat_by_key(rr_key)
            curr_text_list = text_iterator[set_name].get_text_by_key(rr_key)
            if model_tts.TYPE == TacotronType.MULTI_SPEAKER:
                curr_spkvec_list = feat_spkvec_iterator.get_feat_by_key(rr_key)
                curr_aux_info = {'speaker_vector': curr_spkvec_list}
            else:
                curr_aux_info = None
            curr_feat_mat, curr_feat_len, curr_text_mat, curr_text_len = batch_speech_text(
                opts['gpu'],
                curr_feat_list,
                curr_text_list,
                feat_sil=feat_sil,
                group=opts['tts_group'],
                start_sil=1,
                end_sil=opts['tts_pad_sil'])
            _loss, _loss_feat, _loss_bce_fend, _loss_spk_emb, _acc_fend = fn_batch_tts(
                model_tts,
                curr_text_mat,
                curr_text_len,
                curr_feat_mat,
                curr_feat_len,
                aux_info=curr_aux_info,
                train_step=set_train_mode,
                coeff_loss=opts['coeff_pair'])
            _loss /= opts['coeff_pair']
            assert_nan(_loss)
            _count = len(rr)
            m_tts_loss[set_name] += _loss * _count
            m_tts_loss_feat[set_name] += _loss_feat * _count
            m_tts_loss_bce[set_name] += _loss_bce_fend * _count
            m_tts_loss_spk_emb[set_name] += _loss_spk_emb * _count
            m_tts_acc[set_name] += _acc_fend * _count
            m_tts_count[set_name] += _count

            if tf_writer is not None:
                auto_writer_info_tts(set_name, _loss, _loss_feat,
                                     _loss_bce_fend, _loss_spk_emb, _acc_fend)
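
A minimal sketch, assuming train_rr/dev_rr/test_rr are lists of batch index lists and tqdm_wrapper is the same progress helper used in Example #4, of how iter_asr and iter_tts might be driven inside the surrounding epoch loop; the actual driver is not shown in this fragment.

        for set_name, set_rr, set_train_mode in [('train', train_rr, True),
                                                 ('dev', dev_rr, False),
                                                 ('test', test_rr, False)]:
            for rr in tqdm_wrapper(set_rr):
                # run the ASR and TTS sub-tasks on the same batch of utterance indices
                iter_asr(set_name, set_train_mode, rr)
                iter_tts(set_name, set_train_mode, rr)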
Example #3
                curr_key_list = feat_iterator[set_name].get_key_by_index(rr)
                curr_feat_list = feat_iterator[set_name].get_feat_by_key(
                    curr_key_list)
                if opts['strip_sil']:
                    curr_feat_list = list_feat_sil_strip(curr_feat_list)
                curr_label_list = text_iterator[set_name].get_text_by_key(
                    curr_key_list)
                aux_info = None
                if model.TYPE == TacotronType.MULTI_SPEAKER:
                    curr_spkvec_list = feat_spkvec_iterator.get_feat_by_key(
                        curr_key_list)
                    aux_info = {'speaker_vector': curr_spkvec_list}

                # print(1, timeit.default_timer() - tic); tic = timeit.default_timer()
                feat_mat, feat_len, text_mat, text_len = batch_speech_text(
                    opts['gpu'],
                    curr_feat_list,
                    curr_label_list,
                    feat_sil=feat_sil,
                    group=opts['group'],
                    start_sil=1,
                    end_sil=opts['pad_sil'])
                # print(2, timeit.default_timer() - tic); tic = timeit.default_timer()
                _tmp_loss, _tmp_loss_feat, _tmp_loss_bernend, _tmp_acc_bernend = fn_batch(
                    text_mat,
                    text_len,
                    feat_mat,
                    feat_len,
                    aux_info=aux_info,
                    train_step=set_train_mode)
                # print(3, timeit.default_timer() - tic); tic = timeit.default_timer()
                _tmp_count = len(rr)
                assert_nan(_tmp_loss)
                mloss[set_name] += _tmp_loss * _tmp_count
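
Example #3 optionally strips silence with list_feat_sil_strip, whose implementation is not shown; the helper below is a hypothetical reconstruction that trims leading and trailing frames close to the silence frame feat_sil (the name, the feat_sil argument and the threshold-based comparison are assumptions, not the source's code).

import numpy as np

def list_feat_sil_strip_sketch(feat_list, feat_sil, thr=1e-3):
    """Trim leading/trailing frames whose distance to the silence frame is below thr."""
    stripped = []
    for feat in feat_list:  # feat: (time, dim) array of acoustic frames
        dist = np.linalg.norm(feat - feat_sil, axis=1)
        keep = np.where(dist > thr)[0]
        if len(keep) == 0:
            stripped.append(feat)  # all-silence utterance: leave untouched
        else:
            stripped.append(feat[keep[0]:keep[-1] + 1])
    return stripped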
Example #4
        for set_name, set_rr, set_train_mode in [('train', train_rr, True),
                                                 ('dev', dev_rr, False),
                                                 ('test', test_rr, False)]:
            for rr in tqdm_wrapper(set_rr):
                start = time.time()
                curr_key_list = feat_iterator[set_name].get_key_by_index(rr)
                curr_feat_list = feat_iterator[set_name].get_feat_by_key(
                    curr_key_list)
                curr_label_list = text_iterator[set_name].get_text_by_key(
                    curr_key_list)
                assert (feat_iterator[set_name].get_key_by_index(
                    rr) == text_iterator[set_name].get_key_by_index(rr)
                        ), 'key(s) not same'
                # print(1, start-time.time()); start = time.time()
                feat_mat, feat_len, text_mat, text_len = batch_speech_text(
                    opts['gpu'], curr_feat_list, curr_label_list)
                # print(2, start-time.time()); start = time.time()
                _tmp_loss, _tmp_acc = fn_batch(feat_mat,
                                               feat_len,
                                               text_mat,
                                               text_len,
                                               train_step=set_train_mode)
                # print(3, start-time.time()); start = time.time()
                _tmp_count = len(rr)
                assert_nan(_tmp_loss)
                mloss[set_name] += _tmp_loss * _tmp_count
                macc[set_name] += _tmp_acc * _tmp_count
                mcount[set_name] += _tmp_count
            pass
        info_header = ['set', 'loss', 'acc']
        info_table = []
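
The fragment stops right after info_header and info_table are initialised; a plausible continuation, sketched here as an assumption (including the use of the tabulate package for printing), averages the weighted sums accumulated in mloss/macc/mcount above.

        # assumes: from tabulate import tabulate
        for set_name in ['train', 'dev', 'test']:
            # convert the weighted sums into per-set averages
            info_table.append([set_name,
                               mloss[set_name] / mcount[set_name],
                               macc[set_name] / mcount[set_name]])
        print(tabulate(info_table, headers=info_header, floatfmt='.3f'))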
Example #5
                max_target=opts['max_target'])
        elif opts['mode'] == 'tf':
            curr_key_list = text_iterator.get_key_by_index(rr)
            curr_text_list = text_iterator.get_text_by_key(curr_key_list)
            curr_feat_list = data_iterator.get_feat_by_key(curr_key_list)
            aux_info = None  # avoid NameError when the model is not multi-speaker
            if model.TYPE == TacotronType.MULTI_SPEAKER:
                _spk_vec = np.stack(
                    feat_spkvec_iterator.get_feat_by_key(
                        curr_key_list)).astype('float32')
                _spk_vec = Variable(
                    tensorauto(opts['gpu'], torch.from_numpy(_spk_vec)))
                aux_info = {'speaker_vector': _spk_vec}
            feat_mat, feat_len, text_mat, text_len = batch_speech_text(
                opts['gpu'],
                curr_feat_list,
                curr_text_list,
                feat_sil=feat_sil,
                group=group,
                start_sil=1,
                end_sil=0)
            pred_feat, pred_len, pred_att = generator_speech.decode_greedy_tf_torch(
                model,
                Variable(text_mat),
                text_len,
                Variable(feat_mat),
                feat_len,
                group=group,
                feat_sil=feat_sil,
                aux_info=aux_info)
            pred_len = data_iterator.get_feat_length_by_key(
                text_iterator.get_key_by_index(rr))
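
Example #5 ends immediately after the predicted lengths are replaced with the reference feature lengths; a likely next step, sketched here purely as an assumption about the surrounding code, is clipping each greedy prediction to that reference length before any further post-processing.

            # Hypothetical post-processing: clip each predicted feature sequence to
            # the reference length fetched above (pred_feat is assumed to be indexable
            # per utterance, e.g. a (batch, time, dim) tensor or a list of matrices).
            pred_feat_trimmed = [pred_feat[ii][:pred_len[ii]]
                                 for ii in range(len(pred_len))]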