Exemplo n.º 1
0
class ASR():
    """Speech-to-text pipeline that chains an ``AM`` and an ``LM`` model.

    ``AM`` and ``LM`` are defined outside this block (presumably the
    acoustic and language models -- confirm against their definitions).
    The AM turns a wav file into tokens; the LM turns those tokens into
    the final text.
    """

    def __init__(self, am_config, lm_config):
        """Instantiate and load both models.

        Args:
            am_config: configuration object handed to ``AM``.
            lm_config: configuration object handed to ``LM``.
        """
        self.am = AM(am_config)
        # NOTE(review): the meaning of the ``False`` flag is defined by
        # ``AM.load_model`` / ``LM.load_model``, which are not visible here.
        self.am.load_model(False)

        self.lm = LM(lm_config)
        self.lm.load_model(False)

    def decode_am_result(self, result):
        """Return ``self.am.decode_result(result)`` (token ids to vocab)."""
        return self.am.decode_result(result)

    def _decode_am_prediction(self, am_result):
        """Select the relevant part of a raw AM prediction and decode it.

        For a ``'Transducer'`` model the first and last elements of the
        prediction are dropped before decoding; for any other model type
        only element ``0`` of the prediction is decoded.
        """
        if self.am.model_type == 'Transducer':
            return self.decode_am_result(am_result[1:-1])
        return self.decode_am_result(am_result[0])

    def _lm_predict_and_decode(self, lm_input):
        """Run the LM on ``lm_input`` and decode its first output to vocab."""
        # lm_result is token id at this point
        lm_result = self.lm.predict(lm_input)
        # token to vocab
        return self.lm.decode(lm_result[0].numpy(), self.lm.lm_featurizer)

    def stt(self, wav_path):
        """Run the full AM + LM pipeline on one wav file.

        Args:
            wav_path: path of the audio file handed to ``self.am.predict``.

        Returns:
            A ``(am_result, lm_result)`` tuple: the decoded AM output and
            the decoded LM output computed from it.
        """
        am_result = self._decode_am_prediction(self.am.predict(wav_path))
        lm_result = self._lm_predict_and_decode(am_result)
        return am_result, lm_result

    def am_test(self, wav_path):
        """Run only the AM on ``wav_path`` and return its decoded output."""
        # the raw prediction is token id; the helper maps it to vocab
        return self._decode_am_prediction(self.am.predict(wav_path))

    def lm_test(self, txt):
        """Run only the LM on ``txt`` after converting it with pypinyin.

        Args:
            txt: text passed to ``pypinyin.pinyin``.

        Returns:
            The decoded LM output for the pinyin sequence of ``txt``.
        """
        if self.lm.config['am_token']['for_multi_task']:
            # 8 is passed positionally as pypinyin's style argument
            # (presumably Style.TONE3 -- confirm against the installed
            # pypinyin version).
            pys = pypinyin.pinyin(txt, 8, neutral_tone_with_five=True)
        else:
            pys = pypinyin.pinyin(txt)

        # keep only the first candidate reading of every returned entry
        input_py = [i[0] for i in pys]
        return self._lm_predict_and_decode(input_py)
Exemplo n.º 2
0
class ASR:
    """Wrapper around a single ``AM`` model used for recognition tests.

    ``AM`` is defined outside this block (presumably the acoustic model --
    confirm against its definition).
    """

    def __init__(self, am_config):
        """Create the ``AM`` from ``am_config`` and call ``load_model(False)``."""
        self.am = AM(am_config)
        self.am.load_model(False)

    def decode_am_result(self, result):
        """Return whatever ``self.am.decode_result`` yields for ``result``."""
        decoded = self.am.decode_result(result)
        return decoded

    def am_test(self, wav_path):
        """Predict on ``wav_path`` and return the decoded AM output.

        A ``'Transducer'`` model has the first and last elements of its
        prediction stripped before decoding; any other model type has only
        element ``0`` of the prediction decoded.
        """
        # the raw prediction is token id
        raw_prediction = self.am.predict(wav_path)
        is_transducer = self.am.model_type == 'Transducer'
        selected = raw_prediction[1:-1] if is_transducer else raw_prediction[0]
        # token to vocab
        return self.decode_am_result(selected)
Exemplo n.º 3
0
class ASR:
    """Two-stage recognizer that feeds decoded ``AM`` output into an ``LM``.

    ``AM`` and ``LM`` are defined outside this block (presumably the
    acoustic and language models -- confirm against their definitions).
    """

    def __init__(self, am_config, lm_config):
        """Build both models from their configs and load them.

        The AM is loaded with ``load_model(False)``; the LM is loaded with
        no arguments.
        """
        self.am = AM(am_config)
        self.am.load_model(False)

        self.lm = LM(lm_config)
        self.lm.load_model()

    def decode_am_result(self, result):
        """Decode element ``0`` of ``result`` via ``self.am.decode_result``."""
        first_entry = result[0]
        return self.am.decode_result(first_entry)

    def stt(self, wav_path):
        """Run the AM on ``wav_path`` and pass its decoded output to the LM.

        Returns:
            A ``(am_result, lm_result)`` tuple: the raw (undecoded) AM
            prediction and the LM prediction as returned by
            ``self.lm.predict``.
        """
        am_result = self.am.predict(wav_path)
        decoded_tokens = self.decode_am_result(am_result)
        lm_result = self.lm.predict(decoded_tokens)
        return am_result, lm_result