Example #1
    def recognize(self, filename, lang_id):
        # recognize a single file

        assert str(filename).endswith('.wav'), "only wave file is supported in allosaurus"

        # load wav audio
        audio = read_audio(filename)

        # extract feature
        feat = self.pm.compute(audio)

        # add batch dim
        feats = np.expand_dims(feat, 0)
        feat_len = np.array([feat.shape[0]], dtype=np.int32)

        tensor_batch_feat, tensor_batch_feat_len = move_to_tensor(
            [feats, feat_len], self.config.device_id)

        tensor_batch_lprobs = self.am(tensor_batch_feat, tensor_batch_feat_len)

        if self.config.device_id >= 0:
            batch_lprobs = tensor_batch_lprobs.cpu().detach().numpy()
        else:
            batch_lprobs = tensor_batch_lprobs.detach().numpy()

        token = self.lm.compute(batch_lprobs[0], lang_id)
        return token
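For context, a minimal usage sketch (not taken from the page above) showing how a recognize method like this is typically reached through allosaurus's public read_recognizer interface; 'sample.wav' and 'eng' are placeholder arguments:

# minimal usage sketch (assumed, not from the examples above):
# load a previously downloaded pretrained model and transcribe one wav file
from allosaurus.app import read_recognizer

model = read_recognizer()
phones = model.recognize('sample.wav', 'eng')  # placeholder path and language id
print(phones)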
Example #2
    def recognize(self, filename, lang_id='ipa', topk=1, emit=1.0, timestamp=False):
        # recognize a single file

        # filename check (skipping for BytesIO objects)
        if not isinstance(filename, BytesIO):
            assert str(filename).endswith('.wav'), "only wave file is supported in allosaurus"

        # load wav audio
        audio = read_audio(filename)

        # extract feature
        feat = self.pm.compute(audio)

        # add batch dim
        feats = np.expand_dims(feat, 0)
        feat_len = np.array([feat.shape[0]], dtype=np.int32)

        tensor_batch_feat, tensor_batch_feat_len = move_to_tensor([feats, feat_len], self.config.device_id)

        tensor_batch_lprobs = self.am(tensor_batch_feat, tensor_batch_feat_len)

        if self.config.device_id >= 0:
            batch_lprobs = tensor_batch_lprobs.cpu().detach().numpy()
        else:
            batch_lprobs = tensor_batch_lprobs.detach().numpy()

        token = self.lm.compute(batch_lprobs[0], lang_id, topk, emit=emit, timestamp=timestamp)
        return token
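A hedged sketch of how the extra parameters in this variant might be exercised, including the in-memory BytesIO path; the file name and option values are illustrative, and it assumes the keyword arguments are forwarded unchanged to the recognize signature above:

from io import BytesIO
from allosaurus.app import read_recognizer

model = read_recognizer()

# in-memory wav input is accepted thanks to the BytesIO check above
with open('sample.wav', 'rb') as f:  # placeholder file
    wav = BytesIO(f.read())

# topk, emit and timestamp are passed through to lm.compute
result = model.recognize(wav, lang_id='ipa', topk=3, emit=1.2, timestamp=True)
print(result)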
Example #3
def prepare_feature(data_path, model):

    model_path = Path(__file__).parent.parent / 'pretrained' / model

    # create pm (pm stands for preprocess model: audio -> feature, etc.)
    pm = read_pm(model_path, None)

    # data_path should be an absolute path
    data_path = data_path.absolute()

    # writer for feats
    feat_writer = KaldiWriter(data_path / 'feat')

    # writer for the shape of each utterance
    # format: utt_id shape[0] shape[1]
    shape_writer = open(data_path / 'shape', 'w')

    for line in tqdm(
            open(data_path / 'wave', 'r', encoding='utf-8').readlines()):
        fields = line.strip().split()
        utt_id = fields[0]
        audio_path = fields[1]

        assert Path(audio_path).exists(), audio_path + " does not exist!"

        audio = read_audio(audio_path)

        # extract feature
        feat = pm.compute(audio)

        # write shape
        shape_writer.write(f'{utt_id} {feat.shape[0]} {feat.shape[1]}\n')

        feat_writer.write(utt_id, feat)

    feat_writer.close()
    shape_writer.close()
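For illustration, a sketch of how prepare_feature could be driven with the function above in scope; the directory layout, the utterance id, and the model name 'latest' are assumptions, while the one-entry-per-line 'wave' manifest format (utt_id followed by an audio path) follows the parsing done above:

from pathlib import Path

data_dir = Path('data/train')                    # placeholder corpus directory
data_dir.mkdir(parents=True, exist_ok=True)

# the 'wave' manifest lists one utterance per line: "<utt_id> <path/to/audio.wav>"
with open(data_dir / 'wave', 'w', encoding='utf-8') as w:
    w.write('utt_0001 /corpus/utt_0001.wav\n')   # placeholder utterance

# writes Kaldi-format features to data/train/feat and shapes to data/train/shape
prepare_feature(data_dir, 'latest')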
Example #4
    def recognize(self, filename, lang_id):

        # load wav audio
        audio = read_audio(filename)

        # extract feature
        feat = self.pm.compute(audio)

        # add batch dim
        feats = np.expand_dims(feat, 0)
        feat_len = np.array([feat.shape[0]], dtype=np.int32)

        tensor_batch_feat, tensor_batch_feat_len = move_to_tensor(
            [feats, feat_len], self.config.device_id)

        tensor_batch_lprobs = self.am(tensor_batch_feat, tensor_batch_feat_len)

        if self.config.device_id >= 0:
            batch_lprobs = tensor_batch_lprobs.cpu().detach().numpy()
        else:
            batch_lprobs = tensor_batch_lprobs.detach().numpy()

        token = self.lm.compute(batch_lprobs[0], lang_id)
        return token