Example #1
from argparse import Namespace
from pathlib import Path

# get_all_models, resolve_model_name, download_model, read_pm, read_am,
# read_lm and Recognizer are helpers defined elsewhere in the same package.

def read_recognizer(inference_config_or_name='latest'):

    # download specified model automatically if no model exists
    if len(get_all_models()) == 0:
        download_model('latest')

    # create default config if input is the model's name
    if isinstance(inference_config_or_name, str):
        model_name = resolve_model_name(inference_config_or_name)
        inference_config = Namespace(model=model_name, device_id=-1, lang='ipa', approximate=False)
    else:
        assert isinstance(inference_config_or_name, Namespace)
        inference_config = inference_config_or_name

    model_path = Path(__file__).parent / 'pretrained' / inference_config.model

    if inference_config.model == 'latest' and not model_path.exists():
        download_model(inference_config)

    assert model_path.exists(), f"{inference_config.model} is not a valid model"

    # create pm (pm stands for preprocess model: audio -> feature, etc.)
    pm = read_pm(model_path, inference_config)

    # create am (acoustic model: feature -> logits)
    am = read_am(model_path, inference_config)

    # create lm (language model: logits -> phone)
    lm = read_lm(model_path, inference_config)

    return Recognizer(pm, am, lm, inference_config)
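
A minimal usage sketch for the function above, assuming read_recognizer and its helpers are importable from the surrounding module. The config field names come from the default branch of the code; the final recognize call is a hypothetical illustration of what the returned Recognizer might be used for, since that class is not shown here.

from argparse import Namespace

# Load the default pretrained model; it is downloaded automatically on first use.
recognizer = read_recognizer()              # same as read_recognizer('latest')

# Or pass a full inference config instead of a model name.
config = Namespace(model='latest', device_id=-1, lang='ipa', approximate=False)
recognizer = read_recognizer(config)

# Hypothetical downstream call (the Recognizer API is not part of this snippet):
# phones = recognizer.recognize('utterance.wav')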
Example #2
from pathlib import Path

# download_model, read_pm, read_am, read_lm and Recognizer are helpers
# defined elsewhere in the same package.

def read_recognizer(inference_config):

    model_path = Path(__file__).parent / 'pretrained' / inference_config.model

    if inference_config.model == 'latest' and not model_path.exists():
        download_model(inference_config)

    assert model_path.exists(), f"{inference_config.model} is not a valid model"

    # create pm (pm stands for preprocess model: audio -> feature, etc.)
    pm = read_pm(model_path, inference_config)

    # create am (acoustic model: feature -> logits)
    am = read_am(model_path, inference_config)

    # create lm (language model: logits -> phone)
    lm = read_lm(model_path, inference_config)

    return Recognizer(pm, am, lm, inference_config)
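
This older variant accepts only a ready-made config object, so the caller builds the Namespace itself. A short sketch, with field names borrowed from Example #1:

from argparse import Namespace

# Only the special model name 'latest' is downloaded on demand; any other
# model must already exist as a directory under pretrained/.
inference_config = Namespace(model='latest', device_id=-1, lang='ipa', approximate=False)
recognizer = read_recognizer(inference_config)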
Example #3
from pathlib import Path

from tqdm import tqdm

# read_pm, read_audio and KaldiWriter are helpers defined elsewhere in the same package.

def prepare_feature(data_path, model):

    model_path = Path(__file__).parent.parent / 'pretrained' / model

    # create pm (pm stands for preprocess model: audio -> feature etc..)
    pm = read_pm(model_path, None)

    # resolve data_path to an absolute path
    data_path = data_path.absolute()

    # writer for feats
    feat_writer = KaldiWriter(data_path / 'feat')

    # writer for the shape of each utterance
    # format: utt_id shape[0] shape[1]
    shape_writer = open(data_path / 'shape', 'w')

    for line in tqdm(
            open(data_path / 'wave', 'r', encoding='utf-8').readlines()):
        fields = line.strip().split()
        utt_id = fields[0]
        audio_path = fields[1]

        assert Path(audio_path).exists(), audio_path + " does not exist!"

        audio = read_audio(audio_path)

        # extract feature
        feat = pm.compute(audio)

        # write shape
        shape_writer.write(f'{utt_id} {feat.shape[0]} {feat.shape[1]}\n')

        feat_writer.write(utt_id, feat)

    feat_writer.close()
    shape_writer.close()
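
A sketch of how prepare_feature could be driven, inferred from the loop above: data_path is a directory whose 'wave' manifest lists one "utt_id /absolute/path/to/audio" pair per line, and the function writes a 'feat' archive plus a 'shape' file into the same directory. The directory name, utterance IDs, and audio paths below are placeholders.

from pathlib import Path

data_dir = Path('data/train')               # placeholder data directory
data_dir.mkdir(parents=True, exist_ok=True)

# 'wave' manifest: one "utt_id /absolute/path/to/audio.wav" entry per line.
with open(data_dir / 'wave', 'w', encoding='utf-8') as f:
    f.write('utt_0001 /corpus/speaker1/utt_0001.wav\n')
    f.write('utt_0002 /corpus/speaker1/utt_0002.wav\n')

# Extract features with a pretrained model; 'latest' mirrors the earlier examples.
prepare_feature(data_dir, 'latest')

# data_dir now also contains 'feat' (features keyed by utt_id) and
# 'shape' (one "utt_id frames dims" line per utterance).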