Пример #1
0
def allosaurus_featurize(wavfile, model=None):
    """Count how often each known phone label occurs in a recording.

    The recording is transcribed into phones by the recognizer, the
    transcript is split on whitespace, and the occurrences of every entry
    of the fixed ``labels`` inventory are counted.

    Args:
        wavfile: Audio file handed to ``model.recognize``.
        model: Optional, already-loaded recognizer exposing
            ``recognize(wavfile)``. When omitted, a recognizer is created
            with ``read_recognizer()`` (the original behavior). Passing one
            in avoids reloading the recognizer for every file.

    Returns:
        A ``(features, labels)`` tuple where ``features[i]`` is the number
        of times ``labels[i]`` appears in the transcript. Phones that are
        not part of ``labels`` are ignored.
    """
    from collections import Counter

    # load your model (only when the caller did not supply one)
    if model is None:
        model = read_recognizer()
    labels = [
        'I', 'a', 'aː', 'ã', 'ă', 'b', 'bʲ', 'bʲj', 'bʷ', 'bʼ', 'bː', 'b̞',
        'b̤', 'b̥', 'c', 'd', 'dʒ', 'dʲ', 'dː', 'd̚', 'd̥', 'd̪', 'd̯', 'd͡z',
        'd͡ʑ', 'd͡ʒ', 'd͡ʒː', 'd͡ʒ̤', 'e', 'eː', 'e̞', 'f', 'fʲ', 'fʷ', 'fː',
        'g', 'gʲ', 'gʲj', 'gʷ', 'gː', 'h', 'hʷ', 'i', 'ij', 'iː', 'i̞', 'i̥',
        'i̯', 'j', 'k', 'kx', 'kʰ', 'kʲ', 'kʲj', 'kʷ', 'kʷʼ', 'kʼ', 'kː',
        'k̟ʲ', 'k̟̚', 'k͡p̚', 'l', 'lʲ', 'lː', 'l̪', 'm', 'mʲ', 'mʲj', 'mʷ',
        'mː', 'n', 'nj', 'nʲ', 'nː', 'n̪', 'n̺', 'o', 'oː', 'o̞', 'o̥', 'p',
        'pf', 'pʰ', 'pʲ', 'pʲj', 'pʷ', 'pʷʼ', 'pʼ', 'pː', 'p̚', 'q', 'r', 'rː',
        's', 'sʲ', 'sʼ', 'sː', 's̪', 't', 'ts', 'tsʰ', 'tɕ', 'tɕʰ', 'tʂ',
        'tʂʰ', 'tʃ', 'tʰ', 'tʲ', 'tʷʼ', 'tʼ', 'tː', 't̚', 't̪', 't̪ʰ', 't̪̚',
        't͡s', 't͡sʼ', 't͡ɕ', 't͡ɬ', 't͡ʃ', 't͡ʃʲ', 't͡ʃʼ', 't͡ʃː', 'u', 'uə',
        'uː', 'u͡w', 'v', 'vʲ', 'vʷ', 'vː', 'v̞', 'v̞ʲ', 'w', 'x', 'x̟ʲ', 'y',
        'z', 'zj', 'zʲ', 'z̪', 'ä', 'æ', 'ç', 'çj', 'ð', 'ø', 'ŋ', 'ŋ̟', 'ŋ͡m',
        'œ', 'œ̃', 'ɐ', 'ɐ̞', 'ɑ', 'ɑ̱', 'ɒ', 'ɓ', 'ɔ', 'ɔ̃', 'ɕ', 'ɕː', 'ɖ̤',
        'ɗ', 'ə', 'ɛ', 'ɛ̃', 'ɟ', 'ɡ', 'ɡʲ', 'ɡ̤', 'ɡ̥', 'ɣ', 'ɣj', 'ɤ', 'ɤɐ̞',
        'ɤ̆', 'ɥ', 'ɦ', 'ɨ', 'ɪ', 'ɫ', 'ɯ', 'ɯ̟', 'ɯ̥', 'ɰ', 'ɱ', 'ɲ', 'ɳ',
        'ɴ', 'ɵ', 'ɸ', 'ɹ', 'ɹ̩', 'ɻ', 'ɻ̩', 'ɽ', 'ɾ', 'ɾj', 'ɾʲ', 'ɾ̠', 'ʀ',
        'ʁ', 'ʁ̝', 'ʂ', 'ʃ', 'ʃʲː', 'ʃ͡ɣ', 'ʈ', 'ʉ̞', 'ʊ', 'ʋ', 'ʋʲ', 'ʌ', 'ʎ',
        'ʏ', 'ʐ', 'ʑ', 'ʒ', 'ʒ͡ɣ', 'ʔ', 'ʝ', 'ː', 'β', 'β̞', 'θ', 'χ', 'ә', 'ḁ'
    ]
    phone_transcript = model.recognize(wavfile).split()
    print(phone_transcript)

    # Tally the transcript once; a Counter yields 0 for labels never seen,
    # so this matches calling list.count() per label without rescanning.
    phone_counts = Counter(phone_transcript)
    features = [phone_counts[label] for label in labels]

    return features, labels
Пример #2
0
                        help='specify your input wav file')

    args = parser.parse_args()

    # check file format
    assert args.input.endswith(
        '.wav'
    ), " Error: Please use a wav file. other audio files can be converted to wav by sox"

    # download specified model automatically if no model exists
    if len(get_all_models()) == 0:
        download_model('latest')

    # resolve model's name
    model_name = resolve_model_name(args.model)
    if model_name == "none":
        print(
            "Model ", model_name,
            " does not exist. Please download this model or use an existing model in list_model"
        )
        exit(0)

    args.model = model_name

    # create recognizer
    recognizer = read_recognizer(args)

    # run inference
    phones = recognizer.recognize(args.input, args.lang)

    print(phones)
Пример #3
0
 def test_latest_nonempty(self):
     # Smoke test: the 'latest' recognizer must return a non-empty
     # result for the sample recording two directories above this file.
     base_dir = Path(__file__).parent.parent
     sample_wav = base_dir / 'sample.wav'
     recognizer = read_recognizer('latest')
     output = recognizer.recognize(sample_wav)
     self.assertTrue(len(output) > 0)
Пример #4
0
 def test_eng_nonempty_external_model(self):
     # Smoke test: the 'latest' recognizer, read with an explicit model
     # directory ('test_model'), must return a non-empty result for the
     # sample recording when 'eng' is passed as the second argument.
     base_dir = Path(__file__).parent.parent
     sample_wav = base_dir / 'sample.wav'
     external_model_dir = base_dir / 'test_model'
     recognizer = read_recognizer('latest', external_model_dir)
     output = recognizer.recognize(sample_wav, 'eng')
     self.assertTrue(len(output) > 0)
Пример #5
0
import json
import pickle
import subprocess
from allosaurus.app import read_recognizer

# Folder and split names; presumably the values passed to convert() as
# `folder` and `set` -- the calling code is not visible here.
folders = ['map', 'music', 'video', 'weather']
sets = ['train', 'development']

# Recognizer created once at import time; convert() reads this global.
model = read_recognizer()


def convert(folder, set):
    """Transcribe every utterance of one data split into phones and pickle it.

    Reads ``../data/{folder}/{set}.json``, runs the module-level ``model``
    on ``../data/{folder}/audios/{wav_id}.wav`` for each utterance, and
    writes a ``{wav_id: [phone, ...]}`` dict to
    ``../data/{folder}/{set}_transcript.pkl``.

    Args:
        folder: Name of the sub-directory under ``../data``.
        set: Name of the split (the JSON file stem). The name shadows the
            builtin ``set`` but is kept because it is part of the
            function's interface.

    Returns:
        None. The result is written to disk.
    """
    # Load the metadata up front so the JSON file is not held open for the
    # whole (potentially slow) recognition loop.
    with open(f'../data/{folder}/{set}.json', 'r', encoding='utf-8') as f:
        entries = json.load(f)

    all_phones = {}
    for entry in entries:
        for utterance in entry['utterances']:
            wav_id = utterance['wav_id']
            try:
                transcript = model.recognize(
                    f'../data/{folder}/audios/{wav_id}.wav')
            except Exception:
                # Deliberate best-effort: report the failing recording and
                # carry on with the remaining ones.
                print('Problematic file:', wav_id)
                continue
            # The transcript is split on whitespace into individual phones.
            all_phones[wav_id] = transcript.split()

    with open(f'../data/{folder}/{set}_transcript.pkl', 'wb') as ff:
        pickle.dump(all_phones, ff)