示例#1
0
def transfer_am(train_config):
    """
    Initialize the acoustic model with a pretrained model for fine-tuning.

    The pretrained model's config, inventory and weights are loaded, the phone
    size is reset to the target language inventory, and the resulting config,
    phone list and model are written into the new model's directory.

    :param train_config: training configuration; this function reads its
        ``pretrained_model``, ``lang``, ``device_id`` and ``new_model`` attributes
    :return: the AllosaurusTorchModel built from the updated config and loaded
        from the pretrained ``model.pt``
    :raises AssertionError: if the pretrained config's ``model`` is not 'allosaurus'
    """

    pretrained_model_path = get_model_path(train_config.pretrained_model)

    # use a context manager so the config file handle is closed deterministically
    with open(str(pretrained_model_path / 'am_config.json')) as config_file:
        am_config = Namespace(**json.load(config_file))

    assert am_config.model == 'allosaurus', "This project only support allosaurus model"

    # load inventory
    inventory = Inventory(pretrained_model_path)

    # get unit_mask which maps the full phone inventory to the target phone inventory
    unit_mask = inventory.get_mask(train_config.lang, approximation=True)

    # reset the new phone_size
    am_config.phone_size = len(unit_mask.target_unit)

    model = AllosaurusTorchModel(am_config)

    # load the pretrained model and setup the phone_layer with correct weights
    torch_load(model, str(pretrained_model_path / 'model.pt'), train_config.device_id, unit_mask)

    # resolve the directory of the new model that receives the updated files
    model_path = get_model_path(train_config.new_model)

    # overwrite old am_config; closing the handle guarantees the JSON is
    # flushed to disk before anything else reads the new model directory
    with open(str(model_path / 'am_config.json'), 'w') as config_file:
        json.dump(vars(am_config), config_file, indent=4)

    # overwrite old phones
    write_unit(unit_mask.target_unit, model_path / 'phone.txt')

    # overwrite old model
    torch_save(model, model_path / 'model.pt')

    return model
示例#2
0
from allosaurus.lm.inventory import Inventory
from allosaurus.model import get_model_path
import argparse

if __name__ == '__main__':

    # Path wraps the --input argument below; it is imported here because the
    # script's top-level imports do not bring it into scope (the original
    # raised NameError when reaching Path(args.input)).
    from pathlib import Path

    # command line: which language to update, which model, and the new inventory file
    parser = argparse.ArgumentParser('Update language inventory')
    parser.add_argument('-l', '--lang',  type=str, required=True, help='specify which language inventory to update.')
    parser.add_argument('-m', '--model', type=str, default='latest', help='specify which model inventory')
    parser.add_argument('-i', '--input', type=str, required=True, help='your new inventory file')

    args = parser.parse_args()

    model_path = get_model_path(args.model)

    inventory = Inventory(model_path)

    lang = args.lang

    # verify lang is not ipa as it is an alias to the entire inventory
    assert args.lang != 'ipa', "ipa is not a proper lang to update. use list_lang to find a proper language"

    # the language must be known either by its lang id or by its glotto id
    assert lang.lower() in inventory.lang_ids or lang.lower() in inventory.glotto_ids, f'language {args.lang} is not supported. Please verify it is in the language list'

    new_unit_file = Path(args.input)

    # check existence of the file
    assert new_unit_file.exists(), args.input+' does not exist'

    # update this new unit
    inventory.update_unit(lang, new_unit_file)
示例#3
0
if __name__ == '__main__':

    # command line: --lang is accepted for interface compatibility but is not
    # consulted below; --model selects which model's inventory is listed
    parser = argparse.ArgumentParser('List language phone inventory')
    parser.add_argument(
        '-l',
        '--lang',
        type=str,
        default='ipa',
        help=
        'specify which language inventory to use for recognition. default "ipa" is to use all phone inventory'
    )
    parser.add_argument('-m',
                        '--model',
                        type=str,
                        default='latest',
                        help='specify which model inventory')

    args = parser.parse_args()
    model_path = get_model_path(args.model)

    inventory = Inventory(model_path)

    print("Available Languages")
    for lang_id, glotto_id, lang_name in zip(inventory.lang_ids,
                                             inventory.glotto_ids,
                                             inventory.lang_names):
        # Drop non-ASCII characters so printing cannot fail on consoles with a
        # limited encoding, then decode back to str: printing the bytes object
        # directly would show each name as a b'...' literal.
        lang_name = lang_name.encode('ascii', 'ignore').decode('ascii')
        print('- ISO639-3: ', lang_id, 'Glotto Code', glotto_id, ' name: ',
              lang_name)
示例#4
0
from pathlib import Path
from allosaurus.lm.inventory import Inventory
import argparse

if __name__ == '__main__':

    # the only option is the language whose phone inventory should be listed
    cli = argparse.ArgumentParser('List language phone inventory')
    cli.add_argument(
        '-l',
        '--lang',
        type=str,
        default='ipa',
        help='specify which language inventory to use for recognition. default "ipa" is to use all phone inventory',
    )
    args = cli.parse_args()

    # pretrained models live next to this script; use the first one in sorted order
    pretrained_dir = Path(__file__).parent / 'pretrained'
    available = sorted(pretrained_dir.glob('*'))

    if not available:
        print("No models are available, you can maually download a model with download command or just run inference to download the latest one automatically")
        exit(0)

    inventory = Inventory(available[0])

    if args.lang != 'ipa':
        # a specific language: it must be known before its mask is requested
        assert args.lang.lower() in inventory.lang_names, f'language {args.lang} is not supported. Please verify it is in the language list'

        selected_unit = inventory.get_mask(args.lang.lower()).target_unit
    else:
        # 'ipa' selects the inventory's full unit
        selected_unit = inventory.unit

    # the first entry of the id-to-unit mapping is skipped in the listing
    phones = list(selected_unit.id_to_unit.values())
    print(phones[1:])
示例#5
0
from pathlib import Path
from allosaurus.lm.inventory import Inventory
from allosaurus.model import get_model_path
import argparse

def parse_bool_flag(value):
    """
    Convert a command-line token into a real boolean.

    argparse's ``type=bool`` calls ``bool()`` on the raw string, so any
    non-empty text (including "False") becomes True. This parser interprets
    the usual spellings instead.

    :param value: the raw option value (a str from the command line, or a bool)
    :return: True or False
    :raises argparse.ArgumentTypeError: if the text is not a recognised boolean
    """
    if isinstance(value, bool):
        return value

    normalized = value.strip().lower()
    if normalized in ('true', 't', 'yes', 'y', '1', 'on'):
        return True
    # the empty string stays False, matching what bool('') returned before
    if normalized in ('false', 'f', 'no', 'n', '0', 'off', ''):
        return False

    raise argparse.ArgumentTypeError(f'expected a boolean value, got {value!r}')


if __name__ == '__main__':

    parser = argparse.ArgumentParser('List language phone inventory')
    parser.add_argument('-l', '--lang', type=str,  default='ipa',         help='specify which language inventory to use for recognition. default "ipa" is to use all phone inventory')
    parser.add_argument('-m', '--model', type=str, default='latest',     help='specify which model inventory')
    # nargs='?' with const=True keeps "-a True" working and also accepts a bare "-a"
    parser.add_argument('-a', '--approximate', type=parse_bool_flag, nargs='?', const=True, default=False, help='the phone inventory can still hardly to cover all phones. You can use turn on this flag to map missing phones to other similar phones to recognize. The similarity is measured with phonological features')

    args = parser.parse_args()

    model_path = get_model_path(args.model)

    inventory = Inventory(model_path)

    if args.lang == 'ipa':
        # 'ipa' lists the whole inventory; the first entry is skipped
        print(' '.join(list(inventory.unit.id_to_unit.values())[1:]))
    else:
        lang = args.lang
        assert lang.lower() in inventory.lang_ids or lang.lower() in inventory.glotto_ids, f'language {args.lang} is not supported. Please verify it is in the language list'

        mask = inventory.get_mask(args.lang.lower(), approximation=args.approximate)

        unit = mask.target_unit
        # slice the list before joining, as in the 'ipa' branch: slicing the
        # joined string would only strip its first character
        print(' '.join(list(unit.id_to_unit.values())[1:]))

        if args.approximate:
            mask.print_maps()
示例#6
0
from pathlib import Path
from allosaurus.lm.inventory import Inventory

if __name__ == '__main__':

    # pretrained models are stored in a directory next to this script
    pretrained_dir = Path(__file__).parent / 'pretrained'
    candidates = sorted(pretrained_dir.glob('*'))

    # nothing downloaded yet: tell the user how to get a model and stop
    if not candidates:
        print("No models are available, you can maually download a model with download command or just run inference to download the latest one automatically")
        exit(0)

    # the first model in sorted order supplies the language list
    inventory = Inventory(candidates[0])

    print("Available Languages")
    id_name_pairs = zip(inventory.lang_ids, inventory.lang_names)
    for language_id, language_name in id_name_pairs:
        print('- language id: ', language_id, ' name: ', language_name)
示例#7
0
                        '--lang',
                        type=str,
                        required=True,
                        help='specify which language inventory to update.')
    parser.add_argument('-m',
                        '--model',
                        type=str,
                        default='latest',
                        help='specify which model inventory')
    parser.add_argument('-o',
                        '--output',
                        type=str,
                        required=True,
                        help='write out your current phone file.')
    parser.add_argument('-f',
                        '--format',
                        type=str,
                        default='simple',
                        choices=['simple', 'kaldi'],
                        help='select your output format')

    args = parser.parse_args()

    model_path = get_model_path(args.model)

    inventory = Inventory(model_path)

    lang = args.lang

    unit = inventory.get_unit(lang)
    write_unit(unit, args.output, args.format)
示例#8
0
from pathlib import Path
from allosaurus.lm.inventory import Inventory
from allosaurus.model import get_model_path
import argparse

if __name__ == '__main__':

    # command line: the language to restore and the model whose inventory holds it
    cli = argparse.ArgumentParser('Restore language inventory')
    cli.add_argument(
        '-l',
        '--lang',
        type=str,
        required=True,
        help='specify which language inventory to restore.',
    )
    cli.add_argument(
        '-m',
        '--model',
        type=str,
        default='latest',
        help='specify which model inventory',
    )
    args = cli.parse_args()

    inventory = Inventory(get_model_path(args.model))

    lang = args.lang
    lowered = lang.lower()

    # 'ipa' is an alias to the entire inventory, so it cannot be restored on its own
    assert args.lang != 'ipa', "ipa is not a proper lang to update. use list_lang to find a proper language"

    # accept the language when it is known by its lang id or by its glotto id
    assert lowered in inventory.lang_ids or lowered in inventory.glotto_ids, f'language {args.lang} is not supported. Please verify it is in the language list'

    # restore this lang's inventory
    inventory.restore_unit(lang)