def transfer_am(train_config):
    """
    initialize the acoustic model with a pretrained model for fine-tuning

    The pretrained model's config is loaded, its phone_size is reset to the
    size of the target phone inventory of train_config.lang, the pretrained
    weights are loaded through torch_load, and the resulting config, phone
    list and weights are written under the new model's path.

    :param train_config: training configuration; this function reads its
        pretrained_model, lang, device_id and new_model attributes
    :return: the AllosaurusTorchModel built from the adjusted config
    :raises AssertionError: if the pretrained config's model is not 'allosaurus'
    """

    pretrained_model_path = get_model_path(train_config.pretrained_model)

    # read the pretrained config; the context manager closes the file promptly
    with open(str(pretrained_model_path / 'am_config.json')) as config_file:
        am_config = Namespace(**json.load(config_file))

    assert am_config.model == 'allosaurus', "This project only support allosaurus model"

    # load inventory
    inventory = Inventory(pretrained_model_path)

    # get unit_mask which maps the full phone inventory to the target phone inventory
    unit_mask = inventory.get_mask(train_config.lang, approximation=True)

    # reset the new phone_size
    am_config.phone_size = len(unit_mask.target_unit)

    model = AllosaurusTorchModel(am_config)

    # load the pretrained model and setup the phone_layer with correct weights
    torch_load(model, str(pretrained_model_path / 'model.pt'), train_config.device_id, unit_mask)

    # update new model
    new_model = train_config.new_model

    # get its path
    model_path = get_model_path(new_model)

    # overwrite old am_config; closing the file guarantees the JSON is flushed
    # to disk before this function returns
    new_am_config_json = vars(am_config)
    with open(str(model_path / 'am_config.json'), 'w') as config_file:
        json.dump(new_am_config_json, config_file, indent=4)

    # overwrite old phones
    write_unit(unit_mask.target_unit, model_path / 'phone.txt')

    # overwrite old model
    torch_save(model, model_path / 'model.pt')

    return model
from pathlib import Path

from allosaurus.lm.inventory import Inventory
from allosaurus.model import get_model_path
import argparse

# Command-line entry point: replace the phone inventory of one language in a
# model with the units listed in a user-supplied file.
if __name__ == '__main__':

    parser = argparse.ArgumentParser('Update language inventory')
    parser.add_argument('-l', '--lang', type=str, required=True, help='specify which language inventory to update.')
    parser.add_argument('-m', '--model', type=str, default='latest', help='specify which model inventory')
    parser.add_argument('-i', '--input', type=str, required=True, help='your new inventory file')

    args = parser.parse_args()

    model_path = get_model_path(args.model)

    inventory = Inventory(model_path)

    lang = args.lang

    # verify lang is not ipa as it is an alias to the entire inventory
    assert args.lang != 'ipa', "ipa is not a proper lang to update. use list_lang to find a proper language"

    # the language may be given either as an ISO id or as a glotto id
    assert lang.lower() in inventory.lang_ids or lang.lower() in inventory.glotto_ids, f'language {args.lang} is not supported. Please verify it is in the language list'

    # Path is imported at the top of this script; without that import this
    # line raised NameError before the file could be checked
    new_unit_file = Path(args.input)

    # check existence of the file
    assert new_unit_file.exists(), args.input+' does not exist'

    # update this new unit
    inventory.update_unit(lang, new_unit_file)
# Command-line entry point: print every language known to a model's inventory.
if __name__ == '__main__':

    parser = argparse.ArgumentParser('List language phone inventory')

    # NOTE: --lang is parsed but not used below; it is kept so existing
    # command lines that pass it keep working
    parser.add_argument(
        '-l',
        '--lang',
        type=str,
        default='ipa',
        help=
        'specify which language inventory to use for recognition. default "ipa" is to use all phone inventory'
    )
    parser.add_argument('-m',
                        '--model',
                        type=str,
                        default='latest',
                        help='specify which model inventory')

    args = parser.parse_args()

    model_path = get_model_path(args.model)

    inventory = Inventory(model_path)

    print("Available Languages")

    for lang_id, glotto_id, lang_name in zip(inventory.lang_ids,
                                             inventory.glotto_ids,
                                             inventory.lang_names):
        # drop non-ASCII characters so the name prints on any console, then
        # decode back to str so it is not shown as a bytes literal (b'...')
        lang_name = lang_name.encode('ascii', 'ignore').decode('ascii')
        print('- ISO639-3: ', lang_id, 'Glotto Code', glotto_id, ' name: ',
              lang_name)
from pathlib import Path
from allosaurus.lm.inventory import Inventory
import argparse

# Command-line entry point: print the phone list of the whole inventory
# ("ipa") or of a single language, using the first model found under the
# "pretrained" directory next to this script.
if __name__ == '__main__':

    cli = argparse.ArgumentParser('List language phone inventory')
    cli.add_argument(
        '-l', '--lang',
        type=str,
        default='ipa',
        help='specify which language inventory to use for recognition. default "ipa" is to use all phone inventory',
    )
    args = cli.parse_args()

    pretrained_dir = Path(__file__).parent / 'pretrained'
    available_models = sorted(pretrained_dir.glob('*'))

    # nothing to inspect when no model has been downloaded yet
    if not available_models:
        print("No models are available, you can maually download a model with download command or just run inference to download the latest one automatically")
        exit(0)

    # the first model in sorted order supplies the inventory
    inventory = Inventory(available_models[0])

    if args.lang != 'ipa':
        # a specific language: validate it, then list its target units
        assert args.lang.lower() in inventory.lang_names, f'language {args.lang} is not supported. Please verify it is in the language list'
        target_unit = inventory.get_mask(args.lang.lower()).target_unit
        phones = list(target_unit.id_to_unit.values())
    else:
        # "ipa" stands for the full inventory
        phones = list(inventory.unit.id_to_unit.values())

    # the first entry is left out of the listing in both cases
    print(phones[1:])
from pathlib import Path
from allosaurus.lm.inventory import Inventory
from allosaurus.model import get_model_path
import argparse


def _str_to_bool(value):
    """
    convert a command-line string into a boolean

    argparse's type=bool treats every non-empty string (including "False")
    as True, so the value is parsed explicitly instead.

    :param value: raw string taken from the command line
    :return: True or False
    :raises argparse.ArgumentTypeError: if the string is not a recognised boolean
    """
    text = value.strip().lower()
    if text in ('true', 't', 'yes', 'y', '1'):
        return True
    if text in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError(f'expected a boolean value, got {value!r}')


# Command-line entry point: print the phone list of the whole inventory
# ("ipa") or of one language, optionally with approximated phone mappings.
if __name__ == '__main__':

    parser = argparse.ArgumentParser('List language phone inventory')
    parser.add_argument('-l', '--lang', type=str, default='ipa', help='specify which language inventory to use for recognition. default "ipa" is to use all phone inventory')
    parser.add_argument('-m', '--model', type=str, default='latest', help='specify which model inventory')
    parser.add_argument('-a', '--approximate', type=_str_to_bool, default=False, help='the phone inventory can still hardly to cover all phones. You can use turn on this flag to map missing phones to other similar phones to recognize. The similarity is measured with phonological features')

    args = parser.parse_args()

    model_path = get_model_path(args.model)

    inventory = Inventory(model_path)

    if args.lang == 'ipa':
        # the first entry of the unit table is left out of the listing
        print(' '.join(list(inventory.unit.id_to_unit.values())[1:]))
    else:
        lang = args.lang

        # the language may be given either as an ISO id or as a glotto id
        assert lang.lower() in inventory.lang_ids or lang.lower() in inventory.glotto_ids, f'language {args.lang} is not supported. Please verify it is in the language list'

        mask = inventory.get_mask(args.lang.lower(), approximation=args.approximate)

        unit = mask.target_unit

        # slice the list of units (not the joined string) so the first unit
        # is dropped, matching the "ipa" branch above
        print(' '.join(list(unit.id_to_unit.values())[1:]))

        if args.approximate:
            mask.print_maps()
from pathlib import Path
from allosaurus.lm.inventory import Inventory

# Command-line entry point: print the languages of the first model found
# under the "pretrained" directory next to this script.
if __name__ == '__main__':

    pretrained_dir = Path(__file__).parent / 'pretrained'
    available_models = sorted(pretrained_dir.glob('*'))

    # nothing to list when no model has been downloaded yet
    if not available_models:
        print("No models are available, you can maually download a model with download command or just run inference to download the latest one automatically")
        exit(0)

    # the first model in sorted order supplies the inventory
    inventory = Inventory(available_models[0])

    print("Available Languages")

    # ids and names are walked in lockstep
    id_name_pairs = zip(inventory.lang_ids, inventory.lang_names)
    for lang_id, lang_name in id_name_pairs:
        print('- language id: ', lang_id, ' name: ', lang_name)
'--lang', type=str, required=True, help='specify which language inventory to update.') parser.add_argument('-m', '--model', type=str, default='latest', help='specify which model inventory') parser.add_argument('-o', '--output', type=str, required=True, help='write out your current phone file.') parser.add_argument('-f', '--format', type=str, default='simple', choices=['simple', 'kaldi'], help='select your output format') args = parser.parse_args() model_path = get_model_path(args.model) inventory = Inventory(model_path) lang = args.lang unit = inventory.get_unit(lang) write_unit(unit, args.output, args.format)
from pathlib import Path
from allosaurus.lm.inventory import Inventory
from allosaurus.model import get_model_path
import argparse

# Command-line entry point: restore the phone inventory of one language in a
# model by calling Inventory.restore_unit.
if __name__ == '__main__':

    cli = argparse.ArgumentParser('Restore language inventory')
    cli.add_argument(
        '-l', '--lang',
        type=str,
        required=True,
        help='specify which language inventory to restore.',
    )
    cli.add_argument(
        '-m', '--model',
        type=str,
        default='latest',
        help='specify which model inventory',
    )
    args = cli.parse_args()

    inventory = Inventory(get_model_path(args.model))
    lang = args.lang

    # "ipa" is an alias for the entire inventory, so it cannot be restored
    assert args.lang != 'ipa', "ipa is not a proper lang to update. use list_lang to find a proper language"

    # accept the language either as an ISO id or as a glotto id
    lowered = lang.lower()
    is_known = lowered in inventory.lang_ids or lowered in inventory.glotto_ids
    assert is_known, f'language {args.lang} is not supported. Please verify it is in the language list'

    # restore this lang's inventory
    inventory.restore_unit(lang)