def main():
    """Export a trained ASR model to TorchScript, optionally also a
    dynamically-quantized variant.

    Reads the model config and checkpoint from CLI args; writes the scripted
    model to ``args.output_file`` and/or the int8-quantized scripted model to
    ``args.output_quant_file`` when those paths are given.
    """
    args = get_args()
    # Export runs on CPU only; hide all GPUs from torch.
    os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    with open(args.config, 'r') as fin:
        configs = yaml.load(fin, Loader=yaml.FullLoader)

    model = init_model(configs)
    print(model)
    load_checkpoint(model, args.checkpoint)

    # Plain TorchScript export.
    if args.output_file:
        scripted = torch.jit.script(model)
        scripted.save(args.output_file)
        print('Export model successfully, see {}'.format(args.output_file))

    # Dynamic int8 quantization of Linear layers, then TorchScript export.
    if args.output_quant_file:
        quant_model = torch.quantization.quantize_dynamic(
            model, {torch.nn.Linear}, dtype=torch.qint8)
        print(quant_model)
        scripted_quant = torch.jit.script(quant_model)
        scripted_quant.save(args.output_quant_file)
        print('Export quantized model successfully, '
              'see {}'.format(args.output_quant_file))
def main():
    """Export encoder, CTC and decoder of a trained model to ONNX.

    Collects all shape/config metadata the exporters need into ``arguments``
    and then calls export_encoder / export_ctc / export_decoder.
    """
    torch.manual_seed(777)  # deterministic dummy inputs for tracing
    args = get_args()
    output_dir = args.output_dir
    # BUG FIX: was os.system("mkdir -p " + output_dir), which is
    # non-portable (no mkdir -p on Windows) and vulnerable to shell
    # injection through a crafted output_dir. os.makedirs is the
    # portable, safe equivalent.
    os.makedirs(output_dir, exist_ok=True)
    # Export runs on CPU only; hide all GPUs from torch.
    os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    with open(args.config, 'r') as fin:
        configs = yaml.load(fin, Loader=yaml.FullLoader)

    model = init_model(configs)
    load_checkpoint(model, args.checkpoint)
    model.eval()
    print(model)

    # Metadata consumed by the per-component exporters.
    arguments = {}
    arguments['output_dir'] = output_dir
    arguments['batch'] = 1
    arguments['chunk_size'] = args.chunk_size
    arguments['left_chunks'] = args.num_decoding_left_chunks
    arguments['reverse_weight'] = args.reverse_weight
    arguments['output_size'] = configs['encoder_conf']['output_size']
    arguments['num_blocks'] = configs['encoder_conf']['num_blocks']
    arguments['cnn_module_kernel'] = configs['encoder_conf'][
        'cnn_module_kernel']
    arguments['head'] = configs['encoder_conf']['attention_heads']
    arguments['feature_size'] = configs['input_dim']
    arguments['vocab_size'] = configs['output_dim']
    # NOTE(xcsong): if chunk_size == -1, hardcode to 67
    arguments['decoding_window'] = (args.chunk_size - 1) * \
        model.encoder.embed.subsampling_rate + \
        model.encoder.embed.right_context + 1 if args.chunk_size > 0 else 67
    arguments['encoder'] = configs['encoder']
    arguments['decoder'] = configs['decoder']
    arguments['subsampling_rate'] = model.subsampling_rate()
    arguments['right_context'] = model.right_context()
    arguments['sos_symbol'] = model.sos_symbol()
    arguments['eos_symbol'] = model.eos_symbol()
    arguments['is_bidirectional_decoder'] = 1 \
        if model.is_bidirectional_decoder() else 0

    # NOTE(xcsong): Please note that -1/-1 means non-streaming model! It is
    #   not a [16/4 16/-1 16/0] all-in-one model and it should not be used in
    #   streaming mode (i.e., setting chunk_size=16 in `decoder_main`). If you
    #   want to use 16/-1 or any other streaming mode in `decoder_main`,
    #   please export onnx in the same config.
    if arguments['left_chunks'] > 0:
        assert arguments['chunk_size'] > 0  # -1/4 not supported

    export_encoder(model, arguments)
    export_ctc(model, arguments)
    export_decoder(model, arguments)
def init_model(self):
    """Build the ASR model, load its checkpoint and the char dict, and
    stash everything needed for decoding on ``self``.

    Sets: self.feature_extraction_conf, self.device, self.model,
    self.char_dict, self.eos.
    Exits the process when a batch-size-1-only decoding mode is combined
    with batch_size > 1.
    """
    args = self.args
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)
    # These rescoring-style modes only support one utterance per batch.
    if args.mode in ['ctc_prefix_beam_search', 'attention_rescoring'
                     ] and args.batch_size > 1:
        logging.fatal(
            'decoding mode {} must be running with batch_size == 1'.format(
                args.mode))
        sys.exit(1)
    with open(args.config, 'r') as fin:
        # BUG FIX: bare yaml.load() without a Loader is deprecated and
        # unsafe in PyYAML >= 5.1; use FullLoader as the rest of the
        # codebase does.
        configs = yaml.load(fin, Loader=yaml.FullLoader)

    raw_wav = configs['raw_wav']
    # Init dataset and data loader: strip every train-time augmentation
    # from the collate config so decoding is deterministic.
    test_collate_conf = copy.deepcopy(configs['collate_conf'])
    test_collate_conf['spec_aug'] = False
    test_collate_conf['spec_sub'] = False
    test_collate_conf['feature_dither'] = False
    test_collate_conf['speed_perturb'] = False
    if raw_wav:
        test_collate_conf['wav_distortion_conf']['wav_distortion_rate'] = 0
    feature_extraction_conf = test_collate_conf['feature_extraction_conf']

    # Init asr model from configs
    model = init_asr_model(configs)

    # Load dict: one "<token> <id>" pair per line.
    char_dict = {}
    with open(args.dict, 'r') as fin:
        for line in fin:
            arr = line.strip().split()
            assert len(arr) == 2
            char_dict[int(arr[1])] = arr[0]
    eos = len(char_dict) - 1  # last id is <eos>/<sos>

    load_checkpoint(model, args.checkpoint)
    use_cuda = args.gpu >= 0 and torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')
    model = model.to(device)
    model.eval()

    self.args = args
    self.feature_extraction_conf = feature_extraction_conf
    self.device = device
    self.model = model
    self.char_dict = char_dict
    self.eos = eos
# NOTE(review): fragment of a decoding main() — cut mid-call at both ends
# (opens with the tail of a DataLoader(...) call, ends inside
# model.recognize(...)). Left byte-identical; cannot be safely rewritten
# without the surrounding lines.
batch_size=1, num_workers=0) # Init asr model from configs model = init_asr_model(configs) # Load dict char_dict = {} with open(args.dict, 'r') as fin: for line in fin: arr = line.strip().split() assert len(arr) == 2 char_dict[int(arr[1])] = arr[0] eos = len(char_dict) - 1 load_checkpoint(model, args.checkpoint) use_cuda = args.gpu >= 0 and torch.cuda.is_available() device = torch.device('cuda' if use_cuda else 'cpu') model = model.to(device) model.eval() with torch.no_grad(), open(args.result_file, 'w') as fout: for batch_idx, batch in enumerate(test_data_loader): keys, feats, target, feats_lengths, target_lengths = batch feats = feats.to(device) target = target.to(device) feats_lengths = feats_lengths.to(device) target_lengths = target_lengths.to(device) if args.mode == 'attention': hyps = model.recognize( feats,
# NOTE(review): fragment of a training main() — begins mid-function and ends
# at a dangling `if distributed:` whose body is outside this view. Left
# byte-identical; cannot be safely rewritten without the surrounding lines.
# Init asr model from configs model = init_asr_model(configs) print(model) num_params = sum(p.numel() for p in model.parameters()) print('the number of model params: {}'.format(num_params)) # !!!IMPORTANT!!! # Try to export the model by script, if fails, we should refine # the code to satisfy the script export requirements if args.rank == 0: script_model = torch.jit.script(model) script_model.save(os.path.join(args.model_dir, 'init.zip')) executor = Executor() # If specify checkpoint, load some info from checkpoint if args.checkpoint is not None: infos = load_checkpoint(model, args.checkpoint) else: infos = {} start_epoch = infos.get('epoch', -1) + 1 cv_loss = infos.get('cv_loss', 0.0) step = infos.get('step', -1) num_epochs = configs.get('max_epoch', 100) model_dir = args.model_dir writer = None if args.rank == 0: os.makedirs(model_dir, exist_ok=True) exp_id = os.path.basename(model_dir) writer = SummaryWriter(os.path.join(args.tensorboard_dir, exp_id)) if distributed:
def main():
    """Train an ASR model.

    Builds train/CV datasets from the YAML config, optionally wraps the
    model in DistributedDataParallel, then runs the epoch loop: train,
    cross-validate, checkpoint, and log to TensorBoard on rank 0.
    """
    args = get_args()
    logging.basicConfig(level=logging.DEBUG,
                        format='%(asctime)s %(levelname)s %(message)s')
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)
    # Set random seed for reproducibility.
    torch.manual_seed(777)
    print(args)
    with open(args.config, 'r') as fin:
        configs = yaml.load(fin, Loader=yaml.FullLoader)
    if len(args.override_config) > 0:
        configs = override_config(configs, args.override_config)

    distributed = args.world_size > 1
    if distributed:
        logging.info('training on multiple gpus, this gpu {}'.format(args.gpu))
        dist.init_process_group(args.dist_backend,
                                init_method=args.init_method,
                                world_size=args.world_size,
                                rank=args.rank)

    symbol_table = read_symbol_table(args.symbol_table)

    # CV shares the train config minus every augmentation / shuffling knob.
    train_conf = configs['dataset_conf']
    cv_conf = copy.deepcopy(train_conf)
    cv_conf['speed_perturb'] = False
    cv_conf['spec_aug'] = False
    cv_conf['shuffle'] = False
    cv_conf['apply_alaw_codec'] = False
    cv_conf['add_noise'] = False
    cv_conf['add_babble'] = False
    cv_conf['add_reverb'] = False
    cv_conf['apply_codec'] = False
    cv_conf['volume_perturb'] = False
    cv_conf['pitch_shift'] = False
    non_lang_syms = read_non_lang_symbols(args.non_lang_syms)

    train_dataset = Dataset(args.data_type, args.train_data, symbol_table,
                            train_conf, args.bpe_model, non_lang_syms, True)
    cv_dataset = Dataset(args.data_type, args.cv_data, symbol_table, cv_conf,
                         args.bpe_model, non_lang_syms, partition=False)

    # Dataset already batches internally, hence batch_size=None.
    train_data_loader = DataLoader(train_dataset,
                                   batch_size=None,
                                   pin_memory=args.pin_memory,
                                   num_workers=args.num_workers,
                                   prefetch_factor=args.prefetch)
    cv_data_loader = DataLoader(cv_dataset,
                                batch_size=None,
                                pin_memory=args.pin_memory,
                                num_workers=args.num_workers,
                                prefetch_factor=args.prefetch)

    if 'fbank_conf' in configs['dataset_conf']:
        input_dim = configs['dataset_conf']['fbank_conf']['num_mel_bins']
    else:
        input_dim = configs['dataset_conf']['mfcc_conf']['num_mel_bins']
    vocab_size = len(symbol_table)

    # Save configs to model_dir/train.yaml for inference and export
    configs['input_dim'] = input_dim
    configs['output_dim'] = vocab_size
    configs['cmvn_file'] = args.cmvn
    configs['is_json_cmvn'] = True
    if args.rank == 0:
        saved_config_path = os.path.join(args.model_dir, 'train.yaml')
        with open(saved_config_path, 'w') as fout:
            data = yaml.dump(configs)
            fout.write(data)

    # Init asr model from configs
    model = init_asr_model(configs)
    if args.rank == 0:
        print(model)
        num_params = sum(p.numel() for p in model.parameters())
        print('the number of model params: {}'.format(num_params))

    # !!!IMPORTANT!!!
    # Try to export the model by script, if fails, we should refine
    # the code to satisfy the script export requirements
    if args.rank == 0:
        script_model = torch.jit.script(model)
        script_model.save(os.path.join(args.model_dir, 'init.zip'))
    executor = Executor()
    # If specify checkpoint, load some info from checkpoint
    if args.checkpoint is not None:
        infos = load_checkpoint(model, args.checkpoint)
    elif args.enc_init is not None:
        logging.info('load pretrained encoders: {}'.format(args.enc_init))
        infos = load_trained_modules(model, args)
    else:
        infos = {}
    start_epoch = infos.get('epoch', -1) + 1
    cv_loss = infos.get('cv_loss', 0.0)
    step = infos.get('step', -1)

    num_epochs = configs.get('max_epoch', 100)
    model_dir = args.model_dir
    writer = None
    if args.rank == 0:
        os.makedirs(model_dir, exist_ok=True)
        exp_id = os.path.basename(model_dir)
        writer = SummaryWriter(os.path.join(args.tensorboard_dir, exp_id))

    if distributed:
        assert (torch.cuda.is_available())
        # cuda model is required for nn.parallel.DistributedDataParallel
        model.cuda()
        model = torch.nn.parallel.DistributedDataParallel(
            model, find_unused_parameters=True)
        device = torch.device("cuda")
        if args.fp16_grad_sync:
            from torch.distributed.algorithms.ddp_comm_hooks import (
                default as comm_hooks,
            )
            model.register_comm_hook(
                state=None, hook=comm_hooks.fp16_compress_hook
            )
    else:
        use_cuda = args.gpu >= 0 and torch.cuda.is_available()
        device = torch.device('cuda' if use_cuda else 'cpu')
        model = model.to(device)

    if configs['optim'] == 'adam':
        print('optimizer is adam')
        optimizer = optim.Adam(model.parameters(), **configs['optim_conf'])
    elif configs['optim'] == 'sgd':
        print('optimizer is sgd')
        optimizer = optim.SGD(model.parameters(), **configs['optim_conf'])
    scheduler = WarmupLR(optimizer, **configs['scheduler_conf'])
    final_epoch = None
    configs['rank'] = args.rank
    configs['is_distributed'] = distributed
    configs['use_amp'] = args.use_amp
    if start_epoch == 0 and args.rank == 0:
        save_model_path = os.path.join(model_dir, 'init.pt')
        save_checkpoint(model, save_model_path)

    # Start training loop
    executor.step = step
    scheduler.set_step(step)
    # used for pytorch amp mixed precision training
    scaler = None
    if args.use_amp:
        scaler = torch.cuda.amp.GradScaler()

    for epoch in range(start_epoch, num_epochs):
        train_dataset.set_epoch(epoch)
        configs['epoch'] = epoch
        lr = optimizer.param_groups[0]['lr']
        logging.info('Epoch {} TRAIN info lr {}'.format(epoch, lr))
        executor.train(model, optimizer, scheduler, train_data_loader, device,
                       writer, configs, scaler)
        total_loss, total_loss_att, total_loss_ctc, num_seen_utts = \
            executor.cv(model, cv_data_loader, device, configs)
        cv_loss = total_loss / num_seen_utts
        cv_loss_att = total_loss_att / num_seen_utts
        cv_loss_ctc = total_loss_ctc / num_seen_utts

        logging.info('Epoch {} CV info cv_loss {}'.format(epoch, cv_loss))
        if args.rank == 0:
            save_model_path = os.path.join(model_dir, '{}.pt'.format(epoch))
            save_checkpoint(
                model, save_model_path, {
                    'epoch': epoch,
                    'lr': lr,
                    'cv_loss': cv_loss,
                    'cv_loss_att': cv_loss_att,
                    'cv_loss_ctc': cv_loss_ctc,
                    'step': executor.step
                })
            writer.add_scalar('epoch/cv_loss', cv_loss, epoch)
            # BUG FIX: the att/ctc curves were previously fed cv_loss, so
            # all three TensorBoard plots were identical copies of the
            # total loss.
            writer.add_scalar('epoch/cv_loss_att', cv_loss_att, epoch)
            writer.add_scalar('epoch/cv_loss_ctc', cv_loss_ctc, epoch)
            writer.add_scalar('epoch/lr', lr, epoch)
        final_epoch = epoch

    if final_epoch is not None and args.rank == 0:
        final_model_path = os.path.join(model_dir, 'final.pt')
        # Robustness: re-running training used to crash here because
        # os.symlink refuses to overwrite an existing link.
        if os.path.lexists(final_model_path):
            os.remove(final_model_path)
        os.symlink('{}.pt'.format(final_epoch), final_model_path)
        writer.close()
def main():
    """Decode a test set with a trained ASR model and write hypotheses.

    Supports four decoding modes (attention, ctc_greedy_search,
    ctc_prefix_beam_search, attention_rescoring); results are written as
    "<key> <text>" lines to args.result_file.
    """
    args = get_args()
    logging.basicConfig(level=logging.DEBUG,
                        format='%(asctime)s %(levelname)s %(message)s')
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)

    # Rescoring-style modes decode one utterance at a time.
    if args.mode in ['ctc_prefix_beam_search', 'attention_rescoring'
                     ] and args.batch_size > 1:
        logging.fatal(
            'decoding mode {} must be running with batch_size == 1'.format(
                args.mode))
        sys.exit(1)

    with open(args.config, 'r') as fin:
        configs = yaml.load(fin, Loader=yaml.FullLoader)
    if len(args.override_config) > 0:
        configs = override_config(configs, args.override_config)

    symbol_table = read_symbol_table(args.dict)

    # Test-time dataset config: disable all filtering and augmentation,
    # keep input order, use static batches of args.batch_size.
    test_conf = copy.deepcopy(configs['dataset_conf'])
    test_conf['filter_conf'].update({
        'max_length': 102400,
        'min_length': 0,
        'token_max_length': 102400,
        'token_min_length': 0,
        'max_output_input_ratio': 102400,
        'min_output_input_ratio': 0,
    })
    test_conf['speed_perturb'] = False
    test_conf['spec_aug'] = False
    test_conf['shuffle'] = False
    test_conf['sort'] = False
    if 'fbank_conf' in test_conf:
        test_conf['fbank_conf']['dither'] = 0.0
    elif 'mfcc_conf' in test_conf:
        test_conf['mfcc_conf']['dither'] = 0.0
    test_conf['batch_conf']['batch_type'] = "static"
    test_conf['batch_conf']['batch_size'] = args.batch_size
    non_lang_syms = read_non_lang_symbols(args.non_lang_syms)

    test_dataset = Dataset(args.data_type,
                           args.test_data,
                           symbol_table,
                           test_conf,
                           args.bpe_model,
                           non_lang_syms,
                           partition=False)
    test_data_loader = DataLoader(test_dataset, batch_size=None,
                                  num_workers=0)

    # Init asr model from configs
    model = init_asr_model(configs)

    # Invert the symbol table (token -> id) into an id -> token map;
    # the last id doubles as <eos>.
    char_dict = {idx: token for token, idx in symbol_table.items()}
    eos = len(char_dict) - 1

    load_checkpoint(model, args.checkpoint)
    use_cuda = args.gpu >= 0 and torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')
    model = model.to(device)
    model.eval()

    with torch.no_grad(), open(args.result_file, 'w') as fout:
        for batch_idx, batch in enumerate(test_data_loader):
            keys, feats, target, feats_lengths, target_lengths = batch
            feats = feats.to(device)
            target = target.to(device)
            feats_lengths = feats_lengths.to(device)
            target_lengths = target_lengths.to(device)
            if args.mode == 'attention':
                hyps, _ = model.recognize(
                    feats,
                    feats_lengths,
                    beam_size=args.beam_size,
                    decoding_chunk_size=args.decoding_chunk_size,
                    num_decoding_left_chunks=args.num_decoding_left_chunks,
                    simulate_streaming=args.simulate_streaming)
                hyps = [hyp.tolist() for hyp in hyps]
            elif args.mode == 'ctc_greedy_search':
                hyps, _ = model.ctc_greedy_search(
                    feats,
                    feats_lengths,
                    decoding_chunk_size=args.decoding_chunk_size,
                    num_decoding_left_chunks=args.num_decoding_left_chunks,
                    simulate_streaming=args.simulate_streaming)
            # ctc_prefix_beam_search and attention_rescoring return a single
            # List[int]; wrap it so every mode yields List[List[int]].
            elif args.mode == 'ctc_prefix_beam_search':
                assert (feats.size(0) == 1)
                hyp, _ = model.ctc_prefix_beam_search(
                    feats,
                    feats_lengths,
                    args.beam_size,
                    decoding_chunk_size=args.decoding_chunk_size,
                    num_decoding_left_chunks=args.num_decoding_left_chunks,
                    simulate_streaming=args.simulate_streaming)
                hyps = [hyp]
            elif args.mode == 'attention_rescoring':
                assert (feats.size(0) == 1)
                hyp, _ = model.attention_rescoring(
                    feats,
                    feats_lengths,
                    args.beam_size,
                    decoding_chunk_size=args.decoding_chunk_size,
                    num_decoding_left_chunks=args.num_decoding_left_chunks,
                    ctc_weight=args.ctc_weight,
                    simulate_streaming=args.simulate_streaming,
                    reverse_weight=args.reverse_weight)
                hyps = [hyp]
            for i, key in enumerate(keys):
                pieces = []
                for w in hyps[i]:
                    if w == eos:
                        break
                    pieces.append(char_dict[w])
                content = ''.join(pieces)
                logging.info('{} {}'.format(key, content))
                fout.write('{} {}\n'.format(key, content))