class Net(object):
    """Speech-to-text inference wrapper built on ``mx.mod.BucketingModule``.

    The constructor reads its settings from ``args.config``, loads the
    checkpoint, binds the module and switches through every configured bucket
    up front. ``getTrans`` then transcribes one wav file with both greedy and
    beam-search CTC decoding.
    """

    def __init__(self, args):
        """Load the model and the language-model scorer.

        Args:
            args: parsed arguments exposing a ConfigParser-style ``config``
                attribute (``get``, ``getint``, ``getboolean`` and ``set`` are
                used here).
        """
        self.args = args
        config = self.args.config

        # set parameters from data section(common)
        self.mode = config.get('common', 'mode')
        self.contexts = parse_contexts(self.args)
        self.num_gpu = len(self.contexts)
        self.batch_size = config.getint('common', 'batch_size')
        self.is_batchnorm = config.getboolean('arch', 'is_batchnorm')
        self.is_bucketing = config.getboolean('arch', 'is_bucketing')

        # log current config
        self.config_logger = ConfigLogger(log)
        self.config_logger(args.config)

        # Feature normalisation statistics are read from the checkpoint dir.
        save_dir = 'checkpoints'
        model_name = config.get('common', 'prefix')
        max_freq = config.getint('data', 'max_freq')
        self.datagen = DataGenerator(save_dir=save_dir,
                                     model_name=model_name,
                                     max_freq=max_freq)
        self.datagen.get_meta_from_file(
            np.loadtxt(generate_file_path(save_dir, model_name, 'feats_mean')),
            np.loadtxt(generate_file_path(save_dir, model_name, 'feats_std')))

        # NOTE(review): the last bucket is used as the default key and
        # getTrans() bisects this list, so it is assumed to be sorted in
        # ascending order -- confirm against the config files.
        self.buckets = json.loads(config.get('arch', 'buckets'))
        default_bucket_key = self.buckets[-1]
        config.set('arch', 'max_t_count', str(default_bucket_key))
        config.set('arch', 'max_label_length', str(100))

        self.labelUtil = LabelUtil()
        is_bi_graphemes = config.getboolean('common', 'is_bi_graphemes')
        load_labelutil(self.labelUtil, is_bi_graphemes, language="zh")
        config.set('arch', 'n_classes', str(self.labelUtil.get_count()))
        self.max_t_count = config.getint('arch', 'max_t_count')

        # load model
        self.model_loaded, self.model_num_epoch, self.model_path = load_model(
            self.args)
        _, self.arg_params, self.aux_params = mx.model.load_checkpoint(
            self.model_path, self.model_num_epoch)
        module = mx.mod.BucketingModule(sym_gen=self.model_loaded,
                                        context=self.contexts,
                                        default_bucket_key=default_bucket_key)

        from importlib import import_module
        prepare_data_template = import_module(config.get('arch', 'arch_file'))
        self.init_states = prepare_data_template.prepare_data(self.args)
        self.width = config.getint('data', 'width')
        self.height = config.getint('data', 'height')

        module.bind(data_shapes=self._data_shapes(default_bucket_key),
                    for_training=False)
        module.set_params(self.arg_params,
                          self.aux_params,
                          allow_extra=True,
                          allow_missing=True)
        # Visit every bucket once at start-up (presumably so the per-bucket
        # executors already exist when the first request arrives).
        for bucket in self.buckets:
            module.switch_bucket(bucket_key=bucket,
                                 data_shapes=self._data_shapes(bucket))
        self.model = module

        self.scorer = self._build_scorer()

    def _data_shapes(self, bucket_key):
        """Return the ``(name, shape)`` input descriptions for one bucket."""
        return [
            ('data', (self.batch_size, bucket_key, self.width * self.height))
        ] + self.init_states

    def _build_scorer(self):
        """Return the language-model scorer handed to beam decoding.

        Uses ``swig_wrapper.Scorer`` when that module can be imported and
        falls back to the ``score`` method of a ``kenlm.Model`` otherwise.
        """
        lm_path = self.args.config.get('common', 'kenlm')
        try:
            from swig_wrapper import Scorer
        except ImportError:
            import kenlm
            return kenlm.Model(lm_path).score

        vocab_list = [chars.encode("utf-8") for chars in self.labelUtil.byList]
        log.info("vacab_list len is %d" % len(vocab_list))
        # NOTE(review): 0.26 / 0.1 are presumably the LM weight and the
        # word-insertion bonus -- confirm against the Scorer signature.
        ext_scorer = Scorer(0.26, 0.1, lm_path, vocab_list)
        log.info("language model: is_character_based = %d, max_order = %d,"
                 " dict_size = %d" % (ext_scorer.is_character_based(),
                                      ext_scorer.get_max_order(),
                                      ext_scorer.get_dict_size()))
        return ext_scorer

    def _select_bucket(self, num_frames):
        """Return the smallest configured bucket holding ``num_frames`` frames.

        Raises:
            ValueError: if ``num_frames`` exceeds every configured bucket
                (previously this surfaced as a bare ``IndexError``).
        """
        index = bisect.bisect_left(self.buckets, num_frames)
        if index >= len(self.buckets):
            raise ValueError(
                "input has %d frames, which does not fit any configured "
                "bucket %s" % (num_frames, self.buckets))
        return self.buckets[index]

    def getTrans(self, wav_file):
        """Transcribe ``wav_file`` with greedy and beam-search decoding.

        Args:
            wav_file: path passed to ``spectrogram_from_file``.

        Returns:
            A string of the form ``"greedy:\\n<text>\\nbeam:\\n<candidates>"``
            with the beam candidates joined by newlines.

        Raises:
            ValueError: if the audio has more frames than the largest bucket.
        """
        features = spectrogram_from_file(wav_file, noise_percent=0)
        bucket_key = self._select_bucket(len(features))
        features = self.datagen.normalize(features)

        # Only sample 0 of the batch carries audio; everything else is zero.
        padded = np.zeros((self.batch_size, bucket_key, features.shape[1]))
        padded[0, :features.shape[0], :] = features
        init_state_arrays = [mx.nd.zeros(x[1]) for x in self.init_states]
        data_batch = mx.io.DataBatch([mx.nd.array(padded)] + init_state_arrays,
                                     label=None,
                                     bucket_key=bucket_key,
                                     provide_data=self._data_shapes(bucket_key),
                                     provide_label=None)

        started = time.time()
        self.model.forward(data_batch, is_train=False)
        probs = self.model.get_outputs()[0].asnumpy()
        log.info("forward cost %.3f" % (time.time() - started))

        started = time.time()
        greedy = ctc_greedy_decode(probs, self.labelUtil.byList)
        log.info("greedy decode cost %.3f, result is:\n%s" %
                 (time.time() - started, greedy))

        beam_size = 5
        from stt_metric import ctc_beam_decode
        started = time.time()
        results = ctc_beam_decode(scorer=self.scorer,
                                  beam_size=beam_size,
                                  vocab=self.labelUtil.byList,
                                  probs=probs)
        beam = "\n".join(results)
        log.info("beam decode cost %.3f, result is:\n%s" %
                 (time.time() - started, beam))
        return "greedy:\n" + greedy + "\nbeam:\n" + beam
def load_data(args, kv=None):
    """Build the data iterators described by ``args.config``.

    Depending on ``common.mode`` this loads the train/validation or the test
    manifest, prepares the label dictionary and the feature normalisation
    statistics, writes ``n_classes``, ``max_t_count`` and ``max_label_length``
    into the ``arch`` config section and wraps the resulting iterators in
    ``BucketPrefetchingIter``.

    Args:
        args: parsed arguments exposing a ConfigParser-style ``config``.
        kv: optional KVStore-like object. When given, its ``num_workers`` and
            ``rank`` are forwarded to the bucketing training iterator as
            ``num_parts`` / ``part_index``. When ``None`` those keywords are
            omitted so the iterator's own defaults apply (previously this
            crashed with ``AttributeError``).

    Returns:
        ``(data_loaded, validation_loaded, args)`` in ``train``/``load`` mode
        and ``(data_loaded, args)`` in ``predict`` mode.

    Raises:
        Exception: if ``common.mode`` is not ``train``, ``predict`` or ``load``.
        Warning: if batch norm is enabled with a batch size of 1 while in
            ``train`` or ``load`` mode.
    """
    config = args.config
    mode = config.get('common', 'mode')
    if mode not in ['train', 'predict', 'load']:
        raise Exception(
            'mode must be the one of the followings - train,predict,load')
    train_or_load = mode in ('train', 'load')

    batch_size = config.getint('common', 'batch_size')
    val_batch_size = config.getint('common', 'val_batch_size')

    whcs = WHCS()
    whcs.width = config.getint('data', 'width')
    whcs.height = config.getint('data', 'height')
    whcs.channel = config.getint('data', 'channel')
    whcs.stride = config.getint('data', 'stride')
    noise_percent = config.getfloat('data', 'noise_percent')
    save_dir = 'checkpoints'
    model_name = config.get('common', 'prefix')
    is_bi_graphemes = config.getboolean('common', 'is_bi_graphemes')
    zh_type = config.get('data', 'zh_type')
    max_freq = config.getint('data', 'max_freq')
    overwrite_meta_files = config.getboolean('train', 'overwrite_meta_files')
    overwrite_bi_graphemes_dictionary = config.getboolean(
        'train', 'overwrite_bi_graphemes_dictionary')
    max_duration = config.getfloat('data', 'max_duration')
    language = config.get('data', 'language')
    fbank = config.getboolean("data", "fbank")
    log = LogUtil().getlogger()
    labelUtil = LabelUtil()

    def load_feature_stats(generator):
        """Feed the stored feature mean/std files into ``generator``."""
        generator.get_meta_from_file(
            np.loadtxt(generate_file_path(save_dir, model_name, 'feats_mean')),
            np.loadtxt(generate_file_path(save_dir, model_name, 'feats_std')))

    def dictionary_needed(path):
        """Tell whether the dictionary at ``path`` has to be (re)generated."""
        return not os.path.isfile(path) or overwrite_bi_graphemes_dictionary

    datagen = DataGenerator(save_dir=save_dir,
                            model_name=model_name,
                            max_freq=max_freq)
    if train_or_load:
        data_json = config.get('data', 'train_json')
        val_json = config.get('data', 'val_json')
        datagen.load_train_data(data_json, max_duration=max_duration)
        datagen.load_validation_data(val_json, max_duration=max_duration)
        corpus_texts = datagen.train_texts + datagen.val_texts

        if is_bi_graphemes and language == "en":
            if dictionary_needed(
                    "resources/unicodemap_en_baidu_bi_graphemes.csv"):
                load_labelutil(labelUtil=labelUtil,
                               is_bi_graphemes=False,
                               language=language)
                generate_bi_graphemes_dictionary(corpus_texts)
        if language == "zh" and zh_type == "phone":
            if dictionary_needed("resources/unicodemap_phone.csv"):
                generate_phone_dictionary()
        elif language == "zh" and zh_type == "zi":
            if dictionary_needed("resources/unicodemap_zi.csv"):
                generate_word_dictionary(corpus_texts)
        elif language == "zh" and zh_type == "py":
            if dictionary_needed("resources/unicodemap_py.csv"):
                generate_py_dictionary(corpus_texts)

        load_labelutil(labelUtil=labelUtil,
                       is_bi_graphemes=is_bi_graphemes,
                       language=language,
                       zh_type=zh_type)
        config.set('arch', 'n_classes', str(labelUtil.get_count()))

        if mode == "train" and overwrite_meta_files:
            log.info("Generate mean and std from samples")
            normalize_target_k = config.getint('train', 'normalize_target_k')
            datagen.sample_normalize(normalize_target_k, True, noise_percent)
        else:
            if mode == "train":
                log.info("Read mean and std from meta files")
            # get feat_mean and feat_std to normalize dataset
            load_feature_stats(datagen)
    else:
        # predict: the test manifest is loaded through the *train* loader, and
        # the iterators below consequently read partition="train".
        test_json = config.get('data', 'test_json')
        datagen.load_train_data(test_json, max_duration=max_duration)
        load_labelutil(labelUtil,
                       is_bi_graphemes,
                       language="zh",
                       zh_type=zh_type)
        config.set('arch', 'n_classes', str(labelUtil.get_count()))
        load_feature_stats(datagen)

    is_batchnorm = config.getboolean('arch', 'is_batchnorm')
    if batch_size == 1 and is_batchnorm and train_or_load:
        raise Warning('batch size 1 is too small for is_batchnorm')

    length_partition = "train" if train_or_load else "test"
    max_t_count = datagen.get_max_seq_length(partition=length_partition)
    max_label_length = datagen.get_max_label_length(
        partition=length_partition,
        is_bi_graphemes=is_bi_graphemes,
        language=language,
        zh_type=zh_type)
    config.set('arch', 'max_t_count', str(max_t_count))
    config.set('arch', 'max_label_length', str(max_label_length))

    from importlib import import_module
    prepare_data_template = import_module(config.get('arch', 'arch_file'))
    init_states = prepare_data_template.prepare_data(args)
    # sort file paths by its duration in ascending order to implement sortaGrad
    sort_by_duration = (mode == "train")
    is_bucketing = config.getboolean('arch', 'is_bucketing')
    save_feature_as_csvfile = config.getboolean('train',
                                                'save_feature_as_csvfile')

    # Keyword arguments shared by every iterator built below.
    common_iter_args = dict(datagen=datagen,
                            num_label=max_label_length,
                            seq_length=max_t_count,
                            width=whcs.width,
                            height=whcs.height,
                            is_bi_graphemes=is_bi_graphemes,
                            save_feature_as_csvfile=save_feature_as_csvfile)

    if is_bucketing:
        buckets = json.loads(config.get('arch', 'buckets'))
        # Extra keyword arguments only the bucketing iterator receives.
        bucket_iter_args = dict(common_iter_args,
                                language=language,
                                zh_type=zh_type,
                                buckets=buckets,
                                fbank=fbank)
        partition_args = {}
        if kv is not None:
            # Shard the training data across distributed workers.
            partition_args = dict(num_parts=kv.num_workers,
                                  part_index=kv.rank)
        data_loaded = BucketSTTIter(partition="train",
                                    count=datagen.count,
                                    batch_size=batch_size,
                                    init_states=init_states,
                                    sort_by_duration=sort_by_duration,
                                    noise_percent=noise_percent,
                                    **dict(bucket_iter_args, **partition_args))
    else:
        data_loaded = STTIter(partition="train",
                              count=datagen.count,
                              batch_size=batch_size,
                              init_states=init_states,
                              sort_by_duration=sort_by_duration,
                              **common_iter_args)
    data_loaded = BucketPrefetchingIter(data_loaded)

    if not train_or_load:
        return data_loaded, args

    if is_bucketing:
        init_states = prepare_data_template.prepare_data(args, is_val=True)
        # Validation is never sharded and never noise-augmented.
        validation_loaded = BucketSTTIter(partition="validation",
                                          count=datagen.val_count,
                                          batch_size=val_batch_size,
                                          init_states=init_states,
                                          sort_by_duration=False,
                                          noise_percent=0,
                                          **bucket_iter_args)
    else:
        validation_loaded = STTIter(partition="validation",
                                    count=datagen.val_count,
                                    batch_size=val_batch_size,
                                    init_states=init_states,
                                    sort_by_duration=False,
                                    **common_iter_args)
    validation_loaded = BucketPrefetchingIter(validation_loaded)
    return data_loaded, validation_loaded, args
class Net(object):
    """Speech-to-text inference wrapper built on ``STTBucketingModule``.

    The configuration file path is taken from ``sys.argv[1]``. ``getTrans``
    builds a fresh data iterator for each wav file via ``load_data`` and
    decodes the network output with CTC beam search.
    """

    def __init__(self):
        """Parse the config, seed the RNGs, bind the model and load the LM.

        Raises:
            Exception: if no config file path was given on the command line.
        """
        if len(sys.argv) <= 1:
            raise Exception('cfg file path must be provided. ' +
                            'ex)python main.py --configfile examplecfg.cfg')
        self.args = parse_args(sys.argv[1])
        config = self.args.config

        # A value of -1 means "do not fix the seed".
        self.random_seed = config.getint('common', 'random_seed')
        self.mx_random_seed = config.getint('common', 'mx_random_seed')
        # random seed for shuffling data list
        if self.random_seed != -1:
            np.random.seed(self.random_seed)
        # set mx.random.seed to give seed for parameter initialization
        if self.mx_random_seed != -1:
            mx.random.seed(self.mx_random_seed)
        else:
            mx.random.seed(hash(datetime.now()))

        # set log file name
        self.log_filename = config.get('common', 'log_filename')
        self.log = LogUtil(filename=self.log_filename).getlogger()

        # set parameters from data section(common)
        self.mode = config.get('common', 'mode')
        self.contexts = parse_contexts(self.args)
        self.num_gpu = len(self.contexts)
        self.batch_size = config.getint('common', 'batch_size')
        self.is_batchnorm = config.getboolean('arch', 'is_batchnorm')
        self.is_bucketing = config.getboolean('arch', 'is_bucketing')

        # log current config
        self.config_logger = ConfigLogger(self.log)
        self.config_logger(self.args.config)

        # NOTE(review): the default bucket key and the label length are
        # hard-coded here rather than read from the config.
        default_bucket_key = 1600
        config.set('arch', 'max_t_count', str(default_bucket_key))
        config.set('arch', 'max_label_length', str(100))
        self.labelUtil = LabelUtil()
        is_bi_graphemes = config.getboolean('common', 'is_bi_graphemes')
        load_labelutil(self.labelUtil, is_bi_graphemes, language="zh")
        config.set('arch', 'n_classes', str(self.labelUtil.get_count()))
        self.max_t_count = config.getint('arch', 'max_t_count')

        # load model
        self.model_loaded, self.model_num_epoch, self.model_path = load_model(
            self.args)
        self.model = STTBucketingModule(sym_gen=self.model_loaded,
                                        default_bucket_key=default_bucket_key,
                                        context=self.contexts)

        from importlib import import_module
        prepare_data_template = import_module(config.get('arch', 'arch_file'))
        init_states = prepare_data_template.prepare_data(self.args)
        width = config.getint('data', 'width')
        height = config.getint('data', 'height')
        # NOTE(review): the module is bound with for_training=True although
        # getTrans() only runs forward passes -- kept as in the original.
        self.model.bind(
            data_shapes=[('data', (self.batch_size, default_bucket_key,
                                   width * height))] + init_states,
            label_shapes=[('label',
                           (self.batch_size,
                            config.getint('arch', 'max_label_length')))],
            for_training=True)
        _, self.arg_params, self.aux_params = mx.model.load_checkpoint(
            self.model_path, self.model_num_epoch)
        self.model.set_params(self.arg_params,
                              self.aux_params,
                              allow_extra=True,
                              allow_missing=True)

        self.scorer = self._build_scorer()

    def _build_scorer(self):
        """Return the language-model scorer handed to beam decoding.

        Uses ``swig_wrapper.Scorer`` when that module can be imported and
        falls back to the ``score`` method of a ``kenlm.Model`` otherwise.
        """
        lm_path = self.args.config.get('common', 'kenlm')
        try:
            from swig_wrapper import Scorer
        except ImportError:
            import kenlm
            return kenlm.Model(lm_path).score

        vocab_list = [chars.encode("utf-8") for chars in self.labelUtil.byList]
        self.log.info("vacab_list len is %d" % len(vocab_list))
        # NOTE(review): 0.26 / 0.1 are presumably the LM weight and the
        # word-insertion bonus -- confirm against the Scorer signature.
        ext_scorer = Scorer(0.26, 0.1, lm_path, vocab_list)
        self.log.info("language model: is_character_based = %d,"
                      " max_order = %d, dict_size = %d" %
                      (ext_scorer.is_character_based(),
                       ext_scorer.get_max_order(),
                       ext_scorer.get_dict_size()))
        return ext_scorer

    def getTrans(self, wav_file):
        """Transcribe ``wav_file`` with CTC beam search.

        Args:
            wav_file: path handed to ``load_data`` to build the iterator.

        Returns:
            The beam-search candidates of the last processed batch joined by
            newlines, or an empty string when the iterator yields no batch
            (previously that case raised ``NameError``).
        """
        self.data_train, self.args = load_data(self.args, wav_file)
        from stt_metric import ctc_beam_decode
        beam_size = 5
        results = []
        for data_batch in self.data_train:
            started = time.time()
            self.model.forward(data_batch, is_train=False)
            probs = self.model.get_outputs()[0].asnumpy()
            self.log.info("forward cost %.2f" % (time.time() - started))

            started = time.time()
            results = ctc_beam_decode(scorer=self.scorer,
                                      beam_size=beam_size,
                                      vocab=self.labelUtil.byList,
                                      probs=probs)
            self.log.info("decode cost %.2f, result is:\n%s" %
                          (time.time() - started, "\n".join(results)))
        return "\n".join(results)
def load_data(args, wav_file):
    """Build a data iterator that serves the single utterance ``wav_file``.

    A ``DataGenerator`` is filled by hand with one audio path, its duration
    and a dummy transcript, then wrapped in ``BucketSTTIter`` or ``STTIter``
    depending on ``arch.is_bucketing``. ``n_classes``, ``max_t_count`` and
    ``max_label_length`` are written into the ``arch`` config section.

    Args:
        args: parsed arguments exposing a ConfigParser-style ``config``.
        wav_file: path of the wav file to serve.

    Returns:
        ``(data_loaded, args)``.

    Raises:
        Exception: if ``common.mode`` is not ``train``, ``predict`` or ``load``.
        Warning: if batch norm is enabled with a batch size of 1 while in
            ``train`` or ``load`` mode.
    """
    cfg = args.config
    mode = cfg.get('common', 'mode')
    if mode not in ['train', 'predict', 'load']:
        raise Exception(
            'mode must be the one of the followings - train,predict,load')
    batch_size = cfg.getint('common', 'batch_size')

    whcs = WHCS()
    whcs.width = cfg.getint('data', 'width')
    whcs.height = cfg.getint('data', 'height')
    whcs.channel = cfg.getint('data', 'channel')
    whcs.stride = cfg.getint('data', 'stride')

    checkpoint_dir = 'checkpoints'
    prefix = cfg.get('common', 'prefix')
    is_bi_graphemes = cfg.getboolean('common', 'is_bi_graphemes')
    # The following options are read but not used further in this function;
    # the reads are kept so that a missing option still fails right here.
    cfg.getboolean('train', 'overwrite_meta_files')
    cfg.getboolean('train', 'overwrite_bi_graphemes_dictionary')
    cfg.getfloat('data', 'max_duration')
    max_freq = cfg.getint('data', 'max_freq')
    cfg.get('data', 'language')
    LogUtil().getlogger()
    label_util = LabelUtil()

    # One-utterance "dataset": the transcript is only a placeholder.
    datagen = DataGenerator(save_dir=checkpoint_dir,
                            model_name=prefix,
                            max_freq=max_freq)
    datagen.train_audio_paths = [wav_file]
    datagen.train_durations = [get_duration_wave(wav_file)]
    datagen.train_texts = ["1 1"]
    datagen.count = 1

    load_labelutil(label_util, is_bi_graphemes, language="zh")
    cfg.set('arch', 'n_classes', str(label_util.get_count()))
    feats_mean = np.loadtxt(
        generate_file_path(checkpoint_dir, prefix, 'feats_mean'))
    feats_std = np.loadtxt(
        generate_file_path(checkpoint_dir, prefix, 'feats_std'))
    datagen.get_meta_from_file(feats_mean, feats_std)

    is_batchnorm = cfg.getboolean('arch', 'is_batchnorm')
    if batch_size == 1 and is_batchnorm and mode in ('train', 'load'):
        raise Warning('batch size 1 is too small for is_batchnorm')

    max_t_count = datagen.get_max_seq_length(partition="test")
    max_label_length = datagen.get_max_label_length(
        partition="test", is_bi_graphemes=is_bi_graphemes)
    cfg.set('arch', 'max_t_count', str(max_t_count))
    cfg.set('arch', 'max_label_length', str(max_label_length))

    from importlib import import_module
    arch_module = import_module(cfg.get('arch', 'arch_file'))
    init_states = arch_module.prepare_data(args)
    is_bucketing = cfg.getboolean('arch', 'is_bucketing')
    save_feature_as_csvfile = cfg.getboolean('train',
                                             'save_feature_as_csvfile')

    # Both iterator classes take the same keywords, except for ``buckets``.
    iter_kwargs = dict(partition="train",
                       count=datagen.count,
                       datagen=datagen,
                       batch_size=batch_size,
                       num_label=max_label_length,
                       init_states=init_states,
                       seq_length=max_t_count,
                       width=whcs.width,
                       height=whcs.height,
                       sort_by_duration=(mode == "train"),
                       is_bi_graphemes=is_bi_graphemes,
                       save_feature_as_csvfile=save_feature_as_csvfile)
    if is_bucketing:
        iter_kwargs['buckets'] = json.loads(cfg.get('arch', 'buckets'))
        iterator_cls = BucketSTTIter
    else:
        iterator_cls = STTIter
    return iterator_cls(**iter_kwargs), args
class Net(object):
    """Speech-to-text inference wrapper driving a raw MXNet executor.

    The configuration file path is taken from ``sys.argv[1]``. The network is
    rebuilt from a checkpoint, cut at a fixed internal layer and bound with
    ``simple_bind`` on the CPU; ``forward`` runs it and feeds the state
    outputs back into the state inputs.
    """

    # Number of time frames the executor is bound with; getTrans() pads every
    # utterance to this length.
    _MAX_FRAMES = 1600

    def __init__(self):
        """Parse the config, seed the RNGs, bind the executor, load the LM.

        Raises:
            Exception: if no config file path was given on the command line.
        """
        if len(sys.argv) <= 1:
            raise Exception('cfg file path must be provided. ' +
                            'ex)python main.py --configfile examplecfg.cfg')
        self.args = parse_args(sys.argv[1])
        config = self.args.config

        # A value of -1 means "do not fix the seed".
        self.random_seed = config.getint('common', 'random_seed')
        self.mx_random_seed = config.getint('common', 'mx_random_seed')
        # random seed for shuffling data list
        if self.random_seed != -1:
            np.random.seed(self.random_seed)
        # set mx.random.seed to give seed for parameter initialization
        if self.mx_random_seed != -1:
            mx.random.seed(self.mx_random_seed)
        else:
            mx.random.seed(hash(datetime.now()))

        # set log file name
        self.log_filename = config.get('common', 'log_filename')
        self.log = LogUtil(filename=self.log_filename).getlogger()

        # set parameters from data section(common)
        self.mode = config.get('common', 'mode')

        # Feature normalisation statistics are read from the checkpoint dir.
        save_dir = 'checkpoints'
        model_name = config.get('common', 'prefix')
        max_freq = config.getint('data', 'max_freq')
        self.datagen = DataGenerator(save_dir=save_dir,
                                     model_name=model_name,
                                     max_freq=max_freq)
        self.datagen.get_meta_from_file(
            np.loadtxt(generate_file_path(save_dir, model_name, 'feats_mean')),
            np.loadtxt(generate_file_path(save_dir, model_name, 'feats_std')))
        self.buckets = json.loads(config.get('arch', 'buckets'))

        self.contexts = parse_contexts(self.args)
        self.num_gpu = len(self.contexts)
        self.batch_size = config.getint('common', 'batch_size')
        self.is_batchnorm = config.getboolean('arch', 'is_batchnorm')
        self.is_bucketing = config.getboolean('arch', 'is_bucketing')

        # log current config
        self.config_logger = ConfigLogger(self.log)
        self.config_logger(self.args.config)

        default_bucket_key = self._MAX_FRAMES
        config.set('arch', 'max_t_count', str(default_bucket_key))
        config.set('arch', 'max_label_length', str(95))
        self.labelUtil = LabelUtil()
        is_bi_graphemes = config.getboolean('common', 'is_bi_graphemes')
        load_labelutil(self.labelUtil, is_bi_graphemes, language="zh")
        config.set('arch', 'n_classes', str(self.labelUtil.get_count()))
        self.max_t_count = config.getint('arch', 'max_t_count')

        # load model
        self.model_loaded, self.model_num_epoch, self.model_path = load_model(
            self.args)

        from importlib import import_module
        prepare_data_template = import_module(config.get('arch', 'arch_file'))
        init_states = prepare_data_template.prepare_data(self.args)
        width = config.getint('data', 'width')
        height = config.getint('data', 'height')

        # Dump one symbol file per bucket. The state names of the *last*
        # bucket are the ones used further down.
        for bucket in self.buckets:
            net, init_state_names, _ = self.model_loaded(bucket)
            net.save('checkpoints/%s-symbol.json' % bucket)

        # NOTE(review): the label shape (1, 18) is hard-coded -- confirm.
        input_shapes = dict([('data', (self.batch_size, default_bucket_key,
                                       width * height))] + init_states +
                            [('label', (1, 18))])
        symbol, self.arg_params, self.aux_params = mx.model.load_checkpoint(
            self.model_path, self.model_num_epoch)
        # NOTE(review): 'concat36457_output' is an auto-generated layer name
        # tied to one particular checkpoint -- confirm before reusing.
        concat = symbol.get_internals()['concat36457_output']
        sm = mx.sym.SoftmaxOutput(data=concat, name='softmax')
        self.executor = sm.simple_bind(ctx=mx.cpu(), **input_shapes)

        # Copy every checkpoint parameter the executor knows about.
        for name, target in self.executor.arg_dict.items():
            if name in self.arg_params:
                self.arg_params[name].copyto(target)

        init_state_names.remove('data')
        init_state_names.sort()
        # Pair each state input name with one of the outputs after the first.
        self.states_dict = dict(
            zip(init_state_names, self.executor.outputs[1:]))
        self.input_arr = mx.nd.zeros(
            (self.batch_size, default_bucket_key, width * height))

        self.eval_metric = self._build_eval_metric()

    def _build_eval_metric(self):
        """Return an ``EvalSTTMetric`` wired to the language-model scorer.

        Uses ``swig_wrapper.Scorer`` when that module can be imported and
        falls back to the ``score`` method of a ``kenlm.Model`` otherwise.
        """
        lm_path = self.args.config.get('common', 'kenlm')
        try:
            from swig_wrapper import Scorer
        except ImportError:
            import kenlm
            scorer = kenlm.Model(lm_path).score
        else:
            vocab_list = [
                chars.encode("utf-8") for chars in self.labelUtil.byList
            ]
            self.log.info("vacab_list len is %d" % len(vocab_list))
            # NOTE(review): 0.26 / 0.1 are presumably the LM weight and the
            # word-insertion bonus -- confirm against the Scorer signature.
            scorer = Scorer(0.26, 0.1, lm_path, vocab_list)
            self.log.info("language model: is_character_based = %d,"
                          " max_order = %d, dict_size = %d" %
                          (scorer.is_character_based(),
                           scorer.get_max_order(), scorer.get_dict_size()))
        return EvalSTTMetric(batch_size=self.batch_size,
                             num_gpu=self.num_gpu,
                             is_logging=True,
                             scorer=scorer)

    @staticmethod
    def _pad_to_batch(features, batch_size, num_frames):
        """Zero-pad ``features`` into a ``(batch_size, num_frames, dim)`` array.

        Only sample 0 of the batch is filled; the remaining samples stay zero.

        Raises:
            ValueError: if ``features`` has more than ``num_frames`` rows.
        """
        if features.shape[0] > num_frames:
            raise ValueError(
                "input has %d frames but the network is bound for at most %d"
                % (features.shape[0], num_frames))
        batch = np.zeros((batch_size, num_frames, features.shape[1]))
        batch[0, :features.shape[0], :] = features
        return batch

    def forward(self, input_data, new_seq=False):
        """Run one forward pass through the bound executor.

        Args:
            input_data: array copied into the executor's ``data`` argument
                (it must provide ``copyto``).
            new_seq: when truthy, every state argument named in
                ``self.states_dict`` is zeroed before the pass.

        Returns:
            The executor's first output converted with ``asnumpy()``.
        """
        if new_seq:
            for key in self.states_dict:
                self.executor.arg_dict[key][:] = 0.
        input_data.copyto(self.executor.arg_dict["data"])
        self.executor.forward()
        # Feed the state outputs back so the next call continues from them.
        for key, state in self.states_dict.items():
            state.copyto(self.executor.arg_dict[key])
        return self.executor.outputs[0].asnumpy()

    def getTrans(self, wav_file):
        """Run the network on ``wav_file`` and log the greedy transcription.

        Args:
            wav_file: path passed to ``spectrogram_from_file``.

        Returns:
            ``self.eval_metric.placeholder``.
            NOTE(review): the metric is never updated in this method, so the
            greedy result is only logged, not returned -- confirm intent.

        Raises:
            ValueError: if the audio has more than ``_MAX_FRAMES`` frames.
        """
        features = spectrogram_from_file(wav_file, noise_percent=0)
        features = self.datagen.normalize(features)
        batch = self._pad_to_batch(features, self.batch_size,
                                   self._MAX_FRAMES)

        started = time.time()
        # NOTE(review): new_seq is left at False, so states carry over from
        # the previous call -- confirm this is wanted for independent files.
        probs = self.forward(mx.nd.array(batch))
        from stt_metric import ctc_greedy_decode
        text = ctc_greedy_decode(probs, self.labelUtil.byList)
        self.log.info("forward cost %.2f, %s" % (time.time() - started, text))

        # No metric update happens here; the log line is kept for
        # compatibility with existing log consumers.
        started = time.time()
        self.log.info("upate metric cost %.2f" % (time.time() - started))
        return self.eval_metric.placeholder