def get_next_split_data(self):
    '''
    Load the features and both label sets of the current data split.

    The split's scp file
    (<tmp_dir>/split.<name>.<split_data_counter>.scp) is piped through
    `splice-feats` (self.splice frames of left and right context) and
    `apply-cmvn` (using <exp>/cmvn.mat). Utterances whose id is missing
    from self.asr_labels or self.sid_labels are skipped.

    output:
      feat_list: list of np matrix [num_frames, feat_dim]
      asr_label_list: list of int32 np array [num_frames]
      sid_label_list: list of int32

    raises:
      RuntimeError: if no utterance with both kinds of labels was read.
    '''
    scp_path = (self.tmp_dir + '/split.' + self.name + '.'
                + str(self.split_data_counter) + '.scp')
    splice_cmd = [
        'splice-feats', '--print-args=false',
        '--left-context=' + str(self.splice),
        '--right-context=' + str(self.splice),
        'scp:' + scp_path, 'ark:-',
    ]
    cmvn_cmd = [
        'apply-cmvn', '--print-args=false', '--norm-vars=true',
        self.exp + '/cmvn.mat', 'ark:-', 'ark:-',
    ]

    feat_list = []
    asr_label_list = []
    sid_label_list = []

    # The context managers close the pipes and reap both children, even if
    # reading an utterance fails half-way through.
    with Popen(splice_cmd, stdout=PIPE, stderr=DEVNULL) as p1, \
            Popen(cmvn_cmd, stdin=p1.stdout, stdout=PIPE,
                  stderr=DEVNULL) as p2:
        # Drop the parent's copy of the pipe so that p1 receives SIGPIPE
        # if p2 exits early.
        p1.stdout.close()
        while True:
            uid, feat = kaldi_IO.read_utterance(p2.stdout)
            if uid is None:
                # no more utterances
                break
            if uid in self.asr_labels and uid in self.sid_labels:
                feat_list.append(feat)
                asr_label_list.append(self.asr_labels[uid])
                sid_label_list.append(self.sid_labels[uid])

    # All three lists grow together, so checking one of them is enough.
    if not feat_list:
        raise RuntimeError("No feats are loaded! please check feature and labels, and make sure they are matched.")

    return (feat_list, asr_label_list, sid_label_list)
def get_next_split_data(self):
    '''
    Load the features and labels of the current data split.

    The split's scp file
    (<temp_dir>/split.<name>.<split_data_counter>.scp) is piped through
    `splice-feats` (self.splice frames of left and right context) and
    `apply-cmvn` (using <exp>/cmvn.mat). Utterances whose id is not in
    self.labels are skipped. Both lists may be empty; no error is raised
    in that case.

    output:
      feat_list: list of np matrix [num_frames, feat_dim]
      label_list: list of int32 np array [num_frames]
    '''
    scp_path = (self.temp_dir + '/split.' + self.name + '.'
                + str(self.split_data_counter) + '.scp')
    splice_cmd = [
        'splice-feats', '--print-args=false',
        '--left-context=' + str(self.splice),
        '--right-context=' + str(self.splice),
        'scp:' + scp_path, 'ark:-',
    ]
    cmvn_cmd = [
        'apply-cmvn', '--print-args=false', '--norm-vars=true',
        self.exp + '/cmvn.mat', 'ark:-', 'ark:-',
    ]

    feat_list = []
    label_list = []

    # The context managers close the pipes and reap both children on every
    # exit path (the previous clean-up sat after an infinite loop that
    # returned from inside, so it never ran).
    with Popen(splice_cmd, stdout=PIPE, stderr=DEVNULL) as p1, \
            Popen(cmvn_cmd, stdin=p1.stdout, stdout=PIPE,
                  stderr=DEVNULL) as p2:
        # Drop the parent's copy of the pipe so that p1 receives SIGPIPE
        # if p2 exits early.
        p1.stdout.close()
        while True:
            uid, feat = kaldi_IO.read_utterance(p2.stdout)
            if uid is None:
                # no more utterances
                break
            if uid in self.labels:
                feat_list.append(feat)
                label_list.append(self.labels[uid])

    return (feat_list, label_list)
def get_next_split_data(self):
    '''
    Load the features and labels of the current data split.

    Runs a shell pipeline `copy-feats | splice-feats | apply-cmvn` over
    the split's scp file
    (<tmp_dir>/split.<name>.<split_data_counter>.scp), with self.splice
    frames of left and right context and <exp>/cmvn.mat as the CMVN
    statistics. Utterances whose id is not in self.labels are skipped.

    output:
      feat_list: list of np matrix [num_frames, feat_dim]
      label_list: list of int32 np array [num_frames]

    raises:
      RuntimeError: if no labelled utterance was read.
    '''
    scp_path = (self.tmp_dir + '/split.' + self.name + '.'
                + str(self.split_data_counter) + '.scp')
    # NOTE: the command is run through the shell and the paths are
    # interpolated unquoted, so tmp_dir, name and exp must not contain
    # whitespace or shell metacharacters.
    cmd = [
        'copy-feats', 'scp:' + scp_path, 'ark:- |',
        'splice-feats',
        '--left-context=' + str(self.splice),
        '--right-context=' + str(self.splice),
        'ark:-', 'ark:-|',
        'apply-cmvn', '--norm-vars=true', self.exp + '/cmvn.mat',
        'ark:-', 'ark:-',
    ]

    feat_list = []
    label_list = []

    # The context manager closes the pipe and reaps the shell process, even
    # if reading an utterance fails half-way through.
    with Popen(' '.join(cmd), shell=True, stdout=PIPE,
               stderr=DEVNULL) as p1:
        while True:
            uid, feat = kaldi_IO.read_utterance(p1.stdout)
            if uid is None:
                # no more utterances
                break
            if uid in self.labels:
                feat_list.append(feat)
                label_list.append(self.labels[uid])

    # Both lists grow together, so checking one of them is enough.
    if not feat_list:
        raise RuntimeError("No feats are loaded! please check feature and labels," + \
                           "and make sure they are matched.")

    return (feat_list, label_list)
# Decode features from stdin: splice frames, apply CMVN, run the network and
# write the prior-normalised scores to stdout as a Kaldi archive.
logger.info("loading the model %s", args.model_file)
# The contents of the model file are what gets passed to nnet.read().
with open(args.model_file, 'r') as model_file:
    model_name = model_file.read()
nnet.read(model_name, num_multi=num_multi)

# Turn the prior counts into log prior probabilities.
prior_counts = np.genfromtxt(args.prior_counts_file)
priors = prior_counts / prior_counts.sum()
log_priors = np.log(priors)

# Archives are binary, so use the raw byte streams underneath stdin/stdout.
ark_in = sys.stdin.buffer
ark_out = sys.stdout.buffer
encoding = sys.stdout.encoding

# Use the default SIGPIPE action so that a closed downstream pipe terminates
# this process instead of surfacing as a Python exception.
signal(SIGPIPE, SIG_DFL)

# The context managers close the pipes and reap both children when the loop
# ends or fails.
with Popen(['splice-feats', '--print-args=false',
            '--left-context=' + str(splice),
            '--right-context=' + str(splice),
            'ark:-', 'ark:-'],
           stdin=ark_in, stdout=PIPE, stderr=DEVNULL) as p1, \
        Popen(['apply-cmvn', '--print-args=false', '--norm-vars=true',
               srcdir + '/cmvn.mat', 'ark:-', 'ark:-'],
              stdin=p1.stdout, stdout=PIPE, stderr=DEVNULL) as p2:
    # Drop the parent's copy of the pipe so that p1 receives SIGPIPE if p2
    # exits early.
    p1.stdout.close()
    while True:
        uid, feats = kaldi_IO.read_utterance(p2.stdout)
        if uid is None:
            # we are done
            break
        # NOTE(review): predict() is called with take_log=False, yet its
        # result is treated as log posteriors below - confirm what
        # nnet.predict returns in this mode.
        log_post = nnet.predict(feats, take_log=False)
        log_likes = log_post - log_priors
        kaldi_IO.write_utterance(uid, log_likes, ark_out, encoding)