def on_epoch_end(self, epoch, logs={}):
    monitor_values = logs[self.key_monitor]
    if len(monitor_values) >= 2:
        io.log('{}: {} --> {}'.format(self.key_monitor, self.best_val,
                                      monitor_values[-1]))
    if len(monitor_values) < 2 or \
            self.monitor_op(monitor_values[-1], self.best_val):
        io.log('saving weights')
        self.model.save_weights(self.weights_path, overwrite=True)
        self.best_val = monitor_values[-1]
        io.log('best {}: {}'.format(self.key_monitor, self.best_val))
        self.wait = 0
    else:
        if self.wait >= self.patience or self.curr_lr < self.min_lr:
            self.model.stop_training = True
        self.wait += 1
        if self.restore > 0:
            io.log('loading weights from last epoch')
            self.model.load_weights(self.weights_path)
        if self.k > 1.:
            self.curr_lr /= self.k
            K.set_value(self.model.optimizer.lr, self.curr_lr)
            io.log('lr: {} --> {}'.format(self.curr_lr * self.k, self.curr_lr))

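# Behavior summary (derived from the callback above): when the monitored value
# improves, the current weights are saved and the patience counter resets;
# otherwise the previous best weights are optionally restored, the learning
# rate is divided by k, and training stops once patience is exhausted or the
# learning rate drops below min_lr.
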
def validate_rnn(model, buf_valid, metrics=['acc'], stateful=False,
                 report_interval=20):
    """ Evaluate against a dataset. Return a list of values, one for each
    metric. `metrics` must be a list.
    """
    metrics = [init_metric(m) for m in metrics]
    chunks_read = 0
    while True:
        # Load data chunk
        X = buf_valid.read_next_chunk()
        if X is None:
            break
        report = chunks_read % report_interval == 0
        chunks_read += 1
        # Validate
        valid_Xs, valid_ys, valid_eobs, utt_indices = X
        if report:
            io.log('Validating on chunk {} ({} utts, max dur {})'.format(
                buf_valid.get_progress(), len(utt_indices), valid_Xs[0].shape[1]
            ))
        for valid_X, valid_y, valid_eob in zip(valid_Xs, valid_ys, valid_eobs):
            preds = model.predict(
                valid_X, batch_size=len(utt_indices), verbose=0
            )
            for i in range(len(valid_eob)):
                y_true = valid_y[i][buf_valid.get_delay():valid_eob[i]]
                y_pred = preds[i][buf_valid.get_delay():valid_eob[i]]
                for m in metrics:
                    m.accum(y_true, y_pred)
            if stateful:
                model.reset_states()
    return [m.eval() for m in metrics]

def pre_execute(self):
    # Hack to fix recursion depth exceeded
    sys.setrecursionlimit(1000000)
    io.log('args: {}'.format(vars(self.args)))
    self._init_output_dir()
    self._init_records()
    self._init_best_err()

def __get_word_labels(text, utt):
    """ Return word-level error labels for text. Each error label is an array
    of strings, containing: <wrd_start> <wrd_end> <label1> <label2> ...
    """
    if len(text) == 0 or text[0] is None:
        return None
    wlabels_raw = []
    widx = -1
    for idx, w in zip(range(len(text)), text):
        if __is_word(w):
            widx += 1
            wlabel = [widx]
            err_indices = __find_errs(text, idx)
            if len(err_indices) == 0:
                wlabel.append('C/{}'.format(w))
            else:
                io.log('**WARN** {} - word with multiple errors: {}'.format(
                    utt, text[idx:err_indices[-1] + 1]))
                # Word target is shared across different error types
                target = __parse_target(text, idx, err_indices[-1] + 1, utt)
                for err_idx in err_indices:
                    err_code = __parse_err_code(text, err_idx, utt)
                    wlabel.append('{}/{}'.format(err_code, target))
            wlabels_raw.append(wlabel)
    return __finalize_wlabels(wlabels_raw, text, utt)

def _init_base_err(self):
    """ Compute validation results using base model.
    """
    args = self.args
    if 'base' in self.records:
        valid_errs = self.records['base'][1:]
        self.base_err = valid_errs[args.validate_on]
        return
    metrics = ['acc']
    if args.task != 'classification':
        metrics = ['mean_squared_error']
    io.log('Reporting base model results, metrics = {}'.format(metrics))
    # Convert to multi-task to conform with data provider
    mt_model = Model(input=self.base_model.input,
                     output=[self.base_model.output] * 2)
    train_err = -1  # Don't compute error on training set, too expensive
    valid_errs = []
    for buf_valid in self.buf_valids:
        valid_errs.append(
            learning_mtrnn.validate_mtrnn(mt_model, buf_valid,
                                          metrics=metrics)[0][0])
    io.log('--> base: train err {}, valid err {}'.format(
        train_err, valid_errs[args.validate_on]))
    # Update records and clean up
    self._update_records('base', [train_err] + valid_errs)
    self.base_err = valid_errs[args.validate_on]
    del mt_model

def eval_dnn(dnn, buf_dataset, shared_ds=None):
    """ Return the mini-batch error rate on the given dataset.

    :type dnn: models.dnn.DNN
    :param dnn: The DNN to use for evaluation

    :type buf_dataset: chaipy.data.temporal.BufferedTemporalData
    :param buf_dataset: The dataset to evaluate on

    :type shared_ds: tuple (see BufferedTemporalData.make_shared)
    :param shared_ds: (optional) The shared dataset to use. If not set, will
        be automatically created from buf_dataset.

    :rtype: float
    :return: The mean mini-batch error rate (percentage)
    """
    if shared_ds is None:
        shared_ds = buf_dataset.make_shared()
    x, shared_x, y, shared_y = shared_ds[:4]
    # Compile validation function
    io.log('... getting the validation function')
    valid_fn = dnn.build_validation_function((shared_x, shared_y),
                                             batch_size=dnn.cfg.batch_size)
    io.log('Got it!')
    # Get error
    errors = _validate(valid_fn, buf_dataset, dnn.cfg.batch_size, shared_ds)
    return 100 * numpy.mean(errors)

def __proc_compound_words(text, utt):
    """ Compound words are linked by '+', '-', or '_'. We need to break them
    apart. The tricky thing is to preserve special form markers after breaking.
    """
    if len(text) == 0 or text[0] is None:
        return text
    new_text = []
    for idx, w in zip(range(len(text)), text):
        if __is_word(w) and not w.startswith('<') and \
                any([c in w for c in ['+', '_', '-']]):
            if any([c in w for c in ['@o', '@s', '@b', '@si', '@i']]):
                io.log('**WARN** {} - not breaking {}'.format(utt, w))
                new_text.append(w)
            else:
                err_indices = __find_errs(text, idx)
                if len(err_indices) != 0:
                    io.log('**WARN** {} - compound with errs: {}'.format(
                        utt, text[idx:err_indices[-1] + 1]))
                ary = w.split('@')
                new_words = ary[0].replace('+', ' ')
                new_words = new_words.replace('_', ' ')
                new_words = new_words.replace('-', ' ')
                nw_ary = new_words.split()
                for i, nw in zip(range(len(nw_ary)), nw_ary):
                    new_text.append('@'.join([nw] + ary[1:]))
                    if len(err_indices) != 0 and i < len(nw_ary) - 1:
                        new_text.append('[* ->]')
        else:
            new_text.append(w)
    return new_text

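# Example (derived from the code above): a compound token like 'fire+truck@n'
# is split on '+', '_', or '-' and the form marker is re-attached to each
# piece, yielding 'fire@n' and 'truck@n'. Tokens carrying markers such as
# '@o' or '@si' are left intact with a warning.
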
def _train(self, model, lrate, optimizer, model_name, model_dir, resume_json, **kwargs): """ Train model. """ args = self.args loss = [] validate_metrics = {} for i in range(len(args.task)): if args.task[i] == 'classification': loss.append('sparse_categorical_crossentropy') validate_metrics[i] = ['acc'] else: loss.append('mean_squared_error') validate_metrics[i] = ['mean_squared_error'] metrics = ['acc'] if args.task[args.primary_task] != 'classification': metrics = ['mean_squared_error'] io.log('Loss: {}, Metrics: {}, Primary Task: {}, Task Weights: {}'.format( loss, validate_metrics, args.primary_task, args.task_weights )) train_errs, valid_errs = learning_mtrnn.train_mtrnn_v2( model, self.buf_train, self.buf_valids, lrate, validate_on=args.validate_on, optimizer=optimizer, save_dir=model_dir, pre_validate=resume_json is not None, restore=True, loss=loss, metrics=metrics, validate_metrics=validate_metrics, ntasks=len(args.task), primary_task=args.primary_task, task_weights=args.task_weights ) io.log('--> {}: train err {}, valid err {}'.format( model_name, train_errs[-1], valid_errs[args.validate_on][-1] )) return (train_errs, valid_errs)
def _train(self, model, lrate, optimizer, model_name, model_dir, resume_json, **kwargs): """ Train model. """ args = self.args loss = 'sparse_categorical_crossentropy' metrics = ['acc'] if args.task != 'classification': loss = 'mean_squared_error' metrics = ['mean_squared_error'] io.log('Loss: {}, Metrics: {}'.format(loss, metrics)) train_errs, valid_errs = learning_rnn.train_rnn_v2( model, self.buf_train, self.buf_valids, lrate, validate_on=args.validate_on, optimizer=optimizer, save_dir=model_dir, pre_validate=resume_json is not None, restore=True, loss=loss, metrics=metrics) io.log('--> {}: train err {}, valid err {}'.format( model_name, train_errs[-1], valid_errs[args.validate_on][-1])) return (train_errs, valid_errs)
def _init_ivectors(self):
    """ Add i-vectors if applicable.
    """
    args = self.args
    self.ivectors = None
    if args.ivectors is not None:
        io.log('Loading i-vectors from {}'.format(args.ivectors))
        self.ivectors = ivector_ark_read(args.ivectors)

def _init_base_model(self):
    """ Initialize base model.
    """
    args = self.args
    io.log('Initializing base model, json: {}, weights: {}'.format(
        args.base_model_json, args.base_model_weights))
    self.base_model = model_from_json(io.json_load(args.base_model_json))
    self.base_model.load_weights(args.base_model_weights)

def train_gmms(group2array, n_components=1, cov_type='diag', ordered=True):
    """ Train a GMM for each group. Return group to GMM mapping.
    """
    gmms = OrderedDict() if ordered else {}
    for group in group2array:
        log('Fitting GMM for {}'.format(group))
        gmms[group] = GMM(n_components=n_components, covariance_type=cov_type)
        gmms[group].fit(group2array[group])
    return gmms

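# A minimal usage sketch for train_gmms (illustrative only): the group names
# and toy data below are made up, and it assumes numpy and the GMM class used
# above are importable in this module.
def _example_train_gmms():
    import numpy as np
    group2array = OrderedDict()
    group2array['control'] = np.random.randn(100, 10)  # 100 ten-dim i-vectors
    group2array['aphasia'] = np.random.randn(80, 10)
    gmms = train_gmms(group2array, n_components=1, cov_type='diag')
    # gmms maps each group name to a GMM fitted on that group's vectors
    return gmms
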
def _save_model(self, model, model_name, results): """ Save model to disk if applicable. """ args = self.args valid_errs = results[1:] if args.save_all or valid_errs[args.validate_on] < self.best_err: model_fname = os.path.join(args.output_dir, model_name) io.log('Saving final model to {}'.format(model_fname)) io.json_save('{}.json'.format(model_fname), model.to_json()) model.save_weights('{}.weights'.format(model_fname))
def _ext_model(self, model_name):
    """ Check if model configuration has already been computed.
    """
    if model_name not in self.records:
        return False
    train_err = self.records[model_name][0]
    valid_errs = self.records[model_name][1:]
    io.log('--> {}: train err {}, valid err {}'.format(
        model_name, train_err, valid_errs[self.args.validate_on]))
    return True

def main():
    desc = 'Extract features with DNN. Output to Kaldi ark.'
    parser = common.init_argparse(desc)
    parser.add_argument('model_in', help='Model that can be read by load_dnn')
    parser.add_argument('feats_scp', help='scp of input features')
    parser.add_argument('ark_out', help='Output ark file')
    parser.add_argument('--output-layer', type=int, default=-2,
                        help='Layer to use for extracting features. ' + \
                             'Negative index can be used. For example, ' + \
                             '-1 means the last layer, and so on.')
    parser.add_argument('--context', type=int, default=8,
                        help='Number of context frames for splicing')
    parser.add_argument('--padding', default='replicate',
                        help='What to do with out-of-bound frames. Valid ' + \
                             'values: [replicate|zero]')
    parser.add_argument('--ivectors', help='Utterance i-vectors to append')
    parser.add_argument('--chunk-size', default='300m',
                        help='Chunk size for data buffering')
    args = parser.parse_args()

    io.log('Initializing dataset')
    ivectors = None if args.ivectors is None else \
        io.ivector_ark_read(args.ivectors, dtype=theano.config.floatX)
    dataset = init_dataset(args.feats_scp, args.context, args.padding, ivectors)
    io.log('Initializing model')
    dnn = load_dnn(args.model_in)

    # Initializing shared_ds according to chunk_size
    num_items = get_num_items(args.chunk_size, theano.config.floatX)
    max_frames = num_items / dataset.get_dim()
    max_utt_frames = np.max(
        map(dataset.get_num_frames_by_utt_name, dataset.get_utt_names()))
    common.CHK_GE(max_frames, max_utt_frames)
    x = np.zeros((max_frames, dataset.get_dim()), dtype=theano.config.floatX)

    io.log('...getting extraction function')
    extract_fn = dnn.build_extract_feat_function(args.output_layer)
    io.log('Got it!')

    io.log('** Begin outputting to {} **'.format(args.ark_out))
    ark_out = KaldiWriteOut(args.ark_out)
    utt_names, utt_frames, total_frames = [], [], 0
    for utt in dataset.get_utt_names():
        frames = dataset.get_num_frames_by_utt_name(utt)
        if total_frames + frames > max_frames:
            __extract(extract_fn, ark_out, dataset, x, utt_names, utt_frames)
            utt_names, utt_frames, total_frames = [], [], 0
        utt_names.append(utt)
        utt_frames.append(frames)
        total_frames += frames
    __extract(extract_fn, ark_out, dataset, x, utt_names, utt_frames)
    ark_out.close()

def validate_mtrnn(model, buf_valid, ntasks=None, metrics=['acc'],
                   stateful=False, report_interval=20):
    """ Evaluate against a dataset. Return a list of lists, one for each task.
    Each task-specific list is a list of values, one for each metric.
    `metrics` can be a list (same metrics applied to all tasks) or a dict
    that maps from task ID to a list of task-specific metrics.
    """
    if ntasks is None:
        ntasks = len(buf_valid.dataset().get_frame_labels())
    mt_metrics = []
    for t in range(ntasks):
        st_metrics = metrics
        if type(metrics) != list:
            assert type(metrics) == dict
            st_metrics = metrics[t]
        mt_metrics.append([init_metric(m) for m in st_metrics])
    chunks_read = 0
    while True:
        # Load data chunk
        X = buf_valid.read_next_chunk()
        if X is None:
            break
        report = chunks_read % report_interval == 0
        chunks_read += 1
        # Validate
        valid_Xs, valid_ys, valid_eobs, utt_indices = X
        if report:
            io.log('Validating on chunk {} ({} utts, max dur {})'.format(
                buf_valid.get_progress(), len(utt_indices),
                valid_Xs[0].shape[1]))
        for valid_X, valid_y, valid_eob in zip(valid_Xs, valid_ys, valid_eobs):
            preds = model.predict(valid_X, batch_size=len(utt_indices),
                                  verbose=0)
            # Special handling for single-task
            if ntasks == 1:
                preds = [preds]
            for t in range(ntasks):
                for i in range(len(valid_eob)):
                    y_true = valid_y[t][i][buf_valid.get_delay():valid_eob[i]]
                    y_pred = preds[t][i][buf_valid.get_delay():valid_eob[i]]
                    for m in mt_metrics[t]:
                        m.accum(y_true, y_pred)
            if stateful:
                model.reset_states()
    valid_metrics = []
    for t in range(ntasks):
        valid_metrics.append([m.eval() for m in mt_metrics[t]])
    return valid_metrics

def __separate_blocks(text, utt):
    """ For blocks of text like <he is playing>, the angular brackets need to
    be separated so we can easily retrieve the whole block if needed.
    """
    if len(text) == 0 or text[0] is None:
        return text
    new_text = []
    for w in text:
        # Check for 1-letter word (stand-alone brackets are allowed)
        if len(w) == 1:
            new_text.append(w)
        # If start of block, separate the opening bracket
        elif w.startswith('<') and not w.endswith('>'):
            new_text.append(w[0])
            new_text.append(w[1:])
        # If end of block, separate the closing bracket
        elif w.endswith('>') and not w.startswith('<'):
            new_text.append(w[:len(w) - 1])
            new_text.append(w[len(w) - 1])
        elif w == "(?)":
            new_text.append('&=unk')
        # If one whole block, separate both brackets
        elif w.startswith('<') and w.endswith('>'):
            common.CHK_NEQ(w, '<>')
            io.log('**WARN** {} - rare stand-alone block: {}'.format(utt, w))
            new_text.append(w[0])
            new_text.append(w[1:len(w) - 1])
            new_text.append(w[len(w) - 1])
        else:
            new_text.append(w)
    # Make sure there are matching brackets. Unlike in explanations, in word
    # blocks opening brackets don't have to match immediately.
    opening = common.find_all(new_text, '<')
    closing = common.find_all(new_text, '>')
    if len(opening) != len(closing):
        with open('/z/mkperez/HD_Project/Aphasia_Bank_data/overlap_log.txt',
                  'a') as wf:
            wf.write("prob: %s\n" % (utt))
            wf.write("prob text: %s\n" % (text))
        # raise ValueError('{} - has {} < but {} >: {}'.format(
        #     utt, len(opening), len(closing), new_text
        # ))
    elif len(opening) != 0:
        used = []
        for c in closing:
            matched = False
            for o in [x for x in reversed(opening) if x < c and x not in used]:
                matched = True
                used.append(o)
                break
            if not matched:
                raise ValueError('{} - unmatched >: {}'.format(
                    utt, new_text[:c + 1]))
    return new_text

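# Examples (derived from the code above):
#   '<he'      -> '<', 'he'          (opening bracket split off)
#   'playing>' -> 'playing', '>'     (closing bracket split off)
#   '(?)'      -> '&=unk'            (unintelligible marker normalized)
#   '<mhm>'    -> '<', 'mhm', '>'    (rare stand-alone block, with a warning)
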
def _init_best_err(self):
    """ Determine the best model thus far.
    """
    self.best_model, self.best_err = None, FLT_MAX
    for model_name in self.records:
        valid_errs = self.records[model_name][1:]
        if valid_errs[self.args.validate_on] < self.best_err:
            self.best_model = model_name
            self.best_err = valid_errs[self.args.validate_on]
    io.log('Best existing model: {}, Best existing err: {}'.format(
        self.best_model, self.best_err))

def run(ivectors, train_group2ivecs, test_ivecs, cov_type):
    all_data = OrderedDict()
    for group in train_group2ivecs:
        train_data = np.asarray(map(lambda x: ivectors[x],
                                    train_group2ivecs[group]))
        all_data[group] = train_data
    io.log('Training GMMs')
    gmms = train_gmms(all_data, n_components=1,
                      cov_type=cov_type, ordered=True)
    io.log('Getting predictions')
    test_data = np.vstack(map(lambda x: ivectors[x], test_ivecs))
    return predict_gmm(test_data, gmms)

def main():
    desc = 'Convert from alignment with length to regular alignments. ' + \
           'Output to stdout.'
    parser = common.init_argparse(desc)
    parser.add_argument('ali_with_length', help='Alignment with lengths')
    args = parser.parse_args()

    ali = ali_with_length_read(args.ali_with_length, ordered=True, expand=True)
    io.log('Read {} alignments with lengths'.format(len(ali)))
    for key in ali:
        print '{} {}'.format(key, ' '.join(ali[key]))

def phone2word_ali(key, phone_tokens, words, lexicon, sil_label, length):
    """ Print word alignment to stdout. If there are several possible
    alignments, arbitrarily choose the first one.
    """
    word_alis = get_alignments(words, phone_tokens, lexicon)
    if word_alis is None:
        raise ValueError('{} - failed to align {} to {}'.format(
            key, words, phone_tokens))
    if len(word_alis) > 1:
        io.log('WARNING - {} has multiple ({}) alignments: {} ({})'.format(
            key, len(word_alis), word_alis, words))
    __print_ali(key, word_alis[0], sil_label, length)

def __parse_err_code(text, idx, utt):
    err_code = text[idx].replace('[*', '').replace(']', '').strip()
    # Uncategorized error
    if err_code in ['']:
        return 'X'
    # Check that error is of known type
    if not any([err_code.startswith(c)
                for c in ['p', 's', 'n', 'd', 'm', 'f', '->']]):
        io.log('WARNING: {} - invalid err_code {} ({})'.format(
            utt, err_code, text[:idx + 1]))
    return err_code

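# Illustrative examples of __parse_err_code on CHAT-style error markers
# (the token lists below are made up for demonstration):
#   __parse_err_code(['dog', '[* p:w]'], 1, 'utt1')  -> 'p:w'  (known type)
#   __parse_err_code(['dog', '[*]'], 1, 'utt1')      -> 'X'    (uncategorized)
#   __parse_err_code(['dog', '[* q]'], 1, 'utt1')    -> 'q', after logging an
#                                                       invalid-code warning
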
def _save_model(self, model, model_name, results): """ Save model to disk if applicable. """ args = self.args valid_errs = results[1:] if args.save_all or valid_errs[args.validate_on] < self.best_err: model_fname = os.path.join(args.output_dir, model_name) io.log('Saving final model to {}'.format(model_fname)) # Remove auxiliary output prior to saving st_model = Model(input=model.input, output=model.output[0]) io.json_save('{}.json'.format(model_fname), st_model.to_json()) st_model.save_weights('{}.weights'.format(model_fname)) del st_model
def __parse_target(text, start, end, utt):
    targets = [w for w in text[start:end] if w.startswith('[:')]
    if len(targets) == 0:
        return '?'
    if len(targets) > 1:
        io.log('**WARN** {} - multiple targets {}, using 1st item ({})'.format(
            utt, targets, text[:end]))
    target = targets[0]
    target = [target.replace('[:', '').replace(']', '').strip()]
    target = __clean_word_tokens(target, utt)
    target = __proc_compound_words(target, utt)
    target = ' '.join(target)
    return target if target.lower() != 'x@n' else '?'

def _init_records(self):
    """ Setup result caching for parameter combinations. Load existing
    results from disk if possible. Each line will look like this:
        <model_name> <train_err> <valid_err> [<valid_err> ...]
    """
    self.records_fname = os.path.join(self.args.output_dir, 'summary.txt')
    self.records = OrderedDict()
    if os.path.exists(self.records_fname):
        io.log('Loading existing records from {}'.format(self.records_fname))
        self.records = io.dict_read(self.records_fname, ordered=True,
                                    lst=True, fn=float)

def main(args):
    # Check and set variables
    common.CHK_GT(args.frames_to_stack, 0)
    if args.frames_to_skip is None:
        args.frames_to_skip = args.frames_to_stack
    common.CHK_GT(args.frames_to_skip, 0)

    ds = TemporalData.from_kaldi(args.scp)
    io.log('Loaded dataset containing {} utts'.format(len(ds.get_utt_names())))
    io.log('Outputting stacked features (stack: {}, skip: {}) to stdout...'.
           format(args.frames_to_stack, args.frames_to_skip))
    for utt_name in ds.get_utt_names():
        data = ds.get_data_by_utt_name(utt_name)
        stacked = stack_data(data, args.frames_to_stack, args.frames_to_skip)
        print_matrix(utt_name, stacked)

def _init_resume_json(self, model_dir):
    """ Load json for resuming training if possible.
    """
    resume_json = None
    resume_fname = os.path.join(model_dir, 'resume.json')
    if os.path.exists(resume_fname):
        resume_json = io.json_load(resume_fname)
        # Check that json contains enough information
        assert 'weights' in resume_json
        assert 'lrate' in resume_json
        assert 'epoch' in resume_json
        # Make path absolute
        resume_json['weights'] = os.path.join(model_dir,
                                              resume_json['weights'])
        io.log('Resuming training: {}'.format(resume_json))
    return resume_json

def main():
    desc = 'Convert from speaker i-vectors to utt-ivectors. Output to stdout.'
    parser = common.init_argparse(desc)
    parser.add_argument('spk_ivectors', help='File containing spk i-vectors.')
    parser.add_argument('utt2spk', help='Kaldi utt2spk mapping.')
    args = parser.parse_args()

    spk_ivectors = ivector_ark_read(args.spk_ivectors)
    utt2spk = io.dict_read(args.utt2spk, ordered=True)
    spk2utt = common.make_reverse_index(utt2spk, ordered=True)

    wrote = 0
    for spk in spk2utt.keys():
        for utt in spk2utt[spk]:
            print_vector(utt, spk_ivectors[spk])
            wrote += 1
    io.log('Wrote {} utt i-vectors for {} spks'.format(wrote, len(spk2utt)))

def get_log_priors(class_frame_counts, prior_floor):
    if class_frame_counts is None:
        return None
    with open(class_frame_counts, 'r') as f:
        count_str = f.readline().strip().replace('[', '').replace(']', '')
    priors = np.asarray(count_str.split(), dtype=np.float32)
    priors = priors / np.sum(priors)
    # Add a small value before doing log to avoid NaN
    log_priors = np.log(priors + 1e-20)
    # Floor pdf
    num_floored = 0
    for i in range(len(log_priors)):
        if priors[i] < prior_floor:
            log_priors[i] = math.sqrt(FLT_MAX)
            num_floored += 1
    io.log('Floored {} pdf-priors (hard-set to {}, which disables DNN output)'.
           format(num_floored, math.sqrt(FLT_MAX)))
    return log_priors

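# A minimal, hypothetical sketch of exercising get_log_priors: the counts and
# floor value below are made up for illustration, and it assumes this module's
# imports (np, math, FLT_MAX) are available.
def _example_get_log_priors():
    import tempfile
    # Kaldi-style class counts: '[ c0 c1 ... ]'
    with tempfile.NamedTemporaryFile(mode='w', suffix='.counts',
                                     delete=False) as f:
        f.write('[ 1000 2500 3 800 ]\n')
        counts_fname = f.name
    # Class 2 has prior 3/4303 < 1e-3, so its log-prior is floored to
    # sqrt(FLT_MAX), effectively disabling that DNN output.
    return get_log_priors(counts_fname, prior_floor=1e-3)
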
def init_lrate(cls, init_lr, min_lr, depoch, dstart, dstop,
               max_epoch=None, resume_json=None):
    io.log('.....using {}'.format(cls))
    lrate = cls(start_rate=init_lr, min_lrate_stop=min_lr,
                min_epoch_decay_start=depoch, min_derror_decay_start=dstart,
                min_derror_stop=dstop, max_epoch=max_epoch)
    if resume_json is not None:
        io.log('Resuming learning rate from {}'.format(resume_json))
        lrate = parse_lrate_json(resume_json, lrate)
    return lrate