def best_wer_timit(decoding_dir):
    """Return the scoring entry with the lowest error rate below *decoding_dir*.

    Every ``score_<LMWT>/*.sys`` file under ``decoding_dir`` is searched for
    its first line containing ``Sum/Avg``. A summary line is used when it
    contains exactly five ``|`` separators and its fourth ``|``-separated field
    holds six whitespace-separated values (corr, sub, del, ins, per and one
    trailing value that is ignored). Files or lines that do not match are
    skipped with a warning.

    The entries are compared by the *numeric* value of ``per``; comparing the
    raw strings would rank ``"10.2"`` below ``"9.5"``.

    :param decoding_dir: directory that contains the ``score_*`` directories.
    :return: dict with the keys ``lm_weight``, ``corr``, ``sub``, ``del``,
        ``ins`` and ``per``. All values are strings exactly as read from the
        directory name / summary line.
    :raises ValueError: if no usable summary line was found.
    """
    result = []
    for path in glob(os.path.join(decoding_dir, "score_*", "*.sys")):
        # The parent directory is named "score_<LMWT>".
        lm_weight = os.path.basename(os.path.dirname(path)).split("_")[1]

        with open(path, "r") as f:
            summary = next((line for line in f if "Sum/Avg" in line), None)
        if summary is None:
            logger.warning(
                "Skipping file without Sum/Avg line: {}".format(path))
            continue

        fields = summary.split("|")
        if len(fields) != 6:  # anything but exactly five "|" in the line
            logger.warning("Skipping line: {}".format(summary))
            continue

        try:
            corr, sub, _del, ins, per, _ = fields[3].split()
            float(per)  # make sure the ranking below cannot fail
        except ValueError:
            logger.warning("Skipping line: {}".format(summary))
            continue

        result.append({
            "lm_weight": lm_weight,
            "corr": corr,
            "sub": sub,
            "del": _del,
            "ins": ins,
            "per": per,
        })

    if not result:
        raise ValueError(
            "No usable Sum/Avg line found below {}".format(decoding_dir))
    return min(result, key=lambda entry: float(entry["per"]))
def run_shell(cmd, stdin=None, pipefail=True, cmd_logging_level=logging.DEBUG):
    """Run *cmd* through ``/bin/bash`` and return its stdout as text.

    :param cmd: shell command line to execute.
    :param stdin: optional ``bytes`` that are written to the standard input of
        the command. With ``None`` the command inherits the caller's stdin.
    :param pipefail: From bash man: If pipefail is enabled, the pipeline's
        return status is the value of the last (rightmost) command to exit
        with a non-zero status, or zero if all commands exit successfully.
    :param cmd_logging_level: logging level used for the ``RUN:`` and
        ``OUTPUT:`` messages.
    :return: stdout of the command, decoded as UTF-8.
    :raises RuntimeError: if the return code is not zero. This includes the
        negative codes ``subprocess`` reports when the shell itself was
        terminated by a signal.
    """
    assert stdin is None or isinstance(stdin, bytes), f"Expected bytes as input for stdin, got {type(stdin)}"

    logger.log(cmd_logging_level, f"RUN: {cmd}")

    # Purely advisory: warn early when the leading script cannot be executed.
    script = cmd.split(" ")[0]
    if script.endswith(".sh"):
        if not (os.path.isfile(script) and os.access(script, os.X_OK)):
            logger.warning(f"{script} does not exist or is not runnable!")

    if pipefail:
        cmd = 'set -o pipefail; ' + cmd

    p = subprocess.Popen(cmd,
                         # communicate() only delivers its input when stdin is
                         # a pipe; without it the bytes are silently dropped.
                         stdin=subprocess.PIPE if stdin is not None else None,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE,
                         shell=True,
                         executable='/bin/bash',
                         env=os.environ.copy())
    output, err = p.communicate(stdin)
    output = output.decode("utf-8")
    err = err.decode("utf-8")

    # communicate() has already waited for the process to finish.
    return_code = p.returncode
    if return_code != 0:
        logger.error(
            "Call: \n{}\n{}\n{}\nReturn Code: {}\nstdout: {}\nstderr: {}"
            .format("-" * 73, cmd, "-" * 80, return_code, output, err))
        raise RuntimeError(f"Call: {cmd} had nonzero return code: {return_code}, stderr: {err}")

    logger.log(cmd_logging_level, f"OUTPUT: {output}")
    return output
def load_state_dict(self, state_dict):
    """Restore the sampling position from a previously saved *state_dict*.

    The saved position is applied only when the saved ``data_source_len``
    equals ``len(self.data_source)``. Otherwise a warning is logged and this
    object is left unchanged.

    :param state_dict: mapping providing ``data_source_len`` and, when the
        lengths match, ``start_idx``.
    """
    if len(self.data_source) != state_dict['data_source_len']:
        logger.warning(
            "The dataset used when this sampler was saved is not the same as the one used now.\n"
            "Ignoring the saved sampler and restarting sampling.")
        return

    # Continue with the index that follows the saved one.
    self.start_idx = state_dict['start_idx'] + 1
def load_state_dict(self, state_dict):
    """Restore permutation and position from a previously saved *state_dict*.

    The saved state is applied only when the saved ``samples_per_chunk``
    compares equal to ``self.samples_per_chunk``. Otherwise a warning is
    logged and this object is left unchanged.

    :param state_dict: mapping providing ``samples_per_chunk`` and, when that
        matches, ``permutation`` and ``start_idx``.
    """
    if self.samples_per_chunk == state_dict['samples_per_chunk']:
        self.permutation = state_dict['permutation']
        # Continue with the index that follows the saved one.
        self.start_idx = state_dict['start_idx'] + 1
    else:
        logger.warning(
            "The dataset used when this sampler was saved is not the same as the one used now.\n"
            "Ignoring the saved sampler and restarting sampling.")
def process_batch(self, _input):
    """Extract features for a batch of wav files and run the keyword decoder.

    All intermediate files live in a temporary directory below
    ``self.tmp_root_dir`` which is removed before this method returns.

    Accepted forms of ``_input``:

    * a list of wav file paths,
    * a tuple ``(wav_files, metadata)`` of equal length,
    * a list ``[wav_files, metadata]`` whose first element is itself a list.
      In this form the metadata is stored in a dict keyed by the file's
      basename without its last five characters.

    :param _input: see above.
    :return: the value of ``self.decoder.is_keyword_batch`` when no metadata
        was given; otherwise ``list(zip(metadata, result.values()))``. Note
        that for the ``[wav_files, metadata]`` form the zip iterates the
        *keys* of the metadata dict (the shortened basenames).
    """
    with tempfile.TemporaryDirectory(dir=self.tmp_root_dir) as tmp_run_dir:
        os.makedirs(os.path.join(tmp_run_dir, "exp_files"), exist_ok=True)

        if isinstance(_input, tuple):
            # NOTE(review): the original statement carried a second
            # isinstance() check for _input[1] in the assert's *message* slot,
            # so only the first element has ever been validated. That
            # behaviour is kept to avoid rejecting inputs that work today --
            # confirm whether _input[1] must be a list as well.
            assert isinstance(_input[0], list), \
                "first element of the input tuple must be a list"
            assert len(_input[0]) == len(_input[1])
            wav_files, metadata_dict = _input
        else:
            wav_files = _input
            metadata_dict = None

        if isinstance(wav_files[0], list):
            wav_files, metadata = wav_files
            metadata_dict = {}
            for w_file, _mdata in zip(wav_files, metadata):
                metadata_dict[os.path.basename(w_file)[:-5]] = _mdata

        seen_files = set()
        for file in wav_files:
            # NOTE(review): abspath() returns a non-empty string, so this
            # assertion can never fail; os.path.isabs() may have been intended.
            assert os.path.abspath(file)
            if file in seen_files:
                # NOTE(review): duplicates are only reported; the list handed
                # to the feature extraction below still contains them.
                logger.warning(f"Duplicate file {file}, ignoring...")
            else:
                seen_files.add(file)

        tmp_scp, spk2utt_path, utt2spk_path = self.preppare_tmp_files(wav_files, tmp_run_dir)
        feats = get_kaldi_feats(tmp_scp, tmp_run_dir, spk2utt_path, utt2spk_path)
        # TODO apply mean std norm, same as in dataloader

        result = self.decoder.is_keyword_batch(input_features=feats,
                                               sensitivity=self.sensitivity,
                                               tmp_out_dir=tmp_run_dir)
        if metadata_dict is not None:
            return list(zip(metadata_dict, result.values()))
        return result
def _check_labels_indexed_from(self, all_labels_loaded, label_name): max_label = max([ all_labels_loaded[label_name][l].max() for l in all_labels_loaded[label_name] ]) min_label = min([ all_labels_loaded[label_name][l].min() for l in all_labels_loaded[label_name] ]) logger.debug(f"Max label: {max_label}") logger.debug(f"min label: {min_label}") if min_label > 0: logger.warn( f"label {label_name} is appears to be indexed from {min_label} -> making it indexed from 0" ) for l in all_labels_loaded[label_name]: all_labels_loaded[label_name][ l] = all_labels_loaded[label_name][l] - min_label max_label = max([ all_labels_loaded[label_name][l].max() for l in all_labels_loaded[label_name] ]) min_label = min([ all_labels_loaded[label_name][l].min() for l in all_labels_loaded[label_name] ]) logger.debug(f"Max label new : {max_label}") logger.debug(f"min label new: {min_label}") if self.state.label_index_from != 0: assert self.state.label_index_from > 0 all_labels_loaded[label_name] = { filename: all_labels_loaded[label_name][filename] + self.state.label_index_from for filename in all_labels_loaded[label_name] }
def _load_labels(label_dict, label_index_from, max_label_length, phoneme_dict): all_labels_loaded = {} for lable_name in label_dict: all_labels_loaded[lable_name] = load_labels( label_dict[lable_name]['label_folder'], label_dict[lable_name]['label_opts']) if max_label_length is not None and max_label_length > 0: all_labels_loaded[lable_name] = \ {l: all_labels_loaded[lable_name][l] for l in all_labels_loaded[lable_name] if len(all_labels_loaded[lable_name][l]) < max_label_length} if lable_name == "lab_phn": if phoneme_dict is not None: for sample_id in all_labels_loaded[lable_name]: assert max(all_labels_loaded[lable_name][sample_id]) <= max( phoneme_dict.idx2reducedIdx.keys()), \ "Are you sure you have the righ phoneme dict?" + \ " Labels have higher indices than phonemes ( {} <!= {} )".format( max(all_labels_loaded[lable_name][sample_id]), max(phoneme_dict.idx2reducedIdx.keys())) # map labels according to phoneme dict tmp_labels = np.copy( all_labels_loaded[lable_name][sample_id]) for k, v in phoneme_dict.idx2reducedIdx.items(): tmp_labels[all_labels_loaded[lable_name][sample_id] == k] = v all_labels_loaded[lable_name][sample_id] = tmp_labels max_label = max([ all_labels_loaded[lable_name][l].max() for l in all_labels_loaded[lable_name] ]) min_label = min([ all_labels_loaded[lable_name][l].min() for l in all_labels_loaded[lable_name] ]) logger.debug(f"Max label: {max_label}") logger.debug(f"min label: {min_label}") if min_label > 0: logger.warn( f"label {lable_name} does not seem to be indexed from 0 -> making it indexed from 0" ) for l in all_labels_loaded[lable_name]: all_labels_loaded[lable_name][ l] = all_labels_loaded[lable_name][l] - 1 max_label = max([ all_labels_loaded[lable_name][l].max() for l in all_labels_loaded[lable_name] ]) min_label = min([ all_labels_loaded[lable_name][l].min() for l in all_labels_loaded[lable_name] ]) logger.debug(f"Max label new : {max_label}") logger.debug(f"min label new: {min_label}") if label_index_from != 0: assert 
label_index_from > 0 all_labels_loaded[lable_name] = { filename: all_labels_loaded[lable_name][filename] + label_index_from for filename in all_labels_loaded[lable_name] } return all_labels_loaded
def _log_and_plot_ctc_predictions(sample_name, fbank, out_phn, targets,
                                  phoneme_dict, transcript_root):
    """Decode *out_phn* with a width-1 CTC beam decoder, log and plot each sample."""
    # The decoder wants one single-character symbol per output class; the
    # characters themselves are placeholders taken from two code point ranges.
    vocabulary_size = len(phoneme_dict.reducedIdx2phoneme) + 1
    vocabulary = [
        chr(c) for c in list(range(65, 65 + 58)) +
        list(range(65 + 58 + 69, 65 + 58 + 69 + 500))
    ][:vocabulary_size]
    decoder = ctcdecode.CTCBeamDecoder(vocabulary,
                                       log_probs_input=True,
                                       beam_width=1)

    decoder_logits = out_phn.permute(0, 2, 1)  # We expect batch x seq x label_size
    beam_result, _beam_scores, _timesteps, out_seq_len = decoder.decode(
        decoder_logits)

    # targets['lab_phn'] is sliced consecutively using the per-sample lengths.
    per_sample_targets = []
    offset = 0
    for length in targets['target_sequence_lengths']:
        per_sample_targets.append(targets['lab_phn'][offset:offset + length])
        offset += length

    out_phn_np = out_phn.numpy()
    for i in range(len(fbank)):
        best_path = beam_result[i, 0, :out_seq_len[i, 0]]

        # presumably index 0 is the CTC blank, hence the "- 1" when looking
        # up phonemes -- TODO confirm
        result_decoded = " ".join([
            phoneme_dict.reducedIdx2phoneme[l.item() - 1] for l in best_path
        ])
        logger.debug("RES: " + result_decoded)

        label_decoded = " ".join([
            phoneme_dict.reducedIdx2phoneme[l.item() - 1]
            for l in per_sample_targets[i]
        ])
        logger.debug("LAB: " + label_decoded)

        text = sample_id_to_transcript(sample_name[i], transcript_root)
        logger.debug("TXT: " + text)

        # exp() of the outputs, each row divided by its sum along axis 1
        exp_out = np.exp(out_phn_np[i])
        plot_alignment_spectrogram_ctc(
            sample_name[i],
            fbank[i],
            (exp_out.T / exp_out.sum(axis=1)).T,
            phoneme_dict,
            label_decoded,
            text,
            result_decoded=result_decoded)


def valid_epoch_sync_metrics(epoch, model, loss_fun, metrics, config, max_label_length,
                             device, tensorboard_logger,
                             transcript_root="/mnt/data/datasets/LibriSpeech/dev-clean"):
    """Run one validation epoch and return the averaged loss and metrics.

    The validation dataset named in ``config['dataset']['data_use']['valid_with']``
    is built with ``get_dataset`` and iterated with a ``KaldiDataLoader``. For
    every batch the model output, the loss (``loss["loss_final"]``) and the
    metrics from ``eval_metrics`` are accumulated. On step 1 and step 60 the
    predictions are additionally decoded, logged at debug level and plotted.
    The averages are written to ``tensorboard_logger`` and returned.

    Each metric is accumulated exactly once per batch. Earlier revisions added
    every accumulated metric onto itself before averaging, so values reported
    by them are twice as large as the ones returned here.

    :param epoch: epoch number, used for the progress bar and the logger step.
    :param model: model to evaluate; put into eval mode by this function.
    :param loss_fun: callable ``loss_fun(output, targets)``.
    :param metrics: iterable of metrics; used as keys of the result and passed
        to ``eval_metrics``.
    :param config: nested experiment configuration.
    :param max_label_length: forwarded to ``get_dataset``.
    :param device: device the inputs (and usually the targets) are moved to.
    :param tensorboard_logger: object providing ``set_step`` and ``add_scalar``.
    :param transcript_root: directory passed to ``sample_id_to_transcript``
        when the predictions are logged.
    :return: ``{'valid_loss': float, 'valid_metrics': {metric: value}}``
    :raises ZeroDivisionError: if the dataloader yields no batch.
    """
    model.eval()

    valid_loss = 0
    accumulated_valid_metrics = {metric: 0 for metric in metrics}

    batching = config['training']['batching']
    valid_data = config['dataset']['data_use']['valid_with']
    dataset_definition = config['dataset']['dataset_definition']
    _all_feats = dataset_definition['datasets'][valid_data]['features']
    _all_labs = dataset_definition['datasets'][valid_data]['labels']

    dataset = get_dataset(
        config['training']['dataset_type'],
        config['exp']['data_cache_root'],
        f"{valid_data}_{config['exp']['name']}",
        {feat: _all_feats[feat] for feat in config['dataset']['features_use']},
        {lab: _all_labs[lab] for lab in config['dataset']['labels_use']},
        batching['max_seq_length_valid'],
        model.context_left,
        model.context_right,
        normalize_features=True,
        phoneme_dict=dataset_definition['phoneme_dict'],
        max_seq_len=batching['max_seq_length_valid'],
        max_label_length=max_label_length)

    if batching['batch_size_valid'] != 1:
        # NOTE(review): the message announces a batch size of 1, but the
        # configured batch size is what is handed to the dataloader below.
        logger.warning("setting valid batch size to 1 to avoid padding zeros")
    dataloader = KaldiDataLoader(dataset,
                                 batching['batch_size_valid'],
                                 config["exp"]["n_gpu"] > 0,
                                 batch_ordering=model.batch_ordering)

    assert len(dataset) >= batching['batch_size_valid'], \
        f"Length of valid dataset {len(dataset)} too small " \
        + f"for batch_size of {batching['batch_size_valid']}"

    n_steps_this_epoch = 0
    with tqdm(disable=not logger.isEnabledFor(logging.INFO),
              total=len(dataloader)) as pbar:
        pbar.set_description('V e:{} l: {} '.format(epoch, '-'))

        for sample_name, inputs, targets in dataloader:
            n_steps_this_epoch += 1

            inputs = to_device(device, inputs)
            if "lab_phn" not in targets:
                targets = to_device(device, targets)

            output = model(inputs)
            loss = loss_fun(output, targets)

            output = detach_cpu(output)
            targets = detach_cpu(targets)
            loss = detach_cpu(loss)

            #### Logging ####
            valid_loss += loss["loss_final"].item()
            _valid_metrics = eval_metrics((output, targets), metrics)
            for metric, metric_value in _valid_metrics.items():
                accumulated_valid_metrics[metric] += metric_value

            pbar.set_description('V e:{} l: {:.4f} '.format(
                epoch, loss["loss_final"].item()))
            pbar.update()

            do_plotting = True
            # The flag now gates both steps; the original expression only
            # applied it to step 1 because "and" binds tighter than "or".
            if do_plotting and n_steps_this_epoch in (1, 60):
                # The inputs were moved to `device` above and must be brought
                # back to host memory before converting them to numpy.
                # assumes inputs["fbank"] is a torch tensor -- TODO confirm
                fbank = inputs["fbank"].detach().cpu().numpy()
                _log_and_plot_ctc_predictions(sample_name, fbank,
                                              output['out_phn'], targets,
                                              dataset.state.phoneme_dict,
                                              transcript_root)
            #### /Logging ####

    tensorboard_logger.set_step(epoch, 'valid')
    tensorboard_logger.add_scalar('valid_loss', valid_loss / n_steps_this_epoch)
    for metric in accumulated_valid_metrics:
        tensorboard_logger.add_scalar(
            metric, accumulated_valid_metrics[metric] / n_steps_this_epoch)

    return {
        'valid_loss': valid_loss / n_steps_this_epoch,
        'valid_metrics': {
            metric: accumulated_valid_metrics[metric] / n_steps_this_epoch
            for metric in accumulated_valid_metrics
        }
    }