예제 #1
0
def best_wer_timit(decoding_dir):
    """Return the scoring entry with the lowest ``per`` below ``decoding_dir``.

    Every ``score_<LMWT>/*.sys`` file is scanned for its first line containing
    ``Sum/Avg``. The language-model weight is taken from the part of the
    ``score_<LMWT>`` directory name that follows the underscore.

    :param decoding_dir: directory that contains the ``score_*`` directories.
    :return: dict with the keys ``lm_weight``, ``corr``, ``sub``, ``del``,
        ``ins`` and ``per``. All values are the strings read from the file.
    :raises ValueError: if no usable ``Sum/Avg`` line was found, or if the
        rate column does not have the expected number of fields.
    """
    avg_lines = []
    for path in glob(os.path.join(decoding_dir, "score_*", "*.sys")):
        with open(path, "r") as f:
            avg_line = next((line for line in f if "Sum/Avg" in line), None)
        if avg_line is None:
            # A file without a summary line must not abort the whole search.
            logger.warning("No Sum/Avg line found in: {}".format(path))
            continue
        lm_weight = os.path.basename(os.path.dirname(path)).split("_")[1]
        avg_lines.append((lm_weight, avg_line))

    result = []
    for lm_weight, avg_line in avg_lines:
        if avg_line.count("|") != 5:
            logger.warning("Skipping line: {}".format(avg_line))
            continue

        # Five pipes give six columns; the rates live in the fourth one.
        rates_column = avg_line.split("|")[3]
        # The column is padded with blanks, hence the empty first/last field.
        _, corr, sub, _del, ins, per, _, _ = re.sub(
            ' +', ' ', rates_column).split(" ")
        result.append({
            "lm_weight": lm_weight,
            "corr": corr,
            "sub": sub,
            "del": _del,
            "ins": ins,
            "per": per,
        })

    if not result:
        raise ValueError(
            "No usable Sum/Avg line found below {}".format(decoding_dir))

    # Compare numerically: as strings, "10.2" would sort before "9.5".
    return min(result, key=lambda entry: float(entry['per']))
예제 #2
0
def run_shell(cmd, stdin=None, pipefail=True, cmd_logging_level=logging.DEBUG):
    """Run ``cmd`` through ``/bin/bash`` and return its decoded stdout.

    :param cmd: command line as a single string; it is executed with
        ``shell=True``, so it must not be built from untrusted input.
    :param stdin: optional bytes that are written to the command's stdin.
    :param pipefail: From bash man: If pipefail is enabled, the pipeline's
        return status is the value of the last (rightmost) command to exit
        with a non-zero status, or zero if all commands exit successfully.
    :param cmd_logging_level: level used for the ``RUN:`` and ``OUTPUT:``
        log messages.
    :return: the command's stdout decoded as UTF-8.
    :raises RuntimeError: if the command finishes with a non-zero return code.
    """
    assert stdin is None or isinstance(stdin, bytes), f"Expected bytes as input for stdin, got {type(stdin)}"

    logger.log(cmd_logging_level, f"RUN: {cmd}")
    script = cmd.split(" ")[0]
    if script.endswith(".sh"):
        if not (os.path.isfile(script) and os.access(script, os.X_OK)):
            logger.warning(f"{script} does not exist or is not runnable!")

    if pipefail:
        cmd = 'set -o pipefail; ' + cmd

    p = subprocess.Popen(cmd,
                         # Without a pipe, communicate() silently drops the input.
                         stdin=subprocess.PIPE if stdin is not None else None,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE,
                         shell=True,
                         executable='/bin/bash',
                         env=os.environ.copy())

    (output, err) = p.communicate(stdin)
    output = output.decode("utf-8")
    err = err.decode("utf-8")
    # communicate() has already waited for the process to finish.
    return_code = p.returncode
    # Negative codes (terminated by a signal) are failures as well.
    if return_code != 0:
        logger.error(
            "Call:  \n{}\n{}\n{}\nReturn Code: {}\nstdout: {}\nstderr: {}"
                .format("-" * 73, cmd, "-" * 80, return_code, output, err))
        raise RuntimeError(f"Call: {cmd} had nonzero return code: {return_code}, stderr: {err}")

    logger.log(cmd_logging_level, f"OUTPUT: {output}")
    return output
예제 #3
0
 def load_state_dict(self, state_dict):
     """Restore the sampling position from ``state_dict``.

     The saved state is only applied when ``state_dict['data_source_len']``
     equals the length of the current ``self.data_source``. Otherwise a
     warning is logged and the sampler is left untouched.

     :param state_dict: mapping with the keys ``data_source_len`` and
         ``start_idx``.
     """
     if len(self.data_source) == state_dict['data_source_len']:
         # Continue with the index that follows the saved one.
         self.start_idx = state_dict['start_idx'] + 1
     else:
         logger.warning(
             "The dataset used when this sampler was saved is not the same as the one used now.\n"
             "Ignoring the saved sampler and restarting sampling.")
예제 #4
0
 def load_state_dict(self, state_dict):
     """Restore the permutation and sampling position from ``state_dict``.

     The saved state is only applied when ``state_dict['samples_per_chunk']``
     equals ``self.samples_per_chunk``. Otherwise a warning is logged and
     the sampler is left untouched.

     :param state_dict: mapping with the keys ``samples_per_chunk``,
         ``permutation`` and ``start_idx``.
     """
     if self.samples_per_chunk == state_dict['samples_per_chunk']:
         self.permutation = state_dict['permutation']
         # Continue with the index that follows the saved one.
         self.start_idx = state_dict['start_idx'] + 1
     else:
         # NOTE(review): the message talks about the dataset, but the check
         # above compares samples_per_chunk - confirm the intended wording.
         logger.warning(
             "The dataset used when this sampler was saved is not the same as the one used now.\n"
             "Ignoring the saved sampler and restarting sampling.")
예제 #5
0
    def process_batch(self, _input):
        """Run the keyword decoder on a batch of wav files.

        ``_input`` is accepted in the shapes handled below:

        * a list of wav file paths,
        * a tuple ``(wav_files, metadata)`` of two equally long lists,
        * a two-element list ``[wav_files, metadata]`` whose first element
          is itself a list.

        All intermediate files are written to a temporary directory below
        ``self.tmp_root_dir`` that is removed when the call returns.

        :param _input: the batch, in one of the shapes listed above.
        :return: the value returned by ``self.decoder.is_keyword_batch`` or,
            when metadata was supplied, a list that pairs the metadata
            entries with ``result.values()``.
        """
        with tempfile.TemporaryDirectory(dir=self.tmp_root_dir) as tmp_run_dir:
            os.makedirs(os.path.join(tmp_run_dir, "exp_files"), exist_ok=True)

            if isinstance(_input, tuple):
                # Both elements have to be lists; previously the second
                # isinstance() was only used as the assertion message.
                assert isinstance(_input[0], list) and isinstance(_input[1], list)
                assert len(_input[0]) == len(_input[1])
                wav_files, metadata_dict = _input
            else:
                wav_files = _input
                metadata_dict = None
            if isinstance(wav_files[0], list):
                wav_files, metadata = wav_files
                metadata_dict = {}
                for w_file, _mdata in zip(wav_files, metadata):
                    # Key: file name without its last five characters
                    # (presumably an extension such as ".flac" - TODO confirm).
                    metadata_dict[os.path.basename(w_file)[:-5]] = _mdata

            # Duplicates are only reported here; wav_files itself is passed
            # on unchanged.
            _wav_files = set()
            for file in wav_files:
                # NOTE(review): abspath() returns a non-empty string for any
                # non-empty path, so this never fails; os.path.isabs() may
                # have been intended.
                assert os.path.abspath(file)
                if file in _wav_files:
                    logger.warning(f"Duplicate file {file}, ignoring...")
                else:
                    _wav_files.add(file)

            tmp_scp, spk2utt_path, utt2spk_path = self.preppare_tmp_files(wav_files, tmp_run_dir)

            feats = get_kaldi_feats(tmp_scp, tmp_run_dir, spk2utt_path, utt2spk_path)

            # TODO apply mean std norm, same as in dataloader

            result = self.decoder.is_keyword_batch(input_features=feats,
                                                   sensitivity=self.sensitivity,
                                                   tmp_out_dir=tmp_run_dir)
            if metadata_dict is not None:
                # Iterating a dict yields its keys, iterating a list its items.
                return list(zip(metadata_dict, result.values()))
            return result
예제 #6
0
    def _check_labels_indexed_from(self, all_labels_loaded, label_name):

        max_label = max([
            all_labels_loaded[label_name][l].max()
            for l in all_labels_loaded[label_name]
        ])
        min_label = min([
            all_labels_loaded[label_name][l].min()
            for l in all_labels_loaded[label_name]
        ])
        logger.debug(f"Max label: {max_label}")
        logger.debug(f"min label: {min_label}")

        if min_label > 0:
            logger.warn(
                f"label {label_name} is appears to be indexed from {min_label} -> making it indexed from 0"
            )
            for l in all_labels_loaded[label_name]:
                all_labels_loaded[label_name][
                    l] = all_labels_loaded[label_name][l] - min_label

            max_label = max([
                all_labels_loaded[label_name][l].max()
                for l in all_labels_loaded[label_name]
            ])
            min_label = min([
                all_labels_loaded[label_name][l].min()
                for l in all_labels_loaded[label_name]
            ])
            logger.debug(f"Max label new : {max_label}")
            logger.debug(f"min label new: {min_label}")

        if self.state.label_index_from != 0:
            assert self.state.label_index_from > 0
            all_labels_loaded[label_name] = {
                filename: all_labels_loaded[label_name][filename] +
                self.state.label_index_from
                for filename in all_labels_loaded[label_name]
            }
예제 #7
0
def _load_labels(label_dict, label_index_from, max_label_length, phoneme_dict):
    all_labels_loaded = {}

    for lable_name in label_dict:
        all_labels_loaded[lable_name] = load_labels(
            label_dict[lable_name]['label_folder'],
            label_dict[lable_name]['label_opts'])

        if max_label_length is not None and max_label_length > 0:
            all_labels_loaded[lable_name] = \
                {l: all_labels_loaded[lable_name][l] for l in all_labels_loaded[lable_name]
                 if len(all_labels_loaded[lable_name][l]) < max_label_length}

        if lable_name == "lab_phn":
            if phoneme_dict is not None:
                for sample_id in all_labels_loaded[lable_name]:
                    assert max(all_labels_loaded[lable_name][sample_id]) <= max(
                        phoneme_dict.idx2reducedIdx.keys()), \
                        "Are you sure you have the righ phoneme dict?" + \
                        " Labels have higher indices than phonemes ( {} <!= {} )".format(
                            max(all_labels_loaded[lable_name][sample_id]),
                            max(phoneme_dict.idx2reducedIdx.keys()))

                    # map labels according to phoneme dict
                    tmp_labels = np.copy(
                        all_labels_loaded[lable_name][sample_id])
                    for k, v in phoneme_dict.idx2reducedIdx.items():
                        tmp_labels[all_labels_loaded[lable_name][sample_id] ==
                                   k] = v

                    all_labels_loaded[lable_name][sample_id] = tmp_labels

        max_label = max([
            all_labels_loaded[lable_name][l].max()
            for l in all_labels_loaded[lable_name]
        ])
        min_label = min([
            all_labels_loaded[lable_name][l].min()
            for l in all_labels_loaded[lable_name]
        ])
        logger.debug(f"Max label: {max_label}")
        logger.debug(f"min label: {min_label}")

        if min_label > 0:
            logger.warn(
                f"label {lable_name} does not seem to be indexed from 0 -> making it indexed from 0"
            )
            for l in all_labels_loaded[lable_name]:
                all_labels_loaded[lable_name][
                    l] = all_labels_loaded[lable_name][l] - 1

            max_label = max([
                all_labels_loaded[lable_name][l].max()
                for l in all_labels_loaded[lable_name]
            ])
            min_label = min([
                all_labels_loaded[lable_name][l].min()
                for l in all_labels_loaded[lable_name]
            ])
            logger.debug(f"Max label new : {max_label}")
            logger.debug(f"min label new: {min_label}")

        if label_index_from != 0:
            assert label_index_from > 0
            all_labels_loaded[lable_name] = {
                filename:
                all_labels_loaded[lable_name][filename] + label_index_from
                for filename in all_labels_loaded[lable_name]
            }

    return all_labels_loaded
예제 #8
0
파일: viz_asr.py 프로젝트: pfriesch/PhnKWS
def valid_epoch_sync_metrics(epoch, model, loss_fun, metrics, config,
                             max_label_length, device, tensorboard_logger):
    """Run one validation epoch, log the results and plot a few alignments.

    The validation dataset named by ``config['dataset']['data_use']
    ['valid_with']`` is built, every batch is passed through ``model`` and
    ``loss_fun``, and the loss and metrics are averaged over the number of
    steps. On step 1 and step 60 the output is additionally decoded with a
    CTC beam decoder and plotted.

    :param epoch: epoch number, used for the progress bar and as the
        tensorboard step.
    :param model: model to evaluate; it is switched to eval mode.
    :param loss_fun: callable ``(output, targets)`` returning a mapping that
        contains ``"loss_final"``.
    :param metrics: metrics passed on to ``eval_metrics``.
    :param config: experiment configuration mapping.
    :param max_label_length: passed on to ``get_dataset``.
    :param device: device the inputs (and, if needed, targets) are moved to.
    :param tensorboard_logger: receives the averaged loss and metrics.
    :return: dict with ``valid_loss`` and ``valid_metrics``, both averaged
        over the number of validation steps.
    """
    model.eval()

    valid_loss = 0
    accumulated_valid_metrics = {metric: 0 for metric in metrics}

    valid_data = config['dataset']['data_use']['valid_with']
    _all_feats = config['dataset']['dataset_definition']['datasets'][
        valid_data]['features']
    _all_labs = config['dataset']['dataset_definition']['datasets'][
        valid_data]['labels']
    dataset = get_dataset(
        config['training']['dataset_type'],
        config['exp']['data_cache_root'],
        f"{valid_data}_{config['exp']['name']}",
        {feat: _all_feats[feat]
         for feat in config['dataset']['features_use']},
        {lab: _all_labs[lab]
         for lab in config['dataset']['labels_use']},
        config['training']['batching']['max_seq_length_valid'],
        model.context_left,
        model.context_right,
        normalize_features=True,
        phoneme_dict=config['dataset']['dataset_definition']['phoneme_dict'],
        max_seq_len=config['training']['batching']['max_seq_length_valid'],
        max_label_length=max_label_length)

    batch_size_valid = config['training']['batching']['batch_size_valid']
    if batch_size_valid != 1:
        # NOTE(review): the message announces a batch size of 1, but the
        # configured value is still handed to the data loader below.
        logger.warning("setting valid batch size to 1 to avoid padding zeros")
    dataloader = KaldiDataLoader(
        dataset,
        batch_size_valid,
        config["exp"]["n_gpu"] > 0,
        batch_ordering=model.batch_ordering)

    assert len(dataset) >= batch_size_valid, \
        f"Length of valid dataset {len(dataset)} too small " \
        + f"for batch_size of {batch_size_valid}"

    n_steps_this_epoch = 0
    with tqdm(disable=not logger.isEnabledFor(logging.INFO),
              total=len(dataloader)) as pbar:
        pbar.set_description('V e:{} l: {} '.format(epoch, '-'))
        for sample_name, inputs, targets in dataloader:
            n_steps_this_epoch += 1

            inputs = to_device(device, inputs)
            if "lab_phn" not in targets:
                targets = to_device(device, targets)

            output = model(inputs)
            loss = loss_fun(output, targets)

            output = detach_cpu(output)
            targets = detach_cpu(targets)
            loss = detach_cpu(loss)

            #### Logging ####
            valid_loss += loss["loss_final"].item()
            _valid_metrics = eval_metrics((output, targets), metrics)
            for metric, metric_value in _valid_metrics.items():
                accumulated_valid_metrics[metric] += metric_value

            pbar.set_description('V e:{} l: {:.4f} '.format(
                epoch, loss["loss_final"].item()))
            pbar.update()

            do_plotting = True
            # Explicit grouping: plot on the first and on the 60th step.
            if do_plotting and n_steps_this_epoch in (1, 60):
                phn_output = output['out_phn']
                fbank = inputs["fbank"].numpy()
                _phoneme_dict = dataset.state.phoneme_dict
                # One extra entry on top of the phonemes; the decoded
                # indices are shifted back by 1 below.
                vocabulary_size = len(_phoneme_dict.reducedIdx2phoneme) + 1
                # The decoder only needs one distinct character per label.
                vocabulary = [
                    chr(c) for c in list(range(65, 65 + 58)) +
                    list(range(65 + 58 + 69, 65 + 58 + 69 + 500))
                ][:vocabulary_size]
                decoder = ctcdecode.CTCBeamDecoder(vocabulary,
                                                   log_probs_input=True,
                                                   beam_width=1)

                # We expect batch x seq x label_size
                decoder_logits = phn_output.permute(0, 2, 1)
                beam_result, _beam_scores, _timesteps, out_seq_len = decoder.decode(
                    decoder_logits)

                # The phoneme targets of the batch are stored back to back;
                # cut them apart using the per-sample lengths.
                _targets = []
                curr_l = 0
                for l in targets['target_sequence_lengths']:
                    _targets.append(targets['lab_phn'][curr_l:curr_l + l])
                    curr_l += l

                phn_output_np = phn_output.numpy()
                for i in range(len(fbank)):
                    _beam_result = beam_result[i, 0, :out_seq_len[i, 0]]
                    result_decoded = " ".join(
                        _phoneme_dict.reducedIdx2phoneme[l.item() - 1]
                        for l in _beam_result)
                    logger.debug("RES: " + result_decoded)
                    label_decoded = " ".join(
                        _phoneme_dict.reducedIdx2phoneme[l.item() - 1]
                        for l in _targets[i])
                    logger.debug("LAB: " + label_decoded)
                    # NOTE(review): hard-coded dataset location.
                    text = sample_id_to_transcript(
                        sample_name[i],
                        "/mnt/data/datasets/LibriSpeech/dev-clean")
                    logger.debug("TXT: " + text)

                    # Exponentiate once and normalise by the sum over axis 1.
                    exp_output = np.exp(phn_output_np[i])
                    plot_alignment_spectrogram_ctc(
                        sample_name[i],
                        fbank[i],
                        (exp_output.T / exp_output.sum(axis=1)).T,
                        _phoneme_dict,
                        label_decoded,
                        text,
                        result_decoded=result_decoded)

            #### /Logging ####

    # The metrics are already fully accumulated inside the loop; adding each
    # value to itself again here would double every reported metric.

    tensorboard_logger.set_step(epoch, 'valid')
    tensorboard_logger.add_scalar('valid_loss',
                                  valid_loss / n_steps_this_epoch)
    for metric in accumulated_valid_metrics:
        tensorboard_logger.add_scalar(
            metric, accumulated_valid_metrics[metric] / n_steps_this_epoch)

    return {
        'valid_loss': valid_loss / n_steps_this_epoch,
        'valid_metrics': {
            metric: accumulated_valid_metrics[metric] / n_steps_this_epoch
            for metric in accumulated_valid_metrics
        }
    }