Example #1
def test_upload():
    # Make dirs
    os.makedirs('tmp/publish_dir', exist_ok=True)
    populate_wham_dir('tmp/wham')

    # Dataset and NN
    train_set = WhamDataset('tmp/wham', task='sep_clean')
    model = ConvTasNet(n_src=2, n_repeats=2, n_blocks=2, bn_chan=16,
                       hid_chan=4, skip_chan=8, n_filters=32)

    # Save publishable
    model_conf = model.serialize()
    model_conf.update(train_set.get_infos())
    save_publishable('tmp/publish_dir', model_conf, metrics={}, train_conf={})

    if False:
        # Upload
        zen, current = upload_publishable(
            'tmp/publish_dir',
            uploader="Manuel Pariente",
            affiliation="INRIA",
            use_sandbox=True,
            unit_test=True,  # Remove this argument and monkeypatch `input()`
        )

        # Assert metadata is correct
        meta = current.json()['metadata']
        assert meta['creators'][0]['name'] == "Manuel Pariente"
        assert meta['creators'][0]['affiliation'] == "INRIA"
        assert 'asteroid-models' in [d['identifier'] for d in meta['communities']]

        # Clean up
        zen.remove_deposition(current.json()['id'])
    shutil.rmtree('tmp/wham')
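
Note: this test snippet (and the variant in Example #2) omits its imports. A minimal sketch of what it appears to assume, with module paths as in recent asteroid versions (they may differ by release):

import os
import shutil

from asteroid.data import WhamDataset
from asteroid.models import ConvTasNet
from asteroid.models.publisher import save_publishable, upload_publishable

# populate_wham_dir is a test helper that creates a minimal fake WHAM directory;
# it is defined in the asteroid test suite rather than the public API.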
Example #2
def test_upload():
    # Make dirs
    os.makedirs("tmp/publish_dir", exist_ok=True)
    populate_wham_dir("tmp/wham")

    # Dataset and NN
    train_set = WhamDataset("tmp/wham", task="sep_clean")
    model = ConvTasNet(n_src=2,
                       n_repeats=2,
                       n_blocks=2,
                       bn_chan=16,
                       hid_chan=4,
                       skip_chan=8,
                       n_filters=32)

    # Save publishable
    model_conf = model.serialize()
    model_conf.update(train_set.get_infos())
    save_publishable("tmp/publish_dir", model_conf, metrics={}, train_conf={})

    # Upload
    token = os.getenv("ACCESS_TOKEN")
    if token:  # ACCESS_TOKEN is not available on forks.
        zen, current = upload_publishable(
            "tmp/publish_dir",
            uploader="Manuel Pariente",
            affiliation="INRIA",
            use_sandbox=True,
            unit_test=True,  # Remove this argument and monkeypatch `input()`
            git_username="******",
        )

        # Assert metadata is correct
        meta = current.json()["metadata"]
        assert meta["creators"][0]["name"] == "Manuel Pariente"
        assert meta["creators"][0]["affiliation"] == "INRIA"
        assert "asteroid-models" in [
            d["identifier"] for d in meta["communities"]
        ]

        # Clean up
        zen.remove_deposition(current.json()["id"])
        shutil.rmtree("tmp/wham")
Example #3
File: eval.py  Project: saurjya/asteroid
def main(conf):
    model_path = os.path.join(conf["exp_dir"], "best_model.pth")
    model = DCUNet.from_pretrained(model_path)
    # Handle device placement
    if conf["use_gpu"]:
        model.cuda()
    model_device = next(model.parameters()).device

    test_set = BBCSODataset(
        conf["json_dir"],
        conf["n_src"],
        conf["sample_rate"],
        conf["batch_size"],
        220500,
        train=False,
    )
    # Uses all segment length
    # Used to reorder sources only
    loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx")

    # Randomly choose the indexes of sentences to save.
    ex_save_dir = os.path.join(conf["exp_dir"], "examples/")
    if conf["n_save_ex"] == -1:
        conf["n_save_ex"] = len(test_set)
    save_idx = random.sample(range(len(test_set)), conf["n_save_ex"])
    series_list = []
    torch.no_grad().__enter__()
    for idx in tqdm(range(len(test_set))):
        # Forward the network on the mixture.
        mix, sources = tensors_to_device(test_set[idx], device=model_device)
        mix = mix.unsqueeze(0)
        sources = sources.unsqueeze(0)
        est_sources = model(mix)
        loss, reordered_sources = loss_func(est_sources, sources, return_est=True)
        mix_np = mix.cpu().data.numpy()
        sources_np = sources.squeeze(0).cpu().data.numpy()
        est_sources_np = reordered_sources.squeeze(0).cpu().data.numpy()
        utt_metrics = get_metrics(
            mix_np,
            sources_np,
            est_sources_np,
            sample_rate=conf["sample_rate"],
            metrics_list=compute_metrics,
        )
        series_list.append(pd.Series(utt_metrics))
        
        # Save some examples in a folder. Wav files and metrics as text.
        if idx in save_idx:
            local_save_dir = os.path.join(ex_save_dir, "ex_{}/".format(idx))
            os.makedirs(local_save_dir, exist_ok=True)
            sf.write(local_save_dir + "mixture.wav", np.swapaxes(mix_np, 0, 1), conf["sample_rate"])
            # Loop over the sources and estimates
            for src_idx, src in enumerate(sources_np):
                sf.write(local_save_dir + "s{}.wav".format(src_idx + 1), src, conf["sample_rate"])
            for src_idx, est_src in enumerate(est_sources_np):
                est_src *= np.max(np.abs(mix_np)) / np.max(np.abs(est_src))
                sf.write(
                    local_save_dir + "s{}_estimate.wav".format(src_idx + 1),
                    est_src,
                    conf["sample_rate"],
                )
            # Write local metrics to the example folder.
            with open(local_save_dir + "metrics.json", "w") as f:
                json.dump(utt_metrics, f, indent=0)

    # Save all metrics to the experiment folder.
    all_metrics_df = pd.DataFrame(series_list)
    all_metrics_df.to_csv(os.path.join(conf["exp_dir"], "all_metrics.csv"))

    # Print and save summary metrics
    final_results = {}
    for metric_name in compute_metrics:
        input_metric_name = "input_" + metric_name
        ldf = all_metrics_df[metric_name] - all_metrics_df[input_metric_name]
        final_results[metric_name] = all_metrics_df[metric_name].mean()
        final_results[metric_name + "_imp"] = ldf.mean()
    print("Overall metrics :")
    pprint(final_results)
    with open(os.path.join(conf["exp_dir"], "final_metrics.json"), "w") as f:
        json.dump(final_results, f, indent=0)

    model_dict = torch.load(model_path, map_location="cpu")
    os.makedirs(os.path.join(conf["exp_dir"], "publish_dir"), exist_ok=True)
    publishable = save_publishable(
        os.path.join(conf["exp_dir"], "publish_dir"),
        model_dict,
        metrics=final_results,
        train_conf=train_conf,
    )
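
Note: like the other eval.py excerpts in this list, the script above leaves out its imports and the module-level globals it reads (compute_metrics, train_conf). A rough sketch of the typical header, assuming a recent asteroid layout (exact module paths may vary, and BBCSODataset comes from the recipe itself):

import json
import os
import random
from pprint import pprint

import numpy as np
import pandas as pd
import soundfile as sf
import torch
from tqdm import tqdm

from asteroid.losses import PITLossWrapper, pairwise_neg_sisdr
from asteroid.metrics import get_metrics
from asteroid.models import DCUNet
from asteroid.models.publisher import save_publishable
from asteroid.utils import tensors_to_device

compute_metrics = ["si_sdr", "sdr", "sir", "sar", "stoi"]  # typical metrics list passed to get_metrics
# train_conf is loaded from conf.yml in the __main__ block (see Example #10).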
Example #4
def main(conf):
    model_path = os.path.join(conf['exp_dir'], 'best_model.pth')
    model = DPRNNTasNet.from_pretrained(model_path)
    # Handle device placement
    if conf['use_gpu']:
        model.cuda()
    model_device = next(model.parameters()).device
    test_set = WhamDataset(conf['test_dir'], conf['task'],
                           sample_rate=conf['sample_rate'],
                           nondefault_nsrc=model.masker.n_src,
                           segment=None)  # Uses all segment length
    # Used to reorder sources only
    loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from='pw_mtx')

    # Randomly choose the indexes of sentences to save.
    ex_save_dir = os.path.join(conf['exp_dir'], 'examples/')
    if conf['n_save_ex'] == -1:
        conf['n_save_ex'] = len(test_set)
    save_idx = random.sample(range(len(test_set)), conf['n_save_ex'])
    series_list = []
    torch.no_grad().__enter__()
    for idx in tqdm(range(len(test_set))):
        # Forward the network on the mixture.
        mix, sources = tensors_to_device(test_set[idx], device=model_device)
        est_sources = model(mix[None, None])
        loss, reordered_sources = loss_func(est_sources, sources[None],
                                            return_est=True)
        mix_np = mix[None].cpu().data.numpy()
        sources_np = sources.squeeze().cpu().data.numpy()
        est_sources_np = reordered_sources.squeeze().cpu().data.numpy()
        utt_metrics = get_metrics(mix_np, sources_np, est_sources_np,
                                  sample_rate=conf['sample_rate'])
        utt_metrics['mix_path'] = test_set.mix[idx][0]
        series_list.append(pd.Series(utt_metrics))

        # Save some examples in a folder. Wav files and metrics as text.
        if idx in save_idx:
            local_save_dir = os.path.join(ex_save_dir, 'ex_{}/'.format(idx))
            os.makedirs(local_save_dir, exist_ok=True)
            sf.write(local_save_dir + "mixture.wav", mix_np[0],
                     conf['sample_rate'])
            # Loop over the sources and estimates
            for src_idx, src in enumerate(sources_np):
                sf.write(local_save_dir + "s{}.wav".format(src_idx+1), src,
                         conf['sample_rate'])
            for src_idx, est_src in enumerate(est_sources_np):
                sf.write(local_save_dir + "s{}_estimate.wav".format(src_idx+1),
                         est_src, conf['sample_rate'])
            # Write local metrics to the example folder.
            with open(local_save_dir + 'metrics.json', 'w') as f:
                json.dump(utt_metrics, f, indent=0)

    # Save all metrics to the experiment folder.
    all_metrics_df = pd.DataFrame(series_list)
    all_metrics_df.to_csv(os.path.join(conf['exp_dir'], 'all_metrics.csv'))

    # Print and save summary metrics
    final_results = {}
    for metric_name in compute_metrics:
        input_metric_name = 'input_' + metric_name
        ldf = all_metrics_df[metric_name] - all_metrics_df[input_metric_name]
        final_results[metric_name] = all_metrics_df[metric_name].mean()
        final_results[metric_name + '_imp'] = ldf.mean()
    print('Overall metrics :')
    pprint(final_results)
    with open(os.path.join(conf['exp_dir'], 'final_metrics.json'), 'w') as f:
        json.dump(final_results, f, indent=0)
    model_dict = torch.load(model_path, map_location='cpu')

    publishable = save_publishable(
        os.path.join(conf['exp_dir'], 'publish_dir'), model_dict,
        metrics=final_results, train_conf=train_conf
    )
Example #5
def main(conf):

    train_set = DeMaskDataset(conf, conf["data"]["clean_train"], True,
                              conf["data"]["rirs_train"])
    val_set = DeMaskDataset(conf, conf["data"]["clean_dev"], False,
                            conf["data"]["rirs_dev"])

    train_loader = DataLoader(
        train_set,
        shuffle=True,
        batch_size=conf["training"]["batch_size"],
        num_workers=conf["training"]["num_workers"],
        drop_last=True,
    )
    val_loader = DataLoader(
        val_set,
        shuffle=False,
        batch_size=conf["training"]["batch_size"],
        num_workers=conf["training"]["num_workers"],
        drop_last=True,
    )

    model = DeMask(**conf["filterbank"], **conf["demask_net"])
    optimizer = make_optimizer(model.parameters(), **conf["optim"])
    # Define scheduler
    scheduler = None
    if conf["training"]["half_lr"]:
        scheduler = ReduceLROnPlateau(optimizer=optimizer,
                                      factor=0.5,
                                      patience=5)
    # Just after instantiating, save the args. Easy loading in the future.
    exp_dir = conf["main_args"]["exp_dir"]
    os.makedirs(exp_dir, exist_ok=True)
    conf_path = os.path.join(exp_dir, "conf.yml")
    with open(conf_path, "w") as outfile:
        yaml.safe_dump(conf, outfile)

    # Define Loss function.
    loss_func = singlesrc_neg_sisdr
    system = DeMaskSystem(
        model=model,
        loss_func=loss_func,
        optimizer=optimizer,
        train_loader=train_loader,
        val_loader=val_loader,
        scheduler=scheduler,
        config=conf,
    )

    # Define callbacks
    checkpoint_dir = os.path.join(exp_dir, "checkpoints/")
    checkpoint = ModelCheckpoint(
        checkpoint_dir,
        monitor="val_loss",
        mode="min",
        save_top_k=conf["training"]["save_top_k"],
        verbose=True,
    )
    early_stopping = False
    if conf["training"]["early_stop"]:
        early_stopping = EarlyStopping(monitor="val_loss",
                                       patience=conf["training"]["patience"],
                                       verbose=True)

    # Don't ask GPU if they are not available.
    gpus = -1 if torch.cuda.is_available() else None
    trainer = pl.Trainer(
        max_epochs=conf["training"]["epochs"],
        checkpoint_callback=checkpoint,
        early_stop_callback=early_stopping,
        default_root_dir=exp_dir,
        gpus=gpus,
        distributed_backend="ddp",
        gradient_clip_val=conf["training"]["gradient_clipping"],
        train_percent_check=0.1,
    )
    trainer.fit(system)

    best_k = {k: v.item() for k, v in checkpoint.best_k_models.items()}
    with open(os.path.join(exp_dir, "best_k_models.json"), "w") as f:
        json.dump(best_k, f, indent=0)

    state_dict = torch.load(checkpoint.best_model_path)
    system.load_state_dict(state_dict=state_dict["state_dict"])
    system.cpu()

    to_save = system.model.serialize()
    to_save.update(train_set.get_infos())
    torch.save(to_save, os.path.join(exp_dir, "best_model.pth"))
    save_publishable(
        os.path.join(exp_dir, "publish_dir"),
        to_save,
        metrics=dict(),
        train_conf=conf,
        recipe="asteroid/demask",
    )
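
Once this training script has run, the serialized best_model.pth can be reloaded directly. A small usage sketch (assuming asteroid's from_pretrained accepts a local checkpoint path, as the eval.py examples in this list do; the experiment directory below is hypothetical):

import os

from asteroid.models import DeMask

exp_dir = "exp/train_demask"  # hypothetical experiment directory
model = DeMask.from_pretrained(os.path.join(exp_dir, "best_model.pth"))
model.eval()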
Example #6
def main(conf):
    model_path = os.path.join(conf["exp_dir"], conf["ckpt_path"])

    # All resulting files will be saved in eval_save_dir.
    eval_save_dir = os.path.join(conf["exp_dir"], conf["out_dir"])
    os.makedirs(eval_save_dir, exist_ok=True)

    if not os.path.exists(os.path.join(eval_save_dir, "final_metrics.json")):
        if conf["ckpt_path"] == "best_model.pth":
            # serialized checkpoint
            model = getattr(asteroid,
                            conf["model"]).from_pretrained(model_path)
        else:
            # Non-serialized checkpoint (_ckpt_epoch_{i}.ckpt): state-dict keys
            # start with "model.", so that prefix has to be stripped.
            model = getattr(asteroid,
                            conf["model"])(**conf["train_conf"]["filterbank"],
                                           **conf["train_conf"]["masknet"])
            all_states = torch.load(model_path, map_location="cpu")
            state_dict = {
                k.split('.', 1)[1]: all_states["state_dict"][k]
                for k in all_states["state_dict"]
            }
            model.load_state_dict(state_dict)

        # Handle device placement
        if conf["use_gpu"]:
            model.cuda()
        model_device = next(model.parameters()).device
        test_set = make_test_dataset(
            corpus=conf["corpus"],
            test_dir=conf["test_dir"],
            task=conf["task"],
            sample_rate=conf["sample_rate"],
            n_src=conf["train_conf"]["data"]["n_src"],
        )
        # Used to reorder sources only
        loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx")

        # Randomly choose the indexes of sentences to save.
        ex_save_dir = os.path.join(eval_save_dir, "examples/")
        if conf["n_save_ex"] == -1:
            conf["n_save_ex"] = len(test_set)
        save_idx = random.sample(range(len(test_set)), conf["n_save_ex"])

        series_list = []
        torch.no_grad().__enter__()
        for idx in tqdm(range(len(test_set))):
            # Forward the network on the mixture.
            mix, sources = tensors_to_device(test_set[idx],
                                             device=model_device)
            est_sources = model(mix.unsqueeze(0))

            # When running inference for separation with a multi-task model,
            # exclude the last channel. This does not affect single-task
            # models (from_scratch, pre+FT).
            est_sources = est_sources[:, :sources.shape[0]]

            loss, reordered_sources = loss_func(est_sources,
                                                sources[None],
                                                return_est=True)
            mix_np = mix.cpu().data.numpy()
            sources_np = sources.cpu().data.numpy()
            est_sources_np = reordered_sources.squeeze(0).cpu().data.numpy()
            # For each utterance, we get a dictionary with the mixture path,
            # the input and output metrics
            utt_metrics = get_metrics(
                mix_np,
                sources_np,
                est_sources_np,
                sample_rate=conf["sample_rate"],
                metrics_list=compute_metrics,
            )
            if hasattr(test_set, "mixture_path"):
                utt_metrics["mix_path"] = test_set.mixture_path
            series_list.append(pd.Series(utt_metrics))

            # Save some examples in a folder. Wav files and metrics as text.
            if idx in save_idx:
                local_save_dir = os.path.join(ex_save_dir,
                                              "ex_{}/".format(idx))
                os.makedirs(local_save_dir, exist_ok=True)
                sf.write(local_save_dir + "mixture.wav", mix_np,
                         conf["sample_rate"])
                # Loop over the sources and estimates
                for src_idx, src in enumerate(sources_np):
                    sf.write(local_save_dir + "s{}.wav".format(src_idx), src,
                             conf["sample_rate"])
                for src_idx, est_src in enumerate(est_sources_np):
                    est_src *= np.max(np.abs(mix_np)) / np.max(np.abs(est_src))
                    sf.write(
                        local_save_dir + "s{}_estimate.wav".format(src_idx),
                        est_src,
                        conf["sample_rate"],
                    )
                # Write local metrics to the example folder.
                with open(local_save_dir + "metrics.json", "w") as f:
                    json.dump(utt_metrics, f, indent=0)

        # Save all metrics to the experiment folder.
        all_metrics_df = pd.DataFrame(series_list)
        all_metrics_df.to_csv(os.path.join(eval_save_dir, "all_metrics.csv"))

        # Print and save summary metrics
        final_results = {}
        for metric_name in compute_metrics:
            input_metric_name = "input_" + metric_name
            ldf = all_metrics_df[metric_name] - all_metrics_df[
                input_metric_name]
            final_results[metric_name] = all_metrics_df[metric_name].mean()
            final_results[metric_name + "_imp"] = ldf.mean()
        print("Overall metrics :")
        pprint(final_results)
        with open(os.path.join(eval_save_dir, "final_metrics.json"), "w") as f:
            json.dump(final_results, f, indent=0)
    else:
        with open(os.path.join(eval_save_dir, "final_metrics.json"), "r") as f:
            final_results = json.load(f)

    if conf["publishable"]:
        assert conf["ckpt_path"] == "best_model.pth"
        model_dict = torch.load(model_path, map_location="cpu")
        os.makedirs(os.path.join(conf["exp_dir"], "publish_dir"),
                    exist_ok=True)
        publishable = save_publishable(
            os.path.join(conf["exp_dir"], "publish_dir"),
            model_dict,
            metrics=final_results,
            train_conf=train_conf,
        )
Example #7
def main(conf):
    model_path = os.path.join(conf["exp_dir"], "best_model.pth")
    model = ConvTasNet.from_pretrained(model_path)
    model = LambdaOverlapAdd(
        nnet=model,  # function to apply to each segment
        n_src=2,  # number of sources in the output of nnet
        window_size=64000,  # size of the segmenting window
        hop_size=None,  # segmentation hop size
        window="hanning",  # window type (see scipy.signal.get_window)
        reorder_chunks=False,  # whether to reorder each consecutive segment
        enable_grad=False,  # enable/disable gradient computation (see torch.set_grad_enabled)
    )

    # Handle device placement
    if conf["use_gpu"]:
        model.cuda()

    model_device = next(model.parameters()).device

    # Evaluation is done using the 'remix' mixture.
    dataset_kwargs = {
        "root_path": Path(conf["train_conf"]["data"]["root_path"]),
        "task": conf["train_conf"]["data"]["task"],
        "sample_rate": conf["train_conf"]["data"]["sample_rate"],
        "num_workers": conf["train_conf"]["training"]["num_workers"],
        "mixture": "remix",
    }

    test_set = DAMPVSEPDataset(split="test", **dataset_kwargs)

    # Randomly choose the indexes of sentences to save.
    eval_save_dir = os.path.join(conf["exp_dir"], conf["out_dir"])
    ex_save_dir = os.path.join(eval_save_dir, "examples/")
    if conf["n_save_ex"] == -1:
        conf["n_save_ex"] = len(test_set)
    save_idx = random.sample(range(len(test_set)), conf["n_save_ex"])
    series_list = []
    torch.no_grad().__enter__()
    for idx in tqdm(range(len(test_set))):
        # Forward the network on the mixture.
        mix, sources = test_set[idx]
        mix = mix.to(model_device)
        est_sources = model.forward(mix.unsqueeze(0).unsqueeze(1))
        mix_np = mix.squeeze(0).cpu().data.numpy()
        sources_np = sources.cpu().data.numpy()
        est_sources_np = est_sources.squeeze(0).cpu().data.numpy()

        # For each utterance, we get a dictionary with the mixture path,
        # the input and output metrics
        utt_metrics = get_metrics(
            mix_np,
            sources_np,
            est_sources_np,
            sample_rate=conf["sample_rate"],
            metrics_list=compute_metrics,
            average=False,
        )
        utt_metrics = split_metric_dict(utt_metrics)
        utt_metrics["mix_path"] = test_set.mixture_path
        series_list.append(pd.Series(utt_metrics))
        # Save some examples in a folder. Wav files and metrics as text.
        if idx in save_idx:
            local_save_dir = os.path.join(ex_save_dir, "ex_{}/".format(idx))
            os.makedirs(local_save_dir, exist_ok=True)
            sf.write(local_save_dir + "mixture.wav", mix_np / max(abs(mix_np)),
                     conf["sample_rate"])

            # Loop over the sources and estimates
            for src_idx, src in enumerate(sources_np):
                sf.write(local_save_dir + "s{}.wav".format(src_idx), src,
                         conf["sample_rate"])

            for src_idx, est_src in enumerate(est_sources_np):
                est_src *= np.max(np.abs(mix_np)) / np.max(np.abs(est_src))
                sf.write(
                    local_save_dir + "s{}_estimate.wav".format(src_idx),
                    est_src,
                    conf["sample_rate"],
                )
            # Write local metrics to the example folder.
            with open(local_save_dir + "metrics.json", "w") as f:
                json.dump(utt_metrics, f, indent=0)

    # Save all metrics to the experiment folder.
    all_metrics_df = pd.DataFrame(series_list)
    all_metrics_df.to_csv(os.path.join(eval_save_dir, "all_metrics.csv"))

    # Print and save summary metrics
    final_results = {}
    for metric_name in compute_metrics:
        for s in ["", "_s0", "_s1"]:
            input_metric_name = "input_" + f"{metric_name}{s}"
            ldf = all_metrics_df[f"{metric_name}{s}"] - all_metrics_df[
                input_metric_name]
            final_results[f"{metric_name}{s}"] = all_metrics_df[
                f"{metric_name}{s}"].mean()
            final_results[f"{metric_name}{s}" + "_imp"] = ldf.mean()
    print("Overall metrics :")
    pprint(final_results)
    with open(os.path.join(eval_save_dir, "final_metrics.json"), "w") as f:
        json.dump(final_results, f, indent=0)

    model_dict = torch.load(model_path, map_location="cpu")
    os.makedirs(os.path.join(conf["exp_dir"], "publish_dir"), exist_ok=True)
    publishable = save_publishable(
        os.path.join(conf["exp_dir"], "publish_dir"),
        model_dict,
        metrics=final_results,
        train_conf=train_conf,
    )
Example #8
File: eval.py  Project: ChokJohn/SpeechX
def main(conf):
    model_path = os.path.join(conf["exp_dir"], "best_model.pth")
    model = TransMask.from_pretrained(model_path)
    # Handle device placement
    if conf["use_gpu"]:
        model.cuda()
    model_device = next(model.parameters()).device
    if conf['file_path'] == '':
        test_set = LibriMix(
            csv_dir=conf["test_dir"],
            task=conf["task"],
            sample_rate=conf["sample_rate"],
            n_src=conf["train_conf"]["masknet"]["n_src"],
            segment=None,
        )  # Uses all segment length
        # Used to reorder sources only
        loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx")

    # Randomly choose the indexes of sentences to save.
    eval_save_dir = os.path.join(conf["exp_dir"], conf["out_dir"])
    ex_save_dir = os.path.join(eval_save_dir, "examples/")
    if conf["n_save_ex"] == -1 and conf['file_path'] == '':
        conf["n_save_ex"] = len(test_set)
        save_idx = random.sample(range(len(test_set)), conf["n_save_ex"])
    else:
        save_idx = 0
    series_list = []
    torch.no_grad().__enter__()
    sdr = 0
    rtf = 0
    if conf['file_path'] != '':
        file_path = conf['file_path']
        if os.path.isdir(file_path):
            wavs = [
                os.path.join(file_path, wav) for wav in os.listdir(file_path)
                if '.wav' in wav
            ]
            for wav in wavs:
                inference_wav(wav, conf, model_device, model, ex_save_dir)
        else:
            inference_wav(file_path, conf, model_device, model, ex_save_dir)
        return

    for idx in tqdm(range(len(test_set))):
        # Forward the network on the mixture.
        mix, sources = tensors_to_device(test_set[idx], device=model_device)

        # Stretch the mixture to 8x its length (each sample repeated 8 times) so the
        # real-time-factor measurement below runs on a longer input; sources are
        # extended to match.
        mul = 8
        mix = mix.view(-1, 1).repeat(1, mul).view(-1)
        sources = sources.repeat(1, mul)

        ss = time()
        est_sources = model(mix.unsqueeze(0))
        dur = time() - ss
        ll = len(mix) / 8000  # utterance length in seconds (assumes 8 kHz audio)
        rtf += dur / ll
        print(rtf / (idx + 1))  # running-average real-time factor

        loss, reordered_sources = loss_func(est_sources,
                                            sources[None],
                                            return_est=True)
        mix_np = mix.cpu().data.numpy()
        sources_np = sources.cpu().data.numpy()
        est_sources_np = reordered_sources.squeeze(0).cpu().data.numpy()
        # For each utterance, we get a dictionary with the mixture path,
        # the input and output metrics
        utt_metrics = get_metrics(
            mix_np,
            sources_np,
            est_sources_np,
            sample_rate=conf["sample_rate"],
            metrics_list=compute_metrics,
        )

        sdr += utt_metrics['sdr']
        print(sdr / (idx + 1))  # running-average SDR

        utt_metrics["mix_path"] = test_set.mixture_path
        series_list.append(pd.Series(utt_metrics))

        # Save some examples in a folder. Wav files and metrics as text.
        if idx in save_idx:
            local_save_dir = os.path.join(ex_save_dir, "ex_{}/".format(idx))
            os.makedirs(local_save_dir, exist_ok=True)
            sf.write(local_save_dir + "mixture.wav", mix_np,
                     conf["sample_rate"])
            # Loop over the sources and estimates
            for src_idx, src in enumerate(sources_np):
                sf.write(local_save_dir + "s{}.wav".format(src_idx), src,
                         conf["sample_rate"])
            for src_idx, est_src in enumerate(est_sources_np):
                est_src *= np.max(np.abs(mix_np)) / np.max(np.abs(est_src))
                sf.write(
                    local_save_dir + "s{}_estimate.wav".format(src_idx),
                    est_src,
                    conf["sample_rate"],
                )
            # Write local metrics to the example folder.
            with open(local_save_dir + "metrics.json", "w") as f:
                json.dump(utt_metrics, f, indent=0)

    # Save all metrics to the experiment folder.
    all_metrics_df = pd.DataFrame(series_list)
    all_metrics_df.to_csv(os.path.join(eval_save_dir, "all_metrics.csv"))

    # Print and save summary metrics
    final_results = {}
    for metric_name in compute_metrics:
        input_metric_name = "input_" + metric_name
        ldf = all_metrics_df[metric_name] - all_metrics_df[input_metric_name]
        final_results[metric_name] = all_metrics_df[metric_name].mean()
        final_results[metric_name + "_imp"] = ldf.mean()
    print("Overall metrics :")
    pprint(final_results)
    with open(os.path.join(eval_save_dir, "final_metrics.json"), "w") as f:
        json.dump(final_results, f, indent=0)

    model_dict = torch.load(model_path, map_location="cpu")
    os.makedirs(os.path.join(conf["exp_dir"], "publish_dir"), exist_ok=True)
    # publishable = save_publishable(
    save_publishable(
        os.path.join(conf["exp_dir"], "publish_dir"),
        model_dict,
        metrics=final_results,
        train_conf=train_conf,
    )
Example #9
def main(conf):
    compute_metrics = update_compute_metrics(conf["compute_wer"],
                                             COMPUTE_METRICS)
    anno_df = pd.read_csv(
        Path(conf["test_dir"]).parent.parent.parent / "test_annotations.csv")
    wer_tracker = (MockWERTracker() if not conf["compute_wer"] else WERTracker(
        ASR_MODEL_PATH, anno_df))
    model_path = os.path.join(conf["exp_dir"], "best_model.pth")
    model = DPRNNTasNet.from_pretrained(model_path)
    # Handle device placement
    if conf["use_gpu"]:
        model.cuda()
    model_device = next(model.parameters()).device
    test_set = LibriMix(
        csv_dir=conf["test_dir"],
        task=conf["task"],
        sample_rate=conf["sample_rate"],
        n_src=conf["train_conf"]["data"]["n_src"],
        segment=None,
        return_id=True,
    )  # Uses all segment length
    # Used to reorder sources only
    loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx")

    # Randomly choose the indexes of sentences to save.
    eval_save_dir = os.path.join(conf["exp_dir"], conf["out_dir"])
    ex_save_dir = os.path.join(eval_save_dir, "examples/")
    if conf["n_save_ex"] == -1:
        conf["n_save_ex"] = len(test_set)
    save_idx = random.sample(range(len(test_set)), conf["n_save_ex"])
    series_list = []
    torch.no_grad().__enter__()
    for idx in tqdm(range(len(test_set))):
        # Forward the network on the mixture.
        mix, sources, ids = test_set[idx]
        mix, sources = tensors_to_device([mix, sources], device=model_device)
        est_sources = model(mix.unsqueeze(0))
        loss, reordered_sources = loss_func(est_sources,
                                            sources[None],
                                            return_est=True)
        mix_np = mix.cpu().data.numpy()
        sources_np = sources.cpu().data.numpy()
        est_sources_np = reordered_sources.squeeze(0).cpu().data.numpy()
        # For each utterance, we get a dictionary with the mixture path,
        # the input and output metrics
        utt_metrics = get_metrics(
            mix_np,
            sources_np,
            est_sources_np,
            sample_rate=conf["sample_rate"],
            metrics_list=COMPUTE_METRICS,
        )
        utt_metrics["mix_path"] = test_set.mixture_path
        est_sources_np_normalized = normalize_estimates(est_sources_np, mix_np)
        utt_metrics.update(**wer_tracker(
            mix=mix_np,
            clean=sources_np,
            estimate=est_sources_np_normalized,
            wav_id=ids,
            sample_rate=conf["sample_rate"],
        ))
        series_list.append(pd.Series(utt_metrics))

        # Save some examples in a folder. Wav files and metrics as text.
        if idx in save_idx:
            local_save_dir = os.path.join(ex_save_dir, "ex_{}/".format(idx))
            os.makedirs(local_save_dir, exist_ok=True)
            sf.write(local_save_dir + "mixture.wav", mix_np,
                     conf["sample_rate"])
            # Loop over the sources and estimates
            for src_idx, src in enumerate(sources_np):
                sf.write(local_save_dir + "s{}.wav".format(src_idx), src,
                         conf["sample_rate"])
            for src_idx, est_src in enumerate(est_sources_np_normalized):
                sf.write(
                    local_save_dir + "s{}_estimate.wav".format(src_idx),
                    est_src,
                    conf["sample_rate"],
                )
            # Write local metrics to the example folder.
            with open(local_save_dir + "metrics.json", "w") as f:
                json.dump(utt_metrics, f, indent=0)

    # Save all metrics to the experiment folder.
    all_metrics_df = pd.DataFrame(series_list)
    all_metrics_df.to_csv(os.path.join(eval_save_dir, "all_metrics.csv"))

    # Print and save summary metrics
    final_results = {}
    for metric_name in compute_metrics:
        input_metric_name = "input_" + metric_name
        ldf = all_metrics_df[metric_name] - all_metrics_df[input_metric_name]
        final_results[metric_name] = all_metrics_df[metric_name].mean()
        final_results[metric_name + "_imp"] = ldf.mean()

    print("Overall metrics :")
    pprint(final_results)
    if conf["compute_wer"]:
        print("\nWER report")
        wer_card = wer_tracker.final_report_as_markdown()
        print(wer_card)
        # Save the report
        with open(os.path.join(eval_save_dir, "final_wer.md"), "w") as f:
            f.write(wer_card)

    with open(os.path.join(eval_save_dir, "final_metrics.json"), "w") as f:
        json.dump(final_results, f, indent=0)

    model_dict = torch.load(model_path, map_location="cpu")
    os.makedirs(os.path.join(conf["exp_dir"], "publish_dir"), exist_ok=True)
    publishable = save_publishable(
        os.path.join(conf["exp_dir"], "publish_dir"),
        model_dict,
        metrics=final_results,
        train_conf=train_conf,
    )
Example #10
File: eval.py  Project: ChokJohn/SpeechX
        print("\nWER report")
        wer_card = wer_tracker.final_report_as_markdown()
        print(wer_card)
        # Save the report
        with open(os.path.join(eval_save_dir, "final_wer.md"), "w") as f:
            f.write(wer_card)

    with open(os.path.join(eval_save_dir, "final_metrics.json"), "w") as f:
        json.dump(final_results, f, indent=0)

    model_dict = torch.load(model_path, map_location="cpu")
    os.makedirs(os.path.join(conf["exp_dir"], "publish_dir"), exist_ok=True)
    publishable = save_publishable(
        os.path.join(conf["exp_dir"], "publish_dir"),
        model_dict,
        metrics=final_results,
        train_conf=train_conf,
    )


if __name__ == "__main__":
    args = parser.parse_args()
    arg_dic = dict(vars(args))
    # Load training config
    conf_path = os.path.join(args.exp_dir, "conf.yml")
    with open(conf_path) as f:
        train_conf = yaml.safe_load(f)
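
(The excerpt stops here. In asteroid's eval.py recipes this driver typically continues by merging the loaded training config into the argument dict and calling main, roughly:)

    arg_dic["train_conf"] = train_conf
    main(arg_dic)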
Example #11
def main(conf):
    model_path = os.path.join(conf["exp_dir"], "best_model.pth")
    model = DPRNNTasNet.from_pretrained(model_path)
    # Handle device placement
    if conf["use_gpu"]:
        model.cuda()
    model_device = next(model.parameters()).device
    test_set = WhamDataset(
        conf["test_dir"],
        conf["task"],
        sample_rate=conf["sample_rate"],
        nondefault_nsrc=None,
        segment=None,
    )  # Uses all segment length
    # Used to reorder sources only
    loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx")

    # Randomly choose the indexes of sentences to save.
    ex_save_dir = os.path.join(conf["exp_dir"], "examples/")
    if conf["n_save_ex"] == -1:
        conf["n_save_ex"] = len(test_set)
    save_idx = random.sample(range(len(test_set)), conf["n_save_ex"])
    series_list = []
    torch.no_grad().__enter__()
    for idx in tqdm(range(len(test_set))):
        # Forward the network on the mixture.
        mix, sources = tensors_to_device(test_set[idx], device=model_device)
        est_sources = model(mix[None, None])
        _, indxs = torch.sort(torch.sqrt(torch.mean(est_sources**2, dim=-1)),
                              descending=True)
        indxs = indxs[:, :2]
        # We know a priori that there are 2 sources in WHAM-clean (WSJ0-2mix clean),
        # so we sort the estimated signals and keep only the two with the highest energy.
        est_sources = est_sources.gather(
            1,
            indxs.unsqueeze(-1).repeat(1, 1, est_sources.shape[-1]))
        loss, reordered_sources = loss_func(est_sources,
                                            sources[None],
                                            return_est=True)
        mix_np = mix[None].cpu().data.numpy()
        sources_np = sources.cpu().data.numpy()
        est_sources_np = reordered_sources.squeeze(0).cpu().data.numpy()
        utt_metrics = get_metrics(
            mix_np,
            sources_np,
            est_sources_np,
            sample_rate=conf["sample_rate"],
            metrics_list=compute_metrics,
        )
        utt_metrics["mix_path"] = test_set.mix[idx][0]
        series_list.append(pd.Series(utt_metrics))

        # Save some examples in a folder. Wav files and metrics as text.
        if idx in save_idx:
            local_save_dir = os.path.join(ex_save_dir, "ex_{}/".format(idx))
            os.makedirs(local_save_dir, exist_ok=True)
            sf.write(local_save_dir + "mixture.wav", mix_np[0],
                     conf["sample_rate"])
            # Loop over the sources and estimates
            for src_idx, src in enumerate(sources_np):
                sf.write(local_save_dir + "s{}.wav".format(src_idx + 1), src,
                         conf["sample_rate"])
            for src_idx, est_src in enumerate(est_sources_np):
                est_src *= np.max(np.abs(mix_np)) / np.max(np.abs(est_src))
                sf.write(
                    local_save_dir + "s{}_estimate.wav".format(src_idx + 1),
                    est_src,
                    conf["sample_rate"],
                )
            # Write local metrics to the example folder.
            with open(local_save_dir + "metrics.json", "w") as f:
                json.dump(utt_metrics, f, indent=0)

    # Save all metrics to the experiment folder.
    all_metrics_df = pd.DataFrame(series_list)
    all_metrics_df.to_csv(os.path.join(conf["exp_dir"], "all_metrics.csv"))

    # Print and save summary metrics
    final_results = {}
    for metric_name in compute_metrics:
        input_metric_name = "input_" + metric_name
        ldf = all_metrics_df[metric_name] - all_metrics_df[input_metric_name]
        final_results[metric_name] = all_metrics_df[metric_name].mean()
        final_results[metric_name + "_imp"] = ldf.mean()
    print("Overall metrics :")
    pprint(final_results)
    with open(os.path.join(conf["exp_dir"], "final_metrics.json"), "w") as f:
        json.dump(final_results, f, indent=0)

    model_dict = torch.load(model_path, map_location="cpu")
    os.makedirs(os.path.join(conf["exp_dir"], "publish_dir"), exist_ok=True)
    publishable = save_publishable(
        os.path.join(conf["exp_dir"], "publish_dir"),
        model_dict,
        metrics=final_results,
        train_conf=train_conf,
    )