Example #1
def test_feature():
    conf = load_yaml(datadir / "mlfb_vqvae.yml")
    spkr_conf = load_yaml(datadir / "spkr.yml")
    feat = Feature(datadir, conf["feature"], spkr_conf["SF1"], gl_flag=True)
    feat.analyze(datadir / "SF1_10001.wav")
    (datadir / "SF1_10001.h5").unlink()
    (datadir / "SF1_10001_anasyn.wav").unlink()
Example #2
def test_feature():
    conf = load_yaml(ymlf)
    spkr_conf = load_yaml(spkrymlf)
    feat = Feature(datadir, conf["feature"], spkr_conf["SF1"])
    feat.analyze(
        datadir / "SF1_10001.wav",
        synth_flag=True,
    )
    (datadir / "SF1_10001.h5").unlink()
    (datadir / "SF1_10001_anasyn.wav").unlink()
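These tests rely on module-level fixtures (datadir, ymlf, spkrymlf) that are not shown here; a minimal sketch of what they look like, mirroring the header of Example #9 (the exact YAML filenames are an assumption based on Example #1):

from pathlib import Path

datadir = Path(__file__).parent / "data"
ymlf = datadir / "mlfb_vqvae.yml"  # Example #9 uses mlfb_vqvae_22050.yml instead
spkrymlf = datadir / "spkr.yml"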
Example #3
def main():
    dcp = "Extract aoucstic features"
    parser = argparse.ArgumentParser(description=dcp)
    parser.add_argument("--n_jobs", type=int, default=-1, help="# of CPUs")
    parser.add_argument("--phase", type=str, default=None, help="phase")
    parser.add_argument("--n_decode_samples",
                        type=int,
                        default=3,
                        help="# decode samples")
    parser.add_argument("--conf",
                        type=str,
                        help="ymal file for network parameters")
    parser.add_argument("--spkr_yml",
                        type=str,
                        help="yml file for speaker params")
    parser.add_argument("--scpdir", type=str, help="scp directory")
    parser.add_argument("--featdir", type=str, help="output feature directory")
    args = parser.parse_args()

    conf = load_yaml(args.conf)
    spkr_conf = load_yaml(args.spkr_yml)
    scp = open_scpdir(Path(args.scpdir) / args.phase)

    featdir = Path(args.featdir) / conf["feature"]["label"] / args.phase
    featsscp = featdir / "feats.scp"
    if featsscp.exists():
        featsscp.unlink()

    for spkr in scp["spkrs"]:
        logging.info("extract feature for {}".format(spkr))
        wavs = [scp["wav"][uid] for uid in scp["spk2utt"][spkr]]
        (featdir / spkr).mkdir(parents=True, exist_ok=True)
        feat = Feature(featdir / spkr, conf["feature"], spkr_conf[spkr])

        # create feats.scp
        with open(featsscp, "a") as fp:
            for uid in scp["spk2utt"][spkr]:
                wavf = scp["wav"][uid]
                h5f = str(featdir / spkr / (Path(wavf).stem + ".h5"))
                fp.write("{} {}\n".format(uid, h5f))

        # feature extraction with Griffin-Lim synthesis for the first n_decode_samples
        Parallel(n_jobs=args.n_jobs)([
            delayed(feat.analyze)(wavf, synth_flag=True)
            for wavf in wavs[:args.n_decode_samples]
        ])

        # feature extraction without Griffin-Lim synthesis
        Parallel(n_jobs=args.n_jobs)([
            delayed(feat.analyze)(wavf, synth_flag=False)
            for wavf in wavs[args.n_decode_samples:]
        ])
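For reference, a minimal sketch of parsing the feats.scp written above; crank's own open_featsscp (used in later examples) plays this role, so the hypothetical read_featsscp_sketch below only illustrates the "uid path" line format produced by fp.write:

from pathlib import Path


def read_featsscp_sketch(path):
    # each line is "<uid> <h5 path>", as written by fp.write above
    feats = {}
    for line in Path(path).read_text().splitlines():
        uid, h5f = line.split(maxsplit=1)
        feats[uid] = h5f
    return feats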
Example #4
def test_feature_8k():
    conf = load_yaml(ymlf)
    conf["feature"].update({
        "fs": 8000,
        "fftl": 256,
        "fmin": 80,
        "fmax": 3800,
        "hop_size": 80,
        "mlfb_dim": 80,
    })
    spkr_conf = load_yaml(datadir / "spkr.yml")
    feat = Feature(datadir, conf["feature"], spkr_conf["SF1"])
    feat.analyze(datadir / "SF1_10001_8k.wav", synth_flag=False)
    (datadir / "SF1_10001_8k.h5").unlink()
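Note that the overrides keep the 8 kHz analysis self-consistent: a hop_size of 80 samples is a 10 ms frame shift at fs=8000, and fmax=3800 Hz stays just below the 4 kHz Nyquist frequency.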
Example #5
def test_dataset(decoder_f0, use_mcep, use_raw):
    conf = load_yaml(ymlf)
    conf["decoder_f0"] = decoder_f0
    conf["receptive_size"] = 128
    if use_mcep:
        conf["input_feat_type"] = "mcep"
        conf["output_feat_type"] = "mcep"
        conf["ignore_scaler"] = ["mcep", "raw"]
    if use_raw:
        conf["use_raw"] = True
        conf["input_feat_type"] = "mlfb"
        conf["ignore_scaler"] = ["raw"]

    scp = {}
    scpdir = datadir / "scpdir"
    for phase in ["train", "dev", "eval"]:
        scp[phase] = open_scpdir(scpdir / phase)
        scp[phase]["feats"] = {"01": h5f, "02": h5f, "03": h5f}
    dataset = BaseDataset(conf, scp, phase="train", scaler=scaler)
    dataloader = DataLoader(dataset,
                            batch_size=12,
                            shuffle=True,
                            num_workers=1)

    # iterate once through the loader to check that batching succeeds;
    # batch contents are not asserted here
    for i, batch in enumerate(dataloader):
        for k, v in batch.items():
            if isinstance(v, torch.Tensor):
                pass
                # print(k, v.type(), v.size())
            else:
                pass
Example #6
def main():
    parser = argparse.ArgumentParser(
        description="Convert filter banks to waveform using Griffin-Lim algorithm",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument("--conf",
                        type=str,
                        required=True,
                        help="Cofiguration file")
    parser.add_argument(
        "--rootdir",
        type=str,
        required=True,
        help="Root directory of filter bank h5 files",
    )
    parser.add_argument("--outdir",
                        type=str,
                        required=True,
                        help="Output directory")
    args = parser.parse_args()

    # logging info
    logging.basicConfig(
        level=logging.INFO,
        stream=sys.stdout,
        format="%(asctime)s (%(module)s:%(lineno)d) "
        "%(levelname)s: %(message)s",
    )

    # load configure files
    conf = load_yaml(args.conf)
    for k, v in conf.items():
        logging.info("{}: {}".format(k, v))

    # find h5 files
    feats_files = sorted(Path(args.rootdir).glob("*.h5"))
    feats = {
        # map output wav path -> loaded filter bank; the parentheses are
        # required since Path / str binds first and Path + str raises TypeError
        Path(args.outdir) / (filename.stem + ".wav"):
        read_feature(filename, "feats")
        for filename in feats_files
    }

    # Main Griffin-Lim algorithm
    Parallel(n_jobs=30)([
        delayed(mlfb2wavf)(
            feats[wavf],
            wavf,
            fs=conf["feature"]["fs"],
            n_mels=conf["feature"]["mlfb_dim"],
            fftl=conf["feature"]["fftl"],
            hop_size=conf["feature"]["hop_size"],
            plot=False,
        ) for wavf in list(feats.keys())
    ])
Example #7
def main():
    dcp = "Extract feature statistics"
    parser = argparse.ArgumentParser(description=dcp)
    parser.add_argument("--n_jobs", type=int, default=-1, help="# of CPUs")
    parser.add_argument("--phase", type=str, default=None, help="phase")
    parser.add_argument("--conf", type=str, help="ymal file for network parameters")
    parser.add_argument("--scpdir", type=str, help="scp directory")
    parser.add_argument("--featdir", type=str, help="output feature directory")
    args = parser.parse_args()

    conf = load_yaml(args.conf)
    scp = open_scpdir(Path(args.scpdir) / args.phase)
    featdir = Path(args.featdir) / conf["feature"]["label"]
    featsscp = featdir / args.phase / "feats.scp"
    scp["feats"] = open_featsscp(featsscp)
    scaler = {}

    # speaker independent scaler extraction
    feats = ["mlfb", "lcf0"]
    # NOTE: needs improvement; a smarter way to select the feature list is required
    if conf["feature"]["fs"] != 8000:
        feats.append("mcep")

    for win_type in conf["feature"]["window_types"]:
        if win_type != "hann":
            feats += [f"mlfb_{win_type}"]

    for ext in feats:
        s = Scaler()
        s.fit(list(scp["feats"].values()), ext=ext)
        logging.info("# of samples for {}: {}".format(ext, s.ss.n_samples_seen_))
        scaler[ext] = s.ss

    # speaker dependent statistics extraction
    for spkr in scp["spkrs"]:
        file_lists_sd = [scp["feats"][uid] for uid in scp["spk2utt"][spkr]]
        s = Scaler()
        s.fit(file_lists_sd, ext="lcf0")
        logging.info(
            "# of samples {} of {}: {} samples".format(
                "lcf0", spkr, s.ss.n_samples_seen_
            )
        )
        scaler[spkr] = {"lcf0": s.ss}

    pklf = featdir / "scaler.pkl"
    joblib.dump(scaler, str(pklf))
    logging.info("Save scaler to {}".format(pklf))
Example #8
def main():
    dcp = "Extract feature statistics"
    parser = argparse.ArgumentParser(description=dcp)
    parser.add_argument("--n_jobs", type=int, default=-1, help="# of CPUs")
    parser.add_argument("--phase", type=str, default=None, help="phase")
    parser.add_argument("--conf",
                        type=str,
                        help="ymal file for network parameters")
    parser.add_argument("--scpdir", type=str, help="scp directory")
    parser.add_argument("--featdir", type=str, help="output feature directory")
    parser.add_argument("--expdir", type=str, help="exp directory")
    args = parser.parse_args()

    conf = load_yaml(args.conf)
    scp = open_scpdir(Path(args.scpdir) / args.phase)
    featsscp = Path(
        args.featdir) / conf["feature"]["label"] / args.phase / "feats.scp"
    scp["feats"] = open_featsscp(featsscp)
    expdir = Path(args.expdir)
    scaler = {}

    # speaker independent scaler extraction
    feats = ["mlfb", "mcep", "lcf0"]
    for ext in feats:
        s = Scaler()
        s.fit(list(scp["feats"].values()), ext=ext)
        logging.info("# of samples for {}: {}".format(ext,
                                                      s.ss.n_samples_seen_))
        scaler[ext] = s.ss

    # speaker dependent statistics extraction
    for spkr in scp["spkrs"]:
        file_lists_sd = [scp["feats"][uid] for uid in scp["spk2utt"][spkr]]
        s = Scaler()
        s.fit(file_lists_sd, ext="lcf0")
        logging.info("# of samples {} of {}: {} samples".format(
            "lcf0", spkr, s.ss.n_samples_seen_))
        scaler[spkr] = {"lcf0": s.ss}

    pklf = str(expdir / "{}_scaler.pkl".format(conf["feature"]["label"]))
    joblib.dump(scaler, pklf)
    logging.info("Save scaler to {}".format(pklf))
Example #9
from pathlib import Path

import numpy as np
import soundfile as sf
import torch
from crank.net.module.mlfb import LogMelFilterBankLayer
from crank.net.module.sinc_conv import SincConvPreprocessingLayer
from crank.utils import load_yaml

B, T = 1, 65536
datadir = Path(__file__).parent / "data"
ymlf = datadir / "mlfb_vqvae_22050.yml"
spkrymlf = datadir / "spkr.yml"

conf = load_yaml(ymlf)
wavf = datadir / "SF1_10001.wav"


def test_sincconv():
    sinc_conv = SincConvPreprocessingLayer(
        in_channels=1,
        sincconv_channels=32,
        sincconv_kernel_size=65,
        out_channels=80,
        kernel_sizes=[4, 4, 4, 2],
    )
    x, fs = sf.read(str(wavf))
    x = np.array(x, dtype=np.float32)

    # add batch and trailing channel dimensions: (T,) -> (1, T, 1)
    x = torch.from_numpy(x).unsqueeze(0).unsqueeze(-1)
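    # hedged continuation (not in the scraped source): the unsqueeze calls
    # above shape the waveform as (1, T, 1), so presumably the test forwards
    # it through the layer next; the expected input layout is an assumption
    # inferred from that reshaping
    y = sinc_conv(x)
    print(y.shape)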
Example #10
def main():
    # options for python
    description = "Train VQ-VAE model"
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument("--flag",
                        help='flag ["train", "eval", "reconstruction"]')
    parser.add_argument("--n_jobs", type=int, default=-1, help="# of CPUs")
    parser.add_argument("--conf",
                        type=str,
                        help="ymal file for network parameters")
    parser.add_argument("--checkpoint", type=str, default=None, help="Resume")
    parser.add_argument("--scpdir", type=str, help="scp directory")
    parser.add_argument("--featdir", type=str, help="output feature directory")
    parser.add_argument("--featsscp",
                        type=str,
                        help="specify feats.scp not scpdir")
    parser.add_argument("--expdir", type=str, help="exp directory")
    args = parser.parse_args()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    assert str(device) == "cuda", "ERROR: CPU training is not supported."

    # load configure files
    conf = load_yaml(args.conf)
    for k, v in conf.items():
        logging.info("{}: {}".format(k, v))

    # load scp
    scp = {}
    featdir = Path(args.featdir) / conf["feature"]["label"]
    for phase in ["train", "dev", "eval"]:
        scp[phase] = open_scpdir(Path(args.scpdir) / phase)
        scp[phase]["feats"] = open_featsscp(featdir / phase / "feats.scp")
    if args.flag == "eval" and args.featsscp != "None":
        logging.info("Load feats.scp from {}".format(args.featsscp))
        scp[args.flag]["feats"] = open_featsscp(args.featsscp)

    expdir = Path(args.expdir) / Path(args.conf).stem
    expdir.mkdir(exist_ok=True, parents=True)
    spkr_size = len(scp["train"]["spkrs"])

    # load model
    model = get_model(conf, spkr_size, device)
    resume = 0
    if args.checkpoint != "None":
        model, resume = load_checkpoint(model, args.checkpoint)
    else:
        if args.flag in ["reconstruction", "eval"]:
            # no checkpoint given: resume from the one with the largest step number
            import re
            pkls = list(expdir.glob("*.pkl"))
            steps = [re.findall(r"[0-9]+", str(p.stem))[0] for p in pkls]
            max_step = max(int(s) for s in steps)
            checkpoint = str([p for p in pkls if str(max_step) in str(p)][0])
            model, resume = load_checkpoint(model, checkpoint)

    # load others
    scaler = joblib.load(
        Path(args.expdir) / "{}_scaler.pkl".format(conf["feature"]["label"]))
    optimizer = get_optimizer(conf, model)
    criterion = get_criterion(conf)
    dataloader = get_dataloader(conf,
                                scp,
                                scaler,
                                n_jobs=args.n_jobs,
                                flag=args.flag)
    scheduler = get_scheduler(conf, optimizer)
    writer = {
        "train":
        SummaryWriter(logdir=args.expdir + "/runs/train-" + expdir.name),
        "dev": SummaryWriter(logdir=args.expdir + "/runs/dev-" + expdir.name),
    }

    ka = {
        "model": model,
        "optimizer": optimizer,
        "criterion": criterion,
        "dataloader": dataloader,
        "writer": writer,
        "expdir": expdir,
        "conf": conf,
        "feat_conf": conf["feature"],
        "scheduler": scheduler,
        "device": device,
        "scaler": scaler,
        "resume": resume,
    }
    trainer = TrainerWrapper(conf["trainer_type"], **ka)
    trainer.run(flag=args.flag)
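TrainerWrapper itself is not shown in these examples; a hedged sketch of the dispatch it presumably performs, reconstructed from the explicit if/elif chain in Example #12 (the factory name build_trainer is hypothetical, and the trainer classes are assumed to be importable as in Example #12):

def build_trainer(trainer_type, **ka):
    # illustrative stand-in for TrainerWrapper: map conf["trainer_type"]
    # to the concrete trainer classes listed in Example #12
    trainers = {
        "vqvae": VQVAETrainer,
        "lsgan": LSGANTrainer,
        "cycle": CycleVQVAETrainer,
        "cyclegan": CycleGANTrainer,
    }
    if trainer_type not in trainers:
        raise NotImplementedError(
            "conf['trainer_type']: {} is not supported.".format(trainer_type))
    return trainers[trainer_type](**ka)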
Example #11
def main():
    parser = argparse.ArgumentParser(description="calculate MCD.")
    parser.add_argument("--conf",
                        type=str,
                        required=True,
                        help="Configuration file")
    parser.add_argument("--spkr_conf",
                        type=str,
                        required=True,
                        help="Speaker configuration file")
    parser.add_argument(
        "--featdir",
        type=str,
        required=True,
        help="Root directory of ground truth feature h5 files",
    )
    parser.add_argument("--outwavdir",
                        type=str,
                        required=True,
                        help="Converted waveform directory")
    parser.add_argument(
        "--out",
        "-O",
        type=str,
        help="The output filename. "
        "If omitted, then output to sys.stdout",
    )
    parser.add_argument("--n_jobs",
                        default=40,
                        type=int,
                        help="number of parallel jobs")
    args = parser.parse_args()

    # logging info
    logging.basicConfig(
        level=logging.INFO,
        stream=sys.stdout,
        format="%(asctime)s (%(module)s:%(lineno)d) "
        "%(levelname)s: %(message)s",
    )

    # load configure files
    conf = load_yaml(args.conf)
    spkr_conf = load_yaml(args.spkr_conf)

    # load converted files. If mcep, use h5; else, waveform
    if conf["feat_type"] == "mcep":
        converted_files = sorted(list(Path(args.outwavdir).glob("*.h5")))
    else:
        converted_files = sorted(list(Path(args.outwavdir).glob("*.wav")))
    logging.info(f"number of utterances = {len(converted_files)}")

    # load ground truth scp
    featdir = Path(args.featdir) / conf["feature"]["label"]
    gt_feats = open_featsscp(featdir / "eval" / "feats.scp")

    if args.out is None:
        out = sys.stdout
    else:
        out = open(args.out, "w", encoding="utf-8")

    MCD_list = Parallel(args.n_jobs)([
        delayed(calculate)(cv_path, gt_feats, conf, spkr_conf)
        for cv_path in converted_files
    ])

    # summarize by pair
    pairwise_MCD = {}
    for k, v in MCD_list:
        orgspk, tarspk, _ = k.split("-")
        pair = orgspk + "-" + tarspk
        if pair not in pairwise_MCD:
            pairwise_MCD[pair] = []
        pairwise_MCD[pair].append(v)

    for k in sorted(pairwise_MCD.keys()):
        mcd_list = pairwise_MCD[k]
        mean_mcd = float(sum(mcd_list) / len(mcd_list))
        out.write(f"{k} {mean_mcd:.3f}\n")
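The calculate helper is not shown here; for context, a sketch of the standard mel-cepstral distortion it presumably computes per utterance once the frames are time-aligned (the function name and the exclusion of the 0th coefficient follow the usual convention, not this source):

import numpy as np


def mcd_db(cv_mcep, gt_mcep):
    # standard MCD in dB over time-aligned mcep frames,
    # excluding the 0th (energy) coefficient
    diff = cv_mcep[:, 1:] - gt_mcep[:, 1:]
    return (10.0 / np.log(10.0)) * np.mean(
        np.sqrt(2.0 * np.sum(diff ** 2, axis=1)))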
Example #12
def main():
    # options for python
    description = "Train VQ-VAE model"
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument(
        "--flag",
        type=str,
        default="train",
        help='Flag for ["train", "eval", "reconstruction"]',
    )
    parser.add_argument("--n_jobs", type=int, default=-1, help="# of CPUs")
    parser.add_argument("--conf",
                        type=str,
                        help="ymal file for network parameters")
    parser.add_argument("--checkpoint",
                        type=str,
                        default=None,
                        help="Resume model for re-training")
    parser.add_argument("--scpdir", type=str, help="scp directory")
    parser.add_argument("--featdir", type=str, help="output feature directory")
    parser.add_argument(
        "--featsscp",
        type=str,
        help="specify feats.scp instead of using scp directory")
    parser.add_argument("--expdir", type=str, help="exp directory")
    args = parser.parse_args()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    assert str(device) == "cuda", "ERROR: CPU training is not supported."

    # load configure files
    conf = load_yaml(args.conf)
    for k, v in conf.items():
        logging.info("{}: {}".format(k, v))

    # load scp
    scp = {}
    featdir = Path(args.featdir) / conf["feature"]["label"]
    for phase in ["train", "dev", "eval"]:
        scp[phase] = open_scpdir(Path(args.scpdir) / phase)
        scp[phase]["feats"] = open_featsscp(featdir / phase / "feats.scp")
    if args.flag == "eval" and args.featsscp != "None":
        logging.info("Load feats.scp from {}".format(args.featsscp))
        scp[args.flag]["feats"] = open_featsscp(args.featsscp)

    expdir = Path(args.expdir) / Path(args.conf).stem
    expdir.mkdir(exist_ok=True, parents=True)
    spkr_size = len(scp["train"]["spkrs"])

    # load model
    model = get_model(conf, spkr_size, device)
    resume = 0
    if args.checkpoint != "None":
        model, resume = load_checkpoint(model, args.checkpoint)
    else:
        if args.flag in ["reconstruction", "eval"]:
            # glob order is arbitrary; sort so the chosen checkpoint is deterministic
            checkpoint = sorted(expdir.glob("*.pkl"))[-1]
            model, resume = load_checkpoint(model, checkpoint)

    # load others
    scaler = joblib.load(
        Path(args.expdir) / "{}_scaler.pkl".format(conf["feature"]["label"]))
    optimizer = get_optimizer(conf, model)
    criterion = get_criterion(conf)
    dataloader = get_dataloader(conf,
                                scp,
                                scaler,
                                n_jobs=args.n_jobs,
                                flag=args.flag)
    scheduler = get_scheduler(conf, optimizer)
    writer = {
        "train":
        SummaryWriter(logdir=args.expdir + "/runs/train-" + expdir.name),
        "dev": SummaryWriter(logdir=args.expdir + "/runs/dev-" + expdir.name),
    }

    ka = {
        "model": model,
        "optimizer": optimizer,
        "criterion": criterion,
        "dataloader": dataloader,
        "writer": writer,
        "expdir": expdir,
        "conf": conf,
        "feat_conf": conf["feature"],
        "scheduler": scheduler,
        "device": device,
        "scaler": scaler,
        "resume": resume,
    }

    if conf["trainer_type"] == "vqvae":
        trainer = VQVAETrainer(**ka)
    elif conf["trainer_type"] == "lsgan":
        trainer = LSGANTrainer(**ka)
    elif conf["trainer_type"] == "cycle":
        trainer = CycleVQVAETrainer(**ka)
    elif conf["trainer_type"] == "cyclegan":
        trainer = CycleGANTrainer(**ka)
    else:
        raise NotImplementedError(
            "conf['trainer_type']: {} is not supported.".format(
                conf["trainer_type"]))
    trainer.run(flag=args.flag)