示例#1
0
def run(args):
    """Separate every utterance listed in ``args.wave_scp`` with a PITNet.

    The configuration parsed from ``args.config`` supplies the model
    options (``"model"``), the STFT settings (``"spectrogram_reader"``) and
    the dataloader options (``"dataloader"``). For each utterance, every
    per-speaker spectrogram returned by the separator is handed to ``istft``
    together with the target path ``<dump_dir>/<key>.spk<N>.wav``. When
    ``args.dump_mask`` is truthy, the matching speaker mask is also stored
    as ``<dump_dir>/<key>.spk<N>.mat`` under the key ``"mask"``.

    Args:
        args: namespace-like object; the attributes read here are
            ``config``, ``state_dict``, ``cuda``, ``wave_scp``, ``dump_dir``
            and ``dump_mask``.

    Raises:
        FileNotFoundError: if the configured ``mvn_dict`` path is set but
            does not exist on disk.
    """
    num_bins, config_dict = parse_yaml(args.config)
    dataloader_conf = config_dict["dataloader"]
    spectrogram_conf = config_dict["spectrogram_reader"]

    # "mvn_dict" is either falsy (nothing to load) or the path of a pickle
    # file whose content replaces the path.
    # NOTE(review): pickle.load can execute arbitrary code, so this path
    # must only ever point at trusted files.
    dict_mvn = dataloader_conf["mvn_dict"]
    if dict_mvn:
        if not os.path.exists(dict_mvn):
            raise FileNotFoundError("Could not find mvn files")
        with open(dict_mvn, "rb") as mvn_file:
            dict_mvn = pickle.load(mvn_file)

    # Defaults to True unless the config overrides it.
    apply_log = dataloader_conf.get("apply_log", True)

    dcnet = PITNet(num_bins, **config_dict["model"])

    # The same analysis settings are used for both stft() and istft().
    stft_opts = {
        "frame_length": spectrogram_conf["frame_length"],
        "frame_shift": spectrogram_conf["frame_shift"],
        "window": spectrogram_conf["window"],
        "center": True,
    }

    separator = Separator(dcnet, args.state_dict, cuda=args.cuda)

    num_utts = 0
    for key, utt in parse_scps(args.wave_scp).items():
        try:
            samps, mix_stft = stft(utt, **stft_opts, return_samps=True)
        except FileNotFoundError:
            print("Skip utterance {}... not found".format(key))
            continue

        print("Processing utterance {}".format(key))
        num_utts += 1
        # Infinity norm of the input samples, forwarded to istft() as `norm`.
        norm = np.linalg.norm(samps, np.inf)
        spk_mask, spk_spectrogram = separator.seperate(
            mix_stft, cmvn=dict_mvn, apply_log=apply_log)

        for index, spk_stft in enumerate(spk_spectrogram):
            spk_id = index + 1  # output files are numbered from 1
            wav_path = os.path.join(args.dump_dir,
                                    '{}.spk{}.wav'.format(key, spk_id))
            istft(wav_path,
                  spk_stft,
                  **stft_opts,
                  norm=norm,
                  fs=8000,
                  nsamps=samps.size)
            if not args.dump_mask:
                continue
            mat_path = os.path.join(args.dump_dir,
                                    '{}.spk{}.mat'.format(key, spk_id))
            sio.savemat(mat_path, {"mask": spk_mask[index]})
    print("Processed {} utterance!".format(num_utts))
示例#2
0
 def __init__(self, wave_scp, **kwargs):
     """Index the utterances listed in ``wave_scp``.

     Args:
         wave_scp: path of the script file handed to ``parse_scps``.
         **kwargs: kept unchanged on ``self.stft_kwargs``.

     Raises:
         FileNotFoundError: if ``wave_scp`` does not exist.
     """
     if not os.path.exists(wave_scp):
         message = "Could not find file {}".format(wave_scp)
         raise FileNotFoundError(message)
     self.stft_kwargs = kwargs
     utterances = parse_scps(wave_scp)
     self.wave_dict = utterances
     # Snapshot of the keys as a list.
     self.wave_keys = list(utterances.keys())
     summary = "Create SpectrogramReader for {} with {} utterances".format(
         wave_scp, len(utterances))
     logger.info(summary)
示例#3
0
 def __init__(self, wave_scp, **kwargs):
     """Build the utterance table from the script file ``wave_scp``.

     Args:
         wave_scp: path of the script file; parsed with ``parse_scps``.
         **kwargs: stored as-is on ``self.stft_kwargs``.

     Raises:
         FileNotFoundError: if ``wave_scp`` is not an existing path.
     """
     scp_found = os.path.exists(wave_scp)
     if not scp_found:
         raise FileNotFoundError(
             "Could not find file {}".format(wave_scp))
     self.stft_kwargs = kwargs
     parsed = parse_scps(wave_scp)
     self.wave_dict = parsed
     # Keys are materialised into a list.
     self.wave_keys = list(parsed.keys())
     num_entries = len(parsed)
     logger.info(
         "Create SpectrogramReader for {} with {} utterances".format(
             wave_scp, num_entries))
示例#4
0
def run(args):
    """Separate every utterance in ``args.wave_scp`` via DCNet clustering.

    The configuration parsed from ``args.config`` supplies the network
    options (``"dcnet"``), the STFT settings (``"spectrogram_reader"``) and
    the ``mvn_dict`` entry under ``"dataloader"``. For each utterance, every
    per-speaker spectrogram returned by the cluster object is handed to
    ``istft`` together with the target path ``<dump_dir>/<key>.spk<N>.wav``.
    Optional extras: speaker masks (``args.dump_mask``) go to
    ``<dump_dir>/<key>.spk<N>.mat`` under ``"mask"``, and the PCA matrix
    (``args.dump_pca``) goes to ``<dump_dir>/<key>.mat`` under
    ``"pca_matrix"``.

    Args:
        args: namespace-like object; the attributes read here are
            ``config``, ``dcnet_state``, ``num_spks``, ``dump_pca``,
            ``cuda``, ``wave_scp``, ``dump_dir`` and ``dump_mask``.

    Raises:
        FileNotFoundError: if the configured ``mvn_dict`` path is set but
            does not exist on disk.
    """
    num_bins, config_dict = parse_yaml(args.config)

    # "mvn_dict" is either falsy (nothing to load) or the path of a pickle
    # file whose content replaces the path.
    # NOTE(review): pickle.load can execute arbitrary code, so this path
    # must only ever point at trusted files.
    dict_mvn = config_dict["dataloader"]["mvn_dict"]
    if dict_mvn:
        if not os.path.exists(dict_mvn):
            raise FileNotFoundError("Could not find mvn files")
        with open(dict_mvn, "rb") as mvn_file:
            dict_mvn = pickle.load(mvn_file)

    dcnet = DCNet(num_bins, **config_dict["dcnet"])

    # The same analysis settings are used for both stft() and istft().
    stft_opts = {
        "frame_length": config_dict["spectrogram_reader"]["frame_length"],
        "frame_shift": config_dict["spectrogram_reader"]["frame_shift"],
        "window": config_dict["spectrogram_reader"]["window"],
        "center": True,
    }

    cluster = DeepCluster(dcnet,
                          args.dcnet_state,
                          args.num_spks,
                          pca=args.dump_pca,
                          cuda=args.cuda)

    num_utts = 0
    for key, utt in parse_scps(args.wave_scp).items():
        try:
            samps, mix_stft = stft(utt, **stft_opts, return_samps=True)
        except FileNotFoundError:
            print("Skip utterance {}... not found".format(key))
            continue

        print("Processing utterance {}".format(key))
        num_utts += 1
        # Infinity norm of the input samples, forwarded to istft() as `norm`.
        norm = np.linalg.norm(samps, np.inf)
        pca_mat, spk_mask, spk_spectrogram = cluster.seperate(
            mix_stft, cmvn=dict_mvn)

        for index, spk_stft in enumerate(spk_spectrogram):
            spk_id = index + 1  # output files are numbered from 1
            wav_path = os.path.join(args.dump_dir,
                                    '{}.spk{}.wav'.format(key, spk_id))
            istft(wav_path,
                  spk_stft,
                  **stft_opts,
                  norm=norm,
                  fs=8000,
                  nsamps=samps.size)
            if not args.dump_mask:
                continue
            mat_path = os.path.join(args.dump_dir,
                                    '{}.spk{}.mat'.format(key, spk_id))
            sio.savemat(mat_path, {"mask": spk_mask[index]})

        if args.dump_pca:
            pca_path = os.path.join(args.dump_dir, '{}.mat'.format(key))
            sio.savemat(pca_path, {"pca_matrix": pca_mat})
    print("Processed {} utterance!".format(num_utts))
示例#5
0
 def __init__(self, scp_path, addr_processor=lambda x: x):
     """Load the index described by the script file ``scp_path``.

     Args:
         scp_path: path of the script file; parsed with ``parse_scps``.
         addr_processor: callable forwarded to ``parse_scps`` under the
             same keyword; the default returns its argument unchanged.

     Raises:
         FileNotFoundError: if ``scp_path`` does not exist.
     """
     if not os.path.exists(scp_path):
         message = "Could not find file {}".format(scp_path)
         raise FileNotFoundError(message)
     entries = parse_scps(scp_path, addr_processor=addr_processor)
     self.index_dict = entries
     # Snapshot of the keys as a list.
     self.index_keys = list(entries.keys())
示例#6
0
def run(args):
    """Run deep-clustering separation over the utterances in a wave scp.

    Reads the network options (``"dcnet"``), the STFT settings
    (``"spectrogram_reader"``) and the ``mvn_dict`` entry of
    ``"dataloader"`` from the configuration parsed out of ``args.config``.
    Each per-speaker spectrogram returned by the cluster object is passed
    to ``istft`` with the target path ``<dump_dir>/<key>.spk<N>.wav``. With
    ``args.dump_mask`` set, the speaker mask is also saved to
    ``<dump_dir>/<key>.spk<N>.mat`` (key ``"mask"``); with ``args.dump_pca``
    set, the PCA matrix is saved to ``<dump_dir>/<key>.mat`` (key
    ``"pca_matrix"``).

    Args:
        args: namespace-like object; the attributes read here are
            ``config``, ``dcnet_state``, ``num_spks``, ``dump_pca``,
            ``cuda``, ``wave_scp``, ``dump_dir`` and ``dump_mask``.

    Raises:
        FileNotFoundError: if the configured ``mvn_dict`` path is set but
            does not exist on disk.
    """
    num_bins, config_dict = parse_yaml(args.config)

    # A truthy "mvn_dict" is treated as the path of a pickle file and is
    # replaced by the unpickled object.
    # NOTE(review): pickle.load can execute arbitrary code, so only trusted
    # files may be configured here.
    dict_mvn = config_dict["dataloader"]["mvn_dict"]
    if dict_mvn:
        if not os.path.exists(dict_mvn):
            raise FileNotFoundError("Could not find mvn files")
        with open(dict_mvn, "rb") as stats_file:
            dict_mvn = pickle.load(stats_file)

    dcnet = DCNet(num_bins, **config_dict["dcnet"])

    # Shared by the forward and the inverse transform below.
    transform_opts = {
        "frame_length": config_dict["spectrogram_reader"]["frame_length"],
        "frame_shift": config_dict["spectrogram_reader"]["frame_shift"],
        "window": config_dict["spectrogram_reader"]["window"],
        "center": True,
    }

    cluster = DeepCluster(
        dcnet,
        args.dcnet_state,
        args.num_spks,
        pca=args.dump_pca,
        cuda=args.cuda)

    num_utts = 0
    for key, utt in parse_scps(args.wave_scp).items():
        try:
            samps, mixture = stft(utt, **transform_opts, return_samps=True)
        except FileNotFoundError:
            print("Skip utterance {}... not found".format(key))
            continue

        print("Processing utterance {}".format(key))
        num_utts += 1
        # Infinity norm of the input samples, forwarded to istft() as `norm`.
        norm = np.linalg.norm(samps, np.inf)
        pca_mat, spk_mask, spk_spectrogram = cluster.seperate(
            mixture, cmvn=dict_mvn)

        for index, separated in enumerate(spk_spectrogram):
            speaker_no = index + 1  # file names count speakers from 1
            istft(
                os.path.join(args.dump_dir,
                             '{}.spk{}.wav'.format(key, speaker_no)),
                separated,
                **transform_opts,
                norm=norm,
                fs=8000,
                nsamps=samps.size)
            if args.dump_mask:
                mask_file = os.path.join(
                    args.dump_dir, '{}.spk{}.mat'.format(key, speaker_no))
                sio.savemat(mask_file, {"mask": spk_mask[index]})

        if args.dump_pca:
            pca_file = os.path.join(args.dump_dir, '{}.mat'.format(key))
            sio.savemat(pca_file, {"pca_matrix": pca_mat})
    print("Processed {} utterance!".format(num_utts))