Exemplo n.º 1
0
class FeatureExtractorWorker:
    """Worker that converts recorded audio into acoustic features.

    Waveforms are taken from ``recorded_queue``, analysed with a
    ``FeatureExtractor`` configured for the ``'world'`` analyzer, and the
    resulting feature object is put on ``feature_queue``.
    """

    def __init__(self, recorded_queue: Queue, feature_queue: Queue,
                 feature_extractor_config: configs.FeatureExtractorConfig):
        """Store the queues and build the feature extractor.

        Args:
            recorded_queue: queue the worker reads recorded waveforms from.
            feature_queue: queue the worker writes extracted features to.
            feature_extractor_config: configuration providing ``fs``,
                ``fftl``, ``shiftms``, ``minf0`` and ``maxf0`` for the
                extractor, and ``mcep_dim`` / ``mcep_alpha`` used by
                :meth:`start`.
        """
        self._recorded_queue = recorded_queue
        self._feature_queue = feature_queue
        self._feat = FeatureExtractor(analyzer='world',
                                      fs=feature_extractor_config.fs,
                                      fftl=feature_extractor_config.fftl,
                                      shiftms=feature_extractor_config.shiftms,
                                      minf0=feature_extractor_config.minf0,
                                      maxf0=feature_extractor_config.maxf0)
        self._feature_extractor_config = feature_extractor_config

    def start(self):
        """Run the extraction loop; this method never returns."""
        while True:
            # Synchronous: blocks until a recorded waveform is available.
            recorded_wav = self._recorded_queue.get()
            # ``numpy.float`` was only an alias of the builtin ``float`` and
            # was removed in NumPy 1.24; ``float64`` is the same dtype.
            recorded_wav = recorded_wav.astype(numpy.float64)
            f0, spc, ap = self._feat.analyze(recorded_wav)
            # mcep() takes no waveform; presumably it reuses the analysis
            # state left by the analyze() call above -- TODO confirm.
            mcep = self._feat.mcep(
                dim=self._feature_extractor_config.mcep_dim,
                alpha=self._feature_extractor_config.mcep_alpha)
            feature = create_feature_extractor_result(f0, spc, ap, mcep)
            self._feature_queue.put(feature)
Exemplo n.º 2
0
 def __init__(self, recorded_queue: Queue, feature_queue: Queue,
              feature_extractor_config: configs.FeatureExtractorConfig):
     """Keep references to both queues and create the feature extractor.

     Args:
         recorded_queue: stored as ``self._recorded_queue``.
         feature_queue: stored as ``self._feature_queue``.
         feature_extractor_config: supplies ``fs``, ``fftl``, ``shiftms``,
             ``minf0`` and ``maxf0`` for the extractor; also stored as
             ``self._feature_extractor_config``.
     """
     cfg = feature_extractor_config
     self._recorded_queue, self._feature_queue = recorded_queue, feature_queue
     # Keyword arguments are gathered first so the constructor call stays short.
     extractor_kwargs = dict(
         analyzer='world',
         fs=cfg.fs,
         fftl=cfg.fftl,
         shiftms=cfg.shiftms,
         minf0=cfg.minf0,
         maxf0=cfg.maxf0,
     )
     self._feat = FeatureExtractor(**extractor_kwargs)
     self._feature_extractor_config = cfg
Exemplo n.º 3
0
def world_feature_extract(wav_list, args):
    """Extract WORLD features from each wav file and save them as HDF5.

    For every file the feature matrix is the column-wise concatenation of
    ``[uv, low-pass-filtered continuous f0, mcep, codeap]``. It is written to
    ``<args.hdf5dir>/<basename>.h5`` under ``/feat_org``; when
    ``args.save_extended`` is set, a time-extended float32 copy is also
    written under ``/feat``. When ``args.highpass_cutoff`` is non-zero the
    filtered waveform is written to ``args.wavdir`` as 16-bit PCM.

    Args:
        wav_list: sequence of wav file paths.
        args: namespace providing ``fs``, ``shiftms``, ``minf0``, ``maxf0``,
            ``fftl``, ``highpass_cutoff``, ``mcep_dim``, ``mcep_alpha``,
            ``hdf5dir``, ``wavdir`` and ``save_extended``.

    Raises:
        SystemExit: if a file's sampling frequency differs from ``args.fs``.
    """
    # define feature extractor
    feature_extractor = FeatureExtractor(analyzer="world",
                                         fs=args.fs,
                                         shiftms=args.shiftms,
                                         minf0=args.minf0,
                                         maxf0=args.maxf0,
                                         fftl=args.fftl)

    # Sampling rate of the f0 contour (frames per second); loop-invariant.
    f0_frame_rate = int(1.0 / (args.shiftms * 0.001))
    num_files = len(wav_list)

    for i, wav_name in enumerate(wav_list):
        logging.info("now processing %s (%d/%d)", wav_name, i + 1, num_files)

        fs, x = wavfile.read(wav_name)

        # Validate the sampling frequency before any fs-dependent processing.
        if fs != args.fs:
            logging.error("sampling frequency is not matched.")
            sys.exit(1)

        # apply low cut filter
        x = np.array(x, dtype=np.float32)
        if args.highpass_cutoff != 0:
            x = low_cut_filter(x, fs, cutoff=args.highpass_cutoff)

        # extract features
        f0, _, _ = feature_extractor.analyze(x)
        uv, cont_f0 = convert_continuos_f0(f0)
        cont_f0_lpf = low_pass_filter(cont_f0, f0_frame_rate, cutoff=20)
        codeap = feature_extractor.codeap()
        mcep = feature_extractor.mcep(dim=args.mcep_dim, alpha=args.mcep_alpha)

        # concatenate (uv and f0 get a trailing axis so they can be stacked
        # as columns next to mcep / codeap)
        cont_f0_lpf = np.expand_dims(cont_f0_lpf, axis=-1)
        uv = np.expand_dims(uv, axis=-1)
        feats = np.concatenate([uv, cont_f0_lpf, mcep, codeap], axis=1)

        # save to hdf5
        hdf5name = args.hdf5dir + "/" + os.path.basename(wav_name).replace(
            ".wav", ".h5")
        write_hdf5(hdf5name, "/feat_org", feats)
        if args.save_extended:
            # extend time resolution
            upsampling_factor = int(args.shiftms * fs * 0.001)
            feats_extended = extend_time(feats, upsampling_factor)
            feats_extended = feats_extended.astype(np.float32)
            write_hdf5(hdf5name, "/feat", feats_extended)

        # write the filtered wav file
        if args.highpass_cutoff != 0:
            # Clip before the int16 cast: filtering can push samples outside
            # the 16-bit range, and an unclipped cast wraps around.
            wavfile.write(args.wavdir + "/" + os.path.basename(wav_name), fs,
                          np.int16(np.clip(x, -32768, 32767)))
Exemplo n.º 4
0
def noise_shaping(wav_list, args):
    """Apply noise shaping to each utterance and write the result.

    The filter coefficients are derived from the mean vector stored in
    ``args.stats`` under ``/<args.feature_type>/mean``: the slice
    ``[mcep_dim_start:mcep_dim_end]`` is scaled by ``args.mag``, its first
    element is zeroed and, when ``args.inv`` is set, the remaining elements
    are negated. The same coefficients are repeated for every frame.

    Args:
        wav_list: sequence of utterance ids; each id replaces the literal
            substring ``"feat_id"`` in ``args.outdir`` (input path) and
            ``args.writedir`` (output path).
        args: namespace providing ``fs``, ``shiftms``, ``fftl``, ``outdir``,
            ``writedir``, ``stats``, ``feature_type``, ``mcep_dim_start``,
            ``mcep_dim_end``, ``mag``, ``inv`` and ``mcep_alpha``.

    Raises:
        SystemExit: if a file's sampling frequency differs from ``args.fs``.
    """
    # define feature extractor
    feature_extractor = FeatureExtractor(
        analyzer="world",
        fs=args.fs,
        shiftms=args.shiftms,
        fftl=args.fftl)

    # define synthesizer
    synthesizer = Synthesizer(
        fs=args.fs,
        shiftms=args.shiftms,
        fftl=args.fftl)

    num_files = len(wav_list)
    if num_files == 0:
        # Nothing to process; do not touch the statistics file.
        return

    # The averaged coefficients do not depend on the utterance, so load and
    # scale them once instead of re-reading the HDF5 file for every wav.
    mean_coef = read_hdf5(args.stats, "/%s/mean" % args.feature_type)
    mean_coef = mean_coef[args.mcep_dim_start:args.mcep_dim_end] * args.mag
    mean_coef[0] = 0.0
    if args.inv:
        mean_coef[1:] = -1.0 * mean_coef[1:]

    for i, feat_id in enumerate(wav_list):
        logging.info("now processing %s (%d/%d)", feat_id, i + 1, num_files)
        # load wavfile, remembering the on-disk sample type
        wav_filename = args.outdir.replace("feat_id", feat_id)
        fs, x = wavfile.read(wav_filename)
        wav_type = x.dtype
        x = np.array(x, dtype=np.float64)

        # check sampling frequency
        if fs != args.fs:
            logging.error("sampling frequency is not matched.")
            sys.exit(1)

        # extract features (only to get the number of frames)
        f0, _, _ = feature_extractor.analyze(x)
        num_frames = f0.shape[0]

        # one row of coefficients per frame
        mlsa_coef = np.tile(mean_coef, [num_frames, 1])

        # synthesis and write
        x_ns = synthesizer.synthesis_diff(x, mlsa_coef, alpha=args.mcep_alpha)
        x_ns = low_cut_filter(x_ns, args.fs, cutoff=70)
        write_name = args.writedir.replace("feat_id", feat_id)
        # Clip to the 16-bit range so the int16 cast cannot wrap around.
        wav = np.clip(x_ns, -32768, 32767)
        if wav_type == np.int16:
            wavfile.write(write_name, args.fs, np.int16(wav))
        else:
            wavfile.write(write_name, args.fs, wav)
Exemplo n.º 5
0
def world_feature_extract(wav_list, idx, f0_dict, npow_dict):
    """Extract f0 and normalized power for each wav file in ``wav_list``.

    Args:
        wav_list: iterable of wav file paths; trailing whitespace (e.g. a
            newline from a list file) is stripped from each entry.
        idx: key under which the results are stored in both dictionaries.
        f0_dict: mapping updated in place; ``f0_dict[idx]`` becomes the list
            of per-file f0 sequences.
        npow_dict: mapping updated in place; ``npow_dict[idx]`` becomes the
            list of per-file ``npow()`` results.
    """
    f0s = []
    npows = []
    for f in wav_list:
        # open waveform
        wavf = f.rstrip()
        fs, x = wavfile.read(wavf)
        # ``np.float`` was removed in NumPy 1.24; it was an alias of the
        # builtin ``float``, i.e. float64.
        x = np.array(x, dtype=np.float64)
        logging.info("Extract: %s", wavf)

        # construct FeatureExtractor class (per file, using that file's fs)
        feat = FeatureExtractor(analyzer='world', fs=fs)

        # f0 and npow extraction
        f0, _, _ = feat.analyze(x)
        npow = feat.npow()

        f0s.append(f0)
        npows.append(npow)
    f0_dict[idx] = f0s
    npow_dict[idx] = npows
Exemplo n.º 6
0
def world_feature_extract(queue, wav_list, args):
    """Extract WORLD feature vectors and store them per utterance.

    For every wav file the feature matrix is the column-wise concatenation
    of ``[uv, low-pass-filtered continuous f0, mcep, codeap]`` and is written
    under ``/world``. Optional datasets (``/f0``, ``/ap``, ``/spc``,
    ``/npow``, ``/world_extend``, ``/vad_idx``) are written according to the
    corresponding ``args.save_*`` flags. Files that already contain
    ``/world`` are skipped unless ``args.overwrite`` is set.

    Parameters
    ----------
    queue : multiprocessing.Queue()
        queue on which the string ``'Finish'`` is put once every file in
        ``wav_list`` has been handled
    wav_list : list
        list of the wav files
    args :
        feature extract arguments (``fs``, ``shiftms``, ``minf0``,
        ``maxf0``, ``fftl``, ``feature_dir``, ``feature_format``,
        ``overwrite``, ``highpass_cutoff``, ``mcep_dim``, ``mcep_alpha``,
        ``pow_th`` and the ``save_*`` flags)

    Raises
    ------
    SystemExit
        if a file's sampling frequency differs from ``args.fs``; in that
        case ``'Finish'`` is not put on the queue
    """
    # define feature extractor
    feature_extractor = FeatureExtractor(analyzer="world",
                                         fs=args.fs,
                                         shiftms=args.shiftms,
                                         minf0=args.minf0,
                                         maxf0=args.maxf0,
                                         fftl=args.fftl)
    # Sampling rate of the f0 contour (frames per second); loop-invariant.
    lpf_fs = int(1.0 / (args.shiftms * 0.001))
    num_files = len(wav_list)

    # extraction
    for i, wav_name in enumerate(wav_list):
        # resolve the output file name
        if args.feature_dir is None:
            # NOTE: replaces every occurrence of "wav" in the path, not only
            # the file extension.
            feat_name = wav_name.replace("wav", args.feature_format)
        else:
            feat_name = rootdir_replace(wav_name,
                                        extname=args.feature_format,
                                        newdir=args.feature_dir)
        # NOTE(review): the output directory is not created here; presumably
        # write_hdf5 or the caller takes care of it -- confirm.

        # check exists
        if check_hdf5(feat_name, "/world"):
            if args.overwrite:
                logging.info("overwrite %s (%d/%d)", wav_name, i + 1,
                             num_files)
            else:
                logging.info("skip %s (%d/%d)", wav_name, i + 1, num_files)
                continue
        else:
            logging.info("now processing %s (%d/%d)", wav_name, i + 1,
                         num_files)

        fs, x = wavfile.read(wav_name)

        # Validate the sampling frequency before any fs-dependent processing.
        if fs != args.fs:
            logging.error("sampling frequency is not matched.")
            sys.exit(1)

        # apply low cut filter
        x = np.array(x, dtype=np.float32)
        if args.highpass_cutoff != 0:
            x = low_cut_filter(x, fs, cutoff=args.highpass_cutoff)

        # extract features
        f0, spc, ap = feature_extractor.analyze(x)
        codeap = feature_extractor.codeap()
        mcep = feature_extractor.mcep(dim=args.mcep_dim, alpha=args.mcep_alpha)
        npow = feature_extractor.npow()
        uv, cont_f0 = convert_continuos_f0(f0)
        cont_f0_lpf = low_pass_filter(cont_f0, lpf_fs, cutoff=20)
        # Retry with progressively higher cutoffs (70, 140, ...) until every
        # value of the filtered contour is strictly positive.
        # NOTE(review): the cutoff is unbounded; confirm how low_pass_filter
        # behaves once it exceeds lpf_fs / 2.
        next_cutoff = 70
        while not (cont_f0_lpf > 0).all():
            logging.info("%s low-pass-filtered [%dHz]", feat_name,
                         next_cutoff)
            cont_f0_lpf = low_pass_filter(cont_f0, lpf_fs, cutoff=next_cutoff)
            next_cutoff *= 2

        # concatenate
        cont_f0_lpf = np.expand_dims(cont_f0_lpf, axis=-1)
        uv = np.expand_dims(uv, axis=-1)
        feats = np.concatenate([uv, cont_f0_lpf, mcep, codeap], axis=1)

        # save feature
        write_hdf5(feat_name, "/world", feats)
        if args.save_f0:
            write_hdf5(feat_name, "/f0", f0)
        if args.save_ap:
            write_hdf5(feat_name, "/ap", ap)
        if args.save_spc:
            write_hdf5(feat_name, "/spc", spc)
        if args.save_npow:
            write_hdf5(feat_name, "/npow", npow)
        if args.save_extended:
            # extend time resolution
            upsampling_factor = int(args.shiftms * fs * 0.001)
            feats_extended = extend_time(feats, upsampling_factor)
            feats_extended = feats_extended.astype(np.float32)
            write_hdf5(feat_name, "/world_extend", feats_extended)
        if args.save_vad:
            _, vad_idx = extfrm(mcep, npow, power_threshold=args.pow_th)
            write_hdf5(feat_name, "/vad_idx", vad_idx)
    queue.put('Finish')
Exemplo n.º 7
0
def main():
    """Command-line entry point: apply noise shaping to a set of wav files.

    Parses the arguments, collects the input files (a directory searched for
    ``*.wav`` or a text file listing paths), splits them into ``--n_jobs``
    chunks and processes each chunk in its own process. Outputs are written
    to ``--writedir`` under the input file's base name.
    """
    parser = argparse.ArgumentParser(
        description="making feature file argsurations.")

    parser.add_argument("--waveforms",
                        default=None,
                        help="directory or list of filename of input wavfile")
    parser.add_argument("--stats",
                        default=None,
                        help="filename of hdf5 format")
    parser.add_argument("--writedir",
                        default=None,
                        help="directory to save preprocessed wav file")
    parser.add_argument("--fs",
                        default=FS,
                        type=int,
                        help="Sampling frequency")
    parser.add_argument("--shiftms",
                        default=SHIFTMS,
                        type=int,
                        help="Frame shift in msec")
    parser.add_argument("--fftl", default=FFTL, type=int, help="FFT length")
    parser.add_argument("--mcep_dim_start",
                        default=MCEP_DIM_START,
                        type=int,
                        help="Start index of mel cepstrum")
    parser.add_argument("--mcep_dim_end",
                        default=MCEP_DIM_END,
                        type=int,
                        help="End index of mel cepstrum")
    parser.add_argument("--mcep_alpha",
                        default=MCEP_ALPHA,
                        type=float,
                        help="Alpha of mel cepstrum")
    parser.add_argument("--mag",
                        default=MAG,
                        type=float,
                        help="magnification of noise shaping")
    parser.add_argument("--verbose",
                        default=1,
                        type=int,
                        help="log message level")
    parser.add_argument('--n_jobs',
                        default=1,
                        type=int,
                        help="number of parallel jobs")
    parser.add_argument('--inv',
                        default=False,
                        type=strtobool,
                        help="if True, inverse filtering will be performed")
    args = parser.parse_args()

    # read list
    if os.path.isdir(args.waveforms):
        file_list = sorted(find_files(args.waveforms, "*.wav"))
    else:
        file_list = read_txt(args.waveforms)

    # define feature extractor
    feature_extractor = FeatureExtractor(analyzer="world",
                                         fs=args.fs,
                                         shiftms=args.shiftms,
                                         fftl=args.fftl)

    # define synthesizer
    synthesizer = Synthesizer(fs=args.fs, shiftms=args.shiftms, fftl=args.fftl)

    # check directory existence
    if not os.path.exists(args.writedir):
        os.makedirs(args.writedir)

    def noise_shaping(wav_list):
        """Noise-shape every file in ``wav_list`` and write it to writedir."""
        if len(wav_list) == 0:
            # Nothing to process; do not touch the statistics file.
            return

        # The averaged coefficients are the same for every file, so load and
        # scale them once rather than re-reading the HDF5 file per wav.
        mean_coef = read_hdf5(args.stats, "/mean")
        mean_coef = mean_coef[args.mcep_dim_start:args.mcep_dim_end] * args.mag
        mean_coef[0] = 0.0
        if args.inv:
            mean_coef[1:] = -1.0 * mean_coef[1:]

        for wav_name in wav_list:
            # load wavfile, remembering the on-disk sample type
            fs, x = wavfile.read(wav_name)
            wav_type = x.dtype
            x = np.array(x, dtype=np.float64)

            # check sampling frequency
            if fs != args.fs:
                print("ERROR: sampling frequency is not matched.")
                sys.exit(1)

            # extract features (only to get the number of frames)
            f0, _, _ = feature_extractor.analyze(x)
            num_frames = f0.shape[0]

            # one row of coefficients per frame
            mlsa_coef = np.tile(mean_coef, [num_frames, 1])

            # synthesis and write
            x_ns = synthesizer.synthesis_diff(x,
                                              mlsa_coef,
                                              alpha=args.mcep_alpha)
            x_ns = low_cut_filter(x_ns, args.fs, cutoff=70)

            # The output path is needed by both branches below. Previously it
            # was only assigned in the int16 branch, so other sample types
            # either crashed or overwrote the previous file's output.
            write_name = args.writedir + "/" + os.path.basename(wav_name)
            if wav_type == np.int16:
                # Clip so the int16 cast cannot wrap around on overflow.
                wavfile.write(write_name, args.fs,
                              np.int16(np.clip(x_ns, -32768, 32767)))
            else:
                wavfile.write(write_name, args.fs, x_ns)

    # divide list
    file_lists = np.array_split(file_list, args.n_jobs)
    file_lists = [f_list.tolist() for f_list in file_lists]

    # multi processing
    # NOTE(review): the target is a nested function; this presumably relies
    # on a fork-based start method -- confirm on platforms that spawn.
    processes = []
    for f in file_lists:
        p = mp.Process(target=noise_shaping, args=(f, ))
        p.start()
        processes.append(p)

    # wait for all process
    for p in processes:
        p.join()
Exemplo n.º 8
0
def main():
    """Command-line entry point: extract WORLD features from wav files.

    Parses the arguments, collects the input files (a directory searched for
    ``*.wav`` or a text file listing paths), splits them into ``--n_jobs``
    chunks and processes each chunk in its own process. For every file the
    features are written to ``<hdf5dir>/<basename>.h5`` (``/feat_org`` and
    the time-extended ``/feat``); when ``--highpass_cutoff`` is non-zero the
    filtered waveform is written to ``--wavdir`` as 16-bit PCM.
    """
    parser = argparse.ArgumentParser(
        description="making feature file argsurations.")

    parser.add_argument("--waveforms",
                        default=None,
                        help="directory or list of filename of input wavfile")
    parser.add_argument("--hdf5dir",
                        default=None,
                        help="directory to save hdf5")
    parser.add_argument("--wavdir",
                        default=None,
                        help="directory to save of preprocessed wav file")
    parser.add_argument("--fs",
                        default=FS,
                        type=int,
                        help="Sampling frequency")
    parser.add_argument("--shiftms",
                        default=SHIFTMS,
                        type=int,
                        help="Frame shift in msec")
    parser.add_argument("--minf0", default=MINF0, type=int, help="minimum f0")
    parser.add_argument("--maxf0", default=MAXF0, type=int, help="maximum f0")
    parser.add_argument("--mcep_dim",
                        default=MCEP_DIM,
                        type=int,
                        help="Dimension of mel cepstrum")
    parser.add_argument("--mcep_alpha",
                        default=MCEP_ALPHA,
                        type=float,
                        help="Alpha of mel cepstrum")
    parser.add_argument("--fftl", default=FFTL, type=int, help="FFT length")
    parser.add_argument("--highpass_cutoff",
                        default=HIGHPASS_CUTOFF,
                        type=int,
                        help="Cut off frequency in lowpass filter")
    parser.add_argument("--n_jobs",
                        default=10,
                        type=int,
                        help="number of parallel jobs")
    parser.add_argument("--verbose",
                        default=1,
                        type=int,
                        help="log message level")

    args = parser.parse_args()

    # read list
    if os.path.isdir(args.waveforms):
        file_list = sorted(find_files(args.waveforms, "*.wav"))
    else:
        file_list = read_txt(args.waveforms)

    # define feature extractor
    feature_extractor = FeatureExtractor(analyzer="world",
                                         fs=args.fs,
                                         shiftms=args.shiftms,
                                         minf0=args.minf0,
                                         maxf0=args.maxf0,
                                         fftl=args.fftl)

    # check directory existence
    if not os.path.exists(args.wavdir):
        os.makedirs(args.wavdir)
    if not os.path.exists(args.hdf5dir):
        os.makedirs(args.hdf5dir)

    # Sampling rate of the f0 contour (frames per second); loop-invariant.
    f0_frame_rate = int(1.0 / (args.shiftms * 0.001))

    def feature_extract(wav_list):
        """Extract and save features for every file in ``wav_list``."""
        for wav_name in wav_list:
            fs, x = wavfile.read(wav_name)

            # Validate the sampling frequency before fs-dependent processing.
            if fs != args.fs:
                print("ERROR: sampling frequency is not matched.")
                sys.exit(1)

            # apply low cut filter
            x = np.array(x, dtype=np.float32)
            if args.highpass_cutoff != 0:
                x = low_cut_filter(x, fs, cutoff=args.highpass_cutoff)

            # extract features (spectrum / aperiodicity are not used directly)
            f0, _, _ = feature_extractor.analyze(x)
            uv, cont_f0 = convert_continuos_f0(f0)
            cont_f0_lpf = low_pass_filter(cont_f0, f0_frame_rate, cutoff=20)
            codeap = feature_extractor.codeap()
            mcep = feature_extractor.mcep(dim=args.mcep_dim,
                                          alpha=args.mcep_alpha)

            # concatenate
            cont_f0_lpf = np.expand_dims(cont_f0_lpf, axis=-1)
            uv = np.expand_dims(uv, axis=-1)
            feats = np.concatenate([uv, cont_f0_lpf, mcep, codeap], axis=1)

            # extend time resolution
            upsampling_factor = int(args.shiftms * fs * 0.001)
            feats_extended = extend_time(feats, upsampling_factor)

            # save to hdf5
            feats_extended = feats_extended.astype(np.float32)
            hdf5name = args.hdf5dir + "/" + os.path.basename(wav_name).replace(
                ".wav", ".h5")
            write_hdf5(hdf5name, "/feat_org", feats)
            write_hdf5(hdf5name, "/feat", feats_extended)

            # write the filtered wav file
            if args.highpass_cutoff != 0:
                # Clip before the int16 cast: filtering can push samples
                # outside the 16-bit range, and an unclipped cast wraps.
                wavfile.write(args.wavdir + "/" + os.path.basename(wav_name),
                              fs, np.int16(np.clip(x, -32768, 32767)))

    # divide list
    file_lists = np.array_split(file_list, args.n_jobs)
    file_lists = [f_list.tolist() for f_list in file_lists]

    # multi processing
    # NOTE(review): the target is a nested function; this presumably relies
    # on a fork-based start method -- confirm on platforms that spawn.
    processes = []
    for f in file_lists:
        p = mp.Process(target=feature_extract, args=(f, ))
        p.start()
        processes.append(p)

    # wait for all process
    for p in processes:
        p.join()