Exemplo n.º 1
0
def process_file(wav_dir,
                 vad_dir,
                 out_dir,
                 file_name,
                 model,
                 wav_suffix='.wav',
                 vad_suffix='.lab.gz'):
    """ Extract i-vector from wav file.

        The audio is read from ``wav_dir/file_name + wav_suffix``, resampled
        to 8000 Hz when needed, optionally dithered, converted to MFCC
        features, restricted to the frames selected by the VAD and passed to
        ``get_ivec``. When an i-vector is returned it is stored with
        ``np.save`` under ``out_dir/file_name`` (numpy appends ``.npy``).

        :param wav_dir: directory with wav files
        :type wav_dir: str
        :param vad_dir: directory with vad files
        :type vad_dir: str
        :param out_dir: output directory
        :type out_dir: str
        :param file_name: name of the file
        :type file_name: str
        :param model: input models for i-vector extraction, a 9-item sequence
                      (ubm_weights, ubm_means, ubm_covs, ubm_norm, gmm_model,
                      numg, dimf, v, mvvt)
        :type model: tuple
        :param wav_suffix: suffix of wav files
        :type wav_suffix: str
        :param vad_suffix: suffix of vad files
        :type vad_suffix: str
        :raises GeneralException: when the audio signal is not one-dimensional
    """
    loginfo('[wav2ivec.process_file] Processing file {} ...'.format(file_name))
    # UBM weights and covariances are part of the model tuple but are not
    # needed for the extraction itself.
    _, ubm_means, _, ubm_norm, gmm_model, numg, dimf, v, mvvt = model
    wav = os.path.join(wav_dir, file_name) + wav_suffix
    rate, sig = read(wav)
    if len(sig.shape) != 1:
        raise GeneralException(
            '[wav2ivec.process_file] Expected mono as input audio.')
    if rate != 8000:
        logwarning(
            '[wav2ivec.process_file] '
            'The input file is expected to be in 8000 Hz, got {} Hz instead, resampling.'
            .format(rate))
        # NOTE(review): assumes `signal` is scipy.signal. Its resample() takes
        # the number of OUTPUT SAMPLES, not the target rate, so the length has
        # to be scaled by 8000 / rate to keep the duration of the recording.
        n_samples = int(round(sig.shape[0] * 8000.0 / rate))
        sig = signal.resample(sig, n_samples)
    if ADDDITHER > 0.0:
        loginfo('[wav2ivec.process_file] Adding dither ...')
        sig = features.add_dither(sig, ADDDITHER)

    fea = get_mfccs(sig)
    # get_vad also returns region and frame counts, which are not used here.
    vad, _, _ = get_vad(vad_dir, file_name, vad_suffix, sig, fea)

    # Keep only the feature frames selected by the VAD.
    fea = fea[vad, ...]
    w = get_ivec(fea, numg, dimf, gmm_model, ubm_means, ubm_norm, v, mvvt)
    if w is not None:
        # file_name may contain sub-directories; create them in out_dir first.
        Tools.mkdir_p(os.path.join(out_dir, os.path.dirname(file_name)))
        np.save(os.path.join(out_dir, file_name), w)
Exemplo n.º 2
0
def process_file(wav_dir,
                 vad_dir,
                 out_dir,
                 file_name,
                 model,
                 min_size,
                 max_size,
                 tolerance,
                 wav_suffix='.wav',
                 vad_suffix='.lab.gz'):
    """ Extract i-vectors from wav file.

        The audio is loaded from ``wav_dir/file_name + wav_suffix`` with
        ``af_to_array(..., target_sr=8000)``, optionally dithered and
        converted to MFCC features. One i-vector is extracted for every
        segment yielded by ``get_segments`` and collected in an ``IvecSet``,
        which is saved to ``out_dir/<file_name>.pkl``.

        :param wav_dir: directory with wav files
        :type wav_dir: str
        :param vad_dir: directory with vad files
        :type vad_dir: str
        :param out_dir: output directory
        :type out_dir: str
        :param file_name: name of the file; when it contains whitespace only
                          the first whitespace-separated token is used
        :type file_name: str
        :param model: input models for i-vector extraction, a 9-item sequence
                      (ubm_weights, ubm_means, ubm_covs, ubm_norm, gmm_model,
                      numg, dimf, v, mvvt)
        :type model: tuple
        :param min_size: minimal size of window in ms
        :type min_size: int
        :param max_size: maximal size of window in ms
        :type max_size: int
        :param tolerance: accept given number of frames as speech even when it
                          is marked as silence
        :type tolerance: int
        :param wav_suffix: suffix of wav files
        :type wav_suffix: str
        :param vad_suffix: suffix of vad files
        :type vad_suffix: str
    """
    loginfo('[wav2ivecs.process_file] Processing file {} '
            '...'.format(file_name))
    # The unpacking already binds every model part that is needed; UBM
    # weights and covariances are not used by the extraction.
    _, ubm_means, _, ubm_norm, gmm_model, numg, dimf, v, mvvt = model

    # Input lines may carry extra columns after the file name; keep the name.
    if len(file_name.split()) > 1:
        file_name = file_name.split()[0]
    wav = os.path.join(wav_dir, file_name) + wav_suffix
    rate = get_sr(wav)
    # Log the resolved audio path.
    loginfo(wav)
    if rate != 8000:
        logwarning('[wav2ivec.process_file] '
                   'The input file is expected to be in 8000 Hz, got {} Hz '
                   'instead, resampling.'.format(rate))
    # The loader is asked for 8000 Hz output regardless of the source rate;
    # presumably it performs the resampling announced above - TODO confirm.
    rate, sig = af_to_array(wav, target_sr=8000)
    if ADDDITHER > 0.0:  # TODO: Where is this defined?
        sig = features.add_dither(sig, ADDDITHER)

    fea = get_mfccs(sig)
    # get_vad also returns region and frame counts, which are not used here.
    vad, _, _ = get_vad(vad_dir, file_name, vad_suffix, sig, fea)

    ivec_set = IvecSet()
    ivec_set.name = file_name
    for seg in get_segments(vad, min_size, max_size, tolerance):
        start, end = get_num_segments(seg[0]), get_num_segments(seg[1])
        # The slice includes the frame at index seg[1].
        w = get_ivec(fea[seg[0]:seg[1] + 1], numg, dimf, gmm_model, ubm_means,
                     ubm_norm, v, mvvt)
        if w is None:
            continue
        ivec_set.add(w, start, end)
    # file_name may contain sub-directories; create them in out_dir first.
    Tools.mkdir_p(os.path.join(out_dir, os.path.dirname(file_name)))
    ivec_set.save(os.path.join(out_dir, '{}.pkl'.format(file_name)))
Exemplo n.º 3
0
def process_file(wav_dir,
                 vad_dir,
                 out_dir,
                 file_name,
                 model,
                 min_size,
                 max_size,
                 tolerance,
                 wav_suffix='.wav',
                 vad_suffix='.lab.gz'):
    """ Extract i-vectors from wav file.

        The audio is read from ``wav_dir/file_name + wav_suffix``, resampled
        to 8000 Hz when needed, optionally dithered and converted to MFCC
        features. One i-vector is extracted for every segment yielded by
        ``get_segments`` whose bounds lie inside the feature matrix; the
        results are collected in an ``IvecSet`` which is saved to
        ``out_dir/<file_name>.pkl``.

        :param wav_dir: directory with wav files
        :type wav_dir: str
        :param vad_dir: directory with vad files
        :type vad_dir: str
        :param out_dir: output directory
        :type out_dir: str
        :param file_name: name of the file; when it contains whitespace only
                          the first whitespace-separated token is used
        :type file_name: str
        :param model: input models for i-vector extraction, a 9-item sequence
                      (ubm_weights, ubm_means, ubm_covs, ubm_norm, gmm_model,
                      numg, dimf, v, mvvt)
        :type model: tuple
        :param min_size: minimal size of window in ms
        :type min_size: int
        :param max_size: maximal size of window in ms
        :type max_size: int
        :param tolerance: accept given number of frames as speech even when it is marked as silence
        :type tolerance: int
        :param wav_suffix: suffix of wav files
        :type wav_suffix: str
        :param vad_suffix: suffix of vad files
        :type vad_suffix: str
        :raises GeneralException: when the audio signal is not one-dimensional
    """
    loginfo('[wav2ivecs.process_file] Processing file {} ...'.format(
        file_name.split()[0]))
    # UBM weights and covariances are part of the model tuple but are not
    # needed for the extraction itself.
    _, ubm_means, _, ubm_norm, gmm_model, numg, dimf, v, mvvt = model
    # Input lines may carry extra columns after the file name; keep the name.
    if len(file_name.split()) > 1:
        file_name = file_name.split()[0]
    wav = os.path.join(wav_dir, file_name) + wav_suffix
    rate, sig = read(wav)
    if len(sig.shape) != 1:
        raise GeneralException(
            '[wav2ivec.process_file] Expected mono as input audio.')
    if rate != 8000:
        logwarning(
            '[wav2ivecs.process_file] '
            'The input file is expected to be in 8000 Hz, got {} Hz instead, resampling.'
            .format(rate))
        # NOTE(review): assumes `signal` is scipy.signal. Its resample() takes
        # the number of OUTPUT SAMPLES, not the target rate, so the length has
        # to be scaled by 8000 / rate to keep the duration of the recording.
        n_samples = int(round(sig.shape[0] * 8000.0 / rate))
        sig = signal.resample(sig, n_samples)
    if ADDDITHER > 0.0:
        loginfo('[wav2ivecs.process_file] Adding dither ...')
        sig = features.add_dither(sig, ADDDITHER)

    fea = get_mfccs(sig)
    # get_vad also returns region and frame counts, which are not used here.
    vad, _, _ = get_vad(vad_dir, file_name, vad_suffix, sig, fea)

    ivec_set = IvecSet()
    ivec_set.name = file_name
    # Highest valid row index of the feature matrix (loop invariant).
    last_frame = fea.shape[0] - 1
    for seg in get_segments(vad, min_size, max_size, tolerance):
        start, end = get_num_segments(seg[0]), get_num_segments(seg[1])
        loginfo(
            '[wav2ivecs.process_file] Processing speech segment from {} ms to {} ms ...'
            .format(start, end))
        # Skip segments that point past the extracted features.
        if seg[0] > last_frame or seg[1] > last_frame:
            logwarning(
                '[norm.process_file] Unexpected features dimensionality - check VAD input or audio.'
            )
            continue
        w = get_ivec(fea[seg[0]:seg[1]], numg, dimf, gmm_model, ubm_means,
                     ubm_norm, v, mvvt)
        if w is None:
            continue
        ivec_set.add(w, start, end)
    # file_name may contain sub-directories; create them in out_dir first.
    Tools.mkdir_p(os.path.join(out_dir, os.path.dirname(file_name)))
    ivec_set.save(os.path.join(out_dir, '{}.pkl'.format(file_name)))