示例#1
0
def main(wavfile, destfile, win_size, hop_size, nfbank, zoom, eps):
    """Extract filter-bank GCC-PHAT features from a wav file and save them.

    The signal is transformed with an STFT, trimmed to the frequency bins
    below ``_FREQ_MAX``, and GCC-PHAT is computed on mel filter banks for
    every microphone pair ``(i, j)`` with ``i < j``.  The result is scaled
    to 16-bit integers and written with ``np.save``.

    Args:
        wavfile: path of the input wav file.
        destfile: destination passed to ``np.save``.
        win_size: STFT window size.
        hop_size: STFT hop size.
        nfbank: number of filter banks passed to
            ``apkit.mel_freq_fbank_weight``.
        zoom: ``zoom`` argument forwarded to ``apkit.gcc_phat_fbanks``.
        eps: ``eps`` argument forwarded to ``apkit.gcc_phat_fbanks``.
    """
    # load signal
    fs, sig = apkit.load_wav(wavfile)
    tf = apkit.stft(sig, apkit.cola_hamming, win_size, hop_size)
    nch, _, _ = tf.shape

    # trim freq bins
    # The bin count is used as a slice bound, so it must be an integer:
    # floor division keeps it integral under Python 3 as well, where `/`
    # would produce a float and make the slicing below fail.
    nfbin = int(_FREQ_MAX * win_size // fs)  # 0-8kHz per the original note
    freq = np.fft.fftfreq(win_size)[:nfbin]
    tf = tf[:, :, :nfbin]

    # compute pairwise gcc on f-banks
    ecov = apkit.empirical_cov_mat(tf, fw=1, tw=1)
    fbw = apkit.mel_freq_fbank_weight(nfbank,
                                      freq,
                                      fs,
                                      fmax=_FREQ_MAX,
                                      fmin=_FREQ_MIN)
    fbcc = apkit.gcc_phat_fbanks(ecov, fbw, zoom, freq, eps=eps)

    # merge to a single numpy array, indexed by 'tpbd'
    #                                           (time, pair, bank, delay)
    # `range` (instead of the Python-2-only `xrange`) works on both major
    # versions; pairs are visited in the same (i, j), i < j order as before.
    feature = np.asarray(
        [fbcc[(i, j)] for i in range(nch) for j in range(i + 1, nch)])
    feature = np.moveaxis(feature, 2, 0)

    # and map [-1.0, 1.0] to 16-bit integer, to save storage space
    dtype = np.int16
    vmax = np.iinfo(dtype).max
    feature = (feature * vmax).astype(dtype)

    np.save(destfile, feature)
示例#2
0
文件: features.py 项目: idiap/nnsslm
 def __call__(self, fs, sig):
     """Return the real and imaginary STFT parts of ``sig`` as float32.

     The STFT is restricted to the frequency bins between
     ``self.min_freq`` and ``self.max_freq``; a negative ``self.max_freq``
     selects bins up to ``self.win_size // 2``.

     Args:
         fs: sample rate of ``sig``.
         sig: signal passed to ``apkit.stft``.

     Returns:
         Array with the real parts followed by the imaginary parts,
         concatenated along axis 0, as ``np.float32``.
     """
     tf = apkit.stft(sig,
                     apkit.cola_hamming,
                     self.win_size,
                     self.hop_size,
                     last_sample=True)
     # Bin indices are used as slice bounds and therefore must be integers;
     # plain `/` yields floats on Python 3 (and with float frequencies),
     # which makes the slicing below raise.
     min_fbin = int(self.min_freq * self.win_size // fs)
     if self.max_freq >= 0:
         max_fbin = int(self.max_freq * self.win_size // fs)
     else:
         max_fbin = self.win_size // 2
     tf = tf[:, :, min_fbin:max_fbin]
     feat = np.concatenate((tf.real, tf.imag), axis=0)
     return feat.astype(np.float32, copy=False)
示例#3
0
def get_fbanks_gcc(signal,
                   fs,
                   win_size=1024,
                   hop_size=512,
                   nfbank=50,
                   zoom=25,
                   eps=1e-8):
    """Compute filter-bank GCC-PHAT features of a signal as int16.

    Args:
        signal: multi-channel signal passed to ``apkit.stft``.
        fs: sample rate of ``signal``.
        win_size: STFT window size.
        hop_size: STFT hop size.
        nfbank: number of filter banks passed to
            ``apkit.mel_freq_fbank_weight``.
        zoom: ``zoom`` argument forwarded to ``apkit.gcc_phat_fbanks``.
        eps: ``eps`` argument forwarded to ``apkit.gcc_phat_fbanks``.

    Returns:
        The GCC features scaled by the int16 maximum and cast to
        ``np.int16``.

    Raises:
        ValueError: from ``np.squeeze`` when the stacked pair axis does not
            have length one (i.e. there is not exactly one channel pair).
    """
    freq_max = 8000  # upper band edge in Hz
    freq_min = 100  # lower band edge in Hz

    spec = apkit.stft(signal, apkit.cola_hamming, win_size, hop_size)
    nch, _, _ = spec.shape

    # keep only the bins below freq_max (0-8kHz)
    nfbin = int(freq_max * win_size / fs)
    freq = np.fft.fftfreq(win_size)[:nfbin]
    spec = spec[:, :, :nfbin]

    # pairwise gcc on the filter banks
    ecov = apkit.empirical_cov_mat(spec, fw=1, tw=1)
    weights = apkit.mel_freq_fbank_weight(nfbank,
                                          freq,
                                          fs,
                                          fmax=freq_max,
                                          fmin=freq_min)
    fbcc = apkit.gcc_phat_fbanks(ecov, weights, zoom, freq, eps=eps)

    # stack the per-pair results, pairs ordered (i, j) with i < j
    pairs = [(i, j) for i in range(nch) for j in range(i + 1, nch)]
    stacked = np.asarray([fbcc[pair] for pair in pairs])

    # drop the pair axis, then bring axis 2 of the remainder to the front
    # NOTE(review): an earlier comment described the layout as
    # (time, pair, bank, delay); with the pair axis squeezed out first the
    # final axis order may differ -- confirm against the consumers.
    stacked = np.squeeze(stacked, axis=0)
    stacked = np.moveaxis(stacked, 2, 0)

    # map [-1.0, 1.0] to 16-bit integers to save storage space
    scale = np.iinfo(np.int16).max
    return (stacked * scale).astype(np.int16)
示例#4
0
def load_cpsd(afile, win_size, hop_size):
    """Load a wav file and return ``apkit.pairwise_cpsd`` of its STFT.

    Args:
        afile: path of the wav file to load.
        win_size: STFT window size.
        hop_size: STFT hop size.
    """
    # the sample rate returned alongside the signal is not needed here
    _, samples = apkit.load_wav(afile)
    spectrum = apkit.stft(samples, apkit.cola_hamming, win_size, hop_size)
    return apkit.pairwise_cpsd(spectrum)
示例#5
0
def main(infile, outdir, afunc, win_size, hop_size, block_size, block_hop,
         min_sc):
    """Compute a per-block angular spectrum for a recording and save it.

    Candidate directions of arrival (DOAs) are scored with ``afunc``, local
    maxima are detected and merged by azimuth, and one text file per
    frame/block is written to ``outdir``.  Progress messages prefixed with
    the elapsed time are printed to stderr.

    Args:
        infile: path of the wav file to analyse.
        outdir: directory receiving the result files, which are named by
            the zero-padded block index.
        afunc: angular spectrum function, called as
            ``afunc(ecov, delays, fbins)``.
        win_size: STFT window size.
        hop_size: STFT hop size.
        block_size: block size for ``apkit.empirical_cov_mat_by_block``;
            ``None`` selects ``apkit.empirical_cov_mat`` instead.
        block_hop: block hop, only used when ``block_size`` is not ``None``.
        min_sc: threshold passed as ``th_phi`` to ``apkit.local_maxima``.
    """
    stime = time.time()

    def _log(msg):
        # progress line on stderr, prefixed with seconds since start
        print('%.3fs: %s' % (time.time() - stime, msg), file=sys.stderr)

    # load candidate DOAs
    pts = apkit.load_pts_on_sphere()
    pts = pts[pts[:, 2] > -0.05]  # use upper half of the sphere
    # NOTE: alternatively use only points on the horizontal plane
    # pts = apkit.load_pts_horizontal(360)
    _log('load points (%d)' % len(pts))

    # compute neighbors (for peak finding)
    nlist = apkit.neighbor_list(pts, math.pi / 180.0 * 8.0)
    _log('neighbor list')

    # load signal
    fs, sig = apkit.load_wav(infile)
    _log('load signal')

    # compute delays (delay for each candidate DOA and each microphone)
    delays = apkit.compute_delay(_MICROPHONE_COORDINATES, pts, fs=fs)
    _log('compute delays')

    # compute empirical covariance matrix
    tf = apkit.stft(sig, apkit.cola_hamming, win_size, hop_size)
    max_fbin = _MAX_FREQ * win_size // fs  # int
    assert max_fbin <= win_size // 2, \
        'max frequency bin exceeds half the window size'
    tf = tf[:, :, :max_fbin]  # 0-8kHz per the original note
    # frequencies of the kept bins, normalised by the window size
    fbins = np.arange(max_fbin, dtype=float) / win_size
    if block_size is None:
        ecov = apkit.empirical_cov_mat(tf)
    else:
        ecov = apkit.empirical_cov_mat_by_block(tf, block_size, block_hop)
    _, _, nblock, nfbin = ecov.shape
    _log('empirical cov matrix (nfbin=%d)' % nfbin)

    # local angular spectrum function
    phi = afunc(ecov, delays, fbins)
    _log('compute phi')

    # find local maxima
    lmax = apkit.local_maxima(phi, nlist, th_phi=min_sc)
    _log('find local maxima')

    # merge predictions that have similar azimuth predictions
    # NOTE: skip this step if the candidate DOAs are on the horizontal plane
    lmax = apkit.merge_lm_on_azimuth(phi, lmax, pts, math.pi / 180.0 * 5.0)
    _log('refine local maxima')

    # save results
    # each file contains the predicted angular spectrum for each frame/block
    # each line has five tokens:
    #   (1) x coordinate of the candidate DOA
    #   (2) y coordinate of the candidate DOA
    #   (3) z coordinate of the candidate DOA
    #   (4) angular spectrum value
    #   (5) 1 if this is a local maximum, otherwise 0
    for t in range(nblock):
        # build the peak lookup once per block so the per-point membership
        # test does not rescan the list of local maxima every time
        peaks = set(lmax[t])
        with open(f'{outdir}/{t:06d}', 'w') as f:
            for i, pt in enumerate(pts):
                print('%g %g %g %g %d' % (pt[0], pt[1], pt[2], phi[i, t],
                                          1 if i in peaks else 0),
                      file=f)
    _log('save results')
示例#6
0
def load_ncov(path, win_size, hop_size):
    """Load a wav file and return ``apkit.cov_matrix`` of its trimmed STFT.

    Only the frequency bins below ``_MAX_FREQ`` are kept before the
    covariance matrix is computed.

    Args:
        path: path of the wav file to load.
        win_size: STFT window size.
        hop_size: STFT hop size.
    """
    fs, samples = apkit.load_wav(path)
    # number of bins to keep (0-8kHz per the original note)
    keep = _MAX_FREQ * win_size // fs
    spectrum = apkit.stft(samples, apkit.cola_hamming, win_size, hop_size)
    return apkit.cov_matrix(spectrum[:, :, :keep])