def signal_energy(signal, fs, fl=0.05, fh=0.02):
    """Return the summed STFT magnitude of ``signal`` along axis 1.

    ``stft(signal, fs, fl, fh)`` is evaluated, the absolute value of every
    coefficient is taken, and the magnitudes are added up along axis 1, so
    the result holds one value per row of the STFT output. Note that this is
    a sum of magnitudes, not of squared magnitudes.

    Args:
        signal: Input samples, forwarded unchanged to ``stft``.
        fs: Second ``stft`` argument; presumably the sampling rate --
            confirm against ``stft``.
        fl: Third ``stft`` argument (default 0.05); presumably the frame
            length in seconds -- confirm against ``stft``.
        fh: Fourth ``stft`` argument (default 0.02); presumably the frame
            hop in seconds -- confirm against ``stft``.

    Returns:
        The per-row sums of ``|stft(...)|``.
    """
    # NumPy is called directly: ``scipy.absolute`` and ``scipy.sum`` were only
    # deprecated re-exports of the NumPy functions and are not available in
    # current SciPy releases.
    import numpy as np

    X = stft(signal, fs, fl, fh)
    return np.sum(np.absolute(X), 1)
def signal_energy(signal, fs, fl=0.05, fh=0.02):
    """Return the summed STFT magnitude of ``signal`` along axis 1.

    ``stft(signal, fs, fl, fh)`` is evaluated, the absolute value of every
    coefficient is taken, and the magnitudes are added up along axis 1, so
    the result holds one value per row of the STFT output. Note that this is
    a sum of magnitudes, not of squared magnitudes.

    Args:
        signal: Input samples, forwarded unchanged to ``stft``.
        fs: Second ``stft`` argument; presumably the sampling rate --
            confirm against ``stft``.
        fl: Third ``stft`` argument (default 0.05); presumably the frame
            length in seconds -- confirm against ``stft``.
        fh: Fourth ``stft`` argument (default 0.02); presumably the frame
            hop in seconds -- confirm against ``stft``.

    Returns:
        The per-row sums of ``|stft(...)|``.
    """
    # NumPy is called directly: ``scipy.absolute`` and ``scipy.sum`` were only
    # deprecated re-exports of the NumPy functions and are not available in
    # current SciPy releases.
    import numpy as np

    X = stft(signal, fs, fl, fh)
    return np.sum(np.absolute(X), 1)
    def computeSpectrogram(self, timeseries, nFFT, nHop, dt, windowFn, fRange):
        '''
        Build a complex STFT spectrogram for every signal in ``timeseries``.

        ``timeseries.getData()`` is indexed as ``data[:, i]``, i.e. one signal
        per column (the expected shape is (nSamples, nSignals)). Each column
        is passed to ``spectrogram.stft`` together with ``nFFT``, ``nHop`` and
        ``windowFn`` (FFT transform, no zero padding), and only the frequency
        bins selected through ``fRange`` are stored.

        fRange: ``None`` keeps every entry of ``self.getFreqVec()``; otherwise
            it is indexed as (low, high) and entries with low <= f <= high
            are kept.
        dt: accepted but not used by this method.

        Returns: spectra, complex array of shape
        (nTimes, nSelectedFreq, nSignals), where nTimes is the first
        dimension of the per-signal STFT output.
        '''
        samples = timeseries.getData()
        freqs = self.getFreqVec()

        if fRange is None:
            # No band requested: flag every frequency entry as selected.
            selected = np.ones((len(freqs), 1), dtype=bool)
        else:
            selected = np.logical_and(freqs >= fRange[0], freqs <= fRange[1])

        # Turn the boolean selection into indices along its first axis.
        bin_idx = np.nonzero(selected)[0]

        n_signals = timeseries.numSignals()
        for sig in range(n_signals):
            # Keep the column two-dimensional: (nSamples, 1).
            column = samples[:, sig, np.newaxis]
            frames = spectrogram.stft(column, nFFT, nHop,
                                      transform=np.fft.fft,
                                      win=windowFn,
                                      zp_back=0,
                                      zp_front=0)

            if sig == 0:
                # The output size is only known once the first STFT exists.
                out_shape = (frames.shape[0], len(bin_idx), n_signals)
                spectra = np.zeros(out_shape, dtype=complex)

            spectra[:, :, sig] = frames[:, bin_idx]

        return spectra
Exemplo n.º 4
0
def transform(audio_data, save_image_path, nFFT=256, overlap=0.75):
    '''Render the log-magnitude STFT of ``audio_data`` to an image file.

    audio_data: signals to convert
    save_image_path: path to store the image file
    nFFT, overlap: forwarded unchanged to ``stft``

    Returns the size of the first axis of the log-magnitude array, taken
    before the array is transposed for the image.
    '''
    magnitude = np.abs(stft(audio_data, nFFT, overlap))

    # Clamp to 1/10000 of the peak, i.e. at most 80 dB below the maximum.
    floor = np.max(magnitude) / 10000
    clamped = np.maximum(magnitude, floor)

    level_db = 20. * np.log10(clamped / 1e-4)
    N_samples = level_db.shape[0]

    # Whole-number values, transposed relative to the STFT output.
    pixels = np.transpose(np.round(level_db))

    assert np.max(pixels) < 256, 'spectrogram value too large'

    # save the image
    Image.fromarray(pixels).convert('RGB').save(save_image_path)
    return N_samples
Exemplo n.º 5
0
def find_words(data, fs, num, **args):
    """Cut ``data`` into its ``num`` longest above-threshold segments.

    An energy curve is computed as the sum of ``|stft(data, fs, 0.03, 0.02)|``
    along axis 1. Consecutive entries above the threshold form a run; runs
    separated by a gap of at most ``min_silence_length`` entries are merged.
    The ``num`` longest runs are kept and returned in their original order
    as slices of ``data``. An energy index ``i`` is mapped to the sample
    offset ``fs * 0.02 * i``.

    Args:
        data: One-dimensional signal; it is sliced and, when plotting,
            drawn against ``index / fs``.
        fs: Sampling rate used to convert between samples and seconds.
        num: Number of segments to return (expected to be non-negative).
        **args: Optional settings:
            min_silence_length: largest gap, in energy-curve entries, that
                is still merged into the neighbouring runs (default 5).
            plot: when true, show the energy curve and the waveform with
                the segment boundaries via ``pylab`` (default False).
            thresh: energy threshold; defaults to
                ``mean(energy) - min(energy)``.

    Returns:
        A list with one slice of ``data`` per kept segment.

    Raises:
        ValueError: if fewer than ``num`` segments are found, or if the
            numbers of detected segment starts and ends differ.
    """
    min_silence_length = args.get("min_silence_length", 5)
    show_plot = args.get("plot", False)

    frml = 0.03
    frmh = 0.02

    X = stft(data, fs, frml, frmh)
    energy = np.sum(np.absolute(X), 1)

    if "thresh" in args:
        thresh = args["thresh"]
    else:
        thresh = np.mean(energy) - np.min(energy)

    # Pad with a zero on either side so runs touching the array borders
    # still produce a rising and a falling edge.
    active = (energy > thresh).astype(np.int8)
    rising_edge = np.diff(np.hstack(([0], active)))
    falling_edge = np.diff(np.hstack((active, [0])))

    (stp,) = np.nonzero(rising_edge > 0)
    (edp,) = np.nonzero(falling_edge < 0)

    # Sanity check: with the padding above every start has a matching end.
    if len(stp) != len(edp):
        raise ValueError("Wrong threshold value")

    # Remove too short silence periods by merging the runs around them.
    silence = stp[1:] - edp[:-1]
    (too_short,) = np.nonzero(silence <= min_silence_length)
    stp = np.delete(stp, too_short + 1)
    edp = np.delete(edp, too_short)

    nonsilence = edp - stp
    if len(nonsilence) < num:
        raise ValueError("You say less words than you ask me to find or you speak too fast")

    # Drop everything except the ``num`` longest runs. The slice end is
    # spelled out because ``shortest[:-num]`` is empty for ``num == 0`` and
    # would keep every run instead of none.
    shortest = nonsilence.argsort()
    surplus = shortest[:len(shortest) - num]
    stp = np.delete(stp, surplus)
    edp = np.delete(edp, surplus)

    def pos(frame):
        # Slice bounds must be integers; a float bound raises TypeError.
        return int(round(fs * frmh * frame))

    if show_plot:
        pylab.subplot(2, 1, 1)
        etime = np.arange(len(energy), dtype=float) * frmh
        pylab.plot(etime, energy / 1e6)
        pylab.axhline(thresh / 1e6)
        for start in stp:
            pylab.axvline(start * frmh, color="red")
        for end in edp:
            pylab.axvline(end * frmh, color="green")
        pylab.title("Energia wypowiedzi w czasie")

        time = np.arange(len(data), dtype=float) / fs
        sc_data = data - np.mean(data)
        sc_data = sc_data / np.max(sc_data)
        pylab.subplot(2, 1, 2)
        pylab.plot(time, sc_data)
        for start in stp:
            pylab.axvline(start * frmh, color="red")
        for end in edp:
            pylab.axvline(end * frmh, color="green")
        pylab.xlabel("Czas [s]")
        pylab.title(u"Zapis sygnału")
        pylab.show()

    return [data[pos(a):pos(b)] for a, b in zip(stp, edp)]
def find_words(data, fs, num, **args):
    """Cut ``data`` into its ``num`` longest above-threshold segments.

    An energy curve is computed as the sum of ``|stft(data, fs, 0.03, 0.02)|``
    along axis 1. Consecutive entries above the threshold form a run; runs
    separated by a gap of at most ``min_silence_length`` entries are merged.
    The ``num`` longest runs are kept and returned in their original order
    as slices of ``data``. An energy index ``i`` is mapped to the sample
    offset ``fs * 0.02 * i``.

    Args:
        data: One-dimensional signal; it is sliced and, when plotting,
            drawn against ``index / fs``.
        fs: Sampling rate used to convert between samples and seconds.
        num: Number of segments to return (expected to be non-negative).
        **args: Optional settings:
            min_silence_length: largest gap, in energy-curve entries, that
                is still merged into the neighbouring runs (default 5).
            plot: when true, show the energy curve and the waveform with
                the segment boundaries via ``pylab`` (default False).
            thresh: energy threshold; defaults to
                ``mean(energy) - min(energy)``.

    Returns:
        A list with one slice of ``data`` per kept segment.

    Raises:
        ValueError: if fewer than ``num`` segments are found, or if the
            numbers of detected segment starts and ends differ.
    """
    min_silence_length = args.get("min_silence_length", 5)
    show_plot = args.get("plot", False)

    frml = 0.03
    frmh = 0.02

    X = stft(data, fs, frml, frmh)
    energy = np.sum(np.absolute(X), 1)

    if "thresh" in args:
        thresh = args["thresh"]
    else:
        thresh = np.mean(energy) - np.min(energy)

    # Pad with a zero on either side so runs touching the array borders
    # still produce a rising and a falling edge.
    active = (energy > thresh).astype(np.int8)
    rising_edge = np.diff(np.hstack(([0], active)))
    falling_edge = np.diff(np.hstack((active, [0])))

    (stp,) = np.nonzero(rising_edge > 0)
    (edp,) = np.nonzero(falling_edge < 0)

    # Sanity check: with the padding above every start has a matching end.
    if len(stp) != len(edp):
        raise ValueError("Wrong threshold value")

    # Remove too short silence periods by merging the runs around them.
    silence = stp[1:] - edp[:-1]
    (too_short,) = np.nonzero(silence <= min_silence_length)
    stp = np.delete(stp, too_short + 1)
    edp = np.delete(edp, too_short)

    nonsilence = edp - stp
    if len(nonsilence) < num:
        raise ValueError("You say less words than you ask me to find or you speak too fast")

    # Drop everything except the ``num`` longest runs. The slice end is
    # spelled out because ``shortest[:-num]`` is empty for ``num == 0`` and
    # would keep every run instead of none.
    shortest = nonsilence.argsort()
    surplus = shortest[:len(shortest) - num]
    stp = np.delete(stp, surplus)
    edp = np.delete(edp, surplus)

    def pos(frame):
        # Slice bounds must be integers; a float bound raises TypeError.
        return int(round(fs * frmh * frame))

    if show_plot:
        pylab.subplot(2, 1, 1)
        etime = np.arange(len(energy), dtype=float) * frmh
        pylab.plot(etime, energy / 1e6)
        pylab.axhline(thresh / 1e6)
        for start in stp:
            pylab.axvline(start * frmh, color="red")
        for end in edp:
            pylab.axvline(end * frmh, color="green")
        pylab.title("Energia wypowiedzi w czasie")

        time = np.arange(len(data), dtype=float) / fs
        sc_data = data - np.mean(data)
        sc_data = sc_data / np.max(sc_data)
        pylab.subplot(2, 1, 2)
        pylab.plot(time, sc_data)
        for start in stp:
            pylab.axvline(start * frmh, color="red")
        for end in edp:
            pylab.axvline(end * frmh, color="green")
        pylab.xlabel("Czas [s]")
        pylab.title(u"Zapis sygnału")
        pylab.show()

    return [data[pos(a):pos(b)] for a, b in zip(stp, edp)]