示例#1
0
def stft_to_wav(Zxx_magn, Zxx_phase):
    """Rebuild a time-domain audio signal from STFT magnitude and phase.

    The two inputs are merged into one spectrum by ``get_spectrum`` and
    inverted with ``istft`` using the settings stored on ``Preprocessing``.

    Args:
        Zxx_magn: STFT magnitudes. Presumably a 2D numpy array shaped
            [nfft//2 + 1, n_windows]; ``istft`` is told that frequency is
            on axis -2 and time on axis -1.
        Zxx_phase: STFT phases matching ``Zxx_magn``.
    Outputs:
        wav (numpy array): The reconstructed signal. It is divided by its
            peak absolute value whenever that peak exceeds 1.0, so the
            result never leaves [-1, 1].
    """
    spectrum = get_spectrum(Zxx_magn, Zxx_phase)

    # The COLA result is only reported; inversion is attempted either way.
    # NOTE(review): WINSHIFT is passed as the overlap both here and to
    # istft below -- confirm it really holds an overlap and not a hop size.
    invertible = check_COLA(Preprocessing.WINDOW, Preprocessing.WINLEN,
                            Preprocessing.WINSHIFT)
    print("inversion possible? ", invertible)

    inverse_options = dict(
        fs=Preprocessing.FS,
        window=Preprocessing.WINDOW,
        nperseg=Preprocessing.WINLEN,
        noverlap=Preprocessing.WINSHIFT,
        nfft=Preprocessing.NFFT,
        input_onesided=Preprocessing.ONESIDED,
        boundary=Preprocessing.BOUNDARY,
        time_axis=-1,
        freq_axis=-2,
    )
    _, wav = istft(spectrum, **inverse_options)
    assert not np.isnan(wav).any()

    # Normalise only when the signal would otherwise leave [-1, 1].
    peak = np.max(abs(wav))
    if peak > 1.0:
        wav = wav / peak
    return wav
示例#2
0
def test_stft_module(combo, one_item):
    """Round-trip audio through the STFT module and compare against nussl.

    ``combo`` is indexed as (window length, hop fraction, window type) and
    ``one_item['mix_audio']`` supplies the audio under test.
    """
    win_length = combo[0]
    hop_fraction = combo[1]
    win_type = combo[2]
    hop_length = int(win_length * hop_fraction)
    window = nussl.AudioSignal.get_window(win_type, win_length)

    stft_params = nussl.STFTParams(window_length=win_length,
                                   hop_length=hop_length,
                                   window_type=win_type)
    representation = ml.networks.modules.STFT(win_length,
                                              hop_length=hop_length,
                                              window_type=win_type)

    # NOTE(review): a failed COLA check has no effect here -- the test keeps
    # running. Confirm whether it was meant to skip or return early.
    if not check_COLA(window, win_length, win_length - hop_length):
        pass

    mix = one_item['mix_audio']

    # Forward then inverse transform must reproduce the input.
    spectrum = representation(mix, 'transform')
    reconstruction = representation(spectrum, 'inverse')
    assert (reconstruction - mix).abs().max() < 1e-6

    # Compare the first half of the leading axis with nussl's magnitude STFT.
    spectrum = spectrum.squeeze(0).permute(1, 0, 2)
    audio_signal = nussl.AudioSignal(audio_data_array=mix.squeeze(0).numpy(),
                                     sample_rate=16000,
                                     stft_params=stft_params)
    nussl_magnitude = np.abs(audio_signal.stft())
    module_part = spectrum.squeeze(0)
    half = module_part.shape[0] // 2
    module_part = module_part[:half, ...]
    assert (module_part - nussl_magnitude).abs().max() < 1e-6
示例#3
0
def list_stft_hop_size(window):
    """Return every hop size for which ``window`` satisfies COLA and NOLA.

    Args:
        window: 1D array of window samples; its length is used as the
            segment length.

    Returns:
        list of int: hop sizes in ascending order, drawn from
        1 .. len(window) - 1, for which both ``sp.check_COLA`` and
        ``sp.check_NOLA`` succeed with ``noverlap = len(window) - hop``.
    """
    (n_samples, ) = window.shape
    assert n_samples > 0

    def _invertible(hop):
        # NOLA is only evaluated when COLA already holds.
        overlap = n_samples - hop
        return (sp.check_COLA(window, n_samples, overlap)
                and sp.check_NOLA(window, n_samples, overlap))

    return [hop for hop in range(1, n_samples) if _invertible(hop)]
示例#4
0
def test_stft_istft_combo(combo, signals):
    """Check the STFT/iSTFT round trip on every signal for one setting.

    ``combo`` is indexed as (window length, hop fraction, window type).
    """
    win_length = combo[0]
    hop_fraction = combo[1]
    win_type = combo[2]
    hop_length = int(win_length * hop_fraction)
    window = nussl.AudioSignal.get_window(win_type, win_length)

    # NOTE(review): a failed COLA check has no effect here -- the signals are
    # still verified below. Confirm whether a skip was intended.
    if not check_COLA(window, win_length, win_length - hop_length):
        pass

    for audio in signals:
        _check_stft_istft_allclose(audio, win_length, hop_length, win_type)
def STFT(x, fs, wL, nOverlap):
    """Compute the one-sided Hann-window STFT of ``x`` as magnitude and phase.

    Args:
        x: Input signal.
        fs: Sampling frequency handed to ``signal.stft``.
        wL: Segment (window) length in samples.
        nOverlap: Number of samples shared by consecutive segments.

    Returns:
        ``(magnitude, phase, t, f)`` where ``t`` and ``f`` are the segment
        times and sample frequencies reported by ``signal.stft``. When the
        Hann window fails the COLA test for ``wL``/``nOverlap`` a message is
        printed and ``None`` is returned instead of a tuple.
    """
    # Guard clause: refuse parameters that do not pass the COLA test.
    if not signal.check_COLA('hann', wL, nOverlap):
        print('COLA constrain not met! Change STFT parameters!')
        return None

    freqs, times, spectrum = signal.stft(x,
                                         fs,
                                         nperseg=wL,
                                         noverlap=nOverlap,
                                         return_onesided=True)
    magnitude = np.abs(spectrum)
    phase = np.angle(spectrum)
    return magnitude, phase, times, freqs
示例#6
0
def make_stft_args(frame_period, fs, nperseg=None, window='hann', **kwargs):
    """Build an STFT keyword dictionary from a frame period.

    Args:
        frame_period: Frame shift; divided by 1000 after scaling with ``fs``,
            so presumably expressed in milliseconds.
        fs: Sampling frequency.
        nperseg: Segment length in samples; defaults to four frame shifts.
        window: Window specification understood by ``check_COLA``.
        **kwargs: Accepted but not used.

    Returns:
        dict with the keys ``window``, ``nperseg``, ``noverlap`` and ``fs``.

    Raises:
        ValueError: If the window settings fail the COLA test; the exception
            argument is the dictionary without the ``fs`` entry.
    """
    hop = fs * frame_period // 1000
    segment = hop * 4 if nperseg is None else nperseg

    stft_args = {
        "window": window,
        "nperseg": segment,
        "noverlap": segment - hop,
    }
    if check_COLA(**stft_args):
        stft_args["fs"] = fs
        return stft_args
    raise ValueError(stft_args)
示例#7
0
    def test_check_COLA(self):
        settings = [
                    ('boxcar', 10, 0),
                    ('boxcar', 10, 9),
                    ('bartlett', 51, 26),
                    ('hann', 256, 128),
                    ('hann', 256, 192),
                    ('blackman', 300, 200),
                    (('tukey', 0.5), 256, 64),
                    ('hann', 256, 255),
                    ]

        for set in settings:
            msg = '{0}, {1}, {2}'.format(*set)
            assert_equal(True, check_COLA(*set), err_msg=msg)
示例#8
0
    def test_check_COLA(self):
        settings = [
                    ('boxcar', 10, 0),
                    ('boxcar', 10, 9),
                    ('bartlett', 51, 26),
                    ('hann', 256, 128),
                    ('hann', 256, 192),
                    ('blackman', 300, 200),
                    (('tukey', 0.5), 256, 64),
                    ('hann', 256, 255),
                    ]

        for set in settings:
            msg = '{0}, {1}, {2}'.format(*set)
            assert_equal(True, check_COLA(*set), err_msg=msg)
示例#9
0
def make_stft(lis):
    """Convert chunked audio into stacked STFT magnitudes and phase arrays.

    ``lis`` is split by ``make_chunks``; each chunk is transformed with
    ``stft`` using the module-level ``perseg`` and ``overlap`` settings.

    Returns:
        tuple: ``(magnitudes, angles)`` where ``magnitudes`` is the vertical
        stack of the transposed float32 magnitude of every chunk and
        ``angles`` is a list holding one float32 phase array per chunk.

    The interpreter is terminated through ``exit()`` when the Hann window
    fails the COLA test for these settings.
    """
    chunks = make_chunks(lis)
    magnitudes, phases = [], []

    if not check_COLA('hann', nperseg=perseg, noverlap=overlap):
        print("COLA constraint not met, in func: utils.make_stft")
        exit()
    else:
        for chunk in chunks:
            _, _, spectrum = stft(chunk, nperseg=perseg, noverlap=overlap)
            magnitudes.append(np.abs(spectrum).T.astype('float32'))
            phases.append(np.angle(spectrum).astype('float32'))

    return np.vstack(magnitudes), phases
示例#10
0
文件: stft.py 项目: nd1511/untwist
    def __init__(self, window='hann', fft_size=1024, hop_size=512):
        """Store the analysis window and the derived STFT sizes.

        Args:
            window: Either a numpy array of window samples, used as-is, or a
                specification passed to ``signal.get_window`` together with
                ``fft_size`` as the window length.
            fft_size: FFT length; stored as an int.
            hop_size: Hop between frames in samples; stored as an int.

        Raises:
            Exception: If the window fails the COLA test for the resulting
                window size and overlap.
        """
        self.window = (window if isinstance(window, np.ndarray) else
                       signal.get_window(window, fft_size))
        self.fft_size = int(fft_size)
        self.hop_size = int(hop_size)
        self.window_size = len(self.window)
        self.overlap = self.window_size - self.hop_size

        # Correct for scipy spectrum scaling
        window_sum = self.window.sum()
        self.scale = 1.0 / np.sqrt(window_sum**2)

        cola_ok = signal.check_COLA(self.window, self.window_size,
                                    self.overlap)
        if not cola_ok:
            raise Exception('COLA constraint not satisfied')
示例#11
0
    def __init_kernel__(self):
        """
        Generate the enframe, FFT, inverse-FFT and overlap-add kernels.

        ** enframe_kernel: an identity matrix with a singleton middle axis
        (per the original author it is meant to be used with a conv1d layer).
        ** fft_kernel: the FFT of an identity matrix with real and imaginary
        parts concatenated, then transposed and multiplied by the window (per
        the original author it is meant for a linear layer). The enframe and
        FFT kernels could be combined, but are kept apart for readability.
        ** ifft_kernel: pseudo-inverse of the concatenated FFT matrix,
        multiplied by the window.
        ** ola_kernel: an identity-based kernel like the enframe kernel, used
        for overlap-add.

        Besides returning the kernels, this sets ``self.perfect_reconstruct``
        (result of the COLA test for ``win_len``/``win_hop``) and
        ``self.padded_window``.

        Returns:
            tuple: (enframed_kernel, fft_kernel, ifft_kernel, ola_kernel).
        """
        enframed_kernel = th.eye(self.fft_len)[:, None, :]
        # Both branches end with real/imaginary parts on a trailing axis of
        # size 2. NOTE(review): the else branch presumably targets the legacy
        # fft(input, signal_ndim) API -- confirm where `fft` comes from.
        if support_clp_op:
            tmp = fft(th.eye(self.fft_len))
            fft_kernel = th.stack([tmp.real, tmp.imag], dim=2)
        else:
            fft_kernel = fft(th.eye(self.fft_len), 1)
        # In 'break' mode the kernels are sized by win_len instead of fft_len.
        if self.mode == 'break':
            enframed_kernel = th.eye(self.win_len)[:, None, :]
            fft_kernel = fft_kernel[:self.win_len]
        # Place the real and imaginary matrices side by side along dim 1.
        fft_kernel = th.cat((fft_kernel[:, :, 0], fft_kernel[:, :, 1]), dim=1)
        ifft_kernel = th.pinverse(fft_kernel)[:, None, :]
        window = get_window(self.win_type, self.win_len)

        # Record whether this window/hop pair passes the COLA test.
        self.perfect_reconstruct = check_COLA(window, self.win_len,
                                              self.win_len - self.win_hop)
        window = th.FloatTensor(window)
        # In 'continue' mode the window is padded out to fft_len, split as
        # evenly as possible with any odd sample going to the right side.
        if self.mode == 'continue':
            left_pad = (self.fft_len - self.win_len) // 2
            right_pad = left_pad + (self.fft_len - self.win_len) % 2
            window = F.pad(window, (left_pad, right_pad))
        # padded_window keeps the window itself when its square root is
        # applied to the kernels below, and the squared window otherwise.
        if self.win_sqrt:
            self.padded_window = window
            window = th.sqrt(window)
        else:
            self.padded_window = window**2

        # Apply the window to both the analysis and the synthesis kernel.
        fft_kernel = fft_kernel.T * window
        ifft_kernel = ifft_kernel * window
        ola_kernel = th.eye(self.fft_len)[:self.win_len, None, :]
        if self.mode == 'continue':
            ola_kernel = th.eye(self.fft_len)[:, None, :self.fft_len]
        return enframed_kernel, fft_kernel, ifft_kernel, ola_kernel
示例#12
0
文件: __init__.py 项目: giruenf/GRIPy
def frequency_phase_rotation(values, angle, deg=False):
    """Rotate the phase of every STFT frequency bin and resynthesise.

    The signal is transformed with a 64-sample periodic Hann window and
    32 samples of overlap, each frequency row is passed through
    ``phase_rotation``, and the result is inverted with ``signal.istft``.

    Args:
        values: Input signal handed to ``signal.stft``.
        angle: Rotation angle forwarded to ``phase_rotation``.
        deg: Forwarded to ``phase_rotation``; presumably selects degrees
            instead of radians -- confirm against that helper.

    Returns:
        tuple: ``(t, x)`` as returned by ``signal.istft`` (output times and
        the reconstructed signal).

    Raises:
        Exception: If the window fails the COLA test.
    """
    window_size = 64
    noverlap = 32
    # signal.hann is no longer available in the scipy.signal namespace;
    # the window functions live in scipy.signal.windows.
    window = signal.windows.hann(window_size, sym=False)
    if not signal.check_COLA(window, len(window), noverlap):
        raise Exception('check_COLA failed.')
    _, _, Zxx = signal.stft(values,
                            window=window,
                            nperseg=window_size,
                            noverlap=noverlap)
    # The np.complex alias was removed from NumPy; the builtin ``complex``
    # selects the same complex128 dtype.
    Zxx_rotated = np.zeros(Zxx.shape, dtype=complex)
    for freq_idx in range(Zxx.shape[0]):  # Loop over all frequencies
        Zxx_rotated[freq_idx] = phase_rotation(Zxx[freq_idx], angle, deg)
    t, x = signal.istft(Zxx_rotated,
                        window=window,
                        nperseg=window_size,
                        noverlap=noverlap)
    return t, x
plt.xlabel("The normalized frequencies ")
plt.ylabel("The log-scaled frequency response")
plt.show()

# apply filter to input signal x using convolution in the time-domain (0.2 pt)
x_filter = convolve(x, b)
plt.figure(3)
plt.plot(x_filter)
plt.show()

##3. STFT and inverse STFT, see the error between reconstruction and original (0.5 pt).
#!! FILL IN PARTS WITH "None"
# Plot the spectrogram using STFT with 1024 sample length window.

# Periodic Hann window; the window functions live in scipy.signal.windows
# (the scipy.signal.hann alias is no longer available).
win = scipy.signal.windows.hann(winlen, sym=False)
# Overlap is half a window. Integer division keeps noverlap an int, which
# the slicing inside check_COLA/stft/istft requires on Python 3.
# NOTE(review): the COLA result is computed but not used.
signal.check_COLA(win, winlen, winlen // 2)

# Calculate window length (0.1 pt)
# The number is converted to str before concatenation; the original line
# had an unbalanced parenthesis and added a float to a str.
print("window length is " + str(float(1000.0*winlen/fs)) + ' ms ')

# Take STFT (0.1 pt)
f,t,X = stft(x, fs=fs, window=win, nperseg=winlen, noverlap=winlen // 2, nfft=winlen, detrend=False, return_onesided=True, padded=True, axis=-1)

# Apply iSTFT (0.1 pt)
_,x_rec = istft(X,fs=fs,window=win,nperseg=winlen,noverlap=winlen // 2,nfft=winlen,input_onesided=True)

# - what is the amount of data, i.e., how many samples are in the STFT domain (0.1 pt)?

# - Each STFT value is complex (one float for real part, one float for imaginary part): 
# - Does the amount of data increase in the STFT domain versus time-domain (0.1 pt)?
示例#14
0
# window = "boxcar"           # 75%: COLA satisfied

# "Shaky" reconstructions. For check_COLA, see ola.py
# window = "boxcar"           # 25%: COLA not satisfied
# window = "blackmanharris"   # 50%: COLA not satisfied

# Keep the window specification under a second name for later use.
window_name = window  # Rename explicitly if necessary

#
# Generate data
#

# 40% of the summed durations.
cutoff_duration = sum(durations) * 0.4
# Overlap in samples, truncated to an integer.
noverlap = int(window_length * overlap_percent)

# Record whether the chosen window/overlap pair passes the COLA and NOLA
# tests; both results are only stored here.
cola = signal.check_COLA(window, window_length, noverlap)
nola = signal.check_NOLA(window, window_length, noverlap)

# A4 C5 E5 for "A4"
chord = notes.minor_chord(scale_name=scale_name, n_notes=3, output="chord")

# Build the melody from n_chords chords: the current chord is appended, then
# a deep copy of it is inverted and becomes the chord for the next iteration,
# so objects already in the stream are not modified.
melody = m21.stream.Stream()
for i in range(1, n_chords + 1):
    inv = i % 3  # inversion number cycles through 1, 2, 0
    melody.append(chord)
    chord_copy = copy.deepcopy(chord)
    chord_copy.inversion(inv)  # Move root note to the top
    chord = chord_copy

# Add the extra notes to each chord - we want to remove these after STFT
for i, name in enumerate(extra_notes):
import modelParameters as mp
import utils

# Start from an empty default graph (TensorFlow 1.x style API).
tf.reset_default_graph()

# Normalisation statistics saved alongside the trained model:
# row 0 is used as the feature mean, row 1 as the feature standard deviation.
savedModelPath = "./savedModels_HP_Tune/"
trainingStats = np.load(savedModelPath + "trainingStatistics.npy")

featMean = trainingStats[0]
featStd = trainingStats[1]

# NOTE(review): absolute, machine-specific paths to the test recordings.
filePath_input = "C:/Users/mhp/Documents/DNN_Datasets/bcmRecordings/testInput/tobc_01_feat_11.wav"
filePath_target = "C:/Users/mhp/Documents/DNN_Datasets/bcmRecordings/testReference/tobc_01_ref_11.wav"

### Feature Extraction ###
# Print whether a Hann window of length NFFT passes the COLA test for the
# configured overlap; the result is reported only, not enforced.
print(dsp.check_COLA('hann', mp.NFFT, int(mp.NFFT * mp.STFT_OVERLAP)))
# Input and target files go through the same extraction with the same
# normalisation statistics; each call returns two values, the second of
# which is presumably the phase (judging by the "_phi" suffix).
features, features_phi = featureExtraction.featureExtraction(
    filePath_input, mp.AUDIO_dB_SPL, mp.NFFT, mp.STFT_OVERLAP, mp.NUM_CLASSES,
    featMean, featStd)

labels, labels_phi = featureExtraction.featureExtraction(
    filePath_target, mp.AUDIO_dB_SPL, mp.NFFT, mp.STFT_OVERLAP, mp.NUM_CLASSES,
    featMean, featStd)

### UNFREEZE MODEL ###
# Read the serialized frozen graph from disk and parse it into a GraphDef.
frozen_graph = savedModelPath + "myFrozenModel.pb"
with tf.gfile.GFile(frozen_graph, "rb") as f:
    restored_graph_def = tf.GraphDef()
    restored_graph_def.ParseFromString(f.read())

with tf.Graph().as_default() as graph:
示例#16
0
文件: ola.py 项目: DoraMemo-x/aumix
    ("Rectangular, 75%: ", rect120, 120, 90),  # True , True
    ("Rectangular, 25%: ", rect120, 120, 30),  # False, True
    ("Hann symmetrical, 50%: ", hann_sym120, 120, 60),  # False, True
    ("Hann Periodic/DFT-even, 1/2: ", hann_asym120, 120, 60),  # True , True
    ("Hann Periodic/DFT-even, 2/3: ", hann_asym120, 120, 80),  # True , True
    ("Hann Periodic/DFT-even, 3/4: ", hann_asym120, 120, 90),  # True , True
    ("Blackmanharris, 50%: ", blackmanharris120, 120, 60),  # False, True
    ("Blackmanharris, 75%: ", blackmanharris120, 120, 90),  # True , True

    # NOLA
    ("62 ones with 2 zeros appended, 50%: ", ones64, 64, 32),  # False, False
    ("Hann symmetrical, 1/64 (not enough overlap): ", hann_sym64, 64,
     1),  # False, False
    ("Hann symmetrical, 2/64 (not enough overlap): ", hann_sym64, 64,
     2),  # False, True
    ("Hann symmetrical, 3/64 (not enough overlap): ", hann_sym64, 64, 3
     )  # False, True
]

# One row per test case; every cell is a string of at most 64 characters
# (longer descriptions are truncated by the "<U64" dtype).
results = np.empty((len(tests), 3), dtype="<U64")  # no. tests rows, 3 columns

# Evaluate both overlap-add criteria for each (description, window,
# nperseg, noverlap) entry and store the outcomes as text.
for i, (desc, w, nperseg, noverlap) in enumerate(tests):
    results[i] = np.array([
        desc,
        str(signal.check_COLA(w, nperseg, noverlap)),
        str(signal.check_NOLA(w, nperseg, noverlap))
    ])

# Show the outcome as a table.
df = pd.DataFrame(data=results, columns=["Description", "COLA", "NOLA"])
print(df)
示例#17
0
def checkModInputs(fs,
                   frameSize,
                   windowSize,
                   modFrameSize,
                   modWindowSize,
                   window,
                   sym=False):
    """
    This function checks the frame sizes and (a) throws an error if they are not suitable (e.g. if
    the modulation window is not an integral multiple of the acoustic frame step) and (b) prints a
    warning message if the constant overlap-add (COLA) principle is not satisfied.

    Inputs:
        - fs: the sampling frequency of the speech files, form "16000"
        - frameSize: the step size of the acoustic windows in seconds, form "0.001"
        - windowSize: the acoustic window size in seconds, form "0.03"
        - modFrameSize: the step size of the modulation windows in seconds, form "0.1"
        - modWindowSize: the modulation window size in seconds, form "1"
        - window: the window type to use for both acoustic and modulation domains which
          must be of the ones recognised by SciPy without additional variables, form "hamming"
        - sym: whether a symmetric window should be used, form "False"

    Outputs:
        - acWin: the acoustic window as a numpy array
        - modWin: the modulation window as a numpy array

    Raises:
        - Exception: if any of the frame/window size relations checked below is violated.
    """
    from scipy.signal.windows import get_window
    from scipy.signal import check_COLA

    # Work in integer units of 0.1 ms so the comparisons are exact.
    w = round(frameSize * 10000)  # This assumes never less than 0.1 ms steps
    x = round(windowSize * 10000)
    y = round(modFrameSize * 10000)
    z = round(modWindowSize * 10000)

    if (z % w) != 0:
        # The modulation window is measured in acoustic frames, so it must
        # span a whole number of them.
        raise Exception(
            "The modulation window size must be an integral multiple of the acoustic frame size."
        )
    if w > x:
        raise Exception(
            "The acoustic window size must be greater than or equal to the acoustic frame size."
        )
    if y > z:
        raise Exception(
            "The modulation window size must be greater than or equal to the modulation frame size."
        )
    # NOTE(review): the two checks below compare against units of 0.1 ms,
    # while their messages speak of 0.01 s -- confirm which is intended.
    if (modWindowSize * 100000) / 10 != round(z):
        raise Exception(
            "The modulation window size must be a multiple of 0.01 s.")
    if (modFrameSize * 100000) / 10 != round(y):
        raise Exception(
            "The modulation frame size must be a multiple of 0.01 s.")

    print("Frame and window sizes checked, all OK.")

    # Sample counts are rounded (not truncated) and computed once, so the
    # lengths given to check_COLA always match the windows that were built.
    # Truncation turned e.g. 0.3 / 0.1 == 2.9999999999999996 into 2 while
    # the window had 3 samples, which made check_COLA raise.
    acLen = int(round(windowSize * fs))
    acOverlap = int(round(round(windowSize - frameSize, 4) * fs))
    modLen = int(round(modWindowSize / frameSize))
    modOverlap = int(round((modWindowSize - modFrameSize) / frameSize))

    # Testing COLA over modulation windows
    # get_window's third argument is fftbins, the opposite of "symmetric".
    acWin = get_window(window, acLen, not sym)
    modWin = get_window(window, modLen, not sym)

    acCola = check_COLA(acWin, acLen, acOverlap, tol=1e-10)
    if not acCola:
        txt = "The constant overlap-add test (COLA) is not satisfied for the acoustic windows, "
        txt += "so the speech signal may not be perfectly reproducible."
        print(txt)
    else:
        print(
            "The constant overlap-add test (COLA) is satisfied for the acoustic windows."
        )

    modCola = check_COLA(modWin, modLen, modOverlap, tol=1e-10)
    if not modCola:
        txt = "The constant overlap-add test (COLA) is not satisfied for the modulation windows, "
        txt += "so the speech signal may not be perfectly reproducible."
        print(txt)
    else:
        print(
            "The constant overlap-add test (COLA) is satisfied for the modulation windows."
        )

    return acWin, modWin