Exemplo n.º 1
0
def tf_phase_vocode(s, frame_step_in, sampling_rate=16000):
    """Rebuild the phase of every frame from a running phase accumulator.

    Note kept from the original author: this step is unnecessary, and even
    harmful, for reasons that were never pinned down.

    Args:
        s: Complex tensor that is unstacked along axis 0 into frames; its
            static ``shape[1]`` sizes the phase accumulator (presumably
            [frames, bins] -- TODO confirm against callers).
        frame_step_in: Hop between consecutive frames, in samples.
        sampling_rate: Samples per second, used to express the hop in seconds.

    Returns:
        The frames stacked back along axis 0. Frame 0 is passed through
        unchanged; every later frame keeps its magnitude and takes the angle
        of the running accumulator as its phase.
    """
    delta_t = tf.convert_to_tensor(frame_step_in / sampling_rate, tf.complex64)
    imag_i = tf.convert_to_tensor(1j, tf.complex64)
    frames = tf.unstack(s)
    phase_shift = tf.zeros(s.shape[1], tf.complex64)
    # The two slices are copies taken before the loop starts, so each pair
    # holds the *unmodified* neighbouring frames even though entries of
    # `frames` are overwritten inside the loop.
    for i, (frame1, frame2) in enumerate(zip(frames[:-1], frames[1:])):
        phase_change = tf.cast(
            tf.angle(frame2) - tf.angle(frame1), tf.complex64)

        freq_deviation = phase_change / delta_t - frame2
        # Wrap the angle of the deviation into [-pi, pi).
        freq_dev_angle = tf.mod(tf.angle(freq_deviation) + np.pi,
                                2 * np.pi) - np.pi
        freq_dev_angle = tf.cast(freq_dev_angle, tf.complex64)
        freq_dev_mag = tf.cast(tf.abs(freq_deviation), tf.complex64)
        wrapped_freq_deviation = freq_dev_mag * tf.exp(freq_dev_angle * imag_i)
        true_freq = frame2 + wrapped_freq_deviation

        phase_shift = phase_shift + delta_t * true_freq
        # Original magnitude, phase taken from the accumulator.
        true_bins = tf.cast(tf.abs(frame2), tf.complex64) * tf.exp(
            tf.cast(tf.angle(phase_shift), tf.complex64) * imag_i)
        frames[i + 1] = true_bins
    return tf.stack(frames)
Exemplo n.º 2
0
def spectral_loss(expected, actual, mag_weight=1.0, phase_weight=1.0):
    """Weighted STFT magnitude-plus-phase error between two signals.

    Both inputs have their last two axes swapped so the samples sit on the
    last axis, then receive zero-mean Gaussian noise (stddev 1e-5) before
    the transform.

    Args:
        expected: Reference signal tensor (rank 3; presumably
            [batch, samples, channels] -- TODO confirm).
        actual: Signal to compare against the reference, same layout.
        mag_weight: Multiplier on the mean absolute magnitude difference.
        phase_weight: Multiplier on the mean of ``1 - cos(|phase diff|)``.

    Returns:
        ``[loss, expected_stft, actual_stft]``; a NaN loss is replaced by 0.
    """
    # stft works on the last axis, so move the samples there.
    target = tf.transpose(expected, [0, 2, 1])
    estimate = tf.transpose(actual, [0, 2, 1])
    # TODO: Tunable params here (window size, window stride, window type)
    noise_opts = dict(mean=0.0, stddev=0.00001, dtype=tf.float32)
    target_noise = tf.random_normal(shape=tf.shape(target), **noise_opts)
    estimate_noise = tf.random_normal(shape=tf.shape(estimate), **noise_opts)
    stft_opts = dict(window_fn=hamming_window, pad_end=True)
    estft = stft(target + target_noise, 4096, 2048, **stft_opts)
    astft = stft(estimate + estimate_noise, 4096, 2048, **stft_opts)

    magnitude_term = tf.reduce_mean(tf.abs(tf.abs(estft) - tf.abs(astft)))
    # Cosine-similarity. Also consider replacing tf.cos with 1-tf.sin
    phase_gap = tf.abs(tf.angle(astft) - tf.angle(estft))
    phase_term = tf.reduce_mean(1.0 - tf.cos(phase_gap))

    total = mag_weight * magnitude_term + phase_weight * phase_term
    total = tf.where(tf.is_nan(total), 0., total)
    return [total, estft, astft]
Exemplo n.º 3
0
def cdense(x, dim, norm=False, actf=None, complex_activation=False, name=None, use_bias=False, training=True):
    """Complex-valued dense layer contracting the last axis of ``x``.

    Args:
        x: Complex input tensor; its static last dimension is the input width.
        dim: Output width.
        norm: If true, batch-normalise the magnitude, pass it through
            softplus, and re-attach the phase of the projected values.
        actf: Optional activation callable.
        complex_activation: If true, ``actf`` is applied to the real and
            imaginary parts separately; otherwise to the magnitude only,
            with the phase preserved.
        name: Optional scope name (defaults to ``'c_dense'``).
        use_bias: Add a complex bias. Only honoured when ``norm`` is false.
        training: Forwarded to ``tf.layers.batch_normalization``.

    Returns:
        The transformed complex tensor. The complex kernel is also added to
        the ``'kernels'`` collection.
    """
    indim = x.shape.as_list()[-1]
    with tf.name_scope(name, 'c_dense', [x, dim, norm, actf]) as scope:
        with tf.variable_scope(scope):
            w_r = tf.get_variable('kernel_r', shape=[indim, dim])
            w_i = tf.get_variable('kernel_i', shape=[indim, dim])
            w = tf.complex(w_r, w_i)
            tf.add_to_collection('kernels', w)
            x = tf.tensordot(x, w, [[-1], [0]])
            if norm:
                # Take the phase BEFORE replacing x. The previous code
                # overwrote x with a non-negative real-valued tensor first,
                # so tf.angle(x) was always 0 and the phase was lost.
                phase = tf.angle(x)
                magnitude = tf.nn.softplus(
                    tf.layers.batch_normalization(tf.abs(x), training=training))
                x = tf.complex(magnitude, 0.) * tf.exp(tf.complex(0., phase))
            elif use_bias:
                b_r = tf.get_variable('bias_r', shape=[dim])
                b_i = tf.get_variable('bias_i', shape=[dim])
                b = tf.complex(b_r, b_i)
                x = x + b
            if actf is not None:
                if complex_activation:
                    x = tf.complex(actf(tf.real(x)), actf(tf.imag(x)))
                else:
                    # Activate the magnitude only and keep the phase.
                    x = tf.complex(actf(tf.abs(x)), 0.) * \
                        tf.exp(tf.complex(0., tf.angle(x)))
    return x
Exemplo n.º 4
0
def analysis(x, N_w, N_s, NFFT, legacy=False):
    '''
    Polar form acoustic-domain analysis.

    Input/s:
        x - noisy speech.
        N_w - time-domain window length (samples).
        N_s - time-domain window shift (samples).
        NFFT - acoustic-domain DFT components.
        legacy - if True, use the default window of tf.signal.stft;
            otherwise use a non-periodic Hamming window.

    Output/s:
        Magnitude and phase spectrums.
    '''

    # Both modes pad the end of the signal; only the window differs.
    stft_options = {'pad_end': True}
    if not legacy:
        stft_options['window_fn'] = functools.partial(
            window_ops.hamming_window, periodic=False)

    ## MAGNITUDE & PHASE SPECTRUMS (ACOUSTIC DOMAIN)
    spectrum = tf.signal.stft(x, N_w, N_s, NFFT, **stft_options)
    return tf.abs(spectrum), tf.angle(spectrum)
Exemplo n.º 5
0
def rmse_angle(pred_a, pred_v, label_a, label_v):
    """Sum the error terms for both components and for the angle they form.

    The angle is that of the complex number ``a + i*v``, computed for the
    predictions and for the labels. For each of the three comparisons the
    second item returned by ``l1diff_rms_error`` is used (presumably the
    RMS error -- confirm against that helper).

    Args:
        pred_a: Predicted first component (real part of the complex number).
        pred_v: Predicted second component (imaginary part).
        label_a: Ground-truth first component.
        label_v: Ground-truth second component.

    Returns:
        ``rmse_angle + rmse_a + rmse_v``.
    """
    angle_p = tf.angle(tf.complex(pred_a, pred_v))
    angle_y = tf.angle(tf.complex(label_a, label_v))

    _, rmse_v = l1diff_rms_error(pred_v, label_v)
    _, rmse_a = l1diff_rms_error(pred_a, label_a)
    _, rmse_an = l1diff_rms_error(angle_p, angle_y)

    # A stray comma used to turn this into the tuple
    # `(rmse_an + rmse_a, +rmse_v)`, which dropped rmse_v from the sum and
    # applied a unary plus that tf.Tensor does not overload.
    return rmse_an + rmse_a + rmse_v
Exemplo n.º 6
0
    def stfts_to_specgrams(self, stfts):
        """Converts stfts to specgrams.

        Args:
          stfts: Complex64 tensor of stft, shape [batch, time, freq, channels].
            Channels 0 and 1 are read when ``self._channel_mode`` is
            'stereo'; only channel 0 is read otherwise.

        Returns:
          specgrams: Tensor of log magnitudes and phase features
            (instantaneous frequency when ``self._ifreq`` is set, otherwise
            the phase angle divided by pi). Shape [batch, time, freq, 2] in
            mono mode; in stereo mode the left pair is followed by the right
            pair along the last axis, giving [batch, time, freq, 4].
        """

        def channel_to_specgram(channel):
            # channel: one [batch, time, freq] slice of the input.
            logmag = self._safe_log(tf.abs(channel))
            phase_angle = tf.angle(channel)
            if self._ifreq:
                p = spectral_ops.instantaneous_frequency(phase_angle)
            else:
                p = phase_angle / np.pi
            return tf.concat(
                [logmag[:, :, :, tf.newaxis], p[:, :, :, tf.newaxis]],
                axis=-1)

        # STEREO
        if self._channel_mode == 'stereo':
            left = channel_to_specgram(stfts[:, :, :, 0])
            right = channel_to_specgram(stfts[:, :, :, 1])
            return tf.concat((left, right), axis=3)

        # MONO
        return channel_to_specgram(stfts[:, :, :, 0])
Exemplo n.º 7
0
def spectralLoss(expected, actual, mag_weight=1.0, phase_weight=1.0):
    """Weighted STFT magnitude-plus-phase error between two signals.

    Args:
        expected: Reference signal tensor (rank 3; presumably
            [batch, samples, channels] -- TODO confirm).
        actual: Signal to compare against the reference, same layout.
        mag_weight: Multiplier on the mean absolute magnitude difference.
        phase_weight: Multiplier on the mean of ``1 - cos(|phase diff|)``.

    Returns:
        ``mag_weight * magnitude_error + phase_weight * phase_error``.
    """

    # TODO: Tunable params here (window size, window stride, window type)
    def transform(signal):
        # stft works on the last axis, so move the samples there first.
        samples_last = tf.transpose(signal, [0, 2, 1])
        return stft(samples_last, 4096, 2048,
                    window_fn=hamming_window, pad_end=True)

    expected_stft = transform(expected)
    actual_stft = transform(actual)

    magnitude_error = tf.reduce_mean(
        tf.abs(tf.abs(expected_stft) - tf.abs(actual_stft)))
    # Cosine-similarity. Also consider replacing tf.cos with 1-tf.sin
    phase_gap = tf.abs(tf.angle(actual_stft) - tf.angle(expected_stft))
    phase_error = tf.reduce_mean(1.0 - tf.cos(phase_gap))
    return mag_weight * magnitude_error + phase_weight * phase_error
Exemplo n.º 8
0
 def get_angle(self, x):
     """Return the phase angle of ``x``.

     The real and imaginary parts are obtained through
     ``self.get_realpart`` / ``self.get_imagpart`` and recombined into a
     complex tensor whose angle is returned.
     """
     re_part = self.get_realpart(x)
     im_part = self.get_imagpart(x)
     # Replaces an earlier arctan2(imag, real) formulation.
     return tf.angle(tf.complex(re_part, im_part))
Exemplo n.º 9
0
    def complex_networks_forward(self, mixed_wav_batch):
        """Run the complex network on a batch of mixed waveforms.

        Args:
            mixed_wav_batch: Waveform batch; the estimated waveform is cut
                back to the size of its last axis.

        Returns:
            Tuple ``(est_clean_mag, est_clean_spec, est_clean_wav)``: the
            magnitude of the estimated spectrum, the estimated complex
            spectrum, and the waveform resynthesised from it.
        """
        spec = misc_utils.tf_batch_stft(mixed_wav_batch,
                                        PARAM.frame_length,
                                        PARAM.frame_step)
        is_training = self.mode == PARAM.MODEL_TRAIN_KEY

        # Optionally clip the magnitude while leaving the phase untouched.
        if PARAM.complex_clip_mag is True:
            magnitude = tf.abs(spec)
            phase = tf.angle(spec)
            magnitude = tf.clip_by_value(
                magnitude, 0.0, float(PARAM.complex_clip_mag_max))
            spec = tf.complex(magnitude, 0.0) * tf.exp(tf.complex(0.0, phase))

        net_out = self.CCNN_CRNN_CFC(spec, is_training)
        if PARAM.net_out_mask:
            # The network output is a mask applied to the mixed spectrum.
            est_spec = c_ops.tf_complex_multiply(net_out, spec)
        else:
            # The network output is the spectrum estimate itself.
            est_spec = net_out

        wav_len = tf.shape(mixed_wav_batch)[-1]
        padded_wav = misc_utils.tf_batch_istft(
            est_spec, PARAM.frame_length, PARAM.frame_step)
        # With stft pad_end=True the resynthesised waveform may be longer
        # than the input, so cut it back to the input length.
        est_wav = tf.slice(padded_wav, [0, 0], [-1, wav_len])

        return tf.math.abs(est_spec), est_spec, est_wav
Exemplo n.º 10
0
def reduce_mean_angle(weights, angles, use_complex=False, name=None):
    """ Weighted mean of angles, computed on the unit circle.

        Two equivalent routes are available: complex exponentials, or
        separate sine/cosine sums. The original author notes that the
        complex route gives wrong gradients for some reason, while its
        forward calculation is fine.
        See https://en.wikipedia.org/wiki/Mean_of_circular_quantities
    Args:
        weights: [BATCH_SIZE, NUM_ANGLES]
        angles:  [NUM_ANGLES, NUM_DIHEDRALS]
        use_complex: choose the complex-exponential route when true.
        name: optional name scope (defaults to 'reduce_mean_angle').
    Returns:
                 [BATCH_SIZE, NUM_DIHEDRALS]
    """

    with tf.name_scope(name, 'reduce_mean_angle', [weights, angles]) as scope:
        weights = tf.convert_to_tensor(weights, name='weights')
        angles = tf.convert_to_tensor(angles, name='angles')

        if not use_complex:
            # Real-valued route: mix the sine and cosine coordinates
            # separately, then recover the angle.
            y_coords = tf.matmul(weights, tf.sin(angles))
            x_coords = tf.matmul(weights, tf.cos(angles))
            return tf.atan2(y_coords, x_coords, name=scope)

        # Complex route: weights become complex numbers, angles become
        # points on the unit circle, and the angle of the mix is returned.
        complex_weights = tf.complex(weights, 0.)
        circle_points = tf.exp(tf.complex(0., angles))
        mixed_points = tf.matmul(complex_weights, circle_points)
        return tf.angle(mixed_points, name=scope)
Exemplo n.º 11
0
def batch_time_compressedStft_mse(y1, y2, compress_idx):
    """Squared error between two magnitude-compressed complex spectra.

    Each input has its magnitude raised to ``compress_idx`` while its phase
    is kept. The real and imaginary parts are then concatenated on the last
    axis. Squared differences are summed over axis 0 and averaged over the
    remaining axes.

    y1: complex, [batch, time, feature_dim]
    y2: complex, [batch, time, feature_dim]
    """

    def compress_and_split(spec):
        # Compress the magnitude, re-attach the phase, then lay the real
        # and imaginary parts side by side.
        magnitude = tf.pow(tf.abs(spec), compress_idx)
        phase = tf.angle(spec)
        compressed = tf.complex(magnitude, 0.0) * tf.exp(
            tf.complex(0.0, phase))
        return tf.concat([tf.real(compressed), tf.imag(compressed)], -1)

    squared_diff = tf.square(compress_and_split(y1) - compress_and_split(y2))
    return tf.reduce_mean(tf.reduce_sum(squared_diff, 0))
Exemplo n.º 12
0
def wav2spec(src_dir, dst_dir):
    """
    Converts all wav files to spectrograms.
    Also appends the paths to all spectrograms to ``dst_dir + "_list.txt"``.

    Only mono files sampled at 16 kHz are accepted (enforced with asserts);
    files that are not exactly 16000 samples long are skipped.

    :param src_dir: Directory whose sub-folders contain the wav files.
    :param dst_dir: Converted spectrograms will be saved here.
    :return: -
    """

    g = tf.Graph()
    with g.as_default():
        samples_pl = tf.placeholder(shape=[1, 16000], dtype=tf.float32)
        stft = tf.contrib.signal.stft(samples_pl, 400, 160, 400)
        mag_graph = tf.abs(stft)
        l_mag_graph = tf.log(mag_graph + 0.000001)
        phase_graph = tf.angle(stft)

        disregarded_folders = ['DS_Store', '@eaDir', '.DS_Store']

        # Context managers make sure the session and the list file are
        # released even when a file fails half-way through.
        with tf.Session() as sess, open(dst_dir + "_list.txt", "a") as f:

            # iterate over all audio (wav) files:
            for folder in os.listdir(src_dir):
                if not os.path.isdir(src_dir + '/' + folder):
                    continue
                print(folder)
                i = 0
                if folder in disregarded_folders:
                    continue
                for wav in os.listdir(src_dir + '/' + folder):
                    if wav in disregarded_folders:
                        continue
                    if not wav.endswith(".wav"):
                        continue
                    path = src_dir + '/' + folder + '/' + wav
                    if not os.path.isfile(path):
                        continue

                    # convert wav to spectrogram:
                    samplerate, samples = scipy.io.wavfile.read(path)
                    assert samplerate == 16000
                    assert len(samples.shape) == 1
                    # Skip wrong-length (including empty) clips before any
                    # arithmetic; the placeholder only accepts 16000 samples.
                    if samples.shape[0] != 16000:
                        continue
                    # Widen first: abs() on integer PCM overflows at the
                    # most negative value (e.g. -32768 for int16).
                    samples = samples.astype(np.float64)
                    samples = samples / (np.max(np.abs(samples)) + 0.000001)
                    samples = samples.astype(np.float32)
                    samples = np.reshape(samples, (1, -1))
                    l_mag, phase = sess.run([l_mag_graph, phase_graph],
                                            feed_dict={samples_pl: samples})
                    phase = get_phase_difference(phase[0])
                    spectrogram = stack(l_mag[0], phase)

                    # save the spectrogram:
                    file_ending = "/" + folder + str(i).zfill(5)
                    np.save(dst_dir + file_ending, spectrogram)
                    f.write(dst_dir + file_ending + ".npy\n")

                    if i % 500 == 0:
                        # Same output as the old Python 2 print statement.
                        print("iteration in folder:  %s" % i)
                    i += 1
Exemplo n.º 13
0
    def inference(self, input, seed, amppattern):
        """Build the inference graph and return the four predicted maps.

        Side effects: stores ``seed`` on ``self.ranseed`` and the static
        leading dimension of ``input`` on ``self.batch_size``.

        Args:
            input: Network input tensor, passed to ``self.encoder_net``.
            seed: Value recorded on ``self.ranseed``.
            amppattern: Pattern added to the amplitude map after it gains a
                trailing and a leading axis and is cast to float32.

        Returns:
            Tuple ``(realmap, ampmap, phamap, finalmap)``. ``realmap`` and
            ``finalmap`` go through ``self._normlized_0to1``; ``phamap`` is
            the normalised map rescaled by ``* 2 * pi - pi``; ``ampmap`` is
            the decoder map plus the amplitude pattern.
        """
        self.ranseed = seed
        self.batch_size = input.get_shape().as_list()[0]

        def decode(scope_name, features, filters, blocktype):
            # All decoder heads share the fusion index and disable
            # multi-fusion.
            return self.onstreamDecoder(
                scope_name,
                features,
                filters,
                fuseindex=self.fusion_index,
                blocktype=blocktype,
                multifusiion=False)

        with tf.variable_scope('inference'):
            encoded, nb_filter = self.encoder_net(input)
            half = int(encoded.get_shape().as_list()[-1] / 2)
            projected = self.ComplexProjectLayer('project',
                                                 encoded,
                                                 pretrain=self.pretrain,
                                                 trainable=self.trainable,
                                                 use_bias=self.use_bias,
                                                 type=self.prohecttype)
            # First half of the channels is read as the real part, second
            # half as the imaginary part.
            as_complex = tf.complex(encoded[..., :half], encoded[..., half:])
            phase_in = tf.angle(as_complex)
            amplitude_in = tf.abs(as_complex)

            realfeature, realmap = decode(
                'realdecoder', projected, nb_filter, [1, 1, 2, 2])
            amplitudefea, ampmap = decode(
                'ampdecoder', amplitude_in, nb_filter, [1, 1, 1, 1])
            phasefea, phamap = decode(
                'phadecoder', phase_in, nb_filter, [1, 1, 1, 1])

            realmap = self._normlized_0to1(realmap)
            phamap = self._normlized_0to1(phamap) * 2 * np.pi - np.pi

            pattern = tf.expand_dims(amppattern, -1)
            pattern = tf.expand_dims(pattern, 0)
            pattern = tf.cast(pattern, tf.float32)
            ampmap = ampmap + pattern

            real_shape = realfeature.get_shape().as_list()
            recombined = self.feaIDFT(phasefea, amplitudefea)
            # Match the spatial size of the real-branch features before
            # concatenating the two on the channel axis.
            recombined = tf.image.resize_images(
                recombined, (real_shape[1], real_shape[2]))
            fused = tf.concat([realfeature, recombined], axis=-1)

            fused_filters = int(real_shape[-1]) * 2
            _, finalmap = decode(
                'finaldecoder', fused, fused_filters, [1, 1, 2, 2])
            finalmap = self._normlized_0to1(finalmap)
        return realmap, ampmap, phamap, finalmap
Exemplo n.º 14
0
    def postprocessing(self):
        """Resynthesise waveforms from ``self.separated`` using the phase of ``self.stfts``.

        The separated values are treated as magnitudes (zero imaginary
        part) and combined with the angle of ``self.stfts``, repeated
        ``self.S`` times. The inverse STFT of the result is reshaped to
        ``[self.B, self.S, -1]``, stored on ``self.output``, logged as an
        audio summary and returned.
        """
        magnitudes = tf.reshape(self.separated, [self.B * self.S, -1, self.F])

        # Repeat the phase S times along a new axis 1, then fold that axis
        # into the leading one.
        phase = tf.angle(self.stfts)
        phase_shape = tf.shape(phase)
        tiled = tf.tile(tf.expand_dims(phase, 1), [1, self.S, 1, 1])
        phase = tf.reshape(tiled, phase_shape * [self.S, 1, 1])

        spectrum = tf.complex(magnitudes, 0.0 * magnitudes) * tf.exp(
            tf.complex(0.0, phase))

        waveform = tf.contrib.signal.inverse_stft(
            spectrum,
            frame_length=self.window_size,
            frame_step=self.hop_size,
            window_fn=tf.contrib.signal.inverse_stft_window_fn(self.hop_size))
        result = tf.reshape(waveform, [self.B, self.S, -1])
        self.output = result
        tf.summary.audio(name="audio/output/reconstructed",
                         tensor=tf.reshape(result, [-1, self.L]),
                         sample_rate=config.fs,
                         max_outputs=4)

        return result
Exemplo n.º 15
0
def cardioid(x):
    """Scale a complex tensor by ``0.5 * (1 + cos(angle(x)))``.

    The real-valued scale is applied to the real and imaginary parts
    separately, so the phase of ``x`` is unchanged wherever the scale is
    positive.
    """
    scale = 0.5 * (1 + tf.cos(tf.angle(x)))
    return tf.complex(tf.real(x) * scale, tf.imag(x) * scale)
Exemplo n.º 16
0
    def _tf_fft_process(self, tf_input):
        """Return smoothed magnitude and phase of the frame-averaged STFT.

        The STFT is averaged over axis 1 (frames, per the original shape
        comments). Magnitude and phase are each smoothed with a sliding
        mean along the last axis, of width ``self.params.fft_average`` and
        ``self.params.phase_smooth`` respectively. The tail is padded with
        the overall mean of the quantity being smoothed.

        Returns:
            Tuple ``(mag, phase)``.
        """

        def sliding_mean(values, width):
            # Width-`width`, stride-1 windows, averaged per window.
            windows = tf.contrib.signal.frame(
                values,
                width,
                1,
                pad_end=True,
                pad_value=tf.reduce_mean(values))
            return tf.reduce_mean(windows, axis=-1)

        with tf.device('/gpu:0'):
            spectrum = tf.contrib.signal.stft(
                tf_input,
                frame_length=self.params.fft_length,
                frame_step=self.params.fft_step,
                pad_end=True)  # [channels, frames, ffts]
            spectrum = tf.reduce_mean(spectrum, axis=1)  # [channels, ffts]

            # Spatial fft average
            mag = sliding_mean(tf.abs(spectrum), self.params.fft_average)

        # As in the original, the phase ops are created outside the
        # explicit GPU device scope.
        # Spatial angle average
        phase = sliding_mean(tf.angle(spectrum), self.params.phase_smooth)

        return mag, phase
Exemplo n.º 17
0
def convert_to_spectrogram(waveforms, waveform_length, sample_rate,
                           spectrogram_shape, overlap):
    """Turn waveforms into normalised log-mel magnitudes and mel instantaneous frequencies.

    Args:
        waveforms: Rank-2 waveform tensor; axis 1 is front-padded.
        waveform_length: Current length of axis 1 of ``waveforms``.
        sample_rate: Sample rate used to build the mel weight matrix.
        spectrogram_shape: ``(time_steps, num_freq_bins)`` of the result.
        overlap: Fraction of a frame shared by consecutive frames.

    Returns:
        Tuple ``(log_mel_magnitudes, mel_instantaneous_frequencies)``.
    """
    time_steps, num_freq_bins = spectrogram_shape
    frame_length = num_freq_bins * 2
    frame_step = int((1.0 - overlap) * frame_length)
    num_samples = frame_step * (time_steps - 1) + frame_length

    def standardize(values, mean, stddev):
        return (values - mean) / stddev

    # For Nsynth dataset, we are putting all padding in the front
    # This causes edge effects in the tail
    front_padding = [[0, 0], [num_samples - waveform_length, 0]]
    waveforms = tf.pad(waveforms, front_padding)

    window = functools.partial(tf.signal.hann_window, periodic=True)
    stfts = tf.signal.stft(signals=waveforms,
                           frame_length=frame_length,
                           frame_step=frame_step,
                           window_fn=window)
    # discard_dc
    stfts = stfts[..., 1:]

    magnitudes = tf.abs(stfts)
    phases = tf.angle(stfts)

    # this matrix can be constant by graph optimization `Constant Folding`
    # since there are no Tensor inputs
    mel_matrix = tf.signal.linear_to_mel_weight_matrix(
        num_mel_bins=num_freq_bins,
        num_spectrogram_bins=num_freq_bins,
        sample_rate=sample_rate,
        lower_edge_hertz=0.0,
        upper_edge_hertz=sample_rate / 2.0)

    def to_mel(spectrograms):
        # Project onto the mel basis and restore the static shape, which
        # tensordot does not carry through.
        projected = tf.tensordot(spectrograms, mel_matrix, axes=1)
        projected.set_shape(spectrograms.shape[:-1].concatenate(
            mel_matrix.shape[-1:]))
        return projected

    mel_magnitudes = to_mel(magnitudes)
    mel_phases = to_mel(phases)

    log_mel_magnitudes = tf.log(mel_magnitudes + 1.0e-6)
    mel_ifreqs = instantaneous_frequency(mel_phases, axis=-2)

    log_mel_magnitudes = standardize(log_mel_magnitudes, -3.76, 10.05)
    mel_ifreqs = standardize(mel_ifreqs, 0.0, 1.0)

    return log_mel_magnitudes, mel_ifreqs
Exemplo n.º 18
0
def rect_to_polar(X):
    """Convert ``X`` to polar form, stacked as ``[magnitude, angle]`` on a new last axis.

    ``X`` is first turned into a complex tensor by ``channels_to_complex``.
    When that tensor's static last dimension is 1, the axis is squeezed out
    of both components before stacking.
    """
    Z = channels_to_complex(X)
    components = [tf.abs(Z), tf.angle(Z)]

    if Z.shape[-1] == 1:
        components = [tf.squeeze(part, (-1)) for part in components]

    return tf.stack(components, axis=-1)
Exemplo n.º 19
0
 def call(self, x):
     """Apply ``self.activation`` to a complex tensor.

     With ``self.use_magnitude`` set, the activation acts on the magnitude
     and the phase is re-attached. Otherwise it acts on the real and
     imaginary parts independently.
     """
     if not self.use_magnitude:
         return tf.complex(self.activation(tf.real(x)),
                           self.activation(tf.imag(x)))

     magnitude = tf.abs(x)
     phase = tf.angle(x)
     activated = self.activation(magnitude)
     return tf.complex(activated, 0.) * tf.exp(tf.complex(0., phase))
Exemplo n.º 20
0
 def call(self, x, training=None):
     """Apply dropout to a complex tensor.

     With ``self.use_magnitude`` set, ``self.dropout_real`` acts on the
     magnitude and the phase is re-attached. Otherwise
     ``self.dropout_real`` and ``self.dropout_imag`` act on the real and
     imaginary parts respectively.

     NOTE(review): ``training`` is accepted but never forwarded to the
     dropout callables -- confirm whether that is intended.
     """
     if not self.use_magnitude:
         return tf.complex(self.dropout_real(tf.real(x)),
                           self.dropout_imag(tf.imag(x)))

     magnitude = tf.abs(x)
     phase = tf.angle(x)
     dropped = self.dropout_real(magnitude)
     return tf.complex(dropped, 0.) * tf.exp(tf.complex(0., phase))
Exemplo n.º 21
0
 def add_histogram(cls, name, x):
     """Record histogram summaries for ``x`` under ``name``.

     A tensor of dtype ``cls.TF_REAL`` gets one histogram. A tensor of
     dtype ``cls.TF_COMPLEX`` gets an 'amplitude' and a 'phase' histogram
     inside a name scope called ``name``.

     Raises:
         TypeError: if ``x.dtype`` is neither of the two supported dtypes.
     """
     if x.dtype == cls.TF_REAL:
         tf.summary.histogram(name, x)
         return

     if x.dtype == cls.TF_COMPLEX:
         with tf.name_scope(name):
             tf.summary.histogram('amplitude', tf.abs(x))
             tf.summary.histogram('phase', tf.angle(x))
         return

     raise TypeError('Variable has the unsupported type {}'.format(
         x.dtype))
Exemplo n.º 22
0
def convert_to_spectrograms(waveforms, waveform_length, sample_rate,
                            spectrogram_shape, overlap):
    """Turn waveforms into normalised log-mel magnitudes and mel instantaneous frequencies.

    Args:
        waveforms: Rank-2 waveform tensor; axis 1 is front-padded.
        waveform_length: Current length of axis 1 of ``waveforms``.
        sample_rate: Sample rate used to build the mel weight matrix.
        spectrogram_shape: ``(time_steps, num_freq_bins)`` of the result.
        overlap: Fraction of a frame shared by consecutive frames.

    Returns:
        Tuple ``(log_mel_magnitudes, mel_instantaneous_frequencies)``.
    """
    time_steps, num_freq_bins = spectrogram_shape
    frame_length = num_freq_bins * 2
    frame_step = int((1 - overlap) * frame_length)
    num_samples = frame_step * (time_steps - 1) + frame_length

    def standardize(values, mean, std):
        return (values - mean) / std

    # For Nsynth dataset, we are putting all padding in the front
    # This causes edge effects in the tail
    front_padding = [[0, 0], [num_samples - waveform_length, 0]]
    waveforms = tf.pad(waveforms, front_padding)

    window = functools.partial(tf.signal.hann_window, periodic=True)
    stfts = tf.signal.stft(signals=waveforms,
                           frame_length=frame_length,
                           frame_step=frame_step,
                           window_fn=window)
    # discard_dc
    stfts = stfts[..., 1:]

    magnitudes = tf.abs(stfts)
    phases = tf.angle(stfts)

    mel_matrix = tf.signal.linear_to_mel_weight_matrix(
        num_mel_bins=num_freq_bins,
        num_spectrogram_bins=num_freq_bins,
        sample_rate=sample_rate,
        lower_edge_hertz=0,
        upper_edge_hertz=sample_rate / 2)

    def to_mel(spectrograms):
        # Project onto the mel basis and restore the static shape, which
        # tensordot does not carry through.
        projected = tf.tensordot(spectrograms, mel_matrix, axes=1)
        projected.set_shape(spectrograms.shape[:-1].concatenate(
            mel_matrix.shape[-1:]))
        return projected

    mel_magnitudes = to_mel(magnitudes)
    mel_phases = to_mel(phases)

    log_mel_magnitudes = tf.log(mel_magnitudes + 1e-6)
    mel_ifreqs = instantaneous_frequency(mel_phases)

    log_mel_magnitudes = standardize(log_mel_magnitudes, -4, 10)
    mel_ifreqs = standardize(mel_ifreqs, 0, 1)

    return log_mel_magnitudes, mel_ifreqs
Exemplo n.º 23
0
def sumofsq(image_in, keep_dims=False, axis=-1, name="sumofsq", type="mag"):
    """Compute square root of sum of squares.

    Args:
        image_in: Input tensor.
        keep_dims: Forwarded to ``tf.reduce_sum``.
        axis: Axis to sum over.
        name: Variable scope the ops are created in.
        type: ``"mag"`` squares the magnitude of the input; any other value
            squares its phase angle instead.
    """
    with tf.variable_scope(name):
        component = tf.abs(image_in) if type == "mag" else tf.angle(image_in)
        total = tf.reduce_sum(
            tf.square(component), keep_dims=keep_dims, axis=axis)
        root = tf.sqrt(total)

    return root
 def __SFKernel__(self, mycount, myimage, myimage_fft_mod):
     """One iteration step that replaces the Fourier modulus with ``self._modulus``.

     The image is transformed as ``2 * support * image - image`` before and
     after the Fourier-domain step. That step keeps the phase of the
     image's FFT and swaps its modulus for ``self._modulus``.

     Returns:
         ``(mycount - 1, updated image, myimage_fft_mod)``; the last item
         is passed through unchanged.
     """
     myimage = 2. * (self._support * myimage) - myimage
     fourier_phase = tf.angle(tf.fft3d(myimage))
     unit_phasor = tf.exp(
         tf.complex(tf.zeros(myimage.shape), fourier_phase))
     myimage = tf.ifft3d(tf.multiply(self._modulus, unit_phasor))
     myimage = 2. * (self._support * myimage) - myimage
     mycount -= 1
     return mycount, myimage, myimage_fft_mod
Exemplo n.º 25
0
 def enhanced_sources(self):
     """Return the enhanced sources, building and caching them on first use.

     The mixture magnitude is compressed with exponent 0.3, multiplied by
     ``self.prediction`` (cast to float32), and expanded back with exponent
     ``1 / 0.3``. ``get_sources`` then combines the result with the mixture
     phase.
     """
     if self._enhanced_sources is None:
         compressed_mixture = tf.abs(self.mixed_specs)**0.3
         mask = tf.cast(self.prediction, tf.float32)
         masked_mag_specs = (compressed_mixture * mask)**(1 / 0.3)
         mixture_phase = tf.angle(self.mixed_specs)
         self._enhanced_sources = get_sources(
             masked_mag_specs,
             mixture_phase,
             num_samples=self.num_audio_samples)
     return tf.identity(self._enhanced_sources, name='enhanced_sources')
Exemplo n.º 26
0
def grinffin_lim_tf(magnitude_spec, iterations=hparams['iterations']):
    """Estimate a waveform from a magnitude spectrogram by iterative phase refinement.

    Starts from uniformly random phases. On each iteration the signal is
    resynthesised, its STFT phase is taken, and that phase is recombined
    with the given magnitudes.

    Args:
        magnitude_spec: [frames, fft_bins], of type tf.float32.
        iterations: Number of refinement passes.

    Returns:
        The waveform produced by ``istft_tf`` after the final pass.
    """
    random_unit = tf.cast(
        tf.random_uniform(tf.shape(magnitude_spec)), dtype=tf.complex64)
    phasors = tf.cast(tf.exp(2j * np.pi * random_unit), dtype=tf.complex64)
    magnitudes = tf.cast(tf.abs(magnitude_spec), tf.complex64)

    y = istft_tf(magnitudes * phasors)
    for _ in range(iterations):
        # Keep only the phase of the current estimate's STFT.
        estimated_phase = tf.cast(tf.angle(stft_tf(y)), tf.complex64)
        phasors = tf.exp(1j * estimated_phase)
        y = istft_tf(magnitudes * phasors)
    return y
Exemplo n.º 27
0
 def call(self, feature_sP, training):
     '''
 Run the input through every layer in ``self._layers`` and return the
 unit-magnitude phasor of the result.

 return [batch, T, F]->complex

 NOTE(review): ``training`` is accepted but not passed to the layers.
 '''
     hidden = feature_sP
     for layer_fn in self._layers:
         hidden = layer_fn(hidden)
     # hidden: [batch, T, F, 2] -- the last axis holds (real, imag)
     as_complex = tf.complex(hidden[..., 0], hidden[..., 1])
     # Discard the magnitude and keep only the phase.
     phase = tf.angle(as_complex)
     return tf.exp(tf.complex(0.0, phase))
 def __ERKernel__(self, mycount, myimage, myimage_fft_mod):
     """One iteration step: enforce the Fourier modulus, then the support.

     The modulus of the image's FFT is replaced by ``self._modulus`` while
     the phase is kept. The inverse transform is multiplied by
     ``self._support``, and the FFT modulus of that result is returned as
     a complex64 tensor.

     Returns:
         ``(mycount - 1, updated image, FFT modulus of the updated image)``.
     """
     fourier_phase = tf.angle(tf.fft3d(myimage))
     unit_phasor = tf.exp(
         tf.complex(tf.zeros(myimage.shape), fourier_phase))
     myimage = tf.ifft3d(tf.multiply(self._modulus, unit_phasor))
     myimage = tf.multiply(myimage, self._support)
     updated_modulus = tf.abs(tf.fft3d(myimage))
     myimage_fft_mod = tf.cast(updated_modulus, dtype=tf.complex64)
     mycount -= 1
     return mycount, myimage, myimage_fft_mod
 def __HIOKernel__(self, mycount, myimage, myimage_fft_mod):
     """One iteration step mixing the modulus-constrained image with the previous one.

     The modulus of the image's FFT is replaced by ``self._modulus`` while
     the phase is kept. Where ``self._support`` applies, the constrained
     image is used. Where ``self._support_comp`` applies, the result is
     ``previous - self._beta * constrained``.

     Returns:
         ``(mycount - 1, updated image, myimage_fft_mod)``; the last item
         is passed through unchanged.
     """
     previous = tf.identity(myimage)
     fourier_phase = tf.angle(tf.fft3d(myimage))
     unit_phasor = tf.exp(
         tf.complex(tf.zeros(myimage.shape), fourier_phase))
     myimage = tf.ifft3d(tf.multiply(self._modulus, unit_phasor))
     inside = tf.multiply(self._support, myimage)
     outside = tf.multiply(self._support_comp,
                           previous - self._beta * myimage)
     myimage = inside + outside
     mycount -= 1
     return mycount, myimage, myimage_fft_mod
Exemplo n.º 30
0
def preprocess(x):
    """Turn a waveform batch into a two-channel (amplitude, phase) spectrogram.

    Returns:
        Float tensor of shape [bs, time, freq_bins, 2]: ``log1p`` of the
        STFT magnitude and the STFT phase divided by pi.
    """
    frame_length = 400  # 16000 [samples per second] * 0.025 [s] -- default stft window frame
    frame_step = 160  # 16000 * 0.010 -- default stride
    specgram = signal.stft(x, frame_length, frame_step)
    # specgram is a complex tensor, so split it into abs and phase parts:
    phase = tf.angle(specgram) / np.pi
    # log(1 + abs) is a default transformation for energy units
    amp = tf.log1p(tf.abs(specgram))
    stacked = tf.stack([amp, phase], axis=3)  # shape is [bs, time, freq_bins, 2]
    return tf.to_float(stacked)
Exemplo n.º 31
0
  def stfts_to_specgrams(self, stfts):
    """Converts stfts to specgrams.

    Only channel 0 of the last axis is used.

    Args:
      stfts: Complex64 tensor of stft, shape [batch, time, freq, 1].

    Returns:
      specgrams: Tensor of log magnitudes and phase features (instantaneous
        frequency when `self._ifreq` is set, otherwise phase angle divided
        by pi), shape [batch, time, freq, 2].
    """
    channel = stfts[:, :, :, 0]

    log_magnitude = self._safe_log(tf.abs(channel))

    angle = tf.angle(channel)
    if self._ifreq:
      phase_feature = spectral_ops.instantaneous_frequency(angle)
    else:
      phase_feature = angle / np.pi

    # Give each feature a trailing axis and join them on it.
    features = [log_magnitude[:, :, :, tf.newaxis],
                phase_feature[:, :, :, tf.newaxis]]
    return tf.concat(features, axis=-1)