Пример #1
0
def test_save_load():
    """Run ``save_load_compare`` on models built from each spectrogram layer.

    Covered layers: plain STFT, melspectrogram, log-frequency spectrogram,
    STFT magnitude/phase and STFT magnitude. The STFT output is compared with
    ``allclose_complex_numbers``; every other output is compared with
    ``np.testing.assert_allclose``.
    """
    # Only the batched source and its shape are needed; the first returned
    # value is not used by this test.
    _, batch_src, input_shape = get_audio(data_format='channels_last', n_ch=1)

    # STFT
    stft_layer = STFT(input_shape=input_shape, pad_begin=True)
    save_load_compare(stft_layer, batch_src, allclose_complex_numbers)

    # melspectrogram
    melspec_layer = get_melspectrogram_layer(
        input_shape=input_shape, return_decibel=True
    )
    save_load_compare(melspec_layer, batch_src, np.testing.assert_allclose)

    # log-frequency spectrogram
    log_freq_layer = get_log_frequency_spectrogram_layer(
        input_shape=input_shape, return_decibel=True
    )
    save_load_compare(log_freq_layer, batch_src, np.testing.assert_allclose)

    # STFT magnitude + phase
    mag_phase_layer = get_stft_mag_phase(
        input_shape=input_shape, return_decibel=True
    )
    save_load_compare(mag_phase_layer, batch_src, np.testing.assert_allclose)

    # STFT magnitude
    magnitude_layer = get_stft_magnitude_layer(input_shape=input_shape)
    save_load_compare(magnitude_layer, batch_src, np.testing.assert_allclose)
Пример #2
0
 def _get_stft_model(following_layer=None, tflite_compatible=False):
     """Build a Sequential model containing a single STFT layer.

     The STFT settings (``n_fft``, ``win_length``, ``hop_length``, ``pad_end``,
     ``data_format``, ``input_shape``) are read from the enclosing scope.

     :param following_layer: optional layer appended after the STFT layer
     :param tflite_compatible: use ``STFTTflite`` instead of ``STFT`` when true
     :returns: the assembled Sequential model
     """
     model = tensorflow.keras.models.Sequential()

     # Both layer classes are constructed with exactly the same arguments,
     # so only the class itself depends on the flag.
     stft_layer_cls = STFTTflite if tflite_compatible else STFT
     model.add(
         stft_layer_cls(
             n_fft=n_fft,
             win_length=win_length,
             hop_length=hop_length,
             window_name=None,
             pad_end=pad_end,
             input_data_format=data_format,
             output_data_format=data_format,
             input_shape=input_shape,
             name='stft',
         )
     )

     if following_layer is not None:
         model.add(following_layer)
     return model
def model_cnn_spec(timewindow, nfft, hop_length=4):
    """Build a basic CNN classifier that works on an STFT spectrogram.

    The input of shape ``(timewindow, 3)`` is passed through STFT, magnitude,
    decibel conversion and ``MaxABSScaler``. Three Conv2D / BatchNormalization /
    ReLU / MaxPooling2D stages with 32, 64 and 128 filters follow, then a
    dense head with dropout ending in a 3-unit softmax.

    :param timewindow: length of the first axis of the model input
    :param nfft: ``n_fft`` value handed to the STFT layer
    :param hop_length: ``hop_length`` value handed to the STFT layer
    :returns: keras model object
    """
    inputs = keras.Input(shape=(timewindow, 3))

    # Spectrogram front end.
    spec = STFT(
        n_fft=nfft,
        window_name=None,
        pad_end=False,
        hop_length=hop_length,
        input_data_format='channels_last',
        output_data_format='channels_last',
    )(inputs)
    spec = Magnitude()(spec)
    spec = MagnitudeToDecibel()(spec)
    features = MaxABSScaler()(spec)

    # Convolutional stages: identical except for the number of filters.
    for n_filters in (32, 64, 128):
        features = tf.keras.layers.Conv2D(
            filters=n_filters, kernel_size=(3, 3), padding="same"
        )(features)
        features = tf.keras.layers.BatchNormalization()(features)
        features = tf.keras.activations.relu(features)
        features = tf.keras.layers.MaxPooling2D(pool_size=(2, 2))(features)

    # Classification head.
    features = tf.keras.layers.Flatten()(features)
    features = layers.Dropout(0.5)(features)
    features = tf.keras.layers.Dense(80, activation="relu")(features)
    features = layers.Dropout(0.5)(features)
    outputs = tf.keras.layers.Dense(3, activation="softmax")(features)

    return tf.keras.Model(inputs=inputs, outputs=outputs)
Пример #4
0
def test_save_load(save_format):
    """Run ``save_load_compare`` on spectrogram layers for a given save format.

    The STFT and ConcatenateFrequencyMap layers are checked for every
    ``save_format``. The melspectrogram, log-frequency spectrogram, STFT
    magnitude/phase and STFT magnitude layers are checked only when
    ``save_format`` is ``'tf'``.
    """
    # Only the batched source and its shape are needed; the first returned
    # value is not used by this test.
    _, batch_src, input_shape = get_audio(data_format='channels_last', n_ch=1)

    # STFT
    stft_layer = STFT(input_shape=input_shape, pad_begin=True)
    save_load_compare(
        stft_layer, batch_src, allclose_complex_numbers, save_format, STFT
    )

    # ConcatenateFrequencyMap, fed with a random float32 batch
    specs_batch = np.random.randn(2, 3, 5, 4).astype(np.float32)
    concat_layer = ConcatenateFrequencyMap(input_shape=specs_batch.shape[1:])
    save_load_compare(
        concat_layer,
        specs_batch,
        np.testing.assert_allclose,
        save_format,
        ConcatenateFrequencyMap,
    )

    # The remaining layers are exercised with the 'tf' save format only.
    if save_format != 'tf':
        return

    # melspectrogram
    melspec_layer = get_melspectrogram_layer(
        input_shape=input_shape, return_decibel=True
    )
    save_load_compare(
        melspec_layer, batch_src, np.testing.assert_allclose, save_format
    )

    # log-frequency spectrogram
    log_freq_layer = get_log_frequency_spectrogram_layer(
        input_shape=input_shape, return_decibel=True
    )
    save_load_compare(
        log_freq_layer, batch_src, np.testing.assert_allclose, save_format
    )

    # STFT magnitude + phase
    mag_phase_layer = get_stft_mag_phase(
        input_shape=input_shape, return_decibel=True
    )
    save_load_compare(
        mag_phase_layer, batch_src, np.testing.assert_allclose, save_format
    )

    # STFT magnitude
    magnitude_layer = get_stft_magnitude_layer(input_shape=input_shape)
    save_load_compare(
        magnitude_layer, batch_src, np.testing.assert_allclose, save_format
    )
Пример #5
0
    def __init__(
        self,
        sample_rate=16000,
        n_fft=512,
        win_length=None,
        hop_length=None,
        pad=0,
        power=2,
        normalized=False,
        n_harmonic=6,
        semitone_scale=2,
        bw_Q=1.0,
        learn_bw=None,
    ):
        """Set up the STFT front end and the harmonic filterbank parameters.

        :param sample_rate: stored on the instance, used as the upper bound
            (``sample_rate // 2``) of ``fft_bins`` and passed to
            ``initialize_filterbank``
        :param n_fft: FFT size of the STFT layer; also fixes the number of
            ``fft_bins`` (``n_fft // 2 + 1``) and the STFT hop (``n_fft // 2``)
        :param win_length: window length forwarded to the STFT layer
        :param hop_length: stored on the instance only; the STFT layer is
            built with ``n_fft // 2`` regardless of this value
        :param pad: not used by this constructor
        :param power: not used by this constructor
        :param normalized: not used by this constructor
        :param n_harmonic: stored on the instance and passed to
            ``initialize_filterbank``
        :param semitone_scale: passed to ``initialize_filterbank``
        :param bw_Q: initial value of ``self.bw_Q``
        :param learn_bw: ``'only_Q'`` makes ``self.bw_Q`` a trainable variable,
            ``'fix'`` makes it a constant. NOTE(review): in the code visible
            here, any other value (including the default ``None``) leaves
            ``self.bw_Q`` unset.
        """
        super(HarmonicSTFT, self).__init__()

        # Parameters
        self.sample_rate = sample_rate
        self.n_harmonic = n_harmonic
        # NOTE(review): presumably the ERB bandwidth coefficients - confirm.
        self.bw_alpha = 0.1079
        self.bw_beta = 24.7
        self.win_length = win_length
        self.hop_length = hop_length
        self.n_fft = n_fft
        # n_fft // 2 + 1 evenly spaced values from 0 to sample_rate // 2.
        self.fft_bins = tf.linspace(0, self.sample_rate // 2, self.n_fft // 2 + 1)
        
        # Spectrogram
        # The hop is n_fft // 2 here, not the hop_length argument.
        # NOTE(review): input_shape is hard-coded to (80000, 1); presumably a
        # fixed-length mono clip - confirm against the callers.
        self.stft = STFT(
            n_fft=self.n_fft,
            win_length=self.win_length,
            hop_length=n_fft//2,
            window_name="hann_window",
            input_shape=(80000, 1),
            pad_begin=True,
        )
        self.magnitude = Magnitude()
        self.to_decibel = MagnitudeToDecibel()
        self.zero = tf.zeros([1,])

        # Initialize the filterbank. Equally spaced in MIDI scale.
        harmonic_hz, self.level = initialize_filterbank(
            sample_rate, n_harmonic, semitone_scale
        )

        # Center frequencies as a float32 tensor
        self.f0 = tf.constant(harmonic_hz, dtype="float32")
        
        # Bandwidth parameters: trainable for 'only_Q', constant for 'fix'.
        if learn_bw == 'only_Q':
            self.bw_Q = tf.Variable(np.array([bw_Q]), dtype="float32", trainable=True)
        elif learn_bw == 'fix':
            self.bw_Q = tf.constant(np.array([bw_Q]), dtype="float32")
def seismo_performer_with_spec(
        maxlen=400,
        nfft=64,
        hop_length=16,
        patch_size_1=22,
        patch_size_2=3,
        num_channels=3,
        num_patches=11,
        d_model=48,
        num_heads=2,
        ff_dim_factor=2,
        layers_depth=2,
        num_classes=3,
        drop_out_rate=0.1):
    """
    Build the P/S/N wave classifier: a ViT-style model whose raw input signal
    is first converted to a spectrogram and then processed by PERFORMER blocks.

    Parameters:
    :maxlen: maximum number of samples in a waveform
    :nfft: ``n_fft`` of the short-time Fourier transform
    :hop_length: hop length in samples between analysis windows
    :patch_size_1: patch size for the first dimension (depends on nfft/hop_length)
    :patch_size_2: patch size for the second dimension (depends on nfft/hop_length)
    :num_channels: number of input channels (usually 3)
    :num_patches: resulting number of patches; must be set manually to match
                  the patching above
    :d_model: embedding size of each token
    :num_heads: number of attention heads
    :ff_dim_factor: multiplier for the feed-forward hidden size inside each block,
                    ff_dim = d_model * ff_dim_factor
    :layers_depth: number of PerformerBlock layers
    :num_classes: number of classes to predict
    :drop_out_rate: rate used by the PerformerBlock layers and the head dropouts
    :returns: Keras model object
    """
    ff_dim = d_model * ff_dim_factor
    inputs = layers.Input(shape=(maxlen, num_channels))

    # Spectrogram front end with custom normalization.
    spec = STFT(
        n_fft=nfft,
        window_name=None,
        pad_end=False,
        hop_length=hop_length,
        input_data_format='channels_last',
        output_data_format='channels_last',
    )(inputs)
    spec = Magnitude()(spec)
    spec = MagnitudeToDecibel()(spec)
    spec = MaxABSScaler()(spec)

    # Patching, linear embedding, class token and positional embeddings.
    tokens = Rearrange3d(p1=patch_size_1, p2=patch_size_2)(spec)
    tokens = tf.keras.layers.Dense(d_model)(tokens)
    tokens = ClsToken(d_model)(tokens)
    # One extra position accounts for the class token.
    tokens = PosEmbeding2(
        num_patches=num_patches + 1, projection_dim=d_model
    )(tokens)

    # Encoder blocks.
    for _ in range(layers_depth):
        tokens = PerformerBlock(
            d_model, num_heads, ff_dim, rate=drop_out_rate
        )(tokens)

    # Only the token at index 0 is fed to the MLP head.
    head = tf.keras.layers.Lambda(lambda x: x[:, 0])(tokens)
    head = tf.keras.layers.LayerNormalization(epsilon=1e-6)(head)

    # MLP head: two dropout + GELU dense stages, then dropout and softmax.
    for width in (ff_dim, d_model):
        head = layers.Dropout(drop_out_rate)(head)
        head = tf.keras.layers.Dense(width, activation='gelu')(head)
    head = layers.Dropout(drop_out_rate)(head)
    outputs = layers.Dense(num_classes, activation='softmax')(head)

    return keras.Model(inputs=inputs, outputs=outputs)