def test_save_load():
    """Check that models built from the spectrogram layers survive a save/load round trip.

    Covers the raw ``STFT`` layer (compared with ``allclose_complex_numbers``)
    and the composed mel-spectrogram, log-frequency spectrogram,
    STFT magnitude/phase and STFT magnitude layers (compared with
    ``np.testing.assert_allclose``).
    """
    _, batch_src, input_shape = get_audio(data_format='channels_last', n_ch=1)

    # The plain STFT layer goes through the complex-aware comparison helper.
    save_load_compare(
        STFT(input_shape=input_shape, pad_begin=True),
        batch_src,
        allclose_complex_numbers,
    )

    # Composed layers that are all built with decibel scaling switched on:
    # melspectrogram, log-frequency spectrogram, stft magnitude + phase.
    decibel_layer_builders = (
        get_melspectrogram_layer,
        get_log_frequency_spectrogram_layer,
        get_stft_mag_phase,
    )
    for build_layer in decibel_layer_builders:
        save_load_compare(
            build_layer(input_shape=input_shape, return_decibel=True),
            batch_src,
            np.testing.assert_allclose,
        )

    # The STFT magnitude layer is built with its default arguments.
    save_load_compare(
        get_stft_magnitude_layer(input_shape=input_shape),
        batch_src,
        np.testing.assert_allclose,
    )
def _get_stft_model(following_layer=None, tflite_compatible=False):
    """Build a Sequential model whose first layer is a kapre STFT layer.

    The STFT settings (``n_fft``, ``win_length``, ``hop_length``, ``pad_end``,
    ``data_format``, ``input_shape``) are not parameters; they are read from
    the surrounding scope.

    :param following_layer: optional layer appended after the STFT layer.
    :param tflite_compatible: when true, ``STFTTflite`` is used instead of
        ``STFT``; both receive the same keyword arguments.
    :returns: the assembled ``Sequential`` model.
    """
    stft_model = tensorflow.keras.models.Sequential()

    # Both variants take an identical configuration, so only the class differs.
    stft_layer_cls = STFTTflite if tflite_compatible else STFT
    stft_model.add(
        stft_layer_cls(
            n_fft=n_fft,
            win_length=win_length,
            hop_length=hop_length,
            window_name=None,
            pad_end=pad_end,
            input_data_format=data_format,
            output_data_format=data_format,
            input_shape=input_shape,
            name='stft',
        )
    )

    if following_layer is None:
        return stft_model

    stft_model.add(following_layer)
    return stft_model
def model_cnn_spec(timewindow, nfft, hop_length=4):
    """Build a basic CNN classifier that operates on a spectrogram of the input.

    The input of shape ``(timewindow, 3)`` is converted with
    STFT -> Magnitude -> MagnitudeToDecibel -> MaxABSScaler, passed through
    three Conv2D/BatchNorm/ReLU/MaxPool stages (32, 64 and 128 filters),
    flattened, and classified by a dropout-regularised dense head ending in a
    3-unit softmax.

    :param timewindow: length of the first input axis.
    :param nfft: ``n_fft`` passed to the STFT layer.
    :param hop_length: ``hop_length`` passed to the STFT layer.
    :returns: keras model object
    """
    inputs = keras.Input(shape=(timewindow, 3))

    # Waveform -> scaled decibel spectrogram.
    stft_layer = STFT(
        n_fft=nfft,
        window_name=None,
        pad_end=False,
        hop_length=hop_length,
        input_data_format='channels_last',
        output_data_format='channels_last',
    )
    x = stft_layer(inputs)
    x = Magnitude()(x)
    x = MagnitudeToDecibel()(x)
    x = MaxABSScaler()(x)

    # Three identical convolutional stages that differ only in filter count.
    for n_filters in (32, 64, 128):
        x = tf.keras.layers.Conv2D(
            filters=n_filters, kernel_size=(3, 3), padding="same"
        )(x)
        x = tf.keras.layers.BatchNormalization()(x)
        x = tf.keras.activations.relu(x)
        x = tf.keras.layers.MaxPooling2D(pool_size=(2, 2))(x)

    # Classification head.
    x = tf.keras.layers.Flatten()(x)
    x = layers.Dropout(0.5)(x)
    x = tf.keras.layers.Dense(80, activation="relu")(x)
    x = layers.Dropout(0.5)(x)
    outputs = tf.keras.layers.Dense(3, activation="softmax")(x)

    return tf.keras.Model(inputs=inputs, outputs=outputs)
def test_save_load(save_format):
    """Check save/load round trips of the spectrogram layers for ``save_format``.

    ``STFT`` and ``ConcatenateFrequencyMap`` are checked for every format and
    are passed to ``save_load_compare`` together with their layer class.
    The composed layers (melspectrogram, log-frequency spectrogram,
    stft magnitude + phase, stft magnitude) are only checked when
    ``save_format`` is ``'tf'``.
    """
    _, batch_src, input_shape = get_audio(data_format='channels_last', n_ch=1)

    # STFT save/load, compared with the complex-aware helper.
    save_load_compare(
        STFT(input_shape=input_shape, pad_begin=True),
        batch_src,
        allclose_complex_numbers,
        save_format,
        STFT,
    )

    # ConcatenateFrequencyMap save/load on a random float32 batch.
    specs_batch = np.random.randn(2, 3, 5, 4).astype(np.float32)
    save_load_compare(
        ConcatenateFrequencyMap(input_shape=specs_batch.shape[1:]),
        specs_batch,
        np.testing.assert_allclose,
        save_format,
        ConcatenateFrequencyMap,
    )

    # The composed layers are exercised for the 'tf' format only.
    if save_format != 'tf':
        return

    # (builder, extra keyword arguments) in the order they are checked.
    composed_cases = (
        (get_melspectrogram_layer, {'return_decibel': True}),
        (get_log_frequency_spectrogram_layer, {'return_decibel': True}),
        (get_stft_mag_phase, {'return_decibel': True}),
        (get_stft_magnitude_layer, {}),
    )
    for build_layer, extra_kwargs in composed_cases:
        save_load_compare(
            build_layer(input_shape=input_shape, **extra_kwargs),
            batch_src,
            np.testing.assert_allclose,
            save_format,
        )
def __init__(
    self,
    sample_rate=16000,
    n_fft=512,
    win_length=None,
    hop_length=None,
    pad=0,
    power=2,
    normalized=False,
    n_harmonic=6,
    semitone_scale=2,
    bw_Q=1.0,
    learn_bw=None,
):
    """Set up the STFT front end and the harmonic filterbank parameters.

    :param sample_rate: sample rate; used for the FFT bin axis and passed to
        ``initialize_filterbank``.
    :param n_fft: FFT size of the STFT layer; its hop is fixed to ``n_fft // 2``.
    :param win_length: window length forwarded to the STFT layer.
    :param hop_length: stored on the instance only.
    :param pad: accepted but not used in this constructor.
    :param power: accepted but not used in this constructor.
    :param normalized: accepted but not used in this constructor.
    :param n_harmonic: number of harmonics; passed to ``initialize_filterbank``.
    :param semitone_scale: passed to ``initialize_filterbank``.
    :param bw_Q: initial value wrapped into ``self.bw_Q``.
    :param learn_bw: ``'only_Q'`` makes ``self.bw_Q`` a trainable variable,
        ``'fix'`` makes it a constant.
    """
    super(HarmonicSTFT, self).__init__()

    # Fixed bandwidth coefficients.
    self.bw_alpha = 0.1079
    self.bw_beta = 24.7

    # Plain configuration values.
    self.sample_rate = sample_rate
    self.n_fft = n_fft
    self.win_length = win_length
    # NOTE(review): hop_length is only stored; the STFT layer below always
    # uses n_fft // 2 as its hop -- confirm this is intended.
    self.hop_length = hop_length
    self.n_harmonic = n_harmonic

    # Evenly spaced axis from 0 to half the sample rate, one point per FFT bin.
    half_rate = self.sample_rate // 2
    n_bins = self.n_fft // 2 + 1
    self.fft_bins = tf.linspace(0, half_rate, n_bins)

    # Spectrogram pipeline: STFT -> magnitude -> decibel.
    # NOTE(review): input_shape is hard-coded to (80000, 1).
    self.stft = STFT(
        n_fft=self.n_fft,
        win_length=self.win_length,
        hop_length=n_fft // 2,
        window_name="hann_window",
        input_shape=(80000, 1),
        pad_begin=True,
    )
    self.magnitude = Magnitude()
    self.to_decibel = MagnitudeToDecibel()
    self.zero = tf.zeros([1,])

    # Initialize the filterbank. Equally spaced in MIDI scale.
    filterbank = initialize_filterbank(sample_rate, n_harmonic, semitone_scale)
    harmonic_hz, self.level = filterbank

    # Center frequencies as a float32 tensor.
    self.f0 = tf.constant(harmonic_hz, dtype="float32")

    # Bandwidth parameter: constant or trainable depending on learn_bw.
    # NOTE(review): for any other learn_bw value (including the default None)
    # self.bw_Q is not set here.
    if learn_bw == 'fix':
        self.bw_Q = tf.constant(np.array([bw_Q]), dtype="float32")
    elif learn_bw == 'only_Q':
        self.bw_Q = tf.Variable(np.array([bw_Q]), dtype="float32", trainable=True)
def seismo_performer_with_spec(
        maxlen=400,
        nfft=64,
        hop_length=16,
        patch_size_1=22,
        patch_size_2=3,
        num_channels=3,
        num_patches=11,
        d_model=48,
        num_heads=2,
        ff_dim_factor=2,
        layers_depth=2,
        num_classes=3,
        drop_out_rate=0.1):
    """
    Build a P/S/N wave classifier that follows the ViT approach: the raw
    signal is converted to a spectrogram, cut into patches and fed to a
    stack of PERFORMER blocks.

    Parameters:
    :maxlen: maximum number of waveform samples
    :nfft: number of FFTs in the short-time Fourier transform
    :hop_length: hop length in samples between analysis windows
    :patch_size_1: patch size along the first dimension (depends on nfft/hop_length)
    :patch_size_2: patch size along the second dimension (depends on nfft/hop_length)
    :num_channels: number of input channels (usually 3)
    :num_patches: resulting number of patches (must be set manually!)
    :d_model: embedding size of each token
    :num_heads: number of attention heads
    :ff_dim_factor: feed-forward hidden size inside the transformer is
                    ff_dim = d_model * ff_dim_factor
    :layers_depth: number of transformer blocks
    :num_classes: number of classes to predict
    :drop_out_rate: dropout rate used in the Performer blocks and the MLP head
    :returns: Keras model object
    """
    ff_dim = d_model * ff_dim_factor

    inputs = layers.Input(shape=(maxlen, num_channels))

    # Waveform -> decibel-scaled magnitude spectrogram.
    to_spectrogram = STFT(
        n_fft=nfft,
        window_name=None,
        pad_end=False,
        hop_length=hop_length,
        input_data_format='channels_last',
        output_data_format='channels_last',
    )
    tokens = to_spectrogram(inputs)
    tokens = Magnitude()(tokens)
    tokens = MagnitudeToDecibel()(tokens)

    # Custom normalization.
    tokens = MaxABSScaler()(tokens)

    # Cut the spectrogram into patches and embed each one.
    tokens = Rearrange3d(p1=patch_size_1, p2=patch_size_2)(tokens)
    tokens = tf.keras.layers.Dense(d_model)(tokens)

    # Add the cls token, then positional embeddings sized num_patches + 1.
    tokens = ClsToken(d_model)(tokens)
    tokens = PosEmbeding2(num_patches=num_patches + 1, projection_dim=d_model)(tokens)

    # Encoder: a stack of Performer blocks.
    for _ in range(layers_depth):
        tokens = PerformerBlock(d_model, num_heads, ff_dim, rate=drop_out_rate)(tokens)

    # Only the token at index 0 is passed on to the MLP head.
    head = tf.keras.layers.Lambda(lambda x: x[:, 0])(tokens)
    head = tf.keras.layers.LayerNormalization(epsilon=1e-6)(head)

    # MLP head: two dropout + gelu dense stages, then dropout and softmax.
    for units in (ff_dim, d_model):
        head = layers.Dropout(drop_out_rate)(head)
        head = tf.keras.layers.Dense(units, activation='gelu')(head)
    head = layers.Dropout(drop_out_rate)(head)
    outputs = layers.Dense(num_classes, activation='softmax')(head)

    return keras.Model(inputs=inputs, outputs=outputs)