def build_audio_D(self):
    """Build the two-headed audio model over a ``logMelSpectrogram`` front end.

    The network takes a raw audio batch of shape
    ``(self.batch_size, self.audio_len)``, passes it through the
    ``logMelSpectrogram`` layer (configured with ``self.audio_sr``), then
    through four conv/LeakyReLU/max-pool stages, and flattens the result.
    Both heads read the flattened conv features directly (no intermediate
    dense layers):

    * ``predictions`` -- a single sigmoid unit.
    * an unnamed softmax layer with ``self.classes + 1`` units.

    NOTE(review): the original header comment said "load pretrain weights",
    but nothing in this method loads weights -- presumably the caller does
    that by layer name; confirm before renaming any layer.

    Returns:
        A ``Model`` mapping ``audio_input`` to ``[valid, labels]``.
    """
    # (filters, conv layer names in order, pool layer name) for each stage.
    stages = (
        (64, ('conv1',), 'pool1'),
        (128, ('conv2',), 'pool2'),
        (256, ('conv3/conv3_1', 'conv3/conv3_2'), 'pool3'),
        (512, ('conv4/conv4_1', 'conv4/conv4_2'), 'pool4'),
    )

    audio_input = Input(
        batch_shape=(self.batch_size, self.audio_len),
        name='audio_input',
    )
    net = logMelSpectrogram(sample_rate=self.audio_sr)(audio_input)

    # Layers are created in the same order as a hand-unrolled version would
    # create them, so auto-generated names of the unnamed LeakyReLU layers
    # do not shift.
    for n_filters, conv_names, pool_name in stages:
        for conv_name in conv_names:
            net = Conv2D(
                n_filters, (3, 3),
                strides=(1, 1),
                padding='same',
                name=conv_name,
            )(net)
            net = LeakyReLU(alpha=0.2)(net)
        net = MaxPooling2D(
            (2, 2),
            strides=(2, 2),
            padding='same',
            name=pool_name,
        )(net)

    fea = Flatten(name='flatten_')(net)

    # Head order matters for the auto-generated name of the second Dense.
    valid = Dense(1, activation='sigmoid', name='predictions')(fea)
    labels = Dense(self.classes + 1, activation='softmax')(fea)

    return Model(inputs=audio_input, outputs=[valid, labels])
def build_audio_C(self):
    """Build the image-input classifier that runs on a spectrogram front end.

    The input batch has shape
    ``(self.batch_size, self.img_rows, self.img_cols, self.channels)``.
    It is passed through ``Encoding_layer`` (named ``'vOICe'``) and then
    ``logMelSpectrogram`` (named ``'logSpectrogram'``), followed by four
    ReLU conv / max-pool stages, a 4096-unit ``fc1`` layer, an
    ``embeddings`` layer of ``self.audio_emb_dim`` units, and a softmax
    ``prediction`` layer of ``self.classes`` units.

    Returns:
        A ``Model`` mapping ``img_input`` to the softmax ``prediction``
        output.
    """

    def conv_relu(tensor, n_filters, layer_name):
        # 3x3, stride-1, same-padded convolution with a fused ReLU.
        layer = Conv2D(
            n_filters, (3, 3),
            strides=(1, 1),
            activation='relu',
            padding='same',
            name=layer_name,
        )
        return layer(tensor)

    def pool(tensor, layer_name):
        # 2x2 max pooling with stride 2 and same padding.
        layer = MaxPooling2D(
            (2, 2),
            strides=(2, 2),
            padding='same',
            name=layer_name,
        )
        return layer(tensor)

    img_input = Input(
        batch_shape=(
            self.batch_size,
            self.img_rows,
            self.img_cols,
            self.channels,
        ),
        name='img_input',
    )

    # Front end: image -> Encoding_layer -> logMelSpectrogram.
    encoded = Encoding_layer(name='vOICe')(img_input)
    net = logMelSpectrogram(name='logSpectrogram')(encoded)

    # Stage 1
    net = conv_relu(net, 64, 'conv1')
    net = pool(net, 'pool1')

    # Stage 2
    net = conv_relu(net, 128, 'conv2')
    net = pool(net, 'pool2')

    # Stage 3
    net = conv_relu(net, 256, 'conv3/conv3_1')
    net = conv_relu(net, 256, 'conv3/conv3_2')
    net = pool(net, 'pool3')

    # Stage 4
    net = conv_relu(net, 512, 'conv4/conv4_1')
    net = conv_relu(net, 512, 'conv4/conv4_2')
    net = pool(net, 'pool4')

    # Dense head: flatten -> fc1 -> embeddings -> class probabilities.
    net = Flatten(name='flatten_')(net)
    net = Dense(4096, activation='relu', name='fc1')(net)
    embeddings = Dense(
        self.audio_emb_dim, activation='relu', name='embeddings'
    )(net)
    predicts = Dense(
        self.classes, activation='softmax', name='prediction'
    )(embeddings)

    return Model(inputs=img_input, outputs=predicts)