def build_audio_D(self):
        """Assemble the two-headed audio network ``D`` as a Keras ``Model``.

        The model takes an audio batch of shape
        ``(self.batch_size, self.audio_len)``, turns it into a spectrogram
        with ``logMelSpectrogram(sample_rate=self.audio_sr)`` and runs it
        through four convolution blocks (64, 128, 256 and 512 filters).
        Every convolution is 3x3, stride 1, 'same' padded and followed by
        ``LeakyReLU(alpha=0.2)``; every block ends with a 2x2 max-pool.

        The flattened feature map feeds both outputs directly; the dense
        ``fc1``/``embeddings`` head is not part of this network. No
        pretrained weights are loaded here.

        Returns:
            A ``Model`` mapping ``audio_input`` to ``[valid, labels]``:
            ``valid`` is a single sigmoid unit (layer ``'predictions'``) and
            ``labels`` is a softmax over ``self.classes + 1`` units.
            NOTE(review): the "D" suffix and the extra class suggest a
            GAN discriminator with an additional "fake" label -- confirm
            against the training code.
        """
        waveform = Input(batch_shape=(self.batch_size, self.audio_len),
                         name='audio_input')
        net = logMelSpectrogram(sample_rate=self.audio_sr)(waveform)

        # One entry per block: (filters, conv layer names, pool layer name).
        block_specs = (
            (64, ('conv1',), 'pool1'),
            (128, ('conv2',), 'pool2'),
            (256, ('conv3/conv3_1', 'conv3/conv3_2'), 'pool3'),
            (512, ('conv4/conv4_1', 'conv4/conv4_2'), 'pool4'),
        )
        for n_filters, conv_names, pool_name in block_specs:
            for conv_name in conv_names:
                net = Conv2D(n_filters, (3, 3), strides=(1, 1),
                             padding='same', name=conv_name)(net)
                net = LeakyReLU(alpha=0.2)(net)
            net = MaxPooling2D((2, 2), strides=(2, 2), padding='same',
                               name=pool_name)(net)

        features = Flatten(name='flatten_')(net)

        # Both heads read the same flattened features.
        validity = Dense(1, activation='sigmoid',
                         name='predictions')(features)
        class_probs = Dense(self.classes + 1,
                            activation='softmax')(features)

        return Model(inputs=waveform, outputs=[validity, class_probs])
    def build_audio_C(self):
        """Assemble the classifier ``C`` that maps an image batch to classes.

        The input has batch shape
        ``(self.batch_size, self.img_rows, self.img_cols, self.channels)``.
        It is passed through ``Encoding_layer`` (named ``'vOICe'``) and then
        ``logMelSpectrogram`` before reaching the convolutional stack.
        NOTE(review): the layer names suggest the image is encoded into an
        audio signal and then into a log-mel spectrogram -- confirm against
        the layer implementations.

        The stack has four blocks (64, 128, 256 and 512 filters) of 3x3,
        stride-1, 'same'-padded ReLU convolutions, each block closed by a
        2x2 max-pool. It is followed by ``fc1`` (4096 units, ReLU),
        ``embeddings`` (``self.audio_emb_dim`` units, ReLU) and a softmax
        ``prediction`` layer with ``self.classes`` units.

        Returns:
            A ``Model`` mapping ``img_input`` to the softmax class output.
        """
        image_batch = Input(
            batch_shape=(self.batch_size, self.img_rows,
                         self.img_cols, self.channels),
            name='img_input')

        encoded = Encoding_layer(name='vOICe')(image_batch)
        # NOTE(review): unlike build_audio_D, no sample_rate is passed, so
        # the layer's own default applies -- confirm it matches audio_sr.
        net = logMelSpectrogram(name='logSpectrogram')(encoded)

        # One entry per block: (filters, conv layer names, pool layer name).
        block_specs = (
            (64, ('conv1',), 'pool1'),
            (128, ('conv2',), 'pool2'),
            (256, ('conv3/conv3_1', 'conv3/conv3_2'), 'pool3'),
            (512, ('conv4/conv4_1', 'conv4/conv4_2'), 'pool4'),
        )
        for n_filters, conv_names, pool_name in block_specs:
            for conv_name in conv_names:
                net = Conv2D(n_filters, (3, 3), strides=(1, 1),
                             activation='relu', padding='same',
                             name=conv_name)(net)
            net = MaxPooling2D((2, 2), strides=(2, 2), padding='same',
                               name=pool_name)(net)

        flat = Flatten(name='flatten_')(net)
        hidden = Dense(4096, activation='relu', name='fc1')(flat)
        embeddings = Dense(self.audio_emb_dim, activation='relu',
                           name='embeddings')(hidden)
        class_probs = Dense(self.classes, activation='softmax',
                            name='prediction')(embeddings)

        return Model(inputs=image_batch, outputs=class_probs)