def build_model(self):
    """Assemble the policy model, the value model and a joint training model.

    Both heads sit on top of one shared ``Feature`` trunk fed by the
    observation input. The training model takes four additional per-sample
    inputs, computes the clipped-ratio policy loss plus a squared-error value
    loss, and registers the sum through ``add_loss`` so it is compiled with an
    optimizer only.

    Returns:
        Tuple ``(policy, value, train_model)``.
    """
    obs_in = Input(self.obs_shape)
    old_prob_in = Input([])
    old_action_in = Input([], dtype='int32')
    advantage_in = Input([])
    value_target_in = Input([])

    trunk = Feature()
    hidden = trunk(obs_in)
    actor_head = Dense(self.act_n, activation='softmax')
    critic_head = Dense(1)
    action_probs = actor_head(hidden)
    state_value = critic_head(hidden)

    policy = Model(inputs=obs_in, outputs=action_probs)
    value = Model(inputs=obs_in, outputs=state_value)

    # Per-sample probability that the current policy gives to the old action.
    taken_prob = tf.gather(action_probs, old_action_in, batch_dims=1)
    ratio = taken_prob / (old_prob_in + 1e-3)
    unclipped = ratio * advantage_in
    clipped = K.clip(ratio, 1 - self.eps_clip,
                     1 + self.eps_clip) * advantage_in

    surrogate_term = -K.mean(K.minimum(unclipped, clipped))
    # The second term is the entropy computation. Because the actions were
    # already sampled according to their probabilities, the probability
    # weight is not applied again and only the log-probability of the taken
    # action is needed.
    log_prob_term = K.mean(K.log(taken_prob + 1e-3)) * self.entropy_coef
    policy_loss = surrogate_term + log_prob_term

    value_loss = K.mean((state_value[:, 0] - value_target_in)**2)
    loss = policy_loss + value_loss

    train_inputs = [
        obs_in, old_prob_in, old_action_in, advantage_in, value_target_in
    ]
    train_model = Model(inputs=train_inputs, outputs=loss)
    train_model.add_loss(loss)
    train_model.compile(tf.keras.optimizers.Adam(self.lr))
    return policy, value, train_model
def build(self,
          hidden_layers=[16],
          activations=['relu'],
          dropout=0.5,
          learning_rate=0.01,
          l2_norm=5e-4,
          p1=1.4,
          p2=0.7,
          epsilon=0.01):
    """Build a two-layer graph-convolution classifier with VAT regularisation.

    Only ``hidden_layers[0]`` and ``activations[0]`` are read. The compiled
    model is stored in ``self.model``; a second Adam optimizer running at a
    tenth of ``learning_rate`` is stored in ``self.adv_optimizer``.

    Args:
        hidden_layers: Sequence whose first entry is the hidden width.
        activations: Sequence whose first entry is the hidden activation.
        dropout: Rate of the dropout layer stored in ``self.dropout_layer``.
        learning_rate: Learning rate of the main optimizer.
        l2_norm: L2 factor on the first layer's kernel.
        p1: Weight of the virtual adversarial loss.
        p2: Weight of the entropy loss.
        epsilon: Passed to ``self.virtual_adversarial_loss``.
    """
    with self.device:
        features_in = Input(batch_shape=[self.n_nodes, self.n_features],
                            dtype=tf.float32,
                            name='features')
        adj_in = Input(batch_shape=[self.n_nodes, self.n_nodes],
                       dtype=tf.float32,
                       sparse=True,
                       name='adj_matrix')
        index_in = Input(batch_shape=[None], dtype=tf.int32, name='index')

        hidden_conv = GraphConvolution(
            hidden_layers[0],
            activation=activations[0],
            kernel_regularizer=regularizers.l2(l2_norm))
        output_conv = GraphConvolution(self.n_classes)
        self.GCN_layers = [hidden_conv, output_conv]
        self.dropout_layer = Dropout(rate=dropout)

        logit = self.propagation(features_in, adj_in)
        logit = tf.ensure_shape(logit, (self.n_nodes, self.n_classes))
        # Only the rows selected by the index input reach the supervised loss.
        selected = tf.gather(logit, index_in)
        probs = Softmax()(selected)

        model = Model(inputs=[features_in, adj_in, index_in], outputs=probs)
        model.compile(loss='sparse_categorical_crossentropy',
                      optimizer=Adam(lr=learning_rate),
                      metrics=['accuracy'])

        # The regularisers are computed on the logits of all nodes.
        entropy_loss = entropy_y_x(logit)
        vat_loss = self.virtual_adversarial_loss(features_in, adj_in, logit,
                                                 epsilon)
        model.add_loss(p1 * vat_loss + p2 * entropy_loss)

        self.model = model
        self.adv_optimizer = Adam(lr=learning_rate / 10)
        self.built = True
def build_model(hp):
    """Build a margin-style embedding model over true and corrupted triples.

    ``hp`` is copied, extended with ``e_dim``/``r_dim`` (both taken from
    ``dim``) and a fixed ``name``, and forwarded as keyword arguments to the
    class looked up in ``models``. The loss between the two scores is both the
    model output and its only registered loss.

    Args:
        hp: Mapping of hyper-parameters; must provide ``dim``,
            ``embedding_model``, ``loss_function``, ``margin`` and
            ``learning_rate``.

    Returns:
        The compiled Keras model taking ``[triple, corrupted_triple]``.
    """
    params = hp.copy()
    dim = params['dim']
    params['e_dim'] = dim
    params['r_dim'] = dim
    params['name'] = 'embedding_model'

    embedding_cls = models[params['embedding_model']]
    embedding_model = embedding_cls(**params)

    triple = Input((3, ))
    ftriple = Input((3, ))

    score = embedding_model(triple)
    fscore = embedding_model(ftriple)

    loss_function = loss_function_lookup(params['loss_function'])
    # A falsy margin (None or 0) falls back to 1.
    loss = loss_function(score, fscore, params['margin'] or 1, 1)

    model = Model(inputs=[triple, ftriple], outputs=loss)
    model.add_loss(loss)

    schedule = ExponentialDecay(params['learning_rate'],
                                decay_steps=100000,
                                decay_rate=0.96)
    model.compile(optimizer=Adam(learning_rate=schedule), loss=None)
    return model
def create_model(trainable=False):
    """Build a MobileNetV2-based model with a 5-channel sigmoid output map.

    Args:
        trainable: Value assigned to ``trainable`` on every backbone layer.

    Returns:
        An uncompiled Keras model with one L2 penalty registered per
        trainable weight.
    """
    backbone = MobileNetV2(input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3),
                           include_top=False,
                           alpha=ALPHA,
                           weights="imagenet")
    for backbone_layer in backbone.layers:
        backbone_layer.trainable = trainable

    features = backbone.get_layer("block_16_project_BN").output

    head = Conv2D(112,
                  padding="same",
                  kernel_size=3,
                  strides=1,
                  activation="relu")(features)
    head = Conv2D(112,
                  padding="same",
                  kernel_size=3,
                  strides=1,
                  use_bias=False)(head)
    head = BatchNormalization()(head)
    head = Activation("relu")(head)
    head = Conv2D(5, padding="same", kernel_size=1,
                  activation="sigmoid")(head)

    model = Model(inputs=backbone.input, outputs=head)

    # divide by 2 since d/dweight learning_rate * weight^2
    # = 2 * learning_rate * weight
    # see https://arxiv.org/pdf/1711.05101.pdf
    regularizer = l2(WEIGHT_DECAY / 2)
    for weight in model.trainable_weights:
        with tf.keras.backend.name_scope("weight_regularizer"):
            penalty = regularizer(weight)
            model.add_loss(penalty)

    return model
def build(self,
          hiddens=[16],
          activations=['relu'],
          dropout=0.5,
          l2_norm=5e-4,
          use_bias=False,
          lr=0.01,
          p1=1.4,
          p2=0.7):
    """Build the graph-convolution classifier with VAT and entropy losses.

    Creates one hidden ``GraphConvolution`` per ``(hiddens, activations)``
    pair plus an output layer with ``self.graph.n_classes`` units, the
    ``r_vadv`` variable, the compiled model (``self.model``) and a second
    optimizer (``self.adv_optimizer``) running at ``lr / 10``.

    Args:
        hiddens: Hidden layer widths.
        activations: Activation for each hidden layer.
        dropout: Rate of the dropout layer stored in ``self.dropout``.
        l2_norm: L2 factor on hidden-layer kernels.
        use_bias: Whether the graph convolutions use a bias.
        lr: Learning rate of the main optimizer.
        p1: Weight of the virtual adversarial loss.
        p2: Weight of the entropy loss.

    Raises:
        RuntimeError: If ``self.kind`` equals ``"P"``.
    """
    if self.kind == "P":
        raise RuntimeError(
            f"Currently {self.name} only supports for tensorflow backend.")

    with tf.device(self.device):
        attr_in = Input(batch_shape=[None, self.graph.n_attrs],
                        dtype=self.floatx,
                        name='attr_matrix')
        adj_in = Input(batch_shape=[None, None],
                       dtype=self.floatx,
                       sparse=True,
                       name='adj_matrix')
        index_in = Input(batch_shape=[None],
                         dtype=self.intx,
                         name='node_index')

        conv_stack = [
            GraphConvolution(units,
                             activation=act,
                             use_bias=use_bias,
                             kernel_regularizer=regularizers.l2(l2_norm))
            for units, act in zip(hiddens, activations)
        ]
        conv_stack.append(
            GraphConvolution(self.graph.n_classes, use_bias=use_bias))
        self.GCN_layers = conv_stack
        self.dropout = Dropout(rate=dropout)

        logit = self.forward(attr_in, adj_in)
        selected = Gather()([logit, index_in])

        model = Model(inputs=[attr_in, adj_in, index_in], outputs=selected)
        model.compile(loss=SparseCategoricalCrossentropy(from_logits=True),
                      optimizer=Adam(lr=lr),
                      metrics=['accuracy'])

        # One variable entry per (node, attribute), drawn from a truncated
        # normal with stddev 0.01.
        initial_r = TruncatedNormal(stddev=0.01)(
            shape=[self.graph.n_nodes, self.graph.n_attrs])
        self.r_vadv = tf.Variable(initial_r, name="r_vadv")

        entropy_loss = entropy_y_x(logit)
        vat_loss = self.virtual_adversarial_loss(attr_in, adj_in, logit)
        model.add_loss(p1 * vat_loss + p2 * entropy_loss)

        self.model = model
        self.adv_optimizer = Adam(lr=lr / 10)
def build(self,
          hidden_layers=[64],
          activations=['relu'],
          use_bias=False,
          dropout=0.6,
          learning_rate=0.01,
          l2_norm=1e-4,
          para_kl=5e-4,
          gamma=1.0):
    """Build the Gaussian graph-convolution classifier and store it on self.

    The first layer (``GaussionConvolution_F``) also returns a KL term, which
    is scaled by ``para_kl`` and registered as an extra loss. The compiled
    model ends up in ``self.model`` and ``self.built`` is set to True.

    Args:
        hidden_layers: Hidden widths; the first feeds the ``_F`` layer, the
            rest feed ``_D`` layers.
        activations: Activation per hidden layer.
        use_bias: Whether the convolution layers use a bias.
        dropout: Dropout rate applied before every convolution.
        learning_rate: Adam learning rate.
        l2_norm: L2 factor on the first layer's kernel.
        para_kl: Weight of the KL term.
        gamma: Forwarded to every Gaussian convolution layer.
    """
    features_in = Input(batch_shape=[self.n_nodes, self.n_features],
                        dtype=tf.float32,
                        name='features')
    adj_first = Input(batch_shape=[self.n_nodes, self.n_nodes],
                      dtype=tf.float32,
                      sparse=True,
                      name='adj_matrix_1')
    adj_second = Input(batch_shape=[self.n_nodes, self.n_nodes],
                       dtype=tf.float32,
                       sparse=True,
                       name='adj_matrix_2')
    adj_inputs = [adj_first, adj_second]
    index_in = Input(batch_shape=[None], dtype=tf.int32, name='index')

    hidden = Dropout(rate=dropout)(features_in)
    first_layer = GaussionConvolution_F(
        hidden_layers[0],
        gamma=gamma,
        use_bias=use_bias,
        activation=activations[0],
        kernel_regularizer=regularizers.l2(l2_norm))
    hidden, KL_divergence = first_layer([hidden, *adj_inputs])

    # additional layers (usually unnecessary)
    extra_specs = zip(hidden_layers[1:], activations[1:])
    for units, act in extra_specs:
        hidden = Dropout(rate=dropout)(hidden)
        hidden = GaussionConvolution_D(units,
                                       gamma=gamma,
                                       use_bias=use_bias,
                                       activation=act)([hidden, *adj_inputs])

    hidden = Dropout(rate=dropout)(hidden)
    hidden = GaussionConvolution_D(self.n_classes,
                                   gamma=gamma,
                                   use_bias=use_bias)([hidden, *adj_inputs])
    hidden = tf.ensure_shape(hidden, [self.n_nodes, self.n_classes])
    hidden = tf.gather(hidden, index_in)
    probs = Softmax()(hidden)

    model = Model(inputs=[features_in, *adj_inputs, index_in], outputs=probs)
    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer=Adam(lr=learning_rate),
                  metrics=['accuracy'])
    model.add_loss(para_kl * KL_divergence)

    self.model = model
    self.built = True
def _build_model(self, training=True):
    """Build and compile the Deep SVDD network.

    The network is a stack of bias-free dense layers. Its output is the
    squared distance of the last hidden representation (layer
    ``net_output``) to ``self.c``; the mean of that distance is the base
    loss. With ``self.use_ae`` a mirrored decoder is appended and the mean
    squared reconstruction error is added to the loss.

    Args:
        training: When true and ``self.verbose >= 1`` the model summary is
            printed.

    Returns:
        The compiled Keras model mapping inputs to per-sample distances.
    """
    inputs = Input(shape=(self.n_features_, ))
    x = Dense(self.hidden_neurons_[0],
              use_bias=False,
              activation=self.hidden_activation,
              activity_regularizer=l2(self.l2_regularizer))(inputs)
    for hidden_neurons in self.hidden_neurons_[1:-1]:
        x = Dense(hidden_neurons,
                  use_bias=False,
                  activation=self.hidden_activation,
                  activity_regularizer=l2(self.l2_regularizer))(x)
        x = Dropout(self.dropout_rate)(x)
    # add name to last hidden layer
    x = Dense(self.hidden_neurons_[-1],
              use_bias=False,
              activation=self.hidden_activation,
              activity_regularizer=l2(self.l2_regularizer),
              name='net_output')(x)

    # build distance loss
    dist = tf.math.reduce_sum((x - self.c)**2, axis=-1)
    outputs = dist
    loss = tf.math.reduce_mean(dist)

    # Instantiate Deep SVDD
    dsvd = Model(inputs, outputs)

    # Weight decay
    # NOTE(review): this is evaluated once from the NumPy values returned by
    # get_weights(), so it enters the loss as a constant rather than as a
    # function of the trainable variables -- confirm whether that is
    # intended before relying on it as a regulariser.
    w_d = 1e-6 * sum(np.linalg.norm(w) for w in dsvd.get_weights())

    # Use AutoEncoder version of DeepSVDD
    if self.use_ae:
        for reversed_neurons in self.hidden_neurons_[::-1]:
            x = Dense(reversed_neurons,
                      use_bias=False,
                      activation=self.hidden_activation,
                      activity_regularizer=l2(self.l2_regularizer))(x)
            x = Dropout(self.dropout_rate)(x)
        x = Dense(self.n_features_,
                  use_bias=False,
                  activation=self.output_activation,
                  activity_regularizer=l2(self.l2_regularizer))(x)
        reconstruction = tf.math.reduce_mean(tf.math.square(x - inputs))
        dsvd.add_loss(loss + reconstruction + w_d)
    else:
        dsvd.add_loss(loss + w_d)

    dsvd.compile(optimizer=self.optimizer)
    if self.verbose >= 1 and training:
        # summary() prints the table itself and returns None, so wrapping it
        # in print() only appended a stray "None" line.
        dsvd.summary()
    return dsvd
def build(self,
          hiddens=[16],
          activations=['relu'],
          dropout=0.,
          lr=0.01,
          l2_norm=5e-4,
          p1=1.4,
          p2=0.7,
          use_bias=False,
          epsilon=0.01):
    """Build the graph-convolution classifier with VAT and entropy losses.

    One ``GraphConvolution`` and one ``Dropout`` layer are created per
    ``(hiddens, activations)`` pair, followed by an output convolution with
    ``self.graph.n_classes`` units. The compiled model is stored in
    ``self.model`` and a second optimizer at ``lr / 10`` in
    ``self.adv_optimizer``.

    Args:
        hiddens: Hidden layer widths.
        activations: Activation for each hidden layer.
        dropout: Rate of every dropout layer.
        lr: Learning rate of the main optimizer.
        l2_norm: L2 factor on hidden-layer kernels.
        p1: Weight of the virtual adversarial loss.
        p2: Weight of the entropy loss.
        use_bias: Whether the graph convolutions use a bias.
        epsilon: Passed to ``self.virtual_adversarial_loss``.
    """
    with tf.device(self.device):
        attr_in = Input(batch_shape=[None, self.graph.n_attrs],
                        dtype=self.floatx,
                        name='attr_matrix')
        adj_in = Input(batch_shape=[None, None],
                       dtype=self.floatx,
                       sparse=True,
                       name='adj_matrix')
        index_in = Input(batch_shape=[None],
                         dtype=self.intx,
                         name='node_index')

        conv_stack, drop_stack = [], []
        for units, act in zip(hiddens, activations):
            conv = GraphConvolution(
                units,
                activation=act,
                use_bias=use_bias,
                kernel_regularizer=regularizers.l2(l2_norm))
            conv_stack.append(conv)
            drop_stack.append(Dropout(rate=dropout))
        conv_stack.append(
            GraphConvolution(self.graph.n_classes, use_bias=use_bias))

        self.GCN_layers = conv_stack
        self.dropout_layers = drop_stack

        logit = self.forward(attr_in, adj_in)
        selected = Gather()([logit, index_in])

        model = Model(inputs=[attr_in, adj_in, index_in], outputs=selected)
        model.compile(loss=SparseCategoricalCrossentropy(from_logits=True),
                      optimizer=Adam(lr=lr),
                      metrics=['accuracy'])

        entropy_loss = entropy_y_x(logit)
        vat_loss = self.virtual_adversarial_loss(attr_in, adj_in, logit,
                                                 epsilon)
        model.add_loss(p1 * vat_loss + p2 * entropy_loss)

        self.model = model
        self.adv_optimizer = Adam(lr=lr / 10)
def VAE_2():
    """Build a dense VAE over flattened 28x28 inputs with a 2-D latent space.

    Sampling is embedded directly in the encoder graph. The KL term and a
    summed binary cross-entropy reconstruction term are registered with
    ``add_loss`` and mirrored as metrics, so the returned model can be
    compiled without an external loss.

    Returns:
        Tuple ``(vae, encoder, decoder)``. The encoder outputs
        ``[z_mean, z_log_var, sampled_z]``.
    """
    # When using method 3 below, the Input must also fix the batch size:
    # inputs = Input(shape=(28*28), name='encoder_input',
    #                batch_size=_BatchSize)
    inputs = Input(shape=(28 * 28), name='encoder_input')
    x = layers.Dense(128, activation='relu')(inputs)
    z_mean = layers.Dense(2, name='z_mean')(x)
    z_log_var = layers.Dense(2, name='z_log_var')(x)

    # Method 1: embed the sampling step directly in the model.
    # 1. Draw noise from a standard normal distribution.
    eps = tf.random.normal(tf.shape(z_mean))
    # 2. Standard deviation. z_log_var is the log-variance (the KL term
    #    below relies on that), so sigma = exp(0.5 * log_var). The previous
    #    exp(z_log_var) scaled the noise by the variance instead.
    std = tf.exp(0.5 * z_log_var)
    # 3. Sample through element-wise multiplication.
    Sample_Z = layers.Add()([z_mean, layers.Multiply()([eps, std])])

    # Method 2: apply a `sampling` function through a Lambda layer.
    # Sample_Z = layers.Lambda(sampling, name='z')([z_mean, z_log_var])
    # Method 3: a custom sampling-layer subclass. It is no different from
    # embedding the sampling in the model; it requires batch_size=_BatchSize
    # on both Input calls.
    # Sample_Z = Sample(z_log_var)(z_mean, z_log_var)

    # instantiate encoder model
    encoder = Model(inputs, [z_mean, z_log_var, Sample_Z], name='encoder')

    # build decoder model
    # (with method 3, also pass batch_size=_BatchSize here)
    latent_inputs = Input(shape=(2), name='z_sampling')
    x = layers.Dense(128, activation='relu')(latent_inputs)
    outputs = layers.Dense(28 * 28, activation='sigmoid')(x)

    # instantiate decoder model
    decoder = Model(latent_inputs, outputs, name='decoder')

    # instantiate VAE model
    outputs = decoder(encoder(inputs)[2])
    vae = Model(inputs=inputs, outputs=outputs, name='vae_mlp')

    # Register the losses.
    reconstruction_loss = tf.keras.losses.BinaryCrossentropy(
        reduction='sum', name='binary_crossentropy')(inputs, outputs)
    kl_loss = 1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var)
    # Original author's note: using tf.reduce_sum here instead of
    # tf.reduce_mean reproduces the same error as in the first variant.
    kl_loss = -0.5 * tf.reduce_mean(kl_loss)

    vae.add_loss(kl_loss)
    vae.add_metric(kl_loss, name='kl_loss', aggregation='mean')
    vae.add_loss(reconstruction_loss)
    # The metric keeps its historical name 'mse_loss' although the value is
    # the binary cross-entropy computed above.
    vae.add_metric(reconstruction_loss, name='mse_loss', aggregation='mean')
    return vae, encoder, decoder
def ss(self, args, ripple_set):
    """Run the build steps and wrap the resulting score in a Keras model.

    Neither ``args`` nor ``ripple_set`` is read here.

    Returns:
        A model from ``[self.user_id, self.item_id]`` to ``self.score`` with
        the weighted L2 loss and the negated, weighted KGE loss registered.
    """
    build_steps = (
        self._build_embeddings,
        self._build_inputs,
        self._build_model,
        self._build_loss,
    )
    for run_step in build_steps:
        run_step()

    print('self.user_id_shape', self.user_id.shape)

    model = Model(inputs=[self.user_id, self.item_id], outputs=self.score)
    l2_term = self.l2.l2 * self.l2_loss
    model.add_loss(l2_term)
    # The KGE loss enters with a negative sign.
    kge_term = self.kge_weight * -self.kge_loss
    model.add_loss(kge_term)
    return model
def build_RaGAN(self):
    """Build the relativistic-average discriminator training model.

    The model takes a batch of real and a batch of generated high-resolution
    images, runs both through ``self.discriminator``, and offsets each set of
    logits by the mean of the other set. The discriminator loss is registered
    with ``add_loss`` and additionally appended to the model's metric lists.

    Returns:
        The compiled Keras model with outputs ``[fake_logit, real_logit]``.
    """

    def interpolating(x):
        # Random convex combination of x[0] and x[1], one weight per sample.
        # Not referenced elsewhere in this method.
        weight_shape = (K.shape(x[0])[0], ) + (1, ) * (K.ndim(x[0]) - 1)
        u = K.random_uniform(weight_shape)
        return x[0] * u + x[1] * (1 - u)

    def comput_loss(x):
        # Relativistic logits: each side relative to the other side's mean.
        # (A sigmoid-wrapped variant of both lines was tried previously.)
        real, fake = x
        fake_logit = fake - K.mean(real)
        real_logit = real - K.mean(fake)
        return [fake_logit, real_logit]

    # Input HR images
    imgs_hr = Input(self.shape_hr)
    generated_hr = Input(self.shape_hr)

    # Discriminator responses for the real and the generated batch.
    real_discriminator_logits = self.discriminator(imgs_hr)
    fake_discriminator_logits = self.discriminator(generated_hr)

    relativistic = Lambda(comput_loss, name='comput_loss')(
        [real_discriminator_logits, fake_discriminator_logits])

    # Output tensors to a Model must be the output of a Keras `Layer`
    fake_logit = Lambda(lambda x: x, name='fake_logit')(relativistic[0])
    real_logit = Lambda(lambda x: x, name='real_logit')(relativistic[1])

    # Hand-written sigmoid cross-entropy with a small epsilon inside the
    # logs. Earlier alternatives (K.binary_crossentropy,
    # tf.nn.sigmoid_cross_entropy_with_logits, a gradient-penalty form) were
    # left disabled by the original author.
    epsilon = 0.000001
    real_term = K.mean(K.log(K.sigmoid(real_logit) + epsilon))
    fake_term = K.mean(K.log(1 - K.sigmoid(fake_logit) + epsilon))
    dis_loss = -(real_term + fake_term)

    model = Model(inputs=[imgs_hr, generated_hr],
                  outputs=[fake_logit, real_logit])
    model.add_loss(dis_loss)
    model.compile(optimizer=Adam(self.dis_lr))

    # Expose the loss value under the name 'dis_loss' in training logs.
    model.metrics_names.append('dis_loss')
    model.metrics_tensors.append(dis_loss)
    return model
def build_model(batch_size=1, lr=1e-4):
    """Build the graph-to-layout model predicting boxes, masks and relations.

    Ground-truth boxes and masks are model inputs so that ``total_loss`` can
    be registered through ``add_loss``; the model is compiled with an Adam
    optimizer only.

    Args:
        batch_size: Fixed batch size for every input and for the graph nets.
        lr: Adam learning rate.

    Returns:
        The compiled Keras model with outputs
        ``[output_box, output_mask, output_rel]``.
    """
    # Input Layers
    fixed_batch = dict(batch_size=batch_size)
    input_o = Input(shape=num_rooms, dtype=tf.int32, **fixed_batch)
    input_p = Input(shape=num_edges, dtype=tf.float32, **fixed_batch)
    input_t = Input(shape=input_size_t, dtype=tf.int32, **fixed_batch)
    box_gt = Input(shape=(num_rooms, 4), dtype=tf.float32, **fixed_batch)
    mask_gt = Input(shape=(num_rooms, mask_size, mask_size),
                    dtype=tf.int32,
                    **fixed_batch)

    # Embeddings
    object_embedder = Embedding(input_dim=num_objects,
                                output_dim=embed_dim,
                                input_length=num_rooms,
                                mask_zero=True)
    embedding_o = object_embedder(input_o)
    relation_embedder = Embedding(input_dim=num_relation,
                                  output_dim=embed_dim,
                                  input_length=num_edges,
                                  mask_zero=True)
    embedding_p = relation_embedder(input_p)

    # Graph Convolutions
    graph_net = GraphTripleConvNet(input_dim=Din,
                                   hidden_dim=H,
                                   batch_size=batch_size)
    new_s_obj, new_p_obj = graph_net(embedding_o, embedding_p, input_t)

    # Box and Mask Regression Nets
    output_box = box_net(gconv_dim=Dout)(new_s_obj)
    output_mask = Mask_regression(num_chan=Dout,
                                  mask_size=mask_size)(new_s_obj)
    relation_net = rel_aux_net(gconv_out=Dout,
                               gconv_hidden_dim=H,
                               out_dim=num_relation,
                               batch_size=batch_size)
    output_rel = relation_net(embedding_o, output_box, input_t)

    # Model
    model_inputs = [input_o, input_p, input_t, box_gt, mask_gt]
    model_outputs = [output_box, output_mask, output_rel]
    model = Model(model_inputs, model_outputs)
    model.add_loss(
        total_loss(box_gt, mask_gt, input_p, output_box, output_mask,
                   output_rel))
    model.compile(optimizer=optimizers.Adam(learning_rate=lr))
    return model
def _build_compile(self, model_input):
    """Wire encoder and decoder into the compiled ``SurVED`` model.

    The unweighted KL term is reported as the ``kl_loss`` metric; the term
    scaled by ``self.kl_loss_weight`` is registered as an additional loss on
    top of the loss returned by ``self._get_loss()``.

    Args:
        model_input: Keras input tensor fed to ``self.encoder``.

    Returns:
        The compiled Keras model.
    """
    z_mean, z_log_var, z = self.encoder(model_input)
    decoded = self.decoder_y(z)
    surved = Model(model_input, decoded, name='SurVED')

    kl_inner = z_log_var - tf.square(z_mean) - tf.exp(z_log_var) + 1
    kl_unweighted = -0.5 * tf.reduce_mean(kl_inner)
    kl_weighted = kl_unweighted * self.kl_loss_weight

    surved.add_loss(K.mean(kl_weighted))
    surved.add_metric(kl_unweighted, name='kl_loss', aggregation='mean')

    optimizer = Adam(lr=self.surved_lr)
    surved.compile(loss=self._get_loss(),
                   optimizer=optimizer,
                   metrics=[self.cindex, self.surv_mse_loss])
    return surved
def create_autoencoder(block, reencode=False):
    """Chain the encoder and decoder for ``block`` into one model.

    Args:
        block: Forwarded to ``create_encoder`` and ``create_decoder``.
        reencode: When true, the decoded image is passed through the encoder
            again and ``content_feature_loss`` is registered on the model.

    Returns:
        A Keras model from the encoder's input to the decoded image.
    """
    encoder = create_encoder(block)
    decoder = create_decoder(block)

    image = encoder.input
    encoded, *masks = encoder(image)
    decoded = decoder([encoded, *masks])
    outputs = [decoded]

    if not reencode:
        return Model(image, outputs=outputs)

    # NOTE(review): a second encoder is constructed here but the re-encoding
    # below goes through the first one -- confirm which of the two is
    # intended.
    encoder2 = create_encoder(block)
    reencoded, *_masks = encoder(decoded)

    autoencoder = Model(image, outputs=outputs)
    feature_loss = content_feature_loss(image, encoded, decoded, reencoded)
    autoencoder.add_loss(feature_loss)
    return autoencoder
def FirstStageModel(input_shape,
                    latent_dim,
                    base_dim=32,
                    fc_dim=512,
                    kernel_size=3,
                    num_scale=3,
                    block_per_scale=1,
                    depth_per_block=2):
    """Build the first-stage VAE together with its encoder and decoder.

    The decoder's scale schedule starts at 4 and doubles until it reaches
    ``input_shape[1]``; channel counts start at ``base_dim``, double per step
    (capped at 1024) and are handed to the decoder in reverse order.

    The VAE takes a second scalar input ``gamma`` that divides the
    reconstruction term of the loss.

    Args:
        input_shape: Input shape; ``input_shape[1]`` must be reachable from 4
            by repeated doubling.
        latent_dim: Size of the latent vector.
        base_dim: Channel count at the first scale.
        fc_dim, kernel_size, num_scale, block_per_scale, depth_per_block:
            Forwarded to ``Encoder1`` / ``Decoder1``.

    Returns:
        Tuple ``(vae1, encoder1, decoder1)``.
    """
    # base_dim refers to channels; they are doubled at each downscaling
    desired_scale = input_shape[1]
    scales, dims = [], []
    scale_now, dim_now = 4, base_dim
    while scale_now <= desired_scale:
        scales.append(scale_now)
        dims.append(dim_now)
        scale_now, dim_now = scale_now * 2, min(dim_now * 2, 1024)
    assert (scales[-1] == desired_scale)
    dims = dims[::-1]
    print(dims, scales)

    encoder1 = Encoder1(input_shape, base_dim, kernel_size, num_scale,
                        block_per_scale, depth_per_block, fc_dim, latent_dim)
    decoder1 = Decoder1(input_shape, latent_dim, dims, scales, kernel_size,
                        block_per_scale, depth_per_block)

    x = Input(shape=input_shape)
    gamma = Input(shape=())  # adaptive gamma parameter
    z_mean, z_log_var, z = encoder1(x)
    x_hat = decoder1(z)
    vae1 = Model([x, gamma], x_hat)

    # loss
    kl_weight = (2 * input_shape[1] / latent_dim)**2
    squared_error = K.sum(K.square(x - x_hat), axis=[1, 2, 3])
    L_rec = 0.5 * squared_error / gamma
    kl_inner = K.square(z_mean) + K.exp(z_log_var) - 1 - z_log_var
    L_KL = 0.5 * K.sum(kl_inner, axis=-1)
    L_tot = K.mean(L_rec + kl_weight * L_KL)
    vae1.add_loss(L_tot)

    return (vae1, encoder1, decoder1)
def build_model(self, n_links, params):
    """Build a model mapping one time index to a value per link.

    The time index is looked up in an embedding table with ``7 * 24`` rows
    and passed through two dense layers. Targets and mask are fed as model
    inputs so ``self.loss_fcn`` can be registered with ``add_loss``.

    Args:
        n_links: Number of outputs.
        params: Mapping with optional keys ``dayHourDim`` (default 4),
            ``lr`` (default 0.01) and ``dropoutProp`` (default 0.1); the
            resolved values are stored on ``self``.

    Returns:
        The compiled Keras model taking ``[day_time, y_true, mask]``.
    """
    self.day_hour_dim = params.get('dayHourDim', 4)
    self.lr = params.get('lr', 0.01)
    self.dropout_prop = params.get('dropoutProp', 0.1)

    in_day_time = Input(shape=(1, ), name='day_time')
    in_y_true = Input(shape=(n_links, ), name='y_true')
    in_mask = Input(shape=(n_links, ), name='mask')

    time_embedding = Embedding(7 * 24,
                               self.day_hour_dim,
                               name='embed_day_time')
    time_features = time_embedding(in_day_time)
    # Drop the length-1 sequence axis produced by the embedding lookup.
    time_features = Reshape((self.day_hour_dim, ),
                            name='reshape_day_time')(time_features)

    hidden = Dense(n_links // 3, name='fc_1',
                   activation='relu')(time_features)
    hidden = Dropout(self.dropout_prop, name='dropout')(hidden)
    out_run = Dense(n_links, name='fc_out', activation='relu')(hidden)

    opt = tf.keras.optimizers.RMSprop(learning_rate=self.lr)
    model = Model(inputs=[in_day_time, in_y_true, in_mask],
                  outputs=[out_run])
    model.add_loss(self.loss_fcn(in_y_true, out_run, in_mask))
    model.compile(opt)
    return model
def init_detection_model(model_conf, trainable=False):
    """Build a MobileNetV2-based model with a 5-channel sigmoid output map.

    :param model_conf: mapping providing ``IMAGE_SIZE``, ``WEIGHT_DECAY`` and
        ``ALPHA``
    :param trainable: value assigned to ``trainable`` on every backbone layer
    :return: uncompiled Keras model with one L2 penalty registered per
        trainable weight
    """
    IMAGE_SIZE = model_conf['IMAGE_SIZE']
    WEIGHT_DECAY = model_conf['WEIGHT_DECAY']
    ALPHA = model_conf['ALPHA']

    backbone = MobileNetV2(input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3),
                           include_top=False,
                           alpha=ALPHA,
                           weights="imagenet")
    for backbone_layer in backbone.layers:
        backbone_layer.trainable = trainable

    features = backbone.get_layer("block_16_project_BN").output

    head = Conv2D(112,
                  padding="same",
                  kernel_size=3,
                  strides=1,
                  activation="relu")(features)
    head = Conv2D(112,
                  padding="same",
                  kernel_size=3,
                  strides=1,
                  use_bias=False)(head)
    head = BatchNormalization()(head)
    head = Activation("relu")(head)
    head = Conv2D(5, padding="same", kernel_size=1,
                  activation="sigmoid")(head)

    model = Model(inputs=backbone.input, outputs=head)

    # divide by 2 since d/dweight learning_rate * weight^2
    # = 2 * learning_rate * weight
    # see https://arxiv.org/pdf/1711.05101.pdf
    regularizer = l2(WEIGHT_DECAY / 2)
    for weight in model.trainable_weights:
        with tf.keras.backend.name_scope("weight_regularizer"):
            penalty = regularizer(weight)
            model.add_loss(penalty)

    return model
def build(
    latent_dim,
    input_shape,
    repeat=1,
    use_inception=True,
    batch_size=1,
    learning_rate=1e-4,
):
    """Build and compile a VAE from the private encoder/decoder builders.

    The sum of the reconstruction and KL losses is registered with
    ``add_loss``; both parts are also reported as metrics. ``batch_size`` is
    accepted but not read in this function.

    Returns:
        Tuple ``(model, encoder, decoder)``.
    """
    encoder_input, encoder = _build_encoder(input_shape, latent_dim, repeat,
                                            use_inception)
    decoder_input, decoder = _build_decoder(latent_dim, input_shape, repeat,
                                            use_inception)

    z_mean, z_log_var, z = encoder(encoder_input)
    decoder_output = decoder(z)
    model = Model(encoder_input, decoder_output, name="vae")

    print(f"Encoder input: {encoder_input.shape}")
    print(f"Decoder output: {decoder_output.shape}")
    # The reconstruction must be shape-compatible with the input.
    encoder_input.shape.assert_is_compatible_with(decoder_output.shape)

    reconstruction_layer = ReconstructionLoss(mean=True)
    reconstruction_loss = reconstruction_layer(
        [encoder_input, decoder_output])
    kl_layer = KLLoss(mean=True)
    kl_loss = kl_layer([z, z_mean, z_log_var])

    vae_loss = reconstruction_loss + kl_loss
    model.add_loss(vae_loss)
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate))

    model.add_metric(reconstruction_loss,
                     aggregation="mean",
                     name="reconstruction_loss")
    model.add_metric(kl_loss, aggregation="mean", name="kl_loss")

    return model, encoder, decoder
def make_backbone(num_states,
                  hidden_units,
                  num_actions,
                  dropout_reg=1e-5,
                  wd=1e-3):
    """
    Build a tensorflow keras backbone model utilizing concrete dropout layers.

    Each entry of ``hidden_units`` becomes a ReLU dense layer wrapped in
    ``ConcreteDropout``; the loss returned by every wrapper is collected and
    registered on the model. A fixed 100-unit ReLU layer and a linear output
    layer with ``num_actions`` units follow.
    """
    inp = Input(shape=(num_states, ))

    losses: list = []
    hidden = inp
    for units in hidden_units:
        wrapped = ConcreteDropout(Dense(units, activation='relu'),
                                  weight_regularizer=wd,
                                  dropout_regularizer=dropout_reg)
        hidden, layer_loss = wrapped(hidden)
        losses.append(layer_loss)

    hidden = Dense(100, activation='relu')(hidden)
    out = Dense(num_actions, activation='linear')(hidden)

    model = Model(inp, out)
    model.add_loss(losses)
    return model
class VAE(object):
    """Dense variational autoencoder with a single hidden layer per side.

    Call ``build_vae`` to create the encoder, decoder and end-to-end models,
    then ``train`` to attach the VAE loss and fit.
    """

    def __init__(self,
                 original_dimension=784,
                 encoding_dimension=512,
                 latent_dimension=2):
        """Store the layer sizes; no Keras objects are created yet.

        Args:
            original_dimension: Width of the input and the reconstruction.
            encoding_dimension: Width of the hidden layer on both sides.
            latent_dimension: Width of the latent vector.
        """
        self.original_dimension = original_dimension
        self.encoding_dimension = encoding_dimension
        self.latent_dimension = latent_dimension
        # All of the following are populated by build_vae().
        self.z_log_var = None
        self.z_mean = None
        self.inputs = None
        self.outputs = None
        self.encoder = None
        self.decoder = None
        self.vae = None

    def build_vae(self):
        """Create ``self.encoder``, ``self.decoder`` and ``self.vae``."""
        # Build encoder
        self.inputs = Input(shape=(self.original_dimension, ))
        x = Dense(self.encoding_dimension)(self.inputs)
        x = ReLU()(x)
        self.z_mean = Dense(self.latent_dimension)(x)
        self.z_log_var = Dense(self.latent_dimension)(x)
        z = Lambda(sampling)([self.z_mean, self.z_log_var])
        self.encoder = Model(self.inputs, [self.z_mean, self.z_log_var, z])

        # Build decoder
        latent_inputs = Input(shape=(self.latent_dimension, ))
        x = Dense(self.encoding_dimension)(latent_inputs)
        x = ReLU()(x)
        self.outputs = Dense(self.original_dimension)(x)
        self.outputs = Activation('sigmoid')(self.outputs)
        self.decoder = Model(latent_inputs, self.outputs)

        # Build end-to-end VAE. The third encoder output is the sampled z;
        # self.outputs ends up holding the end-to-end reconstruction.
        self.outputs = self.encoder(self.inputs)[2]
        self.outputs = self.decoder(self.outputs)
        self.vae = Model(self.inputs, self.outputs)

    # Deliberately NOT wrapped in @tf.function: Keras refuses to run
    # Model.fit inside an enclosing tf.function because fit manages its own
    # graph functions.
    def train(self, X_train, X_test, epochs=50, batch_size=64):
        """Attach the VAE loss, compile and fit the end-to-end model.

        The loss is the input-width-scaled MSE reconstruction term plus the
        KL term. Every call registers the loss on the model again.

        Args:
            X_train: Training inputs (also the reconstruction targets).
            X_test: Validation inputs.
            epochs: Number of training epochs.
            batch_size: Mini-batch size.

        Returns:
            Tuple ``(encoder, decoder, vae)``.

        Raises:
            RuntimeError: If ``build_vae`` has not been called yet.
        """
        if self.vae is None:
            raise RuntimeError("build_vae() must be called before train()")

        reconstruction_loss = mse(self.inputs, self.outputs)
        reconstruction_loss *= self.original_dimension

        kl_loss = (1 + self.z_log_var - K.square(self.z_mean) -
                   K.exp(self.z_log_var))
        kl_loss = K.sum(kl_loss, axis=-1)
        kl_loss *= -0.5

        vae_loss = K.mean(reconstruction_loss + kl_loss)
        self.vae.add_loss(vae_loss)
        self.vae.compile(optimizer=Adam(lr=1e-3))
        self.vae.fit(X_train,
                     epochs=epochs,
                     batch_size=batch_size,
                     validation_data=(X_test, None))
        return self.encoder, self.decoder, self.vae
def vae(self, config):
    """Build a dense VAE over flattened images and register its KL loss.

    The encoder has a leading layer of ``2 * enc_units[0]`` units, one layer
    per entry of ``enc_units`` and an ``interm_dim`` layer; the decoder has a
    leading layer of ``dec_units[0] // 2`` units and one layer per entry of
    ``dec_units``. Only the KL term is added here; no reconstruction loss is
    registered and the model is returned uncompiled.

    Args:
        config: Mapping with keys ``enc_units``, ``dec_units``,
            ``interm_dim``, ``latent_dim``, ``activation``,
            ``kernel_initializer`` and ``kernel_regularizer``.

    Returns:
        The end-to-end Keras model.
    """
    enc_units = config["enc_units"]
    dec_units = config["dec_units"]
    interm_dim = config["interm_dim"]
    latent_dim = config["latent_dim"]
    activation = config["activation"]
    kernel_initializer = config["kernel_initializer"]
    kernel_regularizer = config["kernel_regularizer"]

    def hidden_dense(units):
        # Every hidden layer shares activation, initializer and regularizer.
        return Dense(
            units,
            activation=activation,
            kernel_initializer=kernel_initializer,
            kernel_regularizer=kernel_regularizer,
        )

    height, width, channels = (self.image_size[0], self.image_size[1],
                               self.image_size[2])

    org_inputs = Input(shape=height * width * channels)
    encoded = hidden_dense(enc_units[0] * 2)(org_inputs)
    for units in enc_units:
        encoded = hidden_dense(units)(encoded)
    encoded = hidden_dense(interm_dim)(encoded)

    z_mean = Dense(latent_dim)(encoded)
    z_var = Dense(latent_dim)(encoded)

    # Sampling from intermediate dimension to get a probability density
    z = Lambda(self.sampling, output_shape=(latent_dim, ))([z_mean, z_var])

    # Encoder model
    enc_model = Model(org_inputs, [z_mean, z_var])

    latent_inputs = Input(shape=(latent_dim, ))
    decoded = hidden_dense(dec_units[0] // 2)(latent_inputs)
    for units in dec_units:
        decoded = hidden_dense(units)(decoded)
    final_outputs = Dense(height * width * channels,
                          activation="sigmoid")(decoded)

    # Decoder model
    dec_model = Model(latent_inputs, final_outputs)

    out = dec_model(z)
    model = Model(org_inputs, out)

    kl_inner = z_var - tf.math.square(z_mean) - tf.math.exp(z_var) + 1
    kl_loss = -0.5 * tf.math.reduce_mean(kl_inner)
    model.add_loss(kl_loss)
    return model
def build_model(hp):
    """Build the classifier on top of two embedding models.

    Hyper-parameters whose key contains no ``'2'`` go to the first embedding
    model (with ``'1'`` stripped from the key); those without a ``'1'`` go to
    the second (with ``'2'`` stripped). Each embedded entity and the
    concentration input pass through their own dense branch before being
    concatenated and classified. The two embedding losses are averaged with
    their weights and registered through ``add_loss``.

    Args:
        hp: Hyper-parameter container exposing ``values`` and ``Int``.

    Returns:
        The compiled Keras model.
    """
    params = hp.values.copy()
    params1 = {
        key.replace('1', ''): params[key]
        for key in params if '2' not in key
    }
    params2 = {
        key.replace('2', ''): params[key]
        for key in params if '1' not in key
    }
    params1['e_dim'], params1['r_dim'] = params1['dim'], params1['dim']
    params2['e_dim'], params2['r_dim'] = params2['dim'], params2['dim']

    m1 = models[params1['embedding_model']]
    m2 = models[params2['embedding_model']]
    embedding_model1 = m1(**params1)
    embedding_model2 = m2(**params2)

    triple1 = Input((3, ))
    triple2 = Input((3, ))
    ci = Input((1, ))
    si = Input((1, ))
    conc = Input((1, ))
    inputs = [triple1, triple2, ci, si, conc]

    _, l1 = embedding_model1(triple1)
    _, l2 = embedding_model2(triple2)

    chemical = embedding_model1.entity_embedding(ci)
    species = embedding_model2.entity_embedding(si)
    chemical = tf.squeeze(chemical, axis=1)
    species = tf.squeeze(species, axis=1)

    # Branch layers are numbered from 1 in the hyper-parameter keys.
    n_chemical = params['branching_num_layers_chemical']
    for number in range(1, n_chemical + 1):
        units = params['branching_units_chemical_' + str(number)]
        chemical = Dense(units, activation='relu')(chemical)
        chemical = Dropout(0.2)(chemical)

    n_species = params['branching_num_layers_species']
    for number in range(1, n_species + 1):
        units = params['branching_units_species_' + str(number)]
        species = Dense(units, activation='relu')(species)
        species = Dropout(0.2)(species)

    n_conc = hp.Int('branching_num_layers_conc', 0, 3, default=1)
    for number in range(1, n_conc + 1):
        units = params['branching_units_conc_' + str(number)]
        conc = Dense(units, activation='relu')(conc)
        conc = Dropout(0.2)(conc)

    x = Concatenate(axis=-1)([chemical, species, conc])

    n_shared = hp.Int('num_layers', 0, 3, default=1)
    for number in range(1, n_shared + 1):
        x = Dense(params['units_' + str(number)], activation='relu')(x)
        x = Dropout(0.2)(x)

    x = Dense(params['output_dim'], activation='sigmoid', name='output_1')(x)

    model = Model(inputs=inputs, outputs=[x])
    model.add_loss(params1['loss_weight'] * l1 / 2 +
                   params2['loss_weight'] * l2 / 2)

    model.compile(
        optimizer=Adam(learning_rate=params['learning_rate']),
        loss={'output_1': 'binary_crossentropy'},
        loss_weights={'output_1': params['classification_loss_weight']},
        metrics=['acc', f1, f2,
                 Precision(),
                 Recall(),
                 AUC()])
    return model
def build_model(hp, norm_params=None):
    """Build the joint model over two embedding models and ``base_model``.

    Hyper-parameters whose key contains no ``'2'`` go to the chemical
    embedding model (with ``'1'`` stripped from the key); those without a
    ``'1'`` go to the species embedding model (with ``'2'`` stripped). Each
    embedding model scores a true and a corrupted triple; the two resulting
    losses are weighted and registered through ``add_loss``. With
    ``use_pretrained`` set, every ``Embedding`` layer in both embedding
    models is frozen.

    Args:
        hp: Mapping of hyper-parameters.
        norm_params: Not read in this function.

    Returns:
        The model after ``compile_model(model, hp)``.
    """
    params = hp.copy()
    params1 = {
        key.replace('1', ''): params[key]
        for key in params if '2' not in key
    }
    params2 = {
        key.replace('2', ''): params[key]
        for key in params if '1' not in key
    }
    params1['e_dim'], params1['r_dim'] = params1['dim'], params1['dim']
    params2['e_dim'], params2['r_dim'] = params2['dim'], params2['dim']
    params1['name'] = 'chemical_embedding_model'
    params2['name'] = 'species_embedding_model'

    m1 = models[params1['embedding_model']]
    m2 = models[params2['embedding_model']]
    embedding_model1 = m1(**params1)
    embedding_model2 = m2(**params2)

    triple1 = Input((3, ))
    triple2 = Input((3, ))
    ftriple1 = Input((3, ))
    ftriple2 = Input((3, ))
    ci = Input((1, ))
    si = Input((1, ))
    conc = Input((1, ))
    inputs = [triple1, ftriple1, triple2, ftriple2, ci, si, conc]

    # Chemical side: true versus corrupted triple.
    score1 = embedding_model1(triple1)
    fscore1 = embedding_model1(ftriple1)
    loss_function1 = loss_function_lookup(params1['loss_function'])
    loss1 = loss_function1(score1, fscore1, params1['margin'] or 1, 1)

    # Species side: true versus corrupted triple.
    score2 = embedding_model2(triple2)
    fscore2 = embedding_model2(ftriple2)
    loss_function2 = loss_function_lookup(params2['loss_function'])
    loss2 = loss_function2(score2, fscore2, params2['margin'] or 1, 1)

    chemical = embedding_model1.entity_embedding(ci)
    species = embedding_model2.entity_embedding(si)
    chemical = tf.squeeze(chemical, axis=1)
    species = tf.squeeze(species, axis=1)
    chemical = LayerNormalization(axis=-1)(chemical)
    species = LayerNormalization(axis=-1)(species)

    x = base_model(chemical, species, conc, params)

    model = Model(inputs=inputs, outputs=[x])
    model.add_loss(params1['loss_weight'] * loss1 +
                   params2['loss_weight'] * loss2)

    if params['use_pretrained']:
        for embedding_model in (embedding_model1, embedding_model2):
            for layer in embedding_model.layers:
                if isinstance(layer, Embedding):
                    layer.trainable = False

    compile_model(model, hp)
    return model
def build_model():
    """Build and compile the recurrent VAE with six decoder heads.

    The encoder is two bidirectional GRUs followed by ``z_mean`` /
    ``z_log_var`` projections and a sampling layer. The decoder repeats the
    latent vector over ``time_step`` steps, runs two GRUs and feeds six
    two-layer time-distributed heads. The KL term is scaled by a
    non-trainable variable held in a small custom layer (initial value 0.0)
    and registered with ``add_loss``.

    Returns:
        The compiled Keras model whose outputs are, in order: melody pitch,
        melody rhythm, bass pitch, bass rhythm, tensile and diameter.
    """
    encoder_input = Input(shape=(time_step, input_dim), name='encoder_input')
    enc_seq = Bidirectional(GRU(rnn_dim, return_sequences=True),
                            name='rnn1')(encoder_input)
    enc_state = Bidirectional(GRU(rnn_dim), name='rnn2')(enc_seq)

    z_mean = Dense(z_dim, name='z_mean')(enc_state)
    z_log_var = Dense(z_dim, name='z_log_var')(enc_state)

    def sampling(args):
        # Reparameterisation: mean + sigma * eps with eps ~ N(0, 1).
        mean, log_var = args
        batch = K.shape(mean)[0]
        dim = K.int_shape(mean)[1]
        # by default, random_normal has mean=0 and std=1.0
        epsilon = K.random_normal(shape=(batch, dim))
        return mean + K.exp(0.5 * log_var) * epsilon

    z = Lambda(sampling, output_shape=(z_dim, ),
               name='z')([z_mean, z_log_var])

    class kl_beta(tf.keras.layers.Layer):
        """Scale its input by ``-beta``, a non-trainable variable."""

        def __init__(self):
            super().__init__()
            self.beta = tf.Variable(0.0, trainable=False, dtype=tf.float32)

        def call(self, inputs, **kwargs):
            return -self.beta * inputs

    beta = kl_beta()

    encoder = Model(encoder_input, z, name='encoder')

    # decoder
    decoder_latent_input = Input(shape=z_dim, name='z_sampling')
    repeated_z = RepeatVector(time_step,
                              name='repeated_z_tension')(decoder_latent_input)
    rnn1_output = GRU(rnn_dim, name='decoder_rnn1',
                      return_sequences=True)(repeated_z)
    rnn2_output = GRU(rnn_dim, name='decoder_rnn2',
                      return_sequences=True)(rnn1_output)

    # 0.5 * mean over the batch of sum(1 + log_var - mean^2 - exp(log_var));
    # the beta layer flips the sign and applies the weight.
    kl_inner = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
    kl_loss = K.sum(kl_inner, axis=-1)
    kl_loss = tf.reduce_mean(kl_loss)
    kl_loss = 0.5 * kl_loss
    kl_loss = beta(kl_loss)

    def time_dense(units, activation, name, tensor):
        # One time-distributed dense layer applied to `tensor`.
        return TimeDistributed(Dense(units, activation=activation),
                               name=name)(tensor)

    tensile_middle_output = time_dense(tension_middle_dim, 'elu',
                                       'tensile_strain_dense1', rnn2_output)
    tensile_output = time_dense(tension_output_dim, 'elu',
                                'tensile_strain_dense2',
                                tensile_middle_output)

    diameter_middle_output = time_dense(tension_middle_dim, 'elu',
                                        'diameter_strain_dense1',
                                        rnn2_output)
    diameter_output = time_dense(tension_output_dim, 'elu',
                                 'diameter_strain_dense2',
                                 diameter_middle_output)

    melody_rhythm_1 = time_dense(start_middle_dim, 'elu',
                                 'melody_start_dense1', rnn2_output)
    melody_rhythm_output = time_dense(melody_note_start_dim, 'sigmoid',
                                      'melody_start_dense2', melody_rhythm_1)

    melody_pitch_1 = time_dense(melody_bass_dense_1_dim, 'elu',
                                'melody_pitch_dense1', rnn2_output)
    melody_pitch_output = time_dense(melody_output_dim, 'softmax',
                                     'melody_pitch_dense2', melody_pitch_1)

    bass_rhythm_1 = time_dense(start_middle_dim, 'elu', 'bass_start_dense1',
                               rnn2_output)
    bass_rhythm_output = time_dense(bass_note_start_dim, 'sigmoid',
                                    'bass_start_dense2', bass_rhythm_1)

    bass_pitch_1 = time_dense(melody_bass_dense_1_dim, 'elu',
                              'bass_pitch_dense1', rnn2_output)
    bass_pitch_output = time_dense(bass_output_dim, 'softmax',
                                   'bass_pitch_dense2', bass_pitch_1)

    decoder_output = [
        melody_pitch_output, melody_rhythm_output, bass_pitch_output,
        bass_rhythm_output, tensile_output, diameter_output
    ]
    decoder = Model(decoder_latent_input, decoder_output, name='decoder')

    model_input = encoder_input
    vae = Model(model_input,
                decoder(encoder(model_input)),
                name='encoder_decoder')
    vae.add_loss(kl_loss)
    vae.add_metric(kl_loss, name='kl_loss', aggregation='mean')

    optimizer = keras.optimizers.Adam()
    # Losses and metrics are listed in the same order as decoder_output.
    head_losses = [
        'categorical_crossentropy', 'binary_crossentropy',
        'categorical_crossentropy', 'binary_crossentropy', 'mse', 'mse'
    ]
    head_metrics = [[keras.metrics.CategoricalAccuracy()],
                    [keras.metrics.BinaryAccuracy()],
                    [keras.metrics.CategoricalAccuracy()],
                    [keras.metrics.BinaryAccuracy()],
                    [keras.metrics.MeanSquaredError()],
                    [keras.metrics.MeanSquaredError()]]
    vae.compile(optimizer=optimizer, loss=head_losses, metrics=head_metrics)
    return vae
def build_model(self, n_links, freq, lags, preds, params):
    """Build and compile the GRU encoder/decoder forecasting model.

    Parameters
    ----------
    n_links : int
        Size of the last axis of the ``lags``, ``y_true`` and ``mask``
        inputs, and number of units of the output layer.
    freq : str
        Sampling period, parsed with ``pd.to_timedelta``; it determines how
        many time steps make up one day.
    lags : int
        Number of time steps fed to the encoder.
    preds : int
        Number of time steps produced by the model.
    params : dict
        Optional hyper-parameters: ``dayHourDim`` (default 2), ``lr``
        (default 0.01), ``dropoutProp`` (default 0.1) and ``rnnHiddenState``
        (default 10).

    Returns
    -------
    A compiled Keras model with inputs ``[lags, dow_tod, y_true, mask]``.
    The training loss is ``self.loss_fcn(y_true, out, mask)`` attached with
    ``add_loss``; no loss is passed to ``compile``.
    """
    layers = tf.keras.layers

    # Hyper-parameters, with defaults for anything absent from ``params``.
    embedding_dim = params.get('dayHourDim', 2)
    learning_rate = params.get('lr', 0.01)
    dropout_rate = params.get('dropoutProp', 0.1)
    hidden_units = params.get('rnnHiddenState', 10)
    steps_per_day = int(pd.to_timedelta('24H') / pd.to_timedelta(freq))

    # Model inputs.
    in_dow_tod = Input(shape=(lags + preds, ), name='dow_tod')
    in_lags = Input(shape=(lags, n_links), name='lags')
    in_y_true = Input(shape=(preds, n_links), name='y_true')
    in_mask = Input(shape=(preds, n_links), name='mask')

    # A single embedding covers the whole window (lags + preds) so that both
    # halves share the same learned time representation.
    embedded_time = Embedding(7 * steps_per_day, embedding_dim,
                              name='time_embedding')(in_dow_tod)
    embedded_time_lags = layers.Lambda(lambda t: t[:, :lags, :])(embedded_time)
    embedded_time_preds = layers.Lambda(
        lambda t: t[:, -preds:, :])(embedded_time)

    # Encoder input: normalised lags concatenated with their time embedding.
    normalised_lags = layers.BatchNormalization(name='bn_1')(in_lags)
    encoder_input = layers.Concatenate(name='concat_lags_time')(
        [embedded_time_lags, normalised_lags])

    # Options shared by the encoder and decoder GRUs.
    gru_options = dict(return_sequences=True,
                       unroll=True,
                       activation='tanh',
                       dropout=dropout_rate,
                       recurrent_dropout=dropout_rate)

    # Encoder: keep the final state and the last ``preds`` output steps.
    encoder_gru = layers.GRU(hidden_units, return_state=True, name='encoder',
                             **gru_options)
    encoder_sequence, encoder_state = encoder_gru(encoder_input)
    decoder_input = layers.Lambda(lambda t: t[:, -preds:, :],
                                  name='preds')(encoder_sequence)
    decoder_input = layers.BatchNormalization(name='bn_preds')(decoder_input)

    # Decoder: starts from the encoder's final state.
    decoder_gru = layers.GRU(hidden_units, return_state=False, name='decoder',
                             **gru_options)
    decoded = decoder_gru(decoder_input, initial_state=encoder_state)
    decoded = layers.Concatenate(name='concat_rnn_time')(
        [embedded_time_preds, decoded])

    # Per-time-step dense head.
    hidden = layers.TimeDistributed(
        layers.Dense(n_links // 3, name='fc_1', activation='relu'),
        name='time_distributed_fc_1')(decoded)
    hidden = Dropout(dropout_rate, name='dropout')(hidden)
    out = layers.TimeDistributed(
        layers.Dense(n_links, name='fc_out', activation='linear'),
        name='time_distributed_output')(hidden)

    optimizer = tf.keras.optimizers.RMSprop(learning_rate=learning_rate)
    model = Model(inputs=[in_lags, in_dow_tod, in_y_true, in_mask],
                  outputs=[out])
    # y_true and mask are model inputs, so the loss is attached symbolically.
    model.add_loss(self.loss_fcn(in_y_true, out, in_mask))
    model.compile(optimizer)
    return model
class WANN(object):
    """
    WANN: Weighting Adversarial Neural Network is an instance-based domain
    adaptation method suited for regression tasks. It supposes the supervised
    setting where some labeled target data are available.

    The goal of WANN is to compute a source instances reweighting which
    corrects "shifts" between source and target domain. This is done by
    minimizing the Y-discrepancy distance between source and target
    distributions.

    WANN involves three networks:
        - the weighting network which learns the source weights.
        - the task network which learns the task.
        - the discrepancy network which is used to estimate a distance
          between the reweighted source and target distributions:
          the Y-discrepancy

    Parameters
    ----------
    get_base_model: callable, optional
        Constructor for the two networks: task and discrepancer.
        The constructor should take the four following
        arguments:
        - shape: the input shape
        - C: the projecting constant
        - activation: the last layer activation function
        - name: the model name
        If None, get_default_model is used.

    get_weighting_model: callable, optional
        Constructor for the weighting network.
        The constructor should take the same arguments
        as get_base_model.
        If None, get_base_model is used.

    C: float, optional (default=1.)
        Projecting constant: networks should be
        regularized by projecting the weights of each layer
        on the ball of radius C.

    C_w: float, optional (default=None)
        Projecting constant of the weighting network.
        If None C_w = C.

    optimizer: tf.keras Optimizer, optional (default="adam")
        Optimizer of WANN

    save_hist: boolean, optional (default=False)
        Whether to save the predicted weights and labels
        at each epoch or not. The flag is only stored on the instance;
        nothing in this class reads it.
    """

    def __init__(self, get_base_model=None, get_weighting_model=None,
                 C=1., C_w=None, optimizer='adam', save_hist=False):
        self.get_base_model = get_base_model
        if self.get_base_model is None:
            self.get_base_model = _get_default_model

        self.get_weighting_model = get_weighting_model
        if self.get_weighting_model is None:
            # Fall back on the *resolved* base constructor. Using the raw
            # ``get_base_model`` argument here would leave the weighting
            # constructor set to None whenever both arguments are omitted.
            self.get_weighting_model = self.get_base_model

        self.C = C
        self.C_w = C_w
        if self.C_w is None:
            self.C_w = C

        self.save_hist = save_hist
        self.optimizer = optimizer

    def fit(self, X, y, index=None, weights_target=None, **fit_params):
        """
        Fit WANN

        Parameters
        ----------
        X, y: numpy arrays
            Input data

        index: iterable
            Index should contain 2 lists or 1D-arrays
            corresponding to:
            index[0]: indexes of source labeled data in X, y
            index[1]: indexes of target labeled data in X, y
            Although the default is None, a valid index is required.

        weights_target: numpy array, optional (default=None)
            Weights for target sample.
            If None, all weights are set to 1.

        fit_params: key, value arguments
            Arguments to pass to the fit method (epochs, batch_size...)

        Returns
        -------
        self

        Raises
        ------
        AssertionError
            If ``index`` is not an iterable of length 2.
        """
        self.fit_params = fit_params

        assert hasattr(index, "__iter__"), "index should be iterable"
        assert len(index) == 2, "index length should be 2"

        src_index, tgt_index = index[0], index[1]
        self._fit(X, y, src_index, tgt_index, weights_target)
        return self

    @staticmethod
    def _cycle_to_length(values, length):
        """Return ``values`` repeated cyclically until it has ``length`` items."""
        values = np.asarray(values)
        n_values = len(values)
        if n_values == 0:
            raise ValueError("Cannot resize an empty sequence.")
        return values[np.arange(length) % n_values]

    def _fit(self, X, y, src_index, tgt_index, weights_target):
        """Pre-train the weighting network, then fit the full WANN model."""
        # Cycle the shorter index so that source and target have equal length.
        max_size = max(len(src_index), len(tgt_index))
        resize_src_ind = self._cycle_to_length(src_index, max_size)
        resize_tgt_ind = self._cycle_to_length(tgt_index, max_size)

        # If no target weights are given, all are set to one.
        if weights_target is None:
            resize_weights_target = np.ones(max_size)
        else:
            resize_weights_target = self._cycle_to_length(weights_target,
                                                          max_size)

        # The networks are built once and reused by later calls.
        if not hasattr(self, "model"):
            self._create_wann(shape=X.shape[1])

        # Callbacks are only passed to the fit of the full model, so they are
        # taken out of the keyword arguments shared by both fits.
        callbacks = self.fit_params.pop("callbacks", [])

        # Initialize the weighting network by regressing a constant weight
        # of one on the source data. The optimizer is copied so that this
        # stage does not share an optimizer object with the full model.
        self.weights_predictor.compile(
            optimizer=copy.deepcopy(self.optimizer), loss="mse")
        self.weights_predictor.fit(X[src_index],
                                   np.ones(len(src_index)),
                                   **self.fit_params)

        # Fit the full model; its loss is attached in ``_create_wann``.
        self.model.fit([X[resize_src_ind], X[resize_tgt_ind],
                        y[resize_src_ind], y[resize_tgt_ind],
                        resize_weights_target],
                       callbacks=callbacks,
                       **self.fit_params)
        return self

    def _create_wann(self, shape):
        """Build the three networks and the combined model with its loss."""
        # Build task, weights_predictor and discrepancer networks.
        # The weights_predictor ends with a relu activation.
        self.weights_predictor = self.get_weighting_model(
            shape, activation='relu', C=self.C_w, name="weights")
        self.task = self.get_base_model(
            shape, activation=None, C=self.C, name="task")
        self.discrepancer = self.get_base_model(
            shape, activation=None, C=self.C, name="discrepancer")

        # Input layers for Xs, Xt, ys, yt and the target weights.
        input_source = Input(shape=(shape,))
        input_target = Input(shape=(shape,))
        output_source = Input(shape=(1,))
        output_target = Input(shape=(1,))
        weights_target = Input(shape=(1,))
        flip = _GradReverse()

        # Network outputs for both source and target.
        weights_source = self.weights_predictor(input_source)
        output_task_s = self.task(input_source)
        output_task_t = self.task(input_target)
        output_disc_s = self.discrepancer(input_source)
        output_disc_t = self.discrepancer(input_target)

        # Reversal layer at the end of the discrepancer.
        output_disc_s = flip(output_disc_s)
        output_disc_t = flip(output_disc_t)

        # Create the model and define its loss.
        self.model = Model([input_source, input_target,
                            output_source, output_target, weights_target],
                           [output_task_s, output_task_t,
                            output_disc_s, output_disc_t, weights_source],
                           name='WANN')

        loss_task_s = K.mean(multiply(
            [weights_source, K.square(output_source - output_task_s)]))
        loss_task_t = K.mean(multiply(
            [weights_target, K.square(output_target - output_task_t)]))
        loss_disc_s = K.mean(multiply(
            [weights_source, K.square(output_source - output_disc_s)]))
        loss_disc_t = K.mean(multiply(
            [weights_target, K.square(output_target - output_disc_t)]))

        # Only the source task loss is optimised; the target task loss is
        # computed solely to be reported as the "task_t" metric.
        loss_task = loss_task_s
        loss_disc = loss_disc_t - loss_disc_s
        loss = loss_task + loss_disc

        self.model.add_loss(loss)
        self.model.add_metric(tf.reduce_sum(K.mean(weights_source)),
                              name="weights", aggregation="mean")
        self.model.add_metric(tf.reduce_sum(loss_task_s),
                              name="task_s", aggregation="mean")
        self.model.add_metric(tf.reduce_sum(loss_task_t),
                              name="task_t", aggregation="mean")
        self.model.add_metric(tf.reduce_sum(loss_disc),
                              name="disc", aggregation="mean")
        self.model.add_metric(tf.reduce_sum(loss_disc_s),
                              name="disc_s", aggregation="mean")
        self.model.add_metric(tf.reduce_sum(loss_disc_t),
                              name="disc_t", aggregation="mean")
        self.model.compile(optimizer=self.optimizer)
        return self

    def predict(self, X):
        """
        Predict method: return the prediction of task network

        Parameters
        ----------
        X: array
            input data

        Returns
        -------
        y_pred: array
            prediction of task network
        """
        return self.task.predict(X)

    def get_weight(self, X):
        """
        Return the predictions of weighting network

        Parameters
        ----------
        X: array
            input data

        Returns
        -------
        array:
            weights
        """
        return self.weights_predictor.predict(X)

    def save(self, path):
        """
        Save the task network to ``path`` and the weighting network to
        ``path + "_weights"``.

        Parameters
        ----------
        path: str
            path where to save the model

        Returns
        -------
        self
        """
        self.task.save(path)
        self.weights_predictor.save(path + "_weights")
        return self
class ADDA:
    r"""
    ADDA: Adversarial Discriminative Domain Adaptation

    ADDA is a feature-based domain adaptation method.

    The purpose of ADDA is to build a new feature representation
    in which source and target data could not be distinguished by
    any **discriminator** network. This feature representation is
    built with two **encoder** networks:

    - a **source encoder** trained to provide good features in order
      to learn the task on the source domain. The task is learned
      through a **task** network trained with the **source encoder**.
    - a **target encoder** trained to fool a **discriminator** network
      which tries to classify source and target data in the encoded space.
      The **target encoder** and the **discriminator** are trained
      in an adversarial fashion in the same way as GAN.

    The parameters of the four networks are optimized in a two stage
    algorithm where **source encoder** and **task** networks are first
    fitted according to the following optimization problem:

    .. math::

        \min_{\phi_S, F} \mathcal{L}_{task}(F(\phi_S(X_S)), y_S)

    In the second stage, **target encoder** and **discriminator**
    networks are fitted according to:

    .. math::

        \max_{\phi_T} \min_{D} \mathcal{L}_{01}(D(\phi_S(X_S)), \textbf{0})
        + \mathcal{L}_{01}(D(\phi_T(X_T)), \textbf{1})

    Where:

    - :math:`(X_S, y_S), (X_T)` are respectively the labeled source data
      and the unlabeled target data.
    - :math:`\phi_S, \phi_T, F, D` are respectively the **source encoder**,
      the **target encoder**, the **task** and the **discriminator** networks.

    The method has been originally introduced for **unsupervised**
    classification DA but it could be widen to other task in **supervised**
    DA straightforwardly.

    Parameters
    ----------
    get_src_encoder : callable, optional (default=None)
        Constructor for source encoder networks.
        The constructor should return a tensorflow compiled Model.
        It should also take at least an ``input_shape`` argument
        giving the input shape of the network.
        If ``None``, shallow networks with 10 neurons are used
        as encoder networks.

    get_tgt_encoder : callable, optional (default=None)
        Constructor for target encoder networks.
        The constructor should return a tensorflow compiled Model.
        It should also take at least an ``input_shape`` argument
        giving the input shape of the network.
        If ``None``, shallow networks with 10 neurons are used
        as encoder networks.

    get_task : callable, optional (default=None)
        Constructor for task network.
        The constructor should return a tensorflow compiled Model.
        It should also take at least an ``input_shape`` argument
        giving the input shape of the network and an ``output_shape``
        argument giving the shape of the last layer.
        If ``None``, a linear network is used as task network.

    get_discriminator : callable, optional (default=None)
        Constructor for discriminator network.
        The constructor should return a tensorflow compiled Model.
        It should also take at least an ``input_shape`` argument
        giving the input shape of the network.
        If ``None``, a linear network is used as discriminator
        network.

    src_enc_params : dict, optional (default=None)
        Additional arguments for ``get_src_encoder``.

    tgt_enc_params : dict, optional (default=None)
        Additional arguments for ``get_tgt_encoder``.

    task_params : dict, optional (default=None)
        Additional arguments for ``get_task``.

    disc_params : dict, optional (default=None)
        Additional arguments for ``get_discriminator``.

    compil_params : key, value arguments, optional
        Additional arguments for network compiler
        (loss, optimizer...).
        If none, loss is set to ``"binary_crossentropy"``
        and optimizer to ``"adam"``.

    Attributes
    ----------
    src_encoder_ : tensorflow Model
        Fitted source encoder network.

    tgt_encoder_ : tensorflow Model
        Fitted target encoder network.

    task_ : tensorflow Model
        Fitted task network.

    discriminator_ : tensorflow Model
        Fitted discriminator network.

    src_model_ : tensorflow Model
        Fitted source model: the union of
        source encoder and task networks.

    tgt_model_ : tensorflow Model
        Fitted target model: the union of
        target encoder and discriminator networks.

    References
    ----------
    .. [1] `[1] <https://arxiv.org/pdf/1702.05464.pdf>`_ E. Tzeng, J. Hoffman,
       K. Saenko, and T. Darrell. "Adversarial discriminative domain
       adaptation". In CVPR, 2017.
    """

    def __init__(self, get_src_encoder=None, get_tgt_encoder=None,
                 get_task=None, get_discriminator=None,
                 src_enc_params=None, tgt_enc_params=None, task_params=None,
                 disc_params=None, **compil_params):
        # Constructors: fall back on the default builders when omitted.
        self.get_src_encoder = (get_default_encoder if get_src_encoder is None
                                else get_src_encoder)
        self.get_tgt_encoder = (get_default_encoder if get_tgt_encoder is None
                                else get_tgt_encoder)
        self.get_task = get_default_task if get_task is None else get_task
        self.get_discriminator = (get_default_task
                                  if get_discriminator is None
                                  else get_discriminator)

        # Extra constructor arguments. The defaults are None (not ``{}``) so
        # that every instance gets its own dictionary instead of sharing one
        # mutable default object.
        self.src_enc_params = {} if src_enc_params is None else src_enc_params
        self.tgt_enc_params = {} if tgt_enc_params is None else tgt_enc_params
        self.task_params = {} if task_params is None else task_params
        self.disc_params = {} if disc_params is None else disc_params

        self.compil_params = compil_params

    def fit(self, X, y, src_index, tgt_index, tgt_index_labeled=None,
            fit_params_src=None, **fit_params_tgt):
        """
        Fit ADDA.

        Parameters
        ----------
        X : numpy array
            Input data.

        y : numpy array
            Output data.

        src_index : iterable
            indexes of source labeled data in X, y.

        tgt_index : iterable
            indexes of target unlabeled data in X, y.

        tgt_index_labeled : iterable, optional (default=None)
            indexes of target labeled data in X, y.

        fit_params_src : dict, optional (default=None)
            Arguments given to the fit process of source encoder
            and task networks (epochs, batch_size...).
            If None, ``fit_params_src = fit_params_tgt``

        fit_params_tgt : key, value arguments
            Arguments given to the fit method of the ADDA model,
            i.e. fitting of target encoder and discriminator.
            (epochs, batch_size...).

        Returns
        -------
        self : returns an instance of self
        """
        check_indexes(src_index, tgt_index, tgt_index_labeled)

        if fit_params_src is None:
            fit_params_src = fit_params_tgt

        # Labeled target data, when given, is added to the source stage.
        if tgt_index_labeled is None:
            src_index_bis = src_index
        else:
            src_index_bis = np.concatenate((src_index, tgt_index_labeled))

        self._create_model(X.shape[1:], y.shape[1:])

        # Cycle the shorter index so both domains have the same length.
        max_size = max(len(src_index_bis), len(tgt_index))
        resize_tgt_ind = np.resize(tgt_index, max_size)
        resize_src_ind = np.resize(src_index_bis, max_size)

        # Stage 1: source encoder + task network.
        self.src_model_.fit(X[src_index_bis], y[src_index_bis],
                            **fit_params_src)

        # Stage 2: target encoder vs. discriminator. The source side is fed
        # as features already encoded by the fitted source encoder.
        self.tgt_model_.fit(
            [self.src_encoder_.predict(X[resize_src_ind]),
             X[resize_tgt_ind]],
            **fit_params_tgt)
        return self

    def _create_model(self, shape_X, shape_y):
        """Build the four networks and the two compiled training models.

        Raises
        ------
        ValueError
            If source and target encoders have different output shapes.
        """
        compil_params = copy.deepcopy(self.compil_params)
        compil_params.setdefault("loss", "binary_crossentropy")
        compil_params.setdefault("optimizer", "adam")

        self.src_encoder_ = check_network(self.get_src_encoder,
                                          "get_src_encoder",
                                          input_shape=shape_X,
                                          **self.src_enc_params)
        self.tgt_encoder_ = check_network(self.get_tgt_encoder,
                                          "get_tgt_encoder",
                                          input_shape=shape_X,
                                          **self.tgt_enc_params)

        if self.src_encoder_.output_shape != self.tgt_encoder_.output_shape:
            raise ValueError("Target encoder output shape does not match "
                             "the one of source encoder.")

        encoded_shape = self.src_encoder_.output_shape[1:]
        self.task_ = check_network(self.get_task,
                                   "get_task",
                                   input_shape=encoded_shape,
                                   output_shape=shape_y,
                                   **self.task_params)
        self.discriminator_ = check_network(self.get_discriminator,
                                            "get_discriminator",
                                            input_shape=encoded_shape,
                                            **self.disc_params)

        # Source model: source encoder followed by the task network.
        input_task = Input(shape_X)
        encoded_source = self.src_encoder_(input_task)
        tasked = self.task_(encoded_source)
        self.src_model_ = Model(input_task, tasked, name="ModelSource")
        self.src_model_.compile(**compil_params)

        # Target model: takes pre-encoded source features and raw target
        # inputs. Gradients flowing from the discriminator back into the
        # target encoder go through the GradientReversal layer.
        input_source = Input(encoded_shape)
        input_target = Input(shape_X)
        encoded_target = self.tgt_encoder_(input_target)
        discrimined_target = GradientReversal()(encoded_target)
        discrimined_target = self.discriminator_(discrimined_target)
        discrimined_source = self.discriminator_(input_source)

        # Cross-entropy with label 1 for target and 0 for source. The small
        # epsilon keeps the logarithms finite when the discriminator output
        # saturates at exactly 0 or 1.
        eps = K.epsilon()
        loss = (-K.mean(K.log(discrimined_target + eps))
                - K.mean(K.log(1 - discrimined_source + eps)))

        self.tgt_model_ = Model([input_source, input_target],
                                [discrimined_source, discrimined_target],
                                name="ModelTarget")
        self.tgt_model_.add_loss(loss)

        # The loss is attached above, so no ``loss`` is given to compile.
        compil_params.pop("loss")
        self.tgt_model_.compile(**compil_params)
        return self

    def predict(self, X, domain="target"):
        """
        Return the predictions of task network on the encoded feature space.

        ``domain`` arguments specify how features from ``X``
        will be considered: as ``"source"`` or ``"target"`` features.
        If ``"source"``, source encoder will be used.
        If ``"target"``, target encoder will be used.

        Parameters
        ----------
        X : array
            Input data.

        domain : str, optional (default="target")
            Choose between ``"source"`` and ``"target"`` encoder.

        Returns
        -------
        y_pred : array
            Prediction of task network.

        Raises
        ------
        ValueError
            If ``domain`` is neither ``"source"`` nor ``"target"``.

        Notes
        -----
        As ADDA is an anti-symetric feature-based method, one should
        indicates the domain of ``X`` in order to apply the appropriate
        feature transformation.
        """
        if domain == "target":
            X = self.tgt_encoder_.predict(X)
        elif domain == "source":
            X = self.src_encoder_.predict(X)
        else:
            raise ValueError("Choose between source or target for domain name")
        return self.task_.predict(X)
class CVAE():
    """Conditional variational auto-encoder with a label-prediction head.

    ``build`` wires the following parts together:

    * an encoder over the concatenation of ``x_input`` and ``b_input`` that
      produces ``z_mean`` and ``z_log_var`` and a sampled latent ``z_out``;
    * a prediction network mapping ``z_mean`` to a softmax over
      ``lb_input_size`` classes;
    * a decoder applied twice with shared layers, once to the sampled latent
      and once to the externally supplied ``z_input``;
    * a dispersion network: a single dense layer on ``disp_input``.

    The training loss is ``kl_weight * kl + recon + pred_weight * pred``
    where ``recon`` mixes two negative-binomial terms with weight ``alpha``.

    Parameters
    ----------
    x_input_size, b_input_size, lb_input_size : int
        Widths of the ``x_input``/``x_raw_input``, ``B``/``nb_input`` and
        ``lb_input`` inputs.
    sf_input_size : int, optional (default=1)
        Width of the ``sf_input`` input.
    enc, dec : tuple of int
        Hidden layer sizes of the encoder and decoder.
    latent_k : int
        Latent dimension.
    alpha : float
        Weight of the reconstruction term computed from ``z_input``.
    input_dropout : float
        Stored on the instance; not used by this class.
    encoder_dropout : float
        Dropout rate applied after the first encoder block.
    nonmissing_indicator
        Accepted for compatibility; not used by this class.
    init : initializer, optional (default=None)
        Kernel initializer of the dense layers. If None, a new
        ``tf.keras.initializers.Orthogonal()`` is created per instance.
    optimizer : optimizer, optional (default=None)
        If None, Adam is created with ``lr``, ``clipnorm`` and ``clipvalue``.
    lr, clipvalue, clipnorm : float
        Settings of the default optimizer.
    theta_min, theta_max : float
        Stored on the instance; not used by this class.
    """

    # Sub-models whose weights ``save_weights(save_extra=True)`` writes,
    # in this order.
    _EXTRA_WEIGHT_MODELS = ('mean_out', 'var_out', 'samp_out',
                            'disp_model', 'decoder_mean')

    def __init__(self, x_input_size, b_input_size, lb_input_size,
                 sf_input_size=1,
                 enc=(256, 256, 128), dec=(128, 256, 256), latent_k=30,
                 alpha=0.01,
                 input_dropout=0., encoder_dropout=0.1,
                 nonmissing_indicator=None,
                 init=None,
                 optimizer=None, lr=0.001, clipvalue=5, clipnorm=1,
                 theta_min=1e-6, theta_max=1e2):
        self.x_input_size = x_input_size
        self.b_input_size = b_input_size
        self.lb_input_size = lb_input_size
        self.z_input_size = latent_k
        self.sf_input_size = sf_input_size
        self.disp_input_size = b_input_size

        self.enc = enc
        self.dec = dec
        self.latent_k = latent_k
        self.alpha = alpha
        self.input_dropout = input_dropout
        self.encoder_dropout = encoder_dropout

        # The default initializer is created here rather than in the
        # signature, so it is neither evaluated when the class is defined
        # nor shared between instances.
        self.init = tf.keras.initializers.Orthogonal() if init is None else init

        self.lr = lr
        self.clipvalue = clipvalue
        self.clipnorm = clipnorm
        self.theta_min = theta_min
        self.theta_max = theta_max

        if optimizer is None:
            # NOTE(review): both clipnorm and clipvalue are passed; some
            # Keras versions presumably reject that combination - confirm
            # against the pinned version.
            self.optimizer = tf.keras.optimizers.Adam(learning_rate=lr,
                                                      clipnorm=clipnorm,
                                                      clipvalue=clipvalue)
        else:
            self.optimizer = optimizer

        self.extra_models = {}
        self.model = None

    def build(self, print_model=False):
        """Create every layer, sub-model and loss tensor of the network.

        Fills ``self.model``, ``self.extra_models`` and the loss tensors
        ``self.pred_loss``, ``self.kl_loss`` and ``self.recon_loss``.
        The losses are only attached to the model by ``add_loss``.

        Parameters
        ----------
        print_model : bool, optional (default=False)
            If True, print ``self.model.summary()``.
        """
        # ---- Inputs ----
        self.x_input = Input(shape=(self.x_input_size, ), name='x_input')
        self.b_input = Input(shape=(self.b_input_size, ), name='B')
        self.sf_input = Input(shape=(self.sf_input_size, ), name='sf_input')
        self.z_input = Input(shape=(self.z_input_size, ), name='z_input')
        self.disp_input = Input(shape=(self.disp_input_size, ),
                                name='nb_input')
        self.x_raw_input = Input(shape=(self.x_input_size, ),
                                 name='x_raw_input')
        self.lb_input = Input(shape=(self.lb_input_size, ), name='lb_input')

        # ---- Encoder ----
        hidden = keras.layers.concatenate([self.x_input, self.b_input])
        for i, hid_size in enumerate(self.enc):
            hidden = Dense(hid_size, activation=None, use_bias=True,
                           kernel_initializer=self.init,
                           name=f'e{i}')(hidden)
            hidden = LeakyReLU(alpha=0.01)(hidden)
            hidden = BatchNormalization(center=False, scale=True,
                                        name=f'be{i}')(hidden)
            # Dropout is applied after the first block only.
            if i == 0:
                hidden = Dropout(self.encoder_dropout)(hidden)
        self.z = hidden

        self.z_mean = Dense(self.latent_k, activation=None, use_bias=True,
                            kernel_initializer=self.init,
                            name='z_mean_dense')(self.z)
        self.z_mean = LeakyReLU(alpha=0.01, name='z_mean_act')(self.z_mean)
        self.z_mean = BatchNormalization(center=False, scale=True,
                                         name='bz')(self.z_mean)

        self.z_log_var = Dense(
            self.latent_k, activation=None, use_bias=True,
            kernel_initializer=tf.keras.initializers.Orthogonal(gain=0.01),
            name='z_log_var')(self.z)

        # Sampling latent space
        self.z_out = Lambda(sample_z, output_shape=(self.latent_k, ))(
            [self.z_mean, self.z_log_var])

        encoder_inputs = [self.x_input, self.b_input]
        self.extra_models['mean_out'] = Model(encoder_inputs, self.z_mean,
                                              name='mean_out')
        self.extra_models['var_out'] = Model(encoder_inputs, self.z_log_var,
                                             name='var_out')
        self.extra_models['samp_out'] = Model(encoder_inputs, self.z_out,
                                              name='samp_out')

        # ---- Prediction network (on z_mean) ----
        self.lb_pred = Dense(self.latent_k, activation='sigmoid',
                             use_bias=True, kernel_initializer=self.init,
                             name='pred_sigmoid')(self.z_mean)
        self.lb_pred = BatchNormalization(center=False, scale=True,
                                          name='lz1')(self.lb_pred)
        self.lb_pred = Dense(int(0.5 * self.latent_k), activation='sigmoid',
                             use_bias=True, kernel_initializer=self.init,
                             name='pred_sigmoid2')(self.lb_pred)
        self.lb_pred = BatchNormalization(center=False, scale=True,
                                          name='lz2')(self.lb_pred)
        self.lb_pred = Dense(self.lb_input_size, activation='softmax',
                             use_bias=True, kernel_initializer=self.init,
                             name='pred_softmax')(self.lb_pred)

        self.extra_models['lb_pred'] = Model(encoder_inputs, self.lb_pred,
                                             name='lb_pred')

        # ---- Decoder layers (shared by both decoder paths) ----
        self.decoder_dense_layers = []
        self.decoder_leaky_layers = []
        for i, hid_size in enumerate(self.dec):
            self.decoder_dense_layers.append(
                Dense(hid_size, activation=None, use_bias=True,
                      kernel_initializer=self.init, name=f'd{i}'))
            self.decoder_leaky_layers.append(LeakyReLU(alpha=0.01))

        self.last_layer_mu = Dense(self.x_input_size, activation=None,
                                   use_bias=True,
                                   kernel_initializer=self.init,
                                   name='mu_out')

        # Path 1: start from the sampled latent values.
        self.decoder11 = self._run_decoder(self.z_out)
        self.mu_hat = self.last_layer_mu(self.decoder11)
        self.mu_hat_sf = AddLayer(name='mu_hat_sf')(
            [self.mu_hat, self.sf_input])
        self.mu_hat_exp_sf = ExpLayer(name='mu_hat_exp_sf')(self.mu_hat_sf)
        self.mu_hat_exp = ExpLayer(name='mu_hat_exp')(self.mu_hat)

        # Path 2: start from the latent values supplied through ``z_input``
        # (the fit helpers below feed zeros).
        self.decoder12_mean = self._run_decoder(self.z_input)
        self.mu_hat_mean = self.last_layer_mu(self.decoder12_mean)
        self.mu_hat_mean_sf = AddLayer(name='mu_hat_mean_sf')(
            [self.mu_hat_mean, self.sf_input])
        self.mu_hat_mean_exp_sf = ExpLayer(name='mu_hat_mean_exp_sf')(
            self.mu_hat_mean_sf)
        self.mu_hat_mean_exp = ExpLayer(name='mu_hat_mean_exp')(
            self.mu_hat_mean)

        self.extra_models['decoder_mean'] = Model(
            [self.z_input, self.b_input], [self.mu_hat_mean_exp],
            name='decoder_mean')

        # ---- Dispersion network ----
        self.last_layer_theta = Dense(self.x_input_size, activation=None,
                                      use_bias=True,
                                      kernel_initializer=self.init,
                                      name='theta_out')

        # Both branches apply the same dense layer to the same input; they
        # differ only in their clip/exp layer instances.
        self.theta_hat = self.last_layer_theta(self.disp_input)
        self.theta_hat = ClipLayer(name='clip_theta_hat')(self.theta_hat)
        self.theta_hat_exp = ExpLayer(name='theta_hat_exp')(self.theta_hat)

        self.theta_hat_mean = self.last_layer_theta(self.disp_input)
        self.theta_hat_mean = ClipLayer(name='clip_theta_hat_mean')(
            self.theta_hat_mean)
        self.theta_hat_mean_exp = ExpLayer(name='theta_hat_mean_exp')(
            self.theta_hat_mean)

        self.extra_models['disp_model'] = Model(self.disp_input,
                                                self.theta_hat_mean_exp,
                                                name='disp_model')

        # ---- Whole network ----
        self.out_hat = keras.layers.concatenate(
            [self.mu_hat_sf, self.theta_hat], name='out')
        self.out_hat_mean = keras.layers.concatenate(
            [self.mu_hat_mean_sf, self.theta_hat_mean], name='out_mean')

        self.model = Model(
            inputs=[self.z_input, self.x_input, self.b_input, self.sf_input,
                    self.disp_input, self.x_raw_input, self.lb_input],
            outputs=[self.out_hat, self.out_hat_mean, self.lb_pred],
            name='model')

        if print_model:
            self.model.summary()

        # ---- Loss tensors ----
        # NOTE(review): categorical_crossentropy presumably already reduces
        # the class axis, in which case this sum runs over the batch axis -
        # confirm that is intended.
        self.pred_loss = K.sum(
            tf.keras.losses.categorical_crossentropy(self.lb_input,
                                                     self.lb_pred),
            axis=-1)
        self.kl_loss = self.kl_loss_func()
        self.recon_loss = (
            (1 - self.alpha) * self.nb_loss_func(self.x_raw_input,
                                                 self.mu_hat_exp_sf)
            + self.alpha * self.nb_loss0_func(self.x_raw_input,
                                              self.mu_hat_mean_exp_sf))

    def _run_decoder(self, latent):
        """Apply the shared decoder layers to ``latent`` joined with ``b_input``."""
        hidden = keras.layers.concatenate([latent, self.b_input])
        for dense, activation in zip(self.decoder_dense_layers,
                                     self.decoder_leaky_layers):
            hidden = activation(dense(hidden))
        return hidden

    def add_loss(self, pred_weight, kl_weight=1):
        """Attach the weighted total loss and the three loss metrics.

        Requires ``build`` to have been called.
        """
        self.final_loss = (kl_weight * self.kl_loss + self.recon_loss
                           + pred_weight * self.pred_loss)
        self.model.add_loss(self.final_loss)
        self.model.add_metric(self.pred_loss, name='pred_loss')
        self.model.add_metric(self.kl_loss, name='kl_loss')
        self.model.add_metric(self.recon_loss, name='recon_loss')

    def compile_model(self, pred_weight, kl_weight=1, optimizer=None):
        """Attach the loss and compile ``self.model``.

        If ``optimizer`` is given it replaces ``self.optimizer``.
        """
        # NOTE(review): every call registers another loss on the model;
        # calling this more than once on the same model presumably stacks
        # the terms - confirm against callers.
        self.add_loss(pred_weight, kl_weight)
        if optimizer is not None:
            self.optimizer = optimizer
        self.model.compile(optimizer=self.optimizer)

    def kl_loss_func(self):
        """Return the KL term computed from ``z_mean`` and ``z_log_var``,
        summed over the last axis."""
        return -0.5 * K.sum(1 + self.z_log_var - K.square(self.z_mean)
                            - K.exp(self.z_log_var), axis=-1)

    @staticmethod
    def _nb_negative_log_likelihood(y_true, log_mu, log_theta, mu, theta):
        """Negative-binomial negative log-likelihood, summed over axis 1."""
        f0 = -1 * tf.math.lgamma(y_true + 1)
        f1 = -1 * tf.math.lgamma(theta)
        f2 = tf.math.lgamma(y_true + theta)
        f3 = - (y_true + theta) * tf.math.log(theta + mu)
        f4 = theta * log_theta
        f5 = y_true * log_mu
        return - K.sum(f0 + f1 + f2 + f3 + f4 + f5, axis=1)

    def nb_loss_func(self, y_true, y_pred):
        """Reconstruction loss of the sampled-latent decoder path.

        ``y_pred`` is ignored: means and dispersions are read from the
        tensors stored on the instance by ``build``.
        """
        return self._nb_negative_log_likelihood(
            y_true,
            log_mu=self.mu_hat_sf,
            log_theta=self.theta_hat,
            mu=self.mu_hat_exp_sf,
            theta=self.theta_hat_exp)

    def nb_loss0_func(self, y_true, y_pred):
        """Reconstruction loss of the ``z_input`` decoder path.

        ``y_pred`` is ignored: means and dispersions are read from the
        tensors stored on the instance by ``build``.
        """
        return self._nb_negative_log_likelihood(
            y_true,
            log_mu=self.mu_hat_mean_sf,
            log_theta=self.theta_hat_mean,
            mu=self.mu_hat_mean_exp_sf,
            theta=self.theta_hat_mean_exp)

    def load_weights(self, filename):
        """Load the weights of the whole model from ``filename``."""
        self.model.load_weights(filename)

    def save_weights(self, filename, save_extra=False, extra_filenames=None):
        """Save the weights of the whole model and, optionally, sub-models.

        Parameters
        ----------
        filename : str
            Destination for the weights of ``self.model``.
        save_extra : bool, optional (default=False)
            If True, also save the ``mean_out``, ``var_out``, ``samp_out``,
            ``disp_model`` and ``decoder_mean`` sub-models.
        extra_filenames : dict, optional
            Maps each of those sub-model names to its destination. Required
            when ``save_extra`` is True.

        Raises
        ------
        ValueError
            If ``save_extra`` is True and ``extra_filenames`` is None. The
            check runs before anything is written.
        """
        if save_extra and extra_filenames is None:
            raise ValueError(
                "extra_filenames is required when save_extra is True")

        self.model.save_weights(filename)
        if save_extra:
            for model_name in self._EXTRA_WEIGHT_MODELS:
                self.extra_models[model_name].save_weights(
                    extra_filenames[model_name])

    def predict_latent(self, X, B):
        """Return the ``mean_out`` sub-model prediction for ``[X, B]``."""
        return self.extra_models['mean_out'].predict([X, B])

    def predict_beta(self, X, B, sf):
        """Return ``(X_lambda, X_theta)`` for the given inputs.

        ``X_lambda`` is the ``decoder_mean`` output for the latent means of
        ``[X, B]``, with each row scaled by the matching entry of ``sf``;
        ``X_theta`` is the ``disp_model`` output for ``B``.
        """
        zmean = self.extra_models['mean_out'].predict([X, B])
        X_lambda = self.extra_models['decoder_mean'].predict([zmean, B])
        X_theta = self.extra_models['disp_model'].predict(B)
        X_lambda = (X_lambda.T * sf).T
        return X_lambda, X_theta

    def _fit_with_callbacks(self, adata, monitor, epochs, batch_size,
                            validation_split, shuffle, fit_verbose,
                            lr_patience, lr_factor, lr_verbose,
                            es_patience, es_verbose):
        """Fit ``self.model`` on ``adata`` with LR-reduction and early stopping.

        Both callbacks watch ``monitor``. Returns what ``Model.fit`` returns.
        """
        callbacks = [
            ReduceLROnPlateau(monitor=monitor, patience=lr_patience,
                              factor=lr_factor, verbose=lr_verbose),
            EarlyStopping(monitor=monitor, patience=es_patience,
                          verbose=es_verbose),
        ]

        # ``z_input`` is fed with zeros.
        z_blank = np.zeros((adata.n_obs, self.latent_k), dtype=np.float32)
        # Order must match the ``inputs`` list of ``self.model`` in ``build``.
        inputs = [z_blank,
                  adata.X,
                  adata.obsm['saver_batch'],
                  np.log(adata.obs.size_factors),
                  adata.obsm['saver_batch'],
                  adata.raw.X,
                  adata.obsm['saver_targetL']]
        outputs = [adata.raw.X, adata.raw.X, adata.obsm['saver_targetL']]

        return self.model.fit(inputs, outputs,
                              epochs=epochs,
                              batch_size=batch_size,
                              shuffle=shuffle,
                              callbacks=callbacks,
                              validation_split=validation_split,
                              verbose=fit_verbose)

    def model_initialize(self, adata, epochs=300, batch_size=64,
                         validation_split=0.1, shuffle=True, fit_verbose=1,
                         lr_patience=1, lr_factor=0.1, lr_verbose=True,
                         es_patience=2, es_verbose=True):
        """Fit the model while monitoring ``val_pred_loss``.

        Returns what ``Model.fit`` returns.
        """
        return self._fit_with_callbacks(
            adata, 'val_pred_loss', epochs, batch_size, validation_split,
            shuffle, fit_verbose, lr_patience, lr_factor, lr_verbose,
            es_patience, es_verbose)

    def model_finetune(self, adata, epochs=300, batch_size=64,
                       validation_split=0.1, shuffle=True, fit_verbose=1,
                       lr_patience=4, lr_factor=0.1, lr_verbose=True,
                       es_patience=6, es_verbose=True):
        """Fit the model while monitoring ``val_loss``.

        Returns what ``Model.fit`` returns.
        """
        return self._fit_with_callbacks(
            adata, 'val_loss', epochs, batch_size, validation_split,
            shuffle, fit_verbose, lr_patience, lr_factor, lr_verbose,
            es_patience, es_verbose)
class DeepCORAL:
    r"""
    DeepCORAL: Deep CORrelation ALignment

    DeepCORAL is an extension of CORAL method.
    It learns a nonlinear transformation which aligns correlations of
    layer activations in deep neural networks.

    The method consists in training both an **encoder** and a **task**
    network. The **encoder** network maps input features into new
    encoded ones on which the **task** network is trained.

    The parameters of the two networks are optimized in order to
    minimize the following loss function:

    .. math::

        \mathcal{L} = \mathcal{L}_{task} + \lambda ||C_S - C_T||_F^2

    Where:

    - :math:`\mathcal{L}_{task}` is the task loss computed with
      source and labeled target data.
    - :math:`C_S` is the correlation matrix of source data in the
      encoded feature space.
    - :math:`C_T` is the correlation matrix of target data in the
      encoded feature space.
    - :math:`||.||_F` is the Frobenius norm.
    - :math:`\lambda` is a trade-off parameter.

    Thus the **encoder** network learns a new feature representation
    on which the correlation matrices of source and target data are
    "close" and where a **task** network is able to learn the task
    with source labeled data.

    Notice that DeepCORAL only uses labeled source and unlabeled target
    data. It belongs then to "unsupervised" domain adaptation methods.
    However, labeled target data can be added to the training process
    straightforwardly.

    Parameters
    ----------
    get_encoder: callable, optional (default=None)
        Constructor for encoder network.
        It is called as ``get_encoder(input_shape=..., **enc_params)``
        and should return a tensorflow Model.
        If ``None``, ``get_default_encoder`` is used.

    get_task: callable, optional (default=None)
        Constructor for task network.
        It is called as
        ``get_task(input_shape=..., output_shape=..., **task_params)``
        and should return a tensorflow Model.
        If ``None``, ``get_default_task`` is used.

    lambdap : float, optional (default=1.0)
        Trade-Off parameter.

    enc_params: dict, optional (default=None)
        Additional arguments for ``get_encoder``

    task_params: dict, optional (default=None)
        Additional arguments for ``get_task``

    compil_params: key, value arguments, optional
        Additional arguments for network compiler
        (loss, optimizer...).
        If none, loss is set to ``"mean_squared_error"``
        and optimizer to ``"adam"``.

    Attributes
    ----------
    encoder_ : tensorflow Model
        Fitted encoder network.

    task_ : tensorflow Model
        Fitted task network.

    model_ : tensorflow Model
        Fitted model: the union of
        encoder and task networks.

    See also
    --------
    CORAL

    References
    ----------
    .. [1] `[1] <https://arxiv.org/pdf/1607.01719.pdf>`_ Sun B. and Saenko K.
       "Deep CORAL: correlation alignment for deep domain adaptation."
       In ICCV, 2016.
    """

    def __init__(self, get_encoder=None, get_task=None, lambdap=1.0,
                 enc_params=None, task_params=None, **compil_params):
        self.get_encoder = get_encoder
        self.get_task = get_task
        self.lambdap = lambdap
        self.enc_params = enc_params
        self.task_params = task_params
        self.compil_params = compil_params

        # Fall back to the module-level default constructors / empty kwargs.
        if self.get_encoder is None:
            self.get_encoder = get_default_encoder
        if self.get_task is None:
            self.get_task = get_default_task
        if self.enc_params is None:
            self.enc_params = {}
        if self.task_params is None:
            self.task_params = {}

    def fit(self, X, y, src_index, tgt_index, tgt_index_labeled=None,
            sample_weight=None, **fit_params):
        """
        Fit encoder and task networks.

        Source data and unlabeled target data are used for the
        correlation alignment in the encoded space.
        Source data and labeled target data are used to learn the task.

        Parameters
        ----------
        X : numpy array
            Input data.

        y : numpy array
            Output data.

        src_index : iterable
            indexes of source labeled data in X, y.

        tgt_index : iterable
            indexes of target unlabeled data in X, y.

        tgt_index_labeled : iterable, optional (default=None)
            indexes of target labeled data in X, y.

        sample_weight : numpy array, optional (default=None)
            Individual weights for each sample.
            NOTE: this argument is accepted but not used by the
            current implementation.

        fit_params : key, value arguments
            Arguments given to the fit method of the estimator
            (epochs, batch_size...).

        Returns
        -------
        self : returns an instance of self
        """
        check_indexes(src_index, tgt_index, tgt_index_labeled)

        self._create_model(X.shape[1:], y.shape[1:])

        if tgt_index_labeled is None:
            task_index = src_index
        else:
            task_index = np.concatenate((src_index, tgt_index_labeled))

        # The three index sets feed parallel model inputs, so each one is
        # cyclically repeated up to the length of the longest.
        max_size = max(len(src_index), len(tgt_index), len(task_index))
        resized_src_ind = np.resize(src_index, max_size)
        resized_tgt_ind = np.resize(tgt_index, max_size)
        resized_task_ind = np.resize(task_index, max_size)

        # The ones column is shaped (n, 1) to match the ``Input((1,))``
        # declared in ``_create_model`` without relying on implicit
        # rank expansion.
        self.model_.fit(
            [X[resized_src_ind],
             X[resized_tgt_ind],
             X[resized_task_ind],
             y[resized_task_ind],
             np.ones((max_size, 1))],
            **fit_params)
        return self

    def predict(self, X):
        """
        Return the prediction of task network
        on the encoded features.

        Parameters
        ----------
        X: array
            input data

        Returns
        -------
        y_pred: array
            prediction of task network
        """
        return self.task_.predict(self.encoder_.predict(X))

    @staticmethod
    def _covariance(encoded, ones):
        """
        Sample covariance of the rows of ``encoded``.

        Computes ``(X^T X - (1/n) (1^T X)^T (1^T X)) / (n - 1)`` where
        ``n`` is the sum of ``ones``. The denominator is floored at 1 so
        that a single-sample batch yields a zero matrix instead of NaN
        (``inf * 0``).
        """
        n_samples = K.sum(ones)
        column_sums = K.dot(K.transpose(ones), encoded)
        centered = (
            K.dot(K.transpose(encoded), encoded)
            - (1 / n_samples)
            * K.dot(K.transpose(column_sums), column_sums))
        return (1 / K.maximum(n_samples - 1, 1.)) * centered

    def _create_model(self, shape_X, shape_y):
        """
        Build ``encoder_``, ``task_`` and the joint training ``model_``.

        The joint model takes five inputs (source, target, task inputs,
        task outputs and a column of ones) and carries its whole
        objective through ``add_loss``: task loss plus ``lambdap`` times
        the CORAL alignment loss.

        Parameters
        ----------
        shape_X : tuple
            Shape of one input sample.

        shape_y : tuple
            Shape of one output sample.

        Returns
        -------
        self : returns an instance of self
        """
        self.encoder_ = self.get_encoder(
            input_shape=shape_X, **self.enc_params)
        self.task_ = self.get_task(
            input_shape=self.encoder_.output_shape[1:],
            output_shape=shape_y, **self.task_params)

        input_src = Input(shape_X)
        input_tgt = Input(shape_X)
        input_task = Input(shape_X)
        output_src = Input(shape_y)
        # Column of ones: its sum gives the batch size inside the graph.
        input_ones = Input((1,))

        encoded_src = self.encoder_(input_src)
        encoded_tgt = self.encoder_(input_tgt)
        encoded_task = self.encoder_(input_task)

        tasked = self.task_(encoded_task)

        compil_params = copy.deepcopy(self.compil_params)
        if "loss" in self.compil_params:
            # ``losses.get`` returns callables unchanged and resolves
            # string identifiers such as "mse".
            loss_fn = losses.get(self.compil_params["loss"])
        else:
            loss_fn = losses.mean_squared_error
        # The loss is applied through ``add_loss``; it must not also be
        # forwarded to ``compile``.
        compil_params.pop("loss", None)
        task_loss = K.mean(loss_fn(output_src, tasked))

        corr_src = self._covariance(encoded_src, input_ones)
        corr_tgt = self._covariance(encoded_tgt, input_ones)

        corr_loss = (1. / 4.) * K.mean(K.square(corr_src - corr_tgt))

        loss = task_loss + self.lambdap * corr_loss

        self.model_ = Model(
            [input_src, input_tgt, input_task, output_src, input_ones],
            [encoded_src, encoded_tgt, tasked],
            name="DeepCORAL")
        self.model_.add_loss(loss)

        compil_params.setdefault("optimizer", "adam")

        self.model_.compile(**compil_params)
        return self
def create_model(x_train, x_test, y_test, encoding_dim, intermediate_dim,
                 epochs, y_train=None, batch_size=32,
                 learn_rate=0.0026607621768993824,
                 lr_decay=0.0021721614264192577):
    """Train a dense VAE and score its latent means with a random forest.

    A one-hidden-layer encoder/decoder VAE is fitted on ``x_train``; the
    ``z_mean`` embeddings of train and test data are then used to fit and
    score a ``RandomForestClassifier``.

    Parameters
    ----------
    x_train, x_test : 2-D arrays
        Training / validation features; ``x_train.shape[1]`` gives the
        input dimension.
    y_test : array
        Labels for ``x_test`` (squeezed before scoring).
    encoding_dim : int
        Size of the latent space.
    intermediate_dim : int
        Width of the hidden layer in both encoder and decoder.
    epochs : int
        Number of VAE training epochs.
    y_train : array, optional
        Labels for ``x_train``. When omitted, a module-level ``y_train``
        is used, which is what earlier versions of this function relied
        on implicitly.
    batch_size : int, optional
        Batch size for training and prediction.
    learn_rate, lr_decay : float, optional
        Adam learning rate and decay.

    Returns
    -------
    tuple
        ``(rf_score, z_test, z_train, loss_history, val_loss_history)``.

    Raises
    ------
    ValueError
        If no training labels are available (neither passed nor defined
        at module level).
    """
    if y_train is None:
        # Backward compatibility: the labels used to be picked up from the
        # enclosing module. Resolve them up front so a missing value fails
        # before the (expensive) VAE training instead of after it.
        y_train = globals().get("y_train")
        if y_train is None:
            raise ValueError(
                "y_train must be passed to create_model or defined at "
                "module level")

    def run_rf(train_img, y_train, test_img, y_test, msg):
        """Fit a 50-tree random forest, print and return its test score."""
        clf = RandomForestClassifier(50)
        clf.fit(train_img, np.squeeze(y_train))
        print(msg + 'Score')
        score = clf.score(test_img, np.squeeze(y_test))
        print(score)
        return score

    def sampling(args):
        """Reparameterization trick by sampling from an isotropic unit Gaussian.

        # Arguments:
            args (tensor): mean and log of variance of Q(z|X)

        # Returns:
            z (tensor): sampled latent vector
        """
        z_mean, z_log_var = args
        batch = K.shape(z_mean)[0]
        dim = K.int_shape(z_mean)[1]
        # by default, random_normal has mean=0 and std=1.0
        epsilon = K.random_normal(shape=(batch, dim))
        return z_mean + K.exp(0.5 * z_log_var) * epsilon

    original_dim = x_train.shape[1]
    input_shape = (original_dim, )
    latent_dim = encoding_dim
    act_fncs = elu

    # VAE model = encoder + decoder
    # build encoder model
    inputs = Input(shape=input_shape, name='encoder_input')
    x = Dense(intermediate_dim, activation=act_fncs)(inputs)
    z_mean = Dense(latent_dim, name='z_mean')(x)
    z_log_var = Dense(latent_dim, name='z_log_var')(x)

    # use reparameterization trick to push the sampling out as input
    # note that "output_shape" isn't necessary with the TensorFlow backend
    z = Lambda(sampling, output_shape=(latent_dim, ),
               name='z')([z_mean, z_log_var])

    # instantiate encoder model
    encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder')
    encoder.summary()

    # build decoder model
    latent_inputs = Input(shape=(latent_dim, ), name='z_sampling')
    x = Dense(intermediate_dim, activation=act_fncs)(latent_inputs)
    outputs = Dense(original_dim, activation=act_fncs)(x)

    # instantiate decoder model
    decoder = Model(latent_inputs, outputs, name='decoder')
    decoder.summary()

    # instantiate VAE model (decoder applied to the sampled z)
    outputs = decoder(encoder(inputs)[2])
    vae = Model(inputs, outputs, name='vae_mlp')

    # The whole objective is attached with add_loss, so fit() needs no
    # targets: reconstruction term scaled by the input size plus KL term.
    reconstruction_loss = binary_crossentropy(inputs, outputs)
    reconstruction_loss *= original_dim
    kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
    kl_loss = K.sum(kl_loss, axis=-1)
    kl_loss *= -0.5
    vae_loss = K.mean(reconstruction_loss + kl_loss)
    vae.add_loss(vae_loss)
    vae.compile(optimizer=Adam(lr=learn_rate, decay=lr_decay),
                metrics=['accuracy'])
    vae.summary()
    # NOTE(review): weights are written before fit(), so this file holds
    # the untrained initial weights -- confirm that is intended.
    vae.save_weights('VAE_weights.h5')

    result = vae.fit(x_train,
                     shuffle=True,
                     epochs=epochs,
                     verbose=1,
                     batch_size=batch_size,
                     validation_data=(x_test, None))

    # Embed with the deterministic mean rather than the sampled z.
    encoder = Model(inputs, z_mean)
    z_test = encoder.predict(x_test, batch_size=batch_size)
    train_img = encoder.predict(x_train, batch_size=batch_size)

    ok = run_rf(train_img, y_train, z_test, y_test, "VAE-RF ")
    return (ok, z_test, train_img,
            result.history['loss'], result.history['val_loss'])