def get_model_and_train_step():
    """Build a bias-only model carrying two equivalent MAE add_loss terms
    and return its custom-training-loop step function."""
    feature_in = Input(shape=(1,))
    target_in = Input(shape=(1,))
    predictions = test_utils.Bias()(feature_in)
    model = Model([feature_in, target_in], predictions)
    # Same MAE penalty registered twice: once via the loss class, once via
    # the functional form reduced to a scalar.
    model.add_loss(MAE()(target_in, predictions))
    model.add_loss(tf.reduce_mean(mae(target_in, predictions)))
    return get_ctl_train_step(model)
def test_invalid_variable_input(self):
    """add_loss must reject a raw variable (neither symbolic nor callable)."""
    layer_in = Input(shape=(1,))
    layer_out = test_utils.Bias()(layer_in)
    model = Model(layer_in, layer_out)
    # Passing a weight variable directly is invalid input for add_loss.
    with self.assertRaisesRegex(
            ValueError,
            'Expected a symbolic Tensors or a callable for the loss value'):
        model.add_loss(model.weights[0])
def test_invalid_constant_input(self):
    """add_loss must reject a plain Python float constant."""
    layer_in = Input(shape=(1, ))
    layer_out = test_utils.Bias()(layer_in)
    model = Model(layer_in, layer_out)
    # A bare constant carries no graph connectivity, so it is rejected.
    with self.assertRaisesRegex(
            ValueError,
            "Expected a symbolic Tensors or a callable for the loss value",
    ):
        model.add_loss(1.0)
def __init__(self, shape, action_shape, latent):
    """Build the VAE world-model plus the controller heads.

    Args:
        shape: input image shape.
        action_shape: dimensionality of the action vector.
        latent: size of the variational latent space.
    """
    super().__init__(shape, action_shape)
    # --- VAE ---
    img_in = Input(shape=shape)
    enc_features = autoencoder.build_encoder(shape, 0.1, img_in)
    variational, mu, sigma = autoencoder.build_variational(latent, enc_features)
    decoded = autoencoder.build_decoder(shape, 0.1, variational, latent)
    # VAE loss = pixel reconstruction MSE + KL(q(z|x) || N(0, I)).
    rec_loss = metrics.mean_squared_error(K.flatten(img_in), K.flatten(decoded))
    kl_loss = -0.5 * K.sum(1 + sigma - K.square(mu) - K.exp(sigma), axis=-1)
    vae_loss = K.mean(rec_loss + kl_loss)
    ae = Model(img_in, decoded)
    ae.add_loss(vae_loss)
    ae.summary()
    opt = Adam(lr=0.0001, clipvalue=0.5)
    # loss=None: the objective comes entirely from add_loss.
    ae.compile(optimizer=opt, loss=None, metrics=['accuracy'])
    self.ae = ae
    # --- controller layers (shared between the two models below) ---
    prev_action = Input((action_shape, ), name="Prev_Action")
    control_input = Concatenate()
    control = Dense(1024, activation='elu', name="Control")
    actions = Dense(action_shape, name="Actions")
    # Full model: image + previous action -> reconstruction, latent, action.
    full_actions = actions(control(control_input([prev_action, variational])))
    full = Model([img_in, prev_action], [decoded, variational, full_actions])
    full.summary()
    full.compile(opt, loss='mean_squared_error')
    self.full = full
    # Controller-only trainer fed directly with latent vectors.
    latent_in = Input((latent, ))
    train_actions = actions(control(control_input([prev_action, latent_in])))
    c_train = Model([prev_action, latent_in], train_actions)
    c_train.summary()
    c_train.compile(opt, loss='mean_squared_error')
    self.c_train = c_train
def main():
    """Train the segmentation net against a frozen VAE encoder's KL score,
    then visualize a few predictions."""
    encoder, _, vae = create_models()
    encoder.load_weights('encoder-trained.h5')
    encoder.trainable = False
    seg = create_model()
    img_in = Input(shape=(64, 64, 3), name='input_image')
    mask_pred = seg(img_in)
    stacked = concatenate([img_in, mask_pred])
    z_mean, z_log_var = encoder(stacked)
    # KL of the encoded (image, mask) pair against the unit-Gaussian prior
    # is the only training signal — attached via add_loss.
    kl_loss = K.mean(
        -0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var),
                     axis=-1))
    model = Model(img_in, [z_mean, z_log_var])
    model.add_loss(kl_loss)
    model.compile('nadam')
    # model.load_weights('segnet.010.h5')
    model.summary()
    ck = ModelCheckpoint('segnet.{epoch:02d}.h5', save_weights_only=True)
    data = loader(False)
    model.fit_generator(data, 1000, 100, callbacks=[ck])
    # Visual sanity check on a handful of samples.
    samples = 5
    data = loader(True, samples)
    import matplotlib.pyplot as plt
    batch, gt = next(data)
    batch = batch[:, :, :, :3]
    preds = seg.predict_on_batch(batch)
    for i in range(len(batch)):
        row = i * 3
        plt.subplot(samples, 3, row + 1)
        plt.imshow(batch[i].squeeze(), cmap='gray')
        plt.subplot(samples, 3, row + 2)
        plt.imshow(gt[i].squeeze(), cmap='gray')
        plt.subplot(samples, 3, row + 3)
        plt.imshow(preds[i].squeeze(), cmap='gray')
    plt.show()
def get_model_and_train_step():
    """Bias-only model whose loss is supplied as a zero-argument callable."""
    feature_in = Input(shape=(1,))
    target_in = Input(shape=(1,))
    predictions = testing_utils.Bias()(feature_in)
    model = Model([feature_in, target_in], predictions)

    def callable_loss():
        # Deferred loss: re-evaluated each step from the current weights.
        return tf.reduce_sum(model.weights)

    model.add_loss(callable_loss)
    return get_ctl_train_step(model)
def test_add_entropy_loss_on_functional_model(self):
    """A crossentropy add_loss on a functional model must still yield
    gradients for every variable (no 'Gradients do not exist' warning)."""
    feature_in = Input(shape=(1,))
    target_in = Input(shape=(1,))
    predictions = testing_utils.Bias()(feature_in)
    model = Model([feature_in, target_in], predictions)
    model.add_loss(losses.binary_crossentropy(target_in, predictions))
    model.compile('sgd', run_eagerly=testing_utils.should_run_eagerly())
    # Capture logging.warning calls and assert the gradient warning is absent.
    with tf.compat.v1.test.mock.patch.object(logging, 'warning') as mock_log:
        model.fit([self.x, self.y], batch_size=3, epochs=5)
        self.assertNotIn('Gradients do not exist for variables',
                         str(mock_log.call_args))
def compile_model(model, lambda_smoothness=0, lambda_flow=0.0001,
                  lambda_mse=0, occ_punishment=0):
    """Attach an occlusion-aware unsupervised optical-flow loss to ``model``
    and return it recompiled.

    The total objective sums: occlusion-masked photometric error,
    occlusion-masked forward/backward flow consistency, a penalty on the
    number of occluded pixels, first-order flow smoothness, and an SSIM
    reconstruction term. All supervision tensors are attached via add_loss,
    so compile() receives no explicit loss.
    """
    i1 = model.inputs[0]
    i2 = model.inputs[1]
    o1 = model.outputs[0]
    # Backward flow approximated by warping the negated forward flow.
    o2 = image_warp(-o1, o1)
    oxf, oxb = mask(i1, i2, o1, o2)
    mask_f = oxf[:, :, :, 0]
    mask_b = oxb[:, :, :, 1]
    err_f, err_b = photometric_error(i1, i2, o1, o2)
    flow_f, flow_b = flow_error(o1, o2)
    # --- occlusion-aware photometric (MSE) term ---
    occ_loss1 = tf.reduce_sum(tf.boolean_mask(charbonnier(err_f), mask_f))
    occ_loss2 = tf.reduce_sum(tf.boolean_mask(charbonnier(err_b), mask_b))
    occ_loss = (occ_loss1 + occ_loss2) * lambda_mse
    # --- occlusion-aware flow-consistency term ---
    flow_loss1 = tf.reduce_sum(tf.boolean_mask(charbonnier(flow_f), mask_f))
    # BUG FIX: the backward flow error was masked with the *forward* mask
    # (mask_f); use mask_b to mirror the forward term above.
    flow_loss2 = tf.reduce_sum(tf.boolean_mask(charbonnier(flow_b), mask_b))
    flow_loss = (flow_loss1 + flow_loss2) * lambda_flow
    # --- punish marking too many pixels as occluded ---
    occ_punish1 = tf.multiply(tf.reduce_sum(tf.cast(mask_f, tf.float32)),
                              occ_punishment)
    occ_punish2 = tf.multiply(tf.reduce_sum(tf.cast(mask_b, tf.float32)),
                              occ_punishment)
    occ_punish = occ_punish1 + occ_punish2
    # --- first-order smoothness of both flow fields ---
    ux, uy = grad_xy(o1[:, :, :, :1])
    vx, vy = grad_xy(o1[:, :, :, 1:2])
    sm_loss_o1 = K.mean(K.abs(ux * ux) + K.abs(uy * uy) +
                        K.abs(vx * vx) + K.abs(vy * vy))
    ux, uy = grad_xy(o2[:, :, :, :1])
    vx, vy = grad_xy(o2[:, :, :, 1:2])
    sm_loss_o2 = K.mean(K.abs(ux * ux) + K.abs(uy * uy) +
                        K.abs(vx * vx) + K.abs(vy * vy))
    sm_loss = (sm_loss_o1 + sm_loss_o2) * lambda_smoothness
    # --- SSIM reconstruction term (occlusion not considered) ---
    i2_rec = image_warp(i1, o1)
    i1_rec = image_warp(i2, o2)
    re_loss1 = DSSIMObjective(kernel_size=50)(i2, i2_rec)
    re_loss2 = DSSIMObjective(kernel_size=50)(i1, i1_rec)
    re_loss_ssim = re_loss1 + re_loss2
    total_loss = sm_loss + occ_loss + occ_punish + re_loss_ssim + flow_loss
    model = Model(inputs=[i1, i2], outputs=[o1])
    model.add_loss(total_loss)
    model.compile(optimizer=keras.optimizers.Adadelta(lr=1.0, rho=0.95,
                                                      epsilon=None, decay=0.0))
    return model
def compile_model(model, lambda1=0.005):
    """Compile a flow model with a learned-uncertainty weighting between the
    DSSIM reconstruction loss and the flow-smoothness loss.

    ``lambda1`` is retained for interface compatibility but unused here:
    the two terms are balanced by the trainable variances sig1/sig2.
    """
    sig1 = tf.get_variable("sig1", trainable=True,
                           initializer=tf.constant([0.3]))
    sig2 = tf.get_variable("sig2", trainable=True,
                           initializer=tf.constant([0.7]))
    sig1_sq = sig1 * sig1
    sig2_sq = sig2 * sig2
    img1 = model.inputs[0]
    img2 = model.inputs[1]
    flow = model.outputs[0]
    # Reconstruct each frame by warping the other with the (negated) flow.
    img2_rec = image_warp(img1, flow)
    img1_rec = image_warp(img2, -flow)
    # First-order smoothness of both flow channels.
    ux, uy = grad_xy(flow[:, :, :, :1])
    vx, vy = grad_xy(flow[:, :, :, 1:2])
    sm_loss = K.mean(K.abs(ux * ux) + K.abs(uy * uy) +
                     K.abs(vx * vx) + K.abs(vy * vy))
    # Structural-dissimilarity reconstruction terms.
    re_loss1 = DSSIMObjective(kernel_size=50)(img2, img2_rec)
    re_loss2 = DSSIMObjective(kernel_size=50)(img1, img1_rec)
    re_loss = re_loss1 + re_loss2
    # Homoscedastic-uncertainty weighting: L/s^2 + log(s^2) per term.
    total_loss = ((1 / sig1_sq) * re_loss + (1 / sig2_sq) * sm_loss +
                  K.log(sig1_sq) + K.log(sig2_sq))
    model = Model(inputs=[img1, img2], outputs=[flow])
    model.add_loss(total_loss)
    model.compile(loss="mse",
                  optimizer=keras.optimizers.Adadelta(lr=1.0, rho=0.95,
                                                      epsilon=None,
                                                      decay=0.0))
    return model
def test_loss_on_model_fit(self):
    """Two equivalent MAE add_loss terms on the bias-only model should decay
    linearly under SGD to the expected per-epoch values."""
    feature_in = Input(shape=(1,))
    target_in = Input(shape=(1,))
    predictions = testing_utils.Bias()(feature_in)
    model = Model([feature_in, target_in], predictions)
    model.add_loss(MAE()(target_in, predictions))
    model.add_loss(tf.reduce_mean(mae(target_in, predictions)))
    model.compile(
        optimizer_v2.gradient_descent.SGD(0.05),
        run_eagerly=testing_utils.should_run_eagerly())
    history = model.fit([self.x, self.y], batch_size=3, epochs=5)
    self.assertAllClose(history.history['loss'],
                        [2., 1.8, 1.6, 1.4, 1.2], 1e-3)
def draw(self):
    """Build span-extraction heads on top of BERT.

    Returns:
        (pred_model, train_model): pred_model emits head/tail position
        logits; train_model also takes the gold edges and carries a summed
        crossentropy loss attached via add_loss.
    """
    bert = self.load_bert()
    # Model inputs
    input_tokens = Input(shape=(None, ))
    input_segments = Input(shape=(None, ))
    input_head_edge = Input(shape=(None, ))
    input_tail_edge = Input(shape=(None, ))
    # 1 where a real token is present, 0 on padding.
    mask = Lambda(
        lambda t: K.cast(K.greater(K.expand_dims(t, 2), 0), 'float32'))(
            input_tokens)
    embedding = bert([input_tokens, input_segments])
    # Pointer heads: one logit per position, padding pushed towards -inf.
    pred_head = Dense(1, use_bias=False)(embedding)
    pred_head = Lambda(lambda t: t[0][..., 0] - (1 - t[1][..., 0]) * 1e10)(
        [pred_head, mask])
    pred_tail = Dense(1, use_bias=False)(embedding)
    pred_tail = Lambda(lambda t: t[0][..., 0] - (1 - t[1][..., 0]) * 1e10)(
        [pred_tail, mask])
    # Prediction-only model
    pred_model = Model(inputs=[input_tokens, input_segments],
                       outputs=[pred_head, pred_tail])
    # Training model with the gold edges as extra inputs
    train_model = Model(inputs=[
        input_tokens, input_segments, input_head_edge, input_tail_edge
    ], outputs=[pred_head, pred_tail])
    loss_head = K.mean(
        K.categorical_crossentropy(input_head_edge, pred_head,
                                   from_logits=True))
    # Restrict tail logits to positions at or after the gold head.
    pred_tail -= (1 - K.cumsum(input_head_edge, 1)) * 1e10
    loss_tail = K.mean(
        K.categorical_crossentropy(input_tail_edge, pred_tail,
                                   from_logits=True))
    loss = loss_head + loss_tail
    train_model.add_loss(loss)
    train_model.compile(optimizer=Adam(LEARNING_RATE))
    train_model.summary()
    return pred_model, train_model
def test_loss_with_sample_weight_on_model_fit(self):
    """Sample weights supplied as an extra input must scale both add_loss
    terms; the loss then decays by 0.4 per epoch under SGD."""
    feature_in = Input(shape=(1,))
    target_in = Input(shape=(1,))
    weight_in = Input(shape=(1,))
    predictions = testing_utils.Bias()(feature_in)
    model = Model([feature_in, target_in, weight_in], predictions)
    model.add_loss(MAE()(target_in, predictions, weight_in))
    model.add_loss(3 * tf.reduce_mean(weight_in * mae(target_in, predictions)))
    model.compile(
        optimizer_v2.gradient_descent.SGD(0.025),
        run_eagerly=testing_utils.should_run_eagerly())
    history = model.fit([self.x, self.y, self.w], batch_size=3, epochs=5)
    self.assertAllClose(history.history['loss'],
                        [4., 3.6, 3.2, 2.8, 2.4], 1e-3)
def get_model(num_user, num_item, hidden_dim, latent_dim, stddev):
    """Twin-VAE collaborative-filtering model.

    Encodes an item vector and a user vector into separate latents, samples
    both, and predicts a match score from the concatenated codes. Both VAE
    objectives (masked reconstruction + KL) are attached via add_loss.
    """
    x = Input(shape=(num_item, ))
    y = Input(shape=(num_user, ))
    x_h = Dense(hidden_dim, activation="relu")(x)
    y_h = Dense(hidden_dim, activation="relu")(y)
    x_z_mean = Dense(latent_dim)(x_h)
    x_z_log_var = Dense(latent_dim)(x_h)
    y_z_mean = Dense(latent_dim)(y_h)
    y_z_log_var = Dense(latent_dim)(y_h)

    def sampling(args):
        # Reparameterization trick with configurable prior stddev.
        mean, log_var = args
        eps = K.random_normal(shape=(K.shape(mean)[0], latent_dim),
                              mean=0., stddev=stddev)
        return mean + K.exp(log_var / 2) * eps

    x_z = Lambda(sampling, output_shape=(latent_dim, ))([x_z_mean,
                                                         x_z_log_var])
    y_z = Lambda(sampling, output_shape=(latent_dim, ))([y_z_mean,
                                                         y_z_log_var])
    # Decoders reconstruct the original sparse interaction vectors.
    x_d_h = Dense(hidden_dim, activation="relu")(x_z)
    x_d = Dense(num_item, activation="sigmoid")(x_d_h)
    y_d_h = Dense(hidden_dim, activation="relu")(y_z)
    y_d = Dense(num_user, activation="sigmoid")(y_d_h)
    # Only observed (positive) entries contribute to reconstruction.
    x_mask = K.cast(x > 0, "float")
    y_mask = K.cast(y > 0, "float")
    # Match-score head over the concatenated latents.
    x_y = Concatenate(axis=-1)([x_z, y_z])
    f_h = Dense(latent_dim, activation="relu")(x_y)
    f = Dense(1, activation="sigmoid")(f_h)
    xent_loss = num_item * metrics.binary_crossentropy(x, x_mask * x_d)
    yent_loss = num_user * metrics.binary_crossentropy(y, y_mask * y_d)
    xkl_loss = -0.5 * K.sum(
        1 + x_z_log_var - K.square(x_z_mean) - K.exp(x_z_log_var), axis=-1)
    ykl_loss = -0.5 * K.sum(
        1 + y_z_log_var - K.square(y_z_mean) - K.exp(y_z_log_var), axis=-1)
    loss = K.mean(xent_loss + xkl_loss + yent_loss + ykl_loss)
    model = Model([x, y], f)
    model.add_loss(loss)
    model.compile(optimizer="adam", loss="binary_crossentropy")
    model.summary()
    return model
def compile_model(model, lambda1=0.05):
    """Compile a flow model with an uncertainty-weighted (learned-variance)
    combination of DSSIM reconstruction loss and flow-smoothness loss.

    ``lambda1`` is kept for interface compatibility; the two loss terms are
    balanced by the trainable variances sig1/sig2 instead.
    """
    s1 = tf.get_variable("sig1", trainable=True,
                         initializer=tf.constant([0.3]))
    s2 = tf.get_variable("sig2", trainable=True,
                         initializer=tf.constant([0.7]))
    s1_2 = s1 * s1
    # BUG FIX: was `s2_2 = s1 * s1`, so s2 was never used and both terms
    # shared s1's weight; the sibling variant of this function computes
    # s2 * s2 here.
    s2_2 = s2 * s2
    I1 = model.inputs[0]
    I2 = model.inputs[1]
    o1 = model.outputs[0]
    # Backward flow approximated by warping the negated forward flow.
    o2 = image_warp(-o1, o1)
    I2_rec = image_warp(I1, o1)
    I1_rec = image_warp(I2, o2)
    # First-order smoothness of the forward flow.
    ux, uy = grad_xy(o1[:, :, :, :1])
    vx, vy = grad_xy(o1[:, :, :, 1:2])
    sm_loss = (K.mean(
        K.abs(ux * ux) + K.abs(uy * uy) + K.abs(vx * vx) + K.abs(vy * vy)))
    # Structural-dissimilarity reconstruction terms.
    re_loss1 = DSSIMObjective(kernel_size=50)(I2, I2_rec)
    re_loss2 = DSSIMObjective(kernel_size=50)(I1, I1_rec)
    re_loss = re_loss1 + re_loss2
    # Homoscedastic-uncertainty weighting: L/s^2 + log(s^2) per term.
    total_loss = (1 / s1_2) * re_loss + (1 / s2_2) * sm_loss + K.log(
        s1_2) + K.log(s2_2)
    model = Model(inputs=[I1, I2], outputs=[o1])
    model.add_loss(total_loss)
    model.compile(optimizer=keras.optimizers.Adadelta(
        lr=1.0, rho=0.95, epsilon=None, decay=0.0))
    return model
def __build_model(self):
    """Build the transformer prediction/training graphs.

    Returns:
        (pred_model, train_model): pred_model outputs token softmax
        probabilities; train_model outputs the loss tensor and reports
        perplexity and accuracy as extra metrics.
    """
    src_seq_input = Input(shape=(None, ), name="src_seq_input", dtype='int32')
    tgt_seq_input = Input(shape=(None, ), name="tgt_seq_input", dtype='int32')
    src_seq = src_seq_input
    # Teacher forcing: feed target[:-1], predict target[1:].
    tgt_seq = Lambda(lambda s: s[:, :-1], name="tgt_seq")(tgt_seq_input)
    tgt_true = Lambda(lambda s: s[:, 1:], name="tgt_true")(tgt_seq_input)
    context_attn_mask = Lambda(lambda s: padding_mask(s[0], s[1]),
                               name="context_attn_mask")(
                                   [tgt_seq, src_seq])  # (N, T_t, T_s)
    enc_output, enc_self_attn = self.encoder(src_seq)  # (N, T_s, dim_model)
    output, dec_self_attn, ctx_attn = self.decoder(tgt_seq, enc_output,
                                                   context_attn_mask)
    final_output = self.linear(output)
    y_pred = self.softmax(final_output)
    loss = Lambda(_get_loss, name="loss")([final_output, tgt_true])
    ppl = Lambda(K.exp)(loss)  # perplexity = exp(cross-entropy)
    accuracy = Lambda(_get_accuracy, name="accuracy")([final_output,
                                                       tgt_true])
    pred_model = Model([src_seq_input, tgt_seq_input], y_pred)
    train_model = Model([src_seq_input, tgt_seq_input], loss)
    train_model.add_loss([loss])
    train_model.compile(self.optimizer, None)
    # Surface ppl/accuracy in the training logs (pre-TF2 metrics API).
    train_model.metrics_names.append('ppl')
    train_model.metrics_tensors.append(ppl)
    train_model.metrics_names.append('accuracy')
    train_model.metrics_tensors.append(accuracy)
    return pred_model, train_model
def VAE(original_dim, hidden_dim=128, encoder_dim=16):
    """Build and compile a dense VAE, then plot its architecture.

    Args:
        original_dim: input/output dimensionality.
        hidden_dim: width of the hidden layers.
        encoder_dim: size of the latent space.
    """
    # encoder
    inputs = Input(shape=(original_dim, ))
    hidden_layer1 = Dense(hidden_dim, activation='relu')(inputs)
    encoded = Dense(encoder_dim, activation='relu')(hidden_layer1)
    z_mean = Dense(encoder_dim)(encoded)
    z_log_var = Dense(encoder_dim)(encoded)

    def sampling(args):
        """Reparameterization trick: z = mean + exp(log_var/2) * eps."""
        z_mean, z_log_var = args
        # BUG FIX: epsilon must match the latent size (encoder_dim); the
        # original sampled (batch, hidden_dim) noise, which breaks whenever
        # hidden_dim != encoder_dim (here 128 vs 16).
        epsilon = K.random_normal(shape=(K.shape(z_mean)[0], encoder_dim),
                                  mean=0., stddev=1.0)
        return z_mean + K.exp(z_log_var / 2) * epsilon

    # BUG FIX: output_shape must also be the latent size, not hidden_dim.
    z = Lambda(sampling, output_shape=(encoder_dim, ))([z_mean, z_log_var])
    encoder = Model(inputs=inputs, outputs=z)
    encoder.summary()
    # decoder layers, shared between the VAE graph and the standalone
    # decoder model below
    decoder_hidden = Dense(hidden_dim, activation='relu')
    decoder_out = Dense(original_dim, activation='tanh')
    decoded = decoder_out(decoder_hidden(z))
    # BUG FIX: a standalone Model cannot start from the intermediate tensor
    # `encoded`; give the decoder its own Input over the latent space and
    # reuse the decoding layers.
    latent_inputs = Input(shape=(encoder_dim, ))
    decoder = Model(inputs=latent_inputs,
                    outputs=decoder_out(decoder_hidden(latent_inputs)))
    decoder.summary()
    # vae: reconstruction + KL divergence, attached via add_loss
    vae = Model(inputs, decoded)
    reconstruction_loss = original_dim * binary_crossentropy(inputs, decoded)
    kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
    kl_loss = -0.5 * K.sum(kl_loss, axis=-1)
    vae_loss = K.mean(reconstruction_loss + kl_loss)
    vae.add_loss(vae_loss)
    vae.compile(optimizer='adam')
    vae.summary()
    plot_model(vae, to_file='vae_mlp.png', show_shapes=True)
def get_distillation_encoder(input_shape, encode_shape, loss_type, complexity,
                             conv_regularizer, conv_constraint):
    """Pair a linear encoder and a nonlinear encoder over the same input and
    train them to agree via an MSE or MAE distillation loss (add_loss)."""
    inputs = Input(input_shape, name='inputs')
    linear_encode = get_linear_encoder(inputs, encode_shape,
                                       conv_regularizer, conv_constraint)
    nonlinear_encode = get_nonlinear_encoder(inputs, input_shape, encode_shape,
                                             'fix_', complexity,
                                             None, None, None, None)
    model = Model(inputs=inputs, outputs=[linear_encode, nonlinear_encode])
    # Distillation objective: make the two encodings match.
    if loss_type == 'mse':
        agreement_loss = K.mean(keras_mse(linear_encode, nonlinear_encode))
    else:
        agreement_loss = K.mean(keras_mae(linear_encode, nonlinear_encode))
    model.add_loss(agreement_loss)
    return model
def Model(self):
    """Build the multi-stage TNRD denoising network.

    Inputs are (batch, None, None, channels) noisy/clean pairs; spatial
    dims are left as None so any image size is accepted. The clean image
    enters the graph only so the sum-squared-error loss can be attached
    via add_loss.
    """
    noisy = Input(shape=(None, None, self.settings['channels']), name='Noisy')
    clean = Input(shape=(None, None, self.settings['channels']), name='Clean')
    x = noisy
    for stage in range(self.settings['stages']):
        tag = str(stage + 1)
        u_prev = x
        # Data-fidelity term (u_{t-1} - f), scaled by a learned lambda.
        fidelity = Subtract(name='DataFidelityLayer' + tag)([u_prev, noisy])
        scaled_fidelity = Scalar_Multiply(learn_scalar=True,
                                          scalar_init=0.1,
                                          name='ScaleFidelity' + tag)(fidelity)
        # Diffusion/inference term K_L(u_{t-1}).
        inference = TNRD_Inference(filters=self.settings['filters'],
                                   kernel_size=self.settings['kernel_size'],
                                   name='Inference' + tag)(u_prev)
        # Descent step: u_t = u_{t-1} - (K_L(u_{t-1}) + lambda*(u_{t-1} - f)).
        step = Add(name='AddFidelity' + tag)([inference, scaled_fidelity])
        x = Subtract(name='Descent' + tag)([u_prev, step])
    model = Model(inputs=[noisy, clean], outputs=x)
    model.add_loss(sum_squared_error_loss(clean, x))
    return model
def __build_model(self):
    """Assemble the sequence-tagging model (optionally CRF-topped) plus a
    compiled, possibly multi-GPU, training twin.

    Returns:
        (model, parallel_model): the single-GPU model and the compiled
        training model.
    """
    assert self.max_depth >= 1, "The parameter max_depth is at least 1"
    src_seq_input = Input(shape=(self.max_seq_len, ),
                          dtype="int32",
                          name="src_seq_input")
    mask = Lambda(lambda s: padding_mask(s, s))(src_seq_input)
    emb_output = self.__input(src_seq_input)
    enc_output = self.__encoder(emb_output, mask)
    if self.use_crf:
        crf = CRF(self.tgt_vocab_size + 1, sparse_target=self.sparse_target)
        y_pred = crf(self.__output(enc_output))
    else:
        y_pred = self.__output(enc_output)
    model = Model(inputs=[src_seq_input], outputs=[y_pred])
    parallel_model = model
    if self.num_gpu > 1:
        parallel_model = multi_gpu_model(model, gpus=self.num_gpu)
    if self.use_crf:
        parallel_model.compile(self.optimizer,
                               loss=crf_loss,
                               metrics=[crf_accuracy])
    else:
        # Negative-entropy penalty discourages over-confident predictions.
        confidence_penalty = K.mean(self.confidence_penalty_weight *
                                    K.sum(y_pred * K.log(y_pred), axis=-1))
        model.add_loss(confidence_penalty)
        parallel_model.compile(optimizer=self.optimizer,
                               loss=categorical_crossentropy,
                               metrics=['accuracy'])
    return model, parallel_model
def create_models():
    """Build the VAE trio for 64x64 RGB+mask inputs.

    Returns:
        (encoder, decoder, vae); the KL term is attached to the VAE via
        add_loss, reconstruction is left to the caller.
    """
    n_channels = 3 + 1  # RGB image stacked with a 1-channel mask
    image_shape = (64, 64, n_channels)
    n_encoder = 1024
    latent_dim = 128
    decode_from_shape = (8, 8, 256)
    n_decoder = np.prod(decode_from_shape)
    leaky_relu_alpha = 0.2

    def conv_block(x, filters, leaky=True, transpose=False, name=''):
        """Conv(Transpose)-BN-activation; returns the layer list when x is
        None (for use inside a Sequential), otherwise applies them to x."""
        conv = Conv2DTranspose if transpose else Conv2D
        activation = LeakyReLU(leaky_relu_alpha) if leaky else Activation(
            'relu')
        layers = [
            conv(filters, 5, strides=2, padding='same', name=name + 'conv'),
            BatchNormalization(name=name + 'bn'),
            activation
        ]
        if x is None:
            return layers
        for layer in layers:
            x = layer(x)
        return x

    def create_encoder():
        """Image -> (z_mean, z_log_var)."""
        x = Input(shape=image_shape, name='enc_input')
        y = conv_block(x, 64, name='enc_blk_1_')
        y = conv_block(y, 128, name='enc_blk_2_')
        y = conv_block(y, 256, name='enc_blk_3_')
        y = Flatten()(y)
        y = Dense(n_encoder, name='enc_h_dense')(y)
        y = BatchNormalization(name='enc_h_bn')(y)
        y = LeakyReLU(leaky_relu_alpha)(y)
        z_mean = Dense(latent_dim, name='z_mean')(y)
        z_log_var = Dense(latent_dim, name='z_log_var')(y)
        return Model(x, [z_mean, z_log_var], name='encoder')

    # Decoder: latent vector -> single-channel 64x64 map.
    decoder = Sequential([
        Dense(n_decoder, input_shape=(latent_dim, ), name='dec_h_dense'),
        BatchNormalization(name='dec_h_bn'),
        LeakyReLU(leaky_relu_alpha),
        Reshape(decode_from_shape),
        *conv_block(None, 256, transpose=True, name='dec_blk_1_'),
        *conv_block(None, 128, transpose=True, name='dec_blk_2_'),
        *conv_block(None, 32, transpose=True, name='dec_blk_3_'),
        Conv2D(1, 5, activation='sigmoid', padding='same', name='dec_output')
    ], name='decoder')

    def _sampling(args):
        """Reparameterization trick: z = mean + exp(log_var/2) * eps with
        eps drawn from an isotropic unit Gaussian."""
        z_mean, z_log_var = args
        batch = K.shape(z_mean)[0]
        dim = K.int_shape(z_mean)[1]
        # random_normal defaults to mean=0, std=1.
        epsilon = K.random_normal(shape=(batch, dim))
        return z_mean + K.exp(0.5 * z_log_var) * epsilon

    sampler = Lambda(_sampling, output_shape=(latent_dim, ), name='sampler')
    encoder = create_encoder()
    # Wire encoder -> sampler -> decoder into the full VAE graph.
    x = Input(shape=image_shape, name='input_image')
    z_mean, z_log_var = encoder(x)
    z = sampler([z_mean, z_log_var])
    y = decoder(z)
    vae = Model(x, y, name='vae')
    # KL divergence between q(z|x) and the unit-Gaussian prior.
    kl_loss = K.mean(
        -0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var),
                     axis=-1))
    vae.add_loss(kl_loss)
    return encoder, decoder, vae
# ---- training script: anchor-based detector trained via its 'PAL' loss layer
gen = gen_data(conf.batch_size, 'train')
print([i.shape for i in next(gen)[0]])
num_anchor = conf.num_anchor
# Image plus per-anchor regression/classification targets as extra inputs.
x_in = Input([conf.net_in_size, conf.net_in_size, 3], name='image_array')
y_e_reg = Input((num_anchor, 4), name='y_e_reg')
y_e_cls = Input((num_anchor, ), name='y_e_cls')
y_o_reg = Input((num_anchor, 4), name='y_o_reg')
y_o_cls = Input((num_anchor, ), name='y_o_cls')
fs_cls, fs_regr, ss_cls, ss_regr, pal = train_net(x_in, y_e_reg, y_e_cls,
                                                  y_o_reg, y_o_cls)
model = Model(inputs=[x_in, y_e_reg, y_e_cls, y_o_reg, y_o_cls],
              outputs=[fs_cls, fs_regr, ss_cls, ss_regr, pal])
# The network computes its own loss inside the 'PAL' layer; register that
# tensor via add_loss and compile with loss=None.
loss_name = 'PAL'
layer = model.get_layer(loss_name)
loss = layer.output
model.add_loss(loss)
model.summary()
if conf.continue_training:
    print('loading trained weights from..', conf.weights_to_transfer)
    model.load_weights(conf.weights_to_transfer, by_name=True)
model.compile(optimizer=SGD(lr=conf.lr, decay=conf.weight_decay,
                            momentum=0.9),
              loss=None)
model.fit_generator(generator=gen,
                    validation_steps=2,
                    steps_per_epoch=conf.steps_per_epoch,
                    epochs=conf.epochs,
                    callbacks=callback)
# ---- classifier head and combined training model ----
# (x, opinion_vec_ori, the *_in inputs, a_out and loss_A come from earlier
# in this file.)
x = GlobalAveragePooling1D()(x)
x = Concatenate()([x, opinion_vec_ori])
c_out = Dense(len(cp2id), activation='softmax', name='cp_out_Dense')(x)
a_model = Model([x1_in, x2_in, opinion_mask_in, lf_pos_in, rt_pos_in], a_out)
cp_model = Model([x1_in, x2_in, opinion_mask_in, lf_pos_in, rt_pos_in], c_out)
train_model = Model(
    [x1_in, x2_in, seq_a_in, opinion_mask_in, lf_pos_in, rt_pos_in, c_in],
    [a_out, c_out])
# Category crossentropy wrapped in a Lambda so it lives in the graph.
loss_c = Lambda(lambda t: K.mean(categorical_crossentropy(t[0], t[1])),
                name='loss_p')([c_in, c_out])
train_model.add_loss(loss_A)
train_model.add_loss(loss_c)
total_steps, warmup_steps = calc_train_steps(
    num_example=train_data[0].shape[0],
    batch_size=BATCH_SIZE,
    epochs=100,
    warmup_proportion=0.05,
)
optimizer = AdamWarmup(total_steps, warmup_steps, lr=1e-4, min_lr=1e-6)
train_model.compile(optimizer=optimizer)
# Surface loss_A in the training logs (pre-TF2 metrics API).
train_model.metrics_tensors.append(loss_A)
train_model.metrics_names.append('loss_A')
# NOTE(review): the code below is left byte-identical. This chunk is
# whitespace-mangled and a single-quoted string literal is split across the
# next two physical lines ('保存final 验证集的val ... ids'), so any
# reformatting would have to guess the original string contents. Structure:
# main() builds an opinion-term tagger — a BERT + POS-tag embedding encoder
# with a CRF head (o_out, term spans) and a softmax head (p_out, polarity),
# both losses attached via add_loss; an Evaluation callback tracks span-level
# F1 on a validation split and checkpoints the best weights; finally the
# model is trained with fit() at the bottom.
# NOTE(review): `log={}` in on_epoch_end is a mutable default argument, and
# `train_model._losses` / `_per_input_losses` touch Keras internals — both
# presumably workarounds for the keras-contrib CRF loss; confirm before
# changing.
def main(): seq_id, seq_O, seq_P, id_to_label, id_to_term = encode_seq( df_label=df_label, maxlen=MAX_LEN) class Evaluation(Callback): def __init__(self, val_data, interval=1): self.val_data = val_data self.interval = interval self.best_f1 = 0. self.true_vp_val = [ (row["id"], row["OpinionTerms"], row["Polarities"], row['O_start'], row['O_end']) for rowid, row in df_label[ df_label['id'].isin(self.val_data[0])].iterrows() ] def on_epoch_end(self, epoch, log={}): if epoch % self.interval == 0: o_out, p_out = pred_model.predict( self.val_data[1:4], batch_size=BATCH_SIZE) # CRF概率 o_pred = np.argmax(o_out, axis=2) p_pred = np.argmax(p_out, axis=2) texts = [ df_review[df_review['id'] == i]["Reviews"].values[0] for i in self.val_data[0] ] pred_vp_val = decode_seq(self.val_data[0], o_pred, p_pred, id_to_label, texts) precision, recall, f1 = cal_opinion_metrics( pred_vp_val, self.true_vp_val) if f1 > self.best_f1: self.best_f1 = f1 self.model.save_weights( f'./model_op/op_model_0924_viteb.weights') print(f'best = {f1}') tokenizer = BertTokenizer(token_dict) seq_input, seq_seg = bert_text_to_seq(list(df_review["Reviews"]), tokenizer, maxlen=MAX_LEN) true_vp = [(row["id"], row["OpinionTerms"], row["Polarities"], row['O_start'], row['O_end']) for rowid, row in df_label.iterrows()] pred_vp = decode_seq(seq_id, seq_O, seq_P, id_to_label, list(df_review["Reviews"])) cal_opinion_metrics(pred_vp, true_vp) seq_O = to_categorical(seq_O) seq_P = to_categorical(seq_P) df_review['pos_tag'] = df_review['Reviews'].progress_apply(pos_tag) with open('./data/postag2id_0922_laptop_make_up.pkl', 'rb') as f: postag2id = pickle.load(f) df_review['pos_tag'] = df_review['pos_tag'].progress_apply( lambda postag: [postag2id[x] for x in postag]) seq_postag = np.array(df_review['pos_tag'].values.tolist()) view_train, view_val = split_viewpoints(seq_id, seq_input, seq_seg, seq_O, seq_P, seq_postag) print(view_val[0]) print('------------------- 保存验证集的id ---------------------') print('保存final 验证集的val 
ids') # np.save('./data/final_makeup_laptop_val_ids', view_val[0]) print('------------------- 保存完毕 ---------------------------') # exit() bert_model = load_trained_model_from_checkpoint(config_path, checkpoint_path, seq_len=None) for l in bert_model.layers: l.trainable = True x1_in = Input(shape=(MAX_LEN, ), name='x1_in') x2_in = Input(shape=(MAX_LEN, ), name='x2_in') o_in = Input(shape=( MAX_LEN, len(id_to_term) + 1, ), name='o_in') p_in = Input(shape=( MAX_LEN, len(id_to_label) + 1, ), name='p_in') pos_tag_in = Input(shape=(MAX_LEN, ), name='pos_tag_in') pos_tag_emb = Embedding(len(postag2id), POS_TAG_DIM, trainable=True)(pos_tag_in) x = bert_model([x1_in, x2_in]) x = Concatenate()([x, pos_tag_emb]) p_out = Dense(len(id_to_label) + 1, activation='softmax')(x) # p_out 是极性的输出 crf = CRF(len(id_to_term) + 1) o_out = crf(x) loss_seq_O = crf.loss_function(o_in, o_out) # 直接加入 Lambda层后 计算图会出错 loss_seq_O = Lambda(lambda x: K.mean(x))(loss_seq_O) # loss_seq_O = Lambda(lambda x: K.mean(categorical_crossentropy(x[0], x[1])), name='loss_seq_O')([o_in, o_out]) loss_p = Lambda(lambda x: K.mean(categorical_crossentropy(x[0], x[1])), name='loss_c')([p_in, p_out]) train_model = Model([x1_in, x2_in, pos_tag_in, o_in, p_in], [o_out, p_out]) pred_model = Model([x1_in, x2_in, pos_tag_in], [o_out, p_out]) train_model._losses = [] train_model._per_input_losses = {} train_model.add_loss(loss_seq_O) train_model.add_loss(loss_p) print(view_train[0].shape[0]) total_steps, warmup_steps = calc_train_steps( num_example=view_train[0].shape[0], batch_size=BATCH_SIZE, epochs=EPOCHS, warmup_proportion=0.1, ) # optimizer = Adam(lr=1e-5) optimizer = AdamWarmup(total_steps, warmup_steps, lr=5e-5, min_lr=1e-6) train_model.compile(optimizer=optimizer) train_model.metrics_tensors.append(loss_seq_O) train_model.metrics_names.append('loss_seq_O') train_model.metrics_tensors.append(loss_p) train_model.metrics_names.append('loss_p') train_model.summary() eval_callback = Evaluation(val_data=view_val) 
train_model.fit(view_train[1:], epochs=EPOCHS, shuffle=True, batch_size=BATCH_SIZE, callbacks=[eval_callback])
def main():
    """Adversarially train the VAE-GAN on upscaled MNIST digits with a
    hand-rolled training loop driven through Keras callbacks."""
    encoder, decoder, discriminator, vae, vae_loss = create_models()
    batch_size = 32
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    # Resize 28x28 digits to 64x64.
    x_train_new = np.zeros((x_train.shape[0], 64, 64), dtype='int32')
    for i, img in enumerate(x_train):
        x_train_new[i] = cv2.resize(img, (64, 64),
                                    interpolation=cv2.INTER_CUBIC)
    x_train = x_train_new
    del x_train_new
    # Normalize to [-1, 1].
    x_train = np.expand_dims(x_train, -1)
    x_train = (x_train.astype('float32') - 127.5) / 127.5
    x_train = np.clip(x_train, -1., 1.)
    discriminator.compile('rmsprop', 'binary_crossentropy', ['accuracy'])
    # Freeze the discriminator inside the combined generator-side model.
    discriminator.trainable = False
    model = Model(vae.inputs, discriminator(vae.outputs), name='vaegan')
    model.add_loss(vae_loss)
    model.compile('rmsprop', 'binary_crossentropy', ['accuracy'])
    import keras.callbacks as cbks
    import os.path
    verbose = True
    checkpoint = cbks.ModelCheckpoint(os.path.join('.',
                                                   'model.{epoch:02d}.h5'),
                                      save_weights_only=True)
    callbacks = [TensorBoard(batch_size=batch_size), checkpoint]
    epochs = 100
    steps_per_epoch = x_train.shape[0] // batch_size
    do_validation = False
    callback_metrics = [
        'disc_loss', 'disc_accuracy', 'vaegan_loss', 'vaegan_accuracy'
    ]
    # Replicate what fit() does internally: logger + history + progbar.
    model.history = cbks.History()
    callbacks = [cbks.BaseLogger()] + (callbacks or []) + [model.history]
    if verbose:
        callbacks += [cbks.ProgbarLogger(count_mode='steps')]
    callbacks = cbks.CallbackList(callbacks)
    # Callbacks may target a wrapper model rather than the compiled one.
    if hasattr(model, 'callback_model') and model.callback_model:
        callback_model = model.callback_model
    else:
        callback_model = model
    callbacks.set_model(callback_model)
    callbacks.set_params({
        'epochs': epochs,
        'steps': steps_per_epoch,
        'verbose': verbose,
        'do_validation': do_validation,
        'metrics': callback_metrics,
    })
    callbacks.on_train_begin()
    epoch_logs = {}
    for epoch in range(epochs):
        callbacks.on_epoch_begin(epoch)
        for batch_index in range(steps_per_epoch):
            batch_logs = {}
            batch_logs['batch'] = batch_index
            batch_logs['size'] = batch_size
            callbacks.on_batch_begin(batch_index, batch_logs)
            rand_indexes = np.random.randint(0, x_train.shape[0],
                                             size=batch_size)
            real_images = x_train[rand_indexes]
            fake_images = vae.predict(real_images)
            # Discriminator step: half real (label 1), half fake (label 0).
            half_batch = batch_size // 2
            inputs = np.concatenate(
                [real_images[:half_batch], fake_images[:half_batch]])
            y = np.ones([batch_size, 1], dtype='float32')
            y[half_batch:, :] = 0
            metrics = discriminator.train_on_batch(inputs, y)
            # Generator-side step: label everything real.
            y = np.ones([batch_size, 1], dtype='float32')
            vg_metrics = model.train_on_batch(fake_images, y)
            batch_logs['disc_loss'] = metrics[0]
            batch_logs['disc_accuracy'] = metrics[1]
            batch_logs['vaegan_loss'] = vg_metrics[0]
            batch_logs['vaegan_accuracy'] = vg_metrics[1]
            callbacks.on_batch_end(batch_index, batch_logs)
        callbacks.on_epoch_end(epoch, epoch_logs)
    rand_indexes = np.random.randint(0, x_train.shape[0], size=1)
    real_images = x_train[rand_indexes]
    model.save_weights('trained.h5')
    a = encoder.predict(real_images)
    print(a)
# NOTE(review): left byte-identical — this chunk is the interior of a
# `Gaussian` Keras layer class whose header is outside this view, fused with
# module-level clustering-VAE (clvae) construction and training code.
# `call` broadcasts each latent z against per-class means, producing
# (batch, num_classes, latent_dim) per compute_output_shape; the module-level
# code then builds the clvae model, attaches the class-weighted
# reconstruction + KL loss via add_loss, and fits on MNIST-style arrays.
# Reformatting without the enclosing class definition would have to guess the
# method indentation, so only this note is added.
def call(self, inputs): z = inputs # z.shape=(batch_size, latent_dim) z = K.expand_dims(z, 1) return z - K.expand_dims(self.mean, 0) def compute_output_shape(self, input_shape): return (None, self.num_classes, input_shape[-1]) gaussian = Gaussian(num_classes, name='priors') z_prior_mean = gaussian(z) clvae = Model([x, y_in], [x_recon, z_prior_mean]) z_mean = K.expand_dims(z_mean, 1) z_log_var = K.expand_dims(z_log_var, 1) lamb = 0.5 xent_loss = 0.5 * K.mean((x - x_recon)**2, 0) kl_loss = - 0.5 * (z_log_var - K.square(z_prior_mean)) kl_loss = K.mean(K.batch_dot(K.expand_dims(y_in, 1), kl_loss), 0) clvae_loss = lamb * K.sum(xent_loss) + K.sum(kl_loss) clvae.add_loss(clvae_loss) clvae.compile(optimizer='adam') clvae.summary() clvae_history = clvae.fit([x_train, to_categorical(y_train)], shuffle=True, epochs=epochs, batch_size=batch_size, validation_data=([x_test, to_categorical(y_test)], None))
class VO:
    """Visual-odometry model combining a depth network and a pose network.

    Builds a Keras model that, given two consecutive frames, predicts a
    depth map and a relative camera pose; in 'train' mode it also emits a
    view-synthesis loss layer that is attached via ``add_loss``.
    """

    def __init__(self, input_shape, mode='train'):
        self.model = None
        self.input_shape = input_shape
        self.depth_shape = input_shape[:3]
        self.frame0 = None  # current frame
        self.frame1 = None  # previous frame
        self.intrinsic = None
        self.mode = mode

    def build(self, frozen=False, separated_weights=False):
        """Assemble the depth + odometry graph.

        frozen: freeze the depth sub-model's layers.
        separated_weights: load depth/pose weights from the paths in ``conf``.
        """
        self.intrinsic = get_intrinsics(conf['intrinsics'])
        frame0 = Input(batch_shape=self.input_shape, name='img0')
        frame1 = Input(batch_shape=self.input_shape, name='img1')

        dep = DepthModel(input_tensor=frame1, mode='single')
        dep.build()
        if frozen:
            for layer in dep.model.layers:
                layer.trainable = False

        odo = OdometryModel(inputs=[frame0, frame1])
        odo.build()
        if separated_weights:
            dep.load_weights(conf['depth_weights'])
            odo.load_weights(conf['pose_weights'])
            print('weights_loaded')

        depthmap = dep.model.output
        pose = odo.model.output
        # Convert the 6-DoF pose vector to a transformation matrix, then warp
        # frame1 into frame0's viewpoint.
        mat = Lambda(vec2mat, name='euler2mat')(pose)
        img_syn = Lambda(self.img_syn,
                         name='synthesis')([frame1, depthmap, mat])

        inputs = [frame0, frame1]
        # BUG FIX: was `self.mode is 'train'` — identity comparison against a
        # string literal is implementation-dependent; use equality.
        if self.mode == 'train':
            syn_loss = Lambda(self.syn_loss,
                              name='syn_loss')([frame0, img_syn])
            # smo_loss = Lambda(self.smo_loss,
            #                   name='smo_loss')(depthmap)
            outputs = [depthmap, pose, syn_loss]  # ,smo_loss]
        else:
            outputs = [depthmap, pose]
        self.model = Model(inputs=inputs, outputs=outputs)

    def compile(self):
        """Attach the synthesis loss via add_loss and compile with Adam."""
        syn_loss = self.model.get_layer('syn_loss').output
        # smo_loss = self.model.get_layer('smo_loss').output
        self.model.add_loss(syn_loss)
        # self.model.add_loss(smo_loss)
        adam = Adam(lr=0.01)
        # Loss comes from add_loss, so each model output gets loss=None.
        self.model.compile(optimizer=adam,
                           loss=[None] * len(self.model.outputs))

    def load_weights(self, paths):
        """Load each weight file in ``paths`` by layer name."""
        for file in paths:
            self.model.load_weights(file, by_name=True)

    def img_syn(self, inputs):
        """Synthesize the target view from (source image, depth, pose)."""
        img_src, depth, pose = inputs
        # Network output is treated as inverse depth; clip to avoid div-by-zero.
        depth = K.clip(depth, 1e-6, 1e6)
        depth = 1.0 / depth
        img_tgt = synthesis(img_src, depth, pose, self.input_shape,
                            self.intrinsic)
        return img_tgt

    def smo_loss(self, depthmap):
        """Smoothness regularizer on the depth map."""
        loss_smo = smoothness(depthmap)
        return loss_smo

    def syn_loss(self, inputs):
        """Mean absolute photometric error on a central crop (drops borders)."""
        img_tgt, img_syn = inputs
        img_tgt_cropped = K.slice(img_tgt, (0, 40, 40, 0), (-1, 400, 560, -1))
        img_syn_cropped = K.slice(img_syn, (0, 40, 40, 0), (-1, 400, 560, -1))
        loss = K.mean(mean_absolute_error(img_tgt_cropped, img_syn_cropped))
        return loss

    def _expand(self, x, shape):
        """Tile x with zero padding along axis 1 and reshape to ``shape``."""
        x = K.expand_dims(x, axis=1)
        ones = K.zeros_like(x)  # NOTE(review): despite the name, these are zeros
        # shape=K.cast(shape,'float32')
        # NOTE(review): tile count is cast to float32 — K.tile normally expects
        # integers; confirm this works on the backend in use.
        times = K.cast(shape[1] * shape[2] / 16 - 1, 'float32')
        ones = K.tile(ones, [1, times, 1, 1])
        expanded = K.concatenate([x, ones], axis=1)
        expanded = K.reshape(expanded, shape)
        expanded = K.expand_dims(expanded)
        return expanded

    def model_from_file(self, model_file):
        self.model = model_from_file(model_file)

    def save_as_json(self, path):
        save_model(path, self.model)
# Pointer head for the end position: project, score, then mask padded
# positions (x_mask == 0) by pushing their logits to -1e10.
x2 = Dense(char_size, use_bias=False, activation='tanh')(x)
ps2 = Dense(1, use_bias=False)(x2)
ps2 = Lambda(lambda x: x[0][..., 0] - (1 - x[1][..., 0]) * 1e10)([ps2, x_mask])

# Inference model predicts start/end logits; train_model additionally takes
# the gold start/end one-hot targets s1_in/s2_in as inputs for the loss.
model = Model([x_in, c_in], [ps1, ps2])
train_model = Model([x_in, c_in, s1_in, s2_in], [ps1, ps2])

# categorical_crossentropy: categorical cross-entropy between the output
# tensor and the target tensor; mean: mean of a tensor along a given axis.
loss1 = K.mean(K.categorical_crossentropy(s1_in, ps1, from_logits=True))
loss2 = K.mean(K.categorical_crossentropy(s2_in, ps2, from_logits=True))
loss = loss1 + loss2

train_model.add_loss(loss)  # register the custom loss via self.add_loss(loss_tensor)
# compile configures the model for training; optimizer: a string (optimizer
# name) or an optimizer instance. No per-output loss since add_loss is used.
train_model.compile(optimizer=Adam(1e-3))
train_model.summary()  # model.summary() prints an overview of the model.

def extract_entity(text_in, c_in):
    """Decoding function; add more rules as needed to guarantee the
    decoded span is a company name.
    """
    if c_in not in class2id:
        return 'NaN'
    # Map characters to ids; unknown characters map to 1.
    _x = [char2id.get(c, 1) for c in text_in]
    _x = np.array([_x])
    _c = np.array([[class2id[c_in]]])
    _ps1, _ps2 = model.predict([_x, _c])  # generate predictions for the inputs
    # NOTE(review): function is truncated at this chunk boundary — the span
    # decoding and return statement continue beyond the visible source.
def get_MCLwP_allcnn(input_shape, n_class, encode_shape, distil_prediction,
                     distillation_coef, complexity, conv_regularizer,
                     conv_constraint, bl_regularizer, bl_constraint):
    """Build the two-branch MCLwP all-CNN model.

    A trainable branch (linear encoder -> nonlinear decoder -> all-CNN) and a
    fixed branch (nonlinear encoder/decoder with the 'fix_' name prefix and no
    regularization in its backbone) share the input and each end in a softmax
    classification head. If ``distil_prediction`` is set, a symmetric-KL
    distillation loss between the two predictions is attached via ``add_loss``.

    Returns a Model with outputs [trainable_prediction, fixed_prediction].
    """

    def _classification_head(hiddens, prefix):
        # Shared head: 1x1 conv to n_class maps -> BN -> ReLU -> global average
        # pooling -> softmax. Previously duplicated verbatim for both branches.
        # NOTE(review): the fixed branch's head also applies the regularizer/
        # constraint (unlike its backbone, which passes None) — preserved as in
        # the original; confirm intent.
        hiddens = Conv2D(
            n_class, (1, 1),
            strides=(1, 1),
            padding='same',
            kernel_regularizer=regularizers.l2(conv_regularizer)
            if conv_regularizer is not None else None,
            kernel_constraint=constraints.max_norm(conv_constraint,
                                                   axis=[0, 1, 2])
            if conv_constraint is not None else None,
            name=prefix + 'allcnn_conv3_3')(hiddens)
        hiddens = BN(name=prefix + 'allcnn_bn3_3')(hiddens)
        hiddens = Activation('relu')(hiddens)
        hiddens = GlobalAveragePooling2D()(hiddens)
        return Activation('softmax',
                          name=prefix + 'allcnn_prediction')(hiddens)

    inputs = Input(input_shape, name='inputs')

    """ the trainable part """
    linear_encode = get_linear_encoder(inputs, encode_shape, conv_regularizer,
                                       conv_constraint)
    nonlinear_decode_trainable = get_nonlinear_decoder(
        linear_encode, encode_shape, input_shape, None, complexity,
        conv_regularizer, conv_constraint, bl_regularizer, bl_constraint)
    hiddens_trainable = allcnn_module(nonlinear_decode_trainable, input_shape,
                                      None, conv_regularizer, conv_constraint)
    outputs_trainable = _classification_head(hiddens_trainable, '')

    """ fix part """
    nonlinear_encode = get_nonlinear_encoder(inputs, input_shape, encode_shape,
                                             'fix_', complexity, None, None,
                                             None, None)
    nonlinear_decode_fix = get_nonlinear_decoder(nonlinear_encode, encode_shape,
                                                 input_shape, 'fix_',
                                                 complexity, None, None, None,
                                                 None)
    hiddens_fix = allcnn_module(nonlinear_decode_fix, input_shape, 'fix_', None,
                                None)
    outputs_fix = _classification_head(hiddens_fix, 'fix_')

    model = Model(inputs=inputs, outputs=[outputs_trainable, outputs_fix])
    #model = Model(inputs=inputs, outputs=outputs_trainable)

    if distil_prediction:
        # Symmetric KL divergence between the two branches' predictions.
        distillation_loss = K.mean(keras_kl(
            outputs_trainable, outputs_fix)) + K.mean(
                keras_kl(outputs_fix, outputs_trainable))
        model.add_loss(distillation_coef * distillation_loss)

    return model