def train(self, X):
    def sampling(args):
        # reparameterization trick: z = mean + sigma * epsilon
        z_mean, z_log_var = args
        batch = K.shape(z_mean)[0]
        dim = K.int_shape(z_mean)[1]
        epsilon = K.random_normal(shape=(batch, dim), seed=0)
        return z_mean + K.exp(0.5 * z_log_var) * epsilon

    input = []
    dims = []
    denses = []
    encoding_dim = self.n_components
    output = []

    # one input branch and one dense projection per view
    for i in range(self.n):
        input.append(Input(shape=(self.shape[i], )))
        dims.append(int(encoding_dim * 1 / self.n))
    for i in range(self.n):
        denses.append(Dense(dims[i])(input[i]))
    if self.n > 1:
        merged_dense = concatenate(denses, axis=-1)
    else:
        merged_dense = denses[0]

    # shared encoder
    encoded = Dense(encoding_dim)(merged_dense)
    encoded = BatchNormalization()(encoded)
    encoded = Activation('relu')(encoded)
    z_mean = Dense(encoding_dim)(encoded)
    z_log_var = Dense(encoding_dim)(encoded)
    z = Lambda(sampling, output_shape=(encoding_dim, ), name='z')([z_mean, z_log_var])

    # decoder: one output branch per view
    model = Dense(self.n_components)(z)
    model = BatchNormalization()(model)
    model = Activation('relu')(model)
    for i in range(self.n):
        output.append(Dense(self.shape[i])(model))

    vae = Model(input, output)
    encoder = Model(input, z)

    # KL divergence term
    kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
    kl_loss = K.sum(kl_loss, axis=-1)
    kl_loss *= -0.5 / np.sum(dims)

    # reconstruction (MSE) term averaged over the views
    k_mse_loss = 0
    for i in range(self.n):
        k_mse_loss += mse(input[i], output[i]) / self.n

    vae.add_loss(k_mse_loss)
    vae.add_metric(k_mse_loss, name='mse_loss')
    vae.add_loss(kl_loss)
    vae.add_metric(kl_loss, name='kl_loss')
    vae.compile(optimizer=Adam())
    vae.summary()

    h = vae.fit(X, epochs=self.epochs, verbose=2)

    log_file = "./mvae.log"
    fp = open(log_file, 'w')
    for hi in h.history['mse_loss']:
        fp.write("%f\n" % (hi))
    fp.close()
    return
def fastbert(teacher, classifier, speed=speed):
    inputs = teacher.inputs

    # freeze the teacher (and its classifier) so only the student branches train
    for layer in teacher.model.layers:
        layer.trainable = False
    classifier.trainable = False

    x_pre = teacher.apply_embeddings(inputs)
    emb_name = 'FastBert-embedding'
    clf_pre = teacher.apply(x_pre, FastbertClassifierLayer, name=emb_name,
                            labels_num=num_classes)
    student_outputs = [clf_pre]
    outputs = [clf_pre, x_pre]

    # attach a student classifier after every transformer layer
    for idx in range(teacher.num_hidden_layers):
        clf_pre, x_pre = outputs
        name = 'FastBert-%d' % idx
        x_next = teacher.apply_attention_layers(x_pre, idx)
        clf_next = teacher.apply(x_pre, FastbertClassifierLayer, name=name,
                                 labels_num=num_classes)
        student_outputs.append(clf_next)
        x = SwitchTwo(speed)([clf_pre, x_pre, x_next])
        clf = SwitchTwo(speed)([clf_pre, clf_pre, clf_next])
        outputs = [clf, x]

    clf_prob, x = outputs
    x = classifier(x)
    output = SwitchTwo(speed)([clf_prob, clf_prob, x])
    model_infer = Model(inputs, output)

    label_inputs = Input(shape=(None, ))
    model_train = Model(inputs + [label_inputs], student_outputs)
    for i, prob in enumerate(student_outputs):
        ce_loss = K.sparse_categorical_crossentropy(label_inputs, prob)
        kl_loss = kullback_leibler_divergence(x, prob)
        model_train.add_loss(ce_loss)
        model_train.add_metric(ce_loss, name='ce_loss-%d' % i)
        model_train.add_loss(kl_loss)
        model_train.add_metric(kl_loss, name='kl_loss-%d' % i)

    model_1 = Model(inputs, student_outputs[1])
    model_2 = Model(inputs, student_outputs[2])
    return model_train, model_infer, model_1, model_2
class GAHs_trans:
    def __init__(self, i_tokens, o_tokens, len_limit, d_model=256,
                 d_inner_hid=512, n_head=4, layers=2, dropout=0.1,
                 share_word_emb=False):
        self.i_tokens = i_tokens
        self.o_tokens = o_tokens
        self.len_limit = len_limit
        self.d_model = d_model
        self.decode_model = None
        self.readout_model = None
        self.layers = layers
        d_emb = d_model
        self.src_loc_info = True
        d_k = d_v = d_model // n_head
        assert d_k * n_head == d_model and d_v == d_k

        self.pos_emb = PosEncodingLayer(len_limit, d_emb) if self.src_loc_info else None
        self.emb_dropout = Dropout(dropout)
        self.i_word_emb = Embedding(i_tokens.num(), d_emb)
        if share_word_emb:
            assert i_tokens.num() == o_tokens.num()
            self.o_word_emb = self.i_word_emb
        else:
            self.o_word_emb = Embedding(o_tokens.num(), d_emb)

        self.encoder = MultiLayerEncoder(d_model, d_inner_hid, n_head, layers, dropout)
        self.decoder = Decoder(d_model, d_inner_hid, n_head, layers, dropout)
        self.target_layer = TimeDistributed(Dense(o_tokens.num(), use_bias=False))

    def compile(self, optimizer='adam', active_layers=999, opt=None):
        src_seq_input = Input(shape=(None,), dtype='int32')
        tgt_seq_input = Input(shape=(None,), dtype='int32')

        # customized masks
        masks = [Input(shape=(self.len_limit, self.len_limit), dtype='float32')
                 for i in range(len(opt.all_roles))]
        mask_comb = []
        for i in opt.sample_i:
            mask_comb.append(masks[i])

        src_seq = src_seq_input
        tgt_seq = Lambda(lambda x: x[:, :-1])(tgt_seq_input)
        tgt_true = Lambda(lambda x: x[:, 1:])(tgt_seq_input)

        src_emb = self.i_word_emb(src_seq)
        tgt_emb = self.o_word_emb(tgt_seq)
        if self.pos_emb:
            src_emb = add_layer([src_emb, self.pos_emb(src_seq)])
            tgt_emb = add_layer([tgt_emb, self.pos_emb(tgt_seq)])
        src_emb = self.emb_dropout(src_emb)

        # customized masks added
        enc_output = self.encoder(src_emb, src_seq, active_layers=active_layers, masks=mask_comb)
        dec_output = self.decoder(tgt_emb, tgt_seq, src_seq, enc_output, active_layers=active_layers)
        final_output = self.target_layer(dec_output)

        def get_loss(y_pred, y_true):
            y_true = tf.cast(y_true, 'int32')
            # loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y_true, logits=y_pred)
            loss = tf.losses.sparse_categorical_crossentropy(y_true, y_pred, from_logits=True)
            mask = tf.cast(tf.not_equal(y_true, 0), 'float32')
            loss = tf.reduce_sum(loss * mask, -1) / tf.reduce_sum(mask, -1)
            loss = K.mean(loss)
            return loss

        def get_accu(y_pred, y_true):
            mask = tf.cast(tf.not_equal(y_true, 0), 'float32')  # [1,1,1,0,0,0]
            corr = K.cast(K.equal(K.cast(y_true, 'int32'),
                                  K.cast(K.argmax(y_pred, axis=-1), 'int32')), 'float32')
            corr = K.sum(corr * mask, -1) / K.sum(mask, -1)
            return K.mean(corr)

        # def bleu(target, pred):
        #     mask = tf.cast(tf.not_equal(target, 0), 'float32')  # [1,1,1,0,0,0]
        #     predicts = K.eval(K.cast(K.argmax(pred, axis=-1), 'int32'))
        #     reference = K.eval(K.cast(target, 'int32'))
        #     reference = [[x] for x in reference]
        #     score = nltk.translate.bleu_score.corpus_bleu(reference, predicts)
        #     # score = nltk.translate.bleu_score.corpus_bleu(target, pred, smoothing_function=smoothing.method4)
        #     return score

        loss = get_loss(final_output, tgt_true)
        self.ppl = K.exp(loss)
        self.accu = get_accu(final_output, tgt_true)
        # self.bleu = bleu(tgt_true, final_output)  # calculate BLEU score
        # print('BLEU-1: %f' % corpus_bleu(tgt_true, final_output, weights=(1.0, 0, 0, 0)))
        # print('BLEU-2: %f' % corpus_bleu(tgt_true, final_output, weights=(0.5, 0.5, 0, 0)))
        # print('BLEU-3: %f' % corpus_bleu(tgt_true, final_output, weights=(0.3, 0.3, 0.3, 0)))
        # print('BLEU-4: %f' % corpus_bleu(tgt_true, final_output, weights=(0.25, 0.25, 0.25, 0.25)))

        self.model = Model([src_seq_input, tgt_seq_input] + masks, final_output)
        self.model.add_loss([loss])
        # self.model.metrics_tensors = []  # added by me
        self.model.compile(optimizer, None)
        # add_metric registers the metric name itself, so the manual
        # metrics_names.append / metrics_tensors.append calls are no longer needed
        # self.model.metrics_names.append('ppl')
        # self.model.metrics_tensors.append(self.ppl)
        self.model.add_metric(self.ppl, 'ppl')
        # self.model.metrics_names.append('accu')
        self.model.add_metric(self.accu, 'accu')
        # self.model.add_metric(self.bleu, 'bleu')
        # self.model.metrics_tensors.append(self.accu)

    def make_src_seq_matrix(self, input_seqs):
        if type(input_seqs[0]) == type(''):
            input_seqs = [input_seqs]
        maxlen = max(map(len, input_seqs))
        src_seq = np.zeros((len(input_seqs), maxlen + 3), dtype='int32')
        src_seq[:, 0] = self.i_tokens.startid()
        for i, seq in enumerate(input_seqs):
            for ii, z in enumerate(seq):
                src_seq[i, 1 + ii] = self.i_tokens.id(z)
            src_seq[i, 1 + len(seq)] = self.i_tokens.endid()
        return src_seq

    def make_readout_decode_model(self, max_output_len=32):
        src_seq_input = Input(shape=(None,), dtype='int32')
        tgt_start_input = Input(shape=(1,), dtype='int32')
        src_seq = src_seq_input
        enc_mask = Lambda(lambda x: K.cast(K.greater(x, 0), 'float32'))(src_seq)
        src_emb = self.i_word_emb(src_seq)
        if self.pos_emb:
            src_emb = add_layer([src_emb, self.pos_emb(src_seq)])
        src_emb = self.emb_dropout(src_emb)
        enc_output = self.encoder(src_emb, src_seq)

        tgt_emb = self.o_word_emb(tgt_start_input)
        tgt_seq = Lambda(lambda x: K.repeat_elements(x, max_output_len, 1))(tgt_start_input)
        rep_input = Lambda(lambda x: K.repeat_elements(x, max_output_len, 1))(tgt_emb)

        cell = ReadoutDecoderCell(self.o_word_emb, self.pos_emb, self.decoder, self.target_layer)
        final_output = InferRNN(cell, return_sequences=True)(
            rep_input,
            initial_state=[tgt_start_input, K.ones_like(tgt_start_input), K.zeros_like(tgt_seq)] +
                          [rep_input for _ in self.decoder.layers],
            constants=[enc_output, enc_mask])
        final_output = Lambda(lambda x: K.squeeze(x, -1))(final_output)
        self.readout_model = Model([src_seq_input, tgt_start_input], final_output)

    def decode_sequence_readout_x(self, X, batch_size=32, max_output_len=64):
        if self.readout_model is None:
            self.make_readout_decode_model(max_output_len)
        target_seq = np.zeros((X.shape[0], 1), dtype='int32')
        target_seq[:, 0] = self.o_tokens.startid()
        ret = self.readout_model.predict([X, target_seq], batch_size=batch_size, verbose=1)
        return ret

    def generate_sentence(self, rets, delimiter=''):
        sents = []
        for x in rets:
            end_pos = min([i for i, z in enumerate(x) if z == self.o_tokens.endid()] + [len(x)])
            rsent = [*map(self.o_tokens.token, x)][:end_pos]
            sents.append(delimiter.join(rsent))
        return sents

    def decode_sequence_readout(self, input_seqs, delimiter=''):
        if self.readout_model is None:
            self.make_readout_decode_model()
        src_seq = self.make_src_seq_matrix(input_seqs)
        target_seq = np.zeros((src_seq.shape[0], 1), dtype='int32')
        target_seq[:, 0] = self.o_tokens.startid()
        rets = self.readout_model.predict([src_seq, target_seq])
        rets = self.generate_sentence(rets, delimiter)
        if type(input_seqs[0]) is type('') and len(rets) == 1:
            rets = rets[0]
        return rets

    def make_fast_decode_model(self):
        src_seq_input = Input(shape=(None,), dtype='int32')
        src_emb = self.i_word_emb(src_seq_input)
        if self.pos_emb:
            src_emb = add_layer([src_emb, self.pos_emb(src_seq_input)])
        src_emb = self.emb_dropout(src_emb)
        enc_output = self.encoder(src_emb, src_seq_input)
        self.encode_model = Model(src_seq_input, enc_output)

        self.decoder_pre_step = DecoderPerStep(self.decoder)

        src_seq_input = Input(shape=(None,), dtype='int32')
        tgt_one_input = Input(shape=(1,), dtype='int32')
        enc_ret_input = Input(shape=(None, self.d_model))
        dec_ret_inputs = [Input(shape=(None, self.d_model)) for _ in self.decoder.layers]

        tgt_pos = Lambda(lambda x: tf.shape(x)[1])(dec_ret_inputs[0])
        tgt_one = self.o_word_emb(tgt_one_input)
        if self.pos_emb:
            tgt_one = add_layer([tgt_one, self.pos_emb(tgt_pos, pos_input=True)])

        dec_outputs = self.decoder_pre_step([tgt_one, src_seq_input, enc_ret_input] + dec_ret_inputs)
        final_output = self.target_layer(dec_outputs[-1])
        self.decode_model = Model([tgt_one_input, src_seq_input, enc_ret_input] + dec_ret_inputs,
                                  dec_outputs[:-1] + [final_output])

    def decode_sequence_fast(self, input_seqs, batch_size=32, delimiter='', verbose=0):
        if self.decode_model is None:
            self.make_fast_decode_model()
        src_seq = self.make_src_seq_matrix(input_seqs)
        start_mark, end_mark = self.o_tokens.startid(), self.o_tokens.endid()
        max_len = self.len_limit
        encode_model = self.encode_model
        decode_model = self.decode_model
        decode_batch = lambda x: decode_batch_greedy(x, encode_model, decode_model,
                                                     start_mark, end_mark, max_len)
        rets = []
        rng = range(0, src_seq.shape[0], batch_size)
        if verbose and src_seq.shape[0] > batch_size:
            rng = tqdm(rng, total=len(rng))
        for iter in rng:
            rets.extend(decode_batch(src_seq[iter:iter + batch_size]))
        rets = [delimiter.join(list(map(self.o_tokens.token, ret))) for ret in rets]
        if type(input_seqs[0]) is type('') and len(rets) == 1:
            rets = rets[0]
        return rets

    def beam_search(self, input_seqs, topk=5, batch_size=8, length_penalty=1, delimiter='', verbose=0):
        if self.decode_model is None:
            self.make_fast_decode_model()
        src_seq = self.make_src_seq_matrix(input_seqs)
        start_mark, end_mark = self.o_tokens.startid(), self.o_tokens.endid()
        max_len = self.len_limit
        encode_model = self.encode_model
        decode_model = self.decode_model
        decode_batch = lambda x: decode_batch_beam_search(x, topk, encode_model, decode_model,
                                                          start_mark, end_mark, max_len)
        rets = {}
        rng = range(0, src_seq.shape[0], batch_size)
        if verbose and src_seq.shape[0] > batch_size:
            rng = tqdm(rng, total=len(rng))
        for iter in rng:
            for i, x, y in decode_batch(src_seq[iter:iter + batch_size]):
                rets.setdefault(iter + i, []).append((x, y / np.power(len(x) + 1, length_penalty)))
        rets = {x: sorted(ys, key=lambda x: x[-1], reverse=True) for x, ys in rets.items()}
        rets = [rets[i] for i in range(len(rets))]
        rets = [[(delimiter.join(list(map(self.o_tokens.token, x))), y) for x, y in r] for r in rets]
        if type(input_seqs[0]) is type('') and len(rets) == 1:
            rets = rets[0]
        return rets
d_loss = K.mean(x_real_score - x_fake_score)

real_grad = K.gradients(x_real_score, [x_real])[0]
fake_grad = K.gradients(x_fake_score, [x_fake])[0]
real_grad_norm = K.sum(real_grad**2, axis=[1, 2, 3])**(p / 2)
fake_grad_norm = K.sum(fake_grad**2, axis=[1, 2, 3])**(p / 2)
grad_loss = K.mean(real_grad_norm + fake_grad_norm) * k / 2
w_dist = K.mean(x_fake_score - x_real_score)

d_train_model.add_loss(d_loss + grad_loss)
d_train_model.compile(optimizer=Adam(2e-4, 0.5))
# d_train_model.metrics_names.append('w_dist')
# d_train_model.metrics_tensors.append(w_dist)
d_train_model.add_metric(w_dist, 'w_dist')  # custom metric

# combined model (for training the generator)
g_model.trainable = True
d_model.trainable = False

x_fake = g_model(z_in)
x_fake_score = d_model(x_fake)

g_train_model = Model(z_in, x_fake_score)

g_loss = K.mean(x_fake_score)
g_train_model.add_loss(g_loss)
g_train_model.compile(optimizer=Adam(2e-4, 0.5))

# inspect the model structure
def Vae_MNIST_NN1(input_tensor=None, train=False):
    np.random.seed(0)
    # MNIST dataset
    image_size = 28
    if train:
        (x_train, y_train), (x_test, y_test) = mnist.load_data()
        x_train = np.reshape(x_train, [-1, image_size, image_size, 1])
        x_test = np.reshape(x_test, [-1, image_size, image_size, 1])
        x_train = x_train.astype('float32') / 255
        x_test = x_test.astype('float32') / 255
        # network parameters
        input_shape = (image_size, image_size, 1)
        input_tensor = Input(shape=input_shape)
        batch_size = 128
        epochs = 50
    elif input_tensor is None:
        print('you have to provide input_tensor when testing')
        exit()

    latent_dim = 200
    intermediate_dims = np.array([400])

    # VAE model = encoder + decoder
    # build encoder model
    original_dim = image_size * image_size
    inputs = Reshape((original_dim,), name='encoder_input')(input_tensor)
    x = Dense(intermediate_dims[0], activation='relu')(inputs)
    for i in range(intermediate_dims.shape[0]):
        if i != 0:
            x = Dense(intermediate_dims[i], activation='relu')(x)
    z_mean = Dense(latent_dim, name='z_mean')(x)
    z_log_var = Dense(latent_dim, name='z_log_var')(x)

    # use reparameterization trick to push the sampling out as input
    # note that "output_shape" isn't necessary with the TensorFlow backend
    z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])

    # instantiate encoder model
    encoder = Model(input_tensor, [z_mean, z_log_var, z], name='encoder')
    # encoder.summary()

    # build decoder model
    intermediate_dims = np.flipud(intermediate_dims)
    latent_inputs = Input(shape=(latent_dim,), name='z_sampling')
    x = Dense(intermediate_dims[0], activation='relu')(latent_inputs)
    for i in range(intermediate_dims.shape[0]):
        if i != 0:
            x = Dense(intermediate_dims[i], activation='relu')(x)
    pos_mean = Dense(original_dim, name='pos_mean')(x)
    pos_log_var = Dense(original_dim, name='pos_log_var')(x)

    # instantiate decoder model
    decoder = Model(latent_inputs, [pos_mean, pos_log_var], name='decoder')
    # decoder.summary()

    # instantiate VAE model
    outputs = decoder(encoder(input_tensor)[2])
    vae = Model(input_tensor, outputs, name='vae_mlp')
    # vae.summary()

    if train:
        # VAE loss = reconstruction_loss + kl_loss
        loss_a = float(np.log(2 * np.pi)) + outputs[1]
        loss_m = K.square(outputs[0] - inputs) / K.exp(outputs[1])
        reconstruction_loss = -0.5 * K.sum((loss_a + loss_m), axis=-1)
        kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
        kl_loss = K.sum(kl_loss, axis=-1)
        kl_loss *= -0.5
        vae_loss = K.mean(-reconstruction_loss + kl_loss)
        vae.add_loss(vae_loss)
        vae.compile(optimizer="adam")
        vae.summary()
        vae.add_metric(reconstruction_loss, "reconstruct")
        vae.add_metric(kl_loss, "kl")
        vae.fit(x_train, epochs=epochs, batch_size=batch_size, validation_data=(x_test, None))
        # save model
        vae.save_weights('./vae_mnist_nn1.h5')
    else:
        vae.load_weights('./vae_mnist_nn1.h5')
    return vae
def correlation(x, y):  # signature reconstructed from the call site below; the snippet begins mid-function
    x = K.l2_normalize(x, 1)
    y = K.l2_normalize(y, 1)
    return K.sum(x * y, 1, keepdims=True)


t1_loss = z_real_mean - z_fake_ng_mean
t2_loss = z_fake_mean - z_fake_ng_mean
z_corr = correlation(z_in, z_fake)
qp_loss = 0.25 * t1_loss[:, 0]**2 / K.mean((x_real - x_fake_ng)**2, axis=[1, 2, 3])

train_model.add_loss(K.mean(t1_loss + t2_loss - 1. * z_corr) + K.mean(qp_loss))
train_model.compile(optimizer=RMSprop(1e-4, 0.99))
# train_model.metrics_names.append('t_loss')
# train_model.metrics_tensors.append(K.mean(t1_loss))
train_model.add_metric(K.mean(t1_loss), 't_loss')
# train_model.metrics_names.append('z_corr')
# train_model.metrics_tensors.append(K.mean(z_corr))
train_model.add_metric(K.mean(z_corr), 'z_corr')

# inspect the model structure
train_model.summary()


class ExponentialMovingAverage:
    """Keep an exponential moving average of the model weights.

    Usage: after model.compile and before the first training step,
    create an instance and then call its inject method.
    """
    def __init__(self, model, momentum=0.9999):
        self.momentum = momentum
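# Minimal usage sketch for the ExponentialMovingAverage class above, based only on its
# docstring (the class body is truncated here); inject() is the one method the docstring
# names, and `train_model` / `x_train` are assumed to exist from the surrounding snippet.
EMAer = ExponentialMovingAverage(train_model)  # initialise after train_model.compile(...)
EMAer.inject()                                 # hook the EMA weight update into training
train_model.fit(x_train, epochs=10)            # then train as usual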
def get_model(num_users, num_items, layers=None, reg_layers=None,
              fake_layers=None, fake_reg_layers=None,
              last_activation='sigmoid', fake_last_activation='sigmoid'):
    if reg_layers is None:
        reg_layers = [0, 0]
    if layers is None:
        layers = [20, 10]
    if fake_reg_layers is None:
        fake_reg_layers = [0, 0]
    if fake_layers is None:
        fake_layers = [20, 10]
    assert len(layers) == len(reg_layers)
    assert len(fake_layers) == len(fake_reg_layers)
    num_layer = len(layers)  # Number of layers in the MLP
    fake_num_layer = len(fake_layers)

    # Input variables
    fake_user_input = Input(shape=(1, ), dtype='int32', name='fake_user_input')
    user_input = Input(shape=(1, ), dtype='int32', name='user_input')
    item_input = Input(shape=(1, ), dtype='int32', name='item_input')
    rating_output = Input(shape=(1, ), dtype='float32', name='rating_output')

    MLP_Embedding_Fake_User = Embedding(input_dim=num_users, output_dim=layers[0] // 2,
                                        name='fake_user_embedding',
                                        embeddings_initializer='random_normal',
                                        embeddings_regularizer=l2(reg_layers[0]),
                                        input_length=1)
    MLP_Embedding_User = Embedding(input_dim=num_users, output_dim=layers[0] // 2,
                                   name='user_embedding',
                                   embeddings_initializer='random_normal',
                                   embeddings_regularizer=l2(reg_layers[0]),
                                   input_length=1)
    MLP_Embedding_Item = Embedding(input_dim=num_items, output_dim=layers[0] // 2,
                                   name='item_embedding',
                                   embeddings_initializer='random_normal',
                                   embeddings_regularizer=l2(reg_layers[0]),
                                   input_length=1)

    # Crucial to flatten an embedding vector!
    fake_user_latent = Flatten()(MLP_Embedding_Fake_User(fake_user_input))
    user_latent = Flatten()(MLP_Embedding_User(user_input))
    item_latent = Flatten()(MLP_Embedding_Item(item_input))

    # The 0-th layer is the concatenation of embedding layers
    # vector = merge([user_latent, item_latent], mode='concat')
    vector = merge.concatenate([user_latent, item_latent])

    # MLP layers
    for idx in range(1, num_layer):
        layer = Dense(layers[idx], kernel_regularizer=l2(reg_layers[idx]),
                      activation='relu', name='layer%d' % idx)
        vector = layer(vector)

    # Final prediction layer
    prediction = Dense(1, activation=last_activation,
                       kernel_initializer='lecun_uniform', name='prediction')(vector)

    # parallel MLP over the fake-user branch
    fake_vector = merge.concatenate([fake_user_latent, item_latent])
    for idx in range(1, fake_num_layer):
        layer = Dense(fake_layers[idx], kernel_regularizer=l2(fake_reg_layers[idx]),
                      activation='relu', name='fake_layer%d' % idx)
        fake_vector = layer(fake_vector)
    fake_prediction = Dense(1, activation=fake_last_activation,
                            kernel_initializer='lecun_uniform', name='fake_prediction')(fake_vector)

    model = Model(inputs=[fake_user_input, user_input, item_input, rating_output],
                  outputs=prediction)

    loss = K.mean(K.square(prediction - fake_prediction) +
                  K.square(rating_output - prediction))
    model.add_loss(loss)
    model.add_metric(loss, name='loss')
    model.add_metric(K.mean(K.abs(prediction - rating_output)), name='mae')
    model.add_metric(K.sqrt(K.mean(K.square(prediction - rating_output))), name='rmse')

    return model