classifier = Dense(len(chars) + 4)

yl = Dense(char_size)(yl)
yl = LeakyReLU(0.2)(yl)
yl = classifier(yl)
yl = Lambda(lambda x: (x[0] + x[1]) / 2)([yl, x_prior])  # average with the prior
yl = Activation('softmax')(yl)

yr = Dense(char_size)(yr)
yr = LeakyReLU(0.2)(yr)
yr = classifier(yr)
yr = Lambda(lambda x: (x[0] + x[1]) / 2)([yr, x_prior])  # average with the prior
yr = Activation('softmax')(yr)

# cross entropy as the loss, with the padding positions masked out
cross_entropy_1 = K.sparse_categorical_crossentropy(yl_in[:, 1:], yl[:, :-1])
cross_entropy_1 = K.sum(cross_entropy_1 * y_mask[:, 1:, 0]) / K.sum(y_mask[:, 1:, 0])
cross_entropy_2 = K.sparse_categorical_crossentropy(yr_in[:, 1:], yr[:, :-1])
cross_entropy_2 = K.sum(cross_entropy_2 * y_mask[:, 1:, 0]) / K.sum(y_mask[:, 1:, 0])
cross_entropy = (cross_entropy_1 + cross_entropy_2) / 2

model = Model([x_in, yl_in, yr_in], [yl, yr])
model.add_loss(cross_entropy)
model.compile(optimizer=Adam(1e-3))


def gen_sent(s, topk=3, maxlen=64):
    """Bidirectional beam search decoding.
    Only the topk best candidates are kept at each step; topk=1 reduces to greedy search.
    """
def perplexity(y_true, y_pred):
    cross_entropy = K.sparse_categorical_crossentropy(y_true, y_pred)
    perplexity = K.pow(2.0, cross_entropy)
    return perplexity
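# A hedged usage sketch for the metric above: `model` is a hypothetical
# compiled classifier, not from the source. Note that
# K.sparse_categorical_crossentropy returns natural-log cross entropy, so
# K.exp(ce) would give the conventional base-e perplexity; the definition
# above reports base-2 perplexity instead.
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=[perplexity])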
'''
x = Dense(units=128, activation='relu')(x)
x = Dropout(0.1)(x)
ans_start = Dense(1, activation='sigmoid')(x)
ans_end = Dense(1, activation='sigmoid')(x)
passage_mask = passage_mask_in
'''
# model = Model([x1_in, x2_in, y_in, ans_start_pos_in, ans_end_pos_in, passage_mask_in],
#               [p, ans_start, ans_end])
model = Model([x1_in, x2_in, y_in, test_in], [p])

loss_p = K.binary_crossentropy(y_in, p)
loss_p = K.mean(loss_p)
test_loss = K.sparse_categorical_crossentropy(test_in, p_test)
test_loss = K.mean(test_loss)
'''
p_ans_start_loss = K.sparse_categorical_crossentropy(ans_start_pos_in, ans_start)
p_ans_start_loss = K.sum(p_ans_start_loss * passage_mask) / K.sum(passage_mask)
p_ans_end_loss = K.sparse_categorical_crossentropy(ans_end_pos_in, ans_end)
p_ans_end_loss = K.sum(p_ans_end_loss * passage_mask) / K.sum(passage_mask)
loss = loss_p + p_ans_start_loss + p_ans_end_loss
'''
loss = loss_p + test_loss
model.add_loss(loss)
model.compile(
    optimizer=Adam(1e-6),  # use a sufficiently small learning rate
)
def my_sparse_categorical_crossentropy(y_true, y_pred):
    return K.sparse_categorical_crossentropy(y_true, y_pred, from_logits=False)
def __init__(
        self,
        model,
        bounds,
        channel_axis=3,
        preprocessing=(0, 1),
        predicts='probabilities'):

    super(KerasModel, self).__init__(bounds=bounds,
                                     channel_axis=channel_axis,
                                     preprocessing=preprocessing)

    from keras import backend as K
    import keras
    from pkg_resources import parse_version
    assert parse_version(keras.__version__) >= parse_version('2.0.7'), 'Keras version needs to be 2.0.7 or newer'  # noqa: E501

    if predicts == 'probs':
        predicts = 'probabilities'
    assert predicts in ['probabilities', 'logits']

    images_input = model.input
    label_input = K.placeholder(shape=(1,))
    predictions = model.output

    shape = K.int_shape(predictions)
    _, num_classes = shape
    assert num_classes is not None
    self._num_classes = num_classes

    if predicts == 'probabilities':
        loss = K.sparse_categorical_crossentropy(
            label_input, predictions, from_logits=False)
        # transform the probability predictions into logits, so that
        # the rest of this code can assume predictions to be logits
        predictions = self._to_logits(predictions)
    elif predicts == 'logits':
        loss = K.sparse_categorical_crossentropy(
            label_input, predictions, from_logits=True)

    # sparse_categorical_crossentropy returns 1-dim tensor,
    # gradients wants 0-dim tensor (for some backends)
    loss = K.squeeze(loss, axis=0)
    grads = K.gradients(loss, images_input)

    if K.backend() == 'tensorflow':
        # tensorflow backend returns a list with the gradient
        # as the only element, even if loss is a single scalar
        # tensor;
        # theano always returns the gradient itself (and requires
        # that loss is a single scalar tensor)
        assert isinstance(grads, list)
        assert len(grads) == 1
        grad = grads[0]
    elif K.backend() == 'cntk':  # pragma: no cover
        assert isinstance(grads, list)
        assert len(grads) == 1
        grad = grads[0]
        grad = K.reshape(grad, (1,) + grad.shape)
    else:
        assert not isinstance(grads, list)
        grad = grads

    self._loss_fn = K.function(
        [images_input, label_input], [loss])
    self._batch_pred_fn = K.function(
        [images_input], [predictions])
    self._pred_grad_fn = K.function(
        [images_input, label_input], [predictions, grad])
def __init__(self, clip_values, model, use_logits=False, channel_index=3, defences=None):
    """
    Create a `Classifier` instance from a Keras model. Assumes the `model` passed as argument is compiled.

    :param clip_values: Tuple of the form `(min, max)` representing the minimum and maximum values allowed
           for features.
    :type clip_values: `tuple`
    :param model: Keras model
    :type model: `keras.models.Sequential`
    :param use_logits: True if the output of the model are the logits.
    :type use_logits: `bool`
    :param channel_index: Index of the axis in data containing the color channels or features.
    :type channel_index: `int`
    :param defences: Defences to be activated with the classifier.
    :type defences: `str` or `list(str)`
    """
    import keras.backend as k

    # TODO Generalize loss function?
    super(KerasClassifier, self).__init__(clip_values, channel_index, defences)

    self._model = model
    self._input = model.input
    self._output = model.output
    _, self._nb_classes = k.int_shape(model.output)
    self._input_shape = k.int_shape(model.input)[1:]

    # Get predictions and loss function
    label_ph = k.placeholder(shape=(None,))
    if not use_logits:
        if k.backend() == 'tensorflow':
            preds, = self._output.op.inputs
            loss = k.sparse_categorical_crossentropy(label_ph, preds, from_logits=True)
        else:
            loss = k.sparse_categorical_crossentropy(label_ph, self._output, from_logits=use_logits)

            # Convert predictions to logits for consistency with the other cases
            eps = 10e-8
            preds = k.log(k.clip(self._output, eps, 1. - eps))
    else:
        preds = self._output
        loss = k.sparse_categorical_crossentropy(label_ph, self._output, from_logits=use_logits)
    loss_grads = k.gradients(loss, self._input)

    if k.backend() == 'tensorflow':
        loss_grads = loss_grads[0]
    elif k.backend() == 'cntk':
        raise NotImplementedError('Only TensorFlow and Theano support is provided for Keras.')

    # Set loss, grads and prediction functions
    self._preds_op = preds
    self._loss = k.function([self._input], [loss])
    self._loss_grads = k.function([self._input, label_ph], [loss_grads])
    self._preds = k.function([self._input], [preds])
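# A small numeric sketch of the probability-to-logit conversion used above:
# log-probabilities are valid logits up to an additive constant, and softmax
# is invariant to that constant, so softmax(log(clip(p))) recovers p.
import numpy as np

p = np.array([0.7, 0.2, 0.1])
logits = np.log(np.clip(p, 1e-7, 1.0 - 1e-7))
recovered = np.exp(logits) / np.exp(logits).sum()
assert np.allclose(recovered, p, atol=1e-6)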
def __init__(self,
             model,
             bounds,
             aux_lp,
             channel_axis=3,
             preprocessing=(0, 1),
             predicts='probabilities'):

    super(TwoInputKerasModel, self).__init__(bounds=bounds,
                                             channel_axis=channel_axis,
                                             preprocessing=preprocessing)

    from keras import backend as K
    import keras
    from pkg_resources import parse_version
    assert parse_version(keras.__version__) >= parse_version(
        '2.0.7'), 'Keras version needs to be 2.0.7 or newer'  # noqa: E501

    if predicts == 'probs':
        predicts = 'probabilities'
    assert predicts in ['probabilities', 'logits']

    images_input = model.input  # image is the first input
    label_input = K.placeholder(shape=(1,))
    predictions = model.output

    shape = K.int_shape(predictions)
    _, num_classes = shape
    assert num_classes is not None

    self._num_classes = num_classes
    self.aux_lp = aux_lp
    aux_learning_phase = K.learning_phase()

    if predicts == 'probabilities':
        if K.backend() == 'tensorflow':
            # predictions = predictions.op.inputs[0]
            loss = K.sparse_categorical_crossentropy(label_input,
                                                     predictions,
                                                     from_logits=False)
            predictions = self._to_logits(predictions)
        else:
            logging.warning('relying on numerically unstable conversion'
                            ' from probabilities to softmax')
            loss = K.sparse_categorical_crossentropy(label_input,
                                                     predictions,
                                                     from_logits=False)
            # transform the probability predictions into logits, so that
            # the rest of this code can assume predictions to be logits
            predictions = self._to_logits(predictions)
    elif predicts == 'logits':
        loss = K.sparse_categorical_crossentropy(label_input,
                                                 predictions,
                                                 from_logits=True)

    # sparse_categorical_crossentropy returns 1-dim tensor,
    # gradients wants 0-dim tensor (for some backends)
    loss = K.squeeze(loss, axis=0)
    grads = K.gradients(loss, images_input)

    grad_loss_output = K.placeholder(shape=(num_classes, 1))
    external_loss = K.dot(predictions, grad_loss_output)
    # remove batch dimension of predictions
    external_loss = K.squeeze(external_loss, axis=0)
    # remove singleton dimension of grad_loss_output
    external_loss = K.squeeze(external_loss, axis=0)

    grads_loss_input = K.gradients(external_loss, images_input)

    if K.backend() == 'tensorflow':
        # tensorflow backend returns a list with the gradient
        # as the only element, even if loss is a single scalar tensor;
        # theano always returns the gradient itself (and requires
        # that loss is a single scalar tensor)
        assert isinstance(grads, list)
        assert len(grads) == 1
        grad = grads[0]

        assert isinstance(grads_loss_input, list)
        assert len(grads_loss_input) == 1
        grad_loss_input = grads_loss_input[0]
    elif K.backend() == 'cntk':  # pragma: no cover
        assert isinstance(grads, list)
        assert len(grads) == 1
        grad = grads[0]
        grad = K.reshape(grad, (1,) + grad.shape)

        assert isinstance(grads_loss_input, list)
        assert len(grads_loss_input) == 1
        grad_loss_input = grads_loss_input[0]
        grad_loss_input = K.reshape(grad_loss_input, (1,) + grad_loss_input.shape)  # noqa: E501
    else:
        assert not isinstance(grads, list)
        grad = grads
        grad_loss_input = grads_loss_input

    self._loss_fn = K.function(
        [images_input, label_input, aux_learning_phase], [loss])
    self._batch_pred_fn = K.function(
        [images_input, aux_learning_phase], [predictions])
    self._pred_grad_fn = K.function(
        [images_input, aux_learning_phase, label_input], [predictions, grad])
    self._bw_grad_fn = K.function(
        [grad_loss_output, images_input, aux_learning_phase], [grad_loss_input])
def my_loss(arg):
    action_pred, action_true, discount_episode_reward = arg
    action_true = K.cast(action_true, dtype=tf.int32)
    loss = K.sparse_categorical_crossentropy(action_true, action_pred)
    loss = loss * K.flatten(discount_episode_reward)
    return loss
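# A hedged usage sketch for the REINFORCE-style loss above: the cross entropy
# of the taken action is weighted by the discounted return. All shapes and
# layer names below are illustrative assumptions, not from the source.
from keras.layers import Input, Dense, Lambda
from keras.models import Model

state_in = Input(shape=(4,))    # hypothetical state vector
action_in = Input(shape=(1,))   # integer id of the taken action
reward_in = Input(shape=(1,))   # discounted episode return
action_pred = Dense(2, activation='softmax')(state_in)  # policy head, 2 actions
loss_out = Lambda(my_loss)([action_pred, action_in, reward_in])
train_model = Model([state_in, action_in, reward_in], loss_out)
# the model's "output" already is the loss, so just pass it through:
train_model.compile(optimizer='adam', loss=lambda y_true, y_pred: y_pred)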
def masked_loss(y_true, y_pred):
    y_mask = K.cast(K.any(y_true, axis=-1), "float32")
    loss = K.switch(y_mask,
                    K.sparse_categorical_crossentropy(y_true, y_pred),
                    K.zeros_like(y_mask, dtype=K.floatx()))
    return K.sum(loss) / (K.cast(K.sum(y_mask), dtype='float32') + K.epsilon())
def sparse_masked_mlm_loss(y_true, y_pred):
    mask = K.cast(K.any(y_true, axis=-1), "float32")
    cce = K.sparse_categorical_crossentropy(y_true, y_pred)
    masked_cce = mask * cce
    return K.sum(masked_cce) / (K.sum(mask) + K.epsilon())
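# Hedged usage note for the two masked losses above: both rely on label id 0
# marking padded (or unmasked) positions, since K.any(y_true, axis=-1) is
# False only for an all-zero label. They therefore expect y_true of shape
# (batch, seq_len, 1) with integer ids and y_pred of shape
# (batch, seq_len, vocab_size). `mlm_model` is an illustrative name.
mlm_model.compile(optimizer='adam', loss=sparse_masked_mlm_loss)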
def sparse_categorical_crossentropy(y_true, y_pred):
    # Note: the argument order here follows the legacy Keras 1.x backend
    # signature (output, target); the Keras 2 backend expects (target, output).
    return K.mean(K.sparse_categorical_crossentropy(y_pred, y_true))
def sparse_categorical_crossentropy(y_true, y_pred):
    '''Expects an array of integer classes.
    Note: labels shape must have the same number of dimensions as the output
    shape. If you get a shape error, add a length-1 dimension to labels.
    '''
    # Argument order follows the legacy Keras 1.x backend signature
    # (output, target); the Keras 2 backend expects (target, output).
    return K.sparse_categorical_crossentropy(y_pred, y_true)
def sparse_logits_categorical_crossentropy(y_true, y_pred, scale=30):
    return K.sparse_categorical_crossentropy(y_true, scale * y_pred, from_logits=True)
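# A hedged sketch of how the scale factor is typically used: in margin-softmax
# style heads, y_pred holds cosine similarities in [-1, 1], and multiplying by
# a scale (30 here) sharpens the softmax. A closure makes the scale tunable;
# this wrapper is illustrative, not from the source.
def make_scaled_loss(scale=30):
    def loss(y_true, y_pred):
        return K.sparse_categorical_crossentropy(
            y_true, scale * y_pred, from_logits=True)
    return loss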
def SparseEncrop(y_true, y_pred):
    loss = K.sparse_categorical_crossentropy(y_true, y_pred, from_logits=True)
    return K.mean(loss)
def build_model_from_config(config_file,
                            checkpoint_file,
                            training=False,
                            trainable=False,
                            seq_len=None):
    """Build the model from config file.

    :param config_file: The path to the JSON configuration file.
    :param training: If training, the whole model will be returned.
    :param trainable: Whether the model is trainable.
    :param seq_len: If it is not None and it is shorter than the value in
        the config file, the weights in position embeddings will be sliced
        to fit the new length.
    :return: model and config
    """
    with open(config_file, 'r') as reader:
        config = json.loads(reader.read())
    if seq_len is not None:
        config['max_position_embeddings'] = min(seq_len, config['max_position_embeddings'])
    if trainable is None:
        trainable = training
    model = get_model(
        token_num=config['vocab_size'],
        pos_num=config['max_position_embeddings'],
        seq_len=config['max_position_embeddings'],
        embed_dim=config['hidden_size'],
        transformer_num=config['num_hidden_layers'],
        head_num=config['num_attention_heads'],
        feed_forward_dim=config['intermediate_size'],
        training=False,
        trainable=True,
    )
    inputs, outputs = model
    bio_label = Input(shape=(maxlen,))
    event = Input(shape=(1,))
    mask = Lambda(lambda x: K.cast(K.greater(K.expand_dims(x, 2), 0), 'float32'))(inputs[0])
    event_embedding = Embedding(len(event2id), config['hidden_size'], mask_zero=True)(event)
    event_bc = Lambda(lambda input: input[0] * 0 + input[1])([outputs, event_embedding])
    outputs = Add()([outputs, event_bc])
    outputs = Dropout(0.15)(outputs)

    attention = TimeDistributed(Dense(1, activation='tanh'))(outputs)
    attention = MaskFlatten()(attention)
    attention = Activation('softmax')(attention)
    attention = MaskRepeatVector(config['hidden_size'])(attention)
    attention = MaskPermute([2, 1])(attention)
    sent_representation = multiply([outputs, attention])
    attention = Lambda(lambda xin: K.sum(xin, axis=1))(sent_representation)

    t_dim = K.int_shape(outputs)[-1]
    bert_attention = Lambda(seq_and_vec, output_shape=(None, t_dim * 2))([outputs, attention])
    cnn1 = MaskedConv1D(filters=hidden_size, kernel_size=3, activation='relu', padding='same')(bert_attention)
    bio_pred = Dense(4, activation='softmax')(cnn1)  # BIOE tagging
    entity_model = keras.models.Model([inputs[0], inputs[1], event], [bio_pred])  # model that predicts the subject
    train_model = keras.models.Model([inputs[0], inputs[1], bio_label, event], [bio_pred])

    loss = K.sparse_categorical_crossentropy(bio_label, bio_pred)
    loss = K.sum(loss * mask[:, :, 0]) / K.sum(mask)
    train_model.add_loss(loss)
    train_model.summary()
    train_model.compile(
        optimizer=keras.optimizers.Adam(lr=3e-5),
    )
    load_model_weights_from_checkpoint(train_model, config, checkpoint_file, training)
    return train_model, entity_model
def build_model():
    # Define the model
    n = 5  # only keep five-character lines of verse
    latent_dim = 64  # dimension of the latent variable
    hidden_dim = 64  # number of hidden units

    # Encoder
    input_sentence = Input(shape=(2 * n + 1,), dtype='int32')  # (None, 11)
    input_vec = Embedding(len(vocab2id), hidden_dim)(input_sentence)  # (None, 11, 64)
    h = GCNN(residual=True)(input_vec)  # (None, 11, 64)
    h = GCNN(residual=True)(h)  # (None, 11, 64)
    h = GlobalAveragePooling1D()(h)  # (None, 64)

    # Compute the mean and (log) variance
    z_mean = Dense(latent_dim)(h)
    z_log_var = Dense(latent_dim)(h)
    # sample z from the mean and variance
    z = Lambda(sampling)([z_mean, z_log_var])

    # Decoder
    decoder_hidden = Dense(hidden_dim * (2 * n + 1))
    decoder_cnn = GCNN(residual=True)
    decoder_dense = Dense(len(vocab2id), activation='softmax')

    h = decoder_hidden(z)
    h = Reshape((2 * n + 1, hidden_dim))(h)
    h = decoder_cnn(h)
    output = decoder_dense(h)

    # Build the model
    vae = Model(input_sentence, output)

    # Loss: reconstruction loss + KL loss
    xent_loss = K.sum(K.sparse_categorical_crossentropy(input_sentence, output), 1)
    kl_loss = -0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
    vae_loss = K.mean(xent_loss + kl_loss)

    # add_loss is the newer API for attaching arbitrary losses more flexibly
    vae.add_loss(vae_loss)
    vae.compile(optimizer='adam')
    vae.summary()

    # Reuse the decoder layers to build a standalone generator
    decoder_input = Input(shape=(latent_dim,))
    _ = decoder_hidden(decoder_input)
    _ = Reshape((2 * n + 1, hidden_dim))(_)
    _ = decoder_cnn(_)
    _output = decoder_dense(_)
    generator = Model(decoder_input, _output)

    # Randomly generate a poem with the generator
    def gen():
        latent_dim = 64
        n = 5
        r = generator.predict(np.random.randn(1, latent_dim))[0]
        r = r.argmax(axis=1)
        return ''.join([id2vocab[i] for i in r[:2 * n + 1]])

    # Callback to print samples during training
    class Evaluate(Callback):
        def __init__(self):
            super(Evaluate, self).__init__()
            self.log = []

        def on_epoch_end(self, epoch, logs=None):
            self.log.append(gen())
            print(u' %s' % self.log[-1])

    evaluator = Evaluate()
    vae.fit(x, shuffle=True, epochs=100, batch_size=64, callbacks=[evaluator])
    vae.save_weights('shi.model')
    for i in range(20):
        print(gen())
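# The `sampling` function referenced by Lambda(sampling) above is not shown in
# this snippet; a minimal sketch of the reparameterization trick it presumably
# implements (z = mean + sigma * epsilon, with epsilon ~ N(0, 1)):
def sampling(args):
    z_mean, z_log_var = args
    epsilon = K.random_normal(shape=K.shape(z_mean))
    return z_mean + K.exp(0.5 * z_log_var) * epsilon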
def per_pixel_softmax_cross_entropy_loss(y_true, y_pred):
    return K.sum(K.sparse_categorical_crossentropy(y_true, y_pred, from_logits=True))
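# Hedged usage sketch: suitable for semantic segmentation where y_pred holds
# per-pixel logits of shape (batch, H, W, num_classes) and y_true holds
# integer class maps of shape (batch, H, W, 1). Note that K.sum sums over all
# pixels and the batch, so the loss scales with image size; K.mean would give
# a size-invariant variant. `seg_model` is an illustrative name.
seg_model.compile(optimizer='adam', loss=per_pixel_softmax_cross_entropy_loss)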
def __init__(self, clip_values, model, use_logits=False, channel_index=3, defences=None, preprocessing=(0, 1),
             input_layer=0, output_layer=0, custom_activation=False):
    """
    Create a `Classifier` instance from a Keras model. Assumes the `model` passed as argument is compiled.

    :param clip_values: Tuple of the form `(min, max)` representing the minimum and maximum values allowed
           for features.
    :type clip_values: `tuple`
    :param model: Keras model
    :type model: `keras.models.Model`
    :param use_logits: True if the output of the model are the logits.
    :type use_logits: `bool`
    :param channel_index: Index of the axis in data containing the color channels or features.
    :type channel_index: `int`
    :param defences: Defences to be activated with the classifier.
    :type defences: `str` or `list(str)`
    :param preprocessing: Tuple of the form `(subtractor, divider)` of floats or `np.ndarray` of values to be
           used for data preprocessing. The first value will be subtracted from the input. The input will then
           be divided by the second one.
    :type preprocessing: `tuple`
    :param input_layer: Which layer to consider as the input when the model has multiple input layers.
    :type input_layer: `int`
    :param output_layer: Which layer to consider as the output when the model has multiple output layers.
    :type output_layer: `int`
    :param custom_activation: True if the model uses an activation other than softmax as its last layer and
           attacks should use the output probability rather than the logits.
    :type custom_activation: `bool`
    """
    import keras.backend as k

    super(KerasClassifier, self).__init__(clip_values=clip_values, channel_index=channel_index, defences=defences,
                                          preprocessing=preprocessing)

    self._model = model
    if hasattr(model, 'inputs'):
        self._input = model.inputs[input_layer]
    else:
        self._input = model.input

    if hasattr(model, 'outputs'):
        self._output = model.outputs[output_layer]
    else:
        self._output = model.output

    _, self._nb_classes = k.int_shape(self._output)
    self._input_shape = k.int_shape(self._input)[1:]
    self._custom_activation = custom_activation
    logger.debug('Inferred %i classes and %s as input shape for Keras classifier.', self.nb_classes,
                 str(self.input_shape))

    # Get predictions and loss function
    label_ph = k.placeholder(shape=(None,))
    if not use_logits:
        if k.backend() == 'tensorflow':
            if custom_activation:
                preds = self._output
                loss = k.sparse_categorical_crossentropy(label_ph, preds, from_logits=False)
            else:
                preds, = self._output.op.inputs
                loss = k.sparse_categorical_crossentropy(label_ph, preds, from_logits=True)
        else:
            loss = k.sparse_categorical_crossentropy(label_ph, self._output, from_logits=use_logits)

            # Convert predictions to logits for consistency with the other cases
            eps = 10e-8
            preds = k.log(k.clip(self._output, eps, 1. - eps))
    else:
        preds = self._output
        loss = k.sparse_categorical_crossentropy(label_ph, self._output, from_logits=use_logits)

    if preds == self._input:  # recent Tensorflow version does not allow a model with an output same as the input.
        preds = k.identity(preds)

    loss_grads = k.gradients(loss, self._input)

    if k.backend() == 'tensorflow':
        loss_grads = loss_grads[0]
    elif k.backend() == 'cntk':
        raise NotImplementedError('Only TensorFlow and Theano support is provided for Keras.')

    # Set loss, grads and prediction functions
    self._preds_op = preds
    self._loss = k.function([self._input], [loss])
    self._loss_grads = k.function([self._input, label_ph], [loss_grads])
    self._preds = k.function([self._input], [preds])

    # Get the internal layer
    self._layer_names = self._get_layers()
y = CuDNNLSTM(z_dim, return_sequences=True)(y)
y = SelfModulatedLayerNormalization(z_dim // 4)([y, x_max])

# attention interaction
xy = Attention(8, 16)([y, x, x, x_mask])
xy = Concatenate()([y, xy])

# output classification
xy = Dense(char_size)(xy)
xy = LeakyReLU(0.2)(xy)
xy = Dense(len(chars) + 4)(xy)
xy = Lambda(lambda x: (x[0] + x[1]) / 2)([xy, x_prior])  # average with the prior
xy = Activation('softmax')(xy)

# cross entropy as the loss, with the padding positions masked out
cross_entropy = K.sparse_categorical_crossentropy(y_in[:, 1:], xy[:, :-1])
cross_entropy = K.sum(cross_entropy * y_mask[:, 1:, 0]) / K.sum(y_mask[:, 1:, 0])

model = Model([x_in, y_in], xy)
model.add_loss(cross_entropy)
model.compile(optimizer=Adam(1e-3))


def gen_sent(s, topk=3, maxlen=64):
    """Beam search decoding.
    Only the topk best candidates are kept at each step; topk=1 reduces to greedy search.
    """
    xid = np.array([str2id(s)] * topk)  # convert the input to ids
    yid = np.array([[2]] * topk)  # decoding starts with <start>, whose id is 2
    scores = [0] * topk  # scores of the candidate answers
def softmax_loss1(y_true, y_pred):
    y_true_casted = K.cast(y_true, dtype='int32')
    y_true_cls = y_true_casted[:, 0]
    return K.sparse_categorical_crossentropy(y_true_cls, y_pred)
def call(self, y_true, y_pred):
    # return tf.keras.losses.sparse_categorical_crossentropy(y_true, y_pred, from_logits=False, axis=-1)
    # scce = tf.keras.losses.SparseCategoricalCrossentropy()
    # return scce(y_true, y_pred)
    log_ = K.mean(K.sparse_categorical_crossentropy(y_true, y_pred))
    return K.sum(log_ * K.constant(class_weight))
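# A hedged alternative sketch to the weighting above: instead of averaging the
# CE first and then scaling by every class weight, weight each sample's CE by
# the weight of its own true class. Assumes `class_weight` is a 1-D array of
# length num_classes; this is an illustrative variant, not from the source.
def per_sample_weighted_scce(y_true, y_pred):
    ce = K.sparse_categorical_crossentropy(y_true, y_pred)
    weights = K.gather(K.constant(class_weight),
                       K.cast(K.flatten(y_true), 'int32'))
    return K.mean(ce * weights)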
def softmax_loss2(y_true, y_pred):
    return K.sparse_categorical_crossentropy(y_true[1], y_pred[1])
# train the encoder-decoder of PretrainVAE
enc_model.trainable = True
dec_model.trainable = True
dis_model.trainable = False

enc_z, kl_loss = enc_model(enc_in)
z_fake_score = dis_model(enc_z)

dec_in = Input(shape=(max_len,))
dec_true = Input(shape=(max_len,))
dec_true_mask = Lambda(
    lambda x: K.cast(K.greater(K.expand_dims(x, 2), 0), 'float32'))(dec_true)
dec_out = dec_model([dec_in, dec_true, enc_z])

xent_loss = K.sum(
    K.sparse_categorical_crossentropy(dec_true, dec_out) *
    dec_true_mask[:, :, 0]) / K.sum(dec_true_mask[:, :, 0])
d_loss = K.mean(-z_fake_score)
all_loss = xent_loss + nambda * d_loss

enc_dec_train_model = Model([enc_in, dec_in, dec_true], dec_out)
enc_dec_train_model.add_loss(all_loss)
enc_dec_train_model.compile(Adam(5e-4, 0.0))
enc_dec_train_model.metrics_names.append('ce_loss')
enc_dec_train_model.metrics_tensors.append(xent_loss)
enc_dec_train_model.metrics_names.append('kl_loss')
enc_dec_train_model.metrics_tensors.append(kl_loss)

dis_train_model.summary()
enc_dec_train_model.summary()
# cls_info_dense = Dense(768, activation='relu')(cls_info)
# cls_info_dense = Lambda(lambda x: K.expand_dims(x, 1))(cls_info_dense)
x_answer_pos = add([x, q_cls])
ans_start = Dense(2, activation='softmax')(x_answer_pos)
ans_end = Dense(2, activation='softmax')(x_answer_pos)
passage_mask = passage_mask_in

train_model = Model([x1_in, x2_in, h_in, y_in, ans_start_pos_in, ans_end_pos_in, passage_mask_in],
                    [p, ans_start, ans_end])
model = Model([x1_in, x2_in, h_in, passage_mask_in], [p, ans_start, ans_end])
# model = Model([x1_in, x2_in, y_in], [p])
# train_model = Model([x1_in, x2_in, h_in, y_in, ans_start_pos_in, ans_end_pos_in, passage_mask_in], [p])

loss_p = K.sparse_categorical_crossentropy(y_in, p)
loss_p = K.mean(loss_p)
p_ans_start_loss = K.sparse_categorical_crossentropy(ans_start_pos_in, ans_start)
p_ans_start_loss = K.sum(p_ans_start_loss * passage_mask) / K.sum(passage_mask)
p_ans_end_loss = K.sparse_categorical_crossentropy(ans_end_pos_in, ans_end)
p_ans_end_loss = K.sum(p_ans_end_loss * passage_mask) / K.sum(passage_mask)
loss = loss_p + p_ans_start_loss + p_ans_end_loss
# loss = loss_p

train_model.add_loss(loss)
train_model.compile(
    optimizer=Adam(3e-5),  # use a sufficiently small learning rate
    metrics=['accuracy'],
)
model = load_pretrained_model(config_path,
                              checkpoint_path,
                              seq2seq=True,
                              keep_words=keep_words)
model.summary()

y_in = model.input[0][:, 1:]  # target tokens
y_mask = model.input[1][:, 1:]
y = model.output[:, :-1]  # predicted tokens, shifted one step relative to the targets

# cross entropy as the loss, with the predictions on the input part masked out
cross_entropy = K.sparse_categorical_crossentropy(y_in, y)
cross_entropy = K.sum(cross_entropy * y_mask) / K.sum(y_mask)

model.add_loss(cross_entropy)
model.compile(optimizer=Adam(1e-5))
model.summary()


def gen_sent(s, topk=2):
    """Beam search decoding.
    Only the topk best candidates are kept at each step; topk=1 reduces to greedy search.
    """
    token_ids, segment_ids = tokenizer.encode(s[:max_input_len])
    target_ids = [[] for _ in range(topk)]  # ids of the candidate answers
    target_scores = [0] * topk  # scores of the candidate answers
    for i in range(max_output_len):  # force the output to stay within max_output_len tokens
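# Illustration of the shift-by-one alignment used above (a sketch with
# made-up ids): the prediction at position i is scored against token i + 1,
# so inputs and predictions are sliced in opposite directions.
import numpy as np

ids = np.array([[2, 15, 27, 9, 3]])  # <start> a b c <end>
targets = ids[:, 1:]                 # a b c <end>
# model.output[:, :-1] drops the prediction made after <end>,
# leaving exactly one prediction per target token:
assert targets.shape[1] == ids.shape[1] - 1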
def train_reddit_lm(num_users=300, num_words=5000, num_epochs=30, maxlen=35,
                    batch_size=20, exp_id=0, h=128, emb_h=256, lr=1e-3,
                    drop_p=0.25, tied=False, nh=1, loo=None, sample_user=False,
                    cross_domain=False, print_every=1000, rnn_fn='lstm',
                    DP=False, l2_norm_clip=0.15, noise_multiplier=1.1):
    if cross_domain:
        loo = None
        sample_user = True
        user_comments, vocabs = load_wiki_by_users(num_users=num_users,
                                                   num_words=num_words)
    else:
        user_comments, vocabs = read_top_user_comments(num_users, num_words,
                                                       sample_user=sample_user)

    train_data = []
    users = sorted(user_comments.keys())
    for i, user in enumerate(users):
        if loo is not None and i == loo:
            print("Leaving {} out".format(i))
            continue
        train_data += user_comments[user]

    train_data = words_to_indices(train_data, vocabs)
    train_data = flatten_data(train_data)

    if cross_domain:
        test_data = load_wiki_test_data()
    else:
        test_data = read_test_comments()
    process_test_data(test_data, vocabs)
    test_data = words_to_indices(test_data, vocabs)
    test_data = flatten_data(test_data)

    n_data = (len(train_data) - 1) // maxlen
    X_train = train_data[:-1][:n_data * maxlen].reshape(-1, maxlen)
    y_train = train_data[1:][:n_data * maxlen].reshape(-1, maxlen)
    print(X_train.shape)

    n_test_data = (len(test_data) - 1) // maxlen
    X_test = test_data[:-1][:n_test_data * maxlen].reshape(-1, maxlen)
    y_test = test_data[1:][:n_test_data * maxlen].reshape(-1, maxlen)
    print(X_test.shape)

    model = build_lm_model(emb_h=emb_h, h=h, nh=nh, drop_p=drop_p,
                           V=len(vocabs), tied=tied, maxlen=maxlen,
                           rnn_fn=rnn_fn)

    input_var = K.placeholder((None, maxlen))
    target_var = K.placeholder((None, maxlen))

    prediction = model(input_var)
    loss = K.sparse_categorical_crossentropy(target_var, prediction,
                                             from_logits=True)

    if DP:
        optimizer = DPAdamGaussianOptimizer(l2_norm_clip=l2_norm_clip,
                                            noise_multiplier=noise_multiplier,
                                            learning_rate=lr,
                                            num_microbatches=batch_size)
        grads_and_vars = optimizer.compute_gradients(loss, model.trainable_weights)
        updates = [optimizer.apply_gradients(grads_and_vars)]
    else:
        loss = K.mean(K.sum(loss, axis=-1))
        optimizer = Adam(lr=lr, clipnorm=5)
        updates = optimizer.get_updates(loss, model.trainable_weights)

    # 20191110 LIN, Y.D. Modify for train accuracy.
    train_fn = K.function([input_var, target_var, K.learning_phase()],
                          [prediction, loss], updates=updates)
    # train_fn = K.function([input_var, target_var, K.learning_phase()], [loss], updates=updates)
    pred_fn = K.function([input_var, target_var, K.learning_phase()],
                         [prediction, loss])

    # 20191129 LIN, Y.D. Records loss and perplexity
    train_losses = []
    train_perps = []
    test_losses = []
    test_perps = []
    train_accs = []
    test_accs = []

    iteration = 1
    for epoch in range(num_epochs):
        train_batches = 0.
        train_loss = 0.
        train_iters = 0.

        for batch in iterate_minibatches(X_train, y_train, batch_size, shuffle=True):
            inputs, targets = batch
            # 20191110 LIN, Y.D. Modify for train accuracy.
            preds, err = train_fn([inputs, targets, 1])
            # err = train_fn([inputs, targets, 1])[0]
            train_batches += 1
            if DP:
                err = np.sum(np.mean(err, axis=1))
            train_loss += err
            train_iters += maxlen
            iteration += 1

            if iteration % print_every == 0:
                test_acc = 0.
                test_n = 0.
                test_iters = 0.
                test_loss = 0.
                test_batches = 0.

                # 20191110 LIN, Y.D. Modify for train accuracy.
                train_acc = 0.
                train_n = 0.
                preds = preds.argmax(axis=-1)
                train_acc += np.sum(preds.flatten() == targets.flatten())
                train_n += len(targets.flatten())

                for batch in iterate_minibatches(X_test, y_test, batch_size, shuffle=False):
                    inputs, targets = batch
                    preds, err = pred_fn([inputs, targets, 0])
                    if DP:
                        err = np.sum(np.mean(err, axis=1))
                    test_loss += err
                    test_iters += maxlen
                    test_batches += 1
                    preds = preds.argmax(axis=-1)
                    test_acc += np.sum(preds.flatten() == targets.flatten())
                    test_n += len(targets.flatten())

                train_losses.append(train_loss / train_batches)
                train_perps.append(np.exp(train_loss / train_iters))
                train_accs.append(train_acc / train_n * 100)
                test_losses.append(test_loss / test_batches)
                test_perps.append(np.exp(test_loss / test_iters))
                test_accs.append(test_acc / test_n * 100)

                sys.stderr.write(
                    "Epoch {}, iteration {}, train loss={:.3f}, train perp={:.3f}, train acc={:.3f}, "
                    "test loss={:.3f}, test perp={:.3f}, "
                    "test acc={:.3f}%\n".format(
                        epoch, iteration,
                        train_losses[-1], train_perps[-1],
                        train_accs[-1],  # 20191110 LIN, Y.D. Modify for train accuracy.
                        test_losses[-1], test_perps[-1], test_accs[-1]))

    if cross_domain:
        fname = 'wiki_lm{}'.format('' if loo is None else loo)
    else:
        fname = 'reddit_lm{}'.format('' if loo is None else loo)

    # Add DP suffix for storing DP results.
    if DP:
        fname = '{}_dp_l2_{}_noise_{}'.format(fname, l2_norm_clip, noise_multiplier)

    if sample_user:
        fname += '_shadow_exp{}_{}'.format(exp_id, rnn_fn)
        np.savez(MODEL_PATH + 'shadow_users{}_{}_{}_{}.npz'.format(
            exp_id, rnn_fn, num_users, 'cd' if cross_domain else ''), users)

    # Dump the records here.
    train_losses_file = open(f'./{RESULT_PATH}/{fname}_train_losses.pkl', 'wb')
    train_perps_file = open(f'./{RESULT_PATH}/{fname}_train_perps.pkl', 'wb')
    train_accs_file = open(f'./{RESULT_PATH}/{fname}_train_accs.pkl', 'wb')
    test_losses_file = open(f'./{RESULT_PATH}/{fname}_test_losses.pkl', 'wb')
    test_perps_file = open(f'./{RESULT_PATH}/{fname}_test_perps.pkl', 'wb')
    test_accs_file = open(f'./{RESULT_PATH}/{fname}_test_accs.pkl', 'wb')

    pkl.dump(train_losses, train_losses_file)
    pkl.dump(train_perps, train_perps_file)
    pkl.dump(train_accs, train_accs_file)
    pkl.dump(test_losses, test_losses_file)
    pkl.dump(test_perps, test_perps_file)
    pkl.dump(test_accs, test_accs_file)

    train_losses_file.close()
    train_perps_file.close()
    train_accs_file.close()
    test_losses_file.close()
    test_perps_file.close()
    test_accs_file.close()

    model.save(MODEL_PATH + '{}_{}.h5'.format(fname, num_users))
def sparse_loss(self, y_true, y_pred, from_logits=True):
    return K.sparse_categorical_crossentropy(y_true, y_pred, from_logits=from_logits)
        yield [batch_tokens_ids, batch_segment_ids], None
        batch_tokens_ids, batch_segment_ids = [], []


# Build the model
model = build_transformer_model(config_path=config_path,
                                checkpoint_path=checkpoint_path,
                                application='unilm',
                                keep_tokens=keep_words)

y_true = model.input[0][:, 1:]
y_mask = model.input[1][:, 1:]
y_pred = model.output[:, :-1]

cross_entropy = K.sparse_categorical_crossentropy(y_true, y_pred)
cross_entropy = K.sum(cross_entropy * y_mask) / K.sum(y_mask)

model.add_loss(cross_entropy)
model.compile(optimizer=AdaFactor(learning_rate=1e-3))
model.summary()


def ge_answer(wrong):
    """Decode.

    :param wrong:
    :return:
    """
    wrong_token_ids, _ = tokenizer.encode(wrong)
    token_ids = wrong_token_ids + [tokenizer._token_mask_id] * max_len + [
def cross_entropy(y_true, y_pred):
    return K.sparse_categorical_crossentropy(y_true, y_pred)
def my_loss(self, y_true, y_pred):
    perplexity = K.exp(K.sparse_categorical_crossentropy(y_true, y_pred))
    print(perplexity)
    return perplexity
def custom_loss(y_true, y_pred):
    y_t = K.reshape(y_true, [-1, 1])
    y_p = K.reshape(y_pred, [-1, 4])
    # Note: the (output, target) argument order matches the legacy Keras 1.x
    # backend signature; the Keras 2 backend expects (target, output).
    losses = K.sparse_categorical_crossentropy(y_p, y_t, from_logits=True)
    return K.sum(losses)
pcsel = Lambda(lambda x: x[0] + x[1])([pcsel_1, pcsel_2])
# mask out padded headers by subtracting a large number before the softmax
pcsel = Lambda(lambda x: x[0][..., 0] - (1 - x[1]) * 1e10)([pcsel, hm])
pcsel = Activation('softmax')(pcsel)

model = Model([x1_in, x2_in, h_in, hm_in], [psel, pconn, pcop, pcsel])
train_model = Model(
    [x1_in, x2_in, xm_in, h_in, hm_in, sel_in, conn_in, csel_in, cop_in],
    [psel, pconn, pcop, pcsel])

xm = xm  # mask for the question, shape=(None, x_len)
hm = hm[:, 0]  # mask for the headers, shape=(None, h_len)
cm = K.cast(K.not_equal(cop, num_op - 1), 'float32')  # mask for the conds, shape=(None, x_len)

psel_loss = K.sparse_categorical_crossentropy(sel_in, psel)
psel_loss = K.sum(psel_loss * hm) / K.sum(hm)
pconn_loss = K.sparse_categorical_crossentropy(conn_in, pconn)
pconn_loss = K.mean(pconn_loss)
pcop_loss = K.sparse_categorical_crossentropy(cop_in, pcop)
pcop_loss = K.sum(pcop_loss * xm) / K.sum(xm)
pcsel_loss = K.sparse_categorical_crossentropy(csel_in, pcsel)
pcsel_loss = K.sum(pcsel_loss * xm * cm) / K.sum(xm * cm)
loss = psel_loss + pconn_loss + pcop_loss + pcsel_loss

train_model.add_loss(loss)
train_model.compile(optimizer=Adam(learning_rate))
train_model.summary()


def nl2sql(question, table):
# Define the decoder layers separately so they can be reused later
decoder_hidden = Dense(hidden_dim * (2 * n))
decoder_cnn = GCNN(residual=True)
decoder_dense = Dense(len(char2id), activation='softmax')

h = decoder_hidden(z)
h = Reshape((2 * n, hidden_dim))(h)
h = decoder_cnn(h)
output = decoder_dense(h)

# Build the model
vae = Model(input_sentence, output)

# xent_loss is the reconstruction loss, kl_loss is the KL loss
xent_loss = K.sum(K.sparse_categorical_crossentropy(input_sentence, output), 1)
kl_loss = -0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
vae_loss = K.mean(xent_loss + kl_loss)

# add_loss is the newer API for attaching arbitrary losses more flexibly
vae.add_loss(vae_loss)
vae.compile(optimizer='adam')
vae.summary()

# Reuse the decoder layers to build a standalone generator
decoder_input = Input(shape=(latent_dim,))
_ = decoder_hidden(decoder_input)
_ = Reshape((2 * n, hidden_dim))(_)
_ = decoder_cnn(_)
_output = decoder_dense(_)
generator = Model(decoder_input, _output)
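# Hedged usage sketch mirroring the gen() helper from the variant earlier in
# this collection: sample a latent vector and decode it with the reused
# decoder stack. `id2char` (the reverse of char2id) is an assumption here.
import numpy as np

r = generator.predict(np.random.randn(1, latent_dim))[0]
print(''.join(id2char[i] for i in r.argmax(axis=1)))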
def __init__(
        self,
        model,
        bounds,
        channel_axis=3,
        preprocessing=(0, 1),
        predicts='probabilities'):

    super(KerasModel, self).__init__(bounds=bounds,
                                     channel_axis=channel_axis,
                                     preprocessing=preprocessing)

    from keras import backend as K
    import keras
    from pkg_resources import parse_version
    assert parse_version(keras.__version__) >= parse_version('2.0.7'), 'Keras version needs to be 2.0.7 or newer'  # noqa: E501

    if predicts == 'probs':
        predicts = 'probabilities'
    assert predicts in ['probabilities', 'logits']

    images_input = model.input
    label_input = K.placeholder(shape=(1,))
    predictions = model.output

    shape = K.int_shape(predictions)
    _, num_classes = shape
    assert num_classes is not None
    self._num_classes = num_classes

    if predicts == 'probabilities':
        if K.backend() == 'tensorflow':
            predictions, = predictions.op.inputs
            loss = K.sparse_categorical_crossentropy(
                label_input, predictions, from_logits=True)
        else:
            logging.warning('relying on numerically unstable conversion'
                            ' from probabilities to softmax')
            loss = K.sparse_categorical_crossentropy(
                label_input, predictions, from_logits=False)

            # transform the probability predictions into logits, so that
            # the rest of this code can assume predictions to be logits
            predictions = self._to_logits(predictions)
    elif predicts == 'logits':
        loss = K.sparse_categorical_crossentropy(
            label_input, predictions, from_logits=True)

    # sparse_categorical_crossentropy returns 1-dim tensor,
    # gradients wants 0-dim tensor (for some backends)
    loss = K.squeeze(loss, axis=0)
    grads = K.gradients(loss, images_input)

    grad_loss_output = K.placeholder(shape=(num_classes, 1))
    external_loss = K.dot(predictions, grad_loss_output)
    # remove batch dimension of predictions
    external_loss = K.squeeze(external_loss, axis=0)
    # remove singleton dimension of grad_loss_output
    external_loss = K.squeeze(external_loss, axis=0)

    grads_loss_input = K.gradients(external_loss, images_input)

    if K.backend() == 'tensorflow':
        # tensorflow backend returns a list with the gradient
        # as the only element, even if loss is a single scalar
        # tensor;
        # theano always returns the gradient itself (and requires
        # that loss is a single scalar tensor)
        assert isinstance(grads, list)
        assert len(grads) == 1
        grad = grads[0]

        assert isinstance(grads_loss_input, list)
        assert len(grads_loss_input) == 1
        grad_loss_input = grads_loss_input[0]
    elif K.backend() == 'cntk':  # pragma: no cover
        assert isinstance(grads, list)
        assert len(grads) == 1
        grad = grads[0]
        grad = K.reshape(grad, (1,) + grad.shape)

        assert isinstance(grads_loss_input, list)
        assert len(grads_loss_input) == 1
        grad_loss_input = grads_loss_input[0]
        grad_loss_input = K.reshape(grad_loss_input, (1,) + grad_loss_input.shape)  # noqa: E501
    else:
        assert not isinstance(grads, list)
        grad = grads
        grad_loss_input = grads_loss_input

    self._loss_fn = K.function(
        [images_input, label_input], [loss])
    self._batch_pred_fn = K.function(
        [images_input], [predictions])
    self._pred_grad_fn = K.function(
        [images_input, label_input], [predictions, grad])
    self._bw_grad_fn = K.function(
        [grad_loss_output, images_input], [grad_loss_input])