Example #1
classifier = Dense(len(chars) + 4)

yl = Dense(char_size)(yl)
yl = LeakyReLU(0.2)(yl)
yl = classifier(yl)
yl = Lambda(lambda x: (x[0] + x[1]) / 2)([yl, x_prior])  # average with the prior
yl = Activation('softmax')(yl)

yr = Dense(char_size)(yr)
yr = LeakyReLU(0.2)(yr)
yr = classifier(yr)
yr = Lambda(lambda x: (x[0] + x[1]) / 2)([yr, x_prior])  # average with the prior
yr = Activation('softmax')(yr)

# cross entropy as the loss, masking out the padding part
cross_entropy_1 = K.sparse_categorical_crossentropy(yl_in[:, 1:], yl[:, :-1])
cross_entropy_1 = K.sum(cross_entropy_1 * y_mask[:, 1:, 0]) / K.sum(
    y_mask[:, 1:, 0])
cross_entropy_2 = K.sparse_categorical_crossentropy(yr_in[:, 1:], yr[:, :-1])
cross_entropy_2 = K.sum(cross_entropy_2 * y_mask[:, 1:, 0]) / K.sum(
    y_mask[:, 1:, 0])
cross_entropy = (cross_entropy_1 + cross_entropy_2) / 2

model = Model([x_in, yl_in, yr_in], [yl, yr])
model.add_loss(cross_entropy)
model.compile(optimizer=Adam(1e-3))


def gen_sent(s, topk=3, maxlen=64):
    """双向beam search解码
    每次只保留topk个最优候选结果;如果topk=1,那么就是贪心搜索
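The truncated decoder above pairs each classifier output with a masked cross-entropy. A minimal standalone sketch of that masking idiom, with placeholder tensor names (not from the source):

from keras import backend as K

def masked_sparse_ce(y_true, y_pred, mask):
    # y_true: (batch, steps) integer labels; y_pred: (batch, steps, vocab) probabilities
    # mask: (batch, steps), 1.0 on real tokens and 0.0 on padding
    ce = K.sparse_categorical_crossentropy(y_true, y_pred)
    return K.sum(ce * mask) / K.sum(mask)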
Example #2
def perplexity(y_true, y_pred):
    cross_entropy = K.sparse_categorical_crossentropy(y_true, y_pred)
    perplexity = K.exp(cross_entropy)  # Keras crossentropy is in nats, so exponentiate with e
    return perplexity
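Since Keras computes cross-entropy in nats, exponentiating with e gives a per-sample perplexity. A hedged compile sketch (model is a placeholder for any classifier):

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=[perplexity])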
Example #3
'''
x = Dense(units=128, activation='relu')(x) 
x = Dropout(0.1)(x)
ans_start = Dense(1, activation='sigmoid')(x)
ans_end = Dense(1, activation='sigmoid')(x)
passage_mask = passage_mask_in
'''

#model = Model([x1_in, x2_in, y_in,ans_start_pos_in,ans_end_pos_in, passage_mask_in], [p, ans_start, ans_end])
model = Model([x1_in, x2_in, y_in, test_in], [p])

loss_p = K.binary_crossentropy(y_in, p) 
loss_p = K.mean(loss_p)

test_loss = K.sparse_categorical_crossentropy(test_in, p_test)
test_loss = K.mean(test_loss)

'''
p_ans_start_loss = K.sparse_categorical_crossentropy(ans_start_pos_in, ans_start)
p_ans_start_loss = K.sum(p_ans_start_loss * passage_mask) / K.sum(passage_mask)
p_ans_end_loss = K.sparse_categorical_crossentropy(ans_end_pos_in, ans_end)
p_ans_end_loss = K.sum(p_ans_end_loss * passage_mask) / K.sum(passage_mask) 

loss = loss_p + p_ans_start_loss  + p_ans_end_loss 
'''
loss = loss_p + test_loss

model.add_loss(loss)
model.compile(
    optimizer=Adam(1e-6),  # use a sufficiently small learning rate
Example #4
def my_sparse_categorical_crossentropy(y_true, y_pred):
    return K.sparse_categorical_crossentropy(y_true, y_pred, from_logits=False)
Example #5
    def __init__(
            self,
            model,
            bounds,
            channel_axis=3,
            preprocessing=(0, 1),
            predicts='probabilities'):

        super(KerasModel, self).__init__(bounds=bounds,
                                         channel_axis=channel_axis,
                                         preprocessing=preprocessing)

        from keras import backend as K
        import keras
        from pkg_resources import parse_version

        assert parse_version(keras.__version__) >= parse_version('2.0.7'), 'Keras version needs to be 2.0.7 or newer'  # noqa: E501

        if predicts == 'probs':
            predicts = 'probabilities'
        assert predicts in ['probabilities', 'logits']

        images_input = model.input
        label_input = K.placeholder(shape=(1,))

        predictions = model.output

        shape = K.int_shape(predictions)
        _, num_classes = shape
        assert num_classes is not None

        self._num_classes = num_classes

        if predicts == 'probabilities':
            loss = K.sparse_categorical_crossentropy(
                label_input, predictions, from_logits=False)
            # transform the probability predictions into logits, so that
            # the rest of this code can assume predictions to be logits
            predictions = self._to_logits(predictions)
        elif predicts == 'logits':
            loss = K.sparse_categorical_crossentropy(
                label_input, predictions, from_logits=True)

        # sparse_categorical_crossentropy returns 1-dim tensor,
        # gradients wants 0-dim tensor (for some backends)
        loss = K.squeeze(loss, axis=0)

        grads = K.gradients(loss, images_input)
        if K.backend() == 'tensorflow':
            # tensorflow backend returns a list with the gradient
            # as the only element, even if loss is a single scalar
            # tensor;
            # theano always returns the gradient itself (and requires
            # that loss is a single scalar tensor)
            assert isinstance(grads, list)
            assert len(grads) == 1
            grad = grads[0]
        elif K.backend() == 'cntk':  # pragma: no cover
            assert isinstance(grads, list)
            assert len(grads) == 1
            grad = grads[0]
            grad = K.reshape(grad, (1,) + grad.shape)
        else:
            assert not isinstance(grads, list)
            grad = grads

        self._loss_fn = K.function(
            [images_input, label_input],
            [loss])
        self._batch_pred_fn = K.function(
            [images_input], [predictions])
        self._pred_grad_fn = K.function(
            [images_input, label_input],
            [predictions, grad])
Example #6
    def __init__(self,
                 clip_values,
                 model,
                 use_logits=False,
                 channel_index=3,
                 defences=None):
        """
        Create a `Classifier` instance from a Keras model. Assumes the `model` passed as argument is compiled.

        :param clip_values: Tuple of the form `(min, max)` representing the minimum and maximum values allowed
               for features.
        :type clip_values: `tuple`
        :param model: Keras model
        :type model: `keras.models.Sequential`
        :param use_logits: True if the output of the model are the logits.
        :type use_logits: `bool`
        :param channel_index: Index of the axis in data containing the color channels or features.
        :type channel_index: `int`
        :param defences: Defences to be activated with the classifier.
        :type defences: `str` or `list(str)`
        """
        import keras.backend as k

        # TODO Generalize loss function?
        super(KerasClassifier, self).__init__(clip_values, channel_index,
                                              defences)

        self._model = model
        self._input = model.input
        self._output = model.output
        _, self._nb_classes = k.int_shape(model.output)
        self._input_shape = k.int_shape(model.input)[1:]

        # Get predictions and loss function
        label_ph = k.placeholder(shape=(None, ))
        if not use_logits:
            if k.backend() == 'tensorflow':
                preds, = self._output.op.inputs
                loss = k.sparse_categorical_crossentropy(label_ph,
                                                         preds,
                                                         from_logits=True)
            else:
                loss = k.sparse_categorical_crossentropy(
                    label_ph, self._output, from_logits=use_logits)

                # Convert predictions to logits for consistency with the other cases
                eps = 10e-8
                preds = k.log(k.clip(self._output, eps, 1. - eps))
        else:
            preds = self._output
            loss = k.sparse_categorical_crossentropy(label_ph,
                                                     self._output,
                                                     from_logits=use_logits)
        loss_grads = k.gradients(loss, self._input)

        if k.backend() == 'tensorflow':
            loss_grads = loss_grads[0]
        elif k.backend() == 'cntk':
            raise NotImplementedError(
                'Only TensorFlow and Theano support is provided for Keras.')

        # Set loss, grads and prediction functions
        self._preds_op = preds
        self._loss = k.function([self._input], [loss])
        self._loss_grads = k.function([self._input, label_ph], [loss_grads])
        self._preds = k.function([self._input], [preds])
Example #7
    def __init__(self,
                 model,
                 bounds,
                 aux_lp,
                 channel_axis=3,
                 preprocessing=(0, 1),
                 predicts='probabilities'):

        super(TwoInputKerasModel, self).__init__(bounds=bounds,
                                                 channel_axis=channel_axis,
                                                 preprocessing=preprocessing)

        from keras import backend as K
        import keras
        from pkg_resources import parse_version

        assert parse_version(keras.__version__) >= parse_version(
            '2.0.7'), 'Keras version needs to be 2.0.7 or newer'  # noqa: E501

        if predicts == 'probs':
            predicts = 'probabilities'
        assert predicts in ['probabilities', 'logits']

        images_input = model.input  # image is the first input
        label_input = K.placeholder(shape=(1, ))

        predictions = model.output

        shape = K.int_shape(predictions)
        _, num_classes = shape
        assert num_classes is not None

        self._num_classes = num_classes
        self.aux_lp = aux_lp
        aux_learning_phase = K.learning_phase()

        if predicts == 'probabilities':
            if K.backend() == 'tensorflow':
                # predictions = predictions.op.inputs[0]
                loss = K.sparse_categorical_crossentropy(label_input,
                                                         predictions,
                                                         from_logits=False)
                predictions = self._to_logits(predictions)
            else:
                logging.warning('relying on numerically unstable conversion'
                                ' from probabilities to softmax')
                loss = K.sparse_categorical_crossentropy(label_input,
                                                         predictions,
                                                         from_logits=False)

                # transform the probability predictions into logits, so that
                # the rest of this code can assume predictions to be logits
                predictions = self._to_logits(predictions)

        elif predicts == 'logits':
            loss = K.sparse_categorical_crossentropy(label_input,
                                                     predictions,
                                                     from_logits=True)

        # sparse_categorical_crossentropy returns 1-dim tensor,
        # gradients wants 0-dim tensor (for some backends)
        loss = K.squeeze(loss, axis=0)
        grads = K.gradients(loss, images_input)

        grad_loss_output = K.placeholder(shape=(num_classes, 1))
        external_loss = K.dot(predictions, grad_loss_output)
        # remove batch dimension of predictions
        external_loss = K.squeeze(external_loss, axis=0)
        # remove singleton dimension of grad_loss_output
        external_loss = K.squeeze(external_loss, axis=0)

        grads_loss_input = K.gradients(external_loss, images_input)

        if K.backend() == 'tensorflow':
            # tensorflow backend returns a list with the gradient
            # as the only element, even if loss is a single scalar tensor;
            # theano always returns the gradient itself (and requires
            # that loss is a single scalar tensor)
            assert isinstance(grads, list)
            assert len(grads) == 1
            grad = grads[0]

            assert isinstance(grads_loss_input, list)
            assert len(grads_loss_input) == 1
            grad_loss_input = grads_loss_input[0]
        elif K.backend() == 'cntk':  # pragma: no cover
            assert isinstance(grads, list)
            assert len(grads) == 1
            grad = grads[0]
            grad = K.reshape(grad, (1, ) + grad.shape)

            assert isinstance(grads_loss_input, list)
            assert len(grads_loss_input) == 1
            grad_loss_input = grads_loss_input[0]
            grad_loss_input = K.reshape(grad_loss_input, (1, ) +
                                        grad_loss_input.shape)  # noqa: E501
        else:
            assert not isinstance(grads, list)
            grad = grads

            grad_loss_input = grads_loss_input

        self._loss_fn = K.function(
            [images_input, label_input, aux_learning_phase], [loss])
        self._batch_pred_fn = K.function([images_input, aux_learning_phase],
                                         [predictions])
        self._pred_grad_fn = K.function(
            [images_input, aux_learning_phase, label_input],
            [predictions, grad])
        self._bw_grad_fn = K.function(
            [grad_loss_output, images_input, aux_learning_phase],
            [grad_loss_input])
Example #8
def my_loss(arg):
    action_pred, action_true, discount_episode_reward = arg
    action_true = K.cast(action_true, dtype=tf.int32)
    loss = K.sparse_categorical_crossentropy(action_true, action_pred)
    loss = loss * K.flatten(discount_episode_reward)
    return loss
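This is REINFORCE-style weighting: the per-step cross-entropy is scaled by the discounted return. A small smoke test on assumed toy values:

import tensorflow as tf
from keras import backend as K

probs = K.constant([[0.7, 0.3], [0.2, 0.8]])  # policy action probabilities
actions = K.constant([0, 1])                  # actions actually taken
returns = K.constant([1.5, -0.5])             # discounted episode returns
print(K.eval(my_loss((probs, actions, returns))))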
Example #9
def masked_loss(y_true, y_pred):
    y_mask = K.cast(K.any(y_true, axis=-1), "float32")
    loss = K.switch(y_mask, K.sparse_categorical_crossentropy(y_true, y_pred),
                    K.zeros_like(y_mask, dtype=K.floatx()))
    return K.sum(loss) / (K.cast(K.sum(y_mask), dtype='float32') + K.epsilon())
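A quick smoke test, assuming labels carry a trailing length-1 axis so that K.any marks zero labels as padding:

from keras import backend as K

y_true = K.constant([[[4], [0], [2]]])           # (batch, steps, 1); 0 = padding
y_pred = K.softmax(K.random_uniform((1, 3, 6)))  # per-step class probabilities
print(K.eval(masked_loss(y_true, y_pred)))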
Example #10
def sparse_masked_mlm_loss(y_true, y_pred):
    mask = K.cast(K.any(y_true, axis=-1), "float32")
    cce = K.sparse_categorical_crossentropy(y_true, y_pred)
    masked_cce = mask * cce
    return K.sum(masked_cce) / (K.sum(mask) + K.epsilon())
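A hedged compile sketch; model stands in for any network that emits per-token probabilities (an assumption, not from the source):

model.compile(optimizer='adam', loss=sparse_masked_mlm_loss)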
Example #11
def sparse_categorical_crossentropy(y_true, y_pred):
    # K.sparse_categorical_crossentropy expects (target, output) in that order
    return K.mean(K.sparse_categorical_crossentropy(y_true, y_pred))
Example #12
def sparse_categorical_crossentropy(y_true, y_pred):
    '''Expects an array of integer classes.
    Note: labels shape must have the same number of dimensions as output shape.
    If you get a shape error, add a length-1 dimension to labels.
    '''
    return K.sparse_categorical_crossentropy(y_true, y_pred)
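Per the note above, integer labels may need a trailing length-1 axis to match the output rank; a minimal sketch:

import numpy as np

labels = np.array([1, 0, 2])  # integer class ids, shape (3,)
labels = labels[:, None]      # shape (3, 1): same rank as a (3, num_classes) output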
Example #13
def sparse_logits_categorical_crossentropy(y_true, y_pred, scale=30):
    return K.sparse_categorical_crossentropy(y_true, scale * y_pred, from_logits=True)
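Scaling the logits before the softmax cross-entropy acts as an inverse temperature, as in normalized-softmax losses. A hedged sketch that binds the scale at compile time (model is a placeholder):

from functools import partial

model.compile(optimizer='adam',
              loss=partial(sparse_logits_categorical_crossentropy, scale=30))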
Example #14
def SparseEncrop(y_true, y_pred):
    loss = K.sparse_categorical_crossentropy(y_true, y_pred, from_logits=True)
    return K.mean(loss)
Example #15
def build_model_from_config(config_file,
                            checkpoint_file,
                            training=False,
                            trainable=False,
                            seq_len=None,
                            ):
    """Build the model from config file.

    :param config_file: The path to the JSON configuration file.
    :param training: If training, the whole model will be returned.
    :param trainable: Whether the model is trainable.
    :param seq_len: If it is not None and it is shorter than the value in the config file, the weights in
                    position embeddings will be sliced to fit the new length.
    :return: model and config
    """
    with open(config_file, 'r') as reader:
        config = json.loads(reader.read())
    if seq_len is not None:
        config['max_position_embeddings'] = min(seq_len, config['max_position_embeddings'])
    if trainable is None:
        trainable = training
    model = get_model(
        token_num=config['vocab_size'],
        pos_num=config['max_position_embeddings'],
        seq_len=config['max_position_embeddings'],
        embed_dim=config['hidden_size'],
        transformer_num=config['num_hidden_layers'],
        head_num=config['num_attention_heads'],
        feed_forward_dim=config['intermediate_size'],
        training=False,
        trainable=True,
    )
    inputs, outputs = model
    bio_label = Input(shape=(maxlen,))
    event = Input(shape=(1,))

    mask = Lambda(lambda x: K.cast(K.greater(K.expand_dims(x, 2), 0), 'float32'))(inputs[0])
    event_embedding = Embedding(len(event2id), config['hidden_size'], mask_zero=True)(event)
    event_bc = Lambda(lambda input: input[0] * 0 + input[1])([outputs, event_embedding])
    outputs = Add()([outputs, event_bc])

    outputs = Dropout(0.15)(outputs)
    attention = TimeDistributed(Dense(1, activation='tanh'))(outputs)
    attention = MaskFlatten()(attention)
    attention = Activation('softmax')(attention)
    attention = MaskRepeatVector(config['hidden_size'])(attention)
    attention = MaskPermute([2, 1])(attention)
    sent_representation = multiply([outputs, attention])
    attention = Lambda(lambda xin: K.sum(xin, axis=1))(sent_representation)
    t_dim = K.int_shape(outputs)[-1]
    bert_attention = Lambda(seq_and_vec, output_shape=(None, t_dim * 2))([outputs, attention])

    cnn1 = MaskedConv1D(filters=hidden_size, kernel_size=3, activation='relu', padding='same')(bert_attention)
    # BIOE tagging
    bio_pred = Dense(4, activation='softmax')(cnn1)
    entity_model = keras.models.Model([inputs[0], inputs[1], event], [bio_pred])  # model that predicts the subject
    train_model = keras.models.Model([inputs[0], inputs[1], bio_label, event], [bio_pred])

    loss = K.sparse_categorical_crossentropy(bio_label, bio_pred)
    loss = K.sum(loss * mask[:, :, 0]) / K.sum(mask)

    train_model.add_loss(loss)
    train_model.summary()
    train_model.compile(
        optimizer=keras.optimizers.Adam(lr=3e-5),
    )
    load_model_weights_from_checkpoint(train_model, config, checkpoint_file, training)
    return train_model, entity_model
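A hedged usage sketch; the file paths are placeholders:

train_model, entity_model = build_model_from_config(
    'bert_config.json', 'bert_model.ckpt', training=False, trainable=True)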
Example #16
def build_model():
    # define the model
    n = 5  # only five-character poems are extracted
    latent_dim = 64  # latent variable dimension
    hidden_dim = 64  # hidden layer size

    # encoder
    input_sentence = Input(shape=(2 * n + 1, ), dtype='int32')  # (None, 11)
    input_vec = Embedding(len(vocab2id),
                          hidden_dim)(input_sentence)  # (None, 11, 64)
    h = GCNN(residual=True)(input_vec)  # (None, 11, 64)
    h = GCNN(residual=True)(h)  # (None, 11, 64)
    h = GlobalAveragePooling1D()(h)  # (None, 64)

    # compute the mean and log-variance
    z_mean = Dense(latent_dim)(h)
    z_log_var = Dense(latent_dim)(h)

    # sample z from the mean and variance
    z = Lambda(sampling)([z_mean, z_log_var])

    # decoder
    decoder_hidden = Dense(hidden_dim * (2 * n + 1))
    decoder_cnn = GCNN(residual=True)
    decoder_dense = Dense(len(vocab2id), activation='softmax')

    h = decoder_hidden(z)
    h = Reshape((2 * n + 1, hidden_dim))(h)
    h = decoder_cnn(h)
    output = decoder_dense(h)

    # build the model
    vae = Model(input_sentence, output)

    # loss: reconstruction loss + KL loss
    xent_loss = K.sum(
        K.sparse_categorical_crossentropy(input_sentence, output), 1)
    kl_loss = -0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var),
                           axis=-1)
    vae_loss = K.mean(xent_loss + kl_loss)

    # add_loss is a newer API for adding arbitrary losses more flexibly
    vae.add_loss(vae_loss)
    vae.compile(optimizer='adam')
    vae.summary()

    # reuse the decoder layers to build a standalone generator
    decoder_input = Input(shape=(latent_dim, ))
    _ = decoder_hidden(decoder_input)
    _ = Reshape((2 * n + 1, hidden_dim))(_)
    _ = decoder_cnn(_)
    _output = decoder_dense(_)
    generator = Model(decoder_input, _output)

    # use the generator to sample a random poem
    def gen():
        latent_dim = 64
        n = 5
        r = generator.predict(np.random.randn(1, latent_dim))[0]
        r = r.argmax(axis=1)
        return ''.join([id2vocab[i] for i in r[:2 * n + 1]])

    # callback to print samples during training
    class Evaluate(Callback):
        def __init__(self):
            super(Evaluate, self).__init__()
            self.log = []

        def on_epoch_end(self, epoch, logs=None):
            self.log.append(gen())
            print(u'          %s' % self.log[-1])

    evaluator = Evaluate()

    vae.fit(x, shuffle=True, epochs=100, batch_size=64, callbacks=[evaluator])

    vae.save_weights('shi.model')

    for i in range(20):
        print(gen())
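The snippet calls a sampling helper it does not define; a standard reparameterization-trick sketch it presumably resembles (an assumption, not taken from this source):

from keras import backend as K

def sampling(args):
    z_mean, z_log_var = args
    eps = K.random_normal(shape=K.shape(z_mean))
    return z_mean + K.exp(0.5 * z_log_var) * eps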
Example #17
def per_pixel_softmax_cross_entropy_loss(y_true, y_pred):
    return K.sum(K.sparse_categorical_crossentropy(y_true, y_pred, from_logits=True))
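Summing over every pixel yields a single scalar for segmentation maps; a small smoke test with assumed toy shapes:

from keras import backend as K

logits = K.random_uniform((2, 4, 4, 3))                         # (batch, H, W, classes) logits
labels = K.cast(K.random_uniform((2, 4, 4, 1), 0, 3), 'int32')  # per-pixel class ids
print(K.eval(per_pixel_softmax_cross_entropy_loss(labels, logits)))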
Example #18
    def __init__(self,
                 clip_values,
                 model,
                 use_logits=False,
                 channel_index=3,
                 defences=None,
                 preprocessing=(0, 1),
                 input_layer=0,
                 output_layer=0,
                 custom_activation=False):
        """
        Create a `Classifier` instance from a Keras model. Assumes the `model` passed as argument is compiled.

        :param clip_values: Tuple of the form `(min, max)` representing the minimum and maximum values allowed
               for features.
        :type clip_values: `tuple`
        :param model: Keras model
        :type model: `keras.models.Model`
        :param use_logits: True if the output of the model are the logits.
        :type use_logits: `bool`
        :param channel_index: Index of the axis in data containing the color channels or features.
        :type channel_index: `int`
        :param defences: Defences to be activated with the classifier.
        :type defences: `str` or `list(str)`
        :param preprocessing: Tuple of the form `(subtractor, divider)` of floats or `np.ndarray` of values to be
               used for data preprocessing. The first value will be subtracted from the input. The input will then
               be divided by the second one.
        :type preprocessing: `tuple`
        :param input_layer: Which layer to consider as the Input when the model has multiple input layers.
        :type input_layer: `int`
        :param output_layer: Which layer to consider as the Output when the model has multiple output layers.
        :type output_layer: `int`
        :param custom_activation: True if the model's last activation is something other than softmax, in which case
               attacks should use the output probabilities rather than the logits.
        :type custom_activation: `bool`
        """
        import keras.backend as k

        super(KerasClassifier, self).__init__(clip_values=clip_values,
                                              channel_index=channel_index,
                                              defences=defences,
                                              preprocessing=preprocessing)

        self._model = model
        if hasattr(model, 'inputs'):
            self._input = model.inputs[input_layer]
        else:
            self._input = model.input

        if hasattr(model, 'outputs'):
            self._output = model.outputs[output_layer]
        else:
            self._output = model.output

        _, self._nb_classes = k.int_shape(self._output)
        self._input_shape = k.int_shape(self._input)[1:]
        self._custom_activation = custom_activation
        logger.debug(
            'Inferred %i classes and %s as input shape for Keras classifier.',
            self.nb_classes, str(self.input_shape))

        # Get predictions and loss function
        label_ph = k.placeholder(shape=(None, ))
        if not use_logits:
            if k.backend() == 'tensorflow':
                if custom_activation:
                    preds = self._output
                    loss = k.sparse_categorical_crossentropy(label_ph,
                                                             preds,
                                                             from_logits=False)
                else:
                    preds, = self._output.op.inputs
                    loss = k.sparse_categorical_crossentropy(label_ph,
                                                             preds,
                                                             from_logits=True)
            else:
                loss = k.sparse_categorical_crossentropy(
                    label_ph, self._output, from_logits=use_logits)

                # Convert predictions to logits for consistency with the other cases
                eps = 10e-8
                preds = k.log(k.clip(self._output, eps, 1. - eps))
        else:
            preds = self._output
            loss = k.sparse_categorical_crossentropy(label_ph,
                                                     self._output,
                                                     from_logits=use_logits)
        if preds == self._input:  # recent Tensorflow version does not allow a model with an output same as the input.
            preds = k.identity(preds)
        loss_grads = k.gradients(loss, self._input)

        if k.backend() == 'tensorflow':
            loss_grads = loss_grads[0]
        elif k.backend() == 'cntk':
            raise NotImplementedError(
                'Only TensorFlow and Theano support is provided for Keras.')

        # Set loss, grads and prediction functions
        self._preds_op = preds
        self._loss = k.function([self._input], [loss])
        self._loss_grads = k.function([self._input, label_ph], [loss_grads])
        self._preds = k.function([self._input], [preds])

        # Get the internal layer
        self._layer_names = self._get_layers()
Example #19
def my_loss(arg):
    action_pred, action_true, discount_episode_reward = arg
    action_true = K.cast(action_true, dtype=tf.int32)
    loss = K.sparse_categorical_crossentropy(action_true, action_pred)
    loss = loss * K.flatten(discount_episode_reward)
    return loss
Example #20
y = CuDNNLSTM(z_dim, return_sequences=True)(y)
y = SelfModulatedLayerNormalization(z_dim // 4)([y, x_max])

# attention interaction
xy = Attention(8, 16)([y, x, x, x_mask])
xy = Concatenate()([y, xy])

# output classification
xy = Dense(char_size)(xy)
xy = LeakyReLU(0.2)(xy)
xy = Dense(len(chars) + 4)(xy)
xy = Lambda(lambda x: (x[0] + x[1]) / 2)([xy, x_prior])  # average with the prior
xy = Activation('softmax')(xy)

# cross entropy as the loss, masking out the padding part
cross_entropy = K.sparse_categorical_crossentropy(y_in[:, 1:], xy[:, :-1])
cross_entropy = K.sum(cross_entropy * y_mask[:, 1:, 0]) / K.sum(y_mask[:, 1:, 0])

model = Model([x_in, y_in], xy)
model.add_loss(cross_entropy)
model.compile(optimizer=Adam(1e-3))


def gen_sent(s, topk=3, maxlen=64):
    """beam search解码
    每次只保留topk个最优候选结果;如果topk=1,那么就是贪心搜索
    """
    xid = np.array([str2id(s)] * topk)  # 输入转id
    yid = np.array([[2]] * topk)  # 解码均以<start>开头,这里<start>的id为2
    scores = [0] * topk  # 候选答案分数
Example #21
def softmax_loss1(y_true, y_pred):
    y_true_casted = K.cast(y_true, dtype='int32')
    y_true_cls = y_true_casted[:, 0]
    return K.sparse_categorical_crossentropy(y_true_cls, y_pred)
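The class id is packed into column 0 of y_true; a smoke test with assumed toy tensors:

from keras import backend as K

y_true = K.constant([[2., 7.], [0., 3.]])  # class id in column 0, second field ignored
y_pred = K.softmax(K.random_uniform((2, 4)))
print(K.eval(softmax_loss1(y_true, y_pred)))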
Example #22
    def call(self, y_true, y_pred):
        # return tf.keras.losses.sparse_categorical_crossentropy(y_true, y_pred, from_logits=False, axis=-1)
        # scce = tf.keras.losses.SparseCategoricalCrossentropy()
        # return scce(y_true, y_pred)
        log_ = K.mean(K.sparse_categorical_crossentropy(y_true, y_pred))
        return K.sum(log_ * K.constant(class_weight))  # class_weight is defined elsewhere
Example #23
def softmax_loss2(y_true, y_pred):
    return K.sparse_categorical_crossentropy(y_true[1], y_pred[1])
Example #24
# train the encoder-decoder of PretrainVAE
enc_model.trainable = True
dec_model.trainable = True
dis_model.trainable = False
enc_z, kl_loss = enc_model(enc_in)
z_fake_score = dis_model(enc_z)

dec_in = Input(shape=(max_len, ))
dec_true = Input(shape=(max_len, ))
dec_true_mask = Lambda(
    lambda x: K.cast(K.greater(K.expand_dims(x, 2), 0), 'float32'))(dec_true)
dec_out = dec_model([dec_in, dec_true, enc_z])

xent_loss = K.sum(
    K.sparse_categorical_crossentropy(dec_true, dec_out) *
    dec_true_mask[:, :, 0]) / K.sum(dec_true_mask[:, :, 0])
d_loss = K.mean(-z_fake_score)
all_loss = xent_loss + nambda * d_loss

enc_dec_train_model = Model([enc_in, dec_in, dec_true], dec_out)
enc_dec_train_model.add_loss(all_loss)
enc_dec_train_model.compile(Adam(5e-4, 0.0))

enc_dec_train_model.metrics_names.append('ce_loss')
enc_dec_train_model.metrics_tensors.append(xent_loss)
enc_dec_train_model.metrics_names.append('kl_loss')
enc_dec_train_model.metrics_tensors.append(kl_loss)

dis_train_model.summary()
enc_dec_train_model.summary()
Example #25
#cls_info_dense = Dense(768, activation='relu')(cls_info)
#cls_info_dense = Lambda(lambda x: K.expand_dims(x, 1))(cls_info_dense) 
x_answer_pos = add([x, q_cls])
ans_start = Dense(2, activation='softmax')(x_answer_pos)
ans_end = Dense(2, activation='softmax')(x_answer_pos)

passage_mask = passage_mask_in



train_model = Model([x1_in, x2_in, h_in, y_in, ans_start_pos_in,ans_end_pos_in,  passage_mask_in], [p, ans_start, ans_end])
model = Model([x1_in, x2_in, h_in, passage_mask_in], [p, ans_start, ans_end])
#model = Model([x1_in, x2_in, y_in], [p])
# train_model = Model([x1_in, x2_in, h_in, y_in, ans_start_pos_in,ans_end_pos_in,  passage_mask_in], [p])

loss_p = K.sparse_categorical_crossentropy(y_in, p) 
loss_p = K.mean(loss_p)


p_ans_start_loss = K.sparse_categorical_crossentropy(ans_start_pos_in, ans_start)
p_ans_start_loss = K.sum(p_ans_start_loss * passage_mask) / K.sum(passage_mask)
p_ans_end_loss = K.sparse_categorical_crossentropy(ans_end_pos_in, ans_end)
p_ans_end_loss = K.sum(p_ans_end_loss * passage_mask) / K.sum(passage_mask) 
loss = loss_p + p_ans_start_loss +  p_ans_end_loss

# loss = loss_p 

train_model.add_loss(loss)
train_model.compile(
    optimizer=Adam(3e-5),  # use a sufficiently small learning rate
    metrics=['accuracy']
Example #26

model = load_pretrained_model(config_path,
                              checkpoint_path,
                              seq2seq=True,
                              keep_words=keep_words)

model.summary()

y_in = model.input[0][:, 1:]  # target tokens
y_mask = model.input[1][:, 1:]
y = model.output[:, :-1]  # predicted tokens, offset one step from the targets

# cross entropy as the loss, masking out predictions over the input part
cross_entropy = K.sparse_categorical_crossentropy(y_in, y)
cross_entropy = K.sum(cross_entropy * y_mask) / K.sum(y_mask)

model.add_loss(cross_entropy)
model.compile(optimizer=Adam(1e-5))
model.summary()


def gen_sent(s, topk=2):
    """beam search解码
    每次只保留topk个最优候选结果;如果topk=1,那么就是贪心搜索
    """
    token_ids, segment_ids = tokenizer.encode(s[:max_input_len])
    target_ids = [[] for _ in range(topk)]  # 候选答案id
    target_scores = [0] * topk  # 候选答案分数
    for i in range(max_output_len):  # 强制要求输出不超过max_output_len字
Example #27
def train_reddit_lm(num_users=300,
                    num_words=5000,
                    num_epochs=30,
                    maxlen=35,
                    batch_size=20,
                    exp_id=0,
                    h=128,
                    emb_h=256,
                    lr=1e-3,
                    drop_p=0.25,
                    tied=False,
                    nh=1,
                    loo=None,
                    sample_user=False,
                    cross_domain=False,
                    print_every=1000,
                    rnn_fn='lstm',
                    DP=False,
                    l2_norm_clip=0.15,
                    noise_multiplier=1.1):
    if cross_domain:
        loo = None
        sample_user = True
        user_comments, vocabs = load_wiki_by_users(num_users=num_users,
                                                   num_words=num_words)
    else:
        user_comments, vocabs = read_top_user_comments(num_users,
                                                       num_words,
                                                       sample_user=sample_user)

    train_data = []
    users = sorted(user_comments.keys())

    for i, user in enumerate(users):
        if loo is not None and i == loo:
            print("Leaving {} out".format(i))
            continue
        train_data += user_comments[user]

    train_data = words_to_indices(train_data, vocabs)
    train_data = flatten_data(train_data)

    if cross_domain:
        test_data = load_wiki_test_data()
    else:
        test_data = read_test_comments()

    process_test_data(test_data, vocabs)
    test_data = words_to_indices(test_data, vocabs)
    test_data = flatten_data(test_data)

    n_data = (len(train_data) - 1) // maxlen
    X_train = train_data[:-1][:n_data * maxlen].reshape(-1, maxlen)
    y_train = train_data[1:][:n_data * maxlen].reshape(-1, maxlen)
    print(X_train.shape)

    n_test_data = (len(test_data) - 1) // maxlen
    X_test = test_data[:-1][:n_test_data * maxlen].reshape(-1, maxlen)
    y_test = test_data[1:][:n_test_data * maxlen].reshape(-1, maxlen)
    print(X_test.shape)

    model = build_lm_model(emb_h=emb_h,
                           h=h,
                           nh=nh,
                           drop_p=drop_p,
                           V=len(vocabs),
                           tied=tied,
                           maxlen=maxlen,
                           rnn_fn=rnn_fn)

    input_var = K.placeholder((None, maxlen))
    target_var = K.placeholder((None, maxlen))

    prediction = model(input_var)

    loss = K.sparse_categorical_crossentropy(target_var,
                                             prediction,
                                             from_logits=True)

    if DP:
        # keep the loss per-example: the DP optimizer averages over microbatches itself
        optimizer = DPAdamGaussianOptimizer(l2_norm_clip=l2_norm_clip,
                                            noise_multiplier=noise_multiplier,
                                            learning_rate=lr,
                                            num_microbatches=batch_size)
        grads_and_vars = optimizer.compute_gradients(loss,
                                                     model.trainable_weights)
        updates = [optimizer.apply_gradients(grads_and_vars)]
    else:
        loss = K.mean(K.sum(loss, axis=-1))
        optimizer = Adam(lr=lr, clipnorm=5)
        updates = optimizer.get_updates(loss, model.trainable_weights)
    # 20191110 LIN, Y.D. Modify for train accuracy.
    train_fn = K.function(
        [input_var, target_var, K.learning_phase()], [prediction, loss],
        updates=updates)
    # train_fn = K.function([input_var, target_var, K.learning_phase()], [loss], updates=updates)

    pred_fn = K.function(
        [input_var, target_var, K.learning_phase()], [prediction, loss])

    # 20191129 LIN, Y.D. Record losses and perplexities
    train_losses = []
    train_perps = []
    test_losses = []
    test_perps = []
    train_accs = []
    test_accs = []

    iteration = 1
    for epoch in range(num_epochs):
        train_batches = 0.
        train_loss = 0.
        train_iters = 0.

        for batch in iterate_minibatches(X_train,
                                         y_train,
                                         batch_size,
                                         shuffle=True):
            inputs, targets = batch

            # 20191110 LIN, Y.D. Modify for train accuracy.
            preds, err = train_fn([inputs, targets, 1])
            # err = train_fn([inputs, targets, 1])[0]
            train_batches += 1
            if DP:
                err = np.sum(np.mean(err, axis=1))
            train_loss += err
            train_iters += maxlen

            iteration += 1
            if iteration % print_every == 0:
                test_acc = 0.
                test_n = 0.
                test_iters = 0.
                test_loss = 0.
                test_batches = 0.

                # 20191110 LIN, Y.D. Modify for train accuracy.
                train_acc = 0.
                train_n = 0.
                preds = preds.argmax(axis=-1)
                train_acc += np.sum(preds.flatten() == targets.flatten())
                train_n += len(targets.flatten())

                for batch in iterate_minibatches(X_test,
                                                 y_test,
                                                 batch_size,
                                                 shuffle=False):
                    inputs, targets = batch

                    preds, err = pred_fn([inputs, targets, 0])
                    if DP:
                        err = np.sum(np.mean(err, axis=1))
                    test_loss += err
                    test_iters += maxlen
                    test_batches += 1

                    preds = preds.argmax(axis=-1)
                    test_acc += np.sum(preds.flatten() == targets.flatten())
                    test_n += len(targets.flatten())

                train_losses.append(train_loss / train_batches)
                train_perps.append(np.exp(train_loss / train_iters))
                train_accs.append(train_acc / train_n * 100)
                test_losses.append(test_loss / test_batches)
                test_perps.append(np.exp(test_loss / test_iters))
                test_accs.append(test_acc / test_n * 100)

                sys.stderr.write(
                    "Epoch {}, iteration {}, train loss={:.3f}, train perp={:.3f}, train acc={:.3f}, "
                    "test loss={:.3f}, test perp={:.3f}, "
                    "test acc={:.3f}%\n".format(
                        epoch,
                        iteration,
                        train_losses[-1],
                        train_perps[-1],
                        train_accs[
                            -1],  # 20191110 LIN, Y.D. Modify for train accuracy.
                        test_losses[-1],
                        test_perps[-1],
                        test_accs[-1]))

                # sys.stderr.write("Epoch {}, iteration {}, train loss={:.3f}, train perp={:.3f}, train acc={:.3f}, "
                #                  "test loss={:.3f}, test perp={:.3f}, "
                #                  "test acc={:.3f}%\n".format(epoch, iteration,
                #                                              train_loss / train_batches,
                #                                              np.exp(train_loss / train_iters),
                #                                              train_acc / train_n * 100, # 20191110 LIN, Y.D. Modify for train accuracy.
                #                                              test_loss / test_batches,
                #                                              np.exp(test_loss / test_iters),
                #                                              test_acc / test_n * 100))

    if cross_domain:
        fname = 'wiki_lm{}'.format('' if loo is None else loo)
    else:
        fname = 'reddit_lm{}'.format('' if loo is None else loo)

    # Add DP suffix for storing DP results.
    if DP:
        fname = '{}_dp_l2_{}_noise_{}'.format(fname, l2_norm_clip,
                                              noise_multiplier)

    if sample_user:
        fname += '_shadow_exp{}_{}'.format(exp_id, rnn_fn)
        np.savez(
            MODEL_PATH + 'shadow_users{}_{}_{}_{}.npz'.format(
                exp_id, rnn_fn, num_users, 'cd' if cross_domain else ''),
            users)

    # Dump the record here.
    train_losses_file = open(f'./{RESULT_PATH}/{fname}_train_losses.pkl', 'wb')
    train_perps_file = open(f'./{RESULT_PATH}/{fname}_train_perps.pkl', 'wb')
    train_accs_file = open(f'./{RESULT_PATH}/{fname}_train_accs.pkl', 'wb')
    test_losses_file = open(f'./{RESULT_PATH}/{fname}_test_losses.pkl', 'wb')
    test_perps_file = open(f'./{RESULT_PATH}/{fname}_test_perps.pkl', 'wb')
    test_accs_file = open(f'./{RESULT_PATH}/{fname}_test_accs.pkl', 'wb')
    pkl.dump(train_losses, train_losses_file)
    pkl.dump(train_perps, train_perps_file)
    pkl.dump(train_accs, train_accs_file)
    pkl.dump(test_losses, test_losses_file)
    pkl.dump(test_perps, test_perps_file)
    pkl.dump(test_accs, test_accs_file)
    train_losses_file.close()
    train_perps_file.close()
    train_accs_file.close()
    test_losses_file.close()
    test_perps_file.close()
    test_accs_file.close()

    model.save(MODEL_PATH + '{}_{}.h5'.format(fname, num_users))
Example #28
    def sparse_loss(self, y_true, y_pred, from_logits=True):
        return K.sparse_categorical_crossentropy(y_true, y_pred, from_logits=from_logits)
Example #29
                yield [batch_tokens_ids, batch_segment_ids], None
                batch_tokens_ids, batch_segment_ids = [], []


# build the model
model = build_transformer_model(config_path=config_path,
                                checkpoint_path=checkpoint_path,
                                application='unilm',
                                keep_tokens=keep_words)

y_true = model.input[0][:, 1:]
y_mask = model.input[1][:, 1:]
y_pred = model.output[:, :-1]

cross_entropy = K.sparse_categorical_crossentropy(y_true, y_pred)
cross_entropy = K.sum(cross_entropy * y_mask) / K.sum(y_mask)

model.add_loss(cross_entropy)
model.compile(optimizer=AdaFactor(learning_rate=1e-3))
model.summary()


def ge_answer(wrong):
    """
    Decoding.
    :param wrong:
    :return:
    """
    wrong_token_ids, _ = tokenizer.encode(wrong)
    token_ids = wrong_token_ids + [tokenizer._token_mask_id] * max_len + [
Example #30
def cross_entropy(y_true, y_pred):
    return K.sparse_categorical_crossentropy(y_true, y_pred)
Example #31
    def my_loss(self, y_true, y_pred):
        # per-sample perplexity: e raised to the cross-entropy (which Keras computes in nats)
        perplexity = K.exp(K.sparse_categorical_crossentropy(y_true, y_pred))
        return perplexity
Example #32
def custom_loss(y_true, y_pred):
    y_t = K.reshape(y_true, [-1, 1])
    y_p = K.reshape(y_pred, [-1, 4])
    # K.sparse_categorical_crossentropy expects (target, output) in that order
    losses = K.sparse_categorical_crossentropy(y_t, y_p, from_logits=True)
    return K.sum(losses)
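A quick smoke test under assumed toy shapes (4-way logits):

from keras import backend as K

yt = K.constant([[1], [3]])    # integer labels, shape (2, 1)
yp = K.random_uniform((2, 4))  # raw logits
print(K.eval(custom_loss(yt, yp)))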
Example #33
pcsel = Lambda(lambda x: x[0] + x[1])([pcsel_1, pcsel_2])
pcsel = Lambda(lambda x: x[0][..., 0] - (1 - x[1]) * 1e10)([pcsel, hm])
pcsel = Activation('softmax')(pcsel)

model = Model([x1_in, x2_in, h_in, hm_in], [psel, pconn, pcop, pcsel])

train_model = Model(
    [x1_in, x2_in, xm_in, h_in, hm_in, sel_in, conn_in, csel_in, cop_in],
    [psel, pconn, pcop, pcsel])

xm = xm  # mask over the question, shape (None, x_len)
hm = hm[:, 0]  # mask over the header, shape (None, h_len)
cm = K.cast(K.not_equal(cop, num_op - 1),
            'float32')  # mask over the conds, shape (None, x_len)

psel_loss = K.sparse_categorical_crossentropy(sel_in, psel)
psel_loss = K.sum(psel_loss * hm) / K.sum(hm)
pconn_loss = K.sparse_categorical_crossentropy(conn_in, pconn)
pconn_loss = K.mean(pconn_loss)
pcop_loss = K.sparse_categorical_crossentropy(cop_in, pcop)
pcop_loss = K.sum(pcop_loss * xm) / K.sum(xm)
pcsel_loss = K.sparse_categorical_crossentropy(csel_in, pcsel)
pcsel_loss = K.sum(pcsel_loss * xm * cm) / K.sum(xm * cm)
loss = psel_loss + pconn_loss + pcop_loss + pcsel_loss

train_model.add_loss(loss)
train_model.compile(optimizer=Adam(learning_rate))
train_model.summary()


def nl2sql(question, table):
Example #34
# define the decoder layers; they are defined separately so they can be reused later
decoder_hidden = Dense(hidden_dim*(2*n))
decoder_cnn = GCNN(residual=True)
decoder_dense = Dense(len(char2id), activation='softmax')

h = decoder_hidden(z)
h = Reshape((2*n, hidden_dim))(h)
h = decoder_cnn(h)
output = decoder_dense(h)


# build the model
vae = Model(input_sentence, output)

# xent_loss is the reconstruction loss, kl_loss is the KL divergence loss
xent_loss = K.sum(K.sparse_categorical_crossentropy(input_sentence, output), 1)
kl_loss = - 0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
vae_loss = K.mean(xent_loss + kl_loss)

# add_loss is a newer API for adding arbitrary losses more flexibly
vae.add_loss(vae_loss)
vae.compile(optimizer='adam')
vae.summary()

# reuse the decoder layers to build a standalone generator
decoder_input = Input(shape=(latent_dim,))
_ = decoder_hidden(decoder_input)
_ = Reshape((2*n, hidden_dim))(_)
_ = decoder_cnn(_)
_output = decoder_dense(_)
generator = Model(decoder_input, _output)
Example #35
    def __init__(
            self,
            model,
            bounds,
            channel_axis=3,
            preprocessing=(0, 1),
            predicts='probabilities'):

        super(KerasModel, self).__init__(bounds=bounds,
                                         channel_axis=channel_axis,
                                         preprocessing=preprocessing)

        from keras import backend as K
        import keras
        from pkg_resources import parse_version

        assert parse_version(keras.__version__) >= parse_version('2.0.7'), 'Keras version needs to be 2.0.7 or newer'  # noqa: E501

        if predicts == 'probs':
            predicts = 'probabilities'
        assert predicts in ['probabilities', 'logits']

        images_input = model.input
        label_input = K.placeholder(shape=(1,))

        predictions = model.output

        shape = K.int_shape(predictions)
        _, num_classes = shape
        assert num_classes is not None

        self._num_classes = num_classes

        if predicts == 'probabilities':
            if K.backend() == 'tensorflow':
                predictions, = predictions.op.inputs
                loss = K.sparse_categorical_crossentropy(
                    label_input, predictions, from_logits=True)
            else:
                logging.warning('relying on numerically unstable conversion'
                                ' from probabilities to softmax')
                loss = K.sparse_categorical_crossentropy(
                    label_input, predictions, from_logits=False)

                # transform the probability predictions into logits, so that
                # the rest of this code can assume predictions to be logits
                predictions = self._to_logits(predictions)
        elif predicts == 'logits':
            loss = K.sparse_categorical_crossentropy(
                label_input, predictions, from_logits=True)

        # sparse_categorical_crossentropy returns 1-dim tensor,
        # gradients wants 0-dim tensor (for some backends)
        loss = K.squeeze(loss, axis=0)
        grads = K.gradients(loss, images_input)

        grad_loss_output = K.placeholder(shape=(num_classes, 1))
        external_loss = K.dot(predictions, grad_loss_output)
        # remove batch dimension of predictions
        external_loss = K.squeeze(external_loss, axis=0)
        # remove singleton dimension of grad_loss_output
        external_loss = K.squeeze(external_loss, axis=0)

        grads_loss_input = K.gradients(external_loss, images_input)

        if K.backend() == 'tensorflow':
            # tensorflow backend returns a list with the gradient
            # as the only element, even if loss is a single scalar
            # tensor;
            # theano always returns the gradient itself (and requires
            # that loss is a single scalar tensor)
            assert isinstance(grads, list)
            assert len(grads) == 1
            grad = grads[0]

            assert isinstance(grads_loss_input, list)
            assert len(grads_loss_input) == 1
            grad_loss_input = grads_loss_input[0]
        elif K.backend() == 'cntk':  # pragma: no cover
            assert isinstance(grads, list)
            assert len(grads) == 1
            grad = grads[0]
            grad = K.reshape(grad, (1,) + grad.shape)

            assert isinstance(grads_loss_input, list)
            assert len(grads_loss_input) == 1
            grad_loss_input = grads_loss_input[0]
            grad_loss_input = K.reshape(grad_loss_input, (1,) + grad_loss_input.shape)  # noqa: E501
        else:
            assert not isinstance(grads, list)
            grad = grads

            grad_loss_input = grads_loss_input

        self._loss_fn = K.function(
            [images_input, label_input],
            [loss])
        self._batch_pred_fn = K.function(
            [images_input], [predictions])
        self._pred_grad_fn = K.function(
            [images_input, label_input],
            [predictions, grad])
        self._bw_grad_fn = K.function(
            [grad_loss_output, images_input],
            [grad_loss_input])