Example #1
def get_model(feature_extractor, rpn_model, anchors, hyper_params, mode="training"):
    """Generating rpn model for given backbone base model and hyper params.
    inputs:
        feature_extractor = feature extractor layer from the base model
        rpn_model = tf.keras.model generated rpn model
        anchors = (total_anchors, [y1, x1, y2, x2])
            these values in normalized format between [0, 1]
        hyper_params = dictionary
        mode = "training" or "inference"

    outputs:
        frcnn_model = tf.keras.model
    """
    input_img = rpn_model.input
    rpn_reg_predictions, rpn_cls_predictions = rpn_model.output
    #
    roi_bboxes = RoIBBox(anchors, mode, hyper_params, name="roi_bboxes")([rpn_reg_predictions, rpn_cls_predictions])
    #
    roi_pooled = RoIPooling(hyper_params, name="roi_pooling")([feature_extractor.output, roi_bboxes])
    #
    output = TimeDistributed(Flatten(), name="frcnn_flatten")(roi_pooled)
    output = TimeDistributed(Dense(4096, activation="relu"), name="frcnn_fc1")(output)
    output = TimeDistributed(Dropout(0.5), name="frcnn_dropout1")(output)
    output = TimeDistributed(Dense(4096, activation="relu"), name="frcnn_fc2")(output)
    output = TimeDistributed(Dropout(0.5), name="frcnn_dropout2")(output)
    frcnn_cls_predictions = TimeDistributed(Dense(hyper_params["total_labels"], activation="softmax"), name="frcnn_cls")(output)
    frcnn_reg_predictions = TimeDistributed(Dense(hyper_params["total_labels"] * 4, activation="linear"), name="frcnn_reg")(output)
    #
    if mode == "training":
        input_gt_boxes = Input(shape=(None, 4), name="input_gt_boxes", dtype=tf.float32)
        input_gt_labels = Input(shape=(None, ), name="input_gt_labels", dtype=tf.int32)
        rpn_cls_actuals = Input(shape=(None, None, hyper_params["anchor_count"]), name="input_rpn_cls_actuals", dtype=tf.float32)
        rpn_reg_actuals = Input(shape=(None, 4), name="input_rpn_reg_actuals", dtype=tf.float32)
        frcnn_reg_actuals, frcnn_cls_actuals = RoIDelta(hyper_params, name="roi_deltas")(
                                                        [roi_bboxes, input_gt_boxes, input_gt_labels])
        #
        loss_names = ["rpn_reg_loss", "rpn_cls_loss", "frcnn_reg_loss", "frcnn_cls_loss"]
        rpn_reg_loss_layer = Lambda(train_utils.reg_loss, name=loss_names[0])([rpn_reg_actuals, rpn_reg_predictions])
        rpn_cls_loss_layer = Lambda(train_utils.rpn_cls_loss, name=loss_names[1])([rpn_cls_actuals, rpn_cls_predictions])
        frcnn_reg_loss_layer = Lambda(train_utils.reg_loss, name=loss_names[2])([frcnn_reg_actuals, frcnn_reg_predictions])
        frcnn_cls_loss_layer = Lambda(train_utils.frcnn_cls_loss, name=loss_names[3])([frcnn_cls_actuals, frcnn_cls_predictions])
        #
        frcnn_model = Model(inputs=[input_img, input_gt_boxes, input_gt_labels,
                                    rpn_reg_actuals, rpn_cls_actuals],
                            outputs=[roi_bboxes, rpn_reg_predictions, rpn_cls_predictions,
                                     frcnn_reg_predictions, frcnn_cls_predictions,
                                     rpn_reg_loss_layer, rpn_cls_loss_layer,
                                     frcnn_reg_loss_layer, frcnn_cls_loss_layer])
        #
        for layer_name in loss_names:
            layer = frcnn_model.get_layer(layer_name)
            frcnn_model.add_loss(layer.output)
            frcnn_model.add_metric(layer.output, name=layer_name, aggregation="mean")
        #
    else:
        bboxes, labels, scores = Decoder(hyper_params["variances"], hyper_params["total_labels"], name="faster_rcnn_decoder")(
                                         [roi_bboxes, frcnn_reg_predictions, frcnn_cls_predictions])
        frcnn_model = Model(inputs=input_img, outputs=[bboxes, labels, scores])
        #
    return frcnn_model
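Note how the four loss tensors above are produced by named Lambda layers and then registered with add_loss/add_metric in a loop, so the model can later be compiled without a loss argument. A minimal, self-contained sketch of that pattern (assuming TF 2.x Keras; reg_loss, the layer names and shapes below are illustrative, not the train_utils helpers used above):

import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Lambda
from tensorflow.keras.models import Model

def reg_loss(args):
    # mean squared error between a target tensor and a prediction tensor
    y_true, y_pred = args
    return tf.reduce_mean(tf.square(y_true - y_pred))

features = Input(shape=(8,), name="features")
targets = Input(shape=(1,), name="targets")
preds = Dense(1, name="head")(Dense(16, activation="relu")(features))

# the loss is computed by a named layer, mirroring the loss_names loop above
loss_layer = Lambda(reg_loss, name="reg_loss")([targets, preds])

model = Model(inputs=[features, targets], outputs=[preds, loss_layer])
layer = model.get_layer("reg_loss")
model.add_loss(layer.output)
model.add_metric(layer.output, name="reg_loss", aggregation="mean")
model.compile(optimizer="adam")  # no loss= needed; targets travel in as model inputs
# model.fit({"features": X, "targets": y}, epochs=...)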
Example #2
def create_implExModel(num_nodes,
                       num_edges,
                       embed_size=50,
                       n3_reg=1e-3,
                       learning_rate=5e-1,
                       num_negs=50,
                       alpha=1.,
                       beta=1.):
    # Build complEx Model
    sub_inputs = Input(shape=(), name='subject')
    obj_inputs = Input(shape=(), name='object')
    rel_inputs = Input(shape=(), name='relation')
    cnt_inputs = Input(shape=(), name='count')
    y_true_inputs = Input(shape=(), name='label')
    inputs = {
        "subject": sub_inputs,
        "object": obj_inputs,
        "relation": rel_inputs,
        "count": cnt_inputs,
        "label": y_true_inputs
    }

    node_layer = Embedding(input_dim=num_nodes,
                           output_dim=embed_size,
                           embeddings_initializer=GlorotUniform(),
                           name='node_embedding')
    edge_layer = Embedding(input_dim=num_edges,
                           output_dim=embed_size,
                           embeddings_initializer=GlorotUniform(),
                           name='edge_embedding')

    sub_embed = node_layer(sub_inputs)
    rel_embed = edge_layer(rel_inputs)
    obj_embed = node_layer(obj_inputs)

    outputs = ComplExDotScore(n3_reg)([sub_embed, rel_embed, obj_embed])
    model = Model(inputs, outputs, name='implEx')

    # Compile implEx Model
    wbce_loss = tf.nn.weighted_cross_entropy_with_logits(
        y_true_inputs, outputs, num_negs) / num_negs
    confidence = 1 + alpha * tf.math.log(1 + cnt_inputs / beta)

    loss = K.sum(confidence * wbce_loss)
    model.add_loss(loss)
    model.add_metric(K.mean(wbce_loss), 'weighted_binarycrossentropy')

    model.compile(optimizer=Adagrad(learning_rate))

    return model
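ComplExDotScore is a project-specific layer that is not shown above. A minimal sketch, assuming the standard ComplEx scoring function (each embedding split into real/imaginary halves) with an N3-style penalty attached through the layer's own add_loss; the exact regularizer in the source may differ:

import tensorflow as tf
from tensorflow.keras.layers import Layer

class ComplExDotScore(Layer):
    # Hypothetical re-implementation for illustration only.
    def __init__(self, n3_reg=0.0, **kwargs):
        super().__init__(**kwargs)
        self.n3_reg = n3_reg

    def call(self, inputs):
        sub, rel, obj = inputs                   # each: (batch, embed_size)
        s_re, s_im = tf.split(sub, 2, axis=-1)   # treat halves as real/imaginary parts
        r_re, r_im = tf.split(rel, 2, axis=-1)
        o_re, o_im = tf.split(obj, 2, axis=-1)
        # Re(<s, r, conj(o)>)
        score = tf.reduce_sum(
            s_re * r_re * o_re + s_re * r_im * o_im +
            s_im * r_re * o_im - s_im * r_im * o_re, axis=-1)
        if self.n3_reg:
            # rough N3-style penalty on the embedding magnitudes
            n3 = sum(tf.reduce_sum(tf.abs(t) ** 3) for t in (sub, rel, obj))
            self.add_loss(self.n3_reg * n3)
        return score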
Example #3
def model(train_ds, val_ds, bdir, movie_num, epochs, batch_size, learning_rate, patience):
	os.system("rm " + bdir + "log.csv")
	filelendf = pd.read_csv(bdir + 'Dataset/file_length.dat', engine = 'python', sep = ':', index_col = 0)
	print ("--> Starting Training with learning rate = {learning_rate} for epochs = {epochs} with patience = {patience}".format(learning_rate = learning_rate, epochs = epochs, patience = patience))
	adam = Adam(learning_rate = learning_rate)

	Ip = Input(shape = (movie_num, ), name = "Input")
	Op = Input(shape = (movie_num, ), name = "Target")
	Weight = Input(shape = (movie_num, ), name = "Weight")
	Count = Input(shape = (1, ), name = "Count")

	n = pow(2, floor(log(movie_num)/log(2)))
	if n < 512:
		print ("Insufficient number of movies for a good model")
		exit()
	else:
		x = layer_creator(n, Ip)

	Output = Dense(movie_num, activation = "relu", name = "Output")(x)

	model = Model(inputs = [Ip, Op, Weight, Count], outputs = Output)
	model.add_loss(rmse(Op, Output, Weight, Count))
	model.add_metric(mae(Op, Output, Weight, Count), aggregation = 'mean', name = 'mae')
	model.compile(optimizer = adam, loss = None, metrics = None)
	#print (model.summary())

	es = EarlyStopping(monitor = 'val_loss', mode = 'min', verbose = 1, patience = patience)
	cl = CSVLogger(bdir + 'log.csv', append = True, separator = ',')
	mc = ModelCheckpoint(bdir + 'model.h5', monitor = 'val_loss', verbose = 1, save_best_only = True)

	history = model.fit(train_ds, epochs = epochs, steps_per_epoch = (filelendf.loc['Train']['Length'] // batch_size), validation_data = val_ds, callbacks = [es, cl, mc])

	print ("--> Plotting Loss")
	#print(history.history.keys())
	plt.plot(history.history['loss'])
	plt.plot(history.history['val_loss'])
	plt.title('model loss')
	plt.ylabel('loss')
	plt.xlabel('epoch')
	plt.legend(['train', 'val'], loc = 'upper left')
	plt.savefig(bdir + 'loss.png', bbox_inches = 'tight')
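layer_creator, rmse and mae are helpers defined elsewhere in this project and are not shown. A plausible sketch of the weighted loss/metric pair, assuming (hypothetically) that Weight masks the rated entries and Count holds the number of rated movies per user:

import tensorflow.keras.backend as K

# Hypothetical helpers for illustration; the real implementations may differ.
def rmse(y_true, y_pred, weight, count):
    se = K.sum(weight * K.square(y_true - y_pred), axis=-1)           # error on rated entries only
    per_user = K.sqrt(se / K.maximum(K.squeeze(count, axis=-1), 1.0))
    return K.mean(per_user)                                           # scalar, suitable for add_loss

def mae(y_true, y_pred, weight, count):
    ae = K.sum(weight * K.abs(y_true - y_pred), axis=-1)
    return K.mean(ae / K.maximum(K.squeeze(count, axis=-1), 1.0))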
Example #4
def build_vae(input_shape):
    '''
    Arguments:
        input_shape(tuple): the shape of input images (H, W, C)
    
    Returns:
        encoder, decoder, autoencoder models
    '''
    def build_encoder(input_shape, latent_dim):
        inputs = Input(input_shape)
        x = Conv2D(256, 4, strides=2, padding='same', activation='relu')(inputs)
        x = Conv2D(128, 4, strides=2, padding='same', activation='relu')(x)
        x = Conv2D(64, 4, strides=2, padding='same', activation='relu')(x)
        # x = Conv2D(64, 4, strides=2, padding='same', activation='relu')(x)
        x = Flatten()(x)
        mean = Dense(latent_dim)(x)
        logvar = Dense(latent_dim)(x)

        epsilon = K.random_normal(K.shape(mean))
        z = mean + K.exp(0.5 * logvar) * epsilon
        encoder = Model(inputs, [z, mean, logvar], name='encoder')
        return encoder

    def build_decoder(latent_dim):
        decoder = Sequential([
            Dense(4* 4* 64, activation='relu', input_shape=(latent_dim,)),
            Reshape((4, 4, 64)),
            # Conv2DTranspose(128, 4, strides=2, padding='same', activation='relu'),
            Conv2DTranspose(64, 4, strides=2, padding='same', activation='relu'),
            Conv2DTranspose(32, 4, strides=2, padding='same', activation='relu'),
            Conv2DTranspose(3, 4, strides=2, padding='same', activation='sigmoid')
        ], name='decoder')
        return decoder

    latent_dim = 512
    encoder = build_encoder(input_shape, latent_dim)
    # encoder.summary()
    decoder = build_decoder(latent_dim)
    # decoder.summary()

    inputs = Input(input_shape)
    z, mean, logvar = encoder(inputs)
    decoder_out = decoder(z)
    autoencoder = Model(inputs, decoder_out)

    bce_loss = K.sum(binary_crossentropy(inputs, decoder_out), axis=[1, 2])
    kl_loss = -0.5 * K.sum(1 + logvar - K.square(mean) - K.exp(logvar), axis=-1)
    vae_loss = K.mean(bce_loss + kl_loss)
    autoencoder.add_loss(vae_loss)

    autoencoder.add_metric(tf.reduce_mean(bce_loss), name='bce_sum', aggregation='mean')
    autoencoder.add_metric(tf.reduce_mean(bce_loss) / input_shape[0] / input_shape[1], name='bce', aggregation='mean')
    autoencoder.add_metric(tf.reduce_mean(kl_loss), name='KL', aggregation='mean')
    autoencoder.compile(Adam(1e-3, decay=5e-4))

    return encoder, decoder, autoencoder
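Here the KL and reconstruction terms are assembled outside any layer and attached with autoencoder.add_loss. An alternative sketch (assuming TF 2.x) pushes the sampling and the KL bookkeeping into a custom layer via the layer-level add_loss/add_metric, so only the reconstruction term needs to be added on the model:

import tensorflow as tf
from tensorflow.keras.layers import Layer

class Sampling(Layer):
    # Reparameterization trick: z = mean + exp(0.5 * logvar) * eps,
    # with the KL term registered on the layer itself (illustrative sketch).
    def call(self, inputs):
        mean, logvar = inputs
        eps = tf.random.normal(tf.shape(mean))
        kl = -0.5 * tf.reduce_sum(1.0 + logvar - tf.square(mean) - tf.exp(logvar), axis=-1)
        self.add_loss(tf.reduce_mean(kl))
        self.add_metric(kl, name="KL", aggregation="mean")
        return mean + tf.exp(0.5 * logvar) * eps

# usage inside build_encoder: z = Sampling()([mean, logvar])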
Example #5
    def create_model_and_compile(self):
        input_shape = self.in_shape
        self.base_model = self.base_network()
        self.cs_layer = Dense(self.num_classes,
                              use_bias=False,
                              name='CS_layer')
        qpn_in = Input(shape=input_shape, name='in_qpn')
        qpny = Input(shape=(self.num_classes, ), name='in_qpny')
        qpny_label = Input(shape=(1, ), name='in_qpny_label')

        qpn_out = self.base_model(qpn_in)
        qpn_cls_sig = self.cs_layer(qpn_out)

        fe_loss = Lambda(self.triplet_loss_all_combinations,
                         name='triplet_loss_FE')(qpn_out) * self.fe_weight
        fe_accuracy = Lambda(self.FE_accuracy,
                             name='FE_accuracy_metric')(qpn_out)

        cs_loss = Lambda(self.manual_CS_loss, name='CS_loss_calc')(
            [qpn_cls_sig, qpny]) * self.cs_weight
        cs_accuracy = Lambda(self.CS_accuracy,
                             name='CS_Acc')([qpn_cls_sig, qpny])

        total_loss = fe_loss + cs_loss

        model = Model(inputs=[qpn_in, qpny, qpny_label],
                      outputs=[qpn_cls_sig],
                      name='FEModel')

        if 'adm' in self.optim:
            optm = Adam(lr=self.LR)
        elif 'ranger' in self.optim:  # option to use a newer optimizer
            radam = tfa.optimizers.RectifiedAdam(lr=self.LR, min_lr=1e-7)
            optm = tfa.optimizers.Lookahead(radam,
                                            sync_period=6,
                                            slow_step_size=0.5)
        else:
            # guard against an unrecognized optimizer name, which would otherwise
            # leave optm undefined and raise a NameError below
            raise ValueError("unsupported optimizer: {}".format(self.optim))

        model.add_loss(total_loss)
        model.compile(optimizer=optm)

        # Metrics to track the accuracy and loss progression
        model.add_metric(fe_accuracy, name='fe_a', aggregation='mean')
        model.add_metric(cs_accuracy, name='cs_a', aggregation='mean')
        model.add_metric(fe_loss, name='fe_loss', aggregation='mean')
        model.add_metric(cs_loss, name='cs_loss_out', aggregation='mean')

        return model, optm
Example #6
def build_triplet_distances_model(extractor_model,
                                  dist_type='eucl',
                                  alpha=1.0,
                                  add_loss=False):
    anchor_in = Input(shape=(224, 224, 3), name="anchor_in")
    anchor_out = extractor_model(anchor_in)

    pos_in = Input(shape=(224, 224, 3), name="pos_in")
    pos_out = extractor_model(pos_in)

    neg_in = Input(shape=(224, 224, 3), name="neg_in")
    neg_out = extractor_model(neg_in)

    if dist_type == 'cos':
        pos_dist = CosineDistance(name="pos_dist")([anchor_out, pos_out])
        neg_dist = CosineDistance(name="neg_dist")([anchor_out, neg_out])
    else:
        pos_dist = EuclidianDistanceSquared(name="pos_dist")(
            [anchor_out, pos_out])
        neg_dist = EuclidianDistanceSquared(name="neg_dist")(
            [anchor_out, neg_out])

    triplet = TripletLoss(alpha=alpha)([pos_dist, neg_dist])
    triplet_model = Model([anchor_in, pos_in, neg_in],
                          [triplet, pos_dist, neg_dist])
    triplet_model.add_metric(pos_dist,
                             aggregation='mean',
                             name="pos_dist_mean")
    triplet_model.add_metric(neg_dist,
                             aggregation='mean',
                             name="neg_dist_mean")
    if add_loss:
        triplet_model.add_loss(triplet)
    else:
        triplet_model.add_metric(triplet,
                                 aggregation='mean',
                                 name="triplet_loss_mean")

    triplet_model.compile(optimizer=Adamax(), loss=None)
    return triplet_model
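CosineDistance, EuclidianDistanceSquared and TripletLoss are custom layers not shown above. A minimal sketch, assuming the usual hinge formulation max(d_pos - d_neg + alpha, 0); the layers in the source project may differ:

import tensorflow as tf
from tensorflow.keras.layers import Layer

# Illustrative re-implementations, not the project's originals.
class EuclidianDistanceSquared(Layer):
    def call(self, inputs):
        a, b = inputs
        return tf.reduce_sum(tf.square(a - b), axis=-1, keepdims=True)

class CosineDistance(Layer):
    def call(self, inputs):
        a, b = inputs
        a = tf.math.l2_normalize(a, axis=-1)
        b = tf.math.l2_normalize(b, axis=-1)
        return 1.0 - tf.reduce_sum(a * b, axis=-1, keepdims=True)

class TripletLoss(Layer):
    def __init__(self, alpha=1.0, **kwargs):
        super().__init__(**kwargs)
        self.alpha = alpha

    def call(self, inputs):
        pos_dist, neg_dist = inputs
        return tf.maximum(pos_dist - neg_dist + self.alpha, 0.0)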
Example #7
class MLMTextClassifyModel(AbstractTextClassifyModel, TFBasedModel):
    def __init__(self, config):
        super().__init__(config=config)
        self.tgt_token_ids = [
            self.tokenizer.token2id(t) for t in self.tgt_tokens
        ]
        self.pred_mask = [
            1 if idx in self.tgt_token_ids else 0
            for idx in range(self.vocab_size)
        ]

    def _load_config(self, config):
        super()._load_config(config)
        self.max_len = self.task_config["max_len"]
        self.word2label = jload(self.task_config['token2label_path'])
        self.label2word = inverse_dict(self.word2label, overwrite=False)
        self.pattern = self.task_config["pattern"]
        # self.keep_tokens = load_lines(self.task_config["keep_token_path"])
        self.tgt_tokens = flat([list(w) for w in self.word2label])
        # self.keep_tokens += self.tgt_tokens
        self.label_num = len(set(self.word2label.values()))

    def build_model(self,
                    pretrained_model_path=None,
                    pretrained_model_tag="bert",
                    pos_weight=1.,
                    bilstm_dim_list=[],
                    transformer_kwargs={},
                    h5_file=None):

        with self.get_scope():
            # transformer_kwargs = {
            #     "keep_tokens": self.keep_token_ids
            # }
            self.nn_model = get_mlm_model(
                pretrained_model_path,
                pretrained_model_tag="bert",
                transformer_kwargs=transformer_kwargs,
                h5_file=h5_file)
            logger.info("nn model's summary:")
            self.nn_model.summary(print_fn=logger.info)
            self._update_model_dict("test", self.nn_model)
            return self.nn_model

    @discard_kwarg
    def compile_model(self, optimizer_name, optimizer_args, rdrop_alpha=None):
        logger.info("compiling model...")
        with self.get_scope():
            token_output = Input(shape=(None, ),
                                 name='token_output',
                                 dtype=tf.int32)
            self.train_model = Model(self.nn_model.inputs + [token_output],
                                     self.nn_model.output,
                                     name="train_model")
        output = self.train_model.output

        loss_mask = Lambda(
            function=lambda x: tf.cast(tf.not_equal(x, 0), tf.float32),
            name="pred_mask")(token_output)
        loss_layer = build_classify_loss_layer(multi_label=False,
                                               with_mask=True)
        loss = loss_layer([token_output, output, loss_mask])
        self.train_model.add_loss(loss)

        accuracy_func = masked_sparse_categorical_accuracy
        metric_layer = MetricLayer(accuracy_func, name="metric_layer")
        accuracy = metric_layer([token_output, output, loss_mask])

        self.train_model.add_metric(accuracy,
                                    aggregation="mean",
                                    name="accuracy")
        optimizer = OptimizerFactory.create(optimizer_name, optimizer_args)
        self.train_model.compile(optimizer=optimizer)

        logger.info("training model's summary:")
        self.train_model.summary(print_fn=logger.info)
        self._update_model_dict("train", self.train_model)

    def example2feature(self, example: UnionTextClassifyExample) -> Dict:
        # if example.extra_text:
        #     text = self.pattern
        #     extra_text = self.tokenizer.end_token.join(example.text, example.extra_text)
        # else:
        #     text = self.pattern
        #     extra_text = example.text
        text = self.pattern + example.text
        feature = self.tokenizer.do_tokenize(text=text)

        mask_spans = find_span(feature["tokens"], "[MASK]")
        assert len(mask_spans) == 1
        feature["mask_span"] = mask_spans[0]
        if isinstance(example, LabeledTextClassifyExample):
            if isinstance(example.label, list):
                labels = [e.name for e in example.label]
            else:
                labels = [example.label.name]
            feature.update(labels=labels)
        return feature

    def _feature2records(self, idx, feature: Dict, mode: str) -> List[Dict]:
        record = dict(idx=idx, **feature)
        truncate_record(record=record,
                        max_len=self.max_len,
                        keys=["token_ids", "segment_ids", "tokens"])
        if mode == "train":
            labels = feature.get("labels")
            if labels is None:
                raise ValueError("no labels given in train mode!")
            label = random.choice(labels)
            tgt_word = random.choice(self.label2word[label])
            tokened = self.tokenizer.do_tokenize(tgt_word)
            tgt_token_span = tokened["tokens"][1:-1]
            tgt_token_span_id = tokened["token_ids"][1:-1]
            mask_start, mask_end = record["mask_span"]
            assert len(tgt_token_span) == mask_end - mask_start

            tgt_tokens = copy.copy(record["tokens"])
            tgt_token_ids = copy.copy(record["token_ids"])
            token_output = [0] * len(tgt_token_ids)
            tgt_tokens[mask_start:mask_end] = tgt_token_span
            tgt_token_ids[mask_start:mask_end] = tgt_token_span_id
            token_output[mask_start:mask_end] = tgt_token_span_id
            record.update(target_tokens=tgt_tokens,
                          tgt_token_ids=tgt_token_ids,
                          token_output=token_output)
        return [record]

    @discard_kwarg
    @log_cost_time
    def _post_predict(self,
                      features,
                      pred_tensors,
                      show_detail=False,
                      threshold=.5) -> List[LabelOrLabels]:
        def _tensor2output(feature, pred_tensor) -> LabelOrLabels:
            mask_idx_start, mask_idx_end = feature["mask_span"]
            # logger.info(pred_tensor.shape)
            pred_tensor = pred_tensor[mask_idx_start:mask_idx_end]
            pred_tensor = pred_tensor * self.pred_mask
            # logger.info(pred_tensor)
            # logger.info(pred_tensor.shape)

            probs = np.max(pred_tensor, axis=-1)
            # logger.info(probs)
            prob = np.prod(probs)
            # logger.info(prob)

            pred_tensor = np.argmax(pred_tensor, axis=-1)

            word = "".join(self.tokenizer.id2token(e) for e in pred_tensor)
            # logger.info(word)
            label = self.word2label[word]

            return Label(name=label, prob=prob)

        preds = [_tensor2output(f, t) for f, t in zip(features, pred_tensors)]
        return preds
Example #8
def createModel(max_len,
                features,
                dimsEmbed,
                lr,
                two_layer=False,
                bidir=False,
                cells=32,
                regularization_base=2e-6,
                locality_term=False,
                batch_size=None,
                locality_power=1,
                **kwargs):
    print(tf.executing_eagerly())
    inp = Input(shape=(max_len, ), name="inputs1")
    inp2 = Input(shape=(max_len, ), name="inputs2")

    inp3 = Input(shape=(batch_size, batch_size), name="inputs3")

    if 'regularization_base_latent' in kwargs:
        regularization_base_latent = kwargs['regularization_base_latent']
    else:
        regularization_base_latent = regularization_base

    embedLayer = Embedding(features + 1,
                           dimsEmbed,
                           input_length=max_len,
                           embeddings_initializer=he_uniform(2),
                           mask_zero=True,
                           name='inpEmbed1')
    prev_layer = embedLayer(inp)

    embed2Layer = Embedding(2,
                            cells,
                            input_length=max_len,
                            embeddings_initializer=he_uniform(3),
                            mask_zero=True,
                            trainable=False,
                            name='inpEmbed2')
    embed2 = embed2Layer(inp2)

    embed2Layer.set_weights(
        np.expand_dims(np.stack([
            np.zeros_like(embed2Layer.get_weights()[0][0]),
            np.ones_like(embed2Layer.get_weights()[0][0])
        ],
                                axis=0),
                       axis=0))

    if two_layer:
        lstmEncSecLayer = LSTM(cells,
                               activation='relu',
                               name='encoder2Layer',
                               return_sequences=True,
                               kernel_initializer=he_normal(1),
                               kernel_regularizer=l1_l2(
                                   regularization_base, regularization_base),
                               bias_regularizer=l1_l2(2 * regularization_base,
                                                      2 * regularization_base))

        if bidir:
            lstmEncSecLayer = Bidirectional(lstmEncSecLayer)

        prev_layer = lstmEncSecLayer(prev_layer)

    lstmEncLayer = LSTM(cells,
                        activation='relu',
                        return_sequences=False,
                        kernel_initializer=he_normal(5),
                        name='encoderLayer',
                        return_state=True,
                        recurrent_regularizer=l1_l2(regularization_base / 20,
                                                    regularization_base / 20),
                        kernel_regularizer=l1_l2(regularization_base,
                                                 regularization_base),
                        bias_regularizer=l1_l2(regularization_base * 2,
                                               regularization_base * 2))

    if bidir:
        lstmEncLayer = Bidirectional(lstmEncLayer)
        enc, h1, h2, h3, h4 = lstmEncLayer(prev_layer)
        concat = Concatenate()([h1, h2, h3, h4])

    else:
        enc, h1, c1 = lstmEncLayer(prev_layer)
        concat = Concatenate()([h1, c1])

    bn1 = BatchNormalization(name='bn1')(concat)

    hLayer = Dense(cells,
                   activation='tanh',
                   use_bias=True,
                   kernel_initializer=he_normal(10),
                   name='hDense',
                   activity_regularizer=l1_l2(regularization_base_latent / 2,
                                              regularization_base_latent / 2),
                   bias_regularizer=l1_l2(regularization_base_latent * 2.5,
                                          regularization_base_latent * 2.5))
    h = hLayer(bn1)

    cLayer = Dense(cells,
                   activation='linear',
                   use_bias=True,
                   kernel_initializer=he_normal(11),
                   name='cDense',
                   activity_regularizer=l1_l2(regularization_base_latent / 2,
                                              regularization_base_latent / 2),
                   bias_regularizer=l1_l2(regularization_base_latent * 2.5,
                                          regularization_base_latent * 2.5))
    c = cLayer(bn1)

    if locality_term:
        locality1 = Lambda(locality1_op)([h, c])
        locality2 = Lambda(locality2_op)(inp3)
        locality_layer = Lambda(locality_term_op)([locality1, locality2])

    decoderInpH = Input((cells, ))
    decoderInpC = Input((cells, ))
    decoderPrevInput = Input(((max_len, cells)))
    print("model kwargs: ", kwargs)
    timeWindowsConstant = False if 'timeWindowsConstant' not in kwargs else kwargs[
        'timeWindowsConstant']
    decoderInpDenses = False if 'decoderInpDenses' not in kwargs else kwargs[
        'decoderInpDenses']
    inpHZeros = False if 'inpHZeros' not in kwargs else kwargs['inpHZeros']
    inpCZeros = False if 'inpCZeros' not in kwargs else kwargs['inpCZeros']
    outAdditionalDense = False if 'outAdditionalDense' not in kwargs else kwargs[
        'outAdditionalDense']

    decoderInpH_topass = decoderInpH
    decoderInpC_topass = decoderInpC

    if timeWindowsConstant:
        rep = decoderPrevInput
    else:
        rep = RepeatVector(max_len)(decoderInpH)
    # mult = Multiply()([decoderPrevInput, decoderPrevInput])

    if decoderInpDenses:

        decoderInpHDense1 = Dense(
            cells,
            activation='relu',
            kernel_initializer=he_normal(32678),
            name='decInpHDense1',
            kernel_regularizer=l1_l2(regularization_base * 1.0,
                                     regularization_base * 1.0),
            bias_regularizer=l1_l2(regularization_base * 1.0,
                                   regularization_base * 1.0))(decoderInpH)

        decoderInpHDense2 = Dense(
            cells,
            activation='relu',
            kernel_initializer=he_normal(32679),
            name='decInpHDense2',
            kernel_regularizer=l1_l2(regularization_base * 1.0,
                                     regularization_base * 1.0),
            bias_regularizer=l1_l2(regularization_base * 1.0,
                                   regularization_base *
                                   1.0))(decoderInpHDense1)

        decoderInpH_topass = Dense(
            cells,
            activation='tanh',
            kernel_initializer=he_normal(72679),
            name='decInpHDense3',
            kernel_regularizer=l1_l2(regularization_base * 1.0,
                                     regularization_base * 1.0),
            bias_regularizer=l1_l2(regularization_base * 1.0,
                                   regularization_base *
                                   1.0))(decoderInpHDense2)

        decoderInpCDense1 = Dense(
            cells,
            activation='relu',
            kernel_initializer=he_normal(32618),
            name='decInpCDense1',
            kernel_regularizer=l1_l2(regularization_base * 1.0,
                                     regularization_base * 1.0),
            bias_regularizer=l1_l2(regularization_base * 1.0,
                                   regularization_base * 1.0))(decoderInpC)

        decoderInpCDense2 = Dense(
            cells,
            activation='relu',
            kernel_initializer=he_normal(32628),
            name='decInpCDense2',
            kernel_regularizer=l1_l2(regularization_base * 1.0,
                                     regularization_base * 1.0),
            bias_regularizer=l1_l2(regularization_base * 1.0,
                                   regularization_base *
                                   1.0))(decoderInpCDense1)

        decoderInpC_topass = Dense(
            cells,
            activation='linear',
            kernel_initializer=he_normal(32619),
            name='decInpCDense3',
            kernel_regularizer=l1_l2(regularization_base * 1.0,
                                     regularization_base * 1.0),
            bias_regularizer=l1_l2(regularization_base * 1.0,
                                   regularization_base *
                                   1.0))(decoderInpCDense2)

    if inpHZeros:
        decoderInpH_topass = Lambda(lambda x: x * 0.0)(decoderInpH_topass)
    if inpCZeros:
        decoderInpC_topass = Lambda(lambda x: x * 0.0)(decoderInpC_topass)
    # if not timeWindowsConstant:

    mult = Multiply()([rep, decoderPrevInput])

    mask = Masking(0.0)(mult)

    prev_layer = LSTM(cells,
                      activation='relu',
                      return_sequences=True,
                      kernel_initializer=he_normal(21),
                      name='decoder1',
                      kernel_regularizer=l1_l2(regularization_base * 1.5,
                                               regularization_base * 1.5),
                      bias_regularizer=l1_l2(regularization_base * 2.5,
                                             regularization_base * 2.5))(
                                                 mask,
                                                 initial_state=[
                                                     decoderInpH_topass,
                                                     decoderInpC_topass
                                                 ])
    if two_layer:
        prev_layer = LSTM(
            cells,
            activation='relu',
            return_sequences=True,
            kernel_initializer=he_normal(35),
            name='decoder2',
            kernel_regularizer=l1_l2(regularization_base * 1.5,
                                     regularization_base * 1.5),
            bias_regularizer=l1_l2(regularization_base * 2.5,
                                   regularization_base * 2.5))(prev_layer)
    # to comment

    if outAdditionalDense:

        outPrev = TimeDistributed(Dense(
            32,
            activation='relu',
            kernel_initializer=he_normal(836),
            name='densePrevOut',
            kernel_regularizer=l1_l2(regularization_base * 2.0,
                                     regularization_base * 2.0),
            bias_regularizer=l1_l2(regularization_base * 2.5,
                                   regularization_base * 2.5)),
                                  name='densePrevOut')(prev_layer)
        out = TimeDistributed(Dense(features,
                                    activation='softmax',
                                    kernel_initializer=he_normal(100),
                                    name='denseOut'),
                              name='denseOut')(outPrev)
    else:
        out = TimeDistributed(Dense(features,
                                    activation='softmax',
                                    kernel_initializer=he_normal(100),
                                    name='denseOut'),
                              name='denseOut')(prev_layer)

    decoder = Model(inputs=[decoderInpH, decoderInpC, decoderPrevInput],
                    outputs=out)
    if locality_term:
        model = Model(inputs=[inp, inp2, inp3],
                      outputs=decoder([h, c, embed2]))
    else:
        model = Model(inputs=[inp, inp2], outputs=decoder([h, c, embed2]))

    model.summary()
    decoder.summary()

    if locality_term:
        print("Using locality term! Locality power: ", locality_power)
        locality_loss = (1 - locality_layer) * tf.constant(locality_power)
        model.add_loss(locality_loss)

        model.add_metric(locality_loss, name='locality', aggregation='mean')
    # model.add_metric(get_gradient_norm(model), name='locality', aggregation='mean')
    # model.add_metric(locality_loss, name='localityS', aggregation='sum')
    model.compile(optimizer=Adam(lr, clipnorm=1.0, clipvalue=0.5),
                  loss='categorical_crossentropy')
    decoder.compile(optimizer=Adam(lr, clipnorm=1.0, clipvalue=0.5),
                    loss='categorical_crossentropy')
    # model.metrics_tensors = []
    if locality_term:
        encoder = Model(inputs=[inp, inp2, inp3], outputs=[h, c, embed2])
    else:
        encoder = Model(inputs=[inp, inp2], outputs=[h, c, embed2])

    return model, encoder, decoder
Example #9
def init_model(backbone_model_name, freeze_backbone_for_N_epochs, input_shape,
               region_num, attribute_name_to_label_encoder_dict,
               kernel_regularization_factor, bias_regularization_factor,
               gamma_regularization_factor, beta_regularization_factor,
               pooling_mode, min_value, max_value, use_horizontal_flipping):
    def _add_pooling_module(input_tensor):
        # Add a global pooling layer
        output_tensor = input_tensor
        if len(K.int_shape(output_tensor)) == 4:
            if pooling_mode == "Average":
                output_tensor = GlobalAveragePooling2D()(output_tensor)
            elif pooling_mode == "Max":
                output_tensor = GlobalMaxPooling2D()(output_tensor)
            elif pooling_mode == "GeM":
                output_tensor = GlobalGeMPooling2D()(output_tensor)
            else:
                assert False, "{} is an invalid argument!".format(pooling_mode)

        # Add the clipping operation
        if min_value is not None and max_value is not None:
            output_tensor = Lambda(lambda x: K.clip(
                x, min_value=min_value, max_value=max_value))(output_tensor)

        return output_tensor

    def _add_classification_module(input_tensor):
        # Add a batch normalization layer
        output_tensor = input_tensor
        output_tensor = BatchNormalization(epsilon=2e-5)(output_tensor)

        # Add a dense layer with softmax activation
        label_encoder = attribute_name_to_label_encoder_dict["identity_ID"]
        class_num = len(label_encoder.classes_)
        output_tensor = Dense(units=class_num,
                              use_bias=False,
                              kernel_initializer=RandomNormal(
                                  mean=0.0, stddev=0.001))(output_tensor)
        output_tensor = Activation("softmax")(output_tensor)

        return output_tensor

    def _triplet_hermans_loss(y_true,
                              y_pred,
                              metric="euclidean",
                              margin="soft"):
        # Create the loss in two steps:
        # 1. Compute all pairwise distances according to the specified metric.
        # 2. For each anchor along the first dimension, compute its loss.
        dists = cdist(y_pred, y_pred, metric=metric)
        loss = batch_hard(dists=dists,
                          pids=tf.argmax(y_true, axis=-1),
                          margin=margin)
        return loss

    # Initiation
    miscellaneous_output_tensor_list = []

    # Initiate the early blocks
    applications_instance = Applications()
    model_name_to_model_function = applications_instance.get_model_name_to_model_function(
    )
    assert backbone_model_name in model_name_to_model_function.keys(
    ), "Backbone {} is not supported.".format(backbone_model_name)
    model_function = model_name_to_model_function[backbone_model_name]
    blocks = applications_instance.get_model_in_blocks(
        model_function=model_function, include_top=False)
    vanilla_input_tensor = Input(shape=input_shape)
    intermediate_output_tensor = vanilla_input_tensor
    for block in blocks[:-1]:
        block = Applications.wrap_block(block, intermediate_output_tensor)
        intermediate_output_tensor = block(intermediate_output_tensor)

    # Initiate the last blocks
    last_block = Applications.wrap_block(blocks[-1],
                                         intermediate_output_tensor)
    last_block_for_global_branch_model = replicate_model(
        model=last_block, suffix="global_branch")
    last_block_for_regional_branch_model = replicate_model(
        model=last_block, suffix="regional_branch")

    # Add the global branch
    miscellaneous_output_tensor = _add_pooling_module(
        input_tensor=last_block_for_global_branch_model(
            intermediate_output_tensor))
    miscellaneous_output_tensor_list.append(miscellaneous_output_tensor)

    # Add the regional branch
    if region_num > 0:
        # Process each region
        regional_branch_output_tensor = last_block_for_regional_branch_model(
            intermediate_output_tensor)
        total_height = K.int_shape(regional_branch_output_tensor)[1]
        region_size = total_height // region_num
        for region_index in np.arange(region_num):
            # Get a slice of feature maps
            start_index = region_index * region_size
            end_index = (region_index + 1) * region_size
            if region_index == region_num - 1:
                end_index = total_height
            sliced_regional_branch_output_tensor = Lambda(
                lambda x, start_index=start_index, end_index=end_index:
                x[:, start_index:end_index])(regional_branch_output_tensor)

            # Downsampling
            sliced_regional_branch_output_tensor = Conv2D(
                filters=K.int_shape(sliced_regional_branch_output_tensor)[-1]
                // region_num,
                kernel_size=3,
                padding="same")(sliced_regional_branch_output_tensor)
            sliced_regional_branch_output_tensor = Activation("relu")(
                sliced_regional_branch_output_tensor)

            # Add the regional branch
            miscellaneous_output_tensor = _add_pooling_module(
                input_tensor=sliced_regional_branch_output_tensor)
            miscellaneous_output_tensor_list.append(
                miscellaneous_output_tensor)

    # Define the model used in inference
    inference_model = Model(inputs=[vanilla_input_tensor],
                            outputs=miscellaneous_output_tensor_list,
                            name="inference_model")
    specify_regularizers(inference_model, kernel_regularization_factor,
                         bias_regularization_factor,
                         gamma_regularization_factor,
                         beta_regularization_factor)

    # Define the model used in classification
    classification_input_tensor_list = [
        Input(shape=K.int_shape(item)[1:])
        for item in miscellaneous_output_tensor_list
    ]
    classification_output_tensor_list = []
    for classification_input_tensor in classification_input_tensor_list:
        classification_output_tensor = _add_classification_module(
            input_tensor=classification_input_tensor)
        classification_output_tensor_list.append(classification_output_tensor)
    classification_model = Model(inputs=classification_input_tensor_list,
                                 outputs=classification_output_tensor_list,
                                 name="classification_model")
    specify_regularizers(classification_model, kernel_regularization_factor,
                         bias_regularization_factor,
                         gamma_regularization_factor,
                         beta_regularization_factor)

    # Define the model used in training
    expand = lambda x: x if isinstance(x, list) else [x]
    vanilla_input_tensor = Input(shape=K.int_shape(inference_model.input)[1:])
    vanilla_feature_tensor_list = expand(inference_model(vanilla_input_tensor))
    if use_horizontal_flipping:
        flipped_input_tensor = tf.image.flip_left_right(vanilla_input_tensor)
        flipped_feature_tensor_list = expand(
            inference_model(flipped_input_tensor))
        merged_feature_tensor_list = [
            sum(item_tuple) / 2 for item_tuple in zip(
                vanilla_feature_tensor_list, flipped_feature_tensor_list)
        ]
    else:
        merged_feature_tensor_list = vanilla_feature_tensor_list
    miscellaneous_output_tensor_list = merged_feature_tensor_list
    classification_output_tensor_list = expand(
        classification_model(merged_feature_tensor_list))
    training_model = Model(inputs=[vanilla_input_tensor],
                           outputs=miscellaneous_output_tensor_list +
                           classification_output_tensor_list,
                           name="training_model")

    # Add the flipping loss
    if use_horizontal_flipping:
        flipping_loss_list = [
            K.mean(mean_squared_error(*item_tuple)) for item_tuple in zip(
                vanilla_feature_tensor_list, flipped_feature_tensor_list)
        ]
        flipping_loss = sum(flipping_loss_list)
        training_model.add_metric(flipping_loss,
                                  name="flipping",
                                  aggregation="mean")
        training_model.add_loss(1.0 * flipping_loss)

    # Compile the model
    triplet_hermans_loss_function = lambda y_true, y_pred: 1.0 * _triplet_hermans_loss(
        y_true, y_pred)
    miscellaneous_loss_function_list = [
        triplet_hermans_loss_function
    ] * len(miscellaneous_output_tensor_list)
    categorical_crossentropy_loss_function = lambda y_true, y_pred: 1.0 * categorical_crossentropy(
        y_true, y_pred, from_logits=False, label_smoothing=0.0)
    classification_loss_function_list = [
        categorical_crossentropy_loss_function
    ] * len(classification_output_tensor_list)
    training_model.compile_kwargs = {
        "optimizer":
        Adam(),
        "loss":
        miscellaneous_loss_function_list + classification_loss_function_list
    }
    if freeze_backbone_for_N_epochs > 0:
        specify_trainable(model=training_model,
                          trainable=False,
                          keywords=[block.name for block in blocks])
    training_model.compile(**training_model.compile_kwargs)

    # Print the summary of the training model
    summarize_model(training_model)

    return training_model, inference_model
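cdist and batch_hard are helpers from the batch-hard triplet loss of Hermans et al.; the sketch below is an assumption about their behaviour (hardest positive and hardest negative per anchor, soft margin via softplus), not the exact implementation used here:

import tensorflow as tf

# Illustrative sketch of batch-hard triplet mining with a soft margin.
def batch_hard_sketch(dists, pids):
    n = tf.shape(pids)[0]
    same = tf.equal(pids[:, None], pids[None, :])              # same-identity mask
    not_self = tf.logical_not(tf.cast(tf.eye(n), tf.bool))
    positive_mask = tf.logical_and(same, not_self)
    negative_mask = tf.logical_not(same)

    hardest_pos = tf.reduce_max(tf.where(positive_mask, dists, tf.zeros_like(dists)), axis=1)
    very_far = tf.fill(tf.shape(dists), tf.float32.max)
    hardest_neg = tf.reduce_min(tf.where(negative_mask, dists, very_far), axis=1)
    return tf.nn.softplus(hardest_pos - hardest_neg)           # "soft" margin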
Example #10
class GlobalPointerModel(AbstractTextSpanClassifyModelAIConfig, TFBasedModel):
    def _load_config(self, config):
        super()._load_config(config)
        self.max_len = self.task_config['max_len']
        self.labels = load_lines(self.task_config['label_file_path'])
        self.label2id, self.id2label = seq2dict(self.labels)
        self.label_num = len(self.label2id)

    def build_model(self,
                    pretrained_model_path=None,
                    pretrained_model_tag="bert",
                    bilstm_dim_list=[],
                    head_size=64,
                    pos_weight=1,
                    **kwargs):
        """
        构建模型
        Args:
            head_size: GlobalPointer层的embedding size
            pretrained_model_path: 预训练模型地址
            pretrained_model_tag: 预训练模型类型bert/...
            bilstm_dim_list: 序列encode过程中如果要接bilstm。输入每个bilstm层的dimension
            pos_weight: 正例的权重
            **kwargs:
        Returns:
            nn模型
        """
        with self.get_scope():
            encoder_model = get_sequence_encoder_model(
                vocab_size=self.vocab_size,
                pretrained_model_path=pretrained_model_path,
                pretrained_model_tag=pretrained_model_tag,
                bilstm_dim_list=bilstm_dim_list,
                **kwargs)
            sequence_embedding = encoder_model.output
            output = GlobalPointer(self.label_num,
                                   head_size)(sequence_embedding)
            output = Lambda(lambda x: x**pos_weight,
                            name="pos_weight_layer")(output)
            self.nn_model = Model(inputs=encoder_model.inputs,
                                  outputs=[output],
                                  name="token_classify_model")
        logger.info("nn model's summary:")
        self.nn_model.summary(print_fn=logger.info)
        self._update_model_dict("test", self.nn_model)
        return self.nn_model

    @discard_kwarg
    def compile_model(self, optimizer_name: str, optimizer_args: dict):
        logger.info(
            f"compile model with optimizer_name:{optimizer_name}, optimizer_args:{optimizer_args}"
        )
        with self.get_scope():
            classify_output = Input(shape=(self.label_num, None, None),
                                    dtype=tf.float32,
                                    name='classify_output')
            token_ids, segment_ids = self.nn_model.inputs
            output = self.nn_model([token_ids, segment_ids])
            self.train_model = Model(
                inputs=[token_ids, segment_ids, classify_output],
                outputs=[output])
        loss_layer = LossLayer(loss_func=global_pointer_crossentropy,
                               name="loss_layer")
        loss = loss_layer([classify_output, output])
        self.train_model.add_loss(loss)

        accuracy_func = global_pointer_f1_score
        metric_layer = MetricLayer(accuracy_func, name="metric_layer")
        metric = metric_layer([classify_output, output])
        self.train_model.add_metric(metric,
                                    aggregation="mean",
                                    name="global_pointer_f1_score")
        optimizer = OptimizerFactory.create(optimizer_name, optimizer_args)
        self.train_model.compile(optimizer=optimizer)

        logger.info("training model's summary:")
        self.train_model.summary(print_fn=logger.info)
        self._update_model_dict("train", self.train_model)

    def example2feature(self, example: UnionTextSpanClassifyExample) -> Dict:
        feature = self.tokenizer.do_tokenize(text=example.text, store_map=True)
        if isinstance(example, LabeledTextSpanClassifyExample):
            feature.update(text_spans=[
                e.dict(exclude_none=True) for e in example.text_spans
            ])
        return feature

    def _feature2records(self, idx, feature: Dict, mode: str) -> List[dict]:
        record = dict(idx=idx, **feature)
        if mode == "train":
            text_spans = feature.get("text_spans")
            if text_spans is None:
                raise ValueError(f"not text_spans key found in train mode!")
            text_spans: TextSpans = [TextSpan(**e) for e in text_spans]
            char2token = record["char2token"]
            token_len = len(record["tokens"])
            classify_output = np.zeros(shape=(self.label_num, token_len,
                                              token_len))
            for text_span in text_spans:
                label_id = self.label2id[text_span.label]
                token_start = char2token[text_span.span[0]]
                token_end = char2token[text_span.span[1] - 1]
                classify_output[label_id][token_start][token_end] = 1

            record.update(classify_output=classify_output)
        truncate_record(record=record,
                        max_len=self.max_len,
                        keys=["token_ids", "segment_ids", "tokens"])

        return [record]

    @discard_kwarg
    @log_cost_time
    def _post_predict(self, features, pred_tensors,
                      show_detail) -> List[TextSpans]:
        def _tensor2output(feature, pred_tensor) -> TextSpans:
            text_spans = []
            prob_tensor = tf.math.sigmoid(pred_tensor)
            for l, s, e in zip(*np.where(pred_tensor > 0)):
                if e < s:
                    break
                label = self.id2label[l]
                char_start = feature["token2char"][s][0]
                char_end = feature["token2char"][e][1]
                text = feature["text"][char_start:char_end]
                prob = prob_tensor[l][s][e]
                text_span = TextSpan(text=text,
                                     label=label,
                                     span=(char_start, char_end),
                                     prob=prob)
                text_spans.append(text_span)
            return text_spans

        preds = [_tensor2output(f, p) for f, p in zip(features, pred_tensors)]
        return preds
Example #11
class SeqLabelingModel(AbstractTextSpanClassifyModelAIConfig, TFBasedModel):
    def _load_config(self, config):
        super()._load_config(config)
        self.seq_label_strategy: SeqLabelStrategy = SeqLabelStrategy[
            self.task_config['seq_label_strategy']]

        self.max_len = self.task_config['max_len']
        self.multi_label = self.task_config.get("multi_label", False)
        self.label_list = read_seq_label_file(
            self.task_config['label_file_path'], self.seq_label_strategy)
        self.label2id, self.id2label = seq2dict(self.label_list)
        self.label_num = len(self.label2id)

    def build_model(self,
                    pretrained_model_path=None,
                    pretrained_model_tag="bert",
                    bilstm_dim_list=[],
                    use_crf=False,
                    crf_lr_multiplier=100,
                    pos_weight=1,
                    **kwargs):
        """
        构建模型
        Args:
            pretrained_model_path: 预训练模型地址
            pretrained_model_tag: 预训练模型类型bert/...
            dense_dim_list: 序列encode之后过的每个全连接层的维度(默认用relu做激活函数)。如果为空列表,表示不添加全连接层
            hidden_dropout_prob: 序列encode之后过得dropout层drop概率。避免过拟合
            bilstm_dim_list: 序列encode过程中如果要接bilstm。输入每个bilstm层的dimension
            use_crf: 是否使用crf层
            crf_lr_multiplier: crf层的学习率倍数,参考https://kexue.fm/archives/7196
            pos_weight: 正例的权重
            **kwargs:
        Returns:
            nn模型
        """
        with self.get_scope():
            encoder_model = get_sequence_encoder_model(
                vocab_size=self.vocab_size,
                pretrained_model_path=pretrained_model_path,
                pretrained_model_tag=pretrained_model_tag,
                bilstm_dim_list=bilstm_dim_list,
                **kwargs)
            sequence_embedding = encoder_model.output
            classify_activation = sigmoid if self.multi_label else softmax
            classifier_layer = Dense(
                self.label_num,
                name="token_classifier",
                activation=classify_activation,
                kernel_initializer=TruncatedNormal(stddev=0.02))
            prob_vec_output = classifier_layer(sequence_embedding)
            if use_crf:
                classifier_layer = CRF(lr_multiplier=crf_lr_multiplier,
                                       name="crf_layer")
                prob_vec_output = classifier_layer(prob_vec_output)
            if self.multi_label:
                prob_vec_output = Lambda(
                    lambda x: x**pos_weight,
                    name="pos_weight_layer")(prob_vec_output)

            self.nn_model = Model(inputs=encoder_model.inputs,
                                  outputs=[prob_vec_output],
                                  name="token_classify_model")
        logger.info("nn model's summary:")
        self.nn_model.summary(print_fn=logger.info)
        self._update_model_dict("test", self.nn_model)
        return self.nn_model

    def compile_model(self, optimizer_name: str, optimizer_args: dict,
                      **kwargs):
        logger.info(
            f"compile model with optimizer_name:{optimizer_name}, optimizer_args:{optimizer_args}"
        )
        with self.get_scope():
            classify_labels = Input(
                shape=(None, self.label_num) if self.multi_label else (None, ),
                name='classify_labels',
                dtype=tf.int32)
            token_ids, segment_ids = self.nn_model.inputs
            output = self.nn_model([token_ids, segment_ids])
            self.train_model = Model(
                inputs=[token_ids, segment_ids, classify_labels],
                outputs=[output])

        loss_mask = Lambda(
            function=lambda x: tf.cast(tf.not_equal(x, 0), tf.float32),
            name="pred_mask")(token_ids)

        # When computing the loss, mask out the contribution of pad tokens
        loss_layer = build_classify_loss_layer(multi_label=self.multi_label,
                                               with_mask=True)
        loss = loss_layer([classify_labels, output, loss_mask])
        self.train_model.add_loss(loss)

        # When computing accuracy, mask out pad tokens as well
        masked_accuracy_func = masked_binary_accuracy if self.multi_label else masked_sparse_categorical_accuracy
        metric_layer = MetricLayer(masked_accuracy_func)
        masked_accuracy = metric_layer([classify_labels, output, loss_mask])
        self.train_model.add_metric(masked_accuracy,
                                    aggregation="mean",
                                    name="accuracy")

        optimizer = OptimizerFactory.create(optimizer_name, optimizer_args)
        self.train_model.compile(optimizer=optimizer)
        logger.info("training model's summary:")
        self.train_model.summary(print_fn=logger.info)
        self._update_model_dict("train", self.train_model)

    def example2feature(self, example: UnionTextSpanClassifyExample) -> Dict:
        feature = self.tokenizer.do_tokenize(text=example.text, store_map=True)
        if isinstance(example, LabeledTextSpanClassifyExample):
            feature.update(text_spans=[
                e.dict(exclude_none=True) for e in example.text_spans
            ])
        return feature

    def _feature2records(self, idx, feature: Dict, mode: str) -> List[Dict]:
        record = dict(idx=idx, **feature)
        if mode == "train":
            text_spans = feature.get("text_spans")
            if text_spans is None:
                raise ValueError(f"not text_spans key found in train mode!")
            text_spans = [TextSpan(**e) for e in text_spans]
            token_label_func = get_overlap_token_label_sequence if self.multi_label else get_token_label_sequence
            target_token_label_sequence = token_label_func(
                feature["tokens"], text_spans, feature["char2token"],
                self.seq_label_strategy)
            classify_labels = token_label2classify_label_input(
                target_token_label_sequence, self.multi_label, self.label2id)
            record.update(
                target_token_label_sequence=target_token_label_sequence,
                classify_labels=classify_labels)

        truncate_record(
            record=record,
            max_len=self.max_len,
            keys=["token_ids", "segment_ids", "tokens", "classify_labels"])

        return [record]

    @discard_kwarg
    @log_cost_time
    def _post_predict(self,
                      features,
                      pred_tensors,
                      show_detail,
                      threshold=0.5) -> List[TextSpans]:
        def _tensor2output(feature, pred_tensor) -> TextSpans:
            pred_labels = tensor2labels(pred_tensor,
                                        self.multi_label,
                                        self.id2label,
                                        threshold=threshold)
            tokens = feature["tokens"]
            pred_labels = pred_labels[:len(tokens)]
            if show_detail:
                logger.info(f"tokens:{tokens}")
                for idx, (token,
                          pred_label) in enumerate(zip(tokens, pred_labels)):
                    if pred_label and pred_label != self.seq_label_strategy.empty:
                        logger.info(
                            f"idx:{idx}, token:{token}, pred:{pred_label}")
            decode_func = decode_overlap_label_sequence if self.multi_label else decode_label_sequence
            text_spans = decode_func(feature, pred_labels,
                                     self.seq_label_strategy)
            return text_spans

        preds = [_tensor2output(f, p) for f, p in zip(features, pred_tensors)]
        return preds
Example #12
class TransformerMLMModel(AbstractMLMClassifyModel, TFBasedModel):
    def _load_config(self, config):
        super()._load_config(config)
        self.max_len = self.task_config["max_len"]
        self.mask_percent = self.task_config.get("mask_percent", 0.15)

    def build_model(self,
                    pretrained_model_path=None,
                    pretrained_model_tag="bert",
                    pos_weight=1.,
                    bilstm_dim_list=None,
                    transformer_kwargs=None,
                    h5_file=None):
        # avoid shared mutable defaults; update a per-call copy instead
        transformer_kwargs = dict(transformer_kwargs or {})
        with self.get_scope():
            if hasattr(self, 'keep_token_ids'):
                transformer_kwargs.update(keep_tokens=self.keep_token_ids)

            self.nn_model = get_mlm_model(
                pretrained_model_path,
                pretrained_model_tag=pretrained_model_tag,
                transformer_kwargs=transformer_kwargs,
                h5_file=h5_file)
        logger.info("nn model's summary:")
        self.nn_model.summary(print_fn=logger.info)
        self._update_model_dict("test", self.nn_model)
        return self.nn_model

    @discard_kwarg
    def compile_model(self, optimizer_name, optimizer_args, rdrop_alpha=None):
        logger.info("compiling model...")
        with self.get_scope():
            token_output = Input(shape=(None, ),
                                 name='token_output',
                                 dtype=tf.int32)
            self.train_model = Model(self.nn_model.inputs + [token_output],
                                     self.nn_model.output,
                                     name="train_model")
        output = self.train_model.output

        loss_mask = Lambda(
            function=lambda x: tf.cast(tf.not_equal(x, 0), tf.float32),
            name="pred_mask")(token_output)
        loss_layer = build_classify_loss_layer(multi_label=False,
                                               with_mask=True)
        loss = loss_layer([token_output, output, loss_mask])
        self.train_model.add_loss(loss)

        accuracy_func = masked_sparse_categorical_accuracy
        metric_layer = MetricLayer(accuracy_func, name="metric_layer")
        accuracy = metric_layer([token_output, output, loss_mask])

        self.train_model.add_metric(accuracy,
                                    aggregation="mean",
                                    name="accuracy")
        optimizer = OptimizerFactory.create(optimizer_name, optimizer_args)
        self.train_model.compile(optimizer=optimizer)

        logger.info("training model's summary:")
        self.train_model.summary(print_fn=logger.info)
        self._update_model_dict("train", self.train_model)

    def example2feature(self, example: MLMExample) -> Dict:
        feature = self.tokenizer.do_tokenize(text=example.text)
        tokens = feature["tokens"]
        masks = [e for e in enumerate(tokens) if e[1] == MASK]
        feature["masks"] = masks
        if example.masked_tokens:
            assert len(masks) == len(example.masked_tokens)
            feature["masked_tokens"] = [
                (m[0], t) for m, t in zip(masks, example.masked_tokens)
            ]
        return feature

    def _feature2records(self, idx, feature: Dict, mode: str) -> List[Dict]:
        record = dict(idx=idx, **feature)
        if mode == "train":
            masked_tokens = feature.get("masked_tokens")
            if not masked_tokens:
                token_infos = [
                    e for e in enumerate(feature["tokens"])
                    if e[1] not in self.tokenizer.special_tokens
                ]
                masked_tokens = random.sample(
                    token_infos, int(len(token_infos) * self.mask_percent))
            token_output = [0] * len(feature["tokens"])
            tokens = copy.copy(feature["tokens"])
            token_ids = copy.copy(feature["token_ids"])

            for idx, token in masked_tokens:
                token_id = self.tokenizer.token2id(token)
                token_output[idx] = token_id
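                # BERT-style corruption below: with probability 0.8 replace the
                # token with [MASK], 0.1 with a random vocab token, 0.1 keep it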
                if tokens[idx] != MASK:
                    r = random.random()
                    if r <= 0.8:
                        t = MASK
                    elif r <= 0.9:
                        t = random.choice(self.tokenizer.vocabs)
                    else:
                        t = token
                    tokens[idx] = t
                    token_ids[idx] = self.tokenizer.token2id(t)

            record.update(token_output=token_output,
                          masked_tokens=masked_tokens,
                          tokens=tokens,
                          token_ids=token_ids)
        truncate_record(
            record=record,
            max_len=self.max_len,
            keys=["token_ids", "segment_ids", "tokens", "token_output"])
        return [record]

    @discard_kwarg
    @log_cost_time
    def _post_predict(self,
                      features,
                      pred_tensors,
                      show_detail=False,
                      threshold=.5) -> List[List[str]]:
        def _tensor2output(feature, pred_tensor):
            # argmax over the vocabulary at every position, then read off the ids
            # at the masked positions and map them back to tokens
            masks = feature["masks"]
            pred_tensor = np.argmax(pred_tensor, axis=-1)
            masked_token_ids = [
                pred_tensor[e[0]] for e in masks if e[0] < len(pred_tensor)
            ]
            masked_tokens = [
                self.tokenizer.id2token(i) for i in masked_token_ids
            ]
            return masked_tokens

        preds = [_tensor2output(f, t) for f, t in zip(features, pred_tensors)]
        return preds
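
Note: masked_sparse_categorical_accuracy is referenced above but not defined in this snippet. A minimal sketch of such a metric, assuming y_true holds integer token ids and mask is a 0/1 tensor over positions (the real helper may differ):

import tensorflow as tf

def masked_sparse_categorical_accuracy_sketch(y_true, y_pred, mask):
    # accuracy over masked positions only: compare argmax predictions against
    # integer targets, weight by the 0/1 mask, and normalize by the mask sum
    matches = tf.cast(
        tf.equal(tf.cast(y_true, tf.int64), tf.argmax(y_pred, axis=-1)),
        tf.float32)
    mask = tf.cast(mask, tf.float32)
    return tf.reduce_sum(matches * mask) / (tf.reduce_sum(mask) + 1e-8)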
Example #13
0
class VAE():
    '''
    VAE Model
    Variational autoencoder modeling of the Ising spin configurations
    '''
    def __init__(self,
                 input_shape=(81, 81, 1),
                 scaled=False,
                 padded=False,
                 conv_number=4,
                 filter_base_length=3,
                 filter_base_stride=3,
                 filter_base=32,
                 filter_length=3,
                 filter_stride=3,
                 filter_factor=1,
                 dropout=False,
                 z_dim=2,
                 kl_anneal=False,
                 alpha=1.0,
                 beta=8.0,
                 lamb=1.0,
                 krnl_init='lecun_normal',
                 act='selu',
                 opt='nadam',
                 lr=1e-5,
                 batch_size=128,
                 dataset_size=1115136):
        ''' initialize model parameters '''
        self.eps = 1e-8
        self.scaled = scaled
        self.padded = padded
        if self.padded:
            self.padding = 'same'
        else:
            self.padding = 'valid'
        # convolutional parameters
        # number of convolutions
        self.conv_number = conv_number
        # number of filters for first convolution
        self.filter_base = filter_base
        # multiplicative factor for filters in subsequent convolutions
        self.filter_factor = filter_factor
        # filter side length
        self.filter_base_length = filter_base_length
        self.filter_length = filter_length
        # filter stride
        self.filter_base_stride = filter_base_stride
        self.filter_stride = filter_stride
        # convolutional input and output shapes
        self.input_shape = input_shape
        self.n_feat = np.prod(self.input_shape)
        self.final_conv_shape = get_final_conv_shape(
            self.input_shape, self.conv_number, self.filter_base_length,
            self.filter_length, self.filter_base_stride, self.filter_stride,
            self.filter_base, self.filter_factor, self.padded)
        self.dropout = dropout
        # latent and classification dimensions
        # latent dimension
        self.z_dim = z_dim
        # total correlation weights
        self.kl_anneal_b = kl_anneal
        self.alpha, self.beta, self.lamb = alpha, beta, lamb
        # kernel initializer and activation
        self.krnl_init = krnl_init
        self.act = act
        if self.scaled:
            self.dec_out_act = 'sigmoid'
        else:
            self.dec_out_act = 'tanh'
        self.out_init = 'glorot_uniform'
        # optimizer
        self.vae_opt_n = opt
        # learning rate
        self.lr = lr
        # batch size, dataset size, and log importance weight
        self.batch_size = batch_size
        self.dataset_size = dataset_size
        self._set_log_importance_weight()
        self._set_prior_params()
        # loss history
        self.vae_loss_history = []
        self.tc_loss_history = []
        self.rc_loss_history = []
        # past epochs (changes if loading past trained model)
        self.past_epochs = 0
        # checkpoint managers
        self.vae_mngr = None
        # build full model
        self._build_model()

    def get_file_prefix(self):
        ''' gets parameter tuple and filename string prefix '''
        params = (self.conv_number, self.filter_base_length,
                  self.filter_base_stride, self.filter_base,
                  self.filter_length, self.filter_stride, self.filter_factor,
                  self.dropout, self.z_dim, self.kl_anneal_b, self.alpha,
                  self.beta, self.lamb, self.krnl_init, self.act,
                  self.vae_opt_n, self.lr, self.batch_size)
        file_name = 'btcvae.{}.{}.{}.{}.{}.{}.{}.{:d}.{}.{:d}.{:.0e}.{:.0e}.{:.0e}.{}.{}.{}.{:.0e}.{}'.format(
            *params)
        return file_name

    def scale_configurations(self, x):
        return (x + 1) / 2

    def _set_log_importance_weight(self):
        ''' logarithmic importance weights for minibatch stratified sampling '''
        n, m = self.dataset_size, self.batch_size - 1
        strw = np.float32(n - m) / np.float32(n * m)
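        # weight matrix for the minibatch stratified sampling estimator
        # (beta-TCVAE): most entries are 1/m, with 1/n and the stratification
        # correction strw filled in following the reference construction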
        w = np.ones((self.batch_size, self.batch_size), dtype=np.float32) / m
        w.reshape(-1)[::m + 1] = 1. / n
        w.reshape(-1)[1::m + 1] = strw
        w[m - 1, 0] = strw
        self.log_importance_weight = K.log(K.constant(w, dtype=tf.float32))
        return

    def _set_prior_params(self):
        # mu = 0, stdv = 1 => log(var) = 0
        self.mu_prior = K.constant(np.zeros(shape=(self.batch_size,
                                                   self.z_dim)),
                                   dtype=tf.float32)
        self.logvar_prior = K.constant(np.zeros(shape=(self.batch_size,
                                                       self.z_dim)),
                                       dtype=tf.float32)
        return

    def sample_gaussian(self, beta):
        ''' samples a point in a multivariate gaussian distribution '''
        mu, logvar = beta
        return mu + K.exp(0.5 * logvar) * K.random_normal(
            shape=(self.batch_size, self.z_dim))

    def sample_logistic(self, shape):
        u = K.random_uniform(shape, 0.0, 1.0)
        l = K.log(u + self.eps) - K.log(1 - u + self.eps)
        return l

    def sample_bernoulli(self, p):
        logp = tf.math.log_sigmoid(p)
        logq = tf.math.log_sigmoid(-p)
        l = self.sample_logistic(K.int_shape(p))
        z = logp - logq + l
        return 1. / (1. + K.exp(-100 * z))

    def gauss_log_density(self, z, beta=None):
        ''' logarithmic probability density for multivariate gaussian distribution given samples z and parameters beta = (mu, log(var)) '''
        if beta is None:
            mu, logvar = self.mu_prior, self.logvar_prior
        else:
            mu, logvar = beta
        norm = K.log(2 * np.pi)
        zsc = (z - mu) * K.exp(-0.5 * logvar)
        return -0.5 * (zsc**2 + logvar + norm)

    def log_sum_exp(self, z):
        ''' numerically stable logarithmic sum of exponentials '''
        m = K.max(z, axis=1, keepdims=True)
        u = z - m
        m = K.squeeze(m, 1)
        return m + K.log(K.sum(K.exp(u), axis=1, keepdims=False))

    def total_correlation_loss(self):
        # log p(z)
        logpz = K.sum(K.reshape(self.gauss_log_density(self.z),
                                shape=(self.batch_size, -1)),
                      axis=1)
        # log q(z|x)
        logqz_x = K.sum(K.reshape(self.gauss_log_density(
            self.z, (self.mu, self.logvar)),
                                  shape=(self.batch_size, -1)),
                        axis=1)
        # log q(z) ~ log (1/MN) sum_m q(z|x_m) = -log(MN)+log(sum_m(exp(q(z|x_m))))
        _logqz = self.gauss_log_density(
            K.reshape(self.z, shape=(self.batch_size, 1, self.z_dim)),
            (K.reshape(self.mu, shape=(1, self.batch_size, self.z_dim)),
             K.reshape(self.logvar, shape=(1, self.batch_size, self.z_dim))))
        logqz_prodmarginals = K.sum(self.log_sum_exp(
            K.reshape(self.log_importance_weight,
                      shape=(self.batch_size, self.batch_size, 1)) + _logqz),
                                    axis=1)
        logqz = self.log_sum_exp(self.log_importance_weight +
                                 K.sum(_logqz, axis=2))
        # alpha controls index-code mutual information
        # beta controls total correlation
        # gamma controls dimension-wise kld
        melbo = -self.alpha * (logqz_x - logqz) - self.beta * (
            logqz - logqz_prodmarginals) - self.lamb * (logqz_prodmarginals -
                                                        logpz)
        return -self.kl_anneal * melbo / self.z_dim

    def kullback_leibler_divergence_loss(self):
        return -0.5 * self.kl_anneal * self.beta * K.sum(
            1. + self.logvar - K.square(self.mu) - K.exp(self.logvar), axis=-1)

    def reconstruction_loss(self):
        if not self.scaled:
            x = self.scale_configurations(self.enc_x_input)
            x_hat = self.scale_configurations(self.x_output)
        else:
            x = self.enc_x_input
            x_hat = self.x_output
        return -K.sum(K.reshape(x * K.log(x_hat + self.eps) +
                                (1. - x) * K.log(1. - x_hat + self.eps),
                                shape=(self.batch_size, -1)),
                      axis=-1) / self.n_feat

    def _build_model(self):
        ''' builds each component of the VAE model '''
        self._build_encoder()
        self._build_decoder()
        self._build_vae()

    def _build_encoder(self):
        ''' builds encoder model '''
        # takes sample (real or fake) as input
        self.enc_x_input = Input(batch_shape=(self.batch_size, ) +
                                 self.input_shape,
                                 name='enc_x_input')
        conv = self.enc_x_input
        # iterative convolutions over input
        for i in range(self.conv_number):
            filter_number = get_filter_number(i, self.filter_base,
                                              self.filter_factor)
            filter_length, filter_stride = get_filter_length_stride(
                i, self.filter_base_length, self.filter_base_stride,
                self.filter_length, self.filter_stride)
            conv = Conv2D(filters=filter_number,
                          kernel_size=filter_length,
                          kernel_initializer=self.krnl_init,
                          padding=self.padding,
                          strides=filter_stride,
                          name='enc_conv_{}'.format(i))(conv)
            if self.act == 'lrelu':
                conv = LeakyReLU(alpha=0.1,
                                 name='enc_conv_lrelu_{}'.format(i))(conv)
                conv = BatchNormalization(
                    name='enc_conv_batchnorm_{}'.format(i))(conv)
                if self.dropout:
                    conv = SpatialDropout2D(
                        rate=0.5, name='enc_conv_drop_{}'.format(i))(conv)
            elif self.act == 'selu':
                conv = Activation(activation='selu',
                                  name='enc_conv_selu_{}'.format(i))(conv)
                if self.dropout:
                    conv = AlphaDropout(
                        rate=0.5,
                        noise_shape=(self.batch_size, 1, 1, filter_number),
                        name='enc_conv_drop_{}'.format(i))(conv)
        # flatten final convolutional layer
        x = Flatten(name='enc_fltn_0')(conv)
        if np.any(np.array([self.alpha, self.beta, self.lamb]) > 0):
            # mean
            self.mu = Dense(units=self.z_dim,
                            kernel_initializer=self.out_init,
                            activation='linear',
                            name='enc_mu_output')(x)
            # logarithmic variance
            self.logvar = Dense(units=self.z_dim,
                                kernel_initializer=self.out_init,
                                activation='linear',
                                name='enc_logvar_output')(x)
            # latent space
            self.z = Lambda(self.sample_gaussian,
                            output_shape=(self.z_dim, ),
                            name='enc_z_output')([self.mu, self.logvar])
            # build encoder
            self.encoder = Model(inputs=[self.enc_x_input],
                                 outputs=[self.mu, self.logvar, self.z],
                                 name='encoder')
        else:
            # latent space
            self.z = Dense(self.z_dim,
                           kernel_initializer=self.out_init,
                           activation='sigmoid',
                           name='enc_z_output')(x)
            # build encoder
            self.encoder = Model(inputs=[self.enc_x_input],
                                 outputs=[self.z],
                                 name='encoder')

    def _build_decoder(self):
        ''' builds decoder model '''
        # latent unit gaussian and categorical inputs
        self.dec_z_input = Input(batch_shape=(self.batch_size, self.z_dim),
                                 name='dec_z_input')
        x = self.dec_z_input
        # dense layer with same feature count as final convolution
        u = 0
        x = Dense(units=np.prod(self.final_conv_shape),
                  kernel_initializer=self.krnl_init,
                  name='dec_dense_{}'.format(u))(x)
        if self.act == 'lrelu':
            x = LeakyReLU(alpha=0.1, name='dec_dense_lrelu_{}'.format(u))(x)
            x = BatchNormalization(name='dec_dense_batchnorm_{}'.format(u))(x)
        elif self.act == 'selu':
            x = Activation(activation='selu',
                           name='dec_dense_selu_{}'.format(u))(x)
        u += 1
        # reshape to final convolution shape
        convt = Reshape(target_shape=self.final_conv_shape,
                        name='dec_rshp_0')(x)
        if self.dropout:
            if self.act == 'lrelu':
                convt = SpatialDropout2D(rate=0.5,
                                         name='dec_rshp_drop_0')(convt)
            elif self.act == 'selu':
                convt = AlphaDropout(rate=0.5,
                                     noise_shape=(self.batch_size, 1, 1,
                                                  self.final_conv_shape[-1]),
                                     name='dec_rshp_drop_0')(convt)
        u = 0
        # transform to sample shape with transposed convolutions
        for i in range(self.conv_number - 1, 0, -1):
            filter_number = get_filter_number(i - 1, self.filter_base,
                                              self.filter_factor)
            convt = Conv2DTranspose(filters=filter_number,
                                    kernel_size=self.filter_length,
                                    kernel_initializer=self.krnl_init,
                                    padding=self.padding,
                                    strides=self.filter_stride,
                                    name='dec_convt_{}'.format(u))(convt)
            if self.act == 'lrelu':
                convt = LeakyReLU(alpha=0.1,
                                  name='dec_convt_lrelu_{}'.format(u))(convt)
                convt = BatchNormalization(
                    name='dec_convt_batchnorm_{}'.format(u))(convt)
                if self.dropout:
                    convt = SpatialDropout2D(
                        rate=0.5, name='dec_convt_drop_{}'.format(u))(convt)
            elif self.act == 'selu':
                convt = Activation(activation='selu',
                                   name='dec_convt_selu_{}'.format(u))(convt)
                if self.dropout:
                    convt = AlphaDropout(
                        rate=0.5,
                        noise_shape=(self.batch_size, 1, 1, filter_number),
                        name='dec_convt_drop_{}'.format(u))(convt)
            u += 1
        self.dec_x_output = Conv2DTranspose(
            filters=1,
            kernel_size=self.filter_base_length,
            kernel_initializer=self.out_init,
            activation=self.dec_out_act,
            padding=self.padding,
            strides=self.filter_base_stride,
            name='dec_x_output')(convt)
        # build decoder
        self.decoder = Model(inputs=[self.dec_z_input],
                             outputs=[self.dec_x_output],
                             name='decoder')

    def _build_vae(self):
        ''' builds variational autoencoder network '''
        self.kl_anneal = Input(batch_shape=(self.batch_size, ),
                               name='kl_anneal')
        # build VAE
        if np.all(np.array([self.alpha, self.beta, self.lamb]) == 0):
            self.x_output = self.decoder(self.encoder(self.enc_x_input))
            self.vae = Model(inputs=[self.enc_x_input],
                             outputs=[self.x_output],
                             name='variational_autoencoder')
        elif self.alpha == self.beta == self.lamb:
            self.x_output = self.decoder(self.encoder(self.enc_x_input)[2])
            self.vae = Model(inputs=[self.enc_x_input, self.kl_anneal],
                             outputs=[self.x_output],
                             name='variational_autoencoder')
            tc_loss = self.kl_anneal * self.kullback_leibler_divergence_loss()
            self.vae.add_loss(tc_loss)
            self.vae.add_metric(tc_loss, name='tc_loss', aggregation='mean')
        elif np.any(np.array([self.alpha, self.beta, self.lamb]) > 0):
            self.x_output = self.decoder(self.encoder(self.enc_x_input)[2])
            self.vae = Model(inputs=[self.enc_x_input, self.kl_anneal],
                             outputs=[self.x_output],
                             name='variational_autoencoder')
            tc_loss = self.kl_anneal * self.total_correlation_loss()
            self.vae.add_loss(tc_loss)
            self.vae.add_metric(tc_loss, name='tc_loss', aggregation='mean')
        # define VAE optimizer
        if self.vae_opt_n == 'sgd':
            self.vae_opt = SGD(learning_rate=self.lr)
        elif self.vae_opt_n == 'sgdm':
            self.vae_opt = SGD(learning_rate=self.lr, momentum=0.5)
        elif self.vae_opt_n == 'nsgd':
            self.vae_opt = SGD(learning_rate=self.lr,
                               momentum=0.5,
                               nesterov=True)
        elif self.vae_opt_n == 'rmsprop':
            self.vae_opt = RMSprop(learning_rate=self.lr)
        elif self.vae_opt_n == 'rmsprop_cent':
            self.vae_opt = RMSprop(learning_rate=self.lr, centered=True)
        elif self.vae_opt_n == 'adam':
            self.vae_opt = Adam(learning_rate=self.lr, beta_1=0.5)
        elif self.vae_opt_n == 'adam_ams':
            self.vae_opt = Adam(learning_rate=self.lr,
                                beta_1=0.5,
                                amsgrad=True)
        elif self.vae_opt_n == 'adamax':
            self.vae_opt = Adamax(learning_rate=self.lr, beta_1=0.5)
        elif self.vae_opt_n == 'adamax_ams':
            self.vae_opt = Adamax(learning_rate=self.lr,
                                  beta_1=0.5,
                                  amsgrad=True)
        elif self.vae_opt_n == 'nadam':
            self.vae_opt = Nadam(learning_rate=self.lr, beta_1=0.5)
        # compile VAE
        rc_loss = self.reconstruction_loss()
        self.vae.add_loss(rc_loss)
        self.vae.add_metric(rc_loss, name='rc_loss', aggregation='mean')
        self.vae.compile(optimizer=self.vae_opt)

    def encode(self, x_batch, verbose=False):
        ''' encoder input configurations '''
        return self.encoder.predict(x_batch,
                                    batch_size=self.batch_size,
                                    verbose=verbose)

    def generate(self, beta_batch, verbose=False):
        ''' generate new configurations using samples from the latent distribution '''
        # sample latent space
        if np.any(np.array([self.alpha, self.beta, self.lamb]) > 0):
            if len(beta_batch) == 2:
                z_batch = self.sample_gaussian(beta_batch)
            else:
                z_batch = beta_batch
        else:
            z_batch = beta_batch
        # generate configurations
        return self.decoder.predict(z_batch,
                                    batch_size=self.batch_size,
                                    verbose=verbose)

    def model_summaries(self):
        ''' print model summaries '''
        self.encoder.summary()
        self.decoder.summary()
        self.vae.summary()

    def save_weights(self, name, lattice_sites, interval, num_samples, scaled,
                     seed):
        ''' save weights to file '''
        # file parameters
        params = (name, lattice_sites, interval, num_samples, scaled, seed)
        file_name = os.getcwd() + '/{}.{}.{}.{}.{:d}.{}.'.format(
            *params) + self.get_file_prefix() + '.weights.h5'
        # save weights
        self.vae.save_weights(file_name)

    def load_weights(self, name, lattice_sites, interval, num_samples, scaled,
                     seed):
        ''' load weights from file '''
        # file parameters
        params = (name, lattice_sites, interval, num_samples, scaled, seed)
        file_name = os.getcwd() + '/{}.{}.{}.{}.{:d}.{}.'.format(
            *params) + self.get_file_prefix() + '.weights.h5'
        # load weights
        self.vae.load_weights(file_name)

    def get_losses(self):
        ''' retrieve loss histories '''
        # reshape arrays into (epochs, batches)
        vae_loss = np.array(self.vae_loss_history).reshape(
            -1, self.num_batches)
        tc_loss = np.array(self.tc_loss_history).reshape(-1, self.num_batches)
        rc_loss = np.array(self.rc_loss_history).reshape(-1, self.num_batches)
        return vae_loss, tc_loss, rc_loss

    def save_losses(self, name, lattice_sites, interval, num_samples, scaled,
                    seed):
        ''' save loss histories to file '''
        # retrieve losses
        losses = self.get_losses()
        # file parameters
        params = (name, lattice_sites, interval, num_samples, scaled, seed)
        file_name = os.getcwd() + '/{}.{}.{}.{}.{:d}.{}.'.format(
            *params) + self.get_file_prefix() + '.loss.npy'
        np.save(file_name, np.stack(losses, axis=-1))

    def load_losses(self, name, lattice_sites, interval, num_samples, scaled,
                    seed):
        ''' load loss histories from file '''
        # file parameters
        params = (name, lattice_sites, interval, num_samples, scaled, seed)
        file_name = os.getcwd() + '/{}.{}.{}.{}.{:d}.{}.'.format(
            *params) + self.get_file_prefix() + '.loss.npy'
        losses = np.load(file_name)
        # set past epochs
        self.past_epochs = losses.shape[0]
        self.num_batches = losses.shape[1]
        # change loss histories into lists
        self.vae_loss_history = list(losses[:, :, 0].reshape(-1))
        self.tc_loss_history = list(losses[:, :, 1].reshape(-1))
        self.rc_loss_history = list(losses[:, :, 2].reshape(-1))

    def initialize_checkpoint_managers(self, name, lattice_sites, interval,
                                       num_samples, scaled, seed):
        ''' initialize training checkpoint managers '''
        # initialize checkpoints
        self.vae_ckpt = Checkpoint(step=tf.Variable(0),
                                   optimizer=self.vae_opt,
                                   net=self.vae)
        # file parameters
        params = (name, lattice_sites, interval, num_samples, scaled, seed)
        directory = os.getcwd() + '/{}.{}.{}.{}.{:d}.{}.'.format(
            *params) + self.get_file_prefix() + '.ckpts'
        # initialize checkpoint managers
        self.vae_mngr = CheckpointManager(self.vae_ckpt,
                                          directory + '/vae/',
                                          max_to_keep=4)

    def load_latest_checkpoint(self, name, lattice_sites, interval,
                               num_samples, scaled, seed):
        ''' load latest training checkpoint from file '''
        # initialize checkpoint managers
        self.initialize_checkpoint_managers(name, lattice_sites, interval,
                                            num_samples, scaled, seed)
        self.load_losses(name, lattice_sites, interval, num_samples, scaled,
                         seed)
        # file parameters
        params = (name, lattice_sites, interval, num_samples, scaled, seed)
        directory = os.getcwd() + '/{}.{}.{}.{}.{:d}.{}.'.format(
            *params) + self.get_file_prefix() + '.ckpts'
        # restore checkpoints
        self.vae_ckpt.restore(
            self.vae_mngr.latest_checkpoint).assert_consumed()

    def train_vae(self, x_batch, kl_anneal):
        ''' train VAE '''
        # VAE losses
        if np.any(np.array([self.alpha, self.beta, self.lamb]) > 0):
            vae_loss, tc_loss, rc_loss = self.vae.train_on_batch(
                [x_batch, kl_anneal])
            self.vae_loss_history.append(vae_loss.mean())
            self.tc_loss_history.append(tc_loss)
            self.rc_loss_history.append(rc_loss)
        else:
            vae_loss, rc_loss = self.vae.train_on_batch(x_batch)
            self.vae_loss_history.append(vae_loss.mean())
            self.tc_loss_history.append(0)
            self.rc_loss_history.append(rc_loss)

    def rolling_loss_average(self, epoch, batch):
        ''' calculate rolling loss averages over batches during training '''
        epoch = epoch + self.past_epochs
        # catch case where there are no calculated losses yet
        if batch == 0:
            vae_loss = 0
            tc_loss = 0
            rc_loss = 0
        # calculate rolling average
        else:
            # start index for current epoch
            start = self.num_batches * epoch
            # stop index for current batch (given epoch)
            stop = self.num_batches * epoch + batch + 1
            # average loss histories
            vae_loss = np.mean(self.vae_loss_history[start:stop])
            tc_loss = np.mean(self.tc_loss_history[start:stop])
            rc_loss = np.mean(self.rc_loss_history[start:stop])
        return vae_loss, tc_loss, rc_loss

    def fit(self,
            x_train,
            num_epochs=4,
            save_step=4,
            random_sampling=False,
            verbose=False):
        ''' fit model '''
        self.num_temp_x, self.num_temp_y, self.num_samples, _, _, = x_train.shape
        self.num_batches = (self.num_temp_x * self.num_temp_y *
                            self.num_samples) // self.batch_size
        if random_sampling:
            # x_train = extract_unique_data(x_train, self.num_temp_x, self.num_temp_y, self.num_samples, self.input_shape)
            x_train = x_train.reshape(
                self.num_temp_x * self.num_temp_y * self.num_samples,
                *self.input_shape)
        else:
            x_train = reorder_training_data(x_train, self.num_temp_x,
                                            self.num_temp_y, self.num_samples,
                                            self.input_shape, self.batch_size)
        num_epochs += self.past_epochs
        if np.all(np.array([self.alpha, self.beta, self.lamb]) == 0):
            kl_anneal = np.zeros((num_epochs, self.num_batches))
        elif not self.kl_anneal_b:
            kl_anneal = np.ones((num_epochs, self.num_batches))
        else:
            n_cycles = 4
            linear_kl_anneal = np.linspace(
                0., 1., num_epochs * self.num_batches // (2 * n_cycles))
            constant_kl_anneal = np.ones(num_epochs * self.num_batches //
                                         (2 * n_cycles))
            cycle_kl_anneal = np.concatenate(
                (linear_kl_anneal, constant_kl_anneal))
            kl_anneal = np.tile(cycle_kl_anneal,
                                n_cycles).reshape(num_epochs, self.num_batches)
        lr_factor = np.ones((num_epochs, self.num_batches))
        # loop through epochs
        for i in range(self.past_epochs, num_epochs):
            # construct progress bar for current epoch
            if random_sampling:
                batch_range = trange(self.num_batches,
                                     desc='',
                                     disable=not verbose)
            else:
                b = np.arange(self.num_batches)
                np.random.shuffle(b)
                batch_range = tqdm(b, desc='', disable=not verbose)
            # loop through batches
            u = 0
            for j in batch_range:
                # set batch loss description
                batch_loss = self.rolling_loss_average(i, u)
                batch_acc = np.exp(-batch_loss[-1])
                desc = 'Epoch: {}/{} LR Fctr: {:.4f} KL Anl: {:.4f} VAE Lss: {:.4f} TCKLD Lss: {:.4f} RCNST Lss: {:.4f} RCNST Acc: {:.4f}'.format(
                    i + 1, num_epochs, lr_factor[i, u], kl_anneal[i, u],
                    *batch_loss, batch_acc)
                batch_range.set_description(desc)
                # fetch batch
                if random_sampling:
                    x_batch = draw_random_batch(x_train, self.batch_size)
                else:
                    x_batch = draw_indexed_batch(x_train, self.batch_size, j)
                # train VAE
                self.vae_opt.learning_rate = lr_factor[i, u] * self.lr
                self.train_vae(x_batch=x_batch,
                               kl_anneal=kl_anneal[i, u] *
                               np.ones(self.batch_size))
                u += 1
            # if checkpoint managers are initialized
            if self.vae_mngr is not None:
                # increment checkpoint
                self.vae_ckpt.step.assign_add(1)
                # if save step is reached
                if np.int32(self.vae_ckpt.step) % save_step == 0:
                    # save model checkpoint
                    vae_save_path = self.vae_mngr.save()
                    print('Checkpoint DSC: {}'.format(vae_save_path))
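
Note: the compile pattern used throughout this class (attach losses and metrics with add_loss/add_metric, then call compile(optimizer=...) without a loss argument) can be reduced to a small self-contained sketch; the toy layer sizes and random data below are arbitrary assumptions, not part of the model above:

import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Dense, Input, Lambda
from tensorflow.keras.models import Model

# toy VAE: 8-d input -> 2-d gaussian latent -> 8-d reconstruction
x_in = Input(shape=(8,), name='x')
mu = Dense(2, name='mu')(x_in)
logvar = Dense(2, name='logvar')(x_in)
z = Lambda(lambda t: t[0] + K.exp(0.5 * t[1]) * K.random_normal(K.shape(t[0])),
           name='z')([mu, logvar])
x_hat = Dense(8, name='x_hat')(z)

toy_vae = Model(x_in, x_hat, name='toy_vae')
kl_loss = -0.5 * K.sum(1. + logvar - K.square(mu) - K.exp(logvar), axis=-1)
rc_loss = K.sum(K.square(x_in - x_hat), axis=-1)
toy_vae.add_loss(K.mean(kl_loss + rc_loss))
toy_vae.add_metric(kl_loss, name='kl_loss', aggregation='mean')
toy_vae.compile(optimizer='nadam')
toy_vae.fit(np.random.rand(256, 8).astype('float32'),
            epochs=1, batch_size=32, verbose=0)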
Example #14
0
elif 'rednet' in model_type:
    output = model_rednet(input_blur)
    loss = custom_loss_others(input_sharp)
    custom_psnr = custom_psnr_others(input_sharp)

# Define the model
model = Model(inputs=[input_sharp, input_blur], outputs=output)

# x_unwrap = generator(input_blur)
# model = Model(inputs=[input_sharp, input_blur], outputs=x_unwrap)

# Add custom loss and metric
model.add_loss(loss)
# Training happens on batches of images, so the loss and metric are reported as
# the batch mean of the per-image values (e.g. batch_mean(mean_scales_mse))
model.add_metric(custom_psnr, name='mean_scales_psnr', aggregation='mean')
# Compile the model
OPTIMIZER = Adam(learning_rate=initial_lr)
model.compile(optimizer=OPTIMIZER)

# Print the summary
model.summary()

# Callbacks
tensorboard_callback = TensorBoard(log_dir=log_dir)  # , histogram_freq=1, profile_batch='1')

save_weights_only = False

# PolynomialDecay definition
if 'reds' in task:
    data_size = train_sharp_generator.samples // batch_size
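
Note: the snippet is cut off before the PolynomialDecay definition. A generic sketch of wiring such a schedule into the optimizer, with purely illustrative values in place of the original script's settings:

import tensorflow as tf

# hypothetical values; the actual initial_lr / decay_steps of the original
# script are not shown in the fragment above
lr_schedule = tf.keras.optimizers.schedules.PolynomialDecay(
    initial_learning_rate=1e-4,
    decay_steps=100000,
    end_learning_rate=1e-6,
    power=1.0)
optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)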
Example #15
0
class RelationTokenClassifyModel(AbstractRelationClassifyModel, TFBasedModel):
    custom_objects = dict(TokenExtractLayer=TokenExtractLayer)

    def _load_config(self, config):
        super()._load_config(config)
        self.max_len = self.task_config['max_len']
        self.multi_label = self.task_config["multi_label"]
        self.text_span_labels = load_lines(self.task_config["text_span_label_path"])
        self.labels = load_lines(self.task_config['label_path'])
        self.sparse_label = not self.multi_label
        self.label2id, self.id2label = seq2dict(self.labels)
        self.label_num = len(self.label2id)
        self.embedding_strategy: EmbeddingStrategy = EmbeddingStrategy[self.task_config["embedding_strategy"].upper()]

    def _init_tokenizer(self):
        logger.info("initializing tokenizer")
        tokenizer_args = copy.copy(self.tokenizer_config["tokenizer_args"])
        vocab_path = tokenizer_args["vocabs"]
        vocabs = load_lines(vocab_path)
        special_tokens = flat([[f"[S:{label}]", f"[O:{label}]"] for label in self.text_span_labels])
        special_tokens.extend(["[/S]", "[/O]"])
        vocabs = replace_unused_tokens(vocabs, special_tokens)
        logger.info(f"replacing special tokens:{special_tokens} to vocabs")
        tokenizer_args.update(vocabs=vocabs)
        self.tokenizer = build_tokenizer(self.tokenizer_config["tokenizer_name"], tokenizer_args)
        self.vocab_size = self.tokenizer.vocab_size

    def build_model(self, pretrained_model_path=None, pretrained_model_tag="bert",
                    pos_weight=1., bilstm_dim_list=[], transformer_kwargs={}, **kwargs):

        with self.get_scope():
            encoder_model = get_sequence_encoder_model(vocab_size=self.vocab_size,
                                                       pretrained_model_path=pretrained_model_path,
                                                       pretrained_model_tag=pretrained_model_tag,
                                                       bilstm_dim_list=bilstm_dim_list,
                                                       transformer_kwargs=transformer_kwargs)

            span_idxs = Input(name="span_idxs", shape=(4,), dtype=tf.int32)
            sequence_embedding = encoder_model.output
            if self.embedding_strategy != EmbeddingStrategy.CLS:
                token_idxs = None
                if self.embedding_strategy == EmbeddingStrategy.ENTITY_START_END:
                    token_idxs = span_idxs
                if self.embedding_strategy == EmbeddingStrategy.ENTITY_START:
                    token_idxs = span_idxs[:, :2]
                token_extract_layer = TokenExtractLayer(name="token_extract_layer")
                class_embedding = token_extract_layer([sequence_embedding, token_idxs])
            else:
                class_embedding = Lambda(lambda x: x[:, 0], name="get_cls_layer")(sequence_embedding)

            classify_activation = sigmoid if self.multi_label else softmax
            classifier_layer = Dense(
                self.label_num, name="classify_layer", activation=classify_activation
            )
            output = classifier_layer(class_embedding)

            if self.multi_label:
                output = Lambda(lambda x: x ** pos_weight, name="pos_weight_layer")(output)
            self.nn_model = Model(inputs=encoder_model.inputs + [span_idxs], outputs=[output])
        logger.info("nn model's summary:")
        self.nn_model.summary(print_fn=logger.info)
        self._update_model_dict("test", self.nn_model)
        return self.nn_model

    def compile_model(self, optimizer_name, optimizer_args, rdrop_alpha=None):
        logger.info("compiling model...")
        with self.get_scope():
            classify_output = Input(shape=(self.label_num,) if self.multi_label else (), name='classify_output', dtype=tf.float32)
            inputs = self.nn_model.inputs
            output = self.nn_model.output
            loss_input = [classify_output, output]
            if rdrop_alpha:
                output1 = self.nn_model(inputs)
                loss_input.append(output1)
                output = Lambda(function=lambda x: sum(x) / len(x), name="avg_pool_layer")([output, output1])
            self.train_model = Model(inputs + [classify_output], output, name="train_model")


        loss_layer = build_classify_loss_layer(multi_label=self.multi_label, rdrop_alpha=rdrop_alpha)
        loss = loss_layer(loss_input)
        self.train_model.add_loss(loss)

        accuracy_func = binary_accuracy if self.multi_label else sparse_categorical_accuracy
        metric_layer = MetricLayer(accuracy_func, name="metric_layer")
        accuracy = metric_layer([classify_output, output])
        self.train_model.add_metric(accuracy, aggregation="mean", name="accuracy")

        optimizer = OptimizerFactory.create(optimizer_name, optimizer_args)
        self.train_model.compile(optimizer=optimizer)
        logger.info("training model's summary:")
        self.train_model.summary(print_fn=logger.info)
        self._update_model_dict("train", self.train_model)

    def example2feature(self, example: UnionRelationClassifyExample) -> Dict:
        idx_infos = [(f"[S:{example.text_span1.label}]", example.text_span1.span[0]),
                     (f"[O:{example.text_span2.label}]", example.text_span2.span[0]),
                     (f"[/S]", example.text_span1.span[1]),
                     (f"[/O]", example.text_span2.span[1])]

        text = example.text

        for token, idx in sorted(idx_infos, key=lambda x: x[1], reverse=True):
            text = text[:idx] + token + text[idx:]

        feature = self.tokenizer.do_tokenize(text)
        tokens = feature["tokens"]
        span_idxs = [tokens.index(e) for e, span in idx_infos]
        feature["span_idxs"] = span_idxs
        if isinstance(example, LabeledRelationClassifyExample):
            if isinstance(example.label, list):
                labels = [e.name for e in example.label]
            else:
                labels = [example.label.name]
            feature.update(labels=labels)
        return feature

    def _feature2records(self, idx, feature: Dict, mode: str) -> List[Dict]:
        record = dict(idx=idx, **feature)
        truncate_record(record=record, max_len=self.max_len, keys=["token_ids", "segment_ids", "tokens"])
        if mode == "train":
            labels = feature.get("labels")
            if labels is None:
                raise ValueError("no labels given in train mode!")
            classify_output = get_classify_output(labels, self.label2id, self.sparse_label)
            record.update(classify_output=classify_output)
        return [record]

    @discard_kwarg
    @log_cost_time
    def _post_predict(self, pred_tensors, show_detail=False, threshold=0.5) -> List[LabelOrLabels]:
        def _tensor2output(pred_tensor) -> LabelOrLabels:
            if self.multi_label:
                if show_detail:
                    logger.info(f"pred tensor")
                    logger.info(pred_tensor)
                hard_pred_tensor = apply_threshold(pred_tensor, threshold)
                label_data = [int(e.numpy()) for e in n_hot2idx_tensor(hard_pred_tensor)]
                return [Label(name=self.id2label[label_id], prob=pred_tensor[label_id]) for label_id in label_data]
            else:
                label_id = tf.argmax(pred_tensor, axis=-1).numpy()
                label = self.id2label[label_id]
                prob = pred_tensor[label_id]
                return Label(prob=prob, name=label)

        preds = [_tensor2output(t) for t in pred_tensors]
        return preds
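
Note: to make the marker-insertion step of example2feature concrete, here is a tiny standalone walk-through with a made-up sentence and character offsets (labels and offsets are assumptions for demonstration only):

# toy walk-through of the reversed-order insertion used in example2feature
text = "Alice works at Acme"
idx_infos = [("[S:PER]", 0), ("[O:ORG]", 15), ("[/S]", 5), ("[/O]", 19)]
for token, idx in sorted(idx_infos, key=lambda x: x[1], reverse=True):
    text = text[:idx] + token + text[idx:]
print(text)  # [S:PER]Alice[/S] works at [O:ORG]Acme[/O]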
Example #16
0
def create_model(input_shape=(256, 256, 3), coef=1., alpha=1):
    vgg = VGG19(weights='imagenet', include_top=False, input_shape=input_shape)
    vgg = Model(inputs=vgg.inputs,
                outputs=vgg.get_layer('block4_conv1').output,
                name='vgg')

    content_input = Input(shape=input_shape, name='content_input')
    style_input = Input(shape=input_shape, name='style_input')

    style_out = []
    enc_layers = []
    c = content_input
    s = style_input
    for layer in vgg.layers[1:]:
        if 'conv' in layer.name:
            srp = Padding()
            enc_layers.append(srp)
            c = srp(c)
            s = srp(s)
            new_layer = Conv2D(filters=layer.filters,
                               kernel_size=layer.kernel_size,
                               activation=layer.activation,
                               padding='valid',
                               name=layer.name)
        elif 'pool' in layer.name:
            new_layer = MaxPooling2D((2, 2), strides=(2, 2), name=layer.name)
        else:
            assert False

        enc_layers.append(new_layer)
        c = new_layer(c)
        s = new_layer(s)
        new_layer.set_weights(layer.get_weights())

        if 'conv1' in s.name:
            style_out.append(s)

    adain = AdaIN(alpha=alpha, name='adain')([c, s])
    x = adain

    # Decoder
    decoder_layers = [
        # Block 4
        Padding(),
        Conv2D(256, (3, 3),
               activation='relu',
               padding='valid',
               name='block4_conv1_decoded'),
        UpSampling2D(),

        # Block 3
        Padding(),
        Conv2D(256, (3, 3),
               activation='relu',
               padding='valid',
               name='block3_conv4_decoded'),
        Padding(),
        Conv2D(256, (3, 3),
               activation='relu',
               padding='valid',
               name='block3_conv3_decoded'),
        Padding(),
        Conv2D(256, (3, 3),
               activation='relu',
               padding='valid',
               name='block3_conv2_decoded'),
        Padding(),
        Conv2D(128, (3, 3),
               activation='relu',
               padding='valid',
               name='block3_conv1_decoded'),
        UpSampling2D(),

        # Block 2
        Padding(),
        Conv2D(128, (3, 3),
               activation='relu',
               padding='valid',
               name='block2_conv2_decoded'),
        Padding(),
        Conv2D(64, (3, 3),
               activation='relu',
               padding='valid',
               name='block2_conv1_decoded'),
        UpSampling2D(),

        # Block 1
        Padding(),
        Conv2D(64, (3, 3),
               activation='relu',
               padding='valid',
               name='block1_conv2_decoded'),
        Padding(),
        Conv2D(3, (3, 3),
               activation=None,
               padding='valid',
               name='block1_conv1_decoded'),
        PostProcess(name="decoded"),
    ]

    for layer in decoder_layers:
        x = layer(x)

    # Connections for calculating the losses
    out = []
    for layer in enc_layers:
        x = layer(x)
        if 'conv1' in x.name:
            out.append(x)

    loss_model = Model(inputs=[content_input, style_input], outputs=x)

    # Content loss
    Lc = tf.reduce_mean(tf.square(adain - x), axis=(1, 2, 3))
    loss_model.add_loss(Lc)

    # Style loss
    L1 = tf.constant(0.)
    L2 = tf.constant(0.)
    for t, s in zip(out, style_out):
        mean_t, variance_t = tf.nn.moments(t, [1, 2])
        mean_s, variance_s = tf.nn.moments(s, [1, 2])
        std_t, std_s = tf.sqrt(variance_t), tf.sqrt(variance_s)
        #std_t, std_s = variance_t, variance_s
        L1 += tf.reduce_mean(K.square(mean_t - mean_s), axis=1)
        L2 += tf.reduce_mean(K.square(std_t - std_s), axis=1)

    Ls = L1 + L2
    loss_model.add_loss(coef * Ls)

    loss_model.add_metric(Lc, name="Lc")
    loss_model.add_metric(Ls, name="Ls")

    # Weights freezing
    for layer in loss_model.layers:
        layer.trainable = layer.name.endswith('decoded')

    return loss_model
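
Note: the AdaIN layer used above is not defined in this snippet. A common formulation of adaptive instance normalization, given here as an assumption about what the layer computes rather than its actual implementation:

import tensorflow as tf

def adain_sketch(content, style, eps=1e-5):
    # align the channel-wise mean/std of the content feature map to the style's:
    # AdaIN(c, s) = sigma(s) * (c - mu(c)) / sigma(c) + mu(s)
    c_mean, c_var = tf.nn.moments(content, axes=[1, 2], keepdims=True)
    s_mean, s_var = tf.nn.moments(style, axes=[1, 2], keepdims=True)
    return tf.sqrt(s_var + eps) * (content - c_mean) / tf.sqrt(c_var + eps) + s_mean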
Example #17
0
def get_model(base_model, rpn_model, anchors, hyper_params, mode="training"):
    """Generating rpn model for given backbone base model and hyper params.
    inputs:
        base_model = tf.keras.model pretrained backbone, only VGG16 available for now
        rpn_model = tf.keras.model generated rpn model
        hyper_params = dictionary
        mode = "training" or "inference"

    outputs:
        frcnn_model = tf.keras.model
    """
    input_img = base_model.input
    rpn_reg_predictions, rpn_cls_predictions = rpn_model.output
    #
    roi_bboxes = RoIBBox(anchors, hyper_params, name="roi_bboxes")(
        [rpn_reg_predictions, rpn_cls_predictions])
    #
    roi_pooled = RoIPooling(
        hyper_params, name="roi_pooling")([base_model.output, roi_bboxes])
    #
    output = TimeDistributed(Flatten(), name="frcnn_flatten")(roi_pooled)
    output = TimeDistributed(Dense(4096, activation="relu"),
                             name="frcnn_fc1")(output)
    output = TimeDistributed(BatchNormalization(),
                             name="frcnn_batch_norm1")(output)
    output = TimeDistributed(Dropout(0.2), name="frcnn_dropout1")(output)
    output = TimeDistributed(Dense(2048, activation="relu"),
                             name="frcnn_fc2")(output)
    output = TimeDistributed(BatchNormalization(),
                             name="frcnn_batch_norm2")(output)
    output = TimeDistributed(Dropout(0.2), name="frcnn_dropout2")(output)
    frcnn_cls_predictions = TimeDistributed(Dense(hyper_params["total_labels"],
                                                  activation="softmax"),
                                            name="frcnn_cls")(output)
    frcnn_reg_predictions = TimeDistributed(Dense(
        hyper_params["total_labels"] * 4, activation="linear"),
                                            name="frcnn_reg")(output)
    #
    if mode == "training":
        input_gt_boxes = Input(shape=(None, 4),
                               name="input_gt_boxes",
                               dtype=tf.float32)
        input_gt_labels = Input(shape=(None, ),
                                name="input_gt_labels",
                                dtype=tf.int32)
        rpn_cls_actuals = Input(shape=(None, None,
                                       hyper_params["anchor_count"]),
                                name="input_rpn_cls_actuals",
                                dtype=tf.float32)
        rpn_reg_actuals = Input(shape=(None, 4),
                                name="input_rpn_reg_actuals",
                                dtype=tf.float32)
        frcnn_reg_actuals, frcnn_cls_actuals = RoIDelta(
            hyper_params,
            name="roi_deltas")([roi_bboxes, input_gt_boxes, input_gt_labels])
        #
        loss_names = [
            "rpn_reg_loss", "rpn_cls_loss", "frcnn_reg_loss", "frcnn_cls_loss"
        ]
        rpn_reg_loss_layer = Lambda(helpers.reg_loss, name=loss_names[0])(
            [rpn_reg_actuals, rpn_reg_predictions])
        rpn_cls_loss_layer = Lambda(helpers.rpn_cls_loss, name=loss_names[1])(
            [rpn_cls_actuals, rpn_cls_predictions])
        frcnn_reg_loss_layer = Lambda(helpers.reg_loss, name=loss_names[2])(
            [frcnn_reg_actuals, frcnn_reg_predictions])
        frcnn_cls_loss_layer = Lambda(
            helpers.frcnn_cls_loss,
            name=loss_names[3])([frcnn_cls_actuals, frcnn_cls_predictions])
        #
        frcnn_model = Model(inputs=[
            input_img, input_gt_boxes, input_gt_labels, rpn_reg_actuals,
            rpn_cls_actuals
        ],
                            outputs=[
                                roi_bboxes, rpn_reg_predictions,
                                rpn_cls_predictions, frcnn_reg_predictions,
                                frcnn_cls_predictions, rpn_reg_loss_layer,
                                rpn_cls_loss_layer, frcnn_reg_loss_layer,
                                frcnn_cls_loss_layer
                            ])
        #
        for layer_name in loss_names:
            layer = frcnn_model.get_layer(layer_name)
            frcnn_model.add_loss(layer.output)
            frcnn_model.add_metric(layer.output,
                                   name=layer_name,
                                   aggregation="mean")
        #
    else:
        frcnn_model = Model(inputs=[input_img],
                            outputs=[
                                roi_bboxes, rpn_reg_predictions,
                                rpn_cls_predictions, frcnn_reg_predictions,
                                frcnn_cls_predictions
                            ])
        #
    return frcnn_model
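
Note: helpers.reg_loss and the classification losses are defined elsewhere. Box-regression losses in Faster R-CNN implementations are commonly a smooth-L1 (Huber) form like the sketch below (an assumption about the style of helpers.reg_loss, not its actual code):

import tensorflow as tf

def smooth_l1_sketch(y_true, y_pred):
    # Huber-style penalty: quadratic for small residuals, linear for large ones
    diff = tf.abs(y_true - y_pred)
    return tf.reduce_mean(tf.where(diff < 1.0, 0.5 * tf.square(diff), diff - 0.5))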
Example #18
0
class VariationalAutoencoder():
    def __init__(self,
                 input_dim,
                 encoder_conv_filters,
                 encoder_conv_kernel_size,
                 encoder_conv_strides,
                 decoder_conv_t_filters,
                 decoder_conv_t_kernel_size,
                 decoder_conv_t_strides,
                 z_dim,
                 use_batch_norm=False,
                 use_dropout=False):

        self.name = 'variational_autoencoder'

        self.input_dim = input_dim
        self.encoder_conv_filters = encoder_conv_filters
        self.encoder_conv_kernel_size = encoder_conv_kernel_size
        self.encoder_conv_strides = encoder_conv_strides
        self.decoder_conv_t_filters = decoder_conv_t_filters
        self.decoder_conv_t_kernel_size = decoder_conv_t_kernel_size
        self.decoder_conv_t_strides = decoder_conv_t_strides
        self.z_dim = z_dim

        self.use_batch_norm = use_batch_norm
        self.use_dropout = use_dropout

        self.n_layers_encoder = len(encoder_conv_filters)
        self.n_layers_decoder = len(decoder_conv_t_filters)

        self._build()

    def _build(self):

        ### THE ENCODER
        encoder_input = Input(shape=self.input_dim, name='encoder_input')

        x = encoder_input

        for i in range(self.n_layers_encoder):
            conv_layer = Conv2D(filters=self.encoder_conv_filters[i],
                                kernel_size=self.encoder_conv_kernel_size[i],
                                strides=self.encoder_conv_strides[i],
                                padding='same',
                                name='encoder_conv_' + str(i))

            x = conv_layer(x)

            if self.use_batch_norm:
                x = BatchNormalization()(x)

            x = LeakyReLU()(x)

            if self.use_dropout:
                x = Dropout(rate=0.25)(x)

        shape_before_flattening = K.int_shape(x)[1:]

        x = Flatten()(x)
        self.mu = Dense(self.z_dim, name='mu')(x)
        self.log_var = Dense(self.z_dim, name='log_var')(x)

        self.encoder_mu_log_var = Model(encoder_input, (self.mu, self.log_var))

        def sampling(args):
            mu, log_var = args
            epsilon = K.random_normal(shape=K.shape(mu), mean=0., stddev=1.)
            return mu + K.exp(log_var / 2) * epsilon

        encoder_output = Lambda(sampling,
                                name='encoder_output')([self.mu, self.log_var])

        self.encoder = Model(encoder_input, encoder_output)

        ### THE DECODER

        decoder_input = Input(shape=(self.z_dim, ), name='decoder_input')

        x = Dense(np.prod(shape_before_flattening))(decoder_input)
        x = Reshape(shape_before_flattening)(x)

        for i in range(self.n_layers_decoder):
            conv_t_layer = Conv2DTranspose(
                filters=self.decoder_conv_t_filters[i],
                kernel_size=self.decoder_conv_t_kernel_size[i],
                strides=self.decoder_conv_t_strides[i],
                padding='same',
                name='decoder_conv_t_' + str(i))

            x = conv_t_layer(x)

            if i < self.n_layers_decoder - 1:
                if self.use_batch_norm:
                    x = BatchNormalization()(x)
                x = LeakyReLU()(x)
                if self.use_dropout:
                    x = Dropout(rate=0.25)(x)
            else:
                x = Activation('sigmoid')(x)

        decoder_output = x

        self.decoder = Model(decoder_input, decoder_output)

        ### THE FULL VAE
        model_input = encoder_input
        model_output = self.decoder(encoder_output)

        self.model = Model(model_input, model_output)

    def compile(self, learning_rate, r_loss_factor):
        self.learning_rate = learning_rate

        ### COMPILATION
        def vae_r_loss(y_true, y_pred):
            r_loss = K.mean(K.square(y_true - y_pred), axis=[1, 2, 3])
            return r_loss_factor * r_loss

        def vae_kl_loss(y_true, y_pred):
            kl_loss = -0.5 * K.sum(
                1 + self.log_var - K.square(self.mu) - K.exp(self.log_var),
                axis=1)
            return kl_loss

        def vae_loss(y_true, y_pred):
            r_loss = vae_r_loss(y_true, y_pred)
            kl_loss = vae_kl_loss(y_true, y_pred)
            return r_loss + kl_loss

        optimizer = Adam(learning_rate=learning_rate)
        self.model.add_loss(vae_loss(self.model.input, self.model.output))
        self.model.add_metric(vae_kl_loss(self.model.input, self.model.output),
                              name='vae_kl_loss')
        self.model.compile(optimizer=optimizer,
                           loss=None,
                           metrics=[vae_r_loss])

    def save(self, folder):

        if not os.path.exists(folder):
            os.makedirs(folder)
            os.makedirs(os.path.join(folder, 'viz'))
            os.makedirs(os.path.join(folder, 'weights'))
            os.makedirs(os.path.join(folder, 'images'))

        with open(os.path.join(folder, 'params.pkl'), 'wb') as f:
            pickle.dump([
                self.input_dim, self.encoder_conv_filters,
                self.encoder_conv_kernel_size, self.encoder_conv_strides,
                self.decoder_conv_t_filters, self.decoder_conv_t_kernel_size,
                self.decoder_conv_t_strides, self.z_dim, self.use_batch_norm,
                self.use_dropout
            ], f)

        self.plot_model(folder)

    def load_weights(self, filepath):
        self.model.load_weights(filepath)

    def train(self,
              x_train,
              batch_size,
              epochs,
              run_folder,
              print_every_n_batches=100,
              initial_epoch=0,
              lr_decay=1):

        custom_callback = CustomCallback(run_folder, print_every_n_batches,
                                         initial_epoch, self)
        lr_sched = step_decay_schedule(initial_lr=self.learning_rate,
                                       decay_factor=lr_decay,
                                       step_size=1)

        checkpoint_filepath = os.path.join(
            run_folder, "weights/weights-{epoch:03d}-{loss:.2f}.h5")
        checkpoint1 = ModelCheckpoint(checkpoint_filepath,
                                      save_weights_only=True,
                                      verbose=1)
        checkpoint2 = ModelCheckpoint(os.path.join(run_folder,
                                                   'weights/weights.h5'),
                                      save_weights_only=True,
                                      verbose=1)

        callbacks_list = [checkpoint1, checkpoint2, custom_callback, lr_sched]

        self.model.fit(x_train,
                       x_train,
                       batch_size=batch_size,
                       shuffle=True,
                       epochs=epochs,
                       initial_epoch=initial_epoch,
                       callbacks=callbacks_list)

    def train_with_generator(
        self,
        data_flow,
        epochs,
        steps_per_epoch,
        run_folder,
        print_every_n_batches=100,
        initial_epoch=0,
        lr_decay=1,
    ):

        custom_callback = CustomCallback(run_folder, print_every_n_batches,
                                         initial_epoch, self)
        lr_sched = step_decay_schedule(initial_lr=self.learning_rate,
                                       decay_factor=lr_decay,
                                       step_size=1)

        checkpoint_filepath = os.path.join(
            run_folder, "weights/weights-{epoch:03d}-{loss:.2f}.h5")
        checkpoint1 = ModelCheckpoint(checkpoint_filepath,
                                      save_weights_only=True,
                                      verbose=1)
        checkpoint2 = ModelCheckpoint(os.path.join(run_folder,
                                                   'weights/weights.h5'),
                                      save_weights_only=True,
                                      verbose=1)

        callbacks_list = [checkpoint1, checkpoint2, custom_callback, lr_sched]

        self.model.save_weights(os.path.join(run_folder, 'weights/weights.h5'))

        self.model.fit(data_flow,
                       shuffle=True,
                       epochs=epochs,
                       initial_epoch=initial_epoch,
                       callbacks=callbacks_list,
                       steps_per_epoch=steps_per_epoch)

    def plot_model(self, run_folder):
        plot_model(self.model,
                   to_file=os.path.join(run_folder, 'viz/model.png'),
                   show_shapes=True,
                   show_layer_names=True)
        plot_model(self.encoder,
                   to_file=os.path.join(run_folder, 'viz/encoder.png'),
                   show_shapes=True,
                   show_layer_names=True)
        plot_model(self.decoder,
                   to_file=os.path.join(run_folder, 'viz/decoder.png'),
                   show_shapes=True,
                   show_layer_names=True)
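
For context, a minimal driver sketch for the class above. The class name VariationalAutoencoder and the MNIST-style hyper-parameters are illustrative assumptions (the name is not shown in this excerpt); the constructor is presumed to take the same arguments, in the same order, as the params.pkl dump in save().

import os
import numpy as np
import tensorflow as tf

# Hypothetical usage sketch -- class name and hyper-parameter values are assumptions.
(x_train, _), _ = tf.keras.datasets.mnist.load_data()
x_train = x_train.astype('float32')[..., np.newaxis] / 255.0          # (60000, 28, 28, 1) in [0, 1]

vae = VariationalAutoencoder(input_dim=(28, 28, 1),
                             encoder_conv_filters=[32, 64, 64, 64],
                             encoder_conv_kernel_size=[3, 3, 3, 3],
                             encoder_conv_strides=[1, 2, 2, 1],
                             decoder_conv_t_filters=[64, 64, 32, 1],
                             decoder_conv_t_kernel_size=[3, 3, 3, 3],
                             decoder_conv_t_strides=[1, 2, 2, 1],
                             z_dim=2,
                             use_batch_norm=True,
                             use_dropout=True)
vae.compile(learning_rate=0.0005, r_loss_factor=1000)

run_folder = 'run/vae_digits'
for sub in ('weights', 'viz', 'images'):            # subfolders expected by save() and the checkpoints
    os.makedirs(os.path.join(run_folder, sub), exist_ok=True)

vae.train(x_train, batch_size=32, epochs=10, run_folder=run_folder)
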
Example #19
0
class VAE():
    '''
    A class representing the variational autoencoder used to encode the bar arrays.
    The VAE encodes a batch of bars, each of shape input_shape, into latent vectors with latent_dim dimensions.

    Parameters
    ----------
    latent_dim : int
        The dimensionality of the latent space.
    input_shape : list of int with length 2
        The shape of the input space without batch size.
        The first element specifies the number of timesteps in a bar, the second one the number of instruments.
    weights : str, optional
        Path to a weights file that was previously saved by Keras.
        If specified, the weights are loaded. If None, the weights are initialized by Keras.
    debug : bool, optional
        If True, print debug information such as model summaries during class construction.
    '''
    def __init__(self, latent_dim, input_shape, weights=None, debug=True):
        # set main class attributes
        self.input_shape = input_shape
        self.latent_dim = latent_dim
        self.debug = debug
        self.weightdir = 'weights/'
        self.callbacks = [tf.keras.callbacks.TensorBoard(log_dir='./logs')] # saves the necessary data to be viewed by the TensorBoard application

        # create input tensors and instantiate the encoder and decoder models
        bar_input = Input(shape = self.input_shape, name='encoder_input')
        latent_input = Input(shape = (self.latent_dim, ), name = 'latent_input')
        self.encoder = self.make_encoder(bar_input)
        self.decoder = self.make_decoder(latent_input)

        # create the output tensors
        encoder_output = self.encoder(bar_input)
        vae_output = self.decoder(encoder_output[2])

        # instantiate the VAE model
        self.VAE = Model(bar_input, vae_output, name='VAE_DNN')

        # calculate the loss functions and add them to the model
        z_mean, z_log_var, z = encoder_output
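        # closed-form KL divergence to the standard normal prior; the reconstruction term is a
        # mean squared error scaled back up by the number of elements per bar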
        kl_loss = -.5 * tf.math.reduce_sum(1 + z_log_var - tf.math.square(z_mean) - tf.math.exp(z_log_var), axis = -1)
        recon_loss = mean_squared_error(tf.reshape(bar_input, [-1]), tf.reshape(vae_output, [-1]))
        recon_loss *= np.prod(self.input_shape, dtype = float)
        vae_loss = tf.math.reduce_mean(0.1 * kl_loss + recon_loss)
        self.VAE.add_loss(vae_loss)
        self.VAE.add_metric(recon_loss, name = 'recon_loss', aggregation='mean') # add the reconstruction loss as an additional viewable metric for performance analysis

        # compile the model and load the weights if specified
        self.VAE.compile(optimizer='adam')
        if self.debug:
            self.VAE.summary()
        if weights: self.VAE.load_weights(weights)

    def make_encoder(self, bar_input, n_conv = 4):
        '''
        Helper function to instantiate the encoder model.

        Parameters
        ----------
        bar_input : Tensorflow symbolic tensor
            The symbolic tensor which represents the encoder input.
        n_conv : int, optional
            The number of convolution and pooling layers applied.

        Returns
        -------
        Keras model
            The created encoder model.
        '''

        x = bar_input
        # add the specified number of convolution/pooling layers
        for i in range(n_conv):
          x = Conv1D(64 * 2 ** i, 3, activation = 'relu', padding = 'valid') (x)
          x = MaxPooling1D(2) (x)
        x = Flatten(name = 'flatten')(x)
        #x = Dense(self.latent_dim * 2, activation = 'relu') (x) # intermediate dense layer

        z_mean = Dense(self.latent_dim, name = 'z_mean') (x)
        z_log_var = Dense(self.latent_dim, name = 'z_log_var') (x)
        # function to sample from a standard normal distribution, which is wrapped as a Lambda layer
        normal_sample_f = lambda y : tf.random.normal(tf.shape(y))
        eps = Lambda(normal_sample_f) (z_log_var)
        # Reparameterisation trick: rescale the sampled noise to the learned distribution
        z = z_mean + tf.math.exp(0.5 * z_log_var) * eps

        # model returns the mean and logarithm of the variance as well as the sampled value
        encoder = Model(bar_input, [z_mean, z_log_var, z], name='encoder')
        if self.debug: encoder.summary()
        return encoder

    def make_decoder(self, latent_input, n_deconv = 4):
        '''
        Helper function to instantiate the decoder model.

        Parameters
        ----------
        latent_input : Tensorflow symbolic tensor
            The symbolic tensor which represents the latent input.
        n_deconv : int, optional
            The number of upsampling and convolution layers applied.

        Returns
        -------
        Keras model
            The created decoder model.
        '''
        x = latent_input
        #x = Dense(self.latent_dim * 2, activation = 'relu') (x)
        x = Dense(4 * 128, activation = 'relu') (x)
        x = Reshape((4, 128)) (x)
        for i in range(n_deconv - 1):
          x = UpSampling1D(2) (x)
          x = Conv1D(64 * 2 ** i, 3, activation = 'relu', padding = 'same') (x)
        x = UpSampling1D(2) (x)
        x = Conv1D(22, 5, activation = 'relu', padding = 'same') (x)

        decoder = Model(latent_input, x, name='decoder')
        if self.debug: decoder.summary()
        return decoder

    def train(self, train_data, epochs, validation_split=0.0, **kwargs):
        '''
        Train the VAE model on a given dataset for a number of epochs.
        The weights are saved in the weightdir folder with a timestamp after training is completed.

        Parameters
        ----------
        train_data : array_like
            An array with shape (batch, *input_shape) which contains data to train the VAE on.
        epochs : int
            The number of epochs to train the model.
        validation_split : float, optional
            A number in the range [0, 1]: the fraction of the training samples withheld and used for validation.
        **kwargs
            Further keyword arguments passed on to the fit method of the underlying Keras model.
        '''
        history = self.VAE.fit(train_data, epochs=epochs, validation_split = validation_split, callbacks = self.callbacks, **kwargs)
        name = strftime("%Y-%m-%d_%H:%M:%S", gmtime())
        self.VAE.save_weights(self.weightdir + name)
        return history

    def interpolate(self, bar1, bar2, n_steps = 10):
        '''
        Interpolate between the specified bars.
        To achieve this, each bar is encoded as a latent vector. Between those latent vectors, the specified number of intermediate vectors is generated.
        The entire sequence is then decoded into a sequence of bars again.

        Parameters
        ----------
        bar1, bar2 : array_like
            Arrays of shape input_shape which represent the bars to be interpolated.
        n_steps : int, optional
            The number of steps in the interpolation, including the two original bars.

        Returns
        -------
        array_like
            Array of shape (n_steps, *input_shape) representing the generated sequence.
        '''
        lv1 = self.encoder.predict(bar1[None,:,:])[2]
        lv2 = self.encoder.predict(bar2[None,:,:])[2]

        lvs = np.concatenate([lv1 + (lv2-lv1)*i/(n_steps - 1) for i in range(n_steps)])
        bars_pred = self.decoder.predict(lvs)

        return bars_pred

    def sample(self, bar, n_samples):
        '''
        Sample from the distribution of a given input bar.

        Parameters
        ----------
        bar : array_like
            Array of shape input_shape around which the distribution is to be sampled.
        n_samples : int
            The number of samples to draw.

        Returns
        -------
        array_like
            Array of shape (n_samples, *input_shape) which contains the samples.
        '''
        return self.VAE.predict(np.array([bar]*n_samples))
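
A short usage sketch for this class. The bar shape of 64 timesteps by 22 instruments is an assumption derived from the decoder above (Reshape((4, 128)), four UpSampling1D(2) stages and a final Conv1D with 22 filters); the random data is a placeholder for a real bar dataset.

import os
import numpy as np

os.makedirs('weights', exist_ok=True)                       # train() saves timestamped weights into weightdir
bars = np.random.rand(1000, 64, 22).astype('float32')       # placeholder bar arrays, substitute real data

vae = VAE(latent_dim=32, input_shape=(64, 22), debug=False)
vae.train(bars, epochs=5, validation_split=0.1, batch_size=64)

sequence = vae.interpolate(bars[0], bars[1], n_steps=8)     # (8, 64, 22) interpolation between two bars
variations = vae.sample(bars[0], n_samples=5)               # 5 stochastic reconstructions of the same bar
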
Example #20
0
class CLSTokenClassifyModel(AbstractTextClassifyModel, TFBasedModel):
    def _load_config(self, config):
        super()._load_config(config)
        self.multi_label = self.task_config["multi_label"]
        self.max_len = self.task_config["max_len"]
        self.labels = load_lines(self.task_config['label_path'])
        self.sparse_label = not self.multi_label
        self.label2id, self.id2label = seq2dict(self.labels)
        self.label_num = len(self.label2id)

    def build_model(self,
                    pretrained_model_path=None,
                    pretrained_model_tag="bert",
                    pos_weight=1.,
                    bilstm_dim_list=[],
                    transformer_kwargs={}):
        with self.get_scope():
            encoder_model = get_sequence_encoder_model(
                vocab_size=self.vocab_size,
                pretrained_model_path=pretrained_model_path,
                pretrained_model_tag=pretrained_model_tag,
                bilstm_dim_list=bilstm_dim_list,
                transformer_kwargs=transformer_kwargs)
            sequence_embedding = encoder_model.output
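            # CLS pooling: the hidden state of the first token serves as the sequence-level representation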
            class_embedding = Lambda(function=lambda x: x[:, 0],
                                     name="cls_layer")(sequence_embedding)
            classify_activation = sigmoid if self.multi_label else softmax
            classifier_layer = Dense(
                self.label_num,
                name="classifier",
                activation=classify_activation,
                kernel_initializer=TruncatedNormal(stddev=0.02))
            output = classifier_layer(class_embedding)
            if self.multi_label:
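                # raise each sigmoid probability to the pos_weight power; a pos_weight below 1
                # boosts positive predictions, above 1 suppresses them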
                output = Lambda(lambda x: x**pos_weight,
                                name="pos_weight_layer")(output)

            self.nn_model = Model(inputs=encoder_model.inputs,
                                  outputs=[output],
                                  name="nn_model")

        logger.info("nn model's summary:")
        self.nn_model.summary(print_fn=logger.info)
        self._update_model_dict("test", self.nn_model)
        return self.nn_model

    @discard_kwarg
    def compile_model(self, optimizer_name, optimizer_args, rdrop_alpha=None):
        logger.info("compiling model...")
        with self.get_scope():
            classify_output = Input(
                shape=(self.label_num, ) if self.multi_label else (),
                name='classify_output',
                dtype=tf.int32)
            inputs = self.nn_model.inputs
            output = self.nn_model(inputs)
            loss_input = [classify_output, output]
            if rdrop_alpha:
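                # R-Drop-style training: a second forward pass with an independent dropout mask;
                # both predictions are fed to the loss layer and averaged for the model output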
                output1 = self.nn_model(inputs)
                loss_input.append(output1)
                output = Lambda(function=lambda x: sum(x) / len(x),
                                name="avg_pool_layer")([output, output1])
            self.train_model = Model(inputs + [classify_output], [output],
                                     name="train_model")

        loss_layer = build_classify_loss_layer(multi_label=self.multi_label,
                                               rdrop_alpha=rdrop_alpha)
        loss = loss_layer(loss_input)
        self.train_model.add_loss(loss)

        accuracy_func = binary_accuracy if self.multi_label else sparse_categorical_accuracy

        metric_layer = MetricLayer(accuracy_func, name="metric_layer")
        accuracy = metric_layer([classify_output, output])
        self.train_model.add_metric(accuracy,
                                    aggregation="mean",
                                    name="accuracy")
        optimizer = OptimizerFactory.create(optimizer_name, optimizer_args)
        self.train_model.compile(optimizer=optimizer)

        logger.info("training model's summary:")
        self.train_model.summary(print_fn=logger.info)
        self._update_model_dict("train", self.train_model)

    def example2feature(self, example: UnionTextClassifyExample) -> Dict:
        feature = self.tokenizer.do_tokenize(text=example.text,
                                             extra_text=example.extra_text)
        if isinstance(example, LabeledTextClassifyExample):
            if isinstance(example.label, list):
                labels = [e.name for e in example.label]
            else:
                labels = [example.label.name]
            feature.update(labels=labels)
        return feature

    def _feature2records(self, idx, feature: Dict, mode: str) -> List[Dict]:
        record = dict(idx=idx, **feature)
        truncate_record(record=record,
                        max_len=self.max_len,
                        keys=["token_ids", "segment_ids", "tokens"])
        if mode == "train":
            labels = feature.get("labels")
            if labels is None:
                raise ValueError("no labels given in train mode!")
            classify_output = get_classify_output(labels, self.label2id,
                                                  self.sparse_label)
            record.update(classify_output=classify_output)
        return [record]

    @discard_kwarg
    @log_cost_time
    def _post_predict(self,
                      pred_tensors,
                      show_detail=False,
                      threshold=.5) -> List[LabelOrLabels]:
        def _tensor2output(pred_tensor) -> LabelOrLabels:
            if self.multi_label:
                if show_detail:
                    logger.info(f"pred tensor")
                    logger.info(pred_tensor)
                hard_pred_tensor = apply_threshold(pred_tensor, threshold)
                label_data = [
                    int(e.numpy()) for e in n_hot2idx_tensor(hard_pred_tensor)
                ]
                return [
                    Label(name=self.id2label[label_id],
                          prob=pred_tensor[label_id])
                    for label_id in label_data
                ]
            else:
                label_id = tf.argmax(pred_tensor, axis=-1).numpy()
                label = self.id2label[label_id]
                prob = pred_tensor[label_id]
                return Label(prob=prob, name=label)

        preds = [_tensor2output(t) for t in pred_tensors]
        return preds
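
Stripped of the project-specific helpers (get_sequence_encoder_model, build_classify_loss_layer, MetricLayer, OptimizerFactory, whose implementations are not shown here), the classification head above reduces to CLS-token pooling followed by a dense softmax (or sigmoid) layer. A minimal self-contained sketch of that pattern in plain tf.keras, with a toy BiLSTM standing in for the pretrained encoder:

import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, LSTM, Bidirectional, Lambda, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.initializers import TruncatedNormal

vocab_size, max_len, hidden, num_labels = 10000, 128, 64, 5

# Toy stand-in for the sequence encoder: embedding + BiLSTM returning per-token vectors.
token_ids = Input(shape=(max_len,), dtype=tf.int32, name="token_ids")
x = Embedding(vocab_size, hidden)(token_ids)
sequence_embedding = Bidirectional(LSTM(hidden, return_sequences=True))(x)

# CLS pooling: keep only the first token's hidden state as the sequence representation.
class_embedding = Lambda(lambda t: t[:, 0], name="cls_layer")(sequence_embedding)

# Dense classifier head; use sigmoid instead of softmax for the multi-label case.
output = Dense(num_labels, activation="softmax",
               kernel_initializer=TruncatedNormal(stddev=0.02),
               name="classifier")(class_embedding)

model = Model(token_ids, output, name="cls_token_classifier")
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy",
              metrics=["sparse_categorical_accuracy"])
model.summary()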