Exemplo n.º 1
0
    # Persist only the best weights seen so far: a checkpoint is written
    # whenever the monitored validation metric improves ('val.macro_mae',
    # mode="min" — lower is better).
    checkpointer = ModelCheckpoint(filepath=best_model(),
                                   monitor='val.macro_mae',
                                   mode="min",
                                   verbose=1,
                                   save_best_only=True)

# Training callbacks, attached in order: metrics, plotting, weight tracking.
_callbacks = [metrics_callback, plotting, weights]

############################################################################
# APPLY CLASS WEIGHTS
############################################################################
# Pick class weights per task: "BD" targets are already categorical and get
# no smoothing; other tasks are one-hot encoded and use a smoothed estimate.
if TASK == "BD":
    class_weights = get_class_weights2(training[1], smooth_factor=0)
else:
    target_categories = onehot_to_categories(training[1])
    class_weights = get_class_weights2(target_categories, smooth_factor=0.1)

# Report the weights with human-readable class names.
readable_weights = {cat_to_class_mapping[c]: w
                    for c, w in class_weights.items()}
print("Class weights:", readable_weights)

history = nn_model.fit(training[0],
                       training[1],
                       validation_data=(validation[0],
                                        validation[1]) if not FINAL else
                       (testing[0], testing[1]),
                       nb_epoch=50,
                       batch_size=64,
Exemplo n.º 2
0
    def run(self,
            train,
            test,
            features=None,
            test_features=None,
            extra_train=None,
            callbacks=True):
        """Train the configured model on *train* and evaluate it on *test*.

        Parameters
        ----------
        train, test : datasets exposing a ``.text`` attribute of raw texts;
            *test* must also expose ``.tweet_id`` (used when saving the
            predictions for scoring).
        features, test_features : optional extra feature matrices fed to the
            model alongside the token sequences.
        extra_train : optional additional training set; when given, the
            tokenizer also learns its vocabulary and the model is pre-fit on
            it before the main fit.
        callbacks : when True, ``self.get_callbacks()`` is attached to the
            Keras ``fit`` calls; otherwise no callbacks are used.

        Side effects: logs the experiment setup, model summary and training
        history via ``self.logger``, prints evaluation results, and saves
        the predicted classes for external scoring.
        """
        # Learn the vocabulary from the main training texts first.
        self.tokenizer.fit_on_texts(train.text.values)

        features_dim = features.shape[1] if features is not None else None

        X_train, Y_train = self.get_features_targets(train)
        # Force the test sequences to the same length as the train sequences.
        X_test, Y_test = self.get_features_targets(
            test, features_dim=X_train.shape[1])

        if extra_train is not None:
            # Extend the vocabulary with the extra texts, then — if the extra
            # sequences came out longer — pad train/test up to that length so
            # all inputs share one sequence dimension.
            self.tokenizer.fit_on_texts(extra_train.text.values)
            X_extra_train, Y_extra_train \
                = self.get_features_targets(extra_train)
            if X_extra_train.shape[1] > X_train.shape[1]:
                X_train = pad_sequences(X_train, maxlen=X_extra_train.shape[1])
                X_test = pad_sequences(X_test, maxlen=X_extra_train.shape[1])

        # +1 because tokenizer word indices start at 1 (index 0 is left free,
        # presumably for padding — standard Keras Tokenizer convention).
        vocab_size = len(self.tokenizer.word_index) + 1

        class_count = 3 if self.ternary else 2

        embedding_matrix = None

        if self.use_embeddings:
            # Build a pretrained embedding matrix aligned with the word index.
            embedding_manager = EmbeddingManager()
            embedding_matrix = embedding_manager.get_embedding_matrix(
                self.tokenizer.word_index, self.embedding_dim)

        # Parameters shared by all model variants.
        base_model_params = {
            'input_dim': X_train.shape[1],
            'class_count': class_count,
            'features_dim': features_dim,
            'dropout': self.dropout
        }

        # Dispatch on the configured model type; anything unrecognized falls
        # back to the baseline-with-features model.
        if self.model_type == "elmo":
            params = {
                **base_model_params, 'index_word': self.tokenizer.index_word
            }
            self.model = ElmoModel().compile(**params)
        elif self.model_type == "bid_attent":
            params = {
                **base_model_params, 'vocab_size': vocab_size,
                'embedding_matrix': embedding_matrix,
                'embedding_dim': self.embedding_dim
            }
            self.model = BidirectionalAttention().compile(**params)
        else:
            params = {
                **base_model_params, 'vocab_size': vocab_size,
                'embedding_matrix': embedding_matrix,
                'embedding_dim': self.embedding_dim
            }
            self.model = BaselineWithFeatures().compile(**params)

        # Record the experiment configuration before training starts.
        self.logger.setup(ternary=self.ternary,
                          embeddings=self.use_embeddings,
                          train_set=X_train,
                          test_set=X_test,
                          vocab_size=vocab_size,
                          epochs=self.epochs,
                          batch_size=self.batch_size,
                          dropout=self.dropout,
                          extra_train=extra_train is not None)

        self.model.summary(print_fn=self.logger.write)

        # NOTE(review): class_weight is computed from Y_train but these same
        # fit_params are reused for the extra_train pre-fit below — confirm
        # the extra set is meant to use the main set's class weights.
        fit_params = {
            'batch_size':
            self.batch_size,
            'callbacks':
            self.get_callbacks() if callbacks else [],
            'epochs':
            self.epochs,
            'validation_split':
            self.validation_split,
            'verbose':
            1,
            'class_weight':
            get_class_weights2(onehot_to_categories(Y_train), smooth_factor=0)
        }

        if extra_train is not None:
            # Pre-train on the extra data before fitting on the main set.
            training = self.model.fit(X_extra_train, Y_extra_train,
                                      **fit_params)
            self.logger.write_history(training)

        # NOTE(review): test_input is gated on `features is not None`, not
        # `test_features is not None` — if features is given without
        # test_features, [X_test, None] is passed to predict; confirm both
        # are always supplied together.
        train_input = [X_train, features] if features is not None else X_train
        test_input = [X_test, test_features
                      ] if features is not None else X_test

        training = self.model.fit(train_input, Y_train, **fit_params)
        # Hard class predictions: argmax over the softmax outputs.
        pred_classes = self.model.predict(test_input, verbose=1).argmax(axis=1)

        self.logger.write_history(training)
        self.print_results(pred_classes, Y_test, class_count=class_count)
        self.save_output_for_scoring(test.tweet_id, pred_classes)
Exemplo n.º 3
0
    # Render the model architecture to an image and print a textual summary.
    plot_model(nn_model,
               show_layer_names=True,
               show_shapes=True,
               to_file="taskimage.png")
    print(nn_model.summary())
    #exit()

    # Map sentiment labels to integer categories, and build the inverse map
    # so class weights can be printed with human-readable label names.
    classes = ['positive', 'negative', 'neutral']
    class_to_cat_mapping = get_labels_to_categories_map(classes)
    cat_to_class_mapping = {
        v: k
        for k, v in get_labels_to_categories_map(classes).items()
    }

    # NOTE(review): the "2-val" slot is fed the *testing* split here (and
    # `validation_data=testing` below) — confirm that evaluating against the
    # test set during training is intentional.
    _datasets = {}
    _datasets["1-train"] = training
    _datasets["2-val"] = testing

    # Weight classes by their frequency in the training targets.
    class_weights = get_class_weights2(onehot_to_categories(training[1]))
    # print(training[0])
    print("Class weights:",
          {cat_to_class_mapping[c]: w
           for c, w in class_weights.items()})
    # print(training[0].shape,training[1].shape)
    history = nn_model.fit(training[0],
                           training[1],
                           validation_data=testing,
                           epochs=20,
                           batch_size=50,
                           class_weight=class_weights)
Exemplo n.º 4
0
                 HP_FINAL_CELLS],  #  , HP_DROP_REP, HP_DROP_OUT],
        metrics=[
            hp.Metric(METRIC_RECALL, display_name='Recall'),
            hp.Metric(METRIC_ACCURACY, display_name='Accuracy'),
            hp.Metric(EPOCHS, display_name='Epochs')
        ])

########################################################################################################################
# Validation settings #
########################################################################################################################
# Class weights are only applied for the "acp" task; for other tasks the
# model trains unweighted (class_weights stays None).
class_weights = None
if TASK == "acp":
    # Recover the integer category from each one-hot target row.
    lab_to_cat = {'negative': 0, 'mixed': 1, 'positive': 2}
    cat_to_class_mapping = {v: k for k, v in lab_to_cat.items()}
    class_weights = get_class_weights2(
        [list(elem).index(1) for elem in y_train], smooth_factor=0.1)
    print("Class weights:",
          {cat_to_class_mapping[c]: w
           for c, w in class_weights.items()})
    # Keras expects `class_weight` keyed by the integer class index.
    # BUG FIX: the previous re-keying used enumeration order of the dict
    # keys ({i: class_weights[w] for i, w in enumerate(class_weights.keys())}),
    # which scrambles the class->weight association whenever the keys are not
    # already 0, 1, 2 in insertion order. Re-key by the category value itself
    # so each weight stays attached to its own class.
    class_weights = {int(c): w for c, w in class_weights.items()}


def validation_score_model(hparams, n_run):
    nn_model = model(
        embeddings,
        text_max_length,
        target_max_length,
        len(classes),