def test_lstm(self):
        """Train the network on GloVe-encoded fake data and check ``val_metric``.

        Encodes the fake dataset (with a text column) in ``'glove'`` mode,
        trains a ``NeuralNetworkModel`` with the fake model config and asserts
        that the reported ``val_metric`` is within ``1e-4`` of 0.0.

        Requires the pre-trained GloVe vectors at ``glove_file_path``; change
        that path to wherever the file is stored locally.
        """
        df_train, df_dev, df_test, metadata = get_fake_dataset(with_text_col=True)

        # Change this to where the pre-trained GloVe file is stored.
        glove_file_path = 'resource/glove/glove.6B.50d.txt'

        text_config = Mapping()
        text_config.mode = 'glove'
        text_config.max_words = 20
        text_config.maxlen = 5
        text_config.embedding_dim = 50
        text_config.embeddings_index = open_glove(glove_file_path)

        encoder = Encoder(metadata, text_config=text_config)
        y_train, X_train_struc, X_train_text = encoder.fit_transform(df_train)
        # The dev/test splits are still encoded so that transform() is
        # exercised, although training below only uses the train split.
        y_dev, X_dev_struc, X_dev_text = encoder.transform(df_dev)
        y_test, X_test_struc, X_test_text = encoder.transform(df_test)

        # Hand the embedding matrix held by the encoder's config to the model.
        text_config.embedding_matrix = encoder.text_config.embedding_matrix

        model_config = get_fake_modelconfig('tmp/outputs_test')
        model_config.output_dir = os.path.join(model_config.output_dir, 'lstm')
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(model_config.output_dir, exist_ok=True)

        model = NeuralNetworkModel(text_config, model_config)
        # NOTE(review): the train split is passed twice, presumably as both the
        # training and the validation data -- confirm this is intentional.
        output = model.train(y_train, X_train_struc, X_train_text,
                             y_train, X_train_struc, X_train_text)

        val_metric_true = 0.0
        val_metric = output['val_metric']
        self.assertTrue(
            np.isclose(val_metric_true, val_metric, atol=1e-4),
            msg='val_metric %r is not close to %r' % (val_metric, val_metric_true))
    def test_textdata_only_tfidf(self):
        """Train the network on a text-only, tf-idf encoded fake dataset.

        Encodes the fake dataset (``text_only=True``) in ``'tfidf'`` mode,
        trains a ``NeuralNetworkModel`` with the fake model config and asserts
        that the reported ``val_metric`` is close to 0.0 (default
        ``np.isclose`` tolerances).
        """
        df_train, df_dev, df_test, metadata = get_fake_dataset(with_text_col=True, text_only=True)

        text_config = Mapping()
        text_config.mode = 'tfidf'
        text_config.max_words = 20

        encoder = Encoder(metadata, text_config=text_config)
        y_train, X_train_struc, X_train_text = encoder.fit_transform(df_train)
        # The dev/test splits are still encoded so that transform() is
        # exercised, although training below only uses the train split.
        y_dev, X_dev_struc, X_dev_text = encoder.transform(df_dev)
        y_test, X_test_struc, X_test_text = encoder.transform(df_test)

        model_config = get_fake_modelconfig('tmp/outputs_test')
        model_config.output_dir = os.path.join(model_config.output_dir, 'tfidf_text_only')
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(model_config.output_dir, exist_ok=True)

        model = NeuralNetworkModel(text_config, model_config)
        # NOTE(review): the train split is passed twice, presumably as both the
        # training and the validation data -- confirm this is intentional.
        output = model.train(y_train, X_train_struc, X_train_text,
                             y_train, X_train_struc, X_train_text)

        val_metric_true = 0.0
        val_metric = output['val_metric']
        self.assertTrue(
            np.isclose(val_metric_true, val_metric),
            msg='val_metric %r is not close to %r' % (val_metric, val_metric_true))
def get_fake_linear_regression_modelconfig(output_path):
    """Build a fake model config for the ``'linear_regression'`` model type.

    Args:
        output_path: Value stored under ``output_dir``.

    Returns:
        A ``Mapping`` with ``task_type``, ``num_classes``, ``model_type`` and
        ``output_dir`` set, in that order.
    """
    settings = (
        ('task_type', 'regression'),  # 'classification' or 'regression'
        ('num_classes', 3),  # number of classes or number of outputs
        ('model_type', 'linear_regression'),
        ('output_dir', output_path),
    )
    config = Mapping()
    for key, value in settings:
        setattr(config, key, value)
    return config
def get_fake_rf_modelconfig(output_path):
    """Build a fake model config for the ``'random_forest'`` model type.

    Args:
        output_path: Value stored under ``output_dir``.

    Returns:
        A ``Mapping`` with ``task_type``, ``num_classes``, ``model_type``,
        ``output_dir`` and ``n_trees`` set, in that order.
    """
    settings = (
        ('task_type', 'classification'),  # 'classification' or 'regression'
        ('num_classes', 3),  # number of classes or number of outputs
        ('model_type', 'random_forest'),
        ('output_dir', output_path),
        ('n_trees', 4),
    )
    config = Mapping()
    for key, value in settings:
        setattr(config, key, value)
    return config
    def test_word_embedding(self):
        """Check GloVe-mode encoding of the fake dataset against fixed arrays.

        Fits the encoder on the train split, transforms the dev and test
        splits, then compares each text and structured feature matrix with a
        hard-coded reference using ``np.isclose``.

        Requires the pre-trained GloVe vectors at ``glove_file_path``; change
        that path to wherever the file is stored locally.
        """
        df_train, df_dev, df_test, metadata = get_fake_dataset(with_text_col=True)

        # Change this to where the pre-trained GloVe file is stored.
        glove_file_path = 'resource/glove/glove.6B.50d.txt'

        text_config = Mapping()
        text_config.mode = 'glove'
        text_config.max_words = 20
        text_config.maxlen = 5
        text_config.embedding_dim = 50
        text_config.embeddings_index = open_glove(glove_file_path)

        encoder = Encoder(metadata, text_config=text_config)
        _, train_struc, train_text = encoder.fit_transform(df_train)
        _, dev_struc, dev_text = encoder.transform(df_dev)
        _, test_struc, test_text = encoder.transform(df_test)

        # (expected, actual) pairs, checked in order: train, dev, test --
        # text matrix first, structured matrix second.
        checks = [
            (np.array([[9, 10, 11, 2, 3],
                       [15, 16, 17, 18, 19],
                       [1, 2, 1, 1, 3]]),
             train_text),
            (np.array([[-1.22474487, 1., 0., 0.],
                       [0., 0., 1., 0.],
                       [1.22474487, 0., 0., 1.]]),
             train_struc),
            (np.array([[1, 1, 1, 1, 1],
                       [1, 1, 1, 1, 0],
                       [1, 1, 1, 1, 1]]),
             dev_text),
            (np.array([[2.44948974, 0., 1., 0.],
                       [6.12372436, 0., 1., 0.],
                       [3.67423461, 0., 0., 1.]]),
             dev_struc),
            (np.array([[14, 4, 1, 1, 1],
                       [1, 1, 1, 1, 1],
                       [1, 1, 1, 1, 1]]),
             test_text),
            (np.array([[0., 1., 0., 0.],
                       [3.67423461, 0., 0., 1.],
                       [1.22474487, 0., 0., 1.]]),
             test_struc),
        ]
        for expected, actual in checks:
            self.assertTrue(np.isclose(expected, actual).all())
def get_fake_modelconfig(output_path):
    """Build the fake neural-network model config used by the tests.

    Args:
        output_path: Value stored under ``output_dir``.

    Returns:
        A ``Mapping`` whose attributes are set in the order listed in
        ``settings`` below.
    """
    settings = (
        ('task_type', 'classification'),  # 'classification' or 'regression'
        ('num_classes', 3),  # number of classes or number of outputs
        ('combine', 'concate'),  # or 'attention'
        ('model_type', 'mlp'),  # default is 'mlp', can be 'skip_connections'
        ('n_layers_dense', 2),
        ('hidden_size_dense', 16),
        ('n_layers_lstm', 2),
        ('hidden_size_lstm', 32),
        ('dropout_rate_lstm', 0.0),
        ('n_layers_output', 2),
        ('hidden_size_output', 32),
        ('optimizer', 'adam'),  # 'adam', 'sgd', 'rmsprop'
        ('learning_rate', 0.01),
        ('clipnorm', 5.0),
        ('patience', 5),
        ('output_dir', output_path),
        ('n_epochs', 10),
        ('batch_size', 1),
        ('verbose', 0),
    )
    config = Mapping()
    for key, value in settings:
        setattr(config, key, value)
    return config
    def test_tfidf(self):
        """Check tf-idf-mode encoding of the fake dataset against fixed arrays.

        Fits the encoder on the train split, transforms the dev and test
        splits, then compares each text and structured feature matrix with a
        hard-coded reference using ``np.isclose``.
        """
        df_train, df_dev, df_test, metadata = get_fake_dataset(with_text_col=True)

        text_config = Mapping()
        text_config.mode = 'tfidf'
        text_config.max_words = 20
        print('*' * 20)
        print(text_config.mode)

        encoder = Encoder(metadata, text_config=text_config)
        _, train_struc, train_text = encoder.fit_transform(df_train)
        _, dev_struc, dev_text = encoder.transform(df_dev)
        _, test_struc, test_text = encoder.transform(df_test)

        # Reference text matrices: three rows of twenty values each.
        train_text_expected = np.array([
            [0., 0.69314718, 0.69314718, 0., 0.91629073,
             0.91629073, 0.91629073, 0.91629073, 0.91629073, 0.91629073,
             0.91629073, 0., 0., 0., 0.,
             0., 0., 0., 0., 0.],
            [0., 0., 0., 1.55141507, 0.,
             0., 0., 0., 0., 0.,
             0., 0.91629073, 0.91629073, 0.91629073, 0.91629073,
             0.91629073, 0.91629073, 0.91629073, 0.91629073, 0.],
            [0., 0.69314718, 0.69314718, 0., 0.,
             0., 0., 0., 0., 0.,
             0., 0., 0., 0., 0.,
             0., 0., 0., 0., 0.91629073],
        ])
        dev_text_expected = np.array([
            [0., 0., 0., 0.91629073, 0.,
             0., 0., 0., 0., 0.,
             0., 0., 0., 0., 0.,
             0., 0., 0., 0., 0.],
            [0., 0., 0., 0., 0.,
             0., 0., 0., 0., 0.,
             0., 0., 0., 0., 0.,
             0., 0., 0., 0., 0.],
            [0., 0., 0., 0., 0.,
             0., 0., 0., 0.91629073, 0.91629073,
             0.91629073, 0., 0., 0., 0.,
             0., 0., 0., 0., 0.],
        ])
        test_text_expected = np.array([
            [0., 0., 0., 1.55141507, 0.,
             0., 0., 0., 0., 0.,
             0., 0., 0., 0.91629073, 0.,
             0., 0., 0., 0., 0.],
            [0., 0., 0., 0., 0.,
             0., 0., 0., 0., 0.,
             0., 0., 0., 0., 0.,
             0., 0., 0., 0., 0.],
            [0., 0., 0., 0.91629073, 0.,
             0., 0., 0., 0., 0.,
             0., 0., 0., 0., 0.,
             0., 0., 0., 0., 0.],
        ])

        # Reference structured-feature matrices.
        train_struc_expected = np.array([[-1.22474487, 1., 0., 0.],
                                         [0., 0., 1., 0.],
                                         [1.22474487, 0., 0., 1.]])
        dev_struc_expected = np.array([[2.44948974, 0., 1., 0.],
                                       [6.12372436, 0., 1., 0.],
                                       [3.67423461, 0., 0., 1.]])
        test_struc_expected = np.array([[0., 1., 0., 0.],
                                        [3.67423461, 0., 0., 1.],
                                        [1.22474487, 0., 0., 1.]])

        # Checked in order: train, dev, test -- text first, structured second.
        checks = (
            (train_text_expected, train_text),
            (train_struc_expected, train_struc),
            (dev_text_expected, dev_text),
            (dev_struc_expected, dev_struc),
            (test_text_expected, test_text),
            (test_struc_expected, test_struc),
        )
        for expected, actual in checks:
            self.assertTrue(np.isclose(expected, actual).all())
# Example #8
# 0
 def __init__(self, text_config, model_config):
     """Store the text and model configurations on the instance.

     Both arguments are passed through ``Mapping(...)`` before being stored
     as ``self.text_config`` and ``self.model_config``.
     """
     self.text_config = Mapping(text_config)
     self.model_config = Mapping(model_config)