def test_textdata_only_tfidf(self):
    """Train on a text-only fake dataset with the text config in 'tfidf' mode.

    Asserts that the ``val_metric`` entry returned by
    ``NeuralNetworkModel.train`` is close to 0.0 under the default
    tolerances of ``np.isclose``.
    """
    df_train, df_dev, df_test, metadata = get_fake_dataset(
        with_text_col=True, text_only=True)

    # Text-encoding settings shared by the encoder and the model.
    text_config = Mapping()
    text_config.mode = 'tfidf'
    text_config.max_words = 20

    encoder = Encoder(metadata, text_config=text_config)
    train_y, train_struc, train_text = encoder.fit_transform(df_train)
    # The dev and test splits are encoded here but are not passed to
    # train() below.
    dev_y, dev_struc, dev_text = encoder.transform(df_dev)
    test_y, test_struc, test_text = encoder.transform(df_test)

    # Give this test its own sub-directory under the shared output root.
    model_config = get_fake_modelconfig('tmp/outputs_test')
    model_config.output_dir = os.path.join(
        model_config.output_dir, 'tfidf_text_only')
    target_dir = model_config.output_dir
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    model = NeuralNetworkModel(text_config, model_config)
    # The training split is supplied a second time as the validation split.
    result = model.train(
        train_y, train_struc, train_text,
        train_y, train_struc, train_text)

    expected_val_metric = 0.0
    is_match = np.isclose(expected_val_metric, result['val_metric'])
    self.assertTrue(is_match)
def test_lstm(self):
    """Train on a fake dataset with a text column, text config in 'glove' mode.

    Asserts that the ``val_metric`` entry returned by
    ``NeuralNetworkModel.train`` is within ``atol=1e-4`` of 0.0.

    The pre-trained GloVe file is read from a hard-coded relative path;
    adjust ``glove_file_path`` to wherever that file is stored locally.
    """
    df_train, df_dev, df_test, metadata = get_fake_dataset(with_text_col=True)

    # NOTE: change this to the location of your pre-trained GloVe file.
    glove_file_path = 'resource/glove/glove.6B.50d.txt'

    # Text-encoding settings shared by the encoder and the model.
    text_config = Mapping()
    text_config.mode = 'glove'
    text_config.max_words = 20
    text_config.maxlen = 5
    text_config.embedding_dim = 50
    # Loaded from the path above, so it depends on the local GloVe location.
    text_config.embeddings_index = open_glove(glove_file_path)

    encoder = Encoder(metadata, text_config=text_config)
    train_y, train_struc, train_text = encoder.fit_transform(df_train)
    # The dev and test splits are encoded here but are not passed to
    # train() below.
    dev_y, dev_struc, dev_text = encoder.transform(df_dev)
    test_y, test_struc, test_text = encoder.transform(df_test)

    # Copy the embedding matrix held by the encoder's text config onto the
    # config object handed to the model.
    text_config.embedding_matrix = encoder.text_config.embedding_matrix

    # Give this test its own sub-directory under the shared output root.
    model_config = get_fake_modelconfig('tmp/outputs_test')
    model_config.output_dir = os.path.join(model_config.output_dir, 'lstm')
    target_dir = model_config.output_dir
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    model = NeuralNetworkModel(text_config, model_config)
    # The training split is supplied a second time as the validation split.
    result = model.train(
        train_y, train_struc, train_text,
        train_y, train_struc, train_text)

    expected_val_metric = 0.0
    is_match = np.isclose(
        expected_val_metric, result['val_metric'], atol=1e-4)
    self.assertTrue(is_match)
def test_strucdata_only(self):
    """Train on a fake dataset without a text column and no text config.

    Asserts that the ``val_metric`` entry returned by
    ``NeuralNetworkModel.train`` is within ``atol=1e-2`` of 0.0.
    """
    df_train, df_dev, df_test, metadata = get_fake_dataset(with_text_col=False)

    encoder = Encoder(metadata, text_config=None)
    train_y, train_struc, train_text = encoder.fit_transform(df_train)
    # The dev and test splits are encoded here but are not passed to
    # train() below.
    dev_y, dev_struc, dev_text = encoder.transform(df_dev)
    test_y, test_struc, test_text = encoder.transform(df_test)

    # Give this test its own sub-directory under the shared output root.
    model_config = get_fake_modelconfig('tmp/outputs_test')
    model_config.output_dir = os.path.join(
        model_config.output_dir, 'dense_mlp')
    target_dir = model_config.output_dir
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    model = NeuralNetworkModel(text_config=None, model_config=model_config)
    # The training split is supplied a second time as the validation split.
    result = model.train(
        train_y, train_struc, train_text,
        train_y, train_struc, train_text)

    expected_val_metric = 0.0
    is_match = np.isclose(
        expected_val_metric, result['val_metric'], atol=1e-2)
    self.assertTrue(is_match)