def testSeqtoSeqWithLuongMulAttentionParameterCount(self):
    """Check the parameter count of the toy NMT Luong mul-attention block.

    Loads ``toy_nmt_luong_mul_attention.xml``, points it at the toy NMT data
    and embedding folders, initializes it and asserts that
    ``ml_helper.count_parameters()`` reports exactly 376254.
    """
    block_path = self.setup_holder.filepath_handler.get_test_block_path(
        "full_ml_tests/toy_nmt/toy_nmt_luong_mul_attention.xml")
    data_dir = self.setup_holder.filepath_handler.get_test_data_path(
        "nmt/toy/")
    embedding_dir = self.setup_holder.filepath_handler.get_test_data_path(
        "embeddings/")

    nmt = BasicInterface()
    nmt.load_file(block_path)
    for variable_name, folder in (("data_folder", data_dir),
                                  ("embedding_folder", embedding_dir)):
        nmt.set_variable(variable_name, folder)
    nmt.initialize()

    parameter_count = nmt.ml_helper.count_parameters()
    self.assertEqual(376254, parameter_count)
def testParameterSearchCorrectCountSearchOptions(self):
    """Check how many search options the iris parameter-search block reports.

    ``count_search_options`` must return 7 with ``greedy=True`` and 24 with
    ``greedy=False``.
    """
    iris = BasicInterface()
    block_path = self.setup_holder.filepath_handler.get_test_block_path(
        "iris_tests/full_iris_with_parameter_search.xml")
    data_dir = self.setup_holder.filepath_handler.get_test_block_path(
        "iris_tests")

    iris.load_file(block_path)
    iris.set_variable("data_folder", data_dir)

    # (greedy flag, expected number of options), checked in this order.
    for greedy_flag, expected_count in ((True, 7), (False, 24)):
        option_count = iris.count_search_options(greedy=greedy_flag)
        self.assertEqual(expected_count, option_count)
def testParameterSearchFullRuns(self):
    """Run a non-greedy search and check which variables it affects.

    The returned search configuration must list exactly three affected
    variables: ``batch_size``, ``inner_dim`` and ``dropout``.
    """
    iris = BasicInterface()
    block_path = self.setup_holder.filepath_handler.get_test_block_path(
        "iris_tests/full_iris_with_parameter_search.xml")
    data_dir = self.setup_holder.filepath_handler.get_test_block_path(
        "iris_tests")

    iris.load_file(block_path)
    iris.set_variable("data_folder", data_dir)

    found_configuration = iris.search(greedy=False,
                                      minimize_valid_score=False)
    affected = found_configuration.get_affected_variables()

    self.assertEqual(3, len(affected))
    for variable_name in ("batch_size", "inner_dim", "dropout"):
        self.assertIn(variable_name, affected)
def testParameterSearchRunsNormallyUsingFirst(self):
    """Train the parameter-search iris block without running a search.

    After a plain initialize/train/evaluate cycle the score must lie in
    (0.9, 1.0], the batch generator must use batch size 10, and the MLP must
    report a transform shape whose second entry is 10 and a dropout rate of
    0.1 (presumably the first option of each searchable variable -- confirm
    against the block file).
    """
    iris = BasicInterface()
    block_path = self.setup_holder.filepath_handler.get_test_block_path(
        "iris_tests/full_iris_with_parameter_search.xml")
    data_dir = self.setup_holder.filepath_handler.get_test_block_path(
        "iris_tests")

    iris.load_file(block_path)
    iris.set_variable("data_folder", data_dir)
    iris.initialize()
    iris.train()

    score = iris.evaluate()
    self.assertGreaterEqual(1.0, score)
    self.assertLess(0.9, score)

    batch_generator = iris.get_execution_component("batch_generator")
    batch_size = batch_generator.get_value_model().get_batch_size()
    self.assertEqual(10, batch_size)

    mlp_component = iris.get_execution_component("mlp")
    transform_shape = mlp_component.get_value_model().get_transform_shape()
    self.assertEqual(10, transform_shape[1])
    dropout_rate = mlp_component.get_value_model().get_dropout_rate()
    self.assertEqual(0.1, dropout_rate)
def testProfileTrain(self):
    """Train the iris block with profiling switched on.

    The interface is initialized with ``profile=True`` and a log directory;
    after training the evaluation score must lie in (0.9, 1.0] and a
    ``timeline.json`` file must exist in that log directory.
    """
    iris = BasicInterface()
    block_path = self.setup_holder.filepath_handler.get_test_block_path(
        "iris_tests/full_iris.xml")
    data_dir = self.setup_holder.filepath_handler.get_test_block_path(
        "iris_tests")
    log_directory = self.setup_holder.filepath_handler.get_test_data_path(
        "test_output/logs")

    iris.load_file(block_path)
    iris.set_variable("data_folder", data_dir)
    iris.initialize(profile=True, log_dir=log_directory)
    iris.train()

    score = iris.evaluate()
    self.assertGreaterEqual(1.0, score)
    self.assertLess(0.9, score)

    timeline_path = log_directory + "/timeline.json"
    self.assertTrue(os.path.exists(timeline_path))
def testIrisTrainAndEvaluate(self):
    """Train the full iris block and check the evaluation score.

    The score returned by ``evaluate()`` must lie in (0.9, 1.0].
    """
    iris = BasicInterface()
    block_path = self.setup_holder.filepath_handler.get_test_block_path(
        "iris_tests/full_iris.xml")
    data_dir = self.setup_holder.filepath_handler.get_test_block_path(
        "iris_tests")

    iris.load_file(block_path)
    iris.set_variable("data_folder", data_dir)
    iris.initialize()
    iris.train()

    score = iris.evaluate()
    self.assertGreaterEqual(1.0, score)
    self.assertLess(0.9, score)
def testSeqtoSeqWithSgdLrDecease(self):
    """Check the SGD learning rate over training and the final predictions.

    Uses ``toy_nmt_sgd_learning_rate_decay.xml``. The learning rate read from
    the ``upd`` component must be 0.1 right after initialization and still
    0.1 after 10 training iterations, 0.0999 after a further 1991 iterations,
    and 0.0998001 after one more. After a final full ``train()`` every
    prediction, joined with spaces, must equal the matching line of
    ``tgt.txt`` with " EOS" appended.
    """
    filename = "full_ml_tests/toy_nmt/toy_nmt_sgd_learning_rate_decay.xml"
    block_filepath = self.setup_holder.filepath_handler.get_test_block_path(
        filename)
    data_filepath = self.setup_holder.filepath_handler.get_test_data_path(
        "nmt/toy/")
    embedding_filepath = self.setup_holder.filepath_handler.get_test_data_path(
        "embeddings/")

    interface = BasicInterface()
    interface.load_file(block_filepath)
    interface.set_variable("data_folder", data_filepath)
    interface.set_variable("embedding_folder", embedding_filepath)
    interface.initialize()

    # Context manager guarantees the handle is closed even if reading fails.
    with open(data_filepath + "tgt.txt") as target_file:
        gold_sentences = [line.strip() + " EOS" for line in target_file]

    upd_component = interface.get_execution_component("upd")
    learning_rate_variable = upd_component.value_model.get_learning_rate()

    def current_learning_rate():
        # Evaluate the learning-rate variable in the interface's session.
        return interface.ml_helper.tensorflow_session_model.run(
            learning_rate_variable, {})

    self.assertAlmostEqual(0.1, current_learning_rate())

    interface.train(iterations=10)
    self.assertAlmostEqual(0.1, current_learning_rate())

    interface.train(iterations=1991)
    self.assertAlmostEqual(0.0999, current_learning_rate())

    interface.train(iterations=1)
    self.assertAlmostEqual(0.0998001, current_learning_rate())

    interface.train()
    predictions = interface.predict()

    self.assertEqual(len(gold_sentences), len(predictions))
    for i, gold in enumerate(gold_sentences):
        self.assertEqual(gold, " ".join(predictions[i]))
def testIrisSaveAndLoad(self):
    """Train, save, reload into a fresh interface and re-evaluate.

    Any model already stored at the target path is removed first. Both the
    trained interface and the second interface that loads the saved model
    must evaluate to a score in (0.9, 1.0].
    """
    trained = BasicInterface()
    block_path = self.setup_holder.filepath_handler.get_test_block_path(
        "iris_tests/full_iris_no_shuffling.xml")
    data_dir = self.setup_holder.filepath_handler.get_test_block_path(
        "iris_tests")
    model_path = self.setup_holder.filepath_handler.get_test_data_path(
        "stored_models/iris/iris.model")

    # Start from a clean slate so the save below is what gets loaded.
    if os.path.exists(model_path):
        shutil.rmtree(model_path)

    trained.load_file(block_path)
    trained.set_variable("data_folder", data_dir)
    trained.initialize()
    trained.train()

    trained_score = trained.evaluate()
    self.assertGreaterEqual(1.0, trained_score)
    self.assertLess(0.9, trained_score)

    trained.save(model_path)

    restored = BasicInterface()
    restored.load_file(block_path)
    restored.set_variable("data_folder", data_dir)
    restored.initialize()
    restored.load(model_path)

    restored_score = restored.evaluate()
    self.assertGreaterEqual(1.0, restored_score)
    self.assertLess(0.9, restored_score)
def testTrainAndValidateMarginLoss(self):
    """Train the margin-loss relation-prediction block and validate it.

    After training on the ``toy-125`` data with the GloVe 100d embedding
    file, ``validate()`` must return a value below 0.25.
    """
    relation_model = BasicInterface()
    block_path = self.setup_holder.filepath_handler.get_test_block_path(
        "full_ml_tests/relation_prediction/pure_relation_prediction_margin_loss.xml")
    data_dir = self.setup_holder.filepath_handler.get_test_data_path(
        "relation_prediction/toy-125/")
    embedding_file = self.setup_holder.filepath_handler.get_test_data_path(
        "embeddings/glove.6B.100d.txt")

    relation_model.load_file(block_path)
    for variable_name, location in (("data_folder", data_dir),
                                    ("embedding_filepath", embedding_file)):
        relation_model.set_variable(variable_name, location)
    relation_model.initialize()
    relation_model.train()

    validation_value = relation_model.validate()
    self.assertGreater(0.25, validation_value)
def testPredictWithoutTraining(self):
    """Predict with the relation-prediction block straight after initialize.

    No training is run. ``predict()`` must return 51 items and the first
    item must contain the substring "->".
    """
    relation_model = BasicInterface()
    block_path = self.setup_holder.filepath_handler.get_test_block_path(
        "full_ml_tests/relation_prediction/pure_relation_prediction.xml")
    data_dir = self.setup_holder.filepath_handler.get_test_data_path(
        "relation_prediction/toy-125/")
    embedding_file = self.setup_holder.filepath_handler.get_test_data_path(
        "embeddings/glove.6B.100d.txt")

    relation_model.load_file(block_path)
    for variable_name, location in (("data_folder", data_dir),
                                    ("embedding_filepath", embedding_file)):
        relation_model.set_variable(variable_name, location)
    relation_model.initialize()

    predicted = relation_model.predict()
    self.assertEqual(51, len(predicted))
    first_prediction = predicted[0]
    self.assertIn("->", first_prediction)
def testPredictEmbeddings(self):
    """Check the sizes of the output of the predict_embeddings block.

    ``predict()`` must return 51 items; the first has length 81, the second
    length 60, and the first element of the first item has length 181.
    """
    embedding_model = BasicInterface()
    block_path = self.setup_holder.filepath_handler.get_test_block_path(
        "full_ml_tests/relation_prediction/predict_embeddings.xml")
    data_dir = self.setup_holder.filepath_handler.get_test_data_path(
        "relation_prediction/toy-125/")
    embedding_file = self.setup_holder.filepath_handler.get_test_data_path(
        "embeddings/glove.6B.100d.txt")

    embedding_model.load_file(block_path)
    for variable_name, location in (("data_folder", data_dir),
                                    ("embedding_filepath", embedding_file)):
        embedding_model.set_variable(variable_name, location)
    embedding_model.initialize()

    predicted = embedding_model.predict()
    self.assertEqual(51, len(predicted))
    # Same assertion order as before: item 0, item 1, then item 0's head.
    self.assertEqual(81, len(predicted[0]))
    self.assertEqual(60, len(predicted[1]))
    self.assertEqual(181, len(predicted[0][0]))
def testSeqtoSeqWithLuongKVAttention(self):
    """Train the toy NMT Luong KV-attention block and check its predictions.

    Uses ``toy_nmt_luong_kv_attention.xml``. After training, every
    prediction, joined with spaces, must equal the matching line of
    ``tgt.txt`` with " EOS" appended.
    """
    filename = "full_ml_tests/toy_nmt/toy_nmt_luong_kv_attention.xml"
    block_filepath = self.setup_holder.filepath_handler.get_test_block_path(
        filename)
    data_filepath = self.setup_holder.filepath_handler.get_test_data_path(
        "nmt/toy/")
    embedding_filepath = self.setup_holder.filepath_handler.get_test_data_path(
        "embeddings/")

    interface = BasicInterface()
    interface.load_file(block_filepath)
    interface.set_variable("data_folder", data_filepath)
    interface.set_variable("embedding_folder", embedding_filepath)
    interface.initialize()

    # Context manager guarantees the handle is closed even if reading fails.
    with open(data_filepath + "tgt.txt") as target_file:
        gold_sentences = [line.strip() + " EOS" for line in target_file]

    interface.train()
    predictions = interface.predict()

    self.assertEqual(len(gold_sentences), len(predictions))
    for i, gold in enumerate(gold_sentences):
        self.assertEqual(gold, " ".join(predictions[i]))
def testSeqtoSeqWithLuongMulAttentionSaveAndLoad(self):
    """Train, save and reload the toy NMT Luong mul-attention block.

    The trained interface's predictions must match the gold sentences (lines
    of ``tgt.txt`` with " EOS" appended). The model is then saved (any
    existing directory at the target path is removed first), loaded into a
    second, freshly initialized interface, and that interface's predictions
    must match the gold sentences as well.
    """
    filename = "full_ml_tests/toy_nmt/toy_nmt_luong_mul_attention.xml"
    block_filepath = self.setup_holder.filepath_handler.get_test_block_path(
        filename)
    data_filepath = self.setup_holder.filepath_handler.get_test_data_path(
        "nmt/toy/")
    embedding_filepath = self.setup_holder.filepath_handler.get_test_data_path(
        "embeddings/")

    interface = BasicInterface()
    interface.load_file(block_filepath)
    interface.set_variable("data_folder", data_filepath)
    interface.set_variable("embedding_folder", embedding_filepath)
    interface.initialize()

    # Context manager guarantees the handle is closed even if reading fails.
    with open(data_filepath + "tgt.txt") as target_file:
        gold_sentences = [line.strip() + " EOS" for line in target_file]

    def assert_matches_gold(predictions):
        # Same check for the trained and the reloaded model.
        self.assertEqual(len(gold_sentences), len(predictions))
        for i, gold in enumerate(gold_sentences):
            self.assertEqual(gold, " ".join(predictions[i]))

    interface.train()
    assert_matches_gold(interface.predict())

    model_filepath = self.setup_holder.filepath_handler.get_test_data_path(
        "stored_models/toy_nmt/luong")
    if os.path.exists(model_filepath):
        shutil.rmtree(model_filepath)

    interface.save(model_filepath)

    interface_2 = BasicInterface()
    interface_2.load_file(block_filepath)
    interface_2.set_variable("data_folder", data_filepath)
    interface_2.set_variable("embedding_folder", embedding_filepath)
    interface_2.initialize()
    interface_2.load(model_filepath)

    assert_matches_gold(interface_2.predict())
def testBasicSeqtoSeqPerplexity(self):
    """Compare validation values before and after training.

    Uses ``basic_toy_nmt_with_perplexity.xml``. The value from ``validate()``
    must exceed 50 before training and lie strictly between 1 and 1.01 after
    training.
    """
    block_path = self.setup_holder.filepath_handler.get_test_block_path(
        "full_ml_tests/toy_nmt/basic_toy_nmt_with_perplexity.xml")
    data_dir = self.setup_holder.filepath_handler.get_test_data_path(
        "nmt/toy/")
    embedding_dir = self.setup_holder.filepath_handler.get_test_data_path(
        "embeddings/")

    nmt = BasicInterface()
    nmt.load_file(block_path)
    for variable_name, folder in (("data_folder", data_dir),
                                  ("embedding_folder", embedding_dir)):
        nmt.set_variable(variable_name, folder)
    nmt.initialize()

    before_training = nmt.validate()
    nmt.train()
    after_training = nmt.validate()

    self.assertLess(50, before_training)
    self.assertLess(1, after_training)
    self.assertGreater(1.01, after_training)
def testSeqtoSeqWithUnkReplacement(self):
    """Check that predictions contain no "UNK" tokens and few mismatches.

    Uses ``toy_nmt_replace_unks.xml`` with a console logger configured for
    ``token_replacement`` formatting. After training, each predicted token is
    compared position by position with the gold tokens (lines of ``tgt.txt``
    with " EOS" appended, split on single spaces). No predicted token may be
    "UNK", and fewer than 10 non-UNK tokens may differ from the gold token.
    """
    filename = "full_ml_tests/toy_nmt/toy_nmt_replace_unks.xml"
    block_filepath = self.setup_holder.filepath_handler.get_test_block_path(
        filename)
    data_filepath = self.setup_holder.filepath_handler.get_test_data_path(
        "nmt/toy/")
    embedding_filepath = self.setup_holder.filepath_handler.get_test_data_path(
        "embeddings/")

    interface = BasicInterface()
    interface.load_file(block_filepath)
    interface.set_variable("data_folder", data_filepath)
    interface.set_variable("embedding_folder", embedding_filepath)
    interface.add_console_logger({"formatting": ["token_replacement"]})
    interface.initialize()

    # Context manager guarantees the handle is closed even if reading fails.
    with open(data_filepath + "tgt.txt") as target_file:
        gold_sentences = [line.strip() + " EOS" for line in target_file]

    interface.train()
    predictions = interface.predict()

    self.assertEqual(len(gold_sentences), len(predictions))

    unk_count = 0
    missed_count = 0
    for i, gold in enumerate(gold_sentences):
        for j, gold_token in enumerate(gold.split(" ")):
            predicted_token = predictions[i][j]
            if predicted_token == "UNK":
                unk_count += 1
            elif predicted_token != gold_token:
                missed_count += 1

    self.assertEqual(0, unk_count)
    self.assertGreater(10, missed_count)
def testParameterSearchCanApply(self):
    """Apply the result of a greedy search, then train and evaluate.

    The configuration returned by ``search(greedy=True, ...)`` is passed to
    ``apply_search_configuration``; the subsequent evaluation score must lie
    in (0.9, 1.0].
    """
    iris = BasicInterface()
    block_path = self.setup_holder.filepath_handler.get_test_block_path(
        "iris_tests/full_iris_with_parameter_search.xml")
    data_dir = self.setup_holder.filepath_handler.get_test_block_path(
        "iris_tests")

    iris.load_file(block_path)
    iris.set_variable("data_folder", data_dir)

    chosen_configuration = iris.search(greedy=True,
                                       minimize_valid_score=False)
    iris.apply_search_configuration(chosen_configuration)
    iris.train()

    score = iris.evaluate()
    self.assertGreaterEqual(1.0, score)
    self.assertLess(0.9, score)
def testLoggerSavesLog(self):
    """Check that a file logger writes its log file during training.

    The iris log directory is removed if present, a file logger is attached
    after initialization, and after train/evaluate the score must lie in
    (0.9, 1.0] and the log file must exist.
    """
    iris = BasicInterface()
    block_path = self.setup_holder.filepath_handler.get_test_block_path(
        "iris_tests/full_iris.xml")
    data_dir = self.setup_holder.filepath_handler.get_test_block_path(
        "iris_tests")
    log_file = self.setup_holder.filepath_handler.get_test_data_path(
        "test_output/logs/iris/log.txt")
    log_directory = self.setup_holder.filepath_handler.get_test_data_path(
        "test_output/logs/iris/")

    # Remove leftovers so the existence check below reflects this run.
    if os.path.exists(log_directory):
        shutil.rmtree(log_directory)

    iris.load_file(block_path)
    iris.set_variable("data_folder", data_dir)
    iris.initialize()

    what_to_log = {
        "training": ["status", "loss", "parameters"],
        "validation": ["all"]
    }
    iris.add_file_logger(what_to_log, log_file)

    iris.train()
    score = iris.evaluate()

    self.assertGreaterEqual(1.0, score)
    self.assertLess(0.9, score)
    self.assertTrue(os.path.exists(log_file))
def testReverseSequenceTaskWithCombinedVocab(self):
    """Train the pointer-plus-decoder block and check its predictions.

    Uses ``pointer_plus_decoder.xml`` with the
    ``reverse_sequence_plus_vocab`` data. After training, every prediction,
    joined with spaces, must equal the matching line of ``tgt.txt`` with
    " EOS" appended.
    """
    filename = "full_ml_tests/pointer_network/pointer_plus_decoder.xml"
    block_filepath = self.setup_holder.filepath_handler.get_test_block_path(
        filename)
    data_filepath = self.setup_holder.filepath_handler.get_test_data_path(
        "nmt/reverse_sequence_plus_vocab/")
    embedding_filepath = self.setup_holder.filepath_handler.get_test_data_path(
        "embeddings/")

    interface = BasicInterface()
    interface.load_file(block_filepath)
    interface.set_variable("data_folder", data_filepath)
    interface.set_variable("embedding_folder", embedding_filepath)
    interface.initialize()

    # Context manager guarantees the handle is closed even if reading fails.
    with open(data_filepath + "tgt.txt") as target_file:
        gold_sentences = [line.strip() + " EOS" for line in target_file]

    interface.train()
    predictions = interface.predict()

    self.assertEqual(len(gold_sentences), len(predictions))
    for i, gold in enumerate(gold_sentences):
        self.assertEqual(gold, " ".join(predictions[i]))
from Mindblocks.interface import BasicInterface

# Module-level setup: load the relation-prediction block, bind its two
# variables and initialize it so the functions below can call predict().
interface = BasicInterface()
block_filepath = "blocks/graph_free/deep_elmo_relation_prediction.block"
embedding_filepath = "data/glove.6B/glove.6B.100d.txt"
data_filepath = "data/webquestions_small_valid"
interface.load_file(block_filepath)
interface.set_variable("embedding_filepath", embedding_filepath)
interface.set_variable("data_folder", data_filepath)
interface.initialize()


def print_test():
    """Write the interface's predictions to ``output.txt``, one per line.

    Entries are separated by a newline, with no trailing newline after the
    last one. A non-None entry is written as its elements 0, 2 and 3 joined
    by tabs; a None entry is written as the text "None".
    """
    with open("output.txt", 'w') as output_file:
        for position, entry in enumerate(interface.predict()):
            # Separator goes before every entry except the first.
            if position > 0:
                output_file.write("\n")
            if entry is None:
                output_file.write(str(entry))
            else:
                output_file.write("\t".join([entry[0], entry[2], entry[3]]))


def print_debug():
    # NOTE(review): only this flag initialization is visible here; the rest
    # of the body may be missing from this view -- confirm against the file.
    first = True
# Script: create a BasicInterface and call make_gui() on it.
from Mindblocks.interface import BasicInterface

controller = BasicInterface()
# NOTE(review): make_gui() presumably opens the graphical front end --
# confirm against BasicInterface.
controller.make_gui()