def init_model(self, model_config):
    """Build the network described by ``model_config`` and restore its weights.

    The hyper-parameters are read from
    ``<self.model_config_path>/<model_name>.json``; the embedding matrix and
    the extra symbols are taken from attributes already present on ``self``
    (``embedding_mat`` and ``extral_symbol``).

    Args:
        model_config: mapping providing the keys ``"model_name"``,
            ``"model_str"`` and ``"model_dir"``.

    Returns:
        The model instance after ``load_model(model_dir, model_str)``.

    Raises:
        ValueError: if ``FLAGS.scope`` names none of the supported models.
    """
    model_name = model_config["model_name"]
    model_str = model_config["model_str"]
    model_dir = model_config["model_dir"]

    FLAGS = namespace_utils.load_namespace(
        os.path.join(self.model_config_path, model_name + ".json"))

    if FLAGS.scope == "BiMPM":
        model = BiMPM()
    elif FLAGS.scope == "ESIM":
        model = ESIM()
    elif FLAGS.scope == "BiBLOSA":
        model = BiBLOSA()
    elif FLAGS.scope == "BaseTransformer":
        model = BaseTransformer()
    elif FLAGS.scope == "UniversalTransformer":
        model = UniversalTransformer()
    else:
        # Fail fast: without this branch an unknown scope only showed up
        # further down as an UnboundLocalError on ``model``.
        raise ValueError(
            "unsupported model scope: {}".format(FLAGS.scope))

    # The embedding information lives on the instance, not in the JSON file.
    FLAGS.token_emb_mat = self.embedding_mat
    FLAGS.char_emb_mat = 0
    FLAGS.vocab_size = self.embedding_mat.shape[0]
    FLAGS.char_vocab_size = 0
    FLAGS.emb_size = self.embedding_mat.shape[1]
    FLAGS.extra_symbol = self.extral_symbol

    model.build_placeholder(FLAGS)
    model.build_op()
    model.init_step()
    model.load_model(model_dir, model_str)
    return model
def init_model(self, model_config):
    """Build a model plus its vocabulary from ``model_config``.

    Hyper-parameters come from ``<model_config_path>/<model_name>.json`` and
    the embedding information from the pickle file at ``vocab_path``.

    Args:
        model_config: mapping providing the keys ``"model_name"``,
            ``"model_str"``, ``"model_dir"``, ``"model_config_path"`` and
            ``"vocab_path"``.

    Returns:
        A ``(model, token2id)`` tuple: the model after ``load_model`` and the
        ``"token2id"`` entry of the unpickled embedding information.

    Raises:
        ValueError: if ``FLAGS.scope`` names none of the supported models.
    """
    model_name = model_config["model_name"]
    model_str = model_config["model_str"]
    model_dir = model_config["model_dir"]
    model_config_path = model_config["model_config_path"]

    FLAGS = namespace_utils.load_namespace(
        os.path.join(model_config_path, model_name + ".json"))

    if FLAGS.scope == "BiMPM":
        model = BiMPM()
    elif FLAGS.scope == "ESIM":
        model = ESIM()
    elif FLAGS.scope == "BiBLOSA":
        model = BiBLOSA()
    elif FLAGS.scope == "BaseTransformer":
        model = BaseTransformer()
    elif FLAGS.scope == "UniversalTransformer":
        model = UniversalTransformer()
    else:
        # Fail fast instead of hitting an UnboundLocalError on ``model`` later.
        raise ValueError(
            "unsupported model scope: {}".format(FLAGS.scope))

    vocab_path = model_config["vocab_path"]
    # NOTE(review): unpickling runs arbitrary code; vocab_path must point to
    # a trusted file.
    with open(vocab_path, "rb") as vocab_file:
        if sys.version_info < (3, ):
            embedding_info = pkl.load(vocab_file)
        else:
            # Python 3 needs an explicit encoding for the byte strings in
            # this pickle (presumably written under Python 2 -- confirm).
            embedding_info = pkl.load(vocab_file, encoding="iso-8859-1")

    if FLAGS.apply_elmo:
        elmo_mat = embedding_info["elmo"]
        FLAGS.elmo_token_emb_mat = elmo_mat
        FLAGS.elmo_vocab_size = elmo_mat.shape[0]
        FLAGS.elmo_emb_size = elmo_mat.shape[1]

    token2id = embedding_info["token2id"]
    print(len(token2id))

    embedding_mat = embedding_info["embedding_matrix"]
    extral_symbol = embedding_info["extra_symbol"]

    FLAGS.token_emb_mat = embedding_mat
    FLAGS.char_emb_mat = 0
    FLAGS.vocab_size = embedding_mat.shape[0]
    FLAGS.char_vocab_size = 0
    FLAGS.emb_size = embedding_mat.shape[1]
    FLAGS.extra_symbol = extral_symbol

    model.build_placeholder(FLAGS)
    model.build_op()
    model.init_step()
    model.load_model(model_dir, model_str)
    return model, token2id
def init_model(self, model_config):
    """Build a model, optionally with label embeddings, from ``model_config``.

    Args:
        model_config: mapping providing the keys ``"model_name"``,
            ``"model_str"``, ``"model_dir"``, ``"model_config_path"`` and
            ``"vocab_path"``.  An optional ``"label_emb_path"`` entry points
            to a pickle whose content is stored in ``FLAGS.class_emb_mat``.

    Returns:
        A ``(model, token2id)`` tuple: the model after ``load_model`` and the
        ``"token2id"`` entry of the unpickled embedding information.

    Raises:
        ValueError: if ``FLAGS.scope`` names none of the supported models.
    """
    model_name = model_config["model_name"]
    model_str = model_config["model_str"]
    model_dir = model_config["model_dir"]
    model_config_path = model_config["model_config_path"]
    vocab_path = model_config["vocab_path"]

    # NOTE(review): unpickling runs arbitrary code; both pickle paths must
    # point to trusted files.
    with open(vocab_path, "rb") as vocab_file:
        if sys.version_info < (3, ):
            embedding_info = pkl.load(vocab_file)
        else:
            embedding_info = pkl.load(vocab_file, encoding="iso-8859-1")

    # The original tested model_config.get("label_emb_path") but then opened
    # an undefined local ``label_emb_path``, raising NameError whenever the
    # key was configured.
    label_emb_path = model_config.get("label_emb_path", None)
    if label_emb_path:
        with open(label_emb_path, "rb") as label_file:
            label_emb_mat = pkl.load(label_file)
    else:
        label_emb_mat = None

    token2id = embedding_info["token2id"]
    embedding_mat = embedding_info["embedding_matrix"]
    extral_symbol = embedding_info["extra_symbol"]

    FLAGS = namespace_utils.load_namespace(
        os.path.join(model_config_path, model_name + ".json"))

    if FLAGS.scope == "ESIM":
        model = ESIM()
    elif FLAGS.scope == "BiBLOSA":
        model = BiBLOSA()
    elif FLAGS.scope == "BaseTransformer":
        model = BaseTransformer()
    elif FLAGS.scope == "UniversalTransformer":
        model = UniversalTransformer()
    elif FLAGS.scope == "ReAugument":
        model = ReAugument()
    else:
        # Fail fast instead of hitting an UnboundLocalError on ``model`` later.
        raise ValueError(
            "unsupported model scope: {}".format(FLAGS.scope))

    FLAGS.token_emb_mat = embedding_mat
    FLAGS.char_emb_mat = 0
    FLAGS.vocab_size = embedding_mat.shape[0]
    FLAGS.char_vocab_size = 0
    FLAGS.emb_size = embedding_mat.shape[1]
    FLAGS.extra_symbol = extral_symbol
    FLAGS.class_emb_mat = label_emb_mat

    model.build_placeholder(FLAGS)
    model.build_op()
    model.init_step()
    model.load_model(model_dir, model_str)
    return model, token2id
def train(FLAGS):
    """Run the training loop inside an explicitly created TensorFlow session.

    The model class is picked from ``FLAGS.scope`` and is trained for
    ``FLAGS.max_epochs`` epochs; accuracy and loss are printed per batch.

    NOTE(review): ``train_anchor``, ``train_check``, ``train_label`` and
    ``token2id`` are read but never assigned in this function, so they must
    be resolvable from an enclosing/module scope -- confirm where they are
    defined.

    Args:
        FLAGS: namespace providing ``scope``, ``max_epochs``, ``batch_size``
            and whatever ``model.build_placeholder`` reads.
    """
    # The visible device is pinned to GPU "1".
    os.environ["CUDA_VISIBLE_DEVICES"] = "1"
    memory_cap = tf.GPUOptions(per_process_gpu_memory_fraction=0.95)

    with tf.Graph().as_default():
        tf_config = tf.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=True,
            gpu_options=memory_cap)
        sess = tf.Session(config=tf_config)
        print("started session")

        with sess.as_default():
            scope = FLAGS.scope
            if scope == "BiMPM":
                model = BiMPM()
            elif scope == "ESIM":
                model = ESIM()
            elif scope == "BiBLOSA":
                model = BiBLOSA()
            elif scope == "BaseTransformer":
                model = BaseTransformer()
            elif scope == "UniversalTransformer":
                model = UniversalTransformer()

            model.build_placeholder(FLAGS)
            model.build_op()
            model.init_step(sess)

            for _epoch in range(FLAGS.max_epochs):
                batches = get_batch_data.get_batches(
                    train_anchor,
                    train_check,
                    train_label,
                    FLAGS.batch_size,
                    token2id,
                    is_training=True)
                for anchor, check, label in batches:
                    # step() yields five values; only accuracy and loss are
                    # reported here.
                    loss, _, _step, accuracy, _preds = model.step(
                        sess, [anchor, check, label], is_training=True)
                    print(accuracy, loss)
def test(config):
    """Evaluate a saved sentence-pair model and print its loss and accuracy.

    Args:
        config: mapping providing ``"model_config_path"``, ``"w2v_path"``,
            ``"vocab_path"``, ``"test_path"``, ``"model_dir"``,
            ``"model_str"`` and ``"model"``; ``"gpu_id"`` is optional and
            defaults to ``""``.

    Raises:
        ValueError: if ``FLAGS.scope`` names none of the supported models.
        RuntimeError: if no test batch could be evaluated.
    """
    model_config_path = config["model_config_path"]
    FLAGS = namespace_utils.load_namespace(model_config_path)

    os.environ["CUDA_VISIBLE_DEVICES"] = config.get("gpu_id", "")

    w2v_path = config["w2v_path"]
    vocab_path = config["vocab_path"]
    test_path = config["test_path"]

    model_dir = config["model_dir"]
    model_str = config["model_str"]
    model_name = config["model"]

    run_dir = os.path.join(model_dir, model_name)
    checkpoint_dir = os.path.join(run_dir, "models")
    # makedirs also creates missing parents, so a nested model_dir works
    # (os.mkdir did not).
    for directory in (os.path.join(run_dir, "logs"), checkpoint_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    [test_anchor, test_check, test_label, test_anchor_len, test_check_len,
     embedding_info] = prepare_data(
         test_path, w2v_path, vocab_path, make_vocab=False)

    token2id = embedding_info["token2id"]
    embedding_mat = embedding_info["embedding_matrix"]
    extral_symbol = embedding_info["extra_symbol"]

    FLAGS.token_emb_mat = embedding_mat
    FLAGS.char_emb_mat = 0
    FLAGS.vocab_size = embedding_mat.shape[0]
    FLAGS.char_vocab_size = 0
    FLAGS.emb_size = embedding_mat.shape[1]
    FLAGS.extra_symbol = extral_symbol

    if FLAGS.scope == "BiMPM":
        model = BiMPM()
    elif FLAGS.scope == "ESIM":
        model = ESIM()
    elif FLAGS.scope == "BiBLOSA":
        model = BiBLOSA()
    elif FLAGS.scope == "BaseTransformer":
        model = BaseTransformer()
    elif FLAGS.scope == "UniversalTransformer":
        model = UniversalTransformer()
    else:
        # Fail fast instead of hitting an UnboundLocalError on ``model`` later.
        raise ValueError(
            "unsupported model scope: {}".format(FLAGS.scope))

    model.build_placeholder(FLAGS)
    model.build_op()
    model.init_step()
    model.load_model(checkpoint_dir, model_str)

    test_data = get_batch_data.get_batches(
        test_anchor, test_check, test_label, FLAGS.batch_size, token2id,
        is_training=False)

    test_loss, test_accuracy = 0, 0
    cnt = 0
    for index, corpus in enumerate(test_data):
        anchor, check, label = corpus
        try:
            [loss, logits, pred_probs, accuracy] = model.infer(
                [anchor, check, label], mode="test", is_training=False)
            batch_size = anchor.shape[0]
            # Metrics are weighted by batch size so a smaller last batch
            # does not skew the averages.
            test_loss += loss * batch_size
            test_accuracy += accuracy * batch_size
            cnt += batch_size
        except Exception as err:
            # Still best-effort, but no longer silent, and Ctrl-C is not
            # swallowed any more (the original used a bare ``except``).
            print("skipping test batch {} after error: {!r}".format(
                index, err))
            continue

    if cnt == 0:
        raise RuntimeError("no test batch could be evaluated")

    test_loss /= float(cnt)
    test_accuracy /= float(cnt)
    print(test_loss, test_accuracy)
def train(config):
    """Train a sentence-pair model, checkpointing when dev metrics improve.

    Each epoch runs one pass over the training batches and one over the dev
    batches; a checkpoint plus an ``info.json`` record is written whenever
    dev accuracy rises above, or dev loss falls below, the values stored at
    the previous checkpoint.

    Args:
        config: mapping providing ``"model_config_path"``, ``"train_path"``,
            ``"dev_path"``, ``"w2v_path"``, ``"vocab_path"``, ``"model_dir"``
            and ``"model"``; ``"gpu_id"`` is optional and defaults to ``""``.

    Raises:
        ValueError: if ``FLAGS.scope`` names none of the supported models.
        RuntimeError: if every batch of a train or dev pass failed.
    """
    model_config_path = config["model_config_path"]
    FLAGS = namespace_utils.load_namespace(model_config_path)

    os.environ["CUDA_VISIBLE_DEVICES"] = config.get("gpu_id", "")

    train_path = config["train_path"]
    w2v_path = config["w2v_path"]
    vocab_path = config["vocab_path"]
    dev_path = config["dev_path"]

    model_dir = config["model_dir"]
    model_name = config["model"]

    run_dir = os.path.join(model_dir, model_name)
    log_dir = os.path.join(run_dir, "logs")
    checkpoint_dir = os.path.join(run_dir, "models")
    # makedirs also creates missing parents, so a nested model_dir works.
    for directory in (log_dir, checkpoint_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    logger = logger_utils.get_logger(os.path.join(log_dir, "log.info"))

    [train_anchor, train_check, train_label, train_anchor_len,
     train_check_len, embedding_info] = prepare_data(
         train_path, w2v_path, vocab_path, make_vocab=True)

    # The embedding info returned by this second call is the one used below.
    [dev_anchor, dev_check, dev_label, dev_anchor_len, dev_check_len,
     embedding_info] = prepare_data(
         dev_path, w2v_path, vocab_path, make_vocab=False)

    token2id = embedding_info["token2id"]
    embedding_mat = embedding_info["embedding_matrix"]
    extral_symbol = embedding_info["extra_symbol"]

    FLAGS.token_emb_mat = embedding_mat
    FLAGS.char_emb_mat = 0
    FLAGS.vocab_size = embedding_mat.shape[0]
    FLAGS.char_vocab_size = 0
    FLAGS.emb_size = embedding_mat.shape[1]
    FLAGS.extra_symbol = extral_symbol

    if FLAGS.scope == "BiMPM":
        model = BiMPM()
    elif FLAGS.scope == "ESIM":
        model = ESIM()
    elif FLAGS.scope == "BiBLOSA":
        model = BiBLOSA()
    elif FLAGS.scope == "BaseTransformer":
        model = BaseTransformer()
    elif FLAGS.scope == "UniversalTransformer":
        model = UniversalTransformer()
    else:
        # Fail fast instead of hitting an UnboundLocalError on ``model`` later.
        raise ValueError(
            "unsupported model scope: {}".format(FLAGS.scope))

    model.build_placeholder(FLAGS)
    model.build_op()
    model.init_step()

    best_dev_accuracy, best_dev_loss = 0, 100

    for epoch in range(FLAGS.max_epochs):
        # ---- training pass ----
        train_loss, train_accuracy = 0, 0
        train_data = get_batch_data.get_batches(
            train_anchor, train_check, train_label, FLAGS.batch_size,
            token2id, is_training=True)

        cnt = 0
        for index, corpus in enumerate(train_data):
            anchor, check, label = corpus
            try:
                [loss, _, global_step, accuracy, preds] = model.step(
                    [anchor, check, label], is_training=True)
                batch_size = anchor.shape[0]
                train_loss += loss * batch_size
                train_accuracy += accuracy * batch_size
                cnt += batch_size
            except Exception as err:
                # Still best-effort, but the failure is logged and Ctrl-C is
                # no longer swallowed (the original used a bare ``except``).
                logger.info(
                    "skipping train batch {} after error: {!r}".format(
                        index, err))
                continue

        if cnt == 0:
            raise RuntimeError(
                "no training batch succeeded in epoch {}".format(epoch))
        train_loss /= float(cnt)
        train_accuracy /= float(cnt)

        info = OrderedDict()
        info["epoch"] = str(epoch)
        info["train_loss"] = str(train_loss)
        # Record the epoch average; the original stored the accuracy of the
        # last batch only.
        info["train_accuracy"] = str(train_accuracy)

        logger.info("epoch\t{}\ttrain\tloss\t{}\taccuracy\t{}".format(
            epoch, train_loss, train_accuracy))

        # ---- dev pass ----
        dev_data = get_batch_data.get_batches(
            dev_anchor, dev_check, dev_label, FLAGS.batch_size, token2id,
            is_training=False)

        dev_loss, dev_accuracy = 0, 0
        cnt = 0
        for index, corpus in enumerate(dev_data):
            anchor, check, label = corpus
            try:
                [loss, logits, pred_probs, accuracy] = model.infer(
                    [anchor, check, label], mode="test", is_training=False)
                batch_size = anchor.shape[0]
                dev_loss += loss * batch_size
                dev_accuracy += accuracy * batch_size
                cnt += batch_size
            except Exception as err:
                logger.info(
                    "skipping dev batch {} after error: {!r}".format(
                        index, err))
                continue

        if cnt == 0:
            raise RuntimeError(
                "no dev batch succeeded in epoch {}".format(epoch))
        dev_loss /= float(cnt)
        dev_accuracy /= float(cnt)

        info["dev_loss"] = str(dev_loss)
        info["dev_accuracy"] = str(dev_accuracy)

        logger.info("epoch\t{}\tdev\tloss\t{}\taccuracy\t{}".format(
            epoch, dev_loss, dev_accuracy))

        # NOTE(review): with ``or`` both "best" values are overwritten even
        # when only one metric improved, so they are not monotonic -- confirm
        # this is intended.
        if dev_accuracy > best_dev_accuracy or dev_loss < best_dev_loss:
            timestamp = str(int(time.time()))
            model.save_model(
                checkpoint_dir,
                model_name + "_{}_{}_{}".format(
                    timestamp, dev_loss, dev_accuracy))
            best_dev_accuracy = dev_accuracy
            best_dev_loss = dev_loss

            info["best_dev_loss"] = str(dev_loss)
            info["best_accuracy"] = str(dev_accuracy)
            logger_utils.json_info(os.path.join(log_dir, "info.json"), info)
            logger.info(
                "epoch\t{}\tbest_dev\tloss\t{}\tbest_accuracy\t{}".format(
                    epoch, dev_loss, dev_accuracy))
def train(config):
    """Train a sentence-pair model with optional ELMo embeddings.

    Macro-averaged F1 and loss on the dev set drive checkpointing; after
    ``toleration`` (10) consecutive epochs without a new checkpoint the
    learning rate is multiplied by 0.9.

    Args:
        config: mapping providing ``"model_config_path"``, ``"train_path"``,
            ``"dev_path"``, ``"w2v_path"``, ``"vocab_path"``, ``"model_dir"``,
            ``"model"`` and ``"data_type"``; ``"gpu_id"`` and
            ``"elmo_w2v_path"`` are optional.

    Raises:
        ValueError: if ``FLAGS.scope`` names none of the supported models.
        RuntimeError: if every batch of a train or dev pass failed.
    """
    model_config_path = config["model_config_path"]
    FLAGS = namespace_utils.load_namespace(model_config_path)

    os.environ["CUDA_VISIBLE_DEVICES"] = config.get("gpu_id", "")

    train_path = config["train_path"]
    w2v_path = config["w2v_path"]
    vocab_path = config["vocab_path"]
    dev_path = config["dev_path"]
    elmo_w2v_path = config.get("elmo_w2v_path", None)

    model_dir = config["model_dir"]

    # An "output_folder_name" entry in FLAGS overrides the configured name.
    try:
        model_name = FLAGS["output_folder_name"]
    except (KeyError, TypeError, AttributeError):
        model_name = config["model"]
    print(model_name, "====model name====")

    run_dir = os.path.join(model_dir, model_name)
    log_dir = os.path.join(run_dir, "logs")
    checkpoint_dir = os.path.join(run_dir, "models")
    # makedirs also creates missing parents, so a nested model_dir works.
    for directory in (log_dir, checkpoint_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    logger = logger_utils.get_logger(os.path.join(log_dir, "log.info"))

    FLAGS.vocab_path = vocab_path
    # Written before the embedding matrices are attached to FLAGS below.
    with open(os.path.join(log_dir, model_name + ".json"), "w") as flags_file:
        json.dump(FLAGS, flags_file)

    [train_anchor, train_check, train_label, train_anchor_len,
     train_check_len, embedding_info] = prepare_data(
         train_path, w2v_path, vocab_path,
         make_vocab=True,
         elmo_w2v_path=elmo_w2v_path,
         elmo_pca=FLAGS.elmo_pca,
         data_type=config["data_type"])

    # The embedding info returned by this second call is the one used below.
    [dev_anchor, dev_check, dev_label, dev_anchor_len, dev_check_len,
     embedding_info] = prepare_data(
         dev_path, w2v_path, vocab_path,
         make_vocab=False,
         elmo_w2v_path=elmo_w2v_path,
         elmo_pca=FLAGS.elmo_pca,
         data_type=config["data_type"])

    token2id = embedding_info["token2id"]
    embedding_mat = embedding_info["embedding_matrix"]
    extral_symbol = embedding_info["extra_symbol"]

    FLAGS.token_emb_mat = embedding_mat
    FLAGS.char_emb_mat = 0
    FLAGS.vocab_size = embedding_mat.shape[0]
    FLAGS.char_vocab_size = 0
    FLAGS.emb_size = embedding_mat.shape[1]
    FLAGS.extra_symbol = extral_symbol

    logger.info("==vocab size {}".format(len(token2id)))
    logger.info("vocab path {}".format(vocab_path))

    if FLAGS.apply_elmo:
        elmo_mat = embedding_info["elmo"]
        FLAGS.elmo_token_emb_mat = elmo_mat
        FLAGS.elmo_vocab_size = elmo_mat.shape[0]
        FLAGS.elmo_emb_size = elmo_mat.shape[1]

    if FLAGS.scope == "BiMPM":
        model = BiMPM()
    elif FLAGS.scope == "ESIM":
        model = ESIM()
    elif FLAGS.scope == "BiBLOSA":
        model = BiBLOSA()
    elif FLAGS.scope == "BaseTransformer":
        model = BaseTransformer()
    elif FLAGS.scope == "UniversalTransformer":
        model = UniversalTransformer()
    elif FLAGS.scope == "DRCN":
        model = DRCN()
    elif FLAGS.scope == "RepresentationModel":
        model = RepresentationModel()
    elif FLAGS.scope == "MAN":
        model = MAN()
    elif FLAGS.scope == "HardAttention":
        model = HardAttention()
    else:
        # Fail fast instead of hitting an UnboundLocalError on ``model`` later.
        raise ValueError(
            "unsupported model scope: {}".format(FLAGS.scope))

    model.build_placeholder(FLAGS)
    model.build_op()
    model.init_step()

    print("========begin to train=========")

    best_dev_loss, best_dev_f1 = 100, 0
    learning_rate = FLAGS.learning_rate
    toleration_cnt = 0
    toleration = 10

    for epoch in range(FLAGS.max_epochs):
        # ---- training pass ----
        train_loss = 0
        train_data = get_batch_data.get_batches(
            train_anchor, train_check, train_label, FLAGS.batch_size,
            token2id, is_training=True)

        cnt = 0
        train_label_lst, train_true_lst = [], []
        for index, corpus in enumerate(train_data):
            anchor, check, label = corpus
            try:
                [loss, _, global_step, accuracy, preds] = model.step(
                    [anchor, check, label],
                    is_training=True,
                    learning_rate=learning_rate)
                # Convert everything first so a failure cannot leave the
                # prediction and gold lists with different lengths.
                predicted = np.argmax(preds, axis=-1).tolist()
                gold = label.tolist()
                batch_size = anchor.shape[0]
            except Exception as err:
                # Still best-effort, but the failure is logged and Ctrl-C is
                # no longer swallowed (the original used a bare ``except``).
                logger.info(
                    "skipping train batch {} after error: {!r}".format(
                        index, err))
                continue
            train_label_lst += predicted
            train_true_lst += gold
            train_loss += loss * batch_size
            cnt += batch_size

        if cnt == 0:
            raise RuntimeError(
                "no training batch succeeded in epoch {}".format(epoch))
        train_loss /= float(cnt)

        train_accuracy = accuracy_score(train_true_lst, train_label_lst)
        train_f1 = f1_score(train_true_lst, train_label_lst, average='macro')

        info = OrderedDict()
        info["epoch"] = str(epoch)
        info["train_loss"] = str(train_loss)
        info["train_accuracy"] = str(train_accuracy)
        info["train_f1"] = str(train_f1)

        logger.info("epoch\t{}\ttrain\tloss\t{}\taccuracy\t{}\tf1\t{}".format(
            epoch, train_loss, train_accuracy, train_f1))

        # ---- dev pass ----
        dev_data = get_batch_data.get_batches(
            dev_anchor, dev_check, dev_label, FLAGS.batch_size, token2id,
            is_training=False)

        dev_loss = 0
        cnt = 0
        dev_label_lst, dev_true_lst = [], []
        for index, corpus in enumerate(dev_data):
            anchor, check, label = corpus
            try:
                [loss, logits, pred_probs, accuracy] = model.infer(
                    [anchor, check, label],
                    mode="test",
                    is_training=False,
                    learning_rate=learning_rate)
                predicted = np.argmax(pred_probs, axis=-1).tolist()
                gold = label.tolist()
                batch_size = anchor.shape[0]
            except Exception as err:
                logger.info(
                    "skipping dev batch {} after error: {!r}".format(
                        index, err))
                continue
            dev_label_lst += predicted
            dev_true_lst += gold
            dev_loss += loss * batch_size
            cnt += batch_size

        if cnt == 0:
            raise RuntimeError(
                "no dev batch succeeded in epoch {}".format(epoch))
        dev_loss /= float(cnt)

        dev_accuracy = accuracy_score(dev_true_lst, dev_label_lst)
        dev_f1 = f1_score(dev_true_lst, dev_label_lst, average='macro')

        info["dev_loss"] = str(dev_loss)
        info["dev_accuracy"] = str(dev_accuracy)
        info["dev_f1"] = str(dev_f1)

        logger.info("epoch\t{}\tdev\tloss\t{}\taccuracy\t{}\tf1\t{}".format(
            epoch, dev_loss, dev_accuracy, dev_f1))

        # NOTE(review): with ``or`` both "best" values are overwritten even
        # when only one metric improved -- confirm this is intended.
        if dev_f1 > best_dev_f1 or dev_loss < best_dev_loss:
            timestamp = str(int(time.time()))
            model.save_model(
                checkpoint_dir,
                model_name + "_{}_{}_{}".format(timestamp, dev_loss, dev_f1))
            best_dev_f1 = dev_f1
            best_dev_loss = dev_loss

            toleration_cnt = 0

            info["best_dev_loss"] = str(dev_loss)
            logger_utils.json_info(os.path.join(log_dir, "info.json"), info)
            logger.info(
                "epoch\t{}\tbest_dev\tloss\t{}\tbest_accuracy\t{}\tbest_f1\t{}"
                .format(epoch, dev_loss, dev_accuracy, best_dev_f1))
        else:
            toleration_cnt += 1
            if toleration_cnt == toleration:
                # Decay the learning rate after a run of stale epochs.
                toleration_cnt = 0
                learning_rate *= 0.9
def train(config):
    """Train a text/label matching model with optional label and idf embeddings.

    Macro-averaged F1 and loss on the dev set drive checkpointing; after
    ``toleration`` (1000) consecutive epochs without a new checkpoint the
    learning rate is halved.

    Args:
        config: mapping providing ``"model_config_path"``, ``"train_path"``,
            ``"dev_path"``, ``"w2v_path"``, ``"vocab_path"``, ``"model_dir"``
            and ``"model"``; ``"gpu_id"``, ``"elmo_w2v_path"`` and
            ``"label_emb_path"`` are optional.  ``emb_idf`` is read as an
            attribute first and as an optional key otherwise.

    Raises:
        ValueError: if ``FLAGS.scope`` names none of the supported models.
        RuntimeError: if every batch of a train or dev pass failed.
    """
    import math

    model_config_path = config["model_config_path"]
    FLAGS = namespace_utils.load_namespace(model_config_path)

    os.environ["CUDA_VISIBLE_DEVICES"] = config.get("gpu_id", "")

    train_path = config["train_path"]
    w2v_path = config["w2v_path"]
    vocab_path = config["vocab_path"]
    dev_path = config["dev_path"]
    elmo_w2v_path = config.get("elmo_w2v_path", None)
    label_emb_path = config.get("label_emb_path", None)

    # Default to None so FLAGS.class_emb_mat can always be assigned; the
    # original left the name unbound when no label embedding was configured.
    label_emb_mat = None
    if label_emb_path:
        import pickle as pkl
        # NOTE(review): unpickling runs arbitrary code; the path must point
        # to a trusted file.
        with open(label_emb_path, "rb") as label_file:
            label_emb_mat = pkl.load(label_file)

    # ``config`` is used as a mapping everywhere else; keep attribute access
    # working but fall back to a key lookup for plain dicts.
    try:
        emb_idf = config.emb_idf
    except AttributeError:
        emb_idf = config.get("emb_idf", False)

    model_dir = config["model_dir"]

    # An "output_folder_name" entry in FLAGS overrides the configured name.
    try:
        model_name = FLAGS["output_folder_name"]
    except (KeyError, TypeError, AttributeError):
        model_name = config["model"]
    print(model_name, "====model name====")

    run_dir = os.path.join(model_dir, model_name)
    log_dir = os.path.join(run_dir, "logs")
    checkpoint_dir = os.path.join(run_dir, "models")
    # makedirs also creates missing parents, so a nested model_dir works.
    for directory in (log_dir, checkpoint_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    # Written before the embedding matrices are attached to FLAGS below.
    with open(os.path.join(log_dir, model_name + ".json"), "w") as flags_file:
        json.dump(FLAGS, flags_file)
    logger = logger_utils.get_logger(os.path.join(log_dir, "log.info"))

    [train_anchor, train_check, train_label, train_anchor_len,
     train_check_len, embedding_info] = prepare_data(
         train_path, w2v_path, vocab_path,
         make_vocab=True,
         elmo_w2v_path=elmo_w2v_path,
         elmo_pca=FLAGS.elmo_pca,
         emb_idf=emb_idf)

    # The embedding info returned by this second call is the one used below.
    [dev_anchor, dev_check, dev_label, dev_anchor_len, dev_check_len,
     embedding_info] = prepare_data(
         dev_path, w2v_path, vocab_path,
         make_vocab=False,
         elmo_w2v_path=elmo_w2v_path,
         elmo_pca=FLAGS.elmo_pca,
         emb_idf=emb_idf)

    token2id = embedding_info["token2id"]
    embedding_mat = embedding_info["embedding_matrix"]
    extral_symbol = embedding_info["extra_symbol"]

    if emb_idf:
        FLAGS.idf_emb_mat = embedding_info["idf_matrix"]
        FLAGS.with_idf = True

    FLAGS.token_emb_mat = embedding_mat
    FLAGS.char_emb_mat = 0
    FLAGS.vocab_size = embedding_mat.shape[0]
    FLAGS.char_vocab_size = 0
    FLAGS.emb_size = embedding_mat.shape[1]
    FLAGS.extra_symbol = extral_symbol
    FLAGS.class_emb_mat = label_emb_mat

    if FLAGS.scope == "ESIM":
        model = ESIM()
    elif FLAGS.scope == "BiBLOSA":
        model = BiBLOSA()
    elif FLAGS.scope == "BaseTransformer":
        model = BaseTransformer()
    elif FLAGS.scope == "UniversalTransformer":
        model = UniversalTransformer()
    elif FLAGS.scope == "Capsule":
        model = Capsule()
    elif FLAGS.scope == "LabelNetwork":
        model = LabelNetwork()
    elif FLAGS.scope == "LEAM":
        model = LEAM()
    elif FLAGS.scope == "SWEM":
        model = SWEM()
    elif FLAGS.scope == "TextCNN":
        model = TextCNN()
    elif FLAGS.scope == "DeepPyramid":
        model = DeepPyramid()
    elif FLAGS.scope == "ReAugument":
        model = ReAugument()
    else:
        # Fail fast instead of hitting an UnboundLocalError on ``model`` later.
        raise ValueError(
            "unsupported model scope: {}".format(FLAGS.scope))

    # Only these two scopes switch the if_max_* batching flags on.  The
    # original wrote ``if_max_anchor_len = True,`` which produced the tuple
    # (True,) rather than a boolean.
    use_max_len = FLAGS.scope in ["Capsule", "DeepPyramid"]
    max_anchor_len = FLAGS.max_length
    max_check_len = 1
    if_max_anchor_len = use_max_len
    if_max_check_len = use_max_len

    model.build_placeholder(FLAGS)
    model.build_op()
    model.init_step()

    best_dev_f1 = 0.0
    best_dev_loss = 100.0

    learning_rate = FLAGS.learning_rate
    toleration = 1000
    toleration_cnt = 0
    print("=======begin to train=========")

    for epoch in range(FLAGS.max_epochs):
        # ---- training pass ----
        train_loss = 0
        train_data = get_batch_data.get_batches(
            train_anchor, train_check, train_label, FLAGS.batch_size,
            token2id,
            is_training=True,
            max_anchor_len=max_anchor_len,
            if_max_anchor_len=if_max_anchor_len,
            max_check_len=max_check_len,
            if_max_check_len=if_max_check_len)

        cnt = 0
        train_label_lst, train_true_lst = [], []
        for index, corpus in enumerate(train_data):
            anchor, entity, label = corpus
            assert entity.shape[-1] == 1
            try:
                [loss, _, global_step, accuracy, preds] = model.step(
                    [anchor, entity, label],
                    is_training=True,
                    learning_rate=learning_rate)
                loss_is_nan = math.isnan(loss)
                if not loss_is_nan:
                    # Convert everything first so a failure cannot leave the
                    # prediction and gold lists with different lengths.
                    predicted = np.argmax(preds, axis=-1).tolist()
                    gold = label.tolist()
                    batch_size = anchor.shape[0]
            except Exception as err:
                # Still best-effort, but the failure is logged and Ctrl-C is
                # no longer swallowed (the original used a bare ``except``).
                logger.info(
                    "skipping train batch {} after error: {!r}".format(
                        index, err))
                continue
            if loss_is_nan:
                # A NaN loss ends the epoch's training pass immediately.
                print(anchor, entity, label, loss, "===nan loss===")
                break
            train_label_lst += predicted
            train_true_lst += gold
            train_loss += loss * batch_size
            cnt += batch_size

        if cnt == 0:
            raise RuntimeError(
                "no training batch succeeded in epoch {}".format(epoch))
        train_loss /= float(cnt)

        train_accuracy = accuracy_score(train_true_lst, train_label_lst)
        train_f1 = f1_score(train_true_lst, train_label_lst, average='macro')

        info = OrderedDict()
        info["epoch"] = str(epoch)
        info["train_loss"] = str(train_loss)
        info["train_accuracy"] = str(train_accuracy)
        info["train_f1"] = str(train_f1)

        logger.info("epoch\t{}\ttrain\tloss\t{}\taccuracy\t{}\tf1\t{}".format(
            epoch, train_loss, train_accuracy, train_f1))

        # ---- dev pass ----
        dev_data = get_batch_data.get_batches(
            dev_anchor, dev_check, dev_label, FLAGS.batch_size, token2id,
            is_training=False,
            max_anchor_len=max_anchor_len,
            if_max_anchor_len=if_max_anchor_len,
            max_check_len=max_check_len,
            if_max_check_len=if_max_check_len)

        dev_loss = 0
        cnt = 0
        dev_label_lst, dev_true_lst = [], []
        for index, corpus in enumerate(dev_data):
            anchor, entity, label = corpus
            try:
                [loss, logits, pred_probs, accuracy] = model.infer(
                    [anchor, entity, label],
                    mode="test",
                    is_training=False,
                    learning_rate=learning_rate)
                predicted = np.argmax(pred_probs, axis=-1).tolist()
                gold = label.tolist()
                batch_size = anchor.shape[0]
                loss_is_nan = math.isnan(loss)
            except Exception as err:
                logger.info(
                    "skipping dev batch {} after error: {!r}".format(
                        index, err))
                continue
            dev_label_lst += predicted
            dev_true_lst += gold
            if loss_is_nan:
                # Reported only; the batch still counts, as before.
                print(anchor, entity, pred_probs, index)
            dev_loss += loss * batch_size
            cnt += batch_size

        if cnt == 0:
            raise RuntimeError(
                "no dev batch succeeded in epoch {}".format(epoch))
        dev_loss /= float(cnt)

        dev_accuracy = accuracy_score(dev_true_lst, dev_label_lst)
        dev_f1 = f1_score(dev_true_lst, dev_label_lst, average='macro')

        info["dev_loss"] = str(dev_loss)
        info["dev_accuracy"] = str(dev_accuracy)
        info["dev_f1"] = str(dev_f1)

        logger.info("epoch\t{}\tdev\tloss\t{}\taccuracy\t{}\tf1\t{}".format(
            epoch, dev_loss, dev_accuracy, dev_f1))

        # NOTE(review): with ``or`` both "best" values are overwritten even
        # when only one metric improved -- confirm this is intended.
        if dev_f1 > best_dev_f1 or dev_loss < best_dev_loss:
            timestamp = str(int(time.time()))
            model.save_model(
                checkpoint_dir,
                model_name + "_{}_{}_{}".format(timestamp, dev_loss, dev_f1))
            best_dev_f1 = dev_f1
            best_dev_loss = dev_loss

            toleration_cnt = 0

            info["best_dev_loss"] = str(dev_loss)
            logger_utils.json_info(os.path.join(log_dir, "info.json"), info)
            logger.info(
                "epoch\t{}\tbest_dev\tloss\t{}\tbest_accuracy\t{}\tbest_f1\t{}"
                .format(epoch, dev_loss, dev_accuracy, best_dev_f1))
        else:
            toleration_cnt += 1
            if toleration_cnt == toleration:
                # Halve the learning rate after a run of stale epochs.
                toleration_cnt = 0
                learning_rate *= 0.5
def train(config):
    """Train a single-sentence classifier, checkpointing on training metrics.

    There is no dev set here: a checkpoint is written when the epoch's
    training accuracy is higher *and* its loss lower than at the previous
    checkpoint.  Training stops after ``toleration`` (1000) consecutive
    epochs without a new checkpoint.

    Args:
        config: mapping providing ``"model_config_path"``, ``"train_path"``,
            ``"dev_path"``, ``"w2v_path"``, ``"vocab_path"``, ``"model_dir"``
            and ``"model"``; ``"gpu_id"`` is optional and defaults to ``""``.

    Raises:
        ValueError: if ``FLAGS.scope`` names none of the supported models.
        RuntimeError: if every training batch of an epoch failed.
    """
    model_config_path = config["model_config_path"]
    FLAGS = namespace_utils.load_namespace(model_config_path)

    os.environ["CUDA_VISIBLE_DEVICES"] = config.get("gpu_id", "")

    train_path = config["train_path"]
    w2v_path = config["w2v_path"]
    vocab_path = config["vocab_path"]
    # Not used below, but still looked up so a missing key is reported the
    # same way as before.
    dev_path = config["dev_path"]

    model_dir = config["model_dir"]
    model_name = config["model"]

    run_dir = os.path.join(model_dir, model_name)
    log_dir = os.path.join(run_dir, "logs")
    checkpoint_dir = os.path.join(run_dir, "models")
    # makedirs also creates missing parents, so a nested model_dir works.
    for directory in (log_dir, checkpoint_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    # Written before the embedding matrices are attached to FLAGS below.
    with open(os.path.join(log_dir, model_name + ".json"), "w") as flags_file:
        json.dump(FLAGS, flags_file)
    logger = logger_utils.get_logger(os.path.join(log_dir, "log.info"))

    [train_corpus, train_corpus_label, train_corpus_len,
     embedding_info] = prepare_data(
         train_path, w2v_path, vocab_path, make_vocab=True)

    token2id = embedding_info["token2id"]
    embedding_mat = embedding_info["embedding_matrix"]
    extral_symbol = embedding_info["extra_symbol"]

    FLAGS.token_emb_mat = embedding_mat
    FLAGS.char_emb_mat = 0
    FLAGS.vocab_size = embedding_mat.shape[0]
    FLAGS.char_vocab_size = 0
    FLAGS.emb_size = embedding_mat.shape[1]
    FLAGS.extra_symbol = extral_symbol

    if FLAGS.scope == "ESIM":
        model = ESIM()
    elif FLAGS.scope == "BiBLOSA":
        model = BiBLOSA()
    elif FLAGS.scope == "BaseTransformer":
        model = BaseTransformer()
    elif FLAGS.scope == "UniversalTransformer":
        model = UniversalTransformer()
    else:
        # Fail fast instead of hitting an UnboundLocalError on ``model`` later.
        raise ValueError(
            "unsupported model scope: {}".format(FLAGS.scope))

    model.build_placeholder(FLAGS)
    model.build_op()
    model.init_step()

    best_train_accuracy, best_train_loss = 0, 100
    toleration = 1000
    toleration_cnt = 0

    for epoch in range(FLAGS.max_epochs):
        train_loss, train_accuracy = 0, 0
        train_data = get_batch_data.get_classify_batch(
            train_corpus, train_corpus_label, FLAGS.batch_size, token2id,
            is_training=True,
            if_word_drop=FLAGS.with_word_drop,
            word_drop_rate=FLAGS.word_drop_rate)

        cnt = 0
        for index, corpus in enumerate(train_data):
            anchor, label = corpus
            try:
                [loss, _, global_step, accuracy, preds] = model.step(
                    [anchor, label], is_training=True)
                batch_size = anchor.shape[0]
                train_loss += loss * batch_size
                train_accuracy += accuracy * batch_size
                cnt += batch_size
            except Exception as err:
                # Still best-effort, but the failure is logged and Ctrl-C is
                # no longer swallowed (the original used a bare ``except``).
                logger.info(
                    "skipping train batch {} after error: {!r}".format(
                        index, err))
                continue

        if cnt == 0:
            raise RuntimeError(
                "no training batch succeeded in epoch {}".format(epoch))
        train_loss /= float(cnt)
        train_accuracy /= float(cnt)

        info = OrderedDict()
        info["epoch"] = str(epoch)
        info["train_loss"] = str(train_loss)
        info["train_accuracy"] = str(train_accuracy)

        logger.info("epoch\t{}\ttrain\tloss\t{}\taccuracy\t{}".format(
            epoch, train_loss, train_accuracy))

        improved = (train_accuracy > best_train_accuracy
                    and train_loss < best_train_loss)
        if improved:
            timestamp = str(int(time.time()))
            model.save_model(
                checkpoint_dir,
                model_name + "_{}_{}_{}".format(
                    timestamp, train_loss, train_accuracy))
            best_train_accuracy = train_accuracy
            best_train_loss = train_loss

            toleration_cnt = 0
            info["best_train_loss"] = str(best_train_loss)
            info["best_train_accuracy"] = str(best_train_accuracy)
            logger_utils.json_info(os.path.join(log_dir, "info.json"), info)
            logger.info(
                "epoch\t{}\tbest_train\tloss\t{}\tbest_accuracy\t{}".format(
                    epoch, train_loss, train_accuracy))
        else:
            toleration_cnt += 1
            if toleration_cnt >= toleration:
                # Early stop: too many epochs without a new checkpoint.
                break