def Train(trainfile):
    word2id, embeddings = getDicEmbed()
    traindata = getTrainData(trainfile)
    model = BiLSTM_CRF(batch_size=args.batch_size, epoch_num=args.epoch, hidden_dim=args.hidden_dim,
                       embeddings=embeddings, dropout_keep=args.dropout, optimizer=args.optimizer,
                       lr=args.lr, clip_grad=args.clip, tag2label=tag2label, vocab=word2id,
                       shuffle=args.shuffle, model_path=ckpt_prefix, summary_path=summary_path,
                       log_path=log_path, result_path=result_path, CRF=args.CRF,
                       update_embedding=args.update_embedding)
    model.build_graph()
    dev_data = traindata[:5000]
    dev_size = len(dev_data)
    train_data = traindata[5000:]
    train_size = len(train_data)
    print("train data: {0}\ndev data: {1}".format(train_size, dev_size))
    model.train(train_data, dev_data)  # train on the split, not on the full set, so dev examples don't leak
def train(): """ 模型训练 """ train_writer = SummaryWriter(log_dir='./log/train') test_writer = SummaryWriter(log_dir='./log/test') # step1 模型 bilstm_crf = BiLSTM_CRF(opt.vocab_size, opt.emb_dim, opt.emb_dim//2, opt.tag_num, dropout=opt.dropout) if opt.load_model_path: # 是否加载checkpoint bilstm_crf.load(opt.load_model_path) # step2 数据 rmrb_train_dataset = RmrbDataset(train=True) rmrb_test_dataset = RmrbDataset(train=False) rmrb_train_dataloader = DataLoader(rmrb_train_dataset, batch_size=64, shuffle=True) rmrb_test_dataloader = DataLoader(rmrb_test_dataset, batch_size=len(rmrb_test_dataset), shuffle=True) # step3 损失函数和优化器 # loss_fn = t.nn.CrossEntropyLoss() lr = opt.lr optimizer = t.optim.Adam(params=bilstm_crf.parameters(), lr=lr, weight_decay=opt.weight_decay) previous_loss = 1e9 iteration = 0 for epoch in range(opt.max_epoch): print('epoch {}'.format(epoch)) for ii, (x_batch, y_batch) in enumerate(rmrb_train_dataloader): # 计算loss loss = bilstm_crf.log_likelihood(x_batch, y_batch) loss.backward() optimizer.step() optimizer.zero_grad() if ii % 20 == 0: # print('loss:{}'.format(loss.item())) train_writer.add_scalar('Loss', loss.item(), iteration) iteration += 1 if loss > previous_loss: lr = lr * opt.lr_decay else: previous_loss = loss.item() # 保存模型检查点 bilstm_crf.save() # 评价指标 with t.no_grad(): bilstm_crf.eval() # 将模型设置为验证模式 for x_test, y_test in rmrb_test_dataloader: test_loss = bilstm_crf.log_likelihood(x_test, y_test) test_writer.add_scalar('Loss', test_loss.item(), iteration) y_pre = bilstm_crf(x_test) print(classification_report(t.flatten(y_test), t.flatten(y_pre))) bilstm_crf.train() # 将模型恢复成训练模式
def train_model(self):
    """Start training."""
    model = BiLSTM_CRF(args, self.embedding, self.tag2id, self.word2id, self.paths, config=config)
    model.build_graph()
    print("train data: {}".format(len(self.train_data)))
    print("dev data: {}".format(len(self.dev_data)))
    model.train(self.train_data, self.dev_data, args)
if args.mode == 'train':
    model = BiLSTM_CRF(args, embeddings, tag2label, word2id, paths, config=config)
    print(model)
    model.build_graph()

    # hyperparameter tuning: split train/dev
    # dev_data = train_data[:5000]; dev_size = len(dev_data)
    # train_data = train_data[5000:]; train_size = len(train_data)
    # print("train data: {0}\ndev data: {1}".format(train_size, dev_size))
    # model.train(train=train_data, dev=dev_data)

    # train the model on the whole training data
    print("train data: {}".format(len(train_data)))
    # use test_data as the dev_data to expose overfitting
    model.train(train=train_data, dev=test_data)

# testing model
elif args.mode == 'test':
    ckpt_file = tf.train.latest_checkpoint(model_path)
    print(ckpt_file)
    paths['model_path'] = ckpt_file
    model = BiLSTM_CRF(args, embeddings, tag2label, word2id, paths, config=config)
    model.build_graph()
    print("test data: {}".format(test_size))
    model.test(test_data)

# demo
elif args.mode == 'demo':
    ckpt_file = tf.train.latest_checkpoint(model_path)
for i in range(len(sent)):
    sent_res.append([sent[i], tag[i], tag_pred[i]])
model_pred.append(sent_res)
# label_path = os.path.join(hp.result_path, 'label_' + epoch_num)
# metric_path = os.path.join(hp.result_path, 'result_metric_' + epoch_num)
result = conlleval(model_pred)
print(result)
# print(len(label_pred))


if __name__ == '__main__':
    logging.info("Loading data")
    train_data = load_data(hp.prepro_dir + 'train.utf8', name="train")
    test_data = load_data(hp.prepro_dir + 'test.utf8', name='test')
    model = BiLSTM_CRF()
    loss, train_op, global_step = model.train()
    logits, transition_params = model.eval()
    logging.info("Graph loaded")
    config_proto = tf.ConfigProto(allow_soft_placement=True)
    config_proto.gpu_options.allow_growth = True
    with tf.Session(config=config_proto) as sess:
        saver = tf.train.Saver(max_to_keep=hp.max_to_keep)
        ckpt = tf.train.latest_checkpoint(hp.log_dir)
        if ckpt is None:
            logging.info("Initializing from scratch")
            sess.run(tf.global_variables_initializer())
        else:
            logging.info("Restore model from {}".format(ckpt))
            saver.restore(sess, ckpt)
        last_save_steps = 0
        for i in range(hp.epoch):
ckpt_prefix = os.path.join(model_path, "model") # 預設 './data_path_save/timestamp/checkpoints/model' paths['model_path'] = ckpt_prefix result_path = os.path.join(output_path, "results") # 預設 './data_path_save/timestamp/results' paths['result_path'] = result_path if not os.path.exists(result_path): os.makedirs(result_path) log_path = os.path.join(result_path, "log.txt") # 預設 './data_path_save/timestamp/results/log.txt' paths['log_path'] = log_path get_logger(log_path).info(str(args)) ## training model if args.mode == 'train': model = BiLSTM_CRF(args, embeddings, tag2label, word2id, paths, config=config) # 建立model model.build_graph() # 建立graph print("train data: {}".format(len(train_data))) model.train(train=train_data, dev=test_data) # 模型訓練 ## testing model elif args.mode == 'test': ckpt_file = tf.train.latest_checkpoint(model_path) print(ckpt_file) paths['model_path'] = ckpt_file model = BiLSTM_CRF(args, embeddings, tag2label, word2id, paths, config=config) # 建立model model.build_graph() # 建立graph print("test data: {}".format(test_size)) model.test(test_data) # 模型測試 ## demo elif args.mode == 'demo': ckpt_file = tf.train.latest_checkpoint(model_path) print(ckpt_file)
best_idx = 0

if parameters['reload']:
    print('loading model:', parameters['reload'])
    checkpoint = torch.load(models_path + parameters['reload'])
    # model.load_state_dict(checkpoint)
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    adjust_learning_rate(optimizer, lr=learning_rate)

sys.stdout.flush()

from conlleval import evaluate

model.train(True)

ratio = 0.0
if parameters['adv']:
    ratio = 0.5


def evaluating_batch(model, datas, best_F, display_confusion_matrix=False):
    true_tags = []
    pred_tags = []
    save = False
    new_F = 0.0
os.makedirs(result_path)
log_path = os.path.join(result_path, "log.txt")
get_logger(log_path).info(str(args))

# training model
if args.mode == 'train':
    model = BiLSTM_CRF(batch_size=args.batch_size, epoch_num=args.epoch, hidden_dim=args.hidden_dim,
                       embeddings=embeddings, dropout_keep=args.dropout, optimizer=args.optimizer,
                       lr=args.lr, clip_grad=args.clip, tag2label=tag2label, vocab=word2id,
                       shuffle=args.shuffle, model_path=ckpt_prefix, summary_path=summary_path,
                       log_path=log_path, result_path=result_path, CRF=args.CRF,
                       update_embedding=args.update_embedding)
    model.build_graph()
    # train the model on the whole training data; we could use test_data
    # as the dev_data to expose overfitting
    print("train data: {}".format(len(train_data)))
    model.train(train_data, test_data)

# testing model
elif args.mode == 'test':
    ckpt_file = tf.train.latest_checkpoint(model_path)
    print(ckpt_file)
    model = BiLSTM_CRF(batch_size=args.batch_size, epoch_num=args.epoch, hidden_dim=args.hidden_dim,
                       embeddings=embeddings, dropout_keep=args.dropout, optimizer=args.optimizer,
                       lr=args.lr, clip_grad=args.clip, tag2label=tag2label, vocab=word2id,
                       shuffle=args.shuffle, model_path=ckpt_file, summary_path=summary_path,
                       log_path=log_path, result_path=result_path, CRF=args.CRF,
                       update_embedding=args.update_embedding)
    model.build_graph()
    print("test data: {}".format(test_size))
    model.test(test_data)

elif args.mode == 'demo':
    ckpt_file = tf.train.latest_checkpoint(model_path)
        momentum=learning_momentum)
elif optimizer_choice == OptimizationMethod.Adam:
    optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, model.parameters()))
elif optimizer_choice == OptimizationMethod.AdaDelta:
    optimizer = torch.optim.Adadelta(
        filter(lambda p: p.requires_grad, model.parameters()))

best_dev_results = [-1.0, -1.0, -1.0, -1.0]
best_test_results = [-1.0, -1.0, -1.0, -1.0]
best_train_results = [-1.0, -1.0, -1.0, -1.0]

batch_count = math.ceil(len(train_set) / mini_batch_size)
model.train(True)
for epoch in range(max_epoch):
    train_indices = np.random.permutation(len(train_set))
    full_logs = []
    if epoch == 0:
        # print(opts_str)
        full_logs.append(opts_str)
    train_time = 0
    for batch_i in range(batch_count):
        start_idx = batch_i * mini_batch_size
        end_idx = min((batch_i + 1) * mini_batch_size, len(train_set))
        mini_batch_idx = train_indices[start_idx:end_idx]
        sentence_masks, words, chars, tags, \
    vocab = load_vocabulary('./dataset/vocabulary.pkl')
else:
    build_vocabulary('./dataset/vocabulary.pkl', train_path, 10)
    vocab = load_vocabulary('./dataset/vocabulary.pkl')

# read dataset & training
if args.mode == 'train':
    model = BiLSTM_CRF(args, tag2label, vocab, log_path, logger, config)
    model.build_graph()
    print('Start training ...')
    print('training data contains: {} lines'.format(len(train_data)))
    saver = tf.train.Saver(tf.global_variables())
    with tf.Session(config=model.config) as sess:
        sess.run(tf.global_variables_initializer())
        model.train(sess=sess, train=train_data, dev=train_data, saver=saver)
elif args.mode == 'demo':
    ckpt_file = tf.train.latest_checkpoint(model_path)
    print(ckpt_file)
    log_path['model_path'] = ckpt_file
    model = BiLSTM_CRF(args, tag2label, vocab, log_path, logger, config=config)
    model.build_graph()
    saver = tf.train.Saver()
    with tf.Session(config=config) as sess:
        print('Start demo ...')
        saver.restore(sess, ckpt_file)
        while True:
            print('Please input a sentence (press Enter or space to exit):')
            demo_sent = input()
            if demo_sent == '' or demo_sent.isspace():
def main(_):
    print('start app')
    data_path = os.path.join(FLAGS.train_data, 'word2id.pkl')
    word2id = read_dictionary(data_path)
    if FLAGS.pretrain_embedding == 'random':
        data_embeddings = random_embedding(word2id, FLAGS.embedding_dim)
    else:
        embedding_path = 'pretrain_embedding.npy'
        data_embeddings = np.array(np.load(embedding_path), dtype='float32')

    if FLAGS.mode != 'demo':
        train_file = os.path.join(FLAGS.train_data, 'train_data')
        test_file = os.path.join(FLAGS.test_data, 'test_data')
        train_data = read_corpus(train_file)
        test_data = read_corpus(test_file)
        test_size = len(test_data)

    time_stamp = str(int(time.time())) if FLAGS.mode == 'train' else FLAGS.demo_model

    def generator_dir(file_path):
        if not os.path.exists(file_path):
            os.makedirs(file_path)
        return file_path

    output_path = generator_dir(os.path.join(FLAGS.train_data + '_save', time_stamp))
    summary_path = generator_dir(os.path.join(output_path, 'summary'))
    model_path = generator_dir(os.path.join(output_path, 'checkpoints'))
    ckpt_prefix = generator_dir(os.path.join(model_path, 'model'))
    result_path = generator_dir(os.path.join(output_path, 'results'))

    if FLAGS.mode == 'train':
        print('train ==================')
        # BiLSTM_CRF.__init__(self, batch_size, epoch, hidden_size, embeddings, crf,
        #                     update_embedding, dropout_keepprob, optimizer, learning_rate,
        #                     clip, tag2label, vocab, shuffle, model_p, summary_p,
        #                     results_p, config)
        model = BiLSTM_CRF(FLAGS.batch_size, FLAGS.epoch, FLAGS.hidden_size, data_embeddings,
                           FLAGS.CRF, FLAGS.update_embedding, FLAGS.dropout, FLAGS.optimizer,
                           FLAGS.learning_rate, FLAGS.clipping, tag2label, word2id,
                           FLAGS.shuffle, model_path, summary_path, result_path, config)
        model.build_graph()
        model.train(train_data, test_data)
    elif FLAGS.mode == 'test':
        print('test ===============')
        ckpt_file = tf.train.latest_checkpoint(model_path)
        print('ckpt file {}'.format(ckpt_file))
        model = BiLSTM_CRF(FLAGS.batch_size, FLAGS.epoch, FLAGS.hidden_size, data_embeddings,
                           FLAGS.CRF, FLAGS.update_embedding, FLAGS.dropout, FLAGS.optimizer,
                           FLAGS.learning_rate, FLAGS.clipping, tag2label, word2id,
                           FLAGS.shuffle, ckpt_file, summary_path, result_path, config)
        model.build_graph()
        print('test data {}'.format(test_size))
        model.test(test_data)
    elif FLAGS.mode == 'demo':
        print('demo ===========')
        ckpt_file = tf.train.latest_checkpoint(model_path)
        print('ckpt file {}'.format(ckpt_file))
        model = BiLSTM_CRF(FLAGS.batch_size, FLAGS.epoch, FLAGS.hidden_size, data_embeddings,
                           FLAGS.CRF, FLAGS.update_embedding, FLAGS.dropout, FLAGS.optimizer,
                           FLAGS.learning_rate, FLAGS.clipping, tag2label, word2id,
                           FLAGS.shuffle, ckpt_file, summary_path, result_path, config)
        model.build_graph()
        saver = tf.train.Saver()
        with tf.Session(config=config) as sess:
            saver.restore(sess, ckpt_file)
            while True:
                print("please input your sentence:")
                demo_sentence = input()
                if not demo_sentence or demo_sentence.isspace():
                    print('bye')
                    break
                demo_sent = list(demo_sentence.strip())
                demo_data = [(demo_sent, [0] * len(demo_sent))]
                tag = model.demo_one(sess, demo_data)
                per, loc, org = get_entiry(tag, demo_sent)
                print('per {0} loc {1} org {2}'.format(per, loc, org))
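random_embedding is used by several of these runners; a comment later in this section describes it as returning a len(vocab) x embedding_dim matrix with entries drawn uniformly from [-0.25, 0.25]. A minimal sketch consistent with that description:

import numpy as np

def random_embedding(vocab, embedding_dim):
    # uniform initialization in [-0.25, 0.25]; one row per vocabulary entry
    embedding_mat = np.random.uniform(-0.25, 0.25, (len(vocab), embedding_dim))
    return np.float32(embedding_mat)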
## training model
if args.mode == 'train':
    model = BiLSTM_CRF(args, embeddings, tag2label, word2id, paths, config=config, on_train=True)
    model.build_graph()

    # hyperparameter tuning: use test_data as the dev set
    dev_data = test_data
    dev_size = len(dev_data)
    train_size = len(train_data)
    print("train data: {0}\ndev data: {1}".format(train_size, dev_size))
    # ckpt_file = r'.\data_path_save\1527663228\checkpoints\model-17136'
    # paths['model_path'] = ckpt_file
    # print(ckpt_file)
    model.train(train=train_data, dev=dev_data)

    # ## train model on the whole training data
    # print("train data: {}".format(len(train_data)))
    # model.train(train=train_data, dev=test_data)  # use test_data as the dev_data to expose overfitting

## testing model
elif args.mode == 'test':
    ckpt_file = tf.train.latest_checkpoint(model_path)
    print(ckpt_file)
    # ckpt_file = r'.\data_path_save\1527697768\checkpoints\model-19992'
    paths['model_path'] = ckpt_file
    model = BiLSTM_CRF(args, embeddings, tag2label, word2id, paths, config=config)
    model.build_graph()
    print("test data: {}".format(test_size))
    model.test(test_data)
def my_train():
    os.makedirs("model_result", exist_ok=True)
    torch.manual_seed(1)
    device = torch.device('cuda')
    data_dir = f"data/{DATASET}/processed"

    # load the data
    train_data = NERDataset(os.path.join(data_dir, "train.pkl"))
    test_data = NERDataset(os.path.join(data_dir, "test.pkl"))
    dev_data = NERDataset(os.path.join(data_dir, "dev.pkl"))
    word_to_idx = load_obj(os.path.join(data_dir, "word_to_idx.pkl"))
    tag_to_idx = load_obj(os.path.join(data_dir, "tag_to_idx.pkl"))
    idx_to_tag = {n: m for m, n in tag_to_idx.items()}
    train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, collate_fn=BatchPadding(),
                              shuffle=True, num_workers=2, pin_memory=True)
    dev_loader = DataLoader(dev_data, batch_size=BATCH_SIZE, collate_fn=BatchPadding(),
                            shuffle=True, num_workers=2, pin_memory=True)
    test_loader = DataLoader(test_data, batch_size=BATCH_SIZE, collate_fn=BatchPadding(),
                             shuffle=True, num_workers=2, pin_memory=True)

    # build the model
    model = BiLSTM_CRF(len(word_to_idx), len(tag_to_idx), EMBEDDING_DIM, HIDDEN_DIM, DROPOUT).to(device)
    print(model)
    optimizer = optim.Adam(model.parameters(), lr=LEARN_RATE)

    print("\nStart training")
    f1_max = 0
    cur_patience = 0  # early-stopping counter, used to avoid overfitting
    for epoch in range(EPOCHS):
        model.train()
        for i, (seqs, tags, masks) in enumerate(train_loader, 1):
            optimizer.zero_grad()
            loss = model.loss(seqs.to(device), tags.to(device), masks.to(device))
            loss.backward()
            optimizer.step()
            if i % LOG_INTERVAL == 0:
                print("epoch {}: {:.0f}%\t\tLoss: {:.6f}".format(
                    epoch, 100.0 * i / len(train_loader), loss.item()))
        dev_precision, dev_recall, dev_f1 = evaluate(model, dev_loader, idx_to_tag)
        test_precision, test_recall, test_f1 = evaluate(model, test_loader, idx_to_tag)
        print(f"\ndev\tprecision: {dev_precision}, recall: {dev_recall}, f1: {dev_f1}")
        print(f"test\tprecision: {test_precision}, recall: {test_recall}, f1: {test_f1}\n")
        torch.save(model.state_dict(), f"model_result/{epoch}.pt")

        # early stopping on the dev F1, used to detect overfitting
        if dev_f1 > f1_max:
            f1_max = dev_f1
            cur_patience = 0
            if dev_f1 > 0.9 and test_f1 > 0.9:
                break
        else:
            cur_patience += 1
            if cur_patience >= PATIENCE:  # dev F1 has not improved for PATIENCE epochs
                break
    print("Best dev F1: ", f1_max)
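The evaluate helper used above isn't shown in this excerpt. A minimal sketch of one way to implement it, assuming the model's forward pass decodes to per-sentence lists of predicted tag indices (as CRF decoding usually does) and using the seqeval package for entity-level metrics; the forward signature here is an assumption:

import torch
from seqeval.metrics import precision_score, recall_score, f1_score

def evaluate(model, loader, idx_to_tag):
    # entity-level precision/recall/F1 over a DataLoader (hypothetical helper)
    device = next(model.parameters()).device
    model.eval()
    y_true, y_pred = [], []
    with torch.no_grad():
        for seqs, tags, masks in loader:
            # assumption: the forward pass returns per-sentence lists of tag indices
            preds = model(seqs.to(device), masks.to(device))
            for gold, pred, mask in zip(tags, preds, masks):
                length = int(mask.sum())
                y_true.append([idx_to_tag[int(t)] for t in gold[:length]])
                y_pred.append([idx_to_tag[int(t)] for t in pred[:length]])
    model.train()
    return precision_score(y_true, y_pred), recall_score(y_true, y_pred), f1_score(y_true, y_pred)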
tgt_vocab_size = len(tgt_vocab)
tgt_unknown = tgt_vocab_size
tgt_padding = tgt_vocab_size + 1
# print(tgt_vocab)
embedding = load_word2vec_embedding(args.word_embedding_file, args.embedding_dim, src_vocab_size)

if args.mode == 'train':
    model = BiLSTM_CRF(args, embedding, src_vocab, tgt_vocab, src_padding, tgt_padding, paths)
    model.build_graph()
    train_data = read_corpus(args.src_file, args.tgt_file, src_vocab, tgt_vocab, src_unknown, tgt_unknown)
    print("train data: {}".format(len(train_data)))
    model.train(train_data=train_data, test_data=None)
elif args.mode == 'demo':
    ckpt_file = tf.train.latest_checkpoint(model_path)
    print(ckpt_file)
    paths['model_path'] = ckpt_file
    model = BiLSTM_CRF(args, embedding, src_vocab, tgt_vocab, src_padding, tgt_padding, paths)
    model.build_graph()
    saver = tf.train.Saver()
    with tf.Session() as sess:
        print('============= demo =============')
        saver.restore(sess, ckpt_file)
        while True:
            print('Please input your sentence:')
            demo_sent = input()
            if demo_sent == '' or demo_sent.isspace():
def trainAll(args):
    if args.mode == 'train':
        model = BiLSTM_CRF(args, embeddings, tag2label, word2id, paths, config=config)
        model.build_graph()

        ## hyperparameter tuning: split train/dev
        # dev_data = train_data[:5000]; dev_size = len(dev_data)
        # train_data = train_data[5000:]; train_size = len(train_data)
        # print("train data: {0}\ndev data: {1}".format(train_size, dev_size))
        # model.train(train=train_data, dev=dev_data)

        ## train the model on the whole training data
        print("train data: {}".format(len(train_data)))
        # use test_data as the dev_data to expose overfitting
        model.train(train=train_data, dev=test_data)

    ## testing model
    elif args.mode == 'test':
        ckpt_file = tf.train.latest_checkpoint(model_path)
        print(ckpt_file)
        paths['model_path'] = ckpt_file
        model = BiLSTM_CRF(args, embeddings, tag2label, word2id, paths, config=config)
        model.build_graph()
        print("test data: {}".format(test_size))
        model.test(test_data)

    ## demo
    elif args.mode == 'demo':
        ckpt_file = tf.train.latest_checkpoint(model_path)
        print(ckpt_file)
        paths['model_path'] = ckpt_file
        model = BiLSTM_CRF(args, embeddings, tag2label, word2id, paths, config=config)
        model.build_graph()
        saver = tf.train.Saver()
        with tf.Session(config=config) as sess:
            print('============= demo =============')
            saver.restore(sess, ckpt_file)
            while True:
                print('Please input your sentence:')
                demo_sent = input()
                if demo_sent == '' or demo_sent.isspace():
                    print('See you next time!')
                    break
                demo_sent = list(demo_sent.strip())
                demo_data = [(demo_sent, ['O'] * len(demo_sent))]
                tag = model.demo_one(sess, demo_data)
                PER, LOC, ORG = get_entity(tag, demo_sent)
                print('PER: {}\nLOC: {}\nORG: {}'.format(PER, LOC, ORG))

    ## export a SavedModel
    elif args.mode == 'savemodel':
        ckpt_file = tf.train.latest_checkpoint(model_path)
        print(ckpt_file)
        paths['model_path'] = ckpt_file
        model = BiLSTM_CRF(args, embeddings, tag2label, word2id, paths, config=config)
        model.build_graph()
        saver = tf.train.Saver()
        with tf.Session(config=config) as sess:
            saver.restore(sess, ckpt_file)
            # Save the SavedModel. Note: predict_signature_def expects graph
            # tensors, not Python objects, so the signature must reference the
            # model's input placeholder and output tensor; the attribute names
            # `word_ids` and `logits` depend on BiLSTM_CRF's implementation and
            # are assumptions here.
            builder = tf.saved_model.builder.SavedModelBuilder('./savemodels')
            signature = predict_signature_def(inputs={'input': model.word_ids},
                                              outputs={'output': model.logits})
            builder.add_meta_graph_and_variables(sess, [tf.saved_model.tag_constants.SERVING],
                                                 signature_def_map={'predict': signature})
            builder.save()
            print('SavedModel saved')
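For reference, a SavedModel exported this way can be loaded back under TF1 roughly as follows (a minimal sketch; the tensors to fetch depend on the 'predict' signature defined above):

import tensorflow as tf

with tf.Session(graph=tf.Graph()) as sess:
    # restores the graph and variables exported under the SERVING tag
    tf.saved_model.loader.load(sess, [tf.saved_model.tag_constants.SERVING], './savemodels')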
ckpt_prefix = os.path.join(model_path, 'model')
summary_path = args.log_path
if not os.path.exists(summary_path):
    os.makedirs(summary_path)

## training model
if args.mode == 'train':
    print('==========lr====', args.lr)
    model = BiLSTM_CRF(batch_size=args.batch_size, epoch_num=args.epoch, hidden_dim=args.hidden_dim,
                       embeddings=embeddings, dropout_keep=args.dropout, optimizer=args.optimizer,
                       lr=args.lr, clip_grad=args.clip, tag2label=tag2label, vocab=word2id,
                       shuffle=args.shuffle, model_path=ckpt_prefix, summary_path=summary_path,
                       CRF=args.CRF, update_embedding=args.update_embedding)
    model.build_graph()
    print('train data len=', len(train_data))
    model.train(train_data, test_data)
elif args.mode == 'test':
    ckpt_file = tf.train.latest_checkpoint(model_path)
    print(ckpt_file)
    model = BiLSTM_CRF(batch_size=args.batch_size, epoch_num=args.epoch, hidden_dim=args.hidden_dim,
                       embeddings=embeddings, dropout_keep=args.dropout, optimizer=args.optimizer,
                       lr=args.lr, clip_grad=args.clip, tag2label=tag2label, vocab=word2id,
                       shuffle=args.shuffle, model_path=ckpt_file, summary_path=summary_path,
                       CRF=args.CRF, update_embedding=args.update_embedding)
    model.build_graph()
    print('test data: {}'.format(test_size))
    model.test(test_data)
elif args.mode == 'demo':
    ckpt_file = tf.train.latest_checkpoint(model_path)
    print(ckpt_file)
label2id = ner_cfg.generate_tag_to_label()
logger = logging.getLogger(__name__)
current_dir = os.path.dirname(os.path.abspath(__file__))

## get char embeddings
word2id_pos2id = read_dictionary('word2id_pos2id_new.pkl')
word2id = word2id_pos2id['word2id']
pos2id = word2id_pos2id['pos2id']
word_embedding = np.array(np.load('word2vec.npy'), dtype=np.float32)
pos_embedding = np.array(np.load('pos2vec.npy'), dtype=np.float32)
config = Config(word2id, pos2id, label2id, batch_size=128, n_epochs=200, n_neurons=60)
config.word_embedding = word_embedding
config.pos_embedding = pos_embedding

## read corpus and get training data
train_data, test_data = read_corpus('train_data')
# test_data = read_corpus('test_data')
# test_size = len(test_data)
model = BiLSTM_CRF(is_training=True, config=config)
model.build_graph()
model.train(train_data=train_data, valid_data=test_data)
# model.test(test_data)
def train(self):
    config = self.conf()
    model = BiLSTM_CRF(self.embeddings, self.dicts["labels2idx"], self.dicts["words2idx"],
                       self.dicts["intents2idx"], config=config)
    model.build_graph()
    model.train(self.train_set)
## training model
if args.mode == 'train':
    model = BiLSTM_CRF(args, embeddings, tag2label, word2id, paths, config=config)
    model.build_graph()

    ## hyperparameter tuning: split train/dev
    # dev_data = train_data[:5000]; dev_size = len(dev_data)
    # train_data = train_data[5000:]; train_size = len(train_data)
    # print("train data: {0}\ndev data: {1}".format(train_size, dev_size))
    # model.train(train=train_data, dev=dev_data)

    ## train the model on the whole training data
    print("train data: {}".format(len(train_data)))
    # use test_data as the dev_data to expose overfitting
    model.train(train=train_data, dev=test_data)

## testing model
elif args.mode == 'test':
    ckpt_file = tf.train.latest_checkpoint(model_path)
    print(ckpt_file)
    paths['model_path'] = ckpt_file
    model = BiLSTM_CRF(args, embeddings, tag2label, word2id, paths, config=config)
    model.build_graph()
    print("test data: {}".format(test_size))
    model.test(test_data)

## demo
elif args.mode == 'demo':
    ckpt_file = tf.train.latest_checkpoint(model_path)
    print(ckpt_file)
                   tag2label, word2id, paths, config=config)
model.num_tags = len(word2id)
model.build_graph()

## hyperparameter tuning: split train/dev
# dev_data = train_data[:5000]; dev_size = len(dev_data)
# train_data = train_data[5000:]; train_size = len(train_data)
# print("train data: {0}\ndev data: {1}".format(train_size, dev_size))
# model.train(train=train_data, dev=dev_data)

## train the model on the whole pre-training data
print("train data: {}".format(len(pre_train_data)))
# use test_data as the dev_data to expose overfitting
model.train(train=pre_train_data, dev=test_data)

## training model
if args.mode == 'train':
    model = BiLSTM_CRF(args, embeddings, tag2label, word2id, paths, config=config)
    model.build_graph()
    ## hyperparameter tuning: split train/dev
    # dev_data = train_data[:5000]; dev_size = len(dev_data)
    # train_data = train_data[5000:]; train_size = len(train_data)
    # print("train data: {0}\ndev data: {1}".format(train_size, dev_size))
    model.build_graph()

    # hyperparameter tuning: split train/dev
    # dev_data = train_data[:5000]; dev_size = len(dev_data)
    # train_data = train_data[5000:]; train_size = len(train_data)
    # print("train data: {0}\ndev data: {1}".format(train_size, dev_size))
    # model.train(train=train_data, dev=dev_data)

    # train the model on the whole training data
    ckpt_file = tf.train.latest_checkpoint(model_path)  # resume training from the latest checkpoint
    # ckpt_file = r'model_path\DaGuang\1566526104\checkpoints/model.ckpt-8'  # or load a specific checkpoint
    print(ckpt_file)
    paths['model_path'] = ckpt_file
    # use test_data.txt as the dev data to expose overfitting
    model.train(train=train_data, dev=test_data, model_path=ckpt_file)

# testing model
elif args.mode == 'test':
    ckpt_file = tf.train.latest_checkpoint(model_path)
    # ckpt_file = r'model_path\DaGuang\1566611347\checkpoints/model.ckpt-9'
    print(ckpt_file)
    paths['model_path'] = ckpt_file
    model = BiLSTM_CRF(args, embeddings, tag2label, word2id, paths, config=config)
    model.build_graph()
import torch
import torch.optim as optim

from dataset import Dataset
from model import BiLSTM_CRF

# torch.set_default_tensor_type('torch.cuda.FloatTensor')

epochs = 100
dataset = Dataset()
train_loader = dataset.get_train_loader(1)
model = BiLSTM_CRF(dataset.get_vocab_size(), dataset.get_label_index_dict(), 128, 128)
optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)

model.train()
for epoch in range(epochs):
    for it, batch in enumerate(train_loader):
        sentence_in, targets = batch.line, batch.label
        # (seq_len, batch) -> flat (batch * seq_len) tensors; batch size is 1 here
        sentence_in = sentence_in.permute([1, 0]).reshape(-1).contiguous()
        targets = targets.permute([1, 0]).reshape(-1).contiguous()
        model.zero_grad()
        # normalize the NLL by sentence length so losses are comparable across sentences
        loss = model.neg_log_likelihood(sentence_in.squeeze(-1), targets.squeeze(-1)) / len(sentence_in)
        loss.backward()
        optimizer.step()
        print("{}-{}: {:.5f}".format(epoch, it, loss.item()))
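The runner below relies on a str2bool helper so argparse can accept boolean flags; it isn't shown in this excerpt. A minimal sketch of the usual idiom (the accepted spellings are an assumption):

import argparse

def str2bool(v):
    # map common true/false spellings, since argparse's type=bool is a known pitfall
    if v.lower() in ('yes', 'true', 't', 'y', '1'):
        return True
    if v.lower() in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')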
def run(sentences):
    # configure the TF session
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'  # use GPU 0
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # reduce TF log verbosity
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.2  # needs ~700MB GPU memory

    # hyperparameter settings: build an argument parser so the command-line
    # arguments are handled when the program runs
    parser = argparse.ArgumentParser(description='BiLSTM-CRF for Chinese NER task')
    parser.add_argument('--train_data', type=str, default='data_path', help='train data source')
    parser.add_argument('--test_data', type=str, default='data_path', help='test data source')
    # batch size: with SGD-style training each iteration trains on batch_size samples;
    # one iteration = one forward pass plus one backward pass
    parser.add_argument('--batch_size', type=int, default=64, help='#sample of each minibatch')
    # one epoch = one full pass over the training set; e.g. with 1000 training
    # samples and batch_size 10, one epoch takes 100 iterations
    parser.add_argument('--epoch', type=int, default=40, help='#epoch of training')
    # dimensionality of the hidden state: 300
    parser.add_argument('--hidden_dim', type=int, default=300, help='#dim of hidden state')
    # Adam is the default optimizer
    parser.add_argument('--optimizer', type=str, default='Adam', help='Adam/Adadelta/Adagrad/RMSProp/Momentum/SGD')
    parser.add_argument('--CRF', type=str2bool, default=True, help='use CRF at the top layer. if False, use Softmax')
    parser.add_argument('--lr', type=float, default=0.001, help='learning rate')
    parser.add_argument('--clip', type=float, default=5.0, help='gradient clipping')
    # dropout temporarily removes network units with a given probability during training
    parser.add_argument('--dropout', type=float, default=0.5, help='dropout keep_prob')
    parser.add_argument('--update_embedding', type=str2bool, default=True, help='update embedding during training')
    parser.add_argument('--pretrain_embedding', type=str, default='random',
                        help='use pretrained char embedding or init it randomly')
    parser.add_argument('--embedding_dim', type=int, default=300, help='random init char embedding_dim')
    parser.add_argument('--shuffle', type=str2bool, default=True, help='shuffle training data before each epoch')
    parser.add_argument('--mode', type=str, default='demo', help='train/test/demo')
    parser.add_argument('--demo_model', type=str, default='1559398699', help='model for test and demo')
    # parse the command-line arguments
    args = parser.parse_args()

    # initialize the embedding matrix and read the dictionary
    word2id = read_dictionary(os.path.join('.', args.train_data, 'word2id.pkl'))
    # random_embedding returns a len(vocab) x embedding_dim (3905 x 300) matrix
    # with entries in [-0.25, 0.25] as the initial embedding
    if args.pretrain_embedding == 'random':
        embeddings = random_embedding(word2id, args.embedding_dim)
    else:
        embedding_path = 'pretrain_embedding.npy'
        embeddings = np.array(np.load(embedding_path), dtype='float32')

    # read the training and test sets
    if args.mode != 'demo':
        train_path = os.path.join('.', args.train_data, 'train_data')
        test_path = os.path.join('.', args.test_data, 'test_data')
        train_data = read_corpus(train_path)
        test_data = read_corpus(test_path)
        test_size = len(test_data)

    # set up paths
    paths = {}
    timestamp = str(int(time.time())) if args.mode == 'train' else args.demo_model
    output_path = os.path.join('.', args.train_data + "_save", timestamp)
    if not os.path.exists(output_path):
        os.makedirs(output_path)
    summary_path = os.path.join(output_path, "summaries")
    paths['summary_path'] = summary_path
    if not os.path.exists(summary_path):
        os.makedirs(summary_path)
    model_path = os.path.join(output_path, "checkpoints/")
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    ckpt_prefix = os.path.join(model_path, "model")
os.path.join(model_path, "model") paths['model_path'] = ckpt_prefix result_path = os.path.join(output_path, "results") paths['result_path'] = result_path if not os.path.exists(result_path): os.makedirs(result_path) log_path = os.path.join(result_path, "log.txt") paths['log_path'] = log_path get_logger(log_path).info(str(args)) # 将参数写入日志文件 if args.mode == 'train': # 训练模型 model = BiLSTM_CRF(args, embeddings, tag2label, word2id, paths, config=config) model.build_graph() model.train(train=train_data, dev=test_data) elif args.mode == 'test': # 测试模型 ckpt_file = tf.train.latest_checkpoint(model_path) print(ckpt_file) paths['model_path'] = ckpt_file model = BiLSTM_CRF(args, embeddings, tag2label, word2id, paths, config=config) model.build_graph() print("test data: {}".format(test_size)) model.test(test_data) # demo elif args.mode == 'demo': location = [] ckpt_file = tf.train.latest_checkpoint(model_path) print("model path: ", ckpt_file) paths['model_path'] = ckpt_file # 设置模型路径 model = BiLSTM_CRF(args, embeddings, tag2label, word2id, paths, config=config) model.build_graph() saver = tf.train.Saver() with tf.Session(config=config) as sess: saver.restore(sess, ckpt_file) for sentence in sentences: demo_sent = sentence demo_sent = list(demo_sent.strip()) # 删除空白符 demo_data = [(demo_sent, ['O'] * len(demo_sent))] tag = model.demo_one(sess, demo_data) PER, LOC, ORG = get_entity(tag, demo_sent) # 根据标注序列输出对应的字符 new_LOC = list(set(LOC)) # 去重 loc = ' '.join(new_LOC) location.append(loc) return location