# Report the configured embedding/lexicon paths before entering the
# selected pipeline stage.
print("Char emb:", char_emb)
print("Bichar emb:", bichar_emb)
print("Gaz file:", gaz_file)
if status == 'train':
    print("Model saved to:", save_model_dir)
sys.stdout.flush()

if status == 'train':
    # Fresh run: configure hyper-parameters, build alphabets and
    # gazetteer-augmented instances from scratch, then train.
    data = Data()
    data.HP_gpu = gpu
    data.HP_use_char = False
    data.HP_batch_size = 10
    data.use_bigram = False
    data.gaz_dropout = 0.5
    data.norm_gaz_emb = False
    data.HP_fix_gaz_emb = False
    data_initialization(data, gaz_file, train_file, dev_file, test_file)
    # Materialise instances for every split.
    for corpus_path, split_name in ((train_file, 'train'),
                                    (dev_file, 'dev'),
                                    (test_file, 'test')):
        data.generate_instance_with_gaz(corpus_path, split_name)
    # Load the pretrained embedding tables (char, bichar, gazetteer).
    data.build_word_pretrain_emb(char_emb)
    data.build_biword_pretrain_emb(bichar_emb)
    data.build_gaz_pretrain_emb(gaz_file)
    train(data, save_model_dir, dset_dir, seg)
elif status == 'test':
    # Evaluation run: restore the saved data settings and decode.
    data = load_data_setting(dset_dir)
    data.generate_instance_with_gaz(dev_file, 'dev')
    load_model_decode(model_dir, data, 'dev', gpu, seg)
    data.generate_instance_with_gaz(test_file, 'test')
np.random.seed(seed_num)
torch.cuda.manual_seed(seed_num)
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Weibo NER corpus paths and the unigram character embedding file.
train_file = "data/Weibo/weiboNER.train"
dev_file = "data/Weibo/weiboNER.dev"
test_file = "data/Weibo/weiboNER.test"
word_emb_file = "data/gigaword_chn.all.a2b.uni.ite50.vec"
print(train_file)

data = Data()
data.HP_gpu = False          # whether to run on GPU
data.norm_gaz_emb = False    # whether to normalise the gazetteer embeddings
data.HP_fix_gaz_emb = True   # whether the gazetteer embedding table is frozen
data.HP_bilstm = True
data.random_seed = seed_num

# Global optimisation hyper-parameters.
data.HP_lr = 0.01
data.HP_lr_decay = 0.01
data.HP_iteration = 150
data.HP_batch_size = 20
data.gaz_dropout = 0.4
data.weight_decay = 0.00000005
data.use_clip = False        # whether to clip gradients
data.HP_clip = 30            # maximum gradient value

# LSTM hyper-parameters.
data.HP_hidden_dim = 300
data.HP_dropout = 0.7
print "Char emb:", char_emb print "Bichar emb:", bichar_emb print "Gaz file:", gaz_file if status == 'train': print "Model saved to:", save_model_dir sys.stdout.flush() if status == 'train': data = Data() data.HP_gpu = gpu data.HP_use_char = False data.HP_batch_size = 1 data.use_bigram = False data.gaz_dropout = 0.5 data.norm_gaz_emb = False data.HP_fix_gaz_emb = False data_initialization(data, gaz_file, train_file, dev_file, test_file) data.generate_instance_with_gaz(train_file, 'train') data.generate_instance_with_gaz(dev_file, 'dev') data.generate_instance_with_gaz(test_file, 'test') data.build_word_pretrain_emb(char_emb) data.build_biword_pretrain_emb(bichar_emb) data.build_gaz_pretrain_emb(gaz_file) train(data, save_model_dir, seg) elif status == 'test': data = load_data_setting(dset_dir) data.generate_instance_with_gaz(dev_file, 'dev') load_model_decode(model_dir, data, 'dev', gpu, seg) data.generate_instance_with_gaz(test_file, 'test') load_model_decode(model_dir, data, 'test', gpu, seg) elif status == 'decode':
# -*- coding: utf-8 -*-
# @Author: Jie
# @Date:   2017-06-15 14:11:08
# @Last Modified by:   Jie Yang,     Contact: [email protected]
# @Last Modified time: 2018-07-06 11:08:27

import time
import sys
import argparse
import random
import copy
import gc
import pickle  # was "import pickle as pickle", a Py2 "import cPickle as pickle" leftover

import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np

from utils.metric import get_ner_fmeasure
from model.bilstmcrf import BiLSTM_CRF as SeqModel
from utils.data import Data

# Seed every RNG that influences training so runs are reproducible.
seed_num = 100
random.seed(seed_num)
torch.manual_seed(seed_num)
np.random.seed(seed_num)


def data_initialization(data, gaz_file, train_file, dev_file, test_file):
    """Populate the alphabets of *data* from all three dataset splits.

    Dev and test are scanned as well as train so that no symbol seen at
    evaluation time is out of the alphabet.  The gazetteer (*gaz_file*)
    is presumably consumed further down in this function — the remainder
    lies outside this chunk.
    """
    data.build_alphabet(train_file)
    data.build_alphabet(dev_file)
    data.build_alphabet(test_file)