# third-party imports required by the training loops below
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

# repo-local modules; the exact import paths are assumptions inferred from usage
import models
import dataset
from config import opt
from utils import (collate_fn, eval_metric, eval_metric_var, now, predict,
                   predict_var, save_pr, select_instance, setup_seed)


# Training entry point for the PCNN_ATT model (attention over the sentences of a bag).
def train(**kwargs):
    kwargs.update({'model': 'PCNN_ATT'})
    opt.parse(kwargs)

    if opt.use_gpu:
        torch.cuda.set_device(opt.gpu_id)

    model = getattr(models, 'PCNN_ATT')(opt)
    if opt.use_gpu:
        model.cuda()

    # loading data
    DataModel = getattr(dataset, opt.data + 'Data')
    train_data = DataModel(opt.data_root, train=True)
    train_data_loader = DataLoader(train_data, batch_size=opt.batch_size, shuffle=True,
                                   num_workers=opt.num_workers, collate_fn=collate_fn)

    test_data = DataModel(opt.data_root, train=False)
    test_data_loader = DataLoader(test_data, batch_size=opt.batch_size, shuffle=False,
                                  num_workers=opt.num_workers, collate_fn=collate_fn)
    print('{} train data: {}; test data: {}'.format(now(), len(train_data), len(test_data)))

    # criterion and optimizer
    # criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adadelta(model.parameters(), rho=0.95, eps=1e-6)

    # train
    # max_pre = -1.0
    # max_rec = -1.0
    for epoch in range(opt.num_epochs):
        total_loss = 0
        for idx, (data, label_set) in enumerate(train_data_loader):
            # keep only the first relation label of each bag
            label = [l[0] for l in label_set]

            optimizer.zero_grad()
            model.batch_size = opt.batch_size
            # PCNN_ATT consumes the raw label list and returns the loss itself
            loss = model(data, label)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            # if idx % 100 == 99:
            #     print('{}: Train iter: {} finish'.format(now(), idx))

        if epoch > 2:
            # true_y, pred_y, pred_p = predict(model, test_data_loader)
            # all_pre, all_rec = eval_metric(true_y, pred_y, pred_p)
            pred_res, p_num = predict_var(model, test_data_loader)
            all_pre, all_rec = eval_metric_var(pred_res, p_num)
            last_pre, last_rec = all_pre[-1], all_rec[-1]
            if last_pre > 0.24 and last_rec > 0.24:
                save_pr(opt.result_dir, model.model_name, epoch, all_pre, all_rec, opt=opt.print_opt)
                print('{} Epoch {} save pr'.format(now(), epoch + 1))
            print('{} Epoch {}/{}: train loss: {}; test precision: {}, test recall {}'.format(
                now(), epoch + 1, opt.num_epochs, total_loss, last_pre, last_rec))
        else:
            print('{} Epoch {}/{}: train loss: {};'.format(now(), epoch + 1, opt.num_epochs, total_loss))
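# Both DataLoaders pass utils.collate_fn so that variable-sized bags are not stacked
# into a single tensor by the default collate. Its body is not shown in this section;
# the sketch below (hypothetical name collate_fn_sketch) is the minimal behaviour
# consistent with the (data, label_set) unpacking inside the training loops, and the
# repo's own implementation may differ.
def collate_fn_sketch(batch):
    # each dataset item is a (bag, rel) pair; keep bags and labels as plain
    # tuples so the model and select_instance can handle ragged bags themselves
    data, label = zip(*batch)
    return data, label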
# Training entry point for the PCNN_ONE model (at-least-one instance selection per bag).
def train(**kwargs):
    kwargs.update({'model': 'PCNN_ONE'})
    opt.parse(kwargs)

    if opt.use_gpu:
        torch.cuda.set_device(opt.gpu_id)
        # torch.manual_seed(opt.seed)

    model = getattr(models, 'PCNN_ONE')(opt)
    if opt.use_gpu:
        # torch.cuda.manual_seed_all(opt.seed)
        model.cuda()
        # model = nn.DataParallel(model)

    # loading data
    DataModel = getattr(dataset, opt.data + 'Data')
    train_data = DataModel(opt.data_root, train=True)
    train_data_loader = DataLoader(train_data, batch_size=opt.batch_size, shuffle=True,
                                   num_workers=opt.num_workers, collate_fn=collate_fn)

    test_data = DataModel(opt.data_root, train=False)
    test_data_loader = DataLoader(test_data, batch_size=opt.batch_size, shuffle=False,
                                  num_workers=opt.num_workers, collate_fn=collate_fn)
    print('train data: {}; test data: {}'.format(len(train_data), len(test_data)))

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adadelta(model.parameters(), rho=1.0, eps=1e-6, weight_decay=opt.weight_decay)

    # train
    print("start training...")
    max_pre = -1.0
    max_rec = -1.0
    for epoch in range(opt.num_epochs):
        total_loss = 0
        for idx, (data, label_set) in enumerate(train_data_loader):
            # keep only the first relation label of each bag
            label = [l[0] for l in label_set]
            if opt.use_gpu:
                label = torch.LongTensor(label).cuda()
            else:
                label = torch.LongTensor(label)

            # keep the highest-scoring sentence of each bag (at-least-one assumption)
            data = select_instance(model, data, label)
            model.batch_size = opt.batch_size

            optimizer.zero_grad()
            out = model(data)
            loss = criterion(out, label)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        # skip evaluation during the first three epochs
        if epoch < 3:
            continue

        true_y, pred_y, pred_p = predict(model, test_data_loader)
        all_pre, all_rec, fp_res = eval_metric(true_y, pred_y, pred_p)
        last_pre, last_rec = all_pre[-1], all_rec[-1]
        if last_pre > 0.24 and last_rec > 0.24:
            save_pr(opt.result_dir, model.model_name, epoch, all_pre, all_rec, fp_res, opt=opt.print_opt)
            print('{} Epoch {} save pr'.format(now(), epoch + 1))

        # checkpoint the model whenever both precision and recall reach a new peak
        if last_pre > max_pre and last_rec > max_rec:
            print("save model")
            max_pre = last_pre
            max_rec = last_rec
            model.save(opt.print_opt)

        print('{} Epoch {}/{}: train loss: {}; test precision: {}, test recall {}'.format(
            now(), epoch + 1, opt.num_epochs, total_loss, last_pre, last_rec))
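# select_instance realizes the at-least-one assumption of PCNN_ONE: the current model
# scores every sentence in a bag and only the sentence that scores highest for the
# bag's gold relation is kept for the gradient step. Its definition lives elsewhere in
# the repo; the sketch below is an assumption, and unpack_bag / pick_sentence /
# pack_batch are hypothetical helpers standing in for the bag bookkeeping.
def select_instance_sketch(model, batch_data, labels):
    model.eval()
    selected = []
    with torch.no_grad():
        for bag, rel in zip(batch_data, labels):
            insts = unpack_bag(bag)                          # hypothetical: per-sentence features
            scores = model(insts)                            # (num_sentences, num_relations)
            max_ins_id = scores[:, rel].argmax().item()      # best sentence for the gold relation
            selected.append(pick_sentence(bag, max_ins_id))  # hypothetical helper
    model.train()
    return pack_batch(selected)                              # hypothetical: lists -> tensors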
# Fully annotated variant of the PCNN_ONE training loop.
def train(**kwargs):
    # fix the random seed for reproducibility
    setup_seed(opt.seed)

    # opt.parse (defined in config.py) overrides opt's defaults with the given
    # kwargs and fills in any missing values
    kwargs.update({'model': 'PCNN_ONE'})
    opt.parse(kwargs)

    # if a GPU is used, select the configured device
    if opt.use_gpu:
        torch.cuda.set_device(opt.gpu_id)
        # torch.manual_seed(opt.seed)

    '''Instantiate PCNN_ONE with the configured hyper-parameters:
    model: PCNN_ONE(
      (word_embs): Embedding(114043, 50)
      (pos1_embs): Embedding(102, 5)
      (pos2_embs): Embedding(102, 5)
      (convs): ModuleList(
        (0): Conv2d(1, 230, kernel_size=(3, 60), stride=(1, 1), padding=(1, 0))
      )
      (mask_embedding): Embedding(4, 3)
      (linear): Linear(in_features=690, out_features=53, bias=True)
      (dropout): Dropout(p=0.5, inplace=False)
    )
    '''
    model = getattr(models, 'PCNN_ONE')(opt)

    # move the model's parameters to the GPU if requested
    if opt.use_gpu:
        # torch.cuda.manual_seed_all(opt.seed)
        model.cuda()
        # parallel
        # model = nn.DataParallel(model)

    # loading data
    # DataModel resolves to the class named opt.data + 'Data' inside dataset,
    # e.g. dataset.nyt.NYTData
    DataModel = getattr(dataset, opt.data + 'Data')

    '''Load the train and test splits of the NYT data.

    Taking the train split as an example, two npy files under dataset/NYT/train/
    are read. Each sample is a pair (bag, rel): bag is one bag from
    bags_feature.npy and rel is the aligned entry from labels.npy, which records
    the sentence labels of one bag in the original bags_train.txt.

    A bag in bags_feature consists of:
      es:         the two entity ids, e.g. [0, 0]
      num:        for a 'train' line such as "m.010039 m.01vwm8g NA 99161,292483",
                  the 4th field is split on commas and the number of pieces is num
                  (constant within a bag)
      new_sen:    the sentences as word-id arrays, zero-padded at the end,
                  e.g. [[0, 2, 4, 525, 6, 112, 15099, ..., 0, 0, 0]]
      new_pos:    [positions relative to entity 1, positions relative to entity 2],
                  zero-padded, e.g. [[84, 83, 82, 81, 80, 79, ..., 0, 0, 0],
                                     [50, 49, 48, 47, 46, 45, ..., 0, 0, 0]]
      new_entPos: the indices of entity 1 and entity 2, each incremented by 1,
                  in ascending order, e.g. [[1, 35]]
      new_masks:  the piecewise masks over the padded sentences,
                  e.g. [[1, 2, 2, 2, 2, 2, 2, 2, 2, ..., 0, 0, 0, 0]]

    A rel in labels looks like [0, -1, -1, -1]: a bag's labels are padded with -1
    up to 4 entries, and truncated to the first 4 if there are more.
    '''
    train_data = DataModel(opt.data_root, train=True)
    train_data_loader = DataLoader(train_data, batch_size=opt.batch_size, shuffle=True,
                                   num_workers=opt.num_workers, collate_fn=collate_fn)

    # same as above, for the test split
    test_data = DataModel(opt.data_root, train=False)
    test_data_loader = DataLoader(test_data, batch_size=opt.batch_size, shuffle=False,
                                  num_workers=opt.num_workers, collate_fn=collate_fn)
    print('train data: {}; test data: {}'.format(len(train_data), len(test_data)))

    # cross-entropy loss
    criterion = nn.CrossEntropyLoss()

    '''Optimizer: optim.Adadelta(params, rho, eps, weight_decay)
      params (iterable): parameters to optimize, or dicts defining parameter groups
      rho:          coefficient for the running average of squared gradients (default 0.9)
      eps:          term added to the denominator for numerical stability (default 1e-6)
      weight_decay: weight decay, i.e. an L2 penalty (default 0)
    Only parameters with requires_grad=True are passed to the optimizer.
    '''
    optimizer = optim.Adadelta(filter(lambda p: p.requires_grad, model.parameters()),
                               rho=1.0, eps=1e-6, weight_decay=opt.weight_decay)
    # optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=opt.lr, betas=(0.9, 0.999), weight_decay=opt.weight_decay)
    # optimizer = optim.Adadelta(model.parameters(), rho=1.0, eps=1e-6, weight_decay=opt.weight_decay)

    # train
    print("start training...")
    max_pre = -1.0
    max_rec = -1.0
    for epoch in range(opt.num_epochs):
        total_loss = 0
        for idx, (data, label_set) in enumerate(train_data_loader):
            '''
            data:      a tuple of bags (bag1, bag2, bag3, ...) in the format above
            label_set: a tuple of rel lists ([rels of bag1], [rels of bag2], ...)
            label:     the first label of each bag, collected into one list
            '''
            label = [l[0] for l in label_set]
            if opt.use_gpu:
                label = torch.LongTensor(label).cuda()
            else:
                label = torch.LongTensor(label)

            '''After selection, data becomes
            [select_ent, select_num, select_sen, select_pf, select_pool, select_mask],
            all tensors:
              select_ent, select_num: the es and num of every bag, [[bag1], [bag2], ...]
              select_sen, select_pf, select_pool, select_mask: one entry per bag, e.g.
                select_sen holds a single sentence array per bag (which sentence is
                chosen is decided by the max_ins_id index inside select_instance)
            '''
            data = select_instance(model, data, label)
            model.batch_size = opt.batch_size

            # zero the gradients
            optimizer.zero_grad()

            # forward pass; model(data, train=True) is just syntactic sugar for
            # model.forward(data, train=True)
            out = model(data, train=True)
            # compute the loss
            loss = criterion(out, label)
            # back-propagate to get the gradients
            loss.backward()
            # update all parameters
            optimizer.step()
            total_loss += loss.item()

        # evaluate after every epoch (this condition never triggers; raise the
        # threshold, e.g. epoch < 3 as in the variant above, to skip early epochs)
        if epoch < -1:
            continue

        '''predict returns:
        true_y: the labels of test_data_loader collected into a list
                [[rel of bag1], [rel of bag2], ...]; each rel is padded with -1 up to
                4 entries (truncated to 4 if longer), e.g. [0, -1, -1, -1]
        pred_y: for each bag, a forward pass yields out; if the max of row i of out
                is not in the first column and exceeds -1.0, the prediction is the
                argmax of that row, otherwise 0
        pred_p: for each bag, the largest of the per-row maxima over the first i rows
                of out; if it exceeds -1.0 it is recorded (as tmp_prob or tmp_NA_prob),
                otherwise -1.0 is recorded
        (i ranges over [0, number of sentences in the bag])
        '''
        true_y, pred_y, pred_p = predict(model, test_data_loader)

        '''eval_metric (from utils.py) returns the precision, recall and
        false-positive arrays:
        all_pre: the precision computed at each step, keeping only values that differ
                 from the previous one; the number of steps equals len(true_y), i.e.
                 the number of bags in test_data_loader
        all_rec: the recall computed at each step, deduplicated the same way
        fp_res:  false positives: for the idx-th largest confidence, the index of the
                 bag it belongs to together with that confidence; an entry is recorded
                 only when that bag's first label is 0 (NA) while the predicted argmax
                 j is greater than 0
        '''
        all_pre, all_rec, fp_res = eval_metric(true_y, pred_y, pred_p)

        # the most recent precision and recall
        last_pre, last_rec = all_pre[-1], all_rec[-1]
        if last_pre > 0.24 and last_rec > 0.24:
            # write all_pre, all_rec and fp_res to the result files
            save_pr(opt.result_dir, model.model_name, epoch, all_pre, all_rec, fp_res, opt=opt.print_opt)
            print('{} Epoch {} save pr'.format(now(), epoch + 1))

        # record the peaks: save the model and update the best precision/recall pair
        if last_pre > max_pre and last_rec > max_rec:
            print("save model")
            max_pre = last_pre
            max_rec = last_rec
            model.save(opt.print_opt)

        print('{} Epoch {}/{}: train loss: {}; test precision: {}, test recall {}'.format(
            now(), epoch + 1, opt.num_epochs, total_loss, last_pre, last_rec))
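# The comments above describe eval_metric as a threshold sweep over the test bags:
# predictions are ranked by confidence and precision/recall are recorded at every
# rank, dropping points identical to the previous one. The sketch below is an
# assumption distilled from those comments (hypothetical name eval_metric_sketch);
# utils.py holds the authoritative implementation.
def eval_metric_sketch(true_y, pred_y, pred_p):
    # rank bag indices by descending prediction confidence
    order = sorted(range(len(pred_p)), key=lambda i: pred_p[i], reverse=True)
    # bags whose first gold label is non-NA count as positives
    positive_num = sum(1 for rels in true_y if rels[0] != 0)
    all_pre, all_rec = [], []
    correct = 0
    for rank, i in enumerate(order, start=1):
        # a hit: a non-NA prediction that appears among the bag's gold labels
        if pred_y[i] != 0 and pred_y[i] in true_y[i]:
            correct += 1
        precision = correct / rank
        recall = correct / positive_num if positive_num else 0.0
        # keep only points that differ from the previous one
        if not all_pre or precision != all_pre[-1] or recall != all_rec[-1]:
            all_pre.append(precision)
            all_rec.append(recall)
    return all_pre, all_rec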