def dev(use_gpu, batch_size, model_path):
    """Debug/visualization variant: forward a single dev batch (index 5)
    through the model in 'test' mode and render each layer's cosine
    similarity matrix to a visdom server (env 'test1').

    NOTE(review): this def is shadowed by the later definition of ``dev``
    in this module; rename one of them if both are meant to be callable.

    Args:
        use_gpu: move the model and batch tensors to CUDA before the pass.
        batch_size: DataLoader batch size.
        model_path: path to a saved state_dict (loaded onto CPU first).
    """
    dataset = build_dataset_pytorch.QuoraDataset_train(
        'data/dev_data_pad.pkl', 'data/dev_data_char_pad.pkl',
        'data/dev_data_flags_pad.pkl')
    dataloader = DataLoader(dataset, batch_size, shuffle=False, num_workers=0)
    mm1 = mp.MatchModel(vocab_size=100000, tr_w_embed_dim=300,
                        tr_w_emb_dim_flag=100, char_size=50, char_embed_dim=20,
                        output_dim=100, hidden_dim=200, use_gpu=use_gpu,
                        yt_layer_num=3, mode='test')
    mm1.load_state_dict(t.load(model_path, map_location='cpu'))
    mm1.eval()
    with open('data/vector_n.pkl', 'rb') as handle:
        # Warning: If adding something here, also modifying saveDataset
        data = pickle.load(handle)
    word_vec = t.from_numpy(np.array(data['word_vec']))
    if use_gpu:
        mm1.cuda()

    def data_convert(*tensors):
        # Move every given tensor onto the current CUDA device.
        return [x.cuda() for x in tensors]

    for i, batch in enumerate(dataloader):
        if i != 5:  # visualize only the sixth batch
            continue
        (p_inputs, q_inputs, targets, p_inputs_char, q_inputs_char,
         p_flags, q_flags) = batch
        print(t.nonzero(p_inputs).shape[0], t.nonzero(q_inputs).shape[0])
        if use_gpu:
            [p_inputs, q_inputs, targets, p_inputs_char, q_inputs_char,
             p_flags, q_flags, word_vec] = data_convert(
                p_inputs, q_inputs, targets, p_inputs_char, q_inputs_char,
                p_flags, q_flags, word_vec)
        p_input = [p_inputs, p_inputs_char, word_vec, p_flags]
        q_input = [q_inputs, q_inputs_char, q_flags]
        out_p, out_q, cos = mm1(100, p_input, q_input)
        vis = visdom.Visdom(env=u'test1')
        for x in range(len(out_p)):
            # Fix: .cpu() before .numpy() — required when use_gpu is True,
            # otherwise numpy() raises on a CUDA tensor.
            vis.image(cos[x].detach().cpu().numpy(),
                      win='q_layer_%d_%d' % (x, 0),
                      opts={'title': 'cos_layer%d' % (x)})
        break
def dev(use_gpu, batch_size, model_path):
    """Evaluate a saved MatchModel checkpoint on the dev split.

    Args:
        use_gpu: run the forward passes on CUDA.
        batch_size: DataLoader batch size.
        model_path: path to the state_dict to evaluate.

    Returns:
        float: overall classification accuracy on the dev set.
    """
    dataset = build_dataset_pytorch.QuoraDataset_train(
        'data/dev_data_pad.pkl', 'data/dev_data_char_pad.pkl',
        'data/dev_data_flags_pad.pkl')
    # Evaluation pass: shuffling is unnecessary; deterministic order
    # makes reruns comparable.
    dataloader = DataLoader(dataset, batch_size, shuffle=False, num_workers=1)
    mm1 = mp.MatchModel(vocab_size=100000, tr_w_embed_dim=300,
                        tr_w_emb_dim_flag=0, char_size=50, char_embed_dim=20,
                        output_dim=100, hidden_dim=200, use_gpu=use_gpu,
                        yt_layer_num=3, mode='test')
    # map_location keeps a GPU-saved checkpoint loadable on CPU-only runs.
    mm1.load_state_dict(
        t.load(model_path, map_location='cuda' if use_gpu else 'cpu'))
    mm1.eval()
    with open('data/vector_n.pkl', 'rb') as handle:
        # Warning: If adding something here, also modifying saveDataset
        data = pickle.load(handle)
    word_vec = t.from_numpy(np.array(data['word_vec']))
    if use_gpu:
        mm1.cuda()

    def data_convert(*tensors):
        # Move every given tensor onto the current CUDA device.
        return [x.cuda() for x in tensors]

    data_num = 0
    correct_all = 0
    # no_grad: inference only — skip autograd bookkeeping to save memory.
    with t.no_grad():
        for i, batch in enumerate(dataloader):
            (p_inputs, q_inputs, targets, p_inputs_char, q_inputs_char,
             p_flags, q_flags) = batch
            if use_gpu:
                [p_inputs, q_inputs, targets, p_inputs_char, q_inputs_char,
                 p_flags, q_flags, word_vec] = data_convert(
                    p_inputs, q_inputs, targets, p_inputs_char,
                    q_inputs_char, p_flags, q_flags, word_vec)
            p_input = [p_inputs, p_inputs_char, word_vec, p_flags]
            q_input = [q_inputs, q_inputs_char, q_flags]
            output = mm1(100, p_input, q_input)
            if use_gpu:
                output = output.cpu()
                targets = targets.cpu()
            # Targets are one-hot (argmax over dim 1 recovers the class id).
            correct_num = (t.argmax(output, 1) == t.argmax(targets, 1)).numpy()
            data_num = data_num + len(correct_num)
            correct_all = correct_all + correct_num.sum()
    print("-- Dev Acc %.3f" % (correct_all / data_num))
    return correct_all / data_num
def train(use_gpu, model_path, lr, epochs, model_prefix, batch_size):
    """Train MatchModel on the Quora train split, checkpointing every epoch.

    After each epoch the state_dict is saved to '<model_prefix>_<e>.pth' and
    evaluated with dev() and test(); the best accuracies seen so far are
    printed at the end of each epoch.

    Args:
        use_gpu: run training (and the per-epoch evaluations) on CUDA.
        model_path: optional checkpoint to warm-start from; falsy skips.
        lr: Adam learning rate.
        epochs: number of passes over the training set.
        model_prefix: filename prefix for per-epoch checkpoints.
        batch_size: DataLoader batch size.
    """
    dataset = build_dataset_pytorch.QuoraDataset_train(
        'data/train_data_pad.pkl', 'data/train_data_char_pad.pkl',
        'data/train_data_flags_pad.pkl')
    dataloader = DataLoader(dataset, batch_size, shuffle=True, num_workers=1)
    mm = mp.MatchModel(vocab_size=100000, tr_w_embed_dim=300,
                       tr_w_emb_dim_flag=0, char_size=50, char_embed_dim=20,
                       output_dim=100, hidden_dim=200, use_gpu=use_gpu,
                       yt_layer_num=3)
    optimizer = t.optim.Adam(mm.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    t.manual_seed(2)
    if model_path:
        mm.load_state_dict(t.load(model_path))
    if use_gpu:
        mm.cuda()
        criterion.cuda()
        t.cuda.manual_seed_all(2)
    with open('data/vector_n.pkl', 'rb') as handle:
        # Warning: If adding something here, also modifying saveDataset
        data = pickle.load(handle)
    word_vec = t.from_numpy(np.array(data['word_vec']))

    def data_convert(*tensors):
        # Move every given tensor onto the current CUDA device.
        return [x.cuda() for x in tensors]

    max_dev = 0
    max_test = 0
    for e in range(0, epochs):
        correct_all = 0
        data_num = 0
        print("----- Epoch {}/{} -----".format(e + 1, epochs))
        for i, data_ in tqdm(enumerate(dataloader), desc="Training"):
            (p_inputs, q_inputs, targets, p_inputs_char, q_inputs_char,
             p_flags, q_flags) = data_
            if use_gpu:
                [p_inputs, q_inputs, targets, p_inputs_char, q_inputs_char,
                 p_flags, q_flags, word_vec] = data_convert(
                    p_inputs, q_inputs, targets, p_inputs_char,
                    q_inputs_char, p_flags, q_flags, word_vec)
            optimizer.zero_grad()
            p_input = [p_inputs, p_inputs_char, word_vec, p_flags]
            q_input = [q_inputs, q_inputs_char, q_flags]
            output = mm(100, p_input, q_input)
            # Targets are one-hot; CrossEntropyLoss wants class indices.
            loss = criterion(output, t.argmax(targets, 1))
            # Rebinding output/targets to CPU copies for metrics is safe:
            # loss already holds the graph reference for backward().
            if use_gpu:
                output = output.cpu()
                targets = targets.cpu()
            correct_num = (t.argmax(output, 1) == t.argmax(targets, 1)).numpy()
            correct_sum = correct_num.sum()
            accuracy = correct_sum / len(correct_num)
            data_num = data_num + len(correct_num)
            correct_all = correct_all + correct_sum
            loss.backward()
            optimizer.step()
            if (i + 1) % 600 == 0:
                tqdm.write("----- Epoch %d Batch %d -- Loss %.2f -- Acc %.3f"
                           % (e + 1, i + 1, loss, accuracy))
        tqdm.write("----- epoch %d -- Loss %.2f -- Acc %.3f"
                   % (e + 1, loss, correct_all / data_num))
        checkpoint = '%s_%s.pth' % (model_prefix, e)
        t.save(mm.state_dict(), checkpoint)
        # Fix: respect the caller's use_gpu flag (was hard-coded to True).
        dev_acc = dev(use_gpu, 50, checkpoint)
        test_acc = test(use_gpu, 50, checkpoint)
        max_dev = max(max_dev, dev_acc)
        max_test = max(max_test, test_acc)
        print('max test acc:', max_test, '\nmax dev acc:', max_dev)