def val(val_loader, model, device, flip=False):
    """Evaluate `model` on `val_loader` and return the mean batch F1 score.

    :param val_loader: DataLoader yielding (datas, ages, sexs, labels)
    :param model: network called as model(datas, ages, sexs)
    :param device: torch device to run on
    :param flip: if True, also score the signal flipped along dim 2 and
        average the two outputs before computing F1
    :return: float, mean F1 over all batches
    """
    model.eval()
    with torch.no_grad():
        score = []
        for datas, ages, sexs, labels in val_loader:
            datas = datas.to(device)
            ages = ages.to(device)
            sexs = sexs.to(device)
            labels = labels.to(device)
            outputs = model(datas, ages, sexs)
            if flip:
                datas_flip = datas.flip([2])
                # BUG FIX: the flipped pass previously called
                # model(datas_flip) without ages/sexs, unlike the normal
                # pass above and the ensemble val(); pass the same
                # auxiliary inputs on both passes.
                outputs_flip = model(datas_flip, ages, sexs)
                outputs_mean = torch.add(outputs, outputs_flip) / 2
                x = calc_f1(labels, outputs_mean)
            else:
                x = calc_f1(labels, outputs)
            score.append(x)
        test_acc = sum(score) / len(score)
    return test_acc
def train_epoch(self, model, optimizer, criterion):
    """Run one training epoch; return (mean loss, mean F1) over iterations."""
    model.train()
    loss_sum = 0.0
    f1_sum = 0.0
    n_iters = 0
    for inputs, target in tqdm(self.trn_dataloader):
        inputs, target = inputs.to(self.device), target.to(self.device)
        optimizer.zero_grad()              # clear accumulated gradients
        output = model(inputs)             # forward pass
        loss = criterion(output, target)
        loss.backward()                    # backprop
        optimizer.step()                   # parameter update
        loss_sum += loss.item()
        n_iters += 1
        batch_f1 = utils.calc_f1(target, torch.sigmoid(output))
        f1_sum += batch_f1
        if n_iters % self.show_interval == 0:
            print("%d,loss:%.3e f1:%.3f" % (n_iters, loss.item(), batch_f1))
    return loss_sum / n_iters, f1_sum / n_iters
def evaluate(sess, model, val_features_batches, val_support_batches,
             y_val_batches, val_mask_batches, val_data, placeholders):
    """evaluate GCN model."""
    pred_chunks = []
    lab_chunks = []
    weighted_loss = 0
    weighted_acc = 0
    for idx in range(len(val_features_batches)):
        mask_b = val_mask_batches[idx]
        num_data_b = np.sum(mask_b)
        if num_data_b == 0:
            continue
        feed_dict = utils.construct_feed_dict(val_features_batches[idx],
                                              val_support_batches[idx],
                                              y_val_batches[idx], mask_b,
                                              placeholders)
        batch_loss, batch_acc, batch_out = sess.run(
            [model.loss, model.accuracy, model.outputs], feed_dict=feed_dict)
        pred_chunks.append(batch_out[mask_b])
        lab_chunks.append(y_val_batches[idx][mask_b])
        # weight per-batch metrics by how many nodes the mask keeps
        weighted_loss += batch_loss * num_data_b
        weighted_acc += batch_acc * num_data_b
    total_pred = np.vstack(pred_chunks)
    total_lab = np.vstack(lab_chunks)
    loss = weighted_loss / len(val_data)
    acc = weighted_acc / len(val_data)
    micro, macro = utils.calc_f1(total_pred, total_lab, FLAGS.multilabel)
    return loss, acc, micro, macro
def test(model, criterion, features, adj, labels, mask, device):
    """Full-graph forward pass; return (loss, accuracy, micro-F1, macro-F1)."""
    features = torch.FloatTensor(features).to(device)
    labels = torch.LongTensor(labels).to(device)
    total_correct = 0
    if device == torch.device("cpu"):
        # on CPU the inputs arrive wrapped in a single-element container;
        # unwrap them. NOTE(review): presumably the GPU path passes the
        # unwrapped arrays directly — confirm against the caller.
        adj = adj[0]
        features = features[0]
        labels = labels[0]
        mask = mask[0]
    # Adj -> Torch Sparse Tensor
    i = torch.LongTensor(adj[0])  # indices
    v = torch.FloatTensor(adj[1])  # values
    adj = torch.sparse.FloatTensor(i.t(), v, adj[2]).to(device)
    model.to(device)
    output = model(adj, features)
    if args.multilabel:
        loss = criterion(output, labels.type_as(output))
        # binarize sigmoid probabilities at 0.5
        pred = torch.sigmoid(output) >= 0.5
        # a sample counts as correct only if every label matches
        total_correct += torch.eq(pred.squeeze(), labels.squeeze()).all(dim=1).sum().item()
    else:
        loss = criterion(output, torch.max(labels, 1)[1])
        pred = output[mask].argmax(dim=1, keepdim=True)
        labels = torch.max(labels[mask], 1)[1]
        total_correct += torch.eq(pred.squeeze(), labels.squeeze()).sum().item()
    # NOTE(review): in the multilabel branch total_correct is counted over all
    # nodes (mask not applied) but divided by sum(mask) here — verify intended.
    acc = total_correct / sum(mask)
    micro, macro = utils.calc_f1(pred.squeeze(), labels.squeeze(), args.multilabel)
    return loss.item(), acc, micro, macro
def val(val_loader, model_list, model_weight, device, flip=False):
    """Weighted-ensemble model evaluation.

    :param val_loader: dataloader yielding (datas, ages, sexs, labels)
    :param model_list: list of models to ensemble
    :param model_weight: per-model weights, same length as model_list
    :param device: torch device
    :param flip: if True, average each model's output with its
        flipped-input output before weighting
    :return: float, mean F1 over batches
    """
    model_weight = torch.Tensor(model_weight).to(device)
    with torch.no_grad():
        score_list = []
        for datas, ages, sexs, labels in tqdm(val_loader):
            datas, ages = datas.to(device), ages.to(device)
            sexs, labels = sexs.to(device), labels.to(device)
            weighted_outputs = []
            for weight, model in zip(model_weight, model_list):
                out = model(datas, ages, sexs)
                if flip:
                    flipped = model(datas.flip([2]), ages, sexs)
                    out = torch.add(out, flipped) / 2
                out *= weight
                weighted_outputs.append(out)
            # weighted sum across the ensemble
            ensemble = torch.sum(torch.stack(weighted_outputs), dim=0)
            score_list.append(calc_f1(labels, ensemble))
    test_acc = sum(score_list) / len(score_list)
    print('Test Accuracy: {}/{}={} %'.format(sum(score_list), len(score_list),
                                             test_acc), flush=True)
    return test_acc
def train_epoch(model, optimizer, criterion, train_dataloader, show_interval=10):
    """One training epoch for the kind-switched model; returns (avg loss, avg F1)."""
    model.train()
    f1_total, loss_total, steps = 0, 0, 0
    for inputs, fr, target in train_dataloader:
        inputs, target, fr = inputs.to(device), target.to(device), fr.to(device)
        optimizer.zero_grad()
        # forward: the model signature depends on config.kind
        if config.kind == 1:
            output = model(inputs, fr)
        elif config.kind == 2:
            output, _ = model(inputs)
        else:
            output = model(inputs)
        if config.kind == 2 and config.top4_DeepNN_tag:
            # restrict both prediction and target to the top-4 tag columns
            output = output[:, config.top4_tag_list]
            target = target[:, config.top4_tag_list]
        # BCEWithLogitsLoss: applies sigmoid to output, then BCE
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        loss_total += loss.item()
        steps += 1
        f1 = utils.calc_f1(target, torch.sigmoid(output))
        f1_total += f1
        if steps % show_interval == 0:
            print("%d,loss:%.3e f1:%.3f" % (steps, loss.item(), f1))
    return loss_total / steps, f1_total / steps
def train_beat_epoch(model, optimizer, criterion, train_dataloader, show_interval=10):
    """One training epoch for the beat-conditioned model; returns (avg loss, avg F1)."""
    model.train()
    f1_sum, loss_sum, steps = 0, 0, 0
    for inputs, beat, target in train_dataloader:
        inputs, beat, target = inputs.to(device), beat.to(device), target.to(device)
        optimizer.zero_grad()               # clear gradients
        output = model(inputs, beat)        # forward pass with beat features
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        loss_sum += loss.item()
        steps += 1
        f1 = utils.calc_f1(target, torch.sigmoid(output))
        f1_sum += f1
        if steps % show_interval == 0:
            print("%d,loss:%.3e f1:%.3f" % (steps, loss.item(), f1))
    return loss_sum / steps, f1_sum / steps
def train_epoch(model, optimizer, criterion, train_dataloader, show_interval=10):
    """One training epoch with optional sex/age fusion; returns (avg loss, avg F1)."""
    model.train()
    f1_sum, loss_sum, steps = 0, 0, 0
    for inputs, target in train_dataloader:
        # target packs (labels, sex, age); unpack before moving to device
        target, sex, age = splitTarget(target)
        inputs = inputs.to(device)
        target, sex, age = target.to(device), sex.to(device), age.to(device)
        optimizer.zero_grad()
        if config.fuse == 'False':
            output = model(inputs)
        elif config.fuse == 'True':
            output = model(inputs, sex, age)
        else:
            raise ValueError('Not supported choise for \'config.fuse\' item!')
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        loss_sum += loss.item()
        steps += 1
        f1 = utils.calc_f1(target, torch.sigmoid(output))
        f1_sum += f1
        if steps % show_interval == 0:
            print("%d,loss:%.3e f1:%.3f" % (steps, loss.item(), f1))
    # Why track F1 for model selection? F1 is the evaluation metric, not the loss.
    return loss_sum / steps, f1_sum / steps  # average loss and average F1
def train_epoch(model, optimizer, criterion, train_dataloader, epoch, lr,
                best_f1, show_interval=10):
    """One training epoch with a tqdm progress bar; returns (avg loss, avg F1)."""
    model.train()
    f1_sum, loss_sum, n_iter = 0, 0, 0
    bar = tqdm.tqdm(total=len(train_dataloader) * config.batch_size)
    bar.set_description('epoch %d, lr %.4f, best_f:%.4f' % (epoch, lr, best_f1))
    for inputs, target in train_dataloader:
        inputs, target = inputs.to(device), target.to(device)
        optimizer.zero_grad()           # clear gradients
        output = model(inputs)          # forward
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        loss_sum += loss.item()
        n_iter += 1
        f1 = utils.calc_f1(target, torch.sigmoid(output))
        f1_sum += f1
        bar.update(config.batch_size)
        bar.set_postfix(loss="%.4f f1:%.3f" % (loss.item(), f1))
    bar.close()
    return loss_sum / n_iter, f1_sum / n_iter
def val_epoch(model, criterion, val_dataloader, threshold=0.5):
    """Validate by concatenating all batches, then computing one loss/F1.

    Returns (loss, f1) computed over the full concatenated prediction set
    rather than averaged per batch.
    """
    model.eval()
    with torch.no_grad():
        if torch.cuda.is_available():
            label_all = torch.Tensor().cuda()
            pred_all = torch.Tensor().cuda()
        else:
            label_all = torch.Tensor()
            pred_all = torch.Tensor()
        bar = tqdm.tqdm(total=len(val_dataloader) * config.batch_size)
        for inputs, target in val_dataloader:
            inputs, target = inputs.to(device), target.to(device)
            logits = model(inputs)
            # accumulate raw logits/labels; metrics are computed once at the end
            label_all = torch.cat((label_all, target), 0)
            pred_all = torch.cat((pred_all, logits), 0)
            bar.update(config.batch_size)
        bar.close()
        loss_value = criterion(pred_all, label_all).item()
        probs = torch.sigmoid(pred_all)
        f1 = utils.calc_f1(label_all, probs, threshold)
    return loss_value, f1
def evaluate(model, args, val_features_batches, val_support_batches,
             y_val_batches, val_mask_batches, val_data, pid="None"):
    """evaluate GCN model."""
    preds, outs, labs = [], [], []
    loss_sum = 0
    acc_sum = 0
    num_batches = len(val_features_batches)
    for b in range(num_batches):
        mask_b = val_mask_batches[b]
        num_data_b = np.sum(mask_b)
        if num_data_b == 0:
            continue
        # package one mini-batch for the evaluation worker
        package = {
            "features": val_features_batches[b],
            "support": val_support_batches[b],
            "y_train": y_val_batches[b],
            "train_mask": mask_b
        }
        out_dict = slave_run_evaluate(model, args, package, pid=pid)
        preds.append(out_dict["pred"].cpu().detach().numpy()[mask_b])
        outs.append(out_dict["out"].cpu().detach().numpy()[mask_b])
        labs.append(y_val_batches[b][mask_b])
        loss_sum += out_dict["loss"] * num_data_b
        acc_sum += out_dict["acc"]  # accuracy averaged per batch, not per node
    total_pred = np.vstack(preds)
    total_out = np.vstack(outs)
    total_lab = np.vstack(labs)
    loss = loss_sum / len(val_data)
    acc = acc_sum / num_batches
    micro, macro = calc_f1(total_pred, total_lab, args.multilabel)
    return loss, acc, micro, macro
def ada_boost(x_train, y_train, x_test, y_test):
    """Sweep AdaBoost tree counts 1..14 and plot accuracy/F1 curves."""
    print('#ADA Boost Testing\n\n')
    shift_y_train = shiftData(y_train)
    shift_y_test = shiftData(y_test)
    tree_counts, acc_tr, acc_te, f1_tr, f1_te = [], [], [], [], []
    for num_trees in range(1, 15):
        print("Testing with ", num_trees, " trees")
        booster = AdaBoostClassifier(num_trees, 1)
        booster.train(x_train, shift_y_train)
        train_preds = booster.predict(x_train)
        test_preds = booster.predict(x_test)
        tree_counts.append(num_trees)
        acc_tr.append(accuracy_score(train_preds, shift_y_train))
        acc_te.append(accuracy_score(test_preds, shift_y_test))
        f1_tr.append(calc_f1(train_preds, shift_y_train))
        f1_te.append(calc_f1(test_preds, shift_y_test))
    fig_acc = plt.figure(1)
    plt.plot(tree_counts, acc_tr)
    plt.plot(tree_counts, acc_te)
    plt.title("ADA Boost Accuracy vs Number of Trees")
    plt.ylabel("Accuracy")
    plt.xlabel("Number of Trees")
    plt.legend(['Training Accuracy', 'Testing Accuracy'])
    fig_acc.show()
    plt.figure(2)
    plt.plot(tree_counts, f1_tr)
    plt.plot(tree_counts, f1_te)
    plt.title("ADA Boost F1 vs Number of Trees")
    plt.ylabel("F1")
    plt.xlabel("Number of Trees")
    plt.legend(['Training F1', 'Testing F1'])
    plt.show()
def validate(val_loader, model):
    """Validation pass: micro-F1, precision@{1,3,5} and nDCG@{1,3,5}."""
    data_time = AverageMeter()
    microF1 = AverageMeter()
    p1 = p3 = p5 = 0
    n1 = n3 = n5 = 0
    model.eval()
    with torch.no_grad():
        end = time.time()
        for batch_idx, (input, target) in enumerate(val_loader):
            data_time.update(time.time() - end)
            input = input.cuda()
            target = target.cuda()
            output = model(input)
            # move to CPU for metric computation
            target = target.data.cpu().float()
            output = output.data.cpu()
            top5 = output.topk(k=5)[1].numpy()
            dp1, dp3, dp5 = precision_k(top5, target.numpy(), k=[1, 3, 5])
            p1, p3, p5 = p1 + dp1, p3 + dp3, p5 + dp5
            dn1, dn3, dn5 = ndcg_k(top5, target.numpy(), k=[1, 3, 5])
            n1, n3, n5 = n1 + dn1, n3 + dn3, n5 + dn5
            # binarize at 0.5 for the F1 computation
            output[output > 0.5] = 1
            output[output <= 0.5] = 0
            micro, macro = calc_f1(target, output)
            microF1.update(micro.item(), input.size(0))
        np.set_printoptions(formatter={'float': '{: 0.4}'.format})
        print('the result of micro: \n', microF1.avg)
        nb = len(val_loader)
        p1, p3, p5 = p1 / nb, p3 / nb, p5 / nb
        n1, n3, n5 = n1 / nb, n3 / nb, n5 / nb
        print("precision@1 : %.4f , precision@3 : %.4f , precision@5 : %.4f " % (p1, p3, p5))
        print("ndcg@1 : %.4f , ndcg@3 : %.4f , ndcg@5 : %.4f " % (n1, n3, n5))
    return (microF1.avg)
def random_forsest_random_seed(x_train, y_train, x_test, y_test, count):
    """Train `count` random forests with fixed hyperparameters and plot
    accuracy/F1 per run. (Function-name typo kept for caller compatibility.)"""
    print('#Random Forest Number of Trees\n\n')
    seeds, acc_tr, acc_te, f1_tr, f1_te = [], [], [], [], []
    for run in range(count):
        forest = RandomForestClassifier(max_depth=7, max_features=25, n_trees=151)
        forest.fit(x_train, y_train)
        train_preds = forest.predict(x_train)
        test_preds = forest.predict(x_test)
        seeds.append(run)
        acc_tr.append(accuracy_score(train_preds, y_train))
        acc_te.append(accuracy_score(test_preds, y_test))
        f1_tr.append(calc_f1(train_preds, y_train))
        f1_te.append(calc_f1(test_preds, y_test))
    fig_acc = plt.figure(1)
    plt.plot(seeds, acc_tr)
    plt.plot(seeds, acc_te)
    plt.title("Accuracy vs Seed")
    plt.ylabel("Accuracy")
    plt.xlabel("Seed Index")
    plt.legend(['Training Accuracy', 'Testing Accuracy'])
    fig_acc.show()
    plt.figure(2)
    plt.plot(seeds, f1_tr)
    plt.plot(seeds, f1_te)
    plt.title("F1 vs Seed")
    plt.ylabel("F1")
    plt.xlabel("Seed Index")
    plt.legend(['Training F1', 'Testing F1'])
    plt.show()
def random_forest_testing_max_features(x_train, y_train, x_test, y_test):
    """Sweep the forest's max_features setting and plot accuracy/F1 curves."""
    print('#Random Forest Number of Trees\n\n')
    feature_counts, acc_tr, acc_te, f1_tr, f1_te = [], [], [], [], []
    for max_features in [1, 2, 5, 8, 10, 20, 25, 35, 50]:
        forest = RandomForestClassifier(max_depth=7, max_features=max_features,
                                        n_trees=50)
        forest.fit(x_train, y_train)
        train_preds = forest.predict(x_train)
        test_preds = forest.predict(x_test)
        feature_counts.append(max_features)
        acc_tr.append(accuracy_score(train_preds, y_train))
        acc_te.append(accuracy_score(test_preds, y_test))
        f1_tr.append(calc_f1(train_preds, y_train))
        f1_te.append(calc_f1(test_preds, y_test))
    fig_acc = plt.figure(1)
    plt.plot(feature_counts, acc_tr)
    plt.plot(feature_counts, acc_te)
    plt.title("Accuracy vs Max Features")
    plt.ylabel("Accuracy")
    plt.xlabel("Max Features")
    plt.legend(['Training Accuracy', 'Testing Accuracy'])
    fig_acc.show()
    plt.figure(2)
    plt.plot(feature_counts, f1_tr)
    plt.plot(feature_counts, f1_te)
    plt.title("F1 vs Max Features")
    plt.ylabel("F1")
    plt.xlabel("Max Features")
    plt.legend(['Training F1', 'Testing F1'])
    plt.show()
def evaluate(model, criterion, features_batches, support_batches, labels_batches,
             mask_batches, nodes, device):
    """Batched GCN evaluation; returns (avg loss, accuracy, micro-F1, macro-F1)."""
    preds_all, labels_all = [], []
    loss_sum = 0.0
    correct = 0
    n_nodes = 0
    num_batches = len(features_batches)
    for b in range(num_batches):
        mask_b = mask_batches[b]
        num_data_b = np.sum(mask_b)
        if num_data_b == 0:
            continue
        features = torch.from_numpy(features_batches[b]).to(device)
        labels = torch.LongTensor(labels_batches[b]).to(device)
        # rebuild the sparse adjacency for this batch
        support_b = support_batches[b]
        idx = torch.LongTensor(support_b[0])   # indices
        vals = torch.FloatTensor(support_b[1])  # values
        adj = torch.sparse.FloatTensor(idx.t(), vals, support_b[2]).to(device)
        model.to(device)
        output = model(adj, features)
        if args.multilabel:
            loss = criterion(output[mask_b], labels[mask_b].type_as(output))
            # binarize logits at 0; a node is correct only if all labels match
            pred = output[mask_b]
            pred[pred > 0] = 1
            pred[pred <= 0] = 0
            labels = labels[mask_b]
            correct += torch.eq(pred, labels).all(dim=1).sum().item()
        else:
            loss = criterion(output[mask_b], torch.max(labels[mask_b], 1)[1])
            pred = output[mask_b].argmax(dim=1, keepdim=True)
            labels = torch.max(labels[mask_b], 1)[1]
            correct += torch.eq(pred.squeeze(), labels.squeeze()).sum().item()
        n_nodes += num_data_b
        preds_all.append(pred)
        labels_all.append(labels)
        loss_sum += loss.item()
    total_pred = torch.cat(preds_all).cpu().squeeze().numpy()
    total_lab = torch.cat(labels_all).cpu().squeeze().numpy()
    loss = loss_sum / num_batches
    acc = correct / n_nodes
    micro, macro = utils.calc_f1(total_pred, total_lab, args.multilabel)
    return loss, acc, micro, macro
def val_epoch(model, criterion, val_dataloader, threshold=0.5):
    """Validate one epoch; return (avg loss, avg F1 at `threshold`)."""
    model.eval()
    loss_total = 0
    f1_total = 0
    n_batches = 0
    with torch.no_grad():
        for inputs, target in val_dataloader:
            inputs, target = inputs.to(device), target.to(device)
            logits = model(inputs)
            loss_total += criterion(logits, target).item()
            n_batches += 1
            probs = torch.sigmoid(logits)
            f1_total += utils.calc_f1(target, probs, threshold)
    return loss_total / n_batches, f1_total / n_batches
def decision_tree_testing_depth(x_train, y_train, x_test, y_test, min, max):
    """Sweep decision-tree depth over [min, max) and plot accuracy/F1 curves.

    Note: `min`/`max` shadow the builtins but are kept unchanged for
    interface compatibility with existing callers.
    """
    print('#Decision Tree Depth Testing\n\n')
    accuracyTrain = np.zeros(max - min)
    accuracyTest = np.zeros(max - min)
    f1Train = np.zeros(max - min)
    f1Test = np.zeros(max - min)
    depths = np.arange(min, max)
    index = 0
    for depth in depths:
        clf = DecisionTreeClassifier(max_depth=depth)
        clf.fit(x_train, y_train)
        preds_train = clf.predict(x_train)
        preds_test = clf.predict(x_test)
        accuracyTrain[index] = accuracy_score(preds_train, y_train)
        accuracyTest[index] = accuracy_score(preds_test, y_test)
        # BUG FIX: the train/test F1 scores were swapped (train predictions
        # were stored in f1Test and vice versa); also removed a redundant
        # extra clf.predict(x_test) call whose result was unused.
        f1Train[index] = calc_f1(preds_train, y_train)
        f1Test[index] = calc_f1(preds_test, y_test)
        index += 1
    fig_acc = plt.figure(1)
    plt.plot(depths, accuracyTrain)
    plt.plot(depths, accuracyTest)
    plt.title("accuracy vs number of trees")
    plt.ylabel("Accuracy")
    plt.xlabel("Depth")
    plt.legend(['Training Accuracy', 'Testing Accuracy'])
    fig_acc.show()
    plt.figure(2)
    plt.plot(depths, f1Train)
    plt.plot(depths, f1Test)
    plt.title("F1 vs number of trees")
    plt.ylabel("F1")
    plt.xlabel("Depth")
    plt.legend(['Training F1', 'Testing F1'])
    plt.show()
def fine_tuning(train_loader, model, criterion, optimizer):
    """Run one fine-tuning epoch with a progress bar.

    Returns (avg loss, avg micro-F1, avg macro-F1) over the epoch.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    microF1 = AverageMeter()
    macroF1 = AverageMeter()
    model.train()
    end = time.time()
    bar = Bar('Training', max=len(train_loader))
    for batch_idx, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        input = input.cuda()
        target = target.cuda()
        output = model(input)
        loss = criterion(output, target.float())
        # move to CPU for metric computation
        target = target.data.cpu().float()
        output = output.data.cpu()
        micro, macro = calc_f1(target, output)
        losses.update(loss.item(), input.size(0))
        microF1.update(micro.item(), input.size(0))
        macroF1.update(macro.item(), input.size(0))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        batch_time.update(time.time() - end)
        end = time.time()
        # re-normalize weights after each update
        # NOTE(review): weight_norm() semantics are defined by the model class — confirm
        model.weight_norm()
        bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | Micro-f1: {microF1: .4f} |Macro-f1: {macroF1: .4f}'.format(
            batch=batch_idx + 1,
            size=len(train_loader),
            data=data_time.val,
            bt=batch_time.val,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=losses.avg,
            microF1=microF1.avg,
            macroF1=macroF1.avg,
        )
        bar.next()
    bar.finish()
    return (losses.avg, microF1.avg, macroF1.avg)
def val_epoch(model, criterion, val_dataloader, threshold=0.5):
    """Validate by concatenating every batch, then compute loss/F1 once.

    Also appends accuracy/positive-count statistics to log.txt.
    Returns (loss, f1, target, output) over the whole validation set.
    """
    model.eval()
    f1_meter, loss_meter, it_count = 0, 0, 0
    with torch.no_grad():
        if torch.cuda.is_available():
            label_all = torch.Tensor().cuda()
            pred_all = torch.Tensor().cuda()
        else:
            label_all = torch.Tensor()
            pred_all = torch.Tensor()
        # tq = tqdm.tqdm(total=len(val_dataloader) * config.batch_size)
        for inputs, target in val_dataloader:
            inputs = inputs.to(device)
            target = target.to(device)
            output, _ = model(inputs)
            it_count += 1
            # accumulate raw logits/labels; metrics computed once at the end
            label_all = torch.cat((label_all, target), 0)
            pred_all = torch.cat((pred_all, output), 0)
            # tq.update(config.batch_size)
        # tq.close()
        output = pred_all
        target = label_all
        loss = criterion(output, target)
        loss_meter = loss.item()
        output = torch.sigmoid(output)
        # model_kind selects which project utils module supplies calc_f1
        if args.model_kind == 1:
            f1 = utils.calc_f1(target, output, threshold)
        else:
            f1 = utils2.calc_f1(target, output, threshold)
        acc, true_positives, real_positives, predicted_positives = utils.calc_acc_f1(
            target, output, threshold)
        # append detailed counts to the run log
        fout = open('log.txt', 'a+', encoding='utf-8')
        fout.write('\n' + '*' * 20 + '\n')
        fout.write('acc:' + str(acc) + '\n')
        fout.write('true_positives:' + str(true_positives) + '\n')
        fout.write('real_positives:' + str(real_positives) + '\n')
        fout.write('predicted_positives:' + str(predicted_positives) + '\n')
        fout.close()
        # f1_meter += f1
    return loss_meter, f1, target, output
def train_epoch(model, optimizer, criterion, train_dataloader, epoch, lr,
                best_f1, val_dataloader, model_save_dir, state, round_):
    """One training epoch with periodic mid-epoch validation and checkpointing.

    Returns (avg loss, avg F1, possibly-updated best_f1).
    """
    model.train()
    f1_meter, loss_meter, it_count = 0, 0, 0
    #tq = tqdm.tqdm(total=len(train_dataloader)*config.batch_size)
    #tq.set_description('epoch %d, lr %.4f, best_f:%.4f' % (epoch, lr, best_f1))
    for i, (inputs, target) in enumerate(train_dataloader):
        inputs = inputs.to(device)
        target = target.to(device)
        # zero the parameter gradients
        optimizer.zero_grad()
        # forward
        output, _ = model(inputs)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        loss_meter += loss.item()
        it_count += 1
        # NOTE(review): F1 is computed on the raw logits here (no sigmoid),
        # unlike val_epoch — confirm this is intended
        if args.model_kind == 1:
            f1 = utils.calc_f1(target, output, 0.5)
        else:
            f1 = utils2.calc_f1(target, output, 0.5)
        f1_meter += f1
        #tq.update(config.batch_size)
        # after the warm-up (`round_` epochs), validate every 127 iterations
        # and checkpoint whenever validation F1 improves
        if epoch > round_ and i % 127 == 126:
            val_loss, val_f1, _, _ = val_epoch(model, criterion, val_dataloader)
            if best_f1 < val_f1:
                best_f1 = val_f1
                state['state_dict'] = model.state_dict()
                save_ckpt(state, True, model_save_dir)
                # print('save best')
            print('#epoch:%02d val_loss:%0.3e val_f1:%.3f' %
                  (epoch, val_loss, val_f1))
    #tq.set_postfix(loss="%.4f f1:%.3f" % (loss.item(), f1))
    #tq.close()
    #if it_count != 0 and it_count % show_interval == 0:
    #    print("%d,loss:%.3e f1:%.3f" % (it_count, loss.item(), f1), end='\r')
    return loss_meter / it_count, f1_meter / it_count, best_f1
def evaluate(sess, model, val_features_batches, val_support_batches,
             y_val_batches, val_mask_batches, val_data, placeholders,
             clusters_adj):
    """evaluate GCN model."""
    total_pred = []
    total_lab = []
    total_loss = 0
    total_acc = 0
    num_batches = len(val_features_batches)
    for batch_id in range(num_batches):
        features_b = val_features_batches[batch_id]
        support_b = val_support_batches[batch_id]
        y_val_b = y_val_batches[batch_id]
        val_mask_b = val_mask_batches[batch_id]
        num_data_b = np.sum(val_mask_b)
        # NOTE(review): cluster_adj is assigned but never used below — confirm intent
        if clusters_adj is not None:
            cluster_adj = clusters_adj[batch_id]
        if num_data_b == 0:
            continue
        else:
            feed_dict = utils.construct_feed_dict(features_b, support_b,
                                                  y_val_b, val_mask_b,
                                                  placeholders)
            outs = sess.run([model.loss, model.accuracy, model.outputs],
                            feed_dict=feed_dict)
            total_pred.append(outs[2][val_mask_b])
            total_lab.append(y_val_b[val_mask_b])
            # weight per-batch metrics by the masked-in node count
            total_loss += outs[0] * num_data_b
            total_acc += outs[1] * num_data_b
    total_pred = np.vstack(total_pred)
    total_lab = np.vstack(total_lab)
    # import pdb; pdb.set_trace()
    # dump cluster-level predictions/labels for offline analysis
    # NOTE(review): sp.save_npz expects a sparse matrix; clusters_adj appears
    # to be a per-batch collection here — verify this call succeeds
    sp.save_npz(f'cluster/clusters_adj', clusters_adj)
    np.save(f'cluster/cluster_y', total_lab)
    np.save(f'cluster/total_pred', total_pred)
    loss = total_loss / len(val_data)
    acc = total_acc / len(val_data)
    micro, macro = utils.calc_f1(total_pred, total_lab, FLAGS.multilabel)
    return loss, acc, micro, macro
def val_epoch(model, criterion, val_dataloader, threshold=0.5):
    """Validate; returns averaged (loss, f1, accuracy, recall, precision)."""
    model.eval()
    loss_sum = 0
    f1_sum = 0
    acc_sum = 0
    recall_sum = 0
    precision_sum = 0
    batches = 0
    with torch.no_grad():
        for inputs, target in val_dataloader:
            inputs = inputs.to(device)
            target = target.to(device)
            logits = model(inputs)
            loss_sum += criterion(logits, target).item()
            batches += 1
            probs = torch.sigmoid(logits)
            f1_sum += utils.calc_f1(target, probs, threshold)
            acc_sum += utils.cal_accuracy_score(target, probs)
            recall_sum += utils.cal_recall_score(target, probs)
            precision_sum += utils.cal_percision_score(target, probs)
    return (loss_sum / batches, f1_sum / batches, acc_sum / batches,
            recall_sum / batches, precision_sum / batches)
def val_epoch(model, criterion, val_dataloader, threshold=0.5):
    """Validation epoch mirroring the fused training path.

    Returns (avg loss, avg F1 at `threshold`) over the validation set.
    """
    model.eval()
    f1_meter, loss_meter, it_count = 0, 0, 0
    with torch.no_grad():
        for inputs, target in val_dataloader:
            # target packs (labels, sex, age); unpack before moving to device
            target, sex, age = splitTarget(target)
            inputs = inputs.to(device)
            target, sex, age = target.to(device), sex.to(device), age.to(
                device)
            if config.fuse == 'False':
                output = model(inputs)
            elif config.fuse == 'True':
                # BUG FIX: the fused model takes (inputs, sex, age) exactly as
                # in train_epoch; previously this branch dropped sex/age and
                # was identical to the 'False' branch.
                output = model(inputs, sex, age)
            else:
                raise ValueError(
                    'Not supported choise for \'config.fuse\' item in test phase!'
                )
            loss = criterion(output, target)
            loss_meter += loss.item()
            it_count += 1
            output = torch.sigmoid(output)
            f1 = utils.calc_f1(target, output, threshold)
            f1_meter += f1
    return loss_meter / it_count, f1_meter / it_count
def val_epoch(model, criterion, val_dataloader, threshold=0.5):
    """Validation epoch matching the kind-switched train path.

    Returns (avg loss, avg F1 at `threshold`).
    """
    model.eval()
    f1_sum, loss_sum, steps = 0, 0, 0
    with torch.no_grad():
        for inputs, fr, target in val_dataloader:
            inputs = inputs.to(device)
            target = target.to(device)
            fr = fr.to(device)
            # model signature depends on config.kind
            if config.kind == 1:
                logits = model(inputs, fr)
            elif config.kind == 2:
                logits, _ = model(inputs)
            else:
                logits = model(inputs)
            if config.kind == 2 and config.top4_DeepNN_tag:
                # restrict both prediction and target to the top-4 tag columns
                logits = logits[:, config.top4_tag_list]
                target = target[:, config.top4_tag_list]
            loss_sum += criterion(logits, target).item()
            steps += 1
            probs = torch.sigmoid(logits)
            f1_sum += utils.calc_f1(target, probs, threshold)
    return loss_sum / steps, f1_sum / steps
if (args.command == "val"):
    # accumulate labels/predictions across all 5 folds, then score jointly
    if torch.cuda.is_available():
        label_all = torch.Tensor().cuda()
        pred_all = torch.Tensor().cuda()
    else:
        label_all = torch.Tensor()
        pred_all = torch.Tensor()
    for i in range(5):
        #if i!=3:
        #    continue
        config.train_data = 'path/train'
        args.fold = i
        target, output = val(args)
        label_all = torch.cat((label_all, target), 0)
        pred_all = torch.cat((pred_all, output), 0)
    # single F1 over the concatenated folds at threshold 0.5
    f1 = utils.calc_f1(label_all, pred_all, 0.5)
    acc, true_positives, real_positives, predicted_positives = utils.calc_acc_f1(
        label_all, pred_all, 0.5)
    # append the fold-aggregated statistics to the run log
    fout = open('log.txt', 'a+', encoding='utf-8')
    fout.write('\n' + '*' * 20 + '\n')
    fout.write('acc:' + str(acc) + '\n')
    fout.write('true_positives:' + str(true_positives) + '\n')
    fout.write('real_positives:' + str(real_positives) + '\n')
    fout.write('predicted_positives:' + str(predicted_positives) + '\n')
    fout.close()
    # acc, true_positives, real_positives, predicted_positives = utils.calc_acc_f1(target, output, 0.5)
    print('f1:%.4f' % (f1))
if (args.command == "check"):
def fine_tuning(train_loader, model, criterion, optimizer):
    """Fine-tuning epoch that also accumulates per-label F1, micro/macro
    precision-recall-F1, precision@k and nDCG@k, printing the averages at
    the end of the epoch.
    """
    # per-label F1 accumulator
    # NOTE(review): 54 appears to be the number of labels — confirm
    F1 = np.zeros(54)
    score_micro = np.zeros(3)
    score_macro = np.zeros(3)
    data_time = AverageMeter()
    losses = AverageMeter()
    microF1 = AverageMeter()
    macroF1 = AverageMeter()
    model.train()
    test_p1, test_p3, test_p5 = 0, 0, 0
    test_ndcg1, test_ndcg3, test_ndcg5 = 0, 0, 0
    end = time.time()
    # bar = Bar('Training', max=len(train_loader))
    for batch_idx, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        input = input.cuda()
        target = target.cuda()
        output = model(input)
        loss = criterion(output, target.float())
        # move to CPU for metric computation
        target = target.data.cpu().float()
        output = output.data.cpu()
        micro, macro = calc_f1(target, output)
        losses.update(loss.item(), input.size(0))
        microF1.update(micro.item(), input.size(0))
        macroF1.update(macro.item(), input.size(0))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # measure elapsed time
        _p1, _p3, _p5 = precision_k(output.topk(k=5)[1].numpy(),
                                    target.numpy(), k=[1, 3, 5])
        test_p1 += _p1
        test_p3 += _p3
        test_p5 += _p5
        _ndcg1, _ndcg3, _ndcg5 = ndcg_k(output.topk(k=5)[1].numpy(),
                                        target.numpy(), k=[1, 3, 5])
        test_ndcg1 += _ndcg1
        test_ndcg3 += _ndcg3
        test_ndcg5 += _ndcg5
        # binarize at 0.5 for the per-label / micro / macro scores
        output[output > 0.5] = 1
        output[output <= 0.5] = 0
        for l in range(54):
            F1[l] += f1_score(target[:, l], output[:, l], average='binary')
            # precision[l] += precision_score(target[:, l], output[:, l], average='binary')
            # recall[l] += recall_score(target[:, l], output[:, l], average='binary')
        # micro, macro = calc_f1(target, output)
        # acc += accuracy_score(target, output)
        # print("acc",acc)
        score_micro += [
            precision_score(target, output, average='micro'),
            recall_score(target, output, average='micro'),
            f1_score(target, output, average='micro')
        ]
        score_macro += [
            precision_score(target, output, average='macro'),
            recall_score(target, output, average='macro'),
            f1_score(target, output, average='macro')
        ]
        # acc = calc_acc(target, output)
    # report epoch averages (accumulators divided by the batch count)
    np.set_printoptions(formatter={'float': '{: 0.4}'.format})
    print('the result of F1: \n', F1 / len(train_loader))
    print('the result of micro: \n', score_micro / len(train_loader))
    print('the result of macro: \n', score_macro / len(train_loader))
    test_p1 /= len(train_loader)
    test_p3 /= len(train_loader)
    test_p5 /= len(train_loader)
    test_ndcg1 /= len(train_loader)
    test_ndcg3 /= len(train_loader)
    test_ndcg5 /= len(train_loader)
    print("precision@1 : %.4f , precision@3 : %.4f , precision@5 : %.4f " %
          (test_p1, test_p3, test_p5))
    print("ndcg@1 : %.4f , ndcg@3 : %.4f , ndcg@5 : %.4f " %
          (test_ndcg1, test_ndcg3, test_ndcg5))
def main(args):
    """Cluster-based GraphSAGE training on Amazon2M: seeds RNGs, loads and
    normalizes the graph, builds cluster iterators, trains with periodic
    validation, and checkpoints the best model by validation micro-F1.
    """
    # make runs reproducible
    torch.manual_seed(args.rnd_seed)
    np.random.seed(args.rnd_seed)
    random.seed(args.rnd_seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    multitask_data = set(['ppi'])
    multitask = args.dataset in multitask_data
    # load and preprocess dataset
    assert args.dataset == 'amazon2m'
    g, graph_labels = load_graphs(
        '/yushi/dataset/Amazon2M/Amazon2M_dglgraph.bin')
    assert len(g) == 1
    g = g[0]
    data = g.ndata
    labels = torch.LongTensor(data['label'])
    # older torch versions lack BoolTensor; masks stay as-is there
    if hasattr(torch, 'BoolTensor'):
        train_mask = data['train_mask'].bool()
        val_mask = data['val_mask'].bool()
        test_mask = data['test_mask'].bool()
    train_nid = np.nonzero(train_mask.cpu().numpy())[0].astype(np.int64)
    val_nid = np.nonzero(val_mask.cpu().numpy())[0].astype(np.int64)
    # Normalize features
    features = torch.FloatTensor(data['feat'])
    if args.normalize:
        # fit the scaler on training nodes only, then transform all features
        train_feats = features[train_nid]
        scaler = sklearn.preprocessing.StandardScaler()
        scaler.fit(train_feats)
        features = scaler.transform(features)
        features = torch.FloatTensor(features)
    in_feats = features.shape[1]
    n_classes = 47
    n_edges = g.number_of_edges()
    n_train_samples = train_mask.int().sum().item()
    n_val_samples = val_mask.int().sum().item()
    n_test_samples = test_mask.int().sum().item()
    print("""----Data statistics------'
    #Edges %d
    #Classes %d
    #Train samples %d
    #Val samples %d
    #Test samples %d""" %
          (n_edges, n_classes, n_train_samples, n_val_samples,
           n_test_samples))
    # create GCN model
    if args.self_loop:
        print("adding self-loop edges")
        g = add_self_loop(g)
    # g = DGLGraph(g, readonly=True)
    # set device for dataset tensors
    if args.gpu < 0:
        cuda = False
        raise ValueError('no cuda')
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()
    print(torch.cuda.get_device_name(0))
    g.ndata['features'] = features
    g.ndata['labels'] = labels
    g.ndata['train_mask'] = train_mask
    print('labels shape:', labels.shape)
    # cluster iterators partition the graph for mini-batch training
    train_cluster_iterator = ClusterIter(
        args.dataset, g, args.psize, args.batch_size, train_nid,
        use_pp=args.use_pp)
    val_cluster_iterator = ClusterIter(
        args.dataset, g, args.psize_val, 1, val_nid, use_pp=False)
    print("features shape, ", features.shape)
    model = GraphSAGE(in_feats, args.n_hidden, n_classes, args.n_layers,
                      F.relu, args.dropout, args.use_pp)
    if cuda:
        model.cuda()
    # logger and so on
    log_dir = save_log_dir(args)
    writer = SummaryWriter(log_dir)
    logger = Logger(os.path.join(log_dir, 'loggings'))
    logger.write(args)
    # Loss function
    if multitask:
        print('Using multi-label loss')
        loss_f = nn.BCEWithLogitsLoss()
    else:
        print('Using multi-class loss')
        loss_f = nn.CrossEntropyLoss()
    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)
    # set train_nids to cuda tensor
    if cuda:
        train_nid = torch.from_numpy(train_nid).cuda()
    print("current memory after model before training",
          torch.cuda.memory_allocated(device=train_nid.device) / 1024 / 1024)
    start_time = time.time()
    best_f1 = -1
    for epoch in range(args.n_epochs):
        for j, cluster in enumerate(train_cluster_iterator):
            # sync with upper level training graph
            cluster.copy_from_parent()
            model.train()
            # forward
            pred = model(cluster)
            batch_labels = cluster.ndata['labels']
            batch_train_mask = cluster.ndata['train_mask']
            loss = loss_f(pred[batch_train_mask],
                          batch_labels[batch_train_mask])
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # in PPI case, `log_every` is chosen to log one time per epoch.
            # Choose your log freq dynamically when you want more info within one epoch
            if j % args.log_every == 0:
                print(f"epoch:{epoch}/{args.n_epochs}, Iteration {j}/"
                      f"{len(train_cluster_iterator)}:training loss",
                      loss.item())
                writer.add_scalar('train/loss', loss.item(),
                                  global_step=j +
                                  epoch * len(train_cluster_iterator))
        print("current memory:",
              torch.cuda.memory_allocated(device=pred.device) / 1024 / 1024)
        # evaluate
        if epoch % args.val_every == 0:
            total_f1_mic = []
            total_f1_mac = []
            model.eval()
            for j, cluster in enumerate(val_cluster_iterator):
                cluster.copy_from_parent()
                with torch.no_grad():
                    logits = model(cluster)
                    batch_labels = cluster.ndata['labels']
                    # batch_val_mask = cluster.ndata['val_mask']
                    val_f1_mic, val_f1_mac = calc_f1(
                        batch_labels.cpu().numpy(),
                        logits.cpu().numpy(), multitask)
                total_f1_mic.append(val_f1_mic)
                total_f1_mac.append(val_f1_mac)
            # mean over validation clusters
            val_f1_mic = np.mean(total_f1_mic)
            val_f1_mac = np.mean(total_f1_mac)
            print("Val F1-mic{:.4f}, Val F1-mac{:.4f}".format(
                val_f1_mic, val_f1_mac))
            # checkpoint whenever validation micro-F1 improves
            if val_f1_mic > best_f1:
                best_f1 = val_f1_mic
                print('new best val f1:', best_f1)
                torch.save(model.state_dict(),
                           os.path.join(log_dir, 'best_model.pkl'))
            writer.add_scalar('val/f1-mic', val_f1_mic, global_step=epoch)
            writer.add_scalar('val/f1-mac', val_f1_mac, global_step=epoch)
    end_time = time.time()
    # NOTE(review): start_time - end_time is negative; likely intended
    # end_time - start_time — confirm before relying on this log line
    print(f'training using time {start_time-end_time}')
    # test
    if args.use_val:
        model.load_state_dict(torch.load(os.path.join(
            log_dir, 'best_model.pkl')))
label = ner ner = nn.utils.rnn.pad_sequence(ner, batch_first=True).type( torch.LongTensor) ner = ner.cuda() with torch.no_grad(): pred = model(X, mask_X, length) loss = model.cal_loss(X, mask_X, length, label=ner) for i, item in enumerate(pred): pred_set.append(item[0:length.cpu().numpy()[i]]) #pred_set.extend(pred) for item in label: label_set.append(item.numpy()) valid_loss += loss.item() valid_loss = valid_loss / len(dev_X) acc, recall, f1, pred_result, label_result = calc_f1( pred_set, label_set, data_manager.ner_list) INFO = 'epoch %d, train loss %f, valid loss %f, acc %f, recall %f, f1 %f ' % ( epoch, train_loss, valid_loss, acc, recall, f1) logging.info(INFO) print(INFO) if epoch == 0: break pred_result = cal_ner_result(pred_set, data_manager.ner_list) label_result = cal_ner_result(label_set, data_manager.ner_list) #acc,recall,f1,pred_result,label_result = calc_f1(pred_set, label_set, dev_X, data_manager.ner_list) #INFO = 'epoch %d, train loss %f, valid loss %f, acc %f, recall %f, f1 %f '% (epoch, train_loss, valid_loss,acc,recall,f1) #logging.info(INFO) #print(INFO) #print(INFO+'\t'+INFO_THRE) # 正负样本分析