def train(self):
    """Train a Faster R-CNN car detector on CarDataset and checkpoint it.

    Builds the detector (24 car classes + 1 background = 25 outputs),
    initializes the backbone from ImageNet-pretrained ResNet-50 weights,
    optimizes a weighted multi-task detection loss under a cosine LR
    schedule, and saves the state dict to 'last.pth'.

    :return: None
    """
    dataset = CarDataset()
    # 24 foreground classes + 1 background class
    model = fasterRCNN(num_classes=25)
    load_pretrained_weights(model, './weights/resnet50-19c8e357.pth')
    model = model.cuda()
    model.train()
    optimizer = build_optimizer(model, optim='adam')
    max_epoch = args.EPOCHS
    batch_size = args.BATCH
    scheduler = build_scheduler(optimizer, lr_scheduler='cosine',
                                max_epoch=max_epoch)
    train_loader = DataLoader(dataset=dataset,
                              batch_size=batch_size,
                              shuffle=True,
                              collate_fn=self.my_collate)
    cudnn.benchmark = True
    for epoch in range(max_epoch):
        for index, data in enumerate(train_loader):
            ims, cls_labels, bbox_labels = data
            targets = []
            for i in range(len(ims)):
                d = {}
                d['labels'] = torch.tensor(cls_labels[i],
                                           dtype=torch.long).cuda()
                # BUG FIX: boxes must be float, not long — torchvision's
                # box-regression loss operates on float coordinates and
                # integer boxes silently truncate/raise.
                d['boxes'] = torch.tensor(bbox_labels[i],
                                          dtype=torch.float32).cuda()
                targets.append(d)
            # Stack tensors directly instead of round-tripping through numpy.
            ims = torch.stack([im.detach().cpu() for im in ims])
            ims = ims.cuda()
            out = model(ims, targets)
            loss_classifier = out['loss_classifier']
            loss_box_reg = out['loss_box_reg']
            loss_objectness = out['loss_objectness']
            loss_rpn_box_reg = out['loss_rpn_box_reg']
            # Weighted multi-task detection loss.
            loss = 0.5 * loss_classifier + 5 * loss_box_reg + \
                loss_objectness + 10 * loss_rpn_box_reg
            # BUG FIX: gradients must be cleared every step; the original
            # never called zero_grad(), so gradients accumulated forever.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if index % 10 == 0:
                print(
                    "Epoch: [{}/{}][{}/{}] Loss: loss_classifier: {:.2f}, "
                    "loss_box_reg: {:.2f}, loss_objectness: {:.2f}, "
                    "loss_rpn_box_reg: {:.2f}, total loss: {:.2f}".format(
                        epoch + 1, max_epoch, index + 1, len(train_loader),
                        loss_classifier, loss_box_reg, loss_objectness,
                        loss_rpn_box_reg, loss))
        scheduler.step()
        if (epoch + 1) % 100 == 0:
            torch.save(model.state_dict(), 'last.pth')
    # Always leave a final checkpoint, even when max_epoch < 100 (the
    # original could finish training without ever saving).
    torch.save(model.state_dict(), 'last.pth')
def train(self):
    """Fine-tune OSNet-x1.0 as a single-output regressor on FacialBeautyDataset.

    Fetches ImageNet-pretrained weights from the remote store, optimizes a
    Smooth-L1 objective under a cosine LR schedule, and writes the final
    weights to 'last.pth'.

    :return: None
    """
    dataset = FacialBeautyDataset()
    model = osnet_x1_0(num_classes=1,
                       pretrained=True,
                       loss='smoothL1Loss',
                       use_gpu=True)
    # Pull the pretrained checkpoint and load it into the backbone.
    path = remote_helper.get_remote_data(
        'https://www.flyai.com/m/osnet_x1_0_imagenet.pth')
    load_pretrained_weights(model, path)
    model = model.cuda()

    optimizer = build_optimizer(model)
    max_epoch = args.EPOCHS
    batch_size = args.BATCH
    scheduler = build_scheduler(optimizer,
                                lr_scheduler='cosine',
                                max_epoch=max_epoch)
    criterion = nn.SmoothL1Loss()

    model.train()
    train_loader = DataLoader(dataset=dataset,
                              batch_size=batch_size,
                              shuffle=True)
    cudnn.benchmark = True

    for epoch in range(max_epoch):
        for step, batch in enumerate(train_loader):
            images, targets = batch
            images = images.cuda()
            targets = targets.float().cuda()

            optimizer.zero_grad()
            predictions = model(images)
            loss = criterion(predictions, targets)
            loss.backward()
            optimizer.step()

            if step % 100 == 0:
                print("Epoch: [{}/{}][{}/{}] Loss {:.4f}".format(
                    epoch + 1, max_epoch, step + 1, len(train_loader), loss))
        scheduler.step()

    torch.save(model.state_dict(), 'last.pth')
def train(args):
    """Train the tracking model with classification + localization losses.

    Each epoch iterates the training loader, computes a select-cross-entropy
    loss on the (log-softmaxed) classification branch and a weighted L1 loss
    on the localization branch, back-propagates their combination via
    ``process_loss``, then rebuilds the optimizer through ``adjust_optimizer``
    to apply the per-epoch learning-rate schedule.

    :param args: namespace providing ``epoch``, ``batch``, ``num_workers``
        and ``lr``.
    :return: None
    """
    import traceback

    total_epoch = args.epoch
    batch = args.batch
    num_workers = args.num_workers
    lr_init = args.lr

    log_soft_max = LogSoftMax()
    sel_cro_ent_loss = SelectCrossEntropyLoss()
    weight_l1_loss = WeightL1Loss()

    model = buildlModel("train")
    train_loader = build_data_loader(batch, num_workers)
    optimizer = build_optimizer(model, lr_init)

    for epoch in range(total_epoch):
        try:
            for step, data in enumerate(train_loader):
                label_cls = data['label_cls']
                label_loc = data['label_loc']
                label_loc_weight = data['label_loc_weight']

                cls, loc = model(data)
                cls = log_soft_max(cls)
                cls_loss = sel_cro_ent_loss(cls, label_cls)
                loc_loss = weight_l1_loss(loc, label_loc, label_loc_weight)
                loss = process_loss(loc_loss, cls_loss)["total_loss"]
                print("epoch:{epoch} step:{step} loss:{loss}".format(
                    epoch=epoch, step=step, loss=loss))

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            # Refresh the optimizer (LR schedule) at the end of each epoch.
            optimizer = adjust_optimizer(model, total_epoch, epoch, lr_init)
        except Exception:
            # BUG FIX: the original bare `except: pass` silently swallowed
            # every error (including KeyboardInterrupt/SystemExit). Keep the
            # best-effort "continue with next epoch" behavior, but log the
            # failure so it is diagnosable, and let process-exit exceptions
            # propagate.
            traceback.print_exc()
def train_gnn_mdi(data, args, log_path, device=torch.device('cpu')):
    """Train a GNN + MLP imputation model for missing-data imputation (MDI).

    Per epoch: drops a random subset of training edges (`args.known`), embeds
    nodes with the GNN on the kept edges, predicts the held-out edge values
    with `impute_model`, and optimizes MSE (or cross-entropy when
    `args.ce_loss` is set). Tracks train/valid/test curves, checkpoints best
    validation models, and dumps results/plots to `log_path`.
    """
    model = get_gnn(data, args).to(device)
    # '' means no hidden layers; otherwise sizes are '_'-separated ints.
    if args.impute_hiddens == '':
        impute_hiddens = []
    else:
        impute_hiddens = list(map(int, args.impute_hiddens.split('_')))
    # Edge embedding is a concatenation of the two endpoint node embeddings.
    if args.concat_states:
        input_dim = args.node_dim * len(model.convs) * 2
    else:
        input_dim = args.node_dim * 2
    # Cross-entropy mode predicts a class per discrete edge value.
    if hasattr(args, 'ce_loss') and args.ce_loss:
        output_dim = len(data.class_values)
    else:
        output_dim = 1
    impute_model = MLPNet(input_dim, output_dim,
                          hidden_layer_sizes=impute_hiddens,
                          hidden_activation=args.impute_activation,
                          dropout=args.dropout).to(device)
    if args.transfer_dir:  # this ensures the valid mask is consistant
        load_path = './{}/test/{}/{}/'.format(args.domain, args.data,
                                              args.transfer_dir)
        print("loading fron {} with {}".format(load_path, args.transfer_extra))
        model = torch.load(load_path + 'model' + args.transfer_extra + '.pt',
                           map_location=device)
        impute_model = torch.load(load_path + 'impute_model' +
                                  args.transfer_extra + '.pt',
                                  map_location=device)
    trainable_parameters = list(model.parameters()) \
        + list(impute_model.parameters())
    print("total trainable_parameters: ", len(trainable_parameters))
    # build optimizer
    scheduler, opt = build_optimizer(args, trainable_parameters)
    # train
    Train_loss = []
    Test_rmse = []
    Test_l1 = []
    Lr = []
    x = data.x.clone().detach().to(device)
    # Optional sample-split mode: train on "lower" split, test on "higher".
    if hasattr(args, 'split_sample') and args.split_sample > 0.:
        if args.split_train:
            all_train_edge_index = data.lower_train_edge_index.clone().detach(
            ).to(device)
            all_train_edge_attr = data.lower_train_edge_attr.clone().detach(
            ).to(device)
            all_train_labels = data.lower_train_labels.clone().detach().to(
                device)
        else:
            all_train_edge_index = data.train_edge_index.clone().detach().to(
                device)
            all_train_edge_attr = data.train_edge_attr.clone().detach().to(
                device)
            all_train_labels = data.train_labels.clone().detach().to(device)
        if args.split_test:
            test_input_edge_index = data.higher_train_edge_index.clone(
            ).detach().to(device)
            test_input_edge_attr = data.higher_train_edge_attr.clone().detach(
            ).to(device)
        else:
            test_input_edge_index = data.train_edge_index.clone().detach().to(
                device)
            test_input_edge_attr = data.train_edge_attr.clone().detach().to(
                device)
        test_edge_index = data.higher_test_edge_index.clone().detach().to(
            device)
        test_edge_attr = data.higher_test_edge_attr.clone().detach().to(device)
        test_labels = data.higher_test_labels.clone().detach().to(device)
    else:
        all_train_edge_index = data.train_edge_index.clone().detach().to(
            device)
        all_train_edge_attr = data.train_edge_attr.clone().detach().to(device)
        all_train_labels = data.train_labels.clone().detach().to(device)
        # Test-time message passing uses all training edges as input.
        test_input_edge_index = all_train_edge_index
        test_input_edge_attr = all_train_edge_attr
        test_edge_index = data.test_edge_index.clone().detach().to(device)
        test_edge_attr = data.test_edge_attr.clone().detach().to(device)
        test_labels = data.test_labels.clone().detach().to(device)
    if hasattr(data, 'class_values'):
        class_values = data.class_values.clone().detach().to(device)
    if args.valid > 0.:
        # Carve a validation subset out of the training edges. Masks are per
        # undirected edge (shape[0]/2), then doubled for both directions.
        valid_mask = get_known_mask(args.valid,
                                    int(all_train_edge_attr.shape[0] /
                                        2)).to(device)
        print("valid mask sum: ", torch.sum(valid_mask))
        train_labels = all_train_labels[~valid_mask]
        valid_labels = all_train_labels[valid_mask]
        double_valid_mask = torch.cat((valid_mask, valid_mask), dim=0)
        valid_edge_index, valid_edge_attr = mask_edge(all_train_edge_index,
                                                      all_train_edge_attr,
                                                      double_valid_mask, True)
        train_edge_index, train_edge_attr = mask_edge(all_train_edge_index,
                                                      all_train_edge_attr,
                                                      ~double_valid_mask, True)
        print("train edge num is {}, valid edge num is {}, test edge num is input {} output {}"\
            .format(
                train_edge_attr.shape[0], valid_edge_attr.shape[0],
                test_input_edge_attr.shape[0], test_edge_attr.shape[0]))
        Valid_rmse = []
        Valid_l1 = []
        best_valid_rmse = np.inf
        best_valid_rmse_epoch = 0
        best_valid_l1 = np.inf
        best_valid_l1_epoch = 0
    else:
        train_edge_index, train_edge_attr, train_labels =\
            all_train_edge_index, all_train_edge_attr, all_train_labels
        print("train edge num is {}, test edge num is input {}, output {}"\
            .format(
                train_edge_attr.shape[0],
                test_input_edge_attr.shape[0], test_edge_attr.shape[0]))
    if args.auto_known:
        # Derive the known-edge keep ratio from the train/test label split.
        args.known = float(
            all_train_labels.shape[0]) / float(all_train_labels.shape[0] +
                                               test_labels.shape[0])
        print("auto calculating known is {}/{} = {:.3g}".format(
            all_train_labels.shape[0],
            all_train_labels.shape[0] + test_labels.shape[0], args.known))
    obj = dict()
    obj['args'] = args
    obj['outputs'] = dict()
    for epoch in range(args.epochs):
        model.train()
        impute_model.train()
        # Randomly keep `args.known` of the training edges as GNN input; the
        # model is trained to reconstruct all training edge values.
        known_mask = get_known_mask(args.known,
                                    int(train_edge_attr.shape[0] /
                                        2)).to(device)
        double_known_mask = torch.cat((known_mask, known_mask), dim=0)
        known_edge_index, known_edge_attr = mask_edge(train_edge_index,
                                                      train_edge_attr,
                                                      double_known_mask, True)
        opt.zero_grad()
        x_embd = model(x, known_edge_attr, known_edge_index)
        pred = impute_model(
            [x_embd[train_edge_index[0]], x_embd[train_edge_index[1]]])
        # Only the first half (one direction per undirected edge) is scored.
        if hasattr(args, 'ce_loss') and args.ce_loss:
            pred_train = pred[:int(train_edge_attr.shape[0] / 2)]
        else:
            pred_train = pred[:int(train_edge_attr.shape[0] / 2), 0]
        if args.loss_mode == 1:
            # loss_mode 1: don't penalize edges the model already saw.
            pred_train[known_mask] = train_labels[known_mask]
        label_train = train_labels
        if hasattr(args, 'ce_loss') and args.ce_loss:
            loss = F.cross_entropy(pred_train, train_labels)
        else:
            loss = F.mse_loss(pred_train, label_train)
        loss.backward()
        opt.step()
        train_loss = loss.item()
        if scheduler is not None:
            scheduler.step(epoch)
        for param_group in opt.param_groups:
            Lr.append(param_group['lr'])
        model.eval()
        impute_model.eval()
        with torch.no_grad():
            if args.valid > 0.:
                # Validation: embed with all training edges, score held-out
                # validation edges.
                x_embd = model(x, train_edge_attr, train_edge_index)
                pred = impute_model([
                    x_embd[valid_edge_index[0], :],
                    x_embd[valid_edge_index[1], :]
                ])
                if hasattr(args, 'ce_loss') and args.ce_loss:
                    # Map argmax class indices back to real edge values.
                    pred_valid = class_values[
                        pred[:int(valid_edge_attr.shape[0] / 2)].max(1)[1]]
                    label_valid = class_values[valid_labels]
                elif hasattr(args, 'norm_label') and args.norm_label:
                    # Labels were normalized by max(class_values); rescale.
                    pred_valid = pred[:int(valid_edge_attr.shape[0] / 2), 0]
                    pred_valid = pred_valid * max(class_values)
                    label_valid = valid_labels
                    label_valid = label_valid * max(class_values)
                else:
                    pred_valid = pred[:int(valid_edge_attr.shape[0] / 2), 0]
                    label_valid = valid_labels
                mse = F.mse_loss(pred_valid, label_valid)
                valid_rmse = np.sqrt(mse.item())
                l1 = F.l1_loss(pred_valid, label_valid)
                valid_l1 = l1.item()
                if valid_l1 < best_valid_l1:
                    best_valid_l1 = valid_l1
                    best_valid_l1_epoch = epoch
                    if args.save_model:
                        torch.save(model, log_path + 'model_best_valid_l1.pt')
                        torch.save(impute_model,
                                   log_path + 'impute_model_best_valid_l1.pt')
                if valid_rmse < best_valid_rmse:
                    best_valid_rmse = valid_rmse
                    best_valid_rmse_epoch = epoch
                    if args.save_model:
                        torch.save(model,
                                   log_path + 'model_best_valid_rmse.pt')
                        torch.save(
                            impute_model,
                            log_path + 'impute_model_best_valid_rmse.pt')
                Valid_rmse.append(valid_rmse)
                Valid_l1.append(valid_l1)
            # Test: embed with the test-input edges, score test edges.
            x_embd = model(x, test_input_edge_attr, test_input_edge_index)
            pred = impute_model(
                [x_embd[test_edge_index[0], :], x_embd[test_edge_index[1], :]])
            if hasattr(args, 'ce_loss') and args.ce_loss:
                pred_test = class_values[pred[:int(test_edge_attr.shape[0] /
                                                   2)].max(1)[1]]
                label_test = class_values[test_labels]
            elif hasattr(args, 'norm_label') and args.norm_label:
                pred_test = pred[:int(test_edge_attr.shape[0] / 2), 0]
                pred_test = pred_test * max(class_values)
                label_test = test_labels
                label_test = label_test * max(class_values)
            else:
                pred_test = pred[:int(test_edge_attr.shape[0] / 2), 0]
                label_test = test_labels
            mse = F.mse_loss(pred_test, label_test)
            test_rmse = np.sqrt(mse.item())
            l1 = F.l1_loss(pred_test, label_test)
            test_l1 = l1.item()
            if args.save_prediction:
                # Snapshot test predictions at the epochs that were best on
                # the validation metrics.
                if epoch == best_valid_rmse_epoch:
                    obj['outputs'][
                        'best_valid_rmse_pred_test'] = pred_test.detach().cpu(
                        ).numpy()
                if epoch == best_valid_l1_epoch:
                    obj['outputs'][
                        'best_valid_l1_pred_test'] = pred_test.detach().cpu(
                        ).numpy()
            if args.mode == 'debug':
                torch.save(model, log_path + 'model_{}.pt'.format(epoch))
                torch.save(impute_model,
                           log_path + 'impute_model_{}.pt'.format(epoch))
            Train_loss.append(train_loss)
            Test_rmse.append(test_rmse)
            Test_l1.append(test_l1)
            print('epoch: ', epoch)
            print('loss: ', train_loss)
            if args.valid > 0.:
                print('valid rmse: ', valid_rmse)
                print('valid l1: ', valid_l1)
            print('test rmse: ', test_rmse)
            print('test l1: ', test_l1)
    # Persist final predictions, curves, checkpoints and plots.
    pred_train = pred_train.detach().cpu().numpy()
    label_train = label_train.detach().cpu().numpy()
    pred_test = pred_test.detach().cpu().numpy()
    label_test = label_test.detach().cpu().numpy()
    obj['curves'] = dict()
    obj['curves']['train_loss'] = Train_loss
    if args.valid > 0.:
        obj['curves']['valid_rmse'] = Valid_rmse
        obj['curves']['valid_l1'] = Valid_l1
    obj['curves']['test_rmse'] = Test_rmse
    obj['curves']['test_l1'] = Test_l1
    obj['lr'] = Lr
    obj['outputs']['final_pred_train'] = pred_train
    obj['outputs']['label_train'] = label_train
    obj['outputs']['final_pred_test'] = pred_test
    obj['outputs']['label_test'] = label_test
    pickle.dump(obj, open(log_path + 'result.pkl', "wb"))
    if args.save_model:
        torch.save(model, log_path + 'model.pt')
        torch.save(impute_model, log_path + 'impute_model.pt')
    # obj = objectview(obj)
    plot_curve(obj['curves'],
               log_path + 'curves.png',
               keys=None,
               clip=True,
               label_min=True,
               label_end=True)
    plot_curve(obj,
               log_path + 'lr.png',
               keys=['lr'],
               clip=False,
               label_min=False,
               label_end=False)
    plot_sample(obj['outputs'],
                log_path + 'outputs.png',
                groups=[['final_pred_train', 'label_train'],
                        ['final_pred_test', 'label_test']],
                num_points=20)
    if args.save_prediction and args.valid > 0.:
        plot_sample(obj['outputs'],
                    log_path + 'outputs_best_valid.png',
                    groups=[['best_valid_rmse_pred_test', 'label_test'],
                            ['best_valid_l1_pred_test', 'label_test']],
                    num_points=20)
    if args.valid > 0.:
        print("best valid rmse is {:.3g} at epoch {}".format(
            best_valid_rmse, best_valid_rmse_epoch))
        print("best valid l1 is {:.3g} at epoch {}".format(
            best_valid_l1, best_valid_l1_epoch))
def train(self):
    """Train SENet-154 with joint classification + regression heads.

    The model predicts an integer beauty score (classification head, CE loss)
    plus a fractional offset (regression head, MSE loss). Every second epoch
    the combined score (argmax class + offset) is evaluated on the test set
    and the accumulated squared error is logged to TensorBoard. Final weights
    are written to 'last.pth'.

    :return: None
    """
    train_dataset = FacialBeautyDataset(mode='train')
    test_dataset = FacialBeautyDataset(mode='test')
    # Local pretrained SENet-154 checkpoint.
    path = 'data/input/model/senet154-c7b49a05.pth'
    model = senet154(num_classes=1)
    load_pretrained_weights(model, path)
    model = model.cuda()
    optimizer = build_optimizer(model, optim='adam')
    max_epoch = args.EPOCHS
    batch_size = args.BATCH
    scheduler = build_scheduler(optimizer, lr_scheduler='cosine',
                                max_epoch=max_epoch)
    criterion1 = nn.CrossEntropyLoss()  # integer-score head
    criterion2 = nn.MSELoss()           # fractional-offset head
    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=batch_size,
                              shuffle=True)
    test_loader = DataLoader(dataset=test_dataset, batch_size=1)
    cudnn.benchmark = True
    writer = tb.SummaryWriter()
    print(len(test_loader))
    for epoch in range(max_epoch):
        model.train()
        for index, data in enumerate(train_loader):
            im, cls_label, val_label = data
            im = im.cuda()
            # BUG FIX: the classification target must be on the same device
            # as the logits, otherwise CrossEntropyLoss raises at runtime.
            cls_label = cls_label.cuda()
            val_label = val_label.float().unsqueeze(1).cuda()
            optimizer.zero_grad()
            out1, out2 = model(im)
            cls_loss = criterion1(out1, cls_label)
            reg_loss = criterion2(out2, val_label)
            loss = cls_loss + reg_loss
            loss.backward()
            optimizer.step()
            if index % 50 == 0:
                print("Epoch: [{}/{}][{}/{}] Loss {:.6f}".format(
                    epoch + 1, max_epoch, index + 1, len(train_loader),
                    loss * 5.0))
            num_epochs = epoch * len(train_loader) + index
            writer.add_scalar('loss', loss, num_epochs)
        scheduler.step()
        if (epoch + 1) % 2 == 0:
            model.eval()
            sum_r = 0.
            with torch.no_grad():  # inference only — no gradients needed
                for data in test_loader:
                    im, cls_label, val_label = data
                    im = im.cuda()
                    y1, y2 = model(im)
                    # BUG FIX: `nn.Softmax(y1)` constructed a Module (with
                    # the logits misused as the `dim` argument) instead of
                    # applying softmax, and Module has no .detach() — the
                    # eval path crashed. Apply the functional softmax over
                    # the class axis instead.
                    y1 = torch.softmax(y1, dim=1).cpu().numpy()
                    # Renamed from `index` to avoid shadowing the train-loop
                    # counter.
                    cls_idx = np.argmax(y1, axis=1)
                    y2 = y2.cpu().numpy()[0][0]
                    # Final score = predicted integer class + offset.
                    y = cls_idx + y2
                    y_gt = (cls_label + val_label).numpy()
                    # Accumulate as a Python float so the TensorBoard scalar
                    # below receives a plain number.
                    sum_r += float((y - y_gt) ** 2)
            # NOTE(review): despite the name this is a *sum* of squared
            # errors over the test set, not a true RMSE — kept as-is so the
            # logged metric stays comparable with earlier runs.
            RMSE = sum_r
            num_epochs = epoch
            writer.add_scalar('sum-rmse', RMSE, num_epochs)
            print('RMSE:{}'.format(RMSE))
    torch.save(model.state_dict(), 'last.pth')
    writer.close()
import os import torch import torchvision from torch import nn from torch import optim from torchvision import datasets, transforms from utils.utils import build_network, build_optimizer, build_writer, read_cfg, get_device from utils.meters import AverageMeter from dataset.DsoftDataset import DsoftDataset from trainer.DsoftTrainer import DsoftTrainer cfg = read_cfg(config_path='config/config.yaml') network = build_network(cfg=cfg) optimizer = build_optimizer(cfg=cfg, network=network) criterion = nn.BCEWithLogitsLoss() device = get_device(cfg=cfg) dump_input = torch.randn((1, 3, 224, 224)) writer = build_writer(cfg=cfg) writer.add_graph(network, input_to_model=dump_input) train_transform = transforms.Compose([ transforms.RandomResizedCrop(cfg['model']['image_size'][0]), transforms.RandomHorizontalFlip(), transforms.ToTensor(),
with open(os.path.join(output_dir, 'model_print'), 'w') as f: print(change_detector, file=f) print(speaker, file=f) print(spatial_info, file=f) # Data loading part train_dataset, train_loader = create_dataset(cfg, 'train') val_dataset, val_loader = create_dataset(cfg, 'val') train_size = len(train_dataset) val_size = len(val_dataset) # Define loss function and optimizer lang_criterion = LanguageModelCriterion().to(device) entropy_criterion = EntropyLoss().to(device) all_params = list(change_detector.parameters()) + list(speaker.parameters()) optimizer = build_optimizer(all_params, cfg) lr_scheduler = torch.optim.lr_scheduler.StepLR( optimizer, step_size=cfg.train.optim.step_size, gamma=cfg.train.optim.gamma) # Train loop t = 0 epoch = 0 set_mode('train', [change_detector, speaker]) ss_prob = speaker.ss_prob while t < cfg.train.max_iter: epoch += 1 print('Starting epoch %d' % epoch)
def train_gnn_y(data, args, log_path, device=torch.device('cpu')):
    """Train GNN + imputation MLP + prediction MLP for label prediction.

    Per epoch: embeds nodes from a random subset of training edges, imputes
    the full (n_row x n_col) feature matrix X, predicts the per-row target y
    from X, and optimizes MSE on the training rows. Validation/test RMSE and
    L1 are tracked, best-validation checkpoints saved, and results dumped to
    `log_path`.
    """
    model = get_gnn(data, args).to(device)
    # '' means no hidden layers; otherwise sizes are '_'-separated ints.
    if args.impute_hiddens == '':
        impute_hiddens = []
    else:
        impute_hiddens = list(map(int, args.impute_hiddens.split('_')))
    # Edge embedding is a concatenation of the two endpoint node embeddings.
    if args.concat_states:
        input_dim = args.node_dim * len(model.convs) * 2
    else:
        input_dim = args.node_dim * 2
    impute_model = MLPNet(input_dim,
                          1,
                          hidden_layer_sizes=impute_hiddens,
                          hidden_activation=args.impute_activation,
                          dropout=args.dropout).to(device)
    if args.predict_hiddens == '':
        predict_hiddens = []
    else:
        predict_hiddens = list(map(int, args.predict_hiddens.split('_')))
    n_row, n_col = data.df_X.shape
    # Maps an imputed feature row (n_col values) to a scalar prediction.
    predict_model = MLPNet(n_col,
                           1,
                           hidden_layer_sizes=predict_hiddens,
                           dropout=args.dropout).to(device)
    trainable_parameters = list(model.parameters()) \
        + list(impute_model.parameters()) \
        + list(predict_model.parameters())
    # build optimizer
    scheduler, opt = build_optimizer(args, trainable_parameters)
    # train
    Train_loss = []
    Test_rmse = []
    Test_l1 = []
    Lr = []
    x = data.x.clone().detach().to(device)
    y = data.y.clone().detach().to(device)
    edge_index = data.edge_index.clone().detach().to(device)
    train_edge_index = data.train_edge_index.clone().detach().to(device)
    train_edge_attr = data.train_edge_attr.clone().detach().to(device)
    all_train_y_mask = data.train_y_mask.clone().detach().to(device)
    test_y_mask = data.test_y_mask.clone().detach().to(device)
    if args.valid > 0.:
        # Split the training rows into disjoint train/valid masks.
        valid_mask = get_known_mask(args.valid,
                                    all_train_y_mask.shape[0]).to(device)
        valid_mask = valid_mask*all_train_y_mask
        train_y_mask = all_train_y_mask.clone().detach()
        train_y_mask[valid_mask] = False
        valid_y_mask = all_train_y_mask.clone().detach()
        valid_y_mask[~valid_mask] = False
        print("all y num is {}, train num is {}, valid num is {}, test num is {}"\
            .format(
            all_train_y_mask.shape[0],torch.sum(train_y_mask),
            torch.sum(valid_y_mask),torch.sum(test_y_mask)))
        Valid_rmse = []
        Valid_l1 = []
        best_valid_rmse = np.inf
        best_valid_rmse_epoch = 0
        best_valid_l1 = np.inf
        best_valid_l1_epoch = 0
    else:
        train_y_mask = all_train_y_mask.clone().detach()
        print("all y num is {}, train num is {}, test num is {}"\
            .format(
            all_train_y_mask.shape[0],torch.sum(train_y_mask),
            torch.sum(test_y_mask)))
    for epoch in range(args.epochs):
        model.train()
        impute_model.train()
        predict_model.train()
        # Randomly keep `args.known` of the training edges as GNN input.
        # Masks are per undirected edge, then doubled for both directions.
        known_mask = get_known_mask(args.known,
                                    int(train_edge_attr.shape[0] /
                                        2)).to(device)
        double_known_mask = torch.cat((known_mask, known_mask), dim=0)
        known_edge_index, known_edge_attr = mask_edge(train_edge_index,
                                                      train_edge_attr,
                                                      double_known_mask, True)
        opt.zero_grad()
        x_embd = model(x, known_edge_attr, known_edge_index)
        # Impute every cell of the feature matrix, then reshape to (row, col).
        X = impute_model([x_embd[edge_index[0, :int(n_row * n_col)]],
                          x_embd[edge_index[1, :int(n_row * n_col)]]])
        X = torch.reshape(X, [n_row, n_col])
        pred = predict_model(X)[:, 0]
        pred_train = pred[train_y_mask]
        label_train = y[train_y_mask]
        loss = F.mse_loss(pred_train, label_train)
        loss.backward()
        opt.step()
        train_loss = loss.item()
        if scheduler is not None:
            scheduler.step(epoch)
        for param_group in opt.param_groups:
            Lr.append(param_group['lr'])
        model.eval()
        impute_model.eval()
        predict_model.eval()
        with torch.no_grad():
            if args.valid > 0.:
                # Validation pass uses *all* training edges as GNN input.
                x_embd = model(x, train_edge_attr, train_edge_index)
                X = impute_model([x_embd[edge_index[0, :int(n_row * n_col)]],
                                  x_embd[edge_index[1, :int(n_row * n_col)]]])
                X = torch.reshape(X, [n_row, n_col])
                pred = predict_model(X)[:, 0]
                pred_valid = pred[valid_y_mask]
                label_valid = y[valid_y_mask]
                mse = F.mse_loss(pred_valid, label_valid)
                valid_rmse = np.sqrt(mse.item())
                l1 = F.l1_loss(pred_valid, label_valid)
                valid_l1 = l1.item()
                if valid_l1 < best_valid_l1:
                    best_valid_l1 = valid_l1
                    best_valid_l1_epoch = epoch
                    torch.save(model, log_path + 'model_best_valid_l1.pt')
                    torch.save(impute_model,
                               log_path + 'impute_model_best_valid_l1.pt')
                    torch.save(predict_model,
                               log_path + 'predict_model_best_valid_l1.pt')
                if valid_rmse < best_valid_rmse:
                    best_valid_rmse = valid_rmse
                    best_valid_rmse_epoch = epoch
                    torch.save(model, log_path + 'model_best_valid_rmse.pt')
                    torch.save(impute_model,
                               log_path + 'impute_model_best_valid_rmse.pt')
                    torch.save(predict_model,
                               log_path + 'predict_model_best_valid_rmse.pt')
                Valid_rmse.append(valid_rmse)
                Valid_l1.append(valid_l1)
            # Test pass, same imputation pipeline on the test rows.
            x_embd = model(x, train_edge_attr, train_edge_index)
            X = impute_model([x_embd[edge_index[0, :int(n_row * n_col)]],
                              x_embd[edge_index[1, :int(n_row * n_col)]]])
            X = torch.reshape(X, [n_row, n_col])
            pred = predict_model(X)[:, 0]
            pred_test = pred[test_y_mask]
            label_test = y[test_y_mask]
            mse = F.mse_loss(pred_test, label_test)
            test_rmse = np.sqrt(mse.item())
            l1 = F.l1_loss(pred_test, label_test)
            test_l1 = l1.item()
            Train_loss.append(train_loss)
            Test_rmse.append(test_rmse)
            Test_l1.append(test_l1)
            print('epoch: ', epoch)
            print('loss: ', train_loss)
            if args.valid > 0.:
                print('valid rmse: ', valid_rmse)
                print('valid l1: ', valid_l1)
            print('test rmse: ', test_rmse)
            print('test l1: ', test_l1)
    # Persist final predictions, curves, checkpoints and plots.
    pred_train = pred_train.detach().cpu().numpy()
    label_train = label_train.detach().cpu().numpy()
    pred_test = pred_test.detach().cpu().numpy()
    label_test = label_test.detach().cpu().numpy()
    obj = dict()
    obj['args'] = args
    obj['curves'] = dict()
    obj['curves']['train_loss'] = Train_loss
    if args.valid > 0.:
        obj['curves']['valid_rmse'] = Valid_rmse
        obj['curves']['valid_l1'] = Valid_l1
    obj['curves']['test_rmse'] = Test_rmse
    obj['curves']['test_l1'] = Test_l1
    obj['lr'] = Lr
    obj['outputs'] = dict()
    obj['outputs']['pred_train'] = pred_train
    obj['outputs']['label_train'] = label_train
    obj['outputs']['pred_test'] = pred_test
    obj['outputs']['label_test'] = label_test
    pickle.dump(obj, open(log_path + 'result.pkl', "wb"))
    torch.save(model, log_path + 'model.pt')
    torch.save(impute_model, log_path + 'impute_model.pt')
    torch.save(predict_model, log_path + 'predict_model.pt')
    # obj = objectview(obj)
    plot_curve(obj['curves'], log_path+'curves.png',keys=None,
                clip=True, label_min=True, label_end=True)
    plot_curve(obj, log_path+'lr.png',keys=['lr'],
                clip=False, label_min=False, label_end=False)
    plot_sample(obj['outputs'], log_path+'outputs.png',
                groups=[['pred_train','label_train'],
                        ['pred_test','label_test']
                        ],
                num_points=20)
    if args.valid > 0.:
        print("best valid rmse is {:.3g} at epoch {}".format(best_valid_rmse,best_valid_rmse_epoch))
        print("best valid l1 is {:.3g} at epoch {}".format(best_valid_l1,best_valid_l1_epoch))
def train_gnn_y(args, device=torch.device('cpu')):
    """K-fold training of a GNN + transformer-encoder fusion classifier.

    Builds sinusoidal positional encodings / time-difference sequences per
    labeled node, then for each fold trains a SAGE-GGNN, two transformer
    encoders and a fusion module with cross-entropy, evaluating test AUC per
    epoch and reporting the per-fold maximum and overall mean AUC.
    """
    print(
        '################################data preprocessing##################################'
    )
    seed_torch(args.seed)
    ##data loading
    data = load_data(args, 0)
    ## create tensor for transformer encoder
    # for explanation
    edges_trans_kfold = []
    edges_timeDiff_kfold = []
    edges_trans_kfold_test = []
    edges_timeDiff_kfold_test = []
    timeDiff_count_kfold_test = []
    timeDiff_value_kfold_test = []
    for k in range(args.kfold):
        #train
        edges_transformer = []
        edges_timeDiff = []
        for i, mask in enumerate(data.train_y_mask[k]):
            if mask == True:
                # Compute the positional encodings once in log space.
                edge_trans = data.edge_trans_train_kfold[k][i]
                edge_len = len(edge_trans)
                dim_pos = 16
                pe = torch.zeros(edge_len, dim_pos)
                position = torch.arange(0, edge_len).unsqueeze(1)
                div_term = torch.exp(
                    torch.arange(0, dim_pos, 2) *
                    -(math.log(10000.0) / dim_pos))
                pe[:, 0::2] = torch.sin(position * div_term)
                pe[:, 1::2] = torch.cos(position * div_term)
                time_diff = torch.tensor(edge_trans)
                # Sequences are left-truncated (keep most recent) or
                # zero-left-padded to a fixed length of 150.
                # NOTE(review): max_len is assigned here and reused in the
                # test branch below — relies on Python function scoping.
                max_len = 150
                if pe.size(0) >= max_len:
                    zero_pad_len = 0
                    pe = pe[pe.size(0) - max_len:, :]
                    tim_diff = time_diff[time_diff.size(0) - max_len:, :]
                else:
                    zero_pad_len = max_len - pe.size(0)
                    zero_pe = torch.zeros(zero_pad_len, dim_pos)
                    zero_time_diff = torch.zeros(zero_pad_len, 1)
                    pe = torch.cat([zero_pe, pe], dim=0)
                    tim_diff = torch.cat([zero_time_diff, time_diff], dim=0)
                edges_transformer.append(pe)
                edges_timeDiff.append(tim_diff)
        edges_trans_kfold.append(torch.stack(edges_transformer))
        edges_timeDiff_kfold.append(torch.stack(edges_timeDiff))
        #test
        edges_transformer = []
        edges_timeDiff = []
        timeDiff_count = []
        timeDiff_value = []
        for i, mask in enumerate(data.test_y_mask[k]):
            if mask == True:
                # Compute the positional encodings once in log space.
                edge_trans = data.edge_trans_test_kfold[k][i]
                edge_len = len(edge_trans)
                dim_pos = 16
                pe = torch.zeros(edge_len, dim_pos)
                position = torch.arange(0, edge_len).unsqueeze(1)
                div_term = torch.exp(
                    torch.arange(0, dim_pos, 2) *
                    -(math.log(10000.0) / dim_pos))
                pe[:, 0::2] = torch.sin(position * div_term)
                pe[:, 1::2] = torch.cos(position * div_term)
                time_diff = torch.tensor(edge_trans)
                ## padding
                if pe.size(0) >= max_len:
                    zero_pad_len = 0
                    pe = pe[pe.size(0) - max_len:, :]
                    tim_diff = time_diff[time_diff.size(0) - max_len:, :]
                    # Also record unpadded length/values for the test split.
                    timeDiff_count.append(max_len)
                    timeDiff_value.append(time_diff[time_diff.size(0) -
                                                    max_len:, :].view(1, -1))
                else:
                    timeDiff_count.append(pe.size(0))
                    timeDiff_value.append(time_diff.view(1, -1))
                    zero_pad_len = max_len - pe.size(0)
                    zero_pe = torch.zeros(zero_pad_len, dim_pos)
                    zero_time_diff = torch.zeros(zero_pad_len, 1)
                    pe = torch.cat([zero_pe, pe], dim=0)
                    tim_diff = torch.cat([zero_time_diff, time_diff], dim=0)
                edges_transformer.append(pe)
                edges_timeDiff.append(tim_diff)
        edges_trans_kfold_test.append(torch.stack(edges_transformer))
        edges_timeDiff_kfold_test.append(torch.stack(edges_timeDiff))
        timeDiff_count_kfold_test.append(timeDiff_count)
        timeDiff_value_kfold_test.append(timeDiff_value)
    n_row = data.user_num
    n_col = data.product_num
    x = data.x.clone().detach().to(device)
    edge_index = data.edge_index.detach().to(device)
    edge_attr = data.edge_attr.detach().to(device)
    train_edge_index = data.train_edge_index
    train_edge_attr = data.train_edge_attr
    test_edge_index = data.test_edge_index
    test_edge_attr = data.test_edge_attr
    dict_prev = data.dict_user_pro
    print(
        '################################training starts##################################'
    )
    K_Fold = len(data.train_labels)
    print("K_Fold", K_Fold)
    num_item = 3
    AUC_max_list = []
    for k in range(K_Fold):
        print("K-th", k)
        AUC_list = []
        ## k-th train and test labels
        edge_index = data.train_edge_index.detach().to(device)
        edge_attr = data.train_edge_attr.detach().to(device)
        train_y_labels = data.train_labels[k].clone().detach().to(device)
        train_y_mask = data.train_y_mask[k].clone().detach().to(device)
        edges_transformer = edges_trans_kfold[k].detach().to(device)
        edges_timeDiff = edges_timeDiff_kfold[k].detach().to(device)
        edge_index_test = data.test_edge_index.detach().to(device)
        edge_attr_test = data.test_edge_attr.detach().to(device)
        test_y_labels = data.test_labels[k].clone().detach().to(device)
        test_y_mask = data.test_y_mask[k].clone().detach().to(device)
        edges_transformer_test = edges_trans_kfold_test[k].detach().to(device)
        edges_timeDiff_test = edges_timeDiff_kfold_test[k].detach().to(device)
        # Fresh models per fold.
        model_sageGGNN = get_gnn(args).to(device)
        model_fuse = Fusion().to(device)
        model_trans_encoder = Encoder(16, args.node_dim, args.node_dim,
                                      max_len).to(device)
        model_trans_encoder_2 = Encoder_2(args.edge_dim,
                                          args.node_dim).to(device)
        predict_model = MLPNet(args.node_dim, 2).to(device)
        trainable_parameters = list(model_sageGGNN.parameters()) \
            + list(model_trans_encoder.parameters()) \
            + list(model_trans_encoder_2.parameters()) \
            + list(model_fuse.parameters()) \
            + list(predict_model.parameters())
        # build optimizer
        scheduler, opt = build_optimizer(args, trainable_parameters)
        # train
        Train_loss = []
        print("all y num is {}, train num is {}, test num is {}"\
            .format(
            train_y_mask.shape[0],torch.sum(train_y_mask),
            torch.sum(test_y_mask)))
        for epoch in tqdm(range(args.epochs)):
            model_sageGGNN.train()
            predict_model.train()
            opt.zero_grad()
            x_full_hist = []
            edge_attr_diff = edge_attr[:, 4].unsqueeze(-1)  ##for encoder_2
            x_embd, edge_attr_update = model_sageGGNN(x, edge_attr[:, :7],
                                                      edge_index)
            edge_attr_merge = torch.cat([edge_attr_diff, edge_attr_update],
                                        dim=-1)  ##for encoder_2
            x_trans_encoder, _ = model_trans_encoder(edges_timeDiff)
            x_trans_encoder_2 = model_trans_encoder_2(x_embd, edge_attr_merge,
                                                      edge_index)
            # Fuse GNN embeddings with both encoder outputs for labeled rows.
            x_fuse = model_fuse(x_embd[train_y_mask], x_trans_encoder,
                                x_trans_encoder_2[train_y_mask])
            pred = predict_model(x_fuse)
            pred_train = pred
            label_train = train_y_labels
            correct_pred_train = correct_prediction(
                pred_train, label_train) / len(label_train)
            ## computing loss
            loss = F.cross_entropy(pred_train, label_train.long())
            loss.backward()
            opt.step()
            train_loss = loss.item()
            if scheduler is not None:
                scheduler.step(epoch)
            '''
            #for AUC
            pred_train_np = pred_train.detach().numpy()
            label_train_np = label_train.detach().numpy()
            pred_train_select = [entry[label_train_np[idx_train]] for idx_train, entry in enumerate(pred_train_np)]
            pred_train_select = np.array(pred_train_select)
            '''
            predict_model.eval()
            with torch.no_grad():
                # Same pipeline on the test split.
                edge_attr_diff = edge_attr_test[:, 4].unsqueeze(
                    -1)  ##for encoder_2
                x_embd, edge_attr_update = model_sageGGNN(
                    x, edge_attr_test[:, :7], edge_index_test)
                edge_attr_merge = torch.cat([edge_attr_diff, edge_attr_update],
                                            dim=-1)  ##for encoder_2
                x_trans_encoder, weights = model_trans_encoder(
                    edges_timeDiff_test)
                x_trans_encoder_2 = model_trans_encoder_2(
                    x_embd, edge_attr_merge, edge_index_test)
                x_fuse = model_fuse(x_embd[test_y_mask], x_trans_encoder,
                                    x_trans_encoder_2[test_y_mask])
                pred = predict_model(x_fuse)
                pred_test = pred
                label_test = test_y_labels
                #for AUC
                pred_test_np = pred_test.cpu().numpy()
                pred_test = F.softmax(pred_test, dim=-1)
                label_test_np = label_test.cpu().numpy()
                # Positive-class score (column 1) for ROC-AUC.
                pred_test_select = [
                    entry[1] for idx_test, entry in enumerate(pred_test_np)
                ]
                pred_test_select = np.array(pred_test_select)
                #Accuracy
                correct_pred_test = correct_prediction(
                    pred_test, label_test) / len(label_test)
                #AUC
                AUC_test = roc_auc_score(label_test_np, pred_test_select)
                AUC_list.append(AUC_test)
        # Best epoch AUC for this fold.
        AUC_max_list.append(max(AUC_list))
        print("AUC", AUC_max_list)
        print(
            '#################################################################################'
        )
    print("AVE AUC", np.mean(AUC_max_list))
def train_gnn_mdi(data, args, log_path, device=torch.device('cpu')):
    """Train a GNN encoder + MLP imputation head (GNN-MDI scheme), then
    impute every feature value of the dataset and write results to CSV.

    Args:
        data: graph data object carrying ``x``, ``edge_index``, ``edge_attr``,
            ``train_edge_index``/``train_edge_attr``/``train_labels``, the
            fitted ``min_max_scaler`` and raw frames ``df_X``, ``df_y``,
            ``df_class``.  # schema inferred from usage — TODO confirm
        args: experiment config (``node_dim``, ``epochs``, ``known``,
            ``loss_mode``, ``impute_hiddens``, ``ce_loss``,
            ``transfer_dir``, ...).
        log_path: log/checkpoint directory; kept for interface
            compatibility (only referenced by previously commented-out
            checkpointing code).
        device: torch device to run on.

    Side effects:
        Writes 'pd_data_y_421.csv' (pure model predictions) and
        'pd_data_origin_y_421.csv' (original data with NaNs imputed).
    """
    model = get_gnn(data, args).to(device)

    # Hidden-layer sizes for the imputation MLP, e.g. '64_32' -> [64, 32].
    if args.impute_hiddens == '':
        impute_hiddens = []
    else:
        impute_hiddens = list(map(int, args.impute_hiddens.split('_')))

    # An edge is scored from the concatenation of its two endpoint node
    # embeddings (optionally concatenated across all conv layers).
    if args.concat_states:
        input_dim = args.node_dim * len(model.convs) * 2
    else:
        input_dim = args.node_dim * 2

    # Classification head when ce_loss is requested, else scalar regression.
    if hasattr(args, 'ce_loss') and args.ce_loss:
        output_dim = len(data.class_values)
    else:
        output_dim = 1

    impute_model = MLPNet(input_dim, output_dim,
                          hidden_layer_sizes=impute_hiddens,
                          hidden_activation=args.impute_activation,
                          dropout=args.dropout).to(device)

    if args.transfer_dir:  # warm-start from a previous run; keeps the valid mask consistent
        load_path = './{}/test/{}/{}/'.format(args.domain, args.data,
                                              args.transfer_dir)
        # NOTE: fixed typo in the original message ("loading fron").
        print("loading from {} with {}".format(load_path, args.transfer_extra))
        model = torch.load(load_path + 'model' + args.transfer_extra + '.pt',
                           map_location=device)
        impute_model = torch.load(
            load_path + 'impute_model' + args.transfer_extra + '.pt',
            map_location=device)

    trainable_parameters = list(model.parameters()) \
        + list(impute_model.parameters())
    print("total trainable_parameters: ", len(trainable_parameters))
    # build optimizer (and optional LR scheduler)
    scheduler, opt = build_optimizer(args, trainable_parameters)

    Lr = []  # learning-rate history: one entry per param group per epoch

    x = data.x.clone().detach().to(device)
    edge_index = data.edge_index.to(device)
    edge_attr = data.edge_attr.to(device)
    train_edge_index = data.train_edge_index.clone().detach().to(device)
    train_edge_attr = data.train_edge_attr.clone().detach().to(device)
    train_labels = data.train_labels.clone().detach().to(device)

    print("train edge num is {}".format(train_edge_attr.shape[0]))

    # Edges are stored twice (both directions), so the number of distinct
    # training edges is half the attribute count.
    n_train = train_edge_attr.shape[0] // 2

    for epoch in range(args.epochs):
        model.train()
        impute_model.train()

        # Per-epoch random split of the training edges: the "known" subset
        # drives message passing, every training edge is then predicted.
        known_mask = get_train_mask(args.known, n_train).to(device)
        # The mask is per-edge; duplicate it to cover both directions.
        double_known_mask = torch.cat((known_mask, known_mask), dim=0)
        known_edge_index, known_edge_attr = mask_edge(
            train_edge_index, train_edge_attr, double_known_mask, True)

        opt.zero_grad()
        x_embd = model(x, known_edge_attr, known_edge_index)
        pred = impute_model(
            [x_embd[train_edge_index[0]], x_embd[train_edge_index[1]]])

        if hasattr(args, 'ce_loss') and args.ce_loss:
            pred_train = pred[:n_train]        # class logits per edge
        else:
            pred_train = pred[:n_train, 0]     # scalar regression output

        if args.loss_mode == 1:
            # Score only the edges hidden from the encoder: copying ground
            # truth over the known entries zeroes their loss contribution.
            pred_train[known_mask] = train_labels[known_mask]
        label_train = train_labels

        if hasattr(args, 'ce_loss') and args.ce_loss:
            loss = F.cross_entropy(pred_train, label_train.long())
        else:
            loss = F.mse_loss(pred_train, label_train)
        loss.backward()
        opt.step()
        train_loss = loss.item()
        if scheduler is not None:
            scheduler.step(epoch)
        for param_group in opt.param_groups:
            Lr.append(param_group['lr'])
        print('epoch: {}  loss: {:.6f}'.format(epoch, train_loss))

    print('train is over! now its final result:')
    # Final imputation pass over the FULL graph, conditioning on all training
    # edges.  eval() + no_grad() disable dropout and gradient tracking — the
    # original ran this in train mode while recording an unused autograd graph.
    model.eval()
    impute_model.eval()
    with torch.no_grad():
        known_index = data.train_edge_index.clone().detach().to(device)
        known_attr = data.train_edge_attr.clone().detach().to(device)
        x_embd = model(x, known_attr, known_index)
        pred = impute_model([x_embd[edge_index[0]], x_embd[edge_index[1]]])
        pred = pred[:edge_attr.shape[0] // 2]

    # Undo min-max normalization; one row per sample, 8 feature columns.
    pred = pred.cpu().numpy().reshape(-1, 8)
    min_max_scaler = data.min_max_scaler
    pred_origin = min_max_scaler.inverse_transform(pred)

    df_y = data.df_y
    df_class = data.df_class
    # Append the data-source column and the outcome label to the predictions,
    # then write the pure-prediction table.
    pred_origin = np.column_stack((pred_origin, df_class, df_y))
    pd_data = pd.DataFrame(pred_origin,
                           columns=[
                               "gender", "age", "bmi", "bloodGlucose",
                               "proinsulin", "Cp120",
                               "diabetesPredigreeFunction", "trainOutcome",
                               "dataSource", "outcome"
                           ])
    pd_data.to_csv('pd_data_y_421.csv', index=False, float_format='%.04f')

    # Replace only the missing entries of the original data with predictions
    # (vectorized form of the original per-cell double loop).
    df_X = min_max_scaler.inverse_transform(data.df_X)
    nan_mask = np.isnan(df_X)
    df_X[nan_mask] = pred_origin[:, :df_X.shape[1]][nan_mask]
    df = np.column_stack((df_X, df_class, df_y))
    # NOTE(review): column name "diabetesPredigreeFun" intentionally kept
    # byte-identical to the original output (differs from the table above).
    pd_data_origin = pd.DataFrame(df,
                                  columns=[
                                      "gender", "age", "bmi", "bloodGlucose",
                                      "proinsulin", "Cp120",
                                      "diabetesPredigreeFun", "trainOutcome",
                                      "dataSource", "outcome"
                                  ])
    pd_data_origin.to_csv("pd_data_origin_y_421.csv",
                          index=False,
                          float_format='%.04f')