model = model.cuda()
loss_func = nn.CrossEntropyLoss()

dataset = torch.from_numpy(np.load("../output/data/dataset_train.npy"))
targets = torch.from_numpy(
    np.int64(np.load("../output/data/target_train.npy")))
dataset_test = np.load(dataset_path)
targets_test = np.int64(np.load(target_path))

if args.L2norm:
    log_test = setup_logger(
        0, 'test_log_norm',
        os.path.join(args.log_dir, 'test_log_norm.txt'))
    log = setup_logger(
        0, 'train_log_norm',
        os.path.join(args.log_dir, 'train_log_norm.txt'))
    optimizer = Adam(model.parameters(), lr=args.lr, weight_decay=1)
else:
    log_test = setup_logger(
        0, 'test_log', os.path.join(args.log_dir, 'test_log.txt'))
    log = setup_logger(
        0, 'train_log', os.path.join(args.log_dir, 'train_log.txt'))
    optimizer = Adam(model.parameters(), lr=args.lr)

max_accuracy = 0.0
overfitting_cnt = 0
f_accuracy_train = open(os.path.join(args.log_dir, 'acc_train.txt'), 'w')
f_accuracy_test = open(os.path.join(args.log_dir, 'acc_test.txt'), 'w')
f_loss = open(os.path.join(args.log_dir, 'loss.txt'), 'w')

# code for batch training
# (TensorDataset no longer accepts data_tensor/target_tensor keywords;
#  current PyTorch takes the tensors positionally)
torch_dataset = data.TensorDataset(dataset, targets)
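# A minimal sketch (not from the original) of how the TensorDataset above could be
# consumed for batch training. args.batch_size and args.epochs are assumed names,
# not confirmed by this snippet; model, optimizer, and loss_func come from above.
loader = data.DataLoader(torch_dataset, batch_size=args.batch_size, shuffle=True)
for epoch in range(args.epochs):
    for batch_x, batch_y in loader:
        batch_x, batch_y = batch_x.cuda(), batch_y.cuda()
        optimizer.zero_grad()
        loss = loss_func(model(batch_x), batch_y)
        loss.backward()
        optimizer.step()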
def main():
    global opt, best_mae_error
    # dataset = CIFData(*opt.dataroot)
    dataset = h5(*opt.dataroot)
    collate_fn = collate_pool
    train_loader, val_loader, test_loader = get_train_val_test_loader(
        dataset=dataset, collate_fn=collate_fn, batch_size=opt.batch_size,
        train_size=opt.train_size, num_workers=opt.workers,
        val_size=opt.val_size, test_size=opt.test_size,
        pin_memory=opt.cuda, return_test=True)

    # obtain target value normalizer from a random sample of the dataset
    sample_data_list = [dataset[i] for i in sample(range(len(dataset)), 1000)]
    input, sample_target, _ = collate_pool(sample_data_list)
    input_1 = input[0]
    normalizer = Normalizer(sample_target)
    s = Normalizer(input_1)

    model = NET()
    if torch.cuda.is_available():
        print('cuda is ok')
        model = model.cuda()

    criterion = nn.MSELoss()
    optimizer = optim.SGD(model.parameters(), opt.lr,
                          momentum=opt.momentum,
                          weight_decay=opt.weight_decay)

    # optionally resume from a checkpoint
    if opt.resume:
        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
            checkpoint = torch.load(opt.resume)
            opt.start_epoch = checkpoint['epoch']
            best_mae_error = checkpoint['best_mae_error']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            normalizer.load_state_dict(checkpoint['normalizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                opt.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))

    scheduler = MultiStepLR(optimizer, milestones=opt.lr_milestones, gamma=0.1)

    for epoch in range(opt.start_epoch, opt.epochs):
        train(train_loader, model, criterion, optimizer, epoch, normalizer, s)
        mae_error = validate(val_loader, model, criterion, normalizer, s)
        if mae_error != mae_error:  # NaN check
            print('Exit due to NaN')
            sys.exit(1)
        is_best = mae_error < best_mae_error
        best_mae_error = min(mae_error, best_mae_error)
        save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'best_mae_error': best_mae_error,
            'optimizer': optimizer.state_dict(),
            'normalizer': normalizer.state_dict(),
            'opt': vars(opt)
        }, is_best)
        scheduler.step()  # step the LR schedule once per epoch

    # test best model
    print('---------Evaluate Model on Test Set---------------')
    best_checkpoint = torch.load('model_best.pth.tar')
    model.load_state_dict(best_checkpoint['state_dict'])
    validate(test_loader, model, criterion, normalizer, s, test=True)
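# The Normalizer class used above is not defined in this snippet. Below is a
# minimal sketch of a plausible mean/std implementation that provides the
# state_dict()/load_state_dict() interface the checkpoint code expects; it is an
# assumption, not the original class, and assumes torch is already imported.
class Normalizer(object):
    """Normalize a tensor to zero mean / unit variance and invert the mapping."""
    def __init__(self, tensor):
        self.mean = torch.mean(tensor)
        self.std = torch.std(tensor)

    def norm(self, tensor):
        return (tensor - self.mean) / self.std

    def denorm(self, normed_tensor):
        return normed_tensor * self.std + self.mean

    def state_dict(self):
        return {'mean': self.mean, 'std': self.std}

    def load_state_dict(self, state_dict):
        self.mean = state_dict['mean']
        self.std = state_dict['std']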
#     pin_memory=True,  # important
#     num_workers=0)
#
# test_loader = DataLoader(
#     dataset=test_set,
#     batch_size=batch_size_,
#     shuffle=shuffle_or_not,
#     num_workers=0)

exp_name_ = "{}_lr_{}_epoch_{}_batch_size_{}".format(
    exp_name, args.lr, args.epochs, args.batch_size)
# save_path_model = "{}/models/{}.pt".format(project_path, exp_name)
# sw = SW(logdir="{}{}{}".format(project_path, "/runs/", exp_name_))
#         log_dir="./logdir/" + exp_name)
exp_name_ = "{}_num_trainable_params_{}".format(
    exp_name_,
    sum(p.numel() for p in model.parameters() if p.requires_grad))
print("model param count {}".format(
    sum(p.numel() for p in model.parameters())))
print("of which trainable {}".format(
    sum(p.numel() for p in model.parameters() if p.requires_grad)))

json_weight_path_with_rep = "{}_rep_{}_lr_{}_b_size_{}.json".format(
    json_weight_file_path, str(repetition), args.lr, args.batch_size)
json_bias_path_with_rep = "{}_rep_{}_lr_{}_b_size_{}.json".format(
    json_bias_file_path, str(repetition), args.lr, args.batch_size)

# initialize the weights
# model.init_weights(init_func=init_func, init_func_bn=init_func_bn)
# model_optimizer = Adam(params=model.parameters(), lr=lr_)

exps_dict = {}
if shuffle_label:
    shuffle_range = np.arange(args.shuffle_ratio_low,
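# The shuffle_label branch is cut off above; a hypothetical helper of the kind a
# shuffle-ratio sweep would need is sketched below. It randomly permutes the
# labels of a given fraction of samples. The name shuffle_labels and its
# arguments are assumptions, not part of the original code.
def shuffle_labels(labels, ratio):
    labels = labels.clone()
    n = labels.size(0)
    idx = torch.randperm(n)[:int(ratio * n)]                  # samples to corrupt
    labels[idx] = labels[idx[torch.randperm(idx.size(0))]]    # permute their labels
    return labels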
test_loader = torch.utils.data.DataLoader(
    MyDataset(test_inputs, test_labels, transform_test),  # wrap in a Dataset torch can consume
    batch_size=BATCH_SIZE,                                 # then batch it
    shuffle=False,
    num_workers=2,
    pin_memory=True)

# instantiate the network
net = NET()

# define the loss function and the optimization scheme
loss_func = nn.CrossEntropyLoss()  # cross-entropy loss (softmax layer built in)
optimizer = optim.SGD(
    net.parameters(),  # iterable of parameters the optimizer should update
    lr=LR,
    momentum=0.9,
    weight_decay=5e-4
)  # mini-batch momentum SGD; weight_decay applies L2 regularization (weight decay)

# start training
if __name__ == '__main__':
    # dump the model parameters to model_params.txt
    with open('model_params.txt', 'w') as f4:
        for parameters in net.parameters():  # iterator over module parameters
            f4.write(str(parameters))
            f4.write('\n')
        for name, parameters in net.named_parameters():
            f4.write(name + ':' + str(parameters.size()))
            f4.write('\n')
        f4.flush()
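    # Only the parameter dump appears under __main__ here; the "start training"
    # comment suggests a loop like the following minimal sketch. It assumes a
    # train_loader built like test_loader above (with shuffle=True) and an EPOCHS
    # constant; both are assumptions, not shown in the original snippet.
    for epoch in range(EPOCHS):
        net.train()
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            loss = loss_func(net(inputs), labels)
            loss.backward()
            optimizer.step()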