def train(epoch, LR):
    """Fine-tune the CNN saved as 'model_v0.pkl' on the module-level train_data.

    Args:
        epoch: kept for interface compatibility; NOTE(review) — the original
            shadowed this parameter with its own loop variable, so it was
            never used. The epoch count still comes from the global EPOCH.
        LR: learning rate for the Adam optimizer.

    Side effects:
        Saves 'model_v{n}.pkl' whenever validation accuracy does not drop.
        Relies on globals: train_data, WIDTH, HEIGHT, EPOCH, shuffle,
        test_accuracy.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = torch.load('model_v0.pkl')
    # was model.cuda(): crashed on CPU-only hosts even though `device` was computed
    model = model.to(device)
    criterion = nn.MSELoss()
    # optimizer = torch.optim.SGD(model.parameters(), lr=0.0001)
    optimizer = torch.optim.Adam(model.parameters(), lr=LR, betas=(0.9, 0.99))

    # Held-out evaluation slice, captured once and never reshuffled.
    test = train_data[-200:]
    # assumes samples are (image, action) pairs and WIDTH*HEIGHT matches the
    # image size — TODO confirm against the data-collection code
    test_img = np.array([each[0] for each in test]).reshape((-1, WIDTH, HEIGHT, 1))
    test_action = np.array([i[1] for i in test])

    model.train()
    old_accuracy = test_accuracy(model, test_img, test_action)
    print('old accuracy is {}'.format(old_accuracy))

    # BUG fix: the original shuffled train_data in place every epoch, so the
    # held-out `test` slice (taken above, pre-shuffle) leaked back into the
    # training pool.  Shuffle a copy that excludes the test slice instead.
    pool = train_data[:-200]
    for ep in range(EPOCH):  # renamed from `epoch`: no longer shadows the parameter
        t = 0
        shuffle(pool)
        # drop 100 more so the training-set size matches the original [:-300]
        train_set = pool[:-100]  # renamed from `train`: no longer shadows this function
        train_img = np.array([each[0] for each in train_set]).reshape((-1, WIDTH, HEIGHT, 1))
        # [A, W, D], [left, forward, right, slow_yoll]
        # extra list level keeps each target shaped (1, n) to match the model output
        train_action = np.array([[i[1]] for i in train_set])
        for data, target in zip(train_img, train_action):
            t += 1
            if t % 500 == 0:
                print('epoch {} {}% has been down'.format(ep, t / len(train_set) * 100))
            # WIDTH/HEIGHT instead of the original hard-coded 80, 60
            # (assumes WIDTH == 80 and HEIGHT == 60 — TODO confirm)
            data = torch.from_numpy(data.reshape(1, 1, WIDTH, HEIGHT)).float().to(device)
            target = torch.from_numpy(target).float().to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
        accuracy = test_accuracy(model, test_img, test_action)
        print('epoch {} accuracy is {}'.format(ep, accuracy))
        # checkpoint only when accuracy did not regress
        if accuracy >= old_accuracy:
            torch.save(model, 'model_v{}.pkl'.format(ep))
            old_accuracy = accuracy
def test_accuracy(model, test_img, test_action): device = torch.device("cuda" if torch.cuda.is_available() else "cpu") model.cuda() accuracy = [] for data, target in zip(test_img, test_action): data = Variable(torch.from_numpy(data.reshape(1, 1, 80, 60))) data = torch.tensor(data, dtype=torch.float32).to(device=device) output = model(data) output = int(output.argmax(dim=1)) accuracy.append(1 if output == target.argmax() else 0) return (sum(accuracy) / len(accuracy))
def test_accuracy(test_img=None, test_action=None):
    """Load 'model_v1.pkl' and report its accuracy on the given image set.

    NOTE(review): this shadows the earlier test_accuracy(model, ...) if both
    definitions live in one module — confirm which one callers expect.

    Args:
        test_img: images, each reshapeable to (1, 1, 80, 60); defaults to the
            module-level `train_img` resolved at CALL time.  (The original
            evaluated `train_img` at def time, which raises NameError on
            import when the global is not yet defined.)
        test_action: labels; `target[0]` is compared against the predicted
            class index.  NOTE(review): the sibling implementation compares
            against `target.argmax()` — confirm which label layout is right.

    Returns:
        The accuracy as a float (also printed, matching the original output).
    """
    if test_img is None:
        test_img = train_img
    if test_action is None:
        test_action = train_action
    # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = torch.load('model_v1.pkl')
    hits = []
    with torch.no_grad():  # evaluation only — no gradients needed
        for data, target in zip(test_img, test_action):
            x = torch.from_numpy(data.reshape(1, 1, 80, 60)).float()
            pred = int(x.new_tensor(0)) if False else int(model(x).argmax(dim=1))
            hits.append(1 if pred == target[0] else 0)
    accuracy = sum(hits) / len(hits)
    print(accuracy)
    # original printed only; returning the value is backward-compatible
    return accuracy
def train_model(taskid_path, args, net, train_dataloader, val_dataloader, density, dataset_length):
    """Train `net` with per-parameter-group learning rates, evaluate each epoch,
    and checkpoint whenever validation accuracy improves (or every 50 epochs).

    Args:
        taskid_path: output directory; logs go to <taskid_path>/log, models to
            <taskid_path>/net-<epoch>.pkl.
        args: config namespace; uses lr (per-epoch list), weightdecay, momentum,
            epochnum, model, losstype, batchsize, label_num.
        net: model called as net(image, label, epoch_tensor, density).
        train_dataloader / val_dataloader: batched (image, label) loaders.
        density: opaque extra input forwarded to net — semantics not visible here.
        dataset_length: dict with 'train' / 'val' sample counts.

    Returns:
        (max_acc, best_epoch_1_based).
    """
    log_path = os.path.join(taskid_path, "log")
    make_dir(log_path)
    writer = SummaryWriter(log_path)
    #torch.cuda.set_device(args.gpu_id)
    #net = net
    max_acc = 0
    max_epoch = 0
    judge = 0  # set to 1 when the current epoch achieved a new best val accuracy
    for epoch in range(args.epochnum):
        # Rebuild the optimizer each epoch so args.lr[epoch] acts as an
        # explicit per-epoch learning-rate schedule.
        paras = dict(net.named_parameters())
        paras_new = []
        for k, v in paras.items():
            # Per-group lr/decay multipliers keyed on substrings of the
            # parameter name.
            # NOTE(review): a 'mask' parameter named e.g. 'mask_weight' also
            # contains '.weight'-style matches in some naming schemes — these
            # `if`s are not mutually exclusive, so one tensor can be added to
            # several groups; confirm the naming convention rules this out.
            if 'mask' in k:
                if 'bias' in k:
                    paras_new += [{
                        'params': [v],
                        'lr': args.lr[epoch] * 2,
                        'weight_decay': args.weightdecay * 0
                    }]
                if 'mask_weight' in k:
                    paras_new += [{
                        'params': [v],
                        'lr': args.lr[epoch] * 0.05,
                        'weight_decay': args.weightdecay * 0
                    }]
                if '.weight' in k:
                    paras_new += [{
                        'params': [v],
                        'lr': args.lr[epoch] * 1,
                        'weight_decay': args.weightdecay * 1
                    }]
            if 'line' in k:
                if 'bias' in k:
                    paras_new += [{
                        'params': [v],
                        'lr': args.lr[epoch] * 2,
                        'weight_decay': args.weightdecay * 0
                    }]
                if 'weight' in k:
                    paras_new += [{
                        'params': [v],
                        'lr': args.lr[epoch] * 1,
                        'weight_decay': args.weightdecay * 1
                    }]
            if 'conv' in k:
                if 'bias' in k:
                    paras_new += [{
                        'params': [v],
                        'lr': args.lr[epoch] * 1,
                        'weight_decay': args.weightdecay * 1
                    }]
                if 'weight' in k:
                    paras_new += [{
                        'params': [v],
                        'lr': args.lr[epoch] * 1,
                        'weight_decay': args.weightdecay * 1
                    }]
        optimizer = SGD(paras_new, lr=args.lr[epoch], momentum=args.momentum,
                        weight_decay=args.weightdecay)
        # train
        net.train()
        train_loss = []
        train_acc = []
        print('Train: ' + "\n" + 'epoch:{}'.format(epoch + 1))
        for index, (image, label) in enumerate(train_dataloader):
            batch_size = image.shape[0]
            image = Variable(image)
            #image = image
            #label = label
            out = net(image, label, torch.Tensor([epoch + 1]), density)
            if args.model == "resnet_18" or args.model == "resnet_50" or args.model == "densenet_121":
                # these backbones emit (N, C); add H/W singleton dims so the
                # 4-D indexing below works uniformly
                out = torch.unsqueeze(out, 2)
                out = torch.unsqueeze(out, 3)
            label = Variable(label)
            if args.losstype == 'logistic':
                loss = logistic_F.apply(out, label)
                train_loss.append(loss.cpu().clone().data.numpy())
                # correct when prediction and (+/-1) label share a sign
                train_correct = label.mul(out)
                train_correct = torch.max(train_correct, torch.zeros(train_correct.size()))
                train_correct = torch.sum((train_correct > 0))
                train_acc.append(train_correct.cpu().data.numpy())
            if args.losstype == 'softmax':
                loss = softmax_F.apply(out, label)
                train_loss.append(loss.cpu().clone().data.numpy())
                # top-1: compare the highest-scoring class to the label argmax
                (tmp, out) = torch.sort(out, dim=1, descending=True)
                (tmp, label) = torch.max(label, dim=1)
                label = label.unsqueeze(2)
                error = ~(out == label)
                # NOTE(review): uses args.batchsize, not the actual batch_size —
                # overstates accuracy on a short final batch; confirm intent
                train_correct = args.batchsize - torch.sum(error[:, 0, 0, 0])
                train_acc.append(train_correct.cpu().data.numpy())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            print('batch:{}/{}'.format(index + 1, len(train_dataloader)) + " " +
                  'loss:{:.6f}'.format(loss / batch_size) + " " +
                  'acc:{:.6f}'.format(train_correct.cpu().data.numpy() /
                                      (batch_size * args.label_num)))
            # samples seen so far (exact total on the last batch)
            length = dataset_length['train'] if index + 1 == len(
                train_dataloader) else args.batchsize * (index + 1)
            # NOTE(review): `(index + 1) % 10` is truthy for every index that is
            # NOT a multiple of 10 — this logs on 9 of every 10 batches; the
            # author likely meant `== 0`. Left as-is (doc-only pass).
            if (index + 1) % 10:
                writer.add_scalar('Train/Loss', sum(train_loss) / length, epoch)
                writer.add_scalar('Train/acc',
                                  sum(train_acc) / (length * args.label_num), epoch)
        # eval
        net.eval()
        with torch.no_grad():
            eval_loss = []
            eval_acc = []
            for index, (image, label) in enumerate(val_dataloader):
                print('Val: ' + "\n" + 'epoch:{}'.format(epoch + 1))
                batch_size = image.shape[0]
                image = Variable(image)
                image = image
                label = label
                out = net(image, label, torch.Tensor([epoch + 1]), density)
                if args.model == "resnet_18" or args.model == "resnet_50" or args.model == "densenet_121":
                    out = torch.unsqueeze(out, 2)
                    out = torch.unsqueeze(out, 3)
                label = Variable(label)
                if args.losstype == 'logistic':
                    loss = logistic_F.apply(out, label)
                    eval_loss.append(loss.cpu().data.numpy())
                    eval_correct = label.mul(out)
                    eval_correct = torch.max(eval_correct,
                                             torch.zeros(eval_correct.size()))
                    eval_correct = torch.sum((eval_correct > 0))
                    eval_acc.append(eval_correct.cpu().data.numpy())
                if args.losstype == 'softmax':
                    loss = softmax_F.apply(out, label)
                    eval_loss.append(loss.cpu().data.numpy())
                    (tmp, out) = torch.sort(out, dim=1, descending=True)
                    (tmp, label) = torch.max(label, dim=1)
                    label = label.unsqueeze(2)
                    error = ~(out == label)
                    eval_correct = args.batchsize - torch.sum(error[:, 0, 0, 0])
                    eval_acc.append(eval_correct.cpu().data.numpy())
                length = dataset_length['val'] if index + 1 == len(
                    val_dataloader) else args.batchsize * (index + 1)
                print('batch:{}/{}'.format(index + 1, len(val_dataloader)) + " " +
                      'loss:{:.6f}'.format(loss / batch_size) + " " +
                      'acc:{:.6f}'.format(eval_correct.cpu().data.numpy() /
                                          (batch_size * args.label_num)))
            # `length` here is the value from the LAST val batch — the full
            # val-set size, by the expression above.
            print("max_acc:" + str(max_acc))
            if sum(eval_acc) / (length * args.label_num) > max_acc:
                judge = 1
                max_acc = sum(eval_acc) / (length * args.label_num)
                print("rightnow max_acc:" + str(max_acc))
                max_epoch = epoch
            writer.add_scalar('Eval/Loss', sum(eval_loss) / length, epoch)
            writer.add_scalar('Eval/acc',
                              sum(eval_acc) / (length * args.label_num), epoch)
        # checkpoint on new best accuracy, plus a periodic safety save
        if judge == 1 or (epoch + 1) % 50 == 0:
            # save
            torch.save(net, taskid_path + '/net-' + str(epoch + 1) + '.pkl')
            #torch.save(net.state_dict(), taskid_path + '/net-params-' + str(epoch + 1) + '.pkl')
            judge = 0
    return max_acc, max_epoch + 1
def training_process(epoch):
    """Run one epoch of class-conditional WGAN-GP-style training.

    Alternately updates generator aG and discriminator aD over `trainloader`.
    Relies on module-level globals: aG, aD, trainloader, batch_size, n_classes,
    n_z, gen_train, criterion, optimizer_g, optimizer_d, grad_update_thres,
    calc_gradient_penalty.  Requires CUDA (unconditional .cuda() calls).
    """
    loss1, loss2, loss3, loss4, loss5 = [], [], [], [], []
    acc1 = []
    aG.train()
    aD.train()
    for batch_idx, (X_train_batch, Y_train_batch) in enumerate(trainloader):
        # skip the ragged final batch so fixed-size noise/label buffers line up
        if (Y_train_batch.shape[0] < batch_size):
            continue
        # train G
        if ((batch_idx % gen_train) == 0):
            for p in aD.parameters():
                p.requires_grad_(False)  # freeze D while updating G
            aG.zero_grad()
            # class-conditional noise: the first n_classes dims carry a one-hot label
            label = np.random.randint(0, n_classes, batch_size)
            noise = np.random.normal(0, 1, (batch_size, n_z))
            label_onehot = np.zeros((batch_size, n_classes))
            label_onehot[np.arange(batch_size), label] = 1
            noise[np.arange(batch_size), :n_classes] = label_onehot[np.arange(
                batch_size)]
            noise = noise.astype(np.float32)
            noise = torch.from_numpy(noise)
            noise = Variable(noise).cuda()
            fake_label = Variable(torch.from_numpy(label)).cuda()
            fake_data = aG(noise)
            gen_source, gen_class = aD(fake_data)
            gen_source = gen_source.mean()
            gen_class = criterion(gen_class, fake_label)
            # WGAN generator loss (maximize critic score) + auxiliary class loss
            gen_cost = -gen_source + gen_class
            gen_cost.backward()
            grad_update_thres(optimizer_g)
            optimizer_g.step()
        # train D
        # NOTE(review): D is gated on the same (batch_idx % gen_train) == 0 test
        # as G; conventionally the critic trains EVERY batch and only G is
        # throttled — confirm this duplicated gate is intentional.
        if ((batch_idx % gen_train) == 0):
            for p in aD.parameters():
                p.requires_grad_(True)  # unfreeze D for its own update
            aD.zero_grad()
            # train discriminator with input from generator
            label = np.random.randint(0, n_classes, batch_size)
            noise = np.random.normal(0, 1, (batch_size, n_z))
            label_onehot = np.zeros((batch_size, n_classes))
            label_onehot[np.arange(batch_size), label] = 1
            noise[np.arange(batch_size), :n_classes] = label_onehot[np.arange(
                batch_size)]
            noise = noise.astype(np.float32)
            noise = torch.from_numpy(noise)
            noise = Variable(noise).cuda()
            fake_label = Variable(torch.from_numpy(label)).cuda()
            with torch.no_grad():
                fake_data = aG(noise)  # detached fakes: no G gradients in D step
            disc_fake_source, disc_fake_class = aD(fake_data)
            disc_fake_source = disc_fake_source.mean()
            disc_fake_class = criterion(disc_fake_class, fake_label)
            # train discriminator with real data
            # (original comment said "input from the discriminator" — a typo)
            real_data = Variable(X_train_batch).cuda()
            real_label = Variable(Y_train_batch).cuda()
            disc_real_source, disc_real_class = aD(real_data)
            prediction = disc_real_class.data.max(1)[1]
            accuracy = (float(prediction.eq(real_label.data).sum()) /
                        float(batch_size)) * 100.0
            disc_real_source = disc_real_source.mean()
            disc_real_class = criterion(disc_real_class, real_label)
            gradient_penalty = calc_gradient_penalty(aD, real_data, fake_data)
            # WGAN-GP critic loss + auxiliary classification terms
            disc_cost = disc_fake_source - disc_real_source + disc_real_class + disc_fake_class + gradient_penalty
            disc_cost.backward()
            grad_update_thres(optimizer_d)
            optimizer_d.step()
            # within the training loop
            # NOTE(review): bookkeeping placed inside the D branch because these
            # names are only bound when D trains; the original nesting is
            # ambiguous in this chunk — confirm.
            loss1.append(gradient_penalty.item())
            loss2.append(disc_fake_source.item())
            loss3.append(disc_real_source.item())
            loss4.append(disc_real_class.item())
            loss5.append(disc_fake_class.item())
            acc1.append(accuracy)
        if ((batch_idx % 50) == 0):
            print('\nEpoch:', epoch + 1, 'batch index:', batch_idx, 'loss 1-5:',
                  "%.2f" % np.mean(loss1), "%.2f" % np.mean(loss2),
                  "%.2f" % np.mean(loss3), "%.2f" % np.mean(loss4),
                  "%.2f" % np.mean(loss5),
                  ', train accuracy:', "%.2f" % np.mean(acc1))
#if args.with_gen==0: #print("Disc model with generator") #model_disc = torch.load('discriminator.model') model_disc.cuda() model_disc.eval() print('Load and eval discriminator model ...') model_disc = torch.nn.DataParallel(model_disc) cudnn.benchmark = True # load in a model and a batch of images batch_idx, (X_batch, Y_batch) = next(testloader) X_batch = Variable(X_batch, requires_grad=True).cuda() Y_batch_alternate = (Y_batch + 1) % 10 Y_batch_alternate = Variable(Y_batch_alternate).cuda() Y_batch = Variable(Y_batch).cuda() # calculate the mean image and make 10 copies (number of classes). X = X_batch.mean(dim=0) X = X.repeat(10, 1, 1, 1) # Make a unique label for each copy. Y = torch.arange(10).type(torch.int64) Y = Variable(Y).cuda() lr = 0.1 weight_decay = 0.001
# Script fragment: start of a segmentation-conditioned GAN training loop.
# The loop body is truncated at the end of this chunk (it stops right after
# D.zero_grad()) — the loss computation continues outside this view.
avg_D_real_m_loss = 0
avg_D_real_m2_loss = 0
avg_D_fake_loss = 0
avg_G_fake_loss = 0
avg_percept_loss = 0
for i ,(img,att,seg,cat,nnseg) in enumerate(train_loader):
    bs = img.size(0)
    # sample a random mismatched attribute vector for each item in the batch
    rnd_batch_num = np.random.randint(len(train_data),size=bs)
    # (the comprehension's `i` is scoped to the comprehension in Python 3 and
    # does not clobber the outer loop index)
    rnd_att_list = [train_data[i][1] for i in rnd_batch_num]
    rnd_att_np = np.asarray(rnd_att_list)
    rnd_att = torch.from_numpy(rnd_att_np).float()
    #convert images to tensors and send to gpu
    seg = seg.type(torch.FloatTensor)
    nnseg = nnseg.type(torch.FloatTensor)
    img = Variable(img.cuda())
    att = Variable(att.cuda())
    rnd_att = Variable(rnd_att.cuda())
    seg = Variable(seg.cuda())
    nnseg = Variable(nnseg.cuda())
    cat = Variable(cat.cuda())
    # per-layout latent noise — semantics of init_z_foreach_layout not visible here
    Z = init_z_foreach_layout(cat, bs)
    # rescale images from [0, 1] to [-1, 1] for the generator
    img_norm = img * 2 - 1
    img_G = img_norm
    # D step: freeze G, train D
    requires_grad(G, False)
    requires_grad(D, True)
    D.zero_grad()
    #calculate loss for real image with segmask and attributes
#loss_list.append(loss.item()) optimizer.zero_grad() loss.backward() optimizer.step() if (batch_idx+1)%200 ==0: print('Running epoch: [{}/{}], step[{}/{}], Loss: {:.4f}'.format(epoch+1, 100, batch_idx+1, total_step, loss.item())) print('Finished Training') # testing --------------------------------------------------- correct = 0 total = 0 with torch.no_grad(): for i, (data, target) in enumerate(testloader): images = Variable(data).cuda() labels = Variable(target).cuda() _, outputs = model_disc(images) _, predicted = torch.max(outputs.data, 1) total += labels.size(0) correct += (predicted == labels).sum().item() print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total)) torch.save(model_disc,'./hw6_cifar10_disc.model')
# Script fragment: logistic-regression training setup.  The epoch loop is
# truncated at the end of this chunk (it stops right after zero_grad();
# backward()/step() presumably follow outside this view).
if torch.cuda.is_available():
    print("模型存入gpu成功!!!!")
    logistic_model.cuda()  # move the model to GPU when one is available
criterion = nn.BCELoss()  # binary cross-entropy loss
# SGD; the first argument is the model's parameters (logistic_model.parameters())
optimizer = torch.optim.SGD(
    logistic_model.parameters(), lr=1e-3, momentum=0.9)
# features are the first two fields of each sample, label is the third
x_data = [[i[0], i[1]] for i in data]
print(x_data)
y_data = [i[2] for i in data]
#print(y_data)
if torch.cuda.is_available():
    print('GPU计算!!!!!!!!!!!!')
    # BUG(review): Variable() on a plain Python list raises — x_data/y_data
    # need to be tensors first (e.g. torch.tensor(x_data)).
    x = Variable(x_data).cuda()
    # BUG(review): `.duda()` is a typo for `.cuda()` — this line raises
    # AttributeError as written.
    y = Variable(y_data).duda()
else:
    print('普通计算!!!!!!!!!!!!')
    x = Variable(x_data)
    y = Variable(y_data)
for epoch in range(5000):
    # forward
    out = logistic_model(x)
    loss = criterion(out, y)
    # NOTE(review): loss.data[0] is pre-0.4 PyTorch; modern code uses loss.item()
    print_loss = loss.data[0]
    # threshold at 0.5: >= 0.5 -> 1, otherwise 0
    mask = out.ge(0.5).float()
    correct = (abs(mask - y) < 0.001).sum()
    acc = correct.data[0] / x.size(0)
    # backward
    optimizer.zero_grad()