def test_cudnn_rnn_speed(self):
    from time import time
    iters = 100
    h0 = np.random.rand(1, 128, 256).astype(np.float32)
    input = np.random.rand(128, 128, 128).astype(np.float32)

    # PyTorch baseline (cuDNN).
    dev = torch.device('cuda:0')
    t_rnn = tnn.RNN(128, 256, nonlinearity='relu').to(dev)
    t_optim = torch.optim.SGD(t_rnn.parameters(), lr=1e-3, momentum=0.9)
    t_input = torch.from_numpy(input).to(dev)
    t_h0 = torch.from_numpy(h0).to(dev)
    start_time = time()
    for i in range(iters):
        t_optim.zero_grad()
        t_output, th = t_rnn(t_input, t_h0)
        t_loss = (t_output**2).sum() + (th**2).sum()
        t_loss.backward()
        t_optim.step()
    torch.cuda.synchronize()  # CUDA kernels run asynchronously; wait before reading the clock
    print('torch time = ', time() - start_time)

    # Jittor with the cuDNN backend.
    j_rnn = nn.RNN(128, 256, nonlinearity='relu')
    j_rnn.load_state_dict(t_rnn.state_dict())
    j_optim = nn.SGD(j_rnn.parameters(), lr=1e-3, momentum=0.9)
    j_input, j_h0 = jt.array(input), jt.array(h0)
    start_time = time()
    for i in range(iters):
        j_output, jh = j_rnn(j_input, j_h0)
        j_loss = (j_output**2).sum() + (jh**2).sum()
        j_optim.step(j_loss)
    jt.sync_all(True)  # flush Jittor's lazy execution before timing
    print('jittor cudnn time = ', time() - start_time)

    # Jittor with cuDNN disabled, to time the native RNN implementation.
    jt_cudnn, jt.cudnn = jt.cudnn, None
    j_rnn = nn.RNN(128, 256, nonlinearity='relu')
    j_rnn.load_state_dict(t_rnn.state_dict())
    j_optim = nn.SGD(j_rnn.parameters(), lr=1e-3, momentum=0.9)
    start_time = time()
    for i in range(iters):
        j_output, jh = j_rnn(j_input, j_h0)
        j_loss = (j_output**2).sum() + (jh**2).sum()
        j_optim.step(j_loss)
    jt.sync_all(True)
    print('jittor native time = ', time() - start_time)
    jt.cudnn = jt_cudnn
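# A note on the timing pattern above: both frameworks dispatch GPU work
# asynchronously, so each timed loop must end with a synchronize call before
# the clock is read. Below is a minimal, framework-agnostic sketch of that
# pattern; `step_fn` and `sync_fn` are hypothetical placeholders (a training
# step and the matching synchronize call), not part of the original test.
from time import time

def benchmark(step_fn, sync_fn, iters=100):
    sync_fn()                # drain pending work so the timer starts clean
    start = time()
    for _ in range(iters):
        step_fn()            # may return before the GPU has finished
    sync_fn()                # wait for all queued kernels to complete
    return time() - start

# Usage (assuming step functions built from the objects in the test above):
#   benchmark(torch_step, torch.cuda.synchronize)
#   benchmark(jittor_step, lambda: jt.sync_all(True))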
def test_cudnn_gru_train(self):
    dev = torch.device('cuda:0')
    t_rnn = tnn.GRU(32, 64).to(dev)
    t_optim = torch.optim.SGD(t_rnn.parameters(), lr=1e-3, momentum=0.9)

    j_rnn = nn.GRU(32, 64)
    j_rnn.load_state_dict(t_rnn.state_dict())
    j_optim = nn.SGD(j_rnn.parameters(), lr=1e-3, momentum=0.9)

    h0 = np.random.rand(1, 4, 64).astype(np.float32)
    input = np.random.rand(12, 4, 32).astype(np.float32)

    for _ in range(10):
        t_optim.zero_grad()
        t_output, th = t_rnn(
            torch.from_numpy(input).to(dev),
            torch.from_numpy(h0).to(dev))
        t_loss = (t_output**2).sum() + (th**2).sum()
        t_loss.backward()
        t_optim.step()

        j_input, jh = jt.array(input), jt.array(h0)
        j_output, jh = j_rnn(j_input, jh)
        j_loss = (j_output**2).sum() + (jh**2).sum()
        j_optim.step(j_loss)

        np.testing.assert_allclose(t_loss.item(), j_loss.item(), rtol=1e-4)
        np.testing.assert_allclose(t_rnn.bias_hh_l0.detach().cpu().numpy(),
                                   j_rnn.bias_hh_l0.data,
                                   atol=1e-4, rtol=1e-4)
def test_optimizer(self):
    class Model2(Module):
        def __init__(self, input_size):
            self.linear1 = nn.Linear(input_size, 10)
            self.relu1 = nn.Relu()
            self.linear2 = nn.Linear(10, 1)

        def execute(self, x):
            x = self.linear1(x)
            x = self.relu1(x)
            return self.linear2(x)

    def get_data(n):
        for i in range(n):
            x = np.random.rand(50, 1)
            y = x * x
            yield jt.float32(x), jt.float32(y)

    num = 2000
    model = Model2(1)
    model.mpi_param_broadcast()
    optimizer = nn.SGD(model.parameters(), 0.1)
    dataset = list(enumerate(get_data(num)))
    # Each MPI rank trains on its own strided shard of the dataset.
    for i in range(mpi.world_rank(), num, mpi.world_size()):
        id, (x, y) = dataset[i]
        pred_y = model(x)
        loss = (pred_y - y)**2
        loss_mean = loss.mean()
        optimizer.step(loss_mean)
    assert loss_mean.data < 0.0025, loss_mean.data
    jt.clean()
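# The strided loop above is a common way to shard data across MPI ranks
# without any communication: rank r processes items r, r + world_size,
# r + 2 * world_size, ... A small self-contained illustration with
# hypothetical sizes (not taken from the test):
world_size, num = 4, 10
shards = {r: list(range(r, num, world_size)) for r in range(world_size)}
assert shards == {0: [0, 4, 8], 1: [1, 5, 9], 2: [2, 6], 3: [3, 7]}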
def main():
    jt.seed(settings.SEED)
    np.random.seed(settings.SEED)

    model = get_model()
    train_loader = TrainDataset(data_root=settings.DATA_ROOT,
                                split='train',
                                batch_size=settings.BATCH_SIZE,
                                shuffle=True)
    val_loader = ValDataset(data_root=settings.DATA_ROOT,
                            split='val',
                            batch_size=1,
                            shuffle=False)
    writer = SummaryWriter(settings.WRITER_PATH)

    learning_rate = settings.LEARNING_RATE
    momentum = settings.MOMENTUM
    weight_decay = settings.WEIGHT_DECAY

    # Backbone parameters use the base learning rate; head parameters use 10x.
    model_backbone = [model.get_backbone()]
    model_head = model.get_head()
    params_list = []
    for module in model_backbone:
        params_list.append(dict(params=module.parameters(), lr=learning_rate))
    for module in model_head:
        for m in module.modules():
            print(type(m).__name__, type(m))  # debug: dump head submodules
        params_list.append(dict(params=module.parameters(),
                                lr=learning_rate * 10))
    optimizer = nn.SGD(params_list, learning_rate, momentum, weight_decay)

    epochs = settings.EPOCHS
    evaluator = Evaluator(settings.NCLASS)
    for epoch in range(epochs):
        # train(model, train_loader, optimizer, epoch, learning_rate, writer)
        val(model, val_loader, epoch, evaluator, writer)
def test_multilayer_bidirectional_cudnn_lstm_train(self):
    dev = torch.device('cuda:0')
    t_rnn = tnn.LSTM(32, 64, num_layers=4, bidirectional=True).to(dev)
    t_optim = torch.optim.SGD(t_rnn.parameters(), lr=1e-3, momentum=0.9)

    j_rnn = nn.LSTM(32, 64, num_layers=4, bidirectional=True)
    j_rnn.load_state_dict(t_rnn.state_dict())
    j_optim = nn.SGD(j_rnn.parameters(), lr=1e-3, momentum=0.9)

    h0 = np.random.rand(8, 4, 64).astype(np.float32)
    c0 = np.random.rand(8, 4, 64).astype(np.float32)
    input = np.random.rand(12, 4, 32).astype(np.float32)

    for _ in range(10):
        t_optim.zero_grad()
        t_output, (th, tc) = t_rnn(
            torch.from_numpy(input).to(dev),
            (torch.from_numpy(h0).to(dev), torch.from_numpy(c0).to(dev)))
        t_loss = (t_output**2).sum() + (th**2).sum() + (tc**2).sum()
        t_loss.backward()
        t_optim.step()

        j_input, jh0, jc0 = jt.array(input), jt.array(h0), jt.array(c0)
        j_output, (jh, jc) = j_rnn(j_input, (jh0, jc0))
        j_loss = (j_output**2).sum() + (jh**2).sum() + (jc**2).sum()
        j_optim.step(j_loss)

        np.testing.assert_allclose(t_loss.item(), j_loss.item(), rtol=1e-4)
        np.testing.assert_allclose(t_rnn.bias_hh_l0.detach().cpu().numpy(),
                                   j_rnn.bias_hh_l0.data,
                                   atol=1e-4, rtol=1e-4)
def test_param_groups(self):
    pa = jt.ones((1,))
    pb = jt.ones((1,))
    data = jt.ones((1,))
    opt = nn.SGD([
        {"params": [pa], "lr": 0.1},
        {"params": [pb]},
    ], 1)
    opt.step(pa * data + pb * data)
    assert pa.data == 0.9 and pb.data == 0, (pa, pb)
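# The assertion in test_param_groups follows from a single SGD update per
# group: the gradient of pa*data + pb*data w.r.t. each parameter is data = 1,
# so pa steps with its group lr of 0.1 while pb falls back to the
# optimizer-level lr of 1. The same arithmetic, spelled out as a sketch:
grad = 1.0                   # d(pa*data + pb*data)/d(pa) = data = 1
pa_new = 1.0 - 0.1 * grad    # group lr 0.1  -> 0.9
pb_new = 1.0 - 1.0 * grad    # default lr 1  -> 0.0
assert (pa_new, pb_new) == (0.9, 0.0)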
def test_densenet(self):
    self.setup_seed(1)
    loss_list = []
    acc_list = []
    mnist_net = MnistNet()
    global prev
    prev = time.time()
    SGD = nn.SGD(mnist_net.parameters(), self.learning_rate,
                 self.momentum, self.weight_decay)
    # SGD = jt.optim.Adam(mnist_net.parameters(), lr=0.0001)

    for batch_idx, (data, target) in enumerate(self.train_loader):
        output = mnist_net(data)
        loss = nn.cross_entropy_loss(output, target)
        SGD.step(loss)

        def callback(batch_idx, loss, output, target):
            # print train info
            global prev
            pred = np.argmax(output, axis=1)
            acc = np.mean(target == pred)
            loss_list.append(loss[0])
            acc_list.append(acc)
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tAcc: {:.6f} \tTime:{:.3f}'
                  .format(0, batch_idx, 600, 1. * batch_idx / 6.0,
                          loss[0], acc, time.time() - prev))
            # prev = time.time()

        jt.fetch(batch_idx, loss, output, target, callback)

    # Example log:
    # Train Epoch: 0 [0/600 (0%)] Loss: 2.402650 Acc: 0.060000
    # Train Epoch: 0 [1/600 (0%)] Loss: 2.770145 Acc: 0.100000
    # Train Epoch: 0 [2/600 (0%)] Loss: 3.528072 Acc: 0.100000
    # Train Epoch: 0 [3/600 (0%)] Loss: 2.992042 Acc: 0.100000
    # Train Epoch: 0 [4/600 (1%)] Loss: 4.672772 Acc: 0.060000
    # Train Epoch: 0 [5/600 (1%)] Loss: 5.003410 Acc: 0.080000
    # Train Epoch: 0 [6/600 (1%)] Loss: 5.417546 Acc: 0.100000
    # Train Epoch: 0 [7/600 (1%)] Loss: 5.137665 Acc: 0.100000
    # Train Epoch: 0 [8/600 (1%)] Loss: 5.241075 Acc: 0.070000
    # Train Epoch: 0 [9/600 (2%)] Loss: 4.515363 Acc: 0.100000
    # Train Epoch: 0 [10/600 (2%)] Loss: 3.357187 Acc: 0.170000
    # Train Epoch: 0 [20/600 (3%)] Loss: 2.265879 Acc: 0.100000
    # Train Epoch: 0 [30/600 (5%)] Loss: 2.107000 Acc: 0.250000
    # Train Epoch: 0 [40/600 (7%)] Loss: 1.918214 Acc: 0.290000
    # Train Epoch: 0 [50/600 (8%)] Loss: 1.645694 Acc: 0.400000

    jt.sync_all(True)
    assert np.mean(loss_list[-50:]) < 0.3
    assert np.mean(acc_list[-50:]) > 0.9
def test_vgg(self):
    self.setup_seed(1)
    loss_list = []
    acc_list = []
    mnist_net = MnistNet()
    SGD = nn.SGD(mnist_net.parameters(), self.learning_rate,
                 self.momentum, self.weight_decay)

    for batch_idx, (data, target) in enumerate(self.train_loader):
        output = mnist_net(data)
        loss = nn.cross_entropy_loss(output, target)

        # train step
        with jt.log_capture_scope(
            log_silent=1,
            log_v=1, log_vprefix="op.cc=100,exe=10",
        ) as logs:
            SGD.step(loss)

        # jt.fetch passes its arguments positionally, so the callback
        # signature must match the order given to jt.fetch below.
        def callback(batch_idx, loss, output, target):
            # print train info
            pred = np.argmax(output, axis=1)
            acc = np.sum(target == pred) / self.batch_size
            loss_list.append(loss[0])
            acc_list.append(acc)
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tAcc: {:.6f}'
                  .format(0, batch_idx, 100, 1. * batch_idx, loss[0], acc))

        jt.fetch(batch_idx, loss, output, target, callback)

        log_conv = find_log_with_re(
            logs, "Jit op key (not )?found: ((mkl)|(cudnn))_conv.*")
        log_matmul = find_log_with_re(
            logs, "Jit op key (not )?found: ((mkl)|(cublas))_matmul.*")
        if batch_idx:
            assert len(log_conv) == 38 and len(log_matmul) == 12, (
                len(log_conv), len(log_matmul))

        mem_used = jt.flags.stat_allocator_total_alloc_byte \
            - jt.flags.stat_allocator_total_free_byte
        assert mem_used < 11e9, mem_used
        assert jt.core.number_of_lived_vars() < 3500
        if np.mean(loss_list[-50:]) < 0.2:
            break

    assert np.mean(loss_list[-50:]) < 0.2
def main():
    batch_size = 64
    learning_rate = 0.1
    momentum = 0.9
    weight_decay = 1e-4
    epochs = 5

    train_loader = MNIST(train=True, transform=trans.Resize(28)) \
        .set_attrs(batch_size=batch_size, shuffle=True)
    val_loader = MNIST(train=False, transform=trans.Resize(28)) \
        .set_attrs(batch_size=1, shuffle=False)

    model = Model()
    optimizer = nn.SGD(model.parameters(), learning_rate, momentum,
                       weight_decay)
    for epoch in range(epochs):
        train(model, train_loader, optimizer, epoch)
        test(model, val_loader, epoch)
def main():
    model = DeepLab(output_stride=16, num_classes=21)
    train_loader = TrainDataset(data_root='/home/guomenghao/voc_aug/mydata/',
                                split='train',
                                batch_size=4,
                                shuffle=True)
    val_loader = ValDataset(data_root='/home/guomenghao/voc_aug/mydata/',
                            split='val',
                            batch_size=1,
                            shuffle=False)
    learning_rate = 0.005
    momentum = 0.9
    weight_decay = 1e-4
    optimizer = nn.SGD(model.parameters(), learning_rate, momentum,
                       weight_decay)
    writer = SummaryWriter(os.path.join('curve', 'train.events.wo_drop'))
    epochs = 50
    evaluator = Evaluator(21)
    for epoch in range(epochs):
        train(model, train_loader, optimizer, epoch, learning_rate, writer)
        val(model, val_loader, epoch, evaluator, writer)
def test_resnet(self):
    self.setup_seed(1)
    loss_list = []
    acc_list = []
    mnist_net = MnistNet()
    global prev
    prev = time.time()
    SGD = nn.SGD(mnist_net.parameters(), self.learning_rate,
                 self.momentum, self.weight_decay)
    iters = 10

    for batch_idx, (data, target) in enumerate(self.train_loader):
        if batch_idx > iters:
            break
        jt.display_memory_info()
        output = mnist_net(data)
        loss = nn.cross_entropy_loss(output, target)
        SGD.step(loss)

        def callback(batch_idx, loss, output, target):
            global prev
            pred = np.argmax(output, axis=1)
            acc = np.mean(target == pred)
            loss_list.append(loss[0])
            acc_list.append(acc)
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tAcc: {:.6f} \tTime:{:.3f}'
                  .format(0, batch_idx, iters, 1. * batch_idx / 6.0,
                          loss[0], acc, time.time() - prev))

        jt.fetch(batch_idx, loss, output, target, callback)

    jt.sync_all(True)
    jt.display_max_memory_info()

    _, out = jt.get_max_memory_treemap()
    out_ = out.split('\n')
    assert out_[0] == 'root()'
    assert out_[3].endswith('(_run_module_as_main)')
    assert out_[7].endswith('(_run_code)')

    _, out = jt.get_max_memory_treemap(build_by=1)
    out_ = out.split('\n')
    assert out_[0] == 'root()'
    assert out_[4].endswith('(_run_module_as_main)')
    assert out_[8].endswith('(_run_code)')
def train(model):
    batch_size = 16
    train_loader = ShapeNetPart(partition='trainval', num_points=2048,
                                class_choice=None, batch_size=batch_size,
                                shuffle=True)
    test_loader = ShapeNetPart(partition='test', num_points=2048,
                               class_choice=None, batch_size=batch_size,
                               shuffle=False)
    seg_num_all = 50
    seg_start_index = 0
    print(str(model))

    base_lr = 0.01
    optimizer = nn.SGD(model.parameters(), lr=base_lr, momentum=0.9,
                       weight_decay=1e-4)
    lr_scheduler = LRScheduler(optimizer, base_lr)

    best_test_iou = 0
    for epoch in range(200):
        ####################
        # Train
        ####################
        lr_scheduler.step(len(train_loader) * batch_size)
        train_loss = 0.0
        count = 0.0
        model.train()
        train_true_cls = []
        train_pred_cls = []
        train_true_seg = []
        train_pred_seg = []
        train_label_seg = []
        for data, label, seg in train_loader:
            seg = seg - seg_start_index
            # One-hot encode the shape category (16 classes in ShapeNetPart).
            label_one_hot = np.zeros((label.shape[0], 16))
            for idx in range(label.shape[0]):
                label_one_hot[idx, label.numpy()[idx, 0]] = 1
            label_one_hot = jt.array(label_one_hot.astype(np.float32))
            # Input layout: PointNet takes (b, c, n), PointNet2 takes
            # (b, n, c); adjust this permute accordingly.
            data = data.permute(0, 2, 1)
            batch_size = data.size()[0]
            seg_pred = model(data, label_one_hot)
            seg_pred = seg_pred.permute(0, 2, 1)
            loss = nn.cross_entropy_loss(seg_pred.view(-1, seg_num_all),
                                         seg.view(-1))
            optimizer.step(loss)
            # jt.argmax returns (indices, values); keep the indices.
            pred = jt.argmax(seg_pred, dim=2)[0]  # (batch_size, num_points)
            count += batch_size
            train_loss += loss.numpy() * batch_size
            seg_np = seg.numpy()    # (batch_size, num_points)
            pred_np = pred.numpy()  # (batch_size, num_points)
            label = label.numpy()
            train_true_cls.append(seg_np.reshape(-1))   # (batch_size * num_points)
            train_pred_cls.append(pred_np.reshape(-1))  # (batch_size * num_points)
            train_true_seg.append(seg_np)
            train_pred_seg.append(pred_np)
            train_label_seg.append(label.reshape(-1, 1))

        train_true_cls = np.concatenate(train_true_cls)
        train_pred_cls = np.concatenate(train_pred_cls)
        train_acc = metrics.accuracy_score(train_true_cls, train_pred_cls)
        avg_per_class_acc = metrics.balanced_accuracy_score(
            train_true_cls, train_pred_cls)
        train_true_seg = np.concatenate(train_true_seg, axis=0)
        train_pred_seg = np.concatenate(train_pred_seg, axis=0)
        train_label_seg = np.concatenate(train_label_seg, axis=0)
        train_ious = calculate_shape_IoU(train_pred_seg, train_true_seg,
                                         train_label_seg, None)
        outstr = 'Train %d, loss: %.6f, train acc: %.6f, train avg acc: %.6f, train iou: %.6f' % (
            epoch, train_loss * 1.0 / count, train_acc, avg_per_class_acc,
            np.mean(train_ious))
        print(outstr)

        ####################
        # Test
        ####################
        test_loss = 0.0
        count = 0.0
        model.eval()
        test_true_cls = []
        test_pred_cls = []
        test_true_seg = []
        test_pred_seg = []
        test_label_seg = []
        for data, label, seg in test_loader:
            seg = seg - seg_start_index
            label_one_hot = np.zeros((label.shape[0], 16))
            for idx in range(label.shape[0]):
                label_one_hot[idx, label.numpy()[idx, 0]] = 1
            label_one_hot = jt.array(label_one_hot.astype(np.float32))
            data = data.permute(0, 2, 1)
            batch_size = data.size()[0]
            seg_pred = model(data, label_one_hot)
            seg_pred = seg_pred.permute(0, 2, 1)
            loss = nn.cross_entropy_loss(seg_pred.view(-1, seg_num_all),
                                         seg.view(-1))
            pred = jt.argmax(seg_pred, dim=2)[0]
            count += batch_size
            test_loss += loss.numpy() * batch_size
            seg_np = seg.numpy()
            pred_np = pred.numpy()
            label = label.numpy()
            test_true_cls.append(seg_np.reshape(-1))
            test_pred_cls.append(pred_np.reshape(-1))
            test_true_seg.append(seg_np)
            test_pred_seg.append(pred_np)
            test_label_seg.append(label.reshape(-1, 1))

        test_true_cls = np.concatenate(test_true_cls)
        test_pred_cls = np.concatenate(test_pred_cls)
        test_acc = metrics.accuracy_score(test_true_cls, test_pred_cls)
        avg_per_class_acc = metrics.balanced_accuracy_score(
            test_true_cls, test_pred_cls)
        test_true_seg = np.concatenate(test_true_seg, axis=0)
        test_pred_seg = np.concatenate(test_pred_seg, axis=0)
        test_label_seg = np.concatenate(test_label_seg)
        test_ious = calculate_shape_IoU(test_pred_seg, test_true_seg,
                                        test_label_seg, None)
        outstr = 'Test %d, loss: %.6f, test acc: %.6f, test avg acc: %.6f, test iou: %.6f' % (
            epoch, test_loss * 1.0 / count, test_acc, avg_per_class_acc,
            np.mean(test_ious))
        print(outstr)
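# The per-sample loop that builds label_one_hot above can be replaced by a
# single indexing operation. A minimal NumPy sketch (16 is the number of
# shape categories in ShapeNetPart; the sample labels are hypothetical):
import numpy as np

labels = np.array([[2], [0], [15]])                    # (batch, 1) category ids
one_hot = np.eye(16, dtype=np.float32)[labels[:, 0]]   # (batch, 16)
assert one_hot[0, 2] == 1.0 and one_hot.sum() == 3.0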
    acc = total_acc / total_num
    return acc


if __name__ == '__main__':
    freeze_random_seed()
    # net = PointConvDensityClsSsg(n_classes=40)
    net = PointCNNcls()
    # net = PointNet_cls()
    # net = PointNet2_cls(n_classes=40)
    # net = DGCNN(n_classes=40)
    base_lr = 2e-2
    decay_rate = 1e-4
    # optimizer = nn.Adam(net.parameters(), lr=base_lr)  # weight_decay is not supported now
    optimizer = nn.SGD(net.parameters(), lr=base_lr, momentum=0.9)
    lr_scheduler = LRScheduler(optimizer, base_lr)

    batch_size = 32
    n_points = 1024
    train_dataloader = ModelNet40(n_points=n_points, batch_size=batch_size,
                                  train=True, shuffle=True)
    val_dataloader = ModelNet40(n_points=n_points, batch_size=batch_size,
                                train=False, shuffle=False)
    step = 0
def test_resnet(self):
    self.setup_seed(1)
    loss_list = []
    acc_list = []
    mnist_net = MnistNet()
    global prev
    prev = time.time()
    SGD = nn.SGD(mnist_net.parameters(), self.learning_rate,
                 self.momentum, self.weight_decay)
    self.train_loader.endless = True

    for data, target in self.train_loader:
        batch_id = self.train_loader.batch_id
        epoch_id = self.train_loader.epoch_id

        # train step
        # with jt.log_capture_scope(
        #     log_silent=1,
        #     log_v=1, log_vprefix="op.cc=100,exe=10",
        # ) as logs:
        output = mnist_net(data)
        loss = nn.cross_entropy_loss(output, target)
        SGD.step(loss)

        def callback(epoch_id, batch_id, loss, output, target):
            # print train info
            global prev
            pred = np.argmax(output, axis=1)
            acc = np.mean(target == pred)
            loss_list.append(loss[0])
            acc_list.append(acc)
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tAcc: {:.6f} \tTime:{:.3f}'
                  .format(epoch_id, batch_id, 600, 1. * batch_id / 6.0,
                          loss[0], acc, time.time() - prev))
            # prev = time.time()

        jt.fetch(epoch_id, batch_id, loss, output, target, callback)

        # log_conv = find_log_with_re(logs,
        #     "Jit op key (not )?found: ((mkl)|(cudnn))_conv.*")
        # log_matmul = find_log_with_re(logs,
        #     "Jit op key (not )?found: ((mkl)|(cublas))_matmul.*")
        # if batch_id > 2:
        #     assert len(log_conv) == 59 and len(log_matmul) == 6, (len(log_conv), len(log_matmul))

        mem_used = jt.flags.stat_allocator_total_alloc_byte \
            - jt.flags.stat_allocator_total_free_byte
        # assert mem_used < 4e9, mem_used
        # TODO: why bigger?
        assert mem_used < 5.6e9, mem_used

        # example log:
        # Train Epoch: 0 [0/100 (0%)] Loss: 2.352903 Acc: 0.110000
        # Train Epoch: 0 [1/100 (1%)] Loss: 2.840830 Acc: 0.080000
        # Train Epoch: 0 [2/100 (2%)] Loss: 3.473594 Acc: 0.100000
        # Train Epoch: 0 [3/100 (3%)] Loss: 3.131615 Acc: 0.200000
        # Train Epoch: 0 [4/100 (4%)] Loss: 2.524094 Acc: 0.230000
        # Train Epoch: 0 [5/100 (5%)] Loss: 7.780025 Acc: 0.080000
        # Train Epoch: 0 [6/100 (6%)] Loss: 3.890721 Acc: 0.160000
        # Train Epoch: 0 [7/100 (7%)] Loss: 6.370137 Acc: 0.140000
        # Train Epoch: 0 [8/100 (8%)] Loss: 11.390827 Acc: 0.150000
        # Train Epoch: 0 [9/100 (9%)] Loss: 21.598564 Acc: 0.080000
        # Train Epoch: 0 [10/100 (10%)] Loss: 23.369165 Acc: 0.130000
        # Train Epoch: 0 [20/100 (20%)] Loss: 4.804510 Acc: 0.100000
        # Train Epoch: 0 [30/100 (30%)] Loss: 3.393924 Acc: 0.110000
        # Train Epoch: 0 [40/100 (40%)] Loss: 2.286762 Acc: 0.130000
        # Train Epoch: 0 [50/100 (50%)] Loss: 2.055014 Acc: 0.290000

        if jt.in_mpi:
            assert jt.core.number_of_lived_vars() < 8100, \
                jt.core.number_of_lived_vars()
        else:
            assert jt.core.number_of_lived_vars() < 7000, \
                jt.core.number_of_lived_vars()

        if self.train_loader.epoch_id >= 2:
            break

    jt.sync_all(True)
    assert np.mean(loss_list[-50:]) < 0.5
    assert np.mean(acc_list[-50:]) > 0.8
                    std=[0.2023, 0.1994, 0.2010])
])

trainloader = CIFAR10(train=True, shuffle=True, batch_size=64,
                      transform=train_transform)
testloader = CIFAR10(train=False, shuffle=False, batch_size=100,
                     transform=test_transform)

model = get_model(args.model)
optimizer = nn.SGD(parameters=model.parameters(), lr=args.learning_rate,
                   momentum=0.9, weight_decay=5e-4)
summary_writer = tensorboardX.SummaryWriter(logdir=args.logdir)

decay_lr_at = [int(args.epoch_num * i) for i in [0.25, 0.5, 0.75]]
max_acc = 0.
for epoch in range(args.epoch_num):
    if epoch in decay_lr_at:
        optimizer.lr *= 0.1
    train_loss, train_acc = train(epoch, model, trainloader, optimizer)
    test_loss, test_acc = test(epoch, model, testloader)
    summary_writer.add_scalar('Train Loss', train_loss, epoch)
    summary_writer.add_scalar('Train Acc', train_acc, epoch)
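# To make the decay schedule above concrete: with a hypothetical
# args.epoch_num of 200 and a starting lr of 0.1 (both illustration values,
# not from the script), the learning rate drops by 10x at epochs 50, 100, 150.
epoch_num = 200
decay_lr_at = [int(epoch_num * i) for i in [0.25, 0.5, 0.75]]
assert decay_lr_at == [50, 100, 150]
# lr trajectory: 0.1 -> 0.01 (epoch 50) -> 0.001 (epoch 100) -> 0.0001 (epoch 150)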
val_loader = PascalVOCDataset(data_folder,
                              split='test',
                              keep_difficult=keep_difficult,
                              batch_size=batch_size,
                              shuffle=False)

model = SSD300(n_classes=n_classes)
# Bias parameters are trained at twice the base learning rate, a common
# choice in SSD training recipes.
biases = list()
not_biases = list()
for param in model.parameters():
    if param.requires_grad:
        if param.name().endswith('.bias'):
            biases.append(param)
        else:
            not_biases.append(param)
optimizer = nn.SGD([{'params': biases, 'lr': 2 * lr},
                    {'params': not_biases}],
                   lr, momentum=momentum, weight_decay=weight_decay)
model.load_parameters(pickle.load(open("init.pkl", "rb")))
criterion = MultiBoxLoss(priors_cxcy=model.priors_cxcy)
# setseed(19961107)


def main():
    global exp_id
    for epoch in range(start_epoch, epochs):
        if epoch % 5 == 0:
            model.save(os.path.join("tensorboard", exp_id, 'model_last.pkl'))
            # evaluate(test_loader=val_loader, model=model)
        if epoch in decay_lr_at:
            optimizer.lr *= 0.1
        train(train_loader=train_loader, model=model,
                               shuffle=False, data_argu=True)
length = len(train_loader) // batch_size  # iterations (batches) per epoch
# The original paper trains for 120000 iterations at batch_size 32; derive
# the number of epochs from that,
epochs = iterations // (len(train_loader) // 32)
# and convert the iteration milestones into the epochs at which to decay lr.
decay_lr_at = [it // (len(train_loader) // 32) for it in decay_lr_at]

val_loader = PascalVOCDataset(data_folder,
                              split='test',
                              keep_difficult=keep_difficult,
                              batch_size=batch_size,
                              shuffle=False)

model = SSD300(n_classes=n_classes)
optimizer = nn.SGD(model.parameters(), lr, momentum=momentum,
                   weight_decay=weight_decay)
model.load_parameters(pickle.load(open("init.pkl", "rb")))
criterion = MultiBoxLoss(priors_cxcy=model.priors_cxcy)
setseed(19961107)


def main():
    for epoch in range(start_epoch, epochs):
        if epoch in decay_lr_at:
            optimizer.lr *= 0.1
        train(train_loader=train_loader, model=model,
              criterion=criterion, optimizer=optimizer,
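# A concrete illustration of the iteration-to-epoch conversion above, with
# hypothetical numbers (not taken from the script): a 16000-sample dataset,
# a 120000-iteration schedule at batch_size 32, decaying at iterations
# 80000 and 100000.
dataset_len = 16000
iters_per_epoch = dataset_len // 32                               # 500
epochs = 120000 // iters_per_epoch                                # 240
decay_lr_at = [it // iters_per_epoch for it in (80000, 100000)]   # [160, 200]
assert (epochs, decay_lr_at) == (240, [160, 200])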