net = PNet1()
net.initialize(ctx=ctx)
net.hybridize()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {
    'learning_rate': base_lr,
    'wd': weight_decay,
    'momentum': momentum
})
lossFun = LossFun()
eval = Evaluate()
train_dataset = DataSource(train_anno_path,
                           transform=Compose([
                               RandomMirror(0.5),
                               SubtractFloatMeans(MEANS),
                               ToPercentCoords(),
                               PermuteCHW()
                           ]),
                           ratio=6)
save = './models/pnet1_'

for k in range(start_epoch, max_iter + 1):
    # net.train()
    while lr_steps and k >= lr_steps[0]:
        new_lr = trainer.learning_rate * lr_decay
        lr_steps.pop(0)
        trainer.set_learning_rate(new_lr)
    images, targets = train_dataset.getbatch(train_batch)
    images = images.as_in_context(ctx)
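    # NOTE: the Gluon snippet above ends here, right after moving the batch to
    # `ctx`. The lines below are only a hedged sketch of how such a Gluon
    # training step is usually completed (record the forward pass, compute the
    # loss, backprop, let the trainer scale and apply the update). They are not
    # the author's original continuation, and they assume `from mxnet import
    # autograd` and that `lossFun(pred_cls, pred_bbox, targets)` returns a
    # scalar loss, which is not shown in the original code.
    targets = targets.as_in_context(ctx)
    with autograd.record():
        pred_cls, pred_bbox = net(images)
        loss = lossFun(pred_cls, pred_bbox, targets)
    loss.backward()
    trainer.step(train_batch)  # gradients are normalized by the batch size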
def train():
    start_epoch = 0

    # dataset
    train_dataset = DataSource(train_anno_path,
                               transform=Compose([
                                   RandomMirror(0.5),
                                   SubtractFloatMeans(MEANS),
                                   ToPercentCoords(),
                                   PermuteCHW()
                               ]),
                               ratio=8)

    # net
    net = PNet()

    # optimizer and scheduler (the scheduler adjusts the learning rate)
    optimizer = optim.SGD(net.parameters(),
                          lr=base_lr,
                          momentum=momentum,
                          weight_decay=weight_decay)
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, stepsize, gamma)

    # device
    if USE_CUDA:
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = True

    if pre_checkpoint:
        cp = torch.load(pre_checkpoint)
        net.load_state_dict(cp['weights'])
        log.info("=> load state dict from {}...".format(pre_checkpoint))
        if resume:
            optimizer.load_state_dict(cp['optimizer'])
            scheduler.load_state_dict(cp['scheduler'])
            start_epoch = cp['epoch']
            log.info("=> resume from epoch: {}, now the lr is: {}".format(
                start_epoch, optimizer.param_groups[0]['lr']))

    net.to(device)

    k = 0
    for epoch in range(start_epoch, max_iter + 1):
        net.train()
        # targets carry the ground-truth labels and box coordinates for each sample
        images, targets = train_dataset.getbatch(train_batch)
        images = images.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        pred_cls, pred_bbox = net(images)
        # cross-entropy classification loss
        loss_cls = AddClsLoss(pred_cls, targets, topk)
        # smooth L1 regression loss
        loss_reg = AddRegLoss(pred_bbox, targets)
        loss = 3 * loss_cls + loss_reg
        loss.backward()
        torch.nn.utils.clip_grad_norm_(net.parameters(), clip_grad)
        optimizer.step()
        scheduler.step()

        if k % display == 0:
            acc_cls = AddClsAccuracy(pred_cls, targets)
            acc_reg = AddBoxMap(pred_bbox, targets, INPUT_IMAGE_SIZE, INPUT_IMAGE_SIZE)
            log.info(
                "train iter: {}, lr: {}, loss: {:.4f}, cls loss: {:.4f}, bbox loss: {:.4f}, cls acc: {:.4f}, bbox acc: {:.4f}"
                .format(k, optimizer.param_groups[0]['lr'], loss.item(),
                        loss_cls.item(), loss_reg.item(), acc_cls, acc_reg))
        if k % save_interval == 0:
            path = save_prefix + "_iter_{}.pkl".format(k)
            SaveCheckPoint(path, net, optimizer, scheduler, epoch)
            log.info("=> save model: {}".format(path))
        k += 1

    log.info("optimize done...")
    path = save_prefix + "_final.pkl"
    SaveCheckPoint(path, net, optimizer, scheduler, max_iter)
    log.info("=> save model: {} ...".format(path))
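# SaveCheckPoint is called above but not defined in this snippet. Below is a
# minimal hedged sketch of what it could look like, inferred only from the keys
# the resume branch reads ('weights', 'optimizer', 'scheduler', 'epoch'); the
# actual helper in the repository may differ.
def SaveCheckPoint(path, net, optimizer, scheduler, epoch):
    # net may already be wrapped in DataParallel; its state_dict is saved as-is,
    # which stays consistent with the loading code above (it also wraps before loading)
    torch.save({
        'weights': net.state_dict(),
        'optimizer': optimizer.state_dict(),
        'scheduler': scheduler.state_dict(),
        'epoch': epoch
    }, path)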
def train(net):
    start_epoch = 0

    # dataset
    train_dataset = DataSource(
        train_anno_path,
        transform=Compose([
            # TODO: Add random color jitter
            RandomColorJit(),
            RandomMirror(0.5),
            SubtractFloatMeans(MEANS),
            ToPercentCoords(),
            PermuteCHW()
        ]),
        ratio=train_ratio,
        image_shape=(INPUT_IMAGE_SIZE, INPUT_IMAGE_SIZE, 3))

    # net

    # optimizer and scheduler
    # Modified by Sherk, the Adam optimizer can be applied for faster convergence
    optimizer = optim.SGD(net.parameters(),
                          lr=base_lr,
                          momentum=momentum,
                          weight_decay=weight_decay)
    # optimizer = optim.Adam(net.parameters(), lr=base_lr, weight_decay=weight_decay)  # Adam takes no momentum
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, stepsize, gamma)

    # device
    if USE_CUDA:
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = True

    if pre_checkpoint:
        cp = torch.load(pre_checkpoint)
        net.load_state_dict(cp['weights'])
        log.info("=> load state dict from {}...".format(pre_checkpoint))
        if resume:
            optimizer.load_state_dict(cp['optimizer'])
            scheduler.load_state_dict(cp['scheduler'])
            start_epoch = cp['epoch']
            log.info("=> resume from epoch: {}, now the lr is: {}".format(
                start_epoch, optimizer.param_groups[0]['lr']))

    net.to(device)

    k = 0
    for epoch in range(start_epoch, max_iter + 1):
        net.train()
        images, targets = train_dataset.getbatch(train_batch)
        images = images.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        pred_cls, pred_bbox = net(images)
        loss_cls = AddClsLoss(pred_cls, targets, topk)
        loss_reg = AddRegLoss(pred_bbox, targets)
        loss = loss_ratio[0] * loss_cls + loss_ratio[1] * loss_reg
        loss.backward()
        torch.nn.utils.clip_grad_norm_(net.parameters(), clip_grad)
        optimizer.step()
        scheduler.step()

        if k % display == 0:
            acc_cls = AddClsAccuracy(pred_cls, targets)
            acc_reg = AddBoxMap(pred_bbox, targets, INPUT_IMAGE_SIZE, INPUT_IMAGE_SIZE)
            log.info(
                "train iter: {}, lr: {}, loss: {:.4f}, cls loss: {:.4f}, bbox loss: {:.4f}, cls acc: {:.4f}, bbox acc: {:.4f}"
                .format(k, optimizer.param_groups[0]['lr'], loss.item(),
                        loss_cls.item(), loss_reg.item(), acc_cls, acc_reg))
        # do not save the intermediate .pkls
        if k % save_interval == 0:
            path = save_prefix + "_iter_{}.pkl".format(k)
            SaveCheckPoint(path, net, optimizer, scheduler, epoch)
            log.info("=> save model: {}".format(path))
        k += 1

    log.info("optimize done...")
    path = save_prefix + "_final.pkl"
    SaveCheckPoint(path, net, optimizer, scheduler, max_iter)
    log.info("=> save model: {} ...".format(path))
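# AddClsLoss is referenced above but not defined in this snippet. Its `topk`
# argument suggests online hard example mining (average the loss over only the
# hardest classification samples in the batch). The function below is a hedged
# sketch under that assumption; it is a hypothetical helper, not the
# repository's implementation, and it takes per-sample class labels directly
# rather than the packed `targets` tensor used above, whose layout is not shown.
import torch.nn.functional as F

def topk_cls_loss(pred_cls, labels, topk):
    """Cross-entropy averaged over the `topk` hardest samples in the batch."""
    per_sample = F.cross_entropy(pred_cls, labels, reduction='none')
    k = min(topk, per_sample.numel())
    hard, _ = per_sample.topk(k)  # largest per-sample losses = hardest examples
    return hard.mean()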