Example #1
def train():
    start_epoch = 0
    # dataset
    train_dataset = DataSource(train_anno_path,
                               transform=Compose([
                                   RandomMirror(0.5),
                                   SubtractFloatMeans(MEANS),
                                   ToPercentCoords(),
                                   PermuteCHW()
                               ]),
                               ratio=8)

    # net
    net = PNet()

    # optimizer and scheduler (the scheduler is used to adjust the learning rate)
    optimizer = optim.SGD(net.parameters(),
                          lr=base_lr,
                          momentum=momentum,
                          weight_decay=weight_decay)
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, stepsize, gamma)

    # device
    if USE_CUDA:
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = True
    if pre_checkpoint:
        cp = torch.load(pre_checkpoint)
        net.load_state_dict(cp['weights'])
        log.info("=> load state dict from {}...".format(pre_checkpoint))
        if resume:
            optimizer.load_state_dict(cp['optimizer'])
            scheduler.load_state_dict(cp['scheduler'])
            start_epoch = cp['epoch']
            log.info("=> resume from epoch: {}, now the lr is: {}".format(
                start_epoch, optimizer.param_groups[0]['lr']))

    net.to(device)

    k = 0
    for epoch in range(start_epoch, max_iter + 1):
        net.train()
        # targets holds the ground-truth labels and box coordinates for each sample
        images, targets = train_dataset.getbatch(train_batch)
        images = images.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()

        pred_cls, pred_bbox = net(images)

        # cross-entropy classification loss
        loss_cls = AddClsLoss(pred_cls, targets, topk)
        # smooth L1 regression loss
        loss_reg = AddRegLoss(pred_bbox, targets)
        loss = 3 * loss_cls + loss_reg

        loss.backward()
        torch.nn.utils.clip_grad_norm_(net.parameters(), clip_grad)

        optimizer.step()
        scheduler.step()

        if k % display == 0:
            acc_cls = AddClsAccuracy(pred_cls, targets)
            acc_reg = AddBoxMap(pred_bbox, targets, INPUT_IMAGE_SIZE,
                                INPUT_IMAGE_SIZE)

            log.info(
                "train iter: {}, lr: {}, loss: {:.4f}, cls loss: {:.4f}, bbox loss: {:.4f}, cls acc: {:.4f}, bbox acc: {:.4f}"
                .format(k, optimizer.param_groups[0]['lr'], loss.item(),
                        loss_cls.item(), loss_reg.item(), acc_cls, acc_reg))

        if k % save_interval == 0:
            path = save_prefix + "_iter_{}.pkl".format(k)
            SaveCheckPoint(path, net, optimizer, scheduler, epoch)
            log.info("=> save model: {}".format(path))

        k += 1

    log.info("optimize done...")
    path = save_prefix + "_final.pkl"
    SaveCheckPoint(path, net, optimizer, scheduler, max_iter)
    log.info("=> save model: {} ...".format(path))
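
The SaveCheckPoint helper is not shown in these snippets. A minimal sketch of what it might look like, inferred from the keys read back on resume ('weights', 'optimizer', 'scheduler', 'epoch'); the project's actual implementation may differ:

import torch

def SaveCheckPoint(path, net, optimizer, scheduler, epoch):
    # Persist everything the resume branch expects to find in the checkpoint.
    torch.save({
        'weights': net.state_dict(),
        'optimizer': optimizer.state_dict(),
        'scheduler': scheduler.state_dict(),
        'epoch': epoch,
    }, path)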
Example #2
def train(net):
    start_epoch = 0
    # dataset
    train_dataset = DataSource(
        train_anno_path,
        transform=Compose([
            # TODO: Add random color jitter
            RandomColorJit(),
            RandomMirror(0.5),
            SubtractFloatMeans(MEANS),
            ToPercentCoords(),
            PermuteCHW()
        ]),
        ratio=train_ratio,
        image_shape=(INPUT_IMAGE_SIZE, INPUT_IMAGE_SIZE, 3))

    # net is passed in as an argument

    # optimizer and scheduler
    # Modified by Sherk: Adam was tried for faster convergence (see the commented-out line below); SGD is the active optimizer here
    optimizer = optim.SGD(net.parameters(),
                          lr=base_lr,
                          momentum=momentum,
                          weight_decay=weight_decay)
    # optimizer = optim.Adam(net.parameters(), lr=base_lr, weight_decay=weight_decay)  # Adam takes no momentum
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, stepsize, gamma)

    # device
    if USE_CUDA:
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = True
    if pre_checkpoint:
        cp = torch.load(pre_checkpoint)
        net.load_state_dict(cp['weights'])
        log.info("=> load state dict from {}...".format(pre_checkpoint))
        if resume:
            optimizer.load_state_dict(cp['optimizer'])
            scheduler.load_state_dict(cp['scheduler'])
            start_epoch = cp['epoch']
            log.info("=> resume from epoch: {}, now the lr is: {}".format(
                start_epoch, optimizer.param_groups[0]['lr']))

    net.to(device)

    k = 0
    for epoch in range(start_epoch, max_iter + 1):
        net.train()
        images, targets = train_dataset.getbatch(train_batch)
        images = images.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()

        pred_cls, pred_bbox = net(images)

        loss_cls = AddClsLoss(pred_cls, targets, topk)
        loss_reg = AddRegLoss(pred_bbox, targets)
        loss = loss_ratio[0] * loss_cls + loss_ratio[1] * loss_reg

        loss.backward()
        torch.nn.utils.clip_grad_norm_(net.parameters(), clip_grad)

        optimizer.step()
        scheduler.step()

        if k % display == 0:
            acc_cls = AddClsAccuracy(pred_cls, targets)
            acc_reg = AddBoxMap(pred_bbox, targets, INPUT_IMAGE_SIZE,
                                INPUT_IMAGE_SIZE)

            log.info(
                "train iter: {}, lr: {}, loss: {:.4f}, cls loss: {:.4f}, bbox loss: {:.4f}, cls acc: {:.4f}, bbox acc: {:.4f}"
                .format(k, optimizer.param_groups[0]['lr'], loss.item(),
                        loss_cls.item(), loss_reg.item(), acc_cls, acc_reg))

        # do not save the intermediate .pkls
        if k % save_interval == 0:
            path = save_prefix + "_iter_{}.pkl".format(k)
            SaveCheckPoint(path, net, optimizer, scheduler, epoch)
            log.info("=> save model: {}".format(path))

        k += 1

    log.info("optimize done...")
    path = save_prefix + "_final.pkl"
    SaveCheckPoint(path, net, optimizer, scheduler, max_iter)
    log.info("=> save model: {} ...".format(path))
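
Both examples rely on module-level configuration (learning rate, schedule milestones, device, loss weights, etc.) defined elsewhere in the training script. A hypothetical setup with illustrative values only, plus a possible invocation of the second example, might look like:

import torch

# Hypothetical configuration; the real project defines these elsewhere and
# the values below are placeholders, not the authors' settings.
USE_CUDA = torch.cuda.is_available()
device = torch.device("cuda" if USE_CUDA else "cpu")

base_lr = 0.01              # initial SGD learning rate
momentum = 0.9
weight_decay = 5e-4
stepsize = [40000, 60000]   # MultiStepLR milestones (in iterations)
gamma = 0.1                 # LR decay factor at each milestone
max_iter = 80000
train_batch = 256
topk = 0.7                  # assumed hard-example ratio passed to AddClsLoss
loss_ratio = (3.0, 1.0)     # cls/reg loss weights, consistent with Example #1's 3:1
clip_grad = 10.0
display = 100
save_interval = 10000
MEANS = (104.0, 117.0, 123.0)   # illustrative per-channel means
INPUT_IMAGE_SIZE = 12           # illustrative network input size
pre_checkpoint = ""         # path to a checkpoint; empty string trains from scratch
resume = False
save_prefix = "./models/pnet"

# Possible invocation of Example #2:
# net = PNet()
# train(net)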