Example #1
def train(train_loader,
          model,
          criterion,
          optimizer,
          lr_init=None,
          lr_now=None,
          glob_step=None,
          lr_decay=None,
          gamma=None,
          max_norm=True):

    losses = utils.AverageMeter()
    model.train()

    for i, (inps, tars) in enumerate(tqdm(train_loader)):
        glob_step += 1
        if glob_step % lr_decay == 0 or glob_step == 1:
            lr_now = utils.lr_decay(optimizer, glob_step, lr_init, lr_decay,
                                    gamma)

        # make predictions with the model
        inputs = Variable(inps.cuda())
        targets = Variable(tars.cuda(non_blocking=True))
        outputs = model(inputs)

        # calculate loss
        optimizer.zero_grad()
        loss = criterion(outputs, targets)
        losses.update(loss.item(), inputs.size(0))
        loss.backward()
        if max_norm:
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
        optimizer.step()

    return glob_step, lr_now, losses.avg
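
This example (and most of the ones below) calls into a small utils module that is not shown here. As a rough sketch only, assuming an exponential step-decay schedule and a simple running-average meter (the real helpers may differ), the two pieces used most often could look like this:

class AverageMeter:
    """Tracks the running average of a scalar, e.g. the per-batch loss."""

    def __init__(self):
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def lr_decay(optimizer, step, lr_init, decay_step, gamma):
    """Exponential step decay: lr = lr_init * gamma ** (step / decay_step)."""
    lr = lr_init * gamma ** (step / decay_step)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr
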
Example #2
def train(
    train_loader,
    model,
    criterion,
    optimizer,
    lr_init=None,
    lr_now=None,
    glob_step=None,
    lr_decay=None,
    gamma=None,
    max_norm=True,
):
    losses = utils.AverageMeter()

    model.train()

    start = time.time()
    batch_time = 0
    bar = Bar(">>>", fill=">", max=len(train_loader))

    for i, (inps, tars) in enumerate(train_loader):
        glob_step += 1
        if glob_step % lr_decay == 0 or glob_step == 1:
            lr_now = utils.lr_decay(optimizer, glob_step, lr_init, lr_decay,
                                    gamma)
        inputs = Variable(inps.cuda())
        targets = Variable(tars.cuda())

        outputs = model(inputs)

        # calculate loss
        optimizer.zero_grad()
        loss = criterion(outputs, targets)
        losses.update(loss.item(), inputs.size(0))
        loss.backward()
        if max_norm:
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
        optimizer.step()

        # update summary
        if (i + 1) % 100 == 0:
            batch_time = time.time() - start
            start = time.time()

        bar.suffix = "({batch}/{size}) | batch: {batchtime:.4}ms | Total: {ttl} | ETA: {eta:} | loss: {loss:.4f}".format(
            batch=i + 1,
            size=len(train_loader),
            batchtime=batch_time * 10.0,
            ttl=bar.elapsed_td,
            eta=bar.eta_td,
            loss=losses.avg,
        )
        bar.next()

    bar.finish()
    return glob_step, lr_now, losses.avg
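
For context, the three return values (glob_step, lr_now, losses.avg) are meant to be fed back into train() on the next epoch so the step-based learning-rate decay keeps its state. A minimal, hypothetical driver loop (the opt.* option names are placeholders, not taken from the snippets) might look like:

glob_step = 0
lr_now = opt.lr
for epoch in range(opt.epochs):
    glob_step, lr_now, train_loss = train(
        train_loader, model, criterion, optimizer,
        lr_init=opt.lr, lr_now=lr_now, glob_step=glob_step,
        lr_decay=opt.lr_decay, gamma=opt.lr_gamma, max_norm=True)
    print("epoch {}: lr {:.2e} | train loss {:.4f}".format(epoch, lr_now, train_loss))
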
Example #3
def train(train_loader, model, criterion, optimizer, joint_num,
          lr_init=None, lr_now=None, glob_step=None, lr_decay=None, gamma=None,
          max_norm=True):
    losses = utils.AverageMeter()

    model.train()
 
    start = time.time()
    batch_time = 0
    bar = Bar('>>>', fill='>', max=len(train_loader))
    
    for i, data in enumerate(train_loader):
        # Turn down Learning Rate
        glob_step += 1
        if glob_step % lr_decay == 0 or glob_step == 1:
            lr_now = utils.lr_decay(optimizer, glob_step, lr_init, lr_decay, gamma)
        
        joint2d, truth = data['joint2d'], data['truth']
        inputs = Variable(joint2d.cuda().type(torch.cuda.FloatTensor))
        targets = Variable(truth.cuda().type(torch.cuda.FloatTensor))
    
        outputs = model(inputs)
        outputs = torch.reshape(outputs, (-1, joint_num * 3))
        targets = torch.reshape(targets, (-1, joint_num * 3))

        # calculate loss
        optimizer.zero_grad()
        loss = criterion(outputs, targets)

        losses.update(loss.item(), inputs.size(0))
        loss.backward()
        
        if max_norm:
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
        optimizer.step()

        # update summary
        if (i + 1) % 100 == 0:
            batch_time = time.time() - start
            start = time.time()

        bar.suffix = '({batch}/{size}) | batch: {batchtime:.4}ms | Total: {ttl} | ETA: {eta:} | loss: {loss:.4f}' \
            .format(batch=i + 1,
                    size=len(train_loader),
                    batchtime=batch_time * 10.0,
                    ttl=bar.elapsed_td,
                    eta=bar.eta_td,
                    loss=losses.avg)
        bar.next()

    bar.finish()
    return glob_step, lr_now, losses.avg
Example #4
def train(train_loader,
          model,
          criterion,
          optimizer,
          num_kpts=15,
          num_classes=200,
          lr_init=None,
          lr_now=None,
          glob_step=None,
          lr_decay=None,
          gamma=None,
          max_norm=True):
    losses = utils.AverageMeter()

    model.train()

    errs, accs = [], []
    start = time.time()
    batch_time = 0
    bar = Bar('>>>', fill='>', max=len(train_loader))

    for i, sample in enumerate(train_loader):
        glob_step += 1
        if glob_step % lr_decay == 0 or glob_step == 1:
            lr_now = utils.lr_decay(optimizer, glob_step, lr_init, lr_decay,
                                    gamma)

        inputs = sample['X'].cuda()
        # NOTE: PyTorch issue with dim0=1.
        if inputs.shape[0] == 1:
            continue
        targets = sample['Y'].reshape(-1).cuda()

        outputs = model(inputs)

        # calculate loss
        optimizer.zero_grad()
        loss = criterion(outputs, targets)
        losses.update(loss.item(), inputs.size(0))
        loss.backward()
        if max_norm:
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
        optimizer.step()

        # Map logits to class probabilities in [0, 1].
        softmax = nn.Softmax(dim=1)
        outputs = softmax(outputs)

        outputs = outputs.data.cpu().numpy()
        targets = one_hot(targets.data.cpu().numpy(), num_classes)

        errs.append(np.mean(np.abs(outputs - targets)))
        accs.append(
            metrics.accuracy_score(np.argmax(targets, axis=1),
                                   np.argmax(outputs, axis=1)))

        # update summary
        if (i + 1) % 100 == 0:
            batch_time = time.time() - start
            start = time.time()

        bar.suffix = '({batch}/{size}) | batch: {batchtime:.4}ms | Total: {ttl} | ETA: {eta:} | loss: {loss:.6f}' \
            .format(batch=i + 1,
                    size=len(train_loader),
                    batchtime=batch_time * 10.0,
                    ttl=bar.elapsed_td,
                    eta=bar.eta_td,
                    loss=losses.avg)
        bar.next()
    bar.finish()

    err = np.mean(np.array(errs, dtype=np.float32))
    acc = np.mean(np.array(accs, dtype=np.float32))
    print(">>> train error: {} <<<".format(err))
    print(">>> train accuracy: {} <<<".format(acc))
    return glob_step, lr_now, losses.avg, err, acc
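
Example #4 also relies on a one_hot(labels, num_classes) helper that is not shown. A plausible NumPy version, given only as an assumption consistent with how it is called above (integer labels of shape (N,) turned into an (N, num_classes) matrix), is:

import numpy as np

def one_hot(labels, num_classes):
    """Convert an integer label array of shape (N,) into an (N, num_classes) one-hot matrix."""
    out = np.zeros((labels.shape[0], num_classes), dtype=np.float32)
    out[np.arange(labels.shape[0]), labels] = 1.0
    return out
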
Example #5
def train(train_loader,
          model,
          criterion,
          optimizer,
          stat_2d,
          stat_3d,
          lr_init=None,
          lr_now=None,
          glob_step=None,
          lr_decay=None,
          gamma=None,
          max_norm=True):

    losses = utils.AverageMeter()

    model.train()

    # for i, (inps, tars) in enumerate(train_loader): # inps = (64, 32)
    pbar = tqdm(train_loader)
    for i, (inps, tars) in enumerate(pbar):  # inps = (64, 32)
        glob_step += 1
        if glob_step % lr_decay == 0 or glob_step == 1:
            lr_now = utils.lr_decay(optimizer, glob_step, lr_init, lr_decay,
                                    gamma)

        ### Input unnormalization
        inputs_unnorm = data_process.unNormalizeData(
            inps.data.cpu().numpy(), stat_2d['mean'], stat_2d['std'],
            stat_2d['dim_use'])  # 64, 64
        dim_2d_use = stat_2d['dim_use']
        inputs_use = inputs_unnorm[:, dim_2d_use]  # (64, 32)
        ### Input distance normalization
        inputs_dist_norm, _ = data_process.input_norm(
            inputs_use)  # (64, 32) , array
        input_dist = torch.tensor(inputs_dist_norm, dtype=torch.float32)

        ### Targets unnormalization
        targets_unnorm = data_process.unNormalizeData(
            tars.data.cpu().numpy(), stat_3d['mean'], stat_3d['std'],
            stat_3d['dim_use'])  # (64, 96)
        dim_3d_use = np.array([
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 36, 37, 38, 39, 40, 41, 45, 46, 47, 51, 52, 53, 54, 55, 56,
            57, 58, 59, 75, 76, 77, 78, 79, 80, 81, 82, 83
        ])
        targets_use = targets_unnorm[:, dim_3d_use]  # (64, 48)

        ### Targets distance normalization
        targets_dist_norm, _ = data_process.output_norm(targets_use)
        targets_dist = torch.tensor(targets_dist_norm, dtype=torch.float32)

        inputs = Variable(input_dist.cuda())
        targets = Variable(targets_dist.cuda(non_blocking=True))

        outputs = model(inputs)

        # calculate loss
        optimizer.zero_grad()
        loss = criterion(outputs, targets)
        losses.update(loss.item(), inputs.size(0))
        loss.backward()

        if max_norm:
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
        optimizer.step()

        # tqdm.set_postfix(loss='{:05.6f}'.format(losses.avg))
        pbar.set_postfix(tr_loss='{:05.6f}'.format(losses.avg))

    return glob_step, lr_now, losses.avg
Example #6
def train_model(model,
                dataloaders,
                criterion,
                optimizer,
                cmd,
                writer,
                is_inception=False,
                model_save_path="./"):
    print("-------------------sparse training-----------------------")
    num_epochs = opt.epoch
    log_dir = os.path.join(model_save_path, opt.expID)
    os.makedirs(log_dir, exist_ok=True)
    log_save_path = os.path.join(log_dir, "log.txt")
    since = time.time()
    best_weight = copy.deepcopy(model)
    val_acc_history, train_acc_history, val_loss_history, train_loss_history = [], [], [], []
    train_acc, val_acc = 0, 0
    train_loss, val_loss = float("inf"), float("inf")
    best_epoch, epoch_acc, epoch = 0, 0, 0
    epoch_ls = []
    early_stopping = EarlyStopping(patience=opt.patience, verbose=True)

    # prune_idx,ignore_id,all_conv = parse_module_defs(model)
    # print(prune_idx)

    decay, decay_epoch = 0, []
    stop = False
    log_writer = open(log_save_path, "w")
    log_writer.write(cmd)
    log_writer.write("\n")
    lr = opt.LR

    train_log_name = log_save_path.replace("log.txt", "train_log.csv")
    train_log = open(train_log_name, "w", newline="")
    csv_writer = csv.writer(train_log)
    csv_writer.writerow(write_csv_title())

    os.makedirs("result", exist_ok=True)
    result = os.path.join("result",
                          "{}_result_{}.csv".format(opt.expFolder, computer))
    exist = os.path.exists(result)

    print(
        "----------------------------------------------------------------------------------------------------"
    )
    print(opt)
    print("Training backbone is: {}".format(opt.backbone))
    print("Warm up end at {}".format(warm_up_epoch))
    for k, v in config.bad_epochs.items():
        if v > 1:
            raise ValueError("Wrong stopping accuracy!")
    print(
        "----------------------------------------------------------------------------------------------------"
    )

    utils.draw_graph(epoch_ls, train_loss_history, val_loss_history,
                     train_acc_history, val_acc_history, log_dir)
    flops = utils.print_model_param_flops(model)
    print("FLOPs of current model is {}".format(flops))
    params = utils.print_model_param_nums(model)
    print("Parameters of current model is {}".format(params))
    inf_time = utils.get_inference_time(model,
                                        height=input_size,
                                        width=input_size)
    print("Inference time is {}".format(inf_time))
    print(
        "----------------------------------------------------------------------------------------------------"
    )

    for epoch in range(num_epochs):
        log_tmp = [opt.expID, epoch]

        if epoch < warm_up_epoch:
            optimizer, lr = warm_up_lr(optimizer, epoch)
        elif epoch == warm_up_epoch:
            lr = opt.LR
        elif epoch > num_epochs * 0.7 and epoch < num_epochs * 0.9:
            optimizer, lr = lr_decay(optimizer, lr)
        elif epoch > num_epochs * 0.9:
            optimizer, lr = lr_decay2(optimizer, lr)

        log_tmp.append(lr)
        log_tmp.append("")

        epoch_start_time = time.time()
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 20)
        log_writer.write('Epoch {}/{}\n'.format(epoch, num_epochs - 1))
        log_writer.write('-' * 10 + "\n")

        writer.add_scalar("lr", lr, epoch)
        print("Current lr is {}".format(lr))

        for name, param in model.named_parameters():
            writer.add_histogram(name,
                                 param.clone().data.to("cpu").numpy(), epoch)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            cls_correct = [0] * class_nums
            cls_sum = [0] * class_nums
            cls_acc = [0] * class_nums

            print(phase)
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()  # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            batch_num = 0
            batch_start_time = time.time()
            for names, inputs, labels in dataloaders[phase]:

                inputs = inputs.to(device)
                labels = labels.to(device)

                # optimizer, lr = utils.adjust_lr(optimizer, epoch, opt.epoch)
                optimizer.zero_grad()

                with torch.set_grad_enabled(phase == 'train'):
                    if is_inception and phase == 'train':
                        outputs, aux_outputs = model(inputs)
                        loss1 = criterion(outputs, labels)
                        loss2 = criterion(aux_outputs, labels)
                        loss = loss1 + 0.4 * loss2
                    else:
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)

                    right = preds == labels
                    for r, l in zip(right, labels):
                        cls_sum[l] += 1
                        if r:
                            cls_correct[l] += 1

                    if phase == 'train':
                        if opt.mix_precision:
                            with amp.scale_loss(loss,
                                                optimizer) as scaled_loss:
                                scaled_loss.backward()
                        else:
                            loss.backward()

                        # sr_flag = True
                        # BNOptimizer.updateBN(sr_flag, model, s, prune_idx)
                        BNOptimizer.updateBN(model, opt.sparse_s)
                        optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
                if batch_num % 100 == 0:
                    print("batch num:", batch_num, "cost time:",
                          time.time() - batch_start_time)
                    batch_start_time = time.time()
                batch_num += 1

            for idx, (s, c) in enumerate(zip(cls_sum, cls_correct)):
                cls_acc[idx] = c / s

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(
                dataloaders[phase].dataset)

            bn_sum, bn_num = 0, 0
            for mod in model.modules():
                if isinstance(mod, nn.BatchNorm2d):
                    bn_num += mod.num_features
                    bn_sum += torch.sum(abs(mod.weight))
                    writer.add_histogram("bn_weight",
                                         mod.weight.data.cpu().numpy(), epoch)

            bn_ave = bn_sum / bn_num
            print("Current bn : {} --> {}".format(epoch, bn_ave))

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss,
                                                       epoch_acc))
            log_writer.write('{} Loss: {:.4f} Acc: {:.4f}\n'.format(
                phase, epoch_loss, epoch_acc))

            if phase == 'val':
                log_tmp.insert(5, epoch_acc.tolist())
                log_tmp.insert(6, epoch_loss)

                val_loss_history.append(epoch_loss)
                val_acc_history.append(epoch_acc)
                val_loss = epoch_loss if epoch_loss < val_loss else val_loss

                writer.add_scalar("scalar/val_acc", epoch_acc, epoch)
                writer.add_scalar("Scalar/val_loss", epoch_loss, epoch)
                imgnames, pds = names[:3], [
                    label_dict[i] for i in preds[:record_num].tolist()
                ]
                for idx, (img_path, pd) in enumerate(zip(imgnames, pds)):
                    img = cv2.imread(img_path)
                    img = cv2.putText(img, pd, (20, 50),
                                      cv2.FONT_HERSHEY_PLAIN, 2, (0, 255, 0),
                                      2)
                    #cv2.imwrite("tmp/{}_{}.jpg".format(epoch, idx), img)
                    tb_img = utils.image2tensorboard(img)
                    # images = torch.cat((images, torch.unsqueeze(tb_img, 0)), 0)
                    writer.add_image("pred_image_for_epoch{}".format(epoch),
                                     tb_img, epoch)

                if epoch % opt.save_interval == 0 and epoch != 0:
                    torch.save(
                        model.state_dict(),
                        os.path.join(
                            model_save_path, "{}_{}_{}cls_{}.pth".format(
                                opt.expID, opt.backbone, class_nums, epoch)))

                # writer.add_image("pred_image_for_epoch{}".format(epoch), images[1:, :, :, :])
                if epoch_acc > val_acc:
                    torch.save(
                        model.state_dict(),
                        os.path.join(
                            model_save_path, "{}_{}_{}cls_best.pth".format(
                                opt.expID, opt.backbone, class_nums)))
                    val_acc = epoch_acc
                    best_epoch = epoch
                    best_weight = copy.deepcopy(model)

            else:
                log_tmp.append(epoch_acc.tolist())
                log_tmp.append(epoch_loss)
                train_acc_history.append(epoch_acc)
                train_loss_history.append(epoch_loss)
                train_acc = epoch_acc if epoch_acc > train_acc else train_acc
                train_loss = epoch_loss if epoch_loss < train_loss else train_loss
                writer.add_scalar("scalar/train_acc", epoch_acc, epoch)
                writer.add_scalar("Scalar/train_loss", epoch_loss, epoch)
            log_tmp += log_of_each_class(cls_acc)

        epoch_ls.append(epoch)
        epoch_time_cost = time.time() - epoch_start_time
        print("epoch complete in {:.0f}m {:.0f}s".format(
            epoch_time_cost // 60, epoch_time_cost % 60))
        log_writer.write("epoch complete in {:.0f}m {:.0f}s\n".format(
            epoch_time_cost // 60, epoch_time_cost % 60))
        torch.save(opt, '{}/option.pth'.format(model_save_path))
        csv_writer.writerow(log_tmp)

    csv_writer.writerow([])
    csv_writer.writerow(csv_cls_num(dataloaders))
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:.4f}'.format(val_acc))

    log_writer.write('Training complete in {:.0f}m {:.0f}s\n'.format(
        time_elapsed // 60, time_elapsed % 60))
    log_writer.write('Best val Acc: {:.4f}\n'.format(val_acc))
    log_writer.close()

    with open(result, "a+") as f:
        if not exist:
            title_str = "id,backbone,params,flops,time,batch_size,optimizer,freeze_bn,freeze,sparse,sparse_decay," \
                        "epoch_num,LR,weightDecay,loadModel,location, ,folder_name,train_acc,train_loss,val_acc," \
                        "val_loss,training_time, best_epoch,total_epoch\n"
            title_str = write_decay_title(len(decay_epoch), title_str)
            f.write(title_str)
        info_str = "{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{}, ,{},{},{},{},{},{},{},{}\n".format(
            opt.expID, opt.backbone, params, flops, inf_time, opt.batch,
            opt.optMethod, opt.freeze_bn, opt.freeze, opt.sparse_s,
            opt.sparse_decay, opt.epoch, opt.LR,
            opt.weightDecay, opt.loadModel, computer,
            os.path.join(opt.expFolder, opt.expID), train_acc, train_loss,
            val_acc, val_loss, time_elapsed, best_epoch, epoch)
        info_str = write_decay_info(decay_epoch, info_str)
        f.write(info_str)
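
The sparse-training hook in Example #6, BNOptimizer.updateBN(model, opt.sparse_s), is invoked between loss.backward() and optimizer.step(), but its implementation is not shown. Assuming it follows the usual network-slimming recipe (an L1 penalty on BatchNorm scale factors), a minimal sketch could be:

import torch
import torch.nn as nn

def update_bn(model, s):
    """Add the subgradient of s * |gamma| to every BatchNorm2d weight gradient."""
    for mod in model.modules():
        if isinstance(mod, nn.BatchNorm2d) and mod.weight.grad is not None:
            mod.weight.grad.data.add_(s * torch.sign(mod.weight.data))
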
Example #7
def train_multiposenet(train_loader,
                       model,
                       criterion,
                       optimizer,
                       lr_init=None,
                       lr_now=None,
                       glob_step=None,
                       lr_decay=None,
                       gamma=None,
                       max_norm=True):

    model.train()
    l2_loss, cvae_loss, gsnn_loss, kl_loss = (utils.AverageMeter(), utils.AverageMeter(),
                                              utils.AverageMeter(), utils.AverageMeter())

    for i, (inps, tars, _) in enumerate(train_loader):

        glob_step += 1
        if glob_step % lr_decay == 0 or glob_step == 1:
            lr_now = utils.lr_decay(optimizer, glob_step, lr_init, lr_decay,
                                    gamma)

        # forward pass
        inputs = Variable(inps.cuda())
        targets = Variable(tars.cuda())
        out_cvae, out_gsnn, post_mu, post_logvar = model(inputs, targets)

        # backward pass
        optimizer.zero_grad()
        loss_l2, loss_cvae, loss_gsnn, loss_kl = loss_function(
            out_cvae, out_gsnn, targets, post_mu, post_logvar)
        loss_l2 = loss_l2 * option.weight_l2
        loss_cvae = loss_cvae * option.weight_l2
        loss_gsnn = loss_gsnn * option.weight_l2
        loss_kl = loss_kl * option.weight_kl

        l2_loss.update(loss_l2.item(), inputs.size(0))
        cvae_loss.update(loss_cvae.item(), inputs.size(0))
        gsnn_loss.update(loss_gsnn.item(), inputs.size(0))
        kl_loss.update(loss_kl.item(), inputs.size(0))

        loss = loss_kl + loss_l2
        loss.backward()

        if max_norm:
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
        optimizer.step()

        # update summary
        if (i % 100 == 0):

            print('({batch}/{size}) | loss l2: {loss_l2:.4f} | loss cvae: {loss_cvae:.4f} | loss gsnn: {loss_gsnn:.4f} | loss kl: {loss_kl:.4f}' \
                    .format(batch=i + 1,
                            size=len(train_loader),
                            loss_l2=l2_loss.avg,
                            loss_cvae=cvae_loss.avg,
                            loss_gsnn=gsnn_loss.avg,
                            loss_kl=kl_loss.avg))

            sys.stdout.flush()

    return glob_step, lr_now, l2_loss.avg
Example #8
def train(train_loader, model, criterion, optimizer,
          lr_init=None, lr_now=None, glob_step=None, lr_decay=None, gamma=None,
          max_norm=True):
    losses = utils.AverageMeter()

    model.train()

    start = time.time()
    batch_time = 0
    bar = Bar('>>>', fill='>', max=len(train_loader))

#
    train_loader = tqdm(train_loader, dynamic_ncols=True)
#

    for i, (inps, tars) in enumerate(train_loader):
        glob_step += 1
        if glob_step % lr_decay == 0 or glob_step == 1:
            lr_now = utils.lr_decay(optimizer, glob_step, lr_init, lr_decay, gamma)
        inputs = Variable(inps.cuda())
        targets = Variable(tars.cuda(non_blocking=True))

        outputs, outputs_inputs = model(inputs)
        # outputs = model(inputs)

        # calculate loss
        optimizer.zero_grad()

        # ###########
        # alpha = 0.0
        # loss1 = criterion(outputs[0], targets)
        # loss2 = criterion(outputs[1], targets)
        # loss = alpha*loss1 + (1.0-alpha)*loss2
        # ########

        loss = criterion(outputs, targets)
        loss_input = criterion(outputs_inputs, inputs)
        loss = loss + loss_input

        losses.update(loss.item(), inputs.size(0))
        loss.backward()
        if max_norm:
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
        optimizer.step()

        # update summary
        if (i + 1) % 100 == 0:
            batch_time = time.time() - start
            start = time.time()

#        bar.suffix = '({batch}/{size}) | batch: {batchtime:.4}ms | Total: {ttl} | ETA: {eta:} | loss: {loss:.4f}' \
#            .format(batch=i + 1,
#                    size=len(train_loader),
#                    batchtime=batch_time * 10.0,
#                    ttl=bar.elapsed_td,
#                    eta=bar.eta_td,
#                    loss=losses.avg)
#        bar.next()
#
        train_loader.set_description(
                    '({batch}/{size}) | batch: {batchtime:.4}ms | loss: {loss:.6f}'.format(
                        batch=i + 1,
                        size=len(train_loader),
                        batchtime=batch_time * 10.0,
                        loss=losses.avg)
                    )
    train_loader.close()
#

#    bar.finish()
    return glob_step, lr_now, losses.avg
Example #9
def train(train_loader,
          model,
          criterion,
          optimizer,
          stat_2d,
          stat_3d,
          lr_init=None,
          lr_now=None,
          glob_step=None,
          lr_decay=None,
          gamma=None,
          max_norm=True):

    losses = utils.AverageMeter()

    model.train()

    # start = time.time()
    # batch_time = 0
    # bar = Bar('>>>', fill='>', max=len(train_loader))

    # for i, (inps, tars) in enumerate(train_loader): # inps = (64, 32)
    pbar = tqdm(train_loader)
    for i, (inps, tars) in enumerate(pbar):  # inps = (64, 32)
        glob_step += 1
        if glob_step % lr_decay == 0 or glob_step == 1:
            lr_now = utils.lr_decay(optimizer, glob_step, lr_init, lr_decay,
                                    gamma)

        ### Input unnormalization
        inputs_unnorm = data_process.unNormalizeData(
            inps.data.cpu().numpy(), stat_2d['mean'], stat_2d['std'],
            stat_2d['dim_use'])  # 64, 64
        # unNormalizeData returns (64, 64): unnormalizing a zero matrix gives (0 * stdMat) + meanMat,
        # so every position outside the original 16 joints holds junk values.
        dim_2d_use = stat_2d['dim_use']
        # select the 32 useful joint coordinates with dim_2d_use => (64, 32)
        inputs_use = inputs_unnorm[:, dim_2d_use]  # (64, 32)
        ### Input distance normalization
        inputs_dist_norm, _ = data_process.input_norm(
            inputs_use)  # (64, 32) , array
        input_dist = torch.tensor(inputs_dist_norm, dtype=torch.float32)

        ### Targets unnormalization
        targets_unnorm = data_process.unNormalizeData(
            tars.data.cpu().numpy(), stat_3d['mean'], stat_3d['std'],
            stat_3d['dim_use'])  # (64, 96)
        dim_3d_use = np.array([
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 36, 37, 38, 39, 40, 41, 45, 46, 47, 51, 52, 53, 54, 55, 56,
            57, 58, 59, 75, 76, 77, 78, 79, 80, 81, 82, 83
        ])
        targets_use = targets_unnorm[:, dim_3d_use]  # (64, 48)
        ### Targets distance normalization
        targets_dist_norm, _ = data_process.output_norm(targets_use)
        targets_dist = torch.tensor(targets_dist_norm, dtype=torch.float32)

        inputs = Variable(input_dist.cuda())
        targets = Variable(targets_dist.cuda(non_blocking=True))

        outputs = model(inputs)

        # calculate loss
        optimizer.zero_grad()
        loss = criterion(outputs, targets)
        losses.update(loss.item(), inputs.size(0))
        loss.backward()

        if max_norm:
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
        optimizer.step()

        # tqdm.set_postfix(loss='{:05.6f}'.format(losses.avg))
        pbar.set_postfix(tr_loss='{:05.6f}'.format(losses.avg))

    #     # update summary
    #     if (i + 1) % 100 == 0:
    #         batch_time = time.time() - start
    #         start = time.time()
    #
    #     bar.suffix = '({batch}/{size}) | batch: {batchtime:.4}ms | Total: {ttl} | ETA: {eta:} | loss: {loss:.4f}' \
    #         .format(batch=i + 1,
    #                 size=len(train_loader),
    #                 batchtime=batch_time * 10.0,
    #                 ttl=bar.elapsed_td,
    #                 eta=bar.eta_td,
    #                 loss=losses.avg)
    #     bar.next()
    #
    # bar.finish()

    return glob_step, lr_now, losses.avg
Example #10
def main(opt):
    err_best = 1000
    glob_step = 0
    lr_now = opt.lr
    lr_decay = opt.lr_decay
    lr_init = opt.lr
    lr_gamma = opt.lr_gamma
    start_epoch = 0

    file_path = os.path.join(opt.ckpt, 'opt.json')
    with open(file_path, 'w') as f:
        f.write(json.dumps(vars(opt), sort_keys=True, indent=4))

    # create model
    print(">>> creating model")
    model = LinearModel(opt.batch_size, opt.predict_14)
    # = refine_2d_model(opt.batch_size,opt.predict_14)
    model = model.cuda()
    model.apply(weight_init)

    #refine_2d_model = refine_2d_model.cuda()
    #refine_2d_model.apply(weight_init)
    print(">>> total params: {:.2f}M".format(
        sum(p.numel() for p in model.parameters()) / 1000000.0
    ))  #+ sum(p.numel() for p in refine_2d_model.parameters()) / 1000000.0))
    criterion = nn.MSELoss(reduction='mean').cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)
    #refine_2d_model_optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)

    # load checkpoint
    if opt.resume:
        print(">>> loading ckpt from '{}'".format(opt.load))
        ckpt = torch.load(opt.load)
        start_epoch = ckpt['epoch']
        err_best = ckpt['err']
        glob_step = ckpt['step']
        lr_now = ckpt['lr']
        model.load_state_dict(ckpt['state_dict'])
        #refine_2d_model.load_state_dict[ckpt['refine_state_dict']]
        optimizer.load_state_dict(ckpt['optimizer'])
        #refine_2d_model_optimizer.load_state_dict(ckpt['refine_optimizer'])
        print(">>> ckpt loaded (epoch: {} | err: {})".format(
            start_epoch, err_best))

    # list of actions to use
    actions = data_utils.define_actions(opt.action)
    num_actions = len(actions)
    print(">>> actions to use (total: {}):".format(num_actions))
    pprint(actions, indent=4)
    print(">>>")

    # data loading
    print(">>> loading data")

    # Load camera parameters
    SUBJECT_IDS = [1, 5, 6, 7, 8, 9, 11]
    rcams = cameras.load_cameras(opt.cameras_path, SUBJECT_IDS)

    # Load 3d data and load (or create) 2d projections
    train_set_3d, test_set_3d, data_mean_3d, data_std_3d, dim_to_ignore_3d, dim_to_use_3d, train_root_positions, test_root_positions = data_utils.read_3d_data(
        actions, opt.data_dir, opt.camera_frame, rcams, opt.predict_14)

    # Read stacked hourglass 2D predictions if use_sh, otherwise use groundtruth 2D projections
    if opt.use_hg:
        train_set_2d, test_set_2d, data_mean_2d, data_std_2d, dim_to_ignore_2d, dim_to_use_2d = data_utils.read_2d_predictions(
            actions, opt.data_dir)
    else:
        train_set_2d, test_set_2d, data_mean_2d, data_std_2d, dim_to_ignore_2d, dim_to_use_2d = data_utils.create_2d_data(
            actions, opt.data_dir, rcams)

    #gt_train_set_2d, gt_test_set_2d, gt_data_mean_2d, gt_data_std_2d, gt_dim_to_ignore_2d, gt_dim_to_use_2d = data_utils.create_2d_data( actions, opt.data_dir, rcams )

    print("done reading and normalizing data.")

    step_time, loss = 0, 0
    current_epoch = start_epoch
    log_every_n_batches = 100

    cudnn.benchmark = True
    best_error = 10000
    save_flag = True
    while current_epoch < opt.epochs:
        current_epoch = current_epoch + 1

        # === Load training batches for one epoch ===
        encoder_inputs, decoder_outputs = get_all_batches(opt,
                                                          train_set_2d,
                                                          train_set_3d,
                                                          training=True)

        nbatches = len(encoder_inputs)
        print("There are {0} train batches".format(nbatches))
        start_time = time.time()

        # === Loop through all the training batches ===
        current_step = 0
        for i in range(nbatches):

            if (i + 1) % log_every_n_batches == 0:
                # Print progress every log_every_n_batches batches
                print("Working on epoch {0}, batch {1} / {2}... \n".format(
                    current_epoch, i + 1, nbatches),
                      end="")

            model.train()

            if glob_step % lr_decay == 0 or glob_step == 1:
                lr_now = utils.lr_decay(optimizer, glob_step, lr_init,
                                        lr_decay, lr_gamma)
                #utils.lr_decay(refine_2d_model_optimizer, glob_step, lr_init, lr_decay, lr_gamma)

            enc_in = torch.from_numpy(encoder_inputs[i]).float()
            dec_out = torch.from_numpy(decoder_outputs[i]).float()

            inputs = Variable(enc_in.cuda())
            targets = Variable(dec_out.cuda())

            outputs = model(inputs)

            # calculate loss
            optimizer.zero_grad()

            step_loss = criterion(outputs, targets)
            step_loss.backward()

            if opt.max_norm:
                nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
                #nn.utils.clip_grad_norm_(refine_2d_model.parameters(), max_norm=1)

            optimizer.step()

            loss += float(step_loss)

            current_step += 1
            glob_step += 1
            # === end looping through training batches ===

        loss = loss / nbatches

        print("=============================\n"
              "Global step:         %d\n"
              "Learning rate:       %.2e\n"
              "Train loss avg:      %.4f\n"
              "=============================" % (glob_step, lr_now, loss))
        # === End training for an epoch ===

        # clear unused CUDA cache
        torch.cuda.empty_cache()

        # === Testing after this epoch ===
        model.eval()
        if opt.evaluateActionWise:
            print("{0:=^12} {1:=^6}".format("Action",
                                            "mm"))  # line of 30 equal signs

            cum_err = 0
            record = ''
            for action in actions:

                print("{0:<12} ".format(action), end="")
                # Get 2d and 3d testing data for this action
                action_test_set_2d = get_action_subset(test_set_2d, action)
                action_test_set_3d = get_action_subset(test_set_3d, action)
                encoder_inputs, decoder_outputs = get_all_batches(
                    opt,
                    action_test_set_2d,
                    action_test_set_3d,
                    training=False)

                total_err, joint_err, step_time = evaluate_batches(
                    opt, criterion, model, data_mean_3d, data_std_3d,
                    dim_to_use_3d, dim_to_ignore_3d, data_mean_2d, data_std_2d,
                    dim_to_use_2d, dim_to_ignore_2d, current_step,
                    encoder_inputs, decoder_outputs)
                cum_err = cum_err + total_err

                print("{0:>6.2f}".format(total_err))

                record = record + "{}   :   {}  (mm) \n".format(
                    action, total_err)
            avg_val = cum_err / float(len(actions))
            print("{0:<12} {1:>6.2f}".format("Average", avg_val))
            print("{0:=^19}".format(''))

            f = open("records.txt", 'a')
            f.write("epoch: {} , avg_error: {}  loss : {} \n".format(
                current_epoch, avg_val, loss))

            if best_error > avg_val:
                print("=============================")
                print("==== save best record   =====")
                print("=============================")
                best_error = avg_val
                # save ckpt
                file_path = os.path.join(opt.ckpt, 'ckpt_last.pth.tar')
                torch.save(
                    {
                        'epoch': current_epoch,
                        'lr': lr_now,
                        'step': glob_step,
                        'err': avg_val,
                        'state_dict': model.state_dict(),
                        'optimizer': optimizer.state_dict()
                    }, file_path)

                f.write("epoch: {} , avg_error: {} \n".format(
                    current_epoch, best_error))
                f.write(record)

            f.write("=======================================\n")
            f.close()

        else:

            n_joints = 17 if not opt.predict_14 else 14
            f = open("records.txt", 'a')

            encoder_inputs, decoder_outputs = get_all_batches(opt,
                                                              test_set_2d,
                                                              test_set_3d,
                                                              training=False)

            total_err, joint_err, step_time = evaluate_batches(
                opt, criterion, model, data_mean_3d, data_std_3d,
                dim_to_use_3d, dim_to_ignore_3d, data_mean_2d, data_std_2d,
                dim_to_use_2d, dim_to_ignore_2d, current_step, encoder_inputs,
                decoder_outputs, current_epoch)

            print("=============================\n"
                  "Step-time (ms):      %.4f\n"
                  "Val loss avg:        %.4f\n"
                  "Val error avg (mm):  %.2f\n"
                  "=============================" %
                  (1000 * step_time, loss, total_err))

            for i in range(n_joints):
                # 6 spaces, right-aligned, 5 decimal places
                print("Error in joint {0:02d} (mm): {1:>5.2f}".format(
                    i + 1, joint_err[i]))

                if save_flag is True:
                    f.write("Error in joint {0:02d} (mm): {1:>5.2f} \n".format(
                        i + 1, joint_err[i]))
            print("=============================")

            save_flag = False
            f.close()

    print("done in {0:.2f} ms".format(1000 * (time.time() - start_time)))
    # Reset global time and loss
    step_time, loss = 0, 0
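
Example #10 depends on several helpers (get_all_batches, evaluate_batches, get_action_subset) that are defined elsewhere. As one small illustration, get_action_subset presumably filters the data dictionary by action name; a hypothetical sketch, assuming the keys are (subject, action, sequence-name) tuples as in the Human3.6M baseline data format, is:

def get_action_subset(data_set, action):
    """Keep only the sequences whose key names the given action."""
    return {k: v for k, v in data_set.items() if k[1] == action}
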