Example #1
 def dist(self, X, Y, *args, **kwargs):
     #X, Y batches [N, C, H, W]
     N = X.shape[0]
     d1 = torch.clamp_min(1 - (self.ssim_d(X, Y)).view(N, -1).mean(dim=1), 0.0)
     return d1
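A minimal usage sketch of this distance: `self.ssim_d` is assumed to be an SSIM module returning a per-pixel similarity map in [0, 1]; the `fake_ssim_map` helper below is only a stand-in for it, not a real SSIM implementation.

import torch

def fake_ssim_map(X, Y):
    # stand-in for self.ssim_d: any per-pixel similarity map in [0, 1]
    return 1.0 - (X - Y).abs().clamp(max=1.0)

X = torch.rand(2, 3, 32, 32)
Y = torch.rand(2, 3, 32, 32)
N = X.shape[0]
# same reduction as dist(): per-sample mean similarity turned into a distance, floored at 0
d = torch.clamp_min(1 - fake_ssim_map(X, Y).view(N, -1).mean(dim=1), 0.0)
print(d.shape)  # torch.Size([2])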
Example #2
def train(gpu, ngpus_per_node, args):
    print("Using GPU %d for training" % gpu)
    args.gpu = gpu

    if args.distributed:
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=ngpus_per_node,
                                rank=args.gpu)

    model = EppFlowNet(args=args)
    if args.distributed:
        torch.cuda.set_device(args.gpu)
        args.batch_size = int(args.batch_size / ngpus_per_node)
        model = nn.SyncBatchNorm.convert_sync_batchnorm(module=model)
        model = model.to(f'cuda:{args.gpu}')
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[args.gpu],
            find_unused_parameters=True,
            output_device=args.gpu)
    else:
        model = torch.nn.DataParallel(model)
        model.cuda()

    logroot = os.path.join(args.logroot, args.name)
    print("Parameter Count: %d, saving location: %s" %
          (count_parameters(model), logroot))

    if args.restore_ckpt is not None:
        print("=> loading checkpoint '{}'".format(args.restore_ckpt))
        loc = 'cuda:{}'.format(args.gpu)
        checkpoint = torch.load(args.restore_ckpt, map_location=loc)
        model.load_state_dict(checkpoint, strict=False)

    with open(
            os.path.join(
                os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
                'eppflownet/pose_bin{}.pickle'.format(
                    str(int(32 / args.num_angs)))), 'rb') as f:
        linlogdedge = pickle.load(f)
    minidx = np.argmin(np.abs(linlogdedge))
    print("Min index is :%d, val: %f" % (minidx, linlogdedge[minidx]))

    model.train()

    train_entries, evaluation_entries, seqmap = read_splits(ngpus_per_node)

    interval = int(np.floor(len(evaluation_entries) / ngpus_per_node))
    if args.gpu == ngpus_per_node - 1:
        stidx = int(interval * args.gpu)
        edidx = len(evaluation_entries)
    else:
        stidx = int(interval * args.gpu)
        edidx = int(interval * (args.gpu + 1))

    print("GPU %d, eval fromm %d to %d, in total %d" %
          (gpu, stidx, edidx, edidx - stidx))

    train_dataset = KITTI_eigen(root=args.dataset_root,
                                inheight=args.inheight,
                                inwidth=args.inwidth,
                                entries=train_entries,
                                maxinsnum=args.maxinsnum,
                                linlogdedge=linlogdedge,
                                num_samples=args.num_angs,
                                depthvls_root=args.depthvlsgt_root,
                                prediction_root=args.prediction_root,
                                ins_root=args.ins_root,
                                mdPred_root=args.mdPred_root,
                                RANSACPose_root=args.RANSACPose_root,
                                istrain=True,
                                muteaug=False,
                                banremovedup=True,
                                isgarg=False)
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_dataset) if args.distributed else None
    train_loader = data.DataLoader(train_dataset,
                                   batch_size=args.batch_size,
                                   pin_memory=True,
                                   num_workers=int(args.num_workers /
                                                   ngpus_per_node),
                                   drop_last=True,
                                   sampler=train_sampler)

    eval_dataset = KITTI_odom(root=args.dataset_root,
                              inheight=args.evalheight,
                              inwidth=args.evalwidth,
                              entries=evaluation_entries[stidx:edidx],
                              maxinsnum=args.maxinsnum,
                              linlogdedge=linlogdedge,
                              num_samples=args.num_angs,
                              depthvls_root=args.depthvlsgt_root,
                              prediction_root=args.prediction_root,
                              ins_root=args.ins_root,
                              mdPred_root=args.mdPred_root,
                              RANSACPose_root=args.RANSACPose_root,
                              istrain=False,
                              isgarg=True)
    eval_loader = data.DataLoader(eval_dataset,
                                  batch_size=2,
                                  pin_memory=True,
                                  num_workers=3,
                                  drop_last=False)

    print(
        "Training splits contain %d images while test splits contain %d images"
        % (train_dataset.__len__(), eval_dataset.__len__()))

    if args.distributed:
        group = dist.new_group([i for i in range(ngpus_per_node)])

    optimizer, scheduler = fetch_optimizer(args, model,
                                           int(train_dataset.__len__() / 2))

    total_steps = 0

    if args.gpu == 0:
        logger = Logger(logroot)
        logger_evaluation = Logger(
            os.path.join(args.logroot, 'evaluation_eigen_background',
                         args.name))
        logger_evaluation_org = Logger(
            os.path.join(args.logroot, 'evaluation_eigen_background',
                         "{}_org".format(args.name)))
        logger.create_summarywriter()
        logger_evaluation.create_summarywriter()
        logger_evaluation_org.create_summarywriter()

    VAL_FREQ = 5000
    epoch = 0
    minabsl = 1e10

    ssim = SSIM()

    st = time.time()
    should_keep_training = True
    while should_keep_training:
        train_sampler.set_epoch(epoch)
        for i_batch, data_blob in enumerate(train_loader):
            optimizer.zero_grad()

            image1 = data_blob['img1'].cuda(gpu) / 255.0
            image2 = data_blob['img2'].cuda(gpu) / 255.0
            intrinsic = data_blob['intrinsic'].cuda(gpu)
            insmap = data_blob['insmap'].cuda(gpu)
            posepred = data_blob['posepred'].cuda(gpu)
            mD_pred = data_blob['mdDepth_pred'].cuda(gpu)
            ang_decps_pad = data_blob['ang_decps_pad'].cuda(gpu)
            scl_decps_pad = data_blob['scl_decps_pad'].cuda(gpu)
            mvd_decps_pad = data_blob['mvd_decps_pad'].cuda(gpu)
            rel_pose = data_blob['rel_pose'].cuda(gpu)

            posepred = posepred[:, :, 0]
            ang_decps_pad = ang_decps_pad[:, :, 0]
            scl_decps_pad = scl_decps_pad[:, :, 0]
            mvd_decps_pad = mvd_decps_pad[:, :, 0]

            # IMUlocations1 = data_blob['IMUlocations1'].cuda(gpu)
            # leftarrs1 = data_blob['leftarrs1'].cuda(gpu)
            # rightarrs1 = data_blob['rightarrs1'].cuda(gpu)
            # IMUlocations2 = data_blob['IMUlocations2'].cuda(gpu)
            # leftarrs2 = data_blob['leftarrs2'].cuda(gpu)
            # rightarrs2 = data_blob['rightarrs2'].cuda(gpu)

            gpsscale = torch.sqrt(torch.sum(rel_pose[:, 0:3, 3]**2, dim=1))

            mD_pred_clipped = torch.clamp_min(mD_pred, min=args.min_depth_pred)

            # tensor2disp(1/mD_pred_clipped, vmax=0.15, viewind=0).show()
            outputs = model(image1, image2, mD_pred_clipped, intrinsic,
                            posepred, ang_decps_pad, scl_decps_pad,
                            mvd_decps_pad, insmap)
            rpjloss_cale, rpjloss_fin = get_reprojection_loss(
                image1, outputs, ssim, args)
            scaleloss = get_scale_loss(gpsscale=gpsscale,
                                       outputs=outputs,
                                       num_angs=args.num_angs)
            seqloss = 0

            if args.enable_seqloss:
                loss = (rpjloss_cale + rpjloss_fin) / 2 + seqloss
            elif args.enable_scalelossonly:
                loss = (rpjloss_cale + rpjloss_fin) / 2 * 0 + scaleloss
            else:
                loss = (rpjloss_cale + rpjloss_fin) / 2 * 0.1 + scaleloss

            metrics = dict()
            metrics['rpjloss_cale'] = rpjloss_cale.item()
            metrics['rpjloss_fin'] = rpjloss_fin.item()
            metrics['scaleloss'] = scaleloss
            metrics['loss'] = loss

            if torch.sum(torch.isnan(loss)) > 0:
                print(data_blob['tag'])

            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)

            optimizer.step()
            scheduler.step()

            # if args.gpu == 0:
            #     print(i_batch, loss.item(), scaleloss, torch.mean(image1))

            if args.gpu == 0:
                logger.write_dict(metrics, step=total_steps)
                if total_steps % SUM_FREQ == 0:
                    dr = time.time() - st
                    resths = (args.num_steps -
                              total_steps) * dr / (total_steps + 1) / 60 / 60
                    print("Step: %d, rest hour: %f, depthloss: %f" %
                          (total_steps, resths, loss.item()))
                    logger.write_vls(data_blob, outputs, total_steps)

            if total_steps % VAL_FREQ == 1:
                results = validate_kitti(model.module, args, eval_loader,
                                         group, seqmap)

                if args.gpu == 0:
                    logger_evaluation.write_dict(results, total_steps)
                    if minabsl > results['absl']:
                        minabsl = results['absl']
                        PATH = os.path.join(logroot, 'minabsl.pth')
                        torch.save(model.state_dict(), PATH)
                        print("model saved to %s" % PATH)

                # if args.gpu == 0:
                #     results = validate_kitti(model.module, args, eval_loader, None, group, total_steps, isorg=True)
                #     logger_evaluation_org.write_dict(results, total_steps)
                # else:
                #     validate_kitti(model.module, args, eval_loader, None, group, None, isorg=True)

                model.train()

            total_steps += 1

            if total_steps > args.num_steps:
                should_keep_training = False
                break

        if args.gpu == 0:
            PATH = os.path.join(logroot,
                                'epoch_{}.pth'.format(str(epoch).zfill(3)))
            torch.save(model.state_dict(), PATH)
            print("model saved to %s" % PATH)
        epoch = epoch + 1

    if args.gpu == 0:
        logger.close()
        PATH = os.path.join(logroot, 'final.pth')
        torch.save(model.state_dict(), PATH)

    return
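Entry points with the signature `train(gpu, ngpus_per_node, args)` like the one above are usually launched with one process per GPU. A hedged launcher sketch (the surrounding argument parsing is assumed to exist elsewhere; only the fields read by `train` matter here):

import torch
import torch.multiprocessing as mp

def main(args):
    ngpus_per_node = torch.cuda.device_count()
    if args.distributed:
        # one process per GPU; each process receives its gpu index as the first argument
        mp.spawn(train, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
    else:
        train(gpu=0, ngpus_per_node=1, args=args)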
Example #3
def validate_kitti(model,
                   args,
                   eval_loader,
                   logger,
                   group,
                   total_steps,
                   isdeepv2d=False):
    """ Peform validation using the KITTI-2015 (train) split """
    """ Peform validation using the KITTI-2015 (train) split """
    model.eval()
    gpu = args.gpu
    eval_measures_depth = torch.zeros(10).cuda(device=gpu)
    err_rec = list()
    err_rec_deepv2d = list()
    err_rec_md = list()
    mv_rec = list()
    for val_id, data_blob in enumerate(tqdm(eval_loader)):
        image1 = data_blob['img1'].cuda(gpu) / 255.0
        image2 = data_blob['img2'].cuda(gpu) / 255.0
        intrinsic = data_blob['intrinsic'].cuda(gpu)
        insmap = data_blob['insmap'].cuda(gpu)
        posepred = data_blob['posepred'].cuda(gpu)
        depthgt = data_blob['depthmap'].cuda(gpu)

        rel_pose = data_blob['rel_pose'][0].cpu().numpy()
        gps_scale = np.sqrt(np.sum(rel_pose[0:3, 3]**2))

        if not args.initbymD:
            mD_pred = data_blob['depthpred'].cuda(gpu)
        else:
            mD_pred = data_blob['mdDepth_pred'].cuda(gpu)

        mD_pred_clipped = torch.clamp_min(mD_pred, min=args.min_depth_pred)

        if not isdeepv2d:
            outputs = model(image1, image2, mD_pred_clipped, intrinsic,
                            posepred, insmap)
            predread = outputs[('depth', 2)]
        else:
            depthpred_deepv2d = data_blob['depthpred_deepv2d'].cuda(gpu)
            predread = depthpred_deepv2d
            # predread = data_blob['mdDepth_pred'].cuda(gpu)

        selector = ((depthgt > 0) * (predread > 0) *
                    (depthgt > args.min_depth_eval) *
                    (depthgt < args.max_depth_eval)).float()
        predread = torch.clamp(predread,
                               min=args.min_depth_eval,
                               max=args.max_depth_eval)
        depth_gt_flatten = depthgt[selector == 1].cpu().numpy()
        pred_depth_flatten = predread[selector == 1].cpu().numpy()
        deepv2d_depth_flatten = data_blob['depthpred_deepv2d'][
            selector == 1].cpu().numpy()
        mD_pred_clipped_flatten = mD_pred[selector == 1].cpu().numpy()

        eval_measures_depth_np = compute_errors(gt=depth_gt_flatten,
                                                pred=pred_depth_flatten)
        eval_measures_depth_deepv2d_np = compute_errors(
            gt=depth_gt_flatten, pred=deepv2d_depth_flatten)
        eval_measures_depth_md_np = compute_errors(
            gt=depth_gt_flatten, pred=mD_pred_clipped_flatten)

        err_rec.append(eval_measures_depth_np[-3])
        mv_rec.append(gps_scale)
        err_rec_deepv2d.append(eval_measures_depth_deepv2d_np[-3])
        err_rec_md.append(eval_measures_depth_md_np[-3])

    err_rec = np.array(err_rec)
    mv_rec = np.array(mv_rec)
    err_rec_deepv2d = np.array(err_rec_deepv2d)
    err_rec_md = np.array(err_rec_md)

    check_dist = np.linspace(0, 3, 200)
    dist = 0.4
    dist_ratio = 0.1

    plot_mv = list()
    plot_err = list()
    plot_std = list()
    plot_num = list()
    for d in check_dist:
        d_low = d * (1 - dist)
        d_hig = d * (1 + dist)

        selector = (mv_rec >= d_low) * (mv_rec <= d_hig)

        d_low = d * (1 - dist_ratio)
        d_hig = d * (1 + dist_ratio)

        selector_ratio = (mv_rec >= d_low) * (mv_rec <= d_hig)
        if np.sum(selector) < 5:
            continue
        else:
            err1 = np.mean(err_rec[selector])
            err2 = np.mean(err_rec_deepv2d[selector])
            err3 = np.mean(err_rec_md[selector])

            std1 = np.std(err_rec[selector])
            std2 = np.std(err_rec_deepv2d[selector])
            std3 = np.std(err_rec_md[selector])
            plot_err.append(np.array([err1, err2, err3]))
            plot_std.append(np.array([std1, std2, std3]))
            plot_mv.append(d)
            plot_num.append(np.sum(selector_ratio))

    plot_err = np.stack(plot_err, axis=0)
    plot_mv = np.array(plot_mv)
    plot_std = np.stack(plot_std, axis=0)
    plot_num = np.stack(plot_num, axis=0)
    plot_num = plot_num / np.sum(plot_num)

    thickness = 0.1

    fig, ax = plt.subplots()
    plt.plot(plot_mv, plot_err[:, 0])
    plt.plot(plot_mv, plot_err[:, 1])
    plt.plot(plot_mv, plot_err[:, 2])
    ax.fill_between(plot_mv,
                    plot_err[:, 0] - plot_std[:, 0] * thickness,
                    plot_err[:, 0] + plot_std[:, 0] * thickness,
                    alpha=0.5)
    ax.fill_between(plot_mv,
                    plot_err[:, 1] - plot_std[:, 1] * thickness,
                    plot_err[:, 1] + plot_std[:, 1] * thickness,
                    alpha=0.5)
    ax.fill_between(plot_mv,
                    plot_err[:, 2] - plot_std[:, 2] * thickness,
                    plot_err[:, 2] + plot_std[:, 2] * thickness,
                    alpha=0.5)
    plt.xlabel('scale in meters')
    plt.ylabel('a1')
    # plt.legend(['Ours', 'DeepV2D Eight View', 'Bts'], bbox_to_anchor=(0.1, 0.3))
    plt.legend(['Ours', 'DeepV2D Eight View', 'Bts'], loc='lower left')

    ax2 = ax.twinx()
    ax2.plot(plot_mv, plot_num, c='purple')
    # plt.legend(['Frame per Scale Percentage'], bbox_to_anchor=(0.6, 0.3))
    plt.legend(['Frame per Scale Percentage'], loc='lower right')
    plt.title("Error curve in KITTI")
    plt.savefig('/home/shengjie/Desktop/1.png',
                bbox_inches='tight',
                pad_inches=0,
                dpi=150)
    plt.close()
    plt.show()

    # NOTE: 'bins' and 'indices' are not defined in this snippet; the lines
    # below are one plausible reconstruction (binning frames by GPS scale).
    bins = np.linspace(0, 3, 20)
    indices = np.clip(np.digitize(mv_rec, bins) - 1, 0, bins.shape[0] - 1)
    ave_err_rec = np.zeros((bins.shape[0], 3))
    ave_err_rec_count = np.zeros((bins.shape[0], 1))
    for idx, indice in enumerate(indices):
        ave_err_rec[indice, 0] += err_rec[idx]
        ave_err_rec[indice, 1] += err_rec_deepv2d[idx]
        ave_err_rec[indice, 2] += err_rec_md[idx]
        ave_err_rec_count[indice, 0] += 1
    ave_err_rec = ave_err_rec / (ave_err_rec_count + 1e-6)
    plt.figure()
    plt.plot(bins, ave_err_rec[:, 0])
    plt.plot(bins, ave_err_rec[:, 1])
    plt.show()

    plt.figure()
    plt.scatter(mv_rec, err_rec)
    plt.scatter(mv_rec, err_rec_deepv2d)
    plt.show()
Example #4
 def forward(self, x):
     return 0.5 * torch.clamp_min(x, 0) ** 2
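This is the one-sided quadratic (squared-ReLU) penalty: zero for x <= 0 and 0.5 * x ** 2 for x > 0, equivalent to `0.5 * F.relu(x) ** 2`. A quick check:

import torch
import torch.nn.functional as F

x = torch.linspace(-2, 2, 5)            # [-2., -1., 0., 1., 2.]
y = 0.5 * torch.clamp_min(x, 0) ** 2
print(y)                                 # tensor([0.0000, 0.0000, 0.0000, 0.5000, 2.0000])
print(torch.allclose(y, 0.5 * F.relu(x) ** 2))  # True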
Example #5
 def expmap0(self, u, c: Curvature):
     sqrt_c = c.c**0.5
     u_norm = torch.clamp_min(u.norm(dim=-1, p=2, keepdim=True),
                              self.min_norm)
     gamma_1 = tanh(sqrt_c * u_norm) * u / (sqrt_c * u_norm)
     return gamma_1
Example #6
def _logmap0(y, c):
    sqrt_c = c**0.5
    y_norm = torch.clamp_min(y.norm(dim=-1, p=2, keepdim=True), 1e-5)
    return y / y_norm / sqrt_c * artanh(sqrt_c * y_norm)
Example #7
import torch
from torch.autograd import Variable

H = 4
W = 4
inp = torch.rand(1, 4, H, W) * 2  # 2 / 4
# inp = torch.load('mnist_sample.pt')[None].double()

# inference
clip = torch.tensor(1)
a1 = Variable(inp, requires_grad=True)
x = a1 - a1.mean([2, 3], keepdim=True)
x.retain_grad()
norm = torch.sqrt(torch.mean(x**2, dim=[2, 3], keepdim=True) + 1e-5)
norm.retain_grad()
inv_cnorm = 1 / torch.clamp_min(norm, clip)
inv_cnorm.retain_grad()
x_norm = x * inv_cnorm
x_norm.retain_grad()
c1 = x_norm.abs().sum()
c1.retain_grad()
c1.backward()

# instance norm
a2 = Variable(inp, requires_grad=True)
m = torch.nn.InstanceNorm2d(4)
c2 = m(a2).abs().sum()
c2.backward()

# calculate gradients
d_xnorm = x_norm.grad  # * weight
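The script above re-implements instance normalization by hand, but clamps the per-channel standard deviation from below by `clip` before dividing; comparing the input gradients against `nn.InstanceNorm2d` shows where that clamp changes the backward pass. A small check one could append (with `clip = torch.tensor(1)` the clamp is usually active for this input, so the gradients are expected to differ):

# compare gradients of the clamped manual normalization vs. nn.InstanceNorm2d
print(torch.allclose(a1.grad, a2.grad))   # typically False while the clamp is active
print((a1.grad - a2.grad).abs().max())    # magnitude of the discrepancy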
Example #8
def train(gpu, ngpus_per_node, args):
    print("Using GPU %d for training" % gpu)
    args.gpu = gpu

    model = EppFlowNet(args=args)
    model = torch.nn.DataParallel(model)
    model.cuda()

    print("=> loading checkpoint '{}'".format(args.restore_ckpt))
    loc = 'cuda:{}'.format(args.gpu)
    checkpoint = torch.load(args.restore_ckpt, map_location=loc)
    model.load_state_dict(checkpoint, strict=False)

    with open(
            os.path.join(
                os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
                'eppflownet/pose_bin{}.pickle'.format(
                    str(int(32 / args.num_angs)))), 'rb') as f:
        linlogdedge = pickle.load(f)
    minidx = np.argmin(np.abs(linlogdedge))
    print("Min index is :%d, val: %f" % (minidx, linlogdedge[minidx]))

    entries = read_splits(args)

    stidx = 0
    edidx = len(entries)

    eval_dataset = KITTI_odom(root=args.dataset_root,
                              odomroot=args.odomroot,
                              inheight=args.evalheight,
                              inwidth=args.evalwidth,
                              entries=entries[stidx:edidx],
                              maxinsnum=args.maxinsnum,
                              linlogdedge=linlogdedge,
                              num_samples=args.num_angs,
                              prediction_root=args.prediction_root,
                              ins_root=args.ins_root,
                              mdPred_root=args.mdPred_root,
                              RANSACPose_root=args.RANSACPose_root,
                              istrain=False,
                              isgarg=True)
    eval_loader = data.DataLoader(eval_dataset,
                                  batch_size=1,
                                  pin_memory=True,
                                  num_workers=3,
                                  drop_last=False,
                                  shuffle=False)

    model.eval()

    totnum = 0
    dr = 0
    with torch.no_grad():
        for val_id, data_blob in enumerate(eval_loader):
            image1 = data_blob['img1'].cuda(gpu) / 255.0
            image2 = data_blob['img2'].cuda(gpu) / 255.0
            intrinsic = data_blob['intrinsic'].cuda(gpu)
            insmap = data_blob['insmap'].cuda(gpu)
            mD_pred = data_blob['mdDepth_pred'].cuda(gpu)
            ang_decps_pad = data_blob['ang_decps_pad'].cuda(gpu)
            scl_decps_pad = data_blob['scl_decps_pad'].cuda(gpu)
            mvd_decps_pad = data_blob['mvd_decps_pad'].cuda(gpu)
            posepred = data_blob['posepred'].cuda(gpu)

            posepred = posepred[:, :, 0]
            ang_decps_pad = ang_decps_pad[:, :, 0]
            scl_decps_pad = scl_decps_pad[:, :, 0]
            mvd_decps_pad = mvd_decps_pad[:, :, 0]

            mD_pred_clipped = torch.clamp_min(mD_pred, min=args.min_depth_pred)

            st = time.time()
            outputs = model(image1, image2, mD_pred_clipped, intrinsic,
                            posepred, ang_decps_pad, scl_decps_pad,
                            mvd_decps_pad, insmap)
            dr += time.time() - st
            totnum += 1
            print("%d Samples, Ave sec/frame: %f, Mem: %f Gb" %
                  (totnum, dr / totnum,
                   float(torch.cuda.memory_allocated() / 1024 / 1024 / 1024)))
    return
Example #9
def train(gpu, ngpus_per_node, args):
    print("Using GPU %d for training" % gpu)
    args.gpu = gpu

    if args.distributed:
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=ngpus_per_node,
                                rank=args.gpu)

    model = EppFlowNet(args=args)
    if args.distributed:
        torch.cuda.set_device(args.gpu)
        args.batch_size = int(args.batch_size / ngpus_per_node)
        model = nn.SyncBatchNorm.convert_sync_batchnorm(module=model)
        model = model.to(f'cuda:{args.gpu}')
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[args.gpu],
            find_unused_parameters=True,
            output_device=args.gpu)
    else:
        model = torch.nn.DataParallel(model)
        model.cuda()

    logroot = os.path.join(args.logroot, args.name)
    print("Parameter Count: %d, saving location: %s" %
          (count_parameters(model), logroot))

    if args.restore_ckpt is not None:
        print("=> loading checkpoint '{}'".format(args.restore_ckpt))
        loc = 'cuda:{}'.format(args.gpu)
        checkpoint = torch.load(args.restore_ckpt, map_location=loc)
        model.load_state_dict(checkpoint, strict=False)

    model.train()

    train_entries, evaluation_entries = read_splits()

    train_dataset = KITTI_eigen(root=args.dataset_root,
                                inheight=args.inheight,
                                inwidth=args.inwidth,
                                entries=train_entries,
                                maxinsnum=args.maxinsnum,
                                depth_root=args.depth_root,
                                depthvls_root=args.depthvlsgt_root,
                                prediction_root=args.prediction_root,
                                ins_root=args.ins_root,
                                istrain=True,
                                muteaug=False,
                                banremovedup=False,
                                isgarg=False)
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_dataset) if args.distributed else None
    train_loader = data.DataLoader(train_dataset,
                                   batch_size=args.batch_size,
                                   pin_memory=True,
                                   num_workers=int(args.num_workers /
                                                   ngpus_per_node),
                                   drop_last=True,
                                   sampler=train_sampler)

    eval_dataset = KITTI_eigen(root=args.dataset_root,
                               inheight=args.evalheight,
                               inwidth=args.evalwidth,
                               entries=evaluation_entries,
                               maxinsnum=args.maxinsnum,
                               depth_root=args.depth_root,
                               depthvls_root=args.depthvlsgt_root,
                               prediction_root=args.prediction_root,
                               ins_root=args.ins_root,
                               istrain=False,
                               isgarg=True)
    eval_sampler = torch.utils.data.distributed.DistributedSampler(
        eval_dataset) if args.distributed else None
    eval_loader = data.DataLoader(eval_dataset,
                                  batch_size=1,
                                  pin_memory=True,
                                  num_workers=3,
                                  drop_last=True,
                                  sampler=eval_sampler)

    print(
        "Training splits contain %d images while test splits contain %d images"
        % (train_dataset.__len__(), eval_dataset.__len__()))

    if args.distributed:
        group = dist.new_group([i for i in range(ngpus_per_node)])

    optimizer, scheduler = fetch_optimizer(args, model,
                                           int(train_dataset.__len__() / 2))

    total_steps = 0

    if args.gpu == 0:
        logger = Logger(logroot)
        logger_evaluation = Logger(
            os.path.join(args.logroot, 'evaluation_eigen_background',
                         args.name))
        logger_evaluation_org = Logger(
            os.path.join(args.logroot, 'evaluation_eigen_background',
                         "{}_org".format(args.name)))
        logger.create_summarywriter()
        logger_evaluation.create_summarywriter()
        logger_evaluation_org.create_summarywriter()

    VAL_FREQ = 5000
    epoch = 0
    maxa1 = 0

    silog_criterion = silog_loss(variance_focus=args.variance_focus)

    st = time.time()
    should_keep_training = True
    while should_keep_training:
        train_sampler.set_epoch(epoch)
        for i_batch, data_blob in enumerate(train_loader):
            optimizer.zero_grad()

            image1 = data_blob['img1'].cuda(gpu) / 255.0
            image2 = data_blob['img2'].cuda(gpu) / 255.0
            intrinsic = data_blob['intrinsic'].cuda(gpu)
            insmap = data_blob['insmap'].cuda(gpu)
            depthgt = data_blob['depthmap'].cuda(gpu)
            posepred = data_blob['posepred'].cuda(gpu)
            mD_pred = data_blob['depthpred'].cuda(gpu)

            mD_pred_clipped = torch.clamp_min(mD_pred, min=args.min_depth_pred)

            outputs = model(image1, image2, mD_pred_clipped, intrinsic,
                            posepred, insmap)
            depthloss, depthselector = get_depth_loss(
                depthgt=depthgt,
                mD_pred=mD_pred,
                outputs=outputs,
                silog_criterion=silog_criterion)

            metrics = dict()
            metrics['depthloss'] = depthloss.item()

            loss = depthloss
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)

            optimizer.step()
            scheduler.step()

            if args.gpu == 0:
                logger.write_dict(metrics, step=total_steps)
                if total_steps % SUM_FREQ == 0:
                    dr = time.time() - st
                    resths = (args.num_steps -
                              total_steps) * dr / (total_steps + 1) / 60 / 60
                    print("Step: %d, rest hour: %f, depthloss: %f" %
                          (total_steps, resths, depthloss.item()))
                    logger.write_vls(data_blob, outputs, depthselector,
                                     total_steps)

            if total_steps % VAL_FREQ == 1:
                if args.gpu == 0:
                    results = validate_kitti(model.module,
                                             args,
                                             eval_loader,
                                             logger,
                                             group,
                                             total_steps,
                                             isorg=False)
                else:
                    results = validate_kitti(model.module,
                                             args,
                                             eval_loader,
                                             None,
                                             group,
                                             None,
                                             isorg=False)

                if args.gpu == 0:
                    logger_evaluation.write_dict(results, total_steps)
                    if maxa1 < results['d1']:
                        maxa1 = results['d1']
                        PATH = os.path.join(logroot, 'maxa1.pth')
                        torch.save(model.state_dict(), PATH)
                        print("model saved to %s" % PATH)

                if args.gpu == 0:
                    results = validate_kitti(model.module,
                                             args,
                                             eval_loader,
                                             None,
                                             group,
                                             total_steps,
                                             isorg=True)
                    logger_evaluation_org.write_dict(results, total_steps)
                else:
                    validate_kitti(model.module,
                                   args,
                                   eval_loader,
                                   None,
                                   group,
                                   None,
                                   isorg=True)

                model.train()

            total_steps += 1

            if total_steps > args.num_steps:
                should_keep_training = False
                break
        epoch = epoch + 1

    if args.gpu == 0:
        logger.close()
        PATH = os.path.join(logroot, 'final.pth')
        torch.save(model.state_dict(), PATH)

    return
Example #10
    def inference(self,
                  tokens,
                  token_lengths,
                  mels_for_prosody,
                  mel_lengths_for_prosody,
                  speakers,
                  mels_for_ge2e,
                  pitches,
                  pitch_lengths,
                  noise_scale=1.0,
                  length_scale=1.0):
        '''
        For inference.
        tokens: [Batch, Token_t] # Input text
        token_lengths: [Batch]  # Length of input text
        mels_for_prosody: [Batch, Mel_d, Mel_t] # Input of prosody encoder
        mel_lengths_for_prosody: [Batch]    # Length of input mel for prosody
        speakers: [Batch] or None   # Speaker index. Used only when hp.Speaker_Embedding.Type.upper() == 'LUT'
        mels_for_ge2e: [Batch * Samples, Mel_d, Mel_SE_t]    # Input of speaker embedding
        noise_scale: scalar of float
        length_scale: scalar of float or [Batch]. (I may change this to matrix to control speed letter by letter later)
        '''
        if 'LUT' in self.layer_Dict.keys():
            speakers = self.layer_Dict['LUT'](speakers)
        elif 'GE2E' in self.layer_Dict.keys():
            speakers = self.layer_Dict['GE2E'](mels_for_ge2e)
            speakers = GE2E_Normalize(speakers)
        else:
            speakers = None

        if 'Prosody_Encoder' in self.layer_Dict.keys():
            prosodies = self.layer_Dict['Prosody_Encoder'](
                mels_for_prosody, mel_lengths_for_prosody)
        else:
            prosodies = None

        if hp.Device != '-1': torch.cuda.synchronize()

        token_Masks = self.Mask_Generate(token_lengths)
        mean, log_Std, log_Durations, mask = self.layer_Dict['Encoder'](
            tokens, token_Masks, speakers, prosodies)
        # note: length_scale is used as a tensor here ([Batch]); a plain float default would fail on unsqueeze
        length_scale = length_scale.unsqueeze(-1).unsqueeze(-1)

        if hp.Device != '-1': torch.cuda.synchronize()

        durations = torch.ceil(torch.exp(log_Durations) * mask *
                               length_scale).squeeze(1)
        mel_Lengths = torch.clamp_min(torch.sum(durations, dim=1), 1.0).long()
        mel_Masks = self.Mask_Generate(mel_Lengths)

        attention_Masks = torch.unsqueeze(token_Masks, -1) * torch.unsqueeze(
            mel_Masks, 2)
        attention_Masks = attention_Masks.squeeze(1)

        attentions = self.Path_Generate(
            durations, attention_Masks)  # [Batch, Token_t, Mel_t]

        if hp.Device != '-1': torch.cuda.synchronize()

        mel_Mean = mean @ attentions  # [Batch, Mel_Dim, Token_t] @ [Batch, Token_t, Mel_t] -> [Batch, Mel_dim, Mel_t]
        mel_Log_Std = log_Std @ attentions  # [Batch, Mel_Dim, Token_t] @ [Batch, Token_t, Mel_t] -> [Batch, Mel_dim, Mel_t]
        noises = torch.randn_like(mel_Mean) * noise_scale

        if hp.Device != '-1': torch.cuda.synchronize()

        z = (mel_Mean + torch.exp(mel_Log_Std) * noises) * mel_Masks

        if 'Pitch_Interpolater' in self.layer_Dict.keys():
            pitches = self.layer_Dict['Pitch_Interpolater'](pitches,
                                                            pitch_lengths,
                                                            mel_Lengths)
        else:
            pitches = None

        mels, _, mel_Masks = self.layer_Dict['Decoder'](z,
                                                        mel_Masks,
                                                        speakers,
                                                        prosodies,
                                                        pitches,
                                                        reverse=True)

        if hp.Device != '-1': torch.cuda.synchronize()

        mels.masked_fill_(mel_Masks == 0.0, -hp.Sound.Max_Abs_Mel)

        return mels, mel_Lengths, attentions
Example #11
 def forward(self, input):
     return torch.clamp_min(input, self.min)
Example #12
 def _get_param(self, sp, sn):
     ap = torch.clamp_min(1 + self.m - sp, min=0.)
     an = torch.clamp_min(sn + self.m, min=0.)
     dp = 1 - self.m
     dn = self.m
     return ap, an, dp, dn
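In Circle-loss-style code these weights (`ap`, `an`) and margins (`dp`, `dn`) are typically folded into scaled logits and passed through a softplus of log-sum-exps. A hedged sketch of that follow-up step; the margin `m = 0.25` and scale `gamma = 64.0` are illustrative values, not taken from the snippet above:

import torch
import torch.nn.functional as F

m, gamma = 0.25, 64.0
sp = torch.rand(8)        # within-class similarities
sn = torch.rand(8, 4)     # between-class similarities

ap = torch.clamp_min(1 + m - sp, min=0.)
an = torch.clamp_min(sn + m, min=0.)
dp, dn = 1 - m, m

logit_p = -gamma * ap * (sp - dp)
logit_n = gamma * an * (sn - dn)
loss = F.softplus(torch.logsumexp(logit_n.flatten(), dim=0) +
                  torch.logsumexp(logit_p, dim=0))
print(loss)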
Example #13
    def _metrics(
        self,
        true_durs,
        true_text_len,
        pred_durs,
        true_pitch,
        pred_pitch,
        true_spect=None,
        pred_spect=None,
        true_spect_len=None,
        attn_logprob=None,
        attn_soft=None,
        attn_hard=None,
        attn_hard_dur=None,
    ):
        text_mask = get_mask_from_lengths(true_text_len)
        mel_mask = get_mask_from_lengths(true_spect_len)
        loss = 0.0

        # Dur loss and metrics
        durs_loss = F.mse_loss(pred_durs, (true_durs + 1).float().log(),
                               reduction='none')
        durs_loss = durs_loss * text_mask.float()
        durs_loss = durs_loss.sum() / text_mask.sum()

        durs_pred = pred_durs.exp() - 1
        durs_pred = torch.clamp_min(durs_pred, min=0)
        durs_pred = durs_pred.round().long()

        acc = ((true_durs == durs_pred) *
               text_mask).sum().float() / text_mask.sum() * 100
        acc_dist_1 = (((true_durs - durs_pred).abs() <= 1) *
                      text_mask).sum().float() / text_mask.sum() * 100
        acc_dist_3 = (((true_durs - durs_pred).abs() <= 3) *
                      text_mask).sum().float() / text_mask.sum() * 100

        pred_spect = pred_spect.transpose(1, 2)

        # Mel loss
        mel_loss = F.mse_loss(pred_spect, true_spect,
                              reduction='none').mean(dim=-2)
        mel_loss = mel_loss * mel_mask.float()
        mel_loss = mel_loss.sum() / mel_mask.sum()

        loss = loss + self.durs_loss_scale * durs_loss + self.mel_loss_scale * mel_loss

        # Aligner loss
        bin_loss, ctc_loss = None, None
        ctc_loss = self.forward_sum_loss(attn_logprob=attn_logprob,
                                         in_lens=true_text_len,
                                         out_lens=true_spect_len)
        loss = loss + ctc_loss
        if self.add_bin_loss:
            bin_loss = self.bin_loss(hard_attention=attn_hard,
                                     soft_attention=attn_soft)
            loss = loss + self.bin_loss_scale * bin_loss
        true_avg_pitch = average_pitch(true_pitch.unsqueeze(1),
                                       attn_hard_dur).squeeze(1)

        # Pitch loss
        pitch_loss = F.mse_loss(pred_pitch, true_avg_pitch,
                                reduction='none')  # noqa
        pitch_loss = (pitch_loss * text_mask).sum() / text_mask.sum()

        loss = loss + self.pitch_loss_scale * pitch_loss

        return loss, durs_loss, acc, acc_dist_1, acc_dist_3, pitch_loss, mel_loss, ctc_loss, bin_loss
Example #14
File: loss.py  Project: nogu-atsu/NARF
 def trunc_mae(a, b, thres=0.01):  # mean absolute error
     return torch.clamp_min(torch.abs(a - b), thres).mean()
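`trunc_mae` floors each absolute error at `thres` before averaging, so errors already below the threshold stop contributing and the loss can never drop below `thres`. A quick illustration:

import torch

a = torch.tensor([0.000, 0.005, 0.100])
b = torch.zeros(3)
# |a - b| = [0.000, 0.005, 0.100] -> clamped to [0.010, 0.010, 0.100] -> mean 0.04
print(torch.clamp_min(torch.abs(a - b), 0.01).mean())  # tensor(0.0400)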
Example #15
    def inference(
        self,
        text: torch.Tensor,
        text_lengths: torch.Tensor,
        feats: Optional[torch.Tensor] = None,
        feats_lengths: Optional[torch.Tensor] = None,
        sids: Optional[torch.Tensor] = None,
        spembs: Optional[torch.Tensor] = None,
        lids: Optional[torch.Tensor] = None,
        dur: Optional[torch.Tensor] = None,
        noise_scale: float = 0.667,
        noise_scale_dur: float = 0.8,
        alpha: float = 1.0,
        max_len: Optional[int] = None,
        use_teacher_forcing: bool = False,
    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        """Run inference.

        Args:
            text (Tensor): Input text index tensor (B, T_text,).
            text_lengths (Tensor): Text length tensor (B,).
            feats (Tensor): Feature tensor (B, aux_channels, T_feats,).
            feats_lengths (Tensor): Feature length tensor (B,).
            sids (Optional[Tensor]): Speaker index tensor (B,) or (B, 1).
            spembs (Optional[Tensor]): Speaker embedding tensor (B, spk_embed_dim).
            lids (Optional[Tensor]): Language index tensor (B,) or (B, 1).
            dur (Optional[Tensor]): Ground-truth duration (B, T_text,). If provided,
                skip the prediction of durations (i.e., teacher forcing).
            noise_scale (float): Noise scale parameter for flow.
            noise_scale_dur (float): Noise scale parameter for duration predictor.
            alpha (float): Alpha parameter to control the speed of generated speech.
            max_len (Optional[int]): Maximum length of acoustic feature sequence.
            use_teacher_forcing (bool): Whether to use teacher forcing.

        Returns:
            Tensor: Generated waveform tensor (B, T_wav).
            Tensor: Monotonic attention weight tensor (B, T_feats, T_text).
            Tensor: Duration tensor (B, T_text).

        """
        # encoder
        x, m_p, logs_p, x_mask = self.text_encoder(text, text_lengths)
        g = None
        if self.spks is not None:
            # (B, global_channels, 1)
            g = self.global_emb(sids.view(-1)).unsqueeze(-1)
        if self.spk_embed_dim is not None:
            # (B, global_channels, 1)
            g_ = self.spemb_proj(F.normalize(spembs.unsqueeze(0))).unsqueeze(-1)
            if g is None:
                g = g_
            else:
                g = g + g_
        if self.langs is not None:
            # (B, global_channels, 1)
            g_ = self.lang_emb(lids.view(-1)).unsqueeze(-1)
            if g is None:
                g = g_
            else:
                g = g + g_

        if use_teacher_forcing:
            # forward posterior encoder
            z, m_q, logs_q, y_mask = self.posterior_encoder(feats, feats_lengths, g=g)

            # forward flow
            z_p = self.flow(z, y_mask, g=g)  # (B, H, T_feats)

            # monotonic alignment search
            s_p_sq_r = torch.exp(-2 * logs_p)  # (B, H, T_text)
            # (B, 1, T_text)
            neg_x_ent_1 = torch.sum(
                -0.5 * math.log(2 * math.pi) - logs_p,
                [1],
                keepdim=True,
            )
            # (B, T_feats, H) x (B, H, T_text) = (B, T_feats, T_text)
            neg_x_ent_2 = torch.matmul(
                -0.5 * (z_p**2).transpose(1, 2),
                s_p_sq_r,
            )
            # (B, T_feats, H) x (B, H, T_text) = (B, T_feats, T_text)
            neg_x_ent_3 = torch.matmul(
                z_p.transpose(1, 2),
                (m_p * s_p_sq_r),
            )
            # (B, 1, T_text)
            neg_x_ent_4 = torch.sum(
                -0.5 * (m_p**2) * s_p_sq_r,
                [1],
                keepdim=True,
            )
            # (B, T_feats, T_text)
            neg_x_ent = neg_x_ent_1 + neg_x_ent_2 + neg_x_ent_3 + neg_x_ent_4
            # (B, 1, T_feats, T_text)
            attn_mask = torch.unsqueeze(x_mask, 2) * torch.unsqueeze(y_mask, -1)
            # monotonic attention weight: (B, 1, T_feats, T_text)
            attn = self.maximum_path(
                neg_x_ent,
                attn_mask.squeeze(1),
            ).unsqueeze(1)
            dur = attn.sum(2)  # (B, 1, T_text)

            # forward decoder with random segments
            wav = self.decoder(z * y_mask, g=g)
        else:
            # duration
            if dur is None:
                logw = self.duration_predictor(
                    x,
                    x_mask,
                    g=g,
                    inverse=True,
                    noise_scale=noise_scale_dur,
                )
                w = torch.exp(logw) * x_mask * alpha
                dur = torch.ceil(w)
            y_lengths = torch.clamp_min(torch.sum(dur, [1, 2]), 1).long()
            y_mask = make_non_pad_mask(y_lengths).unsqueeze(1).to(text.device)
            attn_mask = torch.unsqueeze(x_mask, 2) * torch.unsqueeze(y_mask, -1)
            attn = self._generate_path(dur, attn_mask)

            # expand the length to match with the feature sequence
            # (B, T_feats, T_text) x (B, T_text, H) -> (B, H, T_feats)
            m_p = torch.matmul(
                attn.squeeze(1),
                m_p.transpose(1, 2),
            ).transpose(1, 2)
            # (B, T_feats, T_text) x (B, T_text, H) -> (B, H, T_feats)
            logs_p = torch.matmul(
                attn.squeeze(1),
                logs_p.transpose(1, 2),
            ).transpose(1, 2)

            # decoder
            z_p = m_p + torch.randn_like(m_p) * torch.exp(logs_p) * noise_scale
            z = self.flow(z_p, y_mask, g=g, inverse=True)
            wav = self.decoder((z * y_mask)[:, :, :max_len], g=g)

        return wav.squeeze(1), attn.squeeze(1), dur.squeeze(1)
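The `torch.clamp_min(torch.sum(dur, [1, 2]), 1)` call above guarantees at least one output frame even when every predicted duration rounds down to zero (e.g. with a very small `alpha`), which keeps the mask and attention-path construction from collapsing. A tiny illustration of that guard:

import torch

dur = torch.zeros(2, 1, 5)  # every token gets a predicted duration of 0
y_lengths = torch.clamp_min(torch.sum(dur, [1, 2]), 1).long()
print(y_lengths)  # tensor([1, 1]) instead of tensor([0, 0])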
Example #16
    def forward(self, x: Tensor, targets: Tensor) -> Tensor:

        # cal sp, sn from x
        batchSize = x.size(0)
        reordered_x = torch.cat((x.narrow(0, int(batchSize // 3), int(batchSize // 3)), \
                                 x.narrow(0, int(2 * batchSize // 3), int(batchSize // 3)),
                                x.narrow(0, 0, int(batchSize // 3))), 0)

        # # # regularization loss

        pos = (x * reordered_x.data).sum(1).div_(self.T).exp_()

        # get all innerproduct, remove diag
        all_prob = torch.mm(x, x.t().data).div_(self.T).exp_() * self.diag_mat

        all_div = all_prob.sum(1)

        lnPmt = torch.div(pos, all_div)

        # negative probability
        Pon_div = all_div.repeat(batchSize, 1)
        lnPon = torch.div(all_prob, Pon_div.t())
        lnPon = -lnPon.add(-1)

        sp = lnPmt
        sn = lnPon
        lnPon = lnPon.log_()
        lnPmt = lnPmt.log_()
        # exit(0)
        # # ########################################  add alpha, beta
        ap = torch.clamp_min(-lnPmt.detach() + 1 + self.m, min=0.)
        an = torch.clamp_min(lnPon.detach() + self.m, min=0.)
        #
        delta_p = 1 - self.m
        delta_n = self.m
        #
        lnPmt = -ap * (sp - delta_p) * self.gamma
        lnPon = an * (sn - delta_n) * self.gamma

        lnPmt = lnPmt.exp_()
        lnPon = lnPon.exp_()
        #####################################################

        # equation 7 in ref. A (NCE paper)
        lnPon.log_()
        # also remove the pos term
        lnPon = lnPon.sum(1) - (-lnPmt.add(-1)).log_()
        lnPmt.log_()

        lnPmtsum = lnPmt.sum(0)
        lnPonsum = lnPon.sum(0)
        print(lnPmtsum, lnPonsum)
        loss = -(lnPmtsum + lnPonsum) / batchSize
        print("loss", loss)
        exit(0)

        sp = lnPmt
        sn = lnPon

        ap = torch.clamp_min(-sp.detach() + 1 + self.m, min=0.)
        an = torch.clamp_min(sn.detach() + self.m, min=0.)

        delta_p = 1 - self.m
        delta_n = self.m

        # logit_p = - ap * (sp - delta_p) * self.gamma
        # logit_n = an * (sn - delta_n) * self.gamma

        logit_p = -sp
        logit_n = sn

        print(logit_n)
        print(logit_p)

        print(logit_n.shape)
        print(logit_p.shape)
        loss = self.soft_plus(
            torch.logsumexp(logit_n, dim=0) + torch.logsumexp(logit_p, dim=0))
        print("loss", loss / 300)
        exit(0)
Example #17
def _expmap0(u, c):
    sqrt_c = c**0.5
    u_norm = torch.clamp_min(u.norm(dim=-1, p=2, keepdim=True), 1e-5)
    gamma_1 = tanh(sqrt_c * u_norm) * u / (sqrt_c * u_norm)
    return gamma_1
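`_expmap0` here and `_logmap0` from Example #6 are mutually inverse maps between the tangent space at the origin and the Poincaré ball of curvature `c`; the `clamp_min` on the norm only guards against division by zero. A self-contained round-trip check, re-declaring both maps with `torch.tanh` / `torch.atanh` in place of the module-level `tanh` / `artanh` helpers assumed by the snippets:

import torch

def expmap0(u, c):
    sqrt_c = c ** 0.5
    u_norm = torch.clamp_min(u.norm(dim=-1, p=2, keepdim=True), 1e-5)
    return torch.tanh(sqrt_c * u_norm) * u / (sqrt_c * u_norm)

def logmap0(y, c):
    sqrt_c = c ** 0.5
    y_norm = torch.clamp_min(y.norm(dim=-1, p=2, keepdim=True), 1e-5)
    return y / y_norm / sqrt_c * torch.atanh(sqrt_c * y_norm)

u = 0.1 * torch.randn(4, 8)
print(torch.allclose(logmap0(expmap0(u, 1.0), 1.0), u, atol=1e-5))  # True for tangent vectors of moderate norm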
Example #18
    def forward(self, predicts, targets, embeds, step):
        """Computes loss.

        Parameters
        ----------
        predicts: torch.Tensor
            Predicted labels
        targets: torch.Tensor
            True labels
        embeds: torch.Tensor
            Embeddings of the inputs
        step: int
            Value to compute the annealing factor for triplet loss.

        Returns
        -------
        torch.Tensor
            computed loss
        """
        alpha = self.params['alpha']
        margin = self.params['margin']
        n_mini_batch_size = embeds[0].shape[0] // 2

        # ce loss
        ce_loss = torch.nn.CrossEntropyLoss()(predicts, targets)

        Triplet_loss_weight = anneal_function('logistic', step,
                                              self.params['triplet_anneal_k'],
                                              self.params['triplet_anneal_b'])

        if self.params['type'] == 'sdtw':
            # DTWLoss (want to minimize dtw between duplicates and maximize dtw between non-duplicates)
            DTW_loss = torch.tensor([0]).float().to(embeds[0].device)
            for k in range(n_mini_batch_size):
                DTW_loss += torch.nn.functional.relu(
                    self.sdtw(embeds[0][k], embeds[1][k]) -
                    self.sdtw(embeds[0][k + n_mini_batch_size], embeds[1][
                        k + n_mini_batch_size]) + margin)

            DTW_loss /= (n_mini_batch_size)

            Triplet_loss = DTW_loss
            loss = alpha * ce_loss + (
                1. - alpha) * Triplet_loss * Triplet_loss_weight

        elif self.params['loss_type'] == 'l2':
            L2_loss = torch.nn.functional.relu(
                torch.sum((embeds[0][:n_mini_batch_size, -1, :] -
                           embeds[1][:n_mini_batch_size, -1, :])**2,
                          dim=-1) -
                torch.sum((embeds[0][:n_mini_batch_size, -1, :] -
                           embeds[1][n_mini_batch_size:, -1, :])**2,
                          dim=-1) + margin).sum()
            L2_loss /= n_mini_batch_size
            loss = alpha * ce_loss + (1. -
                                      alpha) * L2_loss * Triplet_loss_weight

        elif self.params['loss_type'] == 'cos_hinge':
            Cos_hinge_loss = torch.clamp_min(
                -torch.nn.CosineSimilarity(dim=-1)
                (embeds[0][:n_mini_batch_size, -1, :],
                 embeds[1][:n_mini_batch_size, -1, :]) +
                torch.nn.CosineSimilarity(dim=-1)(
                    embeds[0][:n_mini_batch_size, -1, :],
                    embeds[1][n_mini_batch_size:, -1, :]) + margin, 0).sum()

            Cos_hinge_loss += torch.clamp_min(
                -torch.nn.CosineSimilarity(dim=-1)
                (embeds[0][:n_mini_batch_size, -1, :],
                 embeds[1][:n_mini_batch_size, -1, :]) +
                torch.nn.CosineSimilarity(dim=-1)(
                    embeds[1][:n_mini_batch_size, -1, :],
                    embeds[1][n_mini_batch_size:, -1, :]) + margin, 0).sum()

            Cos_hinge_loss /= (2 * n_mini_batch_size)
            loss = alpha * ce_loss + (
                1. - alpha) * Cos_hinge_loss * Triplet_loss_weight
        elif self.params['loss_type'] == 'ce':
            loss = ce_loss
        else:
            raise KeyError(f"Unknown loss type: {self.params['loss_type']}")

        return loss
Example #19
def _project(x, c):
    norm = torch.clamp_min(x.norm(dim=-1, keepdim=True, p=2), 1e-5)
    maxnorm = (1 - 1e-3)
    cond = norm > maxnorm
    projected = x / norm * maxnorm
    return torch.where(cond, projected, x)
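Using the `_project` above: points with norm below `1 - 1e-3` pass through unchanged, while points outside are pulled back onto that radius (the `c` argument is unused in this variant). For example:

import torch

x = torch.tensor([[0.3, 0.4],    # norm 0.5 -> unchanged
                  [3.0, 4.0]])   # norm 5.0 -> rescaled onto radius 0.999
print(_project(x, c=1.0).norm(dim=-1))  # tensor([0.5000, 0.9990])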
Example #20
def tanh_grad(x):
    x = torch.clamp_min(1 - torch.tanh(x)**2, min=1e-5)
    return torch.log(x)
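`tanh_grad` returns log(1 - tanh(x)^2), the log-derivative of tanh; the `clamp_min` keeps the argument away from zero so the log stays finite once 1 - tanh(x)^2 underflows for large |x|. For example:

import torch

x = torch.tensor([0.0, 2.0, 20.0])
print(tanh_grad(x))
# approximately tensor([ 0.0000, -2.6500, -11.5129]); the last entry is log(1e-5), not -inf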
Example #21
    def forward(self,
                *,
                x,
                x_len,
                dur_target=None,
                pitch_target=None,
                energy_target=None,
                spec_len=None):
        """
        Args:
            x: Input from the encoder.
            x_len: Length of the input.
            dur_target:  Duration targets for the duration predictor. Needs to be passed in during training.
            pitch_target: Pitch targets for the pitch predictor. Needs to be passed in during training.
            energy_target: Energy targets for the energy predictor. Needs to be passed in during training.
            spec_len: Target spectrogram length. Needs to be passed in during training.
        """
        # Duration predictions (or ground truth) fed into Length Regulator to
        # expand the hidden states of the encoder embedding
        log_dur_preds = self.duration_predictor(x)
        log_dur_preds.masked_fill_(~get_mask_from_lengths(x_len), 0)
        # Output is Batch, Time
        if dur_target is not None:
            dur_out = self.length_regulator(x, dur_target)
        else:
            dur_preds = torch.clamp_min(
                torch.round(torch.exp(log_dur_preds)) - 1, 0).long()
            if not torch.sum(dur_preds, dim=1).bool().all():
                logging.error(
                    "Duration prediction failed on this batch. Setting to 1s")
                dur_preds += 1
            dur_out = self.length_regulator(x, dur_preds)
            spec_len = torch.sum(dur_preds, dim=1)
        out = dur_out
        out *= get_mask_from_lengths(spec_len).unsqueeze(-1)

        # Pitch
        pitch_preds = None
        if self.pitch:
            # Possible future work:
            #   Add pitch spectrogram prediction & conversion back to pitch contour using iCWT
            #   (see Appendix C of the FastSpeech 2/2s paper).
            pitch_preds = self.pitch_predictor(dur_out)
            pitch_preds.masked_fill_(~get_mask_from_lengths(spec_len), 0)
            if pitch_target is not None:
                pitch_out = self.pitch_lookup(
                    torch.bucketize(pitch_target, self.pitch_bins))
            else:
                pitch_out = self.pitch_lookup(
                    torch.bucketize(pitch_preds.detach(), self.pitch_bins))
            out += pitch_out
        out *= get_mask_from_lengths(spec_len).unsqueeze(-1)

        # Energy
        energy_preds = None
        if self.energy:
            energy_preds = self.energy_predictor(dur_out)
            if energy_target is not None:
                energy_out = self.energy_lookup(
                    torch.bucketize(energy_target, self.energy_bins))
            else:
                energy_out = self.energy_lookup(
                    torch.bucketize(energy_preds.detach(), self.energy_bins))
            out += energy_out
        out *= get_mask_from_lengths(spec_len).unsqueeze(-1)

        return out, log_dur_preds, pitch_preds, energy_preds, spec_len
Example #22
    def compute_loss_v3(self, preds, ground_truth):
        """
        :param preds:
            [batch_size, 125, 13, 13]
        :param ground_truth:
            [batch_size, 13, 13, 5, 25]
        :return:
        """
        # grid_size format is [h, w]
        grid_size = [preds.shape[2], preds.shape[3]]
        # ratio's format is [h, w]
        ratio = torch.tensor(
            [self.img_size[0] / grid_size[0], self.img_size[1] / grid_size[1]],
            dtype=torch.float32)
        ratio = ratio.to(opt.device)
        xy_offset, pred_bboxes, pred_confs, pred_classes = self.reorg_layer(
            preds)
        # obj_mask marks the cells that contain an object
        # obj_mask: [batch_size, 13, 13, 5]
        obj_mask = ground_truth[..., 4].to(torch.bool)
        # ignore_mask: marks predicted boxes whose best IoU with the gt boxes fails the threshold
        # ignore_mask: [batch_size, 13, 13, 5]
        ignore_mask = torch.empty_like(obj_mask)

        for i in range(preds.size(0)):
            # [13, 13, 5, 4] & [13, 13, 5] -> [M/4, 4]
            # valid_bbox: [M, 4]
            valid_bbox = ground_truth[i, ..., :4][obj_mask[i]]
            # valid_bbox = torch.masked_select(ground_truth[i, ..., :4], obj_mask[i, ..., None]).reshape(-1, 4)
            # ious: [13, 13, 5, M]
            # [13, 13, 5, 4] & [M, 4] -> [13, 13, 5, M]
            ious = yolov2_bbox_iou(pred_bboxes[i], valid_bbox)
            # best_iou: [13, 13, 5]
            best_iou = torch.max(ious, dim=-1)[0]
            ignore_mask[i] = torch.lt(best_iou, opt.best_iou_threshold)

        # pred_xy: [batch_size, 13, 13, 5, 2]
        # pred_xy's and label_xy's format is [w, h]
        # pred_bboxes were rescaled to the input-image scale by reorg_layer, so pred_xy has to be mapped back to the grid scale when computing the loss
        pred_xy = pred_bboxes[..., 0:2] / ratio.flip((0, )) - xy_offset
        true_xy = ground_truth[..., 0:2] / ratio.flip((0, )) - xy_offset

        # pred_wh: [batch_size, 13, 13, 5, 2]
        # divide by the anchors because reorg_layer multiplied the predicted box w/h by them; this just recovers the raw network outputs
        pred_twth = pred_bboxes[..., 2:4] / self.anchors
        true_twth = ground_truth[..., 2:4] / self.anchors
        # for numerical stability
        # prevent values equal to 0 from producing -inf in the log
        pred_twth[(pred_twth == 0.).nonzero()] = 1.
        true_twth[(true_twth == 0.).nonzero()] = 1.
        pred_twth = torch.clamp_min(pred_twth, min=1e-9)
        true_twth = torch.clamp_min(true_twth, min=1e-9)
        # take the log because reorg_layer applied an exponential to pred_wh
        pred_twth = torch.log(pred_twth)
        true_twth = torch.log(true_twth)

        # boxes with smaller area get higher weight
        # [batch_size, 13, 13, 5]
        box_loss_scale = 2. - (ground_truth[..., 2] / self.img_size[1]) * (
            ground_truth[..., 3] / self.img_size[0])

        # compute the xy and wh losses only for predicted boxes that contain an object
        # [batch_size, 13, 13, 5, 2] & [batch_size, 13, 13, 5, 1] & [batch_size, 13, 13, 5, 1] -> [batch_size,13,13,5,1]
        obj_mask = obj_mask[..., None].to(torch.float32)
        xy_loss = torch.sum(
            torch.pow(true_xy - pred_xy, 2.) * obj_mask *
            box_loss_scale[..., None])
        wh_loss = torch.sum(
            torch.pow(true_twth - pred_twth, 2.) * obj_mask *
            box_loss_scale[..., None])

        # compute the confidence loss for predicted boxes that contain an object
        # [batch_size, 13, 13, 5, 1] & ([batch_size,13,13,5,1] & [batch_size,13,13,5,1] -> [batch_size,13,13,5,1]
        bce_loss = torch.nn.BCEWithLogitsLoss(reduction='none')
        conf_loss_obj = obj_mask * bce_loss(pred_confs, obj_mask)

        # confidence loss for predictions with no object whose best bbox IoU also fails the threshold
        # [batch_size,13,13,5,1] & [batch_size,13,13,5,1] & [batch_size,13,13,5,1] -> [batch_size,13,13,5,1]
        # ignore_mask: [batch_size, 13, 13, 5, 1]
        ignore_mask = ignore_mask[..., None].to(torch.float32)
        conf_loss_noobj = (1. - obj_mask) * ignore_mask * bce_loss(
            pred_confs, obj_mask)

        # predictions with no object but whose IoU with a gt box meets the threshold contribute no loss

        # total conf loss
        # [batch_size, 13, 13, 5, 1]
        conf_loss = conf_loss_obj + conf_loss_noobj
        if opt.use_focal_loss:
            focal_mask = self.focal_loss(labels=obj_mask, preds=pred_confs)
            conf_loss = torch.sum(focal_mask * conf_loss)
        else:
            conf_loss = torch.sum(conf_loss)

        # classification loss for predicted boxes that contain an object
        if opt.use_smooth_labels:
            true_classes = self.smooth_labels(ground_truth[..., 5:],
                                              opt.coco_class_num)
        else:
            true_classes = ground_truth[..., 5:]
        # [batch_size,13,13,5] & [batch_size,13,13,5,20] & [batch_size,13,13,5,20] -> [batch_size,13,13,5,20]
        class_loss = torch.sum(obj_mask * bce_loss(pred_classes, true_classes))

        # get loss of single img
        total_loss = (xy_loss + wh_loss + conf_loss +
                      class_loss) / opt.batch_size
        loss_dict = {
            'total_loss': total_loss,
            'xy_loss': xy_loss / opt.batch_size,
            'wh_loss': wh_loss / opt.batch_size,
            'conf_loss': conf_loss / opt.batch_size,
            'class_loss': class_loss / opt.batch_size
        }

        return loss_dict
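
A minimal sketch (toy tensor standing in for pred_twth / true_twth, not taken from the snippet above) of why the width/height values are clamped before the log: torch.log(0.) returns -inf, which would poison the squared-error loss.

import torch

twth = torch.tensor([0.0, 0.5, 2.0])

twth[twth == 0.] = 1.                          # empty anchor slots: log(1) = 0, no loss contribution
safe = torch.log(torch.clamp_min(twth, 1e-9))  # the clamp keeps any remaining tiny value finite
# without the clamp, torch.log(0.) would return -inf and propagate through the loss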
Example #23
0
 def proj(self, x, c):
     norm = torch.clamp_min(x.norm(dim=-1, keepdim=True, p=2), self.min_norm)
     maxnorm = (1 - self.eps[x.dtype]) / (c ** 0.5)
     cond = norm > maxnorm
     projected = x / norm * maxnorm
     return torch.where(cond, projected, x)
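
A small usage sketch, assuming a Poincaré-ball-style manifold object whose min_norm and dtype-keyed eps table are hypothetical (typical values, not taken from the original class); it shows the clamp_min keeping the norm away from zero so the division is safe.

import torch

class Ball:
    min_norm = 1e-15
    eps = {torch.float32: 4e-3, torch.float64: 1e-5}

    def proj(self, x, c):
        norm = torch.clamp_min(x.norm(dim=-1, keepdim=True, p=2), self.min_norm)
        maxnorm = (1 - self.eps[x.dtype]) / (c ** 0.5)
        cond = norm > maxnorm
        projected = x / norm * maxnorm
        return torch.where(cond, projected, x)

x = torch.randn(4, 3) * 10                   # points far outside the unit ball
x_proj = Ball().proj(x, c=1.0)
assert (x_proj.norm(dim=-1) <= 1.0).all()    # every point pulled back inside the ball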
Example #24
0
 def forward(ctx, x: Tensor, min_val: float) -> Tensor:
     y = torch.clamp_min(x, min_val)
     return y
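
The snippet above looks like the forward of a custom torch.autograd.Function; a self-contained sketch of the full pattern it typically belongs to. The backward shown here (pass the gradient only where the input was not clamped) is an assumption, not taken from the original.

import torch
from torch import Tensor

class ClampMin(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x: Tensor, min_val: float) -> Tensor:
        ctx.save_for_backward(x >= min_val)   # remember where the input survived the clamp
        return torch.clamp_min(x, min_val)

    @staticmethod
    def backward(ctx, grad_output: Tensor):
        mask, = ctx.saved_tensors
        return grad_output * mask, None       # no gradient w.r.t. min_val

x = torch.tensor([-1.0, 0.5, 2.0], requires_grad=True)
ClampMin.apply(x, 0.0).sum().backward()
print(x.grad)                                 # tensor([0., 1., 1.])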
Example #25
0
def validate_kitti(model,
                   args,
                   eval_loader,
                   logger,
                   group,
                   total_steps,
                   isdeepv2d=False):
    """ Peform validation using the KITTI-2015 (train) split """
    """ Peform validation using the KITTI-2015 (train) split """
    model.eval()
    gpu = args.gpu
    eval_measures_depth = torch.zeros(10).cuda(device=gpu)
    for val_id, data_blob in enumerate(tqdm(eval_loader)):
        image1 = data_blob['img1'].cuda(gpu) / 255.0
        image2 = data_blob['img2'].cuda(gpu) / 255.0
        intrinsic = data_blob['intrinsic'].cuda(gpu)
        insmap = data_blob['insmap'].cuda(gpu)
        posepred = data_blob['posepred'].cuda(gpu)
        depthgt = data_blob['depthmap'].cuda(gpu)

        if not args.initbymD:
            mD_pred = data_blob['depthpred'].cuda(gpu)
        else:
            mD_pred = data_blob['mdDepth_pred'].cuda(gpu)

        mD_pred_clipped = torch.clamp_min(mD_pred, min=args.min_depth_pred)

        if not isdeepv2d:
            outputs = model(image1, image2, mD_pred_clipped, intrinsic,
                            posepred, insmap)
            predread = outputs[('depth', 2)]
        else:
            depthpred_deepv2d = data_blob['depthpred_deepv2d'].cuda(gpu)
            predread = depthpred_deepv2d
            # predread = data_blob['mdDepth_pred'].cuda(gpu)

        selector = ((depthgt > 0) * (predread > 0) *
                    (depthgt > args.min_depth_eval) *
                    (depthgt < args.max_depth_eval)).float()
        predread = torch.clamp(predread,
                               min=args.min_depth_eval,
                               max=args.max_depth_eval)
        depth_gt_flatten = depthgt[selector == 1].cpu().numpy()
        pred_depth_flatten = predread[selector == 1].cpu().numpy()

        pred_depth_flatten = np.median(
            depth_gt_flatten / pred_depth_flatten) * pred_depth_flatten

        eval_measures_depth_np = compute_errors(gt=depth_gt_flatten,
                                                pred=pred_depth_flatten)

        eval_measures_depth[:9] += torch.tensor(eval_measures_depth_np).cuda(
            device=gpu)
        eval_measures_depth[9] += 1

    if args.distributed:
        dist.all_reduce(tensor=eval_measures_depth,
                        op=dist.ReduceOp.SUM,
                        group=group)

    if args.gpu == 0:
        eval_measures_depth[
            0:9] = eval_measures_depth[0:9] / eval_measures_depth[9]
        eval_measures_depth = eval_measures_depth.cpu().numpy()
        print('Computing depth errors for %d eval samples' %
              (int(eval_measures_depth[9].item())))
        print("{:>7}, {:>7}, {:>7}, {:>7}, {:>7}, {:>7}, {:>7}, {:>7}, {:>7}".
              format('silog', 'abs_rel', 'log10', 'rms', 'sq_rel', 'log_rms',
                     'd1', 'd2', 'd3'))
        for i in range(8):
            print('{:7.3f}, '.format(eval_measures_depth[i]), end='')
        print('{:7.3f}'.format(eval_measures_depth[8]))

        return {
            'silog': float(eval_measures_depth[0]),
            'abs_rel': float(eval_measures_depth[1]),
            'log10': float(eval_measures_depth[2]),
            'rms': float(eval_measures_depth[3]),
            'sq_rel': float(eval_measures_depth[4]),
            'log_rms': float(eval_measures_depth[5]),
            'd1': float(eval_measures_depth[6]),
            'd2': float(eval_measures_depth[7]),
            'd3': float(eval_measures_depth[8])
        }
    else:
        return None
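
A tiny sketch (toy NumPy arrays, not real KITTI depths) of the median-scaling step used above before computing the depth errors: the prediction is correct only up to an unknown global scale, which the median ratio removes.

import numpy as np

depth_gt = np.array([2.0, 4.0, 8.0])
depth_pred = np.array([1.0, 2.0, 4.0])        # correct up to an unknown scale

scale = np.median(depth_gt / depth_pred)      # 2.0
depth_pred_scaled = scale * depth_pred        # [2., 4., 8.] -> scale ambiguity removed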
Example #26
0
    def forward(self, x, x_mask, dr=None, g=None, reverse=False, noise_scale=1.0):
        """
        Shapes:
            - x: :math:`[B, C, T]`
            - x_mask: :math:`[B, 1, T]`
            - dr: :math:`[B, 1, T]`
            - g: :math:`[B, C]`
        """
        # condition encoder text
        x = self.pre(x)
        if g is not None:
            x = x + self.cond(g)
        x = self.convs(x, x_mask)
        x = self.proj(x) * x_mask

        if not reverse:
            flows = self.flows
            assert dr is not None

            # condition encoder duration
            h = self.post_pre(dr)
            h = self.post_convs(h, x_mask)
            h = self.post_proj(h) * x_mask
            noise = (
                torch.randn(dr.size(0), 2, dr.size(2)).to(
                    device=x.device, dtype=x.dtype
                )
                * x_mask
            )
            z_q = noise

            # posterior encoder
            logdet_tot_q = 0.0
            for idx, flow in enumerate(self.post_flows):
                z_q, logdet_q = flow(z_q, x_mask, g=(x + h))
                logdet_tot_q = logdet_tot_q + logdet_q
                if idx > 0:
                    z_q = torch.flip(z_q, [1])

            z_u, z_v = torch.split(z_q, [1, 1], 1)
            u = torch.sigmoid(z_u) * x_mask
            z0 = (dr - u) * x_mask

            # posterior encoder - neg log likelihood
            logdet_tot_q += torch.sum(
                (F.logsigmoid(z_u) + F.logsigmoid(-z_u)) * x_mask, [1, 2]
            )
            nll_posterior_encoder = (
                torch.sum(
                    -0.5 * (math.log(2 * math.pi) + (noise ** 2)) * x_mask, [1, 2]
                )
                - logdet_tot_q
            )

            z0 = torch.log(torch.clamp_min(z0, 1e-5)) * x_mask
            logdet_tot = torch.sum(-z0, [1, 2])
            z = torch.cat([z0, z_v], 1)

            # flow layers
            for idx, flow in enumerate(flows):
                z, logdet = flow(z, x_mask, g=x, reverse=reverse)
                logdet_tot = logdet_tot + logdet
                if idx > 0:
                    z = torch.flip(z, [1])

            # flow layers - neg log likelihood
            nll_flow_layers = (
                torch.sum(0.5 * (math.log(2 * math.pi) + (z ** 2)) * x_mask, [1, 2])
                - logdet_tot
            )
            return nll_flow_layers + nll_posterior_encoder

        flows = list(reversed(self.flows))
        flows = flows[:-2] + [flows[-1]]  # remove a useless vflow
        z = (
            torch.rand(x.size(0), 2, x.size(2)).to(device=x.device, dtype=x.dtype)
            * noise_scale
        )
        for flow in flows:
            z = torch.flip(z, [1])
            z = flow(z, x_mask, g=x, reverse=reverse)

        z0, _ = torch.split(z, [1, 1], 1)
        logw = z0
        return logw
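
A short numerical check (toy values, only basic calculus assumed) of why the log transform above contributes sum(-z0) to the log-determinant: for y = log(x), dy/dx = 1/x, so log|dy/dx| = -log(x) = -y.

import torch

x = torch.tensor([0.5, 1.0, 3.0])
y = torch.log(torch.clamp_min(x, 1e-5))       # clamp is inactive for these values

dydx = 1.0 / x                                # derivative of log(x)
assert torch.allclose(torch.log(dydx), -y)    # log|dy/dx| == -y, hence logdet_tot = sum(-z0)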
Example #27
0
    def compute_loss_v3(self, preds, ground_truth, anchor_base, img_size):
        """
        :param preds: [N, 125, 13, 13]
        :param ground_truth: [N, 13, 13, 5, 25]
        :param anchor_base: [5, 2]
        :param img_size: 416
        :return:
        """
        # grid_size format is [h, w]
        N = preds.size(0)
        grid_size = preds.shape[2]
        bce_no_reduce = torch.nn.BCEWithLogitsLoss(reduction='none')
        # ratio's format is [h, w]
        ratio = (img_size / grid_size).float().to(self.opt.device)
        xy_offset, pred_bboxes, pred_confs, pred_classes = self.reorg_layer(
            preds, anchor_base, img_size)
        # obj_mask marks the cells that contain an object
        # obj_mask: [N, 13, 13, 5]
        obj_mask = ground_truth[..., 4].bool()

        # ignore_mask: ignore the conf loss of pred boxes whose IoU with any gt_box exceeds 0.6
        # ignore_mask: [N, 13, 13, 5]
        ignore_mask = torch.zeros_like(obj_mask).bool()
        for i in range(N):
            # [13, 13, 5, 4] & [13, 13, 5] -> [M/4, 4]
            # valid_bbox: [M, 4] / [ctr_x, ctr_y, w, h]
            valid_bbox = ground_truth[i, ..., :4][obj_mask[i]]
            # valid_bbox = torch.masked_select(ground_truth[i, ..., :4], obj_mask[i, ..., None]).reshape(-1, 4)
            # ious: [13, 13, 5, M]
            # [13, 13, 5, 4] & [M, 4] -> [13, 13, 5, M]
            ious = yolov2_bbox_iou(pred_bboxes[i], valid_bbox)
            # best_iou: [13, 13, 5]
            max_iou, _ = torch.max(ious, dim=-1)
            ignore_mask[i] = max_iou.lt(self.opt.pos_iou_thresh)

        # pred_xy: [N, 13, 13, 5, 2]
        # pred_xy's and label_xy's format is [w, h]
        # pred_bboxes were rescaled to the input-image scale by reorg_layer, so rescale pred_xy back to the grid scale when computing the loss
        pred_dxdy = (pred_bboxes[..., 0:2] / ratio) - xy_offset
        true_dxdy = (ground_truth[..., 0:2] / ratio) - xy_offset

        # pred_wh: [N, 13, 13, 5, 2]
        # divide by the anchors because reorg_layer multiplied predict_bbox_wh by them; this recovers the raw model prediction
        pred_twth = pred_bboxes[..., 2:4] / anchor_base
        true_twth = ground_truth[..., 2:4] / anchor_base
        # for numerical stability
        # prevent values equal to 0 from turning into -inf under the log
        pred_twth[pred_twth == 0.] = 1.
        true_twth[true_twth == 0.] = 1.
        # take the log because reorg_layer applied an exponential to pred_wh
        pred_twth = torch.clamp_min(pred_twth, min=1e-9).log()
        true_twth = torch.clamp_min(true_twth, min=1e-9).log()

        # boxes with smaller area get a higher weight
        # [N, 13, 13, 5]
        loc_loss_weight = 1.5 - (ground_truth[..., 2] /
                                 img_size) * (ground_truth[..., 3] / img_size)
        assert (loc_loss_weight <= 1.5).all()
        # xy and wh losses are computed only for predicted boxes that contain an object
        # [N, 13, 13, 5, 2] & [N, 13, 13, 5, 1] & [N, 13, 13, 5, 1] -> [N,13,13,5,1]
        obj_mask = obj_mask[..., None].float()
        dxdy_loss = torch.pow(true_dxdy - pred_dxdy,
                              2.) * obj_mask * loc_loss_weight[..., None]
        dxdy_loss = self.opt.reg_scale * dxdy_loss.sum() / N
        twth_loss = torch.pow(true_twth - pred_twth,
                              2.) * obj_mask * loc_loss_weight[..., None]
        twth_loss = self.opt.reg_scale * twth_loss.sum() / N

        # confidence loss for predicted boxes that contain an object
        # [N, 13, 13, 5, 1] & [N,13,13,5,1] & [N,13,13,5,1] -> [N,13,13,5,1]
        conf_loss_obj = obj_mask * bce_no_reduce(pred_confs, obj_mask)
        # confidence loss for predicted boxes without an object whose IoU with every gt_box is below 0.6
        # [N,13,13,5,1] & [N,13,13,5,1] & [N,13,13,5,1] -> [N,13,13,5,1]
        # ignore_mask: [N, 13, 13, 5, 1]
        ignore_mask = ignore_mask[..., None].float()
        conf_loss_noobj = (1. - obj_mask) * ignore_mask * bce_no_reduce(
            pred_confs, obj_mask)

        # total conf loss
        # [batch_size, 13, 13, 5, 1]
        conf_loss = self.opt.obj_scale * conf_loss_obj + self.opt.noobj_scale * conf_loss_noobj
        if self.opt.use_focal_loss:
            focal_mask = self.focal_loss(labels=obj_mask, preds=pred_confs)
            conf_loss = (focal_mask * conf_loss).sum() / N
        else:
            conf_loss = conf_loss.sum() / N

        # classification loss for predicted boxes that contain an object
        if self.opt.use_smooth_labels:
            true_classes = self.smooth_labels(ground_truth[..., 5:],
                                              self.opt.voc_class_num)
        else:
            true_classes = ground_truth[..., 5:]
        # [batch_size,13,13,5] & [batch_size,13,13,5,20] & [batch_size,13,13,5,20] -> [batch_size,13,13,5,20]
        class_loss = obj_mask * bce_no_reduce(pred_classes, true_classes)
        class_loss = self.opt.cls_scale * class_loss.sum() / N

        total_loss = dxdy_loss + twth_loss + conf_loss + class_loss
        loss_list = [dxdy_loss, twth_loss, conf_loss, class_loss, total_loss]
        self.update_meters(loss_list)
        return total_loss
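
A minimal sketch (dummy shapes and hypothetical IoU values) of the ignore-mask logic above: a predicted box is only penalised as background if its best IoU against every ground-truth box stays below the threshold.

import torch

pos_iou_thresh = 0.6
# ious: [cells, anchors, M] IoU of every predicted box against M gt boxes
ious = torch.tensor([[[0.1, 0.2], [0.7, 0.3]],
                     [[0.0, 0.5], [0.9, 0.8]]])

best_iou, _ = torch.max(ious, dim=-1)        # best match per predicted box
ignore_mask = best_iou.lt(pos_iou_thresh)    # True -> still counted as background
# tensor([[ True, False],
#         [ True, False]])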
Example #28
0
 def forward(self, pred, target):
     losses = -(((1 - pred)**self.gamma) * target *
                torch.clamp_min(torch.log(pred), -100) + (pred**self.zeta) *
                (1 - target) * torch.clamp_min(torch.log(1 - pred), -100))
     return torch.mean(losses)
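
A toy check (assumed values) of why the log terms above are clamped at -100: a hard-zero prediction on a zero target would otherwise produce 0 * log(0) = nan and wipe out the mean loss.

import torch

pred = torch.tensor([0.0, 0.5, 1.0])
target = torch.tensor([0.0, 1.0, 1.0])

raw = target * torch.log(pred)                           # 0 * log(0) -> nan
safe = target * torch.clamp_min(torch.log(pred), -100)   # 0 * (-100) -> 0
print(raw.mean(), safe.mean())                           # nan vs. a finite value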
Example #29
0
def validate_kitti(model, args, eval_loader, group, seqmap):
    """ Peform validation using the KITTI-2015 (train) split """
    """ Peform validation using the KITTI-2015 (train) split """
    model.eval()
    gpu = args.gpu

    pred_pose_recs = dict()
    for k in seqmap.keys():
        local_eval_num = int(seqmap[k]['enid']) - int(seqmap[k]['stid'])
        pred_pose_recs[k] = torch.zeros(local_eval_num, 4, 4).cuda(device=gpu)

    for val_id, data_blob in enumerate(tqdm(eval_loader)):
        image1 = data_blob['img1'].cuda(gpu) / 255.0
        image2 = data_blob['img2'].cuda(gpu) / 255.0
        intrinsic = data_blob['intrinsic'].cuda(gpu)
        insmap = data_blob['insmap'].cuda(gpu)
        posepred = data_blob['posepred'].cuda(gpu)
        mD_pred = data_blob['mdDepth_pred'].cuda(gpu)
        ang_decps_pad = data_blob['ang_decps_pad'].cuda(gpu)
        scl_decps_pad = data_blob['scl_decps_pad'].cuda(gpu)
        mvd_decps_pad = data_blob['mvd_decps_pad'].cuda(gpu)

        if args.banins:
            insmap = insmap * 0

        mD_pred_clipped = torch.clamp_min(mD_pred, min=args.min_depth_pred)
        posepred = posepred[:, :, 0]
        ang_decps_pad = ang_decps_pad[:, :, 0]
        scl_decps_pad = scl_decps_pad[:, :, 0]
        mvd_decps_pad = mvd_decps_pad[:, :, 0]

        outputs = model(image1, image2, mD_pred_clipped, intrinsic, posepred, ang_decps_pad, scl_decps_pad, mvd_decps_pad, insmap)

        for k in range(len(data_blob['tag'])):
            posepred = outputs[('afft_all', 2)][k, -1]
            tag = data_blob['tag'][k]
            seq = tag.split(' ')[0].split('/')[1][0:21]
            frmid = int(tag.split(' ')[1]) - int(seqmap[seq]['stid'])
            pred_pose_recs[seq][frmid] = posepred

    for k in seqmap.keys():
        dist.all_reduce(tensor=pred_pose_recs[k], op=dist.ReduceOp.SUM, group=group)

    if args.gpu == 0:

        tot_err = dict()
        tot_err['positions_pred'] = 0
        tot_err['positions_RANSAC'] = 0
        tot_err['positions_Deepv2d'] = 0
        tot_err['positions_RANSAC_Deepv2dscale'] = 0
        tot_err['positions_RANSAC_Odomscale'] = 0

        for s in seqmap.keys():
            posrec = dict()

            pred_poses = pred_pose_recs[s].cpu().numpy()

            RANSAC_poses = list()
            for k in range(int(seqmap[s]['stid']), int(seqmap[s]['enid'])):
                RANSAC_pose_path = os.path.join(args.RANSACPose_root, "000", s[0:10], s + "_sync", 'image_02', "{}.pickle".format(str(k).zfill(10)))
                RANSAC_pose = pickle.load(open(RANSAC_pose_path, "rb"))
                RANSAC_poses.append(RANSAC_pose[0])

            Deepv2d_poses = list()
            for k in range(int(seqmap[s]['stid']), int(seqmap[s]['enid'])):
                Deepv2d_pose_path = os.path.join(args.deepv2dPose_root, s[0:10], s + "_sync", 'posepred', "{}.txt".format(str(k).zfill(10)))
                Deepv2d_pose = read_deepv2d_pose(Deepv2d_pose_path)
                Deepv2d_poses.append(Deepv2d_pose)

            gtposes_sourse = readlines(os.path.join(project_rootdir, 'exp_poses/kittiodom_gt/poses', "{}.txt".format(str(seqmap[s]['mapid']).zfill(2))))
            gtposes = list()
            for gtpose_src in gtposes_sourse:
                gtpose = np.eye(4).flatten()
                for numstridx, numstr in enumerate(gtpose_src.split(' ')):
                    gtpose[numstridx] = float(numstr)
                gtpose = np.reshape(gtpose, [4, 4])
                gtposes.append(gtpose)

            relposes = list()
            for k in range(len(gtposes) - 1):
                relposes.append(np.linalg.inv(gtposes[k + 1]) @ gtposes[k])

            calib_dir = os.path.join(args.dataset_root, "{}".format(s[0:10]))
            cam2cam = read_calib_file(os.path.join(calib_dir, 'calib_cam_to_cam.txt'))
            velo2cam = read_calib_file(os.path.join(calib_dir, 'calib_velo_to_cam.txt'))
            imu2cam = read_calib_file(os.path.join(calib_dir, 'calib_imu_to_velo.txt'))
            intrinsic, extrinsic = get_intrinsic_extrinsic(cam2cam, velo2cam, imu2cam)

            positions_odom = list()
            scale_odom = list()
            stpos = np.array([[0, 0, 0, 1]]).T
            accumP = np.eye(4)
            for r in relposes:
                accumP = r @ accumP
                positions_odom.append((np.linalg.inv(extrinsic) @ np.linalg.inv(accumP) @ stpos)[0:3, 0])
                scale_odom.append(np.sqrt(np.sum(r[0:3, 3] ** 2) + 1e-10))
            positions_odom = np.array(positions_odom)
            scale_odom = np.array(scale_odom)

            positions_pred = list()
            scale_pred = list()
            stpos = np.array([[0, 0, 0, 1]]).T
            accumP = np.eye(4)
            for p in pred_poses:
                accumP = p @ accumP
                positions_pred.append((np.linalg.inv(extrinsic) @ np.linalg.inv(accumP) @ stpos)[0:3, 0])
                scale_pred.append(np.sqrt(np.sum(p[0:3, 3] ** 2) + 1e-10))
            positions_pred = np.array(positions_pred)
            scale_pred = np.array(scale_pred)

            positions_RANSAC = list()
            scale_RANSAC = list()
            stpos = np.array([[0, 0, 0, 1]]).T
            accumP = np.eye(4)
            for r in RANSAC_poses:
                accumP = r @ accumP
                positions_RANSAC.append((np.linalg.inv(extrinsic) @ np.linalg.inv(accumP) @ stpos)[0:3, 0])
                scale_RANSAC.append(np.sqrt(np.sum(r[0:3, 3] ** 2) + 1e-10))
            positions_RANSAC = np.array(positions_RANSAC)
            scale_RANSAC = np.array(scale_RANSAC)

            positions_Deepv2d = list()
            scale_Deepv2d = list()
            stpos = np.array([[0, 0, 0, 1]]).T
            accumP = np.eye(4)
            for d in Deepv2d_poses:
                accumP = d @ accumP
                positions_Deepv2d.append((np.linalg.inv(extrinsic) @ np.linalg.inv(accumP) @ stpos)[0:3, 0])
                scale_Deepv2d.append(np.sqrt(np.sum(d[0:3, 3] ** 2) + 1e-10))
            positions_Deepv2d = np.array(positions_Deepv2d)
            scale_Deepv2d = np.array(scale_Deepv2d)

            positions_RANSAC_Deepv2dscale = list()
            stpos = np.array([[0, 0, 0, 1]]).T
            accumP = np.eye(4)
            for i, r in enumerate(RANSAC_poses):
                r[0:3, 3] = r[0:3, 3] / np.sqrt(np.sum(r[0:3, 3] ** 2) + 1e-10) * np.sqrt(
                    np.sum(Deepv2d_poses[i][0:3, 3] ** 2) + 1e-10)
                accumP = r @ accumP
                positions_RANSAC_Deepv2dscale.append((np.linalg.inv(extrinsic) @ np.linalg.inv(accumP) @ stpos)[0:3, 0])
            positions_RANSAC_Deepv2dscale = np.array(positions_RANSAC_Deepv2dscale)

            positions_RANSAC_Odomscale = list()
            stpos = np.array([[0, 0, 0, 1]]).T
            accumP = np.eye(4)
            for i, r in enumerate(RANSAC_poses):
                r[0:3, 3] = r[0:3, 3] / np.sqrt(np.sum(r[0:3, 3] ** 2) + 1e-10) * np.sqrt(
                    np.sum(relposes[i][0:3, 3] ** 2) + 1e-10)
                accumP = r @ accumP
                positions_RANSAC_Odomscale.append((np.linalg.inv(extrinsic) @ np.linalg.inv(accumP) @ stpos)[0:3, 0])
            positions_RANSAC_Odomscale = np.array(positions_RANSAC_Odomscale)

            posrec['positions_pred'] = positions_pred
            posrec['positions_RANSAC'] = positions_RANSAC
            posrec['positions_Deepv2d'] = positions_Deepv2d
            posrec['positions_RANSAC_Deepv2dscale'] = positions_RANSAC_Deepv2dscale
            posrec['positions_RANSAC_Odomscale'] = positions_RANSAC_Odomscale

            scalerec = dict()
            scalerec['scale_pred'] = scale_pred
            scalerec['scale_RANSAC'] = scale_RANSAC
            scalerec['scale_Deepv2d'] = scale_Deepv2d

            print("============= %s ============" % (s))
            print("In total %d images," % positions_odom.shape[0])
            for k in posrec.keys():
                err_odom = np.mean(np.sqrt(np.sum((posrec[k] - positions_odom) ** 2, axis=1)))

                if 'scale_{}'.format(k.split('_')[1]) in scalerec.keys():
                    err_scale = np.mean(np.abs(scalerec['scale_{}'.format(k.split('_')[1])] - scale_odom))
                else:
                    err_scale = np.nan

                tot_err[k] += err_odom * len(pred_poses)
                print("%s, err_odom: %f, err_scale: %f" % (k, err_odom.item(), err_scale.item()))
        return {'absl': float(tot_err['positions_pred'].item()),}
    else:
        return None
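
A compact sketch (identity extrinsic and toy relative poses, not real KITTI data) of the pose-accumulation loop used repeatedly above to turn frame-to-frame camera poses into a trajectory of positions:

import numpy as np

# three toy frame-to-frame poses: identity rotation, growing forward translation along z
rel_poses = []
for i in range(3):
    p = np.eye(4)
    p[2, 3] = 1.0 + i
    rel_poses.append(p)

extrinsic = np.eye(4)                 # assume camera frame == world frame here
stpos = np.array([[0, 0, 0, 1]]).T

positions = []
accumP = np.eye(4)
for r in rel_poses:
    accumP = r @ accumP
    positions.append((np.linalg.inv(extrinsic) @ np.linalg.inv(accumP) @ stpos)[0:3, 0])
positions = np.array(positions)       # z coordinates: -1, -3, -6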
Example #30
0
    def backprop(self, hps, obs, actions, old_logprobs, returns,
                 value_loss_scale, advantages, old_values, action_masks,
                 old_probs, privileged_obs, split_reward):
        if self.fp16:
            advantages = advantages.half()
            returns = returns.half()
            action_masks = action_masks.half()
            old_logprobs = old_logprobs.half()

        action_masks = action_masks[:, :self.agents, :]
        x, (pitems, pmask) = self.latents(obs, privileged_obs)
        batch_size = x.size()[0]

        vin = x.max(dim=1).values.view(batch_size,
                                       self.d_agent * self.hps.dff_ratio)
        if self.hps.use_privileged:
            pitems_max = pitems.max(dim=1).values
            pitems_avg = pitems.sum(dim=1) / torch.clamp_min(
                (~pmask).float().sum(dim=1), min=1).unsqueeze(-1)
            vin = torch.cat([vin, pitems_max, pitems_avg], dim=1)
        values = self.value_head(vin).view(-1)

        logits = self.policy_head(x)
        probs = F.softmax(logits, dim=2)
        probs = probs.view(-1, self.agents, self.naction)

        # add small value to prevent degenerate probability distribution when no action is possible
        # gradients still get blocked by the action mask
        # TODO: mask actions by setting logits to -inf?
        probs = probs * action_masks + self.epsilon

        active_agents = torch.clamp_min(
            (action_masks.sum(dim=2) > 0).float().sum(dim=1), min=1)

        dist = distributions.Categorical(probs)
        entropy = dist.entropy()
        logprobs = dist.log_prob(actions)
        ratios = torch.exp(logprobs - old_logprobs)
        advantages = advantages.view(-1, 1)
        if split_reward:
            advantages = advantages / active_agents.view(-1, 1)
        vanilla_policy_loss = advantages * ratios
        clipped_policy_loss = advantages * torch.clamp(
            ratios, 1 - hps.cliprange, 1 + hps.cliprange)
        if hps.ppo:
            policy_loss = -torch.min(vanilla_policy_loss,
                                     clipped_policy_loss).mean()
        else:
            policy_loss = -vanilla_policy_loss.mean()

        # TODO: do over full distribution, not just selected actions?
        approxkl = 0.5 * (old_logprobs - logprobs).pow(2).mean()
        clipfrac = ((ratios - 1.0).abs() > hps.cliprange).sum().type(
            torch.float32) / ratios.numel()

        clipped_values = old_values + torch.clamp(
            values - old_values, -hps.cliprange, hps.cliprange)
        vanilla_value_loss = (values - returns)**2
        clipped_value_loss = (clipped_values - returns)**2
        if hps.clip_vf:
            value_loss = torch.max(vanilla_value_loss,
                                   clipped_value_loss).mean()
        else:
            value_loss = vanilla_value_loss.mean()

        entropy_loss = -hps.entropy_bonus * entropy.mean()

        loss = policy_loss + value_loss_scale * value_loss + entropy_loss
        loss /= hps.batches_per_update
        loss.backward()
        return policy_loss.data.tolist(), value_loss.data.tolist(
        ), approxkl.data.tolist(), clipfrac.data.tolist()
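
A small sketch (toy mask and features, hypothetical names) of the clamp_min guard used twice above: when averaging over a possibly empty mask, clamping the count at 1 avoids a division by zero while leaving non-empty rows unchanged.

import torch

pitems = torch.tensor([[[1.0, 3.0], [5.0, 7.0]],
                       [[0.0, 0.0], [0.0, 0.0]]])   # [batch, items, features]
pmask = torch.tensor([[False, False],
                      [True,  True]])               # True = padded / masked out

counts = torch.clamp_min((~pmask).float().sum(dim=1), min=1).unsqueeze(-1)
pitems_avg = pitems.sum(dim=1) / counts
# row 0: true average [3., 5.]; row 1: everything masked, denominator clamped to 1 -> [0., 0.]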