Example #1
def train(args):
    torch.backends.cudnn.benchmark=True
    # Setup Augmentations

    loss_rec=[0]
    best_error=2
    # Setup Dataloader
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    t_loader = data_loader(data_path, is_transform=True,
                           split='train', img_size=(args.img_rows, args.img_cols))
    v_loader = data_loader(data_path, is_transform=True,
                           split='eval', img_size=(args.img_rows, args.img_cols))
    train_length=t_loader.length//2
    test_length=v_loader.length//2
    trainloader = data.DataLoader(
        t_loader, batch_size=args.batch_size, num_workers=2, shuffle=True)
    valloader = data.DataLoader(
        v_loader, batch_size=args.batch_size, num_workers=2, shuffle=False)
    model = get_model(args.arch)
    # parameters=model.named_parameters()
    # for name,param in parameters:
    #     print(name)
    #     print(param.grad)
    # exit()

    model = torch.nn.DataParallel(
        model, device_ids=[2,3])
    #model = torch.nn.DataParallel(model, device_ids=[0])
    model.cuda(2)

    saved_model_path=r'/home/lidong/Documents/CMF/trained/test/'
    saved_model_dir=os.listdir(saved_model_path)
    saved_model_dir.sort()
    for s in range(len(saved_model_dir)):
        print("Loading model and optimizer from checkpoint '{}'".format(os.path.join(saved_model_path,saved_model_dir[s])))
        checkpoint = torch.load(os.path.join(saved_model_path,saved_model_dir[s]))
        model.load_state_dict(checkpoint['model_state'])
        print("Loaded checkpoint '{}' (epoch {})"
              .format(os.path.join(saved_model_path,saved_model_dir[s]), checkpoint['epoch']))
        epoch=checkpoint['epoch']
        error=10
        error_rec=[]
        error_rec_non=[]
        error_rec_true=[]
        error_rec_3=[]
        #trained
        print('testing!')
        model.eval()
        ones=torch.ones(1).cuda(2)
        zeros=torch.zeros(1).cuda(2)
        for i, (left, right,disparity,image) in enumerate(valloader):
            with torch.no_grad():
                start_time=time.time()
                left = left.cuda(2)
                right = right.cuda(2)
                disparity = disparity.cuda(2)
                local=torch.arange(disparity.shape[-1]).repeat(disparity.shape[0],disparity.shape[1],1).view_as(disparity).float().cuda(2)
                mask_non = (disparity < 192) & (disparity > 0) &((local-disparity)>=0)
                mask_true = (disparity < 192) & (disparity > 0)&((local-disparity)>=0)
                mask = (disparity < 192) & (disparity > 0)
                mask.detach_()
                mask_non.detach_()
                mask_true.detach_()
                #print(P.shape)
                #print(left.shape)
                output1, output2, output3 = model(left,right)
                #output3 = model(left,right)
                #print(output3.shape)
                output1=output3
                output1 = torch.squeeze(output1, 1)
                loss=torch.mean(torch.abs(output1[mask] - disparity[mask]))
                loss_non=torch.mean(torch.abs(output1[mask_non] - disparity[mask_non]))
                loss_true=torch.mean(torch.abs(output1[mask_true] - disparity[mask_true]))
                error_map=torch.where((torch.abs(output1[mask] - disparity[mask])<3) | (torch.abs(output1[mask] - disparity[mask])<0.05*disparity[mask]),ones,zeros)
                total=torch.where(disparity[mask]>0,ones,zeros)
                loss_3=100-torch.sum(error_map)/torch.sum(total)*100
                #loss = F.l1_loss(output3[mask], disparity[mask], reduction='elementwise_mean')
                error_rec.append(loss.item())
                error_rec_non.append(loss_non.item())
                error_rec_3.append(loss_3.item())
                print(time.time()-start_time)
            print(np.mean(error_rec_3))
            print("data [%d/%d/%d/%d] Loss: %.4f ,Loss_non: %.4f, loss_3: %.4f" % (i, test_length,epoch, args.n_epoch,loss.item(),loss_non.item(),loss_3.item()))
            #break
        error=np.mean(error_rec)
        error_non=np.mean(error_rec_non)
        error_3=np.mean(error_rec_3)
        np.save('/home/lidong/Documents/CMF/test/kitti_sub4/epoch:%d_error%.4f_non%.4f_error_3_%.4f.npy'%(epoch-1,error,error_non,error_3),[error_rec,error_rec_non,error_rec_3])
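
# Hedged sketch (not part of the original code) of the 3-pixel error metric
# computed inline above: a prediction counts as correct when its absolute
# disparity error is below 3 px or below 5% of the ground-truth disparity,
# and the metric is the percentage of valid pixels that fail this test.
# The helper name is an assumption, not an existing project function.
import torch

def three_pixel_error(pred, gt, mask):
    diff = torch.abs(pred[mask] - gt[mask])
    correct = (diff < 3) | (diff < 0.05 * gt[mask])
    return 100.0 * (1.0 - correct.float().mean().item())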
def train(args):
    torch.backends.cudnn.benchmark = True
    # Setup Augmentations
    loss_rec = [0]
    best_error = 2
    # Setup Dataloader
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    t_loader = data_loader(data_path,
                           is_transform=True,
                           split='train',
                           img_size=(args.img_rows, args.img_cols))
    v_loader = data_loader(data_path,
                           is_transform=True,
                           split='test',
                           img_size=(args.img_rows, args.img_cols))

    trainloader = data.DataLoader(t_loader,
                                  batch_size=args.batch_size,
                                  num_workers=4,
                                  shuffle=True)
    valloader = data.DataLoader(v_loader,
                                batch_size=args.batch_size,
                                num_workers=4)

    # Setup visdom for visualization
    if args.visdom:
        vis = visdom.Visdom()
        # old_window = vis.line(X=torch.zeros((1,)).cpu(),
        #                        Y=torch.zeros((1)).cpu(),
        #                        opts=dict(xlabel='minibatches',
        #                                  ylabel='Loss',
        #                                  title='Trained Loss',
        #                                  legend=['Loss']))
        loss_window = vis.line(X=torch.zeros((1, )).cpu(),
                               Y=torch.zeros((1)).cpu(),
                               opts=dict(xlabel='minibatches',
                                         ylabel='Loss',
                                         title='Training Loss',
                                         legend=['Loss']))
        pre_window = vis.image(
            np.random.rand(256, 512),
            opts=dict(title='predict!', caption='predict.'),
        )
        ground_window = vis.image(
            np.random.rand(256, 512),
            opts=dict(title='ground!', caption='ground.'),
        )
        image_window = vis.image(
            np.random.rand(256, 512),
            opts=dict(title='image!', caption='image.'),
        )
    # Setup Model
    model = get_model(args.arch)
    # parameters=model.named_parameters()
    # for name,param in parameters:
    #     print(name)
    #     print(param.grad)
    # exit()

    model = torch.nn.DataParallel(model,
                                  device_ids=range(torch.cuda.device_count()))
    #model = torch.nn.DataParallel(model, device_ids=[0])
    model.cuda()

    # Check if model has custom optimizer / loss
    # modify to adam, modify the learning rate
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.l_rate,
                                 betas=(0.9, 0.999))
    # optimizer = torch.optim.SGD(
    #     model.parameters(), lr=args.l_rate,momentum=0.90, weight_decay=5e-5)
    # optimizer = torch.optim.Adam(
    #     model.parameters(), lr=args.l_rate,weight_decay=5e-4,betas=(0.9,0.999),amsgrad=True)
    loss_fn = l1
    trained = 0
    scale = 100

    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("Loading model and optimizer from checkpoint '{}'".format(
                args.resume))
            checkpoint = torch.load(args.resume)
            #model_dict=model.state_dict()
            #opt=torch.load('/home/lidong/Documents/cmf/cmf/exp1/l2/sgd/log/83/rsnet_nyu_best_model.pkl')
            model.load_state_dict(checkpoint['model_state'])
            optimizer.load_state_dict(checkpoint['optimizer_state'])
            #opt=None
            print("Loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            trained = checkpoint['epoch']
            #trained=0

    else:
        print("No checkpoint found at '{}'".format(args.resume))
        print('Initialize from resnet34!')
        resnet34 = torch.load(
            '/home/lidong/Documents/CMF/20_bilinear_cmf_flying3d_best_model.pkl'
        )
        #optimizer.load_state_dict(resnet34['optimizer_state'])
        #model
        #model.load_state_dict(resnet34['state_dict'])
        model_dict = model.state_dict()
        pre_dict = {
            k: v
            for k, v in resnet34['model_state'].items() if k in model_dict
        }
        # print(pre_dict)
        # exit()
        # for k,v in resnet34['state_dict'].items():
        #     #print('.'.join(k.split('.')[1:]))
        #     print(k)
        # for k,v in model_dict.items():
        #     print(k)
        model_dict.update(pre_dict)
        model.load_state_dict(model_dict)
        #optimizer
        # opti_dict=optimizer.state_dict()
        # pre_dict={k: v for k, v in resnet34['optimizer_state'].items() if k in opti_dict}
        # # for k,v in pre_dict.items():
        # #     print(k)
        # #     if k=='state':
        # #         for a,b in v.items():
        # #             print(a)
        # #             for c,d in b.items():
        # #                 print(c,d)
        # exit()
        # #pre_dict=resnet34['optimizer_state']
        # opti_dict.update(pre_dict)
        # optimizer.load_state_dict(opti_dict)
        print('load success!')
        trained = 0

    #best_error=5
    # it should be range(checkpoint['epoch'], args.n_epoch)
    for epoch in range(trained, args.n_epoch):
        #for epoch in range(0, args.n_epoch):

        #trained
        print('training!')
        model.train()
        for i, (left, right, disparity, image) in enumerate(trainloader):
            #with torch.no_grad():
            #print(left.shape)
            #print(torch.max(image),torch.min(image))
            start_time = time.time()
            left = left.cuda()
            right = right.cuda()
            disparity = disparity.cuda()
            mask = (disparity < 192) & (disparity >= 0)
            mask.detach_()
            optimizer.zero_grad()
            #print(P.shape)
            output1, output2, output3 = model(left, right)
            #print(output3.shape)
            output1 = torch.squeeze(output1, 1)
            output2 = torch.squeeze(output2, 1)
            output3 = torch.squeeze(output3, 1)
            # #outputs=outputs
            loss = 0.5 * F.smooth_l1_loss(output1[mask], disparity[mask],reduction='elementwise_mean') \
                 + 0.7 * F.smooth_l1_loss(output2[mask], disparity[mask], reduction='elementwise_mean') \
                 + F.smooth_l1_loss(output3[mask], disparity[mask], reduction='elementwise_mean')
            #loss=loss/2.2
            #output3 = model(left,right)

            #loss = F.smooth_l1_loss(output3[mask], disparity[mask], reduction='elementwise_mean')
            loss.backward()
            #parameters=model.named_parameters()
            optimizer.step()

            #torch.cuda.empty_cache()
            #print(loss.item)
            if args.visdom == True:
                vis.line(X=torch.ones(1).cpu() * i + torch.ones(1).cpu() *
                         (epoch - trained) * 5457,
                         Y=loss.item() * torch.ones(1).cpu() / 2.2,
                         win=loss_window,
                         update='append')
                #print(torch.max(output3).item(),torch.min(output3).item())
                if i % 15 == 0:
                    #print(output3.shape)
                    pre = output3.data.cpu().numpy().astype('float32')
                    pre = pre[0, :, :]
                    #print(np.max(pre))
                    #print(pre.shape)
                    pre = np.reshape(pre, [256, 512]).astype('float32')
                    vis.image(
                        pre,
                        opts=dict(title='predict!', caption='predict.'),
                        win=pre_window,
                    )

                    ground = disparity.data.cpu().numpy().astype('float32')
                    ground = ground[0, :, :]
                    ground = np.reshape(ground, [256, 512]).astype('float32')
                    vis.image(
                        ground,
                        opts=dict(title='ground!', caption='ground.'),
                        win=ground_window,
                    )
                    image = image.data.cpu().numpy().astype('float32')
                    image = image[0, ...]
                    #image=image[0,...]
                    #print(image.shape,np.min(image))
                    image = np.reshape(image, [3, 256, 512]).astype('float32')
                    vis.image(
                        image,
                        opts=dict(title='image!', caption='image.'),
                        win=image_window,
                    )
            loss_rec.append(loss.item())
            print(time.time() - start_time)
            print("data [%d/5457/%d/%d] Loss: %.4f" %
                  (i, epoch, args.n_epoch, loss.item() / 2.2))

        state = {
            'epoch': epoch + 1,
            'model_state': model.state_dict(),
            'optimizer_state': optimizer.state_dict(),
        }
        np.save('loss.npy', loss_rec)
        torch.save(
            state, "{}_{}_{}_best_model.pkl".format(epoch, args.arch,
                                                    args.dataset))
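
# Hedged sketch (not part of the original code) of the three-stage supervision
# used in the training loop above: smooth L1 on each intermediate disparity map
# with the 0.5 / 0.7 / 1.0 weighting (the scheme PSMNet uses for its stacked
# outputs). reduction='mean' is the current spelling of 'elementwise_mean'.
# The helper name is an assumption.
import torch.nn.functional as F

def multi_stage_smooth_l1(out1, out2, out3, disparity, mask):
    return (0.5 * F.smooth_l1_loss(out1[mask], disparity[mask], reduction='mean')
            + 0.7 * F.smooth_l1_loss(out2[mask], disparity[mask], reduction='mean')
            + 1.0 * F.smooth_l1_loss(out3[mask], disparity[mask], reduction='mean'))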
def train(args):
    torch.backends.cudnn.benchmark=True
    # Setup Augmentations

    loss_rec=[0]
    best_error=2
    # Setup Dataloader
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    t_loader = data_loader(data_path, is_transform=True,
                           split='train', img_size=(args.img_rows, args.img_cols))
    v_loader = data_loader(data_path, is_transform=True,
                           split='eval', img_size=(args.img_rows, args.img_cols))
    train_length=t_loader.length//2
    test_length=v_loader.length//2
    trainloader = data.DataLoader(
        t_loader, batch_size=args.batch_size, num_workers=2, shuffle=True)
    valloader = data.DataLoader(
        v_loader, batch_size=args.batch_size, num_workers=2, shuffle=False)


    # Setup visdom for visualization
    if args.visdom:
        vis = visdom.Visdom(env='kitti_sub_4')
        error_window = vis.line(X=torch.zeros((1,)).cpu(),
                               Y=torch.zeros((1)).cpu(),
                               opts=dict(xlabel='minibatches',
                                         ylabel='error',
                                         title='test error',
                                         legend=['Error']))
        loss_window = vis.line(X=torch.zeros((1,)).cpu(),
                               Y=torch.zeros((1)).cpu(),
                               opts=dict(xlabel='minibatches',
                                         ylabel='Loss',
                                         title='Training Loss',
                                         legend=['Loss']))
        pre_window = vis.image(
            np.random.rand(256, 512),
            opts=dict(title='predict!', caption='predict.'),
        )
        ground_window = vis.image(
            np.random.rand(256, 512),
            opts=dict(title='ground!', caption='ground.'),
        )
        image_window = vis.image(
            np.random.rand(256, 512),
            opts=dict(title='image!', caption='image.'),
        )
    # Setup Model
    model = get_model(args.arch)
    # parameters=model.named_parameters()
    # for name,param in parameters:
    #     print(name)
    #     print(param.grad)
    # exit()

    model = torch.nn.DataParallel(
        model, device_ids=[2,3])
    #model = torch.nn.DataParallel(model, device_ids=[0])
    model.cuda(2)

    # Check if model has custom optimizer / loss
    # modify to adam, modify the learning rate
    # optimizer = torch.optim.Adam(
    #     model.parameters(), lr=args.l_rate,betas=(0.9,0.999))
    optimizer = torch.optim.SGD(
        model.parameters(), lr=args.l_rate,momentum=0.90, weight_decay=5e-5)
    # optimizer = torch.optim.Adam(
    #     model.parameters(), lr=args.l_rate,weight_decay=5e-4,betas=(0.9,0.999),amsgrad=True)
    loss_fn = l1
    trained=0
    scale=100

    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("Loading model and optimizer from checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            #model_dict=model.state_dict()  
            #opt=torch.load('/home/lidong/Documents/cmf/cmf/exp1/l2/sgd/log/83/rsnet_nyu_best_model.pkl')
            model.load_state_dict(checkpoint['model_state'])
            optimizer.load_state_dict(checkpoint['optimizer_state'])
            #opt=None
            print("Loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
            trained=checkpoint['epoch']
            best_error=10
            # loss_rec=np.load('/home/lidong/Documents/CMF/loss_8.npy')
            # loss_rec=list(loss_rec)
            # print(train_length)
            # loss_rec=loss_rec[:train_length*trained]
            
    else:
        print("No checkpoint found at '{}'".format(args.resume))
        print('Initialize from resnet34!')
        resnet34=torch.load('/home/lidong/Documents/CMF/9_cm_sub_4_flying3d_best_model.pkl')
        #optimizer.load_state_dict(resnet34['optimizer_state'])
        #model
        #model.load_state_dict(resnet34['state_dict'])
        model_dict=model.state_dict()            
        pre_dict={k: v for k, v in resnet34['model_state'].items() if k in model_dict}
        key=[]
        for k,v in pre_dict.items():
            if v.shape!=model_dict[k].shape:
                key.append(k)
        for k in key:
            pre_dict.pop(k)
        model_dict.update(pre_dict)
        model.load_state_dict(model_dict)
        #optimizer
        # opti_dict=optimizer.state_dict()
        # pre_dict={k: v for k, v in resnet34['optimizer_state'].items() if k in opti_dict}
        # # for k,v in pre_dict.items():
        # #     print(k)
        # #     if k=='state':
        # #         for a,b in v.items():
        # #             print(a)
        # #             for c,d in b.items():
        # #                 print(c,d)            
        # exit()
        # #pre_dict=resnet34['optimizer_state']
        # opti_dict.update(pre_dict)
        # optimizer.load_state_dict(opti_dict)
        print('load success!')
        trained=0



    #best_error=5
    # it should be range(checkpoint['epoch'], args.n_epoch)
    for epoch in range(trained, args.n_epoch):

        # print('training!')
        # model.train()
        # for i, (left, right,disparity,image) in enumerate(trainloader):
        #     #break
        #     #with torch.no_grad():
        #     #print(left.shape)
        #     #print(torch.max(image),torch.min(image))
        #     start_time=time.time()
        #     left = left.cuda(2)
        #     right = right.cuda(2)
        #     disparity = disparity.cuda(2)
        #     mask = (disparity < 192) & (disparity >0)
        #     mask.detach_()
        #     optimizer.zero_grad()
        #     #print(P.shape)
        #     output1, output2, output3 = model(left,right)
        #     #print(output3.shape)
        #     # output1 = torch.squeeze(output1, 1)
        #     # loss = F.smooth_l1_loss(output1[mask], disparity[mask],reduction='elementwise_mean')
        #     output1 = torch.squeeze(output1, 1)
        #     output2 = torch.squeeze(output2, 1)
        #     output3 = torch.squeeze(output3, 1)
        #     # #outputs=outputs
        #     loss = 0.5 * F.smooth_l1_loss(output1[mask], disparity[mask],reduction='elementwise_mean') \
        #          + 0.7 * F.smooth_l1_loss(output2[mask], disparity[mask], reduction='elementwise_mean') \
        #          + F.smooth_l1_loss(output3[mask], disparity[mask], reduction='elementwise_mean')
        #     #loss=loss/2.2
        #     #output3 = model(left,right)

        #     #loss = F.smooth_l1_loss(output3[mask], disparity[mask], reduction='elementwise_mean')
        #     loss.backward()
        #     #parameters=model.named_parameters()
        #     optimizer.step()
            
            
        #     #torch.cuda.empty_cache()
        #     #print(loss.item)
        #     if args.visdom ==True:
        #         vis.line(
        #             X=torch.ones(1).cpu() * i+torch.ones(1).cpu() *(epoch-trained)*train_length,
        #             Y=loss.item()*torch.ones(1).cpu(),
        #             win=loss_window,
        #             update='append')
        #         #print(torch.max(output3).item(),torch.min(output3).item())
        #         if i%1==0:
        #             #print(output3.shape)
        #             pre = output3.data.cpu().numpy().astype('float32')
        #             pre = pre[0,:,:]
        #             #print(np.max(pre))
        #             #print(pre.shape)
        #             pre = np.reshape(pre, [256,512]).astype('float32')
        #             vis.image(
        #                 pre,
        #                 opts=dict(title='predict!', caption='predict.'),
        #                 win=pre_window,
        #             )

        #             ground=disparity.data.cpu().numpy().astype('float32')
        #             ground = ground[0, :, :]
        #             ground = np.reshape(ground, [256,512]).astype('float32')
        #             vis.image(
        #                 ground,
        #                 opts=dict(title='ground!', caption='ground.'),
        #                 win=ground_window,
        #             )
        #             image=image.data.cpu().numpy().astype('float32')
        #             image = image[0,...]
        #             #image=image[0,...]
        #             #print(image.shape,np.min(image))
        #             image = np.reshape(image, [3,256,512]).astype('float32')
        #             vis.image(
        #                 image,
        #                 opts=dict(title='image!', caption='image.'),
        #                 win=image_window,
        #             )            
        #     loss_rec.append(loss.item())
        #     print(time.time()-start_time)
        #     print("data [%d/%d/%d/%d] Loss: %.4f" % (i,train_length, epoch, args.n_epoch,loss.item()/2.2))
        error=10
        error_rec=[]
        error_rec_non=[]
        error_rec_true=[]
        #trained
        print('testing!')
        model.eval()
        for i, (left, right,disparity,image) in enumerate(valloader):
            with torch.no_grad():
                start_time=time.time()
                left = left.cuda(2)
                right = right.cuda(2)
                disparity = disparity.cuda(2)
                local=torch.arange(disparity.shape[-1]).repeat(disparity.shape[0],disparity.shape[1],1).view_as(disparity).float().cuda(2)
                mask_non = (disparity < 192) & (disparity > 0) &((local-disparity)>=0)
                mask_true = (disparity < 192) & (disparity > 0)&((local-disparity)>=0)
                mask = (disparity < 192) & (disparity > 0)
                mask.detach_()
                mask_non.detach_()
                mask_true.detach_()
                #print(P.shape)
                #print(left.shape)
                output1, output2, output3 = model(left,right)
                #output3 = model(left,right)
                #print(output3.shape)
                output1=output3
                output1 = torch.squeeze(output1, 1)
                loss=torch.mean(torch.abs(output1[mask] - disparity[mask]))
                loss_non=torch.mean(torch.abs(output1[mask_non] - disparity[mask_non]))
                loss_true=torch.mean(torch.abs(output1[mask_true] - disparity[mask_true]))
                #loss = F.l1_loss(output3[mask], disparity[mask], reduction='elementwise_mean')
                error_rec.append(loss.item())
                error_rec_non.append(loss_non.item())
                error_rec_true.append(loss_true.item())
                if args.visdom ==True:
                    vis.line(
                        X=torch.ones(1).cpu() * i+torch.ones(1).cpu() *(epoch-trained)*train_length,
                        Y=loss.item()*torch.ones(1).cpu(),
                        win=loss_window,
                        update='append')
            print(time.time()-start_time)
            print("data [%d/%d/%d/%d] Loss: %.4f ,Loss_non: %.4f, Loss_true: %.4f" % (i, test_length,epoch, args.n_epoch,loss.item(),loss_non.item(),loss_true.item()))

        error=np.mean(error_rec)
        error_non=np.mean(error_rec_non)
        error_true=np.mean(error_rec_true)
        if error<best_error:
            best_error=error
            state = {'epoch': epoch+1,
             'model_state': model.state_dict(),
             'optimizer_state': optimizer.state_dict(),
             'error':best_error}
            np.save('loss_4.npy',loss_rec)
            torch.save(state, "{}_{}_{}_{}_best_model.pkl".format(epoch,args.arch,args.dataset,best_error))
        if epoch%15==0:
            state = {'epoch': epoch+1,
             'model_state': model.state_dict(),
             'optimizer_state': optimizer.state_dict(),
             'error':best_error}
            np.save('loss_4.npy',loss_rec)
            torch.save(state, "{}_{}_{}_{}_best_model.pkl".format(epoch,args.arch,args.dataset,best_error))
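
# Hedged sketch (not part of the original code) of the partial-initialization
# pattern used in the resume/initialize branches above: copy only checkpoint
# tensors whose names and shapes match the current model and leave the rest at
# their fresh initialization. The helper name is an assumption.
def load_matching_weights(model, checkpoint_state):
    model_dict = model.state_dict()
    matched = {k: v for k, v in checkpoint_state.items()
               if k in model_dict and v.shape == model_dict[k].shape}
    model_dict.update(matched)
    model.load_state_dict(model_dict)
    return matched  # handy for logging how many tensors were transferred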
Example #4
def train(args):
    torch.backends.cudnn.benchmark = True
    # Setup Augmentations
    loss_rec = [0]
    best_error = 2
    # Setup Dataloader
    data_path = get_data_path(args.dataset)
    data_loader = get_loader(args.dataset)
    v_loader = data_loader(data_path,
                           is_transform=True,
                           split='test',
                           img_size=(args.img_rows, args.img_cols))
    valloader = data.DataLoader(v_loader,
                                batch_size=args.batch_size,
                                num_workers=2,
                                shuffle=False)
    # Setup Model
    model = get_model(args.arch)

    model = torch.nn.DataParallel(model, device_ids=range(2))
    test_length = v_loader.__len__() / 2
    #model = torch.nn.DataParallel(model, device_ids=[0])
    model.cuda()

    saved_model_path = r'/home/lidong/Documents/CMF/all_data/test/'
    saved_model_dir = os.listdir(saved_model_path)
    saved_model_dir.sort()
    for s in range(len(saved_model_dir)):
        print("Loading model and optimizer from checkpoint '{}'".format(
            os.path.join(saved_model_path, saved_model_dir[s])))
        checkpoint = torch.load(
            os.path.join(saved_model_path, saved_model_dir[s]))
        model.load_state_dict(checkpoint['model_state'])
        print("Loaded checkpoint '{}' (epoch {})".format(
            os.path.join(saved_model_path, saved_model_dir[s]),
            checkpoint['epoch']))
        epoch = checkpoint['epoch']
        error = 0
        error_rec = []
        error_rec_non = []
        error_rec_true = []
        #trained
        print('testing!')
        model.eval()
        for i, (left, right, disparity, image) in enumerate(valloader):
            with torch.no_grad():
                start_time = time.time()
                left = left.cuda()
                right = right.cuda()
                disparity = disparity.cuda()[:, :540, :960]
                local = torch.arange(disparity.shape[-1]).repeat(
                    disparity.shape[0], disparity.shape[1],
                    1).view_as(disparity).float().cuda()
                mask_non = (disparity < 192) & (disparity >= 0) & (
                    (local - disparity) >= 0)
                mask_true = (disparity < 192) & (disparity > 0) & (
                    (local - disparity) >= 0)
                mask = (disparity < 192) & (disparity >= 0)
                mask.detach_()
                mask_non.detach_()
                mask_true.detach_()
                #print(P.shape)
                #print(left.shape)
                output1, output2, output3 = model(left, right)
                #output3 = model(left,right)
                #print(output3.shape)
                output1 = output3
                output1 = torch.squeeze(output1, 1)[:, :540, :960]
                # print(output3.shape,disparity.shape)
                # exit()
                # print(torch.sum(torch.where(disparity==0,torch.ones(1).cuda(),torch.zeros(1).cuda())))
                # print(torch.sum(torch.where(disparity<=1,torch.ones(1).cuda(),torch.zeros(1).cuda())))
                # print(torch.sum(torch.where(disparity<=2,torch.ones(1).cuda(),torch.zeros(1).cuda())))
                # print(torch.sum(torch.where(disparity<=3,torch.ones(1).cuda(),torch.zeros(1).cuda())))
                # print(disparity.shape)
                #output3=torch.where(output3<1,torch.zeros(1).cuda(),output3)
                loss = torch.mean(torch.abs(output1[mask] - disparity[mask]))
                loss_non = torch.mean(
                    torch.abs(output1[mask_non] - disparity[mask_non]))
                loss_true = torch.mean(
                    torch.abs(output1[mask_true] - disparity[mask_true]))
                #loss = F.l1_loss(output3[mask], disparity[mask], reduction='elementwise_mean')
                error_rec.append(loss.item())
                error_rec_non.append(loss_non.item())
                error_rec_true.append(loss_true.item())
            print(time.time() - start_time)
            print(
                "data [%d/%d/%d/%d] Loss: %.4f ,Loss_non: %.4f, Loss_true: %.4f"
                % (i, test_length, epoch, args.n_epoch, loss.item(),
                   loss_non.item(), loss_true.item()))
            #break
        error = np.mean(error_rec)
        error_non = np.mean(error_rec_non)
        error_true = np.mean(error_rec_true)
        np.save(
            '/home/lidong/Documents/CMF/all_data_test/4_sub/epoch:%d_error%.4f_non%.4f_true%.4f.npy'
            % (epoch - 1, error, error_non, error_true),
            [error_rec, error_rec_non, error_rec_true])
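
# Hedged sketch (not part of the original code) of the mask construction used in
# the evaluation loops above: the basic mask keeps disparities inside (0, 192);
# the stricter mask additionally drops pixels whose matching point x - d would
# fall outside the left image border. The helper name and default are assumptions.
import torch

def build_disparity_masks(disparity, max_disp=192):
    b, h, w = disparity.shape
    local = torch.arange(w, device=disparity.device).float().expand(b, h, w)
    valid = (disparity < max_disp) & (disparity > 0)
    in_view = valid & ((local - disparity) >= 0)
    return valid, in_view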
Example #5
def train(args):
    torch.backends.cudnn.benchmark = True
    # Setup Augmentations
    loss_rec = [0]
    best_error = 2
    # Setup Dataloader
    data_path = get_data_path(args.dataset)
    data_loader = get_loader(args.dataset)
    v_loader = data_loader(data_path,
                           is_transform=True,
                           split='test',
                           img_size=(args.img_rows, args.img_cols))
    valloader = data.DataLoader(v_loader,
                                batch_size=args.batch_size,
                                num_workers=4,
                                shuffle=False)
    # Setup Model
    model = get_model(args.arch)

    model = torch.nn.DataParallel(model,
                                  device_ids=range(torch.cuda.device_count()))
    #model = torch.nn.DataParallel(model, device_ids=[0])
    model.cuda()

    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("Loading model and optimizer from checkpoint '{}'".format(
                args.resume))
            checkpoint = torch.load(args.resume)
            #model_dict=model.state_dict()
            #opt=torch.load('/home/lidong/Documents/cmf/cmf/exp1/l2/sgd/log/83/rsnet_nyu_best_model.pkl')
            model.load_state_dict(checkpoint['model_state'])
            #optimizer.load_state_dict(checkpoint['optimizer_state'])
            #opt=None
            print("Loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            epoch = checkpoint['epoch']
            #trained=0

    error = 0
    error_rec = []

    #trained
    print('testing!')
    model.eval()
    for i, (left, right, disparity, image) in enumerate(valloader):
        with torch.no_grad():
            start_time = time.time()
            left = left.cuda()
            right = right.cuda()
            disparity = disparity.cuda()[:, :540, ...]
            mask = (disparity < 192) & (disparity >= 0)
            mask.detach_()
            #print(P.shape)
            output1, output2, output3 = model(left, right)
            output3 = torch.squeeze(output3, 1)[:, :540, ...]
            loss = F.l1_loss(output3[mask],
                             disparity[mask],
                             reduction='elementwise_mean')
            error_rec.append(loss.item())
        print(time.time() - start_time)
        print("data [%d/1062/%d/%d] Loss: %.4f" %
              (i, epoch, args.n_epoch, loss.item()))
        break
    error = np.mean(error_rec)
    np.save(
        '/home/lidong/Documents/CMF/test/cmf/error:%.4f,epoch:%d.npy' %
        (error, epoch), error_rec)
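
# Hedged sketch (not part of the original code) of the end-point error evaluated
# above: mean absolute disparity difference over valid pixels, returned as a
# Python float so that np.mean over the per-batch record works directly.
import torch

def end_point_error(pred, gt, mask):
    return torch.mean(torch.abs(pred[mask] - gt[mask])).item()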
Example #6
def train(args):
    torch.backends.cudnn.benchmark = True
    # Setup Augmentations

    loss_rec = [0]
    best_error = 2
    # Setup Dataloader
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    t_loader = data_loader(data_path,
                           is_transform=True,
                           split='test',
                           img_size=(args.img_rows, args.img_cols))
    v_loader = data_loader(data_path,
                           is_transform=True,
                           split='eval',
                           img_size=(args.img_rows, args.img_cols))
    valloader = data.DataLoader(v_loader,
                                batch_size=args.batch_size,
                                num_workers=1,
                                shuffle=False)
    train_length = t_loader.length
    test_length = v_loader.length
    trainloader = data.DataLoader(t_loader,
                                  batch_size=args.batch_size,
                                  num_workers=1,
                                  shuffle=False)
    valloader = data.DataLoader(v_loader,
                                batch_size=args.batch_size,
                                num_workers=1,
                                shuffle=False)

    with torch.no_grad():
        # Setup Model
        model = get_model(args.arch)
        # parameters=model.named_parameters()
        # for name,param in parameters:
        #     print(name)
        #     print(param.grad)
        # exit()

        model = torch.nn.DataParallel(model, device_ids=[0])
        #model = torch.nn.DataParallel(model, device_ids=[0])
        model.cuda()

    # Check if model has custom optimizer / loss
    # modify to adam, modify the learning rate
    # optimizer = torch.optim.Adam(
    #     model.parameters(), lr=args.l_rate,betas=(0.9,0.999))
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.l_rate,
                                momentum=0.90,
                                weight_decay=5e-5)
    # optimizer = torch.optim.Adam(
    #     model.parameters(), lr=args.l_rate,weight_decay=5e-4,betas=(0.9,0.999),amsgrad=True)
    loss_fn = l1
    trained = 0
    scale = 100

    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("Loading model and optimizer from checkpoint '{}'".format(
                args.resume))
            checkpoint = torch.load(args.resume)
            #model_dict=model.state_dict()
            #opt=torch.load('/home/lidong/Documents/cmf/cmf/exp1/l2/sgd/log/83/rsnet_nyu_best_model.pkl')
            model.load_state_dict(checkpoint['model_state'])
            optimizer.load_state_dict(checkpoint['optimizer_state'])
            #opt=None
            print("Loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            trained = checkpoint['epoch']
            trained = 0
            best_error = 5
            # loss_rec=np.load('/home/lidong/Documents/CMF/loss_8.npy')
            # loss_rec=list(loss_rec)
            # print(train_length)
            # loss_rec=loss_rec[:train_length*trained]

    else:
        print("No checkpoint found at '{}'".format(args.resume))
        print('Initialize from resnet34!')
        resnet34 = torch.load(
            '/home/lidong/Documents/CMF/9_cm_sub_4_flying3d_best_model.pkl')
        #optimizer.load_state_dict(resnet34['optimizer_state'])
        #model
        #model.load_state_dict(resnet34['state_dict'])
        model_dict = model.state_dict()
        pre_dict = {
            k: v
            for k, v in resnet34['model_state'].items() if k in model_dict
        }
        key = []
        for k, v in pre_dict.items():
            if v.shape != model_dict[k].shape:
                key.append(k)
        for k in key:
            pre_dict.pop(k)
        model_dict.update(pre_dict)
        model.load_state_dict(model_dict)
        #optimizer
        # opti_dict=optimizer.state_dict()
        # pre_dict={k: v for k, v in resnet34['optimizer_state'].items() if k in opti_dict}
        # # for k,v in pre_dict.items():
        # #     print(k)
        # #     if k=='state':
        # #         for a,b in v.items():
        # #             print(a)
        # #             for c,d in b.items():
        # #                 print(c,d)
        # exit()
        # #pre_dict=resnet34['optimizer_state']
        # opti_dict.update(pre_dict)
        # optimizer.load_state_dict(opti_dict)
        print('load success!')
        trained = 0

    #best_error=5
    # it should be range(checkpoint['epoch'], args.n_epoch)

    print('testing!')
    model.eval()
    loss_3_rec = []
    ones = torch.ones(1).cuda()
    zeros = torch.zeros(1).cuda()
    for i, (left, right, disparity, image, name, h,
            w) in enumerate(trainloader):
        #break
        with torch.no_grad():
            #print(left.shape)
            print(name[0])
            #print(torch.max(image),torch.min(image))
            h = h.data.cpu().numpy().astype('int32')
            #h=h.astype('int')
            w = w.data.cpu().numpy().astype('int32')
            start_time = time.time()
            left = left.cuda()
            right = right.cuda()
            disparity = disparity.cuda()
            mask = (disparity < 192) & (disparity > 0)
            mask.detach_()
            optimizer.zero_grad()
            #print(P.shape)
            output1, output2, output3 = model(left, right)
            #print(output3.shape)
            # output1 = torch.squeeze(output1, 1)
            # loss = F.smooth_l1_loss(output1[mask], disparity[mask],reduction='elementwise_mean')
            output1 = torch.squeeze(output1, 1)
            output2 = torch.squeeze(output2, 1)
            output3 = torch.squeeze(output3, 1)
            #output3=torch.where(output3>ones*128,torch.mean(output3),output3)
            print(torch.max(output3), torch.min(output3))
            output3 = output3 * 256
            pre = output3.data.cpu().numpy().astype('uint16')
            pre = pre[0, -h[0]:, -w[0]:]
            #print(np.max(pre))
            #print(pre.shape)
            pre = np.reshape(pre, [h[0], w[0]])
            cv2.imwrite(
                os.path.join(
                    '/home/lidong/Documents/datasets/kitti12/disp_occ',
                    name[0] + '.png'), pre)
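
# Hedged sketch (not part of the original code) of the KITTI submission
# convention used above: disparities are written as 16-bit PNGs scaled by 256,
# after cropping the network output back to the original image size (padding
# sits on the top/left, hence the negative slicing). Helper name is an assumption.
import cv2
import numpy as np

def save_kitti_disparity(disp, h, w, path):
    disp = (np.asarray(disp) * 256.0).astype('uint16')  # KITTI stores disparity * 256
    disp = disp[-h:, -w:]                                # undo top/left padding
    cv2.imwrite(path, disp)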
def train(args):
    torch.backends.cudnn.benchmark = True
    # Setup Augmentations

    loss_rec = [0]
    best_error = 5
    # Setup Dataloader
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    t_loader = data_loader(data_path,
                           is_transform=True,
                           split='train',
                           img_size=(args.img_rows, args.img_cols))
    v_loader = data_loader(data_path,
                           is_transform=True,
                           split='eval',
                           img_size=(args.img_rows, args.img_cols))

    train_length = t_loader.length // args.batch_size
    test_length = v_loader.length // args.batch_size
    trainloader = data.DataLoader(t_loader,
                                  batch_size=args.batch_size,
                                  num_workers=args.batch_size,
                                  shuffle=True)
    evalloader = data.DataLoader(v_loader,
                                 batch_size=args.batch_size,
                                 num_workers=args.batch_size,
                                 shuffle=False)

    train_length = len(trainloader)
    test_length = len(evalloader)
    # Setup visdom for visualization
    if args.visdom:
        vis = visdom.Visdom(env='kitti_sub_4')
        error_window = vis.line(X=torch.zeros((1, )).cpu(),
                                Y=torch.zeros((1)).cpu(),
                                opts=dict(xlabel='minibatches',
                                          ylabel='error',
                                          title='test error',
                                          legend=['Error']))
        loss_window = vis.line(X=torch.zeros((1, )).cpu(),
                               Y=torch.zeros((1)).cpu(),
                               opts=dict(xlabel='minibatches',
                                         ylabel='Loss',
                                         title='Training Loss',
                                         legend=['Loss']))
        pre_window = vis.image(
            np.random.rand(256, 512),
            opts=dict(title='predict!', caption='predict.'),
        )
        ground_window = vis.image(
            np.random.rand(256, 512),
            opts=dict(title='ground!', caption='ground.'),
        )
        image_window = vis.image(
            np.random.rand(256, 512),
            opts=dict(title='image!', caption='image.'),
        )
        error3_window = vis.image(
            np.random.rand(256, 512),
            opts=dict(title='error!', caption='error.'),
        )
    # Setup Model
    model = get_model(args.arch)
    # parameters=model.named_parameters()
    # for name,param in parameters:
    #     print(name)
    #     print(param.grad)
    # exit()

    model = torch.nn.DataParallel(model, device_ids=[0, 1, 2, 3])
    #model = torch.nn.DataParallel(model, device_ids=[0])
    model.cuda(0)

    # Check if model has custom optimizer / loss
    # modify to adam, modify the learning rate
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.l_rate,
                                 betas=(0.9, 0.999))
    # optimizer = torch.optim.SGD(
    #     model.parameters(), lr=args.l_rate,momentum=0.90, weight_decay=5e-5)
    # optimizer = torch.optim.Adam(
    #     model.parameters(), lr=args.l_rate,weight_decay=5e-4,betas=(0.9,0.999),amsgrad=True)
    loss_fn = l1
    trained = 0
    scale = 100

    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("Loading model and optimizer from checkpoint '{}'".format(
                args.resume))
            checkpoint = torch.load(args.resume)
            #model_dict=model.state_dict()
            #opt=torch.load('/home/lidong/Documents/cmf/cmf/exp1/l2/sgd/log/83/rsnet_nyu_best_model.pkl')
            model.load_state_dict(checkpoint['model_state'])
            #optimizer.load_state_dict(checkpoint['optimizer_state'])
            #opt=None
            print("Loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            trained = checkpoint['epoch']
            #best_error=checkpoint['error']+1
            #mean_loss=checkpoint['error']
            best_error = 100
            mean_loss = 100
            print(mean_loss)
            #trained=0
            # loss_rec=np.load('/home/lidong/Documents/CMF/loss_8.npy')
            # loss_rec=list(loss_rec)
            # print(train_length)
            # loss_rec=loss_rec[:train_length*trained]

    else:
        print("No checkpoint found at '{}'".format(args.resume))
        print('Initialize from resnet34!')
        resnet34 = torch.load(
            '/home/lidong/Documents/CMF/466_cmfsm_kitti_0.591322544373964_error3_1.8297886095548932_six_best_model.pkl'
        )
        #optimizer.load_state_dict(resnet34['optimizer_state'])
        #model
        #model.load_state_dict(resnet34['state_dict'])
        model_dict = model.state_dict()
        pre_dict = {
            k: v
            for k, v in resnet34['model_state'].items() if k in model_dict
        }
        key = []
        for k, v in pre_dict.items():
            if v.shape != model_dict[k].shape:
                key.append(k)
        for k in key:
            pre_dict.pop(k)

        model_dict.update(pre_dict)
        model.load_state_dict(model_dict)
        #optimizer
        # opti_dict=optimizer.state_dict()
        # pre_dict={k: v for k, v in resnet34['optimizer_state'].items() if k in opti_dict}
        # # for k,v in pre_dict.items():
        # #     print(k)
        # #     if k=='state':
        # #         for a,b in v.items():
        # #             print(a)
        # #             for c,d in b.items():
        # #                 print(c,d)
        # exit()
        # #pre_dict=resnet34['optimizer_state']
        # opti_dict.update(pre_dict)
        # optimizer.load_state_dict(opti_dict)
        print('load success!')
        trained = 0

    #best_error=5
    # it should be range(checkpoint['epoch'], args.n_epoch)
    for epoch in range(trained, args.n_epoch):
        ones = torch.ones(1).cuda(0)
        zeros = torch.zeros(1).cuda(0)
        print('training!')
        model.train()
        epe_rec = []
        loss_3_re = []
        for i, (left, right, disparity, image) in enumerate(trainloader):
            # if epoch==trained:
            #     break
            #break
            #with torch.no_grad():
            #print(left.shape)
            #print(torch.max(image),torch.min(image))
            flag = 1
            count = 0
            start_time = time.time()
            left = left.cuda(0)
            right = right.cuda(0)
            disparity = disparity.cuda(0)
            mask = (disparity < 192) & (disparity > 0)
            mask.detach_()
            iterative_count = 0
            while (flag):
                optimizer.zero_grad()
                #print(P.shape)
                output1, output2, output3 = model(left, right)
                #print(output3.shape)
                # output1 = torch.squeeze(output1, 1)
                # loss = F.smooth_l1_loss(output1[mask], disparity[mask],reduction='mean')
                output1 = torch.squeeze(output1, 1)
                output2 = torch.squeeze(output2, 1)
                output3 = torch.squeeze(output3, 1)
                # #outputs=outputs
                #test the l2 loss to reduce the error3
                #increase the weight for the error more than 3.
                # loss = 0.5 * softl1loss(output1[mask], disparity[mask]) \
                #      + 0.7 * softl1loss(output2[mask], disparity[mask]) \
                #      + softl1loss(output3[mask], disparity[mask])
                loss = 0.5 * F.smooth_l1_loss(output1[mask], disparity[mask],reduction='mean') \
                     + 0.7 * F.smooth_l1_loss(output2[mask], disparity[mask], reduction='mean') \
                     + F.smooth_l1_loss(output3[mask], disparity[mask], reduction='mean')
                #loss=loss/2.2

                #output3 = model(left,right)
                #output1=output3+0
                output3 = torch.squeeze(output3, 1)
                epe = torch.mean(torch.abs(output3[mask] - disparity[mask]))
                error_map = torch.where(
                    (torch.abs(output3[mask] - disparity[mask]) < 3) |
                    (torch.abs(output3[mask] - disparity[mask]) <
                     0.05 * disparity[mask]), ones, zeros)
                #total=torch.where(disparity[mask]>0,ones,zeros)
                loss_3 = 100 - torch.sum(error_map) / torch.sum(mask) * 100
                #loss = F.smooth_l1_loss(output3[mask], disparity[mask], reduction='mean')
                #loss.backward()
                #parameters=model.named_parameters()
                #optimizer.step()
                if args.visdom:
                    if iterative_count > 0:
                        error_map = torch.where(
                            (torch.abs(output3 - disparity) >= 3) |
                            (torch.abs(output3 - disparity) >=
                             0.05 * disparity), ones, zeros) * mask.float()
                        #print(output3.shape)
                        pre = output3.data.cpu().numpy().astype('float32')
                        pre = pre[0, :, :]
                        #print(np.max(pre))
                        #print(pre.shape)
                        pre = np.reshape(pre, [256, 512]).astype('float32')
                        vis.image(
                            pre,
                            opts=dict(title='predict!', caption='predict.'),
                            win=pre_window,
                        )

                        error_map = error_map.data.cpu().numpy().astype(
                            'float32')
                        error_map = error_map[0, ...]
                        #image=image[0,...]
                        #print(image.shape,np.min(image))
                        error_map = np.reshape(error_map,
                                               [256, 512]).astype('float32')
                        vis.image(
                            error_map,
                            opts=dict(title='error!', caption='error.'),
                            win=error3_window,
                        )
                    else:
                        error_map = torch.where(
                            (torch.abs(output3 - disparity) >= 3) |
                            (torch.abs(output3 - disparity) >=
                             0.05 * disparity), ones, zeros) * mask.float()
                        #print(output3.shape)
                        pre = output3.data.cpu().numpy().astype('float32')
                        pre = pre[0, :, :]
                        #print(np.max(pre))
                        #print(pre.shape)
                        pre = np.reshape(pre, [256, 512]).astype('float32')
                        vis.image(
                            pre,
                            opts=dict(title='predict!', caption='predict.'),
                            win=pre_window,
                        )

                        ground = disparity.data.cpu().numpy().astype('float32')
                        ground = ground[0, :, :]
                        ground = np.reshape(ground,
                                            [256, 512]).astype('float32')
                        vis.image(
                            ground,
                            opts=dict(title='ground!', caption='ground.'),
                            win=ground_window,
                        )
                        image = image.data.cpu().numpy().astype('float32')
                        image = image[0, ...]
                        #image=image[0,...]
                        #print(image.shape,np.min(image))
                        image = np.reshape(image,
                                           [3, 256, 512]).astype('float32')
                        vis.image(
                            image,
                            opts=dict(title='image!', caption='image.'),
                            win=image_window,
                        )
                        error_map = error_map.data.cpu().numpy().astype(
                            'float32')
                        error_map = error_map[0, ...]
                        #image=image[0,...]
                        #print(image.shape,np.min(image))
                        error_map = np.reshape(error_map,
                                               [256, 512]).astype('float32')
                        vis.image(
                            error_map,
                            opts=dict(title='error!', caption='error.'),
                            win=error3_window,
                        )

                if iterative_count == 0:
                    #min_loss3_t=epe
                    min_loss3_t = loss_3
                if epoch <= trained + 1000:
                    loss_bp = loss
                    loss.backward()
                    epe_rec.append(epe.item())
                    optimizer.step()
                    break
                # else:
                #     loss_bp=0.1*loss
                #     loss.backward()
                #     epe_rec.append(epe.item())
                #     optimizer.step()
                #     break
                #if (epe<=1.25*mean_loss) :
                if (loss_3 <= 1.25):
                    #loss_bp=loss*torch.pow(100,-(mean_loss-lin)/mean_loss)
                    #loss_bp=loss*zero
                    print('no back')
                    # if epe<=0.75*mean_loss:
                    #     loss_bp=0.1*loss
                    # else:
                    #     loss_bp=0.1*loss
                    #optimizer.step()
                    loss_bp.backward()
                    epe_rec.append(epe.item())
                    optimizer.step()
                    break
                else:
                    #print(torch.pow(10,torch.min(one,(lin-mean_loss)/mean_loss)).item())
                    print('back')
                    #loss=loss*torch.pow(10,torch.min(one,(lin-mean_loss)/mean_loss))
                    # if epe>1.5*mean_loss:
                    #     loss_bp=10*loss
                    # else:
                    #     loss_bp=loss
                    if loss_3 > 2:
                        loss_bp = loss
                    else:
                        loss_bp = loss
                    loss_bp.backward()
                    optimizer.step()
                #if epe<=mean_loss or iterative_count>5 :
                if loss_3 <= 1.25 or iterative_count > 8:

                    if loss_3 < min_loss3_t:
                        epe_rec.append(epe.item())
                        # mean_loss=np.mean(epe_rec)
                        break
                    else:
                        min_loss3_t = torch.min(loss_3, min_loss3_t)
                        #if lin<1.5*mean_loss:
                        iterative_count += 1
                        print(
                            "repeat data [%d/%d/%d/%d] Loss: %.4f error_3: %.4f "
                            % (i, train_length, epoch, args.n_epoch,
                               epe.item(), loss_3.item()))
                else:
                    min_loss3_t = torch.min(loss_3, min_loss3_t)
                    #if lin<1.5*mean_loss:
                    iterative_count += 1
                    print(
                        "repeat data [%d/%d/%d/%d] Loss: %.4f error_3: %.4f " %
                        (i, train_length, epoch, args.n_epoch, epe.item(),
                         loss_3.item()))

            #torch.cuda.empty_cache()
            #print(loss.item)
            if args.visdom == True:
                vis.line(X=torch.ones(1).cpu() * i + torch.ones(1).cpu() *
                         (epoch - trained) * train_length,
                         Y=epe.item() * torch.ones(1).cpu(),
                         win=loss_window,
                         update='append')
                #print(torch.max(output3).item(),torch.min(output3).item())
                # if i%1==0:
                #     error_map=torch.where((torch.abs(output3 - disparity)>=3) | (torch.abs(output3 - disparity)>=0.05*disparity),ones,zeros) * mask.float()
                #     #print(output3.shape)
                #     pre = output3.data.cpu().numpy().astype('float32')
                #     pre = pre[0,:,:]
                #     #print(np.max(pre))
                #     #print(pre.shape)
                #     pre = np.reshape(pre, [256,512]).astype('float32')
                #     vis.image(
                #         pre,
                #         opts=dict(title='predict!', caption='predict.'),
                #         win=pre_window,
                #     )

                #     ground=disparity.data.cpu().numpy().astype('float32')
                #     ground = ground[0, :, :]
                #     ground = np.reshape(ground, [256,512]).astype('float32')
                #     vis.image(
                #         ground,
                #         opts=dict(title='ground!', caption='ground.'),
                #         win=ground_window,
                #     )
                #     image=image.data.cpu().numpy().astype('float32')
                #     image = image[0,...]
                #     #image=image[0,...]
                #     #print(image.shape,np.min(image))
                #     image = np.reshape(image, [3,256,512]).astype('float32')
                #     vis.image(
                #         image,
                #         opts=dict(title='image!', caption='image.'),
                #         win=image_window,
                #     )
                #     error_map=error_map.data.cpu().numpy().astype('float32')
                #     error_map = error_map[0,...]
                #     #image=image[0,...]
                #     #print(image.shape,np.min(image))
                #     error_map = np.reshape(error_map, [256,512]).astype('float32')
                #     vis.image(
                #         error_map,
                #         opts=dict(title='error!', caption='error.'),
                #         win=error3_window,
                #     )
            loss_rec.append(loss.item())
            print(time.time() - start_time)
            print("data [%d/%d/%d/%d] Loss: %.4f, loss_3:%.4f" %
                  (i, train_length, epoch, args.n_epoch, epe.item(),
                   loss_3.item()))
            loss_3_re.append(loss_3.item())
        print('epe:', np.mean(epe_rec))
        print('loss_3:', np.mean(loss_3_re))
        mean_loss = np.mean(epe_rec)
        #eval
        print('testing!')
        model.eval()
        epe_rec = []
        loss_3_re = []
        for i, (left, right, disparity, image) in tqdm(enumerate(evalloader)):
            #break
            #with torch.no_grad():
            #print(left.shape)
            #print(torch.max(image),torch.min(image))
            with torch.no_grad():

                count = 0
                start_time = time.time()
                left = left.cuda(0)
                right = right.cuda(0)
                disparity = disparity.cuda(0)
                mask = (disparity < 192) & (disparity > 0)
                mask.detach_()
                iterative_count = 0

                optimizer.zero_grad()
                #print(P.shape)
                output1, output2, output3 = model(left, right)
                #print(output3.shape)
                # output1 = torch.squeeze(output1, 1)
                # loss = F.smooth_l1_loss(output1[mask], disparity[mask],reduction='mean')
                # output1 = torch.squeeze(output1, 1)
                # output2 = torch.squeeze(output2, 1)
                # output3 = torch.squeeze(output3, 1)
                # # #outputs=outputs
                # loss = 0.5 * F.mse_loss(output1[mask], disparity[mask],reduction='mean') \
                #      + 0.7 * F.mse_loss(output2[mask], disparity[mask], reduction='mean') \
                #      + F.mse_loss(output3[mask], disparity[mask], reduction='mean')
                #loss=loss/2.2
                #output3 = model(left,right)
                #output1=output3
                output3 = torch.squeeze(output3, 1)
                error_map = torch.where(
                    (torch.abs(output3[mask] - disparity[mask]) < 3) |
                    (torch.abs(output3[mask] - disparity[mask]) <
                     0.05 * disparity[mask]), ones, zeros)
                #total=torch.where(disparity[mask]>0,ones,zeros)
                loss_3 = 100 - torch.sum(error_map) / torch.sum(mask) * 100
                epe = torch.mean(torch.abs(output3[mask] - disparity[mask]))
                epe_rec.append(epe.item())
                loss_3_re.append(loss_3.item())
            if args.visdom == True:
                vis.line(X=torch.ones(1).cpu() * i + torch.ones(1).cpu() *
                         (epoch - trained) * test_length,
                         Y=loss_3.item() * torch.ones(1).cpu(),
                         win=error_window,
                         update='append')
                #print(torch.max(output3).item(),torch.min(output3).item())

            #loss_rec.append(loss.item())
            print(time.time() - start_time)
            print("data [%d/%d/%d/%d] Loss: %.4f, loss_3:%.4f" %
                  (i, test_length, epoch, args.n_epoch, epe.item(),
                   loss_3.item()))
            if loss_3.item() > 10:
                pre = output3.data.cpu().numpy().astype('float32')
                pre = pre[0, :, :]
                cv2.imwrite(
                    os.path.join('/home/lidong/Documents/CMF/visual/', str(i),
                                 'pre.png'), pre)
                ground = disparity.data.cpu().numpy().astype('float32')
                ground = ground[0, :, :]
                cv2.imwrite(
                    os.path.join('/home/lidong/Documents/CMF/visual/', str(i),
                                 'ground.png'), ground)
                image = image.data.cpu().numpy().astype('uint8')
                image = image[0, ...]
                print(image.shape)
                image = np.transpose(image, [1, 2, 0])[..., ::-1]
                cv2.imwrite(
                    os.path.join('/home/lidong/Documents/CMF/visual/', str(i),
                                 'image.png'), image)
                #exit()
        print('epe:', np.mean(epe_rec))
        print('loss_3:', np.mean(loss_3_re))
        error = np.mean(loss_3_re)
        # if epoch>400:
        #     optimizer = torch.optim.Adam(
        #     model.parameters(), lr=args.l_rate/10,betas=(0.9,0.999))
        if error < best_error:
            best_error = error
            state = {
                'epoch': epoch + 1,
                'model_state': model.state_dict(),
                'optimizer_state': optimizer.state_dict(),
                'error': np.mean(epe_rec),
                'error3': np.mean(loss_3_re)
            }
            #np.save('loss_4.npy',loss_rec)
            torch.save(
                state, "{}_{}_{}_{}_error3_{}_four_disparity_model.pkl".format(
                    epoch, args.arch, args.dataset, np.mean(epe_rec),
                    np.mean(loss_3_re)))
        if epoch % 50 == 0:
            state = {
                'epoch': epoch + 1,
                'model_state': model.state_dict(),
                'optimizer_state': optimizer.state_dict(),
                'error': np.mean(epe_rec),
                'error3': np.mean(loss_3_re)
            }
            #np.save('loss_4.npy',loss_rec)
            torch.save(
                state, "{}_{}_{}_{}_error3_{}_four_disparity_model.pkl".format(
                    epoch, args.arch, args.dataset, np.mean(epe_rec),
                    np.mean(loss_3_re)))
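
# Hedged sketch (not part of the original code) of the checkpoint format shared
# by the variants above: a dict holding the next epoch index, the model and
# optimizer state_dicts and the current metrics. The helper name and the file
# name pattern are assumptions.
import torch

def save_checkpoint(model, optimizer, epoch, epe, error3, tag):
    state = {
        'epoch': epoch + 1,
        'model_state': model.state_dict(),
        'optimizer_state': optimizer.state_dict(),
        'error': epe,
        'error3': error3,
    }
    torch.save(state, "{}_{}_{}_error3_{}_model.pkl".format(epoch, tag, epe, error3))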