Example #1
0
def work(model, dataset, args, cam_out_dir):
    """Generate multi-scale CAMs for every image in ``dataset`` and save them.

    For each sample the model is run on every scaled copy in ``pack['img']``;
    the per-scale outputs are resized to a strided (1/4) grid and to the
    original resolution, summed, restricted to the image-level labels,
    max-normalized, and written to ``cam_out_dir`` as ``<name>.npy`` holding
    ``{keys, cam, high_res}``.
    """
    n_gpus = torch.cuda.device_count()
    # max(..., 1) keeps the worker count valid on hosts reporting zero GPUs.
    data_loader = torch.utils.data.DataLoader(dataset=dataset, shuffle=False,
                                              num_workers=args.num_workers // max(n_gpus, 1),
                                              pin_memory=False)

    with torch.no_grad():
        model.cuda()
        for iter, pack in tqdm.tqdm(enumerate(data_loader)):
            img_name = pack['name'][0]
            label = pack['label'][0]
            size = pack['size']

            strided_size = imutils.get_strided_size(size, 4)
            strided_up_size = imutils.get_strided_up_size(size, 16)

            outputs = [model(img[0].cuda(non_blocking=True)) for img in pack['img']]

            strided_cam = torch.sum(torch.stack(
                [F.interpolate(torch.unsqueeze(out, 0), strided_size, mode='bilinear',
                               align_corners=False)[0] for out in outputs]), 0)

            highres_cam = [F.interpolate(torch.unsqueeze(out, 1), strided_up_size,
                                         mode='bilinear', align_corners=False) for out in outputs]
            highres_cam = torch.sum(torch.stack(highres_cam, 0), 0)[:, 0, :size[0], :size[1]]

            # Keep only the classes present in the image-level label vector.
            valid_cat = torch.nonzero(label)[:, 0]

            # Normalize each class map by its spatial maximum (not its mean)
            # so values lie in [0, 1]; this matches the convention used by the
            # sibling worker functions in this file.
            strided_cam = strided_cam[valid_cat]
            strided_cam /= F.adaptive_max_pool2d(strided_cam, (1, 1)) + 1e-5

            highres_cam = highres_cam[valid_cat]
            highres_cam /= F.adaptive_max_pool2d(highres_cam, (1, 1)) + 1e-5

            np.save(os.path.join(cam_out_dir, img_name + '.npy'),
                    {'keys': valid_cat, 'cam': strided_cam.cpu(), 'high_res': highres_cam.cpu().numpy()})
Example #2
0
def my_work(args):
    """Convert 256x256 mask images under ``args.mask_root`` into CAM files.

    Each mask is loaded, bilinearly resized to the strided (1/4-resolution)
    and strided-up (full-resolution) grids, and written to
    ``args.cam_out_dir`` as ``<name>.npy`` holding ``{keys, cam, high_res}``.
    ``keys`` is a fixed 80-entry zero vector (no per-image labels here).
    """
    mask_root = args.mask_root
    with torch.no_grad():
        for img_name in os.listdir(mask_root):
            size = torch.tensor([256, 256])
            strided_size = imutils.get_strided_size(size, 4)
            strided_up_size = imutils.get_strided_up_size(size, 16)
            valid_cat = torch.zeros(80)
            # os.path.join tolerates a mask_root without a trailing separator,
            # unlike the previous raw string concatenation.
            mask_path = os.path.join(mask_root, img_name)
            mask = torch.from_numpy(np.array(Image.open(mask_path))).float()
            # Shape (1, 1, H, W) as required by F.interpolate.
            mask2 = mask.unsqueeze(0).unsqueeze(0)
            # mask2 = mask2.cuda()
            # F.upsample is deprecated; F.interpolate with align_corners=False
            # (the modern default) is the supported replacement.
            strided_cams = F.interpolate(mask2, strided_size, mode='bilinear',
                                         align_corners=False)
            highres_cams = F.interpolate(mask2, strided_up_size, mode='bilinear',
                                         align_corners=False)
            strided_cams = strided_cams.squeeze(0)
            highres_cams = highres_cams.squeeze(0)
            print(img_name)
            # save cam; [:-4] strips the input file extension.
            np.save(
                os.path.join(args.cam_out_dir, img_name[:-4] + '.npy'), {
                    "keys": valid_cat,
                    "cam": strided_cams.cpu(),
                    "high_res": highres_cams.cpu().numpy()
                })
Example #3
0
def mywork(model, args):
    """Generate CAMs for every JPEG in a fixed VOC12 inference directory.

    Each image is rescaled by every factor in ``args.cam_scales``, normalized,
    stacked with its horizontal flip into a (2, C, H, W) batch, and run
    through ``model``.  The per-scale outputs are fused into a strided (1/4)
    and a high-resolution CAM, max-normalized, and saved under
    ``args.cam_out_dir``.
    """
    # TODO(review): hard-coded dataset path -- consider making it an argument.
    img_dir = "/home/pfc/code/object_detect/irn/voc12/data/VOC12/infer/JPEGImages"
    img_normal = voc12.dataloader.TorchvisionNormalize()
    with torch.no_grad():
        for img_name in os.listdir(img_dir):
            curimg_path = os.path.join(img_dir, img_name)
            img = imageio.imread(curimg_path)
            size = (img.shape[0], img.shape[1])

            # One (2, C, H, W) batch per scale: the image and its mirror.
            ms_img_list = []
            for s in args.cam_scales:
                if s == 1:
                    s_img = img
                else:
                    s_img = imutils.pil_rescale(img, s, order=3)
                s_img = img_normal(s_img)
                s_img = imutils.HWC_to_CHW(s_img)
                ms_img_list.append(
                    np.stack([s_img, np.flip(s_img, -1)], axis=0))
            # NOTE: the single-scale case is deliberately NOT unwrapped to
            # ms_img_list[0]; the loop below expects one batch per scale, and
            # unwrapping made it iterate over the two flipped copies instead.

            strided_size = imutils.get_strided_size(size, 4)
            strided_up_size = imutils.get_strided_up_size(size, 16)

            outputs = [model(torch.Tensor(batch)) for batch in ms_img_list]

            strided_cam = torch.sum(
                torch.stack([
                    F.interpolate(torch.unsqueeze(o, 0),
                                  strided_size,
                                  mode='bilinear',
                                  align_corners=False)[0] for o in outputs
                ]), 0)
            highres_cam = [
                F.interpolate(torch.unsqueeze(o, 1),
                              strided_up_size,
                              mode='bilinear',
                              align_corners=False) for o in outputs
            ]
            highres_cam = torch.sum(torch.stack(highres_cam, 0),
                                    0)[:, 0, :size[0], :size[1]]

            # Select class 0 only.  Use an int64 index tensor: a uint8 tensor
            # is interpreted as a boolean mask, and an all-zero mask would
            # select nothing at all.
            valid_cat = torch.zeros(1, dtype=torch.long)

            strided_cam = strided_cam[valid_cat]
            strided_cam /= F.adaptive_max_pool2d(strided_cam, (1, 1)) + 1e-5

            highres_cam = highres_cam[valid_cat]
            highres_cam /= F.adaptive_max_pool2d(highres_cam, (1, 1)) + 1e-5

            # save cams
            np.save(
                os.path.join(args.cam_out_dir, img_name + '.npy'), {
                    "keys": valid_cat,
                    "cam": strided_cam.cpu(),
                    "high_res": highres_cam.cpu().numpy()
                })
Example #4
0
def _work(process_id, model, dataset, args):
    """Per-GPU worker: generate and save multi-scale CAMs for one data bin.

    ``dataset[process_id]`` yields this worker's shard.  For every image the
    model is evaluated on each scale, the outputs are fused into strided and
    high-resolution CAMs, max-normalized over the labelled classes, and saved
    both as a per-class dict under ./CAM and in the combined
    ``{keys, cam, high_res}`` format under ``args.cam_out_dir``.
    """
    databin = dataset[process_id]
    n_gpus = torch.cuda.device_count()
    data_loader = DataLoader(databin, shuffle=False,
                             num_workers=args.num_workers // n_gpus,
                             pin_memory=False)
    # Make sure the extra per-class output directory exists before saving.
    os.makedirs("./CAM", exist_ok=True)

    with torch.no_grad(), cuda.device(process_id):

        model.cuda()

        for iter, pack in enumerate(data_loader):

            img_name = pack['name'][0]
            label = pack['label'][0]
            size = pack['size']

            strided_size = imutils.get_strided_size(size, 4)
            strided_up_size = imutils.get_strided_up_size(size, 16)

            outputs = [model(img[0].cuda(non_blocking=True))
                       for img in pack['img']]

            strided_cam = torch.sum(torch.stack(
                [F.interpolate(torch.unsqueeze(o, 0), strided_size, mode='bilinear',
                               align_corners=False)[0] for o in outputs]), 0)

            highres_cam = [F.interpolate(torch.unsqueeze(o, 1), strided_up_size,
                                         mode='bilinear', align_corners=False) for o in outputs]
            highres_cam = torch.sum(torch.stack(highres_cam, 0), 0)[:, 0, :size[0], :size[1]]

            # Classes actually present in the image-level label vector.
            valid_cat = torch.nonzero(label)[:, 0]

            strided_cam = strided_cam[valid_cat]
            strided_cam /= F.adaptive_max_pool2d(strided_cam, (1, 1)) + 1e-5

            highres_cam = highres_cam[valid_cat]
            highres_cam /= F.adaptive_max_pool2d(highres_cam, (1, 1)) + 1e-5

            # Per-class dict keyed by class index (legacy ./CAM output).
            cam_dict = {}
            for i, k in enumerate(valid_cat):
                cam_dict[k.item()] = strided_cam.cpu().numpy()[i]
            np.save(os.path.join("./CAM", img_name + '.npy'), cam_dict)

            # Combined output consumed by downstream steps.
            np.save(os.path.join(args.cam_out_dir, img_name + '.npy'),
                    {"keys": valid_cat, "cam": strided_cam.cpu(), "high_res": highres_cam.cpu().numpy()})

            # Progress print from the last GPU only; max(..., 1) prevents a
            # ZeroDivisionError when the shard holds fewer than 20 items.
            step = max(len(databin) // 20, 1)
            if process_id == n_gpus - 1 and iter % step == 0:
                print("%d " % ((5 * iter + 1) // step), end='')
Example #5
0
def work(model, databin, args):
    """Generate multi-scale CAMs for every image in ``databin``.

    The model is run on each scaled copy in ``pack['img']``; the per-scale
    outputs are fused into strided (1/4) and high-resolution CAMs,
    max-normalized over the labelled classes, and saved to
    ``args.cam_out_dir`` as ``<name>.npy`` holding ``{keys, cam, high_res}``.
    """
    data_loader = DataLoader(databin,
                             shuffle=False,
                             num_workers=args.num_workers,
                             pin_memory=False)

    with torch.no_grad():
        for iter, pack in enumerate(data_loader):
            img_name = pack['name'][0]
            label = pack['label'][0]
            size = pack['size']

            strided_size = imutils.get_strided_size(size, 4)
            strided_up_size = imutils.get_strided_up_size(size, 16)

            outputs = [model(img[0]) for img in pack['img']]

            strided_cam = torch.sum(
                torch.stack([
                    F.interpolate(torch.unsqueeze(o, 0),
                                  strided_size,
                                  mode='bilinear',
                                  align_corners=False)[0] for o in outputs
                ]), 0)
            highres_cam = [
                F.interpolate(torch.unsqueeze(o, 1),
                              strided_up_size,
                              mode='bilinear',
                              align_corners=False) for o in outputs
            ]
            highres_cam = torch.sum(torch.stack(highres_cam, 0),
                                    0)[:, 0, :size[0], :size[1]]

            # Classes actually present in the image-level label vector.
            valid_cat = torch.nonzero(label)[:, 0]

            strided_cam = strided_cam[valid_cat]
            strided_cam /= F.adaptive_max_pool2d(strided_cam, (1, 1)) + 1e-5

            highres_cam = highres_cam[valid_cat]
            highres_cam /= F.adaptive_max_pool2d(highres_cam, (1, 1)) + 1e-5

            # save cams
            np.save(
                os.path.join(args.cam_out_dir, img_name + '.npy'), {
                    "keys": valid_cat,
                    "cam": strided_cam.cpu(),
                    "high_res": highres_cam.cpu().numpy()
                })

            # max(..., 1) prevents a ZeroDivisionError when len(databin) < 20.
            step = max(len(databin) // 20, 1)
            if iter % step == 0:
                print("%d " % ((5 * iter + 1) // step), end='')
def make_cam(args, model2, data_loader, cam_out_dir):
    """Generate two-class CAMs for every item in ``data_loader``.

    Each pack carries one image at several scales; per the original debug
    notes, each scale is a (1, 2, 3, H, W) tensor holding the image and its
    flip.  ``model2`` runs once per scale; the outputs are fused into strided
    (1/4) and high-resolution maps, max-normalized, and saved to
    ``cam_out_dir``.  The class set is fixed to the two categories [0, 1].
    """
    with torch.no_grad():
        for iter, pack in enumerate(data_loader):
            img_name = pack['name'][0]
            size = pack['size']
            strided_size = imutils.get_strided_size(size, 4)
            strided_up_size = imutils.get_strided_up_size(size, 16)
            # Per the original notes, the model input per scale is e.g.
            # [2, 3, 281, 500] (image + flip batch).
            outputs = [
                model2(img[0].to('cuda'))
                for img in pack['img']
            ]
            strided_cam = torch.sum(
                torch.stack([
                    F.interpolate(torch.unsqueeze(o, 0),
                                  strided_size,
                                  mode='bilinear',
                                  align_corners=False)[0] for o in outputs
                ]), 0)
            # Upsample each output (e.g. [2, 1, 18, 32]) to full resolution.
            highres_cam = [
                F.interpolate(
                    torch.unsqueeze(o, 1),
                    strided_up_size,
                    mode='bilinear',
                    align_corners=False) for o in outputs
            ]
            highres_cam = torch.sum(torch.stack(highres_cam, 0),
                                    0)[:, 0, :size[0], :size[1]]
            # Both classes are always kept, regardless of the labels.
            valid_cat = torch.tensor([0, 1])
            strided_cam = strided_cam[valid_cat]
            strided_cam /= F.adaptive_max_pool2d(strided_cam,
                                                 (1, 1)) + 1e-5  # normalize
            highres_cam = highres_cam[valid_cat]
            highres_cam /= F.adaptive_max_pool2d(highres_cam, (1, 1)) + 1e-5

            np.save(
                os.path.join(cam_out_dir, img_name + '.npy'), {
                    "keys": valid_cat,
                    "cam": strided_cam.cpu(),
                    "high_res": highres_cam.cpu().numpy()
                })
def _work(process_id, model, dataset, args):
    """Per-GPU worker: adversarial-climbing CAM generation (GradCAM-based).

    For each image (skipped if its output already exists), and for each of
    four scales and each labelled class, the input is iteratively perturbed
    along the loss gradient (``adv_climb``) while accumulating GradCAM
    regions.  The accumulated per-scale maps are fused into strided (1/4)
    and high-resolution CAMs, max-normalized, and saved to
    ``args.cam_out_dir`` as ``{keys, cam, high_res}``.

    NOTE(review): no ``torch.no_grad()`` here -- gradients are required for
    GradCAM backward passes and the adversarial update.
    """
    databin = dataset[process_id]
    n_gpus = torch.cuda.device_count()
    data_loader = DataLoader(databin,
                             shuffle=False,
                             num_workers=args.num_workers // n_gpus,
                             pin_memory=True)
    print("dcpu", args.num_workers // n_gpus)
    # NOTE(review): cam_sizes is never read below -- apparent leftover.
    cam_sizes = [[], [], [], []]  # scale 0,1,2,3
    with cuda.device(process_id):
        model.cuda()
        gcam = GradCAM(model=model, candidate_layers=[args.target_layer])
        for iter, pack in enumerate(data_loader):
            img_name = pack['name'][0]
            # Resume support: skip images whose CAM file already exists.
            if os.path.exists(os.path.join(args.cam_out_dir,
                                           img_name + '.npy')):
                continue
            size = pack['size']
            strided_size = imutils.get_strided_size(size, 4)
            strided_up_size = imutils.get_strided_up_size(size, 16)
            outputs_cam = []
            n_classes = len(list(torch.nonzero(pack['label'][0])[:, 0]))

            # Scale order starts at index 1 so that mul_for_scale is assigned
            # on the first pass (it is only set in the size_idx == 1 branch
            # and then reused by the other scales).
            for s_count, size_idx in enumerate([1, 0, 2, 3]):
                orig_img = pack['img'][size_idx].clone()
                for c_idx, c in enumerate(
                        list(torch.nonzero(pack['label'][0])[:, 0])):
                    # Restore the unperturbed input for each class.
                    pack['img'][size_idx] = orig_img
                    img_single = pack['img'][size_idx].detach()[
                        0]  # [:, 1]: flip

                    if size_idx != 1:
                        total_adv_iter = args.adv_iter
                    else:  # size_idx == 1: also sets mul_for_scale
                        if args.adv_iter > 10:
                            total_adv_iter = args.adv_iter // 2
                            mul_for_scale = 2
                        elif args.adv_iter < 6:
                            total_adv_iter = args.adv_iter
                            mul_for_scale = 1
                        else:
                            total_adv_iter = 5
                            mul_for_scale = float(total_adv_iter) / 5

                    for it in range(total_adv_iter):
                        img_single.requires_grad = True

                        outputs = gcam.forward(
                            img_single.cuda(non_blocking=True))

                        # Lazily allocate the per-class accumulator at the
                        # output resolution of this scale.
                        if c_idx == 0 and it == 0:
                            cam_all_classes = torch.zeros([
                                n_classes, outputs.shape[2], outputs.shape[3]
                            ])

                        gcam.backward(ids=c)

                        regions = gcam.generate(target_layer=args.target_layer)
                        # Merge the original and the horizontally-flipped view.
                        regions = regions[0] + regions[1].flip(-1)

                        # Reference CAM from the unperturbed input.
                        if it == 0:
                            init_cam = regions.detach()

                        cam_all_classes[c_idx] += regions[0].data.cpu(
                        ) * mul_for_scale
                        logit = outputs
                        logit = F.relu(logit)
                        logit = torchutils.gap2d(logit, keepdims=True)[:, :, 0,
                                                                       0]

                        valid_cat = torch.nonzero(pack['label'][0])[:, 0]
                        # Push up the target class logit relative to the rest.
                        logit_loss = -2 * (logit[:,
                                                 c]).sum() + torch.sum(logit)

                        # Mask of already-discriminative regions to regularize
                        # against drifting away from the initial CAM.
                        expanded_mask = torch.zeros(regions.shape)
                        expanded_mask = add_discriminative(
                            expanded_mask, regions, score_th=args.score_th)

                        L_AD = torch.sum((torch.abs(regions - init_cam)) *
                                         expanded_mask.cuda())

                        loss = -logit_loss - L_AD * args.AD_coeff

                        model.zero_grad()
                        img_single.grad.zero_()
                        loss.backward()

                        data_grad = img_single.grad.data

                        # Gradient-ascent step on the input image.
                        perturbed_data = adv_climb(img_single,
                                                   args.AD_stepsize, data_grad)
                        img_single = perturbed_data.detach()

                outputs_cam.append(cam_all_classes)

            strided_cam = torch.sum(
                torch.stack([
                    F.interpolate(torch.unsqueeze(o, 0),
                                  strided_size,
                                  mode='bilinear',
                                  align_corners=False)[0] for o in outputs_cam
                ]), 0)
            highres_cam = [
                F.interpolate(torch.unsqueeze(o, 1),
                              strided_up_size,
                              mode='bilinear',
                              align_corners=False) for o in outputs_cam
            ]

            highres_cam = torch.sum(torch.stack(highres_cam, 0),
                                    0)[:, 0, :size[0], :size[1]]
            # Max-normalize each class map into [0, 1].
            strided_cam /= F.adaptive_max_pool2d(strided_cam, (1, 1)) + 1e-5
            highres_cam /= F.adaptive_max_pool2d(highres_cam, (1, 1)) + 1e-5

            np.save(
                os.path.join(args.cam_out_dir, img_name + '.npy'), {
                    "keys": valid_cat,
                    "cam": strided_cam.cpu(),
                    "high_res": highres_cam.cpu().numpy()
                })
Example #8
0
def _work(process_id, model, dataset, args):
    """Per-GPU worker: compute and save CAMs for one shard of ``dataset``.

    Handles both the ADP datasets (``adp_morph``/``adp_func``, where the model
    also receives the original image and background classes are prepended to
    the key set) and standard datasets.  Labels come from the ground truth on
    train splits, otherwise from the model's own predictions.  Images with no
    active class are saved with empty arrays so downstream code can detect
    the case.
    """
    databin = dataset[process_id]
    data_loader = DataLoader(databin, shuffle=False, pin_memory=False)

    with torch.no_grad(), cuda.device(process_id):

        model.cuda()

        with tqdm(total=len(data_loader)) as pbar:
            for iter, pack in enumerate(data_loader):

                img_name = pack['name'][0]
                size = pack['size']

                strided_size = imutils.get_strided_size(size, 4)
                strided_up_size = imutils.get_strided_up_size(size, 16)

                # ADP models take (img, orig_img); the others take img alone.
                if args.dataset in ['adp_morph', 'adp_func']:
                    outputs, labels = zip(*[
                        model(img.cuda(
                            non_blocking=True), orig_img.cuda(
                                non_blocking=True))
                        for img, orig_img in zip(pack['img'], pack['orig_img'])
                    ])
                else:
                    outputs, labels = zip(*[
                        model(img.cuda(non_blocking=True))
                        for img in pack['img']
                    ])
                # Ground truth for train splits, predicted labels otherwise.
                if 'train' in args.split:
                    label = pack['label'][0]
                else:
                    label = labels[0][args.use_cls]

                valid_cat = torch.nonzero(label)[:, 0]
                # ADP: always include the background classes, then shift the
                # foreground indices past them.
                if args.dataset in ['adp_morph', 'adp_func']:
                    if torch.cuda.is_available():
                        valid_cat = torch.cat(
                            (torch.from_numpy(
                                np.array(range(len(args.class_names['bg'])),
                                         dtype=np.int64)).cuda(),
                             valid_cat.cuda() + len(args.class_names['bg'])))
                    else:
                        valid_cat = torch.cat(
                            (torch.from_numpy(
                                np.array(range(len(args.class_names['bg'])),
                                         dtype=np.int64)),
                             valid_cat + len(args.class_names['bg'])))

                if len(valid_cat) > 0:
                    strided_cam = torch.sum(
                        torch.stack([
                            F.interpolate(torch.unsqueeze(o, 0),
                                          strided_size,
                                          mode='bilinear',
                                          align_corners=False)[0]
                            for o in outputs
                        ]), 0)

                    highres_cam = [
                        F.interpolate(torch.unsqueeze(o, 1),
                                      strided_up_size,
                                      mode='bilinear',
                                      align_corners=False) for o in outputs
                    ]
                    highres_cam = torch.sum(torch.stack(tuple(highres_cam), 0),
                                            0)[:, 0, :size[0], :size[1]]

                    # Max-normalize the selected class maps into [0, 1].
                    strided_cam = strided_cam[valid_cat]
                    strided_cam /= F.adaptive_max_pool2d(strided_cam,
                                                         (1, 1)) + 1e-5

                    highres_cam = highres_cam[valid_cat]
                    highres_cam /= F.adaptive_max_pool2d(highres_cam,
                                                         (1, 1)) + 1e-5

                    # save cams (deepglobe variants skip the high-res map)
                    if args.dataset not in ['deepglobe', 'deepglobe_balanced']:
                        np.save(
                            os.path.join(args.cam_out_dir, img_name + '.npy'),
                            {
                                "keys": valid_cat.cpu().numpy(),
                                "cam": strided_cam.cpu().numpy(),
                                "high_res": highres_cam.cpu().numpy()
                            })
                    else:
                        np.save(
                            os.path.join(args.cam_out_dir, img_name + '.npy'),
                            {
                                "keys": valid_cat.cpu().numpy(),
                                "cam": strided_cam.cpu().numpy()
                            })
                else:
                    # No active class: save empty placeholders.
                    np.save(
                        os.path.join(args.cam_out_dir, img_name + '.npy'), {
                            "keys": np.empty(0),
                            "cam": np.empty(0),
                            "high_res": np.empty(0)
                        })
                pbar.update(1)
Example #9
0
def _work(process_id, model, dataset, args):
    """Per-GPU worker: generate multi-scale CAMs for one shard and save them.

    For each image the model runs on every scale in ``pack['img']``; the
    outputs are fused into strided (1/4) and high-resolution CAMs, restricted
    to the labelled classes, max-normalized, and written to
    ``args.cam_out_dir`` as ``{keys, cam, high_res}``.
    """
    databin = dataset[process_id]
    n_gpus = torch.cuda.device_count()
    data_loader = DataLoader(databin,
                             shuffle=False,
                             num_workers=args.num_workers // n_gpus,
                             pin_memory=False)

    with torch.no_grad(), cuda.device(process_id):

        model.cuda()

        for iter, pack in enumerate(tqdm(data_loader)):

            img_name = pack['name'][0]
            label = pack['label'][0]
            size = pack['size']

            strided_size = imutils.get_strided_size(size, 4)
            strided_up_size = imutils.get_strided_up_size(size, 16)

            # Run through each scale of image
            outputs = [
                model(img[0].cuda(non_blocking=True)) for img in pack['img']
            ]

            # Each output is resized to strided_size (lower than original) and summed
            strided_cam = torch.sum(
                torch.stack([
                    F.interpolate(torch.unsqueeze(o, 0),
                                  strided_size,
                                  mode='bilinear',
                                  align_corners=False)[0] for o in outputs
                ]), 0)

            # Each output is resized to strided_up_size and cropped to the
            # original image size.
            highres_cam = [
                F.interpolate(torch.unsqueeze(o, 1),
                              strided_up_size,
                              mode='bilinear',
                              align_corners=False) for o in outputs
            ]
            highres_cam = torch.sum(torch.stack(highres_cam, 0),
                                    0)[:, 0, :size[0], :size[1]]

            # Pick the cams corresponding to image-level labels
            # Normalize by max value across H x W dimension for each channel
            valid_cat = torch.nonzero(label, as_tuple=False)[:, 0]

            strided_cam = strided_cam[valid_cat]
            strided_cam /= F.adaptive_max_pool2d(strided_cam, (1, 1)) + 1e-5

            highres_cam = highres_cam[valid_cat]
            highres_cam /= F.adaptive_max_pool2d(highres_cam, (1, 1)) + 1e-5

            # save cams
            np.save(
                os.path.join(args.cam_out_dir, img_name + '.npy'), {
                    "keys": valid_cat,
                    "cam": strided_cam.cpu(),
                    "high_res": highres_cam.cpu().numpy()
                })

            # Progress print from the last GPU only; max(..., 1) prevents a
            # ZeroDivisionError when the shard holds fewer than 20 items.
            step = max(len(databin) // 20, 1)
            if process_id == n_gpus - 1 and iter % step == 0:
                print("%d " % ((5 * iter + 1) // step), end='')
                sys.stdout.flush()
Example #10
0
def _work(process_id, model, dataset, args):
    """Per-GPU worker: generate multi-scale CAMs for one shard and save them.

    Per the original debug notes, each pack holds one image at four scales
    (each scale a (1, 2, 3, H, W) tensor of the image and its flip), its
    original size, and a one-hot label vector.

    NOTE(review): a leftover debug ``exit(0)`` at the top of the loop made
    the entire body unreachable; it has been removed along with the debug
    print comments.
    """
    databin = dataset[process_id]
    n_gpus = torch.cuda.device_count()
    data_loader = DataLoader(databin,
                             shuffle=False,
                             num_workers=args.num_workers // n_gpus,
                             pin_memory=False)

    with torch.no_grad(), cuda.device(process_id):

        model.cuda()

        for iter, pack in enumerate(data_loader):

            img_name = pack['name'][0]
            label = pack['label'][0]
            size = pack['size']

            strided_size = imutils.get_strided_size(size, 4)
            strided_up_size = imutils.get_strided_up_size(size, 16)

            outputs = [
                model(img[0].cuda(non_blocking=True)) for img in pack['img']
            ]

            strided_cam = torch.sum(
                torch.stack([
                    F.interpolate(torch.unsqueeze(o, 0),
                                  strided_size,
                                  mode='bilinear',
                                  align_corners=False)[0] for o in outputs
                ]), 0)

            highres_cam = [
                F.interpolate(torch.unsqueeze(o, 1),
                              strided_up_size,
                              mode='bilinear',
                              align_corners=False) for o in outputs
            ]
            highres_cam = torch.sum(torch.stack(highres_cam, 0),
                                    0)[:, 0, :size[0], :size[1]]

            # Classes actually present in the image-level label vector.
            valid_cat = torch.nonzero(label)[:, 0]

            strided_cam = strided_cam[valid_cat]
            strided_cam /= F.adaptive_max_pool2d(strided_cam, (1, 1)) + 1e-5

            highres_cam = highres_cam[valid_cat]
            highres_cam /= F.adaptive_max_pool2d(highres_cam, (1, 1)) + 1e-5

            # save cams
            np.save(
                os.path.join(args.cam_out_dir, img_name + '.npy'), {
                    "keys": valid_cat,
                    "cam": strided_cam.cpu(),
                    "high_res": highres_cam.cpu().numpy()
                })

            # max(..., 1) prevents ZeroDivisionError for shards < 20 items.
            step = max(len(databin) // 20, 1)
            if process_id == n_gpus - 1 and iter % step == 0:
                print("%d " % ((5 * iter + 1) // step), end='')
Example #11
0
def _work(process_id, model, dataset, args):
    """Per-GPU worker: generate multi-scale CAMs for one shard.

    Differences from the sibling workers: each per-scale input is permuted
    before the forward pass, and images whose label vector is all zeros are
    saved with zeroed CAMs instead of failing on an empty index selection.
    """
    databin = dataset[process_id]
    n_gpus = torch.cuda.device_count()
    data_loader = DataLoader(databin,
                             shuffle=False,
                             num_workers=args.num_workers // n_gpus,
                             pin_memory=False)

    with torch.no_grad(), cuda.device(process_id):

        model.cuda()

        for iter, pack in enumerate(data_loader):

            img_name = pack['name'][0]
            label = pack['label'][0]
            size = pack['size']

            strided_size = imutils.get_strided_size(size, 4)
            strided_up_size = imutils.get_strided_up_size(size, 16)

            outputs = []
            for img in pack['img']:
                # Swap the first two axes before the forward pass -- assumes
                # 4-D per-scale inputs; confirm axis meaning with the dataset.
                img = img.permute(1, 0, 2, 3)
                o = model(img.cuda(non_blocking=True))
                outputs.append(o)

            temp = []
            for o in outputs:
                temp.append(
                    F.interpolate(torch.unsqueeze(o, 0),
                                  strided_size,
                                  mode='bilinear',
                                  align_corners=False)[0])
            strided_cam = torch.sum(torch.stack(temp), 0)

            highres_cam = [
                F.interpolate(torch.unsqueeze(o, 1),
                              strided_up_size,
                              mode='bilinear',
                              align_corners=False) for o in outputs
            ]
            highres_cam = torch.sum(torch.stack(highres_cam, 0),
                                    0)[:, 0, :size[0], :size[1]]

            valid_cat = torch.nonzero(label)[:, 0]
            if valid_cat.nelement() != 0:
                strided_cam = strided_cam[valid_cat]
                strided_cam /= F.adaptive_max_pool2d(strided_cam,
                                                     (1, 1)) + 1e-5

                highres_cam = highres_cam[valid_cat]
                highres_cam /= F.adaptive_max_pool2d(highres_cam,
                                                     (1, 1)) + 1e-5
            else:
                # No active class: write zero maps with one channel's shape.
                strided_cam = torch.zeros_like(strided_cam[0])
                highres_cam = torch.zeros_like(highres_cam[0])

            # save cams
            np.save(
                os.path.join(args.cam_out_dir, img_name + '.npy'), {
                    "keys": valid_cat,
                    "cam": strided_cam.cpu(),
                    "high_res": highres_cam.cpu().numpy()
                })
            # max(..., 1) prevents ZeroDivisionError for shards < 20 items.
            step = max(len(databin) // 20, 1)
            if process_id == n_gpus - 1 and iter % step == 0:
                print("%d " % ((5 * iter + 1) // step), end='')
                print()