def transform_tr(self, sample):
    """Apply the training augmentation pipeline to *sample*.

    Expects a dict-style sample ({'image': ..., 'label': ...}) and returns
    the transformed sample.
    """
    steps = [
        tr.RandomHorizontalFlip(),
        tr.RandomScaleCrop(base_size=self.args.base_size,
                           crop_size=self.args.crop_size,
                           fill=255),
        tr.RandomGaussianBlur(),
        # ImageNet channel statistics.
        tr.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        tr.ToTensor(),
    ]
    return transforms.Compose(steps)(sample)
def transform_tr(self, sample):
    """Training transform: rescale, blur, normalize and tensorize *sample*.

    Flip / scale-crop variants were previously tried here and are disabled.
    """
    pipeline = transforms.Compose([
        tr.Rescale(ratio=self.args.ratio),
        tr.RandomGaussianBlur(),
        # Dataset-specific channel statistics (not ImageNet).
        tr.Normalize(mean=(0.279, 0.293, 0.290), std=(0.197, 0.198, 0.201)),
        tr.ToTensor(),
    ])
    return pipeline(sample)
def transform_tr(self, sample):
    """Training transform: flip, add noise/blur, normalize, patch-tensorize.

    Color-jitter variants were previously tried here and are disabled.
    """
    pipeline = transforms.Compose([
        tr.RandomHorizontalFlip(),
        tr.GaussianNoise(),
        tr.RandomGaussianBlur(),
        # ImageNet channel statistics.
        tr.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        tr.PatchToTensor(),
    ])
    return pipeline(sample)
def transform_ts(self, sample):
    """
    composed transformers for testing dataset
    :param sample: {'image': image, 'label': label}
    :return: transformed sample
    """
    steps = [
        ct.FixScaleCrop(crop_size=self.crop_size),
        ct.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ct.ToTensor(),
    ]
    return transforms.Compose(steps)(sample)
def transform_tr(self, sample):
    """Training transform: flip, fixed-scale crop, blur, normalize,
    tensorize, then threshold labels via Lablize."""
    augment = transforms.Compose([
        tr.RandomHorizontalFlip(),
        tr.FixScaleCrop(crop_size=self.args.crop_size),
        tr.RandomGaussianBlur(),
        # ImageNet channel statistics.
        tr.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        tr.ToTensor(),
        tr.Lablize(high_confidence=self.args.high_confidence),
    ])
    return augment(sample)
def transform_ts(self, sample):
    '''
    Transform the given test sample.
    @param sample: The given test sample ({'image': ..., 'label': ...}).
    @return: The transformed sample.
    '''
    # Fix: the final stage previously read `tf.ToTensor()`. Every other stage
    # of this pipeline comes from the custom-transform module `tr`, and the
    # dict-style sample requires the custom ToTensor (torchvision's variant
    # only accepts a bare image), so the `tf.` reference was a typo.
    composed_transforms = transforms.Compose([
        tr.FixedResize(size=self.args.crop_size),
        tr.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        tr.ToTensor()
    ])
    return composed_transforms(sample)
def transform_tr(self, sample):
    """Training transform with a fixed 513 base/crop size plus photometric
    jitter (brightness/contrast/saturation/hue/gamma at 0.3)."""
    return transforms.Compose([
        tr.RandomHorizontalFlip(),
        tr.RandomScaleCrop(base_size=513, crop_size=513),
        tr.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3,
                       hue=0.3, gamma=0.3),
        # ImageNet channel statistics.
        tr.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        tr.ToTensor(),
    ])(sample)
def transform_train(self, sample):
    """Training augmentation for 4-channel imagery: flips, rotation, gamma,
    blur and noise, then per-channel normalization and tensor conversion.

    Scale-crop / fixed-resize variants were previously tried and are disabled.
    """
    steps = [
        tr.RandomHorizontalFlip(),
        tr.RandomVerticalFlip(),
        tr.RandomRotate(),
        tr.RandomGammaTransform(),
        tr.RandomGaussianBlur(),
        tr.RandomNoise(),
        # Four-channel dataset statistics.
        tr.Normalize(mean=(0.544650, 0.352033, 0.384602, 0.352311),
                     std=(0.249456, 0.241652, 0.228824, 0.227583)),
        tr.ToTensor(),
    ]
    return transforms.Compose(steps)(sample)
def build_data(self):
    """Build the train/val datasets and their DataLoaders.

    Both splits share a single transform pipeline: fixed resize, random crop,
    bilateral filtering of the crop, tensor conversion, then normalization of
    the cropped image.
    NOTE(review): validation reuses the same random-crop pipeline as training
    — presumably intentional, but worth confirming.
    """
    # Per-key divisors, presumably used elsewhere to rescale pixel values to
    # [0, 1] — TODO confirm against consumers.
    self.normalize_dict = {
        'crop_image': 255.0,
        'bifilter_crop_image': 255.0
    }
    self.composed_transforms_ts = transforms.Compose([
        # Bilinear for the image, nearest-neighbour for the ground truth
        # (preserves discrete label ids).
        tr.FixedResize({
            'image': self.resolution,
            'gt': self.resolution
        }, flagvals={
            'image': cv2.INTER_LINEAR,
            'gt': cv2.INTER_NEAREST
        }),
        tr.CropRandom(crop_size=self.resolution, keys=['image', 'gt'],
                      drop_origin=True),
        tr.BilateralFiltering(['crop_image']),
        tr.ToTensor(),
        # ImageNet channel statistics, applied to the cropped image only.
        tr.Normalize('crop_image', mean=(0.485, 0.456, 0.406),
                     std=(0.229, 0.224, 0.225)),
    ])
    # Validation split ('train_val'); loader is unshuffled and drops the
    # last partial batch.
    self.valset = cityscapes_full.cityscapesFullLoader(
        split='train_val', transform=self.composed_transforms_ts)
    self.valset_num = len(self.valset)
    self.val_loader = DataLoader(
        self.valset,
        batch_size=self.batch_size,
        shuffle=False,
        num_workers=self.num_workers,
        drop_last=True,
    )
    self.val_loader_num = len(self.val_loader)
    # Training split: shuffled every epoch, also drops the last partial batch.
    self.trainset = cityscapes_full.cityscapesFullLoader(
        split='train', transform=self.composed_transforms_ts)
    self.trainset_num = len(self.trainset)
    self.train_loader = DataLoader(
        self.trainset,
        batch_size=self.batch_size,
        shuffle=True,
        num_workers=self.num_workers,
        drop_last=True,
    )
    self.train_loader_num = len(self.train_loader)
def transform_tr(self, sample):
    """Zero-shot training transform: mask out classes excluded from this
    split, then flip, normalize (segmentation-aware) and tensorize."""
    if self.csplit not in ('all', 'seen'):
        raise RuntimeError("Training Unseen data is not legal.")
    # 'all' trains on everything; 'seen' masks the unseen classes to 255.
    ignores = [] if self.csplit == 'all' else classes['unseen']
    pipeline = transforms.Compose([
        tr.MaskIgnores(ignores=ignores, mask=255),
        tr.RandomHorizontalFlip(),
        tr.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225),
                     seg=True),
        tr.ToTensor(),
    ])
    return pipeline(sample)
def transform_tr(self, sample):
    """Assemble the configurable training pipeline and apply it to *sample*.

    Rotation and Gaussian noise are optional, driven by args; the rest
    (scale, crop, flip, normalize, tensorize) is always applied.
    """
    steps = []
    if self.args.rotate > 0:
        steps.append(tr.RandomRotate(self.args.rotate))
    steps += [
        tr.RandomScale(rand_resize=self.args.rand_resize),
        tr.RandomCrop(self.args.input_size),
        tr.RandomHorizontalFlip(),
        tr.Normalize(mean=self.args.normal_mean, std=self.args.normal_std),
    ]
    if self.args.noise_param is not None:
        # noise_param is (mean, std).
        steps.append(tr.GaussianNoise(mean=self.args.noise_param[0],
                                      std=self.args.noise_param[1]))
    steps.append(tr.ToTensor())
    return transforms.Compose(steps)(sample)
def transform_tr(self, sample):
    """Training transform: flip, scale-crop, rotate (v2), normalize,
    tensorize.

    Blur / fixed-resize / cutout / classic-rotate variants were previously
    tried here and are disabled.
    """
    steps = [
        tr.RandomHorizontalFlip(),
        tr.RandomScaleCrop(base_size=self.args.base_size,
                           crop_size=self.args.crop_size),
        tr.RandomRotate_v2(),
        # ImageNet channel statistics.
        tr.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        tr.ToTensor(),
    ]
    return transforms.Compose(steps)(sample)
def explain_image(self, path, counter):
    """Prepare a single image/label pair for model interpretation.

    Loads the image and its label (expected at <root>/lbl/<filename>), remaps
    the label's grey values {255, 0, 128} -> class ids {1, 0, 2}, and runs the
    standard resize/normalize/to-tensor pipeline. The actual interpretation
    code (LRP / guided backprop) is currently commented out, so the method
    ends at a debug print.

    NOTE(review): `counter` is accepted but unused in the active code path.
    """
    self.model.eval()
    img_path = path
    # Label lives in a sibling 'lbl' directory next to the image directory.
    lbl_path = os.path.join(
        os.path.split(os.path.split(path)[0])[0], 'lbl',
        os.path.split(path)[1])
    image = Image.open(img_path).convert('RGB')  # width x height x 3
    _tmp = np.array(Image.open(lbl_path), dtype=np.uint8)
    # Remap raw grey values to class ids: 255->1, 0->0, 128->2.
    _tmp[_tmp == 255] = 1
    _tmp[_tmp == 0] = 0
    _tmp[_tmp == 128] = 2
    _tmp = Image.fromarray(_tmp)
    # ImageNet channel statistics.
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)
    composed_transforms = transforms.Compose([
        tr.FixedResize(size=513),
        tr.Normalize(mean=mean, std=std),
        tr.ToTensor()
    ])
    sample = {'image': image, 'label': _tmp}
    sample = composed_transforms(sample)
    image, target = sample['image'], sample['label']
    image = torch.unsqueeze(image, dim=0)  # add batch dimension
    # --- interpretation experiments, currently disabled ---
    # if self.args.cuda:
    #     image, target = image.cuda(), target.cuda()
    # with torch.no_grad():
    #     output = self.model(image)
    # inn_model = InnvestigateModel(self.model, lrp_exponent=1,
    #                               method="b-rule",
    #                               beta=0, epsilon=1e-6)
    # # inn_model.eval()
    # model_prediction, heatmap = inn_model.innvestigate(in_tensor=image)
    # model_prediction = np.argmax(model_prediction, axis=1)
    # def run_guided_backprop(net, image_tensor):
    #     return interpretation.guided_backprop(net, image_tensor, cuda=True, verbose=False, apply_softmax=False)
    #
    # def run_LRP(net, image_tensor):
    #     return inn_model.innvestigate(in_tensor=image_tensor, rel_for_class=1)
    print('hold')
def make_mask(in_flist, model, args):
    """Generate a binary segmentation mask for every file in *in_flist*.

    Images are read from args.in_dir and masks written to args.out_dir,
    skipping files whose output already exists. The model's per-pixel argmax
    is collapsed into a single white-on-black channel.

    NOTE(review): the final asserts compare tensor dimensions against PIL's
    (width, height) ordering — they can only both hold for square images;
    confirm intent.
    """
    composed_transforms = transforms.Compose([
        # ImageNet channel statistics.
        tr.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        tr.ToTensor()
    ])
    pbar = tqdm(in_flist, total=len(in_flist))
    for fname in pbar:
        outpath = os.path.join(args.out_dir, fname)
        if os.path.exists(outpath):
            # Already processed on a previous run — skip.
            pbar.set_description(fname + ' exists')
            continue
        fpath = os.path.join(args.in_dir, fname)
        pbar.set_description('')
        image = Image.open(fpath).convert('RGB')
        # Dummy label: the transform pipeline expects a 'label' entry.
        target = image.convert('L')
        sample = {'image': image, 'label': target}
        tensor_in = composed_transforms(sample)['image'].unsqueeze(0)
        if args.cuda:
            tensor_in = tensor_in.cuda()
        with torch.no_grad():
            output = model(tensor_in)
        grid_image = make_grid(decode_seg_map_sequence(
            torch.max(output[:3], 1)[1].detach().cpu().numpy()),
                               3,
                               normalize=False,
                               range=(0, 255))
        # Collapse the RGB color map into one channel and binarize it.
        grid_image = grid_image[0:1, ...] + grid_image[1:2, ...] + grid_image[2:3, ...]
        grid_image[grid_image > 0] = 255
        # area = ((grid_image / 255.).sum()) / grid_image.shape[0] / grid_image.shape[1]
        # Ensure the (possibly nested) output directory exists.
        _outdir = os.path.split(outpath)[:-1]
        _outdir = '/'.join(_outdir)
        if not os.path.exists(_outdir):
            os.makedirs(_outdir)
        assert grid_image.shape[0] == image.size[0]
        assert grid_image.shape[1] == image.size[1]
        save_image(grid_image, outpath)
def transform_val(self, sample):
    """Validation transform.

    Two-element samples use the standard transform module `tr`; anything
    else uses the superpixel variant `tr_sp`. Both pipelines are identical:
    fixed-scale crop, ImageNet normalization, tensor conversion.
    """
    mod = tr if len(sample) == 2 else tr_sp
    pipeline = transforms.Compose([
        mod.FixScaleCrop(crop_size=self.args.crop_size),
        mod.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        mod.ToTensor(),
    ])
    return pipeline(sample)
def transform_tr(self, sample):
    """Training augmentation in three stages: joint geometric ops on the
    whole sample, photometric ops on the image alone, then normalization
    and tensor conversion."""
    # Independent, randomly-applied color jitters (order shuffled below).
    jitter = [
        transforms.RandomApply([transforms.ColorJitter(brightness=0.1)]),
        transforms.RandomApply([transforms.ColorJitter(contrast=0.1)]),
        transforms.RandomApply([transforms.ColorJitter(saturation=0.1)]),
        transforms.RandomApply([transforms.ColorJitter(hue=0.05)]),
    ]
    geometric = transforms.Compose([
        tr.RandomHorizontalFlip(),
        tr.RandomScaleCrop(base_size=self.args.base_size,
                           crop_size=self.args.crop_size,
                           fill=255),
        tr.equalize(),
        tr.RandomGaussianBlur(),
        tr.RandomRotate(degree=7),
    ])
    photometric = transforms.Compose([
        transforms.RandomOrder(jitter),
        transforms.RandomGrayscale(p=0.3),
    ])
    finalize = transforms.Compose([
        # ImageNet channel statistics.
        tr.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        tr.ToTensor(),
    ])
    out = geometric(sample)
    out['image'] = photometric(out['image'])
    return finalize(out)
def transform_val(self, sample):
    """Zero-shot validation transform: mask classes excluded from this
    split, remap unseen labels to a compact 1..5 range, then normalize
    (segmentation-aware) and tensorize."""
    if self.csplit == 'all':
        ignores, remap = [], {}
    elif self.csplit == 'unseen':
        ignores, remap = classes['seen'], {16: 1, 17: 2, 18: 3, 19: 4, 20: 5}
    elif self.csplit == 'seen':
        ignores, remap = classes['unseen'], {}
    else:
        raise RuntimeError("{} ???".format(self.csplit))
    pipeline = transforms.Compose([
        tr.MaskIgnores(ignores=ignores, mask=0),
        tr.ReMask(remap=remap),
        tr.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225),
                     seg=True),
        tr.ToTensor(),
    ])
    return pipeline(sample)
def transform_tr(self, sample):
    """
    composed transformers for training dataset
    :param sample: {'image': image, 'label': label}
    :return: transformed sample

    The image is color-jittered on its own first (labels are unaffected),
    then the joint geometric/normalization pipeline runs on the pair.
    """
    jittered = transforms.ColorJitter(brightness=0.5,
                                      contrast=0.5,
                                      saturation=0.5,
                                      hue=0.2)(sample['image'])
    sample = {'image': jittered, 'label': sample['label']}
    pipeline = transforms.Compose([
        ct.RandomHorizontalFlip(),
        ct.RandomScaleCrop(base_size=self.base_size, crop_size=self.crop_size),
        ct.RandomGaussianBlur(),
        # ImageNet channel statistics.
        ct.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ct.ToTensor(),
    ])
    return pipeline(sample)
device_ids=range(torch.cuda.device_count())) if resume_epoch != nEpochs: # Logging into Tensorboard log_dir = os.path.join( save_dir, 'models', datetime.now().strftime('%b%d_%H-%M-%S') + '_' + socket.gethostname()) optimizer = optim.Adam(net.parameters(), lr=p['lr'], weight_decay=p['wd']) p['optimizer'] = str(optimizer) composed_transforms_tr = transforms.Compose([ tr.RandomSized(512), tr.RandomRotate(15), tr.RandomHorizontalFlip(), tr.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)), tr.ToTensor() ]) composed_transforms_ts = transforms.Compose([ tr.FixedResize(size=(512, 512)), tr.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)), tr.ToTensor() ]) # voc_train = pascal.VOCSegmentation(split='train', transform=composed_transforms_tr) # voc_val = pascal.VOCSegmentation(split='val', transform=composed_transforms_ts) ROOT = 'dataset/ORIGA' voc_train = ImageFolder(root_path=ROOT, datasets='ORIGA') voc_val = ImageFolder(root_path=ROOT, datasets='ORIGA', mode='test')
def main():
    """CLI entry point: load a DeepLab checkpoint and segment every image
    found in --in-path, writing each mask next to its input as
    <name>_mask.png.
    """
    parser = argparse.ArgumentParser(
        description="PyTorch DeeplabV3Plus Training")
    parser.add_argument('--in-path', type=str, required=True,
                        help='image to test')
    # parser.add_argument('--out-path', type=str, required=True, help='mask image to save')
    parser.add_argument('--backbone', type=str, default='resnet',
                        choices=['resnet', 'xception', 'drn', 'mobilenet'],
                        help='backbone name (default: resnet)')
    parser.add_argument('--ckpt', type=str, default='deeplab-resnet.pth',
                        help='saved model')
    parser.add_argument('--out-stride', type=int, default=16,
                        help='network output stride (default: 8)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--gpu-ids', type=str, default='0',
                        help='use which gpu to train, must be a \
                        comma-separated list of integers only (default=0)')
    parser.add_argument('--dataset', type=str, default='pascal',
                        choices=['pascal', 'coco', 'cityscapes', 'invoice'],
                        help='dataset name (default: pascal)')
    parser.add_argument('--crop-size', type=int, default=513,
                        help='crop image size')
    parser.add_argument('--num_classes', type=int, default=2,
                        help='crop image size')
    parser.add_argument('--sync-bn', type=bool, default=None,
                        help='whether to use sync bn (default: auto)')
    parser.add_argument(
        '--freeze-bn', type=bool, default=False,
        help='whether to freeze bn parameters (default: False)')
    args = parser.parse_args()
    args.cuda = not args.no_cuda and torch.cuda.is_available()
    if args.cuda:
        try:
            args.gpu_ids = [int(s) for s in args.gpu_ids.split(',')]
        except ValueError:
            raise ValueError(
                'Argument --gpu_ids must be a comma-separated list of integers only'
            )
    if args.sync_bn is None:
        # Sync BN only makes sense for multi-GPU CUDA runs.
        args.sync_bn = bool(args.cuda and len(args.gpu_ids) > 1)
    model_s_time = time.time()
    model = DeepLab(num_classes=args.num_classes,
                    backbone=args.backbone,
                    output_stride=args.out_stride,
                    sync_bn=args.sync_bn,
                    freeze_bn=args.freeze_bn)
    ckpt = torch.load(args.ckpt, map_location='cpu')
    model.load_state_dict(ckpt['state_dict'])
    # BUG FIX: the model was previously moved to the GPU unconditionally,
    # which crashes under --no-cuda or on CPU-only machines; honor args.cuda.
    if args.cuda:
        model = model.cuda()
    model_u_time = time.time()
    model_load_time = model_u_time - model_s_time
    print("model load time is {}".format(model_load_time))
    composed_transforms = transforms.Compose([
        # ImageNet channel statistics.
        tr.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        tr.ToTensor()
    ])
    model.eval()
    for name in os.listdir(args.in_path):
        s_time = time.time()
        image = Image.open(args.in_path + "/" + name).convert('RGB')
        # Dummy label: the transform pipeline expects a 'label' entry.
        target = Image.open(args.in_path + "/" + name).convert('L')
        sample = {'image': image, 'label': target}
        tensor_in = composed_transforms(sample)['image'].unsqueeze(0)
        if args.cuda:
            tensor_in = tensor_in.cuda()
        with torch.no_grad():
            output = model(tensor_in)
        grid_image = make_grid(decode_seg_map_sequence(
            torch.max(output[:3], 1)[1].detach().cpu().numpy()),
                               3,
                               normalize=False,
                               range=(0, 255))
        save_image(grid_image,
                   args.in_path + "/" + "{}_mask.png".format(name[0:-4]))
        u_time = time.time()
        img_time = u_time - s_time
        print("image:{} time: {} ".format(name, img_time))
    print("image save in in_path.")
return _img, _target def __str__(self): return 'SBDSegmentation(split=' + str(self.split) + ')' if __name__ == '__main__': from dataloaders import custom_transforms as tr from torch.utils.data import DataLoader from torchvision import transforms import matplotlib.pyplot as plt composed_transforms_tr = transforms.Compose([ tr.RandomResizedCrop(size=513), tr.RandomHorizontalFlip(), tr.Normalize(mean=(0.4914, 0.4822, 0.4465), std=(1.0, 1.0, 1.0)), tr.ToTensor() ]) sbd_train = SBDSegmentation(split='train', transform=composed_transforms_tr) dataloader = DataLoader(sbd_train, batch_size=2, shuffle=True, num_workers=2) for ii, sample in enumerate(dataloader): for jj in range(sample["image"].size()[0]): img = sample['image'].numpy() gt = sample['gt'].numpy()
from torchvision import transforms transform = transforms.Compose([ tr.RandomHorizontalFlip(), tr.ScaleNRotate(rots=(-20, 20), scales=(.75, 1.25)), # tr.CropFromMask(crop_elems=('image', 'gt'), relax=30, zero_pad=False, jitters_bound=(10, 30)), tr.CropFromMask(crop_elems=('image', 'gt'), relax=30, zero_pad=False, jitters_bound=(30, 31)), # tr.CropFromMask(crop_elems=('image', 'gt'), relax=30, zero_pad=False, jitters_bound=None), tr.FixedResize(resolutions={ 'crop_image': (256, 256), 'crop_gt': (256, 256) }), tr.Normalize(elems='crop_image'), # tr.ToImage(norm_elem=('pos_map', 'neg_map')), ]) dataset = VOCSegmentation(split=['val'], transform=transform, retname=True) # dataset = VOCSegmentation(split=['train', 'val'], retname=True) dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False, num_workers=0) for i, sample in enumerate(dataloader): while 1: cv2.imshow('image', sample['crop_image'][0].numpy()) cv2.imshow('crop_gt', sample['crop_gt'][0].numpy()) if cv2.waitKey(1) & 0xff == ord('q'):
def main():
    """Segment every .jpg in --in-path and write, per image, the raw
    segmentation map (<name>-seg.jpg) and a blended overlay (<name>-blend.png)
    into --out-path.
    """
    parser = argparse.ArgumentParser(
        description="PyTorch DeeplabV3Plus Training")
    parser.add_argument('--in-path', type=str, required=True,
                        help='directory of images to test')
    parser.add_argument('--out-path', type=str, required=True,
                        help='directory of mask image to save')
    parser.add_argument('--backbone', type=str, default='resnet',
                        choices=['resnet', 'xception', 'drn', 'mobilenet'],
                        help='backbone name (default: resnet)')
    parser.add_argument('--ckpt', type=str, default='deeplab-resnet.pth',
                        help='saved model')
    parser.add_argument('--out-stride', type=int, default=16,
                        help='network output stride (default: 8)')
    parser.add_argument('--no-cuda', action='store_true', default=True,
                        help='disables CUDA training')
    parser.add_argument('--gpu-ids', type=str, default='0',
                        help='use which gpu to train, must be a \
                        comma-separated list of integers only (default=0)')
    parser.add_argument('--dataset', type=str, default='pascal',
                        choices=['pascal', 'coco', 'cityscapes'],
                        help='dataset name (default: pascal)')
    parser.add_argument('--crop-size', type=int, default=512,
                        help='crop image size')
    parser.add_argument('--sync-bn', type=bool, default=None,
                        help='whether to use sync bn (default: auto)')
    parser.add_argument(
        '--freeze-bn', type=bool, default=False,
        help='whether to freeze bn parameters (default: False)')
    args = parser.parse_args()
    print(args)
    args.cuda = not args.no_cuda and torch.cuda.is_available()
    if args.cuda:
        try:
            args.gpu_ids = [int(s) for s in args.gpu_ids.split(',')]
        except ValueError:
            raise ValueError(
                'Argument --gpu_ids must be a comma-separated list of integers only'
            )
    if args.sync_bn is None:
        # Sync BN only makes sense for multi-GPU CUDA runs.
        args.sync_bn = bool(args.cuda and len(args.gpu_ids) > 1)
    model = DeepLab(num_classes=3,
                    backbone=args.backbone,
                    output_stride=args.out_stride,
                    sync_bn=args.sync_bn,
                    freeze_bn=args.freeze_bn)
    print(f"The args.ckpt is : {args.ckpt}")
    ckpt = torch.load(args.ckpt, map_location='cpu')
    model.load_state_dict(ckpt['state_dict'])
    # BUG FIX: when CUDA is requested, the *model* and the *input tensor*
    # must be moved to the GPU. Previously the PIL image was sent to .cuda()
    # (PIL images have no .cuda(), so that path raised AttributeError) and
    # the model never left the CPU.
    if args.cuda:
        model = model.cuda()
    composed_transforms = transforms.Compose([
        # ImageNet channel statistics.
        tr.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        tr.ToTensor()
    ])
    model.eval()
    for img_path in os.listdir(args.in_path):
        if os.path.splitext(img_path)[-1] not in ['.jpg']:
            print('skip {}'.format(img_path))
            continue
        img_path = os.path.join(args.in_path, img_path)
        output_path = os.path.join(
            args.out_path,
            os.path.splitext(os.path.split(img_path)[-1])[-2] + "-seg" + ".jpg")
        combine_path = os.path.join(
            args.out_path,
            os.path.splitext(os.path.split(img_path)[-1])[-2] + "-blend" + ".png")
        image = Image.open(img_path).convert('RGB')
        # Dummy label: the transform pipeline expects a 'label' entry.
        target = Image.open(img_path).convert('L')
        sample = {'image': image, 'label': target}
        tensor_in = composed_transforms(sample)['image'].unsqueeze(0)
        if args.cuda:
            tensor_in = tensor_in.cuda()
        with torch.no_grad():
            output = model(tensor_in)
        grid_image = make_grid(decode_seg_map_sequence(
            torch.max(output[:3], 1)[1].detach().cpu().numpy()),
                               3,
                               normalize=False,
                               range=(0, 255))
        print("type(grid) is:{}".format(type(grid_image)))
        print("grid_image.shape is:{}".format(grid_image.shape))
        save_image(grid_image, output_path)
        print("saved {}".format(output_path))
        blend_two_images(img_path, output_path, combine_path)
        print("blended {}\n".format(combine_path))
def transform_norm(self, sample):
    """Normalize *sample* with this dataset's statistics and convert it to
    tensors (no augmentation)."""
    return transforms.Compose([
        tr.Normalize(mean=self.data_mean, std=self.data_std),
        tr.ToTensor(),
    ])(sample)
def main():
    """Segment every image listed in <test_folder>/test.txt and save the
    colorized result per image into the configured results folder."""
    args = parser.parse_args()
    args.cuda = not args.no_cuda and torch.cuda.is_available()
    if args.cuda:
        try:
            args.gpu_ids = [int(s) for s in args.gpu_ids.split(',')]
        except ValueError:
            raise ValueError(
                'Argument --gpu_ids must be a comma-separated list of integers only'
            )
    if args.sync_bn is None:
        # Sync BN only makes sense for multi-GPU CUDA runs.
        args.sync_bn = bool(args.cuda and len(args.gpu_ids) > 1)
    model = DeepLab(num_classes=21,
                    backbone=args.backbone,
                    output_stride=args.out_stride,
                    sync_bn=args.sync_bn,
                    freeze_bn=args.freeze_bn)
    ckpt = torch.load(args.ckpt, map_location='cpu')
    model.load_state_dict(ckpt['state_dict'])
    # BUG FIX: when CUDA is requested, the *model* and the *input tensor*
    # must be moved to the GPU. Previously the PIL image was sent to .cuda()
    # (PIL images have no .cuda(), so that path raised AttributeError) and
    # the model never left the CPU.
    if args.cuda:
        model = model.cuda()
    model.eval()
    composed_transforms = transforms.Compose([
        # ImageNet channel statistics.
        tr.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        tr.ToTensor()
    ])
    # Read the list of image names to test (also fixes the original's
    # never-closed file handle).
    filename = args.test_folder + 'test.txt'
    with open(filename) as file:
        for line in file:
            # Build the input image path and the result save path.
            image_name = line.strip('\n')
            input_image = args.test_folder + image_name + '.jpg'
            print(input_image)
            save_result = args.save_testresult_folder + 'result_' + image_name + '.jpg'
            print(save_result)
            # Run the test.
            image = Image.open(input_image).convert('RGB')
            # Dummy label: the transform pipeline expects a 'label' entry.
            target = Image.open(input_image).convert('L')
            sample = {'image': image, 'label': target}
            tensor_in = composed_transforms(sample)['image'].unsqueeze(0)
            if args.cuda:
                tensor_in = tensor_in.cuda()
            with torch.no_grad():
                output = model(tensor_in)
            grid_image = make_grid(decode_seg_map_sequence(
                torch.max(output[:3], 1)[1].detach().cpu().numpy()),
                                   3,
                                   normalize=False,
                                   range=(0, 255))
            print("type(grid) is: ", type(grid_image))
            print("grid_image.shape is: ", grid_image.shape)
            save_image(grid_image, save_result)
def main():
    """Timed laser-stripe segmentation plus grey-gravity centre extraction.

    Reads image names from <test_folder>/test.txt, segments each 1920x1080
    frame with DeepLab, keeps only pixels classified as class 1, extracts the
    stripe centre line per column via Cvpointgray_gpu, then writes an
    annotated image and the class heat-map. Reports minimum/average model
    and end-to-end times over the run.
    """
    args = parser.parse_args()
    args.cuda = not args.no_cuda and torch.cuda.is_available()
    # args.cuda = torch.cuda.is_available()
    if args.cuda:
        try:
            args.gpu_ids = [int(s) for s in args.gpu_ids.split(',')]
        except ValueError:
            raise ValueError(
                'Argument --gpu_ids must be a comma-separated list of integers only'
            )
    if args.sync_bn is None:
        if args.cuda and len(args.gpu_ids) > 1:
            args.sync_bn = True
        else:
            args.sync_bn = False
    model = DeepLab(num_classes=21,
                    backbone=args.backbone,
                    output_stride=args.out_stride,
                    sync_bn=args.sync_bn,
                    freeze_bn=args.freeze_bn)
    ckpt = torch.load(args.ckpt, map_location='cpu')
    model.load_state_dict(ckpt['state_dict'])
    composed_transforms = transforms.Compose([
        # ImageNet channel statistics.
        tr.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        tr.ToTensor()
    ])
    filename = args.test_folder + 'test.txt'
    file = open(filename)
    # init time parameters
    forward_min = math.inf
    forward_time = 0
    model_time_min = math.inf
    model_time = 0
    image_number = 0
    # init model cuda and image index
    model.eval()
    image_width = 1920
    image_heigth = 1080  # (sic: 'heigth' kept as in the original)
    if args.cuda:
        model.cuda()  # the first transfer to CUDA is slow, roughly 4.12 s
        # Column vector 1..1080: per-row pixel index for one image column.
        index = torch.arange(1, image_heigth + 1).t().reshape(-1, 1)
        # print("index = ", index, index.size())
        # Repeat 1920 times -> a 1080x1920 row-index grid on the GPU.
        img_index = index.repeat(1, image_width).cuda().float()
        # print("img_index = ", img_index[0], img_index, img_index.size())
    else:
        # CPU fallback: vector 1..1080 of per-row pixel indices.
        index = torch.arange(1, image_heigth + 1).t()
        # Repeated to a 1080x1920 row-index grid.
        img_index = index.repeat(image_width, 1).reshape(
            image_heigth, image_width)
    for line in file:
        # Build the input image path and the result save path.
        image_name = line.strip('\n')
        input_image = args.test_folder + image_name + '.jpg'
        print(input_image)
        save_result = args.save_testresult_folder + 'result_' + image_name + '.jpg'
        print(save_result)
        image_number = image_number + 1
        # Start the test.
        image = Image.open(input_image).convert('RGB')
        # Dummy label: the transform pipeline expects a 'label' entry.
        target = Image.open(input_image).convert('L')
        sample = {'image': image, 'label': target}
        tensor_in = composed_transforms(sample)['image'].unsqueeze(0)
        # Two pixel arrays: one for the big (red) and one for the small
        # (green) light stripe.
        image_array = np.array(image)  # holds the big stripe (red)
        image_array2 = np.copy(image_array)  # holds the small stripe (green)
        image_tensor = torch.from_numpy(image_array)
        # image_tensor2 = torch.from_numpy(image_array2)
        # model.eval()
        if args.cuda:
            # model.cuda()  # first transfer to CUDA is slow, roughly 4.12 s
            tensor_in = tensor_in.cuda()
            image_tensor = image_tensor.cuda()
            # image_tensor2 = image_tensor2.cuda()
        start = time.time()
        with torch.no_grad():
            output = model(tensor_in)  #.cpu()
            # convert to TensorRT feeding sample data as input
            # model_trt = torch2trt(model, [tensor_in])
            # output = model_trt(tensor_in).cpu()
            # find background index
            # torch.max output max value as dim 0, and index of max value as dim 1; use this we can get the class of each pixel
            image_tensor[torch.max(output[:3], 1)[1][0] != 1] = 0
            # get red channel and use red channel to calculate gray gravity
            image_tensor_gray = image_tensor[:, :, 0]  # red channel
            # save_result_roi = args.save_testresult_folder + 'result_roitemp' + image_name + '.jpg'
            # cv2.imwrite(save_result_roi, image_tensor_gray.cpu().numpy())
            elapsed = time.time() - start
            model_time_min = min(model_time_min, elapsed)
            model_time += elapsed
            # claculate gray center pixel
            # roi_image = cv2.merge([image_tensor.cpu().numpy()])
            # gray_center = Cvpointgray(roi_image)
            # gray_center = Cvpointgray_fast(roi_image)
            gray_center = Cvpointgray_gpu(image_tensor_gray, img_index)
            elapsed = time.time() - start
            forward_min = min(forward_min, elapsed)
            forward_time += elapsed
            print("elapsed = ", elapsed)
        # Re-read the image for display/annotation.
        image_orign = cv2.imread(
            input_image)  #Image.open(input_image).convert('RGB')
        gray_center = gray_center.cpu().numpy()
        print("len(gray_center) = ", len(gray_center))
        # Draw a green dot at each valid (column, centre-row) pair.
        for i in range(len(gray_center)):
            if np.isnan(gray_center[i]) or np.isinf(gray_center[i]):
                continue
            else:
                cv2.circle(image_orign, (int(i), int(gray_center[i])), 2,
                           (0, 255, 0))
        # Draw the big stripe — the easier-to-follow grey-gravity variant
        # (disabled):
        # for i in range(len(gray_center)):
        #     cv2.circle(image_orign, ( int(gray_center[i][0]), int(gray_center[i][1]) ), 2, (0,0,255))
        save_result_fin = args.save_testresult_folder + 'result_fin_' + image_name + '.jpg'
        cv2.imwrite(save_result_fin, image_orign)
        # test time
        # lp = LineProfiler()
        # lp_wrapper = lp(Cvpointgray_gpu)
        # lp_wrapper(image_tensor_gray, img_index)
        # lp.print_stats()
        # save class heatmap
        grid_image = make_grid(decode_seg_map_sequence(
            torch.max(output[:3], 1)[1].detach().cpu().numpy()),
                               3,
                               normalize=False,
                               range=(0, 255))
        print("type(grid) is: ", type(grid_image))
        print("grid_image.shape is: ", grid_image.shape)
        save_image(grid_image, save_result)
    forward_average = forward_time / image_number
    model_average = model_time / image_number
    print('Forward: {0:.3f}/{1:.3f}'.format(forward_min, forward_average),
          's')
    print('Model: {0:.3f}/{1:.3f}'.format(model_time_min, model_average),
          's')
def pred_single_image(self, path, counter):
    """Predict a single image and write a side-by-side visualization.

    Registers forward hooks on every ReLU to capture activations, runs the
    model on *path*, colorizes the 3-class prediction (class 1 -> green,
    class 2 -> red) and writes the concatenated input/prediction panel to a
    hard-coded samples directory, indexed by *counter*.

    NOTE(review): the "label" is built from the *image* file rather than
    lbl_path — looks intentional (synthetic data), but confirm.
    """
    self.model.eval()
    img_path = path
    # Label path in the sibling 'lbl' directory (unused in the active path).
    lbl_path = os.path.join(
        os.path.split(os.path.split(path)[0])[0], 'lbl',
        os.path.split(path)[1])
    activations = collections.defaultdict(list)

    def save_activation(name, mod, input, output):
        # Forward hook: stash each module's output on the CPU for analysis.
        activations[name].append(output.cpu())

    for name, m in self.model.named_modules():
        if type(m) == nn.ReLU:
            m.register_forward_hook(partial(save_activation, name))
    input = cv2.imread(path)  # NOTE: shadows the builtin `input`
    # bkg = cv2.createBackgroundSubtractorMOG2()
    # back = bkg.apply(input)
    # cv2.imshow('back', back)
    # cv2.waitKey()
    input = cv2.resize(input, (513, 513), interpolation=cv2.INTER_CUBIC)
    image = Image.open(img_path).convert('RGB')  # width x height x 3
    # _tmp = np.array(Image.open(lbl_path), dtype=np.uint8)
    _tmp = np.array(Image.open(img_path), dtype=np.uint8)
    # Remap grey values to class ids: 255->1, 0->0, 128->2.
    _tmp[_tmp == 255] = 1
    _tmp[_tmp == 0] = 0
    _tmp[_tmp == 128] = 2
    _tmp = Image.fromarray(_tmp)
    # ImageNet channel statistics.
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)
    composed_transforms = transforms.Compose([
        tr.FixedResize(size=513),
        tr.Normalize(mean=mean, std=std),
        tr.ToTensor()
    ])
    sample = {'image': image, 'label': _tmp}
    sample = composed_transforms(sample)
    image, target = sample['image'], sample['label']
    image = torch.unsqueeze(image, dim=0)  # add batch dimension
    if self.args.cuda:
        image, target = image.cuda(), target.cuda()
    with torch.no_grad():
        output = self.model(image)
    # Wrap the captured activations for a specific decoder layer.
    see = Analysis('module.decoder.last_conv.6', activations)
    pred = output.data.cpu().numpy()
    target = target.cpu().numpy()
    pred = np.argmax(pred, axis=1)
    pred = np.reshape(pred, (513, 513))
    # prediction = np.append(target, pred, axis=1)
    prediction = pred
    # Build a BGR visualization: class 1 -> green, class 2 -> red.
    rgb = np.zeros((prediction.shape[0], prediction.shape[1], 3))
    r = prediction.copy()
    g = prediction.copy()
    b = prediction.copy()
    g[g != 1] = 0
    g[g == 1] = 255
    r[r != 2] = 0
    r[r == 2] = 255
    b = np.zeros(b.shape)
    rgb[:, :, 0] = b
    rgb[:, :, 1] = g
    rgb[:, :, 2] = r
    # Input panel next to the colorized prediction.
    prediction = np.append(input, rgb.astype(np.uint8), axis=1)
    result = np.append(input, prediction.astype(np.uint8), axis=1)
    # Separator line between the panels (drawn twice in the original).
    cv2.line(rgb, (513, 0), (513, 1020), (255, 255, 255), thickness=1)
    cv2.line(rgb, (513, 0), (513, 1020), (255, 255, 255), thickness=1)
    cv2.imwrite(
        '/home/robot/git/pytorch-deeplab-xception/run/cropweed/deeplab-resnet/experiment_41/samples/synthetic_{}.png'
        .format(counter), prediction)
def pred_single_image(self, path):
    """Predict one image, save rgb/label/prediction PNGs into the experiment
    directory, and backtrace the captured activations via Analysis.

    The label image is read from a sibling 'lbl' directory; predicted class
    ids are remapped to display grey levels (1 -> 255, 2 -> 128).
    """
    self.model.eval()
    img_path = path
    # Label lives at <root>/lbl/<filename> next to the image directory.
    lbl_path = os.path.join(
        os.path.split(os.path.split(path)[0])[0], 'lbl',
        os.path.split(path)[1])
    activations = collections.defaultdict(list)

    def save_activation(name, mod, input, output):
        # Forward hook: capture every ReLU output on the CPU.
        activations[name].append(output.cpu())

    for name, m in self.model.named_modules():
        if type(m) == nn.ReLU:
            m.register_forward_hook(partial(save_activation, name))
    input = cv2.imread(path)  # NOTE: shadows the builtin `input`
    label = cv2.imread(lbl_path)
    # bkg = cv2.createBackgroundSubtractorMOG2()
    # back = bkg.apply(input)
    # cv2.imshow('back', back)
    # cv2.waitKey()
    input = cv2.resize(input, (513, 513), interpolation=cv2.INTER_CUBIC)
    image = Image.open(img_path).convert('RGB')  # width x height x 3
    # _tmp = np.array(Image.open(lbl_path), dtype=np.uint8)
    _tmp = np.array(Image.open(img_path), dtype=np.uint8)
    # Remap grey values to class ids: 255->1, 0->0, 128->2.
    _tmp[_tmp == 255] = 1
    _tmp[_tmp == 0] = 0
    _tmp[_tmp == 128] = 2
    _tmp = Image.fromarray(_tmp)
    # ImageNet channel statistics.
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)
    composed_transforms = transforms.Compose([
        tr.FixedResize(size=513),
        tr.Normalize(mean=mean, std=std),
        tr.ToTensor()
    ])
    sample = {'image': image, 'label': _tmp}
    sample = composed_transforms(sample)
    image, target = sample['image'], sample['label']
    image = torch.unsqueeze(image, dim=0)  # add batch dimension
    if self.args.cuda:
        image, target = image.cuda(), target.cuda()
    with torch.no_grad():
        output = self.model(image)
    # output = output.data.cpu().numpy().squeeze(0).transpose([1, 2, 0])
    # output = np.argmax(output, axis=2) * 255
    output = output.data.cpu().numpy()
    prediction = np.argmax(output, axis=1)
    prediction = np.squeeze(prediction, axis=0)
    # Map class ids back to display grey levels (1->255, 2->128).
    prediction[prediction == 1] = 255
    if np.any(prediction == 2):
        prediction[prediction == 2] = 128
    if np.any(prediction == 1):
        prediction[prediction == 1] = 255
    print(np.unique(prediction))
    see = Analysis(activations, label=1, path=self.saver.experiment_dir)
    see.backtrace(output)
    # for key in keys:
    #     # see.visualize_tensor(see.image)
    #     see.save_tensor(see.image, self.saver.experiment_dir)
    cv2.imwrite(os.path.join(self.saver.experiment_dir, 'rgb.png'), input)
    cv2.imwrite(os.path.join(self.saver.experiment_dir, 'lbl.png'), label)
    cv2.imwrite(os.path.join(self.saver.experiment_dir, 'prediction.png'),
                prediction)
def __init__(self, base_dir='/home/timy90022/dataset/istg/', split='train'):
    """Index the istg/sun360 panorama dataset.

    A sample is kept only when all three of color.png, obj2d.png and the
    matching fcmaps/.../fcmap.png exist.  Samples are split 90/10 into
    train/val by their index modulo 10.

    :param base_dir: dataset root containing data_obj2d/ and fcmaps/
    :param split: train/val/test
    """
    super().__init__()
    self._base_dir = base_dir
    self._image_dir = os.path.join(self._base_dir, 'JPEGImages')
    self.rotate = True
    self.gaussian_bump = True
    self.gaussian_rad = -1
    self.gaussian_iou = 0.7
    self.split = split
    self.max_ratio = 1
    self.min_ratio = 1
    self.record_ratio_door = []
    self.record_ratio_window = []
    self.record_area_door = []
    self.record_area_window = []
    self.mean = (0.5, 0.5, 0.5)
    self.std = (0.5, 0.5, 0.5)
    self.transform = transforms.Compose([
        tr.Normalize(mean=self.mean, std=self.std)
    ])
    self.output_size = [128, 256]
    self.input_size = [512, 1024]
    self.width_ratio = self.output_size[1] / self.input_size[1]
    self.height_ratio = self.output_size[0] / self.input_size[0]

    self.all_image = []
    self.all_label = []
    self.all_assist = []

    def add_samples(dirs):
        # Register each directory that has all three expected files.
        for d in dirs:
            color = d + '/color.png'
            obj2d = d + '/obj2d.png'
            fcmap = d.replace('data_obj2d', 'fcmaps') + '/fcmap.png'
            if (os.path.isfile(color) and os.path.isfile(obj2d)
                    and os.path.isfile(fcmap)):
                self.all_image.append(color)
                self.all_label.append(obj2d)
                self.all_assist.append(fcmap)

    # istg has one extra directory level compared to sun360.
    for scene in sorted(glob.glob(self._base_dir + '/data_obj2d/istg' + '/*')):
        add_samples(sorted(glob.glob(scene + '/*')))
    add_samples(sorted(glob.glob(self._base_dir + '/data_obj2d/sun360' + '/*')))

    assert (len(self.all_image) == len(self.all_label))

    # Deterministic 90/10 split by sample index.
    if self.split == 'train':
        stay = [0, 1, 2, 3, 4, 5, 6, 7, 8]
    else:
        stay = [9]
    self.all_image = [x for e, x in enumerate(self.all_image) if e % 10 in stay]
    self.all_label = [x for e, x in enumerate(self.all_label) if e % 10 in stay]
    self.all_assist = [x for e, x in enumerate(self.all_assist) if e % 10 in stay]

    # Display stats
    print('Number of images in {}: {:d}'.format(split, len(self.all_image)))
def main():
    """CLI entry point: run DeepLab inference on every *.jpg in --in-path.

    For each image, a decoded segmentation map is written to --out-path as
    'inference_<original name>'.
    """
    parser = argparse.ArgumentParser(
        description="PyTorch DeeplabV3Plus Training")
    parser.add_argument('--in-path', type=str, required=True,
                        help='image folder path to test')
    parser.add_argument('--out-path', type=str, default='inference_results',
                        help='mask image folder to save')
    parser.add_argument('--backbone', type=str, default='resnet',
                        choices=['resnet'],
                        help='backbone name (default: resnet)')
    parser.add_argument('--ckpt', type=str,
                        default='modeling/deeplab-resnet.pth.tar',
                        help='saved model')
    parser.add_argument('--out-stride', type=int, default=16,
                        help='network output stride (default: 16)')
    parser.add_argument('--no-cuda', action='store_true', default=True,
                        help='disables CUDA training')
    parser.add_argument('--gpu-ids', type=str, default='0',
                        help='use which gpu to train, must be a \
                        comma-separated list of integers only (default=0)')
    parser.add_argument('--dataset', type=str, default='coco',
                        choices=['coco'],
                        help='dataset name (default: coco)')
    parser.add_argument('--crop-size', type=int, default=513,
                        help='crop image size')
    parser.add_argument('--sync-bn', type=bool, default=None,
                        help='whether to use sync bn (default: auto)')
    parser.add_argument(
        '--freeze-bn', type=bool, default=False,
        help='whether to freeze bn parameters (default: False)')

    args = parser.parse_args()
    args.cuda = not args.no_cuda and torch.cuda.is_available()
    if args.cuda:
        try:
            args.gpu_ids = [int(s) for s in args.gpu_ids.split(',')]
        except ValueError:
            raise ValueError(
                'Argument --gpu_ids must be a comma-separated list of integers only'
            )
    if args.sync_bn is None:
        # Auto-enable sync BN only for multi-GPU CUDA runs.
        args.sync_bn = bool(args.cuda and len(args.gpu_ids) > 1)

    model = DeepLab(num_classes=21,
                    backbone=args.backbone,
                    output_stride=args.out_stride,
                    sync_bn=args.sync_bn,
                    freeze_bn=args.freeze_bn)
    ckpt = torch.load(args.ckpt, map_location='cpu')
    model.load_state_dict(ckpt['state_dict'])
    if args.cuda:
        # BUG FIX: the original called .cuda() on the PIL image (a crash)
        # and never moved the model or the input tensor to the GPU.
        model = model.cuda()
    # eval() is idempotent — set it once instead of on every iteration.
    model.eval()

    composed_transforms = transforms.Compose([
        tr.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        tr.ToTensor()
    ])

    if not os.path.exists(args.out_path):
        os.makedirs(args.out_path)

    for img_path in glob.glob(args.in_path + '/*.jpg'):
        image = Image.open(img_path).convert('RGB')
        # The transform pipeline expects a {'image', 'label'} sample; the
        # label is a throwaway grayscale copy of the input.
        target = Image.open(img_path).convert('L')
        sample = {'image': image, 'label': target}
        tensor_in = composed_transforms(sample)['image'].unsqueeze(0)
        if args.cuda:
            tensor_in = tensor_in.cuda()
        with torch.no_grad():
            output = model(tensor_in)

        grid_image = make_grid(decode_seg_map_sequence(
            torch.max(output[:3], 1)[1].detach().cpu().numpy()),
            3, normalize=False, range=(0, 255))
        print("type(grid) is: ", type(grid_image))
        print("grid_image.shape is: ", grid_image.shape)
        img_save = img_path.replace(args.in_path, '')
        img_save = 'inference_' + img_save[1:]
        save_image(grid_image, args.out_path + '/' + img_save)