def setup(gpu): global segmentation_module # Network Builders net_encoder = ModelBuilder.build_encoder( arch='resnet50dilated', fc_dim=2048, weights=downloads.download_data_file( 'http://sceneparsing.csail.mit.edu/model/pytorch/ade20k-resnet50dilated-ppm_deepsup/encoder_epoch_20.pth', 'semantic-segmentation-pytorch/ade20k-resnet50dilated-ppm_deepsup/encoder_epoch_20.pth' )) net_decoder = ModelBuilder.build_decoder( arch='ppm_deepsup', fc_dim=2048, num_class=150, weights=downloads.download_data_file( 'http://sceneparsing.csail.mit.edu/model/pytorch/ade20k-resnet50dilated-ppm_deepsup/decoder_epoch_20.pth', 'semantic-segmentation-pytorch/ade20k-resnet50dilated-ppm_deepsup/decoder_epoch_20.pth' ), use_softmax=True) crit = torch.nn.NLLLoss(ignore_index=-1) segmentation_module = SegmentationModule(net_encoder, net_decoder, crit) segmentation_module.eval() segmentation_module.cuda()
def model_builder(self, imode="sphe"): if imode == "sphe": # Network Builders net_encoder = seg_sphe.ModelBuilder.build_encoder( arch='resnet50dilated', fc_dim=2048, weights='ckpt/ade20k-resnet50dilated-ppm_deepsup/encoder_epoch_20.pth') net_decoder = seg_sphe.ModelBuilder.build_decoder( arch='ppm_deepsup', fc_dim=2048, num_class=150, weights='ckpt/ade20k-resnet50dilated-ppm_deepsup/decoder_epoch_20.pth', use_softmax=True) elif imode == "persp": # encoder_epoch = 'ckpt/ade20k-resnet50dilated-ppm_deepsup/encoder_epoch_20.pth' # decoder_epoch = 'ckpt/ade20k-resnet50dilated-ppm_deepsup/decoder_epoch_20.pth' encoder_epoch = 'ckpt_nef/r50d_ppm_rot_e40_nef_30/encoder_epoch_40.pth' decoder_epoch = 'ckpt_nef/r50d_ppm_rot_e40_nef_30/decoder_epoch_40.pth' net_encoder = seg_persp.ModelBuilder.build_encoder( arch='resnet50dilated', fc_dim=2048, weights=encoder_epoch) net_decoder = seg_persp.ModelBuilder.build_decoder( arch='ppm_deepsup', fc_dim=2048, num_class=150, weights=decoder_epoch, use_softmax=True) crit = torch.nn.NLLLoss(ignore_index=-1) semseg_model = SegmentationModule(net_encoder, net_decoder, crit) semseg_model.eval() semseg_model.cuda() return semseg_model
def _load_model(self, cfg_file, gpu): if gpu is not None: torch.cuda.set_device(0) basepath = rospkg.RosPack().get_path('object_segmentation') cfg.merge_from_file(basepath + "/" + cfg_file) logger = setup_logger(distributed_rank=0) logger.info(f"Loaded configuration file {cfg_file}") logger.info("Running with config:\n{}".format(cfg)) cfg.MODEL.arch_encoder = cfg.MODEL.arch_encoder.lower() cfg.MODEL.arch_decoder = cfg.MODEL.arch_decoder.lower() # absolute paths of model weights cfg.MODEL.weights_encoder = ( Path(basepath) / cfg.DIR / ('encoder_' + cfg.TEST.checkpoint)).as_posix() cfg.MODEL.weights_decoder = ( Path(basepath) / cfg.DIR / ('decoder_' + cfg.TEST.checkpoint)).as_posix() if not os.path.exists(cfg.MODEL.weights_encoder) or not os.path.exists( cfg.MODEL.weights_decoder): download.ycb(Path(basepath) / 'ckpt') assert os.path.exists( cfg.MODEL.weights_encoder ), f"checkpoint {cfg.MODEL.weights_encoder} does not exitst!" assert os.path.exists( cfg.MODEL.weights_decoder ), f"checkpoint {cfg.MODEL.weights_decoder} does not exitst!" # Network Builders net_encoder = ModelBuilder.build_encoder( arch=cfg.MODEL.arch_encoder, fc_dim=cfg.MODEL.fc_dim, weights=cfg.MODEL.weights_encoder) net_decoder = ModelBuilder.build_decoder( arch=cfg.MODEL.arch_decoder, fc_dim=cfg.MODEL.fc_dim, num_class=cfg.DATASET.num_class, weights=cfg.MODEL.weights_decoder, use_softmax=True) crit = nn.NLLLoss(ignore_index=-1) segmentation_module = SegmentationModule(net_encoder, net_decoder, crit) if self.gpu is not None: segmentation_module.cuda() segmentation_module.eval() self.model = segmentation_module
def initialize(): colors = scipy.io.loadmat('data/color150.mat')['colors'] names = {} with open('data/object150_info.csv') as f: reader = csv.reader(f) next(reader) for row in reader: names[int(row[0])] = row[5].split(";")[0] # Network Builders net_encoder = ModelBuilder.build_encoder( arch='resnet50dilated', fc_dim=2048, weights='ckpt/ade20k-resnet50dilated-ppm_deepsup/encoder_epoch_20.pth') net_decoder = ModelBuilder.build_decoder( arch='ppm_deepsup', fc_dim=2048, num_class=150, weights='ckpt/ade20k-resnet50dilated-ppm_deepsup/decoder_epoch_20.pth', use_softmax=True) crit = torch.nn.NLLLoss(ignore_index=-1) segmentation_module = SegmentationModule(net_encoder, net_decoder, crit) segmentation_module = segmentation_module.eval() x = torch.rand(3,672,504) output_size = x.shape # segmentation_module.cuda() with torch.no_grad(): with torch.jit.optimized_execution(True, {'target_device': 'eia:0'}): segmentation_module = torch.jit.trace(segmentation_module, x[None]) # torch.jit.save(segmentation_module, 'traced.pt') return segmentation_module
def load_model(): net_encoder = ModelBuilder.build_encoder( arch='resnet50dilated', fc_dim=2048, weights='models/encoder_epoch_20.pth') net_decoder = ModelBuilder.build_decoder( arch='ppm_deepsup', fc_dim=2048, num_class=150, weights='models/decoder_epoch_20.pth', use_softmax=True) crit = torch.nn.NLLLoss(ignore_index=-1) segmentation_module = SegmentationModule(net_encoder, net_decoder, crit) segmentation_module.eval() #segmentation_module.cuda() return segmentation_module
def semantic_segmentation(image_folder): download_segmentation() # Network Builders net_encoder = ModelBuilder.build_encoder( arch='resnet101dilated', fc_dim=2048, weights='data/segm_data/encoder_epoch_25.pth') net_decoder = ModelBuilder.build_decoder( arch='ppm_deepsup', fc_dim=2048, num_class=150, weights='data/segm_data/decoder_epoch_25.pth', use_softmax=True) crit = torch.nn.NLLLoss(ignore_index=-1) segmentation_module = SegmentationModule(net_encoder, net_decoder, crit) segmentation_module.eval() segmentation_module.cuda() # Load and normalize one image as a singleton tensor batch pil_to_tensor = torchvision.transforms.Compose([ torchvision.transforms.ToTensor(), torchvision.transforms.Normalize( mean=[0.485, 0.456, 0.406], # These are RGB mean+std values std=[0.229, 0.224, 0.225]) # across a large photo dataset. ]) pil_image = PIL.Image.open(os.path.join(image_folder, '000000.jpg')).convert('RGB') img_original = np.array(pil_image) img_data = pil_to_tensor(pil_image) singleton_batch = {'img_data': img_data[None].cuda()} output_size = img_data.shape[1:] # Run the segmentation at the highest resolution. with torch.no_grad(): scores = segmentation_module(singleton_batch, segSize=output_size) # Get the predicted scores for each pixel _, pred = torch.max(scores, dim=1) pred = pred.cpu()[0].numpy() del net_encoder, net_decoder, segmentation_module return pred
def load_model_from_url(semsegPath): model_urls = { 'encoder': 'http://sceneparsing.csail.mit.edu/model/pytorch/ade20k-resnet50dilated-ppm_deepsup/encoder_epoch_20.pth', 'decoder': 'http://sceneparsing.csail.mit.edu/model/pytorch/ade20k-resnet50dilated-ppm_deepsup/decoder_epoch_20.pth' } if not os.path.exists(os.path.join(semsegPath, 'ckpt')): os.makedirs(os.path.join(semsegPath, 'ckpt')) r = requests.get(model_urls['encoder']) with open(os.path.join(semsegPath, 'ckpt/encoder.pth'), 'wb') as f: f.write(r.content) r = requests.get(model_urls['decoder']) with open(os.path.join(semsegPath, 'ckpt/decoder.pth'), 'wb') as f: f.write(r.content) net_encoder = ModelBuilder.build_encoder(arch='resnet50dilated', fc_dim=2048, weights=os.path.join( semsegPath, 'ckpt/encoder.pth')) net_decoder = ModelBuilder.build_decoder(arch='ppm_deepsup', fc_dim=2048, num_class=150, weights=os.path.join( semsegPath, 'ckpt/decoder.pth'), use_softmax=True) crit = torch.nn.NLLLoss(ignore_index=-1) segmentation_module = SegmentationModule(net_encoder, net_decoder, crit) segmentation_module.eval() segmentation_module.cuda() return segmentation_module
def segmentation_model_init(): if not os.path.exists(model_folder): os.makedirs(model_folder) ENCODER_NAME = 'resnet101' DECODER_NAME = 'upernet' PRETRAINED_ENCODER_MODEL_URL = 'http://sceneparsing.csail.mit.edu/model/pytorch/ade20k-%s-%s/encoder_epoch_50.pth' % ( ENCODER_NAME, DECODER_NAME) PRETRAINED_DECODER_MODEL_URL = 'http://sceneparsing.csail.mit.edu/model/pytorch/ade20k-%s-%s/decoder_epoch_50.pth' % ( ENCODER_NAME, DECODER_NAME) pretrained_encoder_file = ENCODER_NAME + basename( PRETRAINED_ENCODER_MODEL_URL) pretrained_decoder_file = DECODER_NAME + basename( PRETRAINED_DECODER_MODEL_URL) encoder_path = os.path.join(model_folder, pretrained_encoder_file) decoder_path = os.path.join(model_folder, pretrained_decoder_file) if not os.path.exists(encoder_path): urllib.request.urlretrieve(PRETRAINED_ENCODER_MODEL_URL, encoder_path) if not os.path.exists(decoder_path): urllib.request.urlretrieve(PRETRAINED_DECODER_MODEL_URL, decoder_path) # options options = SimpleNamespace(fc_dim=2048, num_class=150, imgSizes=[300, 400, 500, 600], imgMaxSize=1000, padding_constant=8, segm_downsampling_rate=8) # create model builder = ModelBuilder() net_encoder = builder.build_encoder(arch=ENCODER_NAME, weights=encoder_path, fc_dim=options.fc_dim) net_decoder = builder.build_decoder(arch=DECODER_NAME, weights=decoder_path, fc_dim=options.fc_dim, num_class=options.num_class, use_softmax=True) segmentation_module = SegmentationModule(net_encoder, net_decoder, torch.nn.NLLLoss(ignore_index=-1)) segmentation_module = segmentation_module.eval() torch.set_grad_enabled(False) if torch.cuda.is_available(): segmentation_module = segmentation_module.cuda() return segmentation_module, options
def huawei_seg(imgs): parser = argparse.ArgumentParser( description="PyTorch Semantic Segmentation Testing") parser.add_argument( "--cfg", default="config/ade20k-hrnetv2-huawei.yaml", metavar="FILE", help="path to config file", type=str, ) parser.add_argument("--gpu", default=0, type=int, help="gpu id for evaluation") args = parser.parse_args() cfg.merge_from_file(args.cfg) cfg.MODEL.arch_encoder = cfg.MODEL.arch_encoder.lower() cfg.MODEL.arch_decoder = cfg.MODEL.arch_decoder.lower() # absolute paths of model weights cfg.MODEL.weights_encoder = os.path.join(cfg.DIR, 'encoder_' + cfg.TEST.checkpoint) cfg.MODEL.weights_decoder = os.path.join(cfg.DIR, 'decoder_' + cfg.TEST.checkpoint) # imgs = [imgs] cfg.list_test = [{'fpath_img': x} for x in imgs] torch.cuda.set_device(args.gpu) # Network Builders net_encoder = ModelBuilder.build_encoder(arch=cfg.MODEL.arch_encoder, fc_dim=cfg.MODEL.fc_dim, weights=cfg.MODEL.weights_encoder) net_decoder = ModelBuilder.build_decoder(arch=cfg.MODEL.arch_decoder, fc_dim=cfg.MODEL.fc_dim, num_class=cfg.DATASET.num_class, weights=cfg.MODEL.weights_decoder, use_softmax=True) crit = nn.NLLLoss(ignore_index=-1) segmentation_module = SegmentationModule(net_encoder, net_decoder, crit) # Dataset and Loader dataset_test = InferDataset(cfg.list_test, cfg.DATASET) loader_test = torch.utils.data.DataLoader( dataset_test, batch_size=1, shuffle=False, collate_fn=user_scattered_collate, num_workers=5, drop_last=True) segmentation_module.cuda() loader = loader_test # Main loop segmentation_module.eval() pbar = tqdm(total=len(loader)) for batch_data in loader: # process data batch_data = batch_data[0] segSize = (batch_data['img_ori'].shape[0], batch_data['img_ori'].shape[1]) img_resized_list = batch_data['img_data'] with torch.no_grad(): scores = torch.zeros(1, cfg.DATASET.num_class, segSize[0], segSize[1]) scores = async_copy_to(scores, args.gpu) for img in img_resized_list: feed_dict = batch_data.copy() feed_dict['img_data'] = img del feed_dict['img_ori'] del feed_dict['info'] feed_dict = async_copy_to(feed_dict, args.gpu) # forward pass pred_tmp = segmentation_module(feed_dict, segSize=segSize) # print(pred_tmp.shape)#torch.Size([1, 2, 1024, 1024]) scores = scores + pred_tmp / len(cfg.DATASET.imgSizes) _, pred = torch.max(scores, dim=1) pred = as_numpy(pred.squeeze(0).cpu()) # visualization visualize_result((batch_data['img_ori'], batch_data['info']), pred, cfg) pbar.update(1) # return pred
# use_softmax=True) # hrnet net_encoder = ModelBuilder.build_encoder( arch='hrnetv2', fc_dim=720, weights='../../ckpt/ade20k-hrnetv2-c1/encoder_epoch_30.pth') net_decoder = ModelBuilder.build_decoder( arch='c1', fc_dim=720, num_class=150, weights='../../ckpt/ade20k-hrnetv2-c1/decoder_epoch_30.pth', use_softmax=True) crit = torch.nn.NLLLoss(ignore_index=-1) segmentation_module = SegmentationModule(net_encoder, net_decoder, crit) segmentation_module.eval() segmentation_module.cuda() pil_to_tensor = torchvision.transforms.Compose([ torchvision.transforms.ToTensor(), torchvision.transforms.Normalize( mean=[0.485, 0.456, 0.406], # These are RGB mean+std values std=[0.229, 0.224, 0.225]) # across a large photo dataset. ]) pil_image = PIL.Image.open('sample.png').convert('RGB') img_original = numpy.array(pil_image) img_data = pil_to_tensor(pil_image) singleton_batch = {'img_data': img_data[None].cuda()} output_size = img_data.shape[1:] with torch.no_grad():
class ExtractSegmentationMask: def __init__(self): """ extract semantic map from pretrained model, for knowing where to ignore in the image, since we only have depth info on mountains. """ self.names = {} with open('semseg/object150_info.csv') as f: reader = csv.reader(f) next(reader) for row in reader: self.names[int(row[0])] = row[5].split(";")[0] # Network Builders self.net_encoder = ModelBuilder.build_encoder( arch='resnet50dilated', fc_dim=2048, weights= 'semseg/ckpt/ade20k-resnet50dilated-ppm_deepsup/encoder_epoch_20.pth' ) self.net_decoder = ModelBuilder.build_decoder( arch='ppm_deepsup', fc_dim=2048, num_class=150, weights= 'semseg/ckpt/ade20k-resnet50dilated-ppm_deepsup/decoder_epoch_20.pth', use_softmax=True) self.crit = torch.nn.NLLLoss(ignore_index=-1) self.segmentation_module = SegmentationModule(self.net_encoder, self.net_decoder, self.crit) self.segmentation_module.eval() self.segmentation_module.to(device=defs.get_dev()) def visualize_result(self, img, pred, index=None): # filter prediction class if requested colors = scipy.io.loadmat('semseg/color150.mat')['colors'] if index is not None: pred = pred.copy() pred[pred != index] = -1 print(f'{self.names[index + 1]}:') # colorize prediction pred_color = colorEncode(pred, colors).astype(np.float32) / 255 # aggregate images and save im_vis = np.concatenate((img, pred_color), axis=1) plt.imshow(im_vis) plt.show() def __call__(self, sample): img_data = sample['image'] old_shape = img_data.shape[1], img_data.shape[2] img_data = ResizeToAlmostResolution(180, 224)({ 'image': img_data })['image'] singleton_batch = {'img_data': img_data[None]} output_size = img_data.shape[1:] # Run the segmentation at the highest resolution. with torch.no_grad(): scores = self.segmentation_module(singleton_batch, segSize=output_size) # Get the predicted scores for each pixel _, pred = torch.max(scores, dim=1) pred = TF.resize(pred, old_shape, Image.NEAREST) # other irrelevant classes: 1, 4, 12, 20, 25, 83, 116, 126, 127. # see csv in semseg folder. bad_classes = torch.Tensor([2]).to(device=defs.get_dev()) mask = torch.full_like(pred, True, dtype=torch.bool) mask[(pred[..., None] == bad_classes).any(-1)] = False if 'mask' in sample: sample['mask'] = sample['mask'] & mask else: sample['mask'] = mask return sample