def train(data_root):
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    # two classes: background and person
    num_classes = 2
    dataset = PennFudanDataset(data_root, get_transform(train=True))
    dataset_test = PennFudanDataset(data_root, get_transform(train=False))
    # split the dataset: hold out the last 50 samples for testing
    indices = torch.randperm(len(dataset)).tolist()
    dataset = Subset(dataset, indices[:-50])
    dataset_test = Subset(dataset_test, indices[-50:])
    # define data loaders
    data_loader = DataLoader(dataset, batch_size=2, shuffle=True, num_workers=4,
                             collate_fn=tools.collate_fn)
    data_loader_test = DataLoader(dataset_test, batch_size=1, shuffle=False, num_workers=4,
                                  collate_fn=tools.collate_fn)
    # get the model and move it to the training device
    model = get_model_instance_segmentation(num_classes)
    model.to(device)
    # optimize only the trainable parameters
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3)
    num_epochs = 10
    for epoch in range(num_epochs):
        train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
        lr_scheduler.step()
        # evaluate(model, data_loader_test, device=device)
    torch.save(model.state_dict(), "masknet.pth")
    print("OK!")
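
# For reference, the helpers used above follow the torchvision instance
# segmentation tutorial. The sketch below shows what
# `get_model_instance_segmentation` and the collate function typically look
# like under that assumption; the repo's own definitions are authoritative.
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

def get_model_instance_segmentation_sketch(num_classes):
    # start from a Mask R-CNN pre-trained on COCO
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
    # replace the box predictor head with one sized for num_classes
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    # replace the mask predictor head likewise
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask, 256, num_classes)
    return model

# detection targets vary in size per image, so the collate function usually
# zips samples into tuples instead of stacking tensors:
def collate_fn_sketch(batch):
    return tuple(zip(*batch))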
def compute_unary_term(heatmap, grid, bbox2D, cam, imgSize):
    """
    Args:
        heatmap: array of size (n, k, h, w)
            n: number of views, k: number of joints
            h: heatmap height, w: heatmap width
        grid: a list of k ndarrays, each of size (nbins, 3)
            k: number of joints; the list has length 1 when the grid is shared in PSM
            nbins: number of bins in the grid
        bbox2D: bounding box on which the heatmap is computed
    Returns:
        unary_of_all_joints: a list of k ndarrays, each of size nbins
    """
    device = heatmap.device
    share_grid = len(grid) == 1
    n, k = heatmap.shape[0], heatmap.shape[1]
    h, w = heatmap.shape[2], heatmap.shape[3]

    all_unary = {}
    for v in range(n):
        center = bbox2D[v]['center']
        scale = bbox2D[v]['scale']
        trans = torch.as_tensor(
            get_transform(center, scale, 0, imgSize),
            dtype=torch.float,
            device=device)
        for j in range(k):
            grid_id = 0 if share_grid else j
            nbins = grid[grid_id].shape[0]
            # with a shared grid, project once (at j == 0) and reuse the result
            if (share_grid and j == 0) or not share_grid:
                xy = cameras.project_pose(grid[grid_id], cam[v])
                xy = do_transform(xy, trans) * torch.tensor(
                    [w, h], dtype=torch.float, device=device) / torch.tensor(
                        imgSize, dtype=torch.float, device=device)
                # grid_sample expects (x, y) in [-1, 1]: normalize x by the
                # width and y by the height
                sample_grid = xy / torch.tensor(
                    [w - 1, h - 1], dtype=torch.float,
                    device=device) * 2.0 - 1.0
                sample_grid = sample_grid.view(1, 1, nbins, 2)
            unary_per_view_joint = F.grid_sample(
                heatmap[v:v + 1, j:j + 1, :, :], sample_grid)
            # accumulate the unary term over views
            if j in all_unary:
                all_unary[j] += unary_per_view_joint
            else:
                all_unary[j] = unary_per_view_joint

    all_unary_list = []
    for j in range(k):
        all_unary_list.append(all_unary[j].view(1, -1))
    return all_unary_list
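
# A quick standalone check of the normalization convention relied on above:
# F.grid_sample takes (x, y) locations in [-1, 1], with x scaled by the
# feature-map width and y by its height. Nothing here is part of the
# pipeline; it only demonstrates the convention.
import torch
import torch.nn.functional as F

heat = torch.arange(12.).view(1, 1, 3, 4)  # h = 3, w = 4, values 0..11
# normalized (x, y) of the top-left and bottom-right pixels
pts = torch.tensor([[0., 0.], [3., 2.]]) / torch.tensor([4. - 1, 3. - 1]) * 2.0 - 1.0
out = F.grid_sample(heat, pts.view(1, 1, 2, 2), align_corners=True)
print(out.view(-1))  # tensor([ 0., 11.]) -- both corner pixels recovered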
save_dir='./car_SE_trainset_prediction/',
dataset={
    'name': 'mots_cars_val',
    'kwargs': {
        'root_dir': kittiRoot,
        'mode': 'train',
        # 'mode': 'val',
        # 'size': 1000,
        'transform': my_transforms.get_transform([
            {
                'name': 'LU_Pad',
                'opts': {
                    'keys': ('mot_image', 'mot_instance', 'mot_label'),
                    'size': (384, 1248),
                }
            },
            {
                'name': 'ToTensor',
                'opts': {
                    'keys': ('mot_image', 'mot_instance', 'mot_label'),
                    'type': (torch.FloatTensor, torch.LongTensor, torch.ByteTensor),
                }
            },
        ]),
    },
    'batch_size': 1,
    'workers': 32
},
max_disparity=192.0,
with_uv=True
)
        'root_dir': CITYSCAPES_DIR,
        'type': 'crops',
        'size': 3000,
        'transform': my_transforms.get_transform([
            {
                'name': 'RandomCrop',
                'opts': {
                    'keys': ('image', 'instance', 'label'),
                    'size': (512, 512),
                }
            },
            {
                'name': 'ToTensor',
                'opts': {
                    'keys': ('image', 'instance', 'label'),
                    'type': (torch.FloatTensor, torch.ByteTensor, torch.ByteTensor),
                }
            },
        ]),
    },
    'batch_size': 8,
    'workers': 8
},
val_dataset={
    'name': 'cityscapes',
    'kwargs': {
train_dataset = {
    'name': 'mots_cars',
    'kwargs': {
        'root_dir': kittiRoot,
        'type': 'crop',
        'size': 7000,
        'transform': my_transforms.get_transform([
            {
                'name': 'AdjustBrightness',
                'opts': {}
            },
            {
                'name': 'ToTensor',
                'opts': {
                    'keys': ('image', 'instance', 'label'),
                    'type': (torch.FloatTensor, torch.LongTensor, torch.ByteTensor),
                }
            },
            {
                'name': 'Flip',
                'opts': {
                    'keys': ('image', 'instance', 'label'),
                }
            },
        ]),
    },
    'batch_size': 4,
    'workers': 1,
    # 'batch_size': 64,
    # 'workers': 32
},
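
# These SpatialEmbeddings-style config dicts are typically consumed by a
# dataset factory and wrapped in a DataLoader. A minimal sketch of that
# consumer, assuming a `get_dataset(name, kwargs)` factory like the one such
# repos ship (the name is a stand-in, not this repo's confirmed API):
from torch.utils.data import DataLoader

def make_loader(cfg, shuffle=True):
    dataset = get_dataset(cfg['name'], cfg['kwargs'])  # assumed factory
    return DataLoader(dataset,
                      batch_size=cfg['batch_size'],
                      shuffle=shuffle,
                      num_workers=cfg['workers'],
                      pin_memory=True)

# train_loader = make_loader(train_dataset)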
def get_voxel(self, heatmaps, meta, grid_size, grid_center, cube_size):
    device = heatmaps[0].device
    batch_size = heatmaps[0].shape[0]
    num_joints = heatmaps[0].shape[1]
    nbins = cube_size[0] * cube_size[1] * cube_size[2]
    n = len(heatmaps)
    cubes = torch.zeros(batch_size, num_joints, 1, nbins, n, device=device)
    # h, w = heatmaps[0].shape[2], heatmaps[0].shape[3]
    w, h = self.heatmap_size
    grids = torch.zeros(batch_size, nbins, 3, device=device)
    bounding = torch.zeros(batch_size, 1, 1, nbins, n, device=device)
    for i in range(batch_size):
        if len(grid_center[0]) == 3 or grid_center[i][3] >= 0:
            # This part of the code can be optimized because the projection
            # operation is time-consuming. If the camera locations never
            # change, the grids and sample grids are repeated across frames
            # and could be computed only once.
            if len(grid_center) == 1:
                grid = self.compute_grid(grid_size, grid_center[0], cube_size, device=device)
            else:
                grid = self.compute_grid(grid_size, grid_center[i], cube_size, device=device)
            grids[i:i + 1] = grid
            for c in range(n):
                center = meta[c]['center'][i]
                scale = meta[c]['scale'][i]
                width, height = center * 2
                trans = torch.as_tensor(
                    get_transform(center, scale, 0, self.img_size),
                    dtype=torch.float,
                    device=device)
                cam = {}
                for k, v in meta[c]['camera'].items():
                    cam[k] = v[i]
                # project the 3D grid into view c and mark in-image voxels
                xy = cameras.project_pose(grid, cam)
                bounding[i, 0, 0, :, c] = (xy[:, 0] >= 0) & (xy[:, 1] >= 0) & (
                    xy[:, 0] < width) & (xy[:, 1] < height)
                xy = torch.clamp(xy, -1.0, max(width, height))
                xy = do_transform(xy, trans)
                xy = xy * torch.tensor(
                    [w, h], dtype=torch.float, device=device) / torch.tensor(
                        self.img_size, dtype=torch.float, device=device)
                sample_grid = xy / torch.tensor(
                    [w - 1, h - 1], dtype=torch.float,
                    device=device) * 2.0 - 1.0
                sample_grid = torch.clamp(sample_grid.view(1, 1, nbins, 2), -1.1, 1.1)
                # if pytorch version < 1.3.0, align_corners=True should be omitted.
                cubes[i:i + 1, :, :, :, c] += F.grid_sample(
                    heatmaps[c][i:i + 1, :, :, :], sample_grid, align_corners=True)

    # average the sampled responses over the views whose projection falls
    # inside the image, instead of a plain mean over all views
    # cubes = cubes.mean(dim=-1)
    cubes = torch.sum(torch.mul(cubes, bounding), dim=-1) / (torch.sum(bounding, dim=-1) + 1e-6)
    cubes[cubes != cubes] = 0.0  # zero out NaNs (NaN != NaN)
    cubes = cubes.clamp(0.0, 1.0)
    cubes = cubes.view(batch_size, num_joints, cube_size[0], cube_size[1], cube_size[2])
    return cubes, grids
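
# Two details in the fusion step above, shown on a toy tensor: the per-voxel
# average counts only the views whose projection lands inside the image (the
# `bounding` mask), and `cubes[cubes != cubes] = 0.0` clears NaNs, since NaN
# is the only value not equal to itself. Standalone illustration:
import torch

scores = torch.tensor([[0.8, 0.6, 0.3]])  # per-view responses for one voxel
mask = torch.tensor([[1.0, 1.0, 0.0]])    # third view projects off-image
fused = (scores * mask).sum(-1) / (mask.sum(-1) + 1e-6)
print(fused)                              # ~tensor([0.7000])

x = torch.tensor([0.5, float('nan')])
x[x != x] = 0.0                           # NaN != NaN, so only NaNs match
print(x)                                  # tensor([0.5000, 0.0000])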
    './pretrained_models/erfnet_encoder_pretrained.pth.tar',
},
dataset={
    'name': 'cityscapes',
    'kwargs': {
        'root_dir': CITYSCAPES_DIR,
        'type': 'val',
        'transform': my_transforms.get_transform([
            {
                'name': 'ToTensor',
                'opts': {
                    'keys': ('image', 'instance', 'label', 'semantic_label'),
                    'type': (torch.FloatTensor, torch.ByteTensor, torch.ByteTensor, torch.FloatTensor),
                }
            },
        ]),
    }
},
model={
    'name': 'branched_erfnet',
    'kwargs': {
        'num_classes': [13, 3],
    }
})