def __init__(self, train=True, path='./TRANCOS_v3', out_shape=(120, 176),
             transform=NP_T.ToTensor(), gamma=30, max_len=None, cameras=None):
    r"""
    Args:
        train: train (`True`) or test (`False`) images (default: `True`).
        path: path for the dataset (default: "./TRANCOS_v3").
        out_shape: shape of the output images (default: (120, 176)).
        transform: transformations to apply to the images as np.arrays (default: `NP_T.ToTensor()`).
        gamma: precision parameter of the Gaussian kernel (default: 30).
        max_len: maximum sequence length (default: `None`).
        cameras: list with the camera IDs to be used, so that images from other
            cameras are discarded; if `None`, all cameras are used (default: `None`).
    """
    super(TrancosSeq, self).__init__(train=train, path=path, out_shape=out_shape,
                                     transform=transform, gamma=gamma,
                                     get_cameras=True, cameras=cameras)

    # hash table from file names to indices
    self.img2idx = {img: idx for idx, img in enumerate(self.image_files)}
    # list of lists containing the names of the images in each sequence
    self.seqs = []
    prev_cid = -1
    cur_len = 0
    with open(os.path.join(self.path, 'images', 'cam_annotations.txt')) as f:
        for line in f:
            img_f, cid = line.split()
            if img_f in self.image_files:
                # all images in a sequence must be from the same camera
                # and no sequence may be longer than max_len
                if (int(cid) == prev_cid) and ((max_len is None) or (cur_len < max_len)):
                    self.seqs[-1].append(img_f)
                    cur_len += 1
                else:
                    self.seqs.append([img_f])
                    cur_len = 1
                prev_cid = int(cid)

    if max_len is None:
        # maximum sequence length in the dataset
        self.max_len = max([len(seq) for seq in self.seqs])
    else:
        self.max_len = max_len
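# The camera-grouping logic above is easiest to see on toy data. The sketch below
# is a hypothetical, self-contained rerun of the same loop on fabricated
# annotation pairs (image name, camera ID); it is only an illustration and not
# part of the TrancosSeq class.
def _group_by_camera(annotations, max_len=None):
    """Group (image, camera_id) pairs into per-camera sequences of length <= max_len."""
    seqs, prev_cid, cur_len = [], -1, 0
    for img_f, cid in annotations:
        if (int(cid) == prev_cid) and ((max_len is None) or (cur_len < max_len)):
            seqs[-1].append(img_f)  # same camera and still room in the sequence
            cur_len += 1
        else:
            seqs.append([img_f])    # camera changed or max_len reached: start a new sequence
            cur_len = 1
        prev_cid = int(cid)
    return seqs

# toy check (made-up file names):
# _group_by_camera([('a1.jpg', 0), ('a2.jpg', 0), ('a3.jpg', 0), ('b1.jpg', 1), ('b2.jpg', 1)], max_len=2)
# -> [['a1.jpg', 'a2.jpg'], ['a3.jpg'], ['b1.jpg', 'b2.jpg']]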
def __init__(self, path='./citycam/preprocessed', out_shape=(120, 176),
             transform=NP_T.ToTensor(), gamma=30, max_len=None, cameras=None,
             load_all=True):
    r"""
    Args:
        path: path for the dataset (default: "./citycam/preprocessed").
        out_shape: shape of the output images (default: (120, 176)).
        transform: transformations to apply to the images as np.arrays (default: `NP_T.ToTensor()`).
        gamma: precision parameter of the Gaussian kernel (default: 30).
        max_len: maximum sequence length (default: `None`).
        cameras: list with the camera IDs to be used, so that images from other
            cameras are discarded; if `None`, all cameras are used (default: `None`).
    """
    super(WebcamTSeq, self).__init__(path=path, out_shape=out_shape,
                                     transform=transform, gamma=gamma,
                                     get_cameras=True, cameras=cameras,
                                     load_all=load_all)

    # hash table from file names to indices
    self.img2idx = {img: idx for idx, img in enumerate(self.image_files)}
    # list of lists containing the names of the images in each sequence
    self.seqs = []
    prev_seq_id = None
    for i, img_f in enumerate(self.image_files):
        # group by the camera/sequence directory (first path component)
        seq_id = img_f.split(os.sep)[0]
        if i == 0:
            self.seqs.append([img_f])
            prev_seq_id = seq_id
            continue
        # keep appending while the sequence id is unchanged and max_len is not exceeded
        if (seq_id == prev_seq_id) and ((max_len is None) or (i % max_len > 0)):
            self.seqs[-1].append(img_f)
        else:
            self.seqs.append([img_f])
        prev_seq_id = seq_id

    # maximum sequence length in the dataset
    self.max_len = max_len if (max_len is not None) else max([len(seq) for seq in self.seqs])
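# Both sequence datasets above produce sequences of variable length (up to
# self.max_len), so a DataLoader over them needs a collate function that pads
# before stacking. The sketch below is a minimal, hypothetical example assuming
# each dataset item is a tuple (frames, densities, counts, seq_len) with frames
# of shape (L, C, H, W); the actual __getitem__ of these classes may differ.
import torch

def pad_seq_collate(batch, max_len):
    """Zero-pad each sequence to max_len and stack the batch along dim 0."""
    frames, densities, counts, lengths = [], [], [], []
    for X, D, c, L in batch:
        pad = max_len - X.shape[0]
        frames.append(torch.cat([X, X.new_zeros((pad,) + X.shape[1:])], dim=0))
        densities.append(torch.cat([D, D.new_zeros((pad,) + D.shape[1:])], dim=0))
        counts.append(torch.cat([c, c.new_zeros(pad)], dim=0))
        lengths.append(L)
    return (torch.stack(frames), torch.stack(densities),
            torch.stack(counts), torch.tensor(lengths))

# hypothetical usage:
# loader = DataLoader(seq_data, batch_size=4,
#                     collate_fn=lambda b: pad_seq_collate(b, seq_data.max_len))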
def main():
    parser = argparse.ArgumentParser(
        description='Train FCN in Trancos or WebcamT datasets.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-m', '--model_path', default='./fcn.pth', type=str, metavar='',
                        help='model file (output of train)')
    parser.add_argument('-d', '--dataset', default='TRANCOS', type=str, metavar='',
                        help='dataset')
    parser.add_argument('-p', '--data_path', default='/ctm-hdd-pool01/DB/TRANCOS_v3', type=str, metavar='',
                        help='data directory path')
    parser.add_argument('--valid', default=0.2, type=float, metavar='',
                        help='fraction of the training data for validation')
    parser.add_argument('--lr', default=1e-3, type=float, metavar='',
                        help='learning rate')
    parser.add_argument('--epochs', default=500, type=int, metavar='',
                        help='number of training epochs')
    parser.add_argument('--batch_size', default=32, type=int, metavar='',
                        help='batch size')
    parser.add_argument('--img_shape', nargs=2, default=[120, 160], type=int, metavar='',
                        help='shape of the input images')
    parser.add_argument('--lambda', default=1e-3, type=float, metavar='',
                        help='trade-off between density estimation and vehicle count losses (see eq. 7 in the paper)')
    parser.add_argument('--gamma', default=1e3, type=float, metavar='',
                        help='precision parameter of the Gaussian kernel (inverse of variance)')
    parser.add_argument('--weight_decay', default=0., type=float, metavar='',
                        help='weight decay regularization')
    parser.add_argument('--use_cuda', default=True, type=int, metavar='',
                        help='use CUDA capable GPU')
    parser.add_argument('--use_visdom', default=False, type=int, metavar='',
                        help='use Visdom to visualize plots')
    parser.add_argument('--visdom_env', default='FCN_train', type=str, metavar='',
                        help='Visdom environment name')
    parser.add_argument('--visdom_port', default=8888, type=int, metavar='',
                        help='Visdom port')
    parser.add_argument('--n2show', default=8, type=int, metavar='',
                        help='number of examples to show in Visdom in each epoch')
    parser.add_argument('--vis_shape', nargs=2, default=[120, 160], type=int, metavar='',
                        help='shape of the images shown in Visdom')
    parser.add_argument('--seed', default=42, type=int, metavar='',
                        help='random seed')
    args = vars(parser.parse_args())

    # dump args to a txt file for your records
    with open(args['model_path'] + '.txt', 'w') as f:
        f.write(str(args) + '\n')

    # use a fixed random seed for reproducibility purposes
    if args['seed'] > 0:
        random.seed(args['seed'])
        np.random.seed(seed=args['seed'])
        torch.manual_seed(args['seed'])

    # if args['use_cuda'] == True and we have a GPU, use the GPU; otherwise, use the CPU
    device = 'cuda:0' if (args['use_cuda'] and torch.cuda.is_available()) else 'cpu:0'
    print('device:', device)

    # define image transformations to be applied to each image in the dataset
    train_transf = T.Compose([
        NP_T.RandomHorizontalFlip(0.5),  # data augmentation: horizontal flipping (we could add more transformations)
        NP_T.ToTensor()                  # convert np.array to tensor
    ])
    valid_transf = NP_T.ToTensor()  # no data augmentation in validation

    # instantiate the dataset
    if args['dataset'].upper() == 'TRANCOS':
        train_data = Trancos(train=True, path=args['data_path'], out_shape=args['img_shape'],
                             transform=train_transf, gamma=args['gamma'])
        valid_data = Trancos(train=True, path=args['data_path'], out_shape=args['img_shape'],
                             transform=valid_transf, gamma=args['gamma'])
    else:
        train_data = WebcamT(path=args['data_path'], out_shape=args['img_shape'],
                             transform=train_transf, gamma=args['gamma'])
        valid_data = WebcamT(path=args['data_path'], out_shape=args['img_shape'],
                             transform=valid_transf, gamma=args['gamma'])

    # split the data into training and validation sets
    if args['valid'] > 0:
        # randomly choose some images for validation
        valid_indices = set(random.sample(range(len(train_data)), int(len(train_data) * args['valid'])))
        valid_data = Subset(valid_data, list(valid_indices))
        # remaining images are for training
        train_indices = set(range(len(train_data))) - valid_indices
        train_data = Subset(train_data, list(train_indices))
    else:
        valid_data = None

    # create data loaders for training and validation
    train_loader = DataLoader(train_data, batch_size=args['batch_size'],
                              shuffle=True)  # shuffle the data at the beginning of each epoch
    if valid_data:
        valid_loader = DataLoader(valid_data, batch_size=args['batch_size'],
                                  shuffle=False)  # no need to shuffle in validation
    else:
        valid_loader = None

    # instantiate the model and define an optimizer
    model = FCN_rLSTM(temporal=False).to(device)
    print(model)
    optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'], weight_decay=args['weight_decay'])

    # Visdom is a tool to visualize plots during training
    if args['use_visdom']:
        loss_plt = plotter.VisdomLossPlotter(env_name=args['visdom_env'], port=args['visdom_port'])
        img_plt = plotter.VisdomImgsPlotter(env_name=args['visdom_env'], port=args['visdom_port'])

    # training routine
    for epoch in range(args['epochs']):
        print('Epoch {}/{}'.format(epoch, args['epochs'] - 1))

        # training phase
        model.train()  # set model to training mode (affects batchnorm and dropout, if present)
        loss_hist = []
        density_loss_hist = []
        count_loss_hist = []
        count_err_hist = []
        X, mask, density, count = None, None, None, None
        t0 = time.time()
        for i, (X, mask, density, count) in enumerate(train_loader):
            # copy the tensors to GPU (if applicable)
            X, mask, density, count = X.to(device), mask.to(device), density.to(device), count.to(device)

            # forward pass through the model
            density_pred, count_pred = model(X, mask=mask)

            # compute the loss
            N = X.shape[0]
            density_loss = torch.sum((density_pred - density)**2) / (2 * N)
            count_loss = torch.sum((count_pred - count)**2) / (2 * N)
            loss = density_loss + args['lambda'] * count_loss

            # backward pass and optimization step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            print('{}/{} mini-batch loss: {:.3f} | density loss: {:.3f} | count loss: {:.3f}'
                  .format(i, len(train_loader) - 1, loss.item(), density_loss.item(), count_loss.item()),
                  flush=True, end='\r')

            # save the loss values
            loss_hist.append(loss.item())
            density_loss_hist.append(density_loss.item())
            count_loss_hist.append(count_loss.item())
            with torch.no_grad():  # evaluation metric, so no need to compute gradients
                count_err = torch.sum(torch.abs(count_pred - count)) / N
            count_err_hist.append(count_err.item())
        t1 = time.time()
        print()

        # print the average training losses
        train_loss = sum(loss_hist) / len(loss_hist)
        train_density_loss = sum(density_loss_hist) / len(density_loss_hist)
        train_count_loss = sum(count_loss_hist) / len(count_loss_hist)
        train_count_err = sum(count_err_hist) / len(count_err_hist)
        print('Training statistics:')
        print('global loss: {:.3f} | density loss: {:.3f} | count loss: {:.3f} | count error: {:.3f}'
              .format(train_loss, train_density_loss, train_count_loss, train_count_err))
        print('time: {:.0f} seconds'.format(t1 - t0))

        if args['use_visdom']:
            # plot the losses
            loss_plt.plot('global loss', 'train', 'MSE', epoch, train_loss)
            loss_plt.plot('density loss', 'train', 'MSE', epoch, train_density_loss)
            loss_plt.plot('count loss', 'train', 'MSE', epoch, train_count_loss)
            loss_plt.plot('count error', 'train', 'MAE', epoch, train_count_err)

            # show a few training examples (images + density maps)
            X *= mask  # show the active region only
            X, density, count = X.cpu().numpy(), density.cpu().numpy(), count.cpu().numpy()
            density_pred, count_pred = density_pred.detach().cpu().numpy(), count_pred.detach().cpu().numpy()
            n2show = min(args['n2show'], X.shape[0])  # show args['n2show'] images at most
            show_images(img_plt, 'train gt', X[0:n2show], density[0:n2show], count[0:n2show],
                        shape=args['vis_shape'])
            show_images(img_plt, 'train pred', X[0:n2show], density_pred[0:n2show], count_pred[0:n2show],
                        shape=args['vis_shape'])

        if valid_loader is None:
            print()
            continue

        # validation phase
        model.eval()  # set model to evaluation mode (affects batchnorm and dropout, if present)
        loss_hist = []
        density_loss_hist = []
        count_loss_hist = []
        count_err_hist = []
        X, mask, density, count = None, None, None, None
        t0 = time.time()
        for i, (X, mask, density, count) in enumerate(valid_loader):
            # copy the tensors to GPU (if available)
            X, mask, density, count = X.to(device), mask.to(device), density.to(device), count.to(device)

            # forward pass through the model
            with torch.no_grad():  # no need to compute gradients in validation (faster and uses less memory)
                density_pred, count_pred = model(X, mask=mask)

            # compute the loss
            N = X.shape[0]
            density_loss = torch.sum((density_pred - density)**2) / (2 * N)
            count_loss = torch.sum((count_pred - count)**2) / (2 * N)
            loss = density_loss + args['lambda'] * count_loss

            # save the loss values
            loss_hist.append(loss.item())
            density_loss_hist.append(density_loss.item())
            count_loss_hist.append(count_loss.item())
            count_err = torch.sum(torch.abs(count_pred - count)) / N
            count_err_hist.append(count_err.item())
        t1 = time.time()

        # print the average validation losses
        valid_loss = sum(loss_hist) / len(loss_hist)
        valid_density_loss = sum(density_loss_hist) / len(density_loss_hist)
        valid_count_loss = sum(count_loss_hist) / len(count_loss_hist)
        valid_count_err = sum(count_err_hist) / len(count_err_hist)
        print('Validation statistics:')
        print('global loss: {:.3f} | density loss: {:.3f} | count loss: {:.3f} | count error: {:.3f}'
              .format(valid_loss, valid_density_loss, valid_count_loss, valid_count_err))
        print('time: {:.0f} seconds'.format(t1 - t0))
        print()

        if args['use_visdom']:
            # plot the losses
            loss_plt.plot('global loss', 'valid', 'MSE', epoch, valid_loss)
            loss_plt.plot('density loss', 'valid', 'MSE', epoch, valid_density_loss)
            loss_plt.plot('count loss', 'valid', 'MSE', epoch, valid_count_loss)
            loss_plt.plot('count error', 'valid', 'MAE', epoch, valid_count_err)

            # show a few validation examples (images + density maps)
            X *= mask  # show the active region only
            X, density, count = X.cpu().numpy(), density.cpu().numpy(), count.cpu().numpy()
            density_pred, count_pred = density_pred.cpu().numpy(), count_pred.cpu().numpy()
            n2show = min(args['n2show'], X.shape[0])  # show args['n2show'] images at most
            show_images(img_plt, 'valid gt', X[0:n2show], density[0:n2show], count[0:n2show],
                        shape=args['vis_shape'])
            show_images(img_plt, 'valid pred', X[0:n2show], density_pred[0:n2show], count_pred[0:n2show],
                        shape=args['vis_shape'])

    torch.save(model.state_dict(), args['model_path'])
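# The training loop above combines a density (MSE) term and a count (MSE) term,
# weighted by args['lambda'] (eq. 7 in the paper). The helper below is a minimal,
# self-contained restatement of that computation; the (N, 1, H, W) density shape
# in the quick check is an illustrative assumption, and the function is not part
# of the training script itself.
import torch

def fcn_loss(density_pred, density, count_pred, count, lam):
    """Return (total, density_loss, count_loss), each averaged over the batch of size N."""
    N = density.shape[0]
    density_loss = torch.sum((density_pred - density) ** 2) / (2 * N)
    count_loss = torch.sum((count_pred - count) ** 2) / (2 * N)
    return density_loss + lam * count_loss, density_loss, count_loss

# quick check with random tensors:
# d_pred, d = torch.rand(4, 1, 120, 160), torch.rand(4, 1, 120, 160)
# c_pred, c = d_pred.sum(dim=(1, 2, 3)), d.sum(dim=(1, 2, 3))
# total, dl, cl = fcn_loss(d_pred, d, c_pred, c, lam=1e-3)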
def main():
    parser = argparse.ArgumentParser(
        description='Test FCN in Trancos dataset.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-m', '--model_path', default='./fcn.pth', type=str, metavar='',
                        help='model file (output of train)')
    parser.add_argument('-d', '--data_path', default='/ctm-hdd-pool01/DB/TRANCOS_v3', type=str, metavar='',
                        help='data directory path')
    parser.add_argument('--batch_size', default=32, type=int, metavar='',
                        help='batch size')
    parser.add_argument('--size_red', default=4, type=int, metavar='',
                        help='size reduction factor to be applied to the images')
    parser.add_argument('--gamma', default=1e3, type=float, metavar='',
                        help='precision parameter of the Gaussian kernel (inverse of variance)')
    parser.add_argument('--use_cuda', default=True, type=int, metavar='',
                        help='use CUDA capable GPU')
    parser.add_argument('--use_visdom', default=False, type=int, metavar='',
                        help='use Visdom to visualize plots')
    parser.add_argument('--visdom_env', default='FCN_test', type=str, metavar='',
                        help='Visdom environment name')
    parser.add_argument('--visdom_port', default=8888, type=int, metavar='',
                        help='Visdom port')
    parser.add_argument('--n2show', default=16, type=int, metavar='',
                        help='number of examples to show in Visdom')
    parser.add_argument('--vis_shape', nargs=2, default=[120, 160], type=int, metavar='',
                        help='shape of the images shown in Visdom')
    parser.add_argument('--seed', default=-1, type=int, metavar='',
                        help='random seed')
    args = vars(parser.parse_args())

    # use a fixed random seed for reproducibility purposes
    if args['seed'] > 0:
        random.seed(args['seed'])
        np.random.seed(seed=args['seed'])
        torch.manual_seed(args['seed'])

    # if args['use_cuda'] == True and we have a GPU, use the GPU; otherwise, use the CPU
    device = 'cuda:0' if (args['use_cuda'] and torch.cuda.is_available()) else 'cpu'
    print('device:', device)

    # instantiate the dataset
    test_data = Trancos(train=False, path=args['data_path'], size_red=args['size_red'],
                        transform=NP_T.ToTensor(), gamma=args['gamma'])

    # create a data loader
    test_loader = DataLoader(test_data, batch_size=args['batch_size'], shuffle=True)

    # instantiate the model and load the trained weights
    model = FCN_rLSTM(temporal=False).to(device)
    model.load_state_dict(torch.load(args['model_path'], map_location=device))
    print(model)

    # Visdom is a tool to visualize plots and images
    if args['use_visdom']:
        img_plt = plotter.VisdomImgsPlotter(env_name=args['visdom_env'], port=args['visdom_port'])
        samples = {'X': [], 'density': [], 'count': [], 'density_pred': [], 'count_pred': []}
        nsaved = 0

    # do inference and print statistics
    model.eval()  # set model to evaluation mode
    density_loss = 0.
    count_loss = 0.
    count_err = 0.
    t0 = time.time()
    for i, (X, mask, density, count) in enumerate(test_loader):
        # copy the tensors to GPU (if available)
        X, mask, density, count = X.to(device), mask.to(device), density.to(device), count.to(device)

        # forward pass through the model
        with torch.no_grad():  # no need to compute gradients in test (faster and uses less memory)
            density_pred, count_pred = model(X, mask=mask)

        # compute the performance metrics
        density_loss += torch.sum((density_pred - density)**2) / 2
        count_loss += torch.sum((count_pred - count)**2) / 2
        count_err += torch.sum(torch.abs(count_pred - count))

        # save a few examples to show in Visdom
        if args['use_visdom'] and (nsaved < args['n2show']):
            n2save = min(X.shape[0], args['n2show'] - nsaved)
            samples['X'].append((X[0:n2save] * mask[0:n2save]).cpu().numpy())
            samples['density'].append(density[0:n2save].cpu().numpy())
            samples['count'].append(count[0:n2save].cpu().numpy())
            samples['density_pred'].append(density_pred[0:n2save].cpu().numpy())
            samples['count_pred'].append(count_pred[0:n2save].cpu().numpy())
            nsaved += n2save

        print('Testing... ({:.0f}% done)'.format(100. * (i + 1) / len(test_loader)),
              flush=True, end='\r')
    print()
    density_loss /= len(test_data)
    count_loss /= len(test_data)
    count_err /= len(test_data)
    t1 = time.time()

    print('Test statistics:')
    print('density loss: {:.3f} | count loss: {:.3f} | count error: {:.3f}'
          .format(density_loss, count_loss, count_err))
    print('time: {:.0f} seconds'.format(t1 - t0))

    # show a few examples
    if args['use_visdom']:
        for key in samples:
            samples[key] = np.concatenate(samples[key], axis=0)
        show_images(img_plt, 'test gt', samples['X'], samples['density'], samples['count'],
                    shape=args['vis_shape'])
        show_images(img_plt, 'test pred', samples['X'], samples['density_pred'], samples['count_pred'],
                    shape=args['vis_shape'])
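# The test script above evaluates the whole dataset. For a quick sanity check on a
# single frame, a minimal standalone inference sketch is shown below; it assumes
# FCN_rLSTM is importable as in the scripts above, and the (1, 3, 120, 160) input
# shape and all-ones mask are illustrative assumptions rather than values
# prescribed by the repository.
import torch

def predict_count(model_path, device='cpu'):
    """Load trained weights and return the predicted vehicle count for a dummy frame."""
    model = FCN_rLSTM(temporal=False).to(device)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()
    X = torch.rand(1, 3, 120, 160, device=device)  # placeholder image tensor
    mask = torch.ones_like(X)                      # keep the whole frame active
    with torch.no_grad():
        density_pred, count_pred = model(X, mask=mask)
    return count_pred.item()

# hypothetical usage:
# print(predict_count('./fcn.pth'))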