def __init__(self, opt, motion_dim, feat_dim, dataloader):
    """Build the motion and appearance RAN models plus their optimizers.

    Args:
        opt: options namespace; reads ``opt.use_cuda``.
        motion_dim: input feature size of the motion RAN.
        feat_dim: input feature size of the appearance RAN.
        dataloader: training data loader, stored for later use.
    """
    self.opt = opt
    self.dataloader = dataloader
    # Two recurrent autoregressive networks, one per modality;
    # they share the same hyper-parameters.
    self.RAN_motion = RAN(input_size=motion_dim, hidden_size=32,
                          history_size=10, drop_rate=0.5)
    self.RAN_feat = RAN(input_size=feat_dim, hidden_size=32,
                        history_size=10, drop_rate=0.5)
    if self.opt.use_cuda:
        self.RAN_motion.cuda()
        self.RAN_feat.cuda()
    # Independent Adam optimizers so each model trains on its own loss.
    self.optimizer_motion = optim.Adam(self.RAN_motion.parameters(),
                                       lr=1e-3, betas=(0.9, 0.999))
    self.optimizer_feat = optim.Adam(self.RAN_feat.parameters(),
                                     lr=1e-3, betas=(0.9, 0.999))
for d, t in matched_indices: if sim_matrix[d, t] < self.min_similarity: unmatched_tracks.append(t) unmatched_detections.append(d) else: matches.append((d, t)) return matches, unmatched_tracks, unmatched_detections if __name__ == '__main__': from models import RAN model_save_prefix = "/scratch0/RAN/trained_model/ran" # load model ran = RAN(input_size=4, hidden_size=32, history_size=10, drop_rate=0.5) ran = ran.cuda() ran.eval() bbox1_1 = np.array([500, 500, 40, 50], dtype=np.float32) bbox1_2 = np.array([100, 200, 60, 60], dtype=np.float32) bbox1_3 = np.array([400, 300, 70, 70], dtype=np.float32) bbox1_4 = np.array([200, 100, 80, 80], dtype=np.float32) bbox2_1 = np.array([512, 490, 40, 50], dtype=np.float32) bbox2_2 = np.array([400, 330, 70, 75], dtype=np.float32) bbox2_3 = np.array([110, 198, 65, 65], dtype=np.float32) bbox2_4 = np.array([200, 120, 85, 85], dtype=np.float32) bbox2_5 = np.array([100, 100, 45, 45], dtype=np.float32) # gt for matching:
def get_samples(target, nb_class=10, sample_index=0, attention=None, device='cpu'):
    '''
    Get samples : original images, preprocessed images, target class, trained model

    args:
    - target: [mnist, cifar10]
    - nb_class: number of classes
    - sample_index: index of image by class
    - attention: attention variant the checkpoint was trained with
      (None, 'CAM', 'CBAM', 'RAN' or 'WARN')
    - device: device to load the model onto

    return:
    - original_images (numpy array): Original images, shape = (number of class, W, H, C)
    - original_targets: ground-truth labels of the selected images
    - pre_images (torch array): Preprocessing images, shape = (number of class, C, W, H)
    - target_classes (dictionary): keys = class index, values = class name
    - model (pytorch model): pretrained model
    '''
    if target == 'mnist':
        image_size = (28, 28, 1)
        _, _, testloader = mnist_load()
    elif target == 'cifar10':
        image_size = (32, 32, 3)
        _, _, testloader = cifar10_load()
    else:
        # was: silently fell through and crashed later on an unbound name
        raise ValueError('target must be mnist or cifar10, got {!r}'.format(target))
    testset = testloader.dataset

    # idx2class: invert class_to_idx so indices map back to class names
    target_class2idx = testset.class_to_idx
    target_classes = dict(
        zip(list(target_class2idx.values()), list(target_class2idx.keys())))

    # select one image per class at position sample_index
    idx_by_class = [
        np.where(np.array(testset.targets) == i)[0][sample_index]
        for i in range(nb_class)
    ]
    original_images = testset.data[idx_by_class]
    if not isinstance(original_images, np.ndarray):
        original_images = original_images.numpy()
    original_images = original_images.reshape((nb_class, ) + image_size)

    # select targets (CIFAR10 stores a list, MNIST a tensor)
    if isinstance(testset.targets, list):
        original_targets = torch.LongTensor(testset.targets)[idx_by_class]
    else:
        original_targets = testset.targets[idx_by_class]

    # model load — checkpoint naming mirrors the training script
    filename = f'simple_cnn_{target}'
    if attention in ['CAM', 'CBAM']:
        filename += f'_{attention}'
    elif attention in ['RAN', 'WARN']:
        filename = f'{target}_{attention}'
    print('filename: ', filename)
    # BUG FIX: load the checkpoint named above; the path previously
    # ignored the `filename` just built.
    weights = torch.load(f'../checkpoint/{filename}.pth')

    if attention == 'RAN':
        model = RAN(target).to(device)
    elif attention == 'WARN':
        model = WideResNetAttention(target).to(device)
    else:
        model = SimpleCNN(target, attention).to(device)
    model.load_state_dict(weights['model'])

    # image preprocessing
    # BUG FIX: allocate the (N, C, W, H) tensor directly. The old
    # np.transpose(torch_tensor, ...) call silently converted the buffer
    # to a numpy array, contradicting the documented torch return type.
    pre_images = torch.zeros(
        (nb_class, image_size[2], image_size[0], image_size[1]))
    for i in range(len(original_images)):
        pre_images[i] = testset.transform(original_images[i])

    return original_images, original_targets, pre_images, target_classes, model
bbox_list.append(bbox.astype(np.float32)) conf_list.append(confidence) return bbox_list, conf_list if __name__ == '__main__': seq_info = gather_sequence_info('/scratch0/MOT/MOT16/train/MOT16-02', '/scratch0/MOT/MOT16/external/MOT16-02_det.txt') video = cv2.VideoWriter('../results/video_gt.avi', cv2.VideoWriter_fourcc(*"MJPG"), seq_info['fps'], (640, 480)) model_path = "../results/models/RAN.pth" # load model checkpoint = torch.load(model_path) RAN_motion = RAN(input_size=4, hidden_size=32, history_size=10, drop_rate=0.5) RAN_feat = RAN(input_size=4, hidden_size=32, history_size=10, drop_rate=0.5) RAN_motion.load_state_dict(checkpoint['RAN_motion']) RAN_feat.load_state_dict(checkpoint['RAN_feat']) RAN_motion = RAN_motion.cuda() RAN_feat = RAN_feat.cuda() RAN_motion.eval() RAN_feat.eval() tracker = RANTracker(RAN_motion, feat_model=None) for frame_idx in seq_info['image_filenames'].keys(): bboxes, confs = create_detections(seq_info['groundtruth'], frame_idx) #bboxes, confs = create_detections(seq_info['detections'], frame_idx)
def test(idx):
    """Visualize RAN one-step motion prediction for one ground-truth track.

    Loads the trained motion/appearance checkpoint, replays track ``idx``
    from the MOT16 training set, and writes a video drawing the ground-truth
    box (green) next to the predicted box (red) on every frame where the
    track is present.

    Args:
        idx: index of the track in the training dataset.
    """
    dataroot = '/scratch0/MOT/MOT16'
    detroot = '/scratch0/MOT/MOT16/external'
    model_path = '../results/models/RAN.pth'
    video_path = '../results/visualization/sample_track.avi'
    video_handle = cv2.VideoWriter(video_path,
                                   cv2.VideoWriter_fourcc(*"MJPG"),
                                   20, (640, 480))
    dataset = MOT16_train_dataset(dataroot, detroot)

    # load model
    checkpoint = torch.load(model_path)
    RAN_motion = RAN(input_size=dataset.motion_dim, hidden_size=32,
                     history_size=10, drop_rate=0.5)
    RAN_feat = RAN(input_size=dataset.feat_dim, hidden_size=32,
                   history_size=10, drop_rate=0.5)
    RAN_motion.load_state_dict(checkpoint['RAN_motion'])
    RAN_feat.load_state_dict(checkpoint['RAN_feat'])
    RAN_motion = RAN_motion.cuda()
    RAN_feat = RAN_feat.cuda()
    RAN_motion.eval()
    RAN_feat.eval()

    memory_size = 10
    input_size = 4
    hidden = RAN_motion.init_hidden(batch_size=1)
    # Sliding window of the last `memory_size` motion vectors, newest first,
    # zero-initialized so predictions are defined from the first frame.
    external = deque(
        [np.zeros(input_size, dtype=np.float32) for _ in range(memory_size)],
        maxlen=memory_size)

    # sample a track from training data
    bbox_data = dataset.bbox[idx]
    bbox_motion = dataset.motion[idx]
    frame_num = dataset.frame_num[idx]
    video_id = dataset.video_id[idx][0]
    image_names = dataset.image_filenames[video_id]

    # convert center-based (cx, cy, w, h) to top-left for drawing
    bbox_gt = bbox_data.copy()
    bbox_gt[:, 0:2] -= bbox_gt[:, 2:4] / 2.0

    for f_num in np.arange(frame_num.min(), frame_num.max() + 1):
        if np.any(frame_num == f_num):
            # BUG FIX: use a dedicated local instead of clobbering the
            # track-index parameter `idx` inside the loop.
            row = np.where(frame_num == f_num)[0][0]
            gt = bbox_gt[row].copy()
            motion = bbox_motion[row].copy()
            bbox = bbox_data[row].copy()

            external.appendleft(motion)

            motion_var = to_var(motion).view(1, 1, -1)
            alpha, sigma, hidden = RAN_motion(motion_var, hidden)

            # prediction = alpha-weighted linear combination of the history
            alpha_np = to_np(alpha.squeeze())
            motion_pred = np.matmul(alpha_np, np.array(external))

            bbox_pred = bbox + motion_pred
            bbox_pred[0:2] -= bbox_pred[2:4] / 2.0

            save_to_video(video_handle, image_names[f_num], (640, 480),
                          [gt, bbox_pred], [(0, 255, 0), (0, 0, 255)])
        else:
            # Track absent in this frame: emit the raw image with no boxes.
            save_to_video(video_handle, image_names[f_num], (640, 480), [], [])

    video_handle.release()
class Trainer(object):
    """Trains the motion and appearance RAN models jointly.

    Holds one RAN per modality plus an Adam optimizer for each; batches come
    from ``dataloader`` as pairs of packed (motion, appearance) sequences.
    """

    def __init__(self, opt, motion_dim, feat_dim, dataloader):
        self.opt = opt
        self.dataloader = dataloader
        self.RAN_motion = RAN(input_size=motion_dim, hidden_size=32,
                              history_size=10, drop_rate=0.5)
        self.RAN_feat = RAN(input_size=feat_dim, hidden_size=32,
                            history_size=10, drop_rate=0.5)
        if self.opt.use_cuda:
            self.RAN_motion.cuda()
            self.RAN_feat.cuda()
        self.optimizer_motion = optim.Adam(self.RAN_motion.parameters(),
                                           lr=1e-3, betas=(0.9, 0.999))
        self.optimizer_feat = optim.Adam(self.RAN_feat.parameters(),
                                         lr=1e-3, betas=(0.9, 0.999))

    def train(self):
        """Run ``opt.nepoch`` epochs, taking one motion step and one
        appearance step per batch, and checkpoint both state dicts."""
        self.RAN_motion.train()
        self.RAN_feat.train()

        total_loss = []
        curr_iters = 0
        for epoch in range(self.opt.nepoch):
            for i, (motion_data, feat_data) in enumerate(self.dataloader):
                curr_iters += 1

                ########
                # Train motion model
                ########
                self.RAN_motion.zero_grad()
                padded_batch, lengths, packed_input, ext = self.prepare_data(
                    motion_data)
                hidden = self.RAN_motion.init_hidden(
                    len(lengths))  # (1, B, hidden)
                alpha, sigma, h_n = self.RAN_motion(packed_input, hidden)
                # Targets are steps t=1..L-1 of the padded batch.
                loss = loss_fn(alpha, sigma, padded_batch[1:], ext, lengths)
                loss.backward()
                self.optimizer_motion.step()
                if i == 1:
                    print('Epoch: {}, M Loss: {}'.format(
                        epoch, loss.cpu().data.numpy()))

                ########
                # Train appearance model
                ########
                self.RAN_feat.zero_grad()
                padded_batch, lengths, packed_input, ext = self.prepare_data(
                    feat_data)
                hidden = self.RAN_feat.init_hidden(
                    len(lengths))  # (1, B, hidden)
                alpha, sigma, h_n = self.RAN_feat(packed_input, hidden)
                loss = loss_fn(alpha, sigma, padded_batch[1:], ext, lengths)
                loss.backward()
                self.optimizer_feat.step()
                if i == 1:
                    print('Epoch: {}, A Loss: {}'.format(
                        epoch, loss.cpu().data.numpy()))

            # NOTE(review): checkpoint written once per epoch — confirm the
            # original placement if resuming mid-epoch matters.
            torch.save(
                {
                    'RAN_motion': self.RAN_motion.state_dict(),
                    'RAN_feat': self.RAN_feat.state_dict()
                }, '{}/models/RAN.pth'.format(self.opt.outf))

    def prepare_data(self, batch_data):
        """Unpack a PackedSequence batch into training inputs and targets.

        Returns:
            padded_batch: (max_length, batch_size, feat_dim) tensor.
            lengths: per-sequence lengths minus one (inputs stop at L-2).
            packed_input: packed inputs covering t=0..L-2.
            ext: external-memory tensor consumed by the loss.
        """
        # obtain a tensor (max_length, batch_size, feat_dim) and lengths
        padded_batch, lengths = pad_packed_sequence(batch_data)
        lengths = [l - 1 for l in lengths]
        ext = generate_external(padded_batch.data.numpy()[:-1], lengths,
                                self.opt.history_size)
        ext = Variable(torch.from_numpy(ext), requires_grad=False)

        # generate input from t=0 to t=L-2
        packed_input = pack_padded_sequence(padded_batch[:-1], lengths)

        if self.opt.use_cuda:
            ext = ext.cuda()
            padded_batch = padded_batch.cuda()
            packed_input = PackedSequence(packed_input.data.cuda(),
                                          packed_input.batch_sizes)

        return padded_batch, lengths, packed_input, ext
def main(args, **kwargs):
    """Train and evaluate a classifier, optionally under ROAR/KAR occlusion.

    Args:
        args: parsed CLI namespace (epochs, batch_size, target, attention,
            eval, method, monitor, mode, ...).
        **kwargs: extra evaluation options; ``ratio`` (pixel occlusion
            ratio) is required when ``args.eval`` is 'ROAR' or 'KAR'.
    """
    #################################
    # Config
    #################################
    epochs = args.epochs
    batch_size = args.batch_size
    valid_rate = args.valid_rate
    lr = args.lr
    verbose = args.verbose
    # checkpoint
    target = args.target
    attention = args.attention
    monitor = args.monitor
    mode = args.mode
    # save name — attention variants use their own naming scheme
    model_name = 'simple_cnn_{}'.format(target)
    if attention in ['CAM', 'CBAM']:
        model_name = model_name + '_{}'.format(attention)
    elif attention in ['RAN', 'WARN']:
        model_name = '{}_{}'.format(target, attention)
    # save directory
    savedir = '../checkpoint'
    logdir = '../logs'
    # device setting cpu or cuda(gpu)
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    print('=====Setting=====')
    print('Training: ', args.train)
    print('Epochs: ', epochs)
    print('Batch Size: ', batch_size)
    print('Validation Rate: ', valid_rate)
    print('Learning Rate: ', lr)
    print('Target: ', target)
    print('Monitor: ', monitor)
    print('Model Name: ', model_name)
    print('Mode: ', mode)
    print('Attention: ', attention)
    print('Save Directory: ', savedir)
    print('Log Directory: ', logdir)
    print('Device: ', device)
    print('Verbose: ', verbose)
    print()
    print('Evaluation: ', args.eval)
    if args.eval is not None:  # was `!= None`
        print('Pixel ratio: ', kwargs['ratio'])
    print()
    print('Setting Random Seed')
    print()
    seed_everything()  # seed setting

    #################################
    # Data Load
    #################################
    print('=====Data Load=====')
    if target == 'mnist':
        trainloader, validloader, testloader = mnist_load(
            batch_size=batch_size, validation_rate=valid_rate, shuffle=True)
    elif target == 'cifar10':
        trainloader, validloader, testloader = cifar10_load(
            batch_size=batch_size, validation_rate=valid_rate, shuffle=True)
    else:
        # was: fell through and crashed later on unbound loaders
        raise ValueError('target must be mnist or cifar10, got {!r}'.format(target))

    #################################
    # ROAR or KAR
    #################################
    if (args.eval == 'ROAR') or (args.eval == 'KAR'):
        # saliency map load
        filename = f'../saliency_maps/[{args.target}]{args.method}'
        if attention in ['CBAM', 'RAN']:
            filename += f'_{attention}'
        # BUG FIX: open the saliency file named above; the path previously
        # ignored the `filename` just built.
        hf = h5py.File(f'{filename}_train.hdf5', 'r')
        sal_maps = np.array(hf['saliencys'])
        # adjust image: occlude (ROAR) or keep (KAR) top-ratio pixels
        trainloader = adjust_image(kwargs['ratio'], trainloader, sal_maps,
                                   args.eval)
        # hdf5 close
        hf.close()

        # model name encodes method, eval mode and ratio
        model_name = model_name + '_{0:}_{1:}{2:.1f}'.format(
            args.method, args.eval, kwargs['ratio'])

        # skip runs whose log file already exists
        if os.path.isfile('{}/{}_logs.txt'.format(logdir, model_name)):
            sys.exit()

    #################################
    # Load model
    #################################
    print('=====Model Load=====')
    if attention == 'RAN':
        net = RAN(target).to(device)
    elif attention == 'WARN':
        net = WideResNetAttention(target).to(device)
    else:
        net = SimpleCNN(target, attention).to(device)
    n_parameters = sum([np.prod(p.size()) for p in net.parameters()])
    print('Total number of parameters:', n_parameters)
    print()

    # Model compile
    optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.9,
                          weight_decay=0.0005)
    criterion = nn.CrossEntropyLoss()

    #################################
    # Train
    #################################
    modeltrain = ModelTrain(model=net,
                            data=trainloader,
                            epochs=epochs,
                            criterion=criterion,
                            optimizer=optimizer,
                            device=device,
                            model_name=model_name,
                            savedir=savedir,
                            monitor=monitor,
                            mode=mode,
                            validation=validloader,
                            verbose=verbose)

    #################################
    # Test
    #################################
    modeltest = ModelTest(model=net,
                          data=testloader,
                          loaddir=savedir,
                          model_name=model_name,
                          device=device)
    modeltrain.history['test_result'] = modeltest.results

    # History save as json file
    if not (os.path.isdir(logdir)):
        os.mkdir(logdir)
    with open(f'{logdir}/{model_name}_logs.txt', 'w') as outfile:
        json.dump(modeltrain.history, outfile)