示例#1
0
    def __init__(self, opt, motion_dim, feat_dim, dataloader):
        """Build the motion and appearance RAN models and their optimizers.

        args:
        - opt: options object; only ``opt.use_cuda`` is read here
        - motion_dim: input feature size of the motion RAN
        - feat_dim: input feature size of the appearance RAN
        - dataloader: iterable yielding training batches
        """
        self.opt = opt
        self.dataloader = dataloader

        # Both recurrent models share every hyper-parameter except input size.
        def build_ran(input_size):
            return RAN(input_size=input_size,
                       hidden_size=32,
                       history_size=10,
                       drop_rate=0.5)

        self.RAN_motion = build_ran(motion_dim)
        self.RAN_feat = build_ran(feat_dim)

        if self.opt.use_cuda:
            self.RAN_motion.cuda()
            self.RAN_feat.cuda()

        # Optimizers are created after the (in-place) .cuda() moves.
        def build_optimizer(model):
            return optim.Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.999))

        self.optimizer_motion = build_optimizer(self.RAN_motion)
        self.optimizer_feat = build_optimizer(self.RAN_feat)
示例#2
0
        for d, t in matched_indices:
            if sim_matrix[d, t] < self.min_similarity:
                unmatched_tracks.append(t)
                unmatched_detections.append(d)
            else:
                matches.append((d, t))
        return matches, unmatched_tracks, unmatched_detections


if __name__ == '__main__':
    from models import RAN

    model_save_prefix = "/scratch0/RAN/trained_model/ran"

    # load model
    ran = RAN(input_size=4, hidden_size=32, history_size=10, drop_rate=0.5)
    ran = ran.cuda()
    ran.eval()

    bbox1_1 = np.array([500, 500, 40, 50], dtype=np.float32)
    bbox1_2 = np.array([100, 200, 60, 60], dtype=np.float32)
    bbox1_3 = np.array([400, 300, 70, 70], dtype=np.float32)
    bbox1_4 = np.array([200, 100, 80, 80], dtype=np.float32)

    bbox2_1 = np.array([512, 490, 40, 50], dtype=np.float32)
    bbox2_2 = np.array([400, 330, 70, 75], dtype=np.float32)
    bbox2_3 = np.array([110, 198, 65, 65], dtype=np.float32)
    bbox2_4 = np.array([200, 120, 85, 85], dtype=np.float32)
    bbox2_5 = np.array([100, 100, 45, 45], dtype=np.float32)

    # gt for matching:
示例#3
0
def get_samples(target,
                nb_class=10,
                sample_index=0,
                attention=None,
                device='cpu'):
    '''
    Get samples : original images, preprocessed images, target class, trained model

    args:
    - target: dataset name, one of ['mnist', 'cifar10']
    - nb_class: number of classes
    - sample_index: index of the image to pick within each class
    - attention: attention variant the checkpoint was trained with
      (None, 'CAM', 'CBAM', 'RAN' or 'WARN'); selects checkpoint name and model class
    - device: torch device string the model is loaded onto

    return:
    - original_images (numpy array): Original images, shape = (number of class, W, H, C)
    - original_targets (torch array): class index of each selected image
    - pre_images (torch array): Preprocessing images, shape = (number of class, C, W, H)
    - target_classes (dictionary): keys = class index, values = class name
    - model (pytorch model): pretrained model

    raises:
    - ValueError: if ``target`` is not a supported dataset name
    '''

    if target == 'mnist':
        image_size = (28, 28, 1)
        _, _, testloader = mnist_load()
    elif target == 'cifar10':
        image_size = (32, 32, 3)
        _, _, testloader = cifar10_load()
    else:
        # fail fast instead of hitting an UnboundLocalError below
        raise ValueError(f'unknown target: {target!r}')
    testset = testloader.dataset

    # idx2class: invert torchvision's class -> index mapping
    target_class2idx = testset.class_to_idx
    target_classes = dict(
        zip(list(target_class2idx.values()), list(target_class2idx.keys())))

    # select one image per class: the sample_index-th occurrence of each label
    idx_by_class = [
        np.where(np.array(testset.targets) == i)[0][sample_index]
        for i in range(nb_class)
    ]
    original_images = testset.data[idx_by_class]
    if not isinstance(original_images, np.ndarray):
        # some torchvision datasets store .data as a torch tensor
        original_images = original_images.numpy()
    original_images = original_images.reshape((nb_class, ) + image_size)
    # select targets (.targets is a list for some datasets, a tensor for others)
    if isinstance(testset.targets, list):
        original_targets = torch.LongTensor(testset.targets)[idx_by_class]
    else:
        original_targets = testset.targets[idx_by_class]

    # model load: checkpoint naming mirrors the training script
    filename = f'simple_cnn_{target}'
    if attention in ['CAM', 'CBAM']:
        filename += f'_{attention}'
    elif attention in ['RAN', 'WARN']:
        filename = f'{target}_{attention}'
    print('filename: ', filename)
    # map_location lets a CUDA-trained checkpoint load on a CPU-only host
    weights = torch.load(f'../checkpoint/{filename}.pth', map_location=device)

    if attention == 'RAN':
        model = RAN(target).to(device)
    elif attention == 'WARN':
        model = WideResNetAttention(target).to(device)
    else:
        model = SimpleCNN(target, attention).to(device)
    model.load_state_dict(weights['model'])

    # image preprocessing: fill an NCHW buffer with the dataset transform's output
    pre_images = torch.zeros(original_images.shape).permute(0, 3, 1, 2)
    for i in range(len(original_images)):
        pre_images[i] = testset.transform(original_images[i])

    return original_images, original_targets, pre_images, target_classes, model
示例#4
0
        bbox_list.append(bbox.astype(np.float32))
        conf_list.append(confidence)

    return bbox_list, conf_list


if __name__ == '__main__':

    seq_info = gather_sequence_info('/scratch0/MOT/MOT16/train/MOT16-02', '/scratch0/MOT/MOT16/external/MOT16-02_det.txt')

    video = cv2.VideoWriter('../results/video_gt.avi', cv2.VideoWriter_fourcc(*"MJPG"), seq_info['fps'], (640, 480))

    model_path = "../results/models/RAN.pth"
    # load model
    checkpoint = torch.load(model_path)
    RAN_motion = RAN(input_size=4, hidden_size=32, history_size=10, drop_rate=0.5)
    RAN_feat = RAN(input_size=4, hidden_size=32, history_size=10, drop_rate=0.5)

    RAN_motion.load_state_dict(checkpoint['RAN_motion'])
    RAN_feat.load_state_dict(checkpoint['RAN_feat'])
    RAN_motion = RAN_motion.cuda()
    RAN_feat = RAN_feat.cuda()

    RAN_motion.eval()
    RAN_feat.eval()
    tracker = RANTracker(RAN_motion, feat_model=None)

    for frame_idx in seq_info['image_filenames'].keys():
        bboxes, confs = create_detections(seq_info['groundtruth'], frame_idx)
        #bboxes, confs = create_detections(seq_info['detections'], frame_idx)
示例#5
0
def test(idx):
    """Visualize one ground-truth track and the RAN motion prediction.

    Loads the trained motion RAN, replays track ``idx`` from the MOT16
    training set frame by frame, predicts each bounding box as an
    attention-weighted combination of the recent motion history, and
    writes a video showing ground truth (green) and prediction (red).

    args:
    - idx: index of the track to replay in the training dataset
    """
    dataroot = '/scratch0/MOT/MOT16'
    detroot = '/scratch0/MOT/MOT16/external'
    model_path = '../results/models/RAN.pth'
    video_path = '../results/visualization/sample_track.avi'

    video_handle = cv2.VideoWriter(video_path, cv2.VideoWriter_fourcc(*"MJPG"),
                                   20, (640, 480))

    dataset = MOT16_train_dataset(dataroot, detroot)

    # load model
    checkpoint = torch.load(model_path)
    RAN_motion = RAN(input_size=dataset.motion_dim,
                     hidden_size=32,
                     history_size=10,
                     drop_rate=0.5)
    RAN_feat = RAN(input_size=dataset.feat_dim,
                   hidden_size=32,
                   history_size=10,
                   drop_rate=0.5)

    RAN_motion.load_state_dict(checkpoint['RAN_motion'])
    RAN_feat.load_state_dict(checkpoint['RAN_feat'])
    RAN_motion = RAN_motion.cuda()
    RAN_feat = RAN_feat.cuda()

    RAN_motion.eval()
    RAN_feat.eval()

    memory_size = 10
    input_size = 4

    hidden = RAN_motion.init_hidden(batch_size=1)
    # zero-initialized motion history; newest entries are pushed to the front
    external = deque(
        [np.zeros(input_size, dtype=np.float32) for _ in range(memory_size)],
        maxlen=memory_size)

    # sample a track from training data
    bbox_data = dataset.bbox[idx]
    bbox_motion = dataset.motion[idx]
    frame_num = dataset.frame_num[idx]

    video_id = dataset.video_id[idx][0]
    image_names = dataset.image_filenames[video_id]

    # convert (cx, cy, w, h) centers to (x, y, w, h) corners for drawing
    bbox_gt = bbox_data.copy()
    bbox_gt[:, 0:2] -= bbox_gt[:, 2:4] / 2.0

    for f_num in np.arange(frame_num.min(), frame_num.max() + 1):
        if np.any(frame_num == f_num):
            # t_idx, not idx: avoid shadowing the function argument
            t_idx = np.where(frame_num == f_num)[0][0]
            gt = bbox_gt[t_idx].copy()
            motion = bbox_motion[t_idx].copy()
            bbox = bbox_data[t_idx].copy()

            external.appendleft(motion)
            motion_var = to_var(motion).view(1, 1, -1)
            alpha, sigma, hidden = RAN_motion(motion_var, hidden)
            # predicted motion = attention weights alpha applied to the history
            alpha_np = to_np(alpha.squeeze())
            motion_pred = np.matmul(alpha_np, np.array(external))

            bbox_pred = bbox + motion_pred
            bbox_pred[0:2] -= bbox_pred[2:4] / 2.0

            save_to_video(video_handle, image_names[f_num], (640, 480),
                          [gt, bbox_pred], [(0, 255, 0), (0, 0, 255)])

        else:
            # track absent from this frame: emit the raw image with no boxes
            save_to_video(video_handle, image_names[f_num], (640, 480), [], [])

    video_handle.release()
示例#6
0
class Trainer(object):
    """Joint trainer for the motion and appearance RAN models.

    Each model gets its own Adam optimizer and is trained on packed
    variable-length sequences produced by ``dataloader``; a combined
    checkpoint is written to ``opt.outf`` after the final epoch.
    """

    def __init__(self, opt, motion_dim, feat_dim, dataloader):
        """
        args:
        - opt: options; reads ``use_cuda``, ``nepoch``, ``history_size``, ``outf``
        - motion_dim: input feature size of the motion RAN
        - feat_dim: input feature size of the appearance RAN
        - dataloader: yields (motion_data, feat_data) packed-sequence pairs
        """
        self.opt = opt
        self.RAN_motion = RAN(input_size=motion_dim,
                              hidden_size=32,
                              history_size=10,
                              drop_rate=0.5)
        self.RAN_feat = RAN(input_size=feat_dim,
                            hidden_size=32,
                            history_size=10,
                            drop_rate=0.5)

        self.dataloader = dataloader

        if self.opt.use_cuda:
            self.RAN_motion.cuda()
            self.RAN_feat.cuda()

        # optimizers are built after the (in-place) .cuda() moves
        self.optimizer_motion = optim.Adam(self.RAN_motion.parameters(),
                                           lr=1e-3,
                                           betas=(0.9, 0.999))
        self.optimizer_feat = optim.Adam(self.RAN_feat.parameters(),
                                         lr=1e-3,
                                         betas=(0.9, 0.999))

    def train(self):
        """Train both models for ``opt.nepoch`` epochs, then save a checkpoint."""
        self.RAN_motion.train()
        self.RAN_feat.train()

        for epoch in range(self.opt.nepoch):

            for i, (motion_data, feat_data) in enumerate(self.dataloader):

                ########
                # Train motion model
                ########
                self.RAN_motion.zero_grad()
                padded_batch, lengths, packed_input, ext = self.prepare_data(
                    motion_data)

                hidden = self.RAN_motion.init_hidden(
                    len(lengths))  # (1, B, hidden)

                alpha, sigma, h_n = self.RAN_motion(packed_input, hidden)

                # targets are the inputs shifted by one step (t=1..L-1)
                loss = loss_fn(alpha, sigma, padded_batch[1:], ext, lengths)
                loss.backward()
                self.optimizer_motion.step()

                if i == 1:
                    print('Epoch: {}, M Loss: {}'.format(
                        epoch,
                        loss.cpu().data.numpy()))

                ########
                # Train appearance model
                ########
                self.RAN_feat.zero_grad()
                padded_batch, lengths, packed_input, ext = self.prepare_data(
                    feat_data)

                hidden = self.RAN_feat.init_hidden(
                    len(lengths))  # (1, B, hidden)

                alpha, sigma, h_n = self.RAN_feat(packed_input, hidden)

                loss = loss_fn(alpha, sigma, padded_batch[1:], ext, lengths)
                loss.backward()
                self.optimizer_feat.step()

                if i == 1:
                    print('Epoch: {}, A Loss: {}'.format(
                        epoch,
                        loss.cpu().data.numpy()))

        torch.save(
            {
                'RAN_motion': self.RAN_motion.state_dict(),
                'RAN_feat': self.RAN_feat.state_dict()
            }, '{}/models/RAN.pth'.format(self.opt.outf))

    def prepare_data(self, batch_data):
        """Unpack a batch and build model input, targets and external memory.

        args:
        - batch_data: a PackedSequence of per-track sequences

        return:
        - padded_batch: (max_length, batch_size, feat_dim) padded tensor
        - lengths: per-sequence lengths minus one (input/target offset)
        - packed_input: packed inputs covering t=0..L-2
        - ext: external-memory tensor produced by generate_external
        """
        # obtain a tensor (max_length, batch_size, feat_dim) and lengths for sequences
        padded_batch, lengths = pad_packed_sequence(batch_data)
        # predicting the next step drops the final element of each sequence
        lengths = [l - 1 for l in lengths]

        ext = generate_external(padded_batch.data.numpy()[:-1], lengths,
                                self.opt.history_size)
        ext = Variable(torch.from_numpy(ext), requires_grad=False)

        # generate input from t=0 to t=L-2
        packed_input = pack_padded_sequence(padded_batch[:-1], lengths)

        if self.opt.use_cuda:
            ext = ext.cuda()
            padded_batch = padded_batch.cuda()
            packed_input = PackedSequence(packed_input.data.cuda(),
                                          packed_input.batch_sizes)

        return padded_batch, lengths, packed_input, ext
示例#7
0
def main(args, **kwargs):
    """Train an attention CNN, evaluate it, and log the results as JSON.

    args:
    - args: parsed command-line arguments (epochs, batch_size, valid_rate,
      lr, verbose, target, attention, monitor, mode, train, eval)
    - kwargs: extra evaluation settings; ``ratio`` is required when
      ``args.eval`` is 'ROAR' or 'KAR'
    """
    #################################
    # Config
    #################################
    epochs = args.epochs
    batch_size = args.batch_size
    valid_rate = args.valid_rate
    lr = args.lr
    verbose = args.verbose

    # checkpoint
    target = args.target
    attention = args.attention
    monitor = args.monitor
    mode = args.mode

    # save name
    model_name = 'simple_cnn_{}'.format(target)
    if attention in ['CAM', 'CBAM']:
        model_name = model_name + '_{}'.format(attention)
    elif attention in ['RAN', 'WARN']:
        model_name = '{}_{}'.format(target, attention)

    # save directory
    savedir = '../checkpoint'
    logdir = '../logs'

    # device setting cpu or cuda(gpu)
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    print('=====Setting=====')
    print('Training: ', args.train)
    print('Epochs: ', epochs)
    print('Batch Size: ', batch_size)
    print('Validation Rate: ', valid_rate)
    print('Learning Rate: ', lr)
    print('Target: ', target)
    print('Monitor: ', monitor)
    print('Model Name: ', model_name)
    print('Mode: ', mode)
    print('Attention: ', attention)
    print('Save Directory: ', savedir)
    print('Log Directory: ', logdir)
    print('Device: ', device)
    print('Verbose: ', verbose)
    print()
    print('Evaluation: ', args.eval)
    if args.eval is not None:
        print('Pixel ratio: ', kwargs['ratio'])
    print()
    print('Setting Random Seed')
    print()
    seed_everything()  # seed setting

    #################################
    # Data Load
    #################################
    print('=====Data Load=====')
    if target == 'mnist':
        trainloader, validloader, testloader = mnist_load(
            batch_size=batch_size, validation_rate=valid_rate, shuffle=True)

    elif target == 'cifar10':
        trainloader, validloader, testloader = cifar10_load(
            batch_size=batch_size, validation_rate=valid_rate, shuffle=True)

    #################################
    # ROAR or KAR
    #################################
    if (args.eval == 'ROAR') or (args.eval == 'KAR'):
        # saliency map load: file naming mirrors the map-generation script
        filename = f'../saliency_maps/[{args.target}]{args.method}'
        if attention in ['CBAM', 'RAN']:
            filename += f'_{attention}'
        hf = h5py.File(f'{filename}_train.hdf5', 'r')
        sal_maps = np.array(hf['saliencys'])
        # adjust image: remove (ROAR) or keep (KAR) the rated pixels
        trainloader = adjust_image(kwargs['ratio'], trainloader, sal_maps,
                                   args.eval)
        # hdf5 close
        hf.close()
        # model name
        model_name = model_name + '_{0:}_{1:}{2:.1f}'.format(
            args.method, args.eval, kwargs['ratio'])

    # skip runs whose log file already exists
    if os.path.isfile('{}/{}_logs.txt'.format(logdir, model_name)):
        sys.exit()

    #################################
    # Load model
    #################################
    print('=====Model Load=====')
    if attention == 'RAN':
        net = RAN(target).to(device)
    elif attention == 'WARN':
        net = WideResNetAttention(target).to(device)
    else:
        net = SimpleCNN(target, attention).to(device)
    n_parameters = sum([np.prod(p.size()) for p in net.parameters()])
    print('Total number of parameters:', n_parameters)
    print()

    # Model compile
    optimizer = optim.SGD(net.parameters(),
                          lr=lr,
                          momentum=0.9,
                          weight_decay=0.0005)
    criterion = nn.CrossEntropyLoss()

    #################################
    # Train
    #################################
    modeltrain = ModelTrain(model=net,
                            data=trainloader,
                            epochs=epochs,
                            criterion=criterion,
                            optimizer=optimizer,
                            device=device,
                            model_name=model_name,
                            savedir=savedir,
                            monitor=monitor,
                            mode=mode,
                            validation=validloader,
                            verbose=verbose)

    #################################
    # Test
    #################################
    modeltest = ModelTest(model=net,
                          data=testloader,
                          loaddir=savedir,
                          model_name=model_name,
                          device=device)

    modeltrain.history['test_result'] = modeltest.results

    # History save as json file
    os.makedirs(logdir, exist_ok=True)
    with open(f'{logdir}/{model_name}_logs.txt', 'w') as outfile:
        json.dump(modeltrain.history, outfile)