# Example No. 1 (0 votes)
    def __init__(self, opt):
        """Build backbone/branch inference models for the RGB and flow streams.

        A stream is only constructed when its checkpoint path in ``opt`` is
        non-empty; otherwise the corresponding attributes stay ``None``.
        """
        # Pick the compute device from the first configured GPU id.
        opt.device = torch.device('cuda' if opt.gpus[0] >= 0 else 'cpu')

        def _deploy(module):
            # Wrap for single-chunk data-parallel inference on one GPU, move
            # to the selected device and switch to eval mode.
            wrapped = DataParallel(module,
                                   device_ids=[opt.gpus[0]],
                                   chunk_sizes=[1]).to(opt.device)
            wrapped.eval()
            return wrapped

        self.rgb_model_backbone = None
        self.rgb_model_branch = None
        self.flow_model_backbone = None
        self.flow_model_branch = None

        if opt.rgb_model != '':
            print('create rgb model')
            backbone, branch = create_inference_model(opt.arch,
                                                      opt.branch_info,
                                                      opt.head_conv,
                                                      opt.K,
                                                      flip_test=opt.flip_test)
            backbone, branch = load_inference_model(backbone, branch,
                                                    opt.rgb_model)
            self.rgb_model_backbone = _deploy(backbone)
            self.rgb_model_branch = _deploy(branch)

        if opt.flow_model != '':
            print('create flow model')
            backbone, branch = create_inference_model(opt.arch,
                                                      opt.branch_info,
                                                      opt.head_conv,
                                                      opt.K,
                                                      flip_test=opt.flip_test)
            # Adapt the first conv layer to stacked optical-flow input.
            backbone = convert2flow(opt.ninput, backbone)
            backbone, branch = load_inference_model(backbone, branch,
                                                    opt.flow_model)
            self.flow_model_backbone = _deploy(backbone)
            self.flow_model_branch = _deploy(branch)

        self.num_classes = opt.num_classes
        self.opt = opt

        # Feature caches (normal and flipped variants); presumably filled by
        # the streaming inference loop -- verify against process().
        self.rgb_buffer = []
        self.flow_buffer = []
        self.rgb_buffer_flip = []
        self.flow_buffer_flip = []
# Example No. 2 (0 votes)
 def __init__(self, opt, model, optimizer=None):
     """Bind ``model`` to its training loss and remember options/optimizer."""
     self.opt = opt
     self.optimizer = optimizer
     # Scalar statistics reported for every iteration.
     self.loss_stats = ['loss', 'loss_hm', 'loss_mov', 'loss_wh', 'loss_mgan']
     self.model_with_loss = ModleWithLoss(model, MOCTrainLoss(opt))
    def __init__(self, opt):
        """Create the RGB and/or flow detection models for evaluation."""
        # Pick the compute device from the first configured GPU id.
        opt.device = torch.device('cuda' if opt.gpus[0] >= 0 else 'cpu')

        def _deploy(model):
            # Distribute over the configured GPUs, move to the device and
            # switch to eval mode.
            model = DataParallel(model,
                                 device_ids=opt.gpus,
                                 chunk_sizes=opt.chunk_sizes).to(opt.device)
            model.eval()
            return model

        self.rgb_model = None
        self.flow_model = None

        if opt.rgb_model != '':
            print('create rgb model')
            model = create_model(opt.arch,
                                 opt.branch_info,
                                 opt.head_conv,
                                 opt.K,
                                 flip_test=opt.flip_test)
            self.rgb_model = _deploy(load_model(model, opt.rgb_model))

        if opt.flow_model != '':
            print('create flow model')
            model = create_model(opt.arch,
                                 opt.branch_info,
                                 opt.head_conv,
                                 opt.K,
                                 flip_test=opt.flip_test)
            # Adapt the first conv layer to stacked optical-flow input.
            model = convert2flow(opt.ninput, model)
            self.flow_model = _deploy(load_model(model, opt.flow_model))

        self.num_classes = opt.num_classes
        self.opt = opt
# Example No. 4 (0 votes)
    def __init__(self, opt):
        """Create RGB / flow / PA detection models as configured in ``opt``.

        Each stream is built only when its checkpoint option is non-empty;
        otherwise the attribute remains ``None``. Every created model is
        wrapped in DataParallel and switched to eval mode.
        """
        if opt.gpus[0] >= 0:
            opt.device = torch.device('cuda')
        else:
            opt.device = torch.device('cpu')
        self.rgb_model = None
        self.flow_model = None
        self.pa_model = None
        if opt.rgb_model != '':
            print('create rgb model')
            self.rgb_model = create_model(opt.arch,
                                          opt.branch_info,
                                          opt.head_conv,
                                          opt.K,
                                          flip_test=opt.flip_test,
                                          is_pa=False)
            self.rgb_model = load_model(self.rgb_model,
                                        opt.save_root + opt.rgb_model)
            # (Removed a dead debug-visualisation snippet that was kept here
            # as a no-op triple-quoted string literal.)
            self.rgb_model = DataParallel(self.rgb_model,
                                          device_ids=opt.gpus,
                                          chunk_sizes=opt.chunk_sizes).to(
                                              opt.device)
            self.rgb_model.eval()
        if opt.flow_model != '':
            print('create flow model')
            self.flow_model = create_model(opt.arch,
                                           opt.branch_info,
                                           opt.head_conv,
                                           opt.K,
                                           flip_test=opt.flip_test)
            # Adapt the first conv layer to ``ninput`` stacked flow frames.
            self.flow_model = convert2flow(opt.ninput, self.flow_model)
            self.flow_model = load_model(self.flow_model, opt.flow_model)
            self.flow_model = DataParallel(self.flow_model,
                                           device_ids=opt.gpus,
                                           chunk_sizes=opt.chunk_sizes).to(
                                               opt.device)
            self.flow_model.eval()

        if opt.pa_model != '':
            print('create PA model')
            self.pa_model = create_model(opt.arch,
                                         opt.branch_info,
                                         opt.head_conv,
                                         opt.K,
                                         flip_test=opt.flip_test,
                                         is_pa=True,
                                         pa_fuse_mode=opt.pa_fuse_mode,
                                         rgb_w3=opt.rgb_w3)

            if opt.pa_fuse_mode == 'PAN':
                self.pa_model = convert2PAN(opt.ninput,
                                            self.pa_model,
                                            conv_idx=1)
            elif opt.pa_fuse_mode == 'TDN':
                # No conversion before loading: the trained TDN checkpoint is
                # assumed to already contain the converted weights -- TODO
                # confirm (see convert2TDN).
                pass
            else:
                # Single PAN stream.
                self.pa_model = convert2PAN(opt.ninput,
                                            self.pa_model,
                                            conv_idx=1)

            self.pa_model = load_model(self.pa_model,
                                       opt.save_root + opt.pa_model)

            self.pa_model = DataParallel(
                self.pa_model,
                device_ids=opt.gpus,
                chunk_sizes=opt.chunk_sizes).to(opt.device)
            self.pa_model.eval()

        self.num_classes = opt.num_classes
        self.opt = opt

        # Cumulative forward-pass time, used for speed measurement.
        self.total_time = 0
# Example No. 5 (0 votes)
class MOCDetector(object):
    """Multi-stream (RGB / optical-flow / PA) MOC action detector.

    Builds the configured model streams, preprocesses input clips, fuses the
    per-stream heatmap ('hm'), box-size ('wh') and movement ('mov') outputs,
    and decodes them into per-class detections scaled to the original frame.
    """

    def __init__(self, opt):
        """Create the configured streams; unused streams stay ``None``."""
        if opt.gpus[0] >= 0:
            opt.device = torch.device('cuda')
        else:
            opt.device = torch.device('cpu')
        self.rgb_model = None
        self.flow_model = None
        self.pa_model = None
        if opt.rgb_model != '':
            print('create rgb model')
            self.rgb_model = create_model(opt.arch,
                                          opt.branch_info,
                                          opt.head_conv,
                                          opt.K,
                                          flip_test=opt.flip_test,
                                          is_pa=False)
            self.rgb_model = load_model(self.rgb_model,
                                        opt.save_root + opt.rgb_model)
            # (Removed a dead debug-visualisation snippet that was kept here
            # as a no-op triple-quoted string literal.)
            self.rgb_model = DataParallel(self.rgb_model,
                                          device_ids=opt.gpus,
                                          chunk_sizes=opt.chunk_sizes).to(
                                              opt.device)
            self.rgb_model.eval()
        if opt.flow_model != '':
            print('create flow model')
            self.flow_model = create_model(opt.arch,
                                           opt.branch_info,
                                           opt.head_conv,
                                           opt.K,
                                           flip_test=opt.flip_test)
            # Adapt the first conv layer to ``ninput`` stacked flow frames.
            self.flow_model = convert2flow(opt.ninput, self.flow_model)
            self.flow_model = load_model(self.flow_model, opt.flow_model)
            self.flow_model = DataParallel(self.flow_model,
                                           device_ids=opt.gpus,
                                           chunk_sizes=opt.chunk_sizes).to(
                                               opt.device)
            self.flow_model.eval()

        if opt.pa_model != '':
            print('create PA model')
            self.pa_model = create_model(opt.arch,
                                         opt.branch_info,
                                         opt.head_conv,
                                         opt.K,
                                         flip_test=opt.flip_test,
                                         is_pa=True,
                                         pa_fuse_mode=opt.pa_fuse_mode,
                                         rgb_w3=opt.rgb_w3)

            if opt.pa_fuse_mode == 'PAN':
                self.pa_model = convert2PAN(opt.ninput,
                                            self.pa_model,
                                            conv_idx=1)
            elif opt.pa_fuse_mode == 'TDN':
                # No conversion before loading: the trained TDN checkpoint is
                # assumed to already contain the converted weights -- TODO
                # confirm (see convert2TDN).
                pass
            else:
                # Single PAN stream.
                self.pa_model = convert2PAN(opt.ninput,
                                            self.pa_model,
                                            conv_idx=1)

            self.pa_model = load_model(self.pa_model,
                                       opt.save_root + opt.pa_model)

            self.pa_model = DataParallel(
                self.pa_model,
                device_ids=opt.gpus,
                chunk_sizes=opt.chunk_sizes).to(opt.device)
            self.pa_model.eval()

        self.num_classes = opt.num_classes
        self.opt = opt

        # Cumulative forward-pass time, reported by run() for speed stats.
        self.total_time = 0

    def pre_process(self, images, is_flow=False, ninput=1):
        """Resize, stack and normalise a clip into K network inputs.

        Returns a list of K float32 arrays (2K with ``flip_test``), each of
        shape (3 * ninput, resize_height, resize_width), normalised with
        ``opt.mean`` / ``opt.std``.
        """
        K = self.opt.K
        # BUG FIX: cv2.resize takes dsize as (width, height); the original
        # passed (resize_height, resize_width), transposing every frame
        # whenever the target size is non-square.
        images = [
            cv2.resize(im, (self.opt.resize_width, self.opt.resize_height),
                       interpolation=cv2.INTER_LINEAR) for im in images
        ]

        if self.opt.flip_test:
            data = [
                np.empty((3 * ninput, self.opt.resize_height,
                          self.opt.resize_width),
                         dtype=np.float32) for i in range(K * 2)
            ]
        else:
            data = [
                np.empty((3 * ninput, self.opt.resize_height,
                          self.opt.resize_width),
                         dtype=np.float32) for i in range(K)
            ]

        mean = np.tile(
            np.array(self.opt.mean, dtype=np.float32)[:, None, None],
            (ninput, 1, 1))
        std = np.tile(
            np.array(self.opt.std, dtype=np.float32)[:, None, None],
            (ninput, 1, 1))

        for i in range(K):
            for ii in range(ninput):
                data[i][3 * ii:3 * ii + 3, :, :] = np.transpose(
                    images[i * ninput + ii], (2, 0, 1))
                if self.opt.flip_test:
                    # BUG FIX: the flipped branch indexed images[i + ii]
                    # while the normal branch uses images[i * ninput + ii];
                    # both now address the same frame.
                    if is_flow:
                        temp = images[i * ninput + ii].copy()
                        temp = temp[:, ::-1, :]
                        # Horizontally flipping flow also negates the
                        # x-component stored in channel 2.
                        temp[:, :, 2] = 255 - temp[:, :, 2]
                        data[i + K][3 * ii:3 * ii + 3, :, :] = np.transpose(
                            temp, (2, 0, 1))
                    else:
                        data[i + K][3 * ii:3 * ii + 3, :, :] = np.transpose(
                            images[i * ninput + ii], (2, 0, 1))[:, :, ::-1]
            # Normalise to zero mean / unit variance per channel.
            data[i] = ((data[i] / 255.) - mean) / std
            if self.opt.flip_test:
                data[i + K] = ((data[i + K] / 255.) - mean) / std
        return data

    def process(self, images, flows):
        """Forward the active streams and decode fused outputs.

        Appearance (RGB) and motion (flow or PA) heatmap/size/movement maps
        are blended with the ``opt.*_fusion_rgb`` weights when both exist.
        """
        with torch.no_grad():
            if self.rgb_model is not None:
                rgb_output = self.rgb_model(images)
                # NOTE(review): the RGB heatmap is used without sigmoid while
                # its flipped copy (and every other stream) goes through
                # sigmoid_ -- confirm this asymmetry is intentional.
                rgb_hm = rgb_output[0]['hm']
                rgb_wh = rgb_output[0]['wh']
                rgb_mov = rgb_output[0]['mov']
                if self.opt.flip_test:
                    rgb_hm_f = rgb_output[1]['hm'].sigmoid_()
                    rgb_wh_f = rgb_output[1]['wh']

                    # Average normal and un-flipped TTA predictions
                    # ('mov' is not flip-averaged).
                    rgb_hm = (rgb_hm + flip_tensor(rgb_hm_f)) / 2
                    rgb_wh = (rgb_wh + flip_tensor(rgb_wh_f)) / 2

            if self.flow_model is not None:
                flow_output = self.flow_model(flows)
                flow_hm = flow_output[0]['hm'].sigmoid_()
                flow_wh = flow_output[0]['wh']
                flow_mov = flow_output[0]['mov']
                if self.opt.flip_test:
                    flow_hm_f = flow_output[1]['hm'].sigmoid_()
                    flow_wh_f = flow_output[1]['wh']

                    flow_hm = (flow_hm + flip_tensor(flow_hm_f)) / 2
                    flow_wh = (flow_wh + flip_tensor(flow_wh_f)) / 2

            if self.pa_model is not None:
                pa_output = self.pa_model(flows)
                pa_hm = pa_output[0]['hm'].sigmoid_()
                pa_wh = pa_output[0]['wh']
                pa_mov = pa_output[0]['mov']

            # Fuse whichever streams are present.
            if self.flow_model is not None and self.rgb_model is not None:
                hm = (1 - self.opt.hm_fusion_rgb
                      ) * flow_hm + self.opt.hm_fusion_rgb * rgb_hm
                wh = (1 - self.opt.wh_fusion_rgb
                      ) * flow_wh + self.opt.wh_fusion_rgb * rgb_wh
                mov = (1 - self.opt.mov_fusion_rgb
                       ) * flow_mov + self.opt.mov_fusion_rgb * rgb_mov
            elif self.flow_model is not None and self.rgb_model is None and self.pa_model is None:
                hm = flow_hm
                wh = flow_wh
                mov = flow_mov
            elif self.rgb_model is not None and self.flow_model is None and self.pa_model is None:
                hm = rgb_hm
                wh = rgb_wh
                mov = rgb_mov
            # TODO: two stream for rgb + pa
            elif self.pa_model is not None and self.rgb_model is not None and self.flow_model is None:
                hm = (1 - self.opt.hm_fusion_rgb
                      ) * pa_hm + self.opt.hm_fusion_rgb * rgb_hm
                wh = (1 - self.opt.wh_fusion_rgb
                      ) * pa_wh + self.opt.wh_fusion_rgb * rgb_wh
                mov = (1 - self.opt.mov_fusion_rgb
                       ) * pa_mov + self.opt.mov_fusion_rgb * rgb_mov
            elif self.pa_model is not None and self.rgb_model is None and self.flow_model is None:
                hm = pa_hm
                wh = pa_wh
                mov = pa_mov
            else:
                print('No model exists.')
                assert 0

            detections = moc_decode(hm, wh, mov, N=self.opt.N, K=self.opt.K)
            return detections

    def post_process(self, detections, height, width, output_height,
                     output_width, num_classes, K):
        """Rescale raw detections to the original frame and split by class.

        Returns a list (one dict per batch item) mapping 1-based class id to
        a float32 array of rows [K box coordinate pairs..., score].
        """
        detections = detections.detach().cpu().numpy()

        results = []
        for i in range(detections.shape[0]):  # batch
            top_preds = {}
            # Each row is [x, y, x, y, ..., score, class]; rescale every
            # coordinate pair and clamp it inside the frame.
            for j in range((detections.shape[2] - 2) // 2):
                detections[i, :, 2 * j] = np.maximum(
                    0,
                    np.minimum(width - 1,
                               detections[i, :, 2 * j] / output_width * width))
                detections[i, :, 2 * j + 1] = np.maximum(
                    0,
                    np.minimum(
                        height - 1,
                        detections[i, :, 2 * j + 1] / output_height * height))
            classes = detections[i, :, -1]
            # Gather boxes (+ score) per class; class ids become 1-based.
            for c in range(self.opt.num_classes):
                inds = (classes == c)
                top_preds[c + 1] = detections[i, inds, :4 * K + 1].astype(
                    np.float32)
            results.append(top_preds)
        return results

    def run(self, data):
        """Detect on one batch dict; returns (detections, total_time)."""
        flows = None
        images = None

        if self.rgb_model is not None:
            images = data['images']
            for i in range(len(images)):
                images[i] = images[i].to(self.opt.device)
        if self.flow_model is not None:
            flows = data['flows']
            for i in range(len(flows)):
                flows[i] = flows[i].to(self.opt.device)

        if self.pa_model is not None:
            flows = data['flows']
            for i in range(len(flows)):
                flows[i] = flows[i].to(self.opt.device)

        meta = data['meta']
        meta = {k: v.numpy()[0] for k, v in meta.items()}

        # Time only the forward + decode, accumulated across calls.
        detection_start = time.time()
        detections = self.process(images, flows)
        detection_end = time.time()
        self.total_time += detection_end - detection_start

        detections = self.post_process(detections, meta['height'],
                                       meta['width'], meta['output_height'],
                                       meta['output_width'],
                                       self.opt.num_classes, self.opt.K)

        return detections, self.total_time
# Example No. 6 (0 votes)
class MOCTrainer(object):
    """Runs training / validation epochs for the MOC model."""

    def __init__(self, opt, model, optimizer=None):
        """Wrap ``model`` with its training loss.

        ``optimizer`` may be ``None`` for validation-only use; training
        epochs and ``set_device`` require it.
        """
        self.opt = opt
        self.optimizer = optimizer
        # Scalar statistics tracked and logged every iteration.
        self.loss_stats = [
            'loss', 'loss_hm', 'loss_mov', 'loss_wh', 'loss_mgan'
        ]
        self.model_with_loss = ModleWithLoss(model, MOCTrainLoss(opt))

    def train(self, epoch, data_loader, writer):
        """Run one optimisation epoch; returns averaged loss statistics."""
        return self.run_epoch('train', epoch, data_loader, writer)

    def val(self, epoch, data_loader, writer):
        """Run one evaluation epoch; returns averaged loss statistics."""
        return self.run_epoch('val', epoch, data_loader, writer)

    def run_epoch(self, phase, epoch, data_loader, writer):
        """Iterate ``data_loader`` once, optimising when phase == 'train'.

        Returns a dict of averaged loss statistics plus 'time' in minutes.
        """
        model_with_loss = self.model_with_loss
        if phase == 'train':
            model_with_loss.train()
        else:
            model_with_loss.eval()
            torch.cuda.empty_cache()

        opt = self.opt
        avg_loss_stats = {l: AverageMeter() for l in self.loss_stats}
        num_iters = len(data_loader)
        bar = Bar(opt.exp_id, max=num_iters)
        # Named ``it`` rather than ``iter`` to avoid shadowing the builtin.
        for it, batch in enumerate(data_loader):
            if it >= num_iters:
                break

            # Move the batch to the device; 'input' is a list of K frame
            # tensors, everything else a single tensor.
            for k in batch:
                if k == 'input':
                    assert len(batch[k]) == self.opt.K
                    for i in range(len(batch[k])):
                        batch[k][i] = batch[k][i].to(device=opt.device,
                                                     non_blocking=True)
                else:
                    batch[k] = batch[k].to(device=opt.device,
                                           non_blocking=True)
            output, loss, loss_stats = model_with_loss(batch)
            # DataParallel yields one loss per chunk; reduce to a scalar.
            loss = loss.mean()
            if phase == 'train':
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

            Bar.suffix = '{phase}: [{0}][{1}/{2}]|Tot: {total:} |ETA: {eta:} '.format(
                epoch,
                it,
                num_iters,
                phase=phase,
                total=bar.elapsed_td,
                eta=bar.eta_td)

            # Global tensorboard step across epochs (epochs are 1-based).
            step = it // opt.visual_per_inter + num_iters // opt.visual_per_inter * (
                epoch - 1)

            for l in self.loss_stats:
                avg_loss_stats[l].update(loss_stats[l].mean().item(),
                                         batch['input'][0].size(0))

                if phase == 'train' and it % opt.visual_per_inter == 0 and it != 0:
                    writer.add_scalar('train/{}'.format(l),
                                      avg_loss_stats[l].avg, step)
                    writer.flush()
                Bar.suffix = Bar.suffix + '|{} {:.4f} '.format(
                    l, avg_loss_stats[l].avg)
            bar.next()
            del output, loss, loss_stats

        bar.finish()
        ret = {k: v.avg for k, v in avg_loss_stats.items()}
        ret['time'] = bar.elapsed_td.total_seconds() / 60.
        return ret

    def set_device(self, gpus, chunk_sizes, device):
        """Move the wrapped model (DataParallel over >1 GPU) and the
        optimizer state tensors onto ``device``."""
        if len(gpus) > 1:
            self.model_with_loss = DataParallel(
                self.model_with_loss, device_ids=gpus,
                chunk_sizes=chunk_sizes).to(device)
        else:
            self.model_with_loss = self.model_with_loss.to(device)

        # Optimizer state (e.g. momentum buffers) must follow the model.
        for state in self.optimizer.state.values():
            for k, v in state.items():
                if isinstance(v, torch.Tensor):
                    state[k] = v.to(device=device, non_blocking=True)
class MOCDetector(object):
    def __init__(self, opt):
        """Build RGB/flow backbone+branch inference models on the GPU.

        Only GPU inference is supported; raises when no GPU id is configured.
        """
        if opt.gpus[0] >= 0:
            opt.device = torch.device('cuda')
        else:
            # BUG FIX: ``assert 'cpu is not supported!'`` asserted a truthy
            # string literal and could never fail, leaving opt.device unset;
            # raise explicitly instead.
            raise AssertionError('cpu is not supported!')

        self.rgb_model_backbone, self.rgb_model_branch = None, None
        self.flow_model_backbone, self.flow_model_branch = None, None
        if opt.rgb_model != '':
            self.rgb_model_backbone, self.rgb_model_branch = create_inference_model(
                opt.arch,
                opt.branch_info,
                opt.head_conv,
                opt.K,
                flip_test=opt.flip_test)
            print('create rgb model', flush=True)
            self.rgb_model_backbone, self.rgb_model_branch = load_inference_model(
                self.rgb_model_backbone, self.rgb_model_branch, opt.rgb_model)
            print('load rgb model', flush=True)
            # Single-GPU DataParallel wrappers (one chunk of size 1).
            self.rgb_model_backbone = DataParallel(self.rgb_model_backbone,
                                                   device_ids=[opt.gpus[0]],
                                                   chunk_sizes=[1]).to(
                                                       opt.device)
            self.rgb_model_branch = DataParallel(self.rgb_model_branch,
                                                 device_ids=[opt.gpus[0]],
                                                 chunk_sizes=[1]).to(
                                                     opt.device)
            print('put rgb model to gpu', flush=True)
            self.rgb_model_backbone.eval()
            self.rgb_model_branch.eval()
        if opt.flow_model != '':
            self.flow_model_backbone, self.flow_model_branch = create_inference_model(
                opt.arch,
                opt.branch_info,
                opt.head_conv,
                opt.K,
                flip_test=opt.flip_test)
            # Adapt the first conv layer to stacked optical-flow input.
            self.flow_model_backbone = convert2flow(opt.ninput,
                                                    self.flow_model_backbone)
            print('create flow model', flush=True)
            self.flow_model_backbone, self.flow_model_branch = load_inference_model(
                self.flow_model_backbone, self.flow_model_branch,
                opt.flow_model)
            print('load flow model', flush=True)
            self.flow_model_backbone = DataParallel(self.flow_model_backbone,
                                                    device_ids=[opt.gpus[0]],
                                                    chunk_sizes=[1]).to(
                                                        opt.device)
            self.flow_model_branch = DataParallel(self.flow_model_branch,
                                                  device_ids=[opt.gpus[0]],
                                                  chunk_sizes=[1]).to(
                                                      opt.device)
            print('put flow model to gpu', flush=True)
            self.flow_model_backbone.eval()
            self.flow_model_branch.eval()

        self.num_classes = opt.num_classes
        self.opt = opt

        # Feature caches (normal and flipped) for streaming inference;
        # presumably filled by process() -- see the K-frame buffering there.
        self.rgb_buffer = []
        self.flow_buffer = []
        self.rgb_buffer_flip = []
        self.flow_buffer_flip = []

    def pre_process(self, images, is_flow=False, ninput=1):
        """Resize, stack and normalise a clip of frames.

        Returns a list of K float32 arrays (2K with ``flip_test``: the second
        half holds horizontally flipped copies), each of shape
        (3 * ninput, resize_height, resize_width).
        """
        opt = self.opt
        K = opt.K
        target = (opt.resize_width, opt.resize_height)
        resized = [
            cv2.resize(frame, target, interpolation=cv2.INTER_LINEAR)
            for frame in images
        ]

        slots = 2 * K if opt.flip_test else K
        shape = (3 * ninput, opt.resize_height, opt.resize_width)
        data = [np.empty(shape, dtype=np.float32) for _ in range(slots)]

        mean = np.tile(
            np.array(opt.mean, dtype=np.float32)[:, None, None],
            (ninput, 1, 1))
        std = np.tile(
            np.array(opt.std, dtype=np.float32)[:, None, None],
            (ninput, 1, 1))

        for i in range(K):
            for ii in range(ninput):
                frame = resized[i + ii]
                sl = slice(3 * ii, 3 * ii + 3)
                data[i][sl, :, :] = np.transpose(frame, (2, 0, 1))
                if opt.flip_test:
                    if is_flow:
                        # Horizontally flipping flow also negates the
                        # x-component stored in channel 2.
                        flipped = frame[:, ::-1, :].copy()
                        flipped[:, :, 2] = 255 - flipped[:, :, 2]
                        data[i + K][sl, :, :] = np.transpose(
                            flipped, (2, 0, 1))
                    else:
                        data[i + K][sl, :, :] = np.transpose(
                            frame, (2, 0, 1))[:, :, ::-1]
            # Normalise to zero mean / unit variance per channel.
            data[i] = ((data[i] / 255.) - mean) / std
            if opt.flip_test:
                data[i + K] = ((data[i + K] / 255.) - mean) / std
        return data

    def pre_process_single_frame(self,
                                 images,
                                 is_flow=False,
                                 ninput=1,
                                 data_last=None,
                                 data_last_flip=None):
        """Prepare one incoming frame for streaming inference.

        RGB frames are normalised directly. Flow frames are appended to the
        previous normalised stack (``data_last`` / ``data_last_flip``),
        dropping the oldest frame. Returns ``(data, data_flip)``;
        ``data_flip`` is only meaningful when ``opt.flip_test`` is set.
        """
        # BUG FIX: cv2.resize takes dsize as (width, height); the original
        # passed (resize_height, resize_width), transposing the frame
        # whenever the target size is non-square (cf. pre_process above).
        images = cv2.resize(images,
                            (self.opt.resize_width, self.opt.resize_height),
                            interpolation=cv2.INTER_LINEAR)

        data = np.empty(
            (3 * ninput, self.opt.resize_height, self.opt.resize_width),
            dtype=np.float32)
        data_flip = np.empty(
            (3 * ninput, self.opt.resize_height, self.opt.resize_width),
            dtype=np.float32)

        mean = np.array(self.opt.mean, dtype=np.float32)[:, None, None]
        std = np.array(self.opt.std, dtype=np.float32)[:, None, None]
        if not is_flow:
            data = np.transpose(images, (2, 0, 1))
            if self.opt.flip_test:
                data_flip = np.transpose(images, (2, 0, 1))[:, :, ::-1]
            data = ((data / 255.) - mean) / std
            if self.opt.flip_test:
                data_flip = ((data_flip / 255.) - mean) / std

        else:
            # Shift the stack: keep the ninput-1 most recent normalised flow
            # frames and append the newly normalised one.
            data[:3 * ninput - 3, :, :] = data_last[3:, :, :]
            data[3 * ninput -
                 3:, :, :] = (np.transpose(images,
                                           (2, 0, 1)) / 255. - mean) / std
            if self.opt.flip_test:
                # Horizontally flipping flow also negates the x-component
                # stored in channel 2.
                temp = images.copy()
                temp = temp[:, ::-1, :]
                temp[:, :, 2] = 255 - temp[:, :, 2]
                data_flip[:3 * ninput - 3, :, :] = data_last_flip[3:, :, :]
                data_flip[3 * ninput -
                          3:, :, :] = (np.transpose(temp, (2, 0, 1)) / 255. -
                                       mean) / std
        return data, data_flip

    def process(self, images, flows, video_tag):
        """Run one streaming inference step and decode tubelet detections.

        On the first frame of a video (video_tag == 0) the K-frame feature
        buffers are rebuilt from scratch; on later frames each buffer is
        shifted left by one and only the newest frame goes through the
        backbone.  RGB and flow head outputs are fused with the
        opt.*_fusion_rgb weights before decoding.
        """
        K = self.opt.K
        flip = self.opt.flip_test
        with torch.no_grad():
            if self.rgb_model_backbone is not None:
                if video_tag == 0:
                    # fresh video: run the backbone over all K frames
                    fresh = []
                    for idx in range(K):
                        fresh.append(self.rgb_model_backbone(images[idx]))
                    self.rgb_buffer = fresh
                    if flip:
                        fresh_flip = []
                        for idx in range(K):
                            fresh_flip.append(
                                self.rgb_model_backbone(images[idx + K]))
                        self.rgb_buffer_flip = fresh_flip
                else:
                    # streaming: drop the oldest feature, append the newest
                    self.rgb_buffer.pop(0)
                    self.rgb_buffer.append(
                        self.rgb_model_backbone(images[K - 1]))
                    if flip:
                        self.rgb_buffer_flip.pop(0)
                        self.rgb_buffer_flip.append(
                            self.rgb_model_backbone(images[-1]))
                # NOTE(review): rgb_buffer_flip is only populated here when
                # flip_test is on -- confirm it is initialized elsewhere for
                # the non-flip path before this call.
                rgb_out = self.rgb_model_branch(self.rgb_buffer,
                                                self.rgb_buffer_flip)
                rgb_hm = rgb_out[0]['hm'].sigmoid_()
                rgb_wh = rgb_out[0]['wh']
                rgb_mov = rgb_out[0]['mov']
                if flip:
                    # average normal and un-flipped mirrored predictions
                    rgb_hm = (rgb_hm +
                              flip_tensor(rgb_out[1]['hm'].sigmoid_())) / 2
                    rgb_wh = (rgb_wh + flip_tensor(rgb_out[1]['wh'])) / 2

            if self.flow_model_backbone is not None:
                if video_tag == 0:
                    fresh = []
                    for idx in range(K):
                        fresh.append(self.flow_model_backbone(flows[idx]))
                    self.flow_buffer = fresh
                    if flip:
                        fresh_flip = []
                        for idx in range(K):
                            fresh_flip.append(
                                self.flow_model_backbone(flows[idx + K]))
                        self.flow_buffer_flip = fresh_flip
                else:
                    self.flow_buffer.pop(0)
                    self.flow_buffer.append(
                        self.flow_model_backbone(flows[K - 1]))
                    if flip:
                        self.flow_buffer_flip.pop(0)
                        self.flow_buffer_flip.append(
                            self.flow_model_backbone(flows[-1]))
                flow_out = self.flow_model_branch(self.flow_buffer,
                                                  self.flow_buffer_flip)
                flow_hm = flow_out[0]['hm'].sigmoid_()
                flow_wh = flow_out[0]['wh']
                flow_mov = flow_out[0]['mov']
                if flip:
                    flow_hm = (flow_hm +
                               flip_tensor(flow_out[1]['hm'].sigmoid_())) / 2
                    flow_wh = (flow_wh + flip_tensor(flow_out[1]['wh'])) / 2

            has_rgb = self.rgb_model_backbone is not None
            has_flow = self.flow_model_backbone is not None
            if has_rgb and has_flow:
                # weighted two-stream fusion
                w = self.opt.hm_fusion_rgb
                hm = (1 - w) * flow_hm + w * rgb_hm
                w = self.opt.wh_fusion_rgb
                wh = (1 - w) * flow_wh + w * rgb_wh
                w = self.opt.mov_fusion_rgb
                mov = (1 - w) * flow_mov + w * rgb_mov
            elif has_flow:
                hm, wh, mov = flow_hm, flow_wh, flow_mov
            elif has_rgb:
                hm, wh, mov = rgb_hm, rgb_wh, rgb_mov
            else:
                print('No model exists.')
                assert 0

            return moc_decode(hm, wh, mov, N=self.opt.N, K=K)

    def post_process(self, detections, height, width, output_height,
                     output_width, num_classes, K):
        """Rescale decoded tubelet boxes to frame coordinates and bucket
        them per class.

        Args:
            detections: tensor [batch, N, 4*K + 2] -- K boxes per tubelet
                (x/y pairs), then score, then class id.
            height / width: original frame size.
            output_height / output_width: heatmap size the boxes were
                decoded in.
            num_classes: unused here (self.opt.num_classes is used) --
                kept for signature compatibility.
            K: tubelet length.

        Returns:
            list with one dict per batch item, mapping class id (1-based)
            to a float32 array [m, 4*K + 1] of boxes + score.
        """
        detections = detections.detach().cpu().numpy()

        results = []
        for i in range(detections.shape[0]):
            top_preds = {}
            for j in range((detections.shape[2] - 2) // 2):
                # rescale to frame size and clamp to prevent out of bounds
                # (even columns are x, odd columns are y)
                detections[i, :, 2 * j] = np.maximum(
                    0,
                    np.minimum(width - 1,
                               detections[i, :, 2 * j] / output_width * width))
                detections[i, :, 2 * j + 1] = np.maximum(
                    0,
                    np.minimum(
                        height - 1,
                        detections[i, :, 2 * j + 1] / output_height * height))
            classes = detections[i, :, -1]
            # gather bbox for each class
            for c in range(self.opt.num_classes):
                inds = (classes == c)
                top_preds[c + 1] = detections[i, inds, :4 * K + 1].astype(
                    np.float32)
            results.append(top_preds)
        # BUG FIX: the return was indented inside the batch loop, so only
        # the first batch item was ever returned.
        return results

    def run(self, data):
        """Full per-frame streaming step: move inputs to the device, run
        process(), and rescale the resulting detections to frame
        coordinates via post_process()."""
        images = None
        flows = None

        if self.rgb_model_backbone is not None:
            images = data['images']
            for idx, im in enumerate(images):
                images[idx] = im.to(self.opt.device)
        if self.flow_model_backbone is not None:
            flows = data['flows']
            for idx, fl in enumerate(flows):
                flows[idx] = fl.to(self.opt.device)

        # meta tensors arrive batched; keep the scalar of the first item
        meta = {key: val.numpy()[0] for key, val in data['meta'].items()}

        dets = self.process(images, flows, data['video_tag'])

        return self.post_process(dets, meta['height'], meta['width'],
                                 meta['output_height'],
                                 meta['output_width'],
                                 self.opt.num_classes, self.opt.K)
# ---- Exemplo n.º 8 (scraped code-listing separator; original vote count: 0) ----
class MOCDetector(object):
    """Two-stream (RGB + optical flow) MOC action-tubelet detector used
    for offline inference with pre-extracted features."""

    def __init__(self, opt):
        """Record options and select the inference device.

        opt.device is set as a side effect: CUDA when the first GPU id is
        non-negative, CPU otherwise.
        """
        opt.device = torch.device('cuda' if opt.gpus[0] >= 0 else 'cpu')

        self.rgb_model_backbone = None
        self.rgb_model_branch = None
        self.flow_model_backbone = None
        self.flow_model_branch = None
        self.num_classes = opt.num_classes
        self.opt = opt

    def load_backbone(self):
        """Create and load the backbone networks.

        Only the backbones are wrapped in DataParallel, moved to the
        device and switched to eval(); the branch heads are stored as
        returned by load_inference_model (load_branch is the mirror
        image of this method).
        """
        opt = self.opt
        if opt.rgb_model != '':
            print('create rgb model')
            backbone, branch = create_inference_model(opt.arch,
                                                      opt.branch_info,
                                                      opt.head_conv,
                                                      opt.K,
                                                      flip_test=opt.flip_test)
            backbone, branch = load_inference_model(backbone, branch,
                                                    opt.rgb_model)
            backbone = DataParallel(
                backbone, device_ids=opt.gpus,
                chunk_sizes=opt.chunk_sizes).to(opt.device)
            backbone.eval()
            self.rgb_model_backbone = backbone
            self.rgb_model_branch = branch
        if opt.flow_model != '':
            print('create flow model')
            backbone, branch = create_inference_model(opt.arch,
                                                      opt.branch_info,
                                                      opt.head_conv,
                                                      opt.K,
                                                      flip_test=opt.flip_test)
            # adapt the first conv to stacked flow input before loading
            backbone = convert2flow(opt.ninput, backbone)
            backbone, branch = load_inference_model(backbone, branch,
                                                    opt.flow_model)
            backbone = DataParallel(
                backbone, device_ids=opt.gpus,
                chunk_sizes=opt.chunk_sizes).to(opt.device)
            backbone.eval()
            self.flow_model_backbone = backbone
            self.flow_model_branch = branch

    def load_branch(self):
        """Create and load the branch-head networks.

        Only the branch heads are wrapped in DataParallel, moved to the
        device and switched to eval(); the backbones are stored as
        returned by load_inference_model (load_backbone is the mirror
        image of this method).
        """
        opt = self.opt
        if opt.rgb_model != '':
            print('create rgb model')
            backbone, branch = create_inference_model(opt.arch,
                                                      opt.branch_info,
                                                      opt.head_conv,
                                                      opt.K,
                                                      flip_test=opt.flip_test)
            backbone, branch = load_inference_model(backbone, branch,
                                                    opt.rgb_model)
            branch = DataParallel(
                branch, device_ids=opt.gpus,
                chunk_sizes=opt.chunk_sizes).to(opt.device)
            branch.eval()
            self.rgb_model_backbone = backbone
            self.rgb_model_branch = branch
        if opt.flow_model != '':
            print('create flow model')
            backbone, branch = create_inference_model(opt.arch,
                                                      opt.branch_info,
                                                      opt.head_conv,
                                                      opt.K,
                                                      flip_test=opt.flip_test)
            # adapt the first conv to stacked flow input before loading
            backbone = convert2flow(opt.ninput, backbone)
            backbone, branch = load_inference_model(backbone, branch,
                                                    opt.flow_model)
            branch = DataParallel(
                branch, device_ids=opt.gpus,
                chunk_sizes=opt.chunk_sizes).to(opt.device)
            branch.eval()
            self.flow_model_backbone = backbone
            self.flow_model_branch = branch

    def pre_process(self, images, is_flow=False, ninput=1):
        """Resize and normalize a clip into stacked CHW float32 buffers.

        Args:
            images: list of ninput HWC frames.
            is_flow: flow input; horizontal flipping then also inverts
                channel 2 (the x-flow component, stored as 255 - value).
            ninput: number of frames stacked along the channel axis.

        Returns:
            [data], or [data, data_flip] when opt.flip_test is set, each
            of shape (3*ninput, resize_height, resize_width).
        """
        # BUG FIX: cv2.resize takes dsize as (width, height). The original
        # passed (resize_height, resize_width), which transposes the result
        # for non-square sizes and then mismatches the (3*ninput, H, W)
        # buffers below; square sizes were unaffected.
        images = [
            cv2.resize(im, (self.opt.resize_width, self.opt.resize_height),
                       interpolation=cv2.INTER_LINEAR) for im in images
        ]

        if self.opt.flip_test:
            data = [
                np.empty((3 * ninput, self.opt.resize_height,
                          self.opt.resize_width),
                         dtype=np.float32) for i in range(2)
            ]
        else:
            data = [
                np.empty((3 * ninput, self.opt.resize_height,
                          self.opt.resize_width),
                         dtype=np.float32)
            ]

        # per-channel constants tiled across the ninput stacked frames
        mean = np.tile(
            np.array(self.opt.mean, dtype=np.float32)[:, None, None],
            (ninput, 1, 1))
        std = np.tile(
            np.array(self.opt.std, dtype=np.float32)[:, None, None],
            (ninput, 1, 1))

        for ii in range(ninput):
            # HWC -> CHW into the channel slot of frame ii
            data[0][3 * ii:3 * ii + 3, :, :] = np.transpose(
                images[ii], (2, 0, 1))
            if self.opt.flip_test:
                if is_flow:
                    # mirror the frame and invert the x-flow channel
                    temp = images[ii].copy()
                    temp = temp[:, ::-1, :]
                    temp[:, :, 2] = 255 - temp[:, :, 2]
                    data[1][3 * ii:3 * ii + 3, :, :] = np.transpose(
                        temp, (2, 0, 1))
                else:
                    data[1][3 * ii:3 * ii + 3, :, :] = np.transpose(
                        images[ii], (2, 0, 1))[:, :, ::-1]
        # normalize
        data[0] = ((data[0] / 255.) - mean) / std
        if self.opt.flip_test:
            data[1] = ((data[1] / 255.) - mean) / std
        return data

    def extract_feature(self, data):
        """Move preprocessed clip tensors to the device and run the
        backbone network(s) on them.

        Args:
            data: dict with optional 'images' / 'flows' lists of tensors;
                index 0 is the normal clip, index 1 the flipped clip when
                opt.flip_test is set.

        Returns:
            (rgb_features, rgb_features_flip, flow_features,
            flow_features_flip) -- entries are None for disabled streams
            or when flip_test is off.
        """
        flows = None
        images = None

        if self.rgb_model_backbone is not None:
            images = data['images']
            for i in range(len(images)):
                images[i] = images[i].to(self.opt.device)
        if self.flow_model_backbone is not None:
            flows = data['flows']
            for i in range(len(flows)):
                flows[i] = flows[i].to(self.opt.device)

        rgb_features, rgb_features_flip = None, None
        flow_features, flow_features_flip = None, None
        with torch.no_grad():
            if self.rgb_model_backbone is not None:
                rgb_features = self.rgb_model_backbone(images[0])

                if self.opt.flip_test:
                    rgb_features_flip = self.rgb_model_backbone(images[1])

            # BUG FIX: the original nested this identical None-check twice;
            # a single check is sufficient.
            if self.flow_model_backbone is not None:
                flow_features = self.flow_model_backbone(flows[0])

                if self.opt.flip_test:
                    flow_features_flip = self.flow_model_backbone(flows[1])

            return rgb_features, rgb_features_flip, flow_features, flow_features_flip

    def det_process(self, feature):
        """Apply the branch heads to pre-extracted backbone features and
        decode tubelet detections (no backbone work happens here).

        feature must carry 'rgb_features' / 'rgb_features_flip' and/or
        'flow_features' / 'flow_features_flip' for the enabled streams.
        """
        with torch.no_grad():
            has_rgb = self.rgb_model_backbone is not None
            has_flow = self.flow_model_backbone is not None

            if has_rgb:
                out = self.rgb_model_branch(feature['rgb_features'],
                                            feature['rgb_features_flip'])
                rgb_hm = out[0]['hm'].sigmoid_()
                rgb_wh = out[0]['wh']
                rgb_mov = out[0]['mov']
                if self.opt.flip_test:
                    # average normal and un-flipped mirrored predictions
                    rgb_hm = (rgb_hm +
                              flip_tensor(out[1]['hm'].sigmoid_())) / 2
                    rgb_wh = (rgb_wh + flip_tensor(out[1]['wh'])) / 2

            if has_flow:
                out = self.flow_model_branch(feature['flow_features'],
                                             feature['flow_features_flip'])
                flow_hm = out[0]['hm'].sigmoid_()
                flow_wh = out[0]['wh']
                flow_mov = out[0]['mov']
                if self.opt.flip_test:
                    flow_hm = (flow_hm +
                               flip_tensor(out[1]['hm'].sigmoid_())) / 2
                    flow_wh = (flow_wh + flip_tensor(out[1]['wh'])) / 2

            if has_rgb and has_flow:
                # weighted two-stream fusion
                w = self.opt.hm_fusion_rgb
                hm = (1 - w) * flow_hm + w * rgb_hm
                w = self.opt.wh_fusion_rgb
                wh = (1 - w) * flow_wh + w * rgb_wh
                w = self.opt.mov_fusion_rgb
                mov = (1 - w) * flow_mov + w * rgb_mov
            elif has_flow:
                hm, wh, mov = flow_hm, flow_wh, flow_mov
            elif has_rgb:
                hm, wh, mov = rgb_hm, rgb_wh, rgb_mov
            else:
                print('No model exists.')
                assert 0

            return moc_decode(hm, wh, mov, N=self.opt.N, K=self.opt.K)

    def post_process(self, detections, height, width, output_height,
                     output_width, num_classes, K):
        """Rescale decoded tubelet boxes to frame coordinates and bucket
        them per class.

        Args:
            detections: tensor [batch, N, 4*K + 2] -- K boxes per tubelet
                (x/y pairs), then score, then class id.
            height / width: original frame size.
            output_height / output_width: heatmap size the boxes were
                decoded in.
            num_classes: unused here (self.opt.num_classes is used) --
                kept for signature compatibility.
            K: tubelet length (assumed equal to self.opt.K, which the
                original used for its final reshape).

        Returns:
            list with one dict per batch item, mapping class id (1-based)
            to a float32 array [m, 4*K + 1] of boxes + score.
        """
        detections = detections.detach().cpu().numpy()

        results = []
        for i in range(detections.shape[0]):
            top_preds = {}
            for j in range((detections.shape[2] - 2) // 2):
                # rescale to frame size and clamp to prevent out of bounds
                # (even columns are x, odd columns are y)
                detections[i, :, 2 * j] = np.maximum(
                    0,
                    np.minimum(width - 1,
                               detections[i, :, 2 * j] / output_width * width))
                detections[i, :, 2 * j + 1] = np.maximum(
                    0,
                    np.minimum(
                        height - 1,
                        detections[i, :, 2 * j + 1] / output_height * height))
            classes = detections[i, :, -1]
            # gather bbox + score for each class.  The box (4*K) and score
            # (1) columns are contiguous, so one slice replaces the old
            # concatenate -> tolist -> np.array round-trip; the reshape
            # keeps the (0, 4*K+1) shape for classes with no detections.
            for c in range(self.opt.num_classes):
                inds = (classes == c)
                top_preds[c + 1] = detections[i, inds, :4 * K + 1].astype(
                    np.float32).reshape(-1, K * 4 + 1)
            results.append(top_preds)
        return results

    def run(self, data):
        """Branch-only inference step: move the buffered K-frame features
        to the device, run det_process(), and rescale the detections to
        frame coordinates via post_process()."""
        K = self.opt.K
        if self.rgb_model_backbone is not None:
            for idx in range(K):
                data['rgb_features'][idx] = data['rgb_features'][idx].to(
                    self.opt.device)
            if self.opt.flip_test:
                for idx in range(K):
                    data['rgb_features_flip'][idx] = data[
                        'rgb_features_flip'][idx].to(self.opt.device)
        if self.flow_model_backbone is not None:
            for idx in range(K):
                data['flow_features'][idx] = data['flow_features'][idx].to(
                    self.opt.device)
            if self.opt.flip_test:
                for idx in range(K):
                    data['flow_features_flip'][idx] = data[
                        'flow_features_flip'][idx].to(self.opt.device)

        # meta tensors arrive batched; keep the scalar of the first item
        meta = {key: val.numpy()[0] for key, val in data['meta'].items()}

        # detections--->[b, N, 4*K+1+1]  (bboxes, scores, classes)
        dets = self.det_process(data)

        # detections--->[b, class, 4*K+1]  (bboxes, scores)
        return self.post_process(dets, meta['height'], meta['width'],
                                 meta['output_height'],
                                 meta['output_width'],
                                 self.opt.num_classes, K)