def __init__(self, opt):
    """Build the two-stage (backbone + branch) inference models from *opt*.

    Creates RGB and/or optical-flow models depending on whether
    ``opt.rgb_model`` / ``opt.flow_model`` checkpoint paths are given,
    wraps them in (project) DataParallel on a single GPU, and switches
    them to eval mode.  Also initialises the per-stream feature buffers
    used for streaming inference.

    Note: mutates ``opt`` in place by setting ``opt.device``.
    """
    # Select device from the first configured GPU id (< 0 means CPU).
    if opt.gpus[0] >= 0:
        opt.device = torch.device('cuda')
    else:
        opt.device = torch.device('cpu')
    # Backbone extracts per-frame features; branch fuses K frames into detections.
    self.rgb_model_backbone, self.rgb_model_branch = None, None
    self.flow_model_backbone, self.flow_model_branch = None, None
    if opt.rgb_model != '':
        print('create rgb model')
        self.rgb_model_backbone, self.rgb_model_branch = create_inference_model(
            opt.arch, opt.branch_info, opt.head_conv, opt.K,
            flip_test=opt.flip_test)
        self.rgb_model_backbone, self.rgb_model_branch = load_inference_model(
            self.rgb_model_backbone, self.rgb_model_branch, opt.rgb_model)
        # Single-GPU DataParallel with batch chunk size 1 (online inference).
        self.rgb_model_backbone = DataParallel(
            self.rgb_model_backbone, device_ids=[opt.gpus[0]],
            chunk_sizes=[1]).to(opt.device)
        self.rgb_model_branch = DataParallel(
            self.rgb_model_branch, device_ids=[opt.gpus[0]],
            chunk_sizes=[1]).to(opt.device)
        self.rgb_model_backbone.eval()
        self.rgb_model_branch.eval()
    if opt.flow_model != '':
        print('create flow model')
        self.flow_model_backbone, self.flow_model_branch = create_inference_model(
            opt.arch, opt.branch_info, opt.head_conv, opt.K,
            flip_test=opt.flip_test)
        # Adapt the first conv layer to stacked flow input (opt.ninput frames).
        self.flow_model_backbone = convert2flow(opt.ninput,
                                                self.flow_model_backbone)
        self.flow_model_backbone, self.flow_model_branch = load_inference_model(
            self.flow_model_backbone, self.flow_model_branch, opt.flow_model)
        self.flow_model_backbone = DataParallel(
            self.flow_model_backbone, device_ids=[opt.gpus[0]],
            chunk_sizes=[1]).to(opt.device)
        self.flow_model_branch = DataParallel(
            self.flow_model_branch, device_ids=[opt.gpus[0]],
            chunk_sizes=[1]).to(opt.device)
        self.flow_model_backbone.eval()
        self.flow_model_branch.eval()
    self.num_classes = opt.num_classes
    self.opt = opt
    # Sliding buffers of per-frame backbone features (and flipped variants)
    # reused across consecutive clips during streaming inference.
    self.rgb_buffer = []
    self.flow_buffer = []
    self.rgb_buffer_flip = []
    self.flow_buffer_flip = []
def __init__(self, opt, model, optimizer=None):
    """Remember options/optimizer and wrap *model* with the MOC training loss."""
    self.opt = opt
    self.optimizer = optimizer
    # Scalar statistics tracked and reported every iteration.
    self.loss_stats = 'loss loss_hm loss_mov loss_wh loss_mgan'.split()
    criterion = MOCTrainLoss(opt)
    self.model_with_loss = ModleWithLoss(model, criterion)
def __init__(self, opt):
    """Build the single-stage RGB and/or flow detection models from *opt*.

    A stream is only created when its checkpoint path option is non-empty.
    Models are wrapped in (project) DataParallel over ``opt.gpus`` and put
    into eval mode.  Mutates ``opt`` in place by setting ``opt.device``.
    """
    # Select device from the first configured GPU id (< 0 means CPU).
    if opt.gpus[0] >= 0:
        opt.device = torch.device('cuda')
    else:
        opt.device = torch.device('cpu')
    self.rgb_model = None
    self.flow_model = None
    if opt.rgb_model != '':
        print('create rgb model')
        self.rgb_model = create_model(opt.arch, opt.branch_info, opt.head_conv,
                                      opt.K, flip_test=opt.flip_test)
        self.rgb_model = load_model(self.rgb_model, opt.rgb_model)
        self.rgb_model = DataParallel(self.rgb_model,
                                      device_ids=opt.gpus,
                                      chunk_sizes=opt.chunk_sizes).to(opt.device)
        self.rgb_model.eval()
    if opt.flow_model != '':
        print('create flow model')
        self.flow_model = create_model(opt.arch, opt.branch_info, opt.head_conv,
                                       opt.K, flip_test=opt.flip_test)
        # Adapt the first conv layer to stacked flow input (opt.ninput frames).
        self.flow_model = convert2flow(opt.ninput, self.flow_model)
        self.flow_model = load_model(self.flow_model, opt.flow_model)
        self.flow_model = DataParallel(self.flow_model,
                                       device_ids=opt.gpus,
                                       chunk_sizes=opt.chunk_sizes).to(opt.device)
        self.flow_model.eval()
    self.num_classes = opt.num_classes
    self.opt = opt
def __init__(self, opt):
    """Build RGB / optical-flow / PA (persistence-of-appearance) models.

    Each stream is created only when its checkpoint option is non-empty;
    models are wrapped in (project) DataParallel over ``opt.gpus`` and set
    to eval mode.  Mutates ``opt`` in place by setting ``opt.device``.
    """
    # Select device from the first configured GPU id (< 0 means CPU).
    if opt.gpus[0] >= 0:
        opt.device = torch.device('cuda')
    else:
        opt.device = torch.device('cpu')
    self.rgb_model = None
    self.flow_model = None
    self.pa_model = None
    if opt.rgb_model != '':
        print('create rgb model')
        self.rgb_model = create_model(opt.arch, opt.branch_info, opt.head_conv,
                                      opt.K, flip_test=opt.flip_test,
                                      is_pa=False)
        # NOTE: rgb (and pa) checkpoints are resolved under opt.save_root,
        # while the flow checkpoint below is used as-is.
        self.rgb_model = load_model(self.rgb_model,
                                    opt.save_root + opt.rgb_model)
        '''
        # ADDED: debug param weights
        for i, child in enumerate(self.rgb_model.children()):
            if i < 2:
                continue
            for l, param in enumerate(child.parameters()):
                if l == 0:
                    vistensor(param)
                #param.requires_grad = False
                #print(param.size())
        '''
        # ORIG
        self.rgb_model = DataParallel(self.rgb_model,
                                      device_ids=opt.gpus,
                                      chunk_sizes=opt.chunk_sizes).to(opt.device)
        self.rgb_model.eval()
    if opt.flow_model != '':
        print('create flow model')
        self.flow_model = create_model(opt.arch, opt.branch_info, opt.head_conv,
                                       opt.K, flip_test=opt.flip_test)
        # Adapt the first conv layer to stacked flow input (opt.ninput frames).
        self.flow_model = convert2flow(opt.ninput, self.flow_model)
        self.flow_model = load_model(self.flow_model, opt.flow_model)
        self.flow_model = DataParallel(self.flow_model,
                                       device_ids=opt.gpus,
                                       chunk_sizes=opt.chunk_sizes).to(opt.device)
        self.flow_model.eval()
    if opt.pa_model != '':
        print('create PA model')
        self.pa_model = create_model(opt.arch, opt.branch_info, opt.head_conv,
                                     opt.K, flip_test=opt.flip_test,
                                     is_pa=True,
                                     pa_fuse_mode=opt.pa_fuse_mode,
                                     rgb_w3=opt.rgb_w3)
        if opt.pa_fuse_mode == 'PAN':
            self.pa_model = convert2PAN(opt.ninput, self.pa_model, conv_idx=1)
        elif opt.pa_fuse_mode == 'TDN':
            pass
            #self.pa_model = convert2TDN(opt.ninput, self.pa_model, conv_idx=2)
            # idx 1 or 2? does not matter here as trained weight would be loaded here?
        # Single PAN stream
        else:
            self.pa_model = convert2PAN(opt.ninput, self.pa_model, conv_idx=1)
        self.pa_model = load_model(self.pa_model, opt.save_root + opt.pa_model)
        self.pa_model = DataParallel(
            self.pa_model,
            device_ids=opt.gpus,  #[0]
            chunk_sizes=opt.chunk_sizes).to(opt.device)
        self.pa_model.eval()
    self.num_classes = opt.num_classes
    self.opt = opt
    # added: for speed measurement
    self.total_time = 0
class MOCDetector(object):
    """Multi-stream (RGB / optical flow / PA) MOC action-tubelet detector.

    Typical flow: ``run(data)`` moves inputs to the device, calls
    ``process`` to get raw tubelet detections, then ``post_process`` to
    rescale boxes and group them per class.  Also accumulates pure
    detection time in ``self.total_time``.
    """

    def __init__(self, opt):
        """Create the requested streams, wrap in DataParallel, set eval mode.

        Mutates ``opt`` in place by setting ``opt.device``.
        """
        # Select device from the first configured GPU id (< 0 means CPU).
        if opt.gpus[0] >= 0:
            opt.device = torch.device('cuda')
        else:
            opt.device = torch.device('cpu')
        self.rgb_model = None
        self.flow_model = None
        self.pa_model = None
        if opt.rgb_model != '':
            print('create rgb model')
            self.rgb_model = create_model(opt.arch, opt.branch_info,
                                          opt.head_conv, opt.K,
                                          flip_test=opt.flip_test,
                                          is_pa=False)
            # NOTE: rgb/pa checkpoints are resolved under opt.save_root,
            # the flow checkpoint below is used as-is.
            self.rgb_model = load_model(self.rgb_model,
                                        opt.save_root + opt.rgb_model)
            '''
            # ADDED: debug param weights
            for i, child in enumerate(self.rgb_model.children()):
                if i < 2:
                    continue
                for l, param in enumerate(child.parameters()):
                    if l == 0:
                        vistensor(param)
                    #param.requires_grad = False
                    #print(param.size())
            '''
            # ORIG
            self.rgb_model = DataParallel(
                self.rgb_model, device_ids=opt.gpus,
                chunk_sizes=opt.chunk_sizes).to(opt.device)
            self.rgb_model.eval()
        if opt.flow_model != '':
            print('create flow model')
            self.flow_model = create_model(opt.arch, opt.branch_info,
                                           opt.head_conv, opt.K,
                                           flip_test=opt.flip_test)
            # Adapt first conv to stacked flow input (opt.ninput frames).
            self.flow_model = convert2flow(opt.ninput, self.flow_model)
            self.flow_model = load_model(self.flow_model, opt.flow_model)
            self.flow_model = DataParallel(
                self.flow_model, device_ids=opt.gpus,
                chunk_sizes=opt.chunk_sizes).to(opt.device)
            self.flow_model.eval()
        if opt.pa_model != '':
            print('create PA model')
            self.pa_model = create_model(opt.arch, opt.branch_info,
                                         opt.head_conv, opt.K,
                                         flip_test=opt.flip_test,
                                         is_pa=True,
                                         pa_fuse_mode=opt.pa_fuse_mode,
                                         rgb_w3=opt.rgb_w3)
            if opt.pa_fuse_mode == 'PAN':
                self.pa_model = convert2PAN(opt.ninput, self.pa_model,
                                            conv_idx=1)
            elif opt.pa_fuse_mode == 'TDN':
                pass
                #self.pa_model = convert2TDN(opt.ninput, self.pa_model, conv_idx=2)
                # idx 1 or 2? does not matter here as trained weight would be loaded here?
            # Single PAN stream
            else:
                self.pa_model = convert2PAN(opt.ninput, self.pa_model,
                                            conv_idx=1)
            self.pa_model = load_model(self.pa_model,
                                       opt.save_root + opt.pa_model)
            self.pa_model = DataParallel(
                self.pa_model,
                device_ids=opt.gpus,  #[0]
                chunk_sizes=opt.chunk_sizes).to(opt.device)
            self.pa_model.eval()
        self.num_classes = opt.num_classes
        self.opt = opt
        # added: for speed measurement
        self.total_time = 0

    def pre_process(self, images, is_flow=False, ninput=1):
        """Resize, stack and normalize K clips of *ninput* frames each.

        Returns a list of K arrays (2K when flip_test is on; entries
        [K:2K] hold the horizontally flipped copies), each of shape
        (3*ninput, H, W), normalized with per-channel mean/std.
        """
        K = self.opt.K
        # NOTE(review): cv2.resize takes dsize as (width, height); here
        # (resize_height, resize_width) is passed — only correct if the two
        # are equal (square input). Confirm against the training pipeline.
        images = [
            cv2.resize(im, (self.opt.resize_height, self.opt.resize_width),
                       interpolation=cv2.INTER_LINEAR) for im in images
        ]
        if self.opt.flip_test:
            data = [
                np.empty((3 * ninput, self.opt.resize_height,
                          self.opt.resize_width), dtype=np.float32)
                for i in range(K * 2)
            ]
        else:
            data = [
                np.empty((3 * ninput, self.opt.resize_height,
                          self.opt.resize_width), dtype=np.float32)
                for i in range(K)
            ]
        # Tile per-channel stats across the ninput stacked frames.
        mean = np.tile(
            np.array(self.opt.mean, dtype=np.float32)[:, None, None],
            (ninput, 1, 1))
        std = np.tile(
            np.array(self.opt.std, dtype=np.float32)[:, None, None],
            (ninput, 1, 1))
        for i in range(K):
            for ii in range(ninput):
                data[i][3 * ii:3 * ii + 3, :, :] = np.transpose(
                    images[i * ninput + ii], (2, 0, 1))  # added: *ninput
                if self.opt.flip_test:
                    # TODO
                    # NOTE(review): flip path indexes images[i + ii] while the
                    # non-flip path uses images[i * ninput + ii] — looks
                    # inconsistent for ninput > 1; confirm intended layout.
                    if is_flow:
                        # Mirror frame and negate the x-flow channel
                        # (stored offset around 255).
                        temp = images[i + ii].copy()
                        temp = temp[:, ::-1, :]
                        temp[:, :, 2] = 255 - temp[:, :, 2]
                        data[i + K][3 * ii:3 * ii + 3, :, :] = np.transpose(
                            temp, (2, 0, 1))
                    else:
                        data[i + K][3 * ii:3 * ii + 3, :, :] = np.transpose(
                            images[i + ii], (2, 0, 1))[:, :, ::-1]
            # normalize
            data[i] = ((data[i] / 255.) - mean) / std
            if self.opt.flip_test:
                data[i + K] = ((data[i + K] / 255.) - mean) / std
        return data

    def process(self, images, flows):
        """Run the active streams, fuse their heatmap/wh/mov outputs and decode.

        Fusion weight options (``*_fusion_rgb``) blend flow-or-PA with RGB.
        Returns raw decoded detections from ``moc_decode``.
        """
        with torch.no_grad():
            if self.rgb_model is not None:
                rgb_output = self.rgb_model(images)
                #rgb_hm = rgb_output[0]['hm'].sigmoid_()
                # NOTE(review): rgb heatmap sigmoid is commented out here while
                # flow/pa outputs below are sigmoided — confirm intentional.
                rgb_hm = rgb_output[0]['hm']
                rgb_wh = rgb_output[0]['wh']
                rgb_mov = rgb_output[0]['mov']
                # ADDED: one additional loss
                #rgb_hmc = rgb_output[0]['hmc']
                if self.opt.flip_test:
                    # Average original with un-flipped flipped-input outputs.
                    rgb_hm_f = rgb_output[1]['hm'].sigmoid_()
                    rgb_wh_f = rgb_output[1]['wh']
                    rgb_hm = (rgb_hm + flip_tensor(rgb_hm_f)) / 2
                    rgb_wh = (rgb_wh + flip_tensor(rgb_wh_f)) / 2
            if self.flow_model is not None:
                flow_output = self.flow_model(flows)
                flow_hm = flow_output[0]['hm'].sigmoid_()
                flow_wh = flow_output[0]['wh']
                flow_mov = flow_output[0]['mov']
                if self.opt.flip_test:
                    flow_hm_f = flow_output[1]['hm'].sigmoid_()
                    flow_wh_f = flow_output[1]['wh']
                    flow_hm = (flow_hm + flip_tensor(flow_hm_f)) / 2
                    flow_wh = (flow_wh + flip_tensor(flow_wh_f)) / 2
            if self.pa_model is not None:
                pa_output = self.pa_model(flows)
                pa_hm = pa_output[0]['hm'].sigmoid_()
                pa_wh = pa_output[0]['wh']
                pa_mov = pa_output[0]['mov']
            # Select/fuse outputs depending on which streams are active.
            if self.flow_model is not None and self.rgb_model is not None:
                hm = (1 - self.opt.hm_fusion_rgb
                      ) * flow_hm + self.opt.hm_fusion_rgb * rgb_hm
                wh = (1 - self.opt.wh_fusion_rgb
                      ) * flow_wh + self.opt.wh_fusion_rgb * rgb_wh
                mov = (1 - self.opt.mov_fusion_rgb
                       ) * flow_mov + self.opt.mov_fusion_rgb * rgb_mov
            elif self.flow_model is not None and self.rgb_model is None and self.pa_model is None:
                hm = flow_hm
                wh = flow_wh
                mov = flow_mov
            elif self.rgb_model is not None and self.flow_model is None and self.pa_model is None:
                hm = rgb_hm
                wh = rgb_wh
                mov = rgb_mov
            # TODO: two stream for rgb + pa
            elif self.pa_model is not None and self.rgb_model is not None and self.flow_model is None:
                hm = (1 - self.opt.hm_fusion_rgb
                      ) * pa_hm + self.opt.hm_fusion_rgb * rgb_hm
                wh = (1 - self.opt.wh_fusion_rgb
                      ) * pa_wh + self.opt.wh_fusion_rgb * rgb_wh
                mov = (1 - self.opt.mov_fusion_rgb
                       ) * pa_mov + self.opt.mov_fusion_rgb * rgb_mov
            elif self.pa_model is not None and self.rgb_model is None and self.flow_model is None:
                hm = pa_hm
                wh = pa_wh
                mov = pa_mov
            else:
                print('No model exists.')
                assert 0
            # ADDED: minus mem (only detect on current clip)
            #mov = None
            detections = moc_decode(hm, wh, mov, N=self.opt.N,
                                    K=self.opt.K - 0)
            #hm = hm[:,42:63,:,:]
            #detections = moc_decode_multihm(hm, wh, mov, N=self.opt.N, K=self.opt.K - 0)
        return detections

    def post_process(self, detections, height, width, output_height,
                     output_width, num_classes, K):
        """Rescale boxes from output-map to image coordinates, clamp them,
        and bucket per 1-based class id.

        Returns, per batch item, a dict {class_id: float32 array of
        [4*K box coords..., score]}.
        """
        detections = detections.detach().cpu().numpy()
        results = []
        for i in range(detections.shape[0]):
            # batch
            top_preds = {}
            for j in range((detections.shape[2] - 2) // 2):
                # tailor bbox to prevent out of bounds
                detections[i, :, 2 * j] = np.maximum(
                    0,
                    np.minimum(width - 1,
                               detections[i, :, 2 * j] / output_width * width))
                detections[i, :, 2 * j + 1] = np.maximum(
                    0,
                    np.minimum(
                        height - 1,
                        detections[i, :, 2 * j + 1] / output_height * height))
            classes = detections[i, :, -1]
            # gather bbox for each class
            for c in range(self.opt.num_classes):
                inds = (classes == c)
                top_preds[c + 1] = detections[i, inds, :4 * (K - 0) +
                                              1].astype(np.float32)  # ORIG: just K
            results.append(top_preds)
        return results

    def run(self, data):
        """Move inputs to the device, detect on one sample, and post-process.

        Returns (per-class detections, accumulated detection seconds).
        """
        flows = None
        images = None
        if self.rgb_model is not None:
            images = data['images']
            for i in range(len(images)):
                '''
                # ADDED: vis for debug
                # data[i] = ((data[i] / 255.) - mean) / std
                image_temp = images[i].numpy().squeeze().transpose(1,2,0)
                image_temp = ((image_temp * self.opt.std + self.opt.mean) * 255).astype(np.uint8)
                image_temp = cv2.cvtColor(image_temp, cv2.COLOR_BGR2RGB)
                plt.imshow(image_temp)
                plt.show()
                '''
                images[i] = images[i].to(self.opt.device)
        if self.flow_model is not None:
            flows = data['flows']
            for i in range(len(flows)):
                flows[i] = flows[i].to(self.opt.device)
        if self.pa_model is not None:
            # PA stream consumes the same 'flows' entry of the batch.
            flows = data['flows']
            for i in range(len(flows)):
                flows[i] = flows[i].to(self.opt.device)
        meta = data['meta']
        meta = {k: v.numpy()[0] for k, v in meta.items()}
        # Time only the forward/decode step for speed measurement.
        detection_start = time.time()
        detections = self.process(images, flows)
        detection_end = time.time()
        self.total_time += detection_end - detection_start
        detections = self.post_process(detections, meta['height'],
                                       meta['width'], meta['output_height'],
                                       meta['output_width'],
                                       self.opt.num_classes, self.opt.K)
        return detections, self.total_time
class MOCTrainer(object):
    """Runs training/validation epochs for the MOC model.

    Wraps the model together with its loss (so DataParallel scatters both),
    tracks running loss statistics, and logs to a TensorBoard writer.
    """

    def __init__(self, opt, model, optimizer=None):
        self.opt = opt
        self.optimizer = optimizer
        # Scalar statistics tracked and reported every iteration.
        self.loss_stats = [
            'loss', 'loss_hm', 'loss_mov', 'loss_wh', 'loss_mgan'
        ]
        self.model_with_loss = ModleWithLoss(model, MOCTrainLoss(opt))

    def train(self, epoch, data_loader, writer):
        """Run one training epoch; returns averaged loss stats."""
        return self.run_epoch('train', epoch, data_loader, writer)

    def val(self, epoch, data_loader, writer):
        """Run one validation epoch; returns averaged loss stats."""
        return self.run_epoch('val', epoch, data_loader, writer)

    def run_epoch(self, phase, epoch, data_loader, writer):
        """Iterate *data_loader* once in the given phase ('train' or 'val').

        In 'train' phase performs backprop and optimizer steps and logs
        running averages to *writer* every ``opt.visual_per_inter`` iters.
        Returns a dict of average loss values plus elapsed 'time' (minutes).
        """
        model_with_loss = self.model_with_loss
        if phase == 'train':
            model_with_loss.train()
        else:
            model_with_loss.eval()
            torch.cuda.empty_cache()
        opt = self.opt
        avg_loss_stats = {l: AverageMeter() for l in self.loss_stats}
        num_iters = len(data_loader)
        # num_iters = 10
        bar = Bar(opt.exp_id, max=num_iters)
        # NOTE: `iter` shadows the builtin within this loop (kept as-is).
        for iter, batch in enumerate(data_loader):
            if iter >= num_iters:
                break
            # Move every tensor in the batch to the training device;
            # 'input' is a list of K frame tensors, other keys are tensors.
            for k in batch:
                if k == 'input':
                    assert len(batch[k]) == self.opt.K
                    for i in range(len(batch[k])):
                        # MODIFY for pytorch 0.4.0
                        # batch[k][i] = batch[k][i].to(device=opt.device)
                        batch[k][i] = batch[k][i].to(device=opt.device,
                                                     non_blocking=True)
                else:
                    # MODIFY for pytorch 0.4.0
                    # batch[k] = batch[k].to(device=opt.device)
                    batch[k] = batch[k].to(device=opt.device,
                                           non_blocking=True)
            output, loss, loss_stats = model_with_loss(batch)
            # Mean over DataParallel replicas (loss arrives per-GPU).
            loss = loss.mean()
            if phase == 'train':
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()
            Bar.suffix = '{phase}: [{0}][{1}/{2}]|Tot: {total:} |ETA: {eta:} '.format(
                epoch, iter, num_iters, phase=phase, total=bar.elapsed_td,
                eta=bar.eta_td)
            # Global step for TensorBoard, continuous across epochs.
            step = iter // opt.visual_per_inter + num_iters // opt.visual_per_inter * (
                epoch - 1)
            for l in self.loss_stats:
                # Weight running averages by the per-GPU batch size.
                avg_loss_stats[l].update(loss_stats[l].mean().item(),
                                         batch['input'][0].size(0))
                if phase == 'train' and iter % opt.visual_per_inter == 0 and iter != 0:
                    writer.add_scalar('train/{}'.format(l),
                                      avg_loss_stats[l].avg, step)
                    writer.flush()
                Bar.suffix = Bar.suffix + '|{} {:.4f} '.format(
                    l, avg_loss_stats[l].avg)
            bar.next()
            # Free references so CUDA memory can be reclaimed each iter.
            del output, loss, loss_stats
        bar.finish()
        ret = {k: v.avg for k, v in avg_loss_stats.items()}
        ret['time'] = bar.elapsed_td.total_seconds() / 60.
        return ret

    def set_device(self, gpus, chunk_sizes, device):
        """Place model(+loss) on *device*, multi-GPU via DataParallel,
        and migrate any existing optimizer state tensors."""
        if len(gpus) > 1:
            self.model_with_loss = DataParallel(
                self.model_with_loss, device_ids=gpus,
                chunk_sizes=chunk_sizes).to(device)
        else:
            self.model_with_loss = self.model_with_loss.to(device)
        # Optimizer state (e.g. momentum buffers) must follow the model.
        for state in self.optimizer.state.values():
            for k, v in state.items():
                if isinstance(v, torch.Tensor):
                    # MODIFY for pytorch 0.4.0
                    state[k] = v.to(device=device, non_blocking=True)
class MOCDetector(object):
    """Streaming (online) MOC detector with split backbone/branch models.

    Per-frame backbone features are cached in buffers so that for each new
    frame only one backbone forward is needed; the branch model then fuses
    the K buffered feature maps into tubelet detections.

    Fixes vs. original:
    * the CPU guard used ``assert 'cpu is not supported!'`` — a non-empty
      string literal is always truthy, so it never fired; replaced with an
      explicit raise.
    * ``pre_process_single_frame`` passed ``(resize_height, resize_width)``
      as cv2.resize ``dsize``; cv2 expects (width, height), as used by
      ``pre_process`` in this same class.
    """

    def __init__(self, opt):
        """Create backbone/branch models per configured stream (GPU only).

        Mutates ``opt`` in place by setting ``opt.device``.
        Raises AssertionError when no GPU is configured.
        """
        if opt.gpus[0] >= 0:
            opt.device = torch.device('cuda')
        else:
            # FIX: original `assert 'cpu is not supported!'` was always true.
            raise AssertionError('cpu is not supported!')
        self.rgb_model_backbone, self.rgb_model_branch = None, None
        self.flow_model_backbone, self.flow_model_branch = None, None
        if opt.rgb_model != '':
            self.rgb_model_backbone, self.rgb_model_branch = create_inference_model(
                opt.arch, opt.branch_info, opt.head_conv, opt.K,
                flip_test=opt.flip_test)
            print('create rgb model', flush=True)
            self.rgb_model_backbone, self.rgb_model_branch = load_inference_model(
                self.rgb_model_backbone, self.rgb_model_branch, opt.rgb_model)
            print('load rgb model', flush=True)
            # Single-GPU DataParallel, chunk size 1 (online inference).
            self.rgb_model_backbone = DataParallel(
                self.rgb_model_backbone, device_ids=[opt.gpus[0]],
                chunk_sizes=[1]).to(opt.device)
            self.rgb_model_branch = DataParallel(
                self.rgb_model_branch, device_ids=[opt.gpus[0]],
                chunk_sizes=[1]).to(opt.device)
            print('put rgb model to gpu', flush=True)
            self.rgb_model_backbone.eval()
            self.rgb_model_branch.eval()
        if opt.flow_model != '':
            self.flow_model_backbone, self.flow_model_branch = create_inference_model(
                opt.arch, opt.branch_info, opt.head_conv, opt.K,
                flip_test=opt.flip_test)
            # Adapt first conv to stacked flow input (opt.ninput frames).
            self.flow_model_backbone = convert2flow(opt.ninput,
                                                    self.flow_model_backbone)
            print('create flow model', flush=True)
            self.flow_model_backbone, self.flow_model_branch = load_inference_model(
                self.flow_model_backbone, self.flow_model_branch,
                opt.flow_model)
            print('load flow model', flush=True)
            self.flow_model_backbone = DataParallel(
                self.flow_model_backbone, device_ids=[opt.gpus[0]],
                chunk_sizes=[1]).to(opt.device)
            self.flow_model_branch = DataParallel(
                self.flow_model_branch, device_ids=[opt.gpus[0]],
                chunk_sizes=[1]).to(opt.device)
            print('put flow model to gpu', flush=True)
            self.flow_model_backbone.eval()
            self.flow_model_branch.eval()
        self.num_classes = opt.num_classes
        self.opt = opt
        # Sliding buffers of per-frame backbone features (+ flipped variants).
        self.rgb_buffer = []
        self.flow_buffer = []
        self.rgb_buffer_flip = []
        self.flow_buffer_flip = []

    def pre_process(self, images, is_flow=False, ninput=1):
        """Resize, stack and normalize K clips of *ninput* frames each.

        Returns a list of K arrays (2K with flip_test; entries [K:2K] are
        the horizontally flipped copies), each (3*ninput, H, W), float32.
        """
        K = self.opt.K
        # cv2.resize dsize is (width, height).
        images = [
            cv2.resize(im, (self.opt.resize_width, self.opt.resize_height),
                       interpolation=cv2.INTER_LINEAR) for im in images
        ]
        if self.opt.flip_test:
            data = [
                np.empty((3 * ninput, self.opt.resize_height,
                          self.opt.resize_width), dtype=np.float32)
                for i in range(K * 2)
            ]
        else:
            data = [
                np.empty((3 * ninput, self.opt.resize_height,
                          self.opt.resize_width), dtype=np.float32)
                for i in range(K)
            ]
        # Tile per-channel stats across the ninput stacked frames.
        mean = np.tile(
            np.array(self.opt.mean, dtype=np.float32)[:, None, None],
            (ninput, 1, 1))
        std = np.tile(
            np.array(self.opt.std, dtype=np.float32)[:, None, None],
            (ninput, 1, 1))
        for i in range(K):
            for ii in range(ninput):
                data[i][3 * ii:3 * ii + 3, :, :] = np.transpose(
                    images[i + ii], (2, 0, 1))
                if self.opt.flip_test:
                    # TODO
                    if is_flow:
                        # Mirror frame and negate the x-flow channel
                        # (stored offset around 255).
                        temp = images[i + ii].copy()
                        temp = temp[:, ::-1, :]
                        temp[:, :, 2] = 255 - temp[:, :, 2]
                        data[i + K][3 * ii:3 * ii + 3, :, :] = np.transpose(
                            temp, (2, 0, 1))
                    else:
                        data[i + K][3 * ii:3 * ii + 3, :, :] = np.transpose(
                            images[i + ii], (2, 0, 1))[:, :, ::-1]
            # normalize
            data[i] = ((data[i] / 255.) - mean) / std
            if self.opt.flip_test:
                data[i + K] = ((data[i + K] / 255.) - mean) / std
        return data

    def pre_process_single_frame(self, images, is_flow=False, ninput=1,
                                 data_last=None, data_last_flip=None):
        """Preprocess one new frame for streaming inference.

        For flow input, reuses the previous stacked tensor *data_last*
        (shifted by one frame) and appends the new normalized frame.
        Returns (data, data_flip), each (3*ninput, H, W) float32.
        """
        # FIX: cv2.resize dsize is (width, height); original swapped them,
        # which broke non-square resolutions (see pre_process above).
        images = cv2.resize(images,
                            (self.opt.resize_width, self.opt.resize_height),
                            interpolation=cv2.INTER_LINEAR)
        data = np.empty(
            (3 * ninput, self.opt.resize_height, self.opt.resize_width),
            dtype=np.float32)
        data_flip = np.empty(
            (3 * ninput, self.opt.resize_height, self.opt.resize_width),
            dtype=np.float32)
        mean = np.array(self.opt.mean, dtype=np.float32)[:, None, None]
        std = np.array(self.opt.std, dtype=np.float32)[:, None, None]
        if not is_flow:
            data = np.transpose(images, (2, 0, 1))
            if self.opt.flip_test:
                data_flip = np.transpose(images, (2, 0, 1))[:, :, ::-1]
            data = ((data / 255.) - mean) / std
            if self.opt.flip_test:
                data_flip = ((data_flip / 255.) - mean) / std
        else:
            # Shift the stacked-flow window by one frame, append the new one.
            data[:3 * ninput - 3, :, :] = data_last[3:, :, :]
            data[3 * ninput - 3:, :, :] = (np.transpose(images, (2, 0, 1)) /
                                           255. - mean) / std
            if self.opt.flip_test:
                # Mirror and negate the x-flow channel (offset around 255).
                temp = images.copy()
                temp = temp[:, ::-1, :]
                temp[:, :, 2] = 255 - temp[:, :, 2]
                data_flip[:3 * ninput - 3, :, :] = data_last_flip[3:, :, :]
                data_flip[3 * ninput - 3:, :, :] = (np.transpose(
                    temp, (2, 0, 1)) / 255. - mean) / std
        return data, data_flip

    def process(self, images, flows, video_tag):
        """Forward the buffers through branch models and decode detections.

        video_tag == 0 means a new video: the feature buffers are rebuilt
        from all K inputs; otherwise only the newest frame is pushed
        (oldest popped).  Returns raw ``moc_decode`` output.
        """
        with torch.no_grad():
            if self.rgb_model_backbone is not None:
                if video_tag == 0:
                    rgb_features = [
                        self.rgb_model_backbone(images[i])
                        for i in range(self.opt.K)
                    ]
                    self.rgb_buffer = rgb_features
                    if self.opt.flip_test:
                        # Flipped inputs occupy indices [K:2K].
                        rgb_features_flip = [
                            self.rgb_model_backbone(images[i + self.opt.K])
                            for i in range(self.opt.K)
                        ]
                        self.rgb_buffer_flip = rgb_features_flip
                else:
                    del self.rgb_buffer[0]
                    self.rgb_buffer.append(
                        self.rgb_model_backbone(images[self.opt.K - 1]))
                    if self.opt.flip_test:
                        del self.rgb_buffer_flip[0]
                        # images[-1] is the flipped copy of the newest frame.
                        self.rgb_buffer_flip.append(
                            self.rgb_model_backbone(images[-1]))
                rgb_output = self.rgb_model_branch(self.rgb_buffer,
                                                   self.rgb_buffer_flip)
                rgb_hm = rgb_output[0]['hm'].sigmoid_()
                rgb_wh = rgb_output[0]['wh']
                rgb_mov = rgb_output[0]['mov']
                if self.opt.flip_test:
                    rgb_hm_f = rgb_output[1]['hm'].sigmoid_()
                    rgb_wh_f = rgb_output[1]['wh']
                    rgb_hm = (rgb_hm + flip_tensor(rgb_hm_f)) / 2
                    rgb_wh = (rgb_wh + flip_tensor(rgb_wh_f)) / 2
            if self.flow_model_backbone is not None:
                if video_tag == 0:
                    flow_features = [
                        self.flow_model_backbone(flows[i])
                        for i in range(self.opt.K)
                    ]
                    self.flow_buffer = flow_features
                    if self.opt.flip_test:
                        flow_features_flip = [
                            self.flow_model_backbone(flows[i + self.opt.K])
                            for i in range(self.opt.K)
                        ]
                        self.flow_buffer_flip = flow_features_flip
                else:
                    del self.flow_buffer[0]
                    self.flow_buffer.append(
                        self.flow_model_backbone(flows[self.opt.K - 1]))
                    if self.opt.flip_test:
                        del self.flow_buffer_flip[0]
                        self.flow_buffer_flip.append(
                            self.flow_model_backbone(flows[-1]))
                flow_output = self.flow_model_branch(self.flow_buffer,
                                                     self.flow_buffer_flip)
                flow_hm = flow_output[0]['hm'].sigmoid_()
                flow_wh = flow_output[0]['wh']
                flow_mov = flow_output[0]['mov']
                if self.opt.flip_test:
                    flow_hm_f = flow_output[1]['hm'].sigmoid_()
                    flow_wh_f = flow_output[1]['wh']
                    flow_hm = (flow_hm + flip_tensor(flow_hm_f)) / 2
                    flow_wh = (flow_wh + flip_tensor(flow_wh_f)) / 2
            # Fuse the streams (or use whichever single stream is active).
            if self.flow_model_backbone is not None and self.rgb_model_backbone is not None:
                hm = (1 - self.opt.hm_fusion_rgb
                      ) * flow_hm + self.opt.hm_fusion_rgb * rgb_hm
                wh = (1 - self.opt.wh_fusion_rgb
                      ) * flow_wh + self.opt.wh_fusion_rgb * rgb_wh
                mov = (1 - self.opt.mov_fusion_rgb
                       ) * flow_mov + self.opt.mov_fusion_rgb * rgb_mov
            elif self.flow_model_backbone is not None and self.rgb_model_backbone is None:
                hm = flow_hm
                wh = flow_wh
                mov = flow_mov
            elif self.rgb_model_backbone is not None and self.flow_model_backbone is None:
                hm = rgb_hm
                wh = rgb_wh
                mov = rgb_mov
            else:
                print('No model exists.')
                assert 0
            detections = moc_decode(hm, wh, mov, N=self.opt.N, K=self.opt.K)
            return detections

    def post_process(self, detections, height, width, output_height,
                     output_width, num_classes, K):
        """Rescale boxes from output-map to image coordinates, clamp them,
        and bucket per 1-based class id.

        Returns, per batch item, a dict {class_id: float32 array of
        [4*K box coords..., score]}.
        """
        detections = detections.detach().cpu().numpy()
        results = []
        for i in range(detections.shape[0]):
            top_preds = {}
            for j in range((detections.shape[2] - 2) // 2):
                # tailor bbox to prevent out of bounds
                detections[i, :, 2 * j] = np.maximum(
                    0,
                    np.minimum(width - 1,
                               detections[i, :, 2 * j] / output_width * width))
                detections[i, :, 2 * j + 1] = np.maximum(
                    0,
                    np.minimum(
                        height - 1,
                        detections[i, :, 2 * j + 1] / output_height * height))
            classes = detections[i, :, -1]
            # gather bbox for each class
            for c in range(self.opt.num_classes):
                inds = (classes == c)
                top_preds[c + 1] = detections[i, inds, :4 * K + 1].astype(
                    np.float32)
            results.append(top_preds)
        return results

    def run(self, data):
        """Move one sample to the device, detect, and post-process."""
        flows = None
        images = None
        if self.rgb_model_backbone is not None:
            images = data['images']
            for i in range(len(images)):
                images[i] = images[i].to(self.opt.device)
        if self.flow_model_backbone is not None:
            flows = data['flows']
            for i in range(len(flows)):
                flows[i] = flows[i].to(self.opt.device)
        meta = data['meta']
        meta = {k: v.numpy()[0] for k, v in meta.items()}
        detections = self.process(images, flows, data['video_tag'])
        detections = self.post_process(detections, meta['height'],
                                       meta['width'], meta['output_height'],
                                       meta['output_width'],
                                       self.opt.num_classes, self.opt.K)
        return detections
class MOCDetector(object):
    """MOC detector split into separately loadable backbone / branch stages.

    ``load_backbone`` is used by the feature-extraction side
    (``extract_feature``); ``load_branch`` by the detection side
    (``run`` -> ``det_process`` on precomputed features).

    Fix vs. original: ``extract_feature`` contained a duplicated
    ``if self.flow_model_backbone is not None:`` nested inside itself;
    the redundant inner check is removed (no behavior change).
    """

    def __init__(self, opt):
        """Record options; models are loaded lazily via load_backbone/branch.

        Mutates ``opt`` in place by setting ``opt.device``.
        """
        if opt.gpus[0] >= 0:
            opt.device = torch.device('cuda')
        else:
            opt.device = torch.device('cpu')
        self.rgb_model_backbone, self.rgb_model_branch = None, None
        self.flow_model_backbone, self.flow_model_branch = None, None
        self.num_classes = opt.num_classes
        self.opt = opt

    def load_backbone(self):
        """Create/load the backbone stage for each configured stream and
        wrap it in DataParallel (eval mode)."""
        opt = self.opt
        if opt.rgb_model != '':
            print('create rgb model')
            self.rgb_model_backbone, self.rgb_model_branch = create_inference_model(
                opt.arch, opt.branch_info, opt.head_conv, opt.K,
                flip_test=opt.flip_test)
            self.rgb_model_backbone, self.rgb_model_branch = load_inference_model(
                self.rgb_model_backbone, self.rgb_model_branch, opt.rgb_model)
            self.rgb_model_backbone = DataParallel(
                self.rgb_model_backbone, device_ids=opt.gpus,
                chunk_sizes=opt.chunk_sizes).to(opt.device)
            self.rgb_model_backbone.eval()
        if opt.flow_model != '':
            print('create flow model')
            self.flow_model_backbone, self.flow_model_branch = create_inference_model(
                opt.arch, opt.branch_info, opt.head_conv, opt.K,
                flip_test=opt.flip_test)
            # Adapt first conv to stacked flow input (opt.ninput frames).
            self.flow_model_backbone = convert2flow(opt.ninput,
                                                    self.flow_model_backbone)
            self.flow_model_backbone, self.flow_model_branch = load_inference_model(
                self.flow_model_backbone, self.flow_model_branch,
                opt.flow_model)
            self.flow_model_backbone = DataParallel(
                self.flow_model_backbone, device_ids=opt.gpus,
                chunk_sizes=opt.chunk_sizes).to(opt.device)
            self.flow_model_backbone.eval()

    def load_branch(self):
        """Create/load the branch (detection-head) stage for each configured
        stream and wrap it in DataParallel (eval mode)."""
        opt = self.opt
        if opt.rgb_model != '':
            print('create rgb model')
            self.rgb_model_backbone, self.rgb_model_branch = create_inference_model(
                opt.arch, opt.branch_info, opt.head_conv, opt.K,
                flip_test=opt.flip_test)
            self.rgb_model_backbone, self.rgb_model_branch = load_inference_model(
                self.rgb_model_backbone, self.rgb_model_branch, opt.rgb_model)
            self.rgb_model_branch = DataParallel(
                self.rgb_model_branch, device_ids=opt.gpus,
                chunk_sizes=opt.chunk_sizes).to(opt.device)
            self.rgb_model_branch.eval()
        if opt.flow_model != '':
            print('create flow model')
            self.flow_model_backbone, self.flow_model_branch = create_inference_model(
                opt.arch, opt.branch_info, opt.head_conv, opt.K,
                flip_test=opt.flip_test)
            self.flow_model_backbone = convert2flow(opt.ninput,
                                                    self.flow_model_backbone)
            self.flow_model_backbone, self.flow_model_branch = load_inference_model(
                self.flow_model_backbone, self.flow_model_branch,
                opt.flow_model)
            self.flow_model_branch = DataParallel(
                self.flow_model_branch, device_ids=opt.gpus,
                chunk_sizes=opt.chunk_sizes).to(opt.device)
            self.flow_model_branch.eval()

    def pre_process(self, images, is_flow=False, ninput=1):
        """Resize, stack and normalize one clip of *ninput* frames.

        Returns a 1-element list (2 elements with flip_test; index 1 is the
        horizontally flipped copy), each entry (3*ninput, H, W) float32.
        NOTE(review): cv2.resize dsize is (width, height); here
        (resize_height, resize_width) is passed — only correct for square
        resolutions. Confirm against the training pipeline.
        """
        images = [
            cv2.resize(im, (self.opt.resize_height, self.opt.resize_width),
                       interpolation=cv2.INTER_LINEAR) for im in images
        ]
        if self.opt.flip_test:
            data = [
                np.empty((3 * ninput, self.opt.resize_height,
                          self.opt.resize_width), dtype=np.float32)
                for i in range(2)
            ]
        else:
            data = [
                np.empty((3 * ninput, self.opt.resize_height,
                          self.opt.resize_width), dtype=np.float32)
            ]
        # Tile per-channel stats across the ninput stacked frames.
        mean = np.tile(
            np.array(self.opt.mean, dtype=np.float32)[:, None, None],
            (ninput, 1, 1))
        std = np.tile(
            np.array(self.opt.std, dtype=np.float32)[:, None, None],
            (ninput, 1, 1))
        for ii in range(ninput):
            data[0][3 * ii:3 * ii + 3, :, :] = np.transpose(
                images[ii], (2, 0, 1))
            if self.opt.flip_test:
                if is_flow:
                    # Mirror frame and negate the x-flow channel
                    # (stored offset around 255).
                    temp = images[ii].copy()
                    temp = temp[:, ::-1, :]
                    temp[:, :, 2] = 255 - temp[:, :, 2]
                    data[1][3 * ii:3 * ii + 3, :, :] = np.transpose(
                        temp, (2, 0, 1))
                else:
                    data[1][3 * ii:3 * ii + 3, :, :] = np.transpose(
                        images[ii], (2, 0, 1))[:, :, ::-1]
        # normalize
        data[0] = ((data[0] / 255.) - mean) / std
        if self.opt.flip_test:
            data[1] = ((data[1] / 255.) - mean) / std
        return data

    def extract_feature(self, data):
        """Run the backbone(s) on one preprocessed sample.

        Returns (rgb_features, rgb_features_flip, flow_features,
        flow_features_flip); entries are None for inactive streams or when
        flip_test is off.
        """
        flows = None
        images = None
        if self.rgb_model_backbone is not None:
            images = data['images']
            for i in range(len(images)):
                images[i] = images[i].to(self.opt.device)
        if self.flow_model_backbone is not None:
            flows = data['flows']
            for i in range(len(flows)):
                flows[i] = flows[i].to(self.opt.device)
        rgb_features, rgb_features_flip, flow_features, flow_features_flip = None, None, None, None
        with torch.no_grad():
            if self.rgb_model_backbone is not None:
                rgb_features = self.rgb_model_backbone(images[0])
                if self.opt.flip_test:
                    rgb_features_flip = self.rgb_model_backbone(images[1])
            # FIX: original had this identical check duplicated and nested.
            if self.flow_model_backbone is not None:
                flow_features = self.flow_model_backbone(flows[0])
                if self.opt.flip_test:
                    flow_features_flip = self.flow_model_backbone(flows[1])
        return rgb_features, rgb_features_flip, flow_features, flow_features_flip

    def det_process(self, feature):
        """Run branch heads on precomputed features, fuse streams, decode.

        Returns raw ``moc_decode`` output.
        """
        with torch.no_grad():
            if self.rgb_model_backbone is not None:
                rgb_output = self.rgb_model_branch(
                    feature['rgb_features'], feature['rgb_features_flip'])
                rgb_hm = rgb_output[0]['hm'].sigmoid_()
                rgb_wh = rgb_output[0]['wh']
                rgb_mov = rgb_output[0]['mov']
                if self.opt.flip_test:
                    rgb_hm_f = rgb_output[1]['hm'].sigmoid_()
                    rgb_wh_f = rgb_output[1]['wh']
                    rgb_hm = (rgb_hm + flip_tensor(rgb_hm_f)) / 2
                    rgb_wh = (rgb_wh + flip_tensor(rgb_wh_f)) / 2
            if self.flow_model_backbone is not None:
                flow_output = self.flow_model_branch(
                    feature['flow_features'], feature['flow_features_flip'])
                flow_hm = flow_output[0]['hm'].sigmoid_()
                flow_wh = flow_output[0]['wh']
                flow_mov = flow_output[0]['mov']
                if self.opt.flip_test:
                    flow_hm_f = flow_output[1]['hm'].sigmoid_()
                    flow_wh_f = flow_output[1]['wh']
                    flow_hm = (flow_hm + flip_tensor(flow_hm_f)) / 2
                    flow_wh = (flow_wh + flip_tensor(flow_wh_f)) / 2
            # Fuse the streams (or use whichever single stream is active).
            if self.flow_model_backbone is not None and self.rgb_model_backbone is not None:
                hm = (1 - self.opt.hm_fusion_rgb
                      ) * flow_hm + self.opt.hm_fusion_rgb * rgb_hm
                wh = (1 - self.opt.wh_fusion_rgb
                      ) * flow_wh + self.opt.wh_fusion_rgb * rgb_wh
                mov = (1 - self.opt.mov_fusion_rgb
                       ) * flow_mov + self.opt.mov_fusion_rgb * rgb_mov
            elif self.flow_model_backbone is not None and self.rgb_model_backbone is None:
                hm = flow_hm
                wh = flow_wh
                mov = flow_mov
            elif self.rgb_model_backbone is not None and self.flow_model_backbone is None:
                hm = rgb_hm
                wh = rgb_wh
                mov = rgb_mov
            else:
                print('No model exists.')
                assert 0
            detections = moc_decode(hm, wh, mov, N=self.opt.N, K=self.opt.K)
            return detections

    def post_process(self, detections, height, width, output_height,
                     output_width, num_classes, K):
        """Rescale boxes to image coordinates, clamp, and bucket per class.

        Returns, per batch item, a dict {class_id: (n, 4*K+1) float32 array
        of [box coords..., score]}.
        """
        detections = detections.detach().cpu().numpy()
        results = []
        for i in range(detections.shape[0]):
            top_preds = {}
            for j in range((detections.shape[2] - 2) // 2):
                # tailor bbox to prevent out of bounds
                detections[i, :, 2 * j] = np.maximum(
                    0,
                    np.minimum(width - 1,
                               detections[i, :, 2 * j] / output_width * width))
                detections[i, :, 2 * j + 1] = np.maximum(
                    0,
                    np.minimum(
                        height - 1,
                        detections[i, :, 2 * j + 1] / output_height * height))
            classes = detections[i, :, -1]
            # gather bbox for each class
            for c in range(self.opt.num_classes):
                inds = (classes == c)
                top_preds[c + 1] = np.concatenate([
                    detections[i, inds, :4 * K].astype(np.float32),
                    detections[i, inds, 4 * K:4 * K + 1].astype(np.float32)
                ], axis=1).tolist()
            results.append(top_preds)
        # Convert the per-class lists back to fixed-shape float32 arrays.
        for i in range(len(results)):
            for j in range(1, self.num_classes + 1):
                results[i][j] = np.array(results[i][j],
                                         dtype=np.float32).reshape(
                                             -1, self.opt.K * 4 + 1)
        return results

    def run(self, data):
        """Move precomputed features to the device, detect, post-process."""
        if self.rgb_model_backbone is not None:
            for i in range(self.opt.K):
                data['rgb_features'][i] = data['rgb_features'][i].to(
                    self.opt.device)
            if self.opt.flip_test:
                for i in range(self.opt.K):
                    data['rgb_features_flip'][i] = data['rgb_features_flip'][
                        i].to(self.opt.device)
        if self.flow_model_backbone is not None:
            for i in range(self.opt.K):
                data['flow_features'][i] = data['flow_features'][i].to(
                    self.opt.device)
            if self.opt.flip_test:
                for i in range(self.opt.K):
                    data['flow_features_flip'][i] = data['flow_features_flip'][
                        i].to(self.opt.device)
        meta = data['meta']
        meta = {k: v.numpy()[0] for k, v in meta.items()}
        # detections--->[b, N, 4*K+1+1] (bboxes, scores, classes)
        detections = self.det_process(data)
        # detections--->[b, class, 4*K+1] (bboxes, scores)
        detections = self.post_process(detections, meta['height'],
                                       meta['width'], meta['output_height'],
                                       meta['output_width'],
                                       self.opt.num_classes, self.opt.K)
        return detections