Example #1
    def det_process(self, feature):
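        """Run the detector heads on cached features, fuse the RGB and flow
        predictions ('hm', 'wh', 'mov'), and decode them into detections."""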
        with torch.no_grad():
            if self.rgb_model_backbone is not None:
                rgb_output = self.rgb_model_branch(
                    feature['rgb_features'], feature['rgb_features_flip'])
                rgb_hm = rgb_output[0]['hm'].sigmoid_()
                rgb_wh = rgb_output[0]['wh']
                rgb_mov = rgb_output[0]['mov']
                if self.opt.flip_test:
                    rgb_hm_f = rgb_output[1]['hm'].sigmoid_()
                    rgb_wh_f = rgb_output[1]['wh']

                    rgb_hm = (rgb_hm + flip_tensor(rgb_hm_f)) / 2
                    rgb_wh = (rgb_wh + flip_tensor(rgb_wh_f)) / 2

            if self.flow_model_backbone is not None:
                flow_output = self.flow_model_branch(
                    feature['flow_features'], feature['flow_features_flip'])
                flow_hm = flow_output[0]['hm'].sigmoid_()
                flow_wh = flow_output[0]['wh']
                flow_mov = flow_output[0]['mov']
                if self.opt.flip_test:
                    flow_hm_f = flow_output[1]['hm'].sigmoid_()
                    flow_wh_f = flow_output[1]['wh']

                    flow_hm = (flow_hm + flip_tensor(flow_hm_f)) / 2
                    flow_wh = (flow_wh + flip_tensor(flow_wh_f)) / 2

            if self.flow_model_backbone is not None and self.rgb_model_backbone is not None:
                hm = (1 - self.opt.hm_fusion_rgb
                      ) * flow_hm + self.opt.hm_fusion_rgb * rgb_hm
                wh = (1 - self.opt.wh_fusion_rgb
                      ) * flow_wh + self.opt.wh_fusion_rgb * rgb_wh
                mov = (1 - self.opt.mov_fusion_rgb
                       ) * flow_mov + self.opt.mov_fusion_rgb * rgb_mov
            elif self.flow_model_backbone is not None and self.rgb_model_backbone is None:
                hm = flow_hm
                wh = flow_wh
                mov = flow_mov
            elif self.rgb_model_backbone is not None and self.flow_model_backbone is None:
                hm = rgb_hm
                wh = rgb_wh
                mov = rgb_mov
            else:
                print('No model exists.')
                assert 0

            detections = moc_decode(hm, wh, mov, N=self.opt.N, K=self.opt.K)
            return detections
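All four examples rely on a flip_tensor helper for test-time flip augmentation. A minimal sketch, assuming CenterNet-style behavior where an NCHW feature map is mirrored along its width axis (the real helper in this codebase may differ):

    import torch

    def flip_tensor(x):
        # Flip an NCHW feature map along its width (last) axis so that
        # predictions from a horizontally flipped input can be averaged
        # with predictions from the original input.
        return torch.flip(x, dims=[3])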
Example #2
    def process(self, images, flows):
        with torch.no_grad():
            if self.rgb_model is not None:
                rgb_output = self.rgb_model(images)
                # sigmoid deliberately disabled on the raw heatmap; note that
                # the flip branch below still applies sigmoid to the flipped map
                #rgb_hm = rgb_output[0]['hm'].sigmoid_()
                rgb_hm = rgb_output[0]['hm']
                rgb_wh = rgb_output[0]['wh']
                rgb_mov = rgb_output[0]['mov']

                # ADDED: one additional loss
                #rgb_hmc = rgb_output[0]['hmc']
                if self.opt.flip_test:
                    rgb_hm_f = rgb_output[1]['hm'].sigmoid_()
                    rgb_wh_f = rgb_output[1]['wh']

                    rgb_hm = (rgb_hm + flip_tensor(rgb_hm_f)) / 2
                    rgb_wh = (rgb_wh + flip_tensor(rgb_wh_f)) / 2

            if self.flow_model is not None:
                flow_output = self.flow_model(flows)
                flow_hm = flow_output[0]['hm'].sigmoid_()
                flow_wh = flow_output[0]['wh']
                flow_mov = flow_output[0]['mov']
                if self.opt.flip_test:
                    flow_hm_f = flow_output[1]['hm'].sigmoid_()
                    flow_wh_f = flow_output[1]['wh']

                    flow_hm = (flow_hm + flip_tensor(flow_hm_f)) / 2
                    flow_wh = (flow_wh + flip_tensor(flow_wh_f)) / 2

            if self.pa_model is not None:
                pa_output = self.pa_model(flows)
                pa_hm = pa_output[0]['hm'].sigmoid_()
                pa_wh = pa_output[0]['wh']
                pa_mov = pa_output[0]['mov']

            if self.flow_model is not None and self.rgb_model is not None:
                hm = (1 - self.opt.hm_fusion_rgb
                      ) * flow_hm + self.opt.hm_fusion_rgb * rgb_hm
                wh = (1 - self.opt.wh_fusion_rgb
                      ) * flow_wh + self.opt.wh_fusion_rgb * rgb_wh
                mov = (1 - self.opt.mov_fusion_rgb
                       ) * flow_mov + self.opt.mov_fusion_rgb * rgb_mov
            elif self.flow_model is not None and self.rgb_model is None and self.pa_model is None:
                hm = flow_hm
                wh = flow_wh
                mov = flow_mov
            elif self.rgb_model is not None and self.flow_model is None and self.pa_model is None:
                hm = rgb_hm
                wh = rgb_wh
                mov = rgb_mov

            # TODO: two stream for rgb + pa
            elif self.pa_model is not None and self.rgb_model is not None and self.flow_model is None:
                hm = (1 - self.opt.hm_fusion_rgb
                      ) * pa_hm + self.opt.hm_fusion_rgb * rgb_hm
                wh = (1 - self.opt.wh_fusion_rgb
                      ) * pa_wh + self.opt.wh_fusion_rgb * rgb_wh
                mov = (1 - self.opt.mov_fusion_rgb
                       ) * pa_mov + self.opt.mov_fusion_rgb * rgb_mov

            elif self.pa_model is not None and self.rgb_model is None and self.flow_model is None:
                hm = pa_hm
                wh = pa_wh
                mov = pa_mov

            else:
                print('No model exists.')
                assert 0

            # ADDED: minus mem (only detect on current clip)

            #mov = None
            detections = moc_decode(hm, wh, mov, N=self.opt.N, K=self.opt.K)

            #hm = hm[:,42:63,:,:]
            #detections = moc_decode_multihm(hm, wh, mov, N=self.opt.N, K=self.opt.K - 0)
            return detections
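Each fused tensor above is a convex combination of the two streams' predictions. A small illustrative helper (the name fuse_streams is hypothetical; the weight semantics follow the code above):

    def fuse_streams(flow_t, rgb_t, w_rgb):
        # w_rgb = 1.0 keeps only the RGB prediction, w_rgb = 0.0 keeps only
        # the flow (or PA) prediction; values in between blend linearly.
        return (1.0 - w_rgb) * flow_t + w_rgb * rgb_t

    # e.g. hm = fuse_streams(flow_hm, rgb_hm, self.opt.hm_fusion_rgb)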
Example #3
    def process(self, images, flows, video_tag):
        with torch.no_grad():
            if self.rgb_model_backbone is not None:
                if video_tag == 0:

                    rgb_features = [
                        self.rgb_model_backbone(images[i])
                        for i in range(self.opt.K)
                    ]
                    rgb_features = [
                        self.rgb_model_deconv(rgb_features[i])
                        for i in range(self.opt.K)
                    ]

                    self.rgb_buffer = rgb_features
                    if self.opt.flip_test:

                        rgb_features_flip = [
                            self.rgb_model_backbone(images[i + self.opt.K])
                            for i in range(self.opt.K)
                        ]
                        # the flip list has only K entries, so index with i
                        # (the + self.opt.K offset is already applied to
                        # images above)
                        rgb_features_flip = [
                            self.rgb_model_deconv(rgb_features_flip[i])
                            for i in range(self.opt.K)
                        ]

                        self.rgb_buffer_flip = rgb_features_flip
                else:
                    del self.rgb_buffer[0]
                    self.rgb_buffer.append(
                        self.rgb_model_deconv(
                            self.rgb_model_backbone(images[self.opt.K - 1])))
                    if self.opt.flip_test:
                        del self.rgb_buffer_flip[0]
                        self.rgb_buffer_flip.append(
                            self.rgb_model_deconv(
                                self.rgb_model_backbone(images[-1])))
                rgb_output = self.rgb_model_branch(self.rgb_buffer,
                                                   self.rgb_buffer_flip)

                # sigmoid deliberately disabled (the flip path below still applies it)
                #rgb_hm = rgb_output[0]['hm'].sigmoid_()
                rgb_hm = rgb_output[0]['hm']
                rgb_wh = rgb_output[0]['wh']
                rgb_mov = rgb_output[0]['mov']
                if self.opt.flip_test:
                    rgb_hm_f = rgb_output[1]['hm'].sigmoid_()
                    rgb_wh_f = rgb_output[1]['wh']

                    rgb_hm = (rgb_hm + flip_tensor(rgb_hm_f)) / 2
                    rgb_wh = (rgb_wh + flip_tensor(rgb_wh_f)) / 2

            if self.flow_model_backbone is not None:
                if video_tag == 0:
                    flow_features = [
                        self.flow_model_backbone(flows[i])
                        for i in range(self.opt.K)
                    ]
                    self.flow_buffer = flow_features
                    if self.opt.flip_test:
                        flow_features_flip = [
                            self.flow_model_backbone(flows[i + self.opt.K])
                            for i in range(self.opt.K)
                        ]
                        self.flow_buffer_flip = flow_features_flip
                else:
                    del self.flow_buffer[0]
                    self.flow_buffer.append(
                        self.flow_model_backbone(flows[self.opt.K - 1]))
                    if self.opt.flip_test:
                        del self.flow_buffer_flip[0]
                        self.flow_buffer_flip.append(
                            self.flow_model_backbone(flows[-1]))
                flow_output = self.flow_model_branch(self.flow_buffer,
                                                     self.flow_buffer_flip)
                flow_hm = flow_output[0]['hm'].sigmoid_()
                flow_wh = flow_output[0]['wh']
                flow_mov = flow_output[0]['mov']
                if self.opt.flip_test:
                    flow_hm_f = flow_output[1]['hm'].sigmoid_()
                    flow_wh_f = flow_output[1]['wh']

                    flow_hm = (flow_hm + flip_tensor(flow_hm_f)) / 2
                    flow_wh = (flow_wh + flip_tensor(flow_wh_f)) / 2

            if self.flow_model_backbone is not None and self.rgb_model_backbone is not None:
                hm = (1 - self.opt.hm_fusion_rgb
                      ) * flow_hm + self.opt.hm_fusion_rgb * rgb_hm
                wh = (1 - self.opt.wh_fusion_rgb
                      ) * flow_wh + self.opt.wh_fusion_rgb * rgb_wh
                mov = (1 - self.opt.mov_fusion_rgb
                       ) * flow_mov + self.opt.mov_fusion_rgb * rgb_mov
            elif self.flow_model_backbone is not None and self.rgb_model_backbone is None:
                hm = flow_hm
                wh = flow_wh
                mov = flow_mov
            elif self.rgb_model_backbone is not None and self.flow_model_backbone is None:
                hm = rgb_hm
                wh = rgb_wh
                mov = rgb_mov
            else:
                print('No model exists.')
                assert 0

            detections = moc_decode(hm, wh, mov, N=self.opt.N, K=self.opt.K)
            return detections
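Examples #3 and #4 stream a video clip by clip: on the first clip (video_tag == 0) all K per-frame backbone features are computed, while on later clips only the newest frame runs through the backbone and the oldest buffered feature is evicted. A minimal sketch of that sliding-window pattern using collections.deque (the buffers above are plain lists; the deque wrapper is an assumption for illustration):

    from collections import deque

    class FeatureBuffer:
        def __init__(self, K):
            # maxlen=K makes append() evict the oldest entry automatically
            self.buf = deque(maxlen=K)

        def reset(self, features):
            # first clip of a video: store all K freshly computed features
            self.buf = deque(features, maxlen=self.buf.maxlen)

        def push(self, feature):
            # subsequent clips: add only the newest frame's feature
            self.buf.append(feature)

        def as_list(self):
            return list(self.buf)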
Example #4
    def process(self, images, flows, video_tag):
        with torch.no_grad():
            if self.rgb_model_backbone is not None:
                if video_tag == 0:

                    rgb_features = [
                        self.rgb_model_backbone(images[i])
                        for i in range(self.opt.K)
                    ]
                    rgb_features = [
                        self.rgb_model_deconv(rgb_features[i])
                        for i in range(self.opt.K)
                    ]

                    self.rgb_buffer = rgb_features
                    if self.opt.flip_test:

                        rgb_features_flip = [
                            self.rgb_model_backbone(images[i + self.opt.K])
                            for i in range(self.opt.K)
                        ]
                        # the flip list has only K entries, so index with i
                        # (the + self.opt.K offset is already applied to
                        # images above)
                        rgb_features_flip = [
                            self.rgb_model_deconv(rgb_features_flip[i])
                            for i in range(self.opt.K)
                        ]

                        self.rgb_buffer_flip = rgb_features_flip
                else:
                    del self.rgb_buffer[0]
                    self.rgb_buffer.append(
                        self.rgb_model_deconv(
                            self.rgb_model_backbone(images[self.opt.K - 1])))
                    if self.opt.flip_test:
                        del self.rgb_buffer_flip[0]
                        self.rgb_buffer_flip.append(
                            self.rgb_model_deconv(
                                self.rgb_model_backbone(images[-1])))
                rgb_output = self.rgb_model_branch(self.rgb_buffer,
                                                   self.rgb_buffer_flip)

                # sigmoid deliberately disabled (the flip path below still applies it)
                #rgb_hm = rgb_output[0]['hm'].sigmoid_()
                rgb_hm = rgb_output[0]['hm']
                rgb_wh = rgb_output[0]['wh']
                rgb_mov = rgb_output[0]['mov']
                if self.opt.flip_test:
                    rgb_hm_f = rgb_output[1]['hm'].sigmoid_()
                    rgb_wh_f = rgb_output[1]['wh']

                    rgb_hm = (rgb_hm + flip_tensor(rgb_hm_f)) / 2
                    rgb_wh = (rgb_wh + flip_tensor(rgb_wh_f)) / 2

            if self.flow_model_backbone is not None:
                if video_tag == 0:
                    flow_features = [
                        self.flow_model_backbone(flows[i])
                        for i in range(self.opt.K)
                    ]
                    self.flow_buffer = flow_features
                    if self.opt.flip_test:
                        flow_features_flip = [
                            self.flow_model_backbone(flows[i + self.opt.K])
                            for i in range(self.opt.K)
                        ]
                        self.flow_buffer_flip = flow_features_flip
                else:
                    del self.flow_buffer[0]
                    self.flow_buffer.append(
                        self.flow_model_backbone(flows[self.opt.K - 1]))
                    if self.opt.flip_test:
                        del self.flow_buffer_flip[0]
                        self.flow_buffer_flip.append(
                            self.flow_model_backbone(flows[-1]))
                flow_output = self.flow_model_branch(self.flow_buffer,
                                                     self.flow_buffer_flip)
                flow_hm = flow_output[0]['hm'].sigmoid_()
                flow_wh = flow_output[0]['wh']
                flow_mov = flow_output[0]['mov']
                if self.opt.flip_test:
                    flow_hm_f = flow_output[1]['hm'].sigmoid_()
                    flow_wh_f = flow_output[1]['wh']

                    flow_hm = (flow_hm + flip_tensor(flow_hm_f)) / 2
                    flow_wh = (flow_wh + flip_tensor(flow_wh_f)) / 2

            if self.pa_model_backbone is not None:

                # added to allow the reshape below (only used in the
                # commented-out list-based version)
                bb, cc, hh, ww = flows[0].size()

                if video_tag == 0:

                    flows = torch.cat(flows, dim=0)

                    flows_pa, flows_rgb = self.pa_model_PA(flows)
                    pa_features = self.pa_model_backbone(flows_rgb, flows_pa)
                    pa_features = self.pa_model_deconv(pa_features)

                    self.pa_buffer = pa_features
                    ''' not used, hence NOT IMPLEMENTED for now!
                    if self.opt.flip_test:
                        
                        rgb_features_flip = [self.rgb_model_backbone(images[i + self.opt.K]) for i in range(self.opt.K)]
                        rgb_features_flip = [self.rgb_model_deconv(rgb_features_flip[i + self.opt.K]) for i in range(self.opt.K)]
                        
                        self.rgb_buffer_flip = rgb_features_flip
                    '''

                else:

                    # previous list-based version, kept for reference:
                    #del self.pa_buffer[0]  # del no longer works because self.pa_buffer is a tensor, not a list
                    #self.pa_buffer.append(self.pa_model_deconv(self.pa_model_PA(flows.view(-1, cc, hh, ww))))

                    flow_cur_clip = flows[-1]
                    flows_pa, flows_rgb = self.pa_model_PA(flow_cur_clip)
                    pa_buffer_clip = self.pa_model_backbone(
                        flows_rgb, flows_pa)
                    pa_buffer_clip = self.pa_model_deconv(pa_buffer_clip)

                    self.pa_buffer = torch.cat(
                        (self.pa_buffer[1:], pa_buffer_clip), dim=0)
                    ''' not used, hence NOT IMPLEMENTED for now!
                    if self.opt.flip_test:
                        del self.rgb_buffer_flip[0]
                        self.rgb_buffer_flip.append(self.rgb_model_deconv(self.rgb_model_backbone(images[-1])))
                    '''

                pa_output = self.pa_model_branch(self.pa_buffer)

                # sigmoid deliberately disabled for the PA heatmap as well
                #pa_hm = pa_output[0]['hm'].sigmoid_()
                pa_hm = pa_output[0]['hm']
                pa_wh = pa_output[0]['wh']
                pa_mov = pa_output[0]['mov']
                # flip_test is NOT IMPLEMENTED for the PA stream; the block
                # below was copy-pasted from the RGB branch and references
                # rgb_output, which is undefined when only the PA model runs.
                #if self.opt.flip_test:
                #    rgb_hm_f = rgb_output[1]['hm'].sigmoid_()
                #    rgb_wh_f = rgb_output[1]['wh']
                #
                #    rgb_hm = (rgb_hm + flip_tensor(rgb_hm_f)) / 2
                #    rgb_wh = (rgb_wh + flip_tensor(rgb_wh_f)) / 2

            if self.flow_model_backbone is not None and self.rgb_model_backbone is not None:
                hm = (1 - self.opt.hm_fusion_rgb
                      ) * flow_hm + self.opt.hm_fusion_rgb * rgb_hm
                wh = (1 - self.opt.wh_fusion_rgb
                      ) * flow_wh + self.opt.wh_fusion_rgb * rgb_wh
                mov = (1 - self.opt.mov_fusion_rgb
                       ) * flow_mov + self.opt.mov_fusion_rgb * rgb_mov
            elif self.flow_model_backbone is not None and self.rgb_model_backbone is None:
                hm = flow_hm
                wh = flow_wh
                mov = flow_mov
            elif self.rgb_model_backbone is not None and self.flow_model_backbone is None:
                hm = rgb_hm
                wh = rgb_wh
                mov = rgb_mov
            elif self.pa_model_backbone is not None and self.rgb_model_backbone is None and self.flow_model_backbone is None:
                hm = pa_hm
                wh = pa_wh
                mov = pa_mov

            else:
                print('No model exists.')
                assert 0

            detections = moc_decode(hm, wh, mov, N=self.opt.N, K=self.opt.K)
            return detections  # size: (1, 100, 18): last dim == 4K + 1 + 1 (box, score, cls)
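Per the trailing comment, each detection row packs K per-frame boxes followed by a confidence score and a class id (4K + 2 values, i.e. 18 when K = 4). A hedged unpacking sketch under that assumed layout:

    K = 4                                # assumed clip length
    det = detections[0, 0]               # first tubelet of the batch
    boxes = det[:4 * K].view(K, 4)       # one (x1, y1, x2, y2) box per frame
    score = det[4 * K].item()            # tubelet confidence
    cls_id = int(det[4 * K + 1].item())  # action class index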