Example #1
    def classify(self, video_name, model_mask=None, verbose=False):
        """
        Input a file on harddisk
        Args:
            filename:

        Returns:
            cls: classification scores
            frm_scores: frame-wise classification scores
        """
        video_idx = self.__data_manager.get_video_idx_by_name(video_name)
        rgb_frm_it = self.__data_manager.vidoe_frame_iterator(video_idx,
                                                              frame_type=0,
                                                              batch_size=1,
                                                              step=5)
        flow_frm_it = None
        if self.__need_flow:
            flow_frm_it = self.__data_manager.vidoe_frame_iterator(
                video_idx, frame_type=3, batch_size=10, step=1)
        all_scores = []
        all_start = time.clock()

        cnt = 0

        # process model mask
        mask = [True] * self.__num_net
        n_model = self.__num_net
        if model_mask is not None:
            for i in range(len(model_mask)):
                mask[i] = model_mask[i]
                if not mask[i]:
                    n_model -= 1

        for rgb_stack in rgb_frm_it:
            start = time.clock()
            cnt += 1
            frm_scores = []
            flow_stack = None
            if self.__need_flow:
                assert (flow_frm_it is not None)
                flow_stack = flow_frm_it.next()
                if len(flow_stack) < 10:
                    # the flow net expects a 10-channel stack, so discard short stacks
                    continue
            for net, run, in_type, data_aug in zip(
                    self.__net_vec, mask, self.__input_type, self.__data_aug):
                if not run:
                    continue
                if in_type == 0:  # RGB input
                    frm_scores.append(
                        net.predict_single_frame(rgb_stack[:1],
                                                 self.__score_name,
                                                 over_sample=data_aug))
                elif in_type == 1:  # Flow input
                    assert (flow_stack is not None)
                    frm_scores.append(
                        net.predict_single_flow_stack(flow_stack,
                                                      self.__score_name,
                                                      over_sample=data_aug))
            all_scores.append(frm_scores)
            end = time.clock()
            elapsed = end - start
            if verbose:
                print("frame sample {}: {} second".format(cnt, elapsed))

        if len(all_scores) == 0:
            # all_scores holds (#frame samples, #models, #classes) scores
            if verbose:
                print('warning: no frames found for ' + video_name)
            return None, None, None, None

        # aggregate frame-wise scores
        model_scores = []
        for i in range(n_model):
            model_scores.append(sliding_window_aggregation_func(
                np.array([x[i] for x in all_scores]), norm=False))

        final_scores = default_fusion_func(
            np.zeros_like(model_scores[0]), model_scores,
            [w for w, m in zip(self.__net_weights, mask) if m])

        all_end = time.clock()
        total_time = all_end - all_start
        if verbose:
            print("total time: {} second".format(total_time))

        return final_scores, model_scores, all_scores, total_time
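
The example relies on sliding_window_aggregation_func and default_fusion_func, which live elsewhere in the repository and are not shown. As a rough, self-contained sketch of what that step amounts to, assuming plain mean pooling over frame samples and a weighted sum over models (the repository's actual functions may differ), one could write:

import numpy as np

def mean_aggregation(frame_scores, norm=False):
    # frame_scores: (#frame samples, #classes) scores from one model;
    # optionally normalize each row, then average over the frame axis.
    scores = np.asarray(frame_scores, dtype=np.float64)
    if norm:
        scores = np.exp(scores - scores.max(axis=-1, keepdims=True))
        scores /= scores.sum(axis=-1, keepdims=True)
    return scores.mean(axis=0)

def weighted_fusion(init, model_scores, weights):
    # init: zero array shaped like one model's aggregated scores;
    # model_scores: per-model aggregated scores; weights: per-model fusion weights.
    fused = init.copy()
    for scores, weight in zip(model_scores, weights):
        fused = fused + weight * np.asarray(scores)
    return fused

# toy usage: 5 frame samples, 2 models, 4 classes
all_scores = [[np.random.rand(4) for _ in range(2)] for _ in range(5)]
model_scores = [mean_aggregation([frm[i] for frm in all_scores]) for i in range(2)]
final_scores = weighted_fusion(np.zeros_like(model_scores[0]), model_scores, [1.0, 1.5])
print(final_scores)
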
Example #2
    def _classify_from_file(self, filename, model_mask, cache_manager=None):
        """
        Classify a video file on hard disk.
        Args:
            filename: path to the video file
            model_mask: optional boolean mask selecting which nets to run
            cache_manager: if given, cache intermediate frame/flow stacks and
                reuse previously cached results when possible

        Returns:
            final_scores: fused classification scores
            all_scores: frame-wise classification scores
            total_time: total processing time
        """
        vid_info = _dummy_vid_info()
        vid_info.path = filename
        video_proc = VideoProc(vid_info)
        video_proc.open_video(True)

        # here we use interval of 30, roughly 1FPS
        frm_it = None
        cached_flow = None
        if cache_manager is not None:
            frm_it = cache_manager.load(videoname=filename, type="framestack")
            cached_flow = cache_manager.load(videoname=filename,
                                             type="flowstack")
        if frm_it is None:
            frm_it = video_proc.frame_iter(timely=False,
                                           ignore_err=True,
                                           interval=30,
                                           length=6 if self.__need_flow else 1,
                                           new_size=(340, 256))

        all_scores = []
        all_start = time.clock()

        # process model mask
        mask = [True] * self.__num_net
        n_model = self.__num_net
        if model_mask is not None:
            for i in xrange(len(model_mask)):
                mask[i] = model_mask[i]
                if not mask[i]:
                    n_model -= 1

        frame_cache = []
        flow_cache = []
        cnt = 0
        for frm_stack in frm_it:

            if cache_manager is not None:
                frame_cache.append(frm_stack)

            start = time.clock()
            cnt += 1
            frm_scores = []

            flow_stack = None
            for net, run, in_type, conv_support, net_input_size in \
                    zip(self.__net_vec, mask, self.__input_type, self.__conv_support, self.__input_size):
                if not run:
                    continue

                frame_size = (340 * net_input_size / 224,
                              256 * net_input_size / 224)

                if in_type == 0:
                    # RGB input
                    frm_scores.append(net.predict_single_frame(
                        frm_stack[:1], self.__score_name,
                        over_sample=not conv_support,
                        frame_size=None if net_input_size == 224 else frame_size))
                elif in_type == 1:
                    # Flow input
                    if flow_stack is None:
                        # Extract flow if necessary
                        if cached_flow is not None:
                            flow_stack = cached_flow[cnt - 1]
                        else:
                            flow_stack = self.__flow_extractor.extract_flow(
                                frm_stack, frame_size)
                        if cache_manager is not None:
                            flow_cache.append(flow_stack)

                    frm_scores.append(
                        net.predict_single_flow_stack(
                            flow_stack,
                            self.__score_name,
                            over_sample=not conv_support))
            all_scores.append(frm_scores)
            end = time.clock()
            elapsed = end - start
            # print "frame sample {}: {} second".format(cnt, elapsed)

        if cache_manager is not None:
            if len(frame_cache) != 0:
                cache_manager.dump(frame_cache, filename, "framestack")
            if len(flow_cache) != 0:
                cache_manager.dump(flow_cache, filename, "flowstack")

        # aggregate frame-wise scores
        agg_scores = []
        for i in xrange(n_model):
            model_scores = sliding_window_aggregation_func(
                np.array([x[i] for x in all_scores]), norm=False)
            agg_scores.append(model_scores)

        final_scores = default_fusion_func(
            np.zeros_like(agg_scores[0]), agg_scores,
            [w for w, m in zip(self.__net_weights, mask) if m])

        all_end = time.clock()
        total_time = all_end - all_start
        # print "total time: {} second".format(total_time)
        print('{0} processed.'.format(filename))
        return final_scores, all_scores, total_time
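
The cache_manager passed to _classify_from_file is only assumed to expose load(videoname=..., type=...), returning None on a miss, and dump(obj, filename, type); its implementation is not part of the example. A minimal pickle-backed stand-in with that interface (the file layout and naming below are assumptions, not the repository's actual cache format) might look like:

import os
import pickle

class SimpleCacheManager(object):
    # Hypothetical stand-in for the cache_manager used above: it stores one
    # pickle file per (video, type) pair and returns None on a cache miss.
    def __init__(self, cache_dir):
        self.cache_dir = cache_dir
        if not os.path.isdir(cache_dir):
            os.makedirs(cache_dir)

    def _path(self, videoname, type):
        key = os.path.basename(videoname).replace('.', '_')
        return os.path.join(self.cache_dir, '{}_{}.pkl'.format(key, type))

    def load(self, videoname, type):
        path = self._path(videoname, type)
        if not os.path.isfile(path):
            return None
        with open(path, 'rb') as f:
            return pickle.load(f)

    def dump(self, obj, videoname, type):
        with open(self._path(videoname, type), 'wb') as f:
            pickle.dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL)
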
Example #3
    def _classify_from_file(self, filename, model_mask):
        """
        Input a file on harddisk
        Args:
            filename:

        Returns:
            cls: classification scores
            frm_scores: frame-wise classification scores
        """
        vid_info = _dummy_vid_info()
        vid_info.path = filename
        video_proc = VideoProc(vid_info)
        video_proc.open_video(True)

        # here we use interval of 30, roughly 1FPS
        frm_it = video_proc.frame_iter(timely=False, ignore_err=True, interval=30,
                                       length=6 if self.__need_flow else 1,
                                       new_size=(340, 256))

        all_scores = []
        all_start = time.clock()

        cnt = 0

        # process model mask
        mask = [True] * self.__num_net
        n_model = self.__num_net
        if model_mask is not None:
            for i in xrange(len(model_mask)):
                mask[i] = model_mask[i]
                if not mask[i]:
                    n_model -= 1


        for frm_stack in frm_it:

            start = time.clock()
            cnt += 1
            frm_scores = []

            flow_stack = None
            for net, run, in_type, conv_support, net_input_size in \
                    zip(self.__net_vec, mask, self.__input_type, self.__conv_support, self.__input_size):
                if not run:
                    continue

                frame_size = (340 * net_input_size / 224, 256 * net_input_size / 224)

                if in_type == 0:
                    # RGB input

                    frm_scores.append(net.predict_single_frame(frm_stack[:1], self.__score_name,
                                                               over_sample=not conv_support,
                                                               frame_size=None if net_input_size == 224 else frame_size
                                                               ))
                elif in_type == 1:
                    # Flow input
                    if flow_stack is None:
                        # Extract flow if necessary
                        flow_stack = self.__flow_extractor.extract_flow(frm_stack, frame_size)

                    frm_scores.append(net.predict_single_flow_stack(flow_stack, self.__score_name,
                                                                    over_sample=not conv_support))

            all_scores.append(frm_scores)
            end = time.clock()
            elapsed = end - start
            print "frame sample {}: {} second".format(cnt, elapsed)

        # aggregate frame-wise scores
        agg_scores = []
        for i in xrange(n_model):
            model_scores = sliding_window_aggregation_func(np.array([x[i] for x in all_scores]), norm=False)
            agg_scores.append(model_scores)

        final_scores = default_fusion_func(np.zeros_like(agg_scores[0]), agg_scores, [w for w, m in zip(self.__net_weights, mask) if m])

        all_end = time.clock()
        total_time = all_end - all_start
        print "total time: {} second".format(total_time)

        return final_scores, all_scores, total_time
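
In all three examples, over_sample=not conv_support suggests that networks without fully-convolutional support are evaluated on over-sampled crops inside predict_single_frame / predict_single_flow_stack, which are not shown. Purely as an illustration of the common 10-crop scheme (four corners plus center, each with a horizontal flip; the repository's own cropping may differ), a sketch could be:

import numpy as np

def oversample_crops(frame, crop_size=224):
    # frame: H x W x C image array; returns ten crops:
    # four corners and the center, each followed by its horizontal mirror.
    h, w = frame.shape[:2]
    cs = crop_size
    offsets = [(0, 0), (0, w - cs), (h - cs, 0), (h - cs, w - cs),
               ((h - cs) // 2, (w - cs) // 2)]
    crops = []
    for y, x in offsets:
        crop = frame[y:y + cs, x:x + cs]
        crops.append(crop)
        crops.append(crop[:, ::-1])  # horizontal flip
    return crops

# toy usage on a dummy 256 x 340 frame
crops = oversample_crops(np.zeros((256, 340, 3), dtype=np.uint8))
print(len(crops))  # 10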