Пример #1
0
    def test_filter_length(self):
        """Check IntervalList.filter_length with a lower and then an upper bound."""
        first = Interval(1., 2., 1)
        second = Interval(1.5, 3.5, 2)
        candidates = IntervalList([first, second])

        # With min_length=1.1 the test expects only the (1.5, 3.5) interval
        # to remain.
        long_enough = candidates.filter_length(min_length=1.1)
        self.assertEqual(len(long_enough.intrvls), 1)
        survivor = long_enough.intrvls[0]
        self.assertEqual(repr(survivor),
                         "<Interval start:1.5 end:3.5 payload:2>")

        # Applying max_length=1.8 to that result is expected to leave nothing.
        short_enough = long_enough.filter_length(max_length=1.8)
        self.assertEqual(len(short_enough.intrvls), 0)
Пример #2
0
def detect_commercial_rekall(video, transcript_path, blackframe_list=None, histogram=None, debug=True, verbose=False):
    """
    Detect commercial blocks in a TV news video using rekall interval lists.

    Candidate commercials are the segments between black windows that do not
    overlap a reliable ">>" marker in the transcript.  Adjacent candidates are
    folded together (capped at MAX_COMMERCIAL_TIME), then unioned with
    lowercase-transcript intervals and blank (no transcript) intervals, and
    finally merged across small gaps.

    Args:
        video: either a dict with 'fps' and 'num_frames' keys, or an object
            exposing `fps` and `num_frames` attributes (the original note
            says "django query set" -- TODO confirm the exact type).
        transcript_path: path handed to `load_transcript`; the loaded
            transcript is iterated as (text, start_sec, end_sec) triples.
        blackframe_list: list of black frame indices.  Each index `fid`
            becomes the interval [fid2second(fid), fid2second(fid + 1)).
        histogram: list of histogram 16x3 bin for each frame; only passed to
            `get_blackframe_list` when `blackframe_list` is None.
        debug: when true (the default) return the intermediate interval
            lists instead of the plain result.
        verbose: when true, print the intermediate interval lists.

    Returns:
        If `debug` is true, a dict with keys 'black', 'arrow' (both dilated
        by 2), 'commercials_raw', 'lowercase', 'blank' and 'commercials',
        each mapping to an interval list.
        Otherwise a list of (start, end) tuples, one per detected commercial,
        taken from the final intervals (presumably seconds, since the
        intervals are built from `fid2second` and transcript times).
    """
    # isinstance (rather than an exact type comparison) so that dict
    # subclasses are also accepted as the dict form of `video`.
    if isinstance(video, dict):
        fps = video['fps']
        num_frames = video['num_frames']
    else:
        fps = video.fps
        num_frames = video.num_frames
    video_length = num_frames / fps

    transcript = load_transcript(transcript_path)
    if blackframe_list is None:
        blackframe_intervallist = get_blackframe_list(histogram)
    else:
        blackframe_intervallist = IntervalList([
            (fid2second(fid, fps), fid2second(fid + 1, fps), 0)
            for fid in blackframe_list
        ])

    # get black windows: dilate by 1/fps, coalesce, then undo the dilation so
    # that neighbouring black-frame intervals fuse into a single window.
    # NOTE: no minimum-length filter is applied to the black windows.
    black_windows = (
        blackframe_intervallist
        .dilate(1. / fps)
        .coalesce()
        .dilate(-1. / fps)
    )
    if verbose:
        print("black window: ({})\n".format(black_windows.size()))
        for idx, win in enumerate(black_windows.get_intervals()):
            print(idx, win)

    # get all instances of >>
    arrow_intervals = get_text_intervals(">>", transcript)
    arrow_announcer_intervals = get_text_intervals(">> Announcer:", transcript)
    arrow_having_intervals = get_text_intervals(">> HAVING", transcript)
    if verbose:
        print("arrow_text: ({})\n".format(arrow_intervals.size()), arrow_intervals)
        print("arrow_announcer_text: ({})\n".format(arrow_announcer_intervals.size()), arrow_announcer_intervals)

    # get intervals for the whole transcript, skipping entries whose text
    # contains '{', and bridging gaps via dilate(1)/coalesce/dilate(-1)
    transcript_intervals = (
        IntervalList([
            (start_sec, end_sec, 0)
            for text, start_sec, end_sec in transcript
            if '{' not in text
        ])
        .dilate(1)
        .coalesce()
        .dilate(-1)
    )

    # get an interval for the whole video
    whole_video = IntervalList([(0., video_length, 0)])

    # Predicate deciding whether a candidate should be merged with the
    # previous one; built once here instead of on every fold step.
    should_merge = or_pred(overlaps(), after(max_dist=5), arity=2)

    # whole video minus black windows to get segments in between black windows
    # then filter out anything that overlaps with ">>" as long as it's not ">> Announcer:"
    # then coalesce, as long as it doesn't get too long
    def fold_fn(stack, interval):
        # Clip any single candidate to MAX_COMMERCIAL_TIME.
        if interval.length() > MAX_COMMERCIAL_TIME:
            interval = Interval(interval.start, interval.start + MAX_COMMERCIAL_TIME, interval.payload)
        if not stack:
            stack.append(interval)
            return stack

        last = stack.pop()
        if should_merge(interval, last):
            merged = last.merge(interval)
            # Keep the merged block within MAX_COMMERCIAL_TIME, anchored at
            # the start of the earlier block.
            if merged.length() > MAX_COMMERCIAL_TIME:
                merged = Interval(
                    last.start,
                    last.start + MAX_COMMERCIAL_TIME,
                    last.payload)
            stack.append(merged)
        else:
            stack.append(last)
            stack.append(interval)
        return stack

    # get reliable double arrow intervals: drop ">> Announcer:" and
    # ">> HAVING" hits, and keep only arrows overlapping transcript runs of
    # at least RELIABLE_TEXT_DURATION
    reliable_transcripts = transcript_intervals.filter_length(min_length=RELIABLE_TEXT_DURATION)
    arrow_intervals = (
        arrow_intervals
        .minus(arrow_announcer_intervals)
        .minus(arrow_having_intervals)
        .filter_against(
            reliable_transcripts,
            predicate=overlaps()
        )
    )

    # get non-commercial blocks by filtering out intervals overlaps with >>
    all_blocks = whole_video.minus(black_windows)
    non_commercial_blocks = all_blocks.filter_against(
        arrow_intervals,
        predicate=overlaps()
    )

    commercial_blocks = whole_video.minus(non_commercial_blocks.set_union(black_windows))
    if verbose:
        print("commercial blocks candidates: ({})\n".format(commercial_blocks.size()))
        for idx, win in enumerate(commercial_blocks.get_intervals()):
            print(idx, win)

    commercials = (
        commercial_blocks
        .fold_list(fold_fn, [])
        .filter_length(min_length=MIN_COMMERCIAL_TIME)
    )
    # Snapshot of the black-window-only result; it is unioned back in after
    # the final gap merge and reported under 'commercials_raw'.
    commercials_raw = copy.deepcopy(commercials)
    if verbose:
        print("commercials from blackwindow:\n", commercials)

    # add in lowercase intervals
    lowercase_intervals = get_lowercase_intervals(transcript)
    if verbose:
        print("lowercase intervals:\n", lowercase_intervals)
    commercials = commercials.set_union(lowercase_intervals)
    if verbose:
        print("commercials merge with lowercase:\n", commercials)

    # get blank intervals
    blank_intervals = (
        whole_video
        .minus(transcript_intervals)
        .filter_length(min_length=MIN_BLANKWINDOW, max_length=MAX_BLANKWINDOW)
    )
    # remove last one minute segment due to no aligned transcripts
    last_minute = IntervalList([(video_length - 60, video_length, 0)])
    blank_intervals = (
        blank_intervals
        .minus(last_minute)
        .filter_length(min_length=MIN_BLANKWINDOW)
    )
    if verbose:
        print("blank intervals:\n", blank_intervals)

    # add in blank intervals
    commercials = commercials.set_union(blank_intervals)
    if verbose:
        print("commercials merge with blank intervals:\n", commercials)

    # merge with small gaps, but only if that doesn't make things too long;
    # the un-merged inputs are unioned back so nothing detected earlier is
    # lost when a merged block exceeds MAX_COMMERCIAL_TIME
    commercials = (
        commercials
        .dilate(MAX_MERGE_GAP / 2)
        .coalesce()
        .dilate(-MAX_MERGE_GAP / 2)
        .filter_length(max_length=MAX_COMMERCIAL_TIME)
        .set_union(commercials_raw)
        .set_union(lowercase_intervals)
        .set_union(blank_intervals)
        .coalesce()
    )

    # NOTE: no further post-processing (small-gap filling or removal of
    # isolated commercials) is applied to the result.

    if debug:
        return {
            'black': black_windows.dilate(2),
            'arrow': arrow_intervals.dilate(2),
            'commercials_raw': commercials_raw,
            'lowercase': lowercase_intervals,
            'blank': blank_intervals,
            'commercials': commercials,
        }
    return [(i.start, i.end) for i in commercials.get_intervals()]