def __init__(
    self,
    # TODO why can it be a dict?
    video: typing.Union[VideoObject, typing.Dict],
    start: int,
    end: int,
    # TODO need refactored ?
    ssim: typing.List[float],
    mse: typing.List[float],
    psnr: typing.List[float],
    start_time: float,
    end_time: float,
):
    """
    build a range between two frames of a video

    :param video: VideoObject, or a dict of keyword arguments used to build one
    :param start: frame id where the range starts
    :param end: frame id where the range ends
    :param ssim: ssim values collected inside the range
    :param mse: mse values collected inside the range
    :param psnr: psnr values collected inside the range
    :param start_time: timestamp of the start frame
    :param end_time: timestamp of the end frame
    """
    self.video = VideoObject(**video) if isinstance(video, dict) else video

    # if length is 1
    # https://github.com/williamfzc/stagesepx/issues/9
    # a reversed range is normalised so that start <= end
    if start > end:
        start, end = end, start
        start_time, end_time = end_time, start_time

    # attributes are assigned in the original order (video, start, end, ...)
    self.start = start
    self.end = end
    self.ssim = ssim
    self.mse = mse
    self.psnr = psnr
    self.start_time = start_time
    self.end_time = end_time

    logger.debug(
        f"new a range: {self.start}({self.start_time}) - {self.end}({self.end_time})"
    )
def test_boost():
    """Cut and classify a pre-loaded video, then draw the report in three variants."""
    video = VideoObject(VIDEO_PATH)
    video.load_frames()

    # test cut
    res, data_home = _cut(video)

    # test classify
    classify_result = _classify(video, data_home)

    # --- draw ---
    # default report, compressed report, report with a fixed target size
    report_path = os.path.join(data_home, "report.html")
    draw_variants = (
        {},
        {"compress_rate": 0.1},
        {"target_size": (600, 800)},
    )
    for extra_kwargs in draw_variants:
        reporter = Reporter()
        reporter.draw(
            classify_result,
            report_path=report_path,
            cut_result=res,
            **extra_kwargs,
        )
def handle(self, video_path: str) -> bool:
    """
    cut the video, classify its stable ranges with a KerasClassifier and draw a report

    :param video_path: path to the target video
    :return: always True
    """
    super(KerasHandler, self).handle(video_path)

    video = VideoObject(video_path)
    if self.preload:
        video.load_frames()

    # --- cutter ---
    cut_result = VideoCutter().cut(video)
    stable_ranges, _ = cut_result.get_range(threshold=0.98, offset=3)

    # --- classify ---
    classifier = KerasClassifier()
    if not self.model_path:
        # no model given: train on frames picked from the stable ranges
        data_home = cut_result.pick_and_save(
            stable_ranges, self.frame_count, to_dir=self.result_path
        )
        classifier.train(data_home)
    else:
        logger.info("load existed pre-train model")
        classifier.load_model(self.model_path)

    self.classifier_result = classifier.classify(video, stable_ranges)

    # --- draw ---
    reporter = Reporter()
    reporter.draw(self.classifier_result, report_path=self.result_report_path)
    return True
def test_read_from_mem():
    """Iterating a pre-loaded video yields exactly 30 VideoFrame objects."""
    video = VideoObject(VIDEO_PATH)
    video.load_frames()

    total = 0
    for total, frame in enumerate(video, start=1):
        assert isinstance(frame, VideoFrame)
    assert total == 30
def test_boost():
    """Cut and classify a pre-loaded video, then draw a single report."""
    video = VideoObject(VIDEO_PATH)
    video.load_frames()

    # test cut
    res, data_home = _cut(video)

    # test classify
    classify_result = _classify(video, data_home)

    # --- draw ---
    reporter = Reporter()
    report_path = os.path.join(data_home, "report.html")
    reporter.draw(classify_result, report_path=report_path, cut_result=res)
def cut(self, video: typing.Union[str, VideoObject], *args,
        **kwargs) -> VideoCutResult:
    """
    convert video file, into a VideoCutResult

    :param video: video file path or VideoObject
    :param args: forwarded to the range conversion
    :param kwargs: parameters of toolbox.compress_frame can be used here;
        they are also stored in the result as ``cut_kwargs``
    :return: VideoCutResult holding the video and its range list
    """
    start_time = time.time()

    # a path is wrapped, an existing VideoObject is used as it is
    video = VideoObject(video) if isinstance(video, str) else video
    logger.info(f"start cutting: {video.path}")

    # if video contains 100 frames
    # it starts from 1, and length of list is 99, not 100
    # [Range(1-2), Range(2-3), Range(3-4) ... Range(99-100)]
    range_list = self._convert_video_into_range_list(video, *args, **kwargs)
    logger.info(f"cut finished: {video}")

    end_time = time.time()
    logger.debug(f"cutter cost: {end_time - start_time}")

    # TODO other analysis results can be added to VideoCutResult, such as AI cutter?
    return VideoCutResult(video, range_list, cut_kwargs=kwargs)
def classify(
    self,
    video: typing.Union[str, VideoObject],
    limit_range: typing.List[VideoCutRange] = None,
    step: int = None,
    *args,
    **kwargs,
) -> ClassifierResult:
    """
    start classification

    :param video: path to target video or VideoObject
    :param limit_range: frames out of these ranges will be ignored
        (they are still reported, tagged with constants.IGNORE_FLAG)
    :param step: step between frames, default to 1
    :param args: forwarded to the hooks and to the frame classifier
    :param kwargs: forwarded to the hooks and to the frame classifier
    :return: ClassifierResult wrapping one result per visited frame
    """
    logger.debug(f"classify with {self.__class__.__name__}")

    step = step or 1
    if isinstance(video, str):
        video = VideoObject(video)

    final_result: typing.List[SingleClassifierResult] = []
    operator = video.get_operator()
    frame = operator.get_frame_by_id(1)

    # the operator returns None once there is no such frame
    while frame is not None:
        in_target_range = not limit_range or any(
            each.contain(frame.frame_id) for each in limit_range)

        if in_target_range:
            # hook
            frame.data = self._apply_hook(frame.frame_id, frame.data, *args,
                                          **kwargs)
            result = self._classify_frame(frame, *args, **kwargs)
            logger.debug(
                f"frame {frame.frame_id} ({frame.timestamp}) belongs to {result}"
            )
        else:
            logger.debug(
                f"frame {frame.frame_id} ({frame.timestamp}) not in target range, skip"
            )
            result = constants.IGNORE_FLAG

        final_result.append(
            SingleClassifierResult(video.path, frame.frame_id,
                                   frame.timestamp, result))
        frame = operator.get_frame_by_id(frame.frame_id + step)

    return ClassifierResult(final_result)
def cut(self, video_path: str, *args, **kwargs) -> VideoCutResult:
    """
    convert video file, into a VideoCutResult

    :param video_path: video file path
    :param args: forwarded to the range conversion
    :param kwargs: parameters of toolbox.compress_frame can be used here
    :return: VideoCutResult holding the video and its range list
    """
    logger.info(f"start cutting: {video_path}")
    video = VideoObject(video_path)

    # if video contains 100 frames
    # it starts from 1, and length of list is 99, not 100
    # [Range(1-2), Range(2-3), Range(3-4) ... Range(99-100)]
    range_list = self._convert_video_into_range_list(video, *args, **kwargs)
    logger.info(f"cut finished: {video_path}")

    # TODO other analysis results can be added to VideoCutResult, such as AI cutter?
    result = VideoCutResult(video, range_list)
    return result
def test_pathlib_path():
    """A video built from VIDEO_PATHLIB_PATH iterates over 30 VideoFrame objects."""
    video = VideoObject(VIDEO_PATHLIB_PATH)

    total = 0
    for total, frame in enumerate(video, start=1):
        assert isinstance(frame, VideoFrame)
    assert total == 30
def _classify(
    video: typing.Union[str, VideoObject],
    data_home: str = None,
    model: str = None,
    # optional: these args below are sent for `cutter`
    compress_rate: float = 0.2,
    target_size: typing.Tuple[int, int] = None,
    limit_range: typing.List[VideoCutRange] = None,
) -> ClassifierResult:
    """
    classify a video with some tagged pictures

    optional: if you have changed the default value in `cut`,
    you'd better keep them(offset and limit) equal.

    :param video: video path or object
    :param data_home: output path (dir)
    :param model: LinearSVC model (path); when given it is loaded instead of
        training on `data_home`
    :param compress_rate: before_pic * compress_rate = after_pic. default to 0.2
    :param target_size: (100, 200)
    :param limit_range: passed to the classifier as `limit_range`
    :return: ClassifierResult
    """
    if isinstance(video, str):
        video = VideoObject(video)

    assert data_home or model, "classification should based on dataset or trained model"

    classifier = SVMClassifier(compress_rate=compress_rate,
                               target_size=target_size)
    if not model:
        # no trained model: learn from the tagged pictures
        classifier.load(data_home)
        classifier.train()
    else:
        classifier.load_model(model)

    return classifier.classify(video, limit_range=limit_range)
def test_custom_ffmpeg():
    """
    Build a VideoObject with `fps` while `constants.FFMPEG` points at a
    non-existing binary; a FileNotFoundError is tolerated.

    The global `constants.FFMPEG` is restored afterwards, otherwise every test
    that runs later in the same process would keep using the fake binary.
    """
    from stagesepx import constants

    original_ffmpeg = constants.FFMPEG
    constants.FFMPEG = "unknown"
    try:
        VideoObject(VIDEO_PATH, fps=30)
    except FileNotFoundError:
        # tolerated: the fake binary cannot be found
        pass
    finally:
        # never leak the patched module-level constant into other tests
        constants.FFMPEG = original_ffmpeg
def _diff(
    video_before: typing.Union[str, VideoObject],
    video_after: typing.Union[str, VideoObject],
    pre_hooks: typing.List[BaseHook] = None,
    *args,
    **kwargs,
):
    """
    cut two videos and diff their cut results

    :param video_before: video path or object
    :param video_after: video path or object
    :param pre_hooks: passed to `VideoCutResult.diff`
    :param args: passed to `VideoCutResult.diff`
    :param kwargs: passed to `VideoCutResult.diff`
    :return: whatever `diff` of the first cut result returns
    """

    def _as_video(target):
        # only paths are wrapped (and pre-loaded); objects are used as given
        if not isinstance(target, str):
            return target
        loaded = VideoObject(target)
        loaded.load_frames()
        return loaded

    cutter = VideoCutter()
    video_before = _as_video(video_before)
    video_after = _as_video(video_after)

    result_before = cutter.cut(video_before)
    result_after = cutter.cut(video_after)
    return result_before.diff(result_after, pre_hooks, *args, **kwargs)
def test_cut_result():
    """Check ranges, diff and thumbnails of a cut result."""
    video = VideoObject(VIDEO_PATH)
    res = VideoCutter().cut(video)
    stable, _ = res.get_range()

    assert len(stable) == len(res.get_stable_range())
    assert isinstance(res.diff(res, auto_merge=True), dict)

    # default, vertical and saved-to-dir thumbnails are all arrays
    first_stable = stable[0]
    thumbnail_variants = (
        {},
        {"is_vertical": True},
        {"to_dir": "somewhere"},
    )
    for thumbnail_kwargs in thumbnail_variants:
        thumbnail = res.thumbnail(first_stable, **thumbnail_kwargs)
        assert isinstance(thumbnail, np.ndarray)
def test_read_from_mem():
    """Iterate a pre-loaded video, then check that clean_frames empties its data."""
    video = VideoObject(VIDEO_PATH)
    print(str(video))
    video.load_frames()

    total = 0
    for total, frame in enumerate(video, start=1):
        assert isinstance(frame, VideoFrame)
        print(str(frame))
    assert total == 30

    # frames loaded at construction time can be dropped again
    preloaded = VideoObject(VIDEO_PATH, pre_load=True)
    preloaded.clean_frames()
    assert not preloaded.data
def test_cut_result():
    """Check ranges, diff, thumbnails and dynamic ranges of a cut result."""
    video = VideoObject(VIDEO_PATH)
    res = VideoCutter().cut(video)
    stable, _ = res.get_range()

    assert len(stable) == len(res.get_stable_range())
    assert isinstance(res.diff(res, auto_merge=True), VideoCutResultDiff)

    # default, vertical and saved-to-dir thumbnails are all arrays
    first_stable = stable[0]
    thumbnail_variants = (
        {},
        {"is_vertical": True},
        {"to_dir": "somewhere"},
    )
    for thumbnail_kwargs in thumbnail_variants:
        thumbnail = res.thumbnail(first_stable, **thumbnail_kwargs)
        assert isinstance(thumbnail, np.ndarray)

    # only checks that these calls do not raise
    for stage_range, threshold in (([4, 5], 0.95), ([1, 2], 0.85)):
        res.get_range_dynamic(stage_range, threshold=threshold)
def handle(self, video_path: str) -> bool:
    """
    cut the video, classify its stable ranges with a SVMClassifier and draw a report

    :param video_path: path to the target video
    :return: always True
    """
    super(NormalHandler, self).handle(video_path)

    video = VideoObject(video_path)
    if self.preload:
        video.load_frames()

    # --- cutter ---
    cut_result = VideoCutter().cut(video)
    stable_ranges, _ = cut_result.get_range(threshold=0.98, offset=3)
    data_home = cut_result.pick_and_save(
        stable_ranges, self.frame_count, to_dir=self.result_path
    )

    # --- classify ---
    classifier = SVMClassifier()
    classifier.load(data_home)
    classifier.train()
    self.classifier_result = classifier.classify(video, stable_ranges)

    # --- draw ---
    reporter = Reporter()
    reporter.draw(self.classifier_result, report_path=self.result_report_path)
    return True
def classify(
    video: typing.Union[str, VideoObject],
    data_home: str = None,
    model: str = None,
    # optional: these args below are sent for `cutter`
    compress_rate: float = 0.2,
    target_size: typing.Tuple[int, int] = None,
    offset: int = 3,
    limit: int = None,
    threshold: float = 0.95,
) -> ClassifierResult:
    """
    classify a video with some tagged pictures

    optional: if you have changed the default value in `cut`,
    you'd better keep them(offset and limit) equal.

    :param video: video path or object
    :param data_home: output path (dir)
    :param model: LinearSVC model (path); when given it is loaded instead of
        training on `data_home`
    :param compress_rate: before_pic * compress_rate = after_pic. default to 0.2
    :param target_size: (100, 200)
    :param offset:
        it will change the way to decided whether two ranges can be merged
        before: first_range.end == second_range.start
        after: first_range.end + offset >= second_range.start
    :param limit: ignore some ranges which are too short,
        5 means ignore stable ranges which length < 5
    :param threshold: cutter threshold
    :return: ClassifierResult
    """
    if isinstance(video, str):
        video = VideoObject(video)

    assert data_home or model, "classification should based on dataset or trained model"

    classifier = SVMClassifier(compress_rate=compress_rate,
                               target_size=target_size)
    if not model:
        # no trained model: learn from the tagged pictures
        classifier.load(data_home)
        classifier.train()
    else:
        classifier.load_model(model)

    # re cut
    cut_result, _ = cut(video, compress_rate=compress_rate, threshold=threshold)
    stable_ranges, _ = cut_result.get_range(offset=offset, limit=limit)
    return classifier.classify(video, stable_ranges)
def _cut(
    video: typing.Union[str, VideoObject],
    output_path: str = None,
    threshold: float = constants.DEFAULT_THRESHOLD,
    frame_count: int = 5,
    compress_rate: float = 0.2,
    target_size: typing.Tuple[int, int] = None,
    offset: int = 3,
    limit: int = None,
) -> typing.Tuple[VideoCutResult, str]:
    """
    cut the video, and get series of pictures (with tag)

    :param video: video path or object
    :param output_path: output path (dir)
    :param threshold: float, default to constants.DEFAULT_THRESHOLD.
        decided whether a range is stable. larger => more unstable ranges
    :param frame_count: default to 5, and finally you will get 5 frames for each range
    :param compress_rate: before_pic * compress_rate = after_pic. default to 0.2
    :param target_size: (100, 200)
    :param offset:
        it will change the way to decided whether two ranges can be merged
        before: first_range.end == second_range.start
        after: first_range.end + offset >= second_range.start
    :param limit: ignore some ranges which are too short,
        5 means ignore stable ranges which length < 5
    :return: tuple, (VideoCutResult, data_home)
    """
    if isinstance(video, str):
        video = VideoObject(video)

    res = VideoCutter().cut(video,
                            compress_rate=compress_rate,
                            target_size=target_size)
    stable, _ = res.get_range(threshold=threshold, limit=limit, offset=offset)
    data_home = res.pick_and_save(stable, frame_count, to_dir=output_path)

    # the cut result is dumped next to the pictures
    dump_dir = output_path or data_home
    res.dump(os.path.join(dump_dir, constants.CUT_RESULT_FILE_NAME))
    return res, data_home
def __init__(self, video: typing.Union[VideoObject, typing.Dict], start: int,
             end: int, ssim: typing.List[float], start_time: float,
             end_time: float):
    """
    build a range between two frames of a video

    :param video: VideoObject, or a dict of keyword arguments used to build one
    :param start: frame id where the range starts
    :param end: frame id where the range ends
    :param ssim: ssim values collected inside the range
    :param start_time: timestamp of the start frame
    :param end_time: timestamp of the end frame
    """
    self.video = VideoObject(**video) if isinstance(video, dict) else video

    # if length is 1
    # https://github.com/williamfzc/stagesepx/issues/9
    # a reversed range is normalised so that start <= end
    if start > end:
        start, end = end, start
        start_time, end_time = end_time, start_time

    # attributes are assigned in the original order (video, start, end, ...)
    self.start = start
    self.end = end
    self.ssim = ssim
    self.start_time = start_time
    self.end_time = end_time
def cut(
    self,
    video: typing.Union[str, VideoObject],
    block: int = None,
    window_size: int = None,
    window_coefficient: int = None,
    *_,
    **kwargs,
) -> VideoCutResult:
    """
    convert video file, into a VideoCutResult

    :param video: video file path or VideoObject
    :param block: default to 3. when block == 3, frame will be split into 3 * 3 = 9 parts
    :param window_size: default to 1
    :param window_coefficient: default to 2
    :param kwargs: stored in the result as ``cut_kwargs``
    :return: VideoCutResult holding the video and its range list
    """
    # args: any falsy value falls back to its default
    block = block or 3
    window_size = window_size or 1
    window_coefficient = window_coefficient or 2

    start_time = time.time()
    video = VideoObject(video) if isinstance(video, str) else video
    logger.info(f"start cutting: {video.path}")

    # if video contains 100 frames
    # it starts from 1, and length of list is 99, not 100
    # [Range(1-2), Range(2-3), Range(3-4) ... Range(99-100)]
    range_list = self._convert_video_into_range_list(
        video, block, window_size, window_coefficient)
    logger.info(f"cut finished: {video}")

    end_time = time.time()
    logger.debug(f"cutter cost: {end_time - start_time}")

    # TODO other analysis results can be added to VideoCutResult, such as AI cutter?
    return VideoCutResult(video, range_list, cut_kwargs=kwargs)
def analyse(
    video: typing.Union[str, VideoObject],
    output_path: str,
    pre_load: bool = True,
    threshold: float = 0.98,
    offset: int = 3,
    boost_mode: bool = True,
):
    """
    designed for https://github.com/williamfzc/stagesepx/issues/123

    cut the video, train a SVMClassifier on frames picked into a temporary
    directory, classify the stable ranges and draw the report

    :param video: video path or object
    :param output_path: where the report is written
    :param pre_load: only used when `video` is a path
    :param threshold: passed to `get_range`
    :param offset: passed to `get_range`
    :param boost_mode: passed to the classifier
    """
    if isinstance(video, str):
        video = VideoObject(video, pre_load=pre_load)

    res = VideoCutter().cut(video)
    stable, unstable = res.get_range(threshold=threshold, offset=offset)

    # the training pictures are only needed while the classifier runs
    with tempfile.TemporaryDirectory() as temp_dir:
        res.pick_and_save(stable, 5, to_dir=temp_dir)

        classifier = SVMClassifier()
        classifier.load(temp_dir)
        classifier.train()
        classify_result = classifier.classify(video,
                                              stable,
                                              boost_mode=boost_mode)

    reporter = Reporter()
    reporter.draw(
        classify_result,
        report_path=output_path,
        unstable_ranges=unstable,
        cut_result=res,
    )
def test_convert_first():
    """With fps=30 the loaded video is expected to hold 36 frames."""
    video = VideoObject(VIDEO_PATH, fps=30)
    video.load_frames()

    loaded_total = len(video.data)
    assert loaded_total == 36
def loads(cls, content: str) -> "VideoCutResult":
    """
    rebuild a result from its json text

    :param content: json text with a "video" object and a "range_list" array
    :return: instance of `cls` built from the decoded video and ranges
    """
    json_dict: dict = json.loads(content)

    video = VideoObject(**json_dict["video"])
    range_list = []
    for each in json_dict["range_list"]:
        range_list.append(VideoCutRange(**each))
    return cls(video, range_list)
from stagesepx.cutter import VideoCutter
from stagesepx.classifier import SVMClassifier
from stagesepx.reporter import Reporter
from stagesepx.video import VideoObject

# target video, loaded into memory before the analysis starts
video_path = "../videos/long.mp4"
video = VideoObject(video_path)
video.load_frames()

# --- cutter ---
# split the video into stable / unstable ranges
# and save 5 frames of each stable range
cutter = VideoCutter()
res = cutter.cut(video)
stable, unstable = res.get_range()
data_home = res.pick_and_save(stable, 5)

# --- classify ---
# train on the saved frames, then classify the stable ranges
cl = SVMClassifier(compress_rate=0.4)
cl.load(data_home)
cl.train()
classify_result = cl.classify(video, stable, keep_data=True)
# dict form of the classification result
result_dict = classify_result.to_dict()

# --- draw ---
r = Reporter()
r.draw(classify_result)
from stagesepx.cutter import VideoCutter, VideoCutResult from stagesepx.classifier import SVMClassifier from stagesepx.reporter import Reporter from stagesepx.hook import ExampleHook, CropHook, IgnoreHook import os video = "../demo.mp4" from stagesepx.video import VideoObject video = VideoObject( video, # fps 参数(>=0.9.0) # 结合 ffmpeg,在加载前对视频进行 fps 重整,使表现更加标准 # 需要预先安装 ffmpeg,并配置到环境变量中。即人工在命令行下运行 ffmpeg 有正常提示 # 例如 fps=30 即将视频转换为fps30的格式(不会覆盖原视频) # fps=30, ) # 预加载(>=0.8.0,会消耗一定内存) # 你可以利用视频预加载模式,大幅度提升分析速度 video.load_frames() # --- cut --- cutter = VideoCutter( # 步长,默认为1,通过它可以自行把握效率与颗粒度 # 设定为2时,会以2帧为一个单位进行遍历 # 即跳过一帧 step=1, # 默认为0.2,即将图片缩放为0.2倍 # 主要为了提高计算效率 # 如果你担心影响分析效果,可以将其提高
class VideoCutRange(object):
    """
    A closed range ``[start, end]`` of frame ids inside one video, together
    with the ssim / mse / psnr values collected between its frames.
    """

    def __init__(
        self,
        # TODO why can it be a dict?
        video: typing.Union[VideoObject, typing.Dict],
        start: int,
        end: int,
        # TODO need refactored ?
        ssim: typing.List[float],
        mse: typing.List[float],
        psnr: typing.List[float],
        start_time: float,
        end_time: float,
    ):
        """
        :param video: VideoObject, or a dict of keyword arguments used to build one
        :param start: frame id where the range starts
        :param end: frame id where the range ends
        :param ssim: ssim values collected inside the range
        :param mse: mse values collected inside the range
        :param psnr: psnr values collected inside the range
        :param start_time: timestamp of the start frame
        :param end_time: timestamp of the end frame
        """
        if isinstance(video, dict):
            self.video = VideoObject(**video)
        else:
            self.video = video

        self.start = start
        self.end = end
        self.ssim = ssim
        self.mse = mse
        self.psnr = psnr
        self.start_time = start_time
        self.end_time = end_time

        # if length is 1
        # https://github.com/williamfzc/stagesepx/issues/9
        # a reversed range is normalised so that start <= end
        if start > end:
            self.start, self.end = self.end, self.start
            self.start_time, self.end_time = self.end_time, self.start_time

        logger.debug(
            f"new a range: {self.start}({self.start_time}) - {self.end}({self.end_time})"
        )

    def can_merge(self, another: "VideoCutRange", offset: int = None, **_):
        """
        tell whether `another` can be appended to this range

        :param another: the range that would follow this one
        :param offset: when set, a gap of up to `offset` frames is tolerated;
            otherwise `another` must start exactly where this range ends
        :return: True if both ranges are continuous and share the same video path
        """
        if not offset:
            is_continuous = self.end == another.start
        else:
            is_continuous = self.end + offset >= another.start
        return is_continuous and self.video.path == another.video.path

    def merge(self, another: "VideoCutRange", **kwargs) -> "VideoCutRange":
        """
        build a new range covering this range and `another`

        :param another: the range that follows this one
        :param kwargs: passed to `can_merge`
        :return: new VideoCutRange with the metric lists concatenated
        :raises AssertionError: if the ranges can not be merged
        """
        assert self.can_merge(another, **kwargs)
        return __class__(
            self.video,
            self.start,
            another.end,
            self.ssim + another.ssim,
            self.mse + another.mse,
            self.psnr + another.psnr,
            self.start_time,
            another.end_time,
        )

    def contain(self, frame_id: int) -> bool:
        """tell whether `frame_id` is inside the closed range [start, end]"""
        # in python:
        # range(0, 10) => [0, 10)
        # range(0, 10 + 1) => [0, 10]
        return frame_id in range(self.start, self.end + 1)

    # alias
    contain_frame_id = contain

    def contain_image(self,
                      image_path: str = None,
                      image_object: np.ndarray = None,
                      *args,
                      **kwargs) -> typing.Dict[str, typing.Any]:
        """
        look for an image inside the first picked frame of this range

        :param image_path: path of the image to look for
        :param image_object: image to look for, as an array
        :param args: passed to `pick`
        :param kwargs: passed to `pick` and to the frame's `contain_image`
        :return: whatever the frame's `contain_image` returns
        """
        # todo pick only one picture?
        target_id = self.pick(*args, **kwargs)[0]
        operator = self.video.get_operator()
        frame = operator.get_frame_by_id(target_id)
        return frame.contain_image(image_path=image_path,
                                   image_object=image_object,
                                   **kwargs)

    def pick(self,
             frame_count: int = None,
             is_random: bool = None,
             *_,
             **__) -> typing.List[int]:
        """
        choose some frame ids inside this range

        :param frame_count: how many frames to pick, default to 3
        :param is_random: pick distinct random ids instead of evenly spread ones;
            at most as many ids as the range holds are returned
        :return: list of frame ids
        """
        if not frame_count:
            frame_count = 3
        logger.debug(f"pick {frame_count} frames "
                     f"from {self.start}({self.start_time}) "
                     f"to {self.end}({self.end_time}) "
                     f"on video {self.video.path}")

        if is_random:
            # the range is closed, so `end` itself is a valid candidate;
            # this also keeps single-frame ranges (start == end) pickable
            candidates = range(self.start, self.end + 1)
            # random.sample raises ValueError when asked for more items
            # than the population holds, so the request is clamped
            return random.sample(candidates,
                                 min(frame_count, len(candidates)))

        length = self.get_length()
        # https://github.com/williamfzc/stagesepx/issues/37
        frame_count += 1
        return [
            int(self.start + length / frame_count * index)
            for index in range(1, frame_count)
        ]

    def get_frames(self, frame_id_list: typing.List[int], *_,
                   **__) -> typing.List[VideoFrame]:
        """
        return a list of VideoFrame, usually works with pick
        """
        operator = self.video.get_operator()
        return [operator.get_frame_by_id(each_id) for each_id in frame_id_list]

    def pick_and_get(self, *args, **kwargs) -> typing.List[VideoFrame]:
        """pick frame ids (see `pick`) and return the matching frames"""
        picked = self.pick(*args, **kwargs)
        return self.get_frames(picked, *args, **kwargs)

    def get_length(self):
        """number of frames in the closed range [start, end]"""
        return self.end - self.start + 1

    def is_stable(self,
                  threshold: float = None,
                  psnr_threshold: float = None,
                  **_) -> bool:
        """
        tell whether this range is stable

        :param threshold: the mean ssim must be greater than it, default to 0.95
        :param psnr_threshold: when set, the mean psnr must be greater than it too
        :return: True if the range is stable
        """
        # IMPORTANT function!
        # it decided whether a range is stable => everything is based on it!
        if not threshold:
            threshold = 0.95

        # ssim
        res = np.mean(self.ssim) > threshold
        # psnr (double check if stable)
        if res and psnr_threshold:
            res = np.mean(self.psnr) > psnr_threshold

        # numpy comparisons give np.bool_; return a plain bool as annotated
        return bool(res)

    def is_loop(self, threshold: float = None, **_) -> bool:
        """
        tell whether the range ends on a frame similar to the one it starts on

        :param threshold: ssim between both frames must be greater than it,
            default to 0.95
        """
        if not threshold:
            threshold = 0.95
        operator = self.video.get_operator()
        start_frame = operator.get_frame_by_id(self.start)
        end_frame = operator.get_frame_by_id(self.end)
        return toolbox.compare_ssim(start_frame.data,
                                    end_frame.data) > threshold

    def diff(self, another: "VideoCutRange", *args,
             **kwargs) -> typing.List[float]:
        """
        compare frames picked from this range with frames picked from `another`

        :param another: the range to compare with
        :param args: passed to `pick_and_get` of both ranges
        :param kwargs: passed to `pick_and_get` of both ranges
        :return: result of `toolbox.multi_compare_ssim` on both picked lists
        """
        self_picked = self.pick_and_get(*args, **kwargs)
        another_picked = another.pick_and_get(*args, **kwargs)
        return toolbox.multi_compare_ssim(self_picked, another_picked)

    def __str__(self):
        return f"<VideoCutRange [{self.start}({self.start_time})-{self.end}({self.end_time})] ssim={self.ssim}>"

    __repr__ = __str__
def test_contain_image():
    """The first loaded frame is expected to contain the image at IMAGE_PATH."""
    video = VideoObject(VIDEO_PATH)
    video.load_frames()

    first_frame = video.data[0]
    match_result = first_frame.contain_image(image_path=IMAGE_PATH)
    assert match_result["ok"]
def one_step(
    video: typing.Union[str, VideoObject],
    output_path: str = None,
    threshold: float = 0.95,
    frame_count: int = 5,
    compress_rate: float = 0.2,
    target_size: typing.Tuple[int, int] = None,
    offset: int = 3,
    limit: int = None,
):
    """
    one step => cut, classifier, draw

    :param video: video path or object
    :param output_path: output path (dir)
    :param threshold: float, 0-1, default to 0.95. decided whether a range is stable. larger => more unstable ranges
    :param frame_count: default to 5, and finally you will get 5 frames for each range
    :param compress_rate: before_pic * compress_rate = after_pic. default to 0.2
    :param target_size: (100, 200)
    :param offset:
        it will change the way to decided whether two ranges can be merged
        before: first_range.end == second_range.start
        after: first_range.end + offset >= second_range.start
    :param limit: ignore some ranges which are too short,
        5 means ignore stable ranges which length < 5
    :return:
    """
    if isinstance(video, str):
        video = VideoObject(video)

    # the same range settings are used for cutting, classifying and drawing
    range_kwargs = {"threshold": threshold, "limit": limit, "offset": offset}
    frame_kwargs = {"compress_rate": compress_rate, "target_size": target_size}

    # --- cutter ---
    res, data_home = _cut(
        video,
        output_path,
        frame_count=frame_count,
        **range_kwargs,
        **frame_kwargs,
    )
    stable, _ = res.get_range(**range_kwargs)

    # --- classify ---
    classify_result = _classify(
        video,
        data_home=data_home,
        limit_range=stable,
        **frame_kwargs,
    )

    # --- draw ---
    report_path = os.path.join(data_home, constants.REPORT_FILE_NAME)
    reporter = Reporter()
    reporter.draw(
        classify_result,
        report_path=report_path,
        cut_result=res,
        # kwargs of get_range
        # otherwise these thumbnails may become different
        **range_kwargs,
    )
def _convert_video_into_range_list(self,
                                   video: VideoObject,
                                   block: int = None,
                                   *args,
                                   **kwargs) -> typing.List[VideoCutRange]:
    """
    compare every frame with the one `self.step` frames later and
    turn each pair into a VideoCutRange

    :param video: the video to walk through
    :param block: frames are split with `pic_split(data, block)` before being
        compared, default to 2; falls back to 1 if the split is not valid
    :param args: passed to the hooks
    :param kwargs: passed to the hooks
    :return: one range per compared pair; empty if the video does not hold
        at least two comparable frames
    """
    range_list: typing.List[VideoCutRange] = list()
    logger.info(
        f"total frame count: {video.frame_count}, size: {video.frame_size}"
    )

    # load the first two frames
    video_operator = video.get_operator()
    cur_frame = video_operator.get_frame_by_id(1)
    next_frame = video_operator.get_frame_by_id(1 + self.step)

    # the operator returns None for a missing frame:
    # without a pair of frames there is nothing to compare
    if cur_frame is None or next_frame is None:
        logger.warning("not enough frames to compare, no range generated")
        return range_list

    # hook (same arguments as every following frame gets)
    cur_frame.data = self._apply_hook(cur_frame.frame_id, cur_frame.data,
                                      *args, **kwargs)

    # check block
    if not block:
        block = 2
    if not self.is_block_valid(cur_frame.data, block):
        logger.warning(
            "array split does not result in an equal division, set block to 1"
        )
        block = 1

    while next_frame is not None:
        # hook
        next_frame.data = self._apply_hook(next_frame.frame_id,
                                           next_frame.data, *args, **kwargs)

        logger.debug(
            f"computing {cur_frame.frame_id}({cur_frame.timestamp}) & {next_frame.frame_id}({next_frame.timestamp}) ..."
        )
        start_part_list = self.pic_split(cur_frame.data, block)
        end_part_list = self.pic_split(next_frame.data, block)

        # find the min ssim and the max mse / psnr
        ssim = 1.0
        mse = 0.0
        psnr = 0.0
        for part_index, (each_start, each_end) in enumerate(
                zip(start_part_list, end_part_list)):
            part_ssim = toolbox.compare_ssim(each_start, each_end)
            if part_ssim < ssim:
                ssim = part_ssim

            # mse is very sensitive
            part_mse = toolbox.calc_mse(each_start, each_end)
            if part_mse > mse:
                mse = part_mse

            part_psnr = toolbox.calc_psnr(each_start, each_end)
            if part_psnr > psnr:
                psnr = part_psnr
            logger.debug(
                f"part {part_index}: ssim={part_ssim}; mse={part_mse}; psnr={part_psnr}"
            )
        logger.debug(
            f"between {cur_frame.frame_id} & {next_frame.frame_id}: ssim={ssim}; mse={mse}; psnr={psnr}"
        )

        range_list.append(
            VideoCutRange(
                video,
                start=cur_frame.frame_id,
                end=next_frame.frame_id,
                ssim=[ssim],
                mse=[mse],
                psnr=[psnr],
                start_time=cur_frame.timestamp,
                end_time=next_frame.timestamp,
            ))

        # load the next one
        cur_frame = next_frame
        next_frame = video_operator.get_frame_by_id(next_frame.frame_id +
                                                    self.step)

    return range_list
def test_preload_with_hook():
    """Frames can be loaded after a preload hook has been registered."""
    video = VideoObject(VIDEO_PATH)
    video.add_preload_hook(ExampleHook())
    video.load_frames()