def _classify(
    video: typing.Union[str, VideoObject],
    data_home: str = None,
    model: str = None,
    # optional: these args below are sent for `cutter`
    compress_rate: float = 0.2,
    target_size: typing.Tuple[int, int] = None,
    limit_range: typing.List[VideoCutRange] = None,
) -> ClassifierResult:
    """
    Classify a video with an SVM classifier.

    The classifier is restored from `model` when one is given; otherwise it is
    loaded with the tagged pictures in `data_home` and trained on the spot.

    :param video: video path or object (a path is wrapped into a VideoObject)
    :param data_home: directory holding the tagged pictures, used when no model is given
    :param model: LinearSVC model (path)
    :param compress_rate: before_pic * compress_rate = after_pic. default to 0.2
    :param target_size: (100, 200)
    :param limit_range: forwarded to the classifier as `limit_range`
    :return: ClassifierResult
    """
    if isinstance(video, str):
        video = VideoObject(video)

    # at least one source of knowledge is required
    assert data_home or model, "classification should based on dataset or trained model"

    classifier = SVMClassifier(compress_rate=compress_rate, target_size=target_size)
    if not model:
        # no ready-made model: build one from the dataset
        classifier.load(data_home)
        classifier.train()
    else:
        classifier.load_model(model)

    result = classifier.classify(video, limit_range=limit_range)
    return result
def test_work_with_cutter():
    """Classify only the stable ranges reported by the cutter, then draw a report."""
    classifier = SVMClassifier()
    classifier.load_model(MODEL_PATH)

    # first element of the pair returned by get_range() is the stable part
    stable_ranges = cutter_res.get_range()[0]
    result = classifier.classify(VIDEO_PATH, stable_ranges)

    # --- draw ---
    _draw_report(result)
def test_save_and_load():
    """Load a saved model, classify the whole video with it and draw a report."""
    # test save and load
    classifier = SVMClassifier()
    classifier.load_model(MODEL_PATH)

    result = classifier.classify(VIDEO_PATH)

    # --- draw ---
    _draw_report(result)
def test_keep_data():
    """With keep_data=True the first classified frame must contain a crop of the reference image."""
    classifier = SVMClassifier()
    classifier.load_model(MODEL_PATH)

    stable_ranges, _ = cutter_res.get_range()
    result = classifier.classify(VIDEO_PATH, stable_ranges, keep_data=True)

    # todo findit bug here
    full_image = toolbox.imread(IMAGE_PATH)
    cropped = full_image[0:20, 0:20]

    first_frame = result.data[0]
    assert first_frame.contain_image(image_object=cropped)
def test_dump_and_load():
    """A classifier result dumped to json and loaded back must serialize identically."""
    classifier = SVMClassifier()
    classifier.load_model(MODEL_PATH)
    original = classifier.classify(VIDEO_PATH, boost_mode=False)

    target_file = "classify_result.json"
    original.dump(target_file)
    restored = ClassifierResult.load(target_file)

    assert original.dumps() == restored.dumps()
def test_result():
    """Smoke-test the marking and query helpers of a classifier result."""
    classifier = SVMClassifier()
    classifier.load_model(MODEL_PATH)

    stable_ranges, _ = cutter_res.get_range()
    result = classifier.classify(VIDEO_PATH, stable_ranges, keep_data=True)

    as_dict = result.to_dict()
    assert as_dict

    # these calls only need to run without raising
    result.mark_range(1, 3, "0")
    result.mark_range_unstable(1, 3)
    result.get_important_frame_list()
def test_save_and_load():
    """Results saved through Reporter must come back with the same length and content."""
    classifier = SVMClassifier()
    classifier.load_model(MODEL_PATH)
    before = classifier.classify(VIDEO_PATH)

    saved_to = "save.json"
    Reporter().save(saved_to, before)
    assert os.path.isfile(saved_to)

    after = Reporter.load(saved_to)
    assert len(before) == len(after)

    # compare element by element
    for original_item, loaded_item in zip(before, after):
        assert original_item.to_dict() == loaded_item.to_dict()
def test_result():
    """Exercise the range helpers of a classifier result and check stage "1" boundaries."""
    classifier = SVMClassifier()
    classifier.load_model(MODEL_PATH)

    stable_ranges, _ = cutter_res.get_range()
    result = classifier.classify(VIDEO_PATH, stable_ranges, keep_data=True)

    as_dict = result.to_dict()
    assert as_dict

    # these calls only need to run without raising
    result.mark_range(1, 3, "0")
    result.mark_range_unstable(1, 3)
    result.get_important_frame_list()
    result.get_stage_range()
    result.get_specific_stage_range("0")
    result.get_not_stable_stage_range()

    first_of_stage = result.first("1")
    assert first_of_stage.frame_id == 20
    last_of_stage = result.last("1")
    assert last_of_stage.frame_id == 21
def test_save_and_load():
    """Round-trip a classifier result through Reporter and sample its stable stages."""
    classifier = SVMClassifier()
    classifier.load_model(MODEL_PATH)
    before = classifier.classify(VIDEO_PATH, boost_mode=False)

    saved_to = "save.json"
    reporter = Reporter()
    reporter.add_extra("some_name", "some_value")
    reporter.save(saved_to, before)
    assert os.path.isfile(saved_to)

    after = Reporter.load(saved_to)
    assert before.get_length() == after.get_length()

    # compare frame by frame
    for original_item, loaded_item in zip(before.data, after.data):
        assert original_item.to_dict() == loaded_item.to_dict()

    sample = reporter.get_stable_stage_sample(before)
    assert isinstance(sample, np.ndarray)
def classify(
    video: typing.Union[str, VideoObject],
    data_home: str = None,
    model: str = None,
    # optional: these args below are sent for `cutter`
    compress_rate: float = 0.2,
    target_size: typing.Tuple[int, int] = None,
    offset: int = 3,
    limit: int = None,
    threshold: float = 0.95,
) -> ClassifierResult:
    """
    Classify the stable ranges of a video with an SVM classifier.

    The classifier is restored from `model` when one is given; otherwise it is
    loaded with the tagged pictures in `data_home` and trained on the spot.
    The video is then cut again and only its stable ranges are classified.

    optional: if you have changed the default value in `cut`,
    you'd better keep them (offset and limit) equal.

    :param video: video path or object (a path is wrapped into a VideoObject)
    :param data_home: directory holding the tagged pictures, used when no model is given
    :param model: LinearSVC model (path)
    :param compress_rate: before_pic * compress_rate = after_pic. default to 0.2
    :param target_size: (100, 200)
    :param offset:
        it changes the way to decide whether two ranges can be merged
        before: first_range.end == second_range.start
        after: first_range.end + offset >= second_range.start
    :param limit: ignore ranges which are too short, 5 means ignore stable ranges whose length < 5
    :param threshold: cutter threshold
    :return: ClassifierResult
    """
    if isinstance(video, str):
        video = VideoObject(video)

    # at least one source of knowledge is required
    assert data_home or model, "classification should based on dataset or trained model"

    classifier = SVMClassifier(compress_rate=compress_rate, target_size=target_size)
    if not model:
        # no ready-made model: build one from the dataset
        classifier.load(data_home)
        classifier.train()
    else:
        classifier.load_model(model)

    # re cut
    cut_result, _ = cut(video, compress_rate=compress_rate, threshold=threshold)
    stable_ranges, _ = cut_result.get_range(offset=offset, limit=limit)

    result = classifier.classify(video, stable_ranges)
    return result
def calculate_result(_cl, _SVM_or_Keras, _param, _from_movie_2_picture, _model_file, _video_path_for_forecast):
    """
    Classify every video to forecast with a saved model and collect the results.

    :param _cl: not used; the classifier is always rebuilt from `_SVM_or_Keras`
    :param _SVM_or_Keras: classifier selector, '1' for SVMClassifier and '2' for
        KerasClassifier. Surrounding whitespace (for example the trailing
        newline of a line read from input) is ignored.
    :param _param: settings sequence; `_param[0]` is used as the compress rate and
        the whole sequence is forwarded to `get_data.get_range`
    :param _from_movie_2_picture: forwarded untouched to `get_data.get_range` and
        `public_fun.write_result_to_local`
    :param _model_file: path of the saved model to load
    :param _video_path_for_forecast: passed to `public_fun.get_mp4file_name` to
        list the videos to classify
    :return: list with one entry per video, each being whatever
        `public_fun.write_result_to_local` returned
    :raises ValueError: if `_SVM_or_Keras` selects neither classifier
    """
    # Normalise once so '1' and '1\n' select the same classifier.
    selector = str(_SVM_or_Keras).strip()

    if selector == '1':
        print('使用SVM进行预测')
        cl = SVMClassifier(
            # HoG feature extraction is used here. It can be turned off to work
            # on the raw pictures directly: feature_type='raw'
            feature_type="hog",
            # Scale factor applied to every picture, mainly to speed up the
            # computation; raise it if you worry about the analysis quality.
            compress_rate=_param[0],
        )
        # load the saved model
        cl.load_model(_model_file)
    elif selector == '2':
        print('使用KerasClassifier进行预测')
        cl = KerasClassifier(
            compress_rate=_param[0],
            # The picture format must stay consistent with the training set.
            # train_model.py used 600x800, so the same value would be set here.
            # target_size=(600, 800),
        )
        cl.load_model(_model_file)
    else:
        # Without this guard `cl` stayed unbound and the failure only showed up
        # later, as an UnboundLocalError in the middle of the loop below.
        raise ValueError(
            f"unknown classifier selector {_SVM_or_Keras!r}: expected '1' (SVM) or '2' (Keras)"
        )

    # start predicting
    _forecast_result = []
    # list the mp4 files of the forecast folder
    forecast_video_list = public_fun.get_mp4file_name(_video_path_for_forecast)
    for video_file in forecast_video_list:
        # cut the video to get the ranges to classify
        stable = get_data.get_range('forecast', video_file, _param, _from_movie_2_picture)

        classify_result = cl.classify(video_file, stable, keep_data=True)
        result_dict = classify_result.to_dict()
        _forecast_result.append(
            public_fun.write_result_to_local(video_file, _from_movie_2_picture, result_dict, classify_result))

    # _forecast_result = [['5.mp4', '3.161888888888889', '1',
    #                      '4.155888888888889', '1', '8.040888888888889', '1']]
    return _forecast_result
def test_result():
    """Exercise marking, range queries and stage-order checks of a classifier result."""
    classifier = SVMClassifier()
    classifier.load_model(MODEL_PATH)

    stable_ranges, _ = cutter_res.get_range()
    result = classifier.classify(VIDEO_PATH, stable_ranges, keep_data=True)

    as_dict = result.to_dict()
    assert as_dict

    # these calls only need to run without raising
    result.mark_range(1, 3, "0")
    result.mark_range_unstable(1, 3)
    result.get_important_frame_list()
    result.get_stage_range()
    result.get_specific_stage_range("0")
    result.get_not_stable_stage_range()
    result.mark_range_ignore(23, 24)
    result.time_cost_between("0", "1")

    assert result.contain("1")
    assert result.first("1").frame_id == 20
    assert result.last("1").frame_id == 21

    # every one of these stage sequences has to be accepted, checked in this order
    accepted_orders = (
        ["0", "0", "1", "2"],
        ["0", "0", "2"],
        ["0", "1"],
        ["0", "2"],
        ["1", "2"],
    )
    for stage_order in accepted_orders:
        assert result.is_order_correct(stage_order)
def test_boost():
    """Classifying with boost_mode=True must yield a truthy result."""
    classifier = SVMClassifier()
    classifier.load_model(MODEL_PATH)

    boosted = classifier.classify(VIDEO_PATH, boost_mode=True)
    assert boosted
"""
Retraining

When the business changes, it is hard to avoid having to adjust the model
- If the change is large, training a brand-new model is recommended
- If the change is small, the existing model can be adjusted as shown below
"""
from stagesepx.classifier import SVMClassifier

DATA_HOME = './cut_result'
cl = SVMClassifier()

# load the data
cl.load(DATA_HOME)
# load the old model
cl.load_model('model.pkl')
# once the data has been loaded, training has to be run first
cl.train()
# save the new model
cl.save_model('new_model.pkl')
# or you can simply overwrite the old model
# cl.save_model('model.pkl', overwrite=True)
from stagesepx.classifier import SVMClassifier
from stagesepx.cutter import VideoCutter
from stagesepx.reporter import Reporter

TARGET_VIDEO = '../../demo.mp4'

# cut
# `cut` is still used here, mainly so that the changing parts of the video are handled well
# but pick_and_save is not needed this time, because the classifier will not use the cutter's data
cutter = VideoCutter()
res = cutter.cut(TARGET_VIDEO)
stable, _ = res.get_range()

# classify
# the arguments here need to stay consistent with train.py, if you changed any of them
cl = SVMClassifier()
# the classifier comes from a saved model file instead of being trained here
cl.load_model('./model.pkl')

classify_result = cl.classify(
    TARGET_VIDEO,
    stable,
)

# draw the report; the cut result is handed over together with the classify result
r = Reporter()
r.draw(
    classify_result,
    report_path='report.html',
    cut_result=res,
)
def run(config: typing.Union[dict, str]):
    """
    run with config

    Flow: validate the config, load the video, cut it, build a classifier
    (loaded from `classifier.model`, or trained on frames picked from the
    stable ranges), classify the stable ranges, optionally run the `calc`
    operators and write their results as json, then draw the report to
    `output`.

    :param config: config file path (must be a .json file), or a preload dict
    :return: None
    :raises AssertionError: if the config file is missing or is not json, if the
        model file does not exist, or if the calc output file already exists
    """

    class _VideoUserConfig(BaseModel):
        path: str
        pre_load: bool = True
        fps: int = None

    class _CutterUserConfig(BaseModel):
        threshold: float = None
        frame_count: int = None
        offset: int = None
        limit: int = None
        block: int = None

        # common
        compress_rate: float = None
        target_size: typing.Tuple[int, int] = None

    class _ClassifierType(Enum):
        SVM = "svm"
        KERAS = "keras"

    class _ClassifierUserConfig(BaseModel):
        boost_mode: bool = None
        classifier_type: _ClassifierType = _ClassifierType.SVM
        model: str = None

        # common
        compress_rate: float = None
        target_size: typing.Tuple[int, int] = None

    class _CalcOperatorType(Enum):
        BETWEEN = "between"
        DISPLAY = "display"

    class _CalcOperator(BaseModel):
        name: str
        calc_type: _CalcOperatorType
        args: dict = dict()

    class _CalcUserConfig(BaseModel):
        output: str = None
        ignore_error: bool = None
        operators: typing.List[_CalcOperator] = None

    class _ExtraUserConfig(BaseModel):
        save_train_set: str = None

    class UserConfig(BaseModel):
        output: str
        video: _VideoUserConfig
        cutter: _CutterUserConfig = _CutterUserConfig()
        classifier: _ClassifierUserConfig = _ClassifierUserConfig()
        calc: _CalcUserConfig = _CalcUserConfig()
        extras: _ExtraUserConfig = _ExtraUserConfig()

    if isinstance(config, str):
        # path
        config_path = pathlib.Path(config)
        assert config_path.is_file(), f"no config file found in {config_path}"

        # todo: support different types in the future
        assert config_path.as_posix().endswith(
            ".json"), "config file should be json format"
        with open(config_path, encoding=constants.CHARSET) as f:
            config = json.load(f)

    config = UserConfig(**config)
    logger.info(f"config: {config}")

    # main flow
    video = VideoObject(
        # fmt: off
        path=config.video.path,
        fps=config.video.fps,
    )
    if config.video.pre_load:
        video.load_frames()

    # cut
    cutter = VideoCutter(
        # fmt: off
        compress_rate=config.cutter.compress_rate,
        target_size=config.cutter.target_size,
    )
    res = cutter.cut(
        # fmt: off
        video=video,
        block=config.cutter.block,
    )
    # `limit` is part of the cutter config, so it has to reach get_range too;
    # it used to be accepted by the schema and then silently ignored.
    # Only the stable ranges are used below.
    stable, _ = res.get_range(
        # fmt: off
        threshold=config.cutter.threshold,
        offset=config.cutter.offset,
        limit=config.cutter.limit,
    )

    with tempfile.TemporaryDirectory() as temp_dir:
        # classify
        if config.classifier.classifier_type is _ClassifierType.SVM:
            cl = SVMClassifier(
                # fmt: off
                compress_rate=config.classifier.compress_rate,
                target_size=config.classifier.target_size,
            )
        elif config.classifier.classifier_type is _ClassifierType.KERAS:
            # imported only when the keras classifier is actually requested
            from stagesepx.classifier.keras import KerasClassifier

            cl = KerasClassifier(
                # fmt: off
                compress_rate=config.classifier.compress_rate,
                target_size=config.classifier.target_size,
            )
        # validation has been applied by pydantic
        # so no `else`

        if config.classifier.model:
            # no need to retrain
            model_path = pathlib.Path(config.classifier.model)
            assert model_path.is_file(), f"file {model_path} not existed"
            cl.load_model(model_path)
        else:
            # train a new model
            # the train set lands in the temp dir unless the user asked to keep it
            train_set_dir = config.extras.save_train_set or temp_dir
            os.makedirs(train_set_dir, exist_ok=True)

            res.pick_and_save(
                # fmt: off
                stable,
                frame_count=config.cutter.frame_count,
                to_dir=train_set_dir,
            )
            cl.train(data_path=train_set_dir)

    # start classifying
    classify_result = cl.classify(
        # fmt: off
        video,
        stable,
        boost_mode=config.classifier.boost_mode,
    )

    # calc
    def _calc_display() -> dict:
        # jsonify
        return json.loads(classify_result.dumps())

    def _calc_between(*, from_stage: str = None, to_stage: str = None) -> dict:
        assert classify_result.contain(
            from_stage), f"no stage {from_stage} found in result"
        assert classify_result.contain(
            to_stage), f"no stage {to_stage} found in result"
        # measured from the last frame of `from_stage` to the first frame of `to_stage`
        from_frame = classify_result.last(from_stage)
        to_frame = classify_result.first(to_stage)
        cost = to_frame.timestamp - from_frame.timestamp
        return {
            "from": from_frame.frame_id,
            "to": to_frame.frame_id,
            "cost": cost,
        }

    _calc_func_dict = {
        _CalcOperatorType.BETWEEN: _calc_between,
        _CalcOperatorType.DISPLAY: _calc_display,
    }
    calc_output = config.calc.output
    if calc_output:
        output_path = pathlib.Path(calc_output)
        assert not output_path.is_file(), f"file {output_path} already existed"
        result = []
        # `operators` defaults to None; treat it as "nothing to calculate"
        # instead of failing on iteration
        for each_calc in config.calc.operators or []:
            func = _calc_func_dict[each_calc.calc_type]
            try:
                func_ret = func(**each_calc.args)
            except Exception as e:
                if not config.calc.ignore_error:
                    raise
                # errors are tolerated: keep the traceback as this operator's result
                logger.warning(e)
                func_ret = traceback.format_exc()
            calc_ret = {
                "name": each_calc.name,
                "type": each_calc.calc_type.value,
                "result": func_ret,
            }
            result.append(calc_ret)
        with open(output_path, "w", encoding=constants.CHARSET) as f:
            json.dump(result, f)

    # draw
    r = Reporter()
    r.draw(
        # fmt: off
        classify_result,
        report_path=config.output,
    )