示例#1
0
def _classify(
    video: typing.Union[str, VideoObject],
    data_home: str = None,
    model: str = None,
    # optional: these args below are sent for `cutter`
    compress_rate: float = 0.2,
    target_size: typing.Tuple[int, int] = None,
    limit_range: typing.List[VideoCutRange] = None,
) -> ClassifierResult:
    """
    Classify a video with an SVMClassifier.

    The classifier is restored from `model` when one is given; otherwise it
    loads the dataset under `data_home` and trains on it before classifying.

    :param video: video path or object; a path is wrapped in a VideoObject
    :param data_home: dataset directory handed to `load`; only used when `model` is empty
    :param model: LinearSVC model (path); takes precedence over `data_home`
    :param compress_rate: before_pic * compress_rate = after_pic. default to 0.2
    :param target_size: e.g. (100, 200); forwarded to the classifier
    :param limit_range: forwarded to `SVMClassifier.classify` as `limit_range`

    :return: the ClassifierResult produced by the classifier
    :raises AssertionError: when neither `data_home` nor `model` is provided
    """
    # normalise the input first: downstream calls receive a VideoObject
    target = VideoObject(video) if isinstance(video, str) else video

    assert data_home or model, "classification should based on dataset or trained model"
    classifier = SVMClassifier(compress_rate=compress_rate, target_size=target_size)

    if not model:
        # no saved model: build one from the dataset
        classifier.load(data_home)
        classifier.train()
    else:
        classifier.load_model(model)
    return classifier.classify(target, limit_range=limit_range)
def test_work_with_cutter():
    """Classify the first group of ranges returned by the cutter result, then draw the report."""
    classifier = SVMClassifier()
    classifier.load_model(MODEL_PATH)
    stable_ranges, _unstable = cutter_res.get_range()
    result = classifier.classify(VIDEO_PATH, stable_ranges)

    # --- draw ---
    _draw_report(result)
示例#3
0
def test_save_and_load():
    """Restore a saved model, classify the video without cutter ranges and draw the report."""
    classifier = SVMClassifier()
    classifier.load_model(MODEL_PATH)
    result = classifier.classify(VIDEO_PATH)

    # --- draw ---
    _draw_report(result)
def test_keep_data():
    """With `keep_data=True`, the first classified frame must contain a 20x20 crop of the sample image."""
    classifier = SVMClassifier()
    classifier.load_model(MODEL_PATH)
    stable_ranges, _unstable = cutter_res.get_range()
    result = classifier.classify(VIDEO_PATH, stable_ranges, keep_data=True)

    # todo findit bug here
    full_image = toolbox.imread(IMAGE_PATH)
    patch = full_image[0:20, 0:20]
    first_frame = result.data[0]
    assert first_frame.contain_image(image_object=patch)
def test_dump_and_load():
    """
    Dump a classification result to json and load it back unchanged.

    The json file lives in a temporary directory, so the test leaves no
    artefact in the working directory and always starts from a path that
    does not exist yet.
    """
    # local imports keep this block self-contained
    import os
    import tempfile

    cl = SVMClassifier()
    cl.load_model(MODEL_PATH)
    classify_result = cl.classify(VIDEO_PATH, boost_mode=False)

    with tempfile.TemporaryDirectory() as temp_dir:
        json_path = os.path.join(temp_dir, "classify_result.json")
        classify_result.dump(json_path)

        res_from_file = ClassifierResult.load(json_path)
        # compare while the file still exists, in case loading is lazy
        assert classify_result.dumps() == res_from_file.dumps()
示例#6
0
def test_result():
    """Smoke-test the marking and frame-list helpers of a classification result."""
    classifier = SVMClassifier()
    classifier.load_model(MODEL_PATH)
    stable_ranges, _unstable = cutter_res.get_range()
    result = classifier.classify(VIDEO_PATH, stable_ranges, keep_data=True)

    assert result.to_dict()
    # the calls below only need to complete without raising
    result.mark_range(1, 3, "0")
    result.mark_range_unstable(1, 3)
    result.get_important_frame_list()
def test_save_and_load():
    """
    Save a classification result through Reporter and load it back.

    The file is written inside a temporary directory, so nothing is left in
    the working directory and a rerun never meets a stale `save.json`.
    """
    # local import keeps this block self-contained
    import tempfile

    cl = SVMClassifier()
    cl.load_model(MODEL_PATH)
    classify_result = cl.classify(VIDEO_PATH)

    with tempfile.TemporaryDirectory() as temp_dir:
        result_file = os.path.join(temp_dir, "save.json")
        reporter = Reporter()
        reporter.save(result_file, classify_result)
        assert os.path.isfile(result_file)
        classify_result_after = Reporter.load(result_file)

        # the reloaded result must match the original item by item
        assert len(classify_result) == len(classify_result_after)
        for i, j in zip(classify_result, classify_result_after):
            assert i.to_dict() == j.to_dict()
示例#8
0
def test_result():
    """Smoke-test the result helpers and pin the first/last frame ids of stage "1"."""
    classifier = SVMClassifier()
    classifier.load_model(MODEL_PATH)
    stable_ranges, _unstable = cutter_res.get_range()
    result = classifier.classify(VIDEO_PATH, stable_ranges, keep_data=True)

    assert result.to_dict()
    # the calls below only need to complete without raising
    result.mark_range(1, 3, "0")
    result.mark_range_unstable(1, 3)
    result.get_important_frame_list()
    result.get_stage_range()
    result.get_specific_stage_range("0")
    result.get_not_stable_stage_range()

    first_of_stage = result.first("1")
    assert first_of_stage.frame_id == 20
    last_of_stage = result.last("1")
    assert last_of_stage.frame_id == 21
def test_save_and_load():
    """
    Save a classification result (plus an extra field) through Reporter,
    load it back, and check the stable-stage sample is a numpy array.

    The file is written inside a temporary directory, so nothing is left in
    the working directory and a rerun never meets a stale `save.json`.
    """
    # local import keeps this block self-contained
    import tempfile

    cl = SVMClassifier()
    cl.load_model(MODEL_PATH)
    classify_result = cl.classify(VIDEO_PATH, boost_mode=False)

    reporter = Reporter()
    reporter.add_extra("some_name", "some_value")

    with tempfile.TemporaryDirectory() as temp_dir:
        result_file = os.path.join(temp_dir, "save.json")
        reporter.save(result_file, classify_result)
        assert os.path.isfile(result_file)
        classify_result_after = Reporter.load(result_file)

        # the reloaded result must match the original item by item
        assert classify_result.get_length() == classify_result_after.get_length()
        for i, j in zip(classify_result.data, classify_result_after.data):
            assert i.to_dict() == j.to_dict()

    assert isinstance(reporter.get_stable_stage_sample(classify_result),
                      np.ndarray)
示例#10
0
def classify(
    video: typing.Union[str, VideoObject],
    data_home: str = None,
    model: str = None,
    # optional: these args below are sent for `cutter`
    compress_rate: float = 0.2,
    target_size: typing.Tuple[int, int] = None,
    offset: int = 3,
    limit: int = None,
    threshold: float = 0.95,
) -> ClassifierResult:
    """
    Cut a video again, then classify the resulting stable ranges with an SVMClassifier.

    The classifier is restored from `model` when one is given; otherwise it
    loads the dataset under `data_home` and trains on it.
    If you changed the defaults when calling `cut`, keep `offset` and `limit` equal here.

    :param video: video path or object; a path is wrapped in a VideoObject
    :param data_home: dataset directory handed to `load`; only used when `model` is empty
    :param model: LinearSVC model (path); takes precedence over `data_home`
    :param compress_rate: before_pic * compress_rate = after_pic. default to 0.2
    :param target_size: e.g. (100, 200); forwarded to the classifier
    :param offset:
        changes how two ranges are judged mergeable
        before: first_range.end == second_range.start
        after: first_range.end + offset >= second_range.start
    :param limit: ignore ranges that are too short, 5 means ignore stable ranges whose length < 5
    :param threshold: cutter threshold

    :return: the ClassifierResult produced by the classifier
    :raises AssertionError: when neither `data_home` nor `model` is provided
    """
    # normalise the input first: downstream calls receive a VideoObject
    target = VideoObject(video) if isinstance(video, str) else video

    assert data_home or model, "classification should based on dataset or trained model"
    classifier = SVMClassifier(compress_rate=compress_rate, target_size=target_size)

    if not model:
        # no saved model: build one from the dataset
        classifier.load(data_home)
        classifier.train()
    else:
        classifier.load_model(model)

    # re cut: only the first item of each returned pair is needed
    cut_result, _cut_extra = cut(target, compress_rate=compress_rate, threshold=threshold)
    stable_ranges, _unstable = cut_result.get_range(offset=offset, limit=limit)
    return classifier.classify(target, stable_ranges)
示例#11
0
def calculate_result(_cl, _SVM_or_Keras, _param, _from_movie_2_picture,
                     _model_file, _video_path_for_forecast):
    """
    Classify every mp4 file found for forecasting with a previously saved model.

    :param _cl: unused; kept only so existing callers keep working
    :param _SVM_or_Keras: classifier selector, exactly '1\\n' for SVMClassifier
        or '2\\n' for KerasClassifier (the trailing newline is part of the value)
    :param _param: sequence whose first item is used as `compress_rate`;
        also forwarded as a whole to `get_data.get_range`
    :param _from_movie_2_picture: forwarded unchanged to `get_data.get_range`
        and `public_fun.write_result_to_local`
    :param _model_file: model path handed to `load_model`
    :param _video_path_for_forecast: location handed to `public_fun.get_mp4file_name`
    :return: list with one entry per video, each being the value returned by
        `public_fun.write_result_to_local`
    :raises ValueError: when `_SVM_or_Keras` is not one of the two supported selectors
    """
    if _SVM_or_Keras == '1\n':
        print('使用SVM进行预测')
        cl = SVMClassifier(
            # HoG is used for feature extraction by default. You can turn it off and
            # train/test on the raw pictures directly with feature_type='raw'
            feature_type="hog",
            # defaults to 0.2, i.e. pictures are scaled to 0.2x, mainly for speed;
            # raise it if you worry about the analysis quality
            compress_rate=_param[0],
        )
    elif _SVM_or_Keras == '2\n':
        print('使用KerasClassifier进行预测')
        cl = KerasClassifier(
            compress_rate=_param[0],
            # the data format must match the training set; train_model.py used
            # 600x800, so the same value would be set here
            # target_size=(600, 800),
        )
    else:
        # fail early with a clear message instead of hitting an unbound `cl` later
        raise ValueError(
            f"unsupported classifier selector: {_SVM_or_Keras!r}")

    # load the trained model (same call for both classifier kinds)
    cl.load_model(_model_file)

    # start predicting
    _forecast_result = []

    # list of mp4 files to forecast
    forecast_video_list = public_fun.get_mp4file_name(_video_path_for_forecast)

    for video_file in forecast_video_list:
        # cut the video first, then classify only the ranges the cutter returned
        stable = get_data.get_range('forecast', video_file, _param,
                                    _from_movie_2_picture)
        classify_result = cl.classify(video_file, stable, keep_data=True)
        result_dict = classify_result.to_dict()

        _forecast_result.append(
            public_fun.write_result_to_local(video_file, _from_movie_2_picture,
                                             result_dict, classify_result))
        # _forecast_result = [['5.mp4', '3.161888888888889', '1',
        # '4.155888888888889', '1', '8.040888888888889', '1']]

    return _forecast_result
示例#12
0
def test_result():
    """Smoke-test the result helpers and pin stage "1" frame ids and the accepted stage orders."""
    classifier = SVMClassifier()
    classifier.load_model(MODEL_PATH)
    stable_ranges, _unstable = cutter_res.get_range()
    result = classifier.classify(VIDEO_PATH, stable_ranges, keep_data=True)

    assert result.to_dict()
    # the calls below only need to complete without raising
    result.mark_range(1, 3, "0")
    result.mark_range_unstable(1, 3)
    result.get_important_frame_list()
    result.get_stage_range()
    result.get_specific_stage_range("0")
    result.get_not_stable_stage_range()
    result.mark_range_ignore(23, 24)
    result.time_cost_between("0", "1")

    assert result.contain("1")
    first_of_stage = result.first("1")
    assert first_of_stage.frame_id == 20
    last_of_stage = result.last("1")
    assert last_of_stage.frame_id == 21

    # every one of these stage orders must be reported as correct
    accepted_orders = (
        ["0", "0", "1", "2"],
        ["0", "0", "2"],
        ["0", "1"],
        ["0", "2"],
        ["1", "2"],
    )
    for stage_order in accepted_orders:
        assert result.is_order_correct(stage_order)
示例#13
0
def test_boost():
    """Classifying with `boost_mode=True` must yield a truthy result."""
    classifier = SVMClassifier()
    classifier.load_model(MODEL_PATH)
    boosted_result = classifier.classify(VIDEO_PATH, boost_mode=True)
    assert boosted_result
示例#14
0
"""
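Retraining an existing model

When the business changes, the model will sooner or later need adjusting
- if the change is large, training a brand-new model is recommended
- if the change is small, the existing model can be adjusted as shown below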
"""

from stagesepx.classifier import SVMClassifier

# directory handed to `cl.load` below
DATA_HOME = './cut_result'
cl = SVMClassifier()

# load the data
cl.load(DATA_HOME)
# load the old model
cl.load_model('model.pkl')
# once the data has been loaded, training has to be run first
cl.train()
# save the new model
cl.save_model('new_model.pkl')

# or you can simply overwrite the old model
# cl.save_model('model.pkl', overwrite=True)
示例#15
0
from stagesepx.classifier import SVMClassifier
from stagesepx.cutter import VideoCutter
from stagesepx.reporter import Reporter

# video that is both cut and classified below
TARGET_VIDEO = '../../demo.mp4'

# cut
# cut is still used here, mainly so that the changing (in-between) parts are handled well
# but pick_and_save is not needed this time, because the classifier will not use the cutter's data
cutter = VideoCutter()
res = cutter.cut(TARGET_VIDEO)
# only the first item of the returned pair is used
stable, _ = res.get_range()

# classify
# the parameters here must stay consistent with train.py, if you changed any
cl = SVMClassifier()
cl.load_model('./model.pkl')

classify_result = cl.classify(
    TARGET_VIDEO,
    stable,
)

# draw the report, passing the cut result along with the classification
r = Reporter()
r.draw(
    classify_result,
    report_path='report.html',
    cut_result=res,
)
示例#16
0
def run(config: typing.Union[dict, str]):
    """
    Run the whole cut -> classify -> calc -> report flow described by a config.

    :param config: config file path (must end with `.json`), or a preloaded dict
        with the same content. `output` and `video` are required; the
        `cutter`, `classifier`, `calc` and `extras` sections are optional and
        fall back to the defaults of the nested models declared below.
    :return: None. The report is drawn with `report_path=config.output`; when
        `calc.output` is set, the calc results are dumped there as json.
    :raises AssertionError: when the config path is not an existing `.json`
        file, when the configured model file is missing, or when the calc
        output file already exists
    """
    # --- config schema (validated by pydantic) ---
    class _VideoUserConfig(BaseModel):
        path: str
        pre_load: bool = True
        fps: int = None

    class _CutterUserConfig(BaseModel):
        threshold: float = None
        frame_count: int = None
        offset: int = None
        limit: int = None
        block: int = None

        # common
        compress_rate: float = None
        target_size: typing.Tuple[int, int] = None

    class _ClassifierType(Enum):
        SVM = "svm"
        KERAS = "keras"

    class _ClassifierUserConfig(BaseModel):
        boost_mode: bool = None
        classifier_type: _ClassifierType = _ClassifierType.SVM
        model: str = None

        # common
        compress_rate: float = None
        target_size: typing.Tuple[int, int] = None

    class _CalcOperatorType(Enum):
        BETWEEN = "between"
        DISPLAY = "display"

    class _CalcOperator(BaseModel):
        name: str
        calc_type: _CalcOperatorType
        args: dict = dict()

    class _CalcUserConfig(BaseModel):
        output: str = None
        ignore_error: bool = None
        operators: typing.List[_CalcOperator] = None

    class _ExtraUserConfig(BaseModel):
        save_train_set: str = None

    class UserConfig(BaseModel):
        output: str
        video: _VideoUserConfig
        cutter: _CutterUserConfig = _CutterUserConfig()
        classifier: _ClassifierUserConfig = _ClassifierUserConfig()
        calc: _CalcUserConfig = _CalcUserConfig()
        extras: _ExtraUserConfig = _ExtraUserConfig()

    if isinstance(config, str):
        # path
        config_path = pathlib.Path(config)
        assert config_path.is_file(), f"no config file found in {config_path}"

        # todo: support different types in the future
        assert config_path.as_posix().endswith(
            ".json"), "config file should be json format"
        with open(config_path, encoding=constants.CHARSET) as f:
            config = json.load(f)

    config = UserConfig(**config)
    logger.info(f"config: {config}")

    # main flow
    video = VideoObject(
        # fmt: off
        path=config.video.path,
        fps=config.video.fps,
    )
    if config.video.pre_load:
        video.load_frames()

    # cut
    cutter = VideoCutter(
        # fmt: off
        compress_rate=config.cutter.compress_rate,
        target_size=config.cutter.target_size,
    )
    res = cutter.cut(
        # fmt: off
        video=video,
        block=config.cutter.block,
    )
    # `limit` is part of the cutter config, so it is forwarded together with
    # the other range options instead of being silently ignored
    stable, _unstable = res.get_range(
        # fmt: off
        threshold=config.cutter.threshold,
        offset=config.cutter.offset,
        limit=config.cutter.limit,
    )

    # the temp dir only has to outlive training; classification happens after it
    with tempfile.TemporaryDirectory() as temp_dir:
        # classify
        if config.classifier.classifier_type is _ClassifierType.SVM:
            cl = SVMClassifier(
                # fmt: off
                compress_rate=config.classifier.compress_rate,
                target_size=config.classifier.target_size,
            )
        elif config.classifier.classifier_type is _ClassifierType.KERAS:
            # imported lazily, only when the keras classifier is requested
            from stagesepx.classifier.keras import KerasClassifier

            cl = KerasClassifier(
                # fmt: off
                compress_rate=config.classifier.compress_rate,
                target_size=config.classifier.target_size,
            )
        # validation has been applied by pydantic
        # so no `else`

        if config.classifier.model:
            # no need to retrain
            model_path = pathlib.Path(config.classifier.model)
            assert model_path.is_file(), f"file {model_path} not existed"
            cl.load_model(model_path)
        else:
            # train a new model; keep the train set only if the user asked for it
            train_set_dir = config.extras.save_train_set or temp_dir
            os.makedirs(train_set_dir, exist_ok=True)

            res.pick_and_save(
                # fmt: off
                stable,
                frame_count=config.cutter.frame_count,
                to_dir=train_set_dir,
            )
            cl.train(data_path=train_set_dir)

    # start classifying
    classify_result = cl.classify(
        # fmt: off
        video,
        stable,
        boost_mode=config.classifier.boost_mode,
    )

    # calc
    def _calc_display() -> dict:
        # jsonify
        return json.loads(classify_result.dumps())

    def _calc_between(*, from_stage: str = None, to_stage: str = None) -> dict:
        assert classify_result.contain(
            from_stage), f"no stage {from_stage} found in result"
        assert classify_result.contain(
            to_stage), f"no stage {to_stage} found in result"
        # measured from the last frame of `from_stage` to the first frame of `to_stage`
        from_frame = classify_result.last(from_stage)
        to_frame = classify_result.first(to_stage)
        cost = to_frame.timestamp - from_frame.timestamp
        return {
            "from": from_frame.frame_id,
            "to": to_frame.frame_id,
            "cost": cost,
        }

    _calc_func_dict = {
        _CalcOperatorType.BETWEEN: _calc_between,
        _CalcOperatorType.DISPLAY: _calc_display,
    }
    calc_output = config.calc.output
    if calc_output:
        output_path = pathlib.Path(calc_output)
        assert not output_path.is_file(), f"file {output_path} already existed"
        result = []
        # `operators` defaults to None; treat that as "no operators"
        for each_calc in config.calc.operators or []:
            func = _calc_func_dict[each_calc.calc_type]
            try:
                func_ret = func(**each_calc.args)
            except Exception as e:
                if not config.calc.ignore_error:
                    raise
                # best-effort mode: record the traceback as this operator's result
                logger.warning(e)
                func_ret = traceback.format_exc()
            calc_ret = {
                "name": each_calc.name,
                "type": each_calc.calc_type.value,
                "result": func_ret,
            }
            result.append(calc_ret)
        with open(output_path, "w", encoding=constants.CHARSET) as f:
            json.dump(result, f)

    # draw
    r = Reporter()
    r.draw(
        # fmt: off
        classify_result,
        report_path=config.output,
    )