Example #1
    def Model_Params(self, config_file, checkpoint_file, use_gpu=True):
        self.system_dict["local"]["config_file"] = config_file
        self.system_dict["local"]["checkpoint_file"] = checkpoint_file

        if (use_gpu):
            self.system_dict["local"]["model"] = init_recognizer(
                config_file, checkpoint_file, device='cuda')
        else:
            self.system_dict["local"]["model"] = init_recognizer(
                config_file, checkpoint_file, device='cpu')
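
A minimal sketch of how this wrapper method could be called, assuming a host class whose constructor sets self.system_dict = {"local": {}} (the Infer class name and the config/checkpoint paths below are hypothetical placeholders, not part of the original wrapper):

# Hypothetical usage of the Model_Params method shown above.
infer = Infer()
infer.Model_Params(
    'configs/recognition/tsn/tsn_r50_video_inference_1x1x3_100e_kinetics400_rgb.py',
    'checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth',
    use_gpu=True)
model = infer.system_dict["local"]["model"]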
Example #2
def test_frames_inference_recognizer():
    if torch.cuda.is_available():
        device = 'cuda:0'
    else:
        device = 'cpu'
    rgb_model = init_recognizer(frame_config_file, None, device)
    flow_model = init_recognizer(flow_frame_config_file, None, device)

    with pytest.raises(RuntimeError):
        # video path doesn't exist
        inference_recognizer(rgb_model, 'missing_path')

    for ops in rgb_model.cfg.data.test.pipeline:
        if ops['type'] in ('TenCrop', 'ThreeCrop'):
            # Use CenterCrop to reduce memory in order to pass CI
            ops['type'] = 'CenterCrop'
            ops['crop_size'] = 224
    for ops in flow_model.cfg.data.test.pipeline:
        if ops['type'] in ('TenCrop', 'ThreeCrop'):
            # Use CenterCrop to reduce memory in order to pass CI
            ops['type'] = 'CenterCrop'
            ops['crop_size'] = 224

    top5_label = inference_recognizer(rgb_model, frames_path)
    scores = [item[1] for item in top5_label]
    assert len(top5_label) == 5
    assert scores == sorted(scores, reverse=True)

    _, feat = inference_recognizer(flow_model,
                                   frames_path,
                                   outputs=('backbone', 'cls_head'),
                                   as_tensor=False)
    assert isinstance(feat, dict)
    assert 'backbone' in feat and 'cls_head' in feat
    assert isinstance(feat['backbone'], np.ndarray)
    assert isinstance(feat['cls_head'], np.ndarray)
    assert feat['backbone'].shape == (25, 2048, 7, 7)
    assert feat['cls_head'].shape == (1, 400)

    _, feat = inference_recognizer(rgb_model,
                                   frames_path,
                                   outputs=('backbone.layer3',
                                            'backbone.layer3.1.conv1'))

    assert 'backbone.layer3.1.conv1' in feat and 'backbone.layer3' in feat
    assert isinstance(feat['backbone.layer3.1.conv1'], torch.Tensor)
    assert isinstance(feat['backbone.layer3'], torch.Tensor)
    assert feat['backbone.layer3'].size() == (25, 1024, 14, 14)
    assert feat['backbone.layer3.1.conv1'].size() == (25, 256, 14, 14)
Example #3
def main():
    args = parse_args()
    # assign the desired device.
    device = torch.device(args.device)
    # build the recognizer from a config file and checkpoint file
    model = init_recognizer(
        args.config,
        args.checkpoint,
        device=device,
        use_frames=args.use_frames)
    # test a single video or rawframes of a single video
    results = inference_recognizer(
        model, args.video, args.label, use_frames=args.use_frames)

    print('The top-5 labels with corresponding scores are:')
    for result in results:
        print(f'{result[0]}: ', result[1])

    if args.out_filename is not None:
        get_output(
            args.video,
            args.out_filename,
            results[0][0],
            font_size=args.font_size,
            font_color=args.font_color,
            resize_algorithm=args.resize_algorithm,
            use_frames=args.use_frames)
Example #4
def main():
    global frame_queue, threshold, sample_length, data, test_pipeline, model, \
        out_file, video_path, device, input_step, label, result_queue

    args = parse_args()
    input_step = args.input_step
    threshold = args.threshold
    video_path = args.video
    out_file = args.out_file

    device = torch.device(args.device)
    model = init_recognizer(args.config, args.checkpoint, device=device)
    data = dict(img_shape=None, modality='RGB', label=-1)
    with open(args.label, 'r') as f:
        label = [line.strip() for line in f]

    # prepare test pipeline from non-camera pipeline
    cfg = model.cfg
    sample_length = 0
    pipeline = cfg.test_pipeline
    pipeline_ = pipeline.copy()
    for step in pipeline:
        if 'SampleFrames' in step['type']:
            sample_length = step['clip_len'] * step['num_clips']
            data['num_clips'] = step['num_clips']
            data['clip_len'] = step['clip_len']
            pipeline_.remove(step)
        if step['type'] in EXCLUED_STEPS:
            # remove step to decode frames
            pipeline_.remove(step)
    test_pipeline = Compose(pipeline_)
    assert sample_length > 0
    frame_queue = deque(maxlen=sample_length)
    result_queue = deque(maxlen=1)
    show_results()
Example #5
def main():
    args = parse_args()

    args.device = torch.device(args.device)

    cfg = Config.fromfile(args.config)
    cfg.merge_from_dict(args.cfg_options)

    model = init_recognizer(cfg, args.checkpoint, device=args.device)
    data = dict(img_shape=None, modality='RGB', label=-1)
    with open(args.label, 'r') as f:
        label = [line.strip() for line in f]

    # prepare test pipeline from non-camera pipeline
    cfg = model.cfg
    sample_length = 0
    pipeline = cfg.data.test.pipeline
    pipeline_ = pipeline.copy()
    for step in pipeline:
        if 'SampleFrames' in step['type']:
            sample_length = step['clip_len'] * step['num_clips']
            data['num_clips'] = step['num_clips']
            data['clip_len'] = step['clip_len']
            pipeline_.remove(step)
        if step['type'] in EXCLUED_STEPS:
            # remove step to decode frames
            pipeline_.remove(step)
    test_pipeline = Compose(pipeline_)

    assert sample_length > 0
    args.sample_length = sample_length
    args.test_pipeline = test_pipeline

    show_results(model, data, label, args)
Example #6
def main():
    global label, device, model, test_pipeline, \
        camera, sample_length, average_size, threshold

    args = parse_args()
    device = torch.device(args.device)
    model = init_recognizer(args.config, args.checkpoint, device=device)
    camera = cv2.VideoCapture(args.camera_id)

    sample_length = args.sample_length
    average_size = args.average_size
    threshold = args.threshold

    with open(args.label, 'r') as f:
        label = [line.strip() for line in f]

    # prepare test pipeline from non-camera pipeline
    cfg = model.cfg
    pipeline = cfg.test_pipeline
    pipeline_ = pipeline.copy()
    for step in pipeline:
        if 'SampleFrames' in step['type']:
            # Remove step to sample frames
            if sample_length == 0:
                sample_length = step['clip_len'] * step['num_clips']
            pipeline_.remove(step)
        if step['type'] in EXCLUED_STEPS:
            # remove step to decode frames
            pipeline_.remove(step)
    test_pipeline = Compose(pipeline_)

    assert sample_length > 0

    print('Press "Esc", "q" or "Q" to exit')
    predict_webcam_video()
Example #7
def main():
    args = parse_args()
    # build the model from a config file and a checkpoint file
    model = init_recognizer(args.config, args.checkpoint)
    # fuse conv and bn layers of the model
    fused_model = fuse_module(model)
    save_checkpoint(fused_model, args.out)
Example #8
def test_inference_recognizer():
    if torch.cuda.is_available():
        device = 'cuda:0'
    else:
        device = 'cpu'
    model = init_recognizer(video_config_file, None, device)

    with pytest.raises(RuntimeError):
        # video path doesn't exist
        inference_recognizer(model, 'missing.mp4', label_path)

    with pytest.raises(RuntimeError):
        # ``video_path`` should be consistent with ``use_frames``
        inference_recognizer(model, video_path, label_path, use_frames=True)

    with pytest.raises(RuntimeError):
        # ``video_path`` should be consistent with ``use_frames``
        inference_recognizer(model, 'demo/', label_path)

    for ops in model.cfg.data.test.pipeline:
        if ops['type'] == 'TenCrop':
            # Use CenterCrop to reduce memory in order to pass CI
            ops['type'] = 'CenterCrop'

    top5_label = inference_recognizer(model, video_path, label_path)
    scores = [item[1] for item in top5_label]
    assert len(top5_label) == 5
    assert scores == sorted(scores, reverse=True)
Example #9
def test_init_recognizer():
    with pytest.raises(TypeError):
        # config must be a filename or Config object
        init_recognizer(dict(config_file=None))

    with pytest.raises(RuntimeError):
        # input data type should be consistent with the dataset type
        init_recognizer(frame_config_file)

    with pytest.raises(RuntimeError):
        # input data type should be consistent with the dataset type
        init_recognizer(video_config_file, use_frames=True)

    if torch.cuda.is_available():
        device = 'cuda:0'
    else:
        device = 'cpu'

    model = init_recognizer(video_config_file, None, device)

    config = mmcv.Config.fromfile(video_config_file)
    config.model.backbone.pretrained = None

    assert isinstance(model, nn.Module)
    if torch.cuda.is_available():
        assert next(model.parameters()).is_cuda is True
    else:
        assert next(model.parameters()).is_cuda is False
    assert model.cfg.model.backbone.pretrained is None
Example #10
def main():
    args = parse_args()
    # assign the desired device.
    device = torch.device(args.device)

    cfg = Config.fromfile(args.config)
    cfg.merge_from_dict(args.cfg_options)

    # build the recognizer from a config file and checkpoint file/url
    model = init_recognizer(cfg,
                            args.checkpoint,
                            device=device,
                            use_frames=args.use_frames)

    # e.g. use ('backbone', ) to return backbone feature
    output_layer_names = None

    # test a single video or rawframes of a single video
    if output_layer_names:
        results, returned_feature = inference_recognizer(
            model,
            args.video,
            args.label,
            use_frames=args.use_frames,
            outputs=output_layer_names)
    else:
        results = inference_recognizer(model,
                                       args.video,
                                       args.label,
                                       use_frames=args.use_frames)

    print('The top-5 labels with corresponding scores are:')
    for result in results:
        print(f'{result[0]}: ', result[1])

    if args.out_filename is not None:

        if args.target_resolution is not None:
            if args.target_resolution[0] == -1:
                args.target_resolution[0] = None
            if args.target_resolution[1] == -1:
                args.target_resolution[1] = None
            args.target_resolution = tuple(args.target_resolution)
        else:
            args.target_resolution = (None, None)

        get_output(args.video,
                   args.out_filename,
                   results[0][0],
                   fps=args.fps,
                   font_size=args.font_size,
                   font_color=args.font_color,
                   target_resolution=args.target_resolution,
                   resize_algorithm=args.resize_algorithm,
                   use_frames=args.use_frames)
Example #11
def test_init_recognizer():
    with pytest.raises(TypeError):
        init_recognizer(dict(config_file=None))

    if torch.cuda.is_available():
        device = 'cuda:0'
    else:
        device = 'cpu'

    model = init_recognizer(config_file, None, device)

    config = mmcv.Config.fromfile(config_file)
    config.model.backbone.pretrained = None

    assert isinstance(model, nn.Module)
    if torch.cuda.is_available():
        assert next(model.parameters()).is_cuda is True
    else:
        assert next(model.parameters()).is_cuda is False
    assert model.cfg.model.backbone.pretrained is None
Example #12
def main():
    args = parse_args()
    # assign the desired device.
    device = torch.device(args.device)
    # build the recognizer from a config file and checkpoint file
    model = init_recognizer(args.config, args.checkpoint, device=device)
    # test a single video
    results = inference_recognizer(model, args.video, args.label)

    print('The top-5 labels with corresponding scores are:')
    for result in results:
        print(f'{result[0]}: ', result[1])
Example #13
def main():
    global frame_queue, camera, frame, results, threshold, sample_length, \
        data, test_pipeline, model, device, average_size, label, \
        result_queue, drawing_fps, inference_fps

    args = parse_args()
    average_size = args.average_size
    threshold = args.threshold
    drawing_fps = args.drawing_fps
    inference_fps = args.inference_fps

    device = torch.device(args.device)

    cfg = Config.fromfile(args.config)
    cfg.merge_from_dict(args.cfg_options)

    model = init_recognizer(cfg, args.checkpoint, device=device)
    camera = cv2.VideoCapture(args.camera_id)
    data = dict(img_shape=None, modality='RGB', label=-1)

    with open(args.label, 'r') as f:
        label = [line.strip() for line in f]

    # prepare test pipeline from non-camera pipeline
    cfg = model.cfg
    sample_length = 0
    pipeline = cfg.data.test.pipeline
    pipeline_ = pipeline.copy()
    for step in pipeline:
        if 'SampleFrames' in step['type']:
            sample_length = step['clip_len'] * step['num_clips']
            data['num_clips'] = step['num_clips']
            data['clip_len'] = step['clip_len']
            pipeline_.remove(step)
        if step['type'] in EXCLUED_STEPS:
            # remove step to decode frames
            pipeline_.remove(step)
    test_pipeline = Compose(pipeline_)

    assert sample_length > 0

    try:
        frame_queue = deque(maxlen=sample_length)
        result_queue = deque(maxlen=1)
        pw = Thread(target=show_results, args=(), daemon=True)
        pr = Thread(target=inference, args=(), daemon=True)
        pw.start()
        pr.start()
        pw.join()
    except KeyboardInterrupt:
        pass
Example #14
def main():
    global frame_queue, camera, frame, results, threshold, sample_length, \
        data, test_pipeline, model, device, average_size, label, result_queue

    args = parse_args()
    average_size = args.average_size
    threshold = args.threshold

    device = torch.device(args.device)
    model = init_recognizer(args.config, args.checkpoint, device=device)

    camera = cv2.VideoCapture(args.camera_id)
    #camera = cv2.VideoCapture('/home/ww/tools/image/office/2020-12-10_14-54-03.mp4')

    camera.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    camera.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
    data = dict(img_shape=None, modality='RGB', label=-1)

    with open(args.label, 'r') as f:
        label = [line.strip() for line in f]

    # prepare test pipeline from non-camera pipeline
    cfg = model.cfg
    sample_length = 0
    pipeline = cfg.test_pipeline
    pipeline_ = pipeline.copy()
    for step in pipeline:
        if 'SampleFrames' in step['type']:
            sample_length = step['clip_len'] * step['num_clips']
            data['num_clips'] = step['num_clips']
            data['clip_len'] = step['clip_len']
            pipeline_.remove(step)
        if step['type'] in EXCLUED_STEPS:
            # remove step to decode frames
            pipeline_.remove(step)
    test_pipeline = Compose(pipeline_)

    assert sample_length > 0

    try:
        frame_queue = deque(maxlen=sample_length)
        result_queue = deque(maxlen=1)
        pw = Thread(target=show_results, args=(), daemon=True)
        pr = Thread(target=inference, args=(), daemon=True)
        pw.start()
        pr.start()
        pw.join()
    except KeyboardInterrupt:
        pass
Example #15
def test_inference_recognizer():
    if torch.cuda.is_available():
        device = 'cuda:0'
    else:
        device = 'cpu'
    model = init_recognizer(config_file, None, device)

    for ops in model.cfg.data.test.pipeline:
        if ops['type'] == 'TenCrop':
            # Use CenterCrop to reduce memory in order to pass CI
            ops['type'] = 'CenterCrop'

    top5_label = inference_recognizer(model, video_path, label_path)
    scores = [item[1] for item in top5_label]
    assert len(top5_label) == 5
    assert scores == sorted(scores, reverse=True)
Example #16
def main():
    args = parse_args()
    device = torch.device(args.device)
    cfg = Config.fromfile(args.config)
    cfg.merge_from_dict(args.cfg_options)
    model = init_recognizer(cfg, args.checkpoint, device=device)

    if not args.audio.endswith('.npy'):
        raise NotImplementedError('Demo works on extracted audio features')
    results = inference_recognizer(model, args.audio)

    labels = open(args.label).readlines()
    labels = [x.strip() for x in labels]
    results = [(labels[k[0]], k[1]) for k in results]

    print('Scores:')
    for result in results:
        print(f'{result[0]}: ', result[1])
Example #17
def main():
    args = parse_args()

    # assign the desired device.
    device = torch.device(args.device)

    cfg = Config.fromfile(args.config)
    cfg.merge_from_dict(args.cfg_options)

    # build the recognizer from a config file and checkpoint file/url
    model = init_recognizer(cfg,
                            args.checkpoint,
                            device=device,
                            use_frames=args.use_frames)

    inputs = build_inputs(model, args.video, use_frames=args.use_frames)
    gradcam = GradCAM(model, args.target_layer_name)
    results = gradcam(inputs)

    if args.out_filename is not None:
        try:
            from moviepy.editor import ImageSequenceClip
        except ImportError:
            raise ImportError('Please install moviepy to enable output file.')

        # frames_batches shape [B, T, H, W, 3], in RGB order
        frames_batches = (results[0] * 255.).numpy().astype(np.uint8)
        frames = frames_batches.reshape(-1, *frames_batches.shape[-3:])

        frame_list = list(frames)
        frame_list = _resize_frames(frame_list,
                                    args.target_resolution,
                                    interpolation=args.resize_algorithm)

        video_clips = ImageSequenceClip(frame_list, fps=args.fps)
        out_type = osp.splitext(args.out_filename)[1][1:]
        if out_type == 'gif':
            video_clips.write_gif(args.out_filename)
        else:
            video_clips.write_videofile(args.out_filename, remove_temp=True)
Example #18
    def initialize(self, context):
        properties = context.system_properties
        self.map_location = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.device = torch.device(self.map_location + ':' +
                                   str(properties.get('gpu_id')) if torch.cuda.
                                   is_available() else self.map_location)
        self.manifest = context.manifest

        model_dir = properties.get('model_dir')
        serialized_file = self.manifest['model']['serializedFile']
        checkpoint = os.path.join(model_dir, serialized_file)
        self.config_file = os.path.join(model_dir, 'config.py')

        mapping_file_path = osp.join(model_dir, 'label_map.txt')
        if not os.path.isfile(mapping_file_path):
            warnings.warn('Missing the label_map.txt file. '
                          'Inference output will not include class name.')
            self.mapping = None
        else:
            lines = open(mapping_file_path).readlines()
            self.mapping = [x.strip() for x in lines]

        self.model = init_recognizer(self.config_file, checkpoint, self.device)
        self.initialized = True
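
initialize() above only builds the model and the label mapping; a rough companion inference step for the same handler could look like the sketch below. It assumes inference_recognizer is imported from mmaction.apis alongside init_recognizer, that TorchServe delivers each request's video payload as raw bytes under the 'data' or 'body' key, and that this build of inference_recognizer returns (class index, score) pairs as in Example #16; none of this is the original handler code.

    def inference(self, data, *args, **kwargs):
        # Hypothetical inference step for the handler sketched above.
        import tempfile

        from mmaction.apis import inference_recognizer

        outputs = []
        for row in data:
            video = row.get('data') or row.get('body')
            # Write the raw bytes to a temporary file so the recognizer can
            # decode them like an ordinary video file.
            with tempfile.NamedTemporaryFile(suffix='.mp4') as tmp:
                tmp.write(video)
                tmp.flush()
                top5 = inference_recognizer(self.model, tmp.name)
            if self.mapping is not None:
                # Map class indices to the names read from label_map.txt.
                top5 = [(self.mapping[int(idx)], float(score))
                        for idx, score in top5]
            outputs.append(top5)
        return outputs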
Example #19
parser.add_argument('--device',
                    type=str,
                    default='cuda:0',
                    help='CPU/CUDA device option')
parser.add_argument('--video', help='video file/url')
parser.add_argument('--labels', help='dataset labels')
args = parser.parse_args()

# config file
config_file = args.config

# download the checkpoint from model zoo and put it in `checkpoints/`
checkpoint_file = args.checkpoint

# assign the desired device.
device = args.device  # 'cuda:0' or 'cpu'
device = torch.device(device)

# build the model from a config file and a checkpoint file
model = init_recognizer(config_file, checkpoint_file, device=device)

# test a single video and show the result:
video = args.video
labels = args.labels
results = inference_recognizer(model, video, labels)

# show the results
print('The top-5 labels with corresponding scores are:')
for result in results:
    print(f'{result[0]}: ', result[1])
Example #20
import torch
from mmaction.apis import init_recognizer, inference_recognizer

config_file = 'configs/recognition/tsn/tsn_r50_video_inference_1x1x3_100e_kinetics400_rgb.py'
device = 'cuda:0' # or 'cpu'
device = torch.device(device)

model = init_recognizer(config_file, device=device)
# inference the demo video
inference_recognizer(model, 'demo/demo.mp4', 'demo/label_map_k400.txt')
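
The final call above discards its return value; as the other examples on this page show, inference_recognizer here returns the top-5 (label, score) pairs when a label map is passed, so a short follow-up using the same model, video and label map would be:

# Capture and print the top-5 predictions (labels are read from label_map_k400.txt).
results = inference_recognizer(model, 'demo/demo.mp4', 'demo/label_map_k400.txt')
for label, score in results:
    print(f'{label}: {score}')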
Example #21
def main():
    args = parse_args()
    # assign the desired device.
    device = torch.device(args.device)
    # build the recognizer from a config file and checkpoint file/url
    model = init_recognizer(args.config,
                            args.checkpoint,
                            device=device,
                            use_frames=args.use_frames)

    # e.g. use ('backbone', ) to return backbone feature
    output_layer_names = None

    # test a single video or rawframes of a single video
    if args.split_time is None:
        if output_layer_names:
            results, returned_feature = inference_recognizer(
                model,
                args.video,
                args.label,
                use_frames=args.use_frames,
                outputs=output_layer_names)
        else:
            results = inference_recognizer(model,
                                           args.video,
                                           args.label,
                                           use_frames=args.use_frames)

        print('The top-5 labels with corresponding scores are:')
        for result in results:
            print(f'{result[0]}: ', result[1])

        if args.out_filename is not None:

            if args.target_resolution is not None:
                if args.target_resolution[0] == -1:
                    args.target_resolution[0] = None
                if args.target_resolution[1] == -1:
                    args.target_resolution[1] = None
                args.target_resolution = tuple(args.target_resolution)
            else:
                args.target_resolution = (None, None)
            label_show = ''
            for result in results:
                label_show = label_show + result[0] + ': {:.2g}'.format(
                    result[1]) + '\n'

            get_output(args.video,
                       args.out_filename,
                       label_show[:-1],
                       fps=args.fps,
                       font_size=args.font_size,
                       font_color=args.font_color,
                       target_resolution=args.target_resolution,
                       resize_algorithm=args.resize_algorithm,
                       use_frames=args.use_frames)

    if args.split_time is not None:
        #https://stackoverflow.com/questions/28884159/using-python-script-to-cut-long-videos-into-chunks-in-ffmpeg
        #https://nico-lab.net/segment_muxer_with_ffmpeg/
        import re
        import math
        length_regexp = r'Duration: (\d{2}):(\d{2}):(\d{2})\.\d+,'
        re_length = re.compile(length_regexp)

        from subprocess import check_call, PIPE, Popen
        import shlex
        import os
        if args.split_time <= 0:
            print("Split length can't be 0")
            raise SystemExit

        p1 = Popen(["ffmpeg", "-i", args.video],
                   stdout=PIPE,
                   stderr=PIPE,
                   universal_newlines=True)
        # get p1.stderr as input
        output = Popen(["grep", 'Duration'],
                       stdin=p1.stderr,
                       stdout=PIPE,
                       universal_newlines=True)
        p1.stdout.close()
        matches = re_length.search(output.stdout.read())
        if matches:
            video_length = int(matches.group(1)) * 3600 + \
                        int(matches.group(2)) * 60 + \
                        int(matches.group(3))
            print("Video length in seconds: {}".format(video_length))
        else:
            print("Can't determine video length.")
            raise SystemExit
        split_count = math.ceil(video_length / args.split_time)
        if split_count == 1:
            print("Video length is less than the target split length.")
            raise SystemExit

        fname = os.path.basename(args.video)
        dirname = os.path.dirname(args.video)
        fname_base, ext = fname.rsplit(".", 1)
        tmp_path = os.path.join(dirname, 'tmpdir')
        dummy_filenames = []
        if not os.path.isdir(tmp_path):
            os.makedirs(tmp_path)

        #copied_fname = "{}.{}".format(os.path.join(tmp_path,fname_base), ext)
        #cmd = "ffmpeg -i {} -vf scale=640:360  -y {}".\
        #    format(args.video, copied_fname)
        #check_call(shlex.split(cmd), universal_newlines=True)
        #print(split_count)
        '''for n in range(split_count):
            split_start = args.split_time * n
            cmd = "ffmpeg -i {} -vcodec copy  -strict -2 -ss {} -t {} -y {}-{}.{}".\
                format(args.video, split_start, args.split_time, os.path.join(tmp_path,fname_base), n, ext)
            dummy_filenames.append("{}-{}.{}".format(os.path.join(tmp_path,fname_base), n, ext))
            print("About to run: {}".format(cmd))
            check_call(shlex.split(cmd), universal_newlines=True)
            tmp_fname = "{}-{}.{}".format(os.path.join(tmp_path,fname_base), n, ext)'''


        cmd = "ffmpeg -i {} -map 0 -c copy -flags +global_header -f segment -segment_time {} -y -segment_list {} -segment_format_options movflags=+faststart -reset_timestamps 1 {}-%02d.{}".\
            format(args.video, args.split_time, os.path.join(tmp_path,'list_gen.txt'), os.path.join(tmp_path,fname_base), ext)
        print("About to run: {}".format(cmd))
        check_call(shlex.split(cmd), universal_newlines=True)
        #    cmd = "ffmpeg -i {} -vf scale=640:360 -y {}".\
        #        format(tmp_fname,tmp_fname)
        #    print("About to run: {}".format(cmd))
        #    check_call(shlex.split(cmd), universal_newlines=True)

        with open(os.path.join(tmp_path, 'list_gen.txt'), 'r') as tmp_file:
            lines = tmp_file.readlines()
        for line in lines:
            dummy_filenames.append(
                os.path.join(tmp_path, line.replace('\n', '')))
        #print(dummy_filenames)

        import pandas as pd

        with open(args.label, 'r') as f:
            label = [line.strip() for line in f]
        list_df = pd.DataFrame(columns=label,
                               index=range(len(dummy_filenames)))
        #index_time = 0
        for i, video_block in enumerate(dummy_filenames):
            video_block_out = os.path.join(
                os.path.dirname(video_block),
                'out_' + os.path.basename(video_block))
            output_layer_names = ('cls_head', )
            if output_layer_names:
                results, returned_feature = inference_recognizer(
                    model,
                    video_block,
                    args.label,
                    use_frames=args.use_frames,
                    outputs=output_layer_names)
                ret_feature = returned_feature['cls_head'].cpu().detach(
                ).numpy()
                #list_df = list_df.append( ret_feature, ignore_index=True )
                #list_df = list_df.append(pd.DataFrame(ret_feature, columns=label, index= index_time)
                #import pdb;pdb.set_trace()
                list_df.iloc[i, :] = ret_feature[0, :len(label)]
                #index_time = index_time + args.split_time
            else:
                results = inference_recognizer(model,
                                               video_block,
                                               args.label,
                                               use_frames=args.use_frames)

            if args.out_filename is not None:
                if args.target_resolution is not None:
                    if args.target_resolution[0] == -1:
                        args.target_resolution[0] = None
                    if args.target_resolution[1] == -1:
                        args.target_resolution[1] = None
                    args.target_resolution = tuple(args.target_resolution)
                else:
                    args.target_resolution = (None, None)
                print('The top-5 labels with corresponding scores are:')
                for result in results:
                    print(f'{result[0]}: ', result[1])
                label_show = ''
                for result in results:
                    label_show = label_show + result[0] + ': {:.2g}'.format(
                        result[1]) + '\n'

                get_output(video_path=video_block,
                           out_filename=video_block_out,
                           label=label_show[:-1],
                           fps=args.fps,
                           font_size=args.font_size,
                           font_color=args.font_color,
                           target_resolution=args.target_resolution,
                           resize_algorithm=args.resize_algorithm,
                           use_frames=args.use_frames)
        # concatenate files
        with open(os.path.join(tmp_path, 'list.txt'), 'w') as tmp_file:
            for video_block in dummy_filenames:
                tmp_file.write("file " + 'out_' +
                               os.path.basename(video_block) + "\n")
        cmd = "ffmpeg -f concat -i {} -c copy -y {}".\
            format(os.path.join(tmp_path,'list.txt'), args.out_filename)
        #cmd = "ffmpeg -i {} -c copy -segment_format_options movflags=+faststart {}".\
        #    format(os.path.join(tmp_path,'list.txt'), args.out_filename)
        print("About to run: {}".format(cmd))
        check_call(shlex.split(cmd), universal_newlines=True)
        import shutil
        #import pdb
        #pdb.set_trace()
        shutil.rmtree(tmp_path)
        import matplotlib
        import matplotlib.pyplot as plt
        plt.figure()
        list_df.plot(
            y=label
        )  #, x=range(0, args.split_time*len(dummy_filenames),args.split_time)
        fig_outdir = os.path.dirname(args.out_filename)
        fig_outname = os.path.basename(args.out_filename)
        fig_outname = fig_outname.rsplit(".", 1)[0]
        plt.savefig(os.path.join(fig_outdir, fig_outname + '.png'))
        plt.close('all')
        list_df.to_csv(os.path.join(fig_outdir, fig_outname + '.csv'),
                       index=False)
Example #22
def test_video_inference_recognizer():
    if torch.cuda.is_available():
        device = 'cuda:0'
    else:
        device = 'cpu'
    model = init_recognizer(video_config_file, None, device)

    with pytest.raises(RuntimeError):
        # video path doesn't exist
        inference_recognizer(model, 'missing.mp4')

    for ops in model.cfg.data.test.pipeline:
        if ops['type'] in ('TenCrop', 'ThreeCrop'):
            # Use CenterCrop to reduce memory in order to pass CI
            ops['type'] = 'CenterCrop'

    top5_label = inference_recognizer(model, video_path)
    scores = [item[1] for item in top5_label]
    assert len(top5_label) == 5
    assert scores == sorted(scores, reverse=True)

    _, feat = inference_recognizer(model,
                                   video_path,
                                   outputs=('backbone', 'cls_head'),
                                   as_tensor=False)
    assert isinstance(feat, dict)
    assert 'backbone' in feat and 'cls_head' in feat
    assert isinstance(feat['backbone'], np.ndarray)
    assert isinstance(feat['cls_head'], np.ndarray)
    assert feat['backbone'].shape == (25, 2048, 7, 7)
    assert feat['cls_head'].shape == (1, 400)

    _, feat = inference_recognizer(model,
                                   video_path,
                                   outputs=('backbone.layer3',
                                            'backbone.layer3.1.conv1'))
    assert 'backbone.layer3.1.conv1' in feat and 'backbone.layer3' in feat
    assert isinstance(feat['backbone.layer3.1.conv1'], torch.Tensor)
    assert isinstance(feat['backbone.layer3'], torch.Tensor)
    assert feat['backbone.layer3'].size() == (25, 1024, 14, 14)
    assert feat['backbone.layer3.1.conv1'].size() == (25, 256, 14, 14)

    cfg_file = 'configs/recognition/slowfast/slowfast_r50_video_inference_4x16x1_256e_kinetics400_rgb.py'  # noqa: E501
    sf_model = init_recognizer(cfg_file, None, device)
    for ops in sf_model.cfg.data.test.pipeline:
        # Changes to reduce memory in order to pass CI
        if ops['type'] in ('TenCrop', 'ThreeCrop'):
            ops['type'] = 'CenterCrop'
        if ops['type'] == 'SampleFrames':
            ops['num_clips'] = 1
    _, feat = inference_recognizer(sf_model,
                                   video_path,
                                   outputs=('backbone', 'cls_head'))
    assert isinstance(feat, dict) and isinstance(feat['backbone'], tuple)
    assert 'backbone' in feat and 'cls_head' in feat
    assert len(feat['backbone']) == 2
    assert isinstance(feat['backbone'][0], torch.Tensor)
    assert isinstance(feat['backbone'][1], torch.Tensor)
    assert feat['backbone'][0].size() == (1, 2048, 4, 8, 8)
    assert feat['backbone'][1].size() == (1, 256, 32, 8, 8)
    assert feat['cls_head'].size() == (1, 400)
Example #23
import argparse
import os

import torch

from mmaction.apis import init_recognizer, inference_recognizer

parser = argparse.ArgumentParser(description="parsing...")
parser.add_argument("--root", type=str, default="/home/administrator/Z/Algorithms/mmaction2/", help="mmaction2 root")
args = parser.parse_args()

config_file = os.path.join(args.root, 'configs/recognition/tsn/tsn_r50_video_inference_1x1x3_100e_kinetics400_rgb.py')
# download the checkpoint from model zoo and put it in `checkpoints/`
checkpoint_file = os.path.join(args.root, 'checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth')

# assign the desired device.
device = 'cuda:0' # or 'cpu'
device = torch.device(device)

# build the model from a config file and a checkpoint file
model = init_recognizer(config_file, checkpoint_file, device=device, use_frames=True)

# test a single video and show the result:
video = os.path.join(args.root,'data/kinetics400/rawframes_video/...')
labels = os.path.join(args.root,'demo/label_map_k400.txt')
results = inference_recognizer(model, video, labels, use_frames=True)


# show the results
print('The top-5 labels with corresponding scores are:')
for result in results:
    print(f'{result[0]}: ', result[1])
Example #24
from mmaction.apis import inference_recognizer, init_recognizer
import os
# Choose to use a config and initialize the recognizer
config = '/home/workspace/2021_capstone/mmaction2/configs/recognition/slowfast/custom.py'
# Setup a checkpoint file to load
checkpoint = '/home/workspace/2021_capstone/mmaction2/data_center/assult/best_top1_acc_epoch_185.pth'
# Initialize the recognizer
model = init_recognizer(config, checkpoint, device='cuda:0')
# path_dir="../2021_capstone/mmaction2/data_center/fight_assault"
path_dir = '.'
# path_dir="../2021_capstone/mmaction2/data_center/fight_assault"
normal_path = path_dir + "/test_normal"
kicking_path = path_dir + "/test_kicking"
punching_path = path_dir + "/test_punching"
normal_file_list = os.listdir(normal_path)
kicking_file_list = os.listdir(kicking_path)
punching_file_list = os.listdir(punching_path)
label = '/home/workspace/2021_capstone/mmaction2/demo/custom_map.txt'

dir = [kicking_file_list, normal_file_list, punching_file_list]

total_testSet = 0

for i in dir:
    total_testSet += len(i)

kicking_cnt = 0
punching_cnt = 0
normal_cnt = 0
iter = 0
for i in dir:
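
The loop above is cut off in the original snippet; a minimal sketch of how the evaluation could continue, reusing the names defined above (the assumption that custom_map.txt contains the class names 'kicking', 'normal' and 'punching', and the accuracy bookkeeping itself, are hypothetical):

# Hypothetical continuation: run the recognizer on every test clip and count
# how often the top-1 prediction matches the folder the clip came from.
class_dirs = {kicking_path: 'kicking', normal_path: 'normal', punching_path: 'punching'}
correct = 0
for class_path, class_name in class_dirs.items():
    for file_name in os.listdir(class_path):
        results = inference_recognizer(model, os.path.join(class_path, file_name), label)
        if results[0][0] == class_name:
            correct += 1
print('top-1 accuracy:', correct / total_testSet)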
Example #25
def main():
    args = parse_args()

    device = torch.device(args.device)
    # --use_frames arrives as a string, so convert it to a bool explicitly
    use_frames = args.use_frames == "True"
    model = init_recognizer(args.config, device=device, use_frames=use_frames)

    # Target FPGA Zynq UltraScale+ MPSoC ZCU104. Assuming clock frequency of 100 MHz.
    # The actual BRAM size is 11 Mbits (1.375 MBytes). This divided by the 18 Kbits size of each BRAM gives a total of 624 BRAM units.
    # The ZCU104 has also 27 Mbits (3.375 MBytes) of URAM. This divided by the 288 Kbits size of each URAM gives a total of 96 URAM units.
    # The ZCU104 has 20 GTH gigabit transceivers (16.3 Gb/s or 2.03 GB/s) on the PL side
    feature_maps = ModelFeatureMaps(model=model,
                                    word_length=16,
                                    clock_freq=100,
                                    bram=624,
                                    dsp=1728)
    feature_maps.get_inter_feature_maps()

    random_img = np.random.randn(args.imshape[0], args.imshape[1],
                                 args.imshape[2])

    data = dict(img_shape=None, modality="RGB", label=-1)

    # prepare test pipeline from non-camera pipeline
    cfg = model.cfg
    sample_length = 0
    pipeline = cfg.test_pipeline
    pipeline_ = pipeline.copy()
    for step in pipeline:
        if "SampleFrames" in step["type"]:
            step["num_clips"] = 1
            sample_length = step["clip_len"] * step["num_clips"]
            data["num_clips"] = step["num_clips"]
            data["clip_len"] = step["clip_len"]
            pipeline_.remove(step)
        if step["type"] in EXCLUED_STEPS:
            # remove step to decode frames
            pipeline_.remove(step)
    test_pipeline = Compose(pipeline_)
    print(test_pipeline)
    assert sample_length > 0

    data_in = []
    for _ in range(data["clip_len"]):
        data_in.append(random_img)

    data["imgs"] = data_in
    if data["img_shape"] is None:
        data["img_shape"] = random_img.shape[:2]

    data = test_pipeline(data)
    data = collate([data], samples_per_gpu=1)
    if next(model.parameters()).is_cuda:
        data = scatter(data, [device])[0]

    with torch.no_grad():
        scores = model(return_loss=False, **data)[0]

    feature_maps.get_info()

    feature_maps.get_conv_layers(file_name=args.model_name)
Example #26
import os

import torch

from mmaction.apis import init_recognizer, inference_recognizer


def StrConverter(filename: str) -> dict:
    # Split a timestamp-style filename (fields separated by '.', ':' or spaces)
    # into its year/month/day/hour/min/sec parts.
    key = ['year', 'month', 'day', 'hour', 'min', 'sec']
    filename = filename.replace(".", ' ')
    filename = filename.replace(":", ' ')
    data = {i: j for i, j in zip(key, filename.split())}
    return data


receive_path = 'receive_video/'
path_dir = 'video_for_process/'
db = "assult_candidate/"
device = torch.device("cuda")
# build the recognizer from a config file and checkpoint file/url
config="../configs/recognition/slowfast/custom.py"
checkpoint="../data_center/fight_assault/BinaryDataTree/tanos_lr_improve_checkpoints/epoch_70.pth"
model = init_recognizer(
    config,
    checkpoint,
    device=device,
    )
label="../demo/custom_map.txt"
while True:
    if not os.listdir(receive_path):
        continue
    os.system(f"mv {receive_path}* {path_dir}")
    file_list = os.listdir(path_dir)
    file_list.sort()  # sort in chronological order
    for i in file_list:
        results = inference_recognizer(model,path_dir+i,label)
        if results[0][0]=="abnormal" and results[0][1]>0.86:
            '''
            A violent event was detected: the clip must be sent to the db,
            and since processing is complete it is popped from the directory.
            '''
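
A minimal sketch of how that placeholder could be filled in, assuming the flagged clip should be copied into the assult_candidate directory and every processed clip then removed from video_for_process (the shutil-based handling below is hypothetical, not the original author's code):

# Hypothetical completion of the inner loop above.
import shutil

os.makedirs(db, exist_ok=True)
for i in file_list:
    results = inference_recognizer(model, path_dir + i, label)
    if results[0][0] == "abnormal" and results[0][1] > 0.86:
        # A violent clip was detected: copy it to the candidate db.
        shutil.copy(path_dir + i, db + i)
    # Processing is finished, so pop the clip from the working directory.
    os.remove(path_dir + i)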
Example #27
def main():
    args = parse_args()

    frame_paths, original_frames = frame_extraction(args.video,
                                                    args.short_side)
    num_frame = len(frame_paths)
    h, w, _ = original_frames[0].shape

    # Get clip_len, frame_interval and calculate center index of each clip
    config = mmcv.Config.fromfile(args.config)
    config.merge_from_dict(args.cfg_options)

    model = init_recognizer(config, args.checkpoint, args.device)

    # Load label_map
    label_map = [x.strip() for x in open(args.label_map).readlines()]

    # Get Human detection results
    det_results = detection_inference(args, frame_paths)
    torch.cuda.empty_cache()

    pose_results = pose_inference(args, frame_paths, det_results)
    torch.cuda.empty_cache()

    fake_anno = dict(frame_dir='',
                     label=-1,
                     img_shape=(h, w),
                     original_shape=(h, w),
                     start_index=0,
                     modality='Pose',
                     total_frames=num_frame)
    num_person = max([len(x) for x in pose_results])
    # Current PoseC3D models are trained on COCO-keypoints (17 keypoints)
    num_keypoint = 17
    keypoint = np.zeros((num_person, num_frame, num_keypoint, 2),
                        dtype=np.float16)
    keypoint_score = np.zeros((num_person, num_frame, num_keypoint),
                              dtype=np.float16)
    for i, poses in enumerate(pose_results):
        for j, pose in enumerate(poses):
            pose = pose['keypoints']
            keypoint[j, i] = pose[:, :2]
            keypoint_score[j, i] = pose[:, 2]
    fake_anno['keypoint'] = keypoint
    fake_anno['keypoint_score'] = keypoint_score

    results = inference_recognizer(model, fake_anno)

    action_label = label_map[results[0][0]]

    pose_model = init_pose_model(args.pose_config, args.pose_checkpoint,
                                 args.device)
    vis_frames = [
        vis_pose_result(pose_model, frame_paths[i], pose_results[i])
        for i in range(num_frame)
    ]
    for frame in vis_frames:
        cv2.putText(frame, action_label, (10, 30), FONTFACE, FONTSCALE,
                    FONTCOLOR, THICKNESS, LINETYPE)

    vid = mpy.ImageSequenceClip([x[:, :, ::-1] for x in vis_frames], fps=24)
    vid.write_videofile(args.out_filename, remove_temp=True)

    tmp_frame_dir = osp.dirname(frame_paths[0])
    shutil.rmtree(tmp_frame_dir)