Example #1
def test_on_local_segment():
    actors = [0, 1, 2]
    size = [400, 400]
    timesteps = 32
    batch_np = np.zeros([len(actors), timesteps] + size + [3])  # [num_actors, timesteps, H, W, 3]
    rois_np = np.zeros([len(actors), 4])  # one box per actor
    batch_indices_np = np.array(range(len(actors)))  # maps each ROI to its clip in the batch

    # load each actor's clip frames and its matching ROI annotation
    for bb, actor_id in enumerate(actors):
        vid_path = 'person_%i.mp4' % actor_id
        reader = imageio.get_reader(vid_path, 'ffmpeg')
        for tt, frame in enumerate(reader):
            batch_np[bb, tt, :] = frame

        roi_path = "person_%i_roi.json" % actor_id
        with open(roi_path) as fp:
            rois_np[bb] = json.load(fp)

    # act_detector = act.Action_Detector('i3d_tail')
    # ckpt_name = 'model_ckpt_RGB_i3d_pooled_tail-4'
    act_detector = act.Action_Detector('soft_attn')
    ckpt_name = 'model_ckpt_RGB_soft_attn-9'
    input_seq, rois, roi_batch_indices, pred_probs = act_detector.define_inference_with_placeholders()
    sess = act_detector.session

    #main_folder = sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
    main_folder = "../"
    ckpt_path = os.path.join(main_folder, 'action_detection', 'weights',
                             ckpt_name)
    act_detector.restore_model(ckpt_path)

    feed_dict = {
        input_seq: batch_np,
        rois: rois_np,
        roi_batch_indices: batch_indices_np
    }
    probs = sess.run(pred_probs, feed_dict=feed_dict)
    # debug = sess.run(tf.get_collection('debug'), feed_dict=feed_dict)
    # import pdb;pdb.set_trace()

    # highest_conf_actions = np.argsort(probs, axis=1)
    print_top_k = 5
    for ii in range(len(actors)):
        act_probs = probs[ii]
        order = np.argsort(act_probs)[::-1]
        print("Person %i" % actors[ii])
        for pp in range(print_top_k):
            print('\t %s: %.3f' %
                  (act.ACTION_STRINGS[order[pp]], act_probs[order[pp]]))
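
These excerpts omit their import preambles. A plausible header for running them, with the two project-local module paths guessed (an assumption) from the 'action_detection/...' and 'object_detection/...' checkpoint paths used below:

# Assumed imports for the excerpts on this page; the project-local module
# paths are a guess based on the directory names used in the checkpoint paths.
import os
import csv
import json
import time
import argparse

import numpy as np
import imageio
import cv2

import action_detection.action_detector as act
import object_detection.object_detector as obj
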
Example #2
    def __init__(self):
        obj_detection_graph = os.path.join("object_detection", "weights",
                                           OBJ_DETECTION_MODEL,
                                           "frozen_inference_graph.pb")
        self.obj_detector = obj.Object_Detector(obj_detection_graph)

        self.act_detector = act.Action_Detector('soft_attn',
                                                timesteps=NUM_INPUT_FRAMES)
        crop_in_tubes = self.act_detector.crop_tubes_in_tf(
            [NUM_INPUT_FRAMES, HEIGHT, WIDTH, 3])
        (self.input_frames, self.temporal_rois,
         self.temporal_roi_batch_indices, self.cropped_frames) = crop_in_tubes
        self.rois, self.roi_batch_indices, self.pred_probs = (
            self.act_detector.define_inference_with_placeholders_noinput(
                self.cropped_frames))

        ckpt_path = os.path.join(MAIN_FOLDER, 'action_detection', 'weights',
                                 CKPT_NAME)
        self.act_detector.restore_model(ckpt_path)
Example #3
def set_up_detector():

    act_detector = act.Action_Detector('soft_attn')
    #ckpt_name = 'model_ckpt_RGB_soft_attn-16'
    #ckpt_name = 'model_ckpt_soft_attn_ava-23'
    ckpt_name = 'model_ckpt_soft_attn_pooled_cosine_drop_ava-130'


    input_seq, rois, roi_batch_indices, pred_probs = act_detector.define_inference_with_placeholders()

    ckpt_path = os.path.join('action_detection', 'weights', ckpt_name)
    act_detector.restore_model(ckpt_path)

    detector_dict = {'detector': act_detector,
                     'input_seq': input_seq,
                     'rois': rois,
                     'roi_batch_indices': roi_batch_indices,
                     'pred_probs': pred_probs}

    return detector_dict
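
A minimal sketch of how the returned dictionary might be consumed, reusing clip and ROI arrays prepared as in Example #1 (the calling code below is illustrative, not part of the original):

# Hypothetical caller: batch_np, rois_np and batch_indices_np are built as in Example #1.
detector_dict = set_up_detector()
feed_dict = {detector_dict['input_seq']: batch_np,
             detector_dict['rois']: rois_np,
             detector_dict['roi_batch_indices']: batch_indices_np}
probs = detector_dict['detector'].session.run(detector_dict['pred_probs'],
                                              feed_dict=feed_dict)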
Example #4
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('-v',
                        '--video_path',
                        type=str,
                        required=False,
                        default="")
    parser.add_argument('-d',
                        '--display',
                        type=str,
                        required=False,
                        default="True")

    args = parser.parse_args()
    display = (args.display == "True" or args.display == "true")

    #actor_to_display = 6  # must be set when SHOW_CAMS is enabled (used in out_vid_path below)

    video_path = args.video_path
    basename = os.path.basename(video_path).split('.')[0]
    out_vid_path = "./output_videos/%s_output.mp4" % (
        basename if not SHOW_CAMS else basename +
        '_cams_actor_%.2d' % actor_to_display)
    #out_vid_path = './output_videos/testing.mp4'

    # video_path = "./tests/chase1Person1View3Point0.mp4"
    # out_vid_path = 'output.mp4'

    main_folder = './'

    # NAS

    obj_detection_model = 'ssd_mobilenet_v2_coco_2018_03_29'
    obj_detection_graph = os.path.join("object_detection", "weights",
                                       obj_detection_model,
                                       "frozen_inference_graph.pb")

    print("Loading object detection model at %s" % obj_detection_graph)

    obj_detector = obj.Object_Detector(obj_detection_graph)
    tracker = obj.Tracker()

    print("Reading video file %s" % video_path)
    reader = imageio.get_reader(video_path, 'ffmpeg')
    action_freq = 8
    # fps_divider = 1
    print('Running actions every %i frames' % action_freq)
    fps = reader.get_meta_data()['fps']  #// fps_divider
    W, H = reader.get_meta_data()['size']
    T = tracker.timesteps
    if not display:
        writer = imageio.get_writer(out_vid_path, fps=fps)
        print("Writing output to %s" % out_vid_path)

    # act_detector = act.Action_Detector('i3d_tail')
    # ckpt_name = 'model_ckpt_RGB_i3d_pooled_tail-4'
    act_detector = act.Action_Detector('soft_attn')
    #ckpt_name = 'model_ckpt_RGB_soft_attn-16'
    #ckpt_name = 'model_ckpt_soft_attn_ava-23'
    ckpt_name = 'model_ckpt_soft_attn_pooled_cosine_drop_ava-130'

    #input_frames, temporal_rois, temporal_roi_batch_indices, cropped_frames = act_detector.crop_tubes_in_tf([T,H,W,3])
    # the graph keeps (timesteps - action_freq) frames in memory; only the newest action_freq frames are fed each step
    memory_size = act_detector.timesteps - action_freq
    updated_frames, temporal_rois, temporal_roi_batch_indices, cropped_frames = act_detector.crop_tubes_in_tf_with_memory(
        [T, H, W, 3], memory_size)

    rois, roi_batch_indices, pred_probs = act_detector.define_inference_with_placeholders_noinput(
        cropped_frames)

    ckpt_path = os.path.join(main_folder, 'action_detection', 'weights',
                             ckpt_name)
    act_detector.restore_model(ckpt_path)

    prob_dict = {}
    frame_cnt = 0
    for cur_img in reader:
        frame_cnt += 1
        #tracker.add_frame(cur_img)
        print("frame_cnt: %i" % frame_cnt)
        # Object Detection
        expanded_img = np.expand_dims(cur_img, axis=0)
        #expanded_img = np.tile(expanded_img, [10,1,1,1]) # test the speed
        t1 = time.time()
        detection_list = obj_detector.detect_objects_in_np(expanded_img)
        detection_info = [info[0] for info in detection_list]
        t2 = time.time()
        print('obj det %.2f seconds' % (t2 - t1))
        tracker.update_tracker(detection_info, cur_img)
        t3 = time.time()
        print('tracker %.2f seconds' % (t3 - t2))
        no_actors = len(tracker.active_actors)

        if tracker.active_actors and frame_cnt % action_freq == 0:
            probs = []

            cur_input_sequence = np.expand_dims(
                np.stack(tracker.frame_history[-action_freq:], axis=0), axis=0)

            rois_np, temporal_rois_np = tracker.generate_all_rois()
            if no_actors > 14:
                no_actors = 14
                rois_np = rois_np[:14]
                temporal_rois_np = temporal_rois_np[:14]

            #feed_dict = {input_frames:cur_input_sequence,
            feed_dict = {
                updated_frames: cur_input_sequence,  # only update the last action_freq frames
                temporal_rois: temporal_rois_np,
                temporal_roi_batch_indices: np.zeros(no_actors),
                rois: rois_np,
                roi_batch_indices: np.arange(no_actors)
            }
            run_dict = {'pred_probs': pred_probs}
            if SHOW_CAMS:
                run_dict['cropped_frames'] = cropped_frames
                #import pdb;pdb.set_trace()
                run_dict['final_i3d_feats'] = act_detector.act_graph.get_collection(
                    'final_i3d_feats')[0]
                #run_dict['cls_weights'] = [var for var in tf.global_variables() if var.name == "CLS_Logits/kernel:0"][0]
                run_dict['cls_weights'] = act_detector.act_graph.get_collection(
                    'variables')[-2]  # this is the classification-layer kernel
            #import pdb;pdb.set_trace()
            out_dict = act_detector.session.run(run_dict, feed_dict=feed_dict)
            probs = out_dict['pred_probs']
            # associate probs with actor ids
            print_top_k = 5
            for bb in range(no_actors):
                act_probs = probs[bb]
                order = np.argsort(act_probs)[::-1]
                cur_actor_id = tracker.active_actors[bb]['actor_id']
                print("Person %i" % cur_actor_id)
                cur_results = []
                for pp in range(print_top_k):
                    print(
                        '\t %s: %.3f' %
                        (act.ACTION_STRINGS[order[pp]], act_probs[order[pp]]))
                    cur_results.append(
                        (act.ACTION_STRINGS[order[pp]], act_probs[order[pp]]))
                prob_dict[cur_actor_id] = cur_results

            t5 = time.time()
            print('action %.2f seconds' % (t5 - t3))
        # # Action detection
        # no_actors = len(tracker.active_actors)
        # #batch_np = np.zeros([no_actors, act_detector.timesteps] + act_detector.input_size + [3], np.uint8)
        # batch_list = []
        # rois_np = np.zeros([no_actors, 4])
        # batch_indices_np = np.array(range(no_actors))
        # for bb, actor_info in enumerate(tracker.active_actors):
        #     actor_no = actor_info['actor_id']
        #     tube, roi = tracker.crop_person_tube(actor_no)
        #     #batch_np[bb, :] = tube
        #     batch_list.append(tube)
        #     rois_np[bb]= roi
        #t4 = time.time(); print('cropping %.2f seconds' % (t4-t3))

        # if tracker.active_actors:
        #     batch_np = np.stack(batch_list, axis=0)
        #     max_batch_size = 10
        #     prob_list = []
        #     cur_index = 0
        #     while cur_index < no_actors:
        #         cur_batch = batch_np[cur_index:cur_index+max_batch_size]
        #         cur_roi = rois_np[cur_index:cur_index+max_batch_size]
        #         cur_indices = batch_indices_np[cur_index:cur_index+max_batch_size] - cur_index
        #         feed_dict = {input_seq:cur_batch, rois:cur_roi, roi_batch_indices:cur_indices}
        #         #t51 = time.time(); print('action before run %.2f seconds' % (t51-t4))
        #         cur_probs = act_detector.session.run(pred_probs, feed_dict=feed_dict)
        #         #t52 = time.time(); print('action after run %.2f seconds' % (t52-t51))
        #         prob_list.append(cur_probs)
        #         cur_index += max_batch_size
        #     probs = np.concatenate(prob_list, axis=0)

        #t5 = time.time(); print('action %.2f seconds' % (t5-t4))
        # Print top_k probs
        #out_img = visualize_detection_results(cur_img, tracker.active_actors, prob_dict)
        if frame_cnt > 16:
            out_img = visualize_detection_results(tracker.frame_history[-16],
                                                  tracker.active_actors,
                                                  prob_dict)
            if SHOW_CAMS:
                if tracker.active_actors:
                    actor_indices = [
                        ii for ii in range(no_actors)
                        if tracker.active_actors[ii]['actor_id'] ==
                        actor_to_display
                    ]
                    if actor_indices:
                        out_img = visualize_cams(out_img, cur_input_sequence,
                                                 out_dict, actor_indices[0])
                    else:
                        continue
                else:
                    continue
            if display:
                cv2.imshow('results', out_img[:, :, ::-1])
                cv2.waitKey(10)
            else:
                writer.append_data(out_img)

    if not display:
        writer.close()
Example #5
def run_act_detector(shape, detection_q, actions_q, act_gpu):
    import tensorflow as tf  # workaround: TensorFlow must be imported inside this process, otherwise both processes cannot use the same GPUs
    os.environ['CUDA_VISIBLE_DEVICES'] = act_gpu
    # act_detector = act.Action_Detector('i3d_tail')
    # ckpt_name = 'model_ckpt_RGB_i3d_pooled_tail-4'
    act_detector = act.Action_Detector('soft_attn', timesteps=T)
    #ckpt_name = 'model_ckpt_RGB_soft_attn-16'
    #ckpt_name = 'model_ckpt_soft_attn_ava-23'
    #ckpt_name = 'model_ckpt_soft_attn_pooled_ava-52'
    ckpt_name = 'model_ckpt_soft_attn_pooled_cosine_drop_ava-130'
    main_folder = "./"
    ckpt_path = os.path.join(main_folder, 'action_detection', 'weights',
                             ckpt_name)

    #input_frames, temporal_rois, temporal_roi_batch_indices, cropped_frames = act_detector.crop_tubes_in_tf([T,H,W,3])
    memory_size = act_detector.timesteps - ACTION_FREQ
    updated_frames, temporal_rois, temporal_roi_batch_indices, cropped_frames = act_detector.crop_tubes_in_tf_with_memory(
        shape, memory_size)

    rois, roi_batch_indices, pred_probs = act_detector.define_inference_with_placeholders_noinput(
        cropped_frames)

    act_detector.restore_model(ckpt_path)

    processed_frames_cnt = 0

    while True:
        images = []
        for _ in range(ACTION_FREQ):
            cur_img, active_actors, rois_np, temporal_rois_np = detection_q.get()
            images.append(cur_img)
            #print("action frame: %i" % len(images))

        if not active_actors:
            prob_dict = {}
            if SHOW_CAMS:
                prob_dict = {"cams": visualize_cams({})}
        else:
            # use the last active actors and rois vectors
            no_actors = len(active_actors)

            cur_input_sequence = np.expand_dims(np.stack(images, axis=0),
                                                axis=0)

            if no_actors > 14:
                no_actors = 14
                rois_np = rois_np[:14]
                temporal_rois_np = temporal_rois_np[:14]
                active_actors = active_actors[:14]

            #feed_dict = {input_frames:cur_input_sequence,
            feed_dict = {
                updated_frames: cur_input_sequence,  # only update the last ACTION_FREQ frames
                temporal_rois: temporal_rois_np,
                temporal_roi_batch_indices: np.zeros(no_actors),
                rois: rois_np,
                roi_batch_indices: np.arange(no_actors)
            }
            run_dict = {'pred_probs': pred_probs}

            if SHOW_CAMS:
                run_dict['cropped_frames'] = cropped_frames
                #import pdb;pdb.set_trace()
                run_dict['final_i3d_feats'] = act_detector.act_graph.get_collection(
                    'final_i3d_feats')[0]
                #run_dict['cls_weights'] = [var for var in tf.global_variables() if var.name == "CLS_Logits/kernel:0"][0]
                run_dict['cls_weights'] = act_detector.act_graph.get_collection(
                    'variables')[-2]  # this is the classification-layer kernel

            out_dict = act_detector.session.run(run_dict, feed_dict=feed_dict)
            probs = out_dict['pred_probs']

            if not SHOW_CAMS:
                # associate probs with actor ids
                print_top_k = 5
                prob_dict = {}
                for bb in range(no_actors):
                    act_probs = probs[bb]
                    order = np.argsort(act_probs)[::-1]
                    cur_actor_id = active_actors[bb]['actor_id']
                    print("Person %i" % cur_actor_id)
                    cur_results = []
                    for pp in range(print_top_k):
                        print('\t %s: %.3f' % (act.ACTION_STRINGS[order[pp]],
                                               act_probs[order[pp]]))
                        cur_results.append((act.ACTION_STRINGS[order[pp]],
                                            act_probs[order[pp]]))
                    prob_dict[cur_actor_id] = cur_results
            else:
                # prob_dict = out_dict
                prob_dict = {
                    "cams": visualize_cams(out_dict)
                }  # compute the CAMs here so they do not slow down the visualization process

        processed_frames_cnt += ACTION_FREQ  # each turn we process this many frames

        if processed_frames_cnt >= act_detector.timesteps / 2:
            # we are doing this so we can skip the initialization period
            # first frame needs timesteps / 2 frames to be processed before visualizing
            actions_q.put(prob_dict)
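
For context, the producer process feeding detection_q would put one (frame, active_actors, rois, temporal_rois) tuple per frame, matching the unpacking at the top of the loop above. A rough sketch of such a producer, assuming the same tracker API as Example #4 (not part of the original excerpt):

def run_obj_detector(video_path, detection_q, obj_gpu):
    # Hypothetical producer mirroring the per-frame logic of Example #4.
    os.environ['CUDA_VISIBLE_DEVICES'] = obj_gpu
    obj_detection_graph = os.path.join("object_detection", "weights",
                                       'ssd_mobilenet_v2_coco_2018_03_29',
                                       "frozen_inference_graph.pb")
    obj_detector = obj.Object_Detector(obj_detection_graph)
    tracker = obj.Tracker()
    for cur_img in imageio.get_reader(video_path, 'ffmpeg'):
        detection_list = obj_detector.detect_objects_in_np(np.expand_dims(cur_img, axis=0))
        detection_info = [info[0] for info in detection_list]
        tracker.update_tracker(detection_info, cur_img)
        if tracker.active_actors:
            rois_np, temporal_rois_np = tracker.generate_all_rois()
        else:
            rois_np, temporal_rois_np = None, None
        detection_q.put((cur_img, tracker.active_actors, rois_np, temporal_rois_np))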
Example #6
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('-v', '--video_path', type=str, required=False, default="")
    parser.add_argument('-d', '--display', type=str, required=False, default="True")

    args = parser.parse_args()
    display = (args.display == "True" or args.display == "true")
    
    #actor_to_display = 6  # must be set when SHOW_CAMS is enabled (used in the output paths below)

    video_path = args.video_path
    basename = os.path.basename(video_path).split('.')[0]
    out_vid_path = "./output_videos/%s_output.mp4" % (basename if not SHOW_CAMS else basename+'_cams_actor_%.2d' % actor_to_display)
    clf_out_path = "./clf_output/{}_output.csv".format(basename if not SHOW_CAMS else basename+'_cams_actor_{}'.format(actor_to_display))
    #out_vid_path = './output_videos/testing.mp4'

    # video_path = "./tests/chase1Person1View3Point0.mp4"
    # out_vid_path = 'output.mp4'

    main_folder = './'

    # NAS

    obj_detection_model =  'ssd_mobilenet_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03'
    obj_detection_graph = os.path.join("object_detection", "weights", obj_detection_model, "frozen_inference_graph.pb")



    print("Loading object detection model at %s" % obj_detection_graph)


    obj_detector = obj.Object_Detector(obj_detection_graph)
    tracker = obj.Tracker()

    


    print("Reading video file %s" % video_path)
    reader = imageio.get_reader(video_path, 'ffmpeg')
    action_freq = 8
    # fps_divider = 1
    print('Running actions every %i frames' % action_freq)
    fps = reader.get_meta_data()['fps'] #// fps_divider
    print("FPS: {}".format(fps))
    W, H = reader.get_meta_data()['size']
    T = tracker.timesteps
    #if not display:
    writer = imageio.get_writer(out_vid_path, fps=fps)
    csv_file = open(clf_out_path, 'w', newline='')
    csv_writer = csv.writer(csv_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    csv_writer.writerow(['Time', 'Person', 'Action', 'Probability'])
    print("Writing output to %s" % out_vid_path)

    
    # act_detector = act.Action_Detector('i3d_tail')
    # ckpt_name = 'model_ckpt_RGB_i3d_pooled_tail-4'
    act_detector = act.Action_Detector('soft_attn')
    #ckpt_name = 'model_ckpt_RGB_soft_attn-16'
    #ckpt_name = 'model_ckpt_soft_attn_ava-23'
    ckpt_name = 'model_ckpt_soft_attn_pooled_cosine_drop_ava-130'

    #input_frames, temporal_rois, temporal_roi_batch_indices, cropped_frames = act_detector.crop_tubes_in_tf([T,H,W,3])
    memory_size = act_detector.timesteps - action_freq
    updated_frames, temporal_rois, temporal_roi_batch_indices, cropped_frames = act_detector.crop_tubes_in_tf_with_memory([T,H,W,3], memory_size)
    
    rois, roi_batch_indices, pred_probs = act_detector.define_inference_with_placeholders_noinput(cropped_frames)
    

    ckpt_path = os.path.join(main_folder, 'action_detection', 'weights', ckpt_name)
    act_detector.restore_model(ckpt_path)

    prob_dict = {}
    frame_cnt = 0

    # Tewan
    min_teacher_features = 3
    teacher_identified = 0
    #missed_frame_cnt = 0
    #max_age = 120
    #frame_skips = 60
    #next_frame = 0
    teacher_ids = []
    matched_id = None
    # Tewan

    for cur_img in reader:
        frame_cnt += 1

        #if frame_cnt < next_frame:
        #    continue

        # Detect objects and make predictions every 8 frames (0.3 seconds)
        #if frame_cnt % action_freq == 0:

        # Object Detection
        expanded_img = np.expand_dims(cur_img, axis=0)
        detection_list = obj_detector.detect_objects_in_np(expanded_img) 
        detection_info = [info[0] for info in detection_list]
        # Updates active actors in tracker
        tracker.update_tracker(detection_info, cur_img)
        no_actors = len(tracker.active_actors)

        """
        if no_actors == 0:
            missed_frame_cnt += 1

            if missed_frame_cnt >= max_age:
                tracker.update_tracker(detection_info, cur_img)
                no_actors = len(tracker.active_actors)
                teacher_identified = False
                tracker.set_invalid_track()
                missed_frame_cnt = 0

                print("Reset active actors. Current number: {}".format(no_actors))

        """

        if frame_cnt % action_freq == 0 and frame_cnt > 16:
            if no_actors == 0:
                print("No actor found.")
                continue

            video_time = round(frame_cnt / fps, 1)
            valid_actor_ids = [actor["actor_id"] for actor in tracker.active_actors]

            print("frame count: {}, video time: {}s".format(frame_cnt, video_time))
            probs = []

            cur_input_sequence = np.expand_dims(np.stack(tracker.frame_history[-action_freq:], axis=0), axis=0)

            rois_np, temporal_rois_np = tracker.generate_all_rois()

            if teacher_identified < min_teacher_features:
                prompt_img = visualize_detection_results(img_np=tracker.frame_history[-16],
                                                         active_actors=tracker.active_actors,
                                                         prob_dict=None)
                cv2.imshow('prompt_img', prompt_img[:,:,::-1])
                cv2.waitKey(500)
                teacher_present = False

                teacher_id = _prompt_user_input()

                if not _check_teacher_in_frame(teacher_id=teacher_id):
                    print("Teacher not in this frame. Continuing.")
                    cv2.destroyWindow("prompt_img")
                    pass

                else:
                    if _check_valid_teacher_id(teacher_id=teacher_id, valid_actor_ids=valid_actor_ids):
                        teacher_id = int(teacher_id)
                        teacher_identified += 1
                        teacher_present = True

                    else:
                        while not teacher_present:
                            print("Invalid ID.")
                            teacher_id = _prompt_user_input()

                            if not _check_teacher_in_frame(teacher_id=teacher_id):
                                print("Teacher not in this frame. Continuing.")
                                cv2.destroyWindow("prompt_img")
                                break

                            else:
                                if _check_valid_teacher_id(teacher_id=teacher_id, valid_actor_ids=valid_actor_ids):
                                    teacher_id = int(teacher_id)
                                    teacher_identified += 1
                                    teacher_present = True
                                else:
                                    pass

                # Move on to next frame if teacher not in current frame
                if not teacher_present:
                    continue
                cv2.destroyWindow("prompt_img")

                if teacher_id not in teacher_ids:
                    teacher_ids.append(teacher_id)
                    tracker.update_teacher_candidate_ids(teacher_candidate_id=teacher_id)
            else:
                tracker.set_valid_track()

            # Identify idx of teacher for ROI selection                
            roi_idx = None
            found_id = False
            for idx, actor_info in enumerate(tracker.active_actors):
                actor_id = actor_info["actor_id"]
                for i in range(len(teacher_ids)-1, -1, -1):
                    if actor_id == teacher_ids[i]:
                        roi_idx = idx
                        matched_id = actor_info["actor_id"]
                        found_id = True
                        break
                if found_id:
                    break

            # Identify ROI and temporal ROI using ROI idx 
            if roi_idx is not None:
                rois_np = rois_np[roi_idx]
                temporal_rois_np = temporal_rois_np[roi_idx]
                rois_np = np.expand_dims(rois_np, axis=0)
                temporal_rois_np = np.expand_dims(temporal_rois_np, axis=0)
                no_actors = 1
            # If teacher not found (i.e. roi_idx is None) in current frame, move on to next frame
            else:
                continue

            #max_actors = 5
            #if no_actors > max_actors:
            #    no_actors = max_actors
            #    rois_np = rois_np[:max_actors]
            #    temporal_rois_np = temporal_rois_np[:max_actors]

            # Possible issue: the attention map may not be used effectively because the action is predicted for only 1 actor (memory constraint)
            feed_dict = {updated_frames:cur_input_sequence, # only update last #action_freq frames
                         temporal_rois: temporal_rois_np,
                         temporal_roi_batch_indices: np.zeros(no_actors),
                         rois:rois_np, 
                         roi_batch_indices:np.arange(no_actors)}
            run_dict = {'pred_probs': pred_probs}

            if SHOW_CAMS:
                run_dict['cropped_frames'] = cropped_frames
                run_dict['final_i3d_feats'] =  act_detector.act_graph.get_collection('final_i3d_feats')[0]
                run_dict['cls_weights'] = act_detector.act_graph.get_collection('variables')[-2] # this is the kernel

            out_dict = act_detector.session.run(run_dict, feed_dict=feed_dict)
            probs = out_dict['pred_probs']

            # associate probs with actor ids
            print_top_k = 5
            for bb in range(no_actors):
                #act_probs = probs[bb]
                #order = np.argsort(act_probs)[::-1]
                #cur_actor_id = tracker.active_actors[bb]['actor_id']
                act_probs = probs[bb]
                order = np.argsort(act_probs)[::-1]
                cur_actor_id = tracker.active_actors[roi_idx]["actor_id"]
                #print(cur_actor_id == actor_id)
                #print("Person %i" % cur_actor_id)
                #print("act_probs: {}".format(act_probs))
                #print("order: {}".format(order))
                #print("tracker.active_actors[bb]: {}".format(tracker.active_actors[bb]))
                cur_results = []
                for pp in range(print_top_k):
                    #print('\t %s: %.3f' % (act.ACTION_STRINGS[order[pp]], act_probs[order[pp]]))
                    cur_results.append((act.ACTION_STRINGS[order[pp]], act_probs[order[pp]]))
                    csv_writer.writerow([video_time, cur_actor_id, act.ACTION_STRINGS[order[pp]], act_probs[order[pp]]])

                prob_dict[cur_actor_id] = cur_results
        
        if frame_cnt > 16:
            out_img = visualize_detection_results(tracker.frame_history[-16],
                                                  tracker.active_actors,
                                                  prob_dict=prob_dict,
                                                  teacher_id=matched_id)
            if SHOW_CAMS:
                if tracker.active_actors:
                    actor_indices = [ii for ii in range(no_actors) if tracker.active_actors[ii]['actor_id'] == actor_to_display]
                    if actor_indices:
                        out_img = visualize_cams(out_img, cur_input_sequence, out_dict, actor_indices[0])
                    else:
                        continue
                else:
                    continue
            if display: 
                cv2.imshow('results', out_img[:,:,::-1])
                cv2.waitKey(10)

            writer.append_data(out_img)

    #if not display:
    writer.close()
    csv_file.close()