class actions(object):
    def __init__(self, arguments):
        self.arguments = arguments

        # Frame window dimensions
        self.winWidth = 640
        self.winHeight = 480

        actionPredictor_params.__init__(self)

        self.fps_time = 0
        #self.step = 15
        self.mode = {
            'Pose Estimation': 'estimation',
            'Tracking': 'tracking',
            'Action Recognition': 'recognition'
        }

        w, h = model_wh(self.arguments.resize)
        if w > 0 and h > 0:
            self.estimator = TfPoseEstimator(get_graph_path(
                self.arguments.model),
                                             target_size=(w, h))
        else:
            self.estimator = TfPoseEstimator(get_graph_path(
                self.arguments.model),
                                             target_size=(432, 368))

        self.cam = cv2.VideoCapture(self.arguments.camera)

        # Tracker based on Sort
        self.sort_max_age = 20
        self.sort_min_hit = 3
        self.tracker = Sort(self.sort_max_age, self.sort_min_hit)

    def proceed(self):
        self._read_frame_()

        # Abort early if the camera frame could not be read, so an I/O
        # failure is not reported as an invalid mode.
        if not self.ret_val:
            sys.exit('Abort: failed to read a frame from the camera')

        if self.arguments.mode == self.mode['Pose Estimation']:
            self._perform_estimation_()
        elif self.arguments.mode == self.mode['Tracking']:
            self._perform_tracking_()
        elif self.arguments.mode == self.mode['Action Recognition']:
            self._perform_recognition_()
        else:
            sys.exit('Abort: please choose a valid mode: '
                     '"estimation", "tracking" or "recognition"')

        self._output_()

    def _joint_filter_(self, joint):
        # Check existence of the neck
        if 1 not in joint:
            return False
        # Check existence of at least one hip
        if 8 not in joint and 11 not in joint:
            return False
        # Check existence of at least one knee
        if 9 not in joint and 12 not in joint:
            return False
        return True

    def _joint_complete_(self, joint):
        # Fill in a missing hip or knee by mirroring the opposite side
        if 8 in joint and 11 not in joint:
            joint[11] = joint[8]
        elif 8 not in joint and 11 in joint:
            joint[8] = joint[11]
        if 9 in joint and 12 not in joint:
            joint[12] = joint[9]
        elif 9 not in joint and 12 in joint:
            joint[9] = joint[12]

        return joint

    def _perform_estimation_(self):
        self.humans = self.estimator.inference(self.image)
        self.image = TfPoseEstimator.draw_humans(self.image,
                                                 self.humans,
                                                 imgcopy=False)

    def _perform_tracking_(self):
        self.humans = self.estimator.inference(self.image)
        self.image, joints, bboxes, xcenter, sk = TfPoseEstimator.get_skeleton(
            self.image, self.humans, imgcopy=False)

        height = self.image.shape[0]
        width = self.image.shape[1]

        if bboxes:
            result = np.array(bboxes)
            det = result[:, 0:5]
            det[:, 0] = det[:, 0] * width
            det[:, 1] = det[:, 1] * height
            det[:, 2] = det[:, 2] * width
            det[:, 3] = det[:, 3] * height
            trackers = self.tracker.update(det)

            # Draw each tracked bounding box in a colour keyed by the track ID
            for d in trackers:
                xmin = int(d[0])
                ymin = int(d[1])
                xmax = int(d[2])
                ymax = int(d[3])
                label = int(d[4])
                cv2.rectangle(
                    self.image, (xmin, ymin), (xmax, ymax),
                    (int(self.c[label % 32, 0]), int(
                        self.c[label % 32, 1]), int(self.c[label % 32, 2])), 4)

    def _perform_recognition_(self):

        self.predictor = actionPredictor()

        self.humans = self.estimator.inference(self.image)
        self.image, joints, bboxes, xcenter, sk = TfPoseEstimator.get_skeleton(
            self.image, self.humans, imgcopy=False)

        height = self.image.shape[0]
        width = self.image.shape[1]

        if bboxes:
            result = np.array(bboxes)
            det = result[:, 0:5]
            det[:, 0] = det[:, 0] * width
            det[:, 1] = det[:, 1] * height
            det[:, 2] = det[:, 2] * width
            det[:, 3] = det[:, 3] * height
            trackers = self.tracker.update(det)

            self.current = [i[-1] for i in trackers]

            # Drop cached joints and remembered actions for track IDs
            # that are no longer present in the frame
            if len(self.previous) > 0:
                for item in self.previous:
                    if item not in self.current and item in self.data:
                        del self.data[item]
                    if item not in self.current and item in self.memory:
                        del self.memory[item]
            self.previous = self.current

            for d in trackers:
                xmin = int(d[0])
                ymin = int(d[1])
                xmax = int(d[2])
                ymax = int(d[3])
                label = int(d[4])

                #logger.debug('label is: %d' % (label))

                # Locate this tracked person in the current frame: pick the
                # skeleton whose x-centre is closest to the box centre
                try:
                    j = np.argmin(
                        np.array(
                            [abs(i - (xmax + xmin) / 2.) for i in xcenter]))
                except ValueError:
                    j = 0
                # Check if major skeleton points are existing
                if self._joint_filter_(joints[j]):
                    joints[j] = self._joint_complete_(
                        self._joint_complete_(joints[j]))

                    if label not in self.data:
                        #logger.debug('label is: %d' % (label))

                        self.data[label] = [joints[j]]
                        self.memory[label] = 0
                    else:
                        self.data[label].append(joints[j])

                    if len(self.data[label]) == self.step:
                        pred = self.predictor.move_status(self.data[label])

                        #logger.debug(len(self.data[label]))

                        if pred == 0:
                            pred = self.memory[label]
                        else:
                            self.memory[label] = pred
                        self.data[label].pop(0)

                        # Anchor the label at the neck joint (index 1) of the
                        # most recent frame, nudged so the text stays on screen
                        location = self.data[label][-1][1]
                        #location = functools.reduce(lambda x, y: x + y, self.data[label][:][1]) / len(self.data[label][:][1])
                        #location = sum(self.data[label][:][1]) / float(len(self.data[label][:][1]))

                        if location[0] <= 30:
                            location = (51, location[1])
                        if location[1] <= 10:
                            location = (location[0], 31)

                        cv2.putText(self.image, self.move_status[pred],
                                    (location[0] - 30, location[1] - 10),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.8,
                                    (255, 255, 255), 2)

                cv2.rectangle(
                    self.image, (xmin, ymin), (xmax, ymax),
                    (int(self.c[label % 32, 0]), int(
                        self.c[label % 32, 1]), int(self.c[label % 32, 2])), 4)

    def _read_frame_(self):
        self.ret_val, self.image = self.cam.read()

        # Only resize when a frame was actually captured
        if self.ret_val:
            self.image = cv2.resize(self.image,
                                    (self.winWidth, self.winHeight))

    def _output_(self):
        # Frame rate estimate (also used by the disabled frame-averaging step)
        fps = 1.0 / (time.time() - self.fps_time)
        #logger.debug('FPS: %f' % fps)

        #self.step = int(0.7 * fps)
        #logger.debug('step: %d' % self.step)

        cv2.putText(self.image, "FPS: %f" % fps, (10, 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)
        cv2.imshow('tf-pose-estimation result', self.image)
        self.fps_time = time.time()
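

# A minimal driver sketch (assumed, not taken from the original source): it
# relies only on the attributes the `actions` class reads from `arguments`
# (camera, model, resize, mode), so the flag names below are illustrative.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='realtime action recognition demo')
    parser.add_argument('--camera', type=int, default=0)
    parser.add_argument('--model', type=str, default='mobilenet_thin')
    parser.add_argument('--resize', type=str, default='0x0',
                        help='network input size, e.g. 432x368; 0x0 keeps the default')
    parser.add_argument('--mode', type=str, default='estimation',
                        choices=['estimation', 'tracking', 'recognition'])
    args = parser.parse_args()

    act = actions(args)
    while True:
        act.proceed()
        # _output_() only draws; waitKey() is still needed to refresh the window
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cv2.destroyAllWindows()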
# Example 2
import cv2

import settings
from pose.estimator import TfPoseEstimator
from pose.networks import get_graph_path
from imutils.video import VideoStream

poseEstimator = TfPoseEstimator(get_graph_path('mobilenet_thin'),
                                target_size=(432, 368))

cap = cv2.VideoCapture(0)

#cap = VideoStream(src='rtsp://*****:*****@192.168.51.162/PSIA/streaming/channels/102').start()

while True:
    ret, frame = cap.read()
    # Skip processing if the frame could not be read
    if ret:
        show = cv2.resize(frame, (settings.winWidth, settings.winHeight))

        humans = poseEstimator.inference(show)

        show = TfPoseEstimator.draw_humans(show, humans, imgcopy=False)
        cv2.imshow("frame", show)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

    # Create the per-action output folders for joint text files and skeleton
    # images, and continue sample numbering from any existing files
    if not os.path.exists(root_path + action + '/txt/'):
        os.mkdir(root_path + action + '/txt/')
        os.mkdir(root_path + action + '/imgs/')
    samples = len(os.listdir(root_path + action + '/txt/'))
    sample_count = 1000 if samples == 0 else 1000 + samples

    e = TfPoseEstimator(get_graph_path('mobilenet_thin'),
                        target_size=(432, 368))
    cam = cv2.VideoCapture(0)
    ret_val, image = cam.read()
    joints = []
    joints_imgs = []
    while True:
        ret_val, image = cam.read()
        if ret_val:
            humans = e.inference(image)
            image, joint, *_, sk = TfPoseEstimator.get_humans(image,
                                                              humans,
                                                              imgcopy=False)
            if joint:
                if len(joints) < clip_length:
                    joints.append(joint[0])
                    joints_imgs.append(sk)
                else:
                    joints.pop(0)
                    joints_imgs.pop(0)
                    joints.append(joint[0])
                    joints_imgs.append(sk)
            cv2.putText(image, "FPS: %.2f" % (1.0 / (time.time() - fps_time)),
                        (10, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0),
                        2)
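            # A possible continuation (assumed; the original snippet is
            # truncated here): refresh the timestamp, display the frame, and
            # save the buffered clip when a hypothetical 's' key is pressed.
            # The file naming via sample_count is an assumption, not the
            # source's scheme.
            fps_time = time.time()
            cv2.imshow('sample collection', image)
            key = cv2.waitKey(1) & 0xFF
            if key == ord('s') and len(joints) == clip_length:
                sample_count += 1
                with open(root_path + action + '/txt/%d.txt' % sample_count, 'w') as f:
                    f.write(str(joints))
                cv2.imwrite(root_path + action + '/imgs/%d.jpg' % sample_count,
                            joints_imgs[-1])
            elif key == ord('q'):
                break
    cam.release()
    cv2.destroyAllWindows()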