Example No. 1
def PoseEstimatorPredict(image_path, plot=False, resolution='432x368', scales='[None]', model='mobilenet_thin'):
    '''
    input:
        image_path: path to the input image (jpg)
        plot = False: whether to draw; if True, two items are returned (keypoints + annotated image matrix)
        resolution = '432x368': network input size
        scales = '[None]'
        model = 'mobilenet_thin': which model to use

    output:
        if plot is False, returns one item: the keypoint information
        if plot is True, returns two items: the keypoint information and the annotated image matrix
    '''
    w, h = model_wh(resolution)
    e = TfPoseEstimator(get_graph_path(model), target_size=(w, h))
    image = common.read_imgfile(image_path, None, None)
    t = time.time()
    humans = e.inference(image, scales=scales)  # main inference call
    elapsed = time.time() - t

    logger.info('inference image: %s in %.4f seconds.' % (image_path, elapsed))
    centers = get_keypoint(image, humans)  # collect the keypoint information

    if plot:
        # plotting branch:
        image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)  # drawing helper
        fig = plt.figure()
        a = fig.add_subplot(2, 2, 1)
        a.set_title('Result')
        plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        return centers,image
    else:
        # non-plotting branch:
        return centers
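
A minimal usage sketch for the function above (the image path is hypothetical, and get_keypoint is assumed to be defined elsewhere, as the example itself assumes):

centers = PoseEstimatorPredict('./images/p1.jpg')                         # keypoints only
centers, annotated = PoseEstimatorPredict('./images/p1.jpg', plot=True)   # keypoints plus annotated image matrix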
Example No. 2
    def __init__(self):
        """
        Initialize the graphics window and mesh surface
        """

        # setup the view window
        self.app = QtGui.QApplication(sys.argv)
        self.window = gl.GLViewWidget()
        self.window.setWindowTitle('Terrain')
        self.window.setGeometry(0, 110, 1920, 1080)
        self.window.setCameraPosition(distance=30, elevation=12)
        self.window.show()

        gx = gl.GLGridItem()
        gy = gl.GLGridItem()
        gz = gl.GLGridItem()
        gx.rotate(90, 0, 1, 0)
        gy.rotate(90, 1, 0, 0)
        gx.translate(-10, 0, 0)
        gy.translate(0, -10, 0)
        gz.translate(0, 0, -10)
        self.window.addItem(gx)
        self.window.addItem(gy)
        self.window.addItem(gz)

        model = 'mobilenet_thin'
        #model = '432x368'
        camera = 0
        self.lines = {}
        self.connection = [[0, 1], [1, 2], [2, 3], [0, 4], [4, 5], [5, 6],
                           [0, 7], [7, 8], [8, 9], [9, 10], [8, 11], [11, 12],
                           [12, 13], [8, 14], [14, 15], [15, 16]]

        resolution = '432x368'
        w, h = model_wh(resolution)
        self.e = TfPoseEstimator(get_graph_path(model), target_size=(w, h))
        self.cam = cv2.VideoCapture(camera)
        ret_val, image = self.cam.read()
        #print('ret_val', ret_val)
        self.poseLifting = Prob3dPose(
            './src/lifting/models/prob_model_params.mat')
        keypoints = self.mesh(image)
        print('keypoints', keypoints)

        self.points = gl.GLScatterPlotItem(pos=keypoints,
                                           color=pg.glColor((0, 255, 0)),
                                           size=15)
        #print(keypoints)
        self.window.addItem(self.points)

        for n, pts in enumerate(self.connection):
            self.lines[n] = gl.GLLinePlotItem(pos=np.array(
                [keypoints[p] for p in pts]),
                                              color=pg.glColor((0, 0, 255)),
                                              width=3,
                                              antialias=True)
            self.window.addItem(self.lines[n])
Example No. 3
 def __init__(self,
              zoom=1.0,
              resolution='656x368',
              model='cmu',
              show_process=False):
     self.zoom = zoom
     self.resolution = resolution
     self.model = model
     self.show_process = show_process
     logger.debug('initialization %s : %s' % (model, get_graph_path(model)))
     self.w, self.h = model_wh(resolution)
     self.e = TfPoseEstimator(get_graph_path(model),
                              target_size=(self.w, self.h))
Example No. 4
    def __init__(self):
        """
        Initialize the graphics window and mesh surface
        """
        self.bitFalling = 0
        # Initialize plot.
        plt.ion()
        f2 = plt.figure(figsize=(6, 5))
        self.windowNeck = f2.add_subplot(1, 1, 1)
        self.windowNeck.set_title('Speed')
        self.windowNeck.set_xlabel('Time')
        self.windowNeck.set_ylabel('Speed')

        # plt.show()
        self.times = []
        self.recordVelocity = []
        self.recordNeck = []
        self.recordYTopRectangle = []
        self.recordHIP = []
        self.recordTimeList = []
        self.globalTime = 0
        self.highestNeck = 0
        # self.highestNeckTime = 0
        self.highestHIP = 0
        self.saveTimesStartFalling = -1

        self.surpriseMovingTime = -1
        self.detectedHIP_Y = 0
        self.detectedNECK_Y = 0
        self.extraDistance = 0

        self.fgbg = cv2.createBackgroundSubtractorMOG2(history=1,
                                                       varThreshold=500,
                                                       detectShadows=False)
        self.secondNeck = 0
        self.human_in_frame = False
        self.lastTimesFoundNeck = -1
        self.width = 300
        self.height = 200
        self.quotaVirtureNeck = 3
        self.used_quotaVirtureNeck = 0
        self.quoutaFalling = 0
        model = 'mobilenet_thin_432x368'
        w, h = model_wh(model)
        camera = 0  # 1 means external camera, 0 means internal camera
        self.e = TfPoseEstimator(get_graph_path(model), target_size=(w, h))
        self.cam = cv2.VideoCapture(camera)
        # self.cam = cv2.VideoCapture('C:/outpy2.avi')
        self.cam.set(cv2.CAP_PROP_AUTOFOCUS, 0)  # turn the autofocus off
        self.recordAcceleration = []
Example No. 5
def main():
    parser = argparse.ArgumentParser(
        description='tf-pose-estimation run by folder')
    parser.add_argument('--folder', type=str, default='./images/')
    parser.add_argument('--resolution',
                        type=str,
                        default='432x368',
                        help='network input resolution. default=432x368')
    parser.add_argument('--model',
                        type=str,
                        default='mobilenet_thin',
                        help='cmu / mobilenet_thin')
    parser.add_argument('--scales',
                        type=str,
                        default='[None]',
                        help='for multiple scales, eg. [1.0, (1.1, 0.05)]')
    args = parser.parse_args()
    scales = ast.literal_eval(args.scales)

    w, h = model_wh(args.resolution)
    e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))
    types = ('*.png', '*.jpg')
    files_grabbed = []
    for files in types:
        files_grabbed.extend(glob.glob(os.path.join(args.folder, files)))
    all_humans = dict()
    if not os.path.exists('output'):
        os.mkdir('output')
    for i, file in enumerate(files_grabbed):
        # estimate human poses from a single image !
        image = common.read_imgfile(file, None, None)
        t = time.time()
        humans = e.inference(image, scales=scales)
        elapsed = time.time() - t

        logger.info('inference image #%d: %s in %.4f seconds.' %
                    (i, file, elapsed))

        image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
        # cv2.imshow('tf-pose-estimation result', image)
        filename = 'pose_{}.png'.format(i)
        output_filepath = os.path.join('output', filename)
        cv2.imwrite(output_filepath, image)
        logger.info('image saved: {}'.format(output_filepath))
        # cv2.waitKey(5000)

        all_humans[file.replace(args.folder, '')] = humans

    with open(os.path.join(args.folder, 'pose.dil'), 'wb') as f:
        dill.dump(all_humans, f, protocol=dill.HIGHEST_PROTOCOL)
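
A hedged companion sketch: reading the dumped pose.dil back with dill (the folder path mirrors the script's default and is an assumption here):

import os
import dill

with open(os.path.join('./images/', 'pose.dil'), 'rb') as f:
    all_humans = dill.load(f)          # dict: filename -> list of detected humans
for name, humans in all_humans.items():
    print(name, len(humans), 'person(s) detected')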
Example No. 6
    def __init__(self):
        """
        Initialize the graphics window and mesh surface
        """

        # setup the view window
        self.app = QtGui.QApplication(sys.argv)
        self.window = gl.GLViewWidget()
        self.window.setWindowTitle('Terrain')
        self.window.setGeometry(0, 110, 1920, 1080)
        self.window.setCameraPosition(distance=30, elevation=12)
        self.window.show()

        gx = gl.GLGridItem()
        gy = gl.GLGridItem()
        gz = gl.GLGridItem()
        gx.rotate(90, 0, 1, 0)
        gy.rotate(90, 1, 0, 0)
        gx.translate(-10, 0, 0)
        gy.translate(0, -10, 0)
        gz.translate(0, 0, -10)
        self.window.addItem(gx)
        self.window.addItem(gy)
        self.window.addItem(gz)

        model = 'mobilenet_thin'
        camera = 0
        w, h = model_wh("432x368")
        self.e = TfPoseEstimator(get_graph_path(model), target_size=(w, h))
        self.cam = cv2.VideoCapture(0)
        ret_val, image = self.cam.read()
        self.poseLifting = Prob3dPose('lifting/models/prob_model_params.mat')
        keypoints = self.mesh(image)
        
        self.points = gl.GLScatterPlotItem(
            pos=keypoints,
            color=pg.glColor((255, 255, 0)),
            size=15
        )
        # self.lines = gl.GLLinePlotItem(
        #     pos=lines,
        #     color=pg.glColor((255, 255, 255)),
        #     width=2
        # )
        self.window.addItem(self.points)
Example No. 7
    def __init__(self):
        """
        Initialize the graphics window and mesh surface
        """
        # Initialize plot.
        plt.ion()
        f = plt.figure(figsize=(5, 5))
        f2 = plt.figure(figsize=(6, 5))

        self.window3DBody = f.gca(projection='3d')
        self.window3DBody.set_title('3D_Body')
        self.windowStable = f2.add_subplot(1, 1, 1)
        self.windowStable.set_title('Stable')
        self.windowStable.set_xlabel('Time')
        self.windowStable.set_ylabel('Distance')
        self.windowStable.set_ylim([0, 1500])

        #plt.show()
        self.times = [0]
        self.stable = [0]
        self.recordHead = []
        self.fps_time = 0

        model = 'mobilenet_thin_432x368'
        w, h = model_wh(model)
        #model = 'cmu'
        #w,h = 656,368
        camera = 1  # 1 means external camera, 0 means internal camera

        self.lines = {}
        self.connection = [[0, 1], [1, 2], [2, 3], [0, 4], [4, 5], [5, 6],
                           [0, 7], [7, 8], [8, 9], [9, 10], [8, 11], [11, 12],
                           [12, 13], [8, 14], [14, 15], [15, 16]]
        self.e = TfPoseEstimator(get_graph_path(model), target_size=(w, h))
        self.cam = cv2.VideoCapture(camera)
        ret_val, image = self.cam.read()

        self.poseLifting = Prob3dPose(
            './src/lifting/models/prob_model_params.mat')
        self.statusBodyWindow = 0
        try:
            keypoints = self.mesh(image)

        except:
            pass
Example No. 8
    def __init__(self):
        """
        Initialize the graphics window and mesh surface
        and add grids along the three axes; they just fill up the background
        """

        # setup the view window
        self.app = QtGui.QApplication(sys.argv)
        self.window = gl.GLViewWidget()
        self.window.setWindowTitle('Terrain')
        self.window.setGeometry(0, 110, 1920, 1080)
        self.window.setCameraPosition(distance=30, elevation=12)
        self.window.show()

        gx = gl.GLGridItem()
        gy = gl.GLGridItem()
        gz = gl.GLGridItem()
        gx.rotate(90, 0, 1, 0)
        gy.rotate(90, 1, 0, 0)
        gx.translate(-10, 0, 0)
        gy.translate(0, -10, 0)  # we translate all of them
        gz.translate(0, 0, -10)  # so they're pushed out to the background
        self.window.addItem(gx)
        self.window.addItem(gy)  # add them all to the window
        self.window.addItem(gz)

        model = "mobilenet_thin_432x368"
        camera = 0
        w, h = model_wh(model)
        # create the estimator object e for this model
        self.e = TfPoseEstimator(get_graph_path(model), target_size=(w, h))
        # this mirrors what run.py does (around line 37), but with our own
        # model variable instead of args.model
        self.cam = cv2.VideoCapture(camera)
        ret_val, image = self.cam.read()
        self.poseLifting = Prob3dPose(
            './src/lifting/models/prob_model_params.mat')
        # this object performs the 2D-to-3D pose lifting (see above)
        keypoints = self.mesh(image)

        self.points = gl.GLScatterPlotItem(  # plot dots
            pos=keypoints,
            color=pg.glColor((0, 255, 0)),
            size=15)
        self.window.addItem(self.points)
Example No. 9
def cnvt(img, name):
    os.chdir(read_path)
    image = cv2.imread(img)

    os.chdir(old_path)
    model = 'mobilenet_thin'
    resolution = '432x368'
    w, h = model_wh(resolution)

    e = TfPoseEstimator(get_graph_path(model), target_size=(w, h))
    humans = e.inference(image)
    blank_image = np.zeros((h,w,3), np.uint8)
    image = TfPoseEstimator.draw_humans(blank_image, humans, imgcopy=False)

    os.chdir(save_path)
    cv2.imwrite(name, image)
    print("Saved - %s As - %s" % (img, name))

    os.chdir(old_path)
Example No. 10
 def __init__(self, calibration_file):
     """
      This method initializes an OpenPose model
     """
     args = type('', (), {})
     # args.resolution = '1312x736'
     args.resolution = '432x368'
     args.model = 'mobilenet_thin'
     args.scales = '[None]'
     scales = ast.literal_eval(args.scales)
     w, h = model_wh(args.resolution)
     e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))
     self.model = {'e': e, 'scales': scales}
     # start new matlab session
     self.matlab = matlab.engine.start_matlab()
     # or...
     # in matlab run: matlab.engine.shareEngine
     # and uncomment the following line
     # self.matlab = matlab.engine.connect_matlab()
     self.calibration = calibration_file
Example No. 11
    def __init__(self):
        """
        Initialize the graphics window and mesh surface
        """
        self.bitFalling = 0
        # Initialize plot.
        self.times = []
        self.recordVelocity = []
        self.recordNeck = []
        self.recordYTopRectangle = []
        self.recordHIP = []
        self.recordTimeList = []
        self.globalTime = 0
        self.highestNeck = 0
        # self.hightestNeckTime = 0
        self.highestHIP = 0
        self.saveTimesStartFalling = -1

        self.quoutaFalling = 0
        self.surpriseMovingTime = -1
        self.detectedHIP_Y = 0
        self.detectedNECK_Y = 0
        self.extraDistance = 0

        self.fgbg = cv2.createBackgroundSubtractorMOG2(history=1,
                                                       varThreshold=500,
                                                       detectShadows=False)
        self.secondNeck = 0
        self.human_in_frame = False
        self.lastTimesFoundNeck = -1
        self.width = 300
        self.height = 200
        self.quotaVirtureNeck = 3
        self.used_quotaVirtureNeck = 0
        model = 'mobilenet_thin_432x368'
        w, h = model_wh(model)
        self.e = TfPoseEstimator(get_graph_path(model), target_size=(w, h))
        self.recordAcceleration = []
Example No. 12
    def __init__(self):
        self.parser = argparse.ArgumentParser(
            description='tf-pose-estimation run')
        self.parser.add_argument(
            '--resolution',
            type=str,
            default='432x368',
            help='network input resolution. default=432x368')
        self.parser.add_argument('--model',
                                 type=str,
                                 default='mobilenet_thin',
                                 help='cmu / mobilenet_thin')
        self.parser.add_argument(
            '--scales',
            type=str,
            default='[None]',
            help='for multiple scales, eg. [1.0, (1.1, 0.05)]')
        self.args = self.parser.parse_args()
        self.scales = ast.literal_eval(self.args.scales)

        self.w, self.h = model_wh(self.args.resolution)
        self.e = TfPoseEstimator(get_graph_path(self.args.model),
                                 target_size=(self.w, self.h))
Example No. 13
ch.setLevel(logging.DEBUG)
formatter = logging.Formatter('[%(asctime)s] [%(name)s] [%(levelname)s] %(message)s')
ch.setFormatter(formatter)
logger.addHandler(ch)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='tf-pose-estimation run')  # add the program's command-line arguments
    parser.add_argument('--image', type=str, default='../images/p1.jpg')  # image path; falls back to the default image
    parser.add_argument('--resolution', type=str, default='432x368', help='network input resolution. default=432x368')  # specify the network input resolution
    parser.add_argument('--model', type=str, default='mobilenet_thin', help='cmu / mobilenet_thin')  # specify the model
    parser.add_argument('--scales', type=str, default='[None]', help='for multiple scales, eg. [1.0, (1.1, 0.05)]')  # multi-scale inference option
    args = parser.parse_args()  # args holds all parsed arguments
    scales = ast.literal_eval(args.scales)

    w, h = model_wh(args.resolution)  # returns width and height (each must be a multiple of 16)
    e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))		# Model + width and height

    # estimate human poses from a single image !
    image = common.read_imgfile(args.image, None, None)
    # image = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)
    t = time.time()
    humans = e.inference(image, scales=scales)
    elapsed = time.time() - t

    logger.info('inference image: %s in %.4f seconds.' % (args.image, elapsed))

    image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
    # cv2.imshow('tf-pose-estimation result', image)
    # cv2.waitKey()
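
A hedged follow-up sketch (not part of the original script): reading pixel coordinates out of humans, using the same body_parts access pattern seen in the later examples on this page:

image_h, image_w = image.shape[:2]
for human in humans:
    for idx, body_part in human.body_parts.items():
        x = int(body_part.x * image_w + 0.5)   # body_part.x / .y are normalized to [0, 1]
        y = int(body_part.y * image_h + 0.5)
        print(idx, x, y, body_part.score)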
Example No. 14
    # parameters
    image_topic = rospy.get_param('~camera', '')
    model = rospy.get_param('~model', 'cmu_640x480')
    scales = rospy.get_param('~scales', '[None]')
    scales = ast.literal_eval(scales)
    tf_lock = Lock()

    rospy.loginfo('[TfPoseEstimatorROS] scales(%d)=%s' % (len(scales), str(scales)))

    if not image_topic:
        rospy.logerr('Parameter \'camera\' is not provided.')
        sys.exit(-1)

    try:
        w, h = model_wh(model)
        graph_path = get_graph_path(model)

        rospack = rospkg.RosPack()
        graph_path = os.path.join(rospack.get_path('tfpose_ros'), graph_path)
    except Exception as e:
        rospy.logerr('invalid model: %s, e=%s' % (model, e))
        sys.exit(-1)

    pose_estimator = TfPoseEstimator(graph_path, target_size=(w, h))
    cv_bridge = CvBridge()

    rospy.Subscriber(image_topic, Image, callback_image, queue_size=1, buff_size=2**24)
    pub_pose = rospy.Publisher('~pose', Persons, queue_size=1)

    rospy.loginfo('start+')
Example No. 15
    cocoGt = COCO(coco_json_file)
    catIds = cocoGt.getCatIds(catNms=['person'])
    keys = cocoGt.getImgIds(catIds=catIds)
    if args.data_idx < 0:
        if eval_size > 0:
            keys = keys[:eval_size]  # only use the first #eval_size elements.
        pass
    else:
        keys = [keys[args.data_idx]]
    logger.info('validation %s set size=%d' % (coco_json_file, len(keys)))
    write_json = 'etcs/%s_%s_%f.json' % (args.model, args.resize,
                                         args.resize_out_ratio)

    logger.debug('initialization %s : %s' %
                 (args.model, get_graph_path(args.model)))
    w, h = model_wh(args.resize)
    if w == 0 or h == 0:
        e = TfPoseEstimator(get_graph_path(args.model), target_size=(432, 368))
    else:
        e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))

    result = []
    for i, k in enumerate(tqdm(keys)):
        img_meta = cocoGt.loadImgs(k)[0]
        img_idx = img_meta['id']

        img_name = os.path.join(image_dir, img_meta['file_name'])
        image = read_imgfile(img_name, None, None)
        if image is None:
            logger.error('image not found, path=%s' % img_name)
            sys.exit(-1)
Example No. 16
    rospy.init_node('TfPoseEstimatorROS', anonymous=True, log_level=rospy.INFO)

    # parameters
    image_topic = rospy.get_param('~camera', '')
    model = rospy.get_param('~model', 'cmu')

    resolution = rospy.get_param('~resolution', '432x368')
    resize_out_ratio = float(rospy.get_param('~resize_out_ratio', '4.0'))
    tf_lock = Lock()

    if not image_topic:
        rospy.logerr('Parameter \'camera\' is not provided.')
        sys.exit(-1)

    try:
        w, h = model_wh(resolution)
        graph_path = get_graph_path(model)

        rospack = rospkg.RosPack()
        graph_path = os.path.join(rospack.get_path('tfpose_ros'), graph_path)
    except Exception as e:
        rospy.logerr('invalid model: %s, e=%s' % (model, e))
        sys.exit(-1)

    pose_estimator = TfPoseEstimator(graph_path, target_size=(w, h))
    cv_bridge = CvBridge()

    rospy.Subscriber(image_topic, Image, callback_image, queue_size=1, buff_size=2**24)
    pub_pose = rospy.Publisher('~pose', Persons, queue_size=1)

    rospy.loginfo('start+')
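
The callback_image handler referenced above is not shown on this page; a hedged sketch of what it might look like follows (the packing of the Persons message is omitted because its fields are not given here):

def callback_image(msg):
    # convert the ROS image message to an OpenCV BGR array
    image = cv_bridge.imgmsg_to_cv2(msg, desired_encoding='bgr8')
    # run inference under the lock shared with other callbacks
    with tf_lock:
        humans = pose_estimator.inference(image)
    # ... build a Persons message from humans and pub_pose.publish(...) ...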
Example No. 17
        '--conf',
        type=str,
        default='configs/mgh.yaml',
        help=
        'Path to the YAML config file containing the parameters helpful for Openpose inference'
    )
    args = parser.parse_args()

    with open(args.conf, 'r') as f:
        conf_vals = yaml.load(f, Loader=yaml.FullLoader)
    conf_vals = conf_vals['estimate_pose']

    logger.debug(
        'initialization %s : %s' %
        (conf_vals['model_name'], get_graph_path(conf_vals['model_name'])))
    w, h = model_wh(conf_vals['resolution'])
    e = TfPoseEstimator(get_graph_path(conf_vals['model_name']),
                        target_size=(w, h))

    subdirs = os.listdir(conf_vals['data_dir'])

    def process_vids(subs):
        for i in trange(len(subs), desc='Subject ID', position=0):
            sub = subs[i]
            video_root = os.path.join(conf_vals['data_dir'], sub)
            l_vids = []
            for root, dirs, fnames in os.walk(video_root):
                for fname in fnmatch.filter(fnames,
                                            '*.' + conf_vals['data_ext']):
                    l_vids.append(os.path.join(root, fname))
            l_vids = sorted(l_vids)
Example No. 18
def pose_dd():
    fps_time = 0
    global humans
    global BREAK 
    global image 
    parser = argparse.ArgumentParser(description='tf-pose-estimation realtime webcam')
    parser.add_argument('--camera', type=int, default=0)
    parser.add_argument('--zoom', type=float, default=1.0)
    parser.add_argument('--model', type=str, default='mobilenet_thin_432x368', help='cmu_640x480 / cmu_640x360 / mobilenet_thin_432x368')
    parser.add_argument('--show-process', type=bool, default=False,
                        help='for debug purpose, if enabled, speed for inference is dropped.')
    args = parser.parse_args()

    ##logger.debug('initialization %s : %s' % (args.model, get_graph_path(args.model)))
    w, h = model_wh(args.model)
    e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))
    #logger.debug('cam read+')
    cam = cv2.VideoCapture(args.camera)
    ret_val, image = cam.read()
    ##logger.info('cam image=%dx%d' % (image.shape[1], image.shape[0]))

    while True:
        ret_val, image = cam.read()

        #logger.debug('image preprocess+')
        if args.zoom < 1.0:
            canvas = np.zeros_like(image)
            img_scaled = cv2.resize(image, None, fx=args.zoom, fy=args.zoom, interpolation=cv2.INTER_LINEAR)
            dx = (canvas.shape[1] - img_scaled.shape[1]) // 2
            dy = (canvas.shape[0] - img_scaled.shape[0]) // 2
            canvas[dy:dy + img_scaled.shape[0], dx:dx + img_scaled.shape[1]] = img_scaled
            image = canvas
        elif args.zoom > 1.0:
            img_scaled = cv2.resize(image, None, fx=args.zoom, fy=args.zoom, interpolation=cv2.INTER_LINEAR)
            dx = (img_scaled.shape[1] - image.shape[1]) // 2
            dy = (img_scaled.shape[0] - image.shape[0]) // 2
            image = img_scaled[dy:image.shape[0], dx:image.shape[1]]

        #logger.debug('image process+')
        humans = e.inference(image)

        #logger.debug('postprocess+')
        image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)

        #logger.debug('show+')
        cv2.putText(image,
                    "FPS: %f" % (1.0 / (time.time() - fps_time)),
                    (10, 10),  cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                    (0, 255, 0), 2)
        cv2.imshow('tf-pose-estimation result', image)
        fps_time = time.time()
        #out.write(image)
        if cv2.waitKey(1) == 27:
            #out.release()
            cv2.destroyAllWindows()
            BREAK = True
            clientsocket.send("off".encode('utf-8'))
            print("off sent")
            import sys
            sys.exit(0)
            break
Example No. 19
def doCNNTracking(args):    
    global is_tracking,logger

    fps_time = 0

    logger.debug('initialization %s : %s' % (args.model, get_graph_path(args.model)))
    w, h = model_wh(args.resolution)
    e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))
    logger.debug('cam read+')
    pipeline = rs.pipeline()
    pipeline.start()
    
    frames = pipeline.wait_for_frames()
    # get the depth frame
    depth = frames.get_depth_frame() 
    depth_image_data = depth.as_frame().get_data()
    depth_image = np.asanyarray(depth_image_data)
    
    
    
    logger.info('cam depth image=%dx%d' % (depth_image.shape[1], depth_image.shape[0])) 
    logger.info('camera ready') 
    
    
    # crop box that maps the depth image onto the RGB image
    clip_box = [100,-100,290,-300]
    
    human_list = []
    while (True):
        if(is_tracking):
            
            fps_time = time.time()
            frames = pipeline.wait_for_frames()
            # get the RGB frame
            image_frame = frames.get_color_frame()
            image_data = image_frame.as_frame().get_data()
            image = np.asanyarray(image_data)
            
            # convert BGR to RGB
            image = np.array(image[...,::-1])
            origen_image = image

            # get the depth frame
            depth = frames.get_depth_frame() 
            depth_image_data = depth.as_frame().get_data()
            depth_image = np.asanyarray(depth_image_data)
            depth_image = depth_image[(int)(clip_box[0]):(int)(clip_box[1]),(int)(clip_box[2]):(int)(clip_box[3])]
            depth_image = cv2.resize(depth_image, (640, 480), interpolation=cv2.INTER_CUBIC)
            depth_image=depth_image/30
            depth_image.astype(np.uint8)
            
            # mask for depth-based background removal
            thresh=cv2.inRange(depth_image,20,160)

            # morphological clean-up of the background mask
            kernel = cv2.getStructuringElement(cv2.MORPH_RECT,(1, 1))  
            eroded = cv2.erode(thresh,kernel)
            kernel2 = cv2.getStructuringElement(cv2.MORPH_RECT,(5, 5))  
            dilated = cv2.dilate(eroded,kernel2)
            
            # brightness mask
            bright_mask = np.zeros(image.shape);
            bright_mask.fill(200)
            bright_mask = bright_mask.astype(np.uint8);
            bright_mask = cv2.bitwise_and(bright_mask, bright_mask, mask=dilated)
            
            # brightness adjustment of the RGB image
#             image = cv2.bitwise_and(image, image, mask=dilated)
            
            image = image.astype(int)+200-bright_mask.astype(int);
            image = np.clip(image, 0, 255)
            image = image.astype(np.uint8);
            
            image=origen_image
            
            # edge detection
            image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            blurred = cv2.GaussianBlur(image_gray, (3, 3), 0)
            canny = cv2.Canny(blurred, 50, 50)
            canny_blurred = cv2.GaussianBlur(canny, (13, 13), 0)
            cv2.imshow('test', canny_blurred)
            
            # tf-pose image scaling
            if args.zoom < 1.0:
                canvas = np.zeros_like(image)
                img_scaled = cv2.resize(image, None, fx=args.zoom, fy=args.zoom, interpolation=cv2.INTER_LINEAR)
                dx = (canvas.shape[1] - img_scaled.shape[1]) // 2
                dy = (canvas.shape[0] - img_scaled.shape[0]) // 2
                canvas[dy:dy + img_scaled.shape[0], dx:dx + img_scaled.shape[1]] = img_scaled
                image = canvas
            elif args.zoom > 1.0:
                img_scaled = cv2.resize(image, None, fx=args.zoom, fy=args.zoom, interpolation=cv2.INTER_LINEAR)
                dx = (img_scaled.shape[1] - image.shape[1]) // 2
                dy = (img_scaled.shape[0] - image.shape[0]) // 2
                image = img_scaled[dy:image.shape[0], dx:image.shape[1]]

            # tf-pose inference
            humans = e.inference(image)
            
            # # get the joints
            # jdata = TfPoseEstimator.get_json_data(image.shape[0],image.shape[1],humans)
            # if(len(jdata)>2):
            #     try:
            #         # send position data to the server
            #         chating_room.sendTrackingData(jdata,'track')
            #     except:
            #         print("Cannot send data to server.")
           
            
            # depth image after background removal
            depth_masked = cv2.bitwise_and(depth_image, depth_image, mask=dilated)
            new_human_list = []
            for human in humans:
                # compute depth data
                depthDatas=[None] * 20
                image_h, image_w = image.shape[:2]
                
                               
                # get joint data with depth
                for i in range(common.CocoPart.Background.value):
                    if i not in human.body_parts.keys():
                        continue
                    body_part = human.body_parts[i]
                    y= int(body_part.y * image_h+ 0.5)
                    x = int(body_part.x * image_w + 0.5)
                    s=5;
                    mat = depth_masked[y-s if(y-s>=0) else 0:y+s if(y+s<=479) else 479,x-s if(x-s>=0) else 0:x+s if (x+s<=639) else 639]

                    count=0;
                    sum_depth=0;

                    for j in range (mat.shape[0]):
                        for k in range (mat.shape[1]):     
                            if mat[j,k]!=0:
                                sum_depth+=mat[j,k]
                                count+=1
                                
                    if(count>0):
                        depth=sum_depth/count
                    else:
                        depth=0
                    
                    mat = depth_masked[y-s if(y-s>=0) else 0:y+s if(y+s<=479) else 479,x-s if(x-s>=0) else 0:x+s if (x+s<=639) else 639]
                    
                    try:
                        depthDatas[i] =  (JointDepthData(i,x,y,depth))
                    except:
                        print("err:"+str(x)+" "+str(y)+" "+str(body_part.x )+" "+str(body_part.y ))
                
                # post-processing
                jn0=np.zeros((20,3))
                
                for j in depthDatas:
                    if(j!=None):
                        jn0[j.jn]=np.array([1,j.x,j.y])
                jn0 = jn0.astype(int)
                jn00 = jn0.copy()
                
                old_images=None
                if(len(human_list)>0):
                # find the most similar previous HumanData
                    most_simular_value = 9999999
                    most_simular_human = ''
                    jn1=np.zeros((20,3))
                    
                    for human_data in human_list:                        
                        for j in human_data.joint_list:
                            if(j != None):
                                jn1[j.jn]=np.array([1,j.x,j.y])
                        jn1 = jn1.astype(int)

                        different_value=0
                        match_count=0
                        for i in range (18) :
                            if(jn0[i,0]*jn1[i,0]!=0):
                                different_value+= np.linalg.norm(jn0[i]-jn1[i])
                                match_count+=1
                        if(different_value/match_count<most_simular_value):
                            most_simular_human = human_data
                
                    old_images = most_simular_human.image_list
                    
                    if(old_images==None):
                        old_images = [np.zeros((80,80))]*18
                    w = 160
                    
                    
#                     for joint_i in range (18):
#                         smallest_diff = 9999999
#                         if(jn0[joint_i,0]==int(1) and jn1[joint_i,0]==int(1) and old_images[joint_i].shape[0]>0 and old_images[joint_i].shape[1]>0):
#                             new_center=np.zeros(2)
#                             for i in range (-20,20,10):
#                                 for j in range (-20,20,10):
#                                     center = np.array([jn0[joint_i,1]+i,jn0[joint_i,2]+j])
                                    
#                                     mat0 = canny_blurred[center[0]-w:center[0]+w,center[1]-w:center[1]+w]
#                                     if(mat0.shape[0]!=2*w or mat0.shape[1]!=2*w or old_images[joint_i].shape[0]!=2*w or old_images[joint_i].shape[1]!=2*w):
#                                         continue
#                                     try:
#                                         mat1 = mat0-old_images[joint_i]
#                                     except:
#                                         print(mat0.shape)
#                                     mat2 = np.exp2(mat1)
#                                     diff = np.sum(mat2)
#                                     if(diff<smallest_diff):
#                                         smallest_diff=diff
#                                         new_images[joint_i] = canny_blurred[center[0]-w:center[0]+w,center[1]-w:center[1]+w]
#                                         new_center=center
#                             if(smallest_diff<9999999):
#                                 jn0[joint_i,1] = int(new_center[0]+jn0[joint_i,1])/2
#                                 jn0[joint_i,2] = int(new_center[1]+jn1[joint_i,1])/2
                    temp = np.copy(image)
                    
                    for joint_i in range(18):
                        center = np.array([jn0[joint_i,1],jn0[joint_i,2]])
                        mat0 = image_gray[center[1]-w if(center[1]-w>=0) else 0:center[1]+w if(center[1]+w<480) else 479,center[0]-w if(center[0]-w>=0) else 0:center[0]+w if(center[0]+w<640) else 639]
                        mat1 = old_images[joint_i]
                        if(mat0.shape[0]>mat1.shape[0]and mat0.shape[1]>mat1.shape[1] and mat0.shape[0]>0 and mat0.shape[1]>0 and mat1.shape[0]>0 and mat1.shape[1]>0):
                            loc = match_img(mat0,mat1)
                            lt = (center[0]-w if(center[0]-w>=0) else 0,center[1]-w if(center[1]-w>=0) else 0)
                            
                            
                            if(len(loc[1])>0):                                 
                                c = (int(sum(loc[1])/len(loc[1])),int(sum(loc[0])/len(loc[0])))
                                cv2.rectangle(temp,(c[0]+lt[0],c[1]+lt[1]),(c[0]+lt[0]+40,c[1]+lt[1]+40), common.CocoColors[joint_i], 2)                             
                            # for pt in : 
                            #     c = pt-np.array((center[1]-w if(center[1]-w>=0) else 0,center[0]-w if(center[0]-w>=0) else 0))+np.array((center[1],center[0]))
                            #     c2 = (c[0],c[1])
                                # cv2.rectangle(temp,c2,(c[0]+40,c[1]+40),(255,255,255), 2) 
                                
                    
                    cv2.imshow('test3',  temp )
                          
 
                
                w = 40
                new_depthDatas=[None]*20
                new_images = [np.zeros((80,80))]*20
                for joint_i in range (18):
                    if(jn0[joint_i,0]==1):
                        new_depthDatas[joint_i]=(JointDepthData(joint_i,jn0[joint_i,1],jn0[joint_i,2],depthDatas[joint_i].dp))
                        m=np.copy(image_gray[jn0[joint_i,2]-w:jn0[joint_i,2]+w,jn0[joint_i,1]-w:jn0[joint_i,1]+w])

                        if(old_images==None and m.shape[0]==2*w and m.shape[1]==2*w):
                            cv2.circle(image,(jn0[joint_i,1],jn0[joint_i,2]), w, (0,0,255), -1)
                            new_images[joint_i]=m
                        elif(old_images==None and (m.shape[0]<2*w or m.shape[1]<2*w)):
                            pass
                        elif(m.shape[0]==2*w and m.shape[1]==2*w):
                            cv2.circle(image,(jn0[joint_i,1],jn0[joint_i,2]),w, (0,255,0), -1)
                            new_images[joint_i]=old_images[joint_i]*0.9+m*0.1
                        else:
                            cv2.circle(image,(jn0[joint_i,1],jn0[joint_i,2]), w, (255,0,0), -1)
                            new_images[joint_i]=old_images[joint_i]
                        
                        if(joint_i==1 and m.shape[0]==2*w and m.shape[1]==2*w):
                            cv2.imshow('test2', m)
                            cv2.circle(image,(jn00[joint_i,1],jn00[joint_i,2]), w, (255,255,0), -1)
                    
                        # new_images.append(depth_masked[jn0[joint_i,1]-5:jn0[joint_i,1]+5,jn0[joint_i,2]-5:jn0[joint_i,2]+5])
                        
                
                
                new_human = HumanData(new_depthDatas,new_images)
                
                new_human_list.append(new_human)
                
#                 depth_jdata=json.dumps(new_depthDatas)
#                 if(len(depth_jdata)>2):
#                     try:
#                         # send depth data to the server
#                         print(depth_jdata)
#                         chating_room.sendTrackingData(depth_jdata,'track_depth')

#                     except:
#                         print("Cannot send depth data to server.")
            
            human_list = new_human_list           
            depth_image =  cv2.applyColorMap(cv2.convertScaleAbs(depth_image/25), cv2.COLORMAP_JET)
#             cv2.circle(image,(320,240), 5, (255,0,0), -1)

            
            image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)

            cv2.putText(image,"FPS: %f" % (1.0 / (time.time() - fps_time)),(10, 10),  cv2.FONT_HERSHEY_SIMPLEX, 0.5,(0, 255, 0), 2)
            
            cv2.imshow('tf-pose-estimation result', image)
           
            
            if cv2.waitKey(25) & 0xFF == ord('q'):
                cv2.destroyAllWindows()
                break


    cv2.destroyAllWindows()
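
The nested loops above that average the nonzero depth values around each joint can be written more compactly; a hedged, vectorized equivalent (same windowing and zero-skipping behaviour) is sketched below:

import numpy as np

def mean_nonzero_depth(depth_masked, x, y, s=5):
    # average the nonzero depth values in a (2s x 2s) window centred on (x, y)
    h, w = depth_masked.shape[:2]
    patch = depth_masked[max(y - s, 0):min(y + s, h), max(x - s, 0):min(x + s, w)]
    nonzero = patch[patch != 0]
    return float(nonzero.mean()) if nonzero.size else 0.0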
Example No. 20
    parser.add_argument('--camera', type=int, default=0)
    parser.add_argument('--zoom', type=float, default=1.0)
    parser.add_argument(
        '--model',
        type=str,
        default='cmu_640x480',
        help='cmu_640x480 / cmu_640x360 / mobilenet_thin_432x368')
    parser.add_argument(
        '--show-process',
        type=bool,
        default=False,
        help='for debug purpose, if enabled, speed for inference is dropped.')
    args = parser.parse_args()
    logger.debug('initialization %s : %s' %
                 (args.model, get_graph_path(args.model)))
    w, h = model_wh(args.model)
    e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))
    logger.debug('cam read+')
    cam = cv2.VideoCapture(args.camera)
    ret_val, image = cam.read()
    logger.info('cam image=%dx%d' % (image.shape[1], image.shape[0]))

    while True:
        ret_val, image = cam.read()

        #logger.debug('image preprocess+')
        if args.zoom < 1.0:
            canvas = np.zeros_like(image)
            img_scaled = cv2.resize(image,
                                    None,
                                    fx=args.zoom,
Example No. 21
def startt():
    fps_time = 0
    

    logger.debug('initialization %s : %s' % ('mobilenet_thin', get_graph_path('mobilenet_thin')))
    w, h = model_wh('432x368')
    e = TfPoseEstimator(get_graph_path('mobilenet_thin'), target_size=(w, h))
    logger.debug('cam read+')
    cam = cv2.VideoCapture(0,cv2.CAP_DSHOW)
    #"http://127.0.0.1:8000/"
    ret_val, image = cam.read()
    logger.info('cam image=%dx%d' % (image.shape[1], image.shape[0]))
    frame=0
    while True:
        ret_val, image = cam.read()
        #image2 = cv2.threshold(image,0,255,cv2.THRESH_BINARY)
        image2 = cv2.imread('../images/123.jpg')
        logger.debug('image preprocess+')
        """if args.zoom < 1.0:
            canvas = np.zeros_like(image)
            canvas2 = np.zeros_like(image2)
            img_scaled = cv2.resize(image, None, fx=args.zoom, fy=args.zoom, interpolation=cv2.INTER_LINEAR)
            img_scaled2 = cv2.resize(image2, None, fx=args.zoom, fy=args.zoom, interpolation=cv2.INTER_LINEAR)
            dx = (canvas.shape[1] - img_scaled.shape[1]) // 2
            dy = (canvas.shape[0] - img_scaled.shape[0]) // 2
            canvas[dy:dy + img_scaled.shape[0], dx:dx + img_scaled.shape[1]] = img_scaled
            canvas2[dy:dy + img_scaled2.shape[0], dx:dx + img_scaled2.shape[1]] = img_scaled2
            image = canvas
            image2 = canvas2
        elif args.zoom > 1.0:
            img_scaled = cv2.resize(image, None, fx=args.zoom, fy=args.zoom, interpolation=cv2.INTER_LINEAR)
            dx = (img_scaled.shape[1] - image.shape[1]) // 2
            dy = (img_scaled.shape[0] - image.shape[0]) // 2
            image = img_scaled[dy:image.shape[0], dx:image.shape[1]]"""
        #image2=np.array(image2)
        logger.debug('image process+')
        humans = e.inference(image)
        n = len(humans)
        print(humans)
        """body = {}
        body["key_points"] = []
        for human in humans:
           for parts in human.body_parts.items():
               print("id" ,parts[0])
               print("x",parts[1].x)
               print("y",parts[1].y)
               print("score = ",parts[1].score)
               body["key_points"].append({"ID":parts[0],"X":parts[1].x,"Y":parts[1].y})
        with open("json/{0}.json".format(str(i)),'w') as file:
        	json.dump(body,file)"""
        output_json='3d-pose-baseline/src3d/json/'
        logger.debug('postprocess+')
        image2 = TfPoseEstimator.draw_humans(image2, humans, imgcopy=False,frame=frame,dir=output_json)
       # image =  
        logger.debug('show+')
        cv2.putText(image,
                    "FPS: %f" % (1.0 / (time.time() - fps_time)),
                    (10, 10),  cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                    (0, 255, 0), 2)
        cv2.putText(image, "persons = %f" % n,(10,30),cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                    (0, 0, 255), 3)
        cv2.imshow('original', image)
        #cv2.imshow('tf-pose-estimation result', image2)
        if n==1:
            frame=frame+1
        fps_time = time.time()
        
        logger.debug('finished+')
        keyboard = cv2.waitKey(30)
        if keyboard == ord('q') or keyboard == 27:
            break
    cam.release()
    cv2.destroyAllWindows()
    return image2
Example No. 22
    def post(self):
        global finished_post
        if ((self.request.headers['Content-Type'] == 'imagebin')
                and (finished_post == 1)):
            print(finished_post)
            finished_post = 0
            print('Received image')
            image = self.request.body
            fh = open('static/image1.jpg', 'wb')
            fh.write(image)
            fh.close()
            #fh = open('static/image1.jpg','ab')
            #fh.write(bytes([0xD9]))
            #fh.close()
            print('0')
            #image = cv2.imread('static/image1.jpg')
            print('1')
            print('2')

            parser = argparse.ArgumentParser(
                description='tf-pose-estimation run')
            parser.add_argument(
                '--resolution',
                type=str,
                default='432x368',
                help='network input resolution. default=432x368')
            parser.add_argument('--model',
                                type=str,
                                default='mobilenet_thin',
                                help='cmu / mobilenet_thin')
            parser.add_argument(
                '--scales',
                type=str,
                default='[None]',
                help='for multiple scales, eg. [1.0, (1.1, 0.05)]')
            args = parser.parse_args()
            scales = ast.literal_eval(args.scales)

            w, h = model_wh(args.resolution)
            e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))

            # estimate human poses from a single image !
            image = common.read_imgfile('static/image1.jpg', None, None)
            # image = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)
            t = time.time()
            humans = e.inference(image, scales=scales)
            elapsed = time.time() - t

            logger.info('inference image: image3.jpg in %.4f seconds.' %
                        (elapsed))

            image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)

            #cv2.putText(image,
            #               f"Fallen: False",
            #              (60, 60),  cv2.FONT_HERSHEY_SIMPLEX, 2,
            #             (0, 255, 0), 5)

            cv2.imwrite('static/image3.jpg', image)

            for client in clients:
                update_clients(client)

            print(finished_post)
            finished_post = 1
Example No. 23
def doCNNTracking(args):    
    global is_tracking,logger

    fps_time = 0

    logger.debug('initialization %s : %s' % (args.model, get_graph_path(args.model)))
    w, h = model_wh(args.resolution)
    e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))
    logger.debug('cam read+')
    pipeline = rs.pipeline()
    pipeline.start()
    
    frames = pipeline.wait_for_frames()
    # get the depth frame
    depth = frames.get_depth_frame() 
    depth_image_data = depth.as_frame().get_data()
    depth_image = np.asanyarray(depth_image_data)
    
    
    
    logger.info('cam depth image=%dx%d' % (depth_image.shape[1], depth_image.shape[0])) 
    logger.info('camera ready') 
    
    
    # crop box that maps the depth image onto the RGB image
    clip_box = [100,-100,290,-300]
    

    while (True):
        if(is_tracking):
            fps_time = time.time()
            frames = pipeline.wait_for_frames()
            # get the RGB frame
            image_frame = frames.get_color_frame()
            image_data = image_frame.as_frame().get_data()
            image = np.asanyarray(image_data)
            
            # convert BGR to RGB
            image = np.array(image[...,::-1])
            origen_image = image

            # get the depth frame
            depth = frames.get_depth_frame() 
            depth_image_data = depth.as_frame().get_data()
            depth_image = np.asanyarray(depth_image_data)
            depth_image = depth_image[(int)(clip_box[0]):(int)(clip_box[1]),(int)(clip_box[2]):(int)(clip_box[3])]
            depth_image = cv2.resize(depth_image, (640, 480), interpolation=cv2.INTER_CUBIC)
            depth_image=depth_image/30
            depth_image.astype(np.uint8)
            
            # mask for depth-based background removal
            thresh=cv2.inRange(depth_image,20,200)

            # morphological clean-up of the background mask
            kernel = cv2.getStructuringElement(cv2.MORPH_RECT,(1, 1))  
            eroded = cv2.erode(thresh,kernel)
            kernel2 = cv2.getStructuringElement(cv2.MORPH_RECT,(5, 5))  
            dilated = cv2.dilate(eroded,kernel2)
            
            # brightness mask
            bright_mask = np.zeros(image.shape);
            bright_mask.fill(200)
            bright_mask = bright_mask.astype(np.uint8);
            bright_mask = cv2.bitwise_and(bright_mask, bright_mask, mask=dilated)
            
            # brightness adjustment of the RGB image
#             image = cv2.bitwise_and(image, image, mask=dilated)
            
            image = image.astype(int)+200-bright_mask.astype(int);
            image = np.clip(image, 0, 255)
            image = image.astype(np.uint8);
            

            
            
            
            # tf-pose image scaling
            if args.zoom < 1.0:
                canvas = np.zeros_like(image)
                img_scaled = cv2.resize(image, None, fx=args.zoom, fy=args.zoom, interpolation=cv2.INTER_LINEAR)
                dx = (canvas.shape[1] - img_scaled.shape[1]) // 2
                dy = (canvas.shape[0] - img_scaled.shape[0]) // 2
                canvas[dy:dy + img_scaled.shape[0], dx:dx + img_scaled.shape[1]] = img_scaled
                image = canvas
            elif args.zoom > 1.0:
                img_scaled = cv2.resize(image, None, fx=args.zoom, fy=args.zoom, interpolation=cv2.INTER_LINEAR)
                dx = (img_scaled.shape[1] - image.shape[1]) // 2
                dy = (img_scaled.shape[0] - image.shape[0]) // 2
                image = img_scaled[dy:image.shape[0], dx:image.shape[1]]

            # tf-pose inference
            humans = e.inference(image)
            
#             # get the joints
#             jdata = TfPoseEstimator.get_json_data(image.shape[0],image.shape[1],humans)
#             if(len(jdata)>2):
#                 try:
#                     # send position data to the server
#                     chating_room.sendTrackingData(jdata,'track')
#                 except:
#                     print("Cannot send data to server.")
           
            
            # depth image after background removal
            depth_masked = cv2.bitwise_and(depth_image, depth_image, mask=dilated)
            
            human_json_datas = []
            for human in humans:
                # compute depth data
                depthDatas=[]
                image_h, image_w = image.shape[:2]
                # get point
                for i in range(common.CocoPart.Background.value):
                    if i not in human.body_parts.keys():
                        continue
                    body_part = human.body_parts[i]
                    y= int(body_part.y * image_h+ 0.5)
                    x = int(body_part.x * image_w + 0.5)
                    s=5;
                    mat = depth_masked[y-s if(y-s>=0) else 0:y+s if(y+s<=479) else 479,x-s if(x-s>=0) else 0:x+s if (x+s<=639) else 639]

                    count=0;
                    sum_depth=0;

                    for j in range (mat.shape[0]):
                        for k in range (mat.shape[1]):     
                            if mat[j,k]!=0:
                                sum_depth+=mat[j,k]
                                count+=1
                                
                    if(count>0):
                        depth=sum_depth/count
                    else:
                        depth=0
                        
                    try:
                        depthDatas.append(JointDepthData(i,x,y,depth).__dict__)
                    except:
                        print("err:"+str(x)+" "+str(y)+" "+str(body_part.x )+" "+str(body_part.y ))
                    

                human_json_datas.append(json.dumps(depthDatas))
            json_data = json.dumps(human_json_datas)
            if(len(json_data)>2):
                try:
                    # send depth data to the server
                    chating_room.sendTrackingData(json_data,'track_depth')
                except:
                    print("Cannot send depth data to server.")
                        
            depth_image =  cv2.applyColorMap(cv2.convertScaleAbs(depth_image/25), cv2.COLORMAP_JET)
            cv2.circle(image,(320,240), 5, (255,255,255), -1)
            cv2.circle(image,(304,480-98), 5, (0,0,255), -1)
            cv2.circle(image,(377,480-197), 5, (0,160,255), -1)
            cv2.circle(image,(106,480-49), 5, (0,255,255), -1)
            cv2.circle(image,(460,480-136), 5, (0,255,0), -1)
            cv2.circle(image,(481,480-134), 5, (255,0,0), -1)
            cv2.circle(image,(85,480-143), 5, (255,160,0), -1)
            image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)

            cv2.putText(image,"FPS: %f" % (1.0 / (time.time() - fps_time)),(10, 10),  cv2.FONT_HERSHEY_SIMPLEX, 0.5,(0, 255, 0), 2)
            
            cv2.imshow('tf-pose-estimation result', image)
            
            if cv2.waitKey(25) & 0xFF == ord('q'):
                cv2.destroyAllWindows()
                break


    cv2.destroyAllWindows()
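
JointDepthData is used by the two tracking examples above but never defined on this page; a hedged minimal sketch, with attribute names inferred from how it is accessed (jn, x, y, dp and __dict__), follows:

class JointDepthData:
    def __init__(self, jn, x, y, dp):
        self.jn = jn   # COCO joint index
        self.x = x     # pixel x coordinate
        self.y = y     # pixel y coordinate
        self.dp = dp   # averaged depth value at the joint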
Example No. 24
    def __init__(self):
        """
        Initialize the graphics window and mesh surface
        and add grids along the three axes; they just fill up the background
        """

        # setup the view window
        self.app = QtGui.QApplication(sys.argv)
        self.window = gl.GLViewWidget()
        self.window.setFixedSize(1920, 1080)
        self.window.showMaximized()
        # self.window.setMaximumSize(1920, 1080)
        # self.win = pg.GraphicsWindow()
        # self.vb = self.win.addViewBox(col=0, row=0)
        # self.t = pg.TextItem("zeynep", (255, 255, 255), anchor=(0,0))
        # self.vb.addItem(self.t)
        self.window.setWindowTitle('Terrain')
        self.window.setGeometry(0, 0, 1920, 1080)
        self.window.setCameraPosition(distance=30, elevation=12)
        self.window.show()
        # self.root = Tk()
        self.angle = ""
        # self.root = Tk()
        # self.root.geometry("100x50+%d+%d" % (0, 0))  # top left
        # self.root.overrideredirect(True)  # frameless tkinter window
        # self.root.resizable(False, False)
        # self.root.columnconfigure(0, weight=1)
        # self.root.rowconfigure(0, weight=1)

        # TEXT = ""
        # self.lbl = tk.Label(root, text=TEXT, bg="#e61c1c", font=("bold", 15), border=0, width=35)
        # self.lbl.grid(column=0, row=0, sticky="nsew")

        gx = gl.GLGridItem()
        gy = gl.GLGridItem()
        gz = gl.GLGridItem()
        gx.rotate(90, 0, 1, 0)
        gy.rotate(90, 1, 0, 0)
        gx.translate(-10, 0, 0)
        gy.translate(0, -10, 0)  # we translate all of them
        gz.translate(0, 0, -10)  # so they're pushed out to the background
        self.window.addItem(gx)
        self.window.addItem(gy)  # add them all to the window
        self.window.addItem(gz)

        model = "mobilenet_thin_432x368"
        camera = 0

        self.lines = {}
        self.connections = [  # lines that we want to connect all those key points
            [0, 1], [1, 2], [2, 3], [0, 4], [4, 5], [5, 6], [0, 7], [7, 8],
            [8, 9], [9, 10], [8, 11], [11, 12], [12, 13], [8, 14], [14, 15],
            [15, 16]
        ]  # 5.35

        w, h = model_wh(model)
        # create the estimator object e for this model
        self.e = TfPoseEstimator(get_graph_path(model), target_size=(w, h))
        # this mirrors what run.py does (around line 37), but with our own
        # model variable instead of args.model
        self.cam = cv2.VideoCapture(camera)
        ret_val, image = self.cam.read()
        self.poseLifting = Prob3dPose(
            './src/lifting/models/prob_model_params.mat')
        # this object performs the 2D-to-3D pose lifting (see above)
        keypoints = self.mesh(image)
        # rightPointList = keypoints[2:5]
        self.rightPointList = keypoints[11:14]
        self.leftPointList = keypoints[14:]

        self.a = (
            gl.GLScatterPlotItem(  # plot dot
                pos=keypoints[:11],
                color=pg.glColor((0, 255, 0)),
                size=15))
        self.window.addItem(self.a)
        self.right = (
            gl.GLScatterPlotItem(  # plot dot
                pos=self.rightPointList,
                color=pg.glColor((255, 0, 0)),
                size=15))
        self.window.addItem(self.right)
        self.left = (
            gl.GLScatterPlotItem(  # plot dot
                pos=self.leftPointList,
                color=pg.glColor((0, 0, 255)),
                size=15))
        self.window.addItem(self.left)

        for n, pts in enumerate(self.connections):
            self.lines[n] = gl.GLLinePlotItem(  # lines dict with all of them
                pos=np.array([keypoints[p] for p in pts]),
                color=pg.glColor((0, 0, 255)),
                width=3,
                antialias=True)
            # add them to our window
            self.window.addItem(self.lines[n])
Example No. 25
formatter = logging.Formatter('[%(asctime)s] [%(name)s] [%(levelname)s] %(message)s')
ch.setFormatter(formatter)
logger.addHandler(ch)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='tf-pose-estimation run')
    # parser.add_argument('--image', type=str, default='/Users/ildoonet/Downloads/me.jpg')
    parser.add_argument('--image', type=str, default='./images/apink2.jpg')
    # parser.add_argument('--model', type=str, default='mobilenet_320x240', help='cmu / mobilenet_320x240')
    parser.add_argument('--model', type=str, default='mobilenet_thin_432x368', help='cmu_640x480 / cmu_640x360 / mobilenet_thin_432x368')
    parser.add_argument('--scales', type=str, default='[None]', help='for multiple scales, eg. [1.0, (1.1, 0.05)]')
    args = parser.parse_args()
    scales = ast.literal_eval(args.scales)

    w, h = model_wh(args.model)
    e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))

    # estimate human poses from a single image !
    image = common.read_imgfile(args.image, None, None)
    # image = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)
    t = time.time()
    humans = e.inference(image, scales=[None])
    # humans = e.inference(image, scales=[None, (0.7, 0.5, 1.8)])
    # humans = e.inference(image, scales=[(1.2, 0.05)])
    # humans = e.inference(image, scales=[(0.2, 0.2, 1.4)])
    elapsed = time.time() - t

    logger.info('inference image: %s in %.4f seconds.' % (args.image, elapsed))

    image = cv2.imread(args.image, cv2.IMREAD_COLOR)
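
    # --- Hedged sketch, not part of the original excerpt --------------------
    # The excerpt stops right after re-reading the image. A typical
    # continuation (mirroring Exemplo n.º 29) overlays the detected skeletons
    # and displays the result:
    image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
    cv2.imshow('tf-pose-estimation result', image)
    cv2.waitKey(0)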
Exemplo n.º 26
0
import argparse
import time

import cv2
import numpy as np

from estimator import TfPoseEstimator
from networks import get_graph_path, model_wh

fps_time = 0

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='tf-pose-estimation Video')
    parser.add_argument('--video', type=str, default='')

    args = parser.parse_args()
    model = 'mobilenet_thin'
    w, h = model_wh('432x368')
    e = TfPoseEstimator(get_graph_path(model), target_size=(w, h))
    cap = cv2.VideoCapture(args.video)

    if not cap.isOpened():
        print("Error opening video stream or file")
    while cap.isOpened():
        ret_val, image = cap.read()
        if not ret_val:  # end of stream
            break

        humans = e.inference(image)
        image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)

        cv2.putText(image, "FPS: %f" % (1.0 / (time.time() - fps_time)),
                    (10, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        cv2.imshow('tf-pose-estimation result', image)
        fps_time = time.time()
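        # --- Hedged sketch, not part of the original excerpt -----------------
        # Without a cv2.waitKey call the imshow window never refreshes, and the
        # loop has no exit condition. A common ending (matching the style of
        # the other examples here) is ESC-to-quit, followed by cleanup:
        if cv2.waitKey(1) == 27:
            break
    cap.release()
    cv2.destroyAllWindows()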
Exemplo n.º 27
0
                        default='432x368',
                        help='network input resolution. default=432x368')
    parser.add_argument('--model',
                        type=str,
                        default='mobilenet_thin',
                        help='cmu / mobilenet_thin')
    parser.add_argument(
        '--show-process',
        type=bool,
        default=False,
        help='for debug purpose, if enabled, speed for inference is dropped.')
    args = parser.parse_args()

    logger.debug('initialization %s : %s' %
                 (args.model, get_graph_path(args.model)))
    w, h = model_wh(args.resolution)
    e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))
    logger.debug('cam read+')
    cam = cv2.VideoCapture(args.camera)
    ret_val, image = cam.read()
    logger.info('cam image=%dx%d' % (image.shape[1], image.shape[0]))

    while True:
        ret_val, image = cam.read()

        logger.debug('image preprocess+')
        if args.zoom < 1.0:
            canvas = np.zeros_like(image)
            img_scaled = cv2.resize(image,
                                    None,
                                    fx=args.zoom,
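
# --- Hedged sketch, not part of the original excerpt -------------------------
# The excerpt is cut off inside the zoom preprocessing. A standalone helper
# (hypothetical name: zoom_frame; assumes cv2 and numpy as np are imported)
# showing the usual idea: zooming out pastes a scaled-down frame onto a black
# canvas of the original size, zooming in crops the centre of a scaled-up
# frame, so the network input size never changes.
def zoom_frame(image, zoom):
    if zoom < 1.0:
        canvas = np.zeros_like(image)
        scaled = cv2.resize(image, None, fx=zoom, fy=zoom,
                            interpolation=cv2.INTER_LINEAR)
        dx = (canvas.shape[1] - scaled.shape[1]) // 2
        dy = (canvas.shape[0] - scaled.shape[0]) // 2
        canvas[dy:dy + scaled.shape[0], dx:dx + scaled.shape[1]] = scaled
        return canvas
    if zoom > 1.0:
        scaled = cv2.resize(image, None, fx=zoom, fy=zoom,
                            interpolation=cv2.INTER_LINEAR)
        dx = (scaled.shape[1] - image.shape[1]) // 2
        dy = (scaled.shape[0] - image.shape[0]) // 2
        return scaled[dy:dy + image.shape[0], dx:dx + image.shape[1]]
    return image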
Exemplo n.º 28
0
    def post(self):
        global fallen
        if self.request.headers['Content-Type'] == 'imagebin':
            print('Received image')
            image = self.request.body
            fh = open('static/image1.jpg', 'wb')
            fh.write(image)
            fh.close()
            #fh = open('static/image1.jpg','ab')
            #fh.write(bytes([0xD9]))
            #fh.close()
            print('0')
            #image = cv2.imread('static/image1.jpg')
            print('1')
            print('2')

            parser = argparse.ArgumentParser(
                description='tf-pose-estimation run')
            parser.add_argument(
                '--resolution',
                type=str,
                default='432x368',
                help='network input resolution. default=432x368')
            parser.add_argument('--model',
                                type=str,
                                default='mobilenet_thin',
                                help='cmu / mobilenet_thin')
            parser.add_argument(
                '--scales',
                type=str,
                default='[None]',
                help='for multiple scales, eg. [1.0, (1.1, 0.05)]')
            args = parser.parse_args()
            scales = ast.literal_eval(args.scales)

            w, h = model_wh(args.resolution)
            e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))

            # estimate human poses from a single image !
            image = common.read_imgfile('static/image1.jpg', None, None)
            # image = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)
            t = time.time()
            humans = e.inference(image, scales=scales)
            elapsed = time.time() - t

            logger.info('inference image: static/image1.jpg in %.4f seconds.' %
                        (elapsed))

            image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
            fallen = 'OKAY'
            for i, h in enumerate(humans):
                TORSO_INDEX = 1
                LEFT_HIP_INDEX = 8
                RIGHT_HIP_INDEX = 11
                RIGHT_HAND_INDEX = 4
                RIGHT_FOOT_INDEX = 12
                LEFT_FOOT_INDEX = 9

                parts = h.body_parts
                if TORSO_INDEX in parts and LEFT_HIP_INDEX in parts and RIGHT_HIP_INDEX in parts:

                    torso = parts[TORSO_INDEX]
                    left_hip = parts[LEFT_HIP_INDEX]
                    right_hip = parts[RIGHT_HIP_INDEX]

                    tx, ty = torso.x, torso.y
                    lhx, lhy = left_hip.x, left_hip.y
                    rhx, rhy = right_hip.x, right_hip.y

                    # torso x outside the hip span
                    if tx < lhx or tx > rhx:
                        fallen = 'FALL'

                    # torso vertically close to the hips
                    if abs(lhy - ty) < 0.1 or abs(rhy - ty) < 0.1:
                        fallen = 'FALL'
                    if RIGHT_HAND_INDEX in parts and RIGHT_FOOT_INDEX in parts and LEFT_FOOT_INDEX in parts:
                        right_foot = parts[RIGHT_FOOT_INDEX]
                        left_foot = parts[LEFT_FOOT_INDEX]
                        right_hand = parts[RIGHT_HAND_INDEX]
                        righax, righay = right_hand.x, right_hand.y
                        rfx, rfy = right_foot.x, right_foot.y
                        lfx, lfy = left_foot.x, left_foot.y
                        if abs(lfy - lhy) < 0.1 or abs(rhy - ty) < 0.1:
                            fallen = 'FALL'
                        if (lfy - lhy) > (lhy - ty):
                            fallen = 'FALL'
                        print(lfy - lhy, lhy - ty)
                        # compare lower-body and upper-body segment lengths
                        lowermag = math.sqrt((lfy - lhy) * (lfy - lhy) +
                                             (lhx - lfx) * (lhx - lfx))
                        uppermag = math.sqrt((lhy - ty) * (lhy - ty) +
                                             (tx - lhx) * (tx - lhx))
                        if lowermag > uppermag:
                            fallen = 'FALL'
            #cv2.putText(image,
            #               f"Fallen: False",
            #              (60, 60),  cv2.FONT_HERSHEY_SIMPLEX, 2,
            #             (0, 255, 0), 5)

            cv2.putText(image, f"Fallen: {fallen}", (60, 60),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 5)

            cv2.imwrite('static/image3.jpg', image)
            for client in clients:
                update_clients(client)
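
# --- Hedged sketch, not part of the original excerpt -------------------------
# Tiny worked example of the first two fall rules above on made-up normalised
# keypoints (x, y in [0, 1], y grows downwards): a standing person keeps the
# torso between the hips and well above them, a person lying down puts the
# torso outside the hip span. looks_fallen is a hypothetical helper name.
def looks_fallen(tx, ty, lhx, lhy, rhx, rhy):
    if tx < lhx or tx > rhx:                        # torso x outside the hip span
        return True
    if abs(lhy - ty) < 0.1 or abs(rhy - ty) < 0.1:  # torso level with the hips
        return True
    return False

print(looks_fallen(0.50, 0.40, 0.45, 0.60, 0.55, 0.60))  # standing -> False
print(looks_fallen(0.30, 0.58, 0.45, 0.60, 0.55, 0.60))  # lying    -> True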
Exemplo n.º 29
0
def main():
    parser = argparse.ArgumentParser(description='tf-pose-estimation run')
    parser.add_argument('--image', type=str, default='./images/p3.jpg')
    parser.add_argument(
        '--resolution',
        type=str,
        default='432x368',
        help='network input resolution. default=432x368')
    parser.add_argument(
        '--model',
        type=str,
        default='mobilenet_thin',
        help='cmu / mobilenet_thin')
    parser.add_argument(
        '--scales',
        type=str,
        default='[None]',
        help='for multiple scales, eg. [1.0, (1.1, 0.05)]')
    parser.add_argument(
        '--stick_only',
        action='store_true',
        help='save output without other analysis')
    parser.add_argument('--plot_3d', action='store_true', help='save 3d poses')

    args = parser.parse_args()
    scales = ast.literal_eval(args.scales)

    w, h = model_wh(args.resolution)
    e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))

    # estimate human poses from a single image !
    image = common.read_imgfile(args.image, None, None)
    # image = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)
    t = time.time()
    humans = e.inference(image, scales=scales)
    elapsed = time.time() - t

    logger.info('inference image: %s in %.4f seconds.' % (args.image, elapsed))

    image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
    # cv2.imshow('tf-pose-estimation result', image)
    # cv2.waitKey()

    import matplotlib.pyplot as plt

    fig = plt.figure()
    a = fig.add_subplot(2, 2, 1)
    a.set_title('Result')
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    if args.stick_only:
        cv2.imwrite('output/test.png', image)
        logger.info('image saved: {}'.format('output/test.png'))
        import sys
        sys.exit(0)
    plt.imshow(rgb_image)

    bgimg = cv2.cvtColor(image.astype(np.uint8), cv2.COLOR_BGR2RGB)
    bgimg = cv2.resize(
        bgimg, (e.heatMat.shape[1], e.heatMat.shape[0]),
        interpolation=cv2.INTER_AREA)

    # show network output
    a = fig.add_subplot(2, 2, 2)
    plt.imshow(bgimg, alpha=0.5)
    tmp = np.amax(e.heatMat[:, :, :-1], axis=2)
    plt.imshow(tmp, cmap=plt.cm.gray, alpha=0.5)
    plt.colorbar()

    tmp2 = e.pafMat.transpose((2, 0, 1))
    tmp2_odd = np.amax(np.absolute(tmp2[::2, :, :]), axis=0)
    tmp2_even = np.amax(np.absolute(tmp2[1::2, :, :]), axis=0)

    a = fig.add_subplot(2, 2, 3)
    a.set_title('Vectormap-x')
    # plt.imshow(CocoPose.get_bgimg(inp, target_size=(vectmap.shape[1], vectmap.shape[0])), alpha=0.5)
    plt.imshow(tmp2_odd, cmap=plt.cm.gray, alpha=0.5)
    plt.colorbar()

    a = fig.add_subplot(2, 2, 4)
    a.set_title('Vectormap-y')
    # plt.imshow(CocoPose.get_bgimg(inp, target_size=(vectmap.shape[1], vectmap.shape[0])), alpha=0.5)
    plt.imshow(tmp2_even, cmap=plt.cm.gray, alpha=0.5)
    plt.colorbar()
    if not os.path.exists('output'):
        os.mkdir('output')
    plt.savefig('output/test.png')
    logger.info('image saved: {}'.format('output/test.png'))

    if not args.plot_3d:
        import sys
        sys.exit(0)
    #For plotting in 3d
    logger.info('3d lifting initialization.')
    poseLifting = Prob3dPose('./src/lifting/models/prob_model_params.mat')

    image_h, image_w = image.shape[:2]
    standard_w = 640
    standard_h = 480

    pose_2d_mpiis = []
    visibilities = []
    for human in humans:
        pose_2d_mpii, visibility = common.MPIIPart.from_coco(human)
        pose_2d_mpiis.append([(int(x * standard_w + 0.5),
                               int(y * standard_h + 0.5))
                              for x, y in pose_2d_mpii])
        visibilities.append(visibility)

    pose_2d_mpiis = np.array(pose_2d_mpiis)
    visibilities = np.array(visibilities)
    transformed_pose2d, weights = poseLifting.transform_joints(
        pose_2d_mpiis, visibilities)
    pose_3d = poseLifting.compute_3d(transformed_pose2d, weights)

    for i, single_3d in enumerate(pose_3d):
        plot_pose(single_3d)
    plt.draw()
    plt.savefig('output/pose_3d_test.png')
    logger.info('3d plot saved: {}'.format('output/pose_3d_test.png'))
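
# --- Hedged sketch, not part of the original excerpt -------------------------
# The lifting step above converts the estimator's normalised keypoints to pixel
# coordinates on a fixed 640x480 canvas, presumably because Prob3dPose expects
# a fixed reference frame. The conversion is just scale-and-round, e.g.:
x, y = 0.5, 0.25                                  # normalised keypoint
print(int(x * 640 + 0.5), int(y * 480 + 0.5))     # -> 320 120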
Exemplo n.º 30
0
    def __init__(self):

        # setup the view window
        self.app = QtGui.QApplication(sys.argv)
        self.window = gl.GLViewWidget()
        self.window.setFixedSize(1920, 1080)
        # self.window.showMaximized()

        self.window.setWindowTitle('Terrain')
        self.window.setGeometry(0, 0, 1920, 1080)
        self.window.setCameraPosition(distance=30, elevation=12)
        self.window.show()
        self.angle = ""

        gx = gl.GLGridItem()
        gy = gl.GLGridItem()
        gz = gl.GLGridItem()
        gx.rotate(90, 0, 1, 0)
        gy.rotate(90, 1, 0, 0)
        gx.translate(-10, 0, 0)
        gy.translate(0, -10, 0)  # we translate all of them
        gz.translate(0, 0, -10)  # so they're pushed out to the background
        self.window.addItem(gx)
        self.window.addItem(gy)  # add them all to the window
        self.window.addItem(gz)

        model = "mobilenet_thin_432x368"
        camera = 0

        self.lines = {}
        self.connections = [  # keypoint index pairs to join with line segments
            [0, 1], [1, 2], [2, 3], [0, 4], [4, 5], [5, 6], [0, 7], [7, 8],
            [8, 9], [9, 10], [8, 11], [11, 12], [12, 13], [8, 14], [14, 15],
            [15, 16]
        ]

        w, h = model_wh(model)
        # build the pose estimator the same way run.py does, except the model
        # name is hard-coded here instead of coming from args.model
        print("model graph path: ", get_graph_path(model))
        self.e = TfPoseEstimator(get_graph_path(model), target_size=(w, h))
        print("estimator: ", self.e)
        self.cam = cv2.VideoCapture(camera)
        ret_val, image = self.cam.read()
        self.poseLifting = Prob3dPose(
            './src/lifting/models/prob_model_params.mat')
        # Prob3dPose will lift the 2D keypoints estimated above into 3D
        keypoints = self.mesh(image)
        self.rightPointList = keypoints[11:14]
        self.leftPointList = keypoints[14:]

        self.a = (
            gl.GLScatterPlotItem(  # plot dot
                pos=keypoints[:11],
                color=pg.glColor((0, 255, 0)),
                size=15))
        self.window.addItem(self.a)
        self.right = (
            gl.GLScatterPlotItem(  # plot dot
                pos=self.rightPointList,
                color=pg.glColor((255, 0, 0)),
                size=15))
        self.window.addItem(self.right)
        self.left = (
            gl.GLScatterPlotItem(  # plot dot
                pos=self.leftPointList,
                color=pg.glColor((0, 0, 255)),
                size=15))
        self.window.addItem(self.left)

        for n, pts in enumerate(self.connections):
            self.lines[n] = gl.GLLinePlotItem(  # lines dict with all of them
                pos=np.array([keypoints[p] for p in pts]),
                color=pg.glColor((0, 0, 255)),
                width=3,
                antialias=True)
            # add them to our window
            self.window.addItem(self.lines[n])
Exemplo n.º 31
0

if __name__ == '__main__':
    rospy.loginfo('initialization+')
    rospy.init_node('TfPoseEstimatorROS', anonymous=True)

    # parameters
    image_topic = rospy.get_param('~camera', '')
    model = rospy.get_param('~model', 'cmu_640x480')

    if not image_topic:
        rospy.logerr('Parameter \'camera\' is not provided.')
        sys.exit(-1)

    try:
        w, h = model_wh(model)
        graph_path = get_graph_path(model)

        rospack = rospkg.RosPack()
        graph_path = os.path.join(rospack.get_path('tfpose_ros'), graph_path)
    except Exception as e:
        rospy.logerr('invalid model: %s, e=%s' % (model, e))
        sys.exit(-1)

    pose_estimator = TfPoseEstimator(graph_path, target_size=(w, h))
    cv_bridge = CvBridge()

    rospy.Subscriber(image_topic, Image, callback_image, queue_size=1)
    pub_pose = rospy.Publisher('~pose', Persons, queue_size=1)

    rospy.loginfo('start+')


def pose_comparison():
    parser = argparse.ArgumentParser(description='tf-pose-estimation run')
    parser.add_argument('--image', type=str, default='./images/p1.jpg')
    parser.add_argument('--resolution',
                        type=str,
                        default='432x368',
                        help='network input resolution. default=432x368')
    parser.add_argument('--model',
                        type=str,
                        default='mobilenet_thin',
                        help='cmu / mobilenet_thin')
    parser.add_argument('--scales',
                        type=str,
                        default='[None]',
                        help='for multiple scales, eg. [1.0, (1.1, 0.05)]')
    args = parser.parse_args()
    scales = ast.literal_eval(args.scales)

    w, h = model_wh(args.resolution)
    e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))

    ref_image = common.read_imgfile(REF_POSE_PATH, None, None)
    ref_image = cv2.resize(ref_image, (640, 480))
    # estimate human poses from a single image !
    image = common.read_imgfile(args.image, None, None)
    image = cv2.resize(image, (640, 480))
    # image = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)
    #t = time.time()

    ref_humans = e.inference(ref_image, scales=scales)
    humans = e.inference(image, scales=scales)

    #elapsed = time.time() - t

    #logger.info('inference image: %s in %.4f seconds.' % (args.image, elapsed))

    _, ref_centers = TfPoseEstimator.draw_humans_mine(ref_image,
                                                      ref_humans,
                                                      imgcopy=False)
    _, centers = TfPoseEstimator.draw_humans_mine(image, humans, imgcopy=False)

    ref_centers = list(ref_centers.values())
    centers = list(centers.values())

    ref_centers = list(sum(ref_centers, ()))
    centers = list(sum(centers, ()))

    ref_centers = np.array(ref_centers, dtype=int)
    centers = np.array(centers, dtype=int)

    shapes = []
    shapes.append(ref_centers)
    shapes.append(centers)

    # create a canvas on which the aligned shapes can be visualized
    canvas = np.full([640, 480], 255).astype('uint8')

    #convert to 3 channel RGB for fun colors!
    canvas = cv2.cvtColor(canvas, cv2.COLOR_GRAY2RGB)
    #im = draw_shapes(canvas,shapes)

    x, y = get_translation(shapes[0])
    new_shapes = []
    new_shapes.append(shapes[0])

    for i in range(1, len(shapes)):
        new_shape = procrustes_analysis(shapes[0], shapes[i])
        new_shape[::2] = new_shape[::2] + x
        new_shape[1::2] = new_shape[1::2] + y
        new_shape = new_shape.astype(int)
        new_shapes.append(new_shape)

    pts_list = []

    for lst in new_shapes:
        temp = lst.reshape(-1, 1, 2)
        pts = list(map(tuple, temp))
        pts_list.append(pts)

    for i in range(18):
        cv2.circle(ref_image,
                   tuple(pts_list[0][i][0]),
                   3, (255, 0, 0),
                   thickness=3,
                   lineType=8,
                   shift=0)
        cv2.circle(ref_image,
                   tuple(pts_list[1][i][0]),
                   3, (255, 255, 0),
                   thickness=3,
                   lineType=8,
                   shift=0)

    cv2.imshow('tf-pose-estimation result', ref_image)
    cv2.waitKey(0)

    variations = []
    for i in range(len(new_shapes)):
        dist = procrustes_distance(shapes[0], new_shapes[i])
        variations.append(dist)

    print(variations)
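
# --- Hedged sketch, not part of the original excerpt -------------------------
# get_translation, procrustes_analysis and procrustes_distance are not shown
# above. A minimal NumPy implementation of ordinary Procrustes alignment on
# flattened (x0, y0, x1, y1, ...) keypoint vectors, matching how the function
# above uses them: the aligned shape comes back centred at the origin and the
# caller re-adds the reference centroid.
import numpy as np


def get_translation(shape):
    """Centroid (mean x, mean y) of a flattened coordinate vector."""
    return np.mean(shape[::2]), np.mean(shape[1::2])


def procrustes_analysis(reference, shape):
    """Align `shape` to `reference` by translation, scale and rotation."""
    ref = np.asarray(reference, dtype=float).reshape(-1, 2)
    shp = np.asarray(shape, dtype=float).reshape(-1, 2)
    ref_c = ref - ref.mean(axis=0)
    shp_c = shp - shp.mean(axis=0)
    # rescale the candidate to the reference's overall size
    shp_c *= np.linalg.norm(ref_c) / np.linalg.norm(shp_c)
    # optimal rotation from the SVD of the cross-covariance matrix
    u, _, vt = np.linalg.svd(shp_c.T @ ref_c)
    aligned = shp_c @ (u @ vt)
    return aligned.flatten()


def procrustes_distance(reference, shape):
    """Root-mean-square point-to-point distance between two flattened shapes."""
    ref = np.asarray(reference, dtype=float).reshape(-1, 2)
    shp = np.asarray(shape, dtype=float).reshape(-1, 2)
    return np.sqrt(np.mean(np.sum((ref - shp) ** 2, axis=1)))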