Example #1
    def initNets(self):
        """
        Init network in current process
        :return: 
        """
        # Force network to compile output in the beginning
        if isinstance(self.poseNet, PoseRegNetParams):
            self.poseNet = PoseRegNet(numpy.random.RandomState(23455),
                                      cfgParams=self.poseNet)
            self.poseNet.computeOutput(
                numpy.zeros(self.poseNet.cfgParams.inputDim, dtype='float32'))
        elif isinstance(self.poseNet, ResNetParams):
            self.poseNet = ResNet(numpy.random.RandomState(23455),
                                  cfgParams=self.poseNet)
            self.poseNet.computeOutput(
                numpy.zeros(self.poseNet.cfgParams.inputDim, dtype='float32'))
        else:
            raise RuntimeError("Unknown pose estimation method!")

        if self.comrefNet is not None:
            if isinstance(self.comrefNet, ScaleNetParams):
                self.comrefNet = ScaleNet(numpy.random.RandomState(23455),
                                          cfgParams=self.comrefNet)
                self.comrefNet.computeOutput([
                    numpy.zeros(sz, dtype='float32')
                    for sz in self.comrefNet.cfgParams.inputDim
                ])
            else:
                raise RuntimeError("Unknown refine method!")
Example #2
    def loadRefineNetLazy(self, net):
        if isinstance(net, basestring):
            if os.path.exists(net):
                from net.scalenet import ScaleNet, ScaleNetParams
                comrefNetParams = ScaleNetParams(type=5, nChan=1, wIn=128, hIn=128, batchSize=1, resizeFactor=2,
                                                 numJoints=1, nDims=3)
                self.refineNet = ScaleNet(np.random.RandomState(23455), cfgParams=comrefNetParams)
                self.refineNet.load(net)
            else:
                raise EnvironmentError("File not found: {}".format(net))
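
A usage sketch for the lazy loader: it only acts when given a path string, so handing it anything else (an already-built network, None) is a silent no-op; it also relies on os and np (numpy) being imported at module level. The checkpoint path is borrowed from the demo snippet further down and is only a placeholder:

di = DepthImporter(fx=588., fy=587., ux=160., uy=120.)  # class from Example #3
di.loadRefineNetLazy("./net_NYU_COM.pkl")  # builds a ScaleNet and loads the weights
di.loadRefineNetLazy(di.refineNet)         # not a string, so nothing happens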
Example #3
class DepthImporter(object):
    """
    Provides basic functionality to load depth data
    """

    def __init__(self, fx, fy, ux, uy):
        """
        Initialize object
        :param fx: focal length in x direction
        :param fy: focal length in y direction
        :param ux: principal point in x direction
        :param uy: principal point in y direction
        """

        self.fx = fx
        self.fy = fy
        self.ux = ux
        self.uy = uy
        self.depth_map_size = (320, 240)
        self.refineNet = None
        self.crop_joint_idx = 0

    def jointsImgTo3D(self, sample):
        """
        Normalize sample to metric 3D
        :param sample: joints in (x,y,z) with x,y in image coordinates and z in mm
        :return: normalized joints in mm
        """
        ret = np.zeros((sample.shape[0], 3), np.float32)
        for i in range(sample.shape[0]):
            ret[i] = self.jointImgTo3D(sample[i])
        return ret

    def jointImgTo3D(self, sample):
        """
        Normalize sample to metric 3D
        :param sample: joints in (x,y,z) with x,y in image coordinates and z in mm
        :return: normalized joints in mm
        """
        ret = np.zeros((3,), np.float32)
        # back-project to metric 3D using the focal lengths
        ret[0] = (sample[0]-self.ux)*sample[2]/self.fx
        ret[1] = (sample[1]-self.uy)*sample[2]/self.fy
        ret[2] = sample[2]
        return ret

    def joints3DToImg(self, sample):
        """
        Denormalize sample from metric 3D to image coordinates
        :param sample: joints in (x,y,z) with x,y and z in mm
        :return: joints in (x,y,z) with x,y in image coordinates and z in mm
        """
        ret = np.zeros((sample.shape[0], 3), np.float32)
        for i in range(sample.shape[0]):
            ret[i] = self.joint3DToImg(sample[i])
        return ret

    def joint3DToImg(self, sample):
        """
        Denormalize sample from metric 3D to image coordinates
        :param sample: joints in (x,y,z) with x,y and z in mm
        :return: joints in (x,y,z) with x,y in image coordinates and z in mm
        """
        ret = np.zeros((3,), np.float32)
        # project from metric 3D to image coordinates using the intrinsics
        if sample[2] == 0.:
            ret[0] = self.ux
            ret[1] = self.uy
            return ret
        ret[0] = sample[0]/sample[2]*self.fx+self.ux
        ret[1] = sample[1]/sample[2]*self.fy+self.uy
        ret[2] = sample[2]
        return ret

    def getCameraProjection(self):
        """
        Get homogeneous camera projection matrix
        :return: 4x4 camera projection matrix
        """
        ret = np.zeros((4, 4), np.float32)
        ret[0, 0] = self.fx
        ret[1, 1] = self.fy
        ret[2, 2] = 1.
        ret[0, 2] = self.ux
        ret[1, 2] = self.uy
        ret[3, 2] = 1.
        return ret

    def getCameraIntrinsics(self):
        """
        Get intrinsic camera matrix
        :return: 3x3 intrinsic camera matrix
        """
        ret = np.zeros((3, 3), np.float32)
        ret[0, 0] = self.fx
        ret[1, 1] = self.fy
        ret[2, 2] = 1.
        ret[0, 2] = self.ux
        ret[1, 2] = self.uy
        return ret

    def showAnnotatedDepth(self, frame):
        """
        Show the depth image
        :param frame: image to show
        :return:
        """
        raise NotImplementedError("Must be overridden by subclass!")

    @staticmethod
    def depthToPCL(dpt, T, background_val=0.):
        """
        Convert depth map to point cloud
        :param dpt: depth map in mm
        :param T: 2D image transformation that is inverted before back-projection
        :param background_val: depth value that marks background pixels
        :return: Nx3 point cloud in mm (camera intrinsics are hard-coded below)
        """

        # get valid (non-background) pixel coordinates and undo the image transformation
        pts = np.asarray(np.where(~np.isclose(dpt, background_val))).transpose()
        pts = np.concatenate([pts[:, [1, 0]] + 0.5, np.ones((pts.shape[0], 1), dtype='float32')], axis=1)
        pts = np.dot(np.linalg.inv(np.asarray(T)), pts.T).T
        pts = (pts[:, 0:2] / pts[:, 2][:, None]).reshape((pts.shape[0], 2))

        # depth values of the valid pixels
        depth = dpt[(~np.isclose(dpt, background_val))]

        # back-project x and y in a vectorized way (intrinsics are hard-coded)
        row = (pts[:, 0] - 160.) / 241.42 * depth
        col = (pts[:, 1] - 120.) / 241.42 * depth

        # combine x,y,depth
        return np.column_stack((row, col, depth))

    def loadRefineNetLazy(self, net):
        if isinstance(net, basestring):
            if os.path.exists(net):
                from net.scalenet import ScaleNet, ScaleNetParams
                comrefNetParams = ScaleNetParams(type=5, nChan=1, wIn=128, hIn=128, batchSize=1, resizeFactor=2,
                                                 numJoints=1, nDims=3)
                self.refineNet = ScaleNet(np.random.RandomState(23455), cfgParams=comrefNetParams)
                self.refineNet.load(net)
            else:
                raise EnvironmentError("File not found: {}".format(net))
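
jointImgTo3D() and joint3DToImg() implement the standard pinhole model, X = (x - ux) * z / fx and x = X / z * fx + ux, so they invert each other for z != 0. A small self-check with assumed NYU-style intrinsics, plus a depthToPCL() call on a synthetic depth map:

import numpy as np

di = DepthImporter(fx=588., fy=587., ux=160., uy=120.)  # assumed intrinsics
jt_img = np.array([200., 140., 750.], dtype='float32')  # x, y in px; z in mm
jt_3d = di.jointImgTo3D(jt_img)    # back-project to metric 3D (mm)
jt_back = di.joint3DToImg(jt_3d)   # project back to image coordinates
assert np.allclose(jt_img, jt_back, atol=1e-2)

# depthToPCL back-projects every non-background pixel; with T = identity the
# image transformation step is a no-op (note that its intrinsics are hard-coded)
dpt = np.zeros((240, 320), dtype='float32')
dpt[100:140, 150:190] = 800.                    # synthetic patch at 800 mm
pcl = DepthImporter.depthToPCL(dpt, np.eye(3))
print pcl.shape                                 # (1600, 3): one point per pixel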
Example #4
    yend = ystart + dsize[1]
    test_data4 = test_data[:, :, ystart:yend, xstart:xend]

    print train_gt3D.max(), test_gt3D.max(), train_gt3D.min(), test_gt3D.min()
    print train_data.max(), test_data.max(), train_data.min(), test_data.min()

    imgSizeW = train_data.shape[3]
    imgSizeH = train_data.shape[2]
    nChannels = train_data.shape[1]

    #############################################################################
    print("create network")
    batchSize = 64
    poseNetParams = ScaleNetParams(type=1, nChan=nChannels, wIn=imgSizeW, hIn=imgSizeH, batchSize=batchSize,
                                   resizeFactor=2, numJoints=1, nDims=3)
    poseNet = ScaleNet(rng, cfgParams=poseNetParams)

    poseNetTrainerParams = ScaleNetTrainerParams()
    poseNetTrainerParams.use_early_stopping = False
    poseNetTrainerParams.batch_size = batchSize
    poseNetTrainerParams.learning_rate = 0.0005
    poseNetTrainerParams.weightreg_factor = 0.0001
    poseNetTrainerParams.force_macrobatch_reload = True
    poseNetTrainerParams.para_augment = True
    poseNetTrainerParams.augment_fun_params = {'fun': 'augment_poses', 'args': {'normZeroOne': False,
                                                                                'di': di,
                                                                                'aug_modes': aug_modes,
                                                                                'hd': HandDetector(train_data[0, 0].copy(), abs(di.fx), abs(di.fy), importer=di)}}

    print("setup trainer")
    poseNetTrainer = ScaleNetTrainer(poseNet, poseNetTrainerParams, rng, './eval/'+eval_prefix)
Example #5
    poseNet = PoseRegNet(numpy.random.RandomState(23455),
                         cfgParams=poseNetParams)
    poseNet.load("./NYU_network_prior.pkl")
    # comrefNetParams = ScaleNetParams(type=1, nChan=1, wIn=128, hIn=128, batchSize=1, resizeFactor=2, numJoints=1, nDims=3)
    # comrefNet = ScaleNet(numpy.random.RandomState(23455), cfgParams=comrefNetParams)
    # comrefNet.load("./net_ICVL_COM.pkl")
    comrefNetParams = ScaleNetParams(type=1,
                                     nChan=1,
                                     wIn=128,
                                     hIn=128,
                                     batchSize=1,
                                     resizeFactor=2,
                                     numJoints=1,
                                     nDims=3)
    comrefNet = ScaleNet(numpy.random.RandomState(23455),
                         cfgParams=comrefNetParams)
    comrefNet.load("./net_NYU_COM.pkl")
    config = {'fx': 588., 'fy': 587., 'cube': (300, 300, 300)}
    # config = {'fx': 241.42, 'fy': 241.42, 'cube': (250, 250, 250)}
    # config = {'fx': 224.5, 'fy': 230.5, 'cube': (300, 300, 300)}  # Creative Gesture Camera
    # di = ICVLImporter("./capture/")
    # di.fx = 224.5
    # di.fy = 230.5
    # di.ux = 160.
    # di.uy = 120.
    rtp = RealtimeHandposePipeline(poseNet, config, di, comrefNet=comrefNet)

    # use filenames
    filenames = []
    for i in testSeqs[0].data:
        filenames.append(i.fileName)
Example #6
class RealtimeHandposePipeline(object):
    """
    Realtime pipeline for handpose estimation
    """

    # states of pipeline
    STATE_IDLE = 0
    STATE_INIT = 1
    STATE_RUN = 2

    # different hands
    HAND_LEFT = 0
    HAND_RIGHT = 1

    # different detectors
    DETECTOR_COM = 0

    def __init__(self, poseNet, config, di, verbose=False, comrefNet=None):
        """
        Initialize data
        :param poseNet: network for pose estimation
        :param config: configuration
        :param di: depth importer
        :param verbose: print additional info
        :param comrefNet: refinement network from center of mass detection
        :return: None
        """

        # handpose CNN
        self.importer = di
        self.poseNet = poseNet
        self.comrefNet = comrefNet
        # configuration
        self.initialconfig = copy.deepcopy(config)
        # synchronization between processes
        self.sync = Manager().dict(config=config, fid=0,
                                   crop=numpy.ones((128, 128), dtype='float32'),
                                   com3D=numpy.asarray([0, 0, 300]),
                                   frame=numpy.ones((240, 320), dtype='float32'), M=numpy.eye(3))
        self.start_prod = Value(c_bool, False)
        self.start_con = Value(c_bool, False)
        self.stop = Value(c_bool, False)
        # for calculating FPS
        self.lastshow = time.time()
        self.runningavg_fps = deque(100*[0], 100)
        self.verbose = verbose
        # hand left/right
        self.hand = Value('i', self.HAND_LEFT)
        # initial state
        self.state = Value('i', self.STATE_IDLE)
        # detector
        self.detection = Value('i', self.DETECTOR_COM)
        # hand size estimation
        self.handsizes = []
        self.numinitframes = 50
        # hand tracking or detection
        self.tracking = Value(c_bool, False)
        self.lastcom = (0, 0, 0)
        # show different results
        self.show_pose = False
        self.show_crop = False

    def initNets(self):
        """
        Init network in current process
        :return: 
        """
        # Force network to compile output in the beginning
        if isinstance(self.poseNet, PoseRegNetParams):
            self.poseNet = PoseRegNet(numpy.random.RandomState(23455), cfgParams=self.poseNet)
            self.poseNet.computeOutput(numpy.zeros(self.poseNet.cfgParams.inputDim, dtype='float32'))
        elif isinstance(self.poseNet, ResNetParams):
            self.poseNet = ResNet(numpy.random.RandomState(23455), cfgParams=self.poseNet)
            self.poseNet.computeOutput(numpy.zeros(self.poseNet.cfgParams.inputDim, dtype='float32'))
        else:
            raise RuntimeError("Unknown pose estimation method!")

        if self.comrefNet is not None:
            if isinstance(self.comrefNet, ScaleNetParams):
                self.comrefNet = ScaleNet(numpy.random.RandomState(23455), cfgParams=self.comrefNet)
                self.comrefNet.computeOutput([numpy.zeros(sz, dtype='float32') for sz in self.comrefNet.cfgParams.inputDim])
            else:
                raise RuntimeError("Unknown refine method!")

    def threadProducer(self, device):
        """
        Thread that produces frames from video capture
        :param device: video capture device
        :return: None
        """
        device.start()

        self.initNets()
        # Nets are compiled, ready to run
        self.start_prod.value = True

        fid = 0
        while True:
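            # wait until the consumer process has compiled its nets as well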
            if self.start_con.value is False:
                time.sleep(0.1)
                continue

            if self.stop.value is True:
                break
            # Capture frame-by-frame
            start = time.time()
            ret, frame = device.getDepth()
            if ret is False:
                print "Error while reading frame."
                time.sleep(0.1)
                continue
            if self.verbose is True:
                print("{}ms capturing".format((time.time() - start)*1000.))

            startd = time.time()
            crop, M, com3D = self.detect(frame.copy())
            if self.verbose is True:
                print("{}ms detection".format((time.time() - startd)*1000.))

            self.sync.update(fid=fid, crop=crop, com3D=com3D, frame=frame, M=M)
            fid += 1

        # we are done
        print "Exiting producer..."
        device.stop()
        return True

    def threadConsumer(self):
        """
        Thread that consumes the frames, estimates the pose and displays the result
        :return: None
        """

        self.initNets()
        # Nets are compiled, ready to run
        self.start_con.value = True
        
        while True:
            if self.start_prod.value is False:
                time.sleep(0.1)
                continue

            if self.stop.value is True:
                break

            frm = copy.deepcopy(self.sync)

            startp = time.time()
            pose = self.estimatePose(frm['crop'], frm['com3D'])
            pose = pose * self.sync['config']['cube'][2]/2. + frm['com3D']
            if self.verbose is True:
                print("{}ms pose".format((time.time() - startp)*1000.))

            # Display the resulting frame
            starts = time.time()
            img, poseimg = self.show(frm['frame'], pose, frm['com3D'])
            img = self.addStatusBar(img)
            cv2.imshow('frame', img)
            self.lastshow = time.time()
            if self.show_pose:
                cv2.imshow('pose', poseimg)
            if self.show_crop:
                cv2.imshow('crop', numpy.clip((frm['crop'] + 1.)*128., 0, 255).astype('uint8'))
            self.processKey(cv2.waitKey(1) & 0xFF)
            if self.verbose is True:
                print("{}ms display".format((time.time() - starts)*1000.))

        cv2.destroyAllWindows()
        # we are done
        print "Exiting consumer..."
        return True

    def processVideoThreaded(self, device):
        """
        Use video as input
        :param device: video capture device
        :return: None
        """

        print "Create producer process..."
        p = Process(target=self.threadProducer, args=[device])
        p.daemon = True
        print"Create consumer process..."
        c = Process(target=self.threadConsumer, args=[])
        c.daemon = True
        p.start()
        c.start()

        c.join()
        p.join()

    def processVideo(self, device):
        """
        Use video as input
        :param device: video capture device
        :return: None
        """
        device.start()

        self.initNets()

        i = 0
        while True:
            i += 1
            if self.stop.value is True:
                break
            # Capture frame-by-frame
            start = time.time()
            ret, frame = device.getDepth()
            if ret is False:
                print "Error while reading frame."
                time.sleep(0.1)
                continue
            if self.verbose is True:
                print("{}ms capturing".format((time.time() - start)*1000.))

            startd = time.time()
            crop, M, com3D = self.detect(frame.copy())
            if self.verbose is True:
                print("{}ms detection".format((time.time() - startd)*1000.))

            startp = time.time()
            pose = self.estimatePose(crop, com3D)
            pose = pose*self.sync['config']['cube'][2]/2. + com3D
            if self.verbose is True:
                print("{}ms pose".format((time.time() - startp)*1000.))

            # Display the resulting frame
            starts = time.time()
            img, poseimg = self.show(frame, pose, com3D)

            img = self.addStatusBar(img)
            cv2.imshow('frame', img)
            self.lastshow = time.time()
            if self.show_pose:
                cv2.imshow('pose', poseimg)
            if self.show_crop:
                cv2.imshow('crop', numpy.clip((crop + 1.)*128., 0, 255).astype('uint8'))
            self.processKey(cv2.waitKey(1) & 0xFF)
            if self.verbose is True:
                print("{}ms display".format((time.time() - starts)*1000.))
                print("-> {}ms per frame".format((time.time() - start)*1000.))

        # When everything done, release the capture
        cv2.destroyAllWindows()
        device.stop()

    def detect(self, frame):
        """
        Detect the hand
        :param frame: image frame
        :return: cropped image, transformation, center
        """

        hd = HandDetector(frame, self.sync['config']['fx'], self.sync['config']['fy'], importer=self.importer, refineNet=self.comrefNet)
        doHS = (self.state.value == self.STATE_INIT)
        if self.tracking.value and not numpy.allclose(self.lastcom, 0):
            loc, handsz = hd.track(self.lastcom, self.sync['config']['cube'], doHandSize=doHS)
        else:
            loc, handsz = hd.detect(size=self.sync['config']['cube'], doHandSize=doHS)

        self.lastcom = loc

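        # during INIT, per-frame hand size estimates are collected; once enough
        # frames are gathered the cube is set to their median and the pipeline
        # switches to RUN (below)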
        if self.state.value == self.STATE_INIT:
            self.handsizes.append(handsz)
            if self.verbose is True:
                print numpy.median(numpy.asarray(self.handsizes), axis=0)
        else:
            self.handsizes = []

        if self.state.value == self.STATE_INIT and len(self.handsizes) >= self.numinitframes:
            cfg = self.sync['config']
            cfg['cube'] = tuple(numpy.median(numpy.asarray(self.handsizes), axis=0).astype('int'))
            self.sync.update(config=cfg)
            self.state.value = self.STATE_RUN
            self.handsizes = []

        if numpy.allclose(loc, 0):
            return numpy.zeros((self.poseNet.cfgParams.inputDim[2], self.poseNet.cfgParams.inputDim[3]), dtype='float32'), numpy.eye(3), loc
        else:
            crop, M, com = hd.cropArea3D(com=loc, size=self.sync['config']['cube'],
                                         dsize=(self.poseNet.layers[0].cfgParams.inputDim[2], self.poseNet.layers[0].cfgParams.inputDim[3]))
            com3D = self.importer.jointImgTo3D(com)
            sc = (self.sync['config']['cube'][2] / 2.)
            # fill background with the far plane of the crop cube
            crop[crop == 0] = com3D[2] + sc
            # numpy's clip is not in-place, so the result must be assigned back
            crop = crop.clip(com3D[2] - sc, com3D[2] + sc)
            # center on the CoM depth and scale to [-1, 1]
            crop -= com3D[2]
            crop /= sc
            return crop, M, com3D

    def estimatePose(self, crop, com3D):
        """
        Estimate the hand pose
        :param crop: cropped hand depth map
        :param com3D: 3D center of mass of the crop (mm)
        :return: joint positions
        """

        # mirror hand if left/right changed
        if self.hand.value == self.HAND_LEFT:
            inp = crop[None, None, :, :].astype('float32')
        else:
            inp = crop[None, None, :, ::-1].astype('float32')

        jts = self.poseNet.computeOutput(inp)
        jj = jts[0].reshape((-1, 3))
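        # jj is still normalized to the [-1, 1] crop cube; callers denormalize
        # it with pose * cube[2]/2. + com3D (see processVideo/threadConsumer)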

        if 'invX' in self.sync['config']:
            if self.sync['config']['invX'] is True:
                # mirror coordinates
                jj[:, 1] *= (-1.)

        if 'invY' in self.sync['config']:
            if self.sync['config']['invY'] is True:
                # mirror coordinates
                jj[:, 0] *= (-1.)

        # mirror pose if left/right changed
        if self.hand.value == self.HAND_RIGHT:
            # mirror coordinates
            jj[:, 0] *= (-1.)
        return jj

    def show(self, frame, handpose, com3D):
        """
        Show depth with overlaid joints
        :param frame: depth frame
        :param handpose: joint positions
        :param com3D: 3D center of mass of the crop
        :return: annotated depth image and rotated pose image
        """
        upsample = 1.
        if 'upsample' in self.sync['config']:
            upsample = self.sync['config']['upsample']

        # plot depth image with annotations
        imgcopy = frame.copy()
        # display hack to hide invalid depth (zero or saturated at 32001)
        msk = numpy.logical_and(32001 > imgcopy, imgcopy > 0)
        msk2 = numpy.logical_or(imgcopy == 0, imgcopy == 32001)
        dmin = imgcopy[msk].min()
        dmax = imgcopy[msk].max()
        imgcopy = (imgcopy - dmin) / (dmax - dmin) * 255.
        imgcopy[msk2] = 255.
        imgcopy = imgcopy.astype('uint8')
        imgcopy = cv2.cvtColor(imgcopy, cv2.COLOR_GRAY2BGR)

        if not numpy.allclose(upsample, 1):
            imgcopy = cv2.resize(imgcopy, dsize=None, fx=upsample, fy=upsample, interpolation=cv2.INTER_LINEAR)

        if handpose.shape[0] == 16:
            hpe = ICVLHandposeEvaluation(numpy.zeros((3, 3)), numpy.zeros((3, 3)))
        elif handpose.shape[0] == 14:
            hpe = NYUHandposeEvaluation(numpy.zeros((3, 3)), numpy.zeros((3, 3)))
        elif handpose.shape[0] == 21:
            hpe = MSRAHandposeEvaluation(numpy.zeros((3, 3)), numpy.zeros((3, 3)))
        else:
            raise ValueError("Invalid number of joints {}".format(handpose.shape[0]))

        jtI = self.importer.joints3DToImg(handpose)
        jtI[:, 0:2] -= numpy.asarray([frame.shape[0]//2, frame.shape[1]//2])
        jtI[:, 0:2] *= upsample
        jtI[:, 0:2] += numpy.asarray([imgcopy.shape[0]//2, imgcopy.shape[1]//2])
        for i in xrange(handpose.shape[0]):
            cv2.circle(imgcopy, (jtI[i, 0], jtI[i, 1]), 3, (255, 0, 0), -1)

        for i in xrange(len(hpe.jointConnections)):
            cv2.line(imgcopy, (jtI[hpe.jointConnections[i][0], 0], jtI[hpe.jointConnections[i][0], 1]),
                     (jtI[hpe.jointConnections[i][1], 0], jtI[hpe.jointConnections[i][1], 1]),
                     255.*hpe.jointConnectionColors[i], 2)

        comI = self.importer.joint3DToImg(com3D)
        comI[0:2] -= numpy.asarray([frame.shape[0]//2, frame.shape[1]//2])
        comI[0:2] *= upsample
        comI[0:2] += numpy.asarray([imgcopy.shape[0]//2, imgcopy.shape[1]//2])
        cv2.circle(imgcopy, (comI[0], comI[1]), 3, (0, 255, 0), 1)

        poseimg = numpy.zeros_like(imgcopy)
        # rotate 3D pose and project to 2D
        jtP = self.importer.joints3DToImg(rotatePoints3D(handpose, handpose[self.importer.crop_joint_idx], 0., 90., 0.))
        jtP[:, 0:2] -= numpy.asarray([frame.shape[0]//2, frame.shape[1]//2])
        jtP[:, 0:2] *= upsample
        jtP[:, 0:2] += numpy.asarray([imgcopy.shape[0]//2, imgcopy.shape[1]//2])
        for i in xrange(handpose.shape[0]):
            cv2.circle(poseimg, (jtP[i, 0], jtP[i, 1]), 3, (255, 0, 0), -1)

        for i in xrange(len(hpe.jointConnections)):
            cv2.line(poseimg, (jtP[hpe.jointConnections[i][0], 0], jtP[hpe.jointConnections[i][0], 1]),
                     (jtP[hpe.jointConnections[i][1], 0], jtP[hpe.jointConnections[i][1], 1]),
                     255.*hpe.jointConnectionColors[i], 2)

        comP = self.importer.joint3DToImg(rotatePoint3D(com3D, handpose[self.importer.crop_joint_idx], 0., 90., 0.))
        comP[0:2] -= numpy.asarray([frame.shape[0]//2, frame.shape[1]//2])
        comP[0:2] *= upsample
        comP[0:2] += numpy.asarray([imgcopy.shape[0]//2, imgcopy.shape[1]//2])
        cv2.circle(poseimg, (comP[0], comP[1]), 3, (0, 255, 0), 1)

        return imgcopy, poseimg

    def addStatusBar(self, img):
        """
        Add status bar to image
        :param img: image
        :return: image with status bar
        """
        barsz = 20
        retimg = numpy.ones((img.shape[0]+barsz, img.shape[1], img.shape[2]), dtype='uint8')*255

        retimg[barsz:img.shape[0]+barsz, 0:img.shape[1], :] = img

        # FPS text
        fps = 1./(time.time()-self.lastshow)
        self.runningavg_fps.append(fps)
        avg_fps = numpy.mean(self.runningavg_fps)
        cv2.putText(retimg, "FPS {0:2.1f}".format(avg_fps), (20, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 0))

        # hand text
        cv2.putText(retimg, "Left" if self.hand.value == self.HAND_LEFT else "Right", (80, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 0))

        # hand size
        ss = "HC-{0:d}".format(self.sync['config']['cube'][0])
        cv2.putText(retimg, ss, (120, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 0))

        # hand tracking mode, tracking or detection
        cv2.putText(retimg, "T" if self.tracking.value else "D", (260, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 0))

        # hand detection mode, COM or CNN
        if self.detection.value == self.DETECTOR_COM:
            mode = "COM"
        else:
            mode = "???"
        cv2.putText(retimg, mode, (280, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 0))

        # status symbol
        if self.state.value == self.STATE_IDLE:
            col = (0, 0, 255)
        elif self.state.value == self.STATE_INIT:
            col = (0, 255, 255)
        elif self.state.value == self.STATE_RUN:
            col = (0, 255, 0)
        else:
            col = (0, 0, 255)
        cv2.circle(retimg, (5, 5), 5, col, -1)
        return retimg

    def processKey(self, key):
        """
        Process key
        :param key: key value
        :return: None
        """

        if key == ord('q'):
            self.stop.value = True
        elif key == ord('h'):
            if self.hand.value == self.HAND_LEFT:
                self.hand.value = self.HAND_RIGHT
            else:
                self.hand.value = self.HAND_LEFT
        elif key == ord('+'):
            cfg = self.sync['config']
            cfg['cube'] = tuple(c + 10 for c in cfg['cube'])
            self.sync.update(config=cfg)
        elif key == ord('-'):
            cfg = self.sync['config']
            cfg['cube'] = tuple(c - 10 for c in cfg['cube'])
            self.sync.update(config=cfg)
        elif key == ord('r'):
            self.reset()
        elif key == ord('i'):
            self.state.value = self.STATE_INIT
        elif key == ord('t'):
            self.tracking.value = not self.tracking.value
        elif key == ord('s'):
            self.show_crop = not self.show_crop
            self.show_pose = not self.show_pose
        else:
            pass

    def reset(self):
        """
        Reset stateful parts
        :return: None
        """
        self.state.value = self.STATE_IDLE
        self.sync.update(config=copy.deepcopy(self.initialconfig))
        self.detection.value = self.DETECTOR_COM
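
The normalization contract that ties detect() and estimatePose() together: detect() maps the cropped depth values into [-1, 1] around the center of mass, the network predicts joints in that same normalized cube, and the callers denormalize with pose * cube[2]/2. + com3D. A minimal numeric sketch with hypothetical values:

import numpy

pose_norm = numpy.array([[0.1, -0.2, 0.05]])  # hypothetical network output
cube = (300, 300, 300)                        # config['cube'] as in Example #5
com3D = numpy.array([20., -15., 750.])        # hypothetical crop center (mm)
pose_mm = pose_norm * cube[2] / 2. + com3D    # -> [[35., -45., 757.5]]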
Example #7
xstart = int(train_data.shape[2] / 2 - dsize[0] / 2)
xend = xstart + dsize[0]
ystart = int(train_data.shape[3] / 2 - dsize[1] / 2)
yend = ystart + dsize[1]
train_data4 = train_data[:, :, ystart:yend, xstart:xend]

comrefNetParams = ScaleNetParams(type=1,
                                 nChan=1,
                                 wIn=96,
                                 hIn=96,
                                 batchSize=1,
                                 resizeFactor=2,
                                 numJoints=1,
                                 nDims=3)
comrefNetParams.loadFile = "../../ptm/net_MSRA15_COM_AUGMENT.pkl"
poseNet = ScaleNet(numpy.random.RandomState(23455), cfgParams=comrefNetParams)
train_data = train_data.astype('float64')
train_data2 = train_data2.astype('float64')
train_data4 = train_data4.astype('float64')
# Seq_all is a list of sequence data
gt3D = []
for i in xrange(len(Seq_all)):
    gt3D_temp = [
        j.gt3Dorig[di.crop_joint_idx].reshape(1, 3) for j in Seq_all[i].data
    ]
    gt3D.extend(gt3D_temp)
jts = poseNet.computeOutput([train_data, train_data2, train_data4])
joints = []
for i in xrange(train_data.shape[0]):
    joints.append(jts[i].reshape(1, 3) * (Seq_all[0].config['cube'][2] / 2.) +
                  Seq_all[0].data[i].com)
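
# A possible next step (a sketch, not part of the original snippet): mean
# Euclidean error in mm between the denormalized predictions and the
# ground-truth CoM joints collected in gt3D above.
errors = [numpy.sqrt(((joints[i] - gt3D[i]) ** 2).sum(axis=1)).mean()
          for i in xrange(len(joints))]
print "mean error: {0:.2f}mm".format(numpy.mean(errors))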