def main(input_img, model, e): ''' Query the model given an image ''' if(input_img): image = stringToImage(input_img[input_img.find(",")+1:]) image = toRGB(image) if(model == None): model = 'mobilenet_thin' humans = e.inference(image) coords = [] for human in humans: coords.append([[HUMAN_COCO_PART[k], b.x, b.y] for k, b in human.body_parts.items()]) outdata = { 'humans': coords } return outdata else: # Test with a sample image image = common.read_imgfile('./images/p1.jpg', None, None) e = TfPoseEstimator(get_graph_path('mobilenet_thin'), target_size=(432, 368)) humans = e.inference(image) coords = [] for human in humans: coords.append([[HUMAN_COCO_PART[k], b.x, b.y] for k, b in human.body_parts.items()]) outdata = { 'humans': coords } return outdata
def PoseEstimatorPredict(image_path,plot = False,resolution ='432x368', scales = '[None]',model = 'mobilenet_thin'): ''' input: image_path,图片路径,jpg plot = False,是否画图,如果True,两样内容,关键点信息+标点图片matrix resolution ='432x368', 规格 scales = '[None]', model = 'mobilenet_thin',模型选择 output: plot为false,返回一个内容:关键点信息 plot为true,返回两个内容:关键点信息+标点图片matrix ''' w, h = model_wh(resolution) e = TfPoseEstimator(get_graph_path(model), target_size=(w, h)) image = common.read_imgfile(image_path, None, None) t = time.time() humans = e.inference(image, scales=scales) # 主要的预测函数 elapsed = time.time() - t logger.info('inference image: %s in %.4f seconds.' % (image_path, elapsed)) centers = get_keypoint(image,humans) # 拿上关键点信息 if plot: # 画图的情况下: image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False) # 画图函数 fig = plt.figure() a = fig.add_subplot(2, 2, 1) a.set_title('Result') plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)) return centers,image else: # 不画图的情况下: return centers
def __init__(self, input_wicth=800, input_height=640): import sys sys.path.append('./src/detector/tf-pose-estimation/src') self.model = 'cmu' import common self.enum_coco_parts = common.CocoPart self.enum_coco_colors = common.CocoColors self.enum_coco_pairs_render = common.CocoPairsRender from estimator import TfPoseEstimator from networks import get_graph_path self.image_h, self.image_w = input_height, input_wicth if self.image_w % 16 != 0 or self.image_h % 16 != 0: raise Exception( 'Width and height should be multiples of 16. w=%d, h=%d' % (self.image_w, self.image_h)) print('Warming up detector ConvPoseMachine....') import time s = time.time() self.estimator = TfPoseEstimator(get_graph_path(self.model), target_size=(self.image_w, self.image_h)) e = time.time() print('ConvPoseMachine Warmed, Time: {}'.format(e - s))
def __init__(self): """ Initialize the graphics window and mesh surface """ # setup the view window self.app = QtGui.QApplication(sys.argv) self.window = gl.GLViewWidget() self.window.setWindowTitle('Terrain') self.window.setGeometry(0, 110, 1920, 1080) self.window.setCameraPosition(distance=30, elevation=12) self.window.show() gx = gl.GLGridItem() gy = gl.GLGridItem() gz = gl.GLGridItem() gx.rotate(90, 0, 1, 0) gy.rotate(90, 1, 0, 0) gx.translate(-10, 0, 0) gy.translate(0, -10, 0) gz.translate(0, 0, -10) self.window.addItem(gx) self.window.addItem(gy) self.window.addItem(gz) model = 'mobilenet_thin' #model = '432x368' camera = 0 self.lines = {} self.connection = [[0, 1], [1, 2], [2, 3], [0, 4], [4, 5], [5, 6], [0, 7], [7, 8], [8, 9], [9, 10], [8, 11], [11, 12], [12, 13], [8, 14], [14, 15], [15, 16]] resolution = '432x368' w, h = model_wh(resolution) self.e = TfPoseEstimator(get_graph_path(model), target_size=(w, h)) self.cam = cv2.VideoCapture(camera) ret_val, image = self.cam.read() #print('ret_val', ret_val) self.poseLifting = Prob3dPose( './src/lifting/models/prob_model_params.mat') keypoints = self.mesh(image) print('keypoints', keypoints) self.points = gl.GLScatterPlotItem(pos=keypoints, color=pg.glColor((0, 255, 0)), size=15) #print(keypoints) self.window.addItem(self.points) for n, pts in enumerate(self.connection): self.lines[n] = gl.GLLinePlotItem(pos=np.array( [keypoints[p] for p in pts]), color=pg.glColor((0, 0, 255)), width=3, antialias=True) self.window.addItem(self.lines[n])
def init_multi(): model_path = get_graph_path(model_name='mobilenet_thin') #'mobilenet_thin/mobilenet_v2_large/mobilenet_v2_small/cmu' model_1 = TfPoseEstimator(model_path, target_size=(432, 368), device='0') model_2 = TfPoseEstimator(model_path, target_size=(656, 368), device='0') f=open(engine_file_path, "rb") runtime=trt.Runtime(TRT_LOGGER) engine= runtime.deserialize_cuda_engine(f.read()) context=engine.create_execution_context() return model_1, model_2,engine,context
def index(): try: data = request.data with open('/tmp/temp.jpg', 'wb') as f: f.write(data) img = common.read_imgfile('/tmp/temp.jpg', 432, 368) scales = ast.literal_eval(args.scales) e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h)) humans = e.inference(img, scales=scales) return jsonify({"humans": list(map(lambda x: x.to_dict(), humans))}) except Exception as e: return jsonify({"error": str(traceback.format_exc())})
def __init__(self, zoom=1.0, resolution='656x368', model='cmu', show_process=False): self.zoom = zoom self.resolution = resolution self.model = model self.show_process = show_process logger.debug('initialization %s : %s' % (model, get_graph_path(model))) self.w, self.h = model_wh(resolution) self.e = TfPoseEstimator(get_graph_path(model), target_size=(self.w, self.h))
def __init__(self): """ Initialize the graphics window and mesh surface """ self.bitFalling = 0 # Initialize plot. plt.ion() f2 = plt.figure(figsize=(6, 5)) self.windowNeck = f2.add_subplot(1, 1, 1) self.windowNeck.set_title('Speed') self.windowNeck.set_xlabel('Time') self.windowNeck.set_ylabel('Speed') # plt.show() self.times = [] self.recordVelocity = [] self.recordNeck = [] self.recordYTopRectangle = [] self.recordHIP = [] self.recordTimeList = [] self.globalTime = 0 self.highestNeck = 0 # self.highestNeckTime = 0 self.highestHIP = 0 self.saveTimesStartFalling = -1 self.surpriseMovingTime = -1 self.detectedHIP_Y = 0 self.detectedNECK_Y = 0 self.extraDistance = 0 self.fgbg = cv2.createBackgroundSubtractorMOG2(history=1, varThreshold=500, detectShadows=False) self.secondNeck = 0 self.human_in_frame = False self.lastTimesFoundNeck = -1 self.width = 300 self.height = 200 self.quotaVirtureNeck = 3 self.used_quotaVirtureNeck = 0 self.quoutaFalling = 0 model = 'mobilenet_thin_432x368' w, h = model_wh(model) camera = 0 # 1 mean external camera , 0 mean internal camera self.e = TfPoseEstimator(get_graph_path(model), target_size=(w, h)) self.cam = cv2.VideoCapture(camera) # self.cam = cv2.VideoCapture('C:/outpy2.avi') self.cam.set(cv2.CAP_PROP_AUTOFOCUS, 0) # turn the autofocus off self.recordAcceleration = []
def main(): parser = argparse.ArgumentParser( description='tf-pose-estimation run by folder') parser.add_argument('--folder', type=str, default='./images/') parser.add_argument('--resolution', type=str, default='432x368', help='network input resolution. default=432x368') parser.add_argument('--model', type=str, default='mobilenet_thin', help='cmu / mobilenet_thin') parser.add_argument('--scales', type=str, default='[None]', help='for multiple scales, eg. [1.0, (1.1, 0.05)]') args = parser.parse_args() scales = ast.literal_eval(args.scales) w, h = model_wh(args.resolution) e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h)) types = ('*.png', '*.jpg') files_grabbed = [] for files in types: files_grabbed.extend(glob.glob(os.path.join(args.folder, files))) all_humans = dict() if not os.path.exists('output'): os.mkdir('output') for i, file in enumerate(files_grabbed): # estimate human poses from a single image ! image = common.read_imgfile(file, None, None) t = time.time() humans = e.inference(image, scales=scales) elapsed = time.time() - t logger.info('inference image #%d: %s in %.4f seconds.' % (i, file, elapsed)) image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False) # cv2.imshow('tf-pose-estimation result', image) filename = 'pose_{}.png'.format(i) output_filepath = os.path.join('output', filename) cv2.imwrite(output_filepath, image) logger.info('image saved: {}'.format(output_filepath)) # cv2.waitKey(5000) all_humans[file.replace(args.folder, '')] = humans with open(os.path.join(args.folder, 'pose.dil'), 'wb') as f: dill.dump(all_humans, f, protocol=dill.HIGHEST_PROTOCOL)
def joint(): try: data = request.data with open('/tmp/temp.jpg', 'wb') as f: f.write(data) img = common.read_imgfile('/tmp/temp.jpg', 432, 368) scales = ast.literal_eval(args.scales) e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h)) humans = e.inference(img, scales=scales) image = TfPoseEstimator.draw_humans(img, humans, imgcopy=False) b_str = base64.b64encode(img2bytes( Image.fromarray(image))).decode('utf-8') return jsonify({"image": b_str}) except Exception as e: return jsonify({"error": str(traceback.format_exc())})
def cb_pose(data): # get image with pose time t = data.header.stamp image = vf.get_latest(t, remove_older=True) if image is None: rospy.logwarn('No received images.') return h, w = image.shape[:2] if resize_ratio > 0: image = cv2.resize(image, (int(resize_ratio*w), int(resize_ratio*h)), interpolation=cv2.INTER_LINEAR) # ros topic to Person instance humans = [] for p_idx, person in enumerate(data.persons): human = Human([]) for body_part in person.body_part: part = BodyPart('', body_part.part_id, body_part.x, body_part.y, body_part.confidence) human.body_parts[body_part.part_id] = part humans.append(human) # draw image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False) pub_img.publish(cv_bridge.cv2_to_imgmsg(image, 'bgr8'))
def liveData(self): directory_in_str = sys.path[0] + r"/../images/LiveTest/" try: os.remove(outputfile) os.remove(cleanedOutputfile) except OSError: pass for file in os.listdir(directory_in_str): filename = os.fsdecode(file) if filename.endswith(".jpg") or filename.endswith(".png"): fullpath = directory_in_str + filename # estimate human poses from a single image ! image = common.read_imgfile(fullpath, None, None) humans = self.e.inference(image, scales=self.scales) image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False) # cv2.imshow('tf-pose-estimation result', image) # cv2.waitKey() myFile = open(outputfile, 'a') # myFile.write(str(filename) + ',') # print(filename) myFile.write('\n') # break myFile.close()
def Estimate_3Ddata(image, e, scales): # t0 = time.time() # # estimate human poses from a single image ! # t = time.time() humans = e.inference(image, scales=scales) #elapsed = time.time() - t image = TfPoseEstimator.draw_humans(image, humans) #logger.info('inference image:%.4f seconds.' % (elapsed)) logger.info('3d lifting initialization.') poseLifting = Prob3dPose('lifting/models/prob_model_params.mat') standard_w = 320 standard_h = 240 pose_2d_mpiis = [] visibilities = [] for human in humans: pose_2d_mpii, visibility = common.MPIIPart.from_coco(human) pose_2d_mpiis.append([(int(x * standard_w + 0.5), int(y * standard_h + 0.5)) for x, y in pose_2d_mpii]) visibilities.append(visibility) pose_2d_mpiis = np.array(pose_2d_mpiis) visibilities = np.array(visibilities) if (pose_2d_mpiis.ndim != 3): return 0 transformed_pose2d, weights = poseLifting.transform_joints( pose_2d_mpiis, visibilities) pose_3d = poseLifting.compute_3d(transformed_pose2d, weights) #alltime= time.time() - t0 #logger.info('estimate all time:%.4f seconds.' % (alltime)) return pose_3d, image
def cnvt(img) : os.chdir(read_path) image = cv2.imread(img) humans = e.inference(image) image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False) os.chdir(save_path) cv2.imwrite(img, image) os.chdir(old_path)
def __init__(self): """ Initialize the graphics window and mesh surface """ # setup the view window self.app = QtGui.QApplication(sys.argv) self.window = gl.GLViewWidget() self.window.setWindowTitle('Terrain') self.window.setGeometry(0, 110, 1920, 1080) self.window.setCameraPosition(distance=30, elevation=12) self.window.show() gx = gl.GLGridItem() gy = gl.GLGridItem() gz = gl.GLGridItem() gx.rotate(90, 0, 1, 0) gy.rotate(90, 1, 0, 0) gx.translate(-10, 0, 0) gy.translate(0, -10, 0) gz.translate(0, 0, -10) self.window.addItem(gx) self.window.addItem(gy) self.window.addItem(gz) model = 'mobilenet_thin' camera = 0 w, h = model_wh("432x368") self.e = TfPoseEstimator(get_graph_path(model), target_size=(w, h)) self.cam = cv2.VideoCapture(0) ret_val, image = self.cam.read() self.poseLifting = Prob3dPose('lifting/models/prob_model_params.mat') keypoints = self.mesh(image) self.points = gl.GLScatterPlotItem( pos=keypoints, color=pg.glColor((255, 255, 0)), size=15 ) # self.lines = gl.GLLinePlotItem( # pos=lines, # color=pg.glColor((255, 255, 255)), # width=2 # ) self.window.addItem(self.points)
def __init__(self): """ Initialize the graphics window and mesh surface add some grids in the 3 point... that are just filling up the background """ # setup the view window self.app = QtGui.QApplication(sys.argv) self.window = gl.GLViewWidget() self.window.setWindowTitle('Terrain') self.window.setGeometry(0, 110, 1920, 1080) self.window.setCameraPosition(distance=30, elevation=12) self.window.show() gx = gl.GLGridItem() gy = gl.GLGridItem() gz = gl.GLGridItem() gx.rotate(90, 0, 1, 0) gy.rotate(90, 1, 0, 0) gx.translate(-10, 0, 0) gy.translate(0, -10, 0) # we translate all of them gz.translate(0, 0, -10) # so they're pushed out to the background self.window.addItem(gx) self.window.addItem(gy) # add them all to the window self.window.addItem(gz) model = "mobilenet_thin_432x368" camera = 0 w, h = model_wh(model) # we are gonna create e objects but instead we're gonna call it self.e = TfPoseEstimator(get_graph_path(model), target_size=(w, h)) # we're basically just going the same thing(run.py line:37) instead of args.model # we just created our own object for model self.cam = cv2.VideoCapture(camera) ret_val, image = self.cam.read() self.poseLifting = Prob3dPose( './src/lifting/models/prob_model_params.mat') # we'll have this object to do our 3d pose translater? yukardaki keypoints = self.mesh(image) self.points = gl.GLScatterPlotItem( # plot dots pos=keypoints, color=pg.glColor((0, 255, 0)), size=15) self.window.addItem(self.points)
def __init__(self): """ Initialize the graphics window and mesh surface """ # Initialize plot. plt.ion() f = plt.figure(figsize=(5, 5)) f2 = plt.figure(figsize=(6, 5)) self.window3DBody = f.gca(projection='3d') self.window3DBody.set_title('3D_Body') self.windowStable = f2.add_subplot(1, 1, 1) self.windowStable.set_title('Stable') self.windowStable.set_xlabel('Time') self.windowStable.set_ylabel('Distant') self.windowStable.set_ylim([0, 1500]) #plt.show() self.times = [0] self.stable = [0] self.recordHead = [] self.fps_time = 0 model = 'mobilenet_thin_432x368' w, h = model_wh(model) #model = 'cmu' #w,h = 656,368 camera = 1 #1 mean external camera , 0 mean internal camera self.lines = {} self.connection = [[0, 1], [1, 2], [2, 3], [0, 4], [4, 5], [5, 6], [0, 7], [7, 8], [8, 9], [9, 10], [8, 11], [11, 12], [12, 13], [8, 14], [14, 15], [15, 16]] self.e = TfPoseEstimator(get_graph_path(model), target_size=(w, h)) self.cam = cv2.VideoCapture(camera) ret_val, image = self.cam.read(cv2.IMREAD_COLOR) self.poseLifting = Prob3dPose( './src/lifting/models/prob_model_params.mat') self.statusBodyWindow = 0 try: keypoints = self.mesh(image) except: pass
def cnvt(img, name) : os.chdir(read_path) image = cv2.imread(img) os.chdir(old_path) model = 'mobilenet_thin' resolution = '432x368' w, h = model_wh(resolution) e = TfPoseEstimator(get_graph_path(model), target_size=(w, h)) humans = e.inference(image) blank_image = np.zeros((h,w,3), np.uint8) image = TfPoseEstimator.draw_humans(blank_image, humans, imgcopy=False) os.chdir(save_path) cv2.imwrite(name, image) print("Saved - %s As - %s" % (img, name)) os.chdir(old_path)
class ConvPoseMachine(Detector): def __init__(self, input_wicth=800, input_height=640): import sys sys.path.append('./src/detector/tf-pose-estimation/src') self.model = 'cmu' import common self.enum_coco_parts = common.CocoPart self.enum_coco_colors = common.CocoColors self.enum_coco_pairs_render = common.CocoPairsRender from estimator import TfPoseEstimator from networks import get_graph_path self.image_h, self.image_w = input_height, input_wicth if self.image_w % 16 != 0 or self.image_h % 16 != 0: raise Exception( 'Width and height should be multiples of 16. w=%d, h=%d' % (self.image_w, self.image_h)) print('Warming up detector ConvPoseMachine....') import time s = time.time() self.estimator = TfPoseEstimator(get_graph_path(self.model), target_size=(self.image_w, self.image_h)) e = time.time() print('ConvPoseMachine Warmed, Time: {}'.format(e - s)) def predict(self, imgcv): # Build model based on input image size img_h, img_w, _ = imgcv.shape humans = self.estimator.inference(imgcv) formatted_dets = [] for human in humans: key_point = {} # draw point for i in range(self.enum_coco_parts.Background.value): if i not in human.body_parts.keys(): continue body_part = human.body_parts[i] x = int( (body_part.x * self.image_w + 0.5) * img_w / self.image_w) y = int( (body_part.y * self.image_h + 0.5) * img_h / self.image_h) center = (x, y) key_point[i] = center detection = get_default_detection() detection['person_keypoint'] = key_point formatted_dets.append(detection) return formatted_dets
class tfOpenpose: def __init__(self, zoom=1.0, resolution='656x368', model='cmu', show_process=False): self.zoom = zoom self.resolution = resolution self.model = model self.show_process = show_process logger.debug('initialization %s : %s' % (model, get_graph_path(model))) self.w, self.h = model_wh(resolution) self.e = TfPoseEstimator(get_graph_path(model), target_size=(self.w, self.h)) def detect(self, image): logger.debug('image preprocess+') if self.zoom < 1.0: canvas = np.zeros_like(image) img_scaled = cv2.resize(image, None, fx=self.zoom, fy=self.zoom, interpolation=cv2.INTER_LINEAR) dx = (canvas.shape[1] - img_scaled.shape[1]) // 2 dy = (canvas.shape[0] - img_scaled.shape[0]) // 2 canvas[dy:dy + img_scaled.shape[0], dx:dx + img_scaled.shape[1]] = img_scaled image = canvas elif self.zoom > 1.0: img_scaled = cv2.resize(image, None, fx=self.zoom, fy=self.zoom, interpolation=cv2.INTER_LINEAR) dx = (img_scaled.shape[1] - image.shape[1]) // 2 dy = (img_scaled.shape[0] - image.shape[0]) // 2 image = img_scaled[dy:image.shape[0], dx:image.shape[1]] logger.debug('image process+') humans = self.e.inference(image) logger.debug('postprocess+') image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False) fps_time = 0 logger.debug('show+') cv2.putText(image, "FPS: %f" % (1.0 / (time.time() - fps_time)), (10, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2) cv2.imshow('tf-pose-estimation result', image) fps_time = time.time() logger.debug('finished+')
def __init__(self): """ Initialize the graphics window and mesh surface """ self.bitFalling = 0 # Initialize plot. self.times = [] self.recordVelocity = [] self.recordNeck = [] self.recordYTopRectangle = [] self.recordHIP = [] self.recordTimeList = [] self.globalTime = 0 self.highestNeck = 0 # self.hightestNeckTime = 0 self.highestHIP = 0 self.saveTimesStartFalling = -1 self.quoutaFalling = 0 self.surpriseMovingTime = -1 self.detectedHIP_Y = 0 self.detectedNECK_Y = 0 self.extraDistance = 0 self.fgbg = cv2.createBackgroundSubtractorMOG2(history=1, varThreshold=500, detectShadows=False) self.secondNeck = 0 self.human_in_frame = False self.lastTimesFoundNeck = -1 self.width = 300 self.height = 200 self.quotaVirtureNeck = 3 self.used_quotaVirtureNeck = 0 model = 'mobilenet_thin_432x368' w, h = model_wh(model) self.e = TfPoseEstimator(get_graph_path(model), target_size=(w, h)) self.recordAcceleration = []
def __init__(self): self.parser = argparse.ArgumentParser( description='tf-pose-estimation run') self.parser.add_argument( '--resolution', type=str, default='432x368', help='network input resolution. default=432x368') self.parser.add_argument('--model', type=str, default='mobilenet_thin', help='cmu / mobilenet_thin') self.parser.add_argument( '--scales', type=str, default='[None]', help='for multiple scales, eg. [1.0, (1.1, 0.05)]') self.args = self.parser.parse_args() self.scales = ast.literal_eval(self.args.scales) self.w, self.h = model_wh(self.args.resolution) self.e = TfPoseEstimator(get_graph_path(self.args.model), target_size=(self.w, self.h))
def generate_skeletonize_video(): """ The method takes images , save a skeleton per image and creates a video output :return: """ input_folder = "./videos/demo/" output_folder = "./videos" results_folder = "./images/demo/" input_video = "./videos/demo.mp4" images = video_utils.load_images_from_folder(input_folder) w = 432 h = 368 estimator = TfPoseEstimator(get_graph_path('mobilenet_thin'), target_size=(w, h)) count = 1 for i in images: image_parts = estimator.inference(i, scales=None) image_skeleton = TfPoseEstimator.draw_humans(i, image_parts, imgcopy=True) cv2.imwrite(r".\images\demo\{}.png".format(count), image_skeleton) count = count + 1 video_utils.create_video(input_video, results_folder, output_folder)
class Detector: def __init__(self, show=True): model = 'mobilenet_thin' w, h = model_wh('432x368') self.estimator = TfPoseEstimator(get_graph_path(model), target_size=(w, h)) self.show = show def detectCandidates(self, frame): cands = [] humans = self.estimator.inference(frame) image_h, image_w = frame.shape[:2] feat_list = [] for i in range(len(humans)): if i >= len(humans): break keys = humans[i].body_parts.keys() if len(np.setdiff1d(needed_elements, keys)): del humans[i] continue neck = humans[i].body_parts[1] lhip = humans[i].body_parts[8] rhip = humans[i].body_parts[11] center = (neck.x + lhip.x + rhip.x) / 3, (neck.y + lhip.y + rhip.y) / 3 feats = [] for idx in needed_elements: part = humans[i].body_parts[idx] feats = feats + [part.x - center[0], part.y - center[1]] feat_list.append(np.asarray(feats)) center = image_w * center[0], image_h * center[1] cv2.circle(frame, (int(center[0]), int(center[1])), 3, (255, 0, 0), 3) cands.append(np.asarray(center, dtype=np.float32)) # print feat_list[0] if (self.show): frame = TfPoseEstimator.draw_humans(frame, humans, imgcopy=False) return cands, feat_list, frame
def __init__(self, calibration_file): """ This method init an OpenPose model """ args = type('', (), {}) # args.resolution = '1312x736' args.resolution = '432x368' args.model = 'mobilenet_thin' args.scales = '[None]' scales = ast.literal_eval(args.scales) w, h = model_wh(args.resolution) e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h)) self.model = {'e': e, 'scales': scales} # start new matlab session self.matlab = matlab.engine.start_matlab() # or... # in matlab run: matlab.engine.shareEngine # and uncomment the following line # self.matlab = matlab.engine.connect_matlab() self.calibration = calibration_file
def detect(self, image): logger.debug('image preprocess+') if self.zoom < 1.0: canvas = np.zeros_like(image) img_scaled = cv2.resize(image, None, fx=self.zoom, fy=self.zoom, interpolation=cv2.INTER_LINEAR) dx = (canvas.shape[1] - img_scaled.shape[1]) // 2 dy = (canvas.shape[0] - img_scaled.shape[0]) // 2 canvas[dy:dy + img_scaled.shape[0], dx:dx + img_scaled.shape[1]] = img_scaled image = canvas elif self.zoom > 1.0: img_scaled = cv2.resize(image, None, fx=self.zoom, fy=self.zoom, interpolation=cv2.INTER_LINEAR) dx = (img_scaled.shape[1] - image.shape[1]) // 2 dy = (img_scaled.shape[0] - image.shape[0]) // 2 image = img_scaled[dy:image.shape[0], dx:image.shape[1]] logger.debug('image process+') humans = self.e.inference(image) logger.debug('postprocess+') image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False) fps_time = 0 logger.debug('show+') cv2.putText(image, "FPS: %f" % (1.0 / (time.time() - fps_time)), (10, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2) cv2.imshow('tf-pose-estimation result', image) fps_time = time.time() logger.debug('finished+')
def detectCandidates(self, frame): cands = [] humans = self.estimator.inference(frame) image_h, image_w = frame.shape[:2] frame = TfPoseEstimator.draw_humans(frame, humans, imgcopy=False) feat_list = [] for i in range(len(humans)): if i >= len(humans): break keys = humans[i].body_parts.keys() if len(np.setdiff1d(needed_elements, keys)): del humans[i] continue neck = humans[i].body_parts[1] lhip = humans[i].body_parts[8] rhip = humans[i].body_parts[11] center = (neck.x + lhip.x + rhip.x) / 3, (neck.y + lhip.y + rhip.y) / 3 feats = [] for idx in needed_elements: part = humans[i].body_parts[idx] feats = feats + [part.x - center[0], part.y - center[1]] feat_list.append(np.asarray(feats)) center = image_w * center[0], image_h * center[1] cv2.circle(frame, (int(center[0]), int(center[1])), 3, (255, 0, 0), 3) cands.append(np.asarray(center, dtype=np.float32)) # print feat_list[0] return cands, feat_list, frame
rospy.loginfo('initialization+') rospy.init_node('TfPoseEstimatorROS', anonymous=True) # parameters image_topic = rospy.get_param('~camera', '') model = rospy.get_param('~model', 'cmu_640x480') if not image_topic: rospy.logerr('Parameter \'camera\' is not provided.') sys.exit(-1) try: w, h = model_wh(model) graph_path = get_graph_path(model) rospack = rospkg.RosPack() graph_path = os.path.join(rospack.get_path('tfpose_ros'), graph_path) except Exception as e: rospy.logerr('invalid model: %s, e=%s' % (model, e)) sys.exit(-1) pose_estimator = TfPoseEstimator(graph_path, target_size=(w, h)) cv_bridge = CvBridge() rospy.Subscriber(image_topic, Image, callback_image, queue_size=1) pub_pose = rospy.Publisher('~pose', Persons, queue_size=1) rospy.loginfo('start+') rospy.spin() rospy.loginfo('finished')
def post(self): global finished_post if ((self.request.headers['Content-Type'] == 'imagebin') and (finished_post == 1)): print(finished_post) finished_post = 0 print('Received image') image = self.request.body fh = open('static/image1.jpg', 'wb') fh.write(image) fh.close() #fh = open('static/image1.jpg','ab') #fh.write(bytes([0xD9])) #fh.close() print('0') #image = cv2.imread('static/image1.jpg') print('1') print('2') parser = argparse.ArgumentParser( description='tf-pose-estimation run') parser.add_argument( '--resolution', type=str, default='432x368', help='network input resolution. default=432x368') parser.add_argument('--model', type=str, default='mobilenet_thin', help='cmu / mobilenet_thin') parser.add_argument( '--scales', type=str, default='[None]', help='for multiple scales, eg. [1.0, (1.1, 0.05)]') args = parser.parse_args() scales = ast.literal_eval(args.scales) w, h = model_wh(args.resolution) e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h)) # estimate human poses from a single image ! image = common.read_imgfile('static/image1.jpg', None, None) # image = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21) t = time.time() humans = e.inference(image, scales=scales) elapsed = time.time() - t logger.info('inference image: image3.jpg in %.4f seconds.' % (elapsed)) image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False) #cv2.putText(image, # f"Fallen: False", # (60, 60), cv2.FONT_HERSHEY_SIMPLEX, 2, # (0, 255, 0), 5) cv2.imwrite('static/image3.jpg', image) for client in clients: update_clients(client) print(finished_post) finished_post = 1
def main(): parser = argparse.ArgumentParser(description='tf-pose-estimation run') parser.add_argument('--image', type=str, default='./images/p3.jpg') parser.add_argument( '--resolution', type=str, default='432x368', help='network input resolution. default=432x368') parser.add_argument( '--model', type=str, default='mobilenet_thin', help='cmu / mobilenet_thin') parser.add_argument( '--scales', type=str, default='[None]', help='for multiple scales, eg. [1.0, (1.1, 0.05)]') parser.add_argument( '--stick_only', action='store_true', help='save output without other analysis') parser.add_argument('--plot_3d', action='store_true', help='save 3d poses') args = parser.parse_args() scales = ast.literal_eval(args.scales) w, h = model_wh(args.resolution) e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h)) # estimate human poses from a single image ! image = common.read_imgfile(args.image, None, None) # image = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21) t = time.time() humans = e.inference(image, scales=scales) elapsed = time.time() - t logger.info('inference image: %s in %.4f seconds.' % (args.image, elapsed)) image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False) # cv2.imshow('tf-pose-estimation result', image) # cv2.waitKey() import matplotlib.pyplot as plt fig = plt.figure() a = fig.add_subplot(2, 2, 1) a.set_title('Result') rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) if args.stick_only: cv2.imwrite('output/test.png', image) logger.info('image saved: {}'.format('output/test.png')) import sys sys.exit(0) plt.imshow(rgb_image) bgimg = cv2.cvtColor(image.astype(np.uint8), cv2.COLOR_BGR2RGB) bgimg = cv2.resize( bgimg, (e.heatMat.shape[1], e.heatMat.shape[0]), interpolation=cv2.INTER_AREA) # show network output a = fig.add_subplot(2, 2, 2) plt.imshow(bgimg, alpha=0.5) tmp = np.amax(e.heatMat[:, :, :-1], axis=2) plt.imshow(tmp, cmap=plt.cm.gray, alpha=0.5) plt.colorbar() tmp2 = e.pafMat.transpose((2, 0, 1)) tmp2_odd = np.amax(np.absolute(tmp2[::2, :, :]), axis=0) tmp2_even = np.amax(np.absolute(tmp2[1::2, :, :]), axis=0) a = fig.add_subplot(2, 2, 3) a.set_title('Vectormap-x') # plt.imshow(CocoPose.get_bgimg(inp, target_size=(vectmap.shape[1], vectmap.shape[0])), alpha=0.5) plt.imshow(tmp2_odd, cmap=plt.cm.gray, alpha=0.5) plt.colorbar() a = fig.add_subplot(2, 2, 4) a.set_title('Vectormap-y') # plt.imshow(CocoPose.get_bgimg(inp, target_size=(vectmap.shape[1], vectmap.shape[0])), alpha=0.5) plt.imshow(tmp2_even, cmap=plt.cm.gray, alpha=0.5) plt.colorbar() if not os.path.exists('output'): os.mkdir('output') plt.savefig('output/test.png') logger.info('image saved: {}'.format('output/test.png')) if not args.plot_3d: import sys sys.exit(0) #For plotting in 3d logger.info('3d lifting initialization.') poseLifting = Prob3dPose('./src/lifting/models/prob_model_params.mat') image_h, image_w = image.shape[:2] standard_w = 640 standard_h = 480 pose_2d_mpiis = [] visibilities = [] for human in humans: pose_2d_mpii, visibility = common.MPIIPart.from_coco(human) pose_2d_mpiis.append([(int(x * standard_w + 0.5), int(y * standard_h + 0.5)) for x, y in pose_2d_mpii]) visibilities.append(visibility) pose_2d_mpiis = np.array(pose_2d_mpiis) visibilities = np.array(visibilities) transformed_pose2d, weights = poseLifting.transform_joints( pose_2d_mpiis, visibilities) pose_3d = poseLifting.compute_3d(transformed_pose2d, weights) for i, single_3d in enumerate(pose_3d): plot_pose(single_3d) plt.draw() plt.savefig('output/pose_3d_test.png') logger.info('3d plot saved: {}'.format('output/pose_3d_test.png'))
ch.setFormatter(formatter) logger.addHandler(ch) if __name__ == '__main__': parser = argparse.ArgumentParser(description='tf-pose-estimation run') # parser.add_argument('--image', type=str, default='/Users/ildoonet/Downloads/me.jpg') parser.add_argument('--image', type=str, default='./images/apink2.jpg') # parser.add_argument('--model', type=str, default='mobilenet_320x240', help='cmu / mobilenet_320x240') parser.add_argument('--model', type=str, default='mobilenet_thin_432x368', help='cmu_640x480 / cmu_640x360 / mobilenet_thin_432x368') parser.add_argument('--scales', type=str, default='[None]', help='for multiple scales, eg. [1.0, (1.1, 0.05)]') args = parser.parse_args() scales = ast.literal_eval(scales) w, h = model_wh(args.model) e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h)) # estimate human poses from a single image ! image = common.read_imgfile(args.image, None, None) # image = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21) t = time.time() humans = e.inference(image, scales=[None]) # humans = e.inference(image, scales=[None, (0.7, 0.5, 1.8)]) # humans = e.inference(image, scales=[(1.2, 0.05)]) # humans = e.inference(image, scales=[(0.2, 0.2, 1.4)]) elapsed = time.time() - t logger.info('inference image: %s in %.4f seconds.' % (args.image, elapsed)) image = cv2.imread(args.image, cv2.IMREAD_COLOR) image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)