def __init__(self, gpu=1):
    """Load the Hopenet snapshot onto a GPU and build the preprocessing.

    Args:
        gpu: CUDA device ordinal that the model and ``idx_tensor`` are moved
            to. Also stored as ``self.gpu``.
    """
    self.gpu = gpu
    # The device is selected only through the explicit ordinal used below.
    # Previously CUDA_VISIBLE_DEVICES was also set to str(gpu); once that
    # mask takes effect the single visible device is renumbered to 0, so
    # ``.cuda(gpu)`` with gpu >= 1 fails with "invalid device ordinal".

    self.snapshot_path = os.path.join(WORK_DIR, 'model/hopenet_robust_alpha1.pkl')

    # ResNet50 structure with 66 output classes.
    self.model = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3], 66)

    # Deserialize onto the CPU first so the snapshot loads regardless of
    # which device it was saved from; the weights are moved to ``gpu`` below.
    saved_state_dict = torch.load(
        self.snapshot_path, map_location=lambda storage, loc: storage)
    self.model.load_state_dict(saved_state_dict)

    # A leftover debug ONNX trace (batch of 5 random images) used to run
    # here; its result was discarded, so it has been dropped.

    self.model.cuda(self.gpu)
    self.model.eval()

    # Resize -> center crop to 224x224 -> tensor -> per-channel normalize.
    self.transformations = transforms.Compose([
        transforms.Resize(224),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

    # Float vector 0..65 living on the same device as the model.
    self.idx_tensor = torch.FloatTensor(list(range(66))).cuda(self.gpu)
def load_model(snapshot_path=r'hopenet_robust_alpha1.pkl', gpu=0):
    """Build Hopenet, load its weights and return it in eval mode on a GPU.

    Args:
        snapshot_path: Path of the saved state dict. Defaults to the file
            name that was previously hard-coded.
        gpu: CUDA device ordinal to move the model to. Defaults to 0, the
            previously hard-coded device.

    Returns:
        The ``hopenet.Hopenet`` instance, on ``gpu`` and in eval mode.
    """
    # ResNet50 structure with 66 output classes.
    model = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3], 66)

    # Deserialize onto the CPU so loading does not depend on the device the
    # snapshot was saved from; the model is moved to ``gpu`` afterwards.
    state_dict = torch.load(snapshot_path,
                            map_location=lambda storage, loc: storage)
    model.load_state_dict(state_dict)

    model.cuda(gpu)
    model.eval()
    return model
def __init__(self):
    """Create the CPU Hopenet model, its input pipeline and the index vector."""
    # ResNet50 structure with 66 output classes.
    self.model = hopenet.Hopenet(
        torchvision.models.resnet.Bottleneck,
        [3, 4, 6, 3],
        66,
    )

    # Weights are mapped to the CPU while being deserialized.
    self.model.load_state_dict(
        torch.load('../model/hopenet_robust_alpha1.pkl', map_location="cpu")
    )
    self.model.eval()

    # Resize -> center crop to 224x224 -> tensor -> per-channel normalize.
    preprocessing_steps = [
        transforms.Resize(224),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ]
    self.transformations = transforms.Compose(preprocessing_steps)

    # Float vector holding 0..65.
    self.idx_tensor = torch.FloatTensor(list(range(66)))
def __init__(self):
    """Set up Hopenet and the dlib CNN face detector on GPU 0.

    Side effects: enables cuDNN, creates ``output/images_celeba`` when it is
    missing, and terminates the process through ``sys.exit`` when the input
    folder does not exist.
    """
    cudnn.enabled = True
    self.gpu = 0

    snapshot_path = '/home/xiangmingcan/notespace/deep-head-pose/hopenet_robust_alpha1.pkl'
    input_path = '/home/xiangmingcan/notespace/cvpr_data/celeba/'
    output = 'output/celeba.txt'
    face_model = '/home/xiangmingcan/notespace/deep-head-pose/mmod_human_face_detector.dat'

    # Output images go to "<out_dir>/images_<output name without extension>".
    out_dir, name = os.path.split(output)
    write_path = os.path.join(out_dir, "images_" + os.path.splitext(name)[0])
    if not os.path.exists(write_path):
        os.makedirs(write_path)

    if not os.path.exists(input_path):
        sys.exit('Folder does not exist')

    # ResNet50 structure
    self.model = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3], 66)

    # Dlib face detection model
    self.cnn_face_detector = dlib.cnn_face_detection_model_v1(face_model)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('Loading snapshot.')
    # Deserialize onto the CPU so loading does not depend on the device the
    # snapshot was saved from; the model is moved to the GPU below.
    saved_state_dict = torch.load(
        snapshot_path, map_location=lambda storage, loc: storage)
    self.model.load_state_dict(saved_state_dict)

    print('Loading data.')
    # ``Scale`` was renamed to ``Resize`` and later removed from torchvision;
    # prefer ``Resize`` and fall back to ``Scale`` only on old releases.
    resize = getattr(transforms, 'Resize', None) or transforms.Scale
    self.transformations = transforms.Compose([
        resize(224),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

    self.model.cuda(self.gpu)

    print('Ready to test network.')
    # Change model to 'eval' mode (BN uses moving mean/var).
    self.model.eval()

    # Float vector 0..65 living on the same device as the model.
    self.idx_tensor = torch.FloatTensor(list(range(66))).cuda(self.gpu)
def __init__(self, snapshot_path, gpu=0):
    """Load a Hopenet snapshot onto a GPU and build the preprocessing.

    Args:
        snapshot_path: Path of the saved Hopenet state dict.
        gpu: CUDA device ordinal for the model and ``idx_tensor``; also
            stored as ``self.gpu``.
    """
    # ResNet50 structure with 66 output classes.
    self.model = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3], 66)

    # Deserialize onto the CPU so loading does not depend on the device the
    # snapshot was saved from; the model is moved to ``gpu`` below.
    saved_state_dict = torch.load(
        snapshot_path, map_location=lambda storage, loc: storage)
    self.model.load_state_dict(saved_state_dict)

    # ``Scale`` was renamed to ``Resize`` and later removed from torchvision;
    # prefer ``Resize`` and fall back to ``Scale`` only on old releases.
    resize = getattr(transforms, 'Resize', None) or transforms.Scale
    self.transformations = transforms.Compose([
        resize(224),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

    self.gpu = gpu
    self.model.cuda(gpu)
    self.model.eval()

    # Float vector 0..65 living on the same device as the model.
    self.idx_tensor = torch.FloatTensor(list(range(66))).cuda(gpu)
def __init__(self, gpu=1):
    """Load the Hopenet snapshot onto a GPU and build the preprocessing.

    Args:
        gpu: CUDA device ordinal for the model and ``idx_tensor``; stored as
            ``self.gpu``. Defaults to 1, the value that used to be
            hard-coded.
    """
    # Load the network model.
    self.snapshot_path = os.path.join(WORK_DIR, 'models/hopenet_robust_alpha1.pkl')
    self.model = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3], 66)

    # Load snapshot. Deserialize onto the CPU so loading does not depend on
    # the device the snapshot was saved from.
    saved_state_dict = torch.load(
        self.snapshot_path, map_location=lambda storage, loc: storage)
    self.model.load_state_dict(saved_state_dict)

    # Choose the device the model runs on.
    self.gpu = gpu
    self.model.cuda(self.gpu)
    self.model.eval()

    # Image preprocessing applied before an image is fed to the model.
    self.transformations = transforms.Compose([
        transforms.Resize(224),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

    # Float vector 0..65 living on the same device as the model.
    self.idx_tensor = torch.FloatTensor(list(range(66))).cuda(self.gpu)
def module_init(args):
    """Create every object needed for head-pose inference.

    Args:
        args: Object exposing ``gpu_id`` (CUDA device ordinal) and
            ``pretrained`` (path of the saved Hopenet state dict).

    Returns:
        Tuple ``(model, mtcnn, transformations, softmax, idx_tensor)``: the
        Hopenet model on the GPU in eval mode, an ``MTCNN`` instance, the
        image preprocessing pipeline, a ``Softmax(dim=1)`` module on the GPU
        and a float tensor 0..65 on the GPU.
    """
    gpu = args.gpu_id
    pretrained_path = args.pretrained

    # ResNet50 structure
    model = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3], 66)

    # Load snapshot. Deserialize onto the CPU so loading does not depend on
    # the device the snapshot was saved from; the model moves to ``gpu`` next.
    saved_state_dict = torch.load(
        pretrained_path, map_location=lambda storage, loc: storage)
    model.load_state_dict(saved_state_dict)
    model.cuda(gpu)
    model.eval()

    mtcnn = MTCNN()

    # Resize -> center crop to 224x224 -> tensor -> per-channel normalize.
    transformations = transforms.Compose([
        transforms.Resize(224),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

    softmax = nn.Softmax(dim=1).cuda(gpu)
    idx_tensor = torch.FloatTensor(list(range(66))).cuda(gpu)

    return model, mtcnn, transformations, softmax, idx_tensor
type=str) args = parser.parse_args() return args if __name__ == '__main__': args = parse_args() cudnn.enabled = True gpu = args.gpu_id snapshot_path = args.snapshot # ResNet50 structure model = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3], 66) print('Loading snapshot.') # Load snapshot saved_state_dict = torch.load(snapshot_path) model.load_state_dict(saved_state_dict) print('Loading data.') transformations = transforms.Compose([ transforms.Scale(224), transforms.CenterCrop(224), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ]) # rgb模式
return y / y.sum(axis=axis, keepdims=True) if __name__ == '__main__': args = parse_args() batch_size = 1 gpu = args.gpu_id ctx = mx.gpu(gpu) # snapshot_path = args.snapshot # video_path = args.video_path snapshot_path = "./data/2018-12-08-16-29-08-458567" face_model_path = "D:\\CV\\dlib-models-master\\mmod_human_face_detector.dat" # ResNet50 structure model = hopenet.Hopenet(model_zoo.vision.BottleneckV1, [3, 4, 6, 3], 66) # Dlib face detection model cnn_face_detector = dlib.cnn_face_detection_model_v1(face_model_path) print('Loading snapshot.') # Load snapshot model = gluon.nn.SymbolBlock.imports(os.path.join(snapshot_path, "hopenet-4-symbol.json"), ['data'], os.path.join(snapshot_path, "hopenet-4-0000.params"), ctx=ctx) print('Loading data.') transformations = transforms.Compose([
if __name__ == '__main__': args = parse_args() cudnn.enabled = True num_epochs = args.num_epochs batch_size = args.batch_size gpu = args.gpu_id if not os.path.exists('output/snapshots'): os.makedirs('output/snapshots') if not os.path.exists('output/best_snapshot'): os.makedirs('output/best_snapshot') # ResNet50 structure if args.model == 'resnet50': model = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3], 66) elif args.model == 'resnet18': model = hopenet.Hopenet(torchvision.models.resnet.BasicBlock, [2, 2, 2, 2], 66) elif args.model == 'resnet152': model = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 8, 36, 3], 66) if args.snapshot == '': if args.model == 'resnet50': load_filtered_state_dict( model, model_zoo.load_url( 'https://download.pytorch.org/models/resnet50-19c8e357.pth' )) elif args.model == 'resnet18':
args = parse_args() if not os.path.exists('../log'): os.makedirs('../log') log = open('../log/hopenet.txt', 'a') cudnn.enabled = True num_epochs = args.num_epochs batch_size = args.batch_size gpu = args.gpu_id if not os.path.exists('../models'): os.makedirs('../models') # ResNet50 structure model = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3], 68) # 66 姿态角度划分的等级数量,即类别数 if args.snapshot == '': load_filtered_state_dict( model, model_zoo.load_url( 'https://download.pytorch.org/models/resnet50-19c8e357.pth')) else: saved_state_dict = torch.load(args.snapshot) model.load_state_dict(saved_state_dict) print 'Loading data.' transformations = transforms.Compose([ transforms.Scale(240), transforms.RandomCrop(224),
def test_on_video_dlib_new_init(trigger):
    """Run head-pose estimation on a webcam stream or a video file.

    For every frame, faces are found with the dlib CNN detector. Each
    detection with confidence above 0.5 is cropped with an enlarged box and
    passed through Hopenet. The predicted yaw/pitch/roll are drawn on the
    frame, appended to ``output/video/output-<output_string>.txt`` and
    published through the module globals ``frame``, ``yawx``, ``pitchx`` and
    ``rollx``. Annotated frames are written to
    ``output/video/output-<output_string>.avi``.

    Args:
        trigger: Object whose ``emit()`` is called once per processed frame
            (presumably a GUI signal -- confirm against the caller).
    """
    global frame, yawx, pitchx, rollx

    args = parse_args()
    # NOTE(review): the "row" key is shown verbatim in the on-frame label; it
    # looks like a typo for "roll" but is kept so the output is unchanged.
    GESTURE = set(["yaw", "pitch", "row"])
    PRT_GES = {obj: 0 for obj in GESTURE}

    cudnn.enabled = True
    gpu = args["gpu_id"]
    snapshot_path = args["snapshot"]
    use_video_file = bool(args.get("video_path", False))

    out_dir = 'output/video'
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    if not use_video_file:
        print("[INFO] starting video stream...")
        video = VideoStream(src=0).start()
        time.sleep(1.0)
    # otherwise, grab a reference to the video file
    else:
        print("[INFO] opening video file...")
        video = cv2.VideoCapture(args["video_path"])

    out = None
    txt_out = None
    try:
        # ResNet50 structure
        model = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3], 66)

        # Dlib face detection model
        cnn_face_detector = dlib.cnn_face_detection_model_v1(args["face_model"])

        print('Loading snapshot.')
        # Deserialize onto the CPU so loading does not depend on the device
        # the snapshot was saved from; the model is moved to ``gpu`` below.
        saved_state_dict = torch.load(
            snapshot_path, map_location=lambda storage, loc: storage)
        model.load_state_dict(saved_state_dict)

        print('Loading data.')
        # ``Scale`` was renamed to ``Resize`` and later removed from
        # torchvision; fall back to ``Scale`` only on old releases.
        resize = getattr(transforms, 'Resize', None) or transforms.Scale
        transformations = transforms.Compose([
            resize(224),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ])

        model.cuda(gpu)

        print('Ready to test network.')
        # Change model to 'eval' mode (BN uses moving mean/var).
        model.eval()

        idx_tensor = torch.FloatTensor(list(range(66))).cuda(gpu)

        txt_out = open('output/video/output-%s.txt' % args["output_string"], 'w')
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        frame_num = 1

        while True:
            time.sleep(0.02)

            grabbed = video.read()
            # cv2.VideoCapture.read() returns (ok, frame); the webcam stream
            # returns the frame directly.
            frame = grabbed[1] if use_video_file else grabbed

            # End of stream. This must be checked before touching
            # ``frame.shape``, which previously raised on a None frame.
            if frame is None:
                break

            (height, width) = frame.shape[:2]

            # Create the writer lazily, once the frame size is known.
            if out is None:
                out = cv2.VideoWriter(
                    'output/video/output-%s.avi' % args["output_string"],
                    fourcc, args["fps"], (width, height))

            cv2_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Dlib detect
            dets = cnn_face_detector(cv2_frame, 1)

            for idx, det in enumerate(dets):
                # Get x_min, y_min, x_max, y_max, conf
                x_min = det.rect.left()
                y_min = det.rect.top()
                x_max = det.rect.right()
                y_max = det.rect.bottom()
                conf = det.confidence

                if conf <= 0.5:
                    continue

                # Enlarge the box: half a width on each side, 3/4 of the
                # height upwards and 1/4 downwards, clamped to the frame.
                bbox_width = abs(x_max - x_min)
                bbox_height = abs(y_max - y_min)
                x_min -= 2 * bbox_width / 4
                x_max += 2 * bbox_width / 4
                y_min -= 3 * bbox_height / 4
                y_max += bbox_height / 4
                x_min = max(x_min, 0)
                y_min = max(y_min, 0)
                x_max = min(frame.shape[1], x_max)
                y_max = min(frame.shape[0], y_max)

                # Crop image
                img = cv2_frame[int(y_min):int(y_max), int(x_min):int(x_max)]
                img = Image.fromarray(img)

                # Transform and add a leading batch dimension of 1.
                img = transformations(img)
                img_shape = img.size()
                img = img.view(1, img_shape[0], img_shape[1], img_shape[2])
                img = Variable(img).cuda(gpu)

                yaw, pitch, roll = model(img)

                # Explicit dim=1 matches the implicit choice for 2-D input
                # and avoids the implicit-dimension deprecation warning.
                yaw_predicted = F.softmax(yaw, dim=1)
                pitch_predicted = F.softmax(pitch, dim=1)
                roll_predicted = F.softmax(roll, dim=1)

                # Get continuous predictions in degrees.
                yaw_predicted = torch.sum(yaw_predicted.data[0] * idx_tensor) * 3 - 99
                pitch_predicted = torch.sum(pitch_predicted.data[0] * idx_tensor) * 3 - 99
                roll_predicted = torch.sum(roll_predicted.data[0] * idx_tensor) * 3 - 99

                # Publish the latest angles through the module globals.
                yawx = yaw_predicted
                pitchx = pitch_predicted
                rollx = roll_predicted

                PRT_GES["yaw"] = int(yaw_predicted)
                PRT_GES["pitch"] = int(pitch_predicted)
                PRT_GES["row"] = int(roll_predicted)

                # Draw the pose values on the frame.
                label = ",".join(
                    "{} : {}".format(gesture_str, gesture_data)
                    for (gesture_str, gesture_data) in PRT_GES.items())
                cv2.putText(frame, label, (10, height - 20),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 225, 0), 2)

                # One text line per detection: frame number and the angles.
                txt_out.write(str(frame_num) + ' %f %f %f\n' % (yaw_predicted, pitch_predicted, roll_predicted))
                utils.draw_axis(frame, yaw_predicted, pitch_predicted, roll_predicted,
                                tdx=(x_min + x_max) / 2, tdy=(y_min + y_max) / 2,
                                size=bbox_height / 2)

            out.write(frame)
            frame_num += 1
            trigger.emit()
    finally:
        # Release every resource even when an error interrupts processing.
        if txt_out is not None:
            txt_out.close()
        # check to see if we need to release the video writer pointer
        if out is not None:
            out.release()
        # if we are not using a video file, stop the camera video stream
        if not use_video_file:
            video.stop()
        # otherwise, release the video file pointer
        else:
            video.release()
        # close any open windows
        cv2.destroyAllWindows()
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])]) # Other parameters. cudnn.enabled = True batch_size = 1 gpu = 0 # Load image, tilt and pan arrays for the dataset. img, tilt, pan = array_from_npy(dataset_npy, dataset_csv) print(img.shape) # Estimator model. pose_estimator = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3], 66) saved_state_dict = torch.load(estimator_path) pose_estimator.load_state_dict(saved_state_dict) pose_estimator.cuda(gpu) pose_estimator.eval() # Get score for the dataset (tilt, pan and global error). idx_tensor = [idx for idx in range(66)] idx_tensor = torch.FloatTensor(idx_tensor).cuda(gpu) pred = [] start_time = time.time() for i in img:
args = parse_args() cudnn.enabled = True gpu = args.gpu_id snapshot_path = args.snapshot model_type = args.model bin_width_degrees = args.bin_width_degrees number_of_classes = 198 // bin_width_degrees # Init selected architecture if model_type == 'MobileNetV2': model = hopenet.MobileNetV2(number_of_classes) elif model_type == 'ResNet18': model = hopenet.Hopenet(torchvision.models.resnet.BasicBlock, [2, 2, 2, 2], number_of_classes) elif model_type == 'ResNet101': model = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 23, 3], number_of_classes) elif model_type == 'ResNet50': model = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3], number_of_classes) print 'Loading snapshot.' # Load snapshot saved_state_dict = torch.load(snapshot_path) model.load_state_dict(saved_state_dict) print 'Loading data.' transformations = transforms.Compose([