def __init__(self, weights, model_name='mobile0.25', use_mlu=True, use_jit=False):
    """Build the RetinaFace detector, optionally quantized and placed on the MLU.

    :param weights: checkpoint path; handed to RetinaFaceDet as ``model_path``
        and, on the MLU path, also read with ``torch.load``
    :param model_name: handed to RetinaFaceDet as ``model_type``
    :param use_mlu: quantize the network, reload ``weights`` into it and move
        it to ``ct.mlu_device()``
    :param use_jit: trace the MLU model with ``torch.jit.trace``; only
        consulted when ``use_mlu`` is true
    """
    super(mlu_face_det_inference, self).__init__()
    self.use_mlu = use_mlu
    self.use_jit = use_jit

    # RetinaFaceDet is asked to load the weights itself only on the CPU path;
    # on the MLU path they are loaded below, after quantization.
    detector = RetinaFaceDet(
        model_type=model_name,
        model_path=weights,
        use_cpu=True,
        loading=not use_mlu,
    )
    net = detector.net

    if not use_mlu:
        print('==using pytorch model==')
        net.eval()
        self.model = net
        self.infer = detector
        return

    print('==using mlu quantization model==')
    net = mlu_quantize.quantize_dynamic_mlu(net)
    state = torch.load(weights, map_location='cpu')
    # strict=False: checkpoint keys are allowed to differ from the quantized net
    net.load_state_dict(state, strict=False)
    net.eval()
    net = net.to(ct.mlu_device())

    if use_jit:
        print('==jit==')
        # Random 1x3x640x480 input scaled to [0, 255) used only for tracing
        dummy = (torch.rand(1, 3, 640, 480) * 255).to(ct.mlu_device())
        net = torch.jit.trace(net, dummy, check_trace=False)

    self.model = net
    self.infer = detector
def __init__(self, weights, model_name='resnet101_irse_mx', use_mlu=True, use_jit=False):
    """Build the face-recognition backbone, optionally quantized and placed on the MLU.

    :param weights: checkpoint path; given to Inference as ``ckpt_fpath`` on
        the CPU path, or read with ``torch.load`` on the MLU path
    :param model_name: handed to Inference as ``backbone_type``
    :param use_mlu: quantize the network, reload ``weights`` into it and move
        it to ``ct.mlu_device()``
    :param use_jit: trace the MLU model with ``torch.jit.trace``; only
        consulted when ``use_mlu`` is true
    """
    super(mlu_face_rec_inference, self).__init__()
    self.use_mlu = use_mlu
    self.use_jit = use_jit

    # On the MLU path Inference gets no checkpoint (ckpt_fpath=None); the
    # weights are loaded below, after quantization.
    wrapper = Inference(
        backbone_type=model_name,
        ckpt_fpath=None if use_mlu else weights,
        device='cpu',
    )
    net = wrapper.model

    if not use_mlu:
        print('==using pytorch model==')
        net.eval()
        self.model = net
        return

    print('==using mlu quantization model==')
    net = mlu_quantize.quantize_dynamic_mlu(net)
    state = torch.load(weights, map_location='cpu')
    # strict=False: checkpoint keys are allowed to differ from the quantized net
    net.load_state_dict(state, strict=False)
    net.eval()
    net = net.to(ct.mlu_device())

    if use_jit:
        print('==jit==')
        # Random 1x3x112x112 input scaled to [0, 255) used only for tracing
        dummy = (torch.rand(1, 3, 112, 112) * 255).to(ct.mlu_device())
        net = torch.jit.trace(net, dummy, check_trace=False)

    self.model = net
def execute(self, img_cv2):
    """Run the recognition model on one image or a list of images.

    :param img_cv2: img_cv2 = cv2.imread() or [cv2.imread(c) for c in image_list]
    :return: unnormalized feature [N,512], N = len(img_cv2) if isinstance(img_cv2,list) else 1.
        An empty list returns a float32 array of shape (0, 512) without
        calling the model.
    """
    if isinstance(img_cv2, list):
        if not img_cv2:
            # torch.cat rejects an empty sequence; honour the documented
            # [N, 512] contract for N == 0 instead of crashing.
            import numpy as np
            return np.empty((0, 512), dtype=np.float32)
        data = torch.cat(
            [preprocess(c, mlu=self.use_mlu) for c in img_cv2], dim=0
        )
    else:
        data = preprocess(img_cv2, mlu=self.use_mlu)

    if self.use_mlu:
        data = data.to(ct.mlu_device())

    # Pure inference: no autograd graph is needed for the forward pass.
    with torch.no_grad():
        out = self.model(data)

    # Flatten whatever the model emits into rows of 512 values.
    return out.cpu().detach().numpy().reshape(-1, 512)
def execute(self, img_cv2, dst_size=None, threshold=0.8, topk=5000, keep_topk=750, nms_threshold=0.2):
    """Detect faces in one image or a list of images.

    :param img_cv2: img_cv2 = cv2.imread() or [cv2.imread(c) for c in image_list]
    :param dst_size: [width,height] all image will be scaled into that size for
        detection, but bbox will be returned in its original scale.
        Defaults to [480, 640] when omitted or None.
    :param threshold: forwarded to ``self.infer.execute_batch_mlu``
    :param topk: forwarded to ``self.infer.execute_batch_mlu``
    :param keep_topk: forwarded to ``self.infer.execute_batch_mlu``
    :param nms_threshold: forwarded to ``self.infer.execute_batch_mlu``
    :return: detss = list of np.array, [ np.array(n,15)]
        where len(detss) = len(img_cv2) if isinstance(img_cv2,list) else 1
        n = detected faces in each image
        15 :[x0,y0,x1,y1,score,landmarkx0,landmarky0,...,]
        An empty input list returns an empty list without calling the model.
    """
    # A fresh list per call avoids sharing one mutable default between calls.
    if dst_size is None:
        dst_size = [480, 640]

    if isinstance(img_cv2, list):
        if not img_cv2:
            # torch.cat rejects an empty sequence; zero images -> zero results.
            return []
        prepared = [
            preprocess_retinaface(c, dst_size, mlu=self.use_mlu) for c in img_cv2
        ]
        # Each preprocess result carries the tensor at [0] and its ratio at [1].
        ratio = [p[1] for p in prepared]
        data = torch.cat([p[0] for p in prepared], dim=0)
    else:
        prepared = preprocess_retinaface(img_cv2, dst_size=dst_size, mlu=self.use_mlu)
        ratio = [prepared[1]]
        data = prepared[0]

    if self.use_mlu:
        data = data.to(ct.mlu_device())

    # Pure inference: no autograd graph is needed for the forward pass.
    with torch.no_grad():
        locs, confs, landmss = self.model(data)

    if self.use_mlu:
        locs = fetch_cpu_data(locs, use_half_input=False, to_numpy=False)
        confs = fetch_cpu_data(confs, use_half_input=False, to_numpy=False)
        landmss = fetch_cpu_data(landmss, use_half_input=False, to_numpy=False)

    net_output = [locs, confs, landmss]
    dets = self.infer.execute_batch_mlu(
        net_output=net_output,
        batch_shape=data.shape,
        threshold=threshold,
        topk=topk,
        keep_topk=keep_topk,
        nms_threshold=nms_threshold,
    )
    assert len(dets) == len(ratio), 'Err len(dets) != len(ratio)'

    detss = []
    for det, scale in zip(dets, ratio):
        # Dividing by the ratio rescales every column; column 4 is then
        # multiplied back so the score is not rescaled with the coordinates.
        det = det / scale
        det[:, 4] = det[:, 4] * scale
        detss.append(det)
    return detss
# Keep at most args.batch_size images, taken from the front of the list.
K = min(len(image_list), args.batch_size)
image_list = image_list[:K]
print(f'sampled {len(image_list)} data')

# Read every image and preprocess it into one tensor per image.
input_img = [cv2.imread(path) for path in image_list]
data = [preprocess(img, mlu=args.mlu) for img in input_img]
print(f'len of data: {len(data)}')

# Concatenate the per-image tensors along dim 0.
data = torch.cat(data, dim=0)
print('data shape =', data.shape)

if args.mlu:
    if args.half_input:
        data = data.type(torch.HalfTensor)
    data = data.to(ct.mlu_device())

print('==pytorch==')
use_device = 'cpu'
backbone_type = args.model_name
# model_dict maps a model name to its weights file name and directory.
model_cfg = model_dict[args.model_name]
model_type = model_cfg['weights']
# pj is presumably an alias of os.path.join -- TODO confirm.
model_pth = os.path.abspath(pj(model_cfg['path'], model_type))
infer = Inference(backbone_type=backbone_type,
                  ckpt_fpath=model_pth,
                  device=use_device)
print('==end==')

if not args.mlu:
    model = infer.model
import torch
import torch_mlu.core.mlu_model as ct

# Minimal script: multiply two half-precision matrices on the MLU device.
ct.set_cnml_enabled(False)
ct.set_quantized_bitwidth(16)

# Operands are drawn as float32 on the host and then cast to half.
mat1 = torch.randn(18, 80, dtype=torch.float).half()
mat2 = torch.randn(80, 18, dtype=torch.float).half()

# Move both operands to the MLU before multiplying.
mat1 = mat1.to(ct.mlu_device())
mat2 = mat2.to(ct.mlu_device())

# (18 x 80) @ (80 x 18); the result is not stored.
mat1.mm(mat2)
default='cpu', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') parser.add_argument('--jit', type=bool, help='fusion', default=False) parser.add_argument('--save', type=bool, default=False, help='selection of save *.cambrcion') opt = parser.parse_args() # 获取yolov5网络文件 net = yolo.get_empty_model(opt) quantized_net = torch_mlu.core.mlu_quantize.quantize_dynamic_mlu(net) state_dict = torch.load('yolov5s_int8.pt') quantized_net.load_state_dict(state_dict, strict=False) # 设置为推理模式 quantized_net = quantized_net.eval().float() device = ct.mlu_device() quantized_net.to(ct.mlu_device()) # 读取图片 img_mat = cv2.imread('images/image.jpg') # 预处理 img = letter_box(img_mat) print(img.shape) # 设置在线融合模式 if opt.jit: if opt.save: ct.save_as_cambricon('yolov5s') torch.set_grad_enabled(False) ct.set_core_number(4) trace_input = torch.randn(1, 3, 640, 640, dtype=torch.float) trace_input = trace_input.to(ct.mlu_device()) quantized_net = torch.jit.trace(quantized_net,