def train():
    """Train the SSD detector on the VOC dataset.

    The VGG backbone is initialised from ``./weights/vgg16_reducedfc.pth``.
    Training stops after ``Config.max_iter`` iterations or 1000 epochs,
    whichever comes first.  A checkpoint is written every 10000 iterations
    and once more when training ends.
    """
    dataset = voc0712.VOCDetection(
        root=Config.dataset_root,
        transform=augmentations.SSDAugmentation(Config.image_size,
                                                Config.MEANS))
    data_loader = data.DataLoader(dataset,
                                  Config.batch_size,
                                  num_workers=Config.data_load_number_worker,
                                  shuffle=True,
                                  collate_fn=detection_collate,
                                  pin_memory=True)

    net = ssd_net_vgg.SSD()
    # Load the pre-trained backbone BEFORE wrapping the model: DataParallel
    # only exposes the wrapped model as ``.module``, so ``net.vgg`` would not
    # resolve on the wrapper.
    vgg_weights = torch.load('./weights/vgg16_reducedfc.pth')
    net.vgg.load_state_dict(vgg_weights)
    # Wrap exactly once.  Wrapping again on the CUDA path would prefix every
    # state-dict key with 'module.module.' in the saved checkpoints.
    net = nn.DataParallel(net)
    if Config.use_cuda:
        net = net.cuda()
    net.train()

    loss_fun = loss_function.LossFun()
    optimizer = optim.SGD(net.parameters(),
                          lr=Config.lr,
                          momentum=Config.momentum,
                          weight_decay=Config.weight_decacy)

    iteration = 0
    step_index = 0
    reached_max_iter = False
    for epoch in range(1000):
        for step, (img, target) in enumerate(data_loader):
            if Config.use_cuda:
                img = img.cuda()
                target = [ann.cuda() for ann in target]
            # Only convert when the loader did not already yield a tensor;
            # the legacy ``torch.Tensor(...)`` constructor must not be fed a
            # tensor that has already been moved to the GPU.
            if not torch.is_tensor(img):
                img = torch.Tensor(img)

            loc_pre, conf_pre = net(img)
            priors = utils.default_prior_box()
            optimizer.zero_grad()
            loss_l, loss_c = loss_fun((loc_pre, conf_pre), target, priors)
            loss = loss_l + loss_c
            loss.backward()
            optimizer.step()

            print('epoch : ', epoch, ' iter : ', iteration, ' step : ', step,
                  ' loss : ', loss.item())

            iteration += 1
            if iteration in Config.lr_steps:
                step_index += 1
                adjust_learning_rate(optimizer, Config.gamma, step_index)
            if iteration % 10000 == 0:
                # Save a checkpoint every 10000 iterations.
                torch.save(
                    net.state_dict(), 'weights/final_20200226_VOC_' +
                    repr(100000 + iteration) + '.pth')
            if iteration >= Config.max_iter:
                reached_max_iter = True
                break
        # Leave the epoch loop too; otherwise every remaining epoch would
        # still run one extra training step before breaking again.
        if reached_max_iter:
            break

    torch.save(net.state_dict(), 'weights/final_20200223_voc_200000.pth')
def test_one_picture(picture_path):
    """Run the SSD detector on one image file and show the annotated result.

    Loads ``./weights/ssd_voc_100EPOCH.pth``, runs the network on the image
    resized to 300x300, draws every detection whose score is at least 0.1
    and displays the image in an OpenCV window.

    Args:
        picture_path: path of the image file to analyse.

    Raises:
        FileNotFoundError: if ``picture_path`` cannot be read as an image.
    """
    if torch.cuda.is_available():
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
    colors_tableau = [(255, 255, 255), (31, 119, 180), (174, 199, 232),
                      (255, 127, 14), (255, 187, 120), (44, 160, 44),
                      (152, 223, 138), (214, 39, 40), (255, 152, 150),
                      (148, 103, 189), (197, 176, 213), (140, 86, 75),
                      (196, 156, 148), (227, 119, 194), (247, 182, 210),
                      (127, 127, 127), (199, 199, 199), (188, 189, 34),
                      (219, 219, 141), (23, 190, 207), (158, 218, 229),
                      (158, 218, 229), (158, 218, 229)]

    net = SSD()  # initialize SSD
    net = torch.nn.DataParallel(net)
    net.train(mode=False)
    net.load_state_dict(
        torch.load('./weights/ssd_voc_100EPOCH.pth',
                   map_location=lambda storage, loc: storage))

    image = cv2.imread(picture_path, cv2.IMREAD_COLOR)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(
            'cannot read image: {}'.format(picture_path))

    # Pre-process: resize, subtract the per-channel means, reverse the
    # channel axis and move channels first.
    x = cv2.resize(image, (300, 300)).astype(np.float32)
    x -= (104.0, 117.0, 123.0)
    x = x.astype(np.float32)
    x = x[:, :, ::-1].copy()
    x = torch.from_numpy(x).permute(2, 0, 1)
    xx = Variable(x.unsqueeze(0))  # add the batch dimension
    if torch.cuda.is_available():
        xx = xx.cuda()

    # Inference only: no autograd graph is needed for the forward pass.
    with torch.no_grad():
        y = net(xx)

    softmax = nn.Softmax(dim=-1)
    detect = Detect(config.class_num, 0, 200, 0.01, 0.45)
    priors = utils.default_prior_box()
    loc, conf = y
    loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
    conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)
    detections = detect(
        loc.view(loc.size(0), -1, 4),
        softmax(conf.view(conf.size(0), -1, config.class_num)),
        torch.cat([o.view(-1, 4) for o in priors], 0)).data
    labels = VOC_CLASSES

    # Scale each detection back up to the image: (w, h, w, h).
    scale = torch.Tensor(image.shape[1::-1]).repeat(2)
    for i in range(detections.size(1)):
        for j in range(detections.size(2)):
            if detections[0, i, j, 0] >= 0.1:
                score = detections[0, i, j, 0]
                label_name = labels[i - 1]
                display_txt = '%s: %.2f' % (label_name, score)
                pt = (detections[0, i, j, 1:] * scale).cpu().numpy()
                # OpenCV drawing functions require integer pixel coordinates.
                x1, y1, x2, y2 = (int(v) for v in pt)
                color = colors_tableau[i]
                cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
                cv2.putText(image, display_txt, (x1, y1 + 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.4, (255, 255, 255), 1,
                            8)
    cv2.imshow('test', image)
    cv2.waitKey(100000)
def Detect(self, image):
    """Run ``self.net`` on an image and return the detections scoring >= 0.4.

    Args:
        image: image array accepted by ``cv2.resize`` (presumably BGR as
            returned by ``cv2.imread`` -- confirm with callers).

    Returns:
        A list with one entry per detection, each of the form
        ``[label_name, score, x1, y1, x2, y2]``.  ``score`` is the element
        taken from the detection tensor and the coordinates are scaled to
        the size of ``image``.
    """
    # Pre-process: resize, subtract the per-channel means, reverse the
    # channel axis and move channels first.
    x = cv2.resize(image, (300, 300)).astype(np.float32)
    x -= (104.0, 117.0, 123.0)
    x = x.astype(np.float32)
    x = x[:, :, ::-1].copy()
    x = torch.from_numpy(x).permute(2, 0, 1)
    xx = Variable(x.unsqueeze(0))  # add the batch dimension
    if torch.cuda.is_available():
        xx = xx.cuda()
    y = self.net(xx)

    softmax = nn.Softmax(dim=-1)
    # Inside this method ``Detect`` resolves to the module-level name, not to
    # this method.
    detect = Detect(config.class_num, 0, 200, 0.01, 0.45)
    priors = utils.default_prior_box()
    loc, conf = y
    loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
    conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)
    detections = detect(
        loc.view(loc.size(0), -1, 4),
        softmax(conf.view(conf.size(0), -1, config.class_num)),
        torch.cat([o.view(-1, 4) for o in priors], 0)).data
    labels = VOC_CLASSES

    result_all = []
    # Scale each detection back up to the image: (w, h, w, h).
    scale = torch.Tensor(image.shape[1::-1]).repeat(2)
    for i in range(detections.size(1)):
        # Bounded scan: stop at the first slot below the threshold, and never
        # index past the last slot even when every slot passes it.
        for j in range(detections.size(2)):
            if not detections[0, i, j, 0] >= 0.4:
                break
            score = detections[0, i, j, 0]
            label_name = labels[i - 1]
            pt = (detections[0, i, j, 1:] * scale).cpu().numpy()
            result_all.append(
                [label_name, score, pt[0], pt[1], pt[2], pt[3]])
    return result_all
# Inference snippet: run the SSD network ``net`` (not defined in this
# fragment) on ./dnf_test.jpg and decode the raw outputs into detections.
img_id = 60
name = 'dnf_test'
image = cv2.imread('./' + name + '.jpg', cv2.IMREAD_COLOR)
# Resize to 300x300 and subtract the per-channel means.
x = cv2.resize(image, (300, 300)).astype(np.float32)
x -= (104.0, 117.0, 123.0)
x = x.astype(np.float32)
# Reverse the channel axis; the copy gives torch.from_numpy an array without
# negative strides.
x = x[:, :, ::-1].copy()
# plt.imshow(x)
x = torch.from_numpy(x).permute(2, 0, 1)  # move the channel axis first
xx = Variable(x.unsqueeze(0))  # wrap tensor in Variable (adds batch dim)
if torch.cuda.is_available():
    xx = xx.cuda()
y = net(xx)
softmax = nn.Softmax(dim=-1)
detect = Detect(config.class_num, 0, 200, 0.01, 0.45)
priors = utils.default_prior_box()
loc, conf = y
# Flatten every output in ``loc`` / ``conf`` per sample and concatenate them.
loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)
# Decode: boxes as (batch, -1, 4), class scores softmaxed over the last
# axis, priors stacked as (-1, 4).
detections = detect(loc.view(loc.size(0), -1, 4),
                    softmax(conf.view(conf.size(0), -1, config.class_num)),
                    torch.cat([o.view(-1, 4) for o in priors], 0)).data
labels = VOC_CLASSES
top_k = 10
# plt.imshow(rgb_image)  # plot the image for matplotlib
# scale each detection back up to the image
def show_img(self):
    """Read one camera frame and process it according to ``self.case``.

    Modes (``self.case``):
        0: display the raw frame.
        1: run face detection and display the frame with boxes/landmarks.
        2: run the fatigue-monitoring pipeline on the frame.
        3: same as 2, and additionally move the ``State_record`` cursor to
           the end before displaying.

    Nothing else happens when the camera read fails.
    """
    success, self.img = self.camera.read()
    if not success:
        return

    self.Image_num += 1
    if self.Image_num % 10 == 9:
        # time.clock() was removed in Python 3.8.  Keep using it where it
        # still exists so readings stay comparable with ``self.timelb``
        # (initialised outside this method); otherwise use perf_counter().
        clock = getattr(time, 'clock', time.perf_counter)
        frame_rate = 10 / (clock() - self.timelb)
        self.FmRateLCD.display(frame_rate)
        self.timelb = clock()

    if self.case == 0:
        self._show_on_camera_label(cv2.cvtColor(self.img, cv2.COLOR_BGR2RGB))
    if self.case == 1:
        bounding_boxes, landmarks = detect_faces(self.img)
        self.img = show_bboxes(self.img, bounding_boxes, landmarks)
        self._show_on_camera_label(cv2.cvtColor(self.img, cv2.COLOR_BGR2RGB))
    if self.case == 2:
        self._process_fatigue_frame(scroll_record=False)
    if self.case == 3:
        self._process_fatigue_frame(scroll_record=True)


def _show_on_camera_label(self, frame):
    """Convert an image array to a pixmap and show it on ``Camera_2``."""
    self.Camera_2.setPixmap(QPixmap(qimage2ndarray.array2qimage(frame)))
    self.Camera_2.show()


def _process_fatigue_frame(self, scroll_record):
    """Run the whole fatigue pipeline on ``self.img`` and display the result.

    ``scroll_record`` selects whether the ``State_record`` cursor is moved to
    the end before the frame is displayed (the only difference between
    modes 2 and 3).
    """
    img_copy = self.img.copy()  # untouched frame: this is what gets shown
    self.time_ing = time.time()

    self._crop_to_face_region()
    bounding_boxes, landmarks = detect_faces(self.img)
    self._update_head_state(bounding_boxes)
    self._update_head_shake(bounding_boxes, landmarks)
    self._update_head_frequencies()
    if len(bounding_boxes) > 0:
        self._update_emotion_panel(bounding_boxes)

    eyes_open, mouth_open, num_rec, frag_gray = self._detect_behaviours(
        bounding_boxes, landmarks)

    if num_rec > 0:
        # Sliding windows: append the newest sample, drop the oldest.
        eye_bit = 1 if eyes_open else 0  # open eye is 1, closed eye is 0
        mouth_bit = 1 if mouth_open else 0
        self.list_B = np.delete(np.append(self.list_B, eye_bit), 0)
        self.list_blink = np.delete(np.append(self.list_blink, eye_bit), 0)
        self.list_Y = np.delete(np.append(self.list_Y, mouth_bit), 0)
    else:
        self.Msg.clear()
        self.Msg.setPlainText('Nothing detected.')

    if self._update_fatigue_statistics():
        frag_gray = True

    if not frag_gray:
        showImg = cv2.cvtColor(img_copy, cv2.COLOR_BGR2RGB)
    else:
        if self.isRecordImg:
            stamp = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
            cv2.imwrite('ImgRecord/' + stamp + '.jpg', img_copy)
        # Frames on which an event was flagged are shown in grayscale.
        showImg = cv2.cvtColor(img_copy, cv2.COLOR_RGB2GRAY)
    if scroll_record:
        self.State_record.moveCursor(QTextCursor.End)
    self._show_on_camera_label(showImg)


def _crop_to_face_region(self):
    """Re-locate the face when requested and crop ``self.img`` around it."""
    if self.frag_cap:
        bounding_boxes, landmarks = detect_faces(self.img)
        print('正在定位······')
        if len(bounding_boxes) == 1:
            self.point.clear()
            for b in bounding_boxes:
                self.point.extend(int(round(value)) for value in b)
            self.frag_cap = False

    # Crop coordinates are [y0:y1, x0:x1].
    if not self.frag_cap:
        # Clamp at 0: a negative slice start would count from the far edge
        # and produce a wrong (possibly empty) crop.
        top = max(self.point[1] - 10, 0)
        left = max(self.point[0] - 100, 0)
        if self.point[0] < 540:
            self.img = self.img[top:479, left:self.point[2] + 100]
        else:
            self.img = self.img[top:479, left:639]
    else:
        self.img = self.img[1:479, 1:640]

    # Request a new localisation whenever the whole number of elapsed
    # seconds is a multiple of 60.
    self.frag_cap = int(self.time_ing - self.time_first) % 60 == 0


def _update_head_state(self, bounding_boxes):
    """Count consecutive frames without a face and report them as a nod."""
    # A frame with no detected face is treated as head-down / dozing.
    if len(bounding_boxes) == 0:
        self.nod_fps += 1
    if self.nod_fps >= 3:
        self.Head_state.setText('点头')
        self.nod_count += 1
    if len(bounding_boxes) > 0:
        self.nod_fps = 0


def _update_head_shake(self, bounding_boxes, landmarks):
    """Detect head shaking from the head-pose values of the detected face."""
    if len(bounding_boxes) == 0:
        return
    Head_Y_X_Z = get_head_pose(landmarks)
    print('pitch:{}, yaw:{}, roll:{}'.format(Head_Y_X_Z[1], Head_Y_X_Z[2],
                                             Head_Y_X_Z[3]))
    if Head_Y_X_Z[2] < -0.75:
        self.shake_fps_l += 1
    if Head_Y_X_Z[2] >= -0.75:
        self.shake_fps_l = 0
    if self.shake_fps_l >= 5:
        self.shake_count += 1
        self.Head_state.setText('摇头')
    if Head_Y_X_Z[3] >= 0.30:
        self.shake_fps_r += 1
    if self.shake_fps_r >= 5:
        self.shake_count += 1
        self.Head_state.setText('摇头')
    if Head_Y_X_Z[3] < 0.30:
        self.shake_fps_r = 0


def _update_head_frequencies(self):
    """Clear the head-state label and refresh nod/shake frequencies."""
    if time.time() - self.nod_start > 3:
        self.Head_state.setText('')
    if time.time() - self.shake_start > 3:
        self.Head_state.setText('')
    # Nod frequency, recomputed once the window exceeds 10 seconds.
    if time.time() - self.nod_start > 10:
        times = time.time() - self.nod_start
        self.nod_freq = self.nod_count / times
        self.nod_start = time.time()
        self.Nod_LCD.display(self.nod_freq)
    # Shake frequency, same scheme.
    if time.time() - self.shake_start > 10:
        times = time.time() - self.shake_start
        self.shake_freq = self.shake_count / times
        self.shake_start = time.time()
        self.shake_LCD.display(self.shake_freq)


def _update_emotion_panel(self, bounding_boxes):
    """Classify the facial expression and redraw the probability bars."""
    Emotions = get_emotion(get_face_expression(self.img, bounding_boxes))
    self.Emotion.setText(Emotions[1])
    self.Emotion_pred.display(float(Emotions[0]))
    canvas = cv2.imread('img_resource/label_pred.jpg',
                        flags=cv2.IMREAD_UNCHANGED)
    # One horizontal bar plus a percentage label per expression class.
    for (i, (emotion, prob)) in enumerate(zip(self.EMOTIONS, Emotions[2])):
        text = "{:.2f}%".format(prob * 100)
        w = int(prob * 180)
        cv2.rectangle(canvas, (0, (i * 44) + 25), (w, (i * 43) + 40),
                      (100, 200, 130), -1)
        cv2.putText(canvas, text, (170, (i * 43) + 40),
                    cv2.FONT_HERSHEY_DUPLEX, 0.6, (0, 0, 0), 1)
    show = cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB)
    showImage = QtGui.QImage(show.data, show.shape[1], show.shape[0],
                             QtGui.QImage.Format_RGB888)
    self.label_pred_img.setPixmap(QtGui.QPixmap.fromImage(showImage))


def _detect_behaviours(self, bounding_boxes, landmarks):
    """Run ``self.net`` on ``self.img`` and update the behaviour widgets.

    Returns:
        ``(eyes_open, mouth_open, num_rec, frag_gray)``: whether no
        'closed_eye' detection was seen, whether an 'open_mouth' detection
        was seen, the number of detections scoring >= 0.4, and whether any
        of them flagged the frame (calling, smoking or closed eye).
    """
    x = cv2.resize(self.img, (300, 300)).astype(np.float32)
    eyes_open = True
    mouth_open = False
    num_rec = 0
    frag_gray = False

    x -= self.img_mean
    x = x.astype(np.float32)
    x = x[:, :, ::-1].copy()
    x = torch.from_numpy(x).permute(2, 0, 1)
    xx = Variable(x.unsqueeze(0))
    xx = xx.cuda()  # unconditional: a CUDA device is required here
    y = self.net(xx)

    softmax = nn.Softmax(dim=-1)
    detect = Detect(config.class_num, 0, 200, 0.01, 0.45)
    priors = utils.default_prior_box()
    loc, conf = y
    loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
    conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)
    detections = detect(
        loc.view(loc.size(0), -1, 4),
        softmax(conf.view(conf.size(0), -1, config.class_num)),
        torch.cat([o.view(-1, 4) for o in priors], 0)).data
    labels = VOC_CLASSES

    # Draw the detection results onto the image.
    scale = torch.Tensor(self.img.shape[1::-1]).repeat(2)
    self.img = show_bboxes(self.img, bounding_boxes, landmarks)
    for i in range(detections.size(1)):
        # Bounded scan: stop at the first slot below the threshold, and never
        # index past the last slot even when every slot passes it.
        for j in range(detections.size(2)):
            if not detections[0, i, j, 0] >= 0.4:
                break
            score = detections[0, i, j, 0]
            label_name = labels[i - 1]
            if label_name == 'calling' and score > 0.8:
                self.Danger_state.setText('打电话')
                self.danger_count += 1
                frag_gray = True
            if label_name == 'smoke' and score > 0.8:
                self.Danger_state.setText('吸烟')
                self.danger_count += 1
                frag_gray = True
            if label_name != 'smoke' and label_name != 'calling':
                # Clear the danger label after 20 other detections.
                self.danger_t += 1
                if self.danger_t >= 20:
                    self.Danger_state.setText('')
                    self.danger_t = 0
            if label_name == 'open_eye':
                self.open_t += 1
                if self.open_t >= 20:
                    self.Eyes_state.setText('')
                    self.open_t = 0
            if label_name == 'closed_mouth':
                self.Mouth_state.setText(' ')
            if label_name == 'closed_eye':
                eyes_open = False
                frag_gray = True
            if label_name == 'open_mouth':
                mouth_open = True

            display_txt = '%s:%.2f' % (label_name, score)
            pt = (detections[0, i, j, 1:] * scale).cpu().numpy()
            self.coords = (pt[0],
                           pt[1]), pt[2] - pt[0] + 1, pt[3] - pt[1] + 1
            color = self.colors_tableau[i]
            # OpenCV drawing functions require integer pixel coordinates.
            x1, y1, x2, y2 = (int(v) for v in pt)
            cv2.rectangle(self.img, (x1, y1), (x2, y2), color, 2)
            cv2.putText(self.img, display_txt, (x1, y1 + 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.4, (255, 255, 255), 1, 8)
            num_rec += 1
    return eyes_open, mouth_open, num_rec, frag_gray


def _update_fatigue_statistics(self):
    """Update PERCLOS, blink/yawn/danger statistics and the overall state.

    Returns:
        True when a blink or a yawn was registered on this frame.
    """
    event = False

    # PERCLOS: share of closed-eye (0) samples in the blink window.
    self.perclos = 1 - np.average(self.list_blink)
    self.PERCLOS.display(self.perclos)

    # Previous sample open (1) and current sample closed (0) is a blink.
    if self.list_B[8] == 1 and self.list_B[9] == 0:
        self.Eyes_state.setText('眨眼')
        self.blink_count += 1
        event = True
        stamp = datetime.datetime.now().strftime("%H:%M:%S")
        self.State_record.append(stamp + ':眨眼')

    # Blink frequency, recomputed once the window exceeds 30 seconds.
    blink_T = time.time() - self.blink_start
    if blink_T > 30:
        blink_freq = self.blink_count / blink_T
        self.blink_start = time.time()
        self.blink_count = 0
        print('blink_freq={:f}'.format(blink_freq))
        self.Blink_freq.display(blink_freq * 2)

    # A yawn is the tail of the mouth window matching the template list_Y1.
    if (self.list_Y[len(self.list_Y) -
                    len(self.list_Y1):] == self.list_Y1).all():
        self.Mouth_state.setText('打哈欠')
        self.yawn_count += 1
        event = True
        stamp = datetime.datetime.now().strftime("%H:%M:%S")
        self.State_record.append(stamp + ':打哈欠')
        self.list_Y = np.zeros(50)

    # Yawn frequency, recomputed once the window exceeds 60 seconds.
    yawn_T = time.time() - self.yawn_start
    if yawn_T > 60:
        yawn_freq = self.yawn_count / yawn_T
        self.yawn_start = time.time()
        self.yawn_count = 0
        print('yawn_freq={:f}'.format(yawn_freq))
        self.Yawn_freq.display(yawn_freq)

    # Dangerous-behaviour frequency, same scheme.
    DangerAct_T = time.time() - self.danger_start
    if DangerAct_T > 60:
        danger_freq = self.danger_count / DangerAct_T
        self.danger_start = time.time()
        self.danger_count = 0
        print('danger_freq={:f}'.format(danger_freq))
        self.Danger_LCD.display(danger_freq)

    # NOTE(review): the frequencies computed above are local variables;
    # ``self.blink_freq`` / ``self.yawn_freq`` are only reset here, never
    # updated.  Confirm whether they are meant to receive those values.
    if self.perclos > 0.4:
        self.State.setText('疲劳')
    elif self.blink_freq > 0.3:
        self.State.setText('疲劳')
        self.blink_freq = 0  # reset after it triggered the fatigue state
    elif self.yawn_freq > 5.0 / 60:
        self.State.setText('疲劳')
        self.yawn_freq = 0  # reset, as above
    else:
        self.State.setText('清醒')
    return event
def train():
    """Train the SSD detector on the SIXray 'core_500' / 'coreless_5000' sets.

    The network is initialised with ``weights_init`` and the VGG backbone is
    then overwritten with ``./weights/vgg16_reducedfc.pth``.  Training stops
    after ``Config.max_iter`` iterations or ``Config.epoch_num`` epochs,
    whichever comes first.  A checkpoint is written every 10000 iterations
    and once more when training ends.
    """
    dataset = ml_data.SIXrayDetection(
        Config.dataset_root, ['core_500', 'coreless_5000'],
        augmentations.SSDAugmentation(Config.image_size, Config.MEANS))
    data_loader = data.DataLoader(dataset,
                                  Config.batch_size,
                                  num_workers=Config.data_load_number_worker,
                                  shuffle=True,
                                  collate_fn=detection_collate,
                                  pin_memory=True)

    net = ssd_net_vgg.SSD()
    vgg_weights = torch.load('./weights/vgg16_reducedfc.pth')
    net.apply(weights_init)
    net.vgg.load_state_dict(vgg_weights)
    if Config.use_cuda:
        net = torch.nn.DataParallel(net)
        net = net.cuda()
    net.train()

    loss_fun = loss_function.LossFun()
    optimizer = optim.SGD(net.parameters(),
                          lr=Config.lr,
                          momentum=Config.momentum,
                          weight_decay=Config.weight_decacy)

    iteration = 0
    step_index = 0
    reached_max_iter = False
    for epoch in range(Config.epoch_num):
        for step, (img, target) in enumerate(data_loader):
            if Config.use_cuda:
                img = img.cuda()
                target = [ann.cuda() for ann in target]
            # Only convert when the loader did not already yield a tensor.
            # This replaces a try/except that printed and swallowed the
            # TypeError raised by ``torch.Tensor(img)``.
            if not torch.is_tensor(img):
                img = torch.Tensor(img)

            loc_pre, conf_pre = net(img)
            priors = utils.default_prior_box()
            optimizer.zero_grad()
            loss_l, loss_c = loss_fun((loc_pre, conf_pre), target, priors)
            loss = loss_l + loss_c
            loss.backward()
            optimizer.step()

            print('epoch : ', epoch, ' iter : ', iteration, ' step : ', step,
                  ' loss : ', loss.item())

            iteration += 1
            if iteration in Config.lr_steps:
                step_index += 1
                adjust_learning_rate(optimizer, Config.gamma, step_index)
            if iteration % 10000 == 0:
                torch.save(net.state_dict(),
                           'weights/ssd300_VOC_' + repr(iteration) + '.pth')
            if iteration >= Config.max_iter:
                reached_max_iter = True
                break
        # Leave the epoch loop too; otherwise every remaining epoch would
        # still run one extra training step before breaking again.
        if reached_max_iter:
            break

    torch.save(net.state_dict(), 'weights/core500.pth')
def test_net(save_folder,
             net,
             cuda,
             dataset,
             transform,
             top_k,
             im_size=300,
             thresh=0.05):
    """Run the detector on every image under ``dataset`` and log detections.

    Every detection scoring at least 0.01 is appended as
    ``<image id> <score> <x1> <y1> <x2> <y2>`` to one of two text files under
    ``predicted_file/``, chosen by its class ('core' or 'coreless').

    Args:
        net: detection network; ``net(batch)`` must return ``(loc, conf)``.
        cuda: when true, the input batch is moved to the GPU.
        dataset: root directory walked recursively for image files.
        save_folder, transform, top_k, im_size, thresh: accepted for
            interface compatibility; not used by this function.
    """
    det_result_core_file = 'predicted_file/det_test_带电芯充电宝3.txt'
    det_result_core_less_file = 'predicted_file/det_test_不带电芯充电宝3.txt'
    result_files = {
        'core': det_result_core_file,
        'coreless': det_result_core_less_file,
    }
    labels = ['core', 'coreless']
    softmax = nn.Softmax(dim=-1)

    for root, dirs, files in os.walk(dataset):
        for file in files:
            file_with_path = os.path.join(root, file)
            im_og = cv2.imread(file_with_path, cv2.IMREAD_COLOR)
            if im_og is None:
                # cv2.imread returns None for files it cannot decode; skip
                # them instead of crashing in cv2.resize.
                continue

            # Pre-process: resize, subtract the per-channel means, reverse
            # the channel axis and move channels first.
            x = cv2.resize(im_og, (300, 300)).astype(np.float32)
            x -= (104.0, 117.0, 123.0)
            x = x.astype(np.float32)
            x = x[:, :, ::-1].copy()
            x = torch.from_numpy(x).permute(2, 0, 1)
            xx = Variable(x.unsqueeze(0))  # add the batch dimension
            if cuda:
                # Move the batched tensor: moving ``x`` instead would drop
                # the batch dimension.
                xx = xx.cuda()

            # Inference only: no autograd graph is needed.
            with torch.no_grad():
                y = net(xx)

            detect = Detect(config.class_num, 0, 200, 0.01, 0.45)
            priors = utils.default_prior_box()
            loc, conf = y
            loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
            conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)
            detections = detect(
                loc.view(loc.size(0), -1, 4),
                softmax(conf.view(conf.size(0), -1, config.class_num)),
                torch.cat([o.view(-1, 4) for o in priors], 0)).data

            image_id = os.path.splitext(file)[0]
            pending = {'core': [], 'coreless': []}
            # Scale each detection back up to the image: (w, h, w, h).
            scale = torch.Tensor(im_og.shape[1::-1]).repeat(2)
            for i in range(detections.size(1)):
                for j in range(detections.size(2)):
                    if detections[0, i, j, 0] >= 0.01:
                        score = detections[0, i, j, 0]
                        label_name = labels[i - 1]
                        display_txt = '%s: %.2f' % (label_name, score)
                        pt = (detections[0, i, j, 1:] * scale).cpu().numpy()
                        # OpenCV drawing functions require integer pixel
                        # coordinates.
                        x1, y1, x2, y2 = (int(v) for v in pt)
                        cv2.rectangle(im_og, (x1, y1), (x2, y2), (0, 0, 255),
                                      2)
                        cv2.putText(im_og, display_txt, (x1, y1 + 10),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.4,
                                    (255, 0, 0), 1, 8)
                        text = '%s %.2f %.2f %.2f %.2f %.2f' % (
                            image_id, score, pt[0], pt[1], pt[2], pt[3])
                        pending[label_name].append(text + '\n')

            # Append once per image and class instead of reopening the file
            # for every detection.
            for label_name, lines in pending.items():
                if lines:
                    with open(result_files[label_name], 'a+') as f:
                        f.writelines(lines)