def computeWordOvelap(imgc, word_gt, words, wordsOk, wordsFp):
    best_match = 0
    best_match2 = 0
    for det_word in words:
        try:
            cv2.rectangle(imgc, (det_word[0], det_word[1]), (det_word[2], det_word[3]), (0, 0, 255))
            for gt_box in word_gt:
                rect_int = utils.intersect(det_word, gt_box)
                int_area = utils.area(rect_int)
                union_area = utils.area(utils.union(det_word, gt_box))
                ratio = int_area / float(union_area)
                ratio2 = int_area / utils.area(gt_box)
                if ratio > best_match:
                    best_match = ratio
                    w = det_word
                    best_match2 = ratio2
            if best_match2 > 0.3:
                wordsOk.append(det_word)
            elif best_match == 0:
                wordsFp.append(det_word)
        except:
            pass
    return (best_match, best_match2)
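# A minimal sketch of the rectangle helpers that the snippets in this collection
# rely on (utils.intersect, utils.union, utils.area). Their behaviour is an
# assumption inferred from how they are called: boxes are (x1, y1, x2, y2) and a
# degenerate intersection has zero area.
def intersect(a, b):
    # overlapping region of two boxes; degenerate when they do not overlap
    return [max(a[0], b[0]), max(a[1], b[1]), min(a[2], b[2]), min(a[3], b[3])]

def union(a, b):
    # smallest axis-aligned box covering both inputs
    return [min(a[0], b[0]), min(a[1], b[1]), max(a[2], b[2]), max(a[3], b[3])]

def area(rect):
    # area of an (x1, y1, x2, y2) box, clamped at zero for empty intersections
    return max(0, rect[2] - rect[0]) * max(0, rect[3] - rect[1])

# Intersection-over-union as computed throughout these snippets.
def iou(a, b):
    return area(intersect(a, b)) / float(max(1e-9, area(union(a, b))))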
def define_location_quality(self):
    """
    parameters:
        self : we use self.point and check it against the dictionary of shape_corners
               shapes = {'name1': APIshape, name2: Abstract/Section, name3: qq, ...}
    """
    # if we were unable to calculate a point
    if not self.point:
        return 0
    score = self.location_quality
    for name, shape in self.reference_shapes.iteritems():
        try:
            shape = ensure_polygon(shape)
            if shape.intersection(transform(4269, 3857, self.point)):
                # if within a square mile => score = 1
                score += math.pow(area(shape, units='square miles'), -1)
                # print '\tpoint inside the shape: %s, area: %.3f square miles' % (name, area(shape, units='square miles'))
            else:
                pass
                # print '\tpoint outside the shape: %s' % name
        except:
            print '\tinvalid geometry for %s' % name
    # reset score if the point does not fall within any of the related shapes
    if score == self.location_quality:
        score = -1
    self.location_quality = score
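# A minimal sketch of the geometry helpers assumed by define_location_quality,
# built on shapely and pyproj. The names ensure_polygon, transform and
# area(units='square miles') are assumptions about this project's utilities;
# only the shapely/pyproj calls themselves are standard APIs.
from pyproj import Transformer
from shapely.geometry import Polygon
from shapely.ops import transform as shapely_transform

def ensure_polygon(shape):
    # coerce a coordinate sequence into a shapely Polygon (assumed helper)
    return shape if isinstance(shape, Polygon) else Polygon(shape)

def transform(src_epsg, dst_epsg, geom):
    # reproject a shapely geometry between EPSG codes (assumed helper)
    t = Transformer.from_crs(src_epsg, dst_epsg, always_xy=True)
    return shapely_transform(t.transform, geom)

def area(shape, units='square miles'):
    # planar area of a projected (e.g. EPSG:3857) polygon, converted from square metres
    return shape.area / 2.59e6 if units == 'square miles' else shape.area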
def associate_plate_to_vehicle(cars, plates):
    """
    Associate each plate to the closest available vehicle. Also check that the
    bounding box of the license plate is inside the bounding box of the car it
    is assigned to.

    Parameters
    ----------
    cars : list
        The list of vehicles without any license plate annotation.
    plates : list
        The list of license plates detected in a frame.

    Returns
    -------
    list
        The list of vehicles updated with license plates associated to the
        correct vehicle.
    """
    # Keep track of indexes of cars that have already been assigned
    unavailable_cars_indexes = list()
    for p in plates:
        min_dist = np.inf
        px, py = compute_center_coordinates(p['bounding_box'])
        for ic, car in enumerate(cars):
            # Skip vehicles that have already been assigned a plate
            if ic in unavailable_cars_indexes:
                continue
            # Compute coordinates of the center of the vehicle's bb
            cx, cy = compute_center_coordinates(car['bounding_box'])
            dist = (cx - px)**2 + (cy - py)**2
            if dist < min_dist:
                min_dist = dist
                min_dist_car_index = ic
        # If a suitable car has been found AND there is an overlap between the
        # bounding box of the license plate and the one of the closest car,
        # proceed to assign that license plate to that car.
        if (min_dist != np.inf and area(
                p['bounding_box'],
                cars[min_dist_car_index]['bounding_box']) is not None):
            # Assign plate to the closest available car
            cars[min_dist_car_index]['plates'] = [p]
            # Mark car as unavailable
            unavailable_cars_indexes.append(min_dist_car_index)
    return cars
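# Hedged sketches of the two helpers assumed by associate_plate_to_vehicle:
# compute_center_coordinates of an (x1, y1, x2, y2) box, and a two-argument
# area() that returns the intersection area of two boxes or None when they do
# not overlap, which is how the function above tests for overlap.
def compute_center_coordinates(bounding_box):
    x1, y1, x2, y2 = bounding_box
    return (x1 + x2) / 2.0, (y1 + y2) / 2.0

def area(box_a, box_b):
    ix1, iy1 = max(box_a[0], box_b[0]), max(box_a[1], box_b[1])
    ix2, iy2 = min(box_a[2], box_b[2]), min(box_a[3], box_b[3])
    if ix2 <= ix1 or iy2 <= iy1:
        return None  # no overlap
    return (ix2 - ix1) * (iy2 - iy1)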
def computeSegmOverlap(gt_rects, segmentations, MIN_SEGM_OVRLAP=0.6):
    segm2chars = 0
    for k in range(len(gt_rects)):
        gt_rect = gt_rects[k]
        best_match = 0
        best_match_line = 0
        if (gt_rect[4] == ',' or gt_rect[4] == '.' or gt_rect[4] == '\'' or gt_rect[4] == ':' or gt_rect[4] == '-') and not evalPunctuation:
            continue
        best_match2 = 0
        for detId in range(segmentations.shape[0]):
            rectn = segmentations[detId, :]
            rect_int = utils.intersect(rectn, gt_rect)
            int_area = utils.area(rect_int)
            union_area = utils.area(utils.union(rectn, gt_rect))
            ratio = int_area / float(union_area)
            if ratio > best_match:
                best_match = ratio
            if ratio > best_match_line and rectn[7] == 1.0:
                best_match_line = ratio
            gt_rect[5] = best_match
            if best_match < MIN_SEGM_OVRLAP:
                if k < len(gt_rects) - 1:
                    gt_rect2 = gt_rects[k + 1]
                    chars2Rect = utils.union(gt_rect2, gt_rect)
                    rect_int = utils.intersect(rectn, chars2Rect)
                    int_area = utils.area(rect_int)
                    union_area = utils.area(utils.union(rectn, chars2Rect))
                    ratio = int_area / float(union_area)
                    if ratio > best_match2:
                        if ratio > MIN_SEGM_OVRLAP:
                            segm2chars += 1
                        best_match2 = ratio
                        gt_rect[5] = ratio
                        gt_rect2[5] = ratio
def main():
    cam = cv2.VideoCapture(0)
    # load data
    embed_file_path = "data/embed.pkl"
    face_database = loadData(embed_file_path)
    # load models
    device = "MYRIAD"
    plugin = IEPlugin(device, plugin_dirs=None)
    face_embed = FaceEmbedding(plugin)
    face_detect = MobileFaceDetect(plugin)
    # params
    config = configparser.ConfigParser()
    config.read("config.ini")
    fm_threshold = float(config["FACE_MATCH"]['Threshold'])
    label = "new"
    while (True):
        ret, frame = cam.read()
        if not ret:
            print("dead cam")
            cam = cv2.VideoCapture(0)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
            continue
        face_bboxes = face_detect.inference(frame)
        if len(face_bboxes) > 0:
            areas = [area(box) for box in face_bboxes]
            max_id = np.argmax(np.asarray(areas))
            mfb = face_bboxes[max_id]
            main_face = frame[mfb[1]:mfb[3], mfb[0]:mfb[2], :]
            # TODO real face detection
            # TODO face alignment
            # face_feature = face_embed(main_face, fe_net, fe_input_blob)
            # s = time.time()
            face_feature = face_embed.inference(main_face)
            # print(time.time() - s)
            # TODO face record
            best_match = bruteforce(face_feature, face_database, fm_threshold)
            if best_match is None:
                label = "new"
            else:
                label = str(best_match['id'])
            # visualize for debug
            cv2.rectangle(frame, (mfb[0], mfb[1]), (mfb[2], mfb[3]), (255, 0, 0), 2)
            cv2.putText(frame, label, (mfb[0], mfb[1]), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), lineType=cv2.LINE_AA)
        cv2.imshow("gandalf", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
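# A minimal sketch of the bruteforce matcher used above: compare the query
# embedding against every stored entry and return the closest one if it is
# within the distance threshold. The database layout ({'id': ..., 'feats': [...]})
# and the use of Euclidean distance are assumptions for illustration only.
import numpy as np

def bruteforce(query_feature, face_database, threshold):
    best_entry, best_dist = None, np.inf
    for entry in face_database:
        for feat in entry['feats']:
            dist = np.linalg.norm(np.asarray(query_feature) - np.asarray(feat))
            if dist < best_dist:
                best_entry, best_dist = entry, dist
    return best_entry if best_dist < threshold else None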
def fradius(theta, a, b, X, Y):
    ph = [_tr(theta, b / a, X[i], Y[i]) for i in range(3)]
    PX = [p[0] for p in ph]
    PY = [p[1] for p in ph]
    du = distsq([PX[0], PX[1]], [PY[0], PY[1]])
    dv = distsq([PX[1], PX[2]], [PY[1], PY[2]])
    dw = distsq([PX[0], PX[2]], [PY[0], PY[2]])
    A = area(PX, PY)
    return b**2 * 16 * A**2 - du * dv * dw
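# The helpers used by fradius are not shown. A minimal sketch, assuming distsq is
# the squared distance between the points paired by the two lists and area(PX, PY)
# is the shoelace area of the triangle with those vertices; under that reading the
# returned expression b**2 * 16 * A**2 - du * dv * dw vanishes when the triangle's
# circumradius equals b (since R = sqrt(du * dv * dw) / (4 * A)).
def distsq(xs, ys):
    # squared distance between (xs[0], ys[0]) and (xs[1], ys[1])
    return (xs[0] - xs[1]) ** 2 + (ys[0] - ys[1]) ** 2

def area(PX, PY):
    # shoelace area of the triangle with vertices (PX[i], PY[i]), i = 0..2
    return 0.5 * abs((PX[1] - PX[0]) * (PY[2] - PY[0]) - (PX[2] - PX[0]) * (PY[1] - PY[0]))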
def is_included_in_nutrition_table(self, word):
    """Compute the intersection area between a word area and the nutrition table
    area, and return a boolean indicating whether the percentage of the word's
    area included in the nutrition table is greater than 50%.

    Keyword arguments:
    word -- Word
    """
    # Get word bounding_box vertices
    word_vertices = word.bounding_box
    # Find the straight circumscribed rectangle
    bounding_box = circumscribed_rectangle(word_vertices)
    # Inclusion score in nutrition table = area of intersection / area of word bounding box
    inclusion_score = intersection_area(bounding_box, self.table_bbx) / area(bounding_box)
    return inclusion_score > 0.5
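# Hypothetical versions of the helpers used by is_included_in_nutrition_table,
# assuming the word vertices are (x, y) pairs and a bounding box is an axis-aligned
# (x_min, y_min, x_max, y_max) tuple; the real project helpers may differ.
def circumscribed_rectangle(vertices):
    xs = [v[0] for v in vertices]
    ys = [v[1] for v in vertices]
    return (min(xs), min(ys), max(xs), max(ys))

def area(box):
    return max(0, box[2] - box[0]) * max(0, box[3] - box[1])

def intersection_area(box_a, box_b):
    ix1, iy1 = max(box_a[0], box_b[0]), max(box_a[1], box_b[1])
    ix2, iy2 = min(box_a[2], box_b[2]), min(box_a[3], box_b[3])
    return max(0, ix2 - ix1) * max(0, iy2 - iy1)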
def remove_duplicate_plates(plates, annotations_history):
    # TODO document
    excluded_plates_indexes = list()
    unique_plates_list = list()
    for i in range(len(plates)):
        # If it has been already excluded, skip
        if i in excluded_plates_indexes:
            continue
        p1 = plates[i]
        for j in range(i + 1, len(plates)):
            p2 = plates[j]
            # Handle collision
            if area(p1['bounding_box'], p2['bounding_box']) is not None:
                # If they're the same, skip the other one
                if p1['plate_text'] == p2['plate_text']:
                    excluded_plates_indexes.append(j)
                if p1['valid_plate'] and not p2['valid_plate']:
                    # If the first one is valid and the second one isn't,
                    # remove the second one.
                    excluded_plates_indexes.append(j)
                elif not p1['valid_plate'] and p2['valid_plate']:
                    # If the first one is NOT valid and the second one is
                    # valid, break the inner loop, thus preventing the first
                    # one to be added (it's the else part of the loop).
                    break
                else:
                    # Choose the one that appeared more frequently in the past
                    # TODO
                    excluded_plates_indexes.append(j)
        else:
            unique_plates_list.append(p1)
    return unique_plates_list
def process_batch(nets, optim, optim2, image_size, args):
    global it, mean_loss, mean_rec
    net, net_ctc = nets
    net = net.net
    net_ctc = net_ctc.net
    net.blobs['data'].reshape(args.batch_size, 1, image_size[1], image_size[0])
    net.reshape()
    it += 1
    optim2.step(1)
    im = net.blobs['data'].data[...]
    draw = np.swapaxes(im, 2, 3)
    draw = np.swapaxes(draw, 1, 3)
    im_ctc = np.copy(draw)
    draw += 1
    draw *= 128
    draw = np.array(draw, dtype="uint8").copy()
    if args.debug:
        grid_step = 16
        line = 0
        while line < image_size[0]:
            cv2.line(draw[0], (0, line), (image_size[1], line), (128, 128, 128))
            line += grid_step
    boxes = net.blobs['boxes'].data[...]
    word_gtob = net.blobs['gt_boxes'].data[...]
    word_txt = net.blobs['gt_labels'].data[...]
    lines_gtob = net.blobs['line_boxes'].data[...]
    lines_txt = net.blobs['line_labels'].data[...]
    #nms = boxeso[:, 0, 0, 8] == 0
    #boxes = boxes[:, :, nms, :]
    boxes[:, 0, :, 0] *= image_size[0]
    boxes[:, 0, :, 1] *= image_size[1]
    normFactor = math.sqrt(image_size[1] * image_size[1] + image_size[0] * image_size[0])
    boxes[:, 0, :, 2] *= normFactor
    boxes[:, 0, :, 3] *= normFactor
    sum_cost = 0
    count = 0
    labels_gt = []
    labels_det = []
    gt_to_detection = {}
    net_ctc.clear_param_diffs()
    batch_buckets = []
    dummy = {}
    matched_detections = 0
    for bid in range(im.shape[0]):
        o_image = net.layers[0].get_image_file_name(bid)
        o_image = cv2.imread(o_image, cv2.IMREAD_GRAYSCALE)
        cx = net.layers[0].get_crop(bid, 0)
        cy = net.layers[0].get_crop(bid, 1)
        cmx = net.layers[0].get_crop(bid, 2)
        cmy = net.layers[0].get_crop(bid, 3)
        o_image = o_image[cy:cmy, cx:cmx]
        boxes_count = 0
        for i in range(0, boxes.shape[2]):
            det_word = boxes[bid, 0, i]
            if (det_word[0] == 0 and det_word[1] == 0) or det_word[5] < 0.01:
                break
            boxes_count += 1
        x = [i for i in range(boxes_count)]
        #random.shuffle(x)
        bucket_images = {}
        batch_buckets.append(bucket_images)
        word_gto = word_gtob[bid]
        word_gto_txt = word_txt[bid]
        gt_count = 0
        for gt_no in range(word_gto.shape[0]):
            gt = word_gto[gt_no, :]
            gt = gt.reshape(6)
            gtnum = 1000 * bid + gt_no
            if gt[5] == -1:
                #print("ignore gt!")
                continue
            gt_count += 1
            txt = word_gto_txt[gt_no, :]
            gtbox = ((gt[0] * image_size[0], gt[1] * image_size[1]), (gt[2] * normFactor, gt[3] * normFactor), gt[4] * 180 / 3.14)
            gtbox = cv2.boxPoints(gtbox)
            gtbox = np.array(gtbox, dtype="int")
            rect_gt = cv2.boundingRect(gtbox)
            if rect_gt[0] == 0 or rect_gt[1] == 0 or rect_gt[0] + rect_gt[2] >= image_size[0] or rect_gt[1] + rect_gt[3] >= image_size[1]:
                continue
            if gt[3] * normFactor < 3:
                if args.debug:
                    #print('too small gt!')
                    vis.draw_box_points(draw[bid], gtbox, color=(255, 255, 0))
                    cv2.imshow('draw', draw[bid])
                continue
            if args.debug:
                vis.draw_box_points(draw[bid], gtbox, color=(0, 0, 0), thickness=2)
                #vis.draw_box_points(draw[bid], gtbox, color = (255, 255, 255))
                #cv2.imshow('draw', draw[bid])
            rect_gt = [rect_gt[0], rect_gt[1], rect_gt[2], rect_gt[3]]
            rect_gt[2] += rect_gt[0]
            rect_gt[3] += rect_gt[1]
            for i in range(0, min(100, boxes_count)):
                if math.fabs(gt[4] - det_word[4]) > math.pi / 16:
                    continue
                det_word = boxes[bid, 0, x[i], :]
                if (det_word[0] == 0 and det_word[1] == 0) or det_word[5] < 0.01:
                    break
                box = ((det_word[0], det_word[1]), (det_word[2], det_word[3]), det_word[4] * 180 / 3.14)
                box = cv2.boxPoints(box)
                if args.debug:
                    boxp = np.array(box, dtype="int")
                    vis.draw_box_points(draw[bid], boxp, color=(0, 255, 0))
                box = np.array(box, dtype="int")
                bbox = cv2.boundingRect(box)
                bbox = [bbox[0], bbox[1], bbox[2], bbox[3]]
                bbox[2] += bbox[0]
                bbox[3] += bbox[1]
                #rectangle intersection ...
                inter = intersect(bbox, rect_gt)
                uni = union(bbox, rect_gt)
                ratio = area(inter) / float(area(uni))
                ratio_gt = area(inter) / float(area(rect_gt))
                if ratio_gt < 0.95:
                    continue
                if ratio < 0.5:
                    continue
                if not gt_to_detection.has_key(gtnum):
                    gt_to_detection[gtnum] = [0, 0, 0]
                tupl = gt_to_detection[gtnum]
                if tupl[0] < ratio:
                    tupl[0] = ratio
                    tupl[1] = x[i]
                    tupl[2] = ratio_gt
                det_word = boxes[bid, 0, x[i], :]
                box = ([det_word[0], det_word[1]], [det_word[2], det_word[3]], det_word[4] * 180 / 3.14)
                boxO = get_obox(im_ctc[bid], o_image, box)
                boxO = ((boxO[0][0], boxO[0][1]), (boxO[1][0], boxO[1][1]), boxO[2])
                norm2, rot_mat = get_normalized_image(o_image, boxO)
                #norm3, rot_mat = get_normalized_image(im_ctc[bid], ([det_word[0], det_word[1]], [det_word[2] * 1.2, det_word[3] * 1.1], det_word[4] * 180 / 3.14))
                if norm2 is None:
                    continue
                #if norm3 is None:
                #    continue
                #continue
                #cv2.imshow('ts', norm2)
                #cv2.imshow('ts3', norm3)
                #cv2.waitKey(1)
                width_scale = 32.0 / norm2.shape[0]
                width = norm2.shape[1] * width_scale
                best_diff = width
                bestb = 0
                for b in range(0, len(buckets)):
                    if best_diff > abs(width * 1.3 - buckets[b]):
                        best_diff = abs(width * 1.3 - buckets[b])
                        bestb = b
                scaled = cv2.resize(norm2, (buckets[bestb], 32))
                scaled = np.asarray(scaled, dtype=np.float)
                delta = scaled.max() - scaled.min()
                scaled = (scaled) / (delta / 2)
                scaled -= scaled.mean()
                if not bucket_images.has_key(bestb):
                    bucket_images[bestb] = {}
                    bucket_images[bestb]['img'] = []
                    bucket_images[bestb]['sizes'] = []
                    bucket_images[bestb]['txt'] = []
                    bucket_images[bestb]['gt_enc'] = []
                    dummy[bestb] = 1
                else:
                    if args.debug and len(bucket_images[bestb]) > 4:
                        continue
                    elif len(bucket_images[bestb]) > 32:
                        continue
                gt_labels = []
                txt_enc = ''
                for k in range(txt.shape[1]):
                    if txt[0, k] > 0:
                        if codec_rev.has_key(txt[0, k]):
                            gt_labels.append(codec_rev[txt[0, k]])
                        else:
                            gt_labels.append(3)
                        txt_enc += unichr(txt[0, k])
                    else:
                        gt_labels.append(0)
                if scaled.ndim == 3:
                    scaled = cv2.cvtColor(scaled, cv2.COLOR_BGR2GRAY)
                if args.debug:
                    cv2.imshow('scaled', scaled)
                bucket_images[bestb]['sizes'].append(len(gt_labels))
                bucket_images[bestb]['gt_enc'].append(gt_labels)
                bucket_images[bestb]['txt'].append(txt_enc)
                bucket_images[bestb]['img'].append(scaled)
                matched_detections += 1
        #and learn OCR
        for bucket in bucket_images.keys():
            imtf = np.asarray(bucket_images[bucket]['img'], dtype=np.float)
            imtf = np.reshape(imtf, (imtf.shape[0], -1, imtf.shape[1], imtf.shape[2]))
            #imtf = imtf.reshape((imtf.shape[0], imtf.shape[1], imtf.shape[2], 1))
            #imtf = np.swapaxes(imtf,1,3)
            net_ctc.blobs['data'].reshape(imtf.shape[0], imtf.shape[1], imtf.shape[2], imtf.shape[3])
            net_ctc.blobs['data'].data[...] = imtf
            labels = bucket_images[bucket]['gt_enc']
            txt = bucket_images[bucket]['txt']
            max_len = 0
            for l in range(0, len(labels)):
                max_len = max(max_len, len(labels[l]))
            for l in range(0, len(labels)):
                while len(labels[l]) < max_len:
                    labels[l].append(0)
            labels = np.asarray(labels, np.float)
            net_ctc.blobs['label'].reshape(labels.shape[0], labels.shape[1])
            net_ctc.blobs['label'].data[...] = labels
            if args.debug:
                vis.vis_square(imtf[0])
                cv2.imshow('draw', draw[0])
                cv2.waitKey(5)
            optim.step(1)
            sum_cost += net_ctc.blobs['loss'].data[...]
            if net_ctc.blobs['loss'].data[...] > 10:
                vis.vis_square(imtf[0])
                cv2.imshow('draw', draw[0])
                sf = net_ctc.blobs['transpose'].data[...]
                labels2 = sf.argmax(3)
                out = utils.print_seq(labels2[:, 0, :])
                print(u'{0} - {1}'.format(out, txt[0]))
                cv2.waitKey(5)
            count += imtf.shape[0]
    correct_cout = 0
    for i in range(len(labels_gt)):
        det_text = labels_det[i]
        gt_text = labels_gt[i]
        if it % 100 == 0:
            print(u"{0} - {1}".format(det_text, gt_text).encode('utf8'))
        if det_text == gt_text:
            correct_cout += 1
    count = max(count, 1)
    mean_loss = 0.99 * mean_loss + 0.01 * sum_cost / count
    mean_rec = mean_rec * 0.99 + 0.01 * correct_cout / float(max(1, len(labels_gt)))
    #count detection ratio
    tp = 0
    for bid in range(im.shape[0]):
        word_gto = word_gtob[bid]
        for gt_no in range(len(word_gto)):
            gt = word_gto[gt_no]
            gtnum = 1000 * bid + gt_no
            if gt_to_detection.has_key(gtnum):
                tupl = gt_to_detection[gtnum]
                if tupl[0] > 0.5:
                    tp += 1
    loc_recall = tp / float(max(1, gt_count))
    if args.debug:
        cv2.imshow('draw', draw[0])
        if im.shape[0] > 1:
            cv2.imshow('draw2', draw[1])
        cv2.waitKey(10)
    if it % 10 == 0:
        print('{0} - lr:{1:.3e} ctc:{2:.4f}/{3:.4f} wr:{4:.2f}/{5:.2f}, loc:{6:.2f} {7}'.format(it, 0.0001, sum_cost / count, mean_loss, correct_cout / float(max(1, len(labels_gt))), mean_rec, loc_recall, matched_detections))
    if it % 1000 == 0:
        optim.snapshot()
        optim2.snapshot()
def evaluate_image(batch, detections, word_gto, iou_th=0.3, iou_th_vis=0.5, iou_th_eval=0.4):
    '''
    Summary : Returns end-to-end true-positives, detection true-positives,
              number of GT to be considered for eval (len > 2).
    Description : For each predicted bounding-box, a comparison is made with
              each GT entry. The number of end-to-end true positives, the number
              of detection true positives, and the number of GT entries to be
              considered for evaluation are computed.

    Parameters
    ----------
    iou_th_eval : float
        Threshold value of intersection-over-union used for evaluation of predicted bounding-boxes
    iou_th_vis : float
        Threshold value of intersection-over-union used for visualization when the transcription is correct but the IoU is lower.
    iou_th : float
        Threshold value of intersection-over-union between GT and prediction.
    word_gto : list of lists
        List of ground-truth bounding boxes along with transcription.
    batch : list of lists
        List containing data (input image, image file name, ground truth).
    detections : tuple of tuples
        Tuple of predicted bounding boxes along with transcriptions and text/no-text score.

    Returns
    -------
    tp : int
        Number of predicted bounding-boxes having IoU with GT greater than iou_th_eval.
    tp_e2e : int
        Number of predicted bounding-boxes having the same transcription as GT and len > 2.
    gt_e2e : int
        Number of GT entries for which transcription len > 2.
    '''
    gt_to_detection = {}
    tp = 0
    tp_e2e = 0
    gt_e2e = 0
    draw = batch[4][0]
    normFactor = math.sqrt(draw.shape[1] * draw.shape[1] + draw.shape[0] * draw.shape[0])  # Normalization factor
    for i in range(0, len(detections)):
        det = detections[i]
        boxr = det[0]
        box = cv2.boxPoints(boxr)  # Predicted bounding-box parameters
        box = np.array(box, dtype="int")  # Convert predicted bounding-box to numpy array
        bbox = cv2.boundingRect(box)
        bbox = [bbox[0], bbox[1], bbox[2], bbox[3]]
        bbox[2] += bbox[0]  # Convert width to right-coordinate
        bbox[3] += bbox[1]  # Convert height to bottom-coordinate
        vis.draw_box_points(draw, box, color=(255, 0, 0))
        det_text = det[1][0]  # Predicted transcription for bounding-box
        #print(det_text)
        for gt_no in range(len(word_gto)):
            gt = word_gto[gt_no]
            txt = gt[5]  # GT transcription for given GT bounding-box
            gtbox = ((gt[0] * draw.shape[1], gt[1] * draw.shape[0]), (gt[2] * normFactor, gt[3] * normFactor), gt[4] * 180 / 3.14)  # Re-scaling GT values
            gtbox = cv2.boxPoints(gtbox)
            gtbox = np.array(gtbox, dtype="int")
            rect_gt = cv2.boundingRect(gtbox)
            rect_gt = [rect_gt[0], rect_gt[1], rect_gt[2], rect_gt[3]]
            rect_gt[2] += rect_gt[0]  # Convert GT width to right-coordinate
            rect_gt[3] += rect_gt[1]  # Convert GT height to bottom-coordinate
            inter = intersect(bbox, rect_gt)  # Intersection of predicted and GT bounding-boxes
            uni = union(bbox, rect_gt)  # Union of predicted and GT bounding-boxes
            ratio = area(inter) / float(area(uni))  # IoU measure between predicted and GT bounding-boxes
            # 1). Visualize the predicted bounding-box if IoU with GT is higher than the IoU threshold (iou_th) (always required)
            # 2). Visualize the predicted bounding-box if the transcription matches the GT and condition 1. holds
            # 3). Visualize the predicted bounding-box if the transcription matches and IoU with GT is less than iou_th_vis and 1. and 2. hold
            if ratio > iou_th:
                vis.draw_box_points(draw, box, color=(0, 128, 0))
                if not gt_to_detection.has_key(gt_no):
                    gt_to_detection[gt_no] = [0, 0]
                if txt.lower() == det_text.lower():
                    to_cls_x.append([len(det_text), det[1][1], det[1][2], det[1][3]])
                    to_cls_y.append(1)
                    vis.draw_box_points(draw, box, color=(0, 255, 0), thickness=2)
                    gt[7] = 1  # Set to 1 when the predicted transcription is correct.
                    if ratio < iou_th_vis:
                        vis.draw_box_points(draw, box, color=(255, 255, 255), thickness=2)
                        cv2.imshow('draw', draw)
                        #cv2.waitKey(0)
                else:
                    to_cls_x.append([len(det_text), det[1][1], det[1][2], det[1][3]])
                    to_cls_y.append(0)
                tupl = gt_to_detection[gt_no]
                if tupl[0] < ratio:
                    tupl[0] = ratio
                    tupl[1] = i
    # Count the number of end-to-end and detection true-positives
    for gt_no in range(len(word_gto)):
        gt = word_gto[gt_no]
        txt = gt[5]
        if len(txt) > 2:
            gt_e2e += 1
            if gt[7] == 1:
                tp_e2e += 1
        if gt_to_detection.has_key(gt_no):
            tupl = gt_to_detection[gt_no]
            if tupl[0] > iou_th_eval:  # Increment detection true-positive if IoU is greater than iou_th_eval
                tp += 1
    cv2.imshow('draw', draw)
    return tp, tp_e2e, gt_e2e
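# A hedged usage sketch for evaluate_image above: accumulate its per-image counts
# into detection precision/recall and an end-to-end recall. The `samples` iterable
# of (batch, detections, word_gto) triples is hypothetical and only for illustration.
def summarize_evaluation(samples):
    tp_all = tp_e2e_all = gt_e2e_all = gt_all = det_all = 0
    for batch, detections, word_gto in samples:
        tp, tp_e2e, gt_e2e = evaluate_image(batch, detections, word_gto)
        tp_all += tp
        tp_e2e_all += tp_e2e
        gt_e2e_all += gt_e2e
        gt_all += len(word_gto)
        det_all += len(detections)
    recall = tp_all / float(max(1, gt_all))
    precision = tp_all / float(max(1, det_all))
    e2e_recall = tp_e2e_all / float(max(1, gt_e2e_all))
    return recall, precision, e2e_recall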
def run_evaluation(inputDir, outputDir, invert=False, isFp=False):
    if not os.path.exists(outputDir):
        os.mkdir(outputDir)
    images = glob.glob('{0}/*.jpg'.format(inputDir))
    images.extend(glob.glob('{0}/*.JPG'.format(inputDir)))
    images.extend(glob.glob('{0}/*.png'.format(inputDir)))
    segmDir = '{0}/segmentations'.format(inputDir)
    for image in images:
        print('Processing {0}'.format(image))
        img = cv2.imread(image, 0)
        imgc = cv2.imread(image)
        imgproc = img
        imgKp = np.copy(img)
        imgKp.fill(0)
        baseName = os.path.basename(image)
        baseName = baseName[:-4]
        workPoint = 0.3
        segmentations = ftext.getCharSegmentations(imgproc)  #, outputDir, baseName)
        segmentations = segmentations[:, 0:10]
        segmentations = np.column_stack([segmentations, np.zeros((segmentations.shape[0], 2), dtype=np.float)])
        maskDuplicates = segmentations[:, 8] == -1
        segmentationsDuplicates = segmentations[maskDuplicates, :]
        maskNoNei = segmentationsDuplicates[:, 9] > workPoint
        segmentationsNoNei = segmentationsDuplicates[maskNoNei, :]
        keypoints = ftext.getLastDetectionKeypoints()
        imgKp[keypoints[:, 1].astype(int), keypoints[:, 0].astype(int)] = 255
        scales = ftext.getImageScales()
        statc = ftext.getDetectionStat()
        words = ftext.findTextLines()
        segmLine = segmentations[segmentations[:, 7] == 1.0, :]
        segmentations[:, 2] += segmentations[:, 0]
        segmentations[:, 3] += segmentations[:, 1]
        if isFp:
            for detId in range(0, segmentations.shape[0]):
                ftext.acummulateCharFeatures(0, detId)
            continue
        lineGt = '{0}/gt_{1}.txt'.format(inputDir, baseName)
        if not os.path.exists(lineGt):
            lineGt = '{0}/{1}.txt'.format(inputDir, baseName)
        lineGt = '{0}/gt_{1}.txt'.format(inputDir, baseName)
        if os.path.exists(lineGt):
            try:
                word_gt = utls.read_icdar2013_txt_gt(lineGt)
            except ValueError:
                try:
                    word_gt = utls.read_icdar2013_txt_gt(lineGt, separator=',')
                except ValueError:
                    word_gt = utls.read_icdar2015_txt_gt(lineGt, separator=',')
        else:
            lineGt = '{0}/{1}.txt'.format(inputDir, baseName)
            word_gt = utls.read_mrrc_txt_gt(lineGt, separator=',')
        rWcurrent = 0.0
        for gt_box in word_gt:
            if len(gt_box[4]) == 1:
                continue
            best_match = 0
            cv2.rectangle(imgc, (gt_box[0], gt_box[1]), (gt_box[2], gt_box[3]), (0, 255, 0))
            for det_word in words:
                rect_int = utils.intersect(det_word, gt_box)
                int_area = utils.area(rect_int)
                union_area = utils.area(utils.union(det_word, gt_box))
                if union_area == 0:
                    continue
                ratio = int_area / float(union_area)
                det_word[11] = max(det_word[11], ratio)
                if ratio > best_match:
                    best_match = ratio
            rWcurrent += best_match
            best_match = 0
            for detId in range(segmentations.shape[0]):
                rectn = segmentations[detId, :]
                rect_int = utils.intersect(rectn, gt_box)
                int_area = utils.area(rect_int)
                union_area = utils.area(utils.union(rectn, gt_box))
                ratio = int_area / float(union_area)
                rectn[11] = max(ratio, rectn[11])
                if ratio > best_match:
                    best_match = ratio
                if ratio > 0.7:
                    #print("Word Match!")
                    #tmp = ftext.getSegmentationMask(detId)
                    #cv2.imshow("ts", tmp)
                    #cv2.waitKey(0)
                    ftext.acummulateCharFeatures(2, detId)
        segmImg = '{0}/{1}_GT.bmp'.format(segmDir, baseName)
        if not os.path.exists(segmImg):
            segmImg = '{0}/gt_{1}.png'.format(segmDir, baseName)
        if not os.path.exists(segmImg):
            segmImg = '{0}/{1}.png'.format(segmDir, baseName)
        segmImg = cv2.imread(segmImg, 0)
        if invert and segmImg is not None:
            segmImg = ~segmImg
        gt_rects = []
        miss_rects = []
        segmGt = '{0}/{1}_GT.txt'.format(segmDir, baseName)
        if os.path.exists(segmGt) and False:
            (gt_rects, groups) = utls.read_icdar2013_segm_gt(segmGt)
            segmImg = '{0}/{1}_GT.bmp'.format(segmDir, baseName)
            if not os.path.exists(segmImg):
                segmImg = '{0}/gt_{1}.png'.format(segmDir, baseName)
            segmImg = cv2.imread(segmImg)
        else:
            contours = cv2.findContours(np.copy(segmImg), mode=cv2.RETR_EXTERNAL, method=cv2.CHAIN_APPROX_SIMPLE)[1]
            for cont in contours:
                rect = cv2.boundingRect(cont)
                rect = [rect[0], rect[1], rect[0] + rect[2], rect[1] + rect[3], '?', 0, 0]
                gt_rects.append(rect)
        for detId in range(segmentations.shape[0]):
            rectn = segmentations[detId, :]
            for k in range(len(gt_rects)):
                gt_rect = gt_rects[k]
                best_match = 0
                best_match_line = 0
                if (gt_rect[4] == ',' or gt_rect[4] == '.' or gt_rect[4] == '\'' or gt_rect[4] == ':' or gt_rect[4] == '-') and not evalPunctuation:
                    continue
                minSingleOverlap = MIN_SEGM_OVRLAP
                if gt_rect[4] == 'i' or gt_rect[4] == '!':
                    minSingleOverlap = 0.5
                rect_int = utils.intersect(rectn, gt_rect)
                int_area = utils.area(rect_int)
                union_area = utils.area(utils.union(rectn, gt_rect))
                ratio = int_area / float(union_area)
                rectn[10] = max(ratio, rectn[10])
                if rectn[9] > workPoint:
                    gt_rect[6] = max(ratio, gt_rect[6])
                if ratio > best_match:
                    best_match = ratio
                if ratio > best_match_line and rectn[7] == 1.0:
                    best_match_line = ratio
                if ratio > minSingleOverlap:
                    ftext.acummulateCharFeatures(1, detId)
                if ratio < minSingleOverlap:
                    if k < len(gt_rects) - 1:
                        gt_rect2 = gt_rects[k + 1]
                        chars2Rect = utils.union(gt_rect2, gt_rect)
                        rect_int = utils.intersect(rectn, chars2Rect)
                        int_area = utils.area(rect_int)
                        union_area = utils.area(utils.union(rectn, chars2Rect))
                        ratio = int_area / float(union_area)
                        rectn[10] = max(ratio, rectn[10])
                        if ratio > 0.8:
                            best_match2 = ratio
                            gt_rect[5] = ratio
                            gt_rect2[5] = ratio
                            ftext.acummulateCharFeatures(2, detId)
                thickness = 1
                color = (255, 0, 255)
                if best_match >= minSingleOverlap:
                    color = (0, 255, 0)
                if best_match > 0.7:
                    thickness = 2
                cv2.rectangle(imgc, (gt_rect[0], gt_rect[1]), (gt_rect[2], gt_rect[3]), color, thickness)
            if rectn[10] == 0 and rectn[11] == 0:
                ftext.acummulateCharFeatures(0, detId)
def run_words(inputDir, outputDir, invert=False):
    if not os.path.exists(outputDir):
        os.mkdir(outputDir)
    #images = glob.glob('{0}/*.png'.format('/datagrid/personal/TextSpotter/evaluation-sets/MS-text_database'))
    #images = glob.glob('{0}/*.jpg'.format('/datagrid/personal/TextSpotter/evaluation-sets/neocr_dataset'))
    images = glob.glob('{0}/*.jpg'.format(inputDir))
    images.extend(glob.glob('{0}/*.JPG'.format(inputDir)))
    images.extend(glob.glob('{0}/*.png'.format(inputDir)))
    matched_words = 0
    word_count = 0
    for image in sorted(images):
        print('Processing {0}'.format(image))
        img = cv2.imread(image, 0)
        imgc = cv2.imread(image)
        imgproc = img
        imgKp = np.copy(img)
        imgKp.fill(0)
        baseName = os.path.basename(image)
        baseName = baseName[:-4]
        workPoint = 0.3
        segmentations = ftext.getCharSegmentations(imgproc)  #, outputDir, baseName)
        segmentations = segmentations[:, 0:10]
        segmentations = np.column_stack([segmentations, np.zeros((segmentations.shape[0], 2), dtype=np.float)])
        maskDuplicates = segmentations[:, 8] == -1
        segmentationsDuplicates = segmentations[maskDuplicates, :]
        maskNoNei = segmentationsDuplicates[:, 9] > workPoint
        keypoints = ftext.getLastDetectionKeypoints()
        imgKp[keypoints[:, 1].astype(int), keypoints[:, 0].astype(int)] = 255
        scales = ftext.getImageScales()
        statc = ftext.getDetectionStat()
        words = ftext.findTextLines()
        segmentations[:, 2] += segmentations[:, 0]
        segmentations[:, 3] += segmentations[:, 1]
        lineGt = '{0}/gt_{1}.txt'.format(inputDir, baseName)
        if not os.path.exists(lineGt):
            lineGt = '{0}/{1}.txt'.format(inputDir, baseName)
        lineGt = '{0}/gt_{1}.txt'.format(inputDir, baseName)
        if os.path.exists(lineGt):
            try:
                word_gt = utls.read_icdar2013_txt_gt(lineGt)
            except ValueError:
                try:
                    word_gt = utls.read_icdar2013_txt_gt(lineGt, separator=',')
                except ValueError:
                    word_gt = utls.read_icdar2015_txt_gt(lineGt, separator=',')
        else:
            lineGt = '{0}/{1}.txt'.format(inputDir, baseName)
            word_gt = utls.read_mrrc_txt_gt(lineGt, separator=',')
        cw = 0
        for detId in range(segmentations.shape[0]):
            best_match = 0
            for gt_box in word_gt:
                if len(gt_box[4]) == 1:
                    continue
                if gt_box[4][0] == "#":
                    continue
                cw += 1
                rectn = segmentations[detId, :]
                rect_int = utils.intersect(rectn, gt_box)
                int_area = utils.area(rect_int)
                union_area = utils.area(utils.union(rectn, gt_box))
                ratio = int_area / float(union_area)
                rectn[11] = max(ratio, rectn[11])
                if ratio > best_match:
                    best_match = ratio
                if ratio > 0.7:
                    #print("Word Match!")
                    #cv2.rectangle(imgc, (rectn[0], rectn[1]), (rectn[2], rectn[3]), (0, 255, 0))
                    #cv2.imshow("ts", imgc)
                    #cv2.waitKey(0)
                    ftext.acummulateCharFeatures(2, detId)
                    if gt_box[5] != -1:
                        matched_words += 1
                    gt_box[5] = -1
            if best_match == 0:
                ftext.acummulateCharFeatures(0, detId)
        word_count += cw
    print("word recall: {0}".format(matched_words / float(word_count)))
def serve(self):
    face_rec_delay = time.time()
    no_face_frame = 0
    cv2.namedWindow('gandalf', cv2.WINDOW_NORMAL)
    cv2.resizeWindow('gandalf', int(self.cw * self.window_mult), int(self.ch * self.window_mult))
    cv2.setWindowProperty('gandalf', cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)
    cam = cv2.VideoCapture(0)
    # cam = cv2.VideoCapture(0 + cv2.CAP_DSHOW)
    # label = "Hello"
    while (True):
        ret, frame = cam.read()
        # h, w, c = frame.shape
        # print(h, w, c)
        if not ret:
            # dead cam
            cam = cv2.VideoCapture(0)
            time.sleep(3.000)  # some delay to init cam
            # cam = cv2.VideoCapture(0 + cv2.CAP_DSHOW)
            continue
            # cv2.destroyAllWindows()
            # return 1
        if self.test_face:
            frame[120:300, 280:400, :] = self.face_img  # add test face on frame
        # go from standby to face detection phase
        # if no_face_frame > self.no_face_frame_limit:
        #     # no error
        #     cv2.destroyAllWindows()
        #     return 0
        # face detection phase
        face_bboxes = self.face_detect.inference(frame)
        if len(face_bboxes) > 0:
            no_face_frame = 0
            areas = [area(box) for box in face_bboxes]
            max_id = np.argmax(np.asarray(areas))
            mfb = face_bboxes[max_id]
            # print(area(mfb), self.face_min_size, frame.shape)
            # face consolidation phase, calculate face angle
            if area(mfb) > self.face_min_size:
                x0 = max(0, mfb[0] - self.face_margin)
                y0 = max(0, mfb[1] - self.face_margin)
                x1 = min(self.ch, mfb[2] + self.face_margin)
                y1 = min(self.cw, mfb[3] + self.face_margin)
                main_head = frame[y0:y1, x0:x1, :]
                # detect blurry face
                h, w, c = main_head.shape
                # print("hp img shape: ", img.shape)
                if (h > 0) and (w > 0):
                    blur_face = cv2.resize(main_head, (112, 112))
                    blur_face_var = cv2.Laplacian(blur_face, cv2.CV_64F).var()
                    if blur_face_var < self.face_lap_min_score:
                        cv2.rectangle(frame, (mfb[0], mfb[1]), (mfb[2], mfb[3]), (255, 0, 0), 2)
                        face_rec_delay_amount = time.time() - face_rec_delay
                        if face_rec_delay_amount > self.recognition_delay:
                            frame = self.display_draw.drawLACText(frame)
                        else:
                            frame = self.display_draw.drawLastText(frame)
                        # label = "please look at the camera"
                        cv2.imshow("gandalf", frame)
                        if cv2.waitKey(1) & 0xFF == ord('q'):
                            cv2.destroyAllWindows()
                            return 2
                        continue
                # detect head pose
                yaw, pitch, roll = self.head_pose.inference(main_head)
                if not good_head_angle(yaw, pitch, roll, self.angle_min, self.angle_max):
                    cv2.rectangle(frame, (mfb[0], mfb[1]), (mfb[2], mfb[3]), (255, 0, 0), 2)
                    face_rec_delay_amount = time.time() - face_rec_delay
                    if face_rec_delay_amount > self.recognition_delay:
                        frame = self.display_draw.drawLACText(frame)
                    else:
                        frame = self.display_draw.drawLastText(frame)
                    cv2.imshow("gandalf", frame)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        cv2.destroyAllWindows()
                        return 2
                    continue
                # TODO: face liveness detection
            else:
                # face too small
                face_rec_delay_amount = time.time() - face_rec_delay
                if face_rec_delay_amount > self.recognition_delay:
                    frame = self.display_draw.drawMCText(frame)
                else:
                    frame = self.display_draw.drawLastText(frame)
                cv2.imshow("gandalf", frame)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    cv2.destroyAllWindows()
                    return 2
                continue
            # face recognition phase
            face_rec_delay_amount = time.time() - face_rec_delay
            if face_rec_delay_amount >= self.recognition_delay:
                main_face = frame[mfb[1]:mfb[3], mfb[0]:mfb[2], :]
                # TODO face alignment
                face_feature = self.face_embed.inference(main_face)
                best_match = bruteforce(face_feature, self.face_database, self.fm_threshold)
                # TODO face record
                if best_match is None:
                    new_log = {'result': 'failed', 'time': datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
                    self.updateLog(new_log, main_face)
                    self.display_draw.drawFailedText(frame)
                    self.recognition_delay = self.recog_fai_delay
                else:
                    self.callDoorControllerSocket()
                    new_log = {'result': 'success', 'face_id': best_match['_id'], 'time': datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
                    self.updateLog(new_log, main_face)
                    self.display_draw.drawSuccessText(frame, str(best_match['name']))
                    self.recognition_delay = self.recog_suc_delay
                face_rec_delay = time.time()
            cv2.rectangle(frame, (mfb[0], mfb[1]), (mfb[2], mfb[3]), (255, 0, 0), 2)
        else:
            no_face_frame += 1
            face_rec_delay_amount = time.time() - face_rec_delay
            if face_rec_delay_amount > self.recognition_delay:
                frame = self.display_draw.drawDefaultText(frame)
            else:
                frame = self.display_draw.drawLastText(frame)
        cv2.imshow("gandalf", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            cv2.destroyAllWindows()
            return 2
        elif cv2.waitKey(1) & 0xFF == ord('t'):
            self.test_face = True
        elif cv2.waitKey(1) & 0xFF == ord('y'):
            self.test_face = False
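# A hypothetical good_head_angle check consistent with how serve() uses it above:
# accept the face only when all three Euler angles fall inside the configured band.
def good_head_angle(yaw, pitch, roll, angle_min, angle_max):
    return all(angle_min <= a <= angle_max for a in (yaw, pitch, roll))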
def process_batch(nets, optim, optim2, image_size, args):
    global it, mean_loss, mean_rec
    it += 1  # increment the iteration counter
    net, net_ctc = nets
    net = net.net
    net_ctc = net_ctc.net
    net.blobs['data'].reshape(args.batch_size, 1, image_size[1], image_size[0])  # reshape the input images of one batch
    net.reshape()
    optim2.step(1)
    im = net.blobs['data'].data[...]  # shape [batch_size, 1, 416, 416]
    draw = np.swapaxes(im, 2, 3)
    draw = np.swapaxes(draw, 1, 3)
    im_ctc = np.copy(draw)
    draw += 1
    draw *= 128
    draw = np.array(draw, dtype="uint8").copy()
    if args.debug:
        grid_step = 16
        line = 0
        while line < image_size[0]:
            cv2.line(draw[0], (0, line), (image_size[1], line), (128, 128, 128))
            line += grid_step
    boxes = net.blobs['boxes'].data[...]  # shape (4, 1, 500, 15)
    word_gtob = net.blobs['gt_boxes'].data[...]  # shape (4, 6, 1, 6)
    word_txt = net.blobs['gt_labels'].data[...]  # shape (4, 6, 1, 14)
    lines_gtob = net.blobs['line_boxes'].data[...]  # shape (4, 1, 1, 5)
    lines_txt = net.blobs['line_labels'].data[...]  # shape (4, 1, 1, 7)
    #nms = boxeso[:, 0, 0, 8] == 0
    #boxes = boxes[:, :, nms, :]
    boxes[:, 0, :, 0] *= image_size[0]
    boxes[:, 0, :, 1] *= image_size[1]
    normFactor = math.sqrt(image_size[1] * image_size[1] + image_size[0] * image_size[0])
    boxes[:, 0, :, 2] *= normFactor
    boxes[:, 0, :, 3] *= normFactor
    sum_cost = 0
    count = 0
    labels_gt = []
    labels_det = []
    gt_to_detection = {}
    net_ctc.clear_param_diffs()
    batch_buckets = []
    dummy = {}
    matched_detections = 0
    for bid in range(im.shape[0]):  # iterate over every sample in the batch
        o_image = net.layers[0].get_image_file_name(bid)
        o_image = cv2.imread(o_image, cv2.IMREAD_GRAYSCALE)
        cx = net.layers[0].get_crop(bid, 0)
        cy = net.layers[0].get_crop(bid, 1)
        cmx = net.layers[0].get_crop(bid, 2)
        cmy = net.layers[0].get_crop(bid, 3)
        o_image = o_image[cy:cmy, cx:cmx]
        boxes_count = 0
        for i in range(0, boxes.shape[2]):
            det_word = boxes[bid, 0, i]
            if (det_word[0] == 0 and det_word[1] == 0) or det_word[5] < 0.01:
                break
            boxes_count += 1
        x = [i for i in range(boxes_count)]
        #random.shuffle(x)
        bucket_images = {}
        batch_buckets.append(bucket_images)
        word_gto = word_gtob[bid]
        word_gto_txt = word_txt[bid]
        gt_count = 0
        for gt_no in range(word_gto.shape[0]):
            gt = word_gto[gt_no, :]
            gt = gt.reshape(6)
            gtnum = 1000 * bid + gt_no
            if gt[5] == -1:
                #print("ignore gt!")
                continue
            gt_count += 1
            txt = word_gto_txt[gt_no, :]
            gtbox = ((gt[0] * image_size[0], gt[1] * image_size[1]), (gt[2] * normFactor, gt[3] * normFactor), gt[4] * 180 / 3.14)
            gtbox = cv2.boxPoints(gtbox)
            gtbox = np.array(gtbox, dtype="int")
            rect_gt = cv2.boundingRect(gtbox)
            if rect_gt[0] == 0 or rect_gt[1] == 0 or rect_gt[0] + rect_gt[2] >= image_size[0] or rect_gt[1] + rect_gt[3] >= image_size[1]:
                continue
            if gt[3] * normFactor < 3:
                if args.debug:
                    pass
                    print('too small gt!')
                continue
            rect_gt = [rect_gt[0], rect_gt[1], rect_gt[2], rect_gt[3]]
            rect_gt[2] += rect_gt[0]
            rect_gt[3] += rect_gt[1]
            for i in range(0, min(100, boxes_count)):
                if math.fabs(gt[4] - det_word[4]) > math.pi / 16:
                    continue
                det_word = boxes[bid, 0, x[i], :]
                if (det_word[0] == 0 and det_word[1] == 0) or det_word[5] < 0.01:
                    break
                box = ((det_word[0], det_word[1]), (det_word[2], det_word[3]), det_word[4] * 180 / 3.14)
                box = cv2.boxPoints(box)
                if args.debug:
                    boxp = np.array(box, dtype="int")
                    vis.draw_box_points(draw[bid], boxp, color=(0, 255, 0))
                box = np.array(box, dtype="int")
                bbox = cv2.boundingRect(box)
                bbox = [bbox[0], bbox[1], bbox[2], bbox[3]]
                bbox[2] += bbox[0]
                bbox[3] += bbox[1]
                #rectangle intersection ...
                inter = intersect(bbox, rect_gt)
                uni = union(bbox, rect_gt)
                ratio = area(inter) / float(area(uni))
                ratio_gt = area(inter) / float(area(rect_gt))
                if ratio_gt < 0.95:
                    continue
                if ratio < 0.5:
                    continue
                if not gt_to_detection.has_key(gtnum):
                    gt_to_detection[gtnum] = [0, 0, 0]
                tupl = gt_to_detection[gtnum]
                if tupl[0] < ratio:
                    tupl[0] = ratio
                    tupl[1] = x[i]
                    tupl[2] = ratio_gt
                det_word = boxes[bid, 0, x[i], :]
                box = ([det_word[0], det_word[1]], [det_word[2], det_word[3]], det_word[4] * 180 / 3.14)
                boxO = get_obox(im_ctc[bid], o_image, box)
                boxO = ((boxO[0][0], boxO[0][1]), (boxO[1][0], boxO[1][1]), boxO[2])
                norm2, rot_mat = get_normalized_image(o_image, boxO)
                #norm3, rot_mat = get_normalized_image(im_ctc[bid], ([det_word[0], det_word[1]], [det_word[2] * 1.2, det_word[3] * 1.1], det_word[4] * 180 / 3.14))
                if norm2 is None:
                    continue
                #if norm3 is None:
                #    continue
                #continue
                #cv2.imshow('ts', norm2)
                #cv2.imshow('ts3', norm3)
                #cv2.waitKey(1)
                width_scale = 32.0 / norm2.shape[0]
                width = norm2.shape[1] * width_scale
                best_diff = width
                bestb = 0
                for b in range(0, len(buckets)):
                    if best_diff > abs(width * 1.3 - buckets[b]):
                        best_diff = abs(width * 1.3 - buckets[b])
                        bestb = b
                scaled = cv2.resize(norm2, (buckets[bestb], 32))
                scaled = np.asarray(scaled, dtype=np.float)
                delta = scaled.max() - scaled.min()
                scaled = (scaled) / (delta / 2)
                scaled -= scaled.mean()
                if not bucket_images.has_key(bestb):
                    bucket_images[bestb] = {}
                    bucket_images[bestb]['img'] = []
                    bucket_images[bestb]['sizes'] = []
                    bucket_images[bestb]['txt'] = []
                    bucket_images[bestb]['gt_enc'] = []
                    dummy[bestb] = 1
                else:
                    if args.debug and len(bucket_images[bestb]) > 4:
                        continue
                    elif len(bucket_images[bestb]) > 32:
                        continue
                gt_labels = []
                txt_enc = ''
                for k in range(txt.shape[1]):
                    if txt[0, k] > 0:
                        if codec_rev.has_key(txt[0, k]):
                            gt_labels.append(codec_rev[txt[0, k]])
                        else:
                            gt_labels.append(3)
                        txt_enc += unichr(txt[0, k])
                    else:
                        gt_labels.append(0)
                if scaled.ndim == 3:
                    scaled = cv2.cvtColor(scaled, cv2.COLOR_BGR2GRAY)
                if args.debug:
                    cv2.imshow('scaled', scaled)
                bucket_images[bestb]['sizes'].append(len(gt_labels))
                bucket_images[bestb]['gt_enc'].append(gt_labels)
                bucket_images[bestb]['txt'].append(txt_enc)
                bucket_images[bestb]['img'].append(scaled)
                matched_detections += 1
        #and learn OCR
        for bucket in bucket_images.keys():
            imtf = np.asarray(bucket_images[bucket]['img'], dtype=np.float)
            imtf = np.reshape(imtf, (imtf.shape[0], -1, imtf.shape[1], imtf.shape[2]))
            #imtf = imtf.reshape((imtf.shape[0], imtf.shape[1], imtf.shape[2], 1))
            #imtf = np.swapaxes(imtf,1,3)
            net_ctc.blobs['data'].reshape(imtf.shape[0], imtf.shape[1], imtf.shape[2], imtf.shape[3])
            net_ctc.blobs['data'].data[...] = imtf
            labels = bucket_images[bucket]['gt_enc']
            txt = bucket_images[bucket]['txt']
            max_len = 0
            for l in range(0, len(labels)):
                max_len = max(max_len, len(labels[l]))
            for l in range(0, len(labels)):
                while len(labels[l]) < max_len:
                    labels[l].append(0)
            labels = np.asarray(labels, np.float)
            net_ctc.blobs['label'].reshape(labels.shape[0], labels.shape[1])
            net_ctc.blobs['label'].data[...] = labels
            if args.debug:
                vis.vis_square(imtf[0])
                cv2.imshow('draw', draw[0])
                cv2.waitKey(5)
            #optim.step(1)
            sum_cost += net_ctc.blobs['loss'].data[...]
            if net_ctc.blobs['loss'].data[...] > 10:
                #vis.vis_square(imtf[0])
                #cv2.imshow('draw', draw[0])
                sf = net_ctc.blobs['transpose'].data[...]
                labels2 = sf.argmax(3)
                out = utils.print_seq(labels2[:, 0, :])
                print(u'{0} --- {1}'.format(out, txt[0]))
                #cv2.waitKey(5)
            count += imtf.shape[0]
    correct_cout = 0
    for i in range(len(labels_gt)):
        det_text = labels_det[i]
        gt_text = labels_gt[i]
        if it % 100 == 0:
            pass
            #print( u"{0} -- {1}".format(det_text, gt_text).encode('utf8') )
        if det_text == gt_text:
            correct_cout += 1
    count = max(count, 1)
    mean_loss = 0.99 * mean_loss + 0.01 * sum_cost / count
    mean_rec = mean_rec * 0.99 + 0.01 * correct_cout / float(max(1, len(labels_gt)))
    #count detection ratio
    tp = 0
    for bid in range(im.shape[0]):
        word_gto = word_gtob[bid]
        for gt_no in range(len(word_gto)):
            gt = word_gto[gt_no]
            gtnum = 1000 * bid + gt_no
            if gt_to_detection.has_key(gtnum):
                tupl = gt_to_detection[gtnum]
                if tupl[0] > 0.5:
                    tp += 1
    loc_recall = tp / float(max(1, gt_count))
    if it % 10 == 0:
        print('{0} - lr:{1:.3e} ctc:{2:.4f}/{3:.4f} wr:{4:.2f}/{5:.2f}, loc:{6:.2f} {7}'.format(it, 0.0001, sum_cost / count, mean_loss, correct_cout / float(max(1, len(labels_gt))), mean_rec, loc_recall, matched_detections))
    if it % snapshot_interval == 0:
        #optim.snapshot()
        optim2.snapshot()
def run_evaluation(inputDir, outputDir, process_color=0, processTest=0):
    if not os.path.exists(outputDir):
        os.mkdir(outputDir)
    edgeThreshold = 14
    fastex = FASTex(edgeThreshold=edgeThreshold)
    modelFile = '/home/busta/outModel.boost'
    model = cv2.Boost()
    model.load(modelFile)
    images = glob.glob('{0}/*.jpg'.format(inputDir))
    segmDir = '{0}/segmentations'.format(inputDir)
    precision = 0
    precisionDen = 0
    recall = 0
    recall05 = 0
    recallNonMax = 0
    recallDen = 0
    wordRecall = 0
    wordRecallDen = 0
    segm2chars = 0
    regionsCount = 0
    regionsCountNonMax = 0
    missing_segmNonMaxCount = 0
    letterKeypointHistogram = defaultdict(lambda: defaultdict(float))
    octaveLetterKeypointHistogram = defaultdict(lambda: defaultdict(float))
    missing_letters = {}
    letterHistogram = defaultdict(int)
    missing_segm = {}
    missing_segm2 = {}
    missing_segmNonMax = {}
    diffMaxOctavesMap = {}
    diffScoreOctavesMap = {}
    segmHistogram = []
    segmWordHistogram = []
    results = []
    hist = None
    histFp = None
    histDist = None
    histDistFp = None
    histDistMax = None
    histDistMaxWhite = None
    histDistMaxFp = None
    hist2dDist = None
    hist2dDistFp = None
    hist2dDistScore = None
    hist2dDistScoreFp = None
    histDistMaxWhiteFp = None
    histSegm = np.zeros((256), dtype=np.float)
    histSegmCount = np.zeros((256), dtype=np.int)
    stat = np.asarray([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=np.float)
    times = []
    gtSegmCount = 0
    wordsOk = []
    wordsFp = []
    keypointsTotal = 0
    keypointsTotalInside = 0
    orbTime = 0
    lineNo = 0
    perfectWords = 0
    perfectWordsNS = 0
    hasSegm = False
    for image in images:
        print('Processing {0}'.format(image))
        img = cv2.imread(image, 0)
        imgc = cv2.imread(image)
        imgcO = cv2.imread(image)
        if process_color == 1:
            imgproc = imgc
        else:
            imgproc = img
        baseName = os.path.basename(image)
        baseName = baseName[:-4]
        workPoint = 0.3
        segmentations = fastex.getCharSegmentations(imgproc, outputDir, baseName)
        segmentations = segmentations[:, 0:10]
        segmentations = np.column_stack([segmentations, np.zeros((segmentations.shape[0], 2), dtype=np.float)])
        maskDuplicates = segmentations[:, 8] == -1
        segmentationsDuplicates = segmentations[maskDuplicates, :]
        maskNoNei = segmentationsDuplicates[:, 9] > workPoint
        segmentationsNoNei = segmentationsDuplicates[maskNoNei, :]
        if segmentations.shape[0] > 0:
            print('Dupl ratio: {0} - {1}/ {2} - {3}'.format(segmentationsDuplicates.shape[0] / float(segmentations.shape[0]), segmentationsDuplicates.shape[0], segmentations.shape[0], segmentationsNoNei.shape[0]))
        keypoints = fastex.getLastDetectionKeypoints()
        keypointsTotal += keypoints.shape[0]
        statc = fastex.getDetectionStat()
        times.append([statc[1], statc[2], statc[3], statc[4], statc[5], statc[6], statc[7], statc[8], statc[9], statc[10]])
        stat += statc
        values = img[keypoints[:, 1].astype(int), keypoints[:, 0].astype(int)]
        valuesMax = img[keypoints[:, 6].astype(int), keypoints[:, 5].astype(int)]
        diffValMax = np.abs(values - valuesMax)
        regionsCount += segmentations.shape[0]
        regionsCountNonMax += segmentationsNoNei.shape[0]
        segmentations[:, 2] += segmentations[:, 0]
        segmentations[:, 3] += segmentations[:, 1]
        keypointsOrb = fastex.getLastDetectionOrbKeypoints()
        orbTime += keypointsOrb[0][9]
        segmGt = '{0}/{1}_GT.txt'.format(segmDir, baseName)
        pden = 0
        rden = 0
        if os.path.exists(segmGt):
            hasSegm = True
            (gt_rects, groups) = utls.read_icdar2013_segm_gt(segmGt)
            segmImg = '{0}/{1}_GT.bmp'.format(segmDir, baseName)
            if not os.path.exists(segmImg):
                segmImg = '{0}/gt_{1}.png'.format(segmDir, baseName)
            segmImg = cv2.imread(segmImg)
            try:
                (hist, histFp, histDist, histDistMax, histDistMaxWhite, hist2dDist, hist2dDistScore, histDistFp, histDistMaxFp, histDistMaxWhiteFp, hist2dDistFp, hist2dDistScoreFp, keypointsInside) = collect_histograms(img, segmImg, keypoints, values, diffValMax, keypointsTotalInside, diffMaxOctavesMap, diffScoreOctavesMap, hist, histFp, histDist, histDistMax, histDistMaxWhite, hist2dDist, hist2dDistScore, histDistFp, histDistMaxFp, histDistMaxWhiteFp, hist2dDistFp, hist2dDistScoreFp)
            except:
                pass
            rcurrent = 0
            rcurrent05 = 0
            rcurrentNonMax = 0
            for k in range(len(gt_rects)):
                gt_rect = gt_rects[k]
                best_match = 0
                best_match_line = 0
                if (gt_rect[4] == ',' or gt_rect[4] == '.' or gt_rect[4] == '\'' or gt_rect[4] == ':' or gt_rect[4] == '-') and not evalPunctuation:
                    continue
                gtSegmCount += 1
                rectMask = np.bitwise_and(np.bitwise_and(keypointsInside[:, 0] >= gt_rect[0], keypointsInside[:, 0] <= gt_rect[2]), np.bitwise_and(keypointsInside[:, 1] >= gt_rect[1], keypointsInside[:, 1] <= gt_rect[3]))
                letterInside = keypointsInside[rectMask, :]
                #make keypoints histogram
                if letterInside.shape[0] > 0:
                    octaves = np.unique(letterInside[:, 2])
                    maxOctave = np.max(octaves)
                    maxOctavePoints = 0
                    for i in range(int(maxOctave) + 1):
                        octavePoints = letterInside[letterInside[:, 2] == i, :]
                        maxOctavePoints = max(maxOctavePoints, octavePoints.shape[0])
                    if maxOctavePoints > 0:
                        octaveLetterKeypointHistogram[gt_rect[4]][0] += 1
                    if maxOctavePoints > 1:
                        octaveLetterKeypointHistogram[gt_rect[4]][1] += 1
                    if maxOctavePoints > 2:
                        octaveLetterKeypointHistogram[gt_rect[4]][2] += 1
                    if maxOctavePoints > 3:
                        octaveLetterKeypointHistogram[gt_rect[4]][3] += 1
                if letterInside.shape[0] == 0:
                    if not missing_letters.has_key(gt_rect[4]):
                        missing_letters[gt_rect[4]] = []
                    missing_letters[gt_rect[4]].append((image, gt_rect))
                if letterInside.shape[0] > 0:
                    letterKeypointHistogram[gt_rect[4]][0] += 1
                if letterInside.shape[0] > 1:
                    letterKeypointHistogram[gt_rect[4]][1] += 1
                if letterInside.shape[0] > 2:
                    letterKeypointHistogram[gt_rect[4]][2] += 1
                if letterInside.shape[0] > 3:
                    letterKeypointHistogram[gt_rect[4]][3] += 1
                letterHistogram[gt_rect[4]] += 1
                best_match2 = 0
                minSingleOverlap = MIN_SEGM_OVRLAP
                if gt_rect[4] == 'i' or gt_rect[4] == '!':
                    minSingleOverlap = 0.5
                for detId in range(segmentations.shape[0]):
                    rectn = segmentations[detId, :]
                    rect_int = utils.intersect(rectn, gt_rect)
                    int_area = utils.area(rect_int)
                    union_area = utils.area(utils.union(rectn, gt_rect))
                    ratio = int_area / float(union_area)
                    rectn[10] = max(ratio, rectn[10])
                    if rectn[9] > workPoint:
                        gt_rect[6] = max(ratio, gt_rect[6])
                    if ratio > best_match:
                        best_match = ratio
                        best_segm = segmentations[detId, :]
                    if ratio > best_match_line and rectn[7] == 1.0:
                        best_match_line = ratio
                    if best_match < minSingleOverlap:
                        if k < len(gt_rects) - 1:
                            gt_rect2 = gt_rects[k + 1]
                            chars2Rect = utils.union(gt_rect2, gt_rect)
                            rect_int = utils.intersect(rectn, chars2Rect)
                            int_area = utils.area(rect_int)
                            union_area = utils.area(utils.union(rectn, chars2Rect))
                            ratio = int_area / float(union_area)
                            rectn[10] = max(ratio, rectn[10])
                            if ratio > best_match2:
                                if ratio > MIN_SEGM_OVRLAP:
                                    segm2chars += 1
                                best_match2 = ratio
                                gt_rect[5] = ratio
                                gt_rect2[5] = ratio
                thickness = 1
                color = (255, 0, 255)
                if best_match >= minSingleOverlap:
                    color = (0, 255, 0)
                if best_match > 0.7:
                    thickness = 2
                cv2.rectangle(imgc, (gt_rect[0], gt_rect[1]), (gt_rect[2], gt_rect[3]), color, thickness)
                recall += best_match
                recallNonMax += gt_rect[6]
                if best_match >= minSingleOverlap:
                    recall05 += best_match
                    rcurrent05 += best_match
                else:
                    if not missing_segm.has_key(image):
                        missing_segm[image] = []
                    missing_segm[image].append(gt_rect)
                    if gt_rect[5] < MIN_SEGM_OVRLAP:
                        if not missing_segm2.has_key(image):
                            missing_segm2[image] = []
                        missing_segm2[image].append(gt_rect)
                        segm2chars += 1
                    if gt_rect[6] < minSingleOverlap:
                        if not missing_segmNonMax.has_key(image):
                            missing_segmNonMax[image] = []
                        missing_segmNonMax[image].append(gt_rect)
                        missing_segmNonMaxCount += 1
                rcurrent += best_match
                rcurrentNonMax += gt_rect[6]
                recallDen += 1
                rden += 1
                if best_match > 0 and process_color != 1:
                    val = img[best_segm[5], best_segm[4]]
                    histSegm[val] += best_match
                    histSegmCount[val] += 1
            pcurrent = 0
            for detId in range(segmentations.shape[0]):
                best_match = 0
                rectn = segmentations[detId, :]
                for gt_rect in gt_rects:
                    rect_int = utils.intersect(rectn, gt_rect)
                    int_area = utils.area(rect_int)
                    union_area = utils.area(utils.union(rectn, gt_rect))
                    ratio = int_area / float(union_area)
                    if ratio > best_match:
                        best_match = ratio
                precision += best_match
                pcurrent += best_match
                precisionDen += 1
                pden += 1
        if pden == 0:
            pcurrent = 0
        else:
            pcurrent = pcurrent / pden
        if rden == 0:
            rcurrent = 0
            rcurrent05 = 0
            rcurrentNonMax = 0
        else:
            rcurrent = rcurrent / rden
            rcurrent05 = rcurrent05 / rden
            rcurrentNonMax = rcurrentNonMax / rden
        segmHistogram.append([segmentations.shape[0], segmentations[segmentations[:, 10] > 0.4].shape[0], segmentations[segmentations[:, 10] > 0.5].shape[0], segmentations[segmentations[:, 10] > 0.6].shape[0], segmentations[segmentations[:, 10] > 0.7].shape[0]])
        segmWordHistogram.append([segmentations.shape[0], segmentations[np.bitwise_or(segmentations[:, 10] > 0.5, segmentations[:, 11] > 0.5)].shape[0]])
        results.append((baseName, rcurrent, pcurrent, rcurrent05))
    if precisionDen == 0:
        pcurrent = 0
    else:
        precision = precision / precisionDen
    if recallDen == 0:
        rcurrent = 0
    else:
        recall = recall / recallDen
        recall05 = recall05 / recallDen
        recallNonMax = recallNonMax / recallDen
    wordRecall = wordRecall / max(1, wordRecallDen)
    try:
        histSegm = histSegm / max(1, histSegmCount)
    except ValueError:
        pass
    print('Evaluation Results:')
    print('recall: {0}, precision: {1}, recall 0.5: {2}, recall NonMax: {3}'.format(recall, precision, recall05, recallNonMax))
    kpTimes = np.histogram(np.asarray(times)[:, 0], bins=20)
    print('Keypoint Time Histogram: {0}'.format(kpTimes))
    print('Detection statistics:')
    print(stat)
    for letter in letterKeypointHistogram.keys():
        for num in letterKeypointHistogram[letter].keys():
            letterKeypointHistogram[letter][num] = letterKeypointHistogram[letter][num] / float(letterHistogram[letter])
        for num in octaveLetterKeypointHistogram[letter].keys():
            octaveLetterKeypointHistogram[letter][num] = octaveLetterKeypointHistogram[letter][num] / float(letterHistogram[letter])
        letterKeypointHistogram[letter] = dict(letterKeypointHistogram[letter])
        octaveLetterKeypointHistogram[letter] = dict(octaveLetterKeypointHistogram[letter])
    print('Perfect words: {0}'.format(perfectWords))
    eval_date = datetime.date.today()
    np.savez('{0}/evaluation'.format(outputDir), recall=recall, recall05=recall05, recallNonMax=recallNonMax, precision=precision, eval_date=eval_date, regionsCount=regionsCount, inputDir=inputDir, hist=hist, histSegm=histSegm, stat=stat, letterKeypointHistogram=dict(letterKeypointHistogram), missing_letters=missing_letters, octaveLetterKeypointHistogram=dict(octaveLetterKeypointHistogram), missing_segm=missing_segm, times=np.asarray(times), histFp=histFp, gtSegmCount=gtSegmCount, wordRecall=wordRecall, histDist=histDist, histDistFp=histDistFp, histDistMax=histDistMax, histDistMaxFp=histDistMaxFp, hist2dDist=hist2dDist, hist2dDistFp=hist2dDistFp, hist2dDistScore=hist2dDistScore, hist2dDistScoreFp=hist2dDistScoreFp, histDistMaxWhite=histDistMaxWhite, histDistMaxWhiteFp=histDistMaxWhiteFp, wordsOk=wordsOk, wordsFp=wordsFp, diffMaxOctavesMap=diffMaxOctavesMap, diffScoreOctavesMap=diffScoreOctavesMap, missing_segm2=missing_segm2, segmHistogram=segmHistogram, segmWordHistogram=segmWordHistogram, regionsCountNonMax=regionsCountNonMax, missing_segmNonMax=missing_segmNonMax)
    print("GT segmentations count {0}".format(gtSegmCount))
    print('FasTex Inside {0}/{1} ({2})'.format(keypointsTotalInside, keypointsTotal, keypointsTotalInside / float(keypointsTotal)))
    print('FasText time: {0}, Orb time: {1} '.format(np.sum(times, 0)[0], orbTime))
    print('2 Chars Segmentation: {0}'.format(segm2chars))
    print('NonMax Regions Count: {0}/{1}'.format(regionsCountNonMax, missing_segmNonMaxCount))
def creat_mask_VOC_labelfile():
    with open(gt_label_file_path, 'r') as f:
        gt_lines = f.readlines()
    with open(ignore_bbox_file_path, 'r') as f:
        ign_lines = f.readlines()
    plt.figure(figsize=(10, 10))
    for i in gt_lines:
        info = i.split(' ')
        img_origin = pimg.imread(images_root_path + info[0])
        img_copy = img_origin.copy()
        plt.imshow(img_copy, aspect='equal')
        ign_ = [j.strip().split(' ')[1:] for j in ign_lines if j.split(' ')[0] == info[0][:9]]
        ign_bboxs = []
        if ign_:
            ign_bboxs = [float(b) for b in ign_[0]]
            # print(ign_box)
            ign_bboxs = np.array(ign_bboxs, dtype=np.float32).reshape(-1, 4)
            for b in ign_bboxs:
                rect = plt.Rectangle((b[0], b[1]), b[2] - b[0], b[3] - b[1], fill=True, facecolor='black', linewidth=1)
                plt.gca().add_patch(rect)
                # add black mask to image
                img_copy[int(b[1]):int(b[3]), int(b[0]):int(b[2]), :] = 0
        #print (ign_bbox)
        bbox = [float(b) for b in info[1:]]
        boxes = np.array(bbox, dtype=np.float32).reshape(-1, 4)
        efficient_bboxs = []
        # judge the area_intersect of bbox and ign_bbox
        for b in boxes:
            bbox_area = area(b)
            ratio_intersect = 0.0
            area_intersect = []
            for ign_bbox in ign_bboxs:
                area_intersect.append(area(intersect(b, ign_bbox)))
            # guard against an empty list when there are no ignore boxes (max([]) would raise)
            max_area_intersect = max(area_intersect) if area_intersect else 0.0
            ratio_intersect = max_area_intersect / bbox_area
            if ratio_intersect >= 0.2:
                continue
            else:
                rect = plt.Rectangle((b[0], b[1]), b[2] - b[0], b[3] - b[1], fill=False, edgecolor=(0, 1, 0), linewidth=1)
                efficient_bboxs.append(b)
                plt.gca().add_patch(rect)
        image_new_name = info[0].split('/')[0] + '_' + info[0].split('/')[1]
        #print (efficient_bboxs)
        create_VOC_label_file(os.path.join(image_label_save_rootpath, xml_file_path), image_new_name, img_origin.shape, efficient_bboxs)
        image_save_path = os.path.join(os.path.join(image_label_save_rootpath, image_path), image_new_name)
        cv2.imwrite(image_save_path, cv2.cvtColor(img_copy, cv2.COLOR_RGB2BGR))
        plt.pause(0.01)
        plt.cla()
def main():
    cam = cv2.VideoCapture(0)
    # load data
    config = configparser.ConfigParser()
    config.read("config.ini")
    face_database, face_collection = loadMongoData(config)
    # load models
    device = "MYRIAD"
    data_dir = "data"
    plugin = IEPlugin(device, plugin_dirs=None)
    face_embed = FaceEmbedding(plugin)
    face_detect = MobileFaceDetect(plugin)
    # params
    fd_conf = 0.5
    fm_threshold = 0.6
    label = "new"
    period = 1
    button_pressed = False
    max_num = 3
    num = 0
    face_features = []
    face_imgs = []
    s = time.time()
    while (True):
        ret, frame = cam.read()
        if not ret:
            print("dead cam")
            cam = cv2.VideoCapture(0)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
            continue
        face_bboxes = []
        # if time.time() - s > period:
        if cv2.waitKey(1) & 0xFF == ord('c'):
            button_pressed = True
        if button_pressed and (num < max_num):
            face_bboxes = face_detect.inference(frame)
        if (len(face_bboxes) > 0) and button_pressed:
            areas = [area(box) for box in face_bboxes]
            max_id = np.argmax(np.asarray(areas))
            mfb = face_bboxes[max_id]
            main_face = frame[mfb[1]:mfb[3], mfb[0]:mfb[2], :]
            # TODO real face detection
            # TODO face alignment
            face_feature = face_embed.inference(main_face)
            face_feature = face_feature.tolist()
            face_features.append(face_feature)
            face_imgs.append(main_face)
            num += 1
            button_pressed = False
            s = time.time()
            # visualize for debug
            cv2.rectangle(frame, (mfb[0], mfb[1]), (mfb[2], mfb[3]), (255, 0, 0), 2)
            cv2.putText(frame, str(num), (mfb[0], mfb[1]), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), lineType=cv2.LINE_AA)
            print(num)
        if num >= max_num:
            # add new face features to database
            new_id = face_database.count()
            new_face = {'name': str(new_id), 'feats': face_features}
            p_id = face_collection.insert_one(new_face).inserted_id
            # commit changes
            face_collection.update_one({'_id': p_id}, {"$set": new_face}, upsert=False)
            # save images
            img_dir = os.path.join(data_dir, str(new_id))
            os.mkdir(img_dir)
            for i, face in enumerate(face_imgs):
                img_path = os.path.join(img_dir, "{}.jpg".format(i))
                cv2.imwrite(img_path, face)
            face_imgs = []
            face_features = []
            num = 0
            s = time.time()
            print("done!")
        cv2.imshow("face registration", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
def main():
    utils.area()
    utils.reverse()
    utils.time_date()