import copy
import time

import cv2
import matplotlib.pyplot as plt


def main():
    # ---------------- initialization ----------------
    plt.ion()
    index = 0

    capture = cv2.VideoCapture(input_video)

    # load the pre-trained model and its config file
    t0 = time.time()
    net = cv2.dnn.readNet(wieght_arg, config_arg)
    print("Loaded model in {:.2f}s".format(time.time() - t0))

    # start processing
    ret, _ = capture.read()  # Is there any frame to read?
    while ret:
        index += 1
        ret, frame = capture.read()
        if not ret:
            break
        img = copy.deepcopy(frame)

        # applying transformations and appropriate changes to the frame to feed the loaded model
        # ---- this section is empty in OpenCV ----

        t0 = time.time()
        # feed the frame to the loaded model and obtain the bounding boxes of detected objects
        roi_boxes, roi_confidences, roi_class, roi_indices = detection(frame, net)

        # draw the boxes around the detected objects and save the cropped objects simultaneously
        for i in roi_indices:
            i = i[0]
            for j, v in enumerate(roi_boxes[i]):
                if v < 0:
                    roi_boxes[i][j] = 0
            box = roi_boxes[i]
            x = round(box[0])
            y = round(box[1])
            w = round(box[2])
            h = round(box[3])
            # crop the bounding box of each detected object and save it in the './extracted_objects/' directory
            cv2.imwrite(
                './extracted_objects/frame_' + str(index) + '_obj_' + str(i) + '.jpg',
                img[y:y + h, x:x + w])
            draw_bounding_box(frame, roi_class[i], roi_confidences[i], x, y,
                              x + w, y + h)
        print("Predictions found in {:.2f}s".format(time.time() - t0))

        # show the result of processing each frame to the user
        plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        plt.xticks([])
        plt.yticks([])
        plt.pause(0.02)
        plt.show()
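# Note: the `detection(frame, net)` helper is not shown above. A minimal sketch of
# what it plausibly does for a YOLO-style model loaded with cv2.dnn.readNet is
# given below; the 416x416 input size and the thresholds are assumptions, and
# older OpenCV versions return the NMS indices nested, which matches the
# `i = i[0]` in the loop above.
import cv2
import numpy as np


def detection(frame, net, conf_threshold=0.5, nms_threshold=0.4):
    """Run the loaded DNN on one frame; return boxes, confidences, class ids and NMS indices."""
    height, width = frame.shape[:2]
    blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416),
                                 swapRB=True, crop=False)
    net.setInput(blob)
    layer_outputs = net.forward(net.getUnconnectedOutLayersNames())

    boxes, confidences, class_ids = [], [], []
    for output in layer_outputs:
        for det in output:
            scores = det[5:]
            class_id = int(np.argmax(scores))
            confidence = float(scores[class_id])
            if confidence > conf_threshold:
                # YOLO outputs centre/size relative to the image; convert to pixel x, y, w, h
                cx, cy, w, h = det[0:4] * np.array([width, height, width, height])
                boxes.append([cx - w / 2, cy - h / 2, w, h])
                confidences.append(confidence)
                class_ids.append(class_id)

    indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)
    return boxes, confidences, class_ids, indices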
def annotate_image(image, idxs, boxes, COLORS, LABELS, confidences, classIDs):
    bounding_boxes = []
    if len(idxs) > 0:
        for i in idxs.flatten():
            (x, y) = (boxes[i][0], boxes[i][1])
            (w, h) = (boxes[i][2], boxes[i][3])
            bounding_boxes.append(boxes[i])
            color = [int(c) for c in COLORS[classIDs[i]]]
            label = LABELS[classIDs[i]]
            draw_bounding_box(image, color, label, confidences[i], x, y, w, h)
    # return the boxes that were drawn
    return bounding_boxes
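# Note: the `draw_bounding_box` helper used above is not shown. A minimal
# OpenCV-based sketch matching the (image, color, label, confidence, x, y, w, h)
# call is below; the exact drawing style is an assumption.
import cv2


def draw_bounding_box(image, color, label, confidence, x, y, w, h):
    """Draw one detection box with a 'label: confidence' caption."""
    cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)
    caption = "{}: {:.2f}".format(label, confidence)
    cv2.putText(image, caption, (x, max(y - 5, 0)),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)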
def main():
    # Run the test
    for file in dirs:
        image_path = os.path.join(text_floder, os.path.basename(file))
        # Load the image both in RGB and in grayscale
        rgb_image = load_image(image_path, grayscale=False, color_mode="rgb")
        gray_image = load_image(image_path, grayscale=True, color_mode="grayscale")
        # Drop the singleton channel dimension (keep only height and width)
        gray_image = np.squeeze(gray_image)
        gray_image = gray_image.astype("uint8")

        faces = detect_faces(face_detection, gray_image)
        for face_coordinates in faces:
            # Corner coordinates of the face rectangle in the image
            x1, x2, y1, y2 = get_coordinates(face_coordinates)
            # Crop the face pixels
            gray_face = gray_image[y1:y2, x1:x2]
            try:
                # Resize the face to the input size the emotion model expects
                gray_face = cv2.resize(gray_face, emotion_target_size)
            except cv2.error:
                continue
            gray_face = preprocessing_input(gray_face)
            gray_face = np.expand_dims(gray_face, 0)
            gray_face = np.expand_dims(gray_face, -1)
            # Predict the emotion
            emotion_label_arg = np.argmax(emotion_classifier.predict(gray_face))
            emotion_text = emotion_labels[emotion_label_arg]
            color = (255, 0, 0)
            # Draw the bounding box
            draw_bounding_box(face_coordinates, rgb_image, color)
            # Draw the emotion label
            draw_text(face_coordinates, rgb_image, emotion_text, color, 0,
                      face_coordinates[3] + 30, 1, 2)

        # Convert back to BGR before writing with OpenCV
        bgr_image = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2BGR)
        cv2.imwrite("./pic_test/" + "predict" + os.path.basename(file), bgr_image)
        cv2.waitKey(1)

    cv2.destroyAllWindows()
    print("Recognized %d images" % len(dirs))
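# Note: `detect_faces`, `get_coordinates` and `preprocessing_input` are not
# shown above. Minimal sketches, assuming an OpenCV Haar-cascade face detector
# and simple [-1, 1] input scaling, are:
import cv2


def detect_faces(detection_model, gray_image):
    """Run the (assumed) Haar-cascade detector on a grayscale image."""
    return detection_model.detectMultiScale(gray_image, 1.3, 5)


def get_coordinates(face_coordinates):
    """Turn an (x, y, w, h) detection into the (x1, x2, y1, y2) corners used above."""
    x, y, w, h = face_coordinates
    return x, x + w, y, y + h


def preprocessing_input(x):
    """Scale pixel values to [-1, 1] (an assumption about how the model was trained)."""
    x = x.astype("float32") / 255.0
    return (x - 0.5) * 2.0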
im_dim_batches = [
    torch.cat((im_dim_list[i * args.batch_size:min(
        (i + 1) * args.batch_size, len(im_batches))]))
    for i in range(num_batches)
]

output = []
for i, batch in enumerate(im_batches):
    start = time.time()
    with torch.no_grad():
        prediction, _ = model(batch)
    prediction = utils.non_max_suppression(prediction, args.conf_thresh,
                                           args.nms_thresh)
    end = time.time()
    print("The inference time of batch %d is %.3f" % (i, end - start))
    output.extend(prediction)

colors = utils.get_cmap()
for i in range(len(output)):
    if output[i] is not None:
        res = utils.recover_img_size(output[i], im_dim_list[i], args.img_size)
        for det in res:
            utils.draw_bounding_box(det, loaded_ims[i], colors, classes)
        name = os.path.join(args.output_path,
                            'det_' + os.path.basename(imlist[i]))
        cv2.imwrite(name, loaded_ims[i])
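# Note: `utils.recover_img_size` is not shown here. A sketch of the usual
# letterbox-undo step (mapping boxes predicted on the square args.img_size
# input back to the original image size) is below; that `detections` is a torch
# tensor with (x1, y1, x2, y2) in its first four columns, and that im_dim_list
# stores (width, height), are assumptions.
def recover_img_size(detections, orig_dim, img_size):
    """Rescale box corners from the padded square input back to the original image."""
    orig_w, orig_h = float(orig_dim[0]), float(orig_dim[1])
    scale = min(img_size / orig_w, img_size / orig_h)
    pad_x = (img_size - orig_w * scale) / 2
    pad_y = (img_size - orig_h * scale) / 2
    detections[:, [0, 2]] = (detections[:, [0, 2]] - pad_x) / scale
    detections[:, [1, 3]] = (detections[:, [1, 3]] - pad_y) / scale
    detections[:, :4] = detections[:, :4].clamp(min=0)
    return detections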
# initialize the camera and grab a reference to the raw camera capture
cap = cv.VideoCapture(
    'udpsrc port=5000 ! application/x-rtp, payload=96 ! rtpjitterbuffer ! '
    'rtph264depay ! avdec_h264 ! videoconvert ! queue ! appsink sync=false',
    cv.CAP_GSTREAMER)
detector = dlib.get_frontal_face_detector()

start = time.time()
frame_id = 0

# capture frames from the camera
while True:
    ret, frame = cap.read()
    if not ret:
        break

    faces = detector(frame, 1)
    for rect in faces:
        (x, y, w, h) = rect_to_bb(rect)
        x1, x2, y1, y2 = apply_offsets((x, y, w, h), (20, 40))
        color = (0, 255, 0)
        draw_bounding_box(image=frame,
                          coordinates=(x1, y1, x2 - x1, y2 - y1),
                          color=color)

    num_faces = len(faces)
    end = time.time()
    seconds = end - start
    fps = 1.0 / seconds
    draw_str(frame, (20, 20), 'fps: %d' % fps)

    # show the frame
    cv.imshow("Frame", frame)
    key = cv.waitKey(1) & 0xFF
    start = time.time()
    frame_id += 1

    # if the `q` key was pressed, break from the loop
    if key == ord("q"):
        break
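# Note: `rect_to_bb`, `apply_offsets`, `draw_bounding_box` and `draw_str` are
# defined elsewhere. Sketches of the two coordinate helpers, following the
# usual imutils / face_classification conventions (an assumption), are:
def rect_to_bb(rect):
    """Convert a dlib rectangle into an (x, y, w, h) tuple."""
    x = rect.left()
    y = rect.top()
    w = rect.right() - x
    h = rect.bottom() - y
    return (x, y, w, h)


def apply_offsets(face_coordinates, offsets):
    """Expand the box by (x_off, y_off) pixels per side and return (x1, x2, y1, y2)."""
    x, y, width, height = face_coordinates
    x_off, y_off = offsets
    return (x - x_off, x + width + x_off, y - y_off, y + height + y_off)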
def index():
    print('Request-form', list(request.form.keys()), file=sys.stderr)
    print('Request-form-name', request.form['name'], file=sys.stderr)

    image_name = request.form['name']
    image_string = request.form['image']

    # Decode the base64-encoded image into a PIL image, then into a numpy array
    image = Image.open(BytesIO(base64.b64decode(image_string)))
    # rotated_image = image.rotate(270, expand=True)
    # input_array = np.array(rotated_image)
    input_array = np.array(image)
    input_array = np.expand_dims(input_array, axis=0)

    # Run the detection graph
    (_boxes, _scores, _classes, _masks) = sess.run(
        [boxes, scores, classes, masks], feed_dict={input_: input_array})
    _boxes = np.squeeze(_boxes, axis=0)
    _scores = np.squeeze(_scores, axis=0)
    _classes = np.squeeze(_classes, axis=0)
    _masks = np.squeeze(_masks, axis=0)
    input_array = np.squeeze(input_array, axis=0)

    # Keep detections above the score threshold and draw them on the image
    detections = utils.get_detections(_scores, config.threshold_score)
    utils.draw_bounding_box(input_array, detections, _boxes, _classes,
                            class_map, _masks)

    result_image = Image.fromarray(input_array)
    result_image.save('output.jpg', format='JPEG')

    # Convert the annotated image back to a base64 string for the response
    buffered = BytesIO()
    result_image.save(buffered, format="JPEG")
    final_img_str = base64.b64encode(buffered.getvalue())

    response = final_img_str
    return response
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network, and output stats
    and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    # Initialise the class
    infer_network = Network()
    # Set the probability threshold for detections
    prob_threshold = args.prob_threshold

    if 'faster' in args.model:
        faster_rnn = True
    else:
        faster_rnn = False

    ### TODO: Load the model through `infer_network` ###
    infer_network.load_model(args.model,
                             device=args.device,
                             cpu_extension=args.cpu_extension)

    # We need the model's required input dimensions:
    required_input_shape = infer_network.get_input_shape(faster_rnn=faster_rnn)
    required_input_width = required_input_shape[2]
    required_input_height = required_input_shape[3]

    ### TODO: Handle the input stream ###
    if args.input != 'CAM':
        try:
            # OpenCV's VideoCapture can read both videos and images:
            input_stream = cv2.VideoCapture(args.input)
            length = int(input_stream.get(cv2.CAP_PROP_FRAME_COUNT))
            webcamera = False
            # Check whether the input is an image or a video file:
            if length > 1:
                single_image_mode = False
            else:
                single_image_mode = True
        except:
            print('Unsupported image or video file format. Please pass a supported one.')
            exit()
    else:
        input_stream = cv2.VideoCapture(0)
        single_image_mode = False
        webcamera = True

    # We need fps for time-related calculations:
    fps = input_stream.get(cv2.CAP_PROP_FPS)
    # We also need the input stream width and height:
    stream_width = int(input_stream.get(3))
    stream_height = int(input_stream.get(4))

    not_in_frame = 0  # Counter used with the Faster R-CNN model.
    frames_for_quit = 10  # Consecutive frames without a detection before a person is considered completely out of frame.

    if not single_image_mode:
        ### TODO: Loop until stream is over ###
        # Tuning values and others required for the counter logic:
        ## Tuning, could be exposed as arguments:
        LOWER_HALF = 0.7  # Fraction of the total height below which a centroid counts as being in the "lower half".
        RIGHT_HALF = 0.8  # Fraction of the total width beyond which a centroid counts as being in the "right half". 0.87 works but is too extreme.
        DETECTION_FRAMES = 1  # The detection model runs whenever count_frame is divisible by this number.
        count_frame = 0  # Frame counter.
        status_lower_half = False  # Status of the lower half.
        status_upper_half = False  # Status of the upper half.
        id = 0  # Identifier for people.
        current_person = []  # For storing the current person in frame.
        current_time = [0]  # For storing the last recorded time.
        # Params to send to the MQTT server:
        total_counted = 0  # People counter.
        people_in_frame = 0  # People-in-frame status.

        while input_stream.isOpened():
            ### TODO: Read from the video capture ###
            # Read the next frame:
            flag, frame = input_stream.read()
            # Quit if there is no more stream:
            if not flag:
                break
            # Quit if 'q' is pressed:
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

            # Execute the detection model if required in this frame:
            if count_frame % DETECTION_FRAMES == 0:
                ### TODO: Pre-process the image as needed ###
                preprocessed_frame = utils.handle_image(
                    frame,
                    width=required_input_width,
                    height=required_input_height)

                ### TODO: Start asynchronous inference for specified request ###
                infer_network.exec_net(preprocessed_frame, faster_rnn=faster_rnn)

                ### TODO: Wait for the result ###
                status = infer_network.wait()
                if status == 0:  # Wait until we have results.
                    prev_results = infer_network.get_output()  # Get outputs.

                    ### TODO: Get the results of the inference request ###
                    results_bb = []
                    for p_r in prev_results[0, 0]:  # Iterate over outputs.
                        # Filter relevant outputs. p_r[1] == 1: keep only people.
                        if p_r[2] >= args.prob_threshold and p_r[1] == 1.0:
                            results_bb.append(p_r[3:])  # Save those relevant results.

                    ### TODO: Extract any desired stats from the results ###
                    if not faster_rnn:
                        # Faster R-CNN has better detection capabilities, so it is not processed the same way.
                        if len(results_bb) > 0:
                            for detection in results_bb:  # Iterate through each detection:
                                centroid = utils.calculate_centroid(detection)
                                frame = utils.draw_bounding_box(frame, detection)
                                if centroid[1] > LOWER_HALF and status_lower_half == False and status_upper_half == False:
                                    # There is a new detection at the lower border.
                                    status_lower_half = True
                                    person = utils.Person(id=id, frame_init=count_frame)
                                    current_person.append(person)
                                    total_counted = total_counted + 1
                                    id = id + 1
                                elif status_lower_half:
                                    status_lower_half = False
                                    status_upper_half = True
                                # Check that there is a detection in one of the halves:
                                people_in_frame = status_upper_half + status_lower_half
                                if centroid[0] > RIGHT_HALF and status_upper_half == True:
                                    status_lower_half = False
                                    status_upper_half = False
                                    people_in_frame = 0
                                    current_time[0] = (count_frame - current_person[0].frame_init) / fps
                                    current_person = []
                                    client.publish(
                                        "person/duration",
                                        json.dumps({"duration": current_time[0]}))
                    else:
                        # Using the Faster R-CNN model:
                        if len(results_bb) == 0:
                            not_in_frame = not_in_frame + 1
                            if not_in_frame >= frames_for_quit and current_person:
                                not_in_frame = 0
                                people_in_frame = 0
                                if current_person:
                                    # Subtracting 'frames_for_quit' because we stopped detecting this person that many frames ago.
                                    current_time[0] = (count_frame - current_person[0].frame_init - frames_for_quit) / fps
                                    current_person = []
                                    client.publish(
                                        "person/duration",
                                        json.dumps({"duration": current_time[0]}))
                        else:
                            people_in_frame = 1
                            not_in_frame = 0
                            for detection in results_bb:  # Iterate through each detection:
                                frame = utils.draw_bounding_box(frame, detection)
                            if not current_person:
                                # There is no recorded person yet.
                                person = utils.Person(id=id, frame_init=count_frame)
                                current_person.append(person)
                                total_counted = total_counted + 1
                                id = id + 1

                    ### TODO: Calculate and send relevant information on ###
                    ### current_count, total_count and duration to the MQTT server ###
                    ### Topic "person": keys of "count" and "total" ###
                    ### Topic "person/duration": key of "duration" ###
                    client.publish(
                        "person",
                        json.dumps({
                            "count": people_in_frame,
                            "total": total_counted
                        }))

                    # Additional feature: change the timer color when a person has been on screen for more than 15 seconds.
                    if people_in_frame:
                        current_time[0] = (count_frame - current_person[0].frame_init) / fps
                        if current_time[0] > 15:
                            font_color = (0, 0, 255)
                        else:
                            font_color = (0, 0, 0)
                        frame = utils.draw_text(
                            frame,
                            "Current person: " + str(current_time[0]) + " secs",
                            font_color=font_color)
                    else:
                        if current_time[0] > 15:
                            font_color = (0, 0, 255)
                        else:
                            font_color = (0, 0, 0)
                        frame = utils.draw_text(
                            frame,
                            "Last person: " + str(current_time[0]) + " secs",
                            font_color=font_color)

            ### TODO: Send the frame to the FFMPEG server ###
            if not webcamera:
                sys.stdout.buffer.write(frame)
            else:
                cv2.imshow('Result', frame)

            count_frame = count_frame + 1

        # Release resources:
        input_stream.release()
    else:
        flag, frame = input_stream.read()
        preprocessed_frame = utils.handle_image(frame,
                                                width=required_input_width,
                                                height=required_input_height)
        infer_network.exec_net(preprocessed_frame)
        status = infer_network.wait()
        if status == 0:  # Wait until we have results.
            prev_results = infer_network.get_output()  # Get outputs.
            results_bb = []
            for p_r in prev_results[0, 0]:  # Iterate over outputs.
                # Filter relevant outputs. p_r[1] == 1: keep only people.
                if p_r[2] >= args.prob_threshold and p_r[1] == 1.0:
                    results_bb.append(p_r[3:])  # Save those relevant results.
            if len(results_bb) > 0:
                for detection in results_bb:  # Iterate through each detection:
                    frame = utils.draw_bounding_box(frame, detection)
            frame = utils.draw_text(frame,
                                    "People in Frame: " + str(len(results_bb)),
                                    coordinates=(0.05, 0.05))
            cv2.imwrite('result_single_image.png', frame)

    cv2.destroyAllWindows()
    # Disconnect from MQTT:
    client.disconnect()
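# Note: `utils.handle_image` is not shown in this snippet. A minimal sketch of
# what it plausibly does for an OpenVINO-style network (resize to the model's
# input size, reorder HWC -> CHW, add a batch axis) is given below; the exact
# helper in `utils` may differ.
import cv2


def handle_image(frame, width, height):
    """Resize a BGR frame to (width, height), move channels first and add a batch dim."""
    image = cv2.resize(frame, (width, height))
    image = image.transpose((2, 0, 1))      # HWC -> CHW
    image = image.reshape(1, *image.shape)  # add batch dimension: 1 x C x H x W
    return image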
def index():
    print('Request-form', list(request.form.keys()), file=sys.stderr)

    image_string = request.form['image']

    # Decode the base64-encoded image into a PIL image, then into a numpy array
    image = Image.open(BytesIO(base64.b64decode(image_string)))
    # rotated_image = image.rotate(270, expand=True)
    # input_array = np.array(rotated_image)
    input_array = np.array(image)
    input_array = np.expand_dims(input_array, axis=0)

    # Load the frozen detection graph
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(config.mask_model_infer_path, mode='rb') as graph_file:
            serialized_graph = graph_file.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def)

    class_map = utils.get_class_map(config.class_map_file)

    with tf.Session(graph=detection_graph) as sess:
        input_ = sess.graph.get_tensor_by_name("import/image_tensor:0")
        boxes = sess.graph.get_tensor_by_name("import/detection_boxes:0")
        scores = sess.graph.get_tensor_by_name("import/detection_scores:0")
        classes = sess.graph.get_tensor_by_name("import/detection_classes:0")
        masks = sess.graph.get_tensor_by_name("import/detection_masks:0")

        # Run the detection graph
        (_boxes, _scores, _classes, _masks) = sess.run(
            [boxes, scores, classes, masks], feed_dict={input_: input_array})

    _boxes = np.squeeze(_boxes, axis=0)
    _scores = np.squeeze(_scores, axis=0)
    _classes = np.squeeze(_classes, axis=0)
    _masks = np.squeeze(_masks, axis=0)
    input_array = np.squeeze(input_array, axis=0)

    # Keep detections above the score threshold and draw them on the image
    detections = utils.get_detections(_scores, config.threshold_score)
    utils.draw_bounding_box(input_array, detections, _boxes, _classes,
                            class_map, _masks)

    result_image = Image.fromarray(input_array)

    # Convert the annotated image back to a base64 string for the response
    buffered = BytesIO()
    result_image.save(buffered, format="JPEG")
    final_img_str = base64.b64encode(buffered.getvalue())

    response = final_img_str
    return response
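# Note: `utils.get_detections` is not shown here. A plausible minimal version,
# assuming it simply selects the indices of detections whose score clears
# `config.threshold_score`, is:
import numpy as np


def get_detections(scores, threshold_score):
    """Return the indices of detections with score >= threshold_score."""
    return np.where(scores >= threshold_score)[0]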
import base64
import json

import numpy as np
import requests
import streamlit as st

from utils import FILE_TYPES, IP_ADDRESS, draw_bounding_box, bytes_to_PIL_image

st.set_option('deprecation.showfileUploaderEncoding', False)
st.title("Mask RCNN with FastAPI")

file_buffer = st.file_uploader("Please upload an image", type=FILE_TYPES)
if file_buffer is not None:
    img_bytes = file_buffer.read()
    st.image(img_bytes, caption="Test image")

if st.button("Detect Objects"):
    if file_buffer is None:
        st.write("No image uploaded...")
    else:
        img = bytes_to_PIL_image(img_bytes)
        img_bytes = base64.b64encode(img_bytes)
        img_bytes = img_bytes.decode("utf-8")
        payload = json.dumps({"img_bytes": img_bytes})
        res = requests.put(IP_ADDRESS, payload)
        json_object = res.json()

        img = np.asarray(img)
        img = draw_bounding_box(img, json_object["boxes"], json_object["classes"])
        st.image(img, caption="Processed image")
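# Note: the helpers imported from `utils` are not shown in this snippet. A
# minimal sketch of what they might look like is below; FILE_TYPES, the
# IP_ADDRESS placeholder, and the pixel (x1, y1, x2, y2) box layout assumed by
# draw_bounding_box are all assumptions, not the project's actual code.
from io import BytesIO

import cv2
import numpy as np
from PIL import Image

FILE_TYPES = ["png", "jpg", "jpeg"]
IP_ADDRESS = "http://localhost:8000/predict"  # placeholder endpoint URL


def bytes_to_PIL_image(img_bytes):
    """Decode the uploaded raw bytes into an RGB PIL image."""
    return Image.open(BytesIO(img_bytes)).convert("RGB")


def draw_bounding_box(img, boxes, classes):
    """Draw each box with its class label; boxes are assumed to be pixel (x1, y1, x2, y2)."""
    img = np.ascontiguousarray(img)  # make sure the array is writable for OpenCV
    for box, cls in zip(boxes, classes):
        x1, y1, x2, y2 = [int(v) for v in box]
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(img, str(cls), (x1, max(y1 - 5, 0)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
    return img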
        # Fragment: runs for each detected face inside the video-capture loop.
        gray_face = np.expand_dims(gray_face, -1)
        emotion_prediction = emotion_classifier.predict(gray_face)
        # emotion_probability = np.max(emotion_prediction)
        emotion_label_arg = np.argmax(emotion_prediction, axis=1)
        emotion_text = class_names[int(emotion_label_arg)]

        picType, prob = face_model.predict(face_recog)
        if picType != -1:
            name_list = read_name_list(
                '/Users/gaoxingyun/Documents/uw/courses/Sp19/EE576_CV/project/realtime_emotion_recognition/dataset')
            print(name_list[picType], prob)
            face_text = name_list[picType]
        else:
            print("Don't know this person")
            face_text = 'unknown'

        color = (0, 255, 0)
        draw_bounding_box(face_coordinates, rgb_image, color)
        draw_text(face_coordinates, rgb_image, emotion_text, color, 0, 45, 1, 1)
        draw_text(face_coordinates, rgb_image, face_text, color, 0, -45, 1, 1)

    # Show the annotated frame and exit on 'q'
    bgr_image = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2BGR)
    cv2.imshow('window_frame', bgr_image)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
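# Note: `read_name_list` is not part of this snippet. A plausible minimal
# version, assuming the recognizer's label indices simply follow the dataset's
# per-person sub-folder names, is:
import os


def read_name_list(dataset_path):
    """Return the person names, taken from the dataset's sub-folder names."""
    return sorted(d for d in os.listdir(dataset_path)
                  if os.path.isdir(os.path.join(dataset_path, d)))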