def YOLO(video_path, filename):
    # global metaMain, netMain, altNames
    trash_set = set()
    license_set = set()
    start_time = time.time()
    configPath = "./cfg/yolov4_trash.cfg"
    weightPath = "./backup_trash/yolov4_trash_best.weights"
    metaPath = "./cfg/trash.data"
    if not os.path.exists(configPath):
        raise ValueError("Invalid config path `" + os.path.abspath(configPath) + "`")
    if not os.path.exists(weightPath):
        raise ValueError("Invalid weight path `" + os.path.abspath(weightPath) + "`")
    if not os.path.exists(metaPath):
        raise ValueError("Invalid data file path `" + os.path.abspath(metaPath) + "`")

    m = Darknet(configPath)
    m.print_network()
    m.load_weights(weightPath)
    print('Loading weights from %s... Done!' % (weightPath))

    if use_cuda:
        m.cuda()

    num_classes = m.num_classes
    namesfile = './cfg/trash.names'
    class_names = load_class_names(namesfile)

    fullpath = join(video_path, filename)
    cap = CameraStream(fullpath).start()
    fps = cap.get_fps()  # OpenCV version 2 used "CV_CAP_PROP_FPS"
    size = cap.get_size()

    trash_max = 0
    old_time = 0

    # Record the timestamp of every trash-dumping event
    trash_record_dict = {}

    # Extract the start time from the filename
    filename_start_time = filename[-10:-4]
    # Split into hour : minsec
    filename_start_time = filename_start_time[:2] + ":" + filename_start_time[2:]
    # Split into hour : min : sec
    filename_start_time = filename_start_time[:5] + ":" + filename_start_time[5:]
    filename_tmp_x = time.strptime(filename_start_time.split(',')[0], '%H:%M:%S')
    filename_start_time_s = datetime.timedelta(
        hours=filename_tmp_x.tm_hour,
        minutes=filename_tmp_x.tm_min,
        seconds=filename_tmp_x.tm_sec).total_seconds()

    out = None
    new_size = (416, 416)
    while True:
        prev_time = time.time()
        ret, frame_read = cap.read()
        old_frame = None
        out_license = False
        out_time = False
        if ret:
            old_frame = frame_read
            frame_read = cv2.cvtColor(frame_read, cv2.COLOR_BGR2RGB)
        else:
            print('Video has ended or failed, try a different video format!')
            break

        frame_resized = cv2.resize(frame_read, new_size, interpolation=cv2.INTER_LINEAR)
        boxes = do_detect(m, frame_resized, 0.4, 0.6, use_cuda)

        trash_center = (0, 0)
        license_center = (frame_resized.shape[0], frame_resized.shape[1])
        frame_resized = cv2.resize(old_frame, new_size, interpolation=cv2.INTER_LINEAR)
        width = frame_read.shape[1]
        height = frame_read.shape[0]

        for i in range(len(boxes[0])):
            box = boxes[0][i]
            # print(type(box))
            # print(box)
            xmin = int(box[0] * size[0])
            ymin = int(box[1] * size[1])
            xmax = int(box[2] * size[0])
            ymax = int(box[3] * size[1])
            cls_id = box[6]

            # Draw the detection
            pt1 = (xmin, ymin)
            pt2 = (xmax, ymax)
            cv2.rectangle(frame_resized, pt1, pt2, (0, 255, 0), 1)
            cv2.putText(
                frame_resized,
                class_names[cls_id] + " [" + str(round(box[5] * 100, 2)) + "]",
                (pt1[0], pt1[1] - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                [0, 255, 0], 2)

            # Check whether this is the same piece of trash as one already seen
            center = (int((xmin + xmax) / 2), int((ymin + ymax) / 2))
            temp = (center, pt1, pt2)
            similar_trash = False
            similar_license = False
            if class_names[cls_id] == 'trash':
                # temp = (center, pt1, pt2)
                if temp not in trash_set:
                    # print('get new trash')
                    # out_time = True
                    for c in trash_set:
                        center_check = abs(c[0][0] - center[0]) + abs(c[0][1] - center[1])
                        if center_check <= 5:
                            similar_trash = True
                            break
                        # Treat as the same object if the overlapping area exceeds 60%
                        else:
                            if (c[2][0] - xmin) + (c[2][1] - ymin) < (
                                    xmax - xmin) + (ymax - ymin) + (
                                    c[2][0] - c[1][0]) + (c[2][1] - c[1][1]):
                                cross_xmin = max(xmin, c[1][0])
                                cross_ymin = max(ymin, c[1][1])
                                cross_xmax = min(xmax, c[2][0])
                                cross_ymax = min(ymax, c[2][1])
                                cross_area = abs((cross_xmax - cross_xmin) *
                                                 (cross_ymax - cross_ymin))
                                total_area = (xmax - xmin) * (ymax - ymin) + (
                                    c[2][0] - c[1][0]) * (c[2][1] - c[1][1]) - cross_area
                                if total_area != 0:
                                    iou = float(cross_area / total_area)
                                    if abs(iou) > 0.6:
                                        similar_trash = True
                                        break
                    if not similar_trash:
                        trash_set.add(temp)
                        out_time = True
            else:
                if temp not in license_set:
                    for c in license_set:
                        if abs(c[0][0] - center[0]) + abs(c[0][1] - center[1]) <= 8:
                            similar_license = True
                            break
                        else:
                            if (c[2][0] - xmin) + (c[2][1] - ymin) < (
                                    xmax - xmin) + (ymax - ymin) + (
                                    c[2][0] - c[1][0]) + (c[2][1] - c[1][1]):
                                cross_xmin = max(xmin, c[1][0])
                                cross_ymin = max(ymin, c[1][1])
                                cross_xmax = min(xmax, c[2][0])
                                cross_ymax = min(ymax, c[2][1])
                                cross_area = abs((cross_xmax - cross_xmin) *
                                                 (cross_ymax - cross_ymin))
                                total_area = (xmax - xmin) * (ymax - ymin) + (
                                    c[2][0] - c[1][0]) * (c[2][1] - c[1][1]) - cross_area
                                if total_area != 0:
                                    iou = float(cross_area / total_area)
                                    if abs(iou) > 0.8:
                                        similar_license = True
                                        break
                    if not similar_license:
                        license_set.add(temp)
                        out_license = True

        result_img = plot_boxes_cv2(frame_resized, boxes[0], savename=None,
                                    class_names=class_names)
        frame_num = cap.get_frameNum()
        duration = frame_num / fps

        if out_time and duration > 5:
            if duration > old_time + 10 or old_time == 0:
                trash_max += 1
                old_time = duration
                Drop_trash_time_sec = str(
                    datetime.timedelta(seconds=(filename_start_time_s + duration)))[:8]
                if out_license:
                    if trash_max < 2:
                        trash_record_dict = {
                            filename[:-4] + "_Num_" + str(trash_max):
                            [Drop_trash_time_sec, str(1)]
                        }
                    else:
                        trash_record_dict[filename[:-4] + "_Num_" + str(trash_max)] = [
                            Drop_trash_time_sec, str(1)
                        ]
                    resized = cv2.resize(result_img, (1280, 720),
                                         interpolation=cv2.INTER_CUBIC)
                    cv2.imwrite(
                        './output/image/' + filename[:-4] + '_' + str(trash_max) + '_1.jpg',
                        resized)
                else:
                    if trash_max < 2:
                        trash_record_dict = {
                            filename[:-4] + "_Num_" + str(trash_max):
                            [Drop_trash_time_sec, str(0)]
                        }
                    else:
                        trash_record_dict[filename[:-4] + "_Num_" + str(trash_max)] = [
                            Drop_trash_time_sec, str(0)
                        ]
                    resized = cv2.resize(result_img, (1280, 720),
                                         interpolation=cv2.INTER_CUBIC)
                    cv2.imwrite(
                        './output/image/' + filename[:-4] + '_' + str(trash_max) + '_0.jpg',
                        resized)

        # print('trash_record_dict', trash_record_dict)
        out_time, out_license = False, False
        # cv2.putText(image, "Catch number " + str(trash_max), (10, 40),
        #             cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, [0, 0, 0], 2)

    cap.stop()

    if len(trash_record_dict) > 0:
        Drop_trash_df = pd.DataFrame(list(trash_record_dict.items()),
                                     columns=['Filename', 'Record'])
        Drop_trash_df[['RecordTime', 'Licence']] = pd.DataFrame(
            Drop_trash_df.Record.tolist(), index=Drop_trash_df.index)
        Drop_trash_df.drop(columns=["Record"], inplace=True)
        print(Drop_trash_df)
        Drop_trash_df.to_csv("./output/csv_file/" + filename[:-4] + ".csv", index=False)

        # print('#############start Record Video#################')
        # # filename = '20200927_234800.avi'
        # Trash_df = pd.read_csv("./trash_data/record/csv_file/" + filename[:-4] + ".csv")
        # filename_start_time = filename[-10:-4]
        # filename_start_time = filename_start_time[:2] + ":" + filename_start_time[2:]
        # filename_start_time = filename_start_time[:5] + ":" + filename_start_time[5:]
        # filename_tmp_x = time.strptime(filename_start_time.split(',')[0][:-1], '%H:%M:%S')
        # filename_start_time_s = datetime.timedelta(hours=filename_tmp_x.tm_hour,
        #                                            minutes=filename_tmp_x.tm_min,
        #                                            seconds=filename_tmp_x.tm_sec).total_seconds()
        # print(Trash_df)
        # for i in range(len(Trash_df)):
        #     fullpath = join(video_path, filename)
        #     video = cv2.VideoCapture(fullpath)
        #     frame_num = 0
        #     frame_width = int(video.get(3))
        #     frame_height = int(video.get(4))
        #     size = (frame_width, frame_height)
        #     result = cv2.VideoWriter('./trash_data/record/video/' + filename[:-4] + '_' +
        #                              listToString(Trash_df['RecordTime'][i].split(':')) + '_1.avi',
        #                              cv2.VideoWriter_fourcc(*'MJPG'), 10, size)
        #     x = time.strptime(Trash_df['RecordTime'][i].split(',')[0][:-1], '%H:%M:%S')
        #     y = datetime.timedelta(hours=x.tm_hour, minutes=x.tm_min,
        #                            seconds=x.tm_sec).total_seconds()
        #     y_add_10_sec = y + 10
        #     y_minus_10_sec = y - 10
        #     y_add_10_sec = y_add_10_sec - filename_start_time_s
        #     y_minus_10_sec = y_minus_10_sec - filename_start_time_s
        #     str_add_10_sec = str(datetime.timedelta(seconds=y_add_10_sec))
        #     str_minus_10_sec = str(datetime.timedelta(seconds=y_minus_10_sec))
        #     print('Cut video ending time ', str_add_10_sec,
        #           'Cut video beginning time ', str_minus_10_sec)
        #     while True:
        #         ret, frame = video.read()
        #         frame_num += 1
        #         if ret == True:
        #             duration = frame_num / fps
        #             duration_time = str(datetime.timedelta(seconds=duration))[:7]
        #             if duration_time >= str_minus_10_sec and duration_time <= str_add_10_sec and frame_num % 2 == 0:
        #                 # print('Save....', duration_time)
        #                 result.write(frame)
        #         else:
        #             print('End ret')
        #             break
        #     # When everything is done, release the video capture and video writer objects
        #     video.release()
        #     result.release()
        #     print("The video was successfully saved")
    else:
        print('Catch Nothing')

    print('How much time we spend for this video? ' + str(time.time() - start_time))
    print("*****************************End*************************************")
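
# The deduplication inside YOLO() keeps a box only if it is not "the same object" as an
# earlier one: first it compares the Manhattan distance between box centers, then an
# overlap ratio (intersection over union) against a threshold. Below is a minimal
# standalone sketch of that check; `is_same_object` is a hypothetical helper written for
# illustration and is not part of the original file, and it assumes boxes come as
# pixel tuples (xmin, ymin, xmax, ymax).
def is_same_object(box_a, box_b, center_dist=5, overlap_thresh=0.6):
    """Return True if box_b most likely shows the same object as box_a."""
    ax_min, ay_min, ax_max, ay_max = box_a
    bx_min, by_min, bx_max, by_max = box_b

    # 1) Centers almost identical (Manhattan distance in pixels).
    a_center = ((ax_min + ax_max) / 2, (ay_min + ay_max) / 2)
    b_center = ((bx_min + bx_max) / 2, (by_min + by_max) / 2)
    if abs(a_center[0] - b_center[0]) + abs(a_center[1] - b_center[1]) <= center_dist:
        return True

    # 2) Intersection over union above the threshold.
    cross_w = min(ax_max, bx_max) - max(ax_min, bx_min)
    cross_h = min(ay_max, by_max) - max(ay_min, by_min)
    if cross_w <= 0 or cross_h <= 0:
        return False
    cross_area = cross_w * cross_h
    union_area = ((ax_max - ax_min) * (ay_max - ay_min) +
                  (bx_max - bx_min) * (by_max - by_min) - cross_area)
    return union_area != 0 and cross_area / union_area > overlap_thresh

# Example: two heavily overlapping boxes count as the same piece of trash.
# is_same_object((10, 10, 110, 110), (20, 12, 118, 112))  -> True (IoU ~0.80)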
class TestView(object):
    def __init__(self):
        self.client = mqtt.Client()
        self.client.connect('xxx.xxx.xxx.xxx', 8084)

        # Information used for prediction.
        # This is needed since the notebook is stored in the object_detection folder.
        sys.path.append("..")

        self.cap = None

        # Name of the directory containing the object detection module we're using
        MODEL_NAME = 'inference_graph'
        fontPath = "./TaipeiSansTCBeta-Bold.ttf"
        self.font = ImageFont.truetype(fontPath, 32)

        # Grab path to current working directory
        CWD_PATH = os.getcwd()

        # Path to frozen detection graph .pb file, which contains the model that is used
        # for object detection.
        PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, 'frozen_inference_graph.pb')

        # Path to label map file
        PATH_TO_LABELS = os.path.join(CWD_PATH, 'training', 'labelmap.pbtxt')

        # Number of classes the object detector can identify
        NUM_CLASSES = 20

        ## Load the label map.
        # Label maps map indices to category names, so that when our convolution
        # network predicts `5`, we know that this corresponds to `king`.
        # Here we use internal utility functions, but anything that returns a
        # dictionary mapping integers to appropriate string labels would be fine.
        label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
        categories = label_map_util.convert_label_map_to_categories(
            label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
        self.category_index = label_map_util.create_category_index(categories)
        for key in self.category_index:
            print("key: %s , value: %s" % (key, self.category_index[key]))

        # Load the Tensorflow model into memory.
        detection_graph = tf.Graph()
        with detection_graph.as_default():
            od_graph_def = tf.GraphDef()
            with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
                serialized_graph = fid.read()
                od_graph_def.ParseFromString(serialized_graph)
                tf.import_graph_def(od_graph_def, name='')
            self.sess = tf.Session(graph=detection_graph)

        # Define input and output tensors (i.e. data) for the object detection classifier.
        # Input tensor is the image.
        self.image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        # Output tensors are the detection boxes, scores, and classes.
        # Each box represents a part of the image where a particular object was detected.
        self.detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
        # Each score represents the level of confidence for each of the objects.
        # The score is shown on the result image, together with the class label.
        self.detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
        self.detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
        # Number of objects detected
        self.num_detections = detection_graph.get_tensor_by_name('num_detections:0')

    class MyEncoder(json_dict.JSONEncoder):
        def default(self, obj):
            if isinstance(obj, np.integer):
                return int(obj)
            elif isinstance(obj, np.floating):
                return float(obj)
            elif isinstance(obj, np.ndarray):
                return obj.tolist()
            else:
                return super(MyEncoder, self).default(obj)

    def mqtt_pub(self, payload):
        topic = 'demo/fish'
        json_out = {}
        json_out['GwID'] = "MiGW-B1-1"
        json_out['TimeStamp'] = round(time.time() * 1000)
        json_out['DeviceID'] = "B1F-IPCam"
        json_out['Payload'] = payload
        json_str = json_dict.dumps(json_out, cls=self.MyEncoder)
        self.client.publish(topic, json_str, 0, False)

    def predict(self, frame):
        if frame is None:
            # print("The read queue is None, plz do not predict!")
            return None
        frame_expanded = np.expand_dims(frame, axis=0)
        # print(frame_expanded.shape)
        # Perform the actual detection by running the model with the image as input
        (boxes, scores, classes, num) = self.sess.run(
            [self.detection_boxes, self.detection_scores,
             self.detection_classes, self.num_detections],
            feed_dict={self.image_tensor: frame_expanded})
        vis_util.visualize_boxes_and_labels_on_image_array(
            frame,
            np.squeeze(boxes),
            np.squeeze(classes).astype(np.int32),
            np.squeeze(scores),
            self.category_index,
            use_normalized_coordinates=True,
            line_thickness=3,
            skip_labels=True,
            skip_scores=True,
            min_score_thresh=0.60)

        # Build the MQTT payload from every detection above the score threshold.
        payload = []
        for i in range(len(scores[0])):
            if scores[0][i] > 0.6:
                temp_list = {}
                # temp_list.setdefault('id', str(i))
                temp_list.setdefault('score', str(scores[0][i]))
                temp_list.setdefault('class', str(classes[0][i]))
                temp_list.setdefault('name', self.category_index[classes[0][i]]['name'])
                temp_list.setdefault('ymin', boxes[0][i][0])
                # These are the box coordinates before JSON encoding.
                temp_list.setdefault('xmin', boxes[0][i][1])
                temp_list.setdefault('ymax', boxes[0][i][2])
                temp_list.setdefault('xmax', boxes[0][i][3])
                payload.append(temp_list)
        self.mqtt_pub(payload)

        convert = cv2.imencode('.jpg', frame)[1].tobytes()
        return convert

    def gen_frame(self):
        """Video stream generator."""
        self.cap = CameraStream().start()
        while self.cap:
            frame = self.cap.read()
            convert = self.predict(frame)
            if convert is None:
                continue
            # Concatenate frames one by one and stream the result.
            yield (b'--frame\r\n'
                   b'Content-Type: image/jpeg\r\n\r\n' + convert + b'\r\n')

    def __del__(self):
        try:
            self.cap.stop()
            self.cap.stream.release()
        except:
            print("probably there's no cap yet :(")
        cv2.destroyAllWindows()
        self.client.disconnect()
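
# The multipart byte chunks yielded by gen_frame() are in the format a Flask streaming
# Response expects (MJPEG over multipart/x-mixed-replace). The wiring below is a minimal
# sketch, assuming Flask is installed; the route name, host, and port are illustrative
# and not part of the original file.
if __name__ == "__main__":
    from flask import Flask, Response

    app = Flask(__name__)
    view = TestView()

    @app.route('/video_feed')
    def video_feed():
        # The boundary string must match the b'--frame' marker used in gen_frame().
        return Response(view.gen_frame(),
                        mimetype='multipart/x-mixed-replace; boundary=frame')

    app.run(host='0.0.0.0', port=5000)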
import cv2

from camera import CameraStream

if __name__ == "__main__":
    cam = CameraStream().start()
    while True:
        frame = cam.read()
        if frame is None:
            # The background reader may not have produced a frame yet.
            continue
        cv2.imshow('webcam', frame)
        if cv2.waitKey(1) == 27:  # Esc key
            break
    cam.stop()
    cv2.destroyAllWindows()
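
# This smoke test opens the default webcam, while the YOLO script constructs
# CameraStream(fullpath) with a video-file path. A hedged variant of the same test for
# file or RTSP input, assuming the constructor accepts a source string exactly as in
# that script; 'sample.avi' is a placeholder name, not a file from this repository.
#
# cam = CameraStream('./sample.avi').start()
# while True:
#     frame = cam.read()
#     if frame is None:
#         break
#     cv2.imshow('file playback', frame)
#     if cv2.waitKey(1) == 27:
#         break
# cam.stop()
# cv2.destroyAllWindows()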