def main():
    """Run YOLOv3 detection on one image and display the annotated result.

    The configuration names used here (``cfgfile``, ``model_size``,
    ``num_classes``, ``weightfile``, ``class_name``, ``img_path`` and the
    NMS thresholds) are not defined in this function and are resolved from
    the enclosing scope.

    Raises:
        FileNotFoundError: if OpenCV cannot read ``img_path``.
    """
    model = YOLOv3Net(cfgfile, model_size, num_classes)
    model.load_weights(weightfile)
    class_names = load_class_names(class_name)

    image = cv2.imread(img_path)
    # cv2.imread reports failure by returning None rather than raising, so
    # check here instead of failing later inside TensorFlow.
    if image is None:
        raise FileNotFoundError("Could not read image: {}".format(img_path))

    image = np.array(image)
    # Add a leading batch axis before resizing and inference.
    image = tf.expand_dims(image, 0)
    resized_frame = resize_image(image, (model_size[0], model_size[1]))

    pred = model.predict(resized_frame)
    boxes, scores, classes, nums = output_boxes(
        pred, model_size,
        max_output_size=max_output_size,
        max_output_size_per_class=max_output_size_per_class,
        iou_threshold=iou_threshold,
        confidence_threshold=confidence_threshold)

    # Drop the batch axis again so the boxes are drawn on the original image.
    image = np.squeeze(image)
    img = draw_outputs(image, boxes, scores, classes, nums, class_names)

    win_name = 'Image detection'
    cv2.imshow(win_name, img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
def main(img_path, image_name):
    """Detect objects in ``<img_path>/<image_name>.jpg`` and save the result.

    The annotated image is written next to the input as
    ``<img_path>/<image_name>_yolo.jpg``.

    Args:
        img_path: directory containing the input image; also receives the
            output file.
        image_name: file name of the input image without the ``.jpg``
            extension.

    Raises:
        FileNotFoundError: if OpenCV cannot read the input image.
    """
    model = YOLOv3Net(cfgfile, model_size, num_classes)
    model.load_weights(weightfile)
    class_names = load_class_names(class_name)

    source_path = os.path.join(img_path, "{}.jpg".format(image_name))
    image = cv2.imread(source_path)
    # cv2.imread reports failure by returning None rather than raising.
    if image is None:
        raise FileNotFoundError(
            "Could not read image: {}".format(source_path))

    image = np.array(image)
    # Add a leading batch axis before resizing and inference.
    image = tf.expand_dims(image, 0)
    resized_frame = resize_image(image, (model_size[0], model_size[1]))

    pred = model.predict(resized_frame)
    boxes, scores, classes, nums = output_boxes(
        pred, model_size,
        max_output_size=max_output_size,
        max_output_size_per_class=max_output_size_per_class,
        iou_threshold=iou_threshold,
        confidence_threshold=confidence_threshold)

    # Drop the batch axis again so the boxes are drawn on the original image.
    image = np.squeeze(image)
    img = draw_outputs(image, boxes, scores, classes, nums, class_names)

    cv2.imwrite(
        os.path.join(img_path, "{}_yolo.jpg".format(image_name)), img)
def main():
    """Run YOLOv3 detection on frames from camera 0 until 'q' is pressed.

    Each frame is resized, passed through the model, annotated and shown in
    an OpenCV window; an estimated frames-per-second figure is printed per
    frame. The loop also ends when the capture stops delivering frames.
    The window and the capture device are released even if an error occurs.
    """
    model = YOLOv3Net(cfgfile, model_size, num_classes)
    model.load_weights(weightfile)
    class_names = load_class_names(class_name)

    win_name = 'Yolov3 detection'
    cv2.namedWindow(win_name)

    # 0 selects camera 0; pass a video file path instead to read from a file.
    cap = cv2.VideoCapture(0)

    try:
        while True:
            start = time.time()
            ret, frame = cap.read()
            if not ret:
                break

            resized_frame = tf.expand_dims(frame, 0)
            resized_frame = resize_image(
                resized_frame, (model_size[0], model_size[1]))

            pred = model.predict(resized_frame)
            boxes, scores, classes, nums = output_boxes(
                pred, model_size,
                max_output_size=max_output_size,
                max_output_size_per_class=max_output_size_per_class,
                iou_threshold=iou_threshold,
                confidence_threshold=confidence_threshold)

            img = draw_outputs(
                frame, boxes, scores, classes, nums, class_names)
            cv2.imshow(win_name, img)

            seconds = time.time() - start
            # A coarse clock can report a zero interval; skip the estimate
            # rather than dividing by zero.
            if seconds > 0:
                fps = 1 / seconds
                print("Estimated frames per second : {0}".format(fps))

            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                break
    finally:
        cv2.destroyAllWindows()
        cap.release()

    # Reached only when the loop ended normally, so the message is not
    # printed after a failure.
    print('Detections have been performed successfully.')
def main(img, model):
    """Run detection on an already-loaded image with an already-built model.

    ``class_names``, ``model_size`` and the NMS thresholds are not defined
    in this function; they are resolved from the enclosing scope.

    Args:
        img: image data accepted by ``np.array``.
        model: object exposing ``predict``.

    Returns:
        Tuple ``(img, person_num, boxes, scores, classes, nums,
        class_names)`` where ``img`` and ``person_num`` are the two values
        returned by ``draw_outputs``.
    """
    image = np.array(img)
    # Add a leading batch axis before resizing and inference.
    image = tf.expand_dims(image, 0)
    resized_frame = resize_image(image, (model_size[0], model_size[1]))

    pred = model.predict(resized_frame)
    boxes, scores, classes, nums = output_boxes(
        pred, model_size,
        max_output_size=max_output_size,
        max_output_size_per_class=max_output_size_per_class,
        iou_threshold=iou_threshold,
        confidence_threshold=confidence_threshold)

    # Drop the batch axis again so the boxes are drawn on the original image.
    image = np.squeeze(image)
    img, person_num = draw_outputs(
        image, boxes, scores, classes, nums, class_names)

    return img, person_num, boxes, scores, classes, nums, class_names
def get_prediction(inputimage):
    """Detect objects in an image file, show and save the annotated result.

    The annotated image is displayed in a window (which waits for a key
    press), written to ``outputimgage.jpg`` in the working directory, and
    returned.

    Args:
        inputimage: path of the image file to read.

    Returns:
        The value returned by ``draw_outputs`` for the input frame.

    Raises:
        FileNotFoundError: if OpenCV cannot read ``inputimage``.
    """
    model = YOLOv3Net(cfgfile, model_size, num_classes)
    model.load_weights(weightfile)
    class_names = load_class_names(class_name)

    frame = cv2.imread(inputimage, 1)
    # cv2.imread reports failure by returning None rather than raising;
    # check before opening a window so nothing is left behind on failure.
    if frame is None:
        raise FileNotFoundError(
            "Could not read image: {}".format(inputimage))

    win_name = 'Yolov3 detection'
    cv2.namedWindow(win_name)

    try:
        resized_frame = tf.expand_dims(frame, 0)
        resized_frame = resize_image(
            resized_frame, (model_size[0], model_size[1]))

        pred = model.predict(resized_frame)
        boxes, scores, classes, nums = output_boxes(
            pred, model_size,
            max_output_size=max_output_size,
            max_output_size_per_class=max_output_size_per_class,
            iou_threshold=iou_threshold,
            confidence_threshold=confidence_threshold)

        img = draw_outputs(frame, boxes, scores, classes, nums, class_names)
        cv2.imshow(win_name, img)
        cv2.imwrite('outputimgage.jpg', img)
    finally:
        cv2.waitKey()
        cv2.destroyAllWindows()

    # Kept outside ``finally`` so a failure propagates instead of being
    # reported as success or masked by the return.
    print('Detections have been performed successfully.')
    return img
def main():
    """Run YOLOv3 detection on one image and save it as ``result1.jpg``.

    The loaded class names are printed before inference. Configuration
    names such as ``cfgfile``, ``weightfile`` and ``img_path`` are resolved
    from the enclosing scope.

    Raises:
        FileNotFoundError: if OpenCV cannot read ``img_path``.
    """
    model = YOLOv3Net(cfgfile, model_size, num_classes)
    model.load_weights(weightfile)
    class_names = load_class_names(class_name)
    print("class_names", class_names)

    image = cv2.imread(img_path)
    # cv2.imread reports failure by returning None rather than raising.
    if image is None:
        raise FileNotFoundError("Could not read image: {}".format(img_path))

    image = np.array(image)
    # Add a leading batch axis before resizing and inference.
    image = tf.expand_dims(image, 0)
    resized_frame = resize_image(image, (model_size[0], model_size[1]))

    pred = model.predict(resized_frame)
    boxes, scores, classes, nums = output_boxes(
        pred, model_size,
        max_output_size=max_output_size,
        max_output_size_per_class=max_output_size_per_class,
        iou_threshold=iou_threshold,
        confidence_threshold=confidence_threshold)

    # Drop the batch axis again so the boxes are drawn on the original image.
    image = np.squeeze(image)
    img = draw_outputs(image, boxes, scores, classes, nums, class_names)
    cv2.imwrite('result1.jpg', img)
def detect_image(img_path):
    """Run YOLOv3 detection on one image file and display the result.

    Model, class-name and threshold settings are read from ``cfg``.

    Args:
        img_path: path of the image file to read.

    Raises:
        FileNotFoundError: if OpenCV cannot read ``img_path``.
    """
    model = YOLOv3Net(cfg.CFGFILE, cfg.MODEL_SIZE, cfg.NUM_CLASSES)
    model.load_weights(cfg.WEIGHTFILE)
    class_names = load_class_names(cfg.CLASS_NAME)

    image = cv2.imread(img_path)
    # cv2.imread reports failure by returning None rather than raising.
    if image is None:
        raise FileNotFoundError("Could not read image: {}".format(img_path))

    image = np.array(image)
    # Add a leading batch axis before resizing and inference.
    image = tf.expand_dims(image, 0)
    resized_frame = resize_image(
        image, (cfg.MODEL_SIZE[0], cfg.MODEL_SIZE[1]))

    pred = model.predict(resized_frame)
    # NOTE(review): max_output_size and max_output_size_per_class are not
    # read from cfg like the other settings - confirm they are defined in
    # this module.
    boxes, scores, classes, nums = output_boxes(
        pred, cfg.MODEL_SIZE,
        max_output_size=max_output_size,
        max_output_size_per_class=max_output_size_per_class,
        iou_threshold=cfg.IOU_THRESHOLD,
        confidence_threshold=cfg.CONFIDENCE_THRESHOLD)

    # Drop the batch axis again so the boxes are drawn on the original image.
    image = np.squeeze(image)
    img = draw_outputs(image, boxes, scores, classes, nums, class_names)

    win_name = 'Detection'
    cv2.imshow(win_name, img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
def detect_video(video_path):
    """Run YOLOv3 detection on a video source until it ends or 'q' is pressed.

    Each frame is resized, passed through the model, annotated and shown in
    an OpenCV window; a frames-per-second figure is printed per frame. The
    window and the capture are released even if an error occurs.

    Args:
        video_path: value handed to ``returnCameraOrFile``, whose result is
            used as the ``cv2.VideoCapture`` source.
    """
    model = YOLOv3Net(cfg.CFGFILE, cfg.MODEL_SIZE, cfg.NUM_CLASSES)
    model.load_weights(cfg.WEIGHTFILE)
    class_names = load_class_names(cfg.CLASS_NAME)

    win_name = 'Detection'
    cv2.namedWindow(win_name)
    cap = cv2.VideoCapture(returnCameraOrFile(video_path))

    try:
        while True:
            start = time.time()
            ret, frame = cap.read()
            if not ret:
                break

            resized_frame = tf.expand_dims(frame, 0)
            resized_frame = resize_image(
                resized_frame, (cfg.MODEL_SIZE[0], cfg.MODEL_SIZE[1]))

            pred = model.predict(resized_frame)
            # NOTE(review): max_output_size and max_output_size_per_class
            # are not read from cfg like the other settings - confirm they
            # are defined in this module.
            boxes, scores, classes, nums = output_boxes(
                pred, cfg.MODEL_SIZE,
                max_output_size=max_output_size,
                max_output_size_per_class=max_output_size_per_class,
                iou_threshold=cfg.IOU_THRESHOLD,
                confidence_threshold=cfg.CONFIDENCE_THRESHOLD)

            img = draw_outputs(
                frame, boxes, scores, classes, nums, class_names)
            cv2.imshow(win_name, img)

            seconds = time.time() - start
            # A coarse clock can report a zero interval; skip the estimate
            # rather than dividing by zero.
            if seconds > 0:
                fps = 1 / seconds
                print("Frames per second : {0}".format(fps))

            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                break
    finally:
        cv2.destroyAllWindows()
        cap.release()

    # Reached only when the loop ended normally, so the message is not
    # printed after a failure.
    print('Detections performed successfully.')
def main():
    """Run detection on one image, print inference time and show the result.

    The annotated image is scaled to half size in both dimensions before it
    is displayed. Configuration names such as ``cfg_file``,
    ``weights_file`` and ``img_path`` are resolved from the enclosing scope.

    Raises:
        FileNotFoundError: if OpenCV cannot read ``img_path``.
    """
    model = yolov3_net(cfg_file, num_classes)
    model.load_weights(weights_file)
    class_names = load_class_names(class_names_file)

    image = cv2.imread(img_path)
    # cv2.imread reports failure by returning None rather than raising.
    if image is None:
        raise FileNotFoundError("Could not read image: {}".format(img_path))

    # Add a leading batch axis before resizing and inference.
    image = tf.expand_dims(image, 0)
    resized_frame = resize_image(image, (model_size[0], model_size[1]))

    start_time = time.time()
    pred = model.predict(resized_frame, steps=1)
    print("Time inference: ", time.time() - start_time)

    boxes, scores, classes, nums = output_boxes(
        pred, model_size, max_output_size, max_output_size_per_class,
        iou_threshold, confidence_threshold)

    # Drop the batch axis again so the boxes are drawn on the original image.
    image = np.squeeze(image)
    img = draw_output(image, boxes, scores, classes, nums, class_names)
    img = cv2.resize(img, (0, 0), fx=0.5, fy=0.5)

    win_name = "Image detection"
    cv2.imshow(win_name, img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
def write_predict(raw_image, graph, fps):
    """Run the graph on one image and write the annotated result to disk.

    Opens a ``tf.Session`` on ``graph``, feeds the encoded image through
    the ``x`` -> ``y`` tensors (resolved from the enclosing scope), draws
    the detections and saves them to ``result.jpg``. The evaluated scores
    are printed while the session is still open.

    Args:
        raw_image: input handed to ``encode_img``.
        graph: graph the session is created for.
        fps: value rendered as an "FPS" overlay, or ``None`` to skip it.
    """
    with tf.Session(graph=graph) as session:
        # Encode the input for the network.
        source_img, net_input = encode_img(raw_image, MODEL_SIZE)
        print('test_input shape', net_input.shape)

        # Forward pass.
        raw_pred = session.run(y, feed_dict={x: net_input})

        # Post-process the raw predictions into boxes.
        boxes, scores, classes, nums = output_boxes(
            raw_pred, MODEL_SIZE,
            max_output_size=MAX_OUTPUT_SIZE,
            max_output_size_per_class=MAX_OUTPUT_SIZE_PER_CLASS,
            iou_threshold=IOU_THRESHOLD,
            confidence_threshold=CONFIDENCE_THRESHOLD)
        annotated = draw_outputs(
            source_img, boxes, scores, classes, nums, class_names)

        # Optional FPS overlay; the colour tuple is in BGR order.
        overlay_color = (0, 0, 255)
        if fps is not None:
            cv2.putText(
                annotated, "FPS: {:.2f}".format(fps), (20, 40),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, overlay_color, 1)

        # Persist the final image.
        cv2.imwrite('result.jpg', annotated)
        print('scores', scores.eval())
def main():
    """Run YOLOv3 detection on one image and print the raw detections.

    Prints the boxes, the scores at or above ``confidence_threshold``, the
    non-zero class entries and the detection count. Nothing is drawn or
    saved.

    Returns:
        ``0`` after the detections have been printed.

    Raises:
        FileNotFoundError: if OpenCV cannot read ``img_filename``.
    """
    model = YOLOv3Net(cfgfile, model_size, num_classes)
    model.load_weights(weightfile)
    # Not used below; the call is retained so the function's side effects
    # stay unchanged.
    class_names = load_class_names(class_name)

    image = cv2.imread(img_filename)
    # cv2.imread reports failure by returning None rather than raising.
    if image is None:
        raise FileNotFoundError(
            "Could not read image: {}".format(img_filename))

    image = np.array(image)
    # Add a leading batch axis before resizing and inference.
    image = tf.expand_dims(image, 0)
    resized_frame = resize_image(image, (model_size[0], model_size[1]))

    pred = model.predict(resized_frame)
    boxes, scores, classes, nums = output_boxes(
        pred, model_size,
        max_output_size=max_output_size,
        max_output_size_per_class=max_output_size_per_class,
        iou_threshold=iou_threshold,
        confidence_threshold=confidence_threshold)

    print('boxes', boxes)
    print('scores', scores[scores >= confidence_threshold])
    print('classes', classes[classes != 0])
    print('nums', nums)
    return 0
def create_network(self):
    '''
    Build the Keras model described by the parsed cfg blocks.

    Reads ``self.model_size``, ``self.num_classes``, ``self.blocks``,
    ``self.iou_threshold`` and ``self.confidence_threshold``. Every block
    after the first one is turned into tensors in order; the predictions of
    all "yolo" blocks are concatenated and passed through ``output_boxes``.

    Return:
        model -> ``Model`` mapping the input image to the four values
        unpacked from ``output_boxes``.
    '''
    model_size = self.model_size
    # outputs: block index -> tensor produced by that block
    outputs = {}
    # output_filters: one ``filters`` entry appended per processed block
    output_filters = []
    filters = []
    out_pred = []
    # scale: 0 until the first "yolo" block has been seen, then 1
    scale = 0
    # create keras input for model
    inputs = input_image = Input(shape=model_size)
    # NOTE(review): num_classes is not used again in this method.
    num_classes = self.num_classes
    # Get all layers without net
    for i, block in enumerate(self.blocks[1:]):
        # If block is convolutional layer
        # print("Layer: {} type: {}".format(i, block['type']))
        if block["type"] == "convolutional":
            inputs, filters, strides = self.create_convolutional(
                block, inputs, i)
        elif block["type"] == "upsample":
            stride = int(block["stride"])
            inputs = UpSampling2D(size=(stride, stride))(inputs)
        elif block["type"] == "maxpool":
            stride = int(block["stride"])
            size = int(block["size"])
            padd = 'same'
            inputs = MaxPool2D(size, strides=stride, padding=padd)(inputs)
        # If block is route layer
        elif block["type"] == "route":
            ind_backward = list(map(int, block["layers"].split(",")))
            # In case of relative indices: negative entries become absolute
            # by adding the current block index
            for ind, el in enumerate(ind_backward):
                if el < 0:
                    ind_backward[ind] += i
            start = ind_backward[0]
            # Two indices: concatenate both earlier outputs on the last axis
            if len(ind_backward) > 1:
                end = ind_backward[1]
                filters = output_filters[start] + output_filters[end]
                inputs = tf.concat([outputs[start], outputs[end]],
                                   axis=-1,
                                   name="route_{}".format(i))
            # One index for layer: reuse that earlier output as-is
            else:
                filters = output_filters[start]
                inputs = outputs[start]
        # Skip layers
        elif block["type"] == "shortcut":
            step = int(block["from"])
            # NOTE(review): activation is read but not used in this branch.
            activation = block["activation"]
            if step < 0:
                # relative step to the current layer
                step += i
            last_output = outputs[i - 1]
            prev_output = outputs[step]
            out_channels = tf.reduce_min(
                [last_output.shape[-1], outputs[step].shape[-1]])
            # create same dimensions for last channel
            # NOTE(review): out_channels is the minimum of the two channel
            # counts, so the first branch below looks unreachable - confirm.
            if prev_output.shape[-1] < out_channels:
                padd_val = (out_channels - prev_output.shape[-1])
                padding = tf.constant([[0, 0], [0, 0], [0, 0],
                                       [0, padd_val]])
                prev_output = tf.pad(prev_output, padding, "CONSTANT")
            elif prev_output.shape[-1] > out_channels:
                prev_output = prev_output[:, :, :, :out_channels]
            elif last_output.shape[-1] > out_channels:
                last_output = last_output[:, :, :, :out_channels]
            inputs = tf.math.add(last_output, prev_output)
        elif block["type"] == "yolo":
            inputs, initial_shape, anchors = self.create_yolo(
                block, inputs)
            # NOTE(review): strides is only assigned by the convolutional
            # branch and by this call, so a "yolo" block is expected to
            # follow at least one convolutional block - confirm.
            strides, prediction = self.create_prediction(
                inputs, block, input_image, anchors, initial_shape, strides)
            # Accumulate the predictions of every yolo block along axis 1
            if scale:
                out_pred = tf.concat([out_pred, prediction], axis=1)
            else:
                out_pred = prediction
                scale = 1
        outputs[i] = inputs
        output_filters.append(filters)
    # NOTE(review): the result is unpacked here as (boxes, classes, scores,
    # nums); verify this matches the order output_boxes actually returns.
    boxes, classes, scores, nums = output_boxes(
        out_pred,
        model_size,
        max_output_size=10,
        max_output_size_per_class=5,
        iou_threshold=self.iou_threshold,
        confidence_threshold=self.confidence_threshold)
    model = Model(input_image, outputs=[boxes, classes, scores, nums])
    return model
def main():
    """Detect objects in left/right camera image pairs and show distances.

    For every left-camera file the matching left and right resized images
    are run through the model together. The detections and the result of
    ``objectDistance`` are drawn on the left image, which is written to
    ``./out/Izlazna slika.png`` and shown on screen. Pressing 'q' closes
    the windows and stops the loop.
    """
    # Create the model and load the trained weights into it.
    model = YOLOv3Net(cfgfile, model_size, num_classes)
    model.load_weights(weightfile)

    # Load the class names.
    class_names = load_class_names(class_name)

    # Load the input photos and preprocess them into the format the model
    # expects: left camera first, then right camera.
    images_left, resized_images_left, filenames_left = loadAndResize(
        img_path_left_cam)
    images_right, resized_images_right, filenames_right = loadAndResize(
        img_path_right_cam)

    for idx in range(len(filenames_left)):
        left_image = images_left[idx]

        # Stack the resized left/right pair for the model.
        stereo_pair = [resized_images_left[idx], resized_images_right[idx]]
        stereo_pair = np.squeeze(tf.expand_dims(stereo_pair, 0))

        # Inference on the input images. The output predictions are a set
        # of vectors (10647), each corresponding to one object-location box.
        pred = model.predict(stereo_pair)

        # Determine the boxes around detected objects (for the configured
        # thresholds).
        boxes, scores, classes, nums = output_boxes(
            pred, model_size,
            max_output_size=max_output_size,
            max_output_size_per_class=max_output_size_per_class,
            iou_threshold=iou_threshold,
            confidence_threshold=confidence_threshold)

        # Object distance and bounding box index pairs.
        distance_index_pairs = objectDistance(
            left_image, images_right[idx], boxes, nums, classes)
        annotated = draw_outputs(
            left_image, boxes, scores, classes, nums, class_names,
            cLeftCamId, distance_index_pairs)

        # Save the result to a file.
        out_file_name = './out/Izlazna slika.png'
        cv2.imwrite(out_file_name, annotated)

        # Show the result on screen.
        cv2.imshow(out_file_name, annotated)
        pressed = cv2.waitKey(20) & 0xFF
        if pressed == ord('q'):
            cv2.destroyAllWindows()
            break
def main():
    """Run YOLOv3 detection on an RTSP camera stream until 'q' or ESC.

    Each frame is resized, passed through the model, annotated and shown in
    an OpenCV window; an estimated frames-per-second figure is printed per
    frame. The loop also ends when the stream stops delivering frames. The
    window and the capture are released even if an error occurs.
    """
    model = YOLOv3Net(cfgfile, model_size, num_classes)
    model.load_weights(weightfile)
    class_names = load_class_names(class_name)

    win_name = 'Yolov3 detection'
    cv2.namedWindow(win_name)

    # Camera URL: change it to match your IP camera's RTSP or MPEG stream.
    cap = cv2.VideoCapture(
        "rtsp://*****:*****@172.168.50.208:554/cam/realmonitor?channel=1&subtype=1"
    )

    try:
        while True:
            start = time.time()
            # NOTE(review): grab() followed by read() advances the stream
            # twice per iteration, so one frame is discarded each time -
            # presumably to stay close to the live feed; confirm.
            cap.grab()
            ret, frame = cap.read()
            if not ret:
                break

            resized_frame = tf.expand_dims(frame, 0)
            resized_frame = resize_image(
                resized_frame, (model_size[0], model_size[1]))

            pred = model.predict(resized_frame)
            boxes, scores, classes, nums = output_boxes(
                pred, model_size,
                max_output_size=max_output_size,
                max_output_size_per_class=max_output_size_per_class,
                iou_threshold=iou_threshold,
                confidence_threshold=confidence_threshold)

            img = draw_outputs(
                frame, boxes, scores, classes, nums, class_names)
            cv2.imshow(win_name, img)

            seconds = time.time() - start
            # A coarse clock can report a zero interval; skip the estimate
            # rather than dividing by zero.
            if seconds > 0:
                fps = 1 / seconds
                print("Estimated frames per second : {0}".format(fps))

            key = cv2.waitKey(1) & 0xFF
            # 'q' or ESC (27) stops the loop.
            if key in (ord('q'), 27):
                break
    finally:
        cv2.destroyAllWindows()
        cap.release()

    # Reached only when the loop ended normally, so the message is not
    # printed after a failure.
    print('Detections have been performed successfully.')