def main(_argv):
    """Detect the climber in every frame and score which holds were gripped.

    The run is driven entirely by ``FLAGS``:

    1. Obtain the hold rectangles: YOLO on the baseline image, a cached
       ``holds.csv`` under ``FLAGS.CSVpath``, or ``hold_marker``.
    2. If ``FLAGS.colorMask`` is set, obtain a colour from ``colors.csv`` or
       from ``color_picker``.
    3. If ``FLAGS.detection`` is set, load YOLOv3 and run it on an image
       directory, a video/camera stream, or a single image.
    4. If ``FLAGS.baseline`` is set, intersect every hold with the climber
       box of every frame, compare the hold region against an earlier frame
       and keep the highest ``holds[i][4]`` value of the holds judged gripped.

    NOTE(review): this block was reconstructed from a source whose line
    breaks and indentation were lost; confirm the nesting of the marked
    statements against the original file.

    Args:
        _argv: Not referenced in the body.
    """
    start = timer()
    #timerStart = time.time()
    #basePath = "D:\MMichenthaler\VideoFrames\Video2\Video2_frame1000.jpg"
    #base = cv2.imread(basePath)
    allClimbers = []  # one climber bounding box (or placeholder) per frame
    climbersThisPic = []  # "person" boxes detected in the current frame
    #holds = hold_marker(base)
    #print(holds)
    print(FLAGS.colorMask)
    #------------------------------------------
    # This part is for hold detection with YOLO. Because that does not work
    # well enough, it is meant to stay disabled until further notice.
    #---------------------------------------------
    if FLAGS.baseline and FLAGS.holdsDetection is True:
        # NOTE(review): `yolo`, `class_names`, `nums`, `classes`, `scores`,
        # `boxes` and `count` are all assigned further down in this function,
        # which makes them locals; they are still unbound here, so this
        # branch cannot run as written. `holds` is not assigned in this
        # branch either, although the scoring loop below reads it.
        # NOTE(review): `FLAGS.baseLine` differs in case from the
        # `FLAGS.baseline` used everywhere else — confirm the flag name.
        base_raw = tf.image.decode_image(open(FLAGS.baseLine, 'rb').read(),
                                         channels=3)
        base = tf.expand_dims(base_raw, 0)
        base = transform_images(base, FLAGS.size)

        t1 = time.time()
        baseBoxes, baseScores, BaseClasses, BaseNums = yolo(base)
        t2 = time.time()
        logging.info('time: {}'.format(t2 - t1))

        logging.info('detections:')
        for i in range(nums[0]):
            logging.info('\t{}, {}, {}'.format(class_names[int(classes[0][i])],
                                               np.array(scores[0][i]),
                                               np.array(boxes[0][i])))

        base = cv2.cvtColor(base_raw.numpy(), cv2.COLOR_RGB2BGR)
        base = draw_outputs(base,
                            (baseBoxes, baseScores, BaseClasses, BaseNums),
                            class_names)  # detection used on Baseline Img
        cv2.imwrite(FLAGS.output + 'baselineImg.jpg', base)
        logging.info('Baseline set and saved to: {}'.format(FLAGS.output) +
                     str(count))
        # using a separate detector for holds on the bare wall image to set a baseline and saving the results
    elif os.path.isfile(FLAGS.CSVpath + "holds.csv"):
        # Cached holds: each CSV cell is stripped of '[' / ']' and split on
        # ',' before being converted to ints.
        with open(FLAGS.CSVpath + "holds.csv", "r") as file:
            holdsSt = []
            stHolds = list(csv.reader(file, delimiter=','))
            # print(stHolds)
            for elem in stHolds:
                for elem2 in elem:
                    elem3 = elem2.replace('[', '').replace(']', '')
                    holdsSt.append(elem3.split(','))
            holds = [list(map(int, rec)) for rec in holdsSt]
            logging.info('holds loaded')
    elif FLAGS.baseline and FLAGS.CSVpath:
        # this code is for marking the holds in a GUI
        base = cv2.imread(FLAGS.baseline)
        holds = hold_marker(base, FLAGS.CSVpath)
        print(holds)
    # The bare string below is never assigned; it only keeps hold coordinates
    # from earlier test runs around for reference.
    '''
    holds = [[843, 2692, 992, 2835],  # holds für newVideo1
            [712, 2516, 891, 2644],
            [879, 2409, 1061, 2519],
            [787, 2039, 924, 2132],
            [912, 1875, 1025, 1971],
            [775, 1795, 888, 1887],
            [1013, 1705, 1120, 1798],
            [819, 1392, 933, 1520],
            [1028, 1112, 1141, 1199],
            [849, 1079, 959, 1213],
            [807, 909, 941, 987],
            [956, 713, 1037, 799],
            [864, 602, 950, 689],
            [1022, 584, 1129, 671],
            [903, 408, 986, 495],
            [1010, 280, 1123, 367]]

    holds = [[555, 1253, 594, 1288],  # für weitere Testungen die mit hold_marker markierten Griffe von Video2 des alten datensatzes
            [588, 1178, 627, 1215],
            [584, 1107, 626, 1141],
            [579, 1035, 631, 1075],
            [584, 967, 618, 994],
            [545, 862, 599, 908],
            [524, 830, 570, 873],
            [487, 755, 565, 828],
            [512, 680, 584, 741],
            [526, 611, 597, 681],
            [561, 550, 617, 588],
            [532, 489, 586, 528],
            [622, 404, 667, 449],
            [585, 378, 616, 400],
            [531, 392, 565, 420],
            [523, 319, 565, 366],
            [468, 276, 503, 305],
            [531, 177, 579, 223],
            [452, 103, 495, 146]]

    holds = [[354, 1252, 447, 1323],  # für weitere Testungen die mit hold_marker markierten Griffe von Video2 des neuen Datensatzes
            [432, 1347, 492, 1397],
            [518, 1291, 569, 1349],
            [439, 1206, 528, 1260],
            [395, 1113, 470, 1161],
            [531, 1069, 579, 1105],
            [540, 988, 581, 1027],
            [396, 1023, 461, 1068],
            [459, 937, 513, 983],
            [321, 964, 389, 1033],
            [318, 845, 363, 899],
            [465, 766, 511, 820],
            [314, 772, 367, 809],
            [357, 700, 413, 740],
            [475, 636, 520, 675],
            [374, 612, 437, 679],
            [500, 600, 542, 627],
            [424, 543, 481, 602],
            [516, 560, 567, 596],
            [568, 488, 609, 520],
            [403, 456, 469, 493],
            [473, 424, 532, 462],
            [476, 358, 517, 399],
            [414, 382, 452, 424],
            [515, 295, 564, 334],
            [455, 327, 494, 366],
            [450, 206, 495, 245],
            [514, 235, 558, 273],
            [496, 103, 459, 144],
            [514, 180, 560, 216]]
    '''
    # comment out because the colorMask flag does not work
    print(FLAGS.colorMask)
    if os.path.isfile(FLAGS.CSVpath +
                      'colors.csv') and FLAGS.colorMask is True:
        # Only the first CSV row is used as the colour.
        with open(FLAGS.CSVpath + 'colors.csv', "r") as file:
            StColor = list(csv.reader(file, delimiter=','))
            color = [list(map(int, rec)) for rec in StColor]
            color = color[0]
            logging.info('color loaded ' + str(color))
    elif FLAGS.colorMask is True:
        color = color_picker(FLAGS.baseline, FLAGS.CSVpath)

    if FLAGS.detection is True:
        physical_devices = tf.config.experimental.list_physical_devices('GPU')
        for physical_device in physical_devices:
            tf.config.experimental.set_memory_growth(physical_device, True)

        if FLAGS.tiny:
            yolo = YoloV3Tiny(classes=FLAGS.num_classes)
        else:
            yolo = YoloV3(classes=FLAGS.num_classes)

        yolo.load_weights(FLAGS.weights).expect_partial()
        logging.info('weights loaded')

        # NOTE(review): the class-name file is opened without being closed.
        class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
        logging.info('classes loaded')

        # if FLAGS.tfrecord:
        #     dataset = load_tfrecord_dataset(
        #         FLAGS.tfrecord, FLAGS.classes, FLAGS.size)
        #     dataset = dataset.shuffle(512)
        #     img_raw, _label = next(iter(dataset.take(1)))
        #     img = tf.expand_dims(img_raw, 0)
        #     img = transform_images(img, FLAGS.size)
        #
        #     t1 = time.time()
        #     boxes, scores, classes, nums = yolo(img)
        #     t2 = time.time()
        #     logging.info('time: {}'.format(t2 - t1))
        #
        #     logging.info('detections:')
        #     for i in range(nums[0]):
        #         logging.info('\t{}, {}, {}'.format(class_names[int(classes[0][i])],
        #                                            np.array(scores[0][i]),
        #                                            np.array(boxes[0][i])))
        #         allhands.append(boxes[0][i])
        #
        #     img = cv2.cvtColor(img_raw.numpy(), cv2.COLOR_RGB2BGR)
        #     img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
        #     cv2.imwrite(FLAGS.output, img)
        #     logging.info('output saved to: {}'.format(FLAGS.output))
        # print(sorted(os.listdir(FLAGS.imDir), key=lambda x: int(x[15:-5])))
        if FLAGS.imDir:  # detection on all images in imDir
            climberCounter = 0
            makeNumbered = False
            #for count, dirImg in enumerate(Path(FLAGS.imDir).iterdir()):
            # , key=lambda x: int(x[69:-4]) -- for the key, give the number of
            # characters of the name that precede the frame number
            # The sort key parses the characters between index 16 and the
            # last four characters of each file name as an int.
            for count, dirImg in enumerate(
                    sorted(os.listdir(FLAGS.imDir),
                           key=lambda x: int(x[16:-4]))
            ):  # ADJUST the 16 when the prefix length differs
                #climbDetect = False
                #img_raw = tf.image.decode_image(
                #    open(dirImg, 'rb').read(), channels=3)
                img_raw = cv2.imread(FLAGS.imDir + dirImg)
                img_raw = cv2.cvtColor(img_raw, cv2.COLOR_BGR2RGB)
                img = tf.expand_dims(img_raw, 0)
                img = transform_images(img, FLAGS.size)

                t1 = time.time()
                boxes, scores, classes, nums = yolo(img)
                t2 = time.time()
                logging.info('time: {}'.format(t2 - t1))
                logging.info('detections:')
                # Back to BGR for the OpenCV writes below.
                img = cv2.cvtColor(img_raw, cv2.COLOR_RGB2BGR)
                for i in range(nums[0]):
                    #print(np.array(boxes[0][i]))
                    logging.info('\t{}, {}, {}'.format(
                        class_names[int(classes[0][i])],
                        np.array(scores[0][i]), np.array(boxes[0][i])))
                    if class_names[int(classes[0][i])] == "person":
                        climbersThisPic.append(
                            np.array(boxes[0][i])
                        )  # collect every detected person -- comment out for testing; PROBLEM with 2 persons in the picture

                # NOTE(review): nesting reconstructed; the author's note says
                # every frame is saved, so this is placed at frame level.
                if not os.listdir(
                        FLAGS.numberedSource) or makeNumbered is True:
                    # Latch the flag so the folder filling up does not stop
                    # the numbering part-way through the run.
                    makeNumbered = True
                    #if climbDetect is False:
                    # This block stores every frame under a running number,
                    # not only frames containing a person, because otherwise
                    # the grip detection could not run when YOLO fails.
                    # Let it run once for each new dataset.
                    cv2.imwrite(
                        FLAGS.numberedSource + 'PhotoNr_' +
                        str(climberCounter) + '.jpg', img)
                    climberCounter += 1

                yOnes = []
                for j in range(len(climbersThisPic)):
                    #print('\t{}'.format(np.array(climbersThisPic[j][1:2])))
                    yOnes.append(climbersThisPic[j][1:2])
                if yOnes:
                    allClimbers.append(
                        climbersThisPic[np.argmin(yOnes)]
                    )  # the person whose box has the smaller y1 value, i.e. the one further up,
                    # is stored as the climber in allClimbers
                else:
                    # No person found: store a placeholder box so that
                    # allClimbers keeps one entry per frame.
                    allClimbers.append([0, 0, 10e-12, 10e-12])
                climbersThisPic = []

                # img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
                # cv2.imwrite(FLAGS.numberedSource + str(count) + '.jpg', img)
                img = draw_persons(img, (boxes, scores, classes, nums),
                                   class_names)
                cv2.imwrite(FLAGS.output + str(count) + '.jpg', img)
                logging.info('output saved to: {}'.format(FLAGS.output) +
                             str(count))
            # NOTE(review): the file handle is never closed, so the data is
            # only guaranteed to reach disk when the interpreter exits.
            # Placement after the loop is reconstructed — confirm.
            f = open("climbers.txt", "w")
            f.write(str(allClimbers))
        #------------------------------------------------------------------------------------------ currently not in use
        elif FLAGS.video is True:  # detection on the camera feed or on all frames of the video
            frameWidth = 1080
            frameHeight = 1920
            if FLAGS.cam is True:
                cap = cv2.VideoCapture(0)
            else:
                cap = cv2.VideoCapture(FLAGS.vidFile)
            cap.set(3, frameWidth)
            cap.set(4, frameHeight)
            #cap.set(10, 150)

            # NOTE(review): this branch reads `img_raw` and `allhands`, which
            # are not assigned before this point, and does not check
            # `success` before using `img`; it cannot run as written.
            while True:
                success, img = cap.read()
                imgResult = img.copy()

                imgResult = tf.expand_dims(img_raw, 0)
                imgResult = transform_images(img, FLAGS.size)

                t1 = time.time()
                boxes, scores, classes, nums = yolo(imgResult)
                t2 = time.time()
                logging.info('time: {}'.format(t2 - t1))

                logging.info('detections:')
                for i in range(nums[0]):
                    logging.info('\t{}, {}, {}'.format(
                        class_names[int(classes[0][i])],
                        np.array(scores[0][i]), np.array(boxes[0][i])))
                    allhands.append(np.array(
                        boxes[0][i]))  # saving all detected hands in allhands

                img = cv2.cvtColor(img_raw.numpy(), cv2.COLOR_RGB2BGR)
                img = draw_outputs(img, (boxes, scores, classes, nums),
                                   class_names)

                cv2.imshow("Video", imgResult)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
        else:
            # single-image detection
            img_raw = tf.image.decode_image(open(FLAGS.image, 'rb').read(),
                                            channels=3)

            img = tf.expand_dims(img_raw, 0)
            img = transform_images(img, FLAGS.size)

            t1 = time.time()
            boxes, scores, classes, nums = yolo(img)
            t2 = time.time()
            logging.info('time: {}'.format(t2 - t1))

            logging.info('detections:')
            for i in range(nums[0]):
                logging.info('\t{}, {}, {}'.format(
                    class_names[int(classes[0][i])], np.array(scores[0][i]),
                    np.array(boxes[0][i])))

            img = cv2.cvtColor(img_raw.numpy(), cv2.COLOR_RGB2BGR)
            img = draw_outputs(img, (boxes, scores, classes, nums),
                               class_names)
            cv2.imwrite(FLAGS.output, img)
            logging.info('output saved to: {}'.format(FLAGS.output))

    #-----------------------------------------------------------------------------------------------------------------------
    # using the hand detection to create a list of all detected Hands
    # 13.08.2021: modify this loop so it iterates over holds and images in
    # the right order -> watch for overlap between hold and person bounding
    # boxes -> only then compute differences
    #print(str(allClimbers))
    olh = []  # overlapping hands
    points = 0
    holdList = "Gripped Holds:"
    overlapList = 'Overlaps:'
    if FLAGS.baseline:
        for holdID in range(
                len(holds)):  # intersect every climber box with every hold
            logging.info('loading hold Nr.' + str(holdID))
            for index in range(
                    len(allClimbers)
            ):  # -> compare the hold area with the area of the overlap
                climbImg = cv2.imread(FLAGS.numberedSource + 'PhotoNr_' +
                                      str(index) + '.jpg')
                # NOTE(review): the baseline image is re-read from disk for
                # every hold/frame pair.
                climberPix = percToPix(allClimbers[index],
                                       cv2.imread(FLAGS.baseline))
                #print(climberPix)
                r = overlapRect(holds[holdID], climberPix)
                delay = index - FLAGS.delay  # adjust the delay here
                #print(rect_area(holds[holdID]), rect_area(r), rect_area(climberPix))
                # The comparison image is frame 0 until index exceeds
                # FLAGS.delay, after that the frame FLAGS.delay steps back.
                if index == 0:
                    base = climbImg.copy()
                elif index > FLAGS.delay:  # adjust the delay here as well
                    base = cv2.imread(FLAGS.numberedSource + 'PhotoNr_' +
                                      str(delay) + '.jpg')

                # Proceed only when the overlap rectangle has the same area
                # as the hold (presumably: the hold lies fully inside the
                # climber box — confirm against overlapRect) and the frame
                # index is a multiple of FLAGS.frameReduction.
                if abs(rect_area(holds[holdID]) - rect_area(r)) < 10e-12 and (
                        index %
                        FLAGS.frameReduction) == 0:  # or (index % 600) == 0:
                    olh.append(overlapRect(holds[holdID], climberPix))
                    # print(climbImg)
                    if FLAGS.colorMask is True:  # RE-ENABLE
                        #img = cv2.imread(str(climbImg))
                        base = mask_colour(base, color)
                        climbImg = mask_colour(climbImg, color)
                    allDiff, score, scorePix = compare_baseline(
                        base, climbImg, holds[holdID])
                    # NOTE(review): the ratio divides by
                    # cv2.countNonZero(...); if that is 0 this raises
                    # ZeroDivisionError — confirm it cannot happen.
                    logging.info(
                        'overlap detected: image ' + str(index) +
                        ' and hold ' + str(holdID) + '; image similarity ' +
                        str(score) + '; Overlap percent ' +
                        str((allDiff.shape[1] * allDiff.shape[0] - scorePix) /
                            cv2.countNonZero(
                                cv2.cvtColor(allDiff, cv2.COLOR_BGR2GRAY))))
                    overlapList = overlapList + '\n overlap detected: image ' + str(
                        index) + 'and hold' + str(
                            holdID) + '; image similarity ' + str(score)
                    cv2.imwrite(
                        FLAGS.holdsOut + str(holdID) + '/overlapping_' +
                        str(index) + '.jpg', allDiff)
                    # A hold counts as gripped when any of three thresholds
                    # is crossed: similarity, overlap ratio, or the absolute
                    # count (rectangle size minus scorePix).
                    if score < FLAGS.similarity or ((allDiff.shape[1] * allDiff.shape[0]-scorePix)/cv2.countNonZero(cv2.cvtColor(allDiff, cv2.COLOR_BGR2GRAY))) > FLAGS.holdOverlap \
                            or (allDiff.shape[1] * allDiff.shape[0]-scorePix) > FLAGS.handPix:
                        holdList = holdList + " \n hold"
                        #(allDiff.shape[1] * allDiff.shape[0]-scorePix)/cv2.countNonZero(cv2.cvtColor(allDiff, cv2.COLOR_BGR2GRAY))
                        print('Size of the hold rectangle: ' +
                              str(allDiff.shape[1] * allDiff.shape[0]))
                        print('Fremdpixel: ' +
                              str(allDiff.shape[1] * allDiff.shape[0] -
                                  scorePix))
                        print('NonZero Pixel: ' + str(
                            cv2.countNonZero(
                                cv2.cvtColor(allDiff, cv2.COLOR_BGR2GRAY))))
                        print('Similarity: ' + str(score))
                        print('Overlap: ' + str(
                            (allDiff.shape[1] * allDiff.shape[0] - scorePix) /
                            cv2.countNonZero(
                                cv2.cvtColor(allDiff, cv2.COLOR_BGR2GRAY))))
                        cv2.imwrite(
                            FLAGS.holdsOut + str(holdID) + '/gripped_' +
                            str(index) + '.jpg', allDiff)
                        # Element 4 of a hold record is used as its point
                        # value; only the maximum is kept.
                        # NOTE(review): assumes every hold record has at
                        # least five entries — confirm against hold_marker
                        # and holds.csv.
                        if points < holds[holdID][4]:
                            points = holds[holdID][4]
                            logging.info('progress detected: points = ' +
                                         str(points))
                        # NOTE(review): nesting of this `break` is
                        # reconstructed; here it stops scanning frames for
                        # the current hold once it was judged gripped.
                        break
                else:
                    #print(allClimbers[index])
                    logging.info('CLimber Nr. ' + str(index) +
                                 ' and Grip Nr. ' + str(holdID) +
                                 'no overlap')
            #holdID += 1
    print("Total points: " + str(points))
    # print(holdList)
    # print(overlapList)
    print('Number of detected climbers:' + str(len(allClimbers)))
    # ...
    end = timer()
    #print(end - start)
    logging.info('Elapsed time: {}'.format(
        str(datetime.timedelta(seconds=(end - start)))))
def main():
    """Train YOLOv3 on the Shapes TFRecords, then evaluate the saved weights.

    Training: the darknet backbone weights are copied from a pretrained
    model built with ``weight_num_classes`` classes and frozen; the model is
    then fitted with ``model.fit`` and its weights are saved.

    Evaluation: an inference model is built from the saved weights, run over
    the validation set, and its positive-score detections are handed to
    ``Evaluator`` together with the ground-truth boxes taken from the
    transformed labels.

    All paths and hyper-parameters are hard-coded below.

    Raises:
        SystemExit: if the prediction array ends up empty.
    """
    train_path = '/Users/justinbutler/Desktop/school/Calgary/ML_Work/Datasets/Shapes/tfrecord_single/coco_train.record-00000-of-00001'
    valid_path = '/Users/justinbutler/Desktop/school/Calgary/ML_Work/Datasets/Shapes/tfrecord_single/coco_val.record-00000-of-00001'
    weights_path = '/Users/justinbutler/Desktop/school/Calgary/ML_Work/yolov3-tf2/checkpoints/yolov3.tf'
    # Path to the text file containing all classes, 1 per line
    classes = '/Users/justinbutler/Desktop/school/Calgary/ML_Work/yolov3-tf2/shapes/shapes.names'

    # Training mode. Can be 'fit', 'eager_fit', 'eager_tf', 'valid':
    #   fit:       model.fit
    #   eager_fit: model.fit(run_eagerly=True)
    #   eager_tf:  custom GradientTape
    # Only the comparison with 'eager_fit' is evaluated below.
    mode = 'fit'

    # Transfer mode (usually 'darknet'). Documented options:
    #   none:      training from scratch
    #   darknet:   transfer darknet
    #   no_output: transfer all but output
    #   frozen:    transfer and freeze all
    #   fine_tune: transfer all and freeze darknet only
    #   pre:       use a pre-trained model for validation
    # NOTE(review): this value is never read; the code below always performs
    # the darknet transfer regardless of it.
    transfer = 'none'

    image_size = 416
    num_epochs = 1
    batch_size = 8
    learning_rate = 1e-3
    num_classes = 4
    # num class for `weights` file if different, useful in transfer learning
    # with different number of classes
    weight_num_classes = 80
    iou_threshold = 0.5

    # saved_weights_path = '/Users/justinbutler/Desktop/school/Calgary/ML_Work/yolov3-tf2/weights/'
    saved_weights_path = '/home/justin/ml_models/yolov3-tf2/weights/shapes_{}.tf'.format(
        num_epochs)
    anchors = yolo_anchors
    anchor_masks = yolo_anchor_masks

    # Training dataset
    #dataset_train = tf.data.TFRecordDataset(train_path)
    #dataset_val = tf.data.TFRecordDataset(valid_path)
    dataset_train = load_tfrecord_dataset(train_path, classes, image_size)
    dataset_train = dataset_train.shuffle(buffer_size=512)
    dataset_train = dataset_train.batch(batch_size)
    #dataset_train = dataset_train.map(lambda x, y: (
    #    transform_images(x, image_size),
    #    transform_targets(y, anchors, anchor_masks, image_size)))
    #dataset_train = dataset_train.prefetch(
    #    buffer_size=tf.data.experimental.AUTOTUNE)

    dataset_val = load_tfrecord_dataset(valid_path, classes, image_size)
    dataset_val = dataset_val.shuffle(buffer_size=512)
    dataset_val = dataset_val.batch(batch_size)
    #dataset_val = dataset_val.map(lambda x, y: (
    #    transform_images(x, image_size),
    #    transform_targets(y, anchors, anchor_masks, image_size)))

    # Create model in training mode
    yolo = models.YoloV3(image_size, training=True, classes=num_classes)

    model_pretrained = YoloV3(image_size,
                              training=True,
                              classes=weight_num_classes or num_classes)
    model_pretrained.load_weights(weights_path)

    # Which weights to start with?
    print('Loading Weights...')
    #yolo.load_weights(weights_path)

    # Copy only the backbone and freeze it; the heads train from scratch.
    yolo.get_layer('yolo_darknet').set_weights(
        model_pretrained.get_layer('yolo_darknet').get_weights())
    freeze_all(yolo.get_layer('yolo_darknet'))

    # `learning_rate` is the supported keyword; `lr` is a deprecated alias
    # that newer Keras releases reject.
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    # Passing loss as a list might sometimes fail? dict might be better?
    loss = [
        YoloLoss(anchors[mask], classes=num_classes) for mask in anchor_masks
    ]

    yolo.compile(optimizer=optimizer,
                 loss=loss,
                 run_eagerly=(mode == 'eager_fit'))
    callbacks = [
        ReduceLROnPlateau(verbose=1),
        EarlyStopping(patience=3, verbose=1),
        ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                        verbose=1,
                        save_weights_only=True),
        TensorBoard(log_dir='logs')
    ]

    yolo.fit(dataset_train,
             epochs=num_epochs,
             callbacks=callbacks,
             validation_data=dataset_val)
    yolo.save_weights(saved_weights_path)

    # Detect/ROC
    model = YoloV3(image_size, training=False, classes=num_classes)
    model.load_weights(saved_weights_path).expect_partial()

    # One image per batch so predictions and labels line up per image.
    batch_size = 1

    val_dataset = load_tfrecord_dataset(valid_path, classes, image_size)
    val_dataset = val_dataset.batch(batch_size)

    val_dataset = val_dataset.map(
        lambda x, y: (transform_images(x, image_size),
                      transform_targets(y, anchors, anchor_masks, image_size)))

    images = [np.squeeze(img) for img, _ in val_dataset]

    predictions = []

    evaluator = Evaluator(iou_thresh=iou_threshold)

    # labels - (N, grid, grid, anchors, [x, y, w, h, obj, class])
    boxes, scores, classes, num_detections = model.predict(val_dataset)
    # boxes -> (num_imgs, num_detections (200), box coords (4))
    # scores -> (num_imgs, num_detections)
    # classes -> (num_imgs, num_detections)
    # num_detections -> num_imgs

    # Aim for labels shape (per batch): [num_imgs, 3x[num_boxes x [x1,y1,x2,y2,score,class]]
    # full_labels = [label for _, label in val_dataset]

    # Shape : [Num images, 3 scales, grid, grid, anchor, 6 ]

    filtered_labels = []

    for _, label in val_dataset:
        img_labels = []
        # Label has shape [3 scales x[1, grid, grid, 3, 6]]
        for scale in label:
            # Shape [1, grid, grid, 3, 6]
            scale = np.asarray(scale)
            grid = scale.shape[1]

            # NOTE(review): a plain reshape does not move the anchor axis to
            # the front; confirm that grouping by reshape is intended.
            scale2 = np.reshape(scale, (3, grid * grid, 6))
            # Shape: [3, grid*grid, 6]

            for anchor in scale2:
                # Keep only the cells whose objectness entry is positive.
                filtered_anchors = [
                    np.asarray(box) for box in anchor if box[4] > 0
                ]
                img_labels.append(filtered_anchors)

        # NOTE(review): the per-anchor lists can differ in length; recent
        # NumPy versions refuse to build an array from ragged input without
        # dtype=object — confirm against the NumPy version in use.
        img_labels = np.asarray(img_labels)
        filtered_labels.append(img_labels)

    print(len(filtered_labels))
    print(len(filtered_labels[0]))
    print(len(filtered_labels[0][2]))

    # One row per detection with a positive score:
    # [box coords scaled to pixels (4), score, class]
    for img in range(len(num_detections)):
        row = [
            np.hstack([box * image_size, score, cls])
            for box, score, cls in zip(boxes[img], scores[img], classes[img])
            if score > 0
        ]
        predictions.append(np.asarray(row))

    predictions = np.asarray(
        predictions)  # numpy array of shape [num_imgs x num_preds x 6]

    if len(predictions) == 0:  # No predictions made
        print('No predictions made - exiting.')
        # Same effect as exit(), without depending on the `site` module.
        raise SystemExit

    # Predictions shape: [num_imgs x num_preds x[box coords(4), conf, classes]]
    # Box coords should be in format x1 y1 x2 y2

    # Labels shape: [num_imgs, 3x[num_boxes x [x1,y1,x2,y2,score,class]]
    evaluator(predictions, filtered_labels, images)  # Check gts box coords
def main(_argv):
    """Run YOLOv3 on a video source, show the detections and optionally record them.

    ``FLAGS.video`` is tried as an integer camera index first and used as a
    path/URL otherwise. Every frame is passed through the model, annotated
    with the detections and the mean inference time of the last 20 frames,
    shown in a window and, when ``FLAGS.output`` is set, appended to the
    output video. Pressing ``q`` in the window ends the loop.

    Args:
        _argv: Not referenced in the body.
    """
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    # Close the class file once it has been read.
    with open(FLAGS.classes) as names_file:
        class_names = [c.strip() for c in names_file]
    logging.info('classes loaded')

    times = []

    # Only the int() conversion is expected to fail here, so catch just that
    # instead of hiding every possible error.
    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except (TypeError, ValueError):
        vid = cv2.VideoCapture(FLAGS.video)

    out = None

    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    # A file source returns empty frames forever once it is exhausted; give
    # up after this many consecutive empty reads instead of spinning.
    max_empty_frames = 10
    empty_frames = 0

    try:
        while True:
            _, img = vid.read()

            if img is None:
                logging.warning("Empty Frame")
                time.sleep(0.1)
                empty_frames += 1
                if empty_frames >= max_empty_frames:
                    break
                continue
            empty_frames = 0

            img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img_in = tf.expand_dims(img_in, 0)
            img_in = transform_images(img_in, FLAGS.size)

            t1 = time.time()
            boxes, scores, classes, nums = yolo.predict(img_in)
            t2 = time.time()
            # Keep a sliding window of the last 20 inference times.
            times.append(t2 - t1)
            times = times[-20:]

            img = draw_outputs(img, (boxes, scores, classes, nums),
                               class_names)
            # NOTE(review): this tests the list of known class names, not
            # the classes detected in this frame — confirm the intent.
            if "car" in class_names:
                print("car detect")
            img = cv2.putText(
                img,
                "Time: {:.2f}ms".format(sum(times) / len(times) * 1000),
                (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
            if out is not None:
                out.write(img)
            cv2.imshow('output', img)
            if cv2.waitKey(1) == ord('q'):
                break
    finally:
        # Release the capture and finalise the output file even when the
        # loop ends through an exception.
        vid.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()
def main(_argv):
    """Detect a user with YOLOv3, follow them with an OpenCV tracker and drive the Arduino helpers.

    A background thread runs ``arduino_read``. In the main loop every frame
    is passed through YOLO; while a tracker box is active the tracker is
    updated and ``ardu`` / ``ardu_detect`` / ``ardu_stop`` are called
    depending on the outcome. Tracking is (re)started through
    ``start_tracker`` when ``s`` is pressed or the global ``start`` equals
    ``'s'``. Pressing ``q`` in the window ends the loop.

    NOTE(review): this block was reconstructed from a source whose line
    breaks and indentation were lost; confirm the nesting against the
    original file.

    Args:
        _argv: Not referenced in the body.
    """
    # NOTE(review): the thread is started but never joined in this function.
    th = threading.Thread(target=arduino_read)
    th.start()
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    # NOTE(review): the class-name file is opened without being closed.
    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    # NOTE(review): CUDA, times and inp_dim are only referenced by
    # commented-out code further down.
    CUDA = torch.cuda.is_available()
    times = []
    inp_dim = int('416')
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    # NOTE(review): `cap` is not assigned in this function. Unless it exists
    # as a module-level name, `vid = cap` raises NameError, which the bare
    # `except` swallows, so FLAGS.video is always used — confirm the intent.
    try:
        vid = cv2.VideoCapture(0)  #cam number - usb=1
        vid = cap
    except:
        vid = cv2.VideoCapture(FLAGS.video)
    # vid = cv2.VideoCapture(0)

    out = None

    # NOTE(review): the writer is created here but never written to or
    # released in this function.
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    tracker = cv2.TrackerKCF_create()
    fps = None
    initBB = None  # tracker box; None means "not tracking"
    # initBB = True
    redetect = False
    failCnt = 0  # consecutive failed tracker updates
    # `start` is a module-level name read and written below; presumably it
    # is also set by the arduino_read thread — confirm.
    global start
    check_start = None

    while True:
        ret, frame = vid.read()
        #img = frame

        # if frmae is None:
        #     logging.warning("Empty Frame")
        #     time.sleep(0.1)
        #     continue

        # frame = imutils.resize(frame, width = 500)
        # NOTE(review): `ret` is not checked; a failed read leaves `frame`
        # as None and the next line raises AttributeError.
        (H, W) = frame.shape[:2]
        img_in = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)
        boxes, scores, classes, nums = yolo.predict(img_in)

        # t1 = time.time()
        # boxes, scores, classes, nums = yolo.predict(img_in)
        # t2 = time.time()
        # times.append(t2-t1)
        # times = times[]
        # print('initBB = ', initBB)

        if initBB is not None:
            (success, box) = tracker.update(frame)
            if start == 'q':
                # Stop requested: drop the box and replace the tracker.
                # NOTE(review): a MedianFlow tracker is created here while
                # every other place creates a KCF tracker — confirm.
                initBB = None
                tracker = cv2.TrackerMedianFlow_create()
                ardu_stop()
            elif success:
                failCnt = 0
                (x, y, w, h) = [int(v) for v in box]
                # The drawn rectangle is narrowed by 30 px on each side.
                cv2.rectangle(frame, (x + 30, y), (x + w - 30, y + h),
                              (0, 255, 0), 2)
                ardu(box)
                # frame = draw_outputs(frame, (boxes, scores, classes, nums), class_names)
            else:
                failCnt += 1
                ardu_detect()
                # After more than 50 failed updates give up on the current
                # box and start over with a fresh tracker.
                if failCnt > 50:
                    redetect = True
                    initBB = None
                    tracker = cv2.TrackerKCF_create()

            # fps.update()
            # fps.stop()
            # info = [
            #     ('Tracker', 'kcf'),
            #     ('Success', 'yes' if success else 'No'),
            #     ('FPS', '{:.2f}'.format(fps.fps())),
            # ]
            #
            # for (i, (k, v)) in enumerate(info):
            #     text = '{}:{}'.format(k, v)
            #     cv2.putText(frame, text, (10, H-((i*20)+20)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255, 2))

        # if redetect:
        #     ret, frame = vid.read()
        #     gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        #     template = cv2.imread('./user_faces/user.jpg', 0)
        #     w, h = template.shape[::-1]
        #
        #     res = cv2.matchTemplate(gray, template, cv2.TM_SQDIFF)
        #     min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
        #     top_left = min_loc
        #     bottom_right = (top_left[0] + w, top_left[1] + h)
        #     cv2.rectangle(frame, top_left, bottom_right, (0, 255, 0), 1)
        # The bare string below is disabled template-matching code kept for
        # reference; it has no effect at runtime.
        """
            imgray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
            w, h = imgray.shape[::-1]

            templ = cv2.imread('./user.jpg', cv2.IMREAD_GRAYSCALE)
            templ_h, templ_w = templ.shape[::-1]

            res = cv2.matchTemplate(imgray, templ, cv2.TM_CCOEFF_NORMED)

            loc = np.where(res >= 0.6)

            for pt in zip(*loc[::-1]):
                cv2.rectangle(frame, pt, (pt[0] + w, pt[1] + h), (0, 255, 0), 1)
        """

        key = cv2.waitKey(1) & 0xFF

        if key == ord('s') or start == 's':
            check_start = start_tracker(frame, (boxes, scores, classes, nums),
                                        class_names)
            if check_start is not None:
                # 'a' replaces 's' so this branch is not re-entered on the
                # next frame through the `start` global.
                start = 'a'
                initBB = tuple(check_start)
                tracker.init(frame, initBB)
                x, y, w, h = check_start
                # Save a crop of the selected box, trimmed by 20 px on the
                # left and right.
                frame_user = frame[y:y + h, x + 20:x + w - 20]
                cv2.imwrite('./user_faces/user.jpg', frame_user)
                # fps = FPS().start()

        # img, orig_im, dim = prep_image(frame, inp_dim)
        # img = prep_image(frame, inp_dim)
        # cv2.imshow('img', frame)
        # if CUDA:
        #     # im_dim = im_dim.cuda()
        #     img = img.cuda()
        #
        # with torch.no_grad():
        #     output = model(Variable(img),CUDA)
        # output = write_results(output, confidence, num_classes, nms = True, nms_conf = nms_thesh)
        # try:
        #     if output[0][0].tolist() == 0:
        #         #tensor[0][0] mean the category of predicted class and 0 is person in coco.names
        #         initBB = np.array([int(i) for i in output[0][1:5].tolist])
        #         x, y, w, h = initBB
        #         initBB = (x, y, w, h)
        #         frame_user = frame[x-10:w+10, y-10:h+10]
        #         cv2.imwrite('./user_faces/user.jpg', frame_user)
        #         tracker.init(frame, initBB)
        #         fps = FPS().start()
        # except:
        #     print("다시해주세요")
        #print(initBB)

        cv2.imshow('img', frame)

        if key == ord('q'):
            ardu_stop()
            break

    vid.release()
    cv2.destroyAllWindows()
# `deque` comes from the public `collections` module; `_collections` is a
# private implementation module and should not be imported directly.
from collections import deque

from MY_print_methods import print_class
from RAGHAV_object_tracker import object_tracker

p = print_class()
ot = object_tracker()

is_init_frame = True  # Flag is necessary to setup object tracking properly
prev_frame_objects = []
cur_frame_objects = []

font = cv2.FONT_HERSHEY_SIMPLEX  # OpenCV font for drawing text on frame

crash_flag = False

# Read the class names once and close the file afterwards.
with open('./data/labels/coco.names') as _names_file:
    class_names = [c.strip() for c in _names_file]
yolo = YoloV3(classes=len(class_names))
yolo.load_weights('./weights/yolov3.tf')

# Settings for the appearance-based tracker built below.
max_cosine_distance = 0.5
nn_budget = None
nms_max_overlap = 0.8

model_filename = 'model_data/mars-small128.pb'
encoder = gdet.create_box_encoder(model_filename, batch_size=1)
metric = nn_matching.NearestNeighborDistanceMetric('cosine',
                                                   max_cosine_distance,
                                                   nn_budget)
tracker = Tracker(metric)

# NOTE(review): `name` is not defined in this part of the file — confirm it
# is set before this line runs.
vid = cv2.VideoCapture('./data/video/' + name +
                       '.mp4')  # 28, 26, 30 (mTracker: good test)
def setup_model():
    """Build, initialise and compile the YOLOv3 training model from ``FLAGS``.

    The model variant follows ``FLAGS.tiny``. Weight initialisation follows
    ``FLAGS.transfer``:

    * ``'none'``: nothing is loaded.
    * ``'darknet'``: only the ``yolo_darknet`` sub-model is copied from the
      pretrained weights and frozen.
    * ``'no_output'``: every layer whose name does not start with
      ``yolo_output`` is copied and frozen.
    * anything else: the weights are loaded into the model directly; then
      ``'fine_tune'`` freezes ``yolo_darknet`` and ``'frozen'`` freezes the
      whole model.

    Returns:
        The tuple ``(model, optimizer, loss, anchors, anchor_masks)``, where
        ``loss`` is a list with one ``YoloLoss`` per anchor mask.
    """
    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size,
                           training=True,
                           classes=FLAGS.num_classes)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True, classes=FLAGS.num_classes)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    # Configure the model for transfer learning
    if FLAGS.transfer == 'none':
        pass  # Nothing to do
    elif FLAGS.transfer in ['darknet', 'no_output']:
        # Darknet transfer is a special case that works
        # with incompatible number of classes
        # reset top layers
        pretrained_classes = FLAGS.weights_num_classes or FLAGS.num_classes
        if FLAGS.tiny:
            model_pretrained = YoloV3Tiny(FLAGS.size,
                                          training=True,
                                          classes=pretrained_classes)
        else:
            model_pretrained = YoloV3(FLAGS.size,
                                      training=True,
                                      classes=pretrained_classes)
        model_pretrained.load_weights(FLAGS.weights)

        if FLAGS.transfer == 'darknet':
            darknet = model.get_layer('yolo_darknet')
            darknet.set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
            freeze_all(darknet)
        elif FLAGS.transfer == 'no_output':
            for layer in model.layers:
                if not layer.name.startswith('yolo_output'):
                    layer.set_weights(
                        model_pretrained.get_layer(layer.name).get_weights())
                    freeze_all(layer)
    else:
        # All other transfer require matching classes
        model.load_weights(FLAGS.weights)
        if FLAGS.transfer == 'fine_tune':
            # freeze darknet and fine tune other layers
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        elif FLAGS.transfer == 'frozen':
            # freeze everything
            freeze_all(model)

    # `learning_rate` is the supported keyword; `lr` is a deprecated alias
    # that newer Keras releases reject.
    optimizer = tf.keras.optimizers.Adam(learning_rate=FLAGS.learning_rate)
    loss = [
        YoloLoss(anchors[mask], classes=FLAGS.num_classes)
        for mask in anchor_masks
    ]

    model.compile(optimizer=optimizer,
                  loss=loss,
                  run_eagerly=(FLAGS.mode == 'eager_fit'))

    return model, optimizer, loss, anchors, anchor_masks
def main(_argv):
    """Track animals in ``raw.avi`` with YOLOv3 + SORT and match the tracks to RFID reads.

    Three phases, each timed and reported on stdout:

    1. Detection/tracking: every frame of ``FLAGS.data + '/raw.avi'`` is
       passed through YOLO; the boxes are fed to the SORT tracker and the
       resulting tracks, sorted by track id, are handed to the mouse
       tracker.
    2. RFID matching: ``mouse_tracks.evaulate()`` produces two data frames
       which are written to ``RFID_tracks.csv`` and ``RFID_matches.csv``.
    3. Video writing: the video is read again and every frame is passed to
       ``mouse_tracks.write_video``; frames are shown when
       ``FLAGS.showvid`` evaluates to a true value.

    NOTE(review): this block was reconstructed from a source whose
    indentation was lost. The SORT update is placed inside the
    ``nums[0] != 0`` guard, i.e. frames without detections do not update
    the trackers — confirm against the original file.

    Args:
        _argv: Not referenced in the body.
    """
    # Initiating the SORT tracker
    mot_tracker = Sort()

    # Starting YOLOv3 and related processes
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if physical_devices:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)
    yolo = YoloV3(classes=FLAGS.num_classes)
    yolo.load_weights(FLAGS.weights)
    with open(FLAGS.classes) as names_file:
        class_names = [c.strip() for c in names_file]

    vid = cv2.VideoCapture(FLAGS.data + '/raw.avi')
    fps = int(vid.get(cv2.CAP_PROP_FPS))
    codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
    if FLAGS.output == 'None':
        FLAGS.output = f'{FLAGS.data}/Track_output.avi'
    writer = [FLAGS.output, codec, fps]
    #out = cv2.VideoWriter(FLAGS.output, codec, fps, (800, 800))
    df_RFID_cage = rm.load_RFID(FLAGS.data + '/RFID_data_all.csv')

    # Starting mouse tracker processing. The number of RFID rows is used as
    # the expected number of frames.
    # NOTE(review): `tags` is not defined in this function — presumably a
    # module-level name; confirm.
    vid_length = len(df_RFID_cage)
    pbar = tqdm(total=vid_length)
    mouse_tracks = rm.mouse_tracker(tags, FLAGS.data, df_RFID_cage, writer,
                                    vid_length)

    # Detection and SORT loop
    frame_count = 0
    t1 = time.time()
    print('starting')
    while vid.isOpened():
        ret, img = vid.read()
        if not ret:
            if frame_count == 0:
                print('Unable to open video, please check video path')
            vid.release()
            break

        frame_count += 1
        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)
        boxes, scores, classes, nums = yolo.predict(img_in)
        #print(scores)
        _objects, bb_start, bb_end, probability = get_object_details(
            img, (boxes, scores, classes, nums), class_names)

        if nums[0] != 0:
            ds_boxes = []  # rows to feed into the SORT tracker
            for i in range(nums[0]):
                _center, standard_cords = get_center(bb_start[i], bb_end[i])
                standard_cords.append(round(probability[i], 4))
                ds_boxes.append(standard_cords)
            trackers = mot_tracker.update(np.asarray(ds_boxes))

            # [xmin, ymin, xmax, ymax, track id] as ints, ordered by id.
            sort_tracks = sorted(
                ([int(value) for value in track[:5]] for track in trackers),
                key=lambda track: track[4])
            mouse_tracks.update(frame_count, sort_tracks)
        pbar.update(1)
    pbar.close()
    t2 = time.time()
    time_yolo_sort = t2 - t1
    print(
        f'time taken for yolo_sort on {frame_count}: {time_yolo_sort} seconds'
    )

    # Associating RFID tags
    print('Associating RFID with Sort_ID')
    t3 = time.time()
    df_RFID_tracks, df_matchs = mouse_tracks.evaulate()
    t4 = time.time()
    RFID_matching_time = t4 - t3
    print(
        f'RFID Matching of {frame_count} took {RFID_matching_time} seconds'
    )

    print('Writing Video')
    vid = cv2.VideoCapture(FLAGS.data + '/raw.avi')
    frame_count = 0
    pbar = tqdm(total=len(df_RFID_cage))
    df_RFID_tracks.to_csv(f'{FLAGS.data}/RFID_tracks.csv')
    df_matchs.to_csv(f'{FLAGS.data}/RFID_matches.csv')

    # SECURITY NOTE(review): eval() executes whatever string is passed in
    # the flag; consider a boolean flag instead. It is evaluated once here
    # rather than once per frame.
    show_video = eval(FLAGS.showvid)

    t5 = time.time()
    while vid.isOpened():
        ret, img = vid.read()
        if not ret:
            break
        frame_count += 1
        edited_img = mouse_tracks.write_video(frame_count, img,
                                              FLAGS.Video_type)
        if show_video:
            cv2.imshow('output', edited_img)
            if cv2.waitKey(1) == ord('q'):
                break
        pbar.update(1)
    vid.release()
    pbar.close()
    t6 = time.time()
    write_time = t6 - t5
    # The original message lacked the f-prefix and printed the template
    # text verbatim.
    print(f'Writing {frame_count} took {write_time} seconds')
    print('All processes completed')
def main(_argv):
    """Train YOLOv3 / YOLOv3-tiny (optionally recurrent) with transfer and freeze options.

    Flags read: ``height``, ``width``, ``size``, ``tiny``, ``num_classes``,
    ``recurrent``, ``dataset``, ``val_dataset``, ``classes``, ``batch_size``,
    ``transfer``, ``weights``, ``weights_num_classes``, ``freeze``,
    ``learning_rate``, ``mode`` and ``epochs``.

    ``FLAGS.transfer`` selects which pretrained weights are copied into the
    model and ``FLAGS.freeze`` selects which layers are made non-trainable.
    With ``mode == 'eager_tf'`` a manual gradient-tape loop is used and weights
    are saved after each epoch; any other mode goes through
    ``model.compile`` / ``model.fit``.
    """
    # Fall back to a square input when height/width are not given.
    if FLAGS.height is None:
        FLAGS.height = FLAGS.size
    if FLAGS.width is None:
        FLAGS.width = FLAGS.size
    size = (FLAGS.height, FLAGS.width)

    for physical_device in tf.config.experimental.list_physical_devices('GPU'):
        tf.config.experimental.set_memory_growth(physical_device, True)

    if FLAGS.tiny:
        model = YoloV3Tiny(size, training=True, classes=FLAGS.num_classes,
                           recurrent=FLAGS.recurrent)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(size, training=True, classes=FLAGS.num_classes,
                       recurrent=FLAGS.recurrent)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    # Training data
    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(FLAGS.dataset,
                                                      FLAGS.classes, size)
    else:
        train_dataset = dataset.load_fake_dataset()
    train_dataset = train_dataset.shuffle(buffer_size=8)
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(lambda x, y: (
        dataset.transform_images(x, size),
        dataset.transform_targets(y, anchors, anchor_masks, size)))
    if FLAGS.recurrent:
        train_dataset = train_dataset.map(lambda x, y: (
            dataset.get_recurrect_inputs(x, y, anchors, anchor_masks,
                                         FLAGS.num_classes), y))
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    # Validation data
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(FLAGS.val_dataset,
                                                    FLAGS.classes, size)
    else:
        val_dataset = dataset.load_fake_dataset()
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, size),
        dataset.transform_targets(y, anchors, anchor_masks, size)))
    if FLAGS.recurrent:
        val_dataset = val_dataset.map(lambda x, y: (
            dataset.get_recurrect_inputs(x, y, anchors, anchor_masks,
                                         FLAGS.num_classes), y))

    # Configure the model for transfer learning
    if FLAGS.transfer == 'all':
        # All weights are needed: no second model required.
        model.load_weights(FLAGS.weights)
    elif FLAGS.transfer != 'none':
        # Only some weights are needed: build a matching pretrained model,
        # load everything into it and copy over the requested parts.
        pretrained_classes = FLAGS.weights_num_classes or FLAGS.num_classes
        if FLAGS.tiny:
            model_pretrained = YoloV3Tiny(size, training=True,
                                          classes=pretrained_classes,
                                          recurrent=FLAGS.recurrent)
        else:
            model_pretrained = YoloV3(size, training=True,
                                      classes=pretrained_classes,
                                      recurrent=FLAGS.recurrent)
        model_pretrained.load_weights(FLAGS.weights)

        # The darknet backbone is always transferred.
        model.get_layer('yolo_darknet').set_weights(
            model_pretrained.get_layer('yolo_darknet').get_weights())

        # Transfer 'yolo_conv_i' layer weights
        if FLAGS.transfer in ['yolo_conv', 'yolo_output_conv', 'yolo_output']:
            for layer in model.layers:
                if layer.name.startswith('yolo_conv'):
                    layer.set_weights(
                        model_pretrained.get_layer(layer.name).get_weights())

        # Transfer only the first conv2d of each 'yolo_output_i' layer
        if FLAGS.transfer == 'yolo_output_conv':
            for layer in model.layers:
                if layer.name.startswith('yolo_output'):
                    layer.layers[1].set_weights(
                        model_pretrained.get_layer(
                            layer.name).layers[1].get_weights())
                    # should I freeze batch_norm as well?

        # Transfer whole 'yolo_output_i' layers
        if FLAGS.transfer == 'yolo_output':
            for layer in model.layers:
                if layer.name.startswith('yolo_output'):
                    layer.set_weights(
                        model_pretrained.get_layer(layer.name).get_weights())

    # Freeze layers, if requested
    if FLAGS.freeze != 'none':
        if FLAGS.freeze == 'all':
            freeze_all(model)
        # Each level also freezes everything below it. The missing comma after
        # 'yolo_darknet' used to merge the first two entries into one string.
        if FLAGS.freeze in [
                'yolo_darknet', 'yolo_conv', 'yolo_output_conv', 'yolo_output'
        ]:
            freeze_all(model.get_layer('yolo_darknet'))
        if FLAGS.freeze in ['yolo_conv', 'yolo_output_conv', 'yolo_output']:
            for layer in model.layers:
                if layer.name.startswith('yolo_conv'):
                    freeze_all(layer)
        if FLAGS.freeze == 'yolo_output_conv':
            # Output sub-models are addressed by position in model.layers.
            output_indices = (4, 5) if FLAGS.tiny else (5, 6, 7)
            for index in output_indices:
                freeze_all(model.layers[index].layers[1])
        # This branch belongs to the freeze options, so it must test
        # FLAGS.freeze (it previously tested FLAGS.transfer).
        if FLAGS.freeze == 'yolo_output':
            for layer in model.layers:
                if layer.name.startswith('yolo_output'):
                    freeze_all(layer)

    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    optimizer = tf.keras.optimizers.Adam(learning_rate=FLAGS.learning_rate)
    loss = [
        YoloLoss(anchors[mask], classes=FLAGS.num_classes)
        for mask in anchor_masks
    ]

    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = [
                        loss_fn(label, output)
                        for output, label, loss_fn in zip(outputs, labels, loss)
                    ]
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(
                    zip(grads, model.trainable_variables))

                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    [np.sum(x.numpy()) for x in pred_loss]))
                avg_loss.update_state(total_loss)

            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = [
                    loss_fn(label, output)
                    for output, label, loss_fn in zip(outputs, labels, loss)
                ]
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                logging.info("{}_val_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    [np.sum(x.numpy()) for x in pred_loss]))
                avg_val_loss.update_state(total_loss)

            logging.info("{}, train: {}, val: {}".format(
                epoch, avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))

            avg_loss.reset_states()
            avg_val_loss.reset_states()
            model.save_weights('checkpoints/yolov3_train_{}.tf'.format(epoch))
    else:
        model.compile(optimizer=optimizer, loss=loss,
                      run_eagerly=(FLAGS.mode == 'eager_fit'))

        callbacks = [
            ReduceLROnPlateau(verbose=1),
            EarlyStopping(patience=3, verbose=1),
            ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                            verbose=1, save_weights_only=True),
            TensorBoard(log_dir='logs')
        ]

        model.fit(train_dataset,
                  epochs=FLAGS.epochs,
                  callbacks=callbacks,
                  validation_data=val_dataset)
def main(_argv):
    """Train YOLOv3 / YOLOv3-tiny on a TPU when one is reachable, else on the default device.

    A ``TPUClusterResolver`` is tried first; if it cannot be created the default
    distribution strategy is used. ``FLAGS.batch_size`` is multiplied by the
    number of replicas in sync. ``FLAGS.transfer`` selects the transfer-learning
    scheme and ``FLAGS.optimizer`` the optimizer ('Adam', 'nAdam', 'Adagrad' or
    'RMSprop').

    Raises:
        ValueError: if ``FLAGS.optimizer`` is not one of the supported names.
    """
    try:
        tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
        print('Running on TPU ', tpu.cluster_spec().as_dict()['worker'])
    except Exception:  # no TPU reachable: fall back, but let Ctrl-C through
        tpu = None

    if tpu:
        tf.config.experimental_connect_to_cluster(tpu)
        tf.tpu.experimental.initialize_tpu_system(tpu)
        strategy = tf.distribute.experimental.TPUStrategy(tpu)
    else:
        strategy = tf.distribute.get_strategy()

    # Global batch size scales with the number of replicas.
    FLAGS.batch_size = FLAGS.batch_size * strategy.num_replicas_in_sync

    # Anchors depend only on the model flavour, so the input pipelines can be
    # built before the model itself.
    if FLAGS.tiny:
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    train_dataset = dataset.load_fake_dataset()
    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(
            FLAGS.dataset, FLAGS.classes, FLAGS.size)
    train_dataset = train_dataset.shuffle(buffer_size=FLAGS.buffer_size)
    train_dataset = train_dataset.batch(FLAGS.batch_size, drop_remainder=True)
    train_dataset = train_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    val_dataset = dataset.load_fake_dataset()
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(
            FLAGS.val_dataset, FLAGS.classes, FLAGS.size)
    val_dataset = val_dataset.batch(FLAGS.batch_size, drop_remainder=True)
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))

    # Model, optimizer and compile are all created under the strategy scope so
    # their variables belong to the same strategy.
    with strategy.scope():
        if FLAGS.tiny:
            model = YoloV3Tiny(FLAGS.size, training=True,
                               classes=FLAGS.num_classes)
        else:
            model = YoloV3(FLAGS.size, training=True,
                           classes=FLAGS.num_classes)

        # Configure the model for transfer learning
        if FLAGS.transfer == 'none':
            pass  # Nothing to do
        elif FLAGS.transfer in ['darknet', 'no_output']:
            # Darknet transfer is a special case that works
            # with incompatible number of classes
            pretrained_classes = FLAGS.weights_num_classes or FLAGS.num_classes
            if FLAGS.tiny:
                model_pretrained = YoloV3Tiny(FLAGS.size, training=True,
                                              classes=pretrained_classes)
            else:
                model_pretrained = YoloV3(FLAGS.size, training=True,
                                          classes=pretrained_classes)
            model_pretrained.load_weights(FLAGS.weights)

            if FLAGS.transfer == 'darknet':
                model.get_layer('yolo_darknet').set_weights(
                    model_pretrained.get_layer('yolo_darknet').get_weights())
                freeze_all(model.get_layer('yolo_darknet'))
            elif FLAGS.transfer == 'no_output':
                for layer in model.layers:
                    if not layer.name.startswith('yolo_output'):
                        layer.set_weights(
                            model_pretrained.get_layer(
                                layer.name).get_weights())
                        freeze_all(layer)
        else:
            # All other transfer require matching classes
            model.load_weights(FLAGS.weights)
            if FLAGS.transfer == 'fine_tune':
                # freeze darknet and fine tune other layers
                freeze_all(model.get_layer('yolo_darknet'))
            elif FLAGS.transfer == 'frozen':
                # freeze everything
                freeze_all(model)

        # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
        if FLAGS.optimizer == 'Adam':
            optimizer = tf.keras.optimizers.Adam(
                learning_rate=FLAGS.learning_rate)
        elif FLAGS.optimizer == 'nAdam':
            optimizer = tf.keras.optimizers.Nadam(
                learning_rate=FLAGS.learning_rate)
        elif FLAGS.optimizer == 'Adagrad':
            optimizer = tf.keras.optimizers.Adagrad(
                learning_rate=FLAGS.learning_rate)
        elif FLAGS.optimizer == 'RMSprop':
            optimizer = tf.keras.optimizers.RMSprop(
                learning_rate=FLAGS.learning_rate, rho=0.9)
        else:
            # Previously this fell through and failed later with a NameError.
            raise ValueError(
                'Unsupported optimizer: {!r}'.format(FLAGS.optimizer))

        loss = [
            YoloLoss(anchors[mask], classes=FLAGS.num_classes)
            for mask in anchor_masks
        ]

        if FLAGS.mode == 'eager_tf':
            # Eager mode is great for debugging
            # Non eager graph mode is recommended for real training
            avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
            avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

            for epoch in range(1, FLAGS.epochs + 1):
                for batch, (images, labels) in enumerate(train_dataset):
                    with tf.GradientTape() as tape:
                        outputs = model(images, training=True)
                        regularization_loss = tf.reduce_sum(model.losses)
                        pred_loss = [
                            loss_fn(label, output) for output, label, loss_fn
                            in zip(outputs, labels, loss)
                        ]
                        total_loss = (tf.reduce_sum(pred_loss) +
                                      regularization_loss)

                    grads = tape.gradient(total_loss,
                                          model.trainable_variables)
                    optimizer.apply_gradients(
                        zip(grads, model.trainable_variables))

                    logging.info("{}_train_{}, {}, {}".format(
                        epoch, batch, total_loss.numpy(),
                        [np.sum(x.numpy()) for x in pred_loss]))
                    avg_loss.update_state(total_loss)

                for batch, (images, labels) in enumerate(val_dataset):
                    outputs = model(images)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = [
                        loss_fn(label, output) for output, label, loss_fn
                        in zip(outputs, labels, loss)
                    ]
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                    logging.info("{}_val_{}, {}, {}".format(
                        epoch, batch, total_loss.numpy(),
                        [np.sum(x.numpy()) for x in pred_loss]))
                    avg_val_loss.update_state(total_loss)

                logging.info("{}, train: {}, val: {}".format(
                    epoch, avg_loss.result().numpy(),
                    avg_val_loss.result().numpy()))

                avg_loss.reset_states()
                avg_val_loss.reset_states()
                model.save_weights(
                    'checkpoints/yolov3_train_{}.tf'.format(epoch))
        else:
            model.compile(optimizer=optimizer, loss=loss,
                          run_eagerly=(FLAGS.mode == 'eager_fit'))

            if tpu:
                # On TPU, checkpoints are written as .h5 weight files in the
                # working directory.
                callbacks = [
                    ReduceLROnPlateau(verbose=1),
                    EarlyStopping(patience=3, verbose=1),
                    ModelCheckpoint('yolov3_train_{epoch}.h5',
                                    save_weights_only=True, verbose=1,
                                    period=FLAGS.period),
                ]
            else:
                # Early stopping is intentionally not used off-TPU.
                callbacks = [
                    ReduceLROnPlateau(verbose=1),
                    ModelCheckpoint('./checkpoints/yolov3_train_{epoch}.tf',
                                    verbose=1, save_weights_only=True,
                                    period=FLAGS.period),
                    TensorBoard(log_dir='logs')
                ]

            model.fit(train_dataset,
                      epochs=FLAGS.epochs,
                      callbacks=callbacks,
                      validation_data=val_dataset)
def main(_argv):
    """Train YOLOv3 / YOLOv3-tiny from ``<dataset>*.tfrecord`` shards.

    ``FLAGS.transfer`` controls weight initialisation:

    * ``'none'``      -- train from random initialisation.
    * ``'continue'``  -- rebuild the model and load ``FLAGS.weights`` into it.
    * ``'fine_tune'`` -- copy the pretrained darknet backbone and freeze it.
    * ``'frozen'``    -- copy the pretrained darknet backbone and freeze the
      whole model.
    * ``'darknet'`` / ``'no_output'`` -- copy the pretrained darknet backbone,
      reset the selected top layers to fresh initial weights and freeze the
      remaining layers.

    The final weights are written to ``/home/Malte/outputs/yolov3.tf``.
    """
    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size, training=True,
                           classes=FLAGS.num_classes)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True, classes=FLAGS.num_classes)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    train_dataset = dataset.load_fake_dataset()
    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(
            '{}*.tfrecord'.format(FLAGS.dataset), FLAGS.classes)
    train_dataset = train_dataset.shuffle(buffer_size=1024)  # TODO: not 1024
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks,
                                  FLAGS.num_classes)))
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    val_dataset = dataset.load_fake_dataset()
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(
            '{}*.tfrecord'.format(FLAGS.val_dataset), FLAGS.classes)
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks,
                                  FLAGS.num_classes)))

    if FLAGS.transfer != 'none':
        if FLAGS.transfer != 'continue':
            # The pretrained checkpoint is loaded into a fixed 416 / 80-class
            # model; only its darknet backbone is copied over.
            model_pretrained = YoloV3(416, training=True, classes=80)
            model_pretrained.load_weights(FLAGS.weights)
            model.get_layer('yolo_darknet').set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())

        if FLAGS.transfer == 'fine_tune':
            # freeze darknet
            freeze_all(model.get_layer('yolo_darknet'))
        elif FLAGS.transfer == 'frozen':
            # freeze everything. This used to test FLAGS.mode, which this
            # function only compares with 'eager_tf' / 'eager_fit', so the
            # branch could never be taken.
            freeze_all(model)
        else:
            # reset top layers: get initial weights from a freshly built model
            if FLAGS.tiny:
                init_model = YoloV3Tiny(FLAGS.size, training=True,
                                        classes=FLAGS.num_classes)
            else:
                init_model = YoloV3(FLAGS.size, training=True,
                                    classes=FLAGS.num_classes)

            if FLAGS.transfer == 'continue':
                model = init_model
                model.load_weights(FLAGS.weights)
            elif FLAGS.transfer == 'darknet':
                for layer in model.layers:
                    if (layer.name != 'yolo_darknet'
                            and layer.name.startswith('yolo_')):
                        layer.set_weights(
                            init_model.get_layer(layer.name).get_weights())
                    else:
                        freeze_all(layer)
            elif FLAGS.transfer == 'no_output':
                for layer in model.layers:
                    if layer.name.startswith('yolo_output'):
                        layer.set_weights(
                            init_model.get_layer(layer.name).get_weights())
                    else:
                        freeze_all(layer)

    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    optimizer = tf.keras.optimizers.Adam(learning_rate=FLAGS.learning_rate)
    loss = [YoloLoss(anchors[mask], classes=FLAGS.num_classes)
            for mask in anchor_masks]

    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = [
                        loss_fn(label, output)
                        for output, label, loss_fn in zip(outputs, labels, loss)
                    ]
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(
                    zip(grads, model.trainable_variables))

                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    [np.sum(x.numpy()) for x in pred_loss]))
                avg_loss.update_state(total_loss)

            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = [
                    loss_fn(label, output)
                    for output, label, loss_fn in zip(outputs, labels, loss)
                ]
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                logging.info("{}_val_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    [np.sum(x.numpy()) for x in pred_loss]))
                avg_val_loss.update_state(total_loss)

            logging.info("{}, train: {}, val: {}".format(
                epoch, avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))

            avg_loss.reset_states()
            avg_val_loss.reset_states()
            # Per-epoch checkpointing is disabled in this mode; only the final
            # weights are saved below.
    else:
        model.compile(optimizer=optimizer, loss=loss,
                      run_eagerly=(FLAGS.mode == 'eager_fit'))

        # Only checkpointing is active; LR reduction, early stopping and
        # TensorBoard are intentionally switched off. No validation data is
        # passed to fit().
        callbacks = [
            ModelCheckpoint('/home/Malte/outputs/yolov3_train_{epoch}.tf',
                            verbose=1, save_weights_only=True),
        ]

        model.fit(train_dataset,
                  epochs=FLAGS.epochs,
                  callbacks=callbacks)

    model.save_weights('/home/Malte/outputs/yolov3.tf')
def main(_argv):
    """Run YOLOv3 on a single image, or time detection over a TFRecord dataset.

    With ``FLAGS.single`` set, one image is taken either from the TFRecord given
    by ``FLAGS.tfrecord`` or picked at random from the directory
    ``FLAGS.BILDE_MAPPE``; detections are logged, drawn and written to
    ``FLAGS.output``.

    Otherwise every record of ``FLAGS.tfrecord`` is run through the network and
    the mean per-image inference time is printed.
    """
    for physical_device in tf.config.experimental.list_physical_devices('GPU'):
        tf.config.experimental.set_memory_growth(physical_device, True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    # Load weights
    yolo.load_weights(FLAGS.weights).expect_partial()
    logging.info('weights loaded')

    # Load class names
    with open(FLAGS.classes) as class_file:
        class_names = [c.strip() for c in class_file]
    logging.info('classes loaded')

    if FLAGS.single:
        if FLAGS.tfrecord:
            dataset = load_tfrecord_dataset(FLAGS.tfrecord, FLAGS.classes,
                                            FLAGS.size)
            if FLAGS.shuffle:
                dataset = dataset.shuffle(128)
            img_raw, _label = next(iter(dataset.take(1)))
        else:
            image = random.choice(os.listdir(FLAGS.BILDE_MAPPE))
            print(f"Image chosen: {image}")
            with open(FLAGS.BILDE_MAPPE + "/" + image, 'rb') as image_file:
                img_raw = tf.image.decode_image(image_file.read(), channels=3)

        # Add a leading batch axis, e.g. (1080, 1920, 3) --> (1, 1080, 1920, 3)
        img = tf.expand_dims(img_raw, 0)
        # Resize the image to the requested network input size
        img = transform_images(img, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo(img)
        t2 = time.time()
        logging.info('time: {}'.format(t2 - t1))

        logging.info('detections:')
        for i in range(nums[0]):
            logging.info(
                f'\t{class_names[int(classes[0][i])]}, {np.array(scores[0][i])}, {np.array(boxes[0][i])}'
            )

        img = cv2.cvtColor(img_raw.numpy(), cv2.COLOR_RGB2BGR)
        img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
        cv2.imwrite(FLAGS.output, img)
        logging.info('output saved to: {}'.format(FLAGS.output))
        return

    # Benchmark mode: needs a TFRecord to iterate over.
    if not FLAGS.tfrecord:
        logging.error('benchmark mode requires the tfrecord flag to be set')
        return

    dataset = load_tfrecord_dataset(FLAGS.tfrecord, FLAGS.classes, FLAGS.size)
    if FLAGS.shuffle:
        dataset = dataset.shuffle(512)
    dataset = dataset.as_numpy_iterator()

    times = []
    for img_raw, _label in tqdm(dataset):
        # NOTE(review): unlike the single-image path no batch axis is added
        # here -- confirm the records already carry one, else add expand_dims.
        img = transform_images(img_raw, FLAGS.size)
        t1 = time.time()
        boxes, scores, classes, nums = yolo(img)
        t2 = time.time()
        times.append(t2 - t1)

    if not times:
        print("No images were processed")
        return

    mean_times = np.mean(times)
    # The numpy iterator has no len(); the number of timings is the number of
    # processed images.
    print(
        f"Mean detection time for a total of {len(times)} was {mean_times}s"
    )
def main(_argv):
    """Run YOLOv3 over the ADL-Rundle-6 sequence and dump detections to ``yolo-det.txt``.

    For each frame popped from the loader the detections are converted from
    normalised corner coordinates to pixel ``left, top, width, height`` and
    stored as rows of
    ``frame, -1, left, top, width, height, score, -1, -1, -1``.
    Annotated frames are written to ``images/<frame file name>``.

    Raises:
        ValueError: if the loader yields no frames.
    """
    load = LoadCOCO('../2DMOT2015/train/ADL-Rundle-6')

    for physical_device in tf.config.experimental.list_physical_devices('GPU'):
        tf.config.experimental.set_memory_growth(physical_device, True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights).expect_partial()
    logging.info('weights loaded')

    with open(FLAGS.classes) as class_file:
        class_names = [c.strip() for c in class_file]
    logging.info('classes loaded')

    if load.empty():
        raise ValueError('no frames found in the input sequence')

    # cv2.imwrite fails silently when the target directory is missing.
    os.makedirs('images', exist_ok=True)

    dets = []
    frame = 0
    while not load.empty():
        path, img_raw, img = load.pop()
        name = os.path.basename(path)
        print(path)

        # nums = total detections.
        boxes, scores, classes, nums = yolo(img)
        num = nums.numpy()[0]

        frame_bgr = cv2.cvtColor(img_raw.numpy(), cv2.COLOR_RGB2BGR)
        # Scale by the actual frame size rather than a fixed 1920x1080.
        height, width = frame_bgr.shape[:2]

        frames = frame * np.ones(shape=(num, 1))
        null = -1 * np.ones(shape=(num, 1))  # placeholder columns
        boxes_np = boxes.numpy()[0][0:num].reshape(num, 4)
        scores_np = scores.numpy()[0][0:num].reshape(num, 1)

        # Normalised corners -> pixel corners.
        boxes_np[:, [0, 2]] *= width
        boxes_np[:, [1, 3]] *= height
        # Pixel corners -> left, top, width, height.
        boxes_np[:, 2] -= boxes_np[:, 0]
        boxes_np[:, 3] -= boxes_np[:, 1]

        dets.append(np.concatenate(
            (frames, null, boxes_np, scores_np, null, null, null), axis=1))

        frame_bgr = draw_outputs(frame_bgr, (boxes, scores, classes, nums),
                                 class_names)
        cv2.imwrite('images/' + name, frame_bgr)

        frame += 1

    dets = np.concatenate(dets, axis=0)
    np.savetxt("yolo-det.txt", dets,
               fmt='%d, %d, %0.2f, %0.2f, %0.2f, %0.2f, %0.2f, %d, %d, %d',
               delimiter=",")
def main(_argv):
    """Track people from an RTSP stream with YOLOv3 + DeepSORT and re-detect the user.

    Each frame is run through YOLOv3, the detections are passed to DeepSORT, and
    the track with id 1 of class "person" is drawn and its crop saved as the
    user template. Detected boxes are appended per frame to ``detection.txt``
    when ``FLAGS.output`` is set. After more than 10 processed frames the
    re-detect mode is switched on: an extra frame is read each iteration and the
    saved template is located in it with OpenCV template matching.

    Press ``q`` in the output window to quit.
    """
    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1.0
    stream_url = 'rtsp://192.168.0.28:8554/test'
    user_template_path = (
        'C:/Users/Jiwon/Desktop/re/yolov3_deepsort-master/userface/user.png')

    # Initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric(
        "cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    with open(FLAGS.classes) as class_file:
        class_names = [c.strip() for c in class_file]
    logging.info('classes loaded')

    # cv2.VideoCapture reports a failed open through isOpened(), not through
    # an exception, so the fallback to FLAGS.video is decided on that.
    vid = cv2.VideoCapture(stream_url)
    if not vid.isOpened():
        vid.release()
        vid = cv2.VideoCapture(FLAGS.video)

    out = None
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        video_fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, video_fps, (width, height))

    list_file = open('detection.txt', 'w')
    frame_index = -1

    # Bookkeeping for switching on re-detection
    f_cnt = 0
    redetect = False

    fps = 0.0
    count = 0  # number of empty frames seen so far
    try:
        while True:
            _, img = vid.read()

            if img is None:
                logging.warning("Empty Frame")
                time.sleep(0.1)
                count += 1
                if count < 3:
                    continue
                break

            img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img_in = tf.expand_dims(img_in, 0)
            img_in = transform_images(img_in, FLAGS.size)

            t1 = time.time()
            boxes, scores, classes, nums = yolo.predict(img_in)
            classes = classes[0]
            names = np.array([class_names[int(cls)] for cls in classes])
            converted_boxes = convert_boxes(img, boxes[0])
            features = encoder(img, converted_boxes)
            detections = [
                Detection(bbox, score, class_name, feature)
                for bbox, score, class_name, feature in zip(
                    converted_boxes, scores[0], names, features)
            ]

            # run non-maxima suppression
            boxs = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            classes = np.array([d.class_name for d in detections])
            indices = preprocessing.non_max_suppression(
                boxs, classes, nms_max_overlap, scores)
            detections = [detections[i] for i in indices]

            # Call the tracker
            tracker.predict()
            tracker.update(detections)

            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                class_name = track.get_class()
                # Only the first tracked person is treated as the user.
                if class_name != "person" or int(track.track_id) != 1:
                    continue

                bbox = track.to_tlbr()
                cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), (0, 255, 0), 2)
                label_width = (len(class_name) + len(str(track.track_id))) * 17
                cv2.rectangle(img, (int(bbox[0]), int(bbox[1] - 30)),
                              (int(bbox[0]) + label_width, int(bbox[1])),
                              (0, 255, 0), -1)
                cv2.putText(img, class_name + "-" + str(track.track_id),
                            (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                            (0, 0, 0), 2)

                # TODO: only save when the MQTT initial check value is set.
                # bbox is (x1, y1, x2, y2): slice rows y1:y2 and columns
                # x1:x2. Clamp at 0 so a box partly outside the frame does
                # not wrap around through negative indices.
                x1, y1, x2, y2 = (max(int(v), 0) for v in bbox[:4])
                img_user = img[y1:y2, x1:x2]
                if img_user.size:
                    cv2.imwrite(user_template_path, img_user)

            # print fps on screen
            fps = (fps + (1. / (time.time() - t1))) / 2
            cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30),
                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
            cv2.imshow('output', img)

            if FLAGS.output:
                out.write(img)
                frame_index = frame_index + 1
                list_file.write(str(frame_index) + ' ')
                for box in converted_boxes:
                    list_file.write(
                        ''.join(str(value) + ' ' for value in box[:4]))
                list_file.write('\n')

            f_cnt += 1
            print("False")
            if f_cnt > 10:
                redetect = True
                f_cnt = 0

            if redetect:
                # https://opencv-python.readthedocs.io/en/latest/doc/24.imageTemplateMatch/imageTemplateMatch.html
                _, img = vid.read()
                template = cv2.imread(user_template_path, 0)
                # Skip matching when the frame or the template is missing.
                if img is not None and template is not None:
                    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                    # shape is (rows, cols); reversed it gives width, height.
                    w, h = template.shape[::-1]
                    res = cv2.matchTemplate(gray, template, cv2.TM_SQDIFF)
                    _min_val, _max_val, min_loc, _max_loc = cv2.minMaxLoc(res)
                    # For TM_SQDIFF the best match is the minimum.
                    top_left = min_loc
                    bottom_right = (top_left[0] + w, top_left[1] + h)
                    cv2.rectangle(img, top_left, bottom_right, (255, 0, 0), 1)
                    print("TRUE")

            # press q to quit
            if cv2.waitKey(1) == ord('q'):
                break
    finally:
        # Release capture, writer, log file and windows even on errors.
        vid.release()
        if out is not None:
            out.release()
        list_file.close()
        cv2.destroyAllWindows()
def main(_argv):
    """Train YOLOv3 / YOLOv3-tiny with the transfer scheme chosen by ``FLAGS.transfer``.

    * ``'none'``      -- random initialisation, train the whole network.
    * ``'darknet'``   -- copy and freeze the pretrained darknet backbone.
    * ``'no_output'`` -- copy and freeze everything except the output layers.
    * ``'fine_tune'`` -- load all pretrained weights, freeze the backbone.
    * ``'frozen'``    -- load all pretrained weights and freeze everything.

    With ``FLAGS.mode == 'eager_tf'`` a manual gradient-tape loop is used and
    weights are saved after each epoch; otherwise ``model.fit`` is used with LR
    reduction, early stopping, checkpointing and TensorBoard callbacks.
    """
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        # Allocate GPU memory only as it is needed.
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    # Train either the tiny or the full YOLO variant.
    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size, training=True,
                           classes=FLAGS.num_classes)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True, classes=FLAGS.num_classes)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    # Without a dataset path the fake dataset is used.
    train_dataset = dataset.load_fake_dataset()
    if FLAGS.dataset:
        # Load (x_train, y_train) pairs from the TFRecord file.
        train_dataset = dataset.load_tfrecord_dataset(
            FLAGS.dataset, FLAGS.classes, FLAGS.size)
    # Shuffle, then batch.
    train_dataset = train_dataset.shuffle(buffer_size=512)
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    # Per the original author's note, y is (batch_size, yolo_max_boxes, 5)
    # with 5 => (xmin, ymin, xmax, ymax, class label).
    # transform_images prepares the pixel data; transform_targets maps the
    # boxes onto the anchors / output grids. NOTE(review): both helpers are
    # defined elsewhere -- confirm the details there.
    train_dataset = train_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
    # Prefetch with an automatically tuned buffer size.
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    # A fake validation set avoids errors when no validation path is given.
    val_dataset = dataset.load_fake_dataset()
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(
            FLAGS.val_dataset, FLAGS.classes, FLAGS.size)
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))

    # Configure the model for transfer learning
    if FLAGS.transfer == 'none':
        pass  # Nothing to do: random initialisation, train from scratch
    elif FLAGS.transfer in ['darknet', 'no_output']:
        # Darknet transfer is a special case that works
        # with incompatible number of classes
        pretrained_classes = FLAGS.weights_num_classes or FLAGS.num_classes
        if FLAGS.tiny:
            model_pretrained = YoloV3Tiny(FLAGS.size, training=True,
                                          classes=pretrained_classes)
        else:
            model_pretrained = YoloV3(FLAGS.size, training=True,
                                      classes=pretrained_classes)
        # Load the pretrained weights.
        model_pretrained.load_weights(FLAGS.weights)

        if FLAGS.transfer == 'darknet':
            # Copy and freeze the backbone; everything else stays randomly
            # initialised and is trained.
            model.get_layer('yolo_darknet').set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
            freeze_all(model.get_layer('yolo_darknet'))
        elif FLAGS.transfer == 'no_output':
            # Copy and freeze everything except the YOLO output layers, so
            # only the randomly initialised output layers are trained.
            for layer in model.layers:
                if not layer.name.startswith('yolo_output'):
                    layer.set_weights(
                        model_pretrained.get_layer(layer.name).get_weights())
                    freeze_all(layer)
    else:
        # All other transfer require matching classes
        model.load_weights(FLAGS.weights)
        if FLAGS.transfer == 'fine_tune':
            # freeze darknet and fine tune other layers
            freeze_all(model.get_layer('yolo_darknet'))
        elif FLAGS.transfer == 'frozen':
            # freeze everything
            freeze_all(model)

    # Optimizer: Adam. `learning_rate` is the supported keyword; `lr` is a
    # deprecated alias.
    optimizer = tf.keras.optimizers.Adam(learning_rate=FLAGS.learning_rate)
    loss = [
        YoloLoss(anchors[mask], classes=FLAGS.num_classes)
        for mask in anchor_masks
    ]

    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        # Running means of the training and validation loss.
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                # The gradient tape records operations for differentiation.
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = [
                        loss_fn(label, output)
                        for output, label, loss_fn in zip(outputs, labels, loss)
                    ]
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(
                    zip(grads, model.trainable_variables))

                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    [np.sum(x.numpy()) for x in pred_loss]))
                avg_loss.update_state(total_loss)

            # Evaluate on the validation set.
            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = [
                    loss_fn(label, output)
                    for output, label, loss_fn in zip(outputs, labels, loss)
                ]
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                logging.info("{}_val_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    [np.sum(x.numpy()) for x in pred_loss]))
                avg_val_loss.update_state(total_loss)

            # result() returns the accumulated mean for this epoch.
            logging.info("{}, train: {}, val: {}".format(
                epoch, avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))

            # Clear the accumulators before the next epoch.
            avg_loss.reset_states()
            avg_val_loss.reset_states()
            # Save the model weights once per epoch.
            model.save_weights('checkpoints/yolov3_train_{}.tf'.format(epoch))
    else:
        model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'],
                      run_eagerly=(FLAGS.mode == 'eager_fit'))

        callbacks = [
            # Reduce the learning rate when the monitored metric plateaus.
            ReduceLROnPlateau(verbose=1),
            # Stop after 3 epochs without improvement (default monitor).
            EarlyStopping(patience=3, verbose=1),
            # Save the weights after every epoch.
            ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                            verbose=1, save_weights_only=True),
            # Training visualisation.
            TensorBoard(log_dir='logs', write_images=True,
                        update_freq='batch')
        ]

        model.fit(train_dataset,
                  epochs=FLAGS.epochs,
                  callbacks=callbacks,
                  validation_data=val_dataset)
def nayanam(_argv):
    """Track objects on the RTSP stream with YOLOv3 + Deep SORT.

    Every confirmed, recently updated track is printed as
    ``track_id,class_name,x,y`` (top-left corner of its box). When
    ``VIDEO_DEBUG == 1`` the annotated frames are also shown in a window and
    pressing ESC stops the loop.

    Args:
        _argv: Unused.
    """
    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1.0

    # Initialize Deep SORT.
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric(
        "cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    yolo = YoloV3(classes=80)
    yolo.load_weights(PATH_TO_WEIGHTS)
    print('weights loaded')

    # Close the class-name file as soon as it has been read.
    with open(PATH_TO_CLASSES) as class_file:
        class_names = [c.strip() for c in class_file.readlines()]
    print('classes loaded')

    # The palette is loop-invariant, so build it once instead of per frame.
    cmap = plt.get_cmap('tab20b')
    colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

    # Install the Ctrl-C handler once, before the first frame is processed.
    signal.signal(signal.SIGINT, user_exit)

    fps = 0.0
    vid = cv2.VideoCapture(RTSP_URL)
    try:
        while vid.isOpened():
            # A missing frame is reported through the return values, so test
            # them instead of waiting for an exception.
            ok, img = vid.read()
            if not ok or img is None:
                print("Empty frame")
                time.sleep(0.1)  # do not spin while the stream is stalled
                continue

            img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img_in = tf.expand_dims(img_in, 0)
            img_in = transform_images(img_in, 416)

            t1 = time.time()
            boxes, scores, classes, nums = yolo.predict(img_in)

            classes = classes[0]
            names = np.array([class_names[int(c)] for c in classes])
            converted_boxes = convert_boxes(img, boxes[0])
            features = encoder(img, converted_boxes)
            detections = [
                Detection(bbox, score, class_name, feature)
                for bbox, score, class_name, feature in zip(
                    converted_boxes, scores[0], names, features)
            ]

            # Run non-maximum suppression on the detections.
            boxs = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            classes = np.array([d.class_name for d in detections])
            indices = preprocessing.non_max_suppression(
                boxs, classes, nms_max_overlap, scores)
            detections = [detections[i] for i in indices]

            # Deep SORT tracker update.
            tracker.predict()
            tracker.update(detections)

            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                class_name = track.get_class()
                color = colors[int(track.track_id) % len(colors)]
                color = [channel * 255 for channel in color]
                if VIDEO_DEBUG == 1:
                    cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), color, 2)
                    # Filled label background sized from the label length.
                    cv2.rectangle(
                        img, (int(bbox[0]), int(bbox[1] - 30)),
                        (int(bbox[0]) +
                         (len(class_name) + len(str(track.track_id))) * 17,
                         int(bbox[1])), color, -1)
                    cv2.putText(img, class_name + "-" + str(track.track_id),
                                (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                                (255, 255, 255), 2)
                s = (str(track.track_id) + ',' + class_name + ',' +
                     str(int(bbox[0])) + ',' + str(int(bbox[1])) + '\n')
                print(s)

            # Running average of the instantaneous frame rate.
            fps = (fps + (1. / (time.time() - t1))) / 2
            print("fps = ", fps)

            if VIDEO_DEBUG == 1:
                cv2.imshow('output', img)
                if cv2.waitKey(1) == 27:  # ESC
                    break
    finally:
        # Release the stream and the window even if a frame fails.
        vid.release()
        if VIDEO_DEBUG == 1:
            cv2.destroyAllWindows()
def main(_argv):
    """Run YOLOv3 on a video and write a key-frame summary video.

    Frames are resized to 640x480 and grouped in segments of 20. For each
    completed segment, the frame with the highest detection count is written
    to ``./data/video/town_summary.avi``. Annotated frames are shown in a
    window and, when ``FLAGS.output`` is set, also written to that file. The
    loop stops at the first empty frame or when ``q`` is pressed.

    Args:
        _argv: Unused.
    """
    frame_size = (640, 480)    # size every processed frame is resized to
    summary_size = (240, 120)  # frame size the summary writer is opened with
    segment_length = 20        # frames per summary segment

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    with open(FLAGS.classes) as class_file:
        class_names = [c.strip() for c in class_file.readlines()]
    logging.info('classes loaded')

    times = []

    # A numeric value selects a camera index, anything else is a path/URL.
    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except (TypeError, ValueError):
        vid = cv2.VideoCapture(FLAGS.video)

    out = None
    if FLAGS.output:
        video_fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        # Written frames are the resized ones, so the writer must be opened
        # with that size rather than the source resolution.
        out = cv2.VideoWriter(FLAGS.output, codec, video_fps, frame_size)

    # Separate writer for the summary; it must not replace ``out``.
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    summary_out = cv2.VideoWriter('./data/video/town_summary.avi', fourcc,
                                  20.0, summary_size)

    segment_frame = []
    segment_nums = []

    try:
        while True:
            if len(segment_frame) == segment_length:
                # Key frame = frame with the most detections in the segment.
                key_num = np.argmax(segment_nums)
                print(key_num)
                # Match the size the summary writer was opened with.
                summary_out.write(
                    cv2.resize(segment_frame[key_num], summary_size))
                segment_frame = []
                segment_nums = []

            _, img = vid.read()
            # Check for end of stream before touching the frame.
            if img is None:
                logging.warning("Empty Frame")
                time.sleep(0.1)
                break

            img = cv2.resize(img, frame_size)
            segment_frame.append(img)

            img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img_in = tf.expand_dims(img_in, 0)
            img_in = transform_images(img_in, FLAGS.size)

            t1 = time.time()
            boxes, scores, classes, nums = yolo.predict(img_in)
            segment_nums.append(nums)
            t2 = time.time()
            # Keep only the last 20 timings for the displayed average.
            times.append(t2 - t1)
            times = times[-20:]

            img = draw_outputs(img, (boxes, scores, classes, nums),
                               class_names)
            img = cv2.putText(
                img, "Time: {:.2f}ms,num :{}".format(
                    sum(times) / len(times) * 1000, nums), (0, 30),
                cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
            if out is not None:
                out.write(img)
            cv2.imshow('output', img)
            if cv2.waitKey(1) == ord('q'):
                break
    finally:
        # Flush and close both videos and the capture on every exit path.
        vid.release()
        summary_out.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()
def main(_argv):
    """Run YOLOv3 on a video and signal the serial device on centred people.

    For every detection of class id 0 with a score of at least 0.90, the
    angles and distances returned by ``get_angles_and_distances`` are printed.
    When both distances are within 10 of zero, ``SHOOT`` is written to the
    serial port on ``COM3`` and the reply line is printed. Annotated frames
    are shown and optionally written to ``FLAGS.output``.

    Args:
        _argv: Unused.
    """
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    with open(FLAGS.classes) as class_file:
        class_names = [c.strip() for c in class_file.readlines()]
    logging.info('classes loaded')

    # A numeric value selects a camera index, anything else is a path/URL.
    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except (TypeError, ValueError):
        vid = cv2.VideoCapture(FLAGS.video)

    out = None
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        video_fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, video_fps, (width, height))

    fps = 0.0
    count = 0
    ser = None
    try:
        # Opened inside the try so the capture/writer are still released if
        # the port cannot be opened.
        ser = sl.Serial("COM3", 57600)
        while True:
            _, img = vid.read()

            if img is None:
                logging.warning("Empty Frame")
                time.sleep(0.1)
                count += 1
                # Tolerate a couple of empty reads before giving up.
                if count < 3:
                    continue
                break

            img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img_in = tf.expand_dims(img_in, 0)
            img_in = transform_images(img_in, FLAGS.size)

            t1 = time.time()
            boxes, scores, classes, nums = yolo.predict(img_in)
            # Running average of the instantaneous frame rate.
            fps = (fps + (1. / (time.time() - t1))) / 2

            img = draw_outputs(img, (boxes, scores, classes, nums),
                               class_names)

            for i in range(nums[0]):
                # Only confident detections of class id 0 are considered.
                if classes[0][i] == 0 and scores[0][i] >= 0.90:
                    angles, distances = get_angles_and_distances(
                        img, boxes[0], i)
                    # Trigger only inside the +/-10 window on both axes.
                    if abs(distances[0]) <= 10 and abs(distances[1]) <= 10:
                        ser.write("SHOOT".encode())
                        print(ser.readline())
                    # NOTE(review): SOURCE lost its indentation; these prints
                    # are assumed to run for every qualifying detection.
                    print(angles)
                    print(distances)
                    print("\n")

            img = cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30),
                              cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255),
                              2)
            if FLAGS.output:
                out.write(img)
            cv2.imshow('output', img)
            if cv2.waitKey(1) == ord('q'):
                break
    finally:
        # Free the port, the capture and the writer on every exit path.
        # NOTE(review): assumes ``sl`` is pyserial, whose Serial has close().
        if ser is not None:
            ser.close()
        vid.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()
def main(_argv):
    """Run YOLOv3 over the binarized PNG images and upload a summary.

    Steps, in order: delete leftover PNGs from the binarized-image and output
    folders, load the model, weights and class names, list the binarized
    images, wait until ``detect_strat_frag`` becomes true, run detection on
    every listed image (saving an annotated copy of each), tally the detected
    classes and send the top class plus the average person count through
    ``upload_func``.

    Args:
        _argv: Unused.
    """
    global detect_strat_frag

    # Delete leftover files from the binarized-image folder.
    stale_binarized = glob.glob(ostu_file_path + "*.png")
    if stale_binarized:
        for p in stale_binarized:
            if os.path.isfile(p):
                os.remove(p)
        print("2値化画像を削除しました")
    else:
        print("2値化画像はありません")

    # Delete leftover files from the annotated-output folder.
    stale_outputs = glob.glob(output_file_path + "*.png")
    if stale_outputs:
        for p in stale_outputs:
            if os.path.isfile(p):
                os.remove(p)
        print("推定済画像を削除しました")
    else:
        print("推定済画像はありません")

    # Start time of the whole run; later timings are relative to it.
    calculate_start_time = time.time()

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if physical_devices:
        for device in physical_devices:
            tf.config.experimental.set_memory_growth(device, True)
            print('{} memory growth: {}'.format(
                device, tf.config.experimental.get_memory_growth(device)))
    else:
        print("Not enough GPU hardware devices available")

    logging.info('YoloV3Tiny loading...')
    if flags_tiny:
        yolo = YoloV3Tiny(classes=num_classes)
    else:
        yolo = YoloV3(classes=num_classes)
    logging.info('YoloV3Tiny loaded')
    calculate_finish_time = time.time()
    print("YoloV3Tinyロード時間 (開始 <--> YoloV3Tinyロード):" +
          str(calculate_finish_time - calculate_start_time) + "秒" + "\n")

    yolo.load_weights(weights_path).expect_partial()
    logging.info('weights loaded')
    calculate_finish_time = time.time()
    print("学習モデルロード時間 (開始 <--> モデルロード):" +
          str(calculate_finish_time - calculate_start_time) + "秒" + "\n")

    with open(classes_path) as class_file:
        class_names = [c.strip() for c in class_file.readlines()]
    logging.info('classes loaded')
    calculate_finish_time = time.time()
    print("クラスファイルロード時間 (開始 <--> ファイルロード):" +
          str(calculate_finish_time - calculate_start_time) + "秒" + "\n")

    # Binarized images in natural sort order. The count is taken from this
    # list, so stray non-PNG files in the folder cannot leave empty slots.
    # NOTE(review): the folder is listed before the wait below, so only
    # images that exist at this point are processed; confirm that is intended.
    image_paths = natsorted(glob.glob(ostu_file_path + "*.png"))
    dir_num = len(image_paths)

    detect_time = 0
    # Accumulates 1/n for each 'human' detection in a frame with n detections.
    human_frame_sum = 0
    # Class name of every detection, in detection order (grows as needed).
    detect_class = []

    # Wait until the detection-start flag is raised.
    while True:
        if detect_strat_frag == True:
            print("----------------推定開始----------------")
            break
        time.sleep(0.01)  # yield the CPU instead of spinning

    for index, image_path in enumerate(image_paths):
        logging.info('input: {}'.format(image_path))
        with open(image_path, 'rb') as image_file:
            img_raw = tf.image.decode_image(image_file.read(), channels=3)

        img = tf.expand_dims(img_raw, 0)
        img = transform_images(img, resize)

        # Inference time for this image.
        t1 = time.time()
        boxes, scores, classes, nums = yolo(img)
        t2 = time.time()
        logging.info('time: {}'.format(t2 - t1))
        detect_time += t2 - t1

        logging.info('detections:')
        detections_in_image = int(nums[0])
        for j in range(detections_in_image):
            name = class_names[int(classes[0][j])]
            logging.info('\t{}, {}, {}'.format(name, np.array(scores[0][j]),
                                               np.array(boxes[0][j])))
            # Recorded for the majority vote below.
            # NOTE(review): SOURCE lost its indentation; every detection is
            # assumed to be recorded, not only the 'human' ones.
            detect_class.append(name)
            if 'human' in name:
                human_frame_sum += 1 / detections_in_image

        # Save the annotated image.
        img = cv2.cvtColor(img_raw.numpy(), cv2.COLOR_RGB2BGR)
        img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
        output = '/home/pi/yolov3-tf2-master/output_image/' + 'detect_{}.png'.format(
            index)
        cv2.imwrite(output, img)
        logging.info('output saved to: {}'.format(output) + '\n')

    print("----------------推定終了----------------")
    # Guard against an empty folder.
    average_time = detect_time / dir_num if dir_num else 0.0
    print("画像1枚あたりに要した平均時間:" + str(average_time) + "秒" + "\n")

    # End time of the run.
    calculate_finish_time = time.time()
    print("物体検出に要した合計時間:" +
          str(calculate_finish_time - calculate_start_time) + "秒" + "\n")
    print("推定した合計枚数 : " + str(dir_num) + "\n")

    # Tally the first ``dir_num`` recorded detections. Missing entries and
    # any other class name count as 'none_class'.
    detect_dict = {
        'human_front': 0,
        'human_right': 0,
        'human_left': 0,
        'none_class': 0
    }
    human_classes = ('human_front', 'human_right', 'human_left')
    for slot in range(dir_num):
        name = detect_class[slot] if slot < len(detect_class) else None
        if name in human_classes:
            detect_dict[name] += 1
        else:
            detect_dict['none_class'] += 1
    human_class_sum = dir_num - detect_dict['none_class']
    print("推定された全てのクラス : " + str(detect_dict))

    # Pick the three most frequent classes.
    detect_res = sorted(detect_dict.items(), key=lambda x: x[1],
                        reverse=True)[:3]
    print("上位3クラス : " + str(detect_res) + "タイプ: " + str(type(detect_res)))
    print("最上位クラス : " + str(detect_res[0][0]) + "タイプ: " +
          str(type(detect_res[0])))

    # Average number of people per frame that contained a human detection.
    if human_class_sum == 0 or human_frame_sum == 0:
        class_avg = 0
    else:
        class_avg = human_class_sum / human_frame_sum
        print("人間クラスと認識された画像内に存在する平均人数 : " +
              str((round((class_avg), 1))))

    # Send the result to the server.
    send_data = {}
    send_data['id'] = device_id
    send_data['class'] = detect_res[0][0]
    send_data['avg'] = class_avg
    send_data['date'] = time.time() * 1000
    upload_func(send_data)
def main(_argv):
    """Train YOLOv3 / YOLOv3-tiny, optionally with transfer learning.

    Builds the model selected by ``FLAGS.tiny``, loads the training and
    validation datasets (fake data unless ``FLAGS.dataset`` /
    ``FLAGS.val_dataset`` are set), applies the ``FLAGS.transfer`` strategy and
    trains either with a manual eager loop (``FLAGS.mode == 'eager_tf'``) or
    with ``model.fit``.

    Args:
        _argv: Unused.
    """
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    check_weighs_exist(tiny=FLAGS.tiny)

    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size, training=True,
                           classes=FLAGS.num_classes)
        model.summary()
        plot_model(model, to_file='yoloV3Tiny-model-plot.png',
                   show_shapes=True, show_layer_names=True)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True, classes=FLAGS.num_classes)
        model.summary()
        plot_model(model, to_file='yoloV3-model-plot.png',
                   show_shapes=True, show_layer_names=True)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    # Load the dataset
    train_dataset = dataset.load_fake_dataset()
    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(
            file_pattern=FLAGS.dataset,
            class_file=FLAGS.classes,
            size=FLAGS.size)

    # Shuffle the dataset
    train_dataset = train_dataset.shuffle(buffer_size=FLAGS.buffer_size,
                                          reshuffle_each_iteration=True)
    # Count by iterating once; this also works for an empty dataset.
    train_dataset_length = sum(1 for _ in train_dataset)
    print(f"Dataset for training consists of {train_dataset_length} images.")

    train_dataset = train_dataset.batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks,
                                  FLAGS.size))).repeat()
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    val_dataset = dataset.load_fake_dataset()
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(
            FLAGS.val_dataset, FLAGS.classes, FLAGS.size)
    val_dataset_length = sum(1 for _ in val_dataset)
    print(f"Dataset for validation consists of {val_dataset_length} images.")

    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks,
                                  FLAGS.size))).repeat()

    # Both datasets repeat forever, so each epoch needs an explicit number of
    # batches (as plain ints).
    steps_per_epoch = int(np.ceil(train_dataset_length / FLAGS.batch_size))
    validation_steps = int(np.ceil(val_dataset_length / FLAGS.batch_size))

    # Configure the model for transfer learning
    if FLAGS.transfer == 'none':
        pass  # Nothing to do
    elif FLAGS.transfer in ['darknet', 'no_output']:
        # Darknet transfer is a special case that works
        # with incompatible number of classes
        # reset top layers
        if FLAGS.tiny:
            model_pretrained = YoloV3Tiny(
                size=FLAGS.size, training=True,
                classes=FLAGS.weights_num_classes or FLAGS.num_classes)
            model_pretrained.load_weights(FLAGS.weights_tf_format_tiny)
        else:
            model_pretrained = YoloV3(
                size=FLAGS.size, training=True,
                classes=FLAGS.weights_num_classes or FLAGS.num_classes)
            model_pretrained.load_weights(FLAGS.weights_tf_format)

        if FLAGS.transfer == 'darknet':
            # Copy the pretrained darknet backbone weights and freeze them.
            model.get_layer('yolo_darknet').set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
            freeze_all(model.get_layer('yolo_darknet'))
        elif FLAGS.transfer == 'no_output':
            # Copy and freeze every layer except the output layers.
            for layer in model.layers:
                if not layer.name.startswith('yolo_output'):
                    layer.set_weights(
                        model_pretrained.get_layer(layer.name).get_weights())
                    freeze_all(layer)
    else:
        # All other transfer require matching classes
        if FLAGS.tiny:
            model.load_weights(FLAGS.weights_tf_format_tiny)
        else:
            model.load_weights(FLAGS.weights_tf_format)
        if FLAGS.transfer == 'fine_tune':
            # freeze darknet and fine tune other layers
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        elif FLAGS.transfer == 'frozen':
            # freeze everything
            freeze_all(model)

    # ``learning_rate`` is the supported keyword; ``lr`` is a deprecated alias.
    optimizer = tf.keras.optimizers.Adam(learning_rate=FLAGS.learning_rate)
    # One YoloLoss per anchor mask.
    loss = [YoloLoss(anchors[mask], classes=FLAGS.num_classes)
            for mask in anchor_masks]

    if FLAGS.mode == 'eager_tf':
        print(f"Mode is: {FLAGS.mode}")
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            # take() bounds the repeated dataset to one pass per epoch;
            # without it the first epoch would never end.
            for batch, (images, labels) in enumerate(
                    train_dataset.take(steps_per_epoch)):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(
                    zip(grads, model.trainable_variables))

                logging.info(f"epoch_{epoch}_train_batch_{batch},"
                             f"{total_loss.numpy()},"
                             f"{list(map(lambda x: np.sum(x.numpy()), pred_loss))}")
                avg_loss.update_state(total_loss)

            for batch, (images, labels) in enumerate(
                    val_dataset.take(validation_steps)):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                logging.info("{}_val_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)

            logging.info("{}, train: {}, val: {}".format(
                epoch,
                avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))

            avg_loss.reset_states()
            avg_val_loss.reset_states()
            # NOTE(review): ``data_set`` is not defined in this function;
            # presumably a module-level name - confirm.
            model.save_weights(
                f'checkpoints/{data_set}_tiny_{FLAGS.tiny}_im_size_{FLAGS.size}.tf')
    else:
        print("Compiling the model")
        model.compile(
            optimizer=optimizer,
            loss=loss,
            run_eagerly=(FLAGS.mode == 'eager_fit'),
            metrics=['accuracy'])

        callbacks = [
            EarlyStopping(monitor='val_loss', patience=125, verbose=1),
            ReduceLROnPlateau(monitor='val_loss', verbose=1, factor=0.90,
                              min_lr=0, patience=20, mode="auto"),
            ModelCheckpoint(
                f'checkpoints/{data_set}_tiny_{FLAGS.tiny}_im_size_{FLAGS.size}.tf',
                verbose=1,
                save_weights_only=True,
                save_best_only=True,
                mode="auto",
            ),
            TensorBoard(log_dir='logs'),
            CSVLogger(
                f'checkpoints/logs/{data_set}_tiny_{FLAGS.tiny}_im_size_{FLAGS.size}',
                separator=',')
        ]

        history = model.fit(train_dataset,
                            epochs=FLAGS.epochs,
                            verbose=2,
                            callbacks=callbacks,
                            validation_data=val_dataset,
                            steps_per_epoch=steps_per_epoch,
                            validation_steps=validation_steps)
def main(args):
    """Evaluate a YOLOv3 model on a validation TFRecord and print TP/FP rates.

    Reads ``epochs``, ``batch_size``, ``num_classes``, ``num_weight_class``,
    ``valid_dataset``, ``weights``, ``classes`` and ``iou`` from ``args``.

    NOTE(review): this function references several names that are not defined
    inside it (``transfer``, ``images``, ``r``, ``dataset_val``, ``utils``,
    ``gt_bbox``, ``gt_class_id``, ``gt_mask``, ``image_ids``). Unless they are
    module-level globals it cannot run to completion - confirm before relying
    on it.
    """
    image_size = 416  # 416
    num_epochs = args.epochs
    batch_size = args.batch_size
    learning_rate = 1e-3
    num_classes = args.num_classes
    # num class for `weights` file if different, useful in transfer learning with different number of classes
    weight_num_classes = args.num_weight_class
    valid_path = args.valid_dataset
    weights_path = args.weights
    # Path to the (presumably text) file containing all classes, 1 per line
    classes = args.classes

    anchors = yolo_anchors
    anchor_masks = yolo_anchor_masks

    val_dataset = dataset.load_tfrecord_dataset(valid_path, classes, image_size)
    val_dataset = val_dataset.batch(batch_size)
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, image_size),
        dataset.transform_targets(y, anchors, anchor_masks, image_size)))

    model = YoloV3(image_size, training=True, classes=num_classes)
    # Darknet transfer is a special case that works
    # with incompatible number of classes
    # reset top layers
    model_pretrained = YoloV3(image_size, training=True, classes=weight_num_classes or num_classes)
    model_pretrained.load_weights(weights_path)

    # NOTE(review): `transfer` is not assigned anywhere in this function.
    if transfer == 'darknet':
        model.get_layer('yolo_darknet').set_weights(
            model_pretrained.get_layer('yolo_darknet').get_weights())
        freeze_all(model.get_layer('yolo_darknet'))

    predictions = []

    evaluator = Evaluator(iou_thresh=args.iou)

    # labels - (N, grid, grid, anchors, [x, y, w, h, obj, class])
    # NOTE(review): this rebinds `classes`, which held the class-file path.
    boxes, scores, classes, num_detections = model.predict(val_dataset)
    # boxes -> (num_imgs, num_detections, box coords)

    # Full labels shape -> [num_batches, grid scale, imgs]
    # Full labels shape -> [num_batches, [grid, grid, anchors, [x,y,w,h,obj,class]]]
    full_labels = np.asarray([label for _, label in val_dataset])

    # Shape -> [num_batches, num_imgs_in_batch, 3]
    # Shape -> [num_batches, num_imgs, 3x[grid,grid,anchors,[x,y,w,h,score,class]]]
    full_labels_trans = full_labels.transpose(0, 2, 1)

    # Flatten the batch dimension: one row (list of scales) per image.
    full_labels_flat = []

    for batch in full_labels_trans:
        for img in batch:
            row = []
            for scale in img:
                row.append(scale)
            full_labels_flat.append(row)

    # Shape -> [num_imgs x 3]
    full_labels_flat = np.asarray(full_labels_flat)

    # Remove any labels consisting of all 0's
    filt_labels = []
    # for img in range(len(full_labels_flat)):
    for img in full_labels_flat:
        test = []
        # for scale in full_labels_flat[img]:
        for scale in img:
            lab_list = []
            for g1 in scale:
                for g2 in g1:
                    for anchor in g2:
                        # Keep only entries whose first coordinate is positive.
                        if anchor[0] > 0:
                            # Scale the four box values by the image size.
                            temp = [
                                anchor[0] * image_size, anchor[1] * image_size,
                                anchor[2] * image_size, anchor[3] * image_size,
                                anchor[4], anchor[5]
                            ]
                            temp = [float(x) for x in temp]
                            lab_list.append(np.asarray(temp))
            test.append(np.asarray(lab_list))
        filt_labels.append(np.asarray(test))
    filt_labels = np.asarray(
        filt_labels
    )  # Numpy array of shape [num_imgs, 3x[num_boxesx[x1,y1,x2,y2,score,class]]]
    # filt_labels = filt_labels[:, :4] * image_size

    # i is the num_images index
    # predictions = [np.hstack([boxes[i][x], scores[i][x], classes[i][x]]) for i in range(len(num_detections)) for x in range(len(scores[i])) if scores[i][x] > 0]
    # Collect, per image, every prediction with a positive score.
    for img in range(len(num_detections)):
        row = []
        for sc in range(len(scores[img])):
            if scores[img][sc] > 0:
                row.append(
                    np.hstack([
                        boxes[img][sc] * image_size, scores[img][sc],
                        classes[img][sc]
                    ]))
        predictions.append(np.asarray(row))

    predictions = np.asarray(
        predictions)  # numpy array of shape [num_imgs x num_preds x 6]

    if len(predictions) == 0:  # No predictions made
        print('No predictions made - exiting.')
        exit()

    # predictions[:, :, 0:4] = predictions[:, :, 0:4] * image_size
    # Predictions format - [num_imgs x num_preds x [box coords x4, score, classes]]
    # Box coords should be in format x1 y1 x2 y2

    # NOTE(review): `images` is not defined in this function.
    evaluator(predictions, filt_labels, images)  # Check gts box coords

    # The linspace range is immediately replaced by a single threshold.
    confidence_thresholds = np.linspace(0.1, 1, 15)
    confidence_thresholds = [0.5]
    all_tp_rates = []
    all_fp_rates = []

    # Compute ROCs for above range of thresholds
    # Compute one for each class vs. the other classes
    for index, conf in enumerate(confidence_thresholds):
        tp_of_img = []
        fp_of_img = []
        all_classes = []

        tp_rates = {}
        fp_rates = {}

        boxes, scores, classes, num_detections = model.predict(val_dataset)

        # Full labels shape -> [num_batches, grid scale, imgs]
        # Full labels shape -> [num_batches, [grid, grid, anchors, [x,y,w,h,obj,class]]]
        full_labels = np.asarray([label for _, label in val_dataset])

        # Shape -> [num_batches, num_imgs_in_batch, 3]
        # Shape -> [num_batches, num_imgs, 3x[grid,grid,anchors,[x,y,w,h,score,class]]]
        full_labels_trans = full_labels.transpose(0, 2, 1)

        full_labels_flat = []

        for batch in full_labels_trans:
            for img in batch:
                row = []
                for scale in img:
                    row.append(scale)
                full_labels_flat.append(row)

        # Shape -> [num_imgs x 3]
        full_labels_flat = np.asarray(full_labels_flat)

        # Remove any labels consisting of all 0's
        filt_labels = []
        # for img in range(len(full_labels_flat)):
        for img in full_labels_flat:
            test = []
            # for scale in full_labels_flat[img]:
            for scale in img:
                lab_list = []
                for g1 in scale:
                    for g2 in g1:
                        for anchor in g2:
                            if anchor[0] > 0:
                                temp = [
                                    anchor[0] * image_size, anchor[1] * image_size,
                                    anchor[2] * image_size, anchor[3] * image_size,
                                    anchor[4], anchor[5]
                                ]
                                temp = [float(x) for x in temp]
                                lab_list.append(np.asarray(temp))
                test.append(np.asarray(lab_list))
            filt_labels.append(np.asarray(test))
        filt_labels = np.asarray(
            filt_labels
        )  # Numpy array of shape [num_imgs, 3x[num_boxesx[x1,y1,x2,y2,score,class]]]
        # filt_labels = filt_labels[:, :4] * image_size

        # i is the num_images index
        # predictions = [np.hstack([boxes[i][x], scores[i][x], classes[i][x]]) for i in range(len(num_detections)) for x in range(len(scores[i])) if scores[i][x] > 0]
        # NOTE(review): `predictions` is the array built before this loop and
        # is not reset to a list here, so `.append` looks like it will fail.
        for img in range(len(num_detections)):
            row = []
            for sc in range(len(scores[img])):
                if scores[img][sc] > 0:
                    row.append(
                        np.hstack([
                            boxes[img][sc] * image_size, scores[img][sc],
                            classes[img][sc]
                        ]))
            predictions.append(np.asarray(row))

        predictions = np.asarray(
            predictions)  # numpy array of shape [num_imgs x num_preds x 6]

        if len(predictions) == 0:  # No predictions made
            print('No predictions made - exiting.')
            exit()

        # predictions[:, :, 0:4] = predictions[:, :, 0:4] * image_size
        # Predictions format - [num_imgs x num_preds x [box coords x4, score, classes]]
        # Box coords should be in format x1 y1 x2 y2

        evaluator(predictions, filt_labels, images)  # Check gts box coords

        # NOTE(review): `r`, `dataset_val`, `utils`, `gt_bbox`, `gt_class_id`,
        # `gt_mask` and `image_ids` below are not defined in this function.
        classes = list(set(r['class_ids']))  # All unique class ids
        for c in classes:
            if c not in all_classes:
                all_classes.append(c)

        complete_classes = dataset_val.class_ids[1:]
        # Need TPR and FPR rates for each class versus the other classes
        # Recall == TPR
        tpr = utils.compute_ap_indiv_class(gt_bbox, gt_class_id, gt_mask,
                                           r["rois"], r["class_ids"],
                                           r["scores"], r['masks'],
                                           complete_classes)
        total_fpr = utils.compute_fpr_indiv_class(gt_bbox, gt_class_id,
                                                  gt_mask, r["rois"],
                                                  r["class_ids"], r["scores"],
                                                  r['masks'], complete_classes)
        # print(f'For Image: TPR: {tpr} -- FPR: {total_fpr}')
        tp_of_img.append(tpr)
        fp_of_img.append(total_fpr)

        all_classes = dataset_val.class_ids[1:]

        # Need to get average TPR and FPR for number of images used
        for c in all_classes:
            tp_s = 0
            for item in tp_of_img:
                if c in item.keys():
                    tp_s += item[c]
                else:
                    tp_s += 0

            tp_rates[c] = tp_s / len(image_ids)
            # tp_rates[c] = tp_s

        # print(tp_rates)

        for c in all_classes:
            fp_s = 0
            for item in fp_of_img:
                if c in item.keys():
                    fp_s += item[c]
                else:
                    fp_s += 0

            fp_rates[c] = fp_s / len(image_ids)
            # fp_rates[c] = fp_s

        all_fp_rates.append(fp_rates)
        all_tp_rates.append(tp_rates)

    print(f'TP Rates: {all_tp_rates}')
    print(f'FP Rates: {all_fp_rates}')
def main(_argv):
    """Track objects in a video with YOLOv3 + Deep SORT and log detections.

    For every frame, the converted detection boxes are appended to
    ``detection.txt`` as ``frame_index x y w h ...`` on one line. Confirmed
    tracks of class ``person`` are drawn on the frame, which is shown in a
    window and, when ``FLAGS.output`` is set, written to that file. Press
    ``q`` to stop.

    Args:
        _argv: Unused.
    """
    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1.0

    # Initialize Deep SORT.
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric(
        "cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    with open(FLAGS.classes) as class_file:
        class_names = [c.strip() for c in class_file.readlines()]
    logging.info('classes loaded')

    # A numeric value selects a camera index, anything else is a path/URL.
    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except (TypeError, ValueError):
        vid = cv2.VideoCapture(FLAGS.video)

    out = None
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        video_fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, video_fps, (width, height))

    # Build the palette once. A colormap expects floats in [0, 1], so sample
    # that interval to get distinct colours.
    cmap = plt.get_cmap('tab20b')
    colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

    frame_index = -1
    fps = 0.0
    count = 0

    try:
        # The context manager closes the log even if a frame fails.
        with open('detection.txt', 'w') as list_file:
            while True:
                _, img = vid.read()

                if img is None:
                    logging.warning("Empty Frame")
                    time.sleep(0.1)
                    count += 1
                    # Tolerate a couple of empty reads before giving up.
                    if count < 3:
                        continue
                    break

                img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                img_in = tf.expand_dims(img_in, 0)
                img_in = transform_images(img_in, FLAGS.size)

                t1 = time.time()
                boxes, scores, classes, nums = yolo.predict(img_in)
                classes = classes[0]
                names = np.array([class_names[int(c)] for c in classes])
                converted_boxes = convert_boxes(img, boxes[0])
                features = encoder(img, converted_boxes)
                detections = [
                    Detection(bbox, score, class_name, feature)
                    for bbox, score, class_name, feature in zip(
                        converted_boxes, scores[0], names, features)
                ]

                # Run non-maximum suppression.
                boxs = np.array([d.tlwh for d in detections])
                scores = np.array([d.confidence for d in detections])
                classes = np.array([d.class_name for d in detections])
                indices = preprocessing.non_max_suppression(
                    boxs, classes, nms_max_overlap, scores)
                detections = [detections[i] for i in indices]

                # Call the tracker
                tracker.predict()
                tracker.update(detections)

                for track in tracker.tracks:
                    if not track.is_confirmed() or track.time_since_update > 1:
                        continue
                    bbox = track.to_tlbr()
                    class_name = track.get_class()
                    # NOTE(review): SOURCE lost its indentation; drawing is
                    # assumed to be limited to person tracks - confirm.
                    if class_name == "Person" or class_name == "person":
                        color = colors[int(track.track_id) % len(colors)]
                        color = [channel * 255 for channel in color]
                        cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                                      (int(bbox[2]), int(bbox[3])), color, 2)
                        # Filled label background sized from the label length.
                        cv2.rectangle(
                            img, (int(bbox[0]), int(bbox[1] - 30)),
                            (int(bbox[0]) +
                             (len(class_name) + len(str(track.track_id))) * 17,
                             int(bbox[1])), color, -1)
                        cv2.putText(img,
                                    class_name + "-" + str(track.track_id),
                                    (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                                    (255, 255, 255), 2)

                # Running average of the instantaneous frame rate.
                fps = (fps + (1. / (time.time() - t1))) / 2
                cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30),
                            cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
                cv2.imshow('output', img)
                if out is not None:
                    out.write(img)

                # One log line per frame: index followed by every box.
                frame_index += 1
                fields = [str(frame_index)]
                for box in converted_boxes:
                    fields.extend(
                        [str(box[0]), str(box[1]), str(box[2]), str(box[3])])
                list_file.write(' '.join(fields) + ' \n')

                # press q to quit
                if cv2.waitKey(1) == ord('q'):
                    break
    finally:
        vid.release()
        # Test the writer itself, not a flag: the original looked up the
        # misspelled ``FLAGS.ouput`` here.
        if out is not None:
            out.release()
        cv2.destroyAllWindows()
def main(_argv):
    """Detect people in the input images and report close-contact pairs.

    Loads YoloV3 (or YoloV3Tiny) with the weights and class names given by
    the flags, runs detection on every image in ``FLAGS.images`` (or on one
    sample drawn from ``FLAGS.tfrecord``), and for each image:

    * counts the detections labelled ``'person'``,
    * computes midpoints and heights via ``mid_point`` / ``height_dist``,
    * hands the (possibly scaled) average height to ``find_closest`` as the
      distance threshold,
    * draws the returned pairs and a summary banner, and shows the result in
      an OpenCV window for up to 20 seconds.

    Args:
        _argv: Positional command-line arguments forwarded by the absl app
            runner; unused.
    """
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if physical_devices:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights).expect_partial()
    print('weights loaded')

    with open(FLAGS.classes) as names_file:
        class_names = [line.strip() for line in names_file]
    print('classes loaded')

    if FLAGS.tfrecord:
        dataset = load_tfrecord_dataset(FLAGS.tfrecord, FLAGS.classes,
                                        FLAGS.size)
        dataset = dataset.shuffle(512)
        img_raw, _label = next(iter(dataset.take(1)))
        # The detection loop below iterates raw_images, so the sampled record
        # has to be placed in that list as well.
        raw_images = [img_raw]
    else:
        raw_images = []
        for image_path in FLAGS.images:
            with open(image_path, 'rb') as image_file:
                raw_images.append(
                    tf.image.decode_image(image_file.read(), channels=3))

    print("raw image :", raw_images)
    for raw_img in raw_images:
        # Use the size of the image being processed, not of the last one
        # that happened to be loaded.
        height, width = raw_img.shape[0], raw_img.shape[1]

        img_in = tf.expand_dims(raw_img, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo(img_in)
        t2 = time.time()
        logging.info('time: {}'.format(t2 - t1))

        print('detections:')
        tot = 0
        for i in range(nums[0]):
            label = class_names[int(classes[0][i])]
            if label == 'person':
                tot += 1
                print('\t{}, {}, {}'.format(label,
                                            np.array(scores[0][i]),
                                            np.array(boxes[0][i])))

        # Keep only the boxes whose class id is 0.
        # NOTE(review): presumably id 0 is 'person' in the class file - confirm.
        person_idx = np.where(classes[0] == 0)[0]
        person = np.array(boxes)[0][person_idx]

        if tot == 0:
            # Nothing to measure: the average height and the pairwise
            # distances are undefined without at least one person.
            logging.warning('no person detected, skipping image')
            continue

        # NOTE(review): the annotated picture is always read from the
        # hard-coded 'img.png', not from the input image - confirm intent.
        img = cv2.imread('img.png')

        midpoints = [
            mid_point(img, person, i, height, width) for i in range(tot)
        ]
        heights_of_people = [
            height_dist(img, person, i, height, width) for i in range(tot)
        ]
        print("\n\nHeights :", heights_of_people)
        avg = sum(heights_of_people) / len(heights_of_people)
        print("Avg height : ", avg)
        print("\n\nMidpoints:", midpoints)

        dist = compute_distance(midpoints, tot)
        print("\n\ndistance : ", dist)

        if avg >= 100:
            avg = avg * 0.85
        thresh = avg
        p1, p2, d = find_closest(dist, tot, thresh)

        for first, second in zip(p1, p2):
            cv2.line(img, midpoints[first], midpoints[second],
                     (88, 43, 237), 2)
        img, count = change_2_red(img, person, p1, p2, height, width)

        df = pd.DataFrame({"p1": p1, "p2": p2, "dist": d})
        print(df)

        total_interaction = (tot * (tot - 1)) // 2
        faulty_interaction = len(p1)
        # With a single person there are no pairs; report 0 instead of
        # dividing by zero.
        if total_interaction:
            sd_index = (faulty_interaction / total_interaction) * 100
        else:
            sd_index = 0.0
        print(sd_index)

        overlay = img.copy()
        output = img.copy()
        img = cv2.rectangle(overlay, (0, 0),
                            (0 + (len("Not following : 100")) * 17, 80),
                            (0, 0, 0), -1)
        img = cv2.putText(img, "Total People : {:.0f}".format(tot), (0, 30),
                          cv2.FONT_HERSHEY_DUPLEX, 1, (255, 255, 255), 1)
        img = cv2.putText(img, "Not Following : {:.0f}".format(count),
                          (0, 60), cv2.FONT_HERSHEY_DUPLEX, 1,
                          (255, 255, 255), 1)
        alpha = 0.5
        cv2.addWeighted(overlay, alpha, output, 1 - alpha, 0, output)

        cv2.imshow('output', output)
        key = cv2.waitKey(20000)
        if key == 27:  # ESC closes the window early
            cv2.destroyAllWindows()

        print("height : ", height)
        print("width : ", width)
def main(_argv):
    """Train a YoloV3 / YoloV3Tiny model as configured by the absl flags.

    Builds the training and validation pipelines (a fake dataset is used when
    ``FLAGS.dataset`` / ``FLAGS.val_dataset`` are unset), applies the transfer
    mode selected by ``FLAGS.transfer`` and then trains either with a manual
    eager loop (``FLAGS.mode == 'eager_tf'``) or with ``model.fit``.

    Args:
        _argv: Positional command-line arguments forwarded by the absl app
            runner; unused.
    """
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    if FLAGS.tiny:
        model_cls = YoloV3Tiny
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model_cls = YoloV3
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks
    model = model_cls(FLAGS.size, training=True, classes=FLAGS.num_classes)

    def _prepare(images, targets):
        # Resize the images and convert the boxes to per-scale grid targets.
        return (dataset.transform_images(images, FLAGS.size),
                dataset.transform_targets(targets, anchors, anchor_masks,
                                          FLAGS.size))

    train_dataset = dataset.load_fake_dataset()
    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(
            FLAGS.dataset, FLAGS.classes, FLAGS.size)
    train_dataset = train_dataset.shuffle(buffer_size=512)
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(_prepare)
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    val_dataset = dataset.load_fake_dataset()
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(
            FLAGS.val_dataset, FLAGS.classes, FLAGS.size)
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(_prepare)

    # Configure the model for transfer learning ('none' needs no setup).
    if FLAGS.transfer in ['darknet', 'no_output']:
        # Darknet transfer is a special case that works with an incompatible
        # number of classes: load the weights into a second model and copy
        # only the compatible layers across.
        model_pretrained = model_cls(
            FLAGS.size, training=True,
            classes=FLAGS.weights_num_classes or FLAGS.num_classes)
        model_pretrained.load_weights(FLAGS.weights)

        if FLAGS.transfer == 'darknet':
            model.get_layer('yolo_darknet').set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
            freeze_all(model.get_layer('yolo_darknet'))
        else:  # 'no_output'
            for layer in model.layers:
                if not layer.name.startswith('yolo_output'):
                    layer.set_weights(
                        model_pretrained.get_layer(layer.name).get_weights())
                    freeze_all(layer)
    elif FLAGS.transfer != 'none':
        # All other transfer modes require matching classes.
        model.load_weights(FLAGS.weights)
        if FLAGS.transfer == 'fine_tune':
            # Freeze darknet and fine tune the other layers.
            freeze_all(model.get_layer('yolo_darknet'))
        elif FLAGS.transfer == 'frozen':
            freeze_all(model)

    # `learning_rate` is the supported keyword; `lr` is a deprecated alias
    # that newer Keras releases reject.
    optimizer = tf.keras.optimizers.Adam(learning_rate=FLAGS.learning_rate)
    loss = [YoloLoss(anchors[mask], classes=FLAGS.num_classes)
            for mask in anchor_masks]

    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging.
        # Non eager graph mode is recommended for real training.
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = [
                        loss_fn(label, output)
                        for output, label, loss_fn in zip(outputs, labels,
                                                          loss)
                    ]
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(
                    zip(grads, model.trainable_variables))

                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    [np.sum(x.numpy()) for x in pred_loss]))
                avg_loss.update_state(total_loss)

            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = [
                    loss_fn(label, output)
                    for output, label, loss_fn in zip(outputs, labels, loss)
                ]
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                logging.info("{}_val_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    [np.sum(x.numpy()) for x in pred_loss]))
                avg_val_loss.update_state(total_loss)

            logging.info("{}, train: {}, val: {}".format(
                epoch,
                avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))

            avg_loss.reset_states()
            avg_val_loss.reset_states()
            model.save_weights(
                'checkpoints/yolov3_train_{}.tf'.format(epoch))
    else:
        model.compile(optimizer=optimizer, loss=loss,
                      run_eagerly=(FLAGS.mode == 'eager_fit'))

        callbacks = [
            ReduceLROnPlateau(verbose=1),
            EarlyStopping(patience=3, verbose=1),
            ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                            verbose=1, save_weights_only=True),
            TensorBoard(log_dir='logs')
        ]

        model.fit(train_dataset,
                  epochs=FLAGS.epochs,
                  callbacks=callbacks,
                  validation_data=val_dataset)
import time from absl import app, flags, logging from absl.flags import FLAGS import cv2 import tensorflow as tf from yolov3_tf2.models import (YoloV3, YoloV3Tiny) from yolov3_tf2.dataset import transform_images from yolov3_tf2.utils import draw_outputs import subprocess as sp import numpy yolo = YoloV3() yolo.load_weights("./checkpoints/yolov3.tf") logging.info('weights loaded') class_names = [c.strip() for c in open("./data/coco.names").readlines()] logging.info('classes loaded') times = [] # Added FFMPEG stuff FFMPEG_BIN = "ffmpeg" command = [ FFMPEG_BIN, '-i', 'fifo264', # fifo is the named pipe '-pix_fmt', 'bgr24', # opencv requires bgr24 pixel format. '-vcodec',
def main(_argv):
    """Track objects in frames received over ROS with YoloV3 and DeepSORT.

    Subscribes to ``/new_image_raw`` and then repeatedly reads the
    module-level ``cv_image``, detects objects with YoloV3, associates them
    with the DeepSORT tracker, draws the confirmed tracks, shows the frame
    and optionally writes it to ``FLAGS.output``.  The converted boxes of
    every frame are appended to ``detection.txt``.  The loop ends when ``q``
    is pressed or after three empty frames.

    Args:
        _argv: Positional command-line arguments forwarded by the absl app
            runner; unused.
    """
    rospy.init_node('tracker', anonymous=True)
    rospy.Subscriber("/new_image_raw", Image, callback)

    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1.0

    # Initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric(
        "cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if physical_devices:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    with open(FLAGS.classes) as names_file:
        class_names = [line.strip() for line in names_file]
    logging.info('classes loaded')

    out = None
    if FLAGS.output:
        # Fixed output geometry and frame rate for the recorded video.
        width = 720
        height = 862
        writer_fps = 2
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, writer_fps,
                              (width, height))

    # The palette does not depend on the frame, so build it once.
    cmap = plt.get_cmap('tab20b')
    colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

    frame_index = -1
    fps = 0.0
    empty_frames = 0

    try:
        with open('detection.txt', 'w') as list_file:
            while True:
                # NOTE(review): cv_image is read from module scope and is
                # presumably updated by `callback` - confirm.
                img = cv_image

                if img is None:
                    logging.warning("Empty Frame")
                    time.sleep(0.1)
                    empty_frames += 1
                    if empty_frames < 3:
                        continue
                    break

                print("shape = {} , ".format(img.shape))
                print("dtype = {} , ".format(img.dtype))

                img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                img_in = tf.expand_dims(img_in, 0)
                img_in = transform_images(img_in, FLAGS.size)

                t1 = time.time()
                boxes, scores, classes, nums = yolo.predict(img_in, steps=1)
                classes = classes[0]
                names = np.array([class_names[int(c)] for c in classes])
                converted_boxes = convert_boxes(img, boxes[0])
                features = encoder(img, converted_boxes)
                detections = [
                    Detection(bbox, score, class_name, feature)
                    for bbox, score, class_name, feature in zip(
                        converted_boxes, scores[0], names, features)
                ]

                # Run non-maxima suppression
                boxs = np.array([d.tlwh for d in detections])
                scores = np.array([d.confidence for d in detections])
                classes = np.array([d.class_name for d in detections])
                indices = preprocessing.non_max_suppression(
                    boxs, classes, nms_max_overlap, scores)
                detections = [detections[i] for i in indices]

                # Call the tracker
                tracker.predict()
                tracker.update(detections)

                for track in tracker.tracks:
                    if not track.is_confirmed() or track.time_since_update > 1:
                        continue
                    bbox = track.to_tlbr()
                    class_name = track.get_class()
                    color = colors[int(track.track_id) % len(colors)]
                    color = [c * 255 for c in color]
                    left, top = int(bbox[0]), int(bbox[1])
                    label = class_name + "-" + str(track.track_id)
                    cv2.rectangle(img, (left, top),
                                  (int(bbox[2]), int(bbox[3])), color, 2)
                    # Filled strip above the box that carries the label.
                    cv2.rectangle(
                        img, (left, int(bbox[1] - 30)),
                        (left + (len(class_name)
                                 + len(str(track.track_id))) * 17, top),
                        color, -1)
                    cv2.putText(img, label, (left, int(bbox[1] - 10)), 0,
                                0.75, (255, 255, 255), 2)

                # Print a running average of the frame rate on screen.
                fps = (fps + (1. / (time.time() - t1))) / 2
                cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30),
                            cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
                cv2.imshow('output', img)
                if out is not None:
                    out.write(img)

                # One line per frame: "<index> x y w h x y w h ... \n"
                frame_index += 1
                record = [str(frame_index)]
                for box in converted_boxes:
                    record.extend(str(box[k]) for k in range(4))
                list_file.write(' '.join(record) + ' \n')

                # Press q to quit
                if cv2.waitKey(1) == ord('q'):
                    break
    finally:
        # Release the writer and the window even if a frame raised.
        if out is not None:
            out.release()
        cv2.destroyAllWindows()
def main(_argv):
    """Train YoloV3 / YoloV3Tiny and report per-class precision and recall.

    The datasets keep their raw labels; ``dataset.transform_targets`` is
    applied inside the eager loop so that the untransformed labels remain
    available for ``batch_true_false_positives``.  In ``eager_tf`` mode each
    epoch runs at most 101 training batches and 21 validation batches, prints
    precision/recall per class and in total, logs the mean losses and saves a
    checkpoint under ``FLAGS.checkpoint_dir``.  Any other mode trains with
    ``model.fit``.

    Args:
        _argv: Positional command-line arguments forwarded by the absl app
            runner; unused.
    """
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    if FLAGS.tiny:
        model_cls = YoloV3Tiny
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model_cls = YoloV3
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks
    model = model_cls(FLAGS.size, training=True, classes=FLAGS.num_classes)

    # Second model that shares the network and adds the post-processing
    # block, used to obtain final predictions for the precision/recall count.
    post_process_outputs = post_process_block(model.outputs,
                                              classes=FLAGS.num_classes)
    post_process_model = Model(model.inputs, post_process_outputs)

    def _resize_only(images, targets):
        # Targets stay untransformed; see the docstring.
        return dataset.transform_images(images, FLAGS.size), targets

    train_dataset = dataset.load_fake_dataset()
    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(
            FLAGS.dataset, FLAGS.classes, FLAGS.size)
    train_dataset = train_dataset.shuffle(buffer_size=512)
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(_resize_only)
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    val_dataset = dataset.load_fake_dataset()
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(
            FLAGS.val_dataset, FLAGS.classes, FLAGS.size)
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(_resize_only)

    # Configure the model for transfer learning ('none' needs no setup).
    if FLAGS.transfer in ['darknet', 'no_output']:
        # Darknet transfer is a special case that works with an incompatible
        # number of classes: load into a second model, copy compatible layers.
        model_pretrained = model_cls(
            FLAGS.size, training=True,
            classes=FLAGS.weights_num_classes or FLAGS.num_classes)
        model_pretrained.load_weights(FLAGS.weights)

        if FLAGS.transfer == 'darknet':
            model.get_layer('yolo_darknet').set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
            freeze_all(model.get_layer('yolo_darknet'))
        else:  # 'no_output'
            for layer in model.layers:
                if not layer.name.startswith('yolo_output'):
                    layer.set_weights(
                        model_pretrained.get_layer(layer.name).get_weights())
                    freeze_all(layer)
    elif FLAGS.transfer != 'none':
        # All other transfer modes require matching classes.
        model.load_weights(FLAGS.weights)
        if FLAGS.transfer == 'fine_tune':
            # Freeze darknet and fine tune the other layers.
            freeze_all(model.get_layer('yolo_darknet'))
        elif FLAGS.transfer == 'frozen':
            freeze_all(model)

    # `learning_rate` is the supported keyword; `lr` is a deprecated alias
    # that newer Keras releases reject.
    optimizer = tf.keras.optimizers.Adam(learning_rate=FLAGS.learning_rate)
    loss = [
        YoloLoss(anchors[mask], classes=FLAGS.num_classes)
        for mask in anchor_masks
    ]

    # (batch_size, grid, grid, anchors, (x, y, w, h, obj, ...cls))
    # model.outputs shape: [[N, 13, 13, 3, 85], [N, 26, 26, 3, 85], [N, 52, 52, 3, 85]]
    # labels shape: ([N, 13, 13, 3, 6], [N, 26, 26, 3, 6], [N, 52, 52, 3, 6])
    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging.
        # Non eager graph mode is recommended for real training.
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    transf_labels = dataset.transform_targets(
                        labels, anchors, anchor_masks, FLAGS.size)
                    pred_loss = [
                        loss_fn(label, output)
                        for output, label, loss_fn in zip(
                            outputs, transf_labels, loss)
                    ]
                    total_loss = tf.reduce_sum(
                        pred_loss, axis=None) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(
                    zip(grads, model.trainable_variables))

                log_batch(logging, epoch, batch, total_loss, pred_loss)
                avg_loss.update_state(total_loss)

                if batch >= 100:
                    break

            true_pos_total = np.zeros(FLAGS.num_classes)
            false_pos_total = np.zeros(FLAGS.num_classes)
            n_pos_total = np.zeros(FLAGS.num_classes)

            for batch, (images, labels) in enumerate(val_dataset):
                # Get losses
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                transf_labels = dataset.transform_targets(
                    labels, anchors, anchor_masks, FLAGS.size)
                pred_loss = [
                    loss_fn(label, output)
                    for output, label, loss_fn in zip(
                        outputs, transf_labels, loss)
                ]
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss
                log_batch(logging, epoch, batch, total_loss, pred_loss)
                avg_val_loss.update_state(total_loss)

                # Get true positives, false positives, and positive labels
                preds = post_process_model(images)
                true_pos, false_pos, n_pos = batch_true_false_positives(
                    preds.numpy(), labels.numpy(), FLAGS.num_classes)
                true_pos_total += true_pos
                false_pos_total += false_pos
                n_pos_total += n_pos

                if batch >= 20:
                    break

            # Precision-recall by class
            precision, recall = batch_precision_recall(
                true_pos_total, false_pos_total, n_pos_total)
            for c in range(FLAGS.num_classes):
                print('Class {} - Prec: {}, Rec: {}'.format(
                    c, precision[c], recall[c]))

            # Total precision-recall
            print('Total - Prec: {}, Rec: {}'.format(
                calc_precision(np.sum(true_pos_total),
                               np.sum(false_pos_total)),
                calc_recall(np.sum(true_pos_total), np.sum(n_pos_total))))

            # Log losses
            logging.info("{}, train: {}, val: {}".format(
                epoch,
                avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))

            # Reset the running means and save weights
            avg_loss.reset_states()
            avg_val_loss.reset_states()
            model.save_weights(
                os.path.join(FLAGS.checkpoint_dir,
                             'yolov3_train_{}.tf'.format(epoch)))
    else:
        model.compile(optimizer=optimizer, loss=loss,
                      run_eagerly=(FLAGS.mode == 'eager_fit'))

        callbacks = [
            ReduceLROnPlateau(verbose=1),
            EarlyStopping(patience=3, verbose=1),
            ModelCheckpoint(os.path.join(FLAGS.checkpoint_dir,
                                         'yolov3_train_{epoch}.tf'),
                            verbose=1, save_weights_only=True),
            TensorBoard(log_dir=FLAGS.log_dir)
        ]

        model.fit(train_dataset,
                  epochs=FLAGS.epochs,
                  callbacks=callbacks,
                  validation_data=val_dataset)
# ---------------------------------------------------------------------------
# Module-level configuration: the detector is loaded once at import time and
# shared by the Flask application defined below.
# ---------------------------------------------------------------------------
classes_path = './data/labels/coco.names'
weights_path = './weights/yolov3.tf'
tiny = False  # set to True if using a Yolov3 Tiny model
size = 416  # size images are resized to for model
output_path = 'static/detections/'  # path to output folder where images with detections are saved
num_classes = 80  # number of classes in model

# Load in weights and classes
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if len(physical_devices) > 0:
    tf.config.experimental.set_memory_growth(physical_devices[0], True)

if tiny:
    yolo = YoloV3Tiny(classes=num_classes)
else:
    yolo = YoloV3(classes=num_classes)

yolo.load_weights(weights_path).expect_partial()
print('weights loaded')

# Read the class names through a context manager so the file is closed
# deterministically instead of whenever the handle is garbage-collected.
with open(classes_path) as _names_file:
    class_names = [c.strip() for c in _names_file]
print('classes loaded')

APP_ROOT = os.path.dirname(os.path.abspath(__file__))
upload = os.getcwd() + '/uploads/'

app = Flask(__name__)
dropzone = Dropzone(app)

video_camera = None
global_frame = None
def main(_argv):
    """Consume camera frames from the broker, detect, track and record them.

    Loads YoloV3 (or YoloV3Tiny), subscribes to the
    ``CameraGateway.<FLAGS.camera>.Frame`` topic, and for ``FLAGS.nframes``
    messages runs detection, updates the module-level ``centroidTracker``
    with the detected rectangles, draws the boxes and the object ids and
    writes the frame to the video file ``FLAGS.output``.

    Args:
        _argv: Positional command-line arguments forwarded by the absl app
            runner; unused.
    """
    if FLAGS.tiny:
        yolo = YoloV3Tiny()
    else:
        yolo = YoloV3()

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    with open(FLAGS.classes) as names_file:
        class_names = [line.strip() for line in names_file]
    logging.info('classes loaded')

    # Connect to the broker
    # NOTE(review): the scheme reads "ampq"; this looks like a typo for
    # "amqp" - confirm against the broker client before changing it.
    broker = "ampq://*****:*****@10.10.2.1:30000"
    channel = Channel(broker)

    # Subscribe to the desired topic
    subscription = Subscription(channel)
    camera_id = "CameraGateway." + FLAGS.camera + ".Frame"
    subscription.subscribe(topic=camera_id)

    fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
    out = cv2.VideoWriter(FLAGS.output, fourcc, 5.0, (1288, 728))

    try:
        for _ in range(FLAGS.nframes):
            msg = channel.consume()
            frame = get_np_image(msg.unpack(Image))

            # Keep the original frame for drawing; the network gets a
            # batched, resized copy.
            img_to_draw = frame
            net_input = tf.expand_dims(frame, 0)
            net_input = transform_images(net_input, FLAGS.size)

            boxes, scores, classes, nums = yolo.predict(net_input)

            for det in range(nums[0]):
                logging.info('\t{}, {}, {}'.format(
                    class_names[int(classes[0][det])],
                    np.array(scores[0][det]),
                    np.array(boxes[0][det])))

            rects = get_rects(img_to_draw, (boxes, scores, classes, nums))
            img_to_draw = draw_outputs(img_to_draw,
                                       (boxes, scores, classes, nums),
                                       class_names)

            objects = centroidTracker.update(rects)

            # Loop over the tracked objects and draw each object's id at
            # its centroid on the output frame.
            for object_id, centroid in objects.items():
                text = "{}".format(object_id)
                cv2.putText(img_to_draw, text, (centroid[0], centroid[1]),
                            cv2.FONT_HERSHEY_COMPLEX, 1, (0, 240, 0), 4)

            out.write(img_to_draw)
    finally:
        # Finalise the video file even when a frame fails.
        out.release()
def main(_argv):
    """Compare the logos detected on product labels with Excel approval sheets.

    For every index ``n`` in ``1..count`` the function

    1. converts ``data/pdf/test (n).pdf`` to ``data/images/test (n).jpg``,
    2. runs YoloV3 on the image and writes the annotated picture to
       ``./detections/detection (n).jpg``,
    3. collects the distinct detected logo names,
    4. reads columns 4 and 5 of ``data/excel/test (n).xlsx`` (from row 2 on),
       translating column 4 through the module-level ``extola`` mapping,
    5. colours the column-5 cell of each row according to its approval
       status and whether the associated logos were detected,
    6. appends one blue row per detected logo that no sheet row matched, and
    7. saves the workbook in place.

    Finally the detected logos and translated sheet entries are printed.

    Args:
        _argv: Positional command-line arguments forwarded by the absl app
            runner; unused.
    """
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    yolo = YoloV3(classes=total_number_of_logos) # number of classes/logos, needs to be updated if another logo is added
    yolo.load_weights('./weights/yolov3-custom.tf').expect_partial() # file path to weights
    class_names = [c.strip() for c in open('./data/labels/custom.names').readlines()] # file path to classes list, needs to be updated if another logo is added

    # NOTE(review): `count` is only bound when FLAGS.count is truthy; with a
    # falsy value the `range(count)` below raises NameError.
    if FLAGS.count:
        count = FLAGS.count
    excel = []
    images = []
    for i in range(count):
        # The return value is unused; the call is made for the JPEG it writes
        # into data/images.
        con = convert_from_path('data/pdf/test (' + str(i+1) + ').pdf', output_folder='data/images', fmt="jpg", single_file=True, output_file='test (' + str(i+1) + ')')
        excel.append('data/excel/test (' + str(i+1) + ').xlsx')
        images.append('data/images/test (' + str(i+1) + ').jpg')

    raw_images = []
    for image in images:
        img_raw = tf.image.decode_image(
            open(image, 'rb').read(), channels=3)
        raw_images.append(img_raw)

    i = 0 # index number for main loop
    logos = [] # list of detected logos for each image
    approvals = [] # list of excel data for each image
    for raw_img in raw_images:
        img = tf.expand_dims(raw_img, 0)
        img = transform_images(img, 416) # image size

        t1 = time.time()
        boxes, scores, classes, nums = yolo(img)
        t2 = time.time()
        logging.info('time: {}'.format(t2 - t1))

        img = cv2.cvtColor(raw_img.numpy(), cv2.COLOR_RGB2BGR)
        img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
        cv2.imwrite('./detections/detection (' + str(i+1) + ').jpg', img) # image output

        # LABEL EXTRACTION
        # Build a de-duplicated list of [logo_name, matched_flag] pairs.
        temp_names = [] # temporary list for each image's logo detections
        for j in range(nums[0]):
            # Despite its name, `repeat` stays True while the logo has NOT
            # been seen before and is set to False on a duplicate.
            repeat = True
            temp_pair = [] # temporary list for each logo and its status
            if (j > 0):
                for k in range(len(temp_names)):
                    if (class_names[int(classes[0][j])] == temp_names[k][0]):
                        repeat = False
                        break
            if (repeat): # if not a repeated logo, update main logo list
                temp_pair.append(class_names[int(classes[0][j])]) # append logo
                temp_pair.append(False) # append status
                temp_names.append(temp_pair) # append pair
        logos.append(temp_names) # append names list to main logo list

        # EXCEL EXTRACTION
        # Each entry is [column 4 text, column 5 text, fill colour]; row 1 of
        # the sheet is skipped.
        wb = load_workbook(excel[i])
        sheet = wb.active
        rows = sheet.max_row
        temp_sheet = [] # temporary list for each image's excel data
        for j in range(rows-1):
            temp_rows = [] # temporary list for each row's excel data
            temp_rows.append(str(sheet.cell(row=j+2, column=4).value).upper().strip())
            temp_rows.append(str(sheet.cell(row=j+2, column=5).value).upper().strip())
            temp_rows.append("00FF0000") # Red by default
            temp_sheet.append(temp_rows)
        approvals.append(temp_sheet) # append sheet list to main approvals list

        # EXCEL TRANSLATION
        # Replace the column-4 text with the list of logo names `extola`
        # associates with it.
        for j in range(len(approvals[i])):
            if (approvals[i][j][0] in extola):
                temp_trans = extola[approvals[i][j][0]]
            else:
                temp_trans = ["NAL"] # No Associated Logo
            approvals[i][j][0] = temp_trans

        # EXCEL COMPARED TO LABEL
        # "APPROVAL STATUS"             "On label"  "Not on label"
        # "APPROVED"                    "Green"     "Red"
        # "NO REQUIREMENTS"             "Red"       "Green"
        # "APPROVAL NOT APPLICABLE"     "Red"       "Green"
        # "APPROVAL NOT REQUIRED"       "Red"       "Green"
        # "CONTACT CISCO PARTNER/IOR"   "Red"       "Green"
        # "NOT APPROVED"                "Red"       "Green"
        # "PENDING"                     "Red"       "Green"
        # "RENEWAL IN PROGESS"          "Red"       "Green"
        # "NONE"/"UNKNOWN"              "Red"       "Red"
        #
        # "00FF0000" (Red) needs attention
        # "0000FF00" (Green) good to go
        # "000000FF" (Blue) missing logo
        #
        for j in range(len(approvals[i])):
            # Walk the detected logos (index k) until the row is resolved:
            # either k runs off the end of the logo list, the row has no
            # associated logo, or every associated logo has been found.
            flag = True
            k = 0
            temp_count = 0
            while (flag):
                if (k == len(logos[i])): # logo not on label
                    flag = False
                    if (approvals[i][j][1] == "APPROVED"):
                        approvals[i][j][2] = "00FF0000" # Red
                    elif ((approvals[i][j][1] == "APPROVAL NOT APPLICABLE")or(approvals[i][j][1] == "APPROVAL NOT REQUIRED")or(approvals[i][j][1] == "CONTACT CISCO PARTNER/IOR")or (approvals[i][j][1] == "NOT APPROVED")or(approvals[i][j][1] == "PENDING")or(approvals[i][j][1] == "RENEWAL IN PROGESS")or(approvals[i][j][1] == "NO REQUIREMENTS")):
                        approvals[i][j][2] = "0000FF00" # Green
                    elif ((approvals[i][j][1] == "NONE")or(approvals[i][j][1] == "UNKNOWN")):
                        approvals[i][j][2] = "00FF0000" # Red
                        sheet.cell(row=j+2, column=5).value = "Unknown"
                elif (approvals[i][j][0][0] == "NAL"): # no logo to detect
                    flag = False
                    if ((approvals[i][j][1] == "APPROVAL NOT APPLICABLE")or(approvals[i][j][1] == "APPROVAL NOT REQUIRED")or(approvals[i][j][1] == "CONTACT CISCO PARTNER/IOR")or (approvals[i][j][1] == "NOT APPROVED")or(approvals[i][j][1] == "PENDING")or(approvals[i][j][1] == "RENEWAL IN PROGESS")or(approvals[i][j][1] == "APPROVED")or(approvals[i][j][1] == "NO REQUIREMENTS")):
                        approvals[i][j][2] = "0000FF00" # Green
                    elif ((approvals[i][j][1] == "NONE")or(approvals[i][j][1] == "UNKNOWN")):
                        approvals[i][j][2] = "00FF0000" # Red
                        sheet.cell(row=j+2, column=5).value = "Unknown"
                else: # continue or logo on label
                    for X in range(len(approvals[i][j][0])):
                        if (approvals[i][j][0][X] == logos[i][k][0]): # logo on label
                            logos[i][k][1] = True
                            temp_count+=1
                            # The row is resolved once all of its associated
                            # logos have been seen on the label.
                            if (temp_count == len(approvals[i][j][0])):
                                flag = False
                            if ((approvals[i][j][1] == "APPROVAL NOT APPLICABLE")or(approvals[i][j][1] == "APPROVAL NOT REQUIRED")or(approvals[i][j][1] == "CONTACT CISCO PARTNER/IOR")or (approvals[i][j][1] == "NOT APPROVED")or(approvals[i][j][1] == "PENDING")or(approvals[i][j][1] == "RENEWAL IN PROGESS")):
                                approvals[i][j][2] = "00FF0000" # Red
                            # NOTE(review): `and` binds tighter than `or`, so
                            # this reads (all logos found AND "APPROVED") OR
                            # "NO REQUIREMENTS". The table above lists
                            # "NO REQUIREMENTS" on label as Red, but this
                            # branch colours it Green - confirm which is
                            # intended.
                            elif ((temp_count == len(approvals[i][j][0]))and(approvals[i][j][1] == "APPROVED")or(approvals[i][j][1] == "NO REQUIREMENTS")):
                                approvals[i][j][2] = "0000FF00" # Green
                            elif ((approvals[i][j][1] == "NONE")or(approvals[i][j][1] == "UNKNOWN")):
                                approvals[i][j][2] = "00FF0000" # Red
                                sheet.cell(row=j+2, column=5).value = "Unknown"
                    k+=1
            sheet.cell(row=j+2, column=5).fill = PatternFill(start_color=approvals[i][j][2], end_color=approvals[i][j][2], fill_type='solid')

        # LABEL COMPARED TO EXCEL
        # Detected logos that no sheet row matched get a new row below the
        # existing data; the remaining columns are copied from the last row.
        new_row=1
        for j in range(len(logos[i])):
            if (logos[i][j][1] == False): # not on excel so add it in a new row
                sheet.cell(row=new_row+rows, column=1).value = str(sheet.cell(row=rows, column=1).value) #1 Product Name
                sheet.cell(row=new_row+rows, column=3).value = str(sheet.cell(row=rows, column=3).value) #3 Desc
                sheet.cell(row=new_row+rows, column=4).value = logos[i][j][0] #4 Country
                sheet.cell(row=new_row+rows, column=5).value = "Unknown" #5 Approval Status
                sheet.cell(row=new_row+rows, column=5).fill = PatternFill(start_color="000000FF", end_color="000000FF", fill_type='solid') #5 Blue
                for k in range(5):
                    sheet.cell(row=new_row+rows, column=k+6).value = str(sheet.cell(row=rows, column=k+6).value) #6-10
                new_row+=1

        wb.save(excel[i])
        i+=1

    # DISPLAY
    # For each processed image print the detected logos ("L<n>") and the
    # translated sheet entries ("E<n>").
    for j in range(i):
        print("\nL" + str(j+1) + ": ", end="")
        temp_print = []
        for k in range(len(logos[j])):
            temp_print.append(logos[j][k][0])
        print(temp_print, "\nE" + str(j+1) + ": ", end="")
        temp_print = []
        for k in range(len(approvals[j])):
            temp_print.append(approvals[j][k][0])
        print(temp_print)
    print("")
def main(_argv):
    """Evaluate a detector on TFRecord data and print precision and recall.

    Loads either a SavedModel (``FLAGS.model_name``) or YoloV3 / YoloV3Tiny
    weights, iterates the records of ``data/<FLAGS.dataset>.*.record``, runs
    detection on each referenced image under ``data/`` and matches
    predictions to ground-truth boxes: a ground-truth object ``i`` counts as
    found when a prediction of class ``i`` has its centre within 20 pixels
    (in the resized network input space) of the object's centre.

    Args:
        _argv: Positional command-line arguments forwarded by the absl app
            runner; unused.
    """
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if physical_devices:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.model_name:
        logging.info("loading model %s", FLAGS.model_name)
        yolo = tf.saved_model.load(FLAGS.model_name)
        logging.info("model loaded")
    else:
        if FLAGS.tiny:
            yolo = YoloV3Tiny(classes=FLAGS.num_classes)
        else:
            yolo = YoloV3(classes=FLAGS.num_classes)
        yolo.load_weights(FLAGS.weights)
        logging.info('weights loaded')

    with open(FLAGS.classes) as names_file:
        class_names = [line.strip() for line in names_file]
    logging.info('classes loaded')

    nfp = 0  # records processed
    npp = 0  # records with at least one prediction
    nxp = 0  # records with exactly four predictions ("complete")
    true_positive = 0
    false_positive = 0
    false_negative = 0
    threshold = 20  # maximum centre distance, in pixels, for a match

    ds = FLAGS.dataset
    if FLAGS.save == 'all':
        records = ["data/%s.test.record" % ds,
                   "data/%s.train.record" % ds,
                   "data/%s.val.record" % ds]
    else:
        records = ["data/%s.test.record" % ds]

    for raw in tf.data.TFRecordDataset(records):
        record = tf.train.Example()
        record.ParseFromString(raw.numpy())
        feats = record.features.feature

        name = feats['image/filename'].bytes_list.value[0].decode("utf-8")
        file_name = name[name.index('/') + 1:]
        image_path = 'data/' + name
        if not os.path.exists(image_path):
            continue

        print("Record", name)
        with open(image_path, 'rb') as image_file:
            img = tf.image.decode_image(image_file.read(), channels=3)
        img = tf.expand_dims(img, 0)
        img = transform_images(img, FLAGS.size)
        # img is batched (N, H, W, C), so the spatial size is shape[1:3];
        # shape[0:2] would pair the batch dimension with the height.
        wh = np.flip(img.shape[1:3])

        xmin = feats['image/object/bbox/xmin'].float_list.value
        xmax = feats['image/object/bbox/xmax'].float_list.value
        ymin = feats['image/object/bbox/ymin'].float_list.value
        ymax = feats['image/object/bbox/ymax'].float_list.value

        # Ground-truth box centres in pixels.
        xx = [wh[0] * (lo + hi) / 2 for lo, hi in zip(xmin, xmax)]
        yy = [wh[1] * (lo + hi) / 2 for lo, hi in zip(ymin, ymax)]

        if FLAGS.model_name:
            out = yolo(img)
            if FLAGS.tiny:
                boxes, scores, classes, nums = transformOutputTiny(
                    out[0], out[1])
            else:
                boxes, scores, classes, nums = transformOutput(
                    out[0], out[1])
        else:
            boxes, scores, classes, nums = yolo(img)

        found = np.zeros(len(xx))       # 1 where a ground-truth box matched
        retrieved = np.zeros(nums[0])   # 1 where a prediction matched

        nfp += 1
        if nums[0] > 0:
            npp += 1
            annotated = cv2.imread(image_path)
            annotated = draw_outputs(annotated,
                                     (boxes, scores, classes, nums),
                                     class_names)
            ofn = 'data/predict/' + file_name
            if FLAGS.save == 'all' or FLAGS.save == 'test':
                cv2.imwrite(ofn, annotated)
                logging.info('output saved to: {}'.format(ofn))

        for i in range(len(xx)):
            for j in range(nums[0]):
                # NOTE(review): assumes the i-th ground-truth object has
                # class id i - confirm against the dataset layout.
                if classes[0][j] != i:
                    continue
                x1y1 = (np.array(boxes[0][j][0:2]) * wh).astype(np.int32)
                x2y2 = (np.array(boxes[0][j][2:4]) * wh).astype(np.int32)
                x = (x1y1[0] + x2y2[0]) / 2
                y = (x1y1[1] + x2y2[1]) / 2
                if math.hypot(x - xx[i], y - yy[i]) < threshold:
                    found[i] = 1
                    retrieved[j] = 1
                    break

        tp = found.sum()
        fn = len(found) - tp
        fp = len(retrieved) - retrieved.sum()
        true_positive += tp
        false_positive += fp
        false_negative += fn
        print("TP: %d, FP: %d, FN: %d" % (tp, fp, fn))

        if FLAGS.save == 'all':
            logging.info('detections')
            for i in range(nums[0]):
                logging.info('\t{}, {}, {}'.format(
                    class_names[int(classes[0][i])],
                    np.array(scores[0][i]),
                    np.array(boxes[0][i])))

        if nums[0] == 4:
            nxp += 1

    # Guard every ratio: an empty record list or a run without any
    # predictions/objects must not end in ZeroDivisionError.
    complete_pct = 100 * nxp / nfp if nfp else 0.0
    print("%d processed. %d some prediction. %d (%1.0f %%) has complete prediction"
          % (nfp, npp, nxp, complete_pct))

    predicted = true_positive + false_positive
    actual = true_positive + false_negative
    precision = true_positive / predicted if predicted else 0.0
    recall = true_positive / actual if actual else 0.0
    print("Precision %1.2f %%, Recall: %1.2f %%" % (100 * precision, 100 * recall))