Example #1
def main(_argv):
    start = timer()
    #timerStart = time.time()
    #basePath = "D:\MMichenthaler\VideoFrames\Video2\Video2_frame1000.jpg"
    #base = cv2.imread(basePath)
    allClimbers = []
    climbersThisPic = []
    #holds = hold_marker(base)
    #print(holds)
    print(FLAGS.colorMask)
    #------------------------------------------
    # This part is for detection via YOLO; since that does not work well enough,
    # it is left commented out here until further notice
    #---------------------------------------------

    if FLAGS.baseline and FLAGS.holdsDetection is True:
        base_raw = tf.image.decode_image(open(FLAGS.baseline, 'rb').read(),
                                         channels=3)
        base = tf.expand_dims(base_raw, 0)
        base = transform_images(base, FLAGS.size)

        t1 = time.time()
        # NOTE: this branch assumes `yolo` and `class_names` are already initialised
        baseBoxes, baseScores, BaseClasses, BaseNums = yolo(base)
        t2 = time.time()
        logging.info('time: {}'.format(t2 - t1))

        logging.info('detections:')
        for i in range(BaseNums[0]):
            logging.info('\t{}, {}, {}'.format(
                class_names[int(BaseClasses[0][i])],
                np.array(baseScores[0][i]), np.array(baseBoxes[0][i])))

        base = cv2.cvtColor(base_raw.numpy(), cv2.COLOR_RGB2BGR)
        base = draw_outputs(base,
                            (baseBoxes, baseScores, BaseClasses, BaseNums),
                            class_names)  # detection used on  Baseline Img
        cv2.imwrite(FLAGS.output + 'baselineImg.jpg', base)
        logging.info(
            'Baseline set and saved to: {}baselineImg.jpg'.format(FLAGS.output))
    # using a separate detector for holds on the bare wall image to set a baseline and saving the results

    elif os.path.isfile(FLAGS.CSVpath + "holds.csv"):
        with open(FLAGS.CSVpath + "holds.csv", "r") as file:
            holdsSt = []
            stHolds = list(csv.reader(file, delimiter=','))
            # print(stHolds)
            for elem in stHolds:
                for elem2 in elem:
                    elem3 = elem2.replace('[', '').replace(']', '')
                    holdsSt.append(elem3.split(','))

            holds = [list(map(int, rec)) for rec in holdsSt]
            logging.info('holds loaded')

    elif FLAGS.baseline and FLAGS.CSVpath:  # this code is for marking the holds in a GUI
        base = cv2.imread(FLAGS.baseline)
        holds = hold_marker(base, FLAGS.CSVpath)
        print(holds)
    '''
    holds = [[843, 2692, 992, 2835],  # holds for newVideo1
             [712, 2516, 891, 2644],
             [879, 2409, 1061, 2519],
             [787, 2039, 924, 2132],
             [912, 1875, 1025, 1971],
             [775, 1795, 888, 1887],
             [1013, 1705, 1120, 1798],
             [819, 1392, 933, 1520],
             [1028, 1112, 1141, 1199],
             [849, 1079, 959, 1213],
             [807, 909, 941, 987],
             [956, 713, 1037, 799],
             [864, 602, 950, 689],
             [1022, 584, 1129, 671],
             [903, 408, 986, 495],
             [1010, 280, 1123, 367]]

    holds = [[555, 1253, 594, 1288],            # for further tests: the holds of Video2 from the old dataset, marked with hold_marker
             [588, 1178, 627, 1215],
             [584, 1107, 626, 1141],
             [579, 1035, 631, 1075],
             [584, 967, 618, 994],
             [545, 862, 599, 908],
             [524, 830, 570, 873],
             [487, 755, 565, 828],
             [512, 680, 584, 741],
             [526, 611, 597, 681],
             [561, 550, 617, 588],
             [532, 489, 586, 528],
             [622, 404, 667, 449],
             [585, 378, 616, 400],
             [531, 392, 565, 420],
             [523, 319, 565, 366],
             [468, 276, 503, 305],
             [531, 177, 579, 223],
             [452, 103, 495, 146]]
    

    holds = [[354, 1252, 447, 1323],        # for further tests: the holds of Video2 from the new dataset, marked with hold_marker
             [432, 1347, 492, 1397],
             [518, 1291, 569, 1349],
             [439, 1206, 528, 1260],
             [395, 1113, 470, 1161],
             [531, 1069, 579, 1105],
             [540, 988, 581, 1027],
             [396, 1023, 461, 1068],
             [459, 937, 513, 983],
             [321, 964, 389, 1033],
             [318, 845, 363, 899],
             [465, 766, 511, 820],
             [314, 772, 367, 809],
             [357, 700, 413, 740],
             [475, 636, 520, 675],
             [374, 612, 437, 679],
             [500, 600, 542, 627],
             [424, 543, 481, 602],
             [516, 560, 567, 596],
             [568, 488, 609, 520],
             [403, 456, 469, 493],
             [473, 424, 532, 462],
             [476, 358, 517, 399],
             [414, 382, 452, 424],
             [515, 295, 564, 334],
             [455, 327, 494, 366],
             [450, 206, 495, 245],
             [514, 235, 558, 273],
             [496, 103, 459, 144],
             [514, 180, 560, 216]]
    '''
    # comment out because the colorMask flag does not work
    print(FLAGS.colorMask)
    if os.path.isfile(FLAGS.CSVpath +
                      'colors.csv') and FLAGS.colorMask is True:
        with open(FLAGS.CSVpath + 'colors.csv', "r") as file:
            StColor = list(csv.reader(file, delimiter=','))
            color = [list(map(int, rec)) for rec in StColor]
            color = color[0]
            logging.info('color loaded ' + str(color))

    elif FLAGS.colorMask is True:
        color = color_picker(FLAGS.baseline, FLAGS.CSVpath)

    if FLAGS.detection is True:
        physical_devices = tf.config.experimental.list_physical_devices('GPU')
        for physical_device in physical_devices:
            tf.config.experimental.set_memory_growth(physical_device, True)

        if FLAGS.tiny:
            yolo = YoloV3Tiny(classes=FLAGS.num_classes)
        else:
            yolo = YoloV3(classes=FLAGS.num_classes)

        yolo.load_weights(FLAGS.weights).expect_partial()
        logging.info('weights loaded')

        class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
        logging.info('classes loaded')

        # if FLAGS.tfrecord:
        #     dataset = load_tfrecord_dataset(
        #         FLAGS.tfrecord, FLAGS.classes, FLAGS.size)
        #     dataset = dataset.shuffle(512)
        #     img_raw, _label = next(iter(dataset.take(1)))
        #     img = tf.expand_dims(img_raw, 0)
        #     img = transform_images(img, FLAGS.size)
        #
        #     t1 = time.time()
        #     boxes, scores, classes, nums = yolo(img)
        #     t2 = time.time()
        #     logging.info('time: {}'.format(t2 - t1))
        #
        #     logging.info('detections:')
        #     for i in range(nums[0]):
        #         logging.info('\t{}, {}, {}'.format(class_names[int(classes[0][i])],
        #                                            np.array(scores[0][i]),
        #                                            np.array(boxes[0][i])))
        #         allhands.append(boxes[0][i])
        #
        #     img = cv2.cvtColor(img_raw.numpy(), cv2.COLOR_RGB2BGR)
        #     img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
        #     cv2.imwrite(FLAGS.output, img)
        #     logging.info('output saved to: {}'.format(FLAGS.output))
        # print(sorted(os.listdir(FLAGS.imDir), key=lambda x: int(x[15:-5])))

        if FLAGS.imDir:  # for detection on all images in imDir
            climberCounter = 0
            makeNumbered = False
            #for count, dirImg in enumerate(Path(FLAGS.imDir).iterdir()):                    # for the key, give the number of characters of the path that precedes the numbering
            for count, dirImg in enumerate(
                    sorted(os.listdir(FLAGS.imDir),
                           key=lambda x: int(x[16:-4]))
            ):  # ADJUST to 16 when the frame number goes below or above 10
                #climbDetect = False

                #img_raw = tf.image.decode_image(
                #   open(dirImg, 'rb').read(), channels=3)

                img_raw = cv2.imread(FLAGS.imDir + dirImg)
                img_raw = cv2.cvtColor(img_raw, cv2.COLOR_BGR2RGB)

                img = tf.expand_dims(img_raw, 0)
                img = transform_images(img, FLAGS.size)

                t1 = time.time()
                boxes, scores, classes, nums = yolo(img)
                t2 = time.time()
                logging.info('time: {}'.format(t2 - t1))

                logging.info('detections:')
                img = cv2.cvtColor(img_raw, cv2.COLOR_RGB2BGR)

                for i in range(nums[0]):
                    #print(np.array(boxes[0][i]))
                    logging.info('\t{}, {}, {}'.format(
                        class_names[int(classes[0][i])],
                        np.array(scores[0][i]), np.array(boxes[0][i])))
                    if class_names[int(classes[0][i])] == "person":

                        climbersThisPic.append(
                            np.array(boxes[0][i])
                        )  # saving all detected climbers in allClimbers ------ comment out for testing; PROBLEM with 2 persons in the image

                if not os.listdir(
                        FLAGS.numberedSource) or makeNumbered is True:
                    makeNumbered = True  #if climbDetect is False:                            #+----
                    cv2.imwrite(
                        FLAGS.numberedSource + 'PhotoNr_' +
                        str(climberCounter) + '.jpg', img
                    )  #| With a new dataset, let this block run uncommented once:
                    #THESE LINES FOR NUMBERED                                                            #| it saves every frame numbered, not only frames containing a person,
                    climberCounter += 1  #| since otherwise, if YOLO fails, there is no way to run the grip detection
                    #+----

                yOnes = []
                for j in range(len(climbersThisPic)):
                    #print('\t{}'.format(np.array(climbersThisPic[j][1:2])))
                    yOnes.append(climbersThisPic[j][1:2])
                if yOnes:
                    allClimbers.append(
                        climbersThisPic[np.argmin(yOnes)]
                    )  # the person whose bounding box has the smaller y1 value, i.e. the one
                    # higher up the wall, is stored as the climber in allClimbers
                else:
                    allClimbers.append([0, 0, 10e-12, 10e-12])

                climbersThisPic = []

                # img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
                # cv2.imwrite(FLAGS.numberedSource + str(count) + '.jpg', img)
                img = draw_persons(img, (boxes, scores, classes, nums),
                                   class_names)
                cv2.imwrite(FLAGS.output + str(count) + '.jpg', img)
                logging.info('output saved to: {}'.format(FLAGS.output) +
                             str(count))

            f = open("climbers.txt", "w")
            f.write(str(allClimbers))

#------------------------------------------------------------------------------------------ currently not in use
        elif FLAGS.video is True:  # for detection on the cam feed or on the video
            allhands = []  # collects all detected hands across frames
            frameWidth = 1080
            frameHeight = 1920
            if FLAGS.cam is True:
                cap = cv2.VideoCapture(0)
            else:
                cap = cv2.VideoCapture(FLAGS.vidFile)
            cap.set(3, frameWidth)
            cap.set(4, frameHeight)
            #cap.set(10, 150)
            while True:
                success, img = cap.read()
                imgResult = img.copy()
                img_in = tf.expand_dims(img, 0)
                img_in = transform_images(img_in, FLAGS.size)

                t1 = time.time()
                boxes, scores, classes, nums = yolo(img_in)
                t2 = time.time()
                logging.info('time: {}'.format(t2 - t1))

                logging.info('detections:')
                for i in range(nums[0]):
                    logging.info('\t{}, {}, {}'.format(
                        class_names[int(classes[0][i])],
                        np.array(scores[0][i]), np.array(boxes[0][i])))
                    allhands.append(np.array(
                        boxes[0][i]))  # saving all detected hands in allhands

                imgResult = draw_outputs(imgResult,
                                         (boxes, scores, classes, nums),
                                         class_names)
                cv2.imshow("Video", imgResult)

                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

        else:
            img_raw = tf.image.decode_image(open(FLAGS.image, 'rb').read(),
                                            channels=3)
            img = tf.expand_dims(img_raw, 0)
            img = transform_images(img, FLAGS.size)

            t1 = time.time()
            boxes, scores, classes, nums = yolo(img)
            t2 = time.time()
            logging.info('time: {}'.format(t2 - t1))

            logging.info('detections:')
            for i in range(nums[0]):
                logging.info('\t{}, {}, {}'.format(
                    class_names[int(classes[0][i])], np.array(scores[0][i]),
                    np.array(boxes[0][i])))

            img = cv2.cvtColor(img_raw.numpy(), cv2.COLOR_RGB2BGR)
            img = draw_outputs(img, (boxes, scores, classes, nums),
                               class_names)
            cv2.imwrite(FLAGS.output, img)
            logging.info('output saved to: {}'.format(FLAGS.output))
        # single-image detection
#-----------------------------------------------------------------------------------------------------------------------
# using the hand detection to create a list of all detected Hands

# 13.08.2021: modify this loop to iterate over holds and images in the right order
# -> check for overlap of hold and person bounding boxes -> only then compute differences
#print(str(allClimbers))

    olh = []  # overlapping hands
    points = 0
    holdList = "Gripped Holds:"
    overlapList = 'Overlaps:'
    if FLAGS.baseline:

        for holdID in range(
                len(holds)):  # intersect all hands with all holds
            logging.info('loading hold Nr.' + str(holdID))

            for index in range(
                    len(allClimbers)
            ):  # -> compare the area of the hand with the area of the overlap
                climbImg = cv2.imread(FLAGS.numberedSource + 'PhotoNr_' +
                                      str(index) + '.jpg')

                climberPix = percToPix(allClimbers[index],
                                       cv2.imread(FLAGS.baseline))
                #print(climberPix)
                r = overlapRect(holds[holdID], climberPix)

                delay = index - FLAGS.delay  # edit the delay here

                #print(rect_area(holds[holdID]), rect_area(r), rect_area(climberPix))

                if index == 0:
                    base = climbImg.copy()
                elif index > FLAGS.delay:  # also edit the delay here
                    base = cv2.imread(FLAGS.numberedSource + 'PhotoNr_' +
                                      str(delay) + '.jpg')

                if abs(rect_area(holds[holdID]) - rect_area(r)) < 10e-12 and (
                        index %
                        FLAGS.frameReduction) == 0:  # or (index % 600) == 0:
                    olh.append(overlapRect(holds[holdID], climberPix))
                    # print(climbImg)
                    if FLAGS.colorMask is True:  #COMMENT BACK IN AGAIN
                        #img = cv2.imread(str(climbImg))
                        base = mask_colour(base, color)
                        climbImg = mask_colour(climbImg, color)

                    allDiff, score, scorePix = compare_baseline(
                        base, climbImg, holds[holdID])
                    logging.info(
                        'overlap detected: image ' + str(index) +
                        ' and hold ' + str(holdID) + '; image similarity ' +
                        str(score) + '; Overlap percent ' +
                        str((allDiff.shape[1] * allDiff.shape[0] - scorePix) /
                            cv2.countNonZero(
                                cv2.cvtColor(allDiff, cv2.COLOR_BGR2GRAY))))
                    overlapList = overlapList + '\n overlap detected: image ' + str(
                        index) + ' and hold ' + str(
                            holdID) + '; image similarity ' + str(score)
                    cv2.imwrite(
                        FLAGS.holdsOut + str(holdID) + '/overlapping_' +
                        str(index) + '.jpg', allDiff)
                    if score < FLAGS.similarity or ((allDiff.shape[1] * allDiff.shape[0]-scorePix)/cv2.countNonZero(cv2.cvtColor(allDiff, cv2.COLOR_BGR2GRAY))) > FLAGS.holdOverlap \
                            or (allDiff.shape[1] * allDiff.shape[0]-scorePix) > FLAGS.handPix:
                        holdList = holdList + " \n hold"  #(allDiff.shape[1] * allDiff.shape[0]-scorePix)/cv2.countNonZero(cv2.cvtColor(allDiff, cv2.COLOR_BGR2GRAY))
                        print('Size of the hold rectangle: ' +
                              str(allDiff.shape[1] * allDiff.shape[0]))
                        print('Foreign pixels: ' +
                              str(allDiff.shape[1] * allDiff.shape[0] -
                                  scorePix))
                        print('NonZero Pixel: ' + str(
                            cv2.countNonZero(
                                cv2.cvtColor(allDiff, cv2.COLOR_BGR2GRAY))))
                        print('Similarity: ' + str(score))
                        print('Overlap: ' + str(
                            (allDiff.shape[1] * allDiff.shape[0] - scorePix) /
                            cv2.countNonZero(
                                cv2.cvtColor(allDiff, cv2.COLOR_BGR2GRAY))))
                        cv2.imwrite(
                            FLAGS.holdsOut + str(holdID) + '/gripped_' +
                            str(index) + '.jpg', allDiff)

                        if points < holds[holdID][4]:
                            points = holds[holdID][4]
                        logging.info('progress detected: points = ' +
                                     str(points))
                        break
                else:
                    #print(allClimbers[index])

                    logging.info('Climber Nr. ' + str(index) +
                                 ' and grip Nr. ' + str(holdID) + ': no overlap')
        #holdID += 1
    print("Total points: " + str(points))
    # print(holdList)
    # print(overlapList)
    print('Number of detected climbers: ' + str(len(allClimbers)))
    # ...
    end = timer()
    #print(end - start)

    logging.info('Elapsed time: {}'.format(
        str(datetime.timedelta(seconds=(end - start)))))
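
# The helpers percToPix, overlapRect and rect_area used above are not part of
# this snippet. A minimal sketch of what they are assumed to do, given that
# boxes are handled as [x1, y1, x2, y2] lists (hypothetical, for illustration):

def rect_area(rect):
    # Area of an axis-aligned box [x1, y1, x2, y2]; 0 for degenerate boxes.
    return max(0, rect[2] - rect[0]) * max(0, rect[3] - rect[1])

def overlapRect(a, b):
    # Intersection rectangle of two boxes (degenerate if they do not overlap).
    return [max(a[0], b[0]), max(a[1], b[1]), min(a[2], b[2]), min(a[3], b[3])]

def percToPix(box, img):
    # Convert YOLO's relative [x1, y1, x2, y2] coordinates to pixel coordinates.
    h, w = img.shape[:2]
    return [int(box[0] * w), int(box[1] * h), int(box[2] * w), int(box[3] * h)]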
Example #2
def main():

    train_path = '/Users/justinbutler/Desktop/school/Calgary/ML_Work/Datasets/Shapes/tfrecord_single/coco_train.record-00000-of-00001'
    valid_path = '/Users/justinbutler/Desktop/school/Calgary/ML_Work/Datasets/Shapes/tfrecord_single/coco_val.record-00000-of-00001'
    weights_path = '/Users/justinbutler/Desktop/school/Calgary/ML_Work/yolov3-tf2/checkpoints/yolov3.tf'
    # Path to a text file containing all classes, one per line
    classes = '/Users/justinbutler/Desktop/school/Calgary/ML_Work/yolov3-tf2/shapes/shapes.names'
    # Usually fit
    # mode = 'fit'  # Can be 'fit', 'eager_fit', 'eager_tf', 'valid'
    mode = 'fit'
    '''
    fit: model.fit,
    eager_fit: model.fit(run_eagerly=True),
    eager_tf: custom GradientTape
    '''

    # Usually darknet
    transfer = 'none'
    '''
    none: Training from scratch,
    darknet: Transfer darknet,
    no_output: Transfer all but output,
    frozen: Transfer and freeze all,
    fine_tune: Transfer all and freeze darknet only,
    pre: Use a pre-trained model for validation
    '''
    image_size = 416
    num_epochs = 1
    batch_size = 8
    learning_rate = 1e-3
    num_classes = 4
    # number of classes in the `weights` file, if different; useful for transfer learning with a different number of classes
    weight_num_classes = 80
    iou_threshold = 0.5

    # saved_weights_path = '/Users/justinbutler/Desktop/school/Calgary/ML_Work/yolov3-tf2/weights/'
    saved_weights_path = '/home/justin/ml_models/yolov3-tf2/weights/shapes_{}.tf'.format(
        num_epochs)
    anchors = yolo_anchors
    anchor_masks = yolo_anchor_masks

    # Training dataset
    #dataset_train = tf.data.TFRecordDataset(train_path)
    #dataset_val = tf.data.TFRecordDataset(valid_path)

    dataset_train = load_tfrecord_dataset(train_path, classes, image_size)
    dataset_train = dataset_train.shuffle(buffer_size=512)
    dataset_train = dataset_train.batch(batch_size)
    #dataset_train = dataset_train.map(lambda x, y: (
    #    transform_images(x, image_size),
    #    transform_targets(y, anchors, anchor_masks, image_size)))
    #dataset_train = dataset_train.prefetch(
    #    buffer_size=tf.data.experimental.AUTOTUNE)

    dataset_val = load_tfrecord_dataset(valid_path, classes, image_size)
    dataset_val = dataset_val.shuffle(buffer_size=512)
    dataset_val = dataset_val.batch(batch_size)
    #dataset_val = dataset_val.map(lambda x, y: (
    #    transform_images(x, image_size),
    #    transform_targets(y, anchors, anchor_masks, image_size)))

    # Create model in training mode
    yolo = models.YoloV3(image_size, training=True, classes=num_classes)

    model_pretrained = YoloV3(image_size,
                              training=True,
                              classes=weight_num_classes or num_classes)
    model_pretrained.load_weights(weights_path)

    # Which weights to start with?
    print('Loading Weights...')
    #yolo.load_weights(weights_path)

    yolo.get_layer('yolo_darknet').set_weights(
        model_pretrained.get_layer('yolo_darknet').get_weights())
    freeze_all(yolo.get_layer('yolo_darknet'))

    optimizer = tf.keras.optimizers.Adam(lr=learning_rate)
    loss = [
        YoloLoss(anchors[mask], classes=num_classes) for mask in anchor_masks
    ]  # Passing loss as a list might sometimes fail? dict might be better?

    yolo.compile(optimizer=optimizer,
                 loss=loss,
                 run_eagerly=(mode == 'eager_fit'))
    callbacks = [
        ReduceLROnPlateau(verbose=1),
        EarlyStopping(patience=3, verbose=1),
        ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                        verbose=1,
                        save_weights_only=True),
        TensorBoard(log_dir='logs')
    ]

    history = yolo.fit(dataset_train,
                       epochs=num_epochs,
                       callbacks=callbacks,
                       validation_data=dataset_val)
    yolo.save_weights(saved_weights_path)

    # Detect/ROC
    model = YoloV3(image_size, training=False, classes=num_classes)
    model.load_weights(saved_weights_path).expect_partial()

    batch_size = 1

    val_dataset = load_tfrecord_dataset(valid_path, classes, image_size)
    val_dataset = val_dataset.batch(batch_size)

    val_dataset = val_dataset.map(
        lambda x, y: (transform_images(x, image_size),
                      transform_targets(y, anchors, anchor_masks, image_size)))

    images = []
    for img, labs in val_dataset:
        img = np.squeeze(img)
        images.append(img)

    predictions = []

    evaluator = Evaluator(iou_thresh=iou_threshold)

    # labels - (N, grid, grid, anchors, [x, y, w, h, obj, class])
    boxes, scores, classes, num_detections = model.predict(val_dataset)
    # boxes -> (num_imgs, num_detections (200), box coords (4))
    # scores -> (num_imgs, num_detections)
    # classes -> (num_imgs, num_detections)
    # num_detections -> num_imgs

    # Aim for labels shape (per batch): [num_imgs, 3x[num_boxes x [x1,y1,x2,y2,score,class]]
    # full_labels = [label for _, label in val_dataset]

    # Shape : [Num images, 3 scales, grid, grid, anchor, 6 ]

    filtered_labels = []

    for _, label in val_dataset:
        img_labels = []
        # Label has shape [3 scales x[1, grid, grid, 3, 6]]
        for scale in label:
            # Shape [1, grid, grid, 3, 6]
            scale = np.asarray(scale)
            grid = scale.shape[1]

            scale2 = np.reshape(scale, (3, grid * grid, 6))
            # Shape: [3, grid*grid, 6]

            # collect this scale's ground-truth boxes (obj > 0) across all anchors
            filtered_anchors = []
            for anchor in scale2:
                for box in anchor:
                    if box[4] > 0:
                        filtered_anchors.append(np.asarray(box))
            img_labels.append(filtered_anchors)

        img_labels = np.asarray(img_labels)
        filtered_labels.append(img_labels)

    print(len(filtered_labels))
    print(len(filtered_labels[0]))
    print(len(filtered_labels[0][2]))

    # i is the num_images index
    # predictions = [np.hstack([boxes[i][x], scores[i][x], classes[i][x]]) for i in range(len(num_detections)) for x in range(len(scores[i])) if scores[i][x] > 0]
    for img in range(len(num_detections)):
        row = []
        for sc in range(len(scores[img])):
            if scores[img][sc] > 0:
                row.append(
                    np.hstack([
                        boxes[img][sc] * image_size, scores[img][sc],
                        classes[img][sc]
                    ]))
        predictions.append(np.asarray(row))

    predictions = np.asarray(
        predictions)  # numpy array of shape [num_imgs x num_preds x 6]

    if len(predictions) == 0:  # No predictions made
        print('No predictions made - exiting.')
        exit()

    # Predictions shape: [num_imgs x num_preds x[box coords(4), conf, classes]]
    # Box coords should be in format x1 y1 x2 y2

    # Labels shape: [num_imgs, 3x[num_boxes x [x1,y1,x2,y2,score,class]]
    evaluator(predictions, filtered_labels, images)  # Check gts box coords
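
# freeze_all is imported from the project's utils module; a minimal sketch of
# the behaviour assumed here (recursively marking all layers untrainable):

import tensorflow as tf

def freeze_all(model, frozen=True):
    # Mark the model/layer itself and, for nested Keras models, all sublayers.
    model.trainable = not frozen
    if isinstance(model, tf.keras.Model):
        for layer in model.layers:
            freeze_all(layer, frozen)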
Example #3
def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    times = []

    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except:
        vid = cv2.VideoCapture(FLAGS.video)

    out = None

    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    while True:
        _, img = vid.read()

        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            continue

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)

        t2 = time.time()
        times.append(t2 - t1)
        times = times[-20:]

        img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
        if "car" in class_names:
            print("car detect")
        img = cv2.putText(
            img, "Time: {:.2f}ms".format(sum(times) / len(times) * 1000),
            (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        if FLAGS.output:
            out.write(img)
        cv2.imshow('output', img)
        if cv2.waitKey(1) == ord('q'):
            break

    cv2.destroyAllWindows()
Example #4
def main(_argv):
    th = threading.Thread(target=arduino_read)
    th.start()
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')
    CUDA = torch.cuda.is_available()
    times = []

    inp_dim = 416
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    try:
        vid = cv2.VideoCapture(0)  #cam number - usb=1     vid = cap
    except:
        vid = cv2.VideoCapture(FLAGS.video)
    # vid = cv2.VideoCapture(0)

    out = None

    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    tracker = cv2.TrackerKCF_create()
    fps = None
    initBB = None
    # initBB = True
    redetect = False
    failCnt = 0
    global start
    check_start = None

    while True:
        ret, frame = vid.read()  #img = frame
        # if frame is None:
        #     logging.warning("Empty Frame")
        #     time.sleep(0.1)
        #     continue
        # frame = imutils.resize(frame, width = 500)
        (H, W) = frame.shape[:2]

        img_in = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)
        boxes, scores, classes, nums = yolo.predict(img_in)

        # t1 = time.time()
        # boxes, scores, classes, nums = yolo.predict(img_in)
        # t2 = time.time()
        # times.append(t2-t1)
        # times = times[]

        # print('initBB = ', initBB)
        if initBB is not None:
            (success, box) = tracker.update(frame)
            if start == 'q':
                initBB = None
                tracker = cv2.TrackerMedianFlow_create()
                ardu_stop()
            elif success:
                failCnt = 0
                (x, y, w, h) = [int(v) for v in box]
                cv2.rectangle(frame, (x + 30, y), (x + w - 30, y + h),
                              (0, 255, 0), 2)
                ardu(box)
                # frame = draw_outputs(frame, (boxes, scores, classes, nums), class_names)
            else:
                failCnt += 1
                ardu_detect()
                if failCnt > 50:
                    redetect = True
                    initBB = None
                    tracker = cv2.TrackerKCF_create()
            # fps.update()
            # fps.stop()
            # info = [
            #     ('Tracker', 'kcf'),
            #     ('Success', 'yes' if success else 'No'),
            #     ('FPS', '{:.2f}'.format(fps.fps())),
            # ]
            #
            # for (i, (k, v)) in enumerate(info):
            #     text = '{}:{}'.format(k, v)
            #     cv2.putText(frame, text, (10, H-((i*20)+20)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255, 2))

        # if redetect:
        #     ret, frame = vid.read()
        #     gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        #     template = cv2.imread('./user_faces/user.jpg', 0)
        #     w, h = template.shape[::-1]
        #
        #     res = cv2.matchTemplate(gray, template, cv2.TM_SQDIFF)
        #     min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
        #     top_left = min_loc
        #     bottom_right = (top_left[0] + w, top_left[1] + h)
        #     cv2.rectangle(frame, top_left, bottom_right, (0, 255, 0), 1)
            """
            imgray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
            w, h = imgray.shape[::-1]
            templ = cv2.imread('./user.jpg', cv2.IMREAD_GRAYSCALE)
            templ_h, templ_w = templ.shape[::-1]
            res = cv2.matchTemplate(imgray, templ, cv2.TM_CCOEFF_NORMED)
            loc = np.where(res >= 0.6)
            for pt in zip(*loc[::-1]):
                cv2.rectangle(frame, pt, (pt[0] + w, pt[1] + h), (0, 255, 0), 1)
            """
        key = cv2.waitKey(1) & 0xFF
        if key == ord('s') or start == 's':
            check_start = start_tracker(frame, (boxes, scores, classes, nums),
                                        class_names)
            if check_start is not None:
                start = 'a'
                initBB = tuple(check_start)
                tracker.init(frame, initBB)
                x, y, w, h = check_start
                frame_user = frame[y:y + h, x + 20:x + w - 20]
                cv2.imwrite('./user_faces/user.jpg', frame_user)
                # fps = FPS().start()

            # img, orig_im, dim = prep_image(frame, inp_dim)
            # img = prep_image(frame, inp_dim)

            # cv2.imshow('img', frame)

            # if CUDA:
            #     # im_dim = im_dim.cuda()
            #     img = img.cuda()
            #
            # with torch.no_grad():
            #     output = model(Variable(img),CUDA)
            # output = write_results(output, confidence, num_classes, nms = True, nms_conf = nms_thesh)
            # try:
            #     if output[0][0].tolist() == 0:
            #         #tensor[0][0] mean the category of predicted class and 0 is person in coco.names
            #         initBB = np.array([int(i) for i in output[0][1:5].tolist])
            #         x, y, w, h = initBB
            #         initBB = (x, y, w, h)
            #         frame_user = frame[x-10:w+10, y-10:h+10]
            #         cv2.imwrite('./user_faces/user.jpg', frame_user)
            #         tracker.init(frame, initBB)
            #         fps = FPS().start()
            # except:
            #     print("다시해주세요")
        #print(initBB)
        cv2.imshow('img', frame)

        if key == ord('q'):
            ardu_stop()
            break

    vid.release()

    cv2.destroyAllWindows()
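
# start_tracker is a project helper not shown in this snippet. A sketch of what
# it is assumed to do: pick the first detected person from the YOLO outputs and
# return its box as (x, y, w, h) in pixels for tracker.init (hypothetical):

import numpy as np

def start_tracker(frame, outputs, class_names):
    boxes, scores, classes, nums = outputs
    h, w = frame.shape[:2]
    for i in range(nums[0]):
        if class_names[int(classes[0][i])] == 'person':
            # YOLO boxes are relative [x1, y1, x2, y2]; scale to pixels.
            x1, y1, x2, y2 = np.array(boxes[0][i]) * [w, h, w, h]
            return int(x1), int(y1), int(x2 - x1), int(y2 - y1)
    return None  # no person found; the caller checks for None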
Example #5
from MY_print_methods import print_class
from RAGHAV_object_tracker import object_tracker

from collections import deque

p = print_class()
ot = object_tracker()
is_init_frame = True  # Flag is necessary to setup object tracking properly
prev_frame_objects = []
cur_frame_objects = []
font = cv2.FONT_HERSHEY_SIMPLEX  # OpenCV font for drawing text on frame

crash_flag = False

class_names = [c.strip() for c in open('./data/labels/coco.names').readlines()]
yolo = YoloV3(classes=len(class_names))
yolo.load_weights('./weights/yolov3.tf')

max_cosine_distance = 0.5
nn_budget = None
nms_max_overlap = 0.8

model_filename = 'model_data/mars-small128.pb'
encoder = gdet.create_box_encoder(model_filename, batch_size=1)
metric = nn_matching.NearestNeighborDistanceMetric('cosine',
                                                   max_cosine_distance,
                                                   nn_budget)
tracker = Tracker(metric)

vid = cv2.VideoCapture('./data/video/' + name +
                       '.mp4')  # 28, 26, 30 (nice test for mTracker)
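
# A per-frame sketch of how these DeepSORT pieces are typically wired together
# (hypothetical loop; Detection is from the deep_sort package, boxes are in
# top-left-width-height format):

from deep_sort.detection import Detection

def track_frame(frame, boxes_tlwh, confidences):
    # Appearance features for each box, then one predict/update tracker step.
    features = encoder(frame, boxes_tlwh)
    detections = [Detection(bbox, conf, feat) for bbox, conf, feat
                  in zip(boxes_tlwh, confidences, features)]
    tracker.predict()
    tracker.update(detections)
    return [t for t in tracker.tracks if t.is_confirmed()]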
Example #6
def setup_model():
    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size,
                           training=True,
                           classes=FLAGS.num_classes)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True, classes=FLAGS.num_classes)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    # Configure the model for transfer learning
    if FLAGS.transfer == 'none':
        pass  # Nothing to do
    elif FLAGS.transfer in ['darknet', 'no_output']:
        # Darknet transfer is a special case that works
        # with incompatible number of classes
        # reset top layers
        if FLAGS.tiny:
            model_pretrained = YoloV3Tiny(FLAGS.size,
                                          training=True,
                                          classes=FLAGS.weights_num_classes
                                          or FLAGS.num_classes)
        else:
            model_pretrained = YoloV3(FLAGS.size,
                                      training=True,
                                      classes=FLAGS.weights_num_classes
                                      or FLAGS.num_classes)
        model_pretrained.load_weights(FLAGS.weights)

        if FLAGS.transfer == 'darknet':
            model.get_layer('yolo_darknet').set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
            freeze_all(model.get_layer('yolo_darknet'))
        elif FLAGS.transfer == 'no_output':
            for l in model.layers:
                if not l.name.startswith('yolo_output'):
                    l.set_weights(
                        model_pretrained.get_layer(l.name).get_weights())
                    freeze_all(l)
    else:
        # All other transfer require matching classes
        model.load_weights(FLAGS.weights)
        if FLAGS.transfer == 'fine_tune':
            # freeze darknet and fine tune other layers
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        elif FLAGS.transfer == 'frozen':
            # freeze everything
            freeze_all(model)

    optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)
    loss = [
        YoloLoss(anchors[mask], classes=FLAGS.num_classes)
        for mask in anchor_masks
    ]

    model.compile(optimizer=optimizer,
                  loss=loss,
                  run_eagerly=(FLAGS.mode == 'eager_fit'))

    return model, optimizer, loss, anchors, anchor_masks
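
# A minimal usage sketch for setup_model, assuming the absl FLAGS used above
# (including a hypothetical FLAGS.epochs) are defined and parsed, and that the
# datasets are already batched and transformed for these anchors/masks:

def train(train_dataset, val_dataset):
    model, optimizer, loss, anchors, anchor_masks = setup_model()
    return model.fit(train_dataset,
                     epochs=FLAGS.epochs,
                     validation_data=val_dataset)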
Example #7
def main(_argv):
    ## progress bar
    # Initiating the SORT tracker
    mot_tracker = Sort()
    # starting yolov3 and related processes
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)
    yolo = YoloV3(classes=FLAGS.num_classes)
    yolo.load_weights(FLAGS.weights)
    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    vid = cv2.VideoCapture(FLAGS.data + '/raw.avi')
    fps = int(vid.get(cv2.CAP_PROP_FPS))
    codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
    if FLAGS.output == 'None':
        FLAGS.output = f'{FLAGS.data}/Track_output.avi'
    colours = np.random.rand(300, 3) * 255
    writer = [FLAGS.output, codec, fps]
    #out = cv2.VideoWriter(FLAGS.output, codec, fps, (800, 800))
    df_RFID_cage = rm.load_RFID(FLAGS.data + '/RFID_data_all.csv')
    #starting mouse tracker processing
    vid_length = len(df_RFID_cage)
    pbar = tqdm(total=vid_length)
    mouse_tracks = rm.mouse_tracker(tags, FLAGS.data, df_RFID_cage, writer,
                                    vid_length)
    #starting detection and sort loop
    frame_count = 0
    t1 = time.time()
    print('starting')
    while vid.isOpened():  #reading frames
        ret, img = vid.read()
        #yolo process
        if ret:
            frame_count += 1
            img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img_in = tf.expand_dims(img_in, 0)
            img_in = transform_images(img_in, FLAGS.size)
            boxes, scores, classes, nums = yolo.predict(img_in)
            #print(scores)
            objects, bb_start, bb_end, probability = get_object_details(
                img, (boxes, scores, classes, nums), class_names)
            ds_boxes = []  # array to feed into sort tracker
            if nums[0] != 0:
                for i in range(nums[0]):
                    center_coords, standard_cords = get_center(
                        bb_start[i], bb_end[i])
                    standard_cords.append(round(probability[i], 4))
                    ds_boxes.append(standard_cords)
            ds_boxes_array = np.asarray(ds_boxes)
            trackers = mot_tracker.update(ds_boxes_array)
            sort_tracks = []
            for obj in trackers:
                xmin, ymin, xmax, ymax, index = int(obj[0]), int(
                    obj[1]), int(obj[2]), int(obj[3]), int(obj[4])
                sort_tracker = [xmin, ymin, xmax, ymax, index]
                sort_tracks.append(sort_tracker)
            sort_tracks = sorted(sort_tracks, key=lambda x: x[4])
            mouse_tracks.update(frame_count, sort_tracks)
            pbar.update(1)
        else:
            if frame_count == 0:
                print('Unable to open video, please check video path')
                break
            else:
                vid.release()
                break
    t2 = time.time()
    time_yolo_sort = t2 - t1
    print(
        f'time taken for yolo_sort on {frame_count} frames: {time_yolo_sort} seconds'
    )
    #associating RFID tag
    print('Associating RFID with Sort_ID')
    t3 = time.time()
    df_RFID_tracks, df_matchs = mouse_tracks.evaulate()
    t4 = time.time()
    RFID_matching_time = t4 - t3
    print(
        f'RFID matching of {frame_count} frames took {RFID_matching_time} seconds'
    )
    print('Writing Video')
    vid = cv2.VideoCapture(FLAGS.data + '/raw.avi')
    frame_count = 0
    pbar = tqdm(total=len(df_RFID_cage))
    df_RFID_tracks.to_csv(f'{FLAGS.data}/RFID_tracks.csv')
    df_matchs.to_csv(f'{FLAGS.data}/RFID_matches.csv')
    t5 = time.time()
    while vid.isOpened():
        ret, img = vid.read()
        if ret:
            frame_count += 1
            edited_img = mouse_tracks.write_video(frame_count, img,
                                                  FLAGS.Video_type)
            if eval(FLAGS.showvid):
                cv2.imshow('output', edited_img)
                if cv2.waitKey(1) == ord('q'):
                    break
            else:
                pass
            pbar.update(1)
        else:
            break
    t6 = time.time()
    write_time = t6 - t5
    print(f'Writing {frame_count} frames took {write_time} seconds')
    print('All processes completed')
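
# get_center and get_object_details come from the surrounding project. A sketch
# of get_center under the assumption that bb_start/bb_end are the top-left and
# bottom-right corners in pixels (hypothetical):

def get_center(bb_start, bb_end):
    # Returns the box centre plus the [x1, y1, x2, y2] list that SORT expects;
    # the caller appends the detection score as the fifth element.
    x1, y1 = bb_start
    x2, y2 = bb_end
    center_coords = ((x1 + x2) // 2, (y1 + y2) // 2)
    standard_cords = [x1, y1, x2, y2]
    return center_coords, standard_cords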
Example #8
def main(_argv):
    # Change flag values
    if FLAGS.height is None:
        FLAGS.height = FLAGS.size
    if FLAGS.width is None:
        FLAGS.width = FLAGS.size
    size = (FLAGS.height, FLAGS.width)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    if FLAGS.tiny:
        model = YoloV3Tiny(size,
                           training=True,
                           classes=FLAGS.num_classes,
                           recurrent=FLAGS.recurrent)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(size,
                       training=True,
                       classes=FLAGS.num_classes,
                       recurrent=FLAGS.recurrent)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(FLAGS.dataset,
                                                      FLAGS.classes, size)
    else:
        train_dataset = dataset.load_fake_dataset()
    train_dataset = train_dataset.shuffle(buffer_size=8)
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(lambda x, y: (dataset.transform_images(
        x, size), dataset.transform_targets(y, anchors, anchor_masks, size)))
    if FLAGS.recurrent:
        train_dataset = train_dataset.map(
            lambda x, y: (dataset.get_recurrect_inputs(
                x, y, anchors, anchor_masks, FLAGS.num_classes), y))
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(FLAGS.val_dataset,
                                                    FLAGS.classes, size)
    else:
        val_dataset = dataset.load_fake_dataset()
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(lambda x, y: (dataset.transform_images(
        x, size), dataset.transform_targets(y, anchors, anchor_masks, size)))
    if FLAGS.recurrent:
        val_dataset = val_dataset.map(
            lambda x, y: (dataset.get_recurrect_inputs(
                x, y, anchors, anchor_masks, FLAGS.num_classes), y))

    # Configure the model for transfer learning
    if FLAGS.transfer != 'none':
        # if we need all weights, no need to create another model
        if FLAGS.transfer == 'all':
            model.load_weights(FLAGS.weights)

        # else, we need only some of the weights
        # create appropriate model_pretrained, load all weights and copy the ones we need
        else:
            if FLAGS.tiny:
                model_pretrained = YoloV3Tiny(size,
                                              training=True,
                                              classes=FLAGS.weights_num_classes
                                              or FLAGS.num_classes,
                                              recurrent=FLAGS.recurrent)
            else:
                model_pretrained = YoloV3(size,
                                          training=True,
                                          classes=FLAGS.weights_num_classes
                                          or FLAGS.num_classes,
                                          recurrent=FLAGS.recurrent)
            # load pretrained weights
            model_pretrained.load_weights(FLAGS.weights)
            # transfer darknet
            darknet = model.get_layer('yolo_darknet')
            darknet.set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
            # transfer 'yolo_conv_i' layer weights
            if FLAGS.transfer in [
                    'yolo_conv', 'yolo_output_conv', 'yolo_output'
            ]:
                for l in model.layers:
                    if l.name.startswith('yolo_conv'):
                        model.get_layer(l.name).set_weights(
                            model_pretrained.get_layer(l.name).get_weights())
            # transfer 'yolo_output_i' first conv2d layer
            if FLAGS.transfer == 'yolo_output_conv':
                # transfer tiny output conv2d
                for l in model.layers:
                    if l.name.startswith('yolo_output'):
                        # get and set the weights of the appropriate layers
                        model.get_layer(l.name).layers[1].set_weights(
                            model_pretrained.get_layer(
                                l.name).layers[1].get_weights())
                        # should I freeze batch_norm as well?
            # transfer 'yolo_output_i' layer weights
            if FLAGS.transfer == 'yolo_output':
                for l in model.layers:
                    if l.name.startswith('yolo_output'):
                        model.get_layer(l.name).set_weights(
                            model_pretrained.get_layer(l.name).get_weights())
    # no transfer learning
    else:
        pass

    # freeze layers, if requested
    if FLAGS.freeze != 'none':
        if FLAGS.freeze == 'all':
            freeze_all(model)
        if FLAGS.freeze in [
                'yolo_darknet',
                'yolo_conv', 'yolo_output_conv', 'yolo_output'
        ]:
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        if FLAGS.freeze in ['yolo_conv', 'yolo_output_conv', 'yolo_output']:
            for l in model.layers:
                if l.name.startswith('yolo_conv'):
                    freeze_all(l)
        if FLAGS.freeze == 'yolo_output_conv':
            if FLAGS.tiny:
                # freeze the appropriate layers
                freeze_all(model.layers[4].layers[1])
                freeze_all(model.layers[5].layers[1])
            else:
                # freeze the appropriate layers
                freeze_all(model.layers[5].layers[1])
                freeze_all(model.layers[6].layers[1])
                freeze_all(model.layers[7].layers[1])
        if FLAGS.freeze == 'yolo_output':
            for l in model.layers:
                if l.name.startswith('yolo_output'):
                    freeze_all(l)
    # freeze nothing
    else:
        pass

    optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)
    loss = [
        YoloLoss(anchors[mask], classes=FLAGS.num_classes)
        for mask in anchor_masks
    ]

    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(zip(grads,
                                              model.trainable_variables))

                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_loss.update_state(total_loss)

            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                logging.info("{}_val_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)

            logging.info("{}, train: {}, val: {}".format(
                epoch,
                avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))

            avg_loss.reset_states()
            avg_val_loss.reset_states()
            model.save_weights('checkpoints/yolov3_train_{}.tf'.format(epoch))
    else:
        model.compile(optimizer=optimizer,
                      loss=loss,
                      run_eagerly=(FLAGS.mode == 'eager_fit'))

        callbacks = [
            ReduceLROnPlateau(verbose=1),
            EarlyStopping(patience=3, verbose=1),
            ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                            verbose=1,
                            save_weights_only=True),
            TensorBoard(log_dir='logs')
        ]

        history = model.fit(train_dataset,
                            epochs=FLAGS.epochs,
                            callbacks=callbacks,
                            validation_data=val_dataset)
Example #9
def main(_argv):

    try:
        tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
        print('Running on TPU ', tpu.cluster_spec().as_dict()['worker'])
    except:
        tpu = None
    if tpu:
        tf.config.experimental_connect_to_cluster(tpu)
        tf.tpu.experimental.initialize_tpu_system(tpu)
        strategy = tf.distribute.experimental.TPUStrategy(tpu)
    else:
        strategy = tf.distribute.get_strategy()
    #print("REPLICAS: ", strategy.num_replicas_in_sync)
    FLAGS.batch_size = FLAGS.batch_size * strategy.num_replicas_in_sync
    with strategy.scope():
        if FLAGS.tiny:
            model = YoloV3Tiny(FLAGS.size,
                               training=True,
                               classes=FLAGS.num_classes)
            anchors = yolo_tiny_anchors
            anchor_masks = yolo_tiny_anchor_masks
        else:
            model = YoloV3(FLAGS.size,
                           training=True,
                           classes=FLAGS.num_classes)
            anchors = yolo_anchors
            anchor_masks = yolo_anchor_masks

    train_dataset = dataset.load_fake_dataset()
    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(FLAGS.dataset,
                                                      FLAGS.classes,
                                                      FLAGS.size)
    train_dataset = train_dataset.shuffle(buffer_size=FLAGS.buffer_size)
    train_dataset = train_dataset.batch(FLAGS.batch_size, drop_remainder=True)
    train_dataset = train_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    val_dataset = dataset.load_fake_dataset()
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(FLAGS.val_dataset,
                                                    FLAGS.classes, FLAGS.size)
    val_dataset = val_dataset.batch(FLAGS.batch_size, drop_remainder=True)
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))

    # Configure the model for transfer learning
    if FLAGS.transfer == 'none':
        pass  # Nothing to do
    elif FLAGS.transfer in ['darknet', 'no_output']:
        # Darknet transfer is a special case that works
        # with incompatible number of classes

        # reset top layers
        if FLAGS.tiny:
            model_pretrained = YoloV3Tiny(FLAGS.size,
                                          training=True,
                                          classes=FLAGS.weights_num_classes
                                          or FLAGS.num_classes)
        else:
            model_pretrained = YoloV3(FLAGS.size,
                                      training=True,
                                      classes=FLAGS.weights_num_classes
                                      or FLAGS.num_classes)
        model_pretrained.load_weights(FLAGS.weights)

        if FLAGS.transfer == 'darknet':
            model.get_layer('yolo_darknet').set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
            freeze_all(model.get_layer('yolo_darknet'))

        elif FLAGS.transfer == 'no_output':
            for l in model.layers:
                if not l.name.startswith('yolo_output'):
                    l.set_weights(
                        model_pretrained.get_layer(l.name).get_weights())
                    freeze_all(l)

    else:
        # All other transfer require matching classes
        model.load_weights(FLAGS.weights)
        if FLAGS.transfer == 'fine_tune':
            # freeze darknet and fine tune other layers
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        elif FLAGS.transfer == 'frozen':
            # freeze everything
            freeze_all(model)

    if FLAGS.optimizer == 'Adam':
        optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)
    elif FLAGS.optimizer == 'nAdam':
        optimizer = tf.keras.optimizers.Nadam(lr=FLAGS.learning_rate)
    elif FLAGS.optimizer == 'Adagrad':
        optimizer = tf.keras.optimizers.Adagrad(lr=FLAGS.learning_rate)
    elif FLAGS.optimizer == 'RMSprop':
        optimizer = tf.keras.optimizers.RMSprop(lr=FLAGS.learning_rate,
                                                rho=0.9)
    else:
        raise ValueError('Unknown optimizer: {}'.format(FLAGS.optimizer))

    loss = [
        YoloLoss(anchors[mask], classes=FLAGS.num_classes)
        for mask in anchor_masks
    ]

    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(zip(grads,
                                              model.trainable_variables))

                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_loss.update_state(total_loss)

            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                logging.info("{}_val_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)

            logging.info("{}, train: {}, val: {}".format(
                epoch,
                avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))

            avg_loss.reset_states()
            avg_val_loss.reset_states()
            model.save_weights('checkpoints/yolov3_train_{}.tf'.format(epoch))
    else:
        model.compile(optimizer=optimizer,
                      loss=loss,
                      run_eagerly=(FLAGS.mode == 'eager_fit'))
        if tpu:
            callbacks = [
                ReduceLROnPlateau(verbose=1),
                EarlyStopping(patience=3, verbose=1),
                ModelCheckpoint(
                    'yolov3_train_{epoch}.h5',
                    save_weights_only=True,
                    verbose=1,
                    period=FLAGS.period
                )  #, monitor='val_loss', mode='min', save_best_only=True), #1000
            ]
        else:
            callbacks = [
                ReduceLROnPlateau(verbose=1),
                #EarlyStopping(patience=3, verbose=1),
                ModelCheckpoint('./checkpoints/yolov3_train_{epoch}.tf',
                                verbose=1,
                                save_weights_only=True,
                                period=FLAGS.period),  #1000
                TensorBoard(log_dir='logs')
            ]

        history = model.fit(train_dataset,
                            epochs=FLAGS.epochs,
                            callbacks=callbacks,
                            validation_data=val_dataset)
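
Note: the transfer-learning branches in these training scripts call freeze_all from the yolov3-tf2 utils module, which is not shown in this listing. A minimal sketch, assuming the helper from zzh8829/yolov3-tf2:

import tensorflow as tf

def freeze_all(model, frozen=True):
    # recursively mark a layer (or nested model) and all sub-layers as non-trainable
    model.trainable = not frozen
    if isinstance(model, tf.keras.Model):
        for l in model.layers:
            freeze_all(l, frozen)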
Example #10
def main(_argv):
    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size, training=True,
                           classes=FLAGS.num_classes)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True, classes=FLAGS.num_classes)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    train_dataset = dataset.load_fake_dataset()
    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(
            '{}*.tfrecord'.format(flags.FLAGS.dataset), FLAGS.classes)
    train_dataset = train_dataset.shuffle(buffer_size=1024)  # TODO: not 1024
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.num_classes)))
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    val_dataset = dataset.load_fake_dataset()
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(
            '{}*.tfrecord'.format(flags.FLAGS.val_dataset), FLAGS.classes)
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.num_classes)))

    if FLAGS.transfer != 'none':
        if FLAGS.transfer != 'continue':
            model_pretrained = YoloV3(416, training=True, classes=80)
            model_pretrained.load_weights(FLAGS.weights)
            model.get_layer('yolo_darknet').set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())

        if FLAGS.transfer == 'fine_tune':
            # freeze darknet
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        elif FLAGS.transfer == 'frozen':
            # freeze everything
            freeze_all(model)
        else:
            # reset top layers
            if FLAGS.tiny:  # get initial weights
                init_model = YoloV3Tiny(
                    FLAGS.size, training=True, classes=FLAGS.num_classes)
            else:
                init_model = YoloV3(
                    FLAGS.size, training=True, classes=FLAGS.num_classes)

            if FLAGS.transfer == 'continue':
                model = init_model
                model.load_weights(FLAGS.weights)
            elif FLAGS.transfer == 'darknet':
                for l in model.layers:
                    if l.name != 'yolo_darknet' and l.name.startswith('yolo_'):
                        l.set_weights(init_model.get_layer(
                            l.name).get_weights())
                    else:
                        freeze_all(l)
            elif FLAGS.transfer == 'no_output':
                for l in model.layers:
                    if l.name.startswith('yolo_output'):
                        l.set_weights(init_model.get_layer(
                            l.name).get_weights())
                    else:
                        freeze_all(l)

    optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)
    loss = [YoloLoss(anchors[mask], classes=FLAGS.num_classes)
            for mask in anchor_masks]

    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(
                    zip(grads, model.trainable_variables))

                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_loss.update_state(total_loss)

            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                logging.info("{}_val_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)

            logging.info("{}, train: {}, val: {}".format(
                epoch,
                avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))

            avg_loss.reset_states()
            avg_val_loss.reset_states()
            # model.save_weights(
            #     'checkpoints/yolov3_train_{}.tf'.format(epoch))
    else:
        model.compile(optimizer=optimizer, loss=loss,
                      run_eagerly=(FLAGS.mode == 'eager_fit'),
                      #metrics=[tf.keras.metrics.MeanIoU(num_classes=FLAGS.num_classes)]
                      )

        callbacks = [
            #ReduceLROnPlateau(verbose=1),
            #EarlyStopping(patience=3, verbose=1),
            ModelCheckpoint('/home/Malte/outputs/yolov3_train_{epoch}.tf',
                            verbose=1, save_weights_only=True),
            #TensorBoard(log_dir='logs')
        ]

        history = model.fit(train_dataset,
                            epochs=FLAGS.epochs,
                            callbacks=callbacks)

        model.save_weights('/home/Malte/outputs/yolov3.tf')
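
These train scripts read their configuration from absl flags defined at module level, outside the functions shown here. A hedged sketch of the definitions the code above assumes — the flag names are taken from the usages above, the defaults are illustrative only:

from absl import flags

flags.DEFINE_string('dataset', '', 'path to the training TFRecord(s)')
flags.DEFINE_string('val_dataset', '', 'path to the validation TFRecord(s)')
flags.DEFINE_string('weights', './checkpoints/yolov3.tf', 'path to weights file')
flags.DEFINE_string('classes', './data/coco.names', 'path to classes file')
flags.DEFINE_boolean('tiny', False, 'train yolov3-tiny instead of yolov3')
flags.DEFINE_integer('size', 416, 'input image size')
flags.DEFINE_integer('epochs', 2, 'number of epochs')
flags.DEFINE_integer('batch_size', 8, 'batch size')
flags.DEFINE_float('learning_rate', 1e-3, 'learning rate')
flags.DEFINE_integer('num_classes', 80, 'number of classes in the model')
flags.DEFINE_integer('weights_num_classes', None,
                     'number of classes in the pretrained weights file')
flags.DEFINE_enum('mode', 'fit', ['fit', 'eager_fit', 'eager_tf'],
                  'training mode')
flags.DEFINE_enum('transfer', 'none',
                  ['none', 'darknet', 'no_output', 'frozen', 'fine_tune',
                   'continue'],
                  'transfer-learning mode')

FLAGS = flags.FLAGS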
def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    # Load weights
    yolo.load_weights(FLAGS.weights).expect_partial()
    logging.info('weights loaded')

    # Load classnames
    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    if FLAGS.single:
        if FLAGS.tfrecord:
            dataset = load_tfrecord_dataset(FLAGS.tfrecord, FLAGS.classes,
                                            FLAGS.size)
            if FLAGS.shuffle:
                dataset = dataset.shuffle(128)
            img_raw, _label = next(iter(dataset.take(1)))
        else:
            image = random.choice(os.listdir(FLAGS.BILDE_MAPPE))
            print(f"Image chosen: {image}")
            img_raw = tf.image.decode_image(open(
                FLAGS.BILDE_MAPPE + "/" + image, 'rb').read(),
                                            channels=3)
        # (1080, 1920, 3) --> (1, 1080, 1920, 3)
        img = tf.expand_dims(img_raw, 0)

        # Transform the image to the desired size (416, 416, 3)
        img = transform_images(img, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo(img)
        t2 = time.time()
        logging.info('time: {}'.format(t2 - t1))

        logging.info('detections:')
        for i in range(nums[0]):
            logging.info(
                f'\t{class_names[int(classes[0][i])]}, {np.array(scores[0][i])}, {np.array(boxes[0][i])}'
            )

        img = cv2.cvtColor(img_raw.numpy(), cv2.COLOR_RGB2BGR)

        img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
        cv2.imwrite(FLAGS.output, img)
        logging.info('output saved to: {}'.format(FLAGS.output))

    else:
        if FLAGS.tfrecord:
            dataset = load_tfrecord_dataset(FLAGS.tfrecord, FLAGS.classes,
                                            FLAGS.size)
            if FLAGS.shuffle:
                dataset = dataset.shuffle(512)
            dataset = dataset.as_numpy_iterator()

            times = []
            for img_raw, _label in tqdm(dataset):
                img = transform_images(img_raw, FLAGS.size)

                t1 = time.time()
                boxes, scores, classes, nums = yolo(img)
                t2 = time.time()
                times.append(t2 - t1)

            mean_times = np.mean(times)
            print(
                f"Mean detection time over {len(times)} images was {mean_times}s"
            )
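
All of these scripts funnel images through transform_images before inference. In zzh8829/yolov3-tf2 the helper is simply a resize to the network input resolution plus scaling into [0, 1]; a minimal sketch, assuming that implementation:

import tensorflow as tf

def transform_images(x_train, size):
    # resize to the square network input and scale pixel values to [0, 1]
    x_train = tf.image.resize(x_train, (size, size))
    x_train = x_train / 255
    return x_train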
Example #12
File: detect.py, Project: bcrafton/sort
def main(_argv):

    load = LoadCOCO('../2DMOT2015/train/ADL-Rundle-6')

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights).expect_partial()
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    '''
    if FLAGS.tfrecord:
        dataset = load_tfrecord_dataset(FLAGS.tfrecord, FLAGS.classes, FLAGS.size)
        dataset = dataset.shuffle(512)
        img_raw, _label = next(iter(dataset.take(1)))
    else:
        img_raw = tf.image.decode_image(open(FLAGS.image, 'rb').read(), channels=3)

    img = tf.expand_dims(img_raw, 0)
    img = transform_images(img, FLAGS.size)
    '''

    frame = 0
    dets = []
    
    assert not load.empty()
    while not load.empty():

        path, img_raw, img = load.pop()
        name = os.path.basename(path)
        print(path)
        
        # nums = total detections.
        boxes, scores, classes, nums = yolo(img)
        
        nums_np = nums.numpy()
        num = nums_np[0]
        
        frames = frame * np.ones(shape=(num, 1))
        null = -1 * np.ones(shape=(num, 1))
        boxes_np = boxes.numpy()[0][0:num].reshape(num, 4)
        scores_np = scores.numpy()[0][0:num].reshape(num, 1)
        
        # print (np.shape(boxes_np))
        boxes_np[:, 0] = boxes_np[:, 0] * 1920
        boxes_np[:, 1] = boxes_np[:, 1] * 1080
        boxes_np[:, 2] = boxes_np[:, 2] * 1920
        boxes_np[:, 3] = boxes_np[:, 3] * 1080
        boxes_np[:, 2] = boxes_np[:, 2] - boxes_np[:, 0]
        boxes_np[:, 3] = boxes_np[:, 3] - boxes_np[:, 1]
        
        '''
        if len(boxes_np):
            print (boxes_np[0])
        '''
        
        det = np.concatenate((frames, null, boxes_np, scores_np, null, null, null), axis=1)
        dets.append(det)

        '''
        logging.info('detections:')
        for i in range(nums[0]):
            logging.info('\t{}, {}, {}'.format(class_names[int(classes[0][i])], np.array(scores[0][i]), np.array(boxes[0][i])))
        '''

        img = cv2.cvtColor(img_raw.numpy(), cv2.COLOR_RGB2BGR)
        img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
        cv2.imwrite('images/' + name, img)
        
        frame = frame + 1

    #############################
    
    dets = np.concatenate(dets, axis=0)
    # print (np.shape(dets))
    
    # np.save('dets', dets)
    # np.savetxt("yolo-det.txt", dets, delimiter=",")
    np.savetxt("yolo-det.txt", dets, fmt='%d, %d, %0.2f, %0.2f, %0.2f, %0.2f, %0.2f, %d, %d, %d', delimiter=",")
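
The rows written to yolo-det.txt follow the MOT-challenge det.txt layout consumed by trackers such as SORT: frame, id (-1 for raw detections), bbox left, top, width, height, score, then three -1 placeholders. A small sketch of reading the file back, using the file name from the code above:

import numpy as np

dets = np.loadtxt('yolo-det.txt', delimiter=',')
frame0 = dets[dets[:, 0] == 0]   # all detections from the first frame
boxes_tlwh = frame0[:, 2:6]      # left, top, width, height in pixels
scores = frame0[:, 6]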
Example #13
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1.0

    #initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    try:
        # original code
        #vid = cv2.VideoCapture(int(FLAGS.video))
        # Daum PotPlayer
        #vid = cv2.VideoCapture('rtsp://172.20.10.4:8554/test')
        vid = cv2.VideoCapture('rtsp://192.168.0.28:8554/test')

        # no connection
        #os.environ['OPENCV_FFMPEG_CAPTURE_OPTIONS'] = 'protocol_whitelist;file,rtp,udp'
        #vid = cv2.VideoCapture('C:/Users/Jiwon/Desktop/yolov3_deepsort-master/stream.sdp')
        #vid = cv2.VideoCapture(
        #'udpsrc port=8400 caps=application/x-rtp,media=(string)video,clock-rate=(int)9000,encoding-name=(string)H264,payload=(int)96!rtph264depay!decodebin!videoconvert!appsink',
        #cv2.CAP_GSTREAMER)
        #vid = cv2.VideoCapture("rtspsrc location=rtsp://192.168.0.25/main latency=30 ! decodebin ! nvvidconv ! appsink")
        #vid = cv2.VideoCapture('udp://@:5000')
        #vid =  cv2.VideoCapture('udpsrc port=5000 ! application/x-rtp, payload=96 ! rtph264depay ! avdec_h264 ! appsink', cv2.CAP_GSTREAMER)
        #vid = cv2.VideoCapture(1)

    except:
        vid = cv2.VideoCapture(FLAGS.video)

    out = None

    if FLAGS.output:
        # by default VideoCapture returns float instead of qint
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
        list_file = open('detection.txt', 'w')
        frame_index = -1
    # code for verification
    f_cnt = 0
    redetect = False

    fps = 0.0
    count = 0
    while True:
        _, img = vid.read()

        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            count += 1
            if count < 3:
                continue
            else:
                break

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        #img_in = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        classes = classes[0]
        names = []
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)

        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                converted_boxes, scores[0], names, features)
        ]

        #initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()

            if class_name == "person":
                if int(track.track_id) == 1:
                    cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), (0, 255, 0), 2)
                    cv2.rectangle(
                        img, (int(bbox[0]), int(bbox[1] - 30)),
                        (int(bbox[0]) +
                         (len(class_name) + len(str(track.track_id))) * 17,
                         int(bbox[1])), (0, 255, 0), -1)
                    cv2.putText(img, class_name + "-" + str(track.track_id),
                                (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                                (0, 0, 0), 2)

                    # if this is the initial MQTT check value, save it; fix this code here
                    #img_user = img[int(bbox[0]):int(bbox[2]), int(bbox[1]):int(bbox[3])]
                    img_user = img[int(bbox[1]):int(bbox[1]) + int(bbox[3]),
                                   int(bbox[0]):int(bbox[0]) + int(bbox[2]) -
                                   10]
                    cv2.imwrite(
                        'C:/Users/Jiwon/Desktop/re/yolov3_deepsort-master/userface/user.png',
                        img_user)

        ### UNCOMMENT BELOW IF YOU WANT CONSTANTLY CHANGING YOLO DETECTIONS TO BE SHOWN ON SCREEN
        #for det in detections:
        #    bbox = det.to_tlbr()
        #    cv2.rectangle(img,(int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(255,0,0), 2)

        # print fps on screen
        fps = (fps + (1. / (time.time() - t1))) / 2
        cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)

        cv2.imshow('output', img)
        if FLAGS.output:
            out.write(img)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(converted_boxes) != 0:
                for i in range(0, len(converted_boxes)):
                    list_file.write(
                        str(converted_boxes[i][0]) + ' ' +
                        str(converted_boxes[i][1]) + ' ' +
                        str(converted_boxes[i][2]) + ' ' +
                        str(converted_boxes[i][3]) + ' ')
            list_file.write('\n')

        f_cnt += 1
        print("False")
        if f_cnt > 10:
            redetect = True
            f_cnt = 0
        #"""
        if redetect:  # https://opencv-python.readthedocs.io/en/latest/doc/24.imageTemplateMatch/imageTemplateMatch.html
            _, img = vid.read()
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            template = cv2.imread(
                'C:/Users/Jiwon/Desktop/re/yolov3_deepsort-master/userface/user.png',
                0)
            w, h = template.shape[::-1]  # width and height of the template image

            res = cv2.matchTemplate(gray, template, cv2.TM_SQDIFF)
            min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
            top_left = min_loc
            bottom_right = (top_left[0] + w, top_left[1] + h)
            cv2.rectangle(img, top_left, bottom_right, (255, 0, 0), 1)
            print("TRUE")
        #   """
        # press q to quit
        if cv2.waitKey(1) == ord('q'):
            break
    vid.release()
    if FLAGS.output:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
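
Deep SORT's Detection expects pixel-space (top-left x, top-left y, width, height) boxes, which is what convert_boxes derives from YOLO's normalized (x1, y1, x2, y2) output. A minimal sketch, assuming the helper behaves like the one in the yolov3_deepsort utils:

import numpy as np

def convert_boxes(image, boxes):
    # normalized (x1, y1, x2, y2) -> pixel (top-left x, top-left y, w, h)
    h, w = image.shape[0], image.shape[1]
    converted = []
    for box in np.array(boxes):
        x1, y1, x2, y2 = box[0] * w, box[1] * h, box[2] * w, box[3] * h
        tlwh = [int(x1), int(y1), int(x2 - x1), int(y2 - y1)]
        if tlwh != [0, 0, 0, 0]:  # skip zero padding rows
            converted.append(tlwh)
    return converted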
Example #14
def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        # request GPU memory only when it is needed
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    # train the tiny version of YOLO or the full version
    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size,
                           training=True,
                           classes=FLAGS.num_classes)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True, classes=FLAGS.num_classes)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    # if no dataset is specified, load a single image as the dataset => fake_dataset
    train_dataset = dataset.load_fake_dataset()

    # check whether a dataset path was given
    if FLAGS.dataset:
        # load the dataset from TFRecord files; train_dataset: (x_train, y_train)
        train_dataset = dataset.load_tfrecord_dataset(FLAGS.dataset,
                                                      FLAGS.classes,
                                                      FLAGS.size)
    # build batched training data
    # shuffle the data order
    train_dataset = train_dataset.shuffle(buffer_size=512)
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    # y.shape:train_dataset.as_numpy_iterator().next()[1].shape
    # =>(batch_size, yolo_max_boxes, 5) 5=>(xmin, ymin, xmax, ymax, classlabel)
    train_dataset = train_dataset.map(lambda x, y: (
        # normalize the image data to [0, 1]
        dataset.transform_images(x, FLAGS.size),
        # use the anchors to decide which feature map (13*13, 26*26, 52*52) each bbox belongs to
        # and compute the position of the bbox center on that feature map
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
    # prefetch data to improve latency and throughput
    # tf.data.experimental.AUTOTUNE: dynamically sets the number of parallel calls based on available CPUs
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    # load a fake validation set so nothing breaks when no validation-set path is given
    val_dataset = dataset.load_fake_dataset()
    # load the validation set
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(FLAGS.val_dataset,
                                                    FLAGS.classes, FLAGS.size)
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))

    # Configure the model for transfer learning
    # choose the training mode
    # random weight initialization: train the whole network from scratch
    if FLAGS.transfer == 'none':
        pass  # Nothing to do
    # the two transfer-learning modes
    elif FLAGS.transfer in ['darknet', 'no_output']:
        # Darknet transfer is a special case that works
        # with incompatible number of classes

        # reset top layers
        if FLAGS.tiny:
            model_pretrained = YoloV3Tiny(FLAGS.size,
                                          training=True,
                                          classes=FLAGS.weights_num_classes
                                          or FLAGS.num_classes)
        else:
            # model network structure
            model_pretrained = YoloV3(FLAGS.size,
                                      training=True,
                                      classes=FLAGS.weights_num_classes
                                      or FLAGS.num_classes)
        # load the pretrained weights
        model_pretrained.load_weights(FLAGS.weights)

        # copy the darknet weights and freeze them: the backbone does not train; the remaining parameters are randomly initialized
        if FLAGS.transfer == 'darknet':
            model.get_layer('yolo_darknet').set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
            freeze_all(model.get_layer('yolo_darknet'))

        # copy and freeze all weights except the YOLO output layers: only the output layers train, from randomly initialized parameters
        elif FLAGS.transfer == 'no_output':
            for l in model.layers:
                if not l.name.startswith('yolo_output'):
                    l.set_weights(
                        model_pretrained.get_layer(l.name).get_weights())
                    freeze_all(l)

    # the fine_tune and frozen transfer modes require the number of classes to match the pretrained weights (80 classes)
    else:
        # All other transfer require matching classes
        # load all pretrained weight parameters into the network
        model.load_weights(FLAGS.weights)
        # freeze the darknet (backbone) weights; the other parameters train on top of the pretrained weights
        if FLAGS.transfer == 'fine_tune':
            # freeze darknet and fine tune other layers
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        # freeze all parameters: training then has no effect
        elif FLAGS.transfer == 'frozen':
            # freeze everything
            freeze_all(model)

    # define the optimizer: Adam
    optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)

    loss = [
        YoloLoss(anchors[mask], classes=FLAGS.num_classes)
        for mask in anchor_masks
    ]

    # debugging mode (slow): in eager mode, ops run as soon as they are called
    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        # running-average loss on the training / validation set
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)
        # iterate over the epochs
        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                # GradientTape: automatically computes variable gradients
                with tf.GradientTape() as tape:
                    # model(): preferred in eager mode; runs directly without compiling and is fast
                    # model.predict() has to compile the graph on its first run
                    outputs = model(images, training=True)
                    # sum the tensor elements across all dimensions
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss
                # gradients
                grads = tape.gradient(total_loss, model.trainable_variables)
                # apply the optimizer
                optimizer.apply_gradients(zip(grads,
                                              model.trainable_variables))
                # write to the log
                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                # update the running-average loss
                avg_loss.update_state(total_loss)

            # evaluate on the validation set
            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                logging.info("{}_val_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)
            # .result(): returns the accumulated result
            logging.info("{}, train: {}, val: {}".format(
                epoch,
                avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))
            # reset_states: clears the accumulated values
            avg_loss.reset_states()
            avg_val_loss.reset_states()
            # save the model weights once per epoch
            model.save_weights('checkpoints/yolov3_train_{}.tf'.format(epoch))

    # training mode
    else:
        # compile the model
        model.compile(optimizer=optimizer,
                      loss=loss,
                      metrics=['accuracy'],
                      run_eagerly=(FLAGS.mode == 'eager_fit'))
        # callbacks
        callbacks = [
            # learning-rate decay
            ReduceLROnPlateau(verbose=1),
            # stop training early when the monitored loss stops improving
            EarlyStopping(patience=3, verbose=1),
            # save the model
            ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                            verbose=1,
                            save_weights_only=True),
            # visualize the training results
            TensorBoard(log_dir='logs', write_images=True, update_freq='batch')
        ]
        # run the training loop
        history = model.fit(train_dataset,
                            epochs=FLAGS.epochs,
                            callbacks=callbacks,
                            validation_data=val_dataset)
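
The anchors and anchor_masks referenced throughout come from the yolov3_tf2 model module. For the full model these are the standard COCO anchors, normalized by the 416-pixel input size, as defined in zzh8829/yolov3-tf2:

import numpy as np

yolo_anchors = np.array([(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                         (59, 119), (116, 90), (156, 198), (373, 326)],
                        np.float32) / 416
yolo_anchor_masks = np.array([[6, 7, 8], [3, 4, 5], [0, 1, 2]])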
Example #15
def nayanam(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1.0

    #initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    yolo = YoloV3(classes=80)

    yolo.load_weights(PATH_TO_WEIGHTS)
    print('weights loaded')

    class_names = [c.strip() for c in open(PATH_TO_CLASSES).readlines()]
    print('classes loaded')

    out = None
    fps = 0.0
    count = 0

    vid = cv2.VideoCapture(RTSP_URL)
    while vid.isOpened():
        try:
            _, img = vid.read()

        except:
            print("Empty frame")
            continue
        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, 416)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        classes = classes[0]
        names = []

        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])

        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                converted_boxes, scores[0], names, features)
        ]
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]
        # running NMS
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    nms_max_overlap, scores)
        detections = [detections[i] for i in indices]
        # Deepsort tracker called here
        tracker.predict()
        tracker.update(detections)
        #dump file set here
        # file = open(PATH_TO_RESULTS,'a+')
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            if VIDEO_DEBUG == 1:
                cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), color, 2)
                cv2.rectangle(
                    img, (int(bbox[0]), int(bbox[1] - 30)),
                    (int(bbox[0]) +
                     (len(class_name) + len(str(track.track_id))) * 17,
                     int(bbox[1])), color, -1)
                cv2.putText(img, class_name + "-" + str(track.track_id),
                            (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                            (255, 255, 255), 2)
            s = str(track.track_id) + ',' + class_name + ',' + str(int(
                bbox[0])) + ',' + str(int(bbox[1])) + '\n'
            # file.write(s)
            print(s)
        fps = (fps + (1. / (time.time() - t1))) / 2
        print("fps = ", fps)
        # file.close()
        if VIDEO_DEBUG == 1:
            cv2.imshow('output', img)
            if cv2.waitKey(1) == 27:
                break
        signal.signal(signal.SIGINT, user_exit)
    vid.release()
    if VIDEO_DEBUG == 1:
        cv2.destroyAllWindows()
Example #16
def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    times = []

    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except:
        vid = cv2.VideoCapture(FLAGS.video)

    out = None

    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    segment_frame_num = 0
    segment_id = 0
    segment_frame = []
    num_of_obeject = []
    segment_scores = []
    segment_nums = []
    key_frame = []
    image_id = 0
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    # note: this writer replaces the FLAGS.output writer created above, and its
    # frame size must match the frames passed to write() (resized to 640x480 below)
    out = cv2.VideoWriter('./data/video/town_summary.avi', fourcc, 20.0,
                          (640, 480))

    while True:
        if segment_frame_num == 20:
            key_num = np.argmax(segment_nums)
            print(key_num)
            #cv2.imwrite("./data/images/{}.jpg".format(image_id),segment_frame[key_num])
            out.write(segment_frame[key_num])
            key_frame.append(segment_frame[key_num])
            segment_scores = []
            segment_frame = []
            segment_nums = []
            segment_frame_num = 0
            image_id += 1

        _, img = vid.read()

        # check for an empty frame before resizing, otherwise cv2.resize crashes
        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            break

        img = cv2.resize(img, (640, 480))
        segment_frame.append(img)
        segment_frame_num += 1

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        segment_scores.append(scores)
        segment_nums.append(nums)
        t2 = time.time()
        times.append(t2 - t1)
        times = times[-20:]

        img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
        img = cv2.putText(
            img, "Time: {:.2f}ms, num: {}".format(
                sum(times) / len(times) * 1000, nums), (0, 30),
            cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        if FLAGS.output:
            out.write(img)
        cv2.imshow('output', img)
        if cv2.waitKey(1) == ord('q'):
            break

    cv2.destroyAllWindows()
def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    times = []

    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except:
        vid = cv2.VideoCapture(FLAGS.video)

    out = None

    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
    fps = 0.0
    count = 0

    ser = sl.Serial("COM3", 57600)

    while True:
        _, img = vid.read()

        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            count += 1
            if count < 3:
                continue
            else:
                break

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        fps = (fps + (1. / (time.time() - t1))) / 2

        img = draw_outputs(img, (boxes, scores, classes, nums), class_names)

        for i in range(nums[0]):
            # test that only sends detections that are people (Andrew and Jason's code)
            if classes[0][i] == 0 and scores[0][i] >= 0.90:
                angles, distances = get_angles_and_distances(img, boxes[0], i)
                # if they are within a 10px square, go ahead and shoot;
                # if we were moving the camera/turret we would just send the
                # distances to the Arduino and let it figure out what to do
                if -10 <= distances[0] <= 10 and -10 <= distances[1] <= 10:
                    ser.write("SHOOT".encode())
                    print(ser.readline())
                print(angles)
                print(distances)
                print("\n")

        img = cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30),
                          cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        
        if FLAGS.output:
            out.write(img)
        cv2.imshow('output', img)
        if cv2.waitKey(1) == ord('q'):
            break

    cv2.destroyAllWindows()
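
get_angles_and_distances is not defined anywhere in this listing. A purely hypothetical sketch of what such a helper could compute — the angular and pixel offsets of a detection's center from the frame center — where the function body and the field-of-view constant are assumptions, not the original code:

import numpy as np

FOV_DEG = 60.0  # assumed camera field of view

def get_angles_and_distances(img, boxes, i):
    # hypothetical: boxes[i] holds a normalized (x1, y1, x2, y2) detection
    h, w = img.shape[0], img.shape[1]
    x1, y1, x2, y2 = np.array(boxes[i])
    cx, cy = (x1 + x2) / 2 * w, (y1 + y2) / 2 * h
    dx, dy = cx - w / 2, cy - h / 2              # pixel offset from frame center
    ax, ay = dx / w * FOV_DEG, dy / h * FOV_DEG  # rough linear-FOV angle estimate
    return (ax, ay), (dx, dy)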
Example #18
def main(_argv):
    global detect_strat_frag

    # delete files from the folder where the binarized images are stored
    if len(glob.glob(ostu_file_path + "*.png")) > 0:
        for p in glob.glob(ostu_file_path + "*.png", recursive=True):
            if os.path.isfile(p):
                os.remove(p)
        print("2値化画像を削除しました")
    else:
        print("2値化画像はありません")

    # delete files from the folder where the detection output images are stored
    if len(glob.glob(output_file_path + "*.png")) > 0:
        for p in glob.glob(output_file_path + "*.png", recursive=True):
            if os.path.isfile(p):
                os.remove(p)
        print("推定済画像を削除しました")
    else:
        print("推定済画像はありません")

    # program start time
    calculate_start_time = time.time()

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        for device in physical_devices:
            tf.config.experimental.set_memory_growth(device, True)
            print('{} memory growth: {}'.format(
                device, tf.config.experimental.get_memory_growth(device)))
    else:
        print("Not enough GPU hardware devices available")

    logging.info('YoloV3Tiny loading...')

    if flags_tiny:
        yolo = YoloV3Tiny(classes=num_classes)
    else:
        yolo = YoloV3(classes=num_classes)

    logging.info('YoloV3Tiny loaded')

    calculate_finish_time = time.time()
    print("YoloV3Tinyロード時間 (開始 <--> YoloV3Tinyロード):" +
          str(calculate_finish_time - calculate_start_time) + "秒" + "\n")

    yolo.load_weights(weights_path).expect_partial()
    logging.info('weights loaded')

    calculate_finish_time = time.time()
    print("学習モデルロード時間 (開始 <--> モデルロード):" +
          str(calculate_finish_time - calculate_start_time) + "秒" + "\n")

    class_names = [c.strip() for c in open(classes_path).readlines()]
    logging.info('classes loaded')

    calculate_finish_time = time.time()
    print("クラスファイルロード時間 (開始 <--> ファイルロード):" +
          str(calculate_finish_time - calculate_start_time) + "秒" + "\n")

    # count the files in the binarized-image folder
    dir_num = sum(
        os.path.isfile(os.path.join(ostu_file_path, name))
        for name in os.listdir(ostu_file_path))

    # collect the file paths from the binarized-image folder
    image_path = glob.glob(ostu_file_path + "*.png")

    # natural-order sort
    image = [None] * dir_num
    count = 0
    count_temp = 0
    detect_time_temp = 0
    detect_time = 0
    for path in natsorted(image_path):
        image[count] = path
        # print(image[count])
        count += 1
    count = 0

    # number of frames in which a human class was captured
    human_frame_sum = 0

    # total number of detections
    detect_count = 0

    # storage for the detection results
    detect_class = [None] * dir_num * 2

    # loop until the detection-start flag becomes True
    while True:
        if detect_strat_frag:
            print("---------------- detection started ----------------")
            break

    for i in range(dir_num):
        logging.info('input: {}'.format(image[i]))
        img_raw = tf.image.decode_image(open(image[i], 'rb').read(),
                                        channels=3)

        img = tf.expand_dims(img_raw, 0)
        img = transform_images(img, resize)

        # detection time per image
        t1 = time.time()
        boxes, scores, classes, nums = yolo(img)
        t2 = time.time()
        logging.info('time: {}'.format(t2 - t1))
        detect_time_temp = t2 - t1
        detect_time = detect_time_temp + detect_time

        # detection results
        logging.info('detections:')
        for j in range(nums[0]):  # j, not i: avoid shadowing the outer image index
            logging.info('\t{}, {}, {}'.format(class_names[int(classes[0][j])],
                                               np.array(scores[0][j]),
                                               np.array(boxes[0][j])))
            # array used for the majority vote
            detect_class[count] = class_names[int(classes[0][j])]

            # when a human class is recognized, add to the human-class frame count
            if 'human' in detect_class[count]:
                detect_count += 1
                human_temp = max(list(range(nums[0]))) + 1
                human_frame_sum = 1 / human_temp + human_frame_sum

            count += 1

        # save the annotated image
        img = cv2.cvtColor(img_raw.numpy(), cv2.COLOR_RGB2BGR)
        img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
        output = '/home/pi/yolov3-tf2-master/output_image/' + 'detect_{}.png'.format(
            count_temp)
        count_temp += 1
        cv2.imwrite(output, img)
        logging.info('output saved to: {}'.format(output) + '\n')

    print("----------------推定終了----------------")

    print("画像1枚あたりに要した平均時間:" + str(detect_time / dir_num) + "秒" + "\n")

    # program end time
    calculate_finish_time = time.time()
    print("物体検出に要した合計時間:" + str(calculate_finish_time - calculate_start_time) +
          "秒" + "\n")
    print("推定した合計枚数 : " + str(dir_num) + "\n")

    # counters for the detected classes
    human_front_num = 0
    human_right_num = 0
    human_left_num = 0
    none_class_num = 0
    detect_dict = {
        'human_front': 0,
        'human_right': 0,
        'human_left': 0,
        'none_class': 0
    }

    # total number of human-class detections
    human_class_sum = 0

    # count the detected classes
    for i in range(dir_num):
        if detect_class[i] == 'human_front':
            human_front_num += 1
            human_class_sum += 1
        elif detect_class[i] == 'human_right':
            human_right_num += 1
            human_class_sum += 1
        elif detect_class[i] == 'human_left':
            human_left_num += 1
            human_class_sum += 1
        else:
            none_class_num += 1

    detect_dict['human_front'] = human_front_num
    detect_dict['human_right'] = human_right_num
    detect_dict['human_left'] = human_left_num
    detect_dict['none_class'] = none_class_num
    print("推定された全てのクラス : " + str(detect_dict))

    # select the top three classes
    detect_res = []
    detect_res = sorted(detect_dict.items(), key=lambda x: x[1],
                        reverse=True)[:3]
    print("上位3クラス : " + str(detect_res) + "タイプ: " + str(type(detect_res)))
    print("最上位クラス : " + str(detect_res[0][0]) + "タイプ: " +
          str(type(detect_res[0])))

    # compute the average number of people
    if human_class_sum == 0 or human_frame_sum == 0:
        class_avg = 0
        # print("no human class detected")
    else:
        class_avg = human_class_sum / human_frame_sum
    print("人間クラスと認識された画像内に存在する平均人数 : " + str((round((class_avg), 1))))

    # send the sensor data to the server (over WiFi)
    send_data = {}
    send_data['id'] = device_id
    send_data['class'] = detect_res[0][0]
    send_data['avg'] = class_avg
    send_data['date'] = time.time() * 1000
    upload_func(send_data)
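
upload_func is likewise defined outside this snippet. A hypothetical sketch of posting the summary dict to a server over HTTP — the URL and the requests-based transport are assumptions:

import requests

SERVER_URL = 'http://example.com/api/detections'  # assumed endpoint

def upload_func(send_data):
    # hypothetical: POST the detection summary as JSON
    try:
        requests.post(SERVER_URL, json=send_data, timeout=5)
    except requests.RequestException as e:
        print('upload failed: {}'.format(e))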
Example #19
def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    check_weighs_exist(tiny=FLAGS.tiny)

    if FLAGS.tiny:
        model = YoloV3Tiny(
            FLAGS.size,
            training=True,
            classes=FLAGS.num_classes
        )
        model.summary()
        plot_model(model, to_file='yoloV3Tiny-model-plot.png', show_shapes=True, show_layer_names=True)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(
            FLAGS.size,
            training=True,
            classes=FLAGS.num_classes
        )
        model.summary()
        plot_model(model, to_file='yoloV3-model-plot.png', show_shapes=True, show_layer_names=True)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    # Load the dataset
    train_dataset = dataset.load_fake_dataset()

    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(
            file_pattern=FLAGS.dataset,
            class_file=FLAGS.classes,
            size=FLAGS.size
        )
    # Shuffle the dataset
    train_dataset = train_dataset.shuffle(buffer_size=FLAGS.buffer_size, reshuffle_each_iteration=True)
    train_dataset_length = [i for i, _ in enumerate(train_dataset)][-1] + 1
    print(f"Dataset for training consists of {train_dataset_length} images.")

    train_dataset = train_dataset.batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(lambda x, y: (dataset.transform_images(x, FLAGS.size),
                                                    dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size))).repeat()
    train_dataset = train_dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

    val_dataset = dataset.load_fake_dataset()
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(
            FLAGS.val_dataset, FLAGS.classes, FLAGS.size)

    val_dataset_length = [i for i, _ in enumerate(val_dataset)][-1] + 1
    print(f"Dataset for validation consists of {val_dataset_length} images.")
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(lambda x, y: (dataset.transform_images(x, FLAGS.size),
                                                dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size))).repeat()

    # Configure the model for transfer learning
    if FLAGS.transfer == 'none':
        pass  # Nothing to do
    elif FLAGS.transfer in ['darknet', 'no_output']:
        # Darknet transfer is a special case that works
        # with incompatible number of classes
        # reset top layers
        if FLAGS.tiny:
            model_pretrained = YoloV3Tiny(
                size=FLAGS.size,
                training=True,
                classes=FLAGS.weights_num_classes or FLAGS.num_classes)
            model_pretrained.load_weights(FLAGS.weights_tf_format_tiny)

        else:
            model_pretrained = YoloV3(
                size=FLAGS.size,
                training=True,
                classes=FLAGS.weights_num_classes or FLAGS.num_classes)
            model_pretrained.load_weights(FLAGS.weights_tf_format)

        if FLAGS.transfer == 'darknet':
            # Set yolo darknet layer weights to the loaded pretrained model weights
            model.get_layer('yolo_darknet').set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
            # Freeze these layers
            freeze_all(model.get_layer('yolo_darknet'))

        elif FLAGS.transfer == 'no_output':
            for i in model.layers:
                if not i.name.startswith('yolo_output'):
                    i.set_weights(model_pretrained.get_layer(
                        i.name).get_weights())
                    freeze_all(i)

    else:
        # All other transfer require matching classes
        if FLAGS.tiny:
            model.load_weights(FLAGS.weights_tf_format_tiny)
        else:
            model.load_weights(FLAGS.weights_tf_format)
        if FLAGS.transfer == 'fine_tune':
            # freeze darknet and fine tune other layers
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        elif FLAGS.transfer == 'frozen':
            # freeze everything
            freeze_all(model)

    # Use the Adam optimizer with the specified learning rate
    optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)

    # YoloLoss function
    loss = [YoloLoss(anchors[mask], classes=FLAGS.num_classes) for mask in anchor_masks]

    if FLAGS.mode == 'eager_tf':
        print(f"Mode is: {FLAGS.mode}")
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)

                    regularization_loss = tf.reduce_sum(model.losses)

                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(zip(grads, model.trainable_variables))

                logging.info(f"epoch_{epoch}_train_batch_{batch},"
                             f"{total_loss.numpy()},"
                             f"{list(map(lambda x: np.sum(x.numpy()), pred_loss))}")
                avg_loss.update_state(total_loss)

            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                logging.info("{}_val_{}, {}, {}".format(
                    epoch,
                    batch,
                    total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()),
                             pred_loss)))
                )
                avg_val_loss.update_state(total_loss)

            logging.info("{}, train: {}, val: {}".format(
                epoch,
                avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))

            avg_loss.reset_states()
            avg_val_loss.reset_states()
            model.save_weights(f'checkpoints/{data_set}_tiny_{FLAGS.tiny}_im_size_{FLAGS.size}.tf')
    else:
        print("Compiling the model")
        model.compile(
            optimizer=optimizer,
            loss=loss,
            run_eagerly=(FLAGS.mode == 'eager_fit'),
            metrics=['accuracy'])

        # callbacks and model.fit belong inside this branch: in 'eager_tf' mode
        # the custom loop above already trains, and the model is never compiled
        callbacks = [
            EarlyStopping(monitor='val_loss',
                          patience=125,
                          verbose=1),
            ReduceLROnPlateau(monitor='val_loss',
                              verbose=1,
                              factor=0.90,
                              min_lr=0,
                              patience=20,
                              mode="auto"),
            ModelCheckpoint(
                str(f'checkpoints/{data_set}_tiny_{FLAGS.tiny}_im_size_{FLAGS.size}.tf'),
                verbose=1,
                save_weights_only=True,
                save_best_only=True,
                mode="auto",
            ),
            TensorBoard(log_dir='logs'),
            CSVLogger(f'checkpoints/logs/{data_set}_tiny_{FLAGS.tiny}_im_size_{FLAGS.size}',
                      separator=',')
        ]
        history = model.fit(train_dataset,
                            epochs=FLAGS.epochs,
                            verbose=2,
                            callbacks=callbacks,
                            validation_data=val_dataset,
                            steps_per_epoch=np.ceil(train_dataset_length / FLAGS.batch_size),
                            validation_steps=np.ceil(val_dataset_length / FLAGS.batch_size))
Example #20
def main(args):

    image_size = 416  # 416
    num_epochs = args.epochs
    batch_size = args.batch_size
    learning_rate = 1e-3
    num_classes = args.num_classes
    # number of classes in the `weights` file, if different; useful for transfer learning with a different number of classes
    weight_num_classes = args.num_weight_class
    valid_path = args.valid_dataset
    weights_path = args.weights
    # Path to a text file containing all classes, one per line
    classes = args.classes

    anchors = yolo_anchors
    anchor_masks = yolo_anchor_masks

    val_dataset = dataset.load_tfrecord_dataset(valid_path, classes,
                                                image_size)
    val_dataset = val_dataset.batch(batch_size)
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, image_size),
        dataset.transform_targets(y, anchors, anchor_masks, image_size)))

    model = YoloV3(image_size, training=True, classes=num_classes)
    # Darknet transfer is a special case that works
    # with incompatible number of classes
    # reset top layers
    model_pretrained = YoloV3(image_size,
                              training=True,
                              classes=weight_num_classes or num_classes)
    model_pretrained.load_weights(weights_path)

    if args.transfer == 'darknet':  # NOTE: `transfer` was undefined in the source; assumed to come from args
        model.get_layer('yolo_darknet').set_weights(
            model_pretrained.get_layer('yolo_darknet').get_weights())
        freeze_all(model.get_layer('yolo_darknet'))

    predictions = []

    evaluator = Evaluator(iou_thresh=args.iou)

    # labels - (N, grid, grid, anchors, [x, y, w, h, obj, class])
    boxes, scores, classes, num_detections = model.predict(val_dataset)
    # boxes -> (num_imgs, num_detections, box coords)

    # Full labels shape -> [num_batches, grid scale, imgs]
    # Full labels shape -> [num_batches, [grid, grid, anchors, [x,y,w,h,obj,class]]]
    full_labels = np.asarray([label for _, label in val_dataset])

    # Shape -> [num_batches, num_imgs_in_batch, 3]
    # Shape -> [num_batches, num_imgs, 3x[grid,grid,anchors,[x,y,w,h,score,class]]]
    full_labels_trans = full_labels.transpose(0, 2, 1)

    full_labels_flat = []

    for batch in full_labels_trans:
        for img in batch:
            row = []
            for scale in img:
                row.append(scale)
            full_labels_flat.append(row)

    # Shape -> [num_imgs x 3]
    full_labels_flat = np.asarray(full_labels_flat)

    # Remove any labels consisting of all 0's
    filt_labels = []
    # for img in range(len(full_labels_flat)):
    for img in full_labels_flat:
        test = []
        # for scale in full_labels_flat[img]:
        for scale in img:
            lab_list = []
            for g1 in scale:
                for g2 in g1:
                    for anchor in g2:
                        if anchor[0] > 0:
                            temp = [
                                anchor[0] * image_size, anchor[1] * image_size,
                                anchor[2] * image_size, anchor[3] * image_size,
                                anchor[4], anchor[5]
                            ]
                            temp = [float(x) for x in temp]
                            lab_list.append(np.asarray(temp))
            test.append(np.asarray(lab_list))
        filt_labels.append(np.asarray(test))
    filt_labels = np.asarray(
        filt_labels
    )  # Numpy array of shape [num_imgs, 3x[num_boxesx[x1,y1,x2,y2,score,class]]]
    # filt_labels = filt_labels[:, :4] * image_size

    # i is the num_images index
    # predictions = [np.hstack([boxes[i][x], scores[i][x], classes[i][x]]) for i in range(len(num_detections)) for x in range(len(scores[i])) if scores[i][x] > 0]
    for img in range(len(num_detections)):
        row = []
        for sc in range(len(scores[img])):
            if scores[img][sc] > 0:
                row.append(
                    np.hstack([
                        boxes[img][sc] * image_size, scores[img][sc],
                        classes[img][sc]
                    ]))
        predictions.append(np.asarray(row))

    predictions = np.asarray(
        predictions)  # numpy array of shape [num_imgs x num_preds x 6]

    if len(predictions) == 0:  # No predictions made
        print('No predictions made - exiting.')
        exit()

    # predictions[:, :, 0:4] = predictions[:, :, 0:4] * image_size
    # Predictions format - [num_imgs x num_preds x [box coords x4, score, classes]]
    # Box coords should be in format x1 y1 x2 y2

    # NOTE: `images` is not defined anywhere in this snippet; gathering the
    # validation images is one plausible reading of the intent.
    images = np.concatenate([img.numpy() for img, _ in val_dataset])
    evaluator(predictions, filt_labels, images)  # Check gts box coords

    # confidence_thresholds = np.linspace(0.1, 1, 15)  # dead in the source: immediately overridden below
    confidence_thresholds = [0.5]
    all_tp_rates = []
    all_fp_rates = []

    # Compute ROCs for above range of thresholds
    # Compute one for each class vs. the other classes
    for index, conf in enumerate(confidence_thresholds):
        tp_of_img = []
        fp_of_img = []
        all_classes = []

        tp_rates = {}
        fp_rates = {}

        predictions = []  # reset per threshold; the source kept appending to the ndarray built above, which would fail

        boxes, scores, classes, num_detections = model.predict(val_dataset)

        # Full labels shape -> [num_batches, grid scale, imgs]
        # Full labels shape -> [num_batches, [grid, grid, anchors, [x,y,w,h,obj,class]]]
        full_labels = np.asarray([label for _, label in val_dataset])

        # Shape -> [num_batches, num_imgs_in_batch, 3]
        # Shape -> [num_batches, num_imgs, 3x[grid,grid,anchors,[x,y,w,h,score,class]]]
        full_labels_trans = full_labels.transpose(0, 2, 1)

        full_labels_flat = []

        for batch in full_labels_trans:
            for img in batch:
                row = []
                for scale in img:
                    row.append(scale)
                full_labels_flat.append(row)

        # Shape -> [num_imgs x 3]
        full_labels_flat = np.asarray(full_labels_flat)

        # Remove any labels consisting of all 0's
        filt_labels = []
        # for img in range(len(full_labels_flat)):
        for img in full_labels_flat:
            test = []
            # for scale in full_labels_flat[img]:
            for scale in img:
                lab_list = []
                for g1 in scale:
                    for g2 in g1:
                        for anchor in g2:
                            if anchor[0] > 0:
                                temp = [
                                    anchor[0] * image_size,
                                    anchor[1] * image_size,
                                    anchor[2] * image_size,
                                    anchor[3] * image_size, anchor[4],
                                    anchor[5]
                                ]
                                temp = [float(x) for x in temp]
                                lab_list.append(np.asarray(temp))
                test.append(np.asarray(lab_list))
            filt_labels.append(np.asarray(test))
        filt_labels = np.asarray(
            filt_labels
        )  # Numpy array of shape [num_imgs, 3x[num_boxesx[x1,y1,x2,y2,score,class]]]
        # filt_labels = filt_labels[:, :4] * image_size

        # i is the num_images index
        # predictions = [np.hstack([boxes[i][x], scores[i][x], classes[i][x]]) for i in range(len(num_detections)) for x in range(len(scores[i])) if scores[i][x] > 0]
        for img in range(len(num_detections)):
            row = []
            for sc in range(len(scores[img])):
                if scores[img][sc] > 0:
                    row.append(
                        np.hstack([
                            boxes[img][sc] * image_size, scores[img][sc],
                            classes[img][sc]
                        ]))
            predictions.append(np.asarray(row))

        predictions = np.asarray(
            predictions)  # numpy array of shape [num_imgs x num_preds x 6]

        if len(predictions) == 0:  # No predictions made
            print('No predictions made - exiting.')
            exit()

        # predictions[:, :, 0:4] = predictions[:, :, 0:4] * image_size
        # Predictions format - [num_imgs x num_preds x [box coords x4, score, classes]]
        # Box coords should be in format x1 y1 x2 y2

        evaluator(predictions, filt_labels, images)  # Check gts box coords

        # NOTE: the block below references r, gt_bbox, gt_class_id, gt_mask,
        # dataset_val, utils and image_ids, none of which are defined in this
        # snippet; it appears to be carried over from a Mask R-CNN-style
        # evaluation script and will not run as-is.
        classes = list(set(r['class_ids']))  # All unique class ids
        for c in classes:
            if c not in all_classes:
                all_classes.append(c)
        complete_classes = dataset_val.class_ids[1:]
        # Need TPR and FPR rates for each class versus the other classes
        # Recall == TPR
        tpr = utils.compute_ap_indiv_class(gt_bbox, gt_class_id, gt_mask,
                                           r["rois"], r["class_ids"],
                                           r["scores"], r['masks'],
                                           complete_classes)
        total_fpr = utils.compute_fpr_indiv_class(gt_bbox, gt_class_id,
                                                  gt_mask, r["rois"],
                                                  r["class_ids"], r["scores"],
                                                  r['masks'], complete_classes)
        # print(f'For Image: TPR: {tpr} -- FPR: {total_fpr}')
        tp_of_img.append(tpr)
        fp_of_img.append(total_fpr)

        all_classes = dataset_val.class_ids[1:]

        # Need to get average TPR and FPR for number of images used
        for c in all_classes:
            tp_s = 0
            for item in tp_of_img:
                if c in item.keys():
                    tp_s += item[c]
                else:
                    tp_s += 0

            tp_rates[c] = tp_s / len(image_ids)
            # tp_rates[c] = tp_s

        # print(tp_rates)

        for c in all_classes:
            fp_s = 0
            for item in fp_of_img:
                if c in item.keys():
                    fp_s += item[c]
                else:
                    fp_s += 0
            fp_rates[c] = fp_s / len(image_ids)
            # fp_rates[c] = fp_s

        all_fp_rates.append(fp_rates)
        all_tp_rates.append(tp_rates)

    print(f'TP Rates: {all_tp_rates}')
    print(f'FP Rates: {all_fp_rates}')
Example #21
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1.0

    inputIndex = 0
    #initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except:
        vid = cv2.VideoCapture(FLAGS.video)

    out = None

    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    count = 0
    while True:
        _, img = vid.read()

        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            count += 1
            if count < 3:
                continue
            else:
                break

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        classes = classes[0]
        names = []
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                converted_boxes, scores[0], names, features)
        ]

        #initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]  # colormap expects values in [0, 1]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:

            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            if class_name == "Person" or class_name == "person":
                inputIndex += 1
                color = colors[int(track.track_id) % len(colors)]
                color = [i * 255 for i in color]
                cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), color, 2)
                #######
                im = img[int(bbox[1]):int(bbox[3]),
                         int(bbox[0]):int(bbox[2])]
                #######
                cv2.rectangle(
                    img, (int(bbox[0]), int(bbox[1] - 30)),
                    (int(bbox[0]) +
                     (len(class_name) + len(str(track.track_id))) * 17,
                     int(bbox[1])), color, -1)
                ##############
                #cv2.imwrite("C:\Yolov3DeepSortPersonID\yolov3_deepsort\data\Cropped"+str(inputIndex)+".png", im)
                color = ('b', 'g', 'r')  # channel keys for the commented-out histogram code below
                cv2.putText(img, class_name + "-" + str(track.track_id),
                            (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                            (255, 255, 255), 2)
            ##  for channel, col in enumerate(color):  # for histogram
            ##  ax1 = plt.subplot(1, 1, 1)
            #ax2 = plt.subplot(1, 2, 2)
            ##  histr = cv2.calcHist([im], [channel], None, [256], [0, 256])
            #plt.plot(histr, color=col)
            #plt.plot(histr)
            #plt.xlim([0, 256])
            #plt.title('Histogram for color scale picture')
            ##  plt.axis('off')
            ##  ax1.imshow(im)
            #ax2.plot(histr)

            ##    plt.savefig("C:\Yolov3DeepSortPersonID\yolov3_deepsort\data\APlot"+str(inputIndex)+".png");
            #cv2.imwrite("C:\Yolov3DeepSortPersonID\yolov3_deepsort\data\Final"+str(inputIndex)+".png", im)
            ###################

        ### UNCOMMENT BELOW IF YOU WANT CONSTANTLY CHANGING YOLO DETECTIONS TO BE SHOWN ON SCREEN
        #for det in detections:
        #    bbox = det.to_tlbr()
        #    cv2.rectangle(img,(int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(255,0,0), 2)

        # print fps on screen
        fps = (fps + (1. / (time.time() - t1))) / 2
        cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        cv2.imshow('output', img)
        if FLAGS.output:
            out.write(img)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(converted_boxes) != 0:
                for i in range(0, len(converted_boxes)):
                    list_file.write(
                        str(converted_boxes[i][0]) + ' ' +
                        str(converted_boxes[i][1]) + ' ' +
                        str(converted_boxes[i][2]) + ' ' +
                        str(converted_boxes[i][3]) + ' ')
            list_file.write('\n')

        # press q to quit
        if cv2.waitKey(1) == ord('q'):
            break
    vid.release()
    if FLAGS.output:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
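
The tracking loop above depends on convert_boxes, which is imported from the surrounding project rather than shown. A plausible sketch, assuming Deep SORT's usual top-left/width/height pixel format (this is a guess, not the original implementation):

import numpy as np

def convert_boxes_sketch(img, boxes):
    """Normalized [x1, y1, x2, y2] -> pixel [x, y, w, h] for Deep SORT."""
    h, w = img.shape[:2]
    out = []
    for x1, y1, x2, y2 in np.asarray(boxes):
        if x2 - x1 <= 0 or y2 - y1 <= 0:  # skip zero-padded detection slots
            continue
        out.append([x1 * w, y1 * h, (x2 - x1) * w, (y2 - y1) * h])
    return out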
Example #22
def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights).expect_partial()
    print('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    print('classes loaded')

    if FLAGS.tfrecord:
        dataset = load_tfrecord_dataset(FLAGS.tfrecord, FLAGS.classes,
                                        FLAGS.size)
        dataset = dataset.shuffle(512)
        img_raw, _label = next(iter(dataset.take(1)))
        height, width = img_raw.shape[0], img_raw.shape[1]
        raw_images = [img_raw]  # guard: this branch never defined raw_images/height/width for the loop below
    else:
        raw_images = []
        images = FLAGS.images
        for image in images:
            img_raw = tf.image.decode_image(open(image, 'rb').read(),
                                            channels=3)
            height = img_raw.shape[0]
            width = img_raw.shape[1]
            raw_images.append(img_raw)
    num = 0
    print("raw image :", raw_images)
    for raw_img in raw_images:
        num += 1
        img_in = tf.expand_dims(raw_img, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo(img_in)
        t2 = time.time()
        logging.info('time: {}'.format(t2 - t1))

        print('detections:')
        tot = 0
        for i in range(nums[0]):
            if (class_names[int(classes[0][i])] == 'person'):
                tot += 1
                print('\t{}, {}, {}'.format(class_names[int(classes[0][i])],
                                            np.array(scores[0][i]),
                                            np.array(boxes[0][i])))

        #identity only persons
        ind = np.where(classes[0] == 0)[0]
        # print(ind)

        #identify bounding box of only persons
        boxes1 = np.array(boxes)
        person = boxes1[0][ind]

        #total no. of persons
        num = len(person)

        img = cv2.imread('img.png')

        midpoints = [
            mid_point(img, person, i, height, width) for i in range(tot)
        ]

        heights_of_people = [
            height_dist(img, person, i, height, width) for i in range(tot)
        ]

        print("\n\nHeights :", heights_of_people)
        print("Avg height : ", )

        if (len(heights_of_people) != 0):
            avg = sum(heights_of_people) / len(heights_of_people)

        print("\n\nMidpoints:", midpoints)
        dist = compute_distance(midpoints, tot)

        print("\n\ndistance : ", dist)

        if avg >= 100:
            avg = avg * 0.85
        thresh = avg

        p1, p2, d = find_closest(dist, tot, thresh)

        for i in range(len(p1)):
            cv2.line(img, midpoints[p1[i]], midpoints[p2[i]], (88, 43, 237), 2)

        img, count = change_2_red(img, person, p1, p2, height, width)

        df = pd.DataFrame({"p1": p1, "p2": p2, "dist": d})
        print(df)

        total_interaction = int((tot * (tot - 1)) / 2)
        faulty_interaction = len(p1)
        # guard against division by zero when fewer than two people are detected
        sd_index = (faulty_interaction / total_interaction) * 100 if total_interaction else 0.0

        print(sd_index)

        overlay = img.copy()
        output = img.copy()

        img = cv2.rectangle(overlay, (0, 0),
                            (0 + (len("Not following : 100")) * 17, 80),
                            (0, 0, 0), -1)

        img = cv2.putText(img, "Total People : {:.0f}".format(tot), (0, 30),
                          cv2.FONT_HERSHEY_DUPLEX, 1, (255, 255, 255), 1)

        # img = cv2.putText(img, "Following : {:.0f}".format(tot-count), (0 , 60),
        #                   cv2.FONT_HERSHEY_DUPLEX, 1, (255,255,255), 1)

        img = cv2.putText(img, "Not Following : {:.0f}".format(count), (0, 60),
                          cv2.FONT_HERSHEY_DUPLEX, 1, (255, 255, 255), 1)

        alpha = 0.5

        cv2.addWeighted(overlay, alpha, output, 1 - alpha, 0, output)

        cv2.imshow('output', output)

        key = cv2.waitKey(20000)
        if key == 27:  #if ESC is pressed, exit loop
            cv2.destroyAllWindows()

        # cv2.imwrite(FLAGS.output + 'detection_avg_changing' + '.jpg', img)

        print("height : ", height)
        print("width : ", width)
Example #23
def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size, training=True,
                           classes=FLAGS.num_classes)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True, classes=FLAGS.num_classes)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    train_dataset = dataset.load_fake_dataset()
    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(
            FLAGS.dataset, FLAGS.classes, FLAGS.size)
    train_dataset = train_dataset.shuffle(buffer_size=512)
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    val_dataset = dataset.load_fake_dataset()
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(
            FLAGS.val_dataset, FLAGS.classes, FLAGS.size)
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))

    # Configure the model for transfer learning
    if FLAGS.transfer == 'none':
        pass  # Nothing to do
    elif FLAGS.transfer in ['darknet', 'no_output']:
        # Darknet transfer is a special case that works
        # with incompatible number of classes

        # reset top layers
        if FLAGS.tiny:
            model_pretrained = YoloV3Tiny(
                FLAGS.size, training=True, classes=FLAGS.weights_num_classes or FLAGS.num_classes)
        else:
            model_pretrained = YoloV3(
                FLAGS.size, training=True, classes=FLAGS.weights_num_classes or FLAGS.num_classes)
        model_pretrained.load_weights(FLAGS.weights)

        if FLAGS.transfer == 'darknet':
            model.get_layer('yolo_darknet').set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
            freeze_all(model.get_layer('yolo_darknet'))

        elif FLAGS.transfer == 'no_output':
            for l in model.layers:
                if not l.name.startswith('yolo_output'):
                    l.set_weights(model_pretrained.get_layer(
                        l.name).get_weights())
                    freeze_all(l)

    else:
        # All other transfer modes require matching classes
        model.load_weights(FLAGS.weights)
        if FLAGS.transfer == 'fine_tune':
            # freeze darknet and fine tune other layers
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        elif FLAGS.transfer == 'frozen':
            # freeze everything
            freeze_all(model)

    optimizer = tf.keras.optimizers.Adam(learning_rate=FLAGS.learning_rate)
    loss = [YoloLoss(anchors[mask], classes=FLAGS.num_classes)
            for mask in anchor_masks]

    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(
                    zip(grads, model.trainable_variables))

                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_loss.update_state(total_loss)

            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                logging.info("{}_val_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)

            logging.info("{}, train: {}, val: {}".format(
                epoch,
                avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))

            avg_loss.reset_states()
            avg_val_loss.reset_states()
            model.save_weights(
                'checkpoints/yolov3_train_{}.tf'.format(epoch))
    else:
        model.compile(optimizer=optimizer, loss=loss,
                      run_eagerly=(FLAGS.mode == 'eager_fit'))

        callbacks = [
            ReduceLROnPlateau(verbose=1),
            EarlyStopping(patience=3, verbose=1),
            ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                            verbose=1, save_weights_only=True),
            TensorBoard(log_dir='logs')
        ]

        history = model.fit(train_dataset,
                            epochs=FLAGS.epochs,
                            callbacks=callbacks,
                            validation_data=val_dataset)
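
freeze_all is used throughout these training scripts but imported from the project's utils rather than shown. A minimal sketch of the usual recursive implementation (an assumption, included for completeness):

import tensorflow as tf

def freeze_all(model, frozen=True):
    """Recursively mark a layer or model as non-trainable (hypothetical sketch)."""
    model.trainable = not frozen
    if isinstance(model, tf.keras.Model):
        for layer in model.layers:
            freeze_all(layer, frozen)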
Example #24
import time
from absl import app, flags, logging
from absl.flags import FLAGS
import cv2
import tensorflow as tf
from yolov3_tf2.models import (YoloV3, YoloV3Tiny)
from yolov3_tf2.dataset import transform_images
from yolov3_tf2.utils import draw_outputs
import subprocess as sp
import numpy

yolo = YoloV3()

yolo.load_weights("./checkpoints/yolov3.tf")
logging.info('weights loaded')

class_names = [c.strip() for c in open("./data/coco.names").readlines()]
logging.info('classes loaded')

times = []

# Added FFMPEG stuff

FFMPEG_BIN = "ffmpeg"
command = [
    FFMPEG_BIN,
    '-i',
    'fifo264',  # fifo is the named pipe
    '-pix_fmt',
    'bgr24',  # opencv requires bgr24 pixel format.
    '-vcodec',
    # NOTE: the source truncates the command at this point; the closing
    # arguments below are a typical raw-video-to-stdout completion, added only
    # so the snippet parses.
    'rawvideo', '-an', '-f', 'image2pipe', '-'
]
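
subprocess is imported as sp above, but the snippet never shows the pipe being consumed. A sketch of how frames could be read from the command's stdout; the 1288x728 frame size is borrowed from a later example and is an assumption:

pipe = sp.Popen(command, stdout=sp.PIPE, bufsize=10 ** 8)
while True:
    raw = pipe.stdout.read(728 * 1288 * 3)  # one bgr24 frame
    if len(raw) < 728 * 1288 * 3:  # pipe closed or incomplete frame
        break
    frame = numpy.frombuffer(raw, dtype='uint8').reshape((728, 1288, 3))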
Example #25
def main(_argv):
    # Definition of the parameters
    rospy.init_node('tracker', anonymous=True)
    rospy.Subscriber("/new_image_raw", Image, callback)
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1.0

    #initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    #vid = cv2.VideoCapture(0)
    # vid = cv_image

    #cv_image = cv2.VideoCapture(FLAGS.video)

    out = None

    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = 720
        height = 862
        fps = 2
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    count = 0

    while True:
        #_, img = vid.read()

        img = cv_image

        #cv2.imshow("loading image", cv_image)
        #image is comming over here fine

        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            count += 1
            if count < 3:
                continue
            else:
                break

        print("shape = {} , ".format(img.shape))
        print("dtype = {} , ".format(img.dtype))
        #img = np.array(img, dtype=np.uint16)
        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in, steps=1)
        classes = classes[0]
        names = []
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                converted_boxes, scores[0], names, features)
        ]

        #initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) +
                           (len(class_name) + len(str(track.track_id))) * 17,
                           int(bbox[1])), color, -1)
            cv2.putText(img, class_name + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                        (255, 255, 255), 2)

        ### UNCOMMENT BELOW IF YOU WANT CONSTANTLY CHANGING YOLO DETECTIONS TO BE SHOWN ON SCREEN
        #for det in detections:
        #    bbox = det.to_tlbr()
        #    cv2.rectangle(img,(int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(255,0,0), 2)

        # print fps on screen
        fps = (fps + (1. / (time.time() - t1))) / 2
        cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        #cv2.imshow("output", img.astype('float32'))
        cv2.imshow('output', img)
        if FLAGS.output:
            out.write(img)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(converted_boxes) != 0:
                for i in range(0, len(converted_boxes)):
                    list_file.write(
                        str(converted_boxes[i][0]) + ' ' +
                        str(converted_boxes[i][1]) + ' ' +
                        str(converted_boxes[i][2]) + ' ' +
                        str(converted_boxes[i][3]) + ' ')
            list_file.write('\n')

        # press q to quit
        if cv2.waitKey(1) == ord('q'):
            break
    # vid.release()
    if FLAGS.output:
        out.release()
        list_file.close()

    cv2.destroyAllWindows()
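
This node subscribes with a callback and reads a global cv_image, neither of which appears in the snippet. A sketch of a typical cv_bridge callback, under the assumption that is how the image arrives:

from cv_bridge import CvBridge

bridge = CvBridge()
cv_image = None

def callback(msg):
    global cv_image
    cv_image = bridge.imgmsg_to_cv2(msg, desired_encoding='bgr8')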
Example #26
def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size,
                           training=True,
                           classes=FLAGS.num_classes)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True, classes=FLAGS.num_classes)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    post_process_outputs = post_process_block(model.outputs,
                                              classes=FLAGS.num_classes)
    post_process_model = Model(model.inputs, post_process_outputs)

    train_dataset = dataset.load_fake_dataset()
    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(FLAGS.dataset,
                                                      FLAGS.classes,
                                                      FLAGS.size)
    train_dataset = train_dataset.shuffle(buffer_size=512)
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(
        lambda x, y: (dataset.transform_images(x, FLAGS.size), y))
    # dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    val_dataset = dataset.load_fake_dataset()
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(FLAGS.val_dataset,
                                                    FLAGS.classes, FLAGS.size)
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(lambda x, y:
                                  (dataset.transform_images(x, FLAGS.size), y))
    # dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))

    # Configure the model for transfer learning
    if FLAGS.transfer == 'none':
        pass  # Nothing to do
    elif FLAGS.transfer in ['darknet', 'no_output']:
        # Darknet transfer is a special case that works
        # with incompatible number of classes

        # reset top layers
        if FLAGS.tiny:
            model_pretrained = YoloV3Tiny(FLAGS.size,
                                          training=True,
                                          classes=FLAGS.weights_num_classes
                                          or FLAGS.num_classes)
        else:
            model_pretrained = YoloV3(FLAGS.size,
                                      training=True,
                                      classes=FLAGS.weights_num_classes
                                      or FLAGS.num_classes)
        model_pretrained.load_weights(FLAGS.weights)

        if FLAGS.transfer == 'darknet':
            model.get_layer('yolo_darknet').set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
            freeze_all(model.get_layer('yolo_darknet'))

        elif FLAGS.transfer == 'no_output':
            for l in model.layers:
                if not l.name.startswith('yolo_output'):
                    l.set_weights(
                        model_pretrained.get_layer(l.name).get_weights())
                    freeze_all(l)
    else:
        # All other transfer modes require matching classes
        model.load_weights(FLAGS.weights)
        if FLAGS.transfer == 'fine_tune':
            # freeze darknet and fine tune other layers
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        elif FLAGS.transfer == 'frozen':
            # freeze everything
            freeze_all(model)

    optimizer = tf.keras.optimizers.Adam(learning_rate=FLAGS.learning_rate)
    loss = [
        YoloLoss(anchors[mask], classes=FLAGS.num_classes)
        for mask in anchor_masks
    ]

    # (batch_size, grid, grid, anchors, (x, y, w, h, obj, ...cls))
    # model.outputs shape: [[N, 13, 13, 3, 85], [N, 26, 26, 3, 85], [N, 52, 52, 3, 85]]
    # labels shape: ([N, 13, 13, 3, 6], [N, 26, 26, 3, 6], [N, 52, 52, 3, 6])
    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    transf_labels = dataset.transform_targets(
                        labels, anchors, anchor_masks, FLAGS.size)
                    for output, label, loss_fn in zip(outputs, transf_labels,
                                                      loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss,
                                               axis=None) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(zip(grads,
                                              model.trainable_variables))

                log_batch(logging, epoch, batch, total_loss, pred_loss)
                avg_loss.update_state(total_loss)

                if batch >= 100:  # cap training batches per epoch for a quick run
                    break

            true_pos_total = np.zeros(FLAGS.num_classes)
            false_pos_total = np.zeros(FLAGS.num_classes)
            n_pos_total = np.zeros(FLAGS.num_classes)
            for batch, (images, labels) in enumerate(val_dataset):
                # get losses
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                transf_labels = dataset.transform_targets(
                    labels, anchors, anchor_masks, FLAGS.size)
                for output, label, loss_fn in zip(outputs, transf_labels,
                                                  loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss
                log_batch(logging, epoch, batch, total_loss, pred_loss)
                avg_val_loss.update_state(total_loss)

                # get true positives, false positives, and positive labels
                preds = post_process_model(images)
                true_pos, false_pos, n_pos = batch_true_false_positives(
                    preds.numpy(), labels.numpy(), FLAGS.num_classes)
                true_pos_total += true_pos
                false_pos_total += false_pos
                n_pos_total += n_pos

                if batch >= 20:  # cap validation batches likewise
                    break

            # precision-recall by class
            precision, recall = batch_precision_recall(true_pos_total,
                                                       false_pos_total,
                                                       n_pos_total)
            for c in range(FLAGS.num_classes):
                print('Class {} - Prec: {}, Rec: {}'.format(
                    c, precision[c], recall[c]))
            # total precision-recall
            print('Total - Prec: {}, Rec: {}'.format(
                calc_precision(np.sum(true_pos_total),
                               np.sum(false_pos_total)),
                calc_recall(np.sum(true_pos_total), np.sum(n_pos_total))))

            # log losses
            logging.info("{}, train: {}, val: {}".format(
                epoch,
                avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))

            # reset loop and save weights
            avg_loss.reset_states()
            avg_val_loss.reset_states()
            model.save_weights(
                os.path.join(FLAGS.checkpoint_dir, 'yolov3_train_{}.tf'\
                    .format(epoch)))
    else:
        model.compile(optimizer=optimizer,
                      loss=loss,
                      run_eagerly=(FLAGS.mode == 'eager_fit'))

        callbacks = [
            ReduceLROnPlateau(verbose=1),
            EarlyStopping(patience=3, verbose=1),
            ModelCheckpoint(os.path.join(FLAGS.checkpoint_dir,
                                         'yolov3_train_{epoch}.tf'),
                            verbose=1,
                            save_weights_only=True),
            TensorBoard(log_dir=FLAGS.log_dir)
        ]

        history = model.fit(train_dataset,
                            epochs=FLAGS.epochs,
                            callbacks=callbacks,
                            validation_data=val_dataset)
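
calc_precision and calc_recall are referenced above but not defined in the excerpt. Minimal sketches consistent with how they are called:

def calc_precision(tp, fp):
    """Precision = TP / (TP + FP); 0.0 when there are no detections."""
    return tp / (tp + fp) if (tp + fp) > 0 else 0.0

def calc_recall(tp, n_pos):
    """Recall = TP / positives; 0.0 when there are no positive labels."""
    return tp / n_pos if n_pos > 0 else 0.0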
Example #27
classes_path = './data/labels/coco.names'
weights_path = './weights/yolov3.tf'
tiny = False                    # set to True if using a Yolov3 Tiny model
size = 416                      # size images are resized to for model
output_path = 'static/detections/'   # path to output folder where images with detections are saved
num_classes = 80                # number of classes in model

# load in weights and classes
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if len(physical_devices) > 0:
	tf.config.experimental.set_memory_growth(physical_devices[0], True)

if tiny:
	yolo = YoloV3Tiny(classes=num_classes)
else:
	yolo = YoloV3(classes=num_classes)

yolo.load_weights(weights_path).expect_partial()
print('weights loaded')

class_names = [c.strip() for c in open(classes_path).readlines()]
print('classes loaded')

APP_ROOT = os.path.dirname(os.path.abspath(__file__))
upload = os.getcwd() + '/uploads/'

app = Flask(__name__)
dropzone = Dropzone(app)

video_camera = None
global_frame = None
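
The Flask/Dropzone scaffolding above stops before any route is defined. A minimal sketch of a detection endpoint it could serve; the route name, response shape, and the availability of transform_images in this module are all assumptions:

from flask import request, jsonify

@app.route('/detect', methods=['POST'])
def detect():
    f = request.files['file']
    path = os.path.join(upload, f.filename)
    f.save(path)
    img_raw = tf.image.decode_image(open(path, 'rb').read(), channels=3)
    img = transform_images(tf.expand_dims(img_raw, 0), size)
    boxes, scores, classes, nums = yolo(img)
    detections = [{'class': class_names[int(classes[0][i])],
                   'score': float(scores[0][i])} for i in range(nums[0])]
    return jsonify(detections)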
Example #28
def main(_argv):
    if FLAGS.tiny:
        yolo = YoloV3Tiny()
    else:
        yolo = YoloV3()

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    times = []

    # Connect to the broker
    broker = "ampq://*****:*****@10.10.2.1:30000"
    channel = Channel(broker)

    # Subscribe to the desired topic
    subscription = Subscription(channel)
    camera_id = "CameraGateway." + FLAGS.camera + ".Frame"
    subscription.subscribe(topic=camera_id)

    #fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    #fourcc = cv2.VideoWriter_fourcc(*'XVID')
    fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
    out = cv2.VideoWriter(FLAGS.output, fourcc, 5.0, (1288, 728))
    for i in range(FLAGS.nframes):

        msg = channel.consume()
        img = msg.unpack(Image)
        img = get_np_image(img)
        img_to_draw = img

        #img = tf.image.decode_image(img, channels=3)
        img = tf.expand_dims(img, 0)
        img = transform_images(img, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img)
        t2 = time.time()
        times.append(t2 - t1)
        times = times[-20:]

        for j in range(nums[0]):  # renamed from `i`, which shadowed the frame loop variable
            logging.info('\t{}, {}, {}'.format(class_names[int(classes[0][j])],
                                               np.array(scores[0][j]),
                                               np.array(boxes[0][j])))
        rects = get_rects(img_to_draw, (boxes, scores, classes, nums))

        img_to_draw = draw_outputs(img_to_draw, (boxes, scores, classes, nums),
                                   class_names)

        # NOTE: centroidTracker is never constructed in this snippet; it is
        # presumably a centroid-tracker instance created at module level.
        objects = centroidTracker.update(rects)

        # loop over the tracked objects
        for (objectID, centroid) in objects.items():
            # draw both the ID of the object and the centroid of the
            # object on the output frame
            text = "{}".format(objectID)
            cv2.putText(img_to_draw, text, (centroid[0], centroid[1]),
                        cv2.FONT_HERSHEY_COMPLEX, 1, (0, 240, 0), 4)
            #cv2.circle(frame, (centroid[0], centroid[1]), 3, (0, 255, 0), -1)

        out.write(img_to_draw)

    out.release()
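
get_rects and centroidTracker come from outside this snippet. A sketch of what get_rects plausibly does, given that the centroid tracker consumes pixel rectangles (a guess, not the original implementation):

import numpy as np

def get_rects(img, outputs):
    """Normalized YOLO boxes -> pixel (x1, y1, x2, y2) tuples."""
    boxes, scores, classes, nums = outputs
    h, w = img.shape[:2]
    scale = np.array([w, h, w, h])
    return [tuple((np.array(boxes[0][i]) * scale).astype(int))
            for i in range(nums[0])]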
Example #29
def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)
    yolo = YoloV3(classes=total_number_of_logos) # number of classes/logos, needs to be updated if another logo is added
    yolo.load_weights('./weights/yolov3-custom.tf').expect_partial() # file path to weights
    class_names = [c.strip() for c in open('./data/labels/custom.names').readlines()] # file path to classes list, needs to be updated if another logo is added
    count = FLAGS.count if FLAGS.count else 0  # guard: `count` was undefined when the flag was unset
    excel = []
    images = []
    for i in range(count):
        con = convert_from_path('data/pdf/test (' + str(i+1) + ').pdf', output_folder='data/images', fmt="jpg", single_file=True, output_file='test (' + str(i+1) + ')')
        excel.append('data/excel/test (' + str(i+1) + ').xlsx')
        images.append('data/images/test (' + str(i+1) + ').jpg')
    raw_images = []
    for image in images:
        img_raw = tf.image.decode_image(
            open(image, 'rb').read(), channels=3)
        raw_images.append(img_raw)
    i = 0 # index number for main loop
    logos = [] # list of detected logos for each image
    approvals = [] # list of excel data for each image
    for raw_img in raw_images:
        img = tf.expand_dims(raw_img, 0)
        img = transform_images(img, 416) # image size
        t1 = time.time()
        boxes, scores, classes, nums = yolo(img)
        t2 = time.time()
        logging.info('time: {}'.format(t2 - t1))
        img = cv2.cvtColor(raw_img.numpy(), cv2.COLOR_RGB2BGR)
        img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
        cv2.imwrite('./detections/detection (' + str(i+1) + ').jpg', img) # image output
# LABEL EXTRACTION
        temp_names = [] # temporary list for each image's logo detections
        for j in range(nums[0]):
            repeat = True
            temp_pair = [] # temporary list for each logo and its status
            if (j > 0):
                for k in range(len(temp_names)):
                    if (class_names[int(classes[0][j])] == temp_names[k][0]):
                        repeat = False
                        break
            if (repeat): # if not a repeated logo, update main logo list
                temp_pair.append(class_names[int(classes[0][j])]) # append logo 
                temp_pair.append(False) # append status
                temp_names.append(temp_pair) # append pair
        logos.append(temp_names) # append names list to main logo list
# EXCEL EXTRACTION
        wb = load_workbook(excel[i])
        sheet = wb.active
        rows = sheet.max_row
        temp_sheet = [] # temporary list for each image's excel data
        for j in range(rows-1):
            temp_rows = [] # temporary list for each row's excel data
            temp_rows.append(str(sheet.cell(row=j+2, column=4).value).upper().strip())
            temp_rows.append(str(sheet.cell(row=j+2, column=5).value).upper().strip())
            temp_rows.append("00FF0000") # Red by default
            temp_sheet.append(temp_rows)
        approvals.append(temp_sheet) # append sheet list to main approvals list
# EXCEL TRANSLATION
        for j in range(len(approvals[i])):
            if (approvals[i][j][0] in extola):
                temp_trans = extola[approvals[i][j][0]]
            else:
                temp_trans = ["NAL"] # No Associated Logo
            approvals[i][j][0] = temp_trans
# EXCEL COMPARED TO LABEL
# "APPROVAL STATUS"             "On label"   "Not on label"
# "APPROVED"                    "Green"     "Red"
# "NO REQUIREMENTS"             "Red"       "Green"
# "APPROVAL NOT APPLICABLE"     "Red"       "Green"
# "APPROVAL NOT REQUIRED"       "Red"       "Green"
# "CONTACT CISCO PARTNER/IOR"   "Red"       "Green"
# "NOT APPROVED"                "Red"       "Green"
# "PENDING"                     "Red"       "Green"
# "RENEWAL IN PROGESS"          "Red"       "Green"
# "NONE"/"UNKNOWN"              "Red"       "Red"
# 
# "00FF0000" (Red) needs attention
# "0000FF00" (Green) good to go
# "000000FF" (Blue) missing logo
#         
        for j in range(len(approvals[i])):
            flag = True
            k = 0
            temp_count = 0
            while (flag):
                if (k == len(logos[i])): # logo not on label
                    flag = False
                    if (approvals[i][j][1] == "APPROVED"):
                        approvals[i][j][2] = "00FF0000" # Red
                    elif ((approvals[i][j][1] == "APPROVAL NOT APPLICABLE")or(approvals[i][j][1] == "APPROVAL NOT REQUIRED")or(approvals[i][j][1] == "CONTACT CISCO PARTNER/IOR")or
                          (approvals[i][j][1] == "NOT APPROVED")or(approvals[i][j][1] == "PENDING")or(approvals[i][j][1] == "RENEWAL IN PROGESS")or(approvals[i][j][1] == "NO REQUIREMENTS")):
                        approvals[i][j][2] = "0000FF00" # Green
                    elif ((approvals[i][j][1] == "NONE")or(approvals[i][j][1] == "UNKNOWN")):
                        approvals[i][j][2] = "00FF0000" # Red
                        sheet.cell(row=j+2, column=5).value = "Unknown"
                elif (approvals[i][j][0][0] == "NAL"): # no logo to detect
                    flag = False
                    if ((approvals[i][j][1] == "APPROVAL NOT APPLICABLE")or(approvals[i][j][1] == "APPROVAL NOT REQUIRED")or(approvals[i][j][1] == "CONTACT CISCO PARTNER/IOR")or
                        (approvals[i][j][1] == "NOT APPROVED")or(approvals[i][j][1] == "PENDING")or(approvals[i][j][1] == "RENEWAL IN PROGESS")or(approvals[i][j][1] == "APPROVED")or(approvals[i][j][1] == "NO REQUIREMENTS")):
                        approvals[i][j][2] = "0000FF00" # Green
                    elif ((approvals[i][j][1] == "NONE")or(approvals[i][j][1] == "UNKNOWN")):
                        approvals[i][j][2] = "00FF0000" # Red
                        sheet.cell(row=j+2, column=5).value = "Unknown"  
                else: # continue or logo on label
                    for X in range(len(approvals[i][j][0])):
                        if (approvals[i][j][0][X] == logos[i][k][0]): # logo on label
                            logos[i][k][1] = True
                            temp_count+=1
                            if (temp_count == len(approvals[i][j][0])):
                                flag = False
                            if ((approvals[i][j][1] == "APPROVAL NOT APPLICABLE")or(approvals[i][j][1] == "APPROVAL NOT REQUIRED")or(approvals[i][j][1] == "CONTACT CISCO PARTNER/IOR")or
                                (approvals[i][j][1] == "NOT APPROVED")or(approvals[i][j][1] == "PENDING")or(approvals[i][j][1] == "RENEWAL IN PROGESS")):
                                approvals[i][j][2] = "00FF0000" # Red
                            elif ((temp_count == len(approvals[i][j][0]))and(approvals[i][j][1] == "APPROVED")or(approvals[i][j][1] == "NO REQUIREMENTS")):
                                approvals[i][j][2] = "0000FF00" # Green
                            elif ((approvals[i][j][1] == "NONE")or(approvals[i][j][1] == "UNKNOWN")):
                                approvals[i][j][2] = "00FF0000" # Red
                                sheet.cell(row=j+2, column=5).value = "Unknown"
                k+=1
            sheet.cell(row=j+2, column=5).fill = PatternFill(start_color=approvals[i][j][2], end_color=approvals[i][j][2], fill_type='solid')
# LABEL COMPARED TO EXCEL
        new_row=1
        for j in range(len(logos[i])):
            if (logos[i][j][1] == False): # not on excel so add it in a new row
                sheet.cell(row=new_row+rows, column=1).value = str(sheet.cell(row=rows, column=1).value) #1 Product Name
                sheet.cell(row=new_row+rows, column=3).value = str(sheet.cell(row=rows, column=3).value) #3 Desc
                sheet.cell(row=new_row+rows, column=4).value = logos[i][j][0] #4 Country
                sheet.cell(row=new_row+rows, column=5).value = "Unknown" #5 Approval Status
                sheet.cell(row=new_row+rows, column=5).fill = PatternFill(start_color="000000FF", end_color="000000FF", fill_type='solid') #5 Blue
                for k in range(5):
                    sheet.cell(row=new_row+rows, column=k+6).value = str(sheet.cell(row=rows, column=k+6).value) #6-10
                new_row+=1
        wb.save(excel[i])
        i+=1
# DISPLAY
    for j in range(i):
        print("\nL" + str(j+1) + ": ", end="")
        temp_print = []
        for k in range(len(logos[j])):
            temp_print.append(logos[j][k][0])
        print(temp_print, "\nE" + str(j+1) + ": ", end="")
        temp_print = []
        for k in range(len(approvals[j])):
            temp_print.append(approvals[j][k][0])
        print(temp_print)
    print("")
Example #30
def main(_argv):
  physical_devices = tf.config.experimental.list_physical_devices('GPU')
  if len(physical_devices) > 0:
    tf.config.experimental.set_memory_growth(physical_devices[0], True)

  if FLAGS.model_name:
    logging.info("loading model %s" % FLAGS.model_name)
    yolo = tf.saved_model.load(FLAGS.model_name)
    logging.info("model loaded")
  else:
    if FLAGS.tiny:
      yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
      yolo = YoloV3(classes=FLAGS.num_classes)
    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')


  class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
  logging.info('classes loaded')

  nfp = 0  # frames processed
  npp = 0  # frames with at least one prediction
  nxp = 0  # frames with a complete (4-box) prediction
  true_positive = 0
  false_positive = 0
  false_negative = 0

  ds = FLAGS.dataset
  if FLAGS.save == 'all':
    records = ["data/%s.test.record" % ds,
     "data/%s.train.record" % ds,
     "data/%s.val.record" % ds]
  else:
    records = ["data/%s.test.record" % ds]

  for raw in tf.data.TFRecordDataset(records):
    record = tf.train.Example()
    record.ParseFromString(raw.numpy())
    name = record.features.feature['image/filename'].bytes_list.value[0].decode("utf-8")
    fn = name[name.index('/') + 1:]

    if not os.path.exists('data/' + name):
      continue

    print("Record", name)
    img = tf.image.decode_image(open('data/' + name, 'rb').read(), channels=3)
    img = tf.expand_dims(img, 0)
    img = transform_images(img, FLAGS.size)
    wh = np.flip(img.shape[1:3])  # (width, height); the source sliced [0:2], which grabs the batch dim

    xmin = record.features.feature['image/object/bbox/xmin'].float_list.value
    xmax = record.features.feature['image/object/bbox/xmax'].float_list.value
    ymin = record.features.feature['image/object/bbox/ymin'].float_list.value
    ymax = record.features.feature['image/object/bbox/ymax'].float_list.value
    xx = []
    yy = []
    for i in range(0, len(xmin)):
      xx.append(wh[0] * (xmin[i] + xmax[i]) / 2)
      yy.append(wh[1] * (ymin[i] + ymax[i]) / 2)

    if FLAGS.model_name:
      out = yolo(img)
      # transformOutput / transformOutputTiny are not defined in this snippet;
      # presumably they unpack the saved_model outputs into
      # (boxes, scores, classes, nums)
      if FLAGS.tiny:
        boxes, scores, classes, nums = transformOutputTiny(out[0], out[1])
      else:
        boxes, scores, classes, nums = transformOutput(out[0], out[1])
    else:
      boxes, scores, classes, nums = yolo(img)

    doc = np.zeros(len(xx))        # 1 where a ground-truth point was matched
    retrieved = np.zeros(nums[0])  # 1 where a prediction was matched (renamed from `retrive`)
    threshold = 20                 # max centre distance in pixels to count a match


    nfp += 1
    if nums[0] > 0:
      npp += 1
      img = cv2.imread('data/' + name)
      img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
      ofn = 'data/predict/' + fn
      if FLAGS.save == 'all' or FLAGS.save == 'test':
        cv2.imwrite(ofn, img)
        logging.info('output saved to: {}'.format(ofn))
      for i in range(0, len(xx)):
        for j in range(nums[0]):
          if classes[0][j] != i:
            continue
          x1y1 = (np.array(boxes[0][j][0:2]) * wh).astype(np.int32)
          x2y2 = (np.array(boxes[0][j][2:4]) * wh).astype(np.int32)

          x = (x1y1[0] + x2y2[0]) / 2
          y = (x1y1[1] + x2y2[1]) / 2
          d = math.sqrt(math.pow((x - xx[i]), 2) + math.pow((y - yy[i]), 2))
          if d < threshold:
            doc[i] = 1
            retrieved[j] = 1
            break

    tp = sum(doc)
    fneg = len(doc) - sum(doc)  # renamed from `fn`, which shadowed the filename above
    fp = len(retrieved) - sum(retrieved)
    true_positive += tp
    false_positive += fp
    false_negative += fneg
    print("TP: %d, FP: %d, FN: %d" % (tp, fp, fneg))

    if FLAGS.save == 'all':
      logging.info('detections')
      for i in range(nums[0]):
        logging.info('\t{}, {}, {}'.format(class_names[int(classes[0][i])],
                                           np.array(scores[0][i]),
                                           np.array(boxes[0][i])))

    if nums[0] == 4:
      nxp += 1

  print("%d processed. %d some prediction. %d (%1.0f %%) has complete prediction" % (nfp, npp, nxp, 100 * nxp / nfp))

  den_p = true_positive + false_positive
  den_r = true_positive + false_negative
  precision = true_positive / den_p if den_p else 0.0  # guard: no detections at all
  recall = true_positive / den_r if den_r else 0.0
  print("Precision %1.2f %%, Recall: %1.2f %%" % (100 * precision, 100 * recall))