Example 1
def main(argv):
    # print("location recieved in main as: ", e)
    ###################################
    global VIOLATION_PERCENTAGE, PROCESSING_STATUS, VIOLATION_FRAME
    violator_count_list = list()
    ###################################
    # Definition of the parameters
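    # max_cosine_distance: appearance-matching gate for Deep SORT;
    # nn_budget: cap on stored appearance features per track (None = unlimited);
    # nms_max_overlap: IoU threshold for non-maximum suppression (1.0 keeps all boxes)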
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1.0

    #initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)
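    # Deep SORT pipeline: the encoder embeds each detection crop into an
    # appearance feature, the cosine metric matches features across frames,
    # and the Kalman-filter-based Tracker assigns persistent track IDs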

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    yolo = YoloV3(classes=80)

    yolo.load_weights('./weights/yolov3.tf')
    logging.info('weights loaded')

    class_names = [c.strip() for c in open('./coco.names').readlines()]
    logging.info('classes loaded')
    video_path = 'test.mkv'  # note: unused; the capture below reads from FILE_URL

    try:
        # FILE_URL may be a webcam index (int) or a file path / stream URL
        vid = cv2.VideoCapture(int(FILE_URL))
    except ValueError:
        vid = cv2.VideoCapture(FILE_URL)
    time.sleep(1.0)

    out = None

    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    print("height: ", height)
    print("width: ", width)
    fps = int(vid.get(cv2.CAP_PROP_FPS))
    codec = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter('./result.avi', codec, fps, (width, height))
    frame_index = -1
    fps = 0.0
    count = 0
    PROCESSING_STATUS = True
    while True:
        _, img = vid.read()
        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            count += 1
            if count < 3:
                continue
            else:
                break
        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, 416)
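        # transform_images resizes the frame to the 416x416 network input and
        # scales pixel values to [0, 1] for YOLO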
        temp_violators = set()
        temp_total_people = set()
        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        classes = classes[0]
        names = []
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                converted_boxes, scores[0], names, features)
        ]

        #initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maximum suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)
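        # Only confirmed tracks updated in this frame are considered below;
        # tentative or stale tracks are skipped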
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            class_name1 = track.get_class()
            if class_name1 == "person":
                temp_total_people.add(track.track_id)
                bbox1 = track.to_tlbr()
                x1_c = int(bbox1[0] + (bbox1[2] - bbox1[0]) / 2)
                y1_c = int(bbox1[1] + (bbox1[3] - bbox1[1]) / 2)
                r1 = int(abs(bbox1[3] - bbox1[1]))
                color = (255, 0, 0)
                cv2.line(img, (x1_c, y1_c), (x1_c, y1_c + r1 // 2),
                         (0, 255, 0), 2)
                cv2.circle(img, (x1_c, y1_c), 5, (255, 20, 200), -1)
                scale = (r1) / 100
                transparentOverlay(img,
                                   dst_circle, (x1_c, y1_c - 5),
                                   alphaVal=110,
                                   color=(0, 200, 20),
                                   scale=scale)
                for other in tracker.tracks:
                    if not other.is_confirmed() or other.time_since_update > 1:
                        continue
                    if track.track_id == other.track_id:
                        continue

                    class_name2 = other.get_class()
                    if class_name2 == "person":
                        temp_total_people.add(other.track_id)
                        bbox2 = other.to_tlbr()
                        x2_c = int(bbox2[0] + (bbox2[2] - bbox2[0]) / 2)
                        y2_c = int(bbox2[1] + (bbox2[3] - bbox2[1]) / 2)
                        r2 = int(abs(bbox2[3] - bbox2[1]))
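                        # Distancing heuristic: each person is modelled as a
                        # circle of radius half their bounding-box height;
                        # int_circle is assumed to return >= 0 when the two
                        # circles intersect, and the |y1 - y2| check restricts
                        # the comparison to people at a similar depth in the
                        # scene (a rough same-ground-plane test)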
                        if int_circle(x1_c, y1_c, x2_c, y2_c, r1 // 2, r2 //
                                      2) >= 0 and abs(y1_c - y2_c) < r1 // 4:
                            temp_violators.add(track.track_id)
                            temp_violators.add(other.track_id)
                            cv2.line(img, (x1_c, y1_c), (x2_c, y2_c),
                                     (0, 0, 255), 2)
                            scale1 = (r1) / 100
                            transparentOverlay(img,
                                               dst_circle, (x1_c, y1_c - 5),
                                               alphaVal=110,
                                               color=(0, 0, 255),
                                               scale=scale1)
                            scale2 = (r2) / 100
                            transparentOverlay(img,
                                               dst_circle, (x2_c, y2_c - 5),
                                               alphaVal=110,
                                               color=(0, 0, 255),
                                               scale=scale2)

        # print fps on screen
        ### Comment out the three lines below to disable the live output window
        fps = (fps + (1. / (time.time() - t1))) / 2
        cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        cv2.imshow('output', img)

        ### Violators calculation
        violators_for_frame = len(temp_violators)
        VIOLATION_PERCENTAGE = violators_for_frame  # note: despite the name, this stores the raw violator count
        print("Violators in frame: ", violators_for_frame)
        violator_count_list.append(int(violators_for_frame))
        ###
        ### Call to firebase upload function
        # if violators_for_frame > 20:
        #     social_dist_violation_frame_handler(img)
        #     cv2.imwrite("temp.png",img)
        #     firebase_upload("temp.png")
        #     os.remove("temp.png")

        out.write(img)  # write the annotated frame (the original created `out` but never wrote to it)
        frame_index = frame_index + 1

        # press q to quit
        if cv2.waitKey(1) == ord('q'):
            break
    vid.release()
    if len(violator_count_list) == 0:
        mean_violation = 0
    else:
        mean_violation = sum(violator_count_list) / len(violator_count_list)
    PROCESSING_STATUS = False
    out.release()
    cv2.destroyAllWindows()
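
Note: int_circle and transparentOverlay are project helpers that do not appear in this listing. A minimal sketch of what int_circle is assumed to do (return a non-negative value exactly when the two circles overlap):

import math

def int_circle(x1, y1, x2, y2, r1, r2):
    # Non-negative when the circle centred at (x1, y1) with radius r1
    # intersects the circle centred at (x2, y2) with radius r2.
    return (r1 + r2) - math.hypot(x2 - x1, y2 - y1)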
Example 2
def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    times = []

    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except ValueError:
        vid = cv2.VideoCapture(FLAGS.video)

    out = None

    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    while True:
        _, img = vid.read()

        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            continue

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        t2 = time.time()
        times.append(t2 - t1)
        times = times[-20:]

        img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
        img = cv2.putText(
            img, "FPS: {:.2f}".format(1 / (sum(times) / len(times)) * 1),
            (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        if FLAGS.output:
            out.write(img)
        cv2.imshow('output', img)
        if cv2.waitKey(1) == ord('q'):
            break

    cv2.destroyAllWindows()
Example 3
def main(_argv):
    if FLAGS.mode == "eager_tf":
        tf.compat.v1.enable_eager_execution()

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size,
                           training=True,
                           classes=FLAGS.num_classes)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True, classes=FLAGS.num_classes)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    if FLAGS.trace:
        run_options = tf.compat.v1.RunOptions(
            output_partition_graphs=True,
            trace_level=tf.compat.v1.RunOptions.FULL_TRACE)
        run_metadata = tf.compat.v1.RunMetadata()
        trace_dir = os.path.join("traces", "training")
        if not os.path.isdir(trace_dir):
            os.makedirs(trace_dir)
        graphs_dir = os.path.join("traces", "training", "graphs")
        if not os.path.isdir(graphs_dir):
            os.makedirs(graphs_dir)
    else:
        run_options = None
        run_metadata = None

    train_dataset = dataset.load_fake_dataset()
    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(FLAGS.dataset,
                                                      FLAGS.classes,
                                                      FLAGS.size)
    train_dataset = train_dataset.shuffle(buffer_size=512)
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
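    # transform_targets distributes the ground-truth boxes over the three YOLO
    # output scales according to anchor_masks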
    train_dataset = train_dataset.repeat()
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    val_dataset = dataset.load_fake_dataset()
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(FLAGS.val_dataset,
                                                    FLAGS.classes, FLAGS.size)
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
    val_dataset = val_dataset.repeat()

    # TF2 doesn't need this, but we're using TF1.15.
    if FLAGS.mode == "fit":
        sess = tf.keras.backend.get_session()
        sess.run(tf.compat.v1.global_variables_initializer(),
                 options=run_options,
                 run_metadata=run_metadata)
        if FLAGS.trace:
            fetched_timeline = timeline.Timeline(run_metadata.step_stats)
            chrome_trace = fetched_timeline.generate_chrome_trace_format()
            with open(os.path.join(trace_dir, f"variables_init.json"),
                      'w') as f:
                f.write(chrome_trace)
            for i in range(len(run_metadata.partition_graphs)):
                with open(
                        os.path.join(graphs_dir,
                                     f"variables_init_partition_{i}.pbtxt"),
                        'w') as f:
                    f.write(str(run_metadata.partition_graphs[i]))

        sess.run(tf.compat.v1.tables_initializer(),
                 options=run_options,
                 run_metadata=run_metadata)
        if FLAGS.trace:
            fetched_timeline = timeline.Timeline(run_metadata.step_stats)
            chrome_trace = fetched_timeline.generate_chrome_trace_format()
            with open(os.path.join(trace_dir, f"table_init.json"), 'w') as f:
                f.write(chrome_trace)
            for i in range(len(run_metadata.partition_graphs)):
                with open(
                        os.path.join(graphs_dir,
                                     f"table_init_partition_{i}.pbtxt"),
                        'w') as f:
                    f.write(str(run_metadata.partition_graphs[i]))

    # Configure the model for transfer learning
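    # ('none' trains from scratch; 'darknet' copies and freezes the backbone;
    # 'no_output' copies and freezes all but the output layers; 'fine_tune'
    # loads all weights and freezes only the backbone; 'frozen' loads all
    # weights and freezes the whole model)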
    if FLAGS.transfer == 'none':
        pass  # Nothing to do
    elif FLAGS.transfer in ['darknet', 'no_output']:
        # Darknet transfer is a special case that works
        # with incompatible number of classes

        # reset top layers
        if FLAGS.tiny:
            model_pretrained = YoloV3Tiny(FLAGS.size,
                                          training=True,
                                          classes=FLAGS.weights_num_classes
                                          or FLAGS.num_classes)
        else:
            model_pretrained = YoloV3(FLAGS.size,
                                      training=True,
                                      classes=FLAGS.weights_num_classes
                                      or FLAGS.num_classes)
        model_pretrained.load_weights(FLAGS.weights)

        if FLAGS.transfer == 'darknet':
            model.get_layer('yolo_darknet').set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
            freeze_all(model.get_layer('yolo_darknet'))

        elif FLAGS.transfer == 'no_output':
            for l in model.layers:
                if not l.name.startswith('yolo_output'):
                    l.set_weights(
                        model_pretrained.get_layer(l.name).get_weights())
                    freeze_all(l)

    else:
        # All other transfer require matching classes
        model.load_weights(FLAGS.weights)
        if FLAGS.transfer == 'fine_tune':
            # freeze darknet and fine tune other layers
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        elif FLAGS.transfer == 'frozen':
            # freeze everything
            freeze_all(model)

    optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)
    loss = [
        YoloLoss(anchors[mask], classes=FLAGS.num_classes)
        for mask in anchor_masks
    ]

    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(zip(grads,
                                              model.trainable_variables))

                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_loss.update_state(total_loss)

            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                logging.info("{}_val_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)

            logging.info("{}, train: {}, val: {}".format(
                epoch,
                avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))

            avg_loss.reset_states()
            avg_val_loss.reset_states()
            model.save_weights('checkpoints/yolov3_train_{}.tf'.format(epoch))
    else:
        model.compile(optimizer=optimizer,
                      loss=loss,
                      run_eagerly=(FLAGS.mode == 'eager_fit'),
                      options=run_options,
                      run_metadata=run_metadata)

        callbacks = [
            ReduceLROnPlateau(verbose=1),
            EarlyStopping(patience=3, verbose=1),
            ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                            verbose=1,
                            save_weights_only=True),
        ]

        class TraceCallback(tf.keras.callbacks.Callback):
            def on_epoch_begin(self, epoch, logs=None):
                self.current_epoch = epoch

            def on_train_batch_end(self, batch, logs=None):
                fetched_timeline = timeline.Timeline(run_metadata.step_stats)
                chrome_trace = fetched_timeline.generate_chrome_trace_format()
                with open(
                        os.path.join(
                            trace_dir,
                            f"training_epoch_{self.current_epoch}_batch_{batch}.json"
                        ), 'w') as f:
                    f.write(chrome_trace)
                # No need to dump graph partitions for every batch; they should be identical.
                if batch == 0:
                    for i in range(len(run_metadata.partition_graphs)):
                        with open(
                                os.path.join(graphs_dir,
                                             f"training_partition_{i}.pbtxt"),
                                'w') as f:
                            f.write(str(run_metadata.partition_graphs[i]))

        if FLAGS.trace:
            callbacks.append(TraceCallback())
        else:
            callbacks.append(TensorBoard(write_graph=False, log_dir="logs"))

        history = model.fit(
            train_dataset,
            epochs=FLAGS.epochs,
            callbacks=callbacks,
            validation_data=val_dataset,
            steps_per_epoch=FLAGS.num_samples // FLAGS.batch_size,
            validation_steps=FLAGS.num_val_samples // FLAGS.batch_size)
Example 4
def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    times = []

    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except ValueError:
        vid = cv2.VideoCapture(FLAGS.video)

    out = None

    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
    fps = 0.0
    count = 0
    while True:
        _, img = vid.read()

        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            count += 1
            if count < 3:
                continue
            else:
                break

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        n = 1

        if cv2.waitKey(1) == ord('a'):
            while True:
                _, img = vid.read()

                if (check_blur(img,
                               threshold=200) == 0):  #ADJUST THRESHOLD HERE
                    cv2.imshow('output', cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
                    print("blurred")
                    continue

                img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                img_in = tf.expand_dims(img_in, 0)
                img_in = transform_images(img_in, FLAGS.size)

                t1 = time.time()
                boxes, scores, classes, nums = yolo.predict(img_in)
                fps = (fps + (1. / (time.time() - t1))) / 2

                img = draw_outputs(img, (boxes, scores, classes, nums),
                                   class_names)
                cv2.imshow('output', img)

                text = ""
                for i in range(80):
                    if scores[0][i] == 0:
                        break
                    Class = int(classes[0][i])
                    place = (
                        (boxes[0][i][2] - boxes[0][i][0]) / 2) + boxes[0][i][0]
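                    # place is the horizontal centre of the box in normalized
                    # [0, 1] image coordinates; splitting the frame into thirds
                    # maps it to left / center / right below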
                    print(place, class_names[Class])
                    if place < .33:
                        side = 'left'
                    elif place < .66:
                        side = 'center'
                    else:
                        side = 'right'
                    if side == 'center':
                        text = text + " There is a " + class_names[
                            Class] + ' in the ' + side + '.'
                    else:
                        text = text + " There is a " + class_names[
                            Class] + ' on the ' + side + '.'

                try:
                    # text = "This is a test."
                    speech = gTTS(text=text, slow=False)
                    speech.save(
                        'C:\\Users\\HARINI\\Object-Detection-API\\audio\\text'
                        + str(n) +
                        '.wav')  # CHANGE THESE 2 PATHS TO YOUR OWN PATH
                    os.system(
                        'C:\\Users\\HARINI\\Object-Detection-API\\audio\\text'
                        + str(n) + '.wav')
                    n = n + 1
                except Exception:
                    continue

                if not (waitf(15)):
                    break

        if FLAGS.output:
            out.write(img)
        cv2.imshow('output', img)
        if cv2.waitKey(1) == ord('q'):
            break

    cv2.destroyAllWindows()
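
Note: check_blur and waitf are helpers defined elsewhere in this project. A plausible check_blur, assuming the common variance-of-Laplacian sharpness test (it returns 0 for a blurred frame, matching how it is called above):

import cv2

def check_blur(img, threshold=200):
    # Variance of the Laplacian is a standard sharpness measure:
    # low variance means few strong edges, i.e. a blurred frame.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    return 0 if cv2.Laplacian(gray, cv2.CV_64F).var() < threshold else 1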
Example 5
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1.0

    #initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except ValueError:
        vid = cv2.VideoCapture(FLAGS.video)

    out = None

    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    count = 0
    while True:
        _, img = vid.read()

        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            count += 1
            if count < 3:
                continue
            else:
                break

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        classes = classes[0]
        names = []
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                converted_boxes, scores[0], names, features)
        ]

        #initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maximum suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)
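        # predict() advances each track's Kalman state; update() matches the
        # new detections to existing tracks and spawns or ages tracks as needed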

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) +
                           (len(class_name) + len(str(track.track_id))) * 17,
                           int(bbox[1])), color, -1)
            cv2.putText(img, class_name + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                        (255, 255, 255), 2)
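            # the filled rectangle above sizes the label background at roughly
            # 17 px per character of the "class-id" caption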

        ### UNCOMMENT BELOW IF YOU WANT CONSTANTLY CHANGING YOLO DETECTIONS TO BE SHOWN ON SCREEN
        #for det in detections:
        #    bbox = det.to_tlbr()
        #    cv2.rectangle(img,(int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(255,0,0), 2)

        # print fps on screen
        fps = (fps + (1. / (time.time() - t1))) / 2
        cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        #         cv2.imshow('output', img)
        if FLAGS.output:
            out.write(img)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(converted_boxes) != 0:
                for i in range(0, len(converted_boxes)):
                    list_file.write(
                        str(converted_boxes[i][0]) + ' ' +
                        str(converted_boxes[i][1]) + ' ' +
                        str(converted_boxes[i][2]) + ' ' +
                        str(converted_boxes[i][3]) + ' ')
            list_file.write('\n')

        # press q to quit
        # if cv2.waitKey(1) == ord('q'):
        #     break
    vid.release()
    if FLAGS.output:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
Example 6
def main(_argv):
    counta = 0
    count = 0
    dorsch_counter = 0
    steinbutt_counter = 0
    kliesche_counter = 0
    herring_counter = 0
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    times = []

    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except ValueError:
        vid = cv2.VideoCapture(FLAGS.video)

    out = None

    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
    fps = 0.0
    count = 0
    while True:
        _, img = vid.read()
        img_raw = img
        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            count += 1
            if count < 3:
                continue
            else:
                break

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        #img_raw = img_in
        #img_in = img_in[336:535, 787:1198] #cod first
        #img_in = img_in[365:555, 750:1278] #cod second vid
        # img_in = img_in[387:634, 739:1218] #flat_fish
        #img_in = img_in[344: 513, 766: 1042] #steinbutt
        #img_in = img_in[331:520, 789:1065] #liman
        #img_in = img_in[420: 580, 751: 1226] # cod_anne
        img_in = img_in[365:555, 750:1278]  # cod_new
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)
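        # Detection runs only on the hard-coded conveyor crop selected above;
        # the annotated crop is pasted back into the full frame further down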

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        # dummy_nums, fine_class, full_score, cod_count, herring_count,
        # kliesche_count and steinbutt_count are module-level lists defined
        # outside this snippet
        if not boxes.any():
            dummy_nums.append(0)
            fine_class.append('no fish')
            cod_count.append(0)
            herring_count.append(0)
            kliesche_count.append(0)
            steinbutt_count.append(0)
        fps = (fps + (1. / (time.time() - t1))) / 2
        img, stack_predections, length = draw_outputs(
            img[365:555,
                750:1278], (boxes, scores, classes, nums), class_names)
        img_raw[365:555, 750:1278] = img  #cod_trial
        cv2.putText(img_raw, "FPS: {:.2f}".format(fps), (0, 60),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        full_score.append(scores)
        boxes, scores, classes, nums = boxes[0], scores[0], classes[0], nums[0]
        if scores.any():

            for i in range(nums):
                if (scores[i] * 100) > 50:
                    cv2.putText(
                        img_raw,
                        'Computed_length = {} cm'.format(round(
                            (length[-1]))), (0, 240),
                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 3)
                    fine_class.append(stack_predections[0])
                    if stack_predections[0] == 0:
                        cv2.putText(img_raw, 'Detected Fish = Cod', (0, 100),
                                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1,
                                    (0, 0, 255), 3)
                        if np.all(np.array(cod_count[-8:]) == 0) and np.all(
                                np.array(herring_count[-8:]) == 0) and np.all(
                                    np.array(kliesche_count[-8:]) ==
                                    0) and np.all(
                                        np.array(steinbutt_count[-8:]) == 0):
                            dorsch_counter += 1
                            cod_count.append(dorsch_counter)
                        cod_count.append(dorsch_counter)

                    if stack_predections[0] == 1:
                        cv2.putText(img_raw, 'Detected Fish = Herring',
                                    (0, 100), cv2.FONT_HERSHEY_COMPLEX_SMALL,
                                    1, (0, 0, 255), 3)
                        if np.all(np.array(cod_count[-9:]) == 0) and np.all(
                                np.array(herring_count[-9:]) == 0) and np.all(
                                    np.array(kliesche_count[-9:]) ==
                                    0) and np.all(
                                        np.array(steinbutt_count[-9:]) == 0):
                            herring_counter += 1
                            herring_count.append(herring_counter)
                        herring_count.append(herring_counter)

                    # NOTE: this condition repeats the Herring check (== 1)
                    # above, so the Dab branch always fires together with it;
                    # it likely should test a distinct class index
                    if stack_predections[0] == 1:
                        cv2.putText(img_raw, 'Detected Fish = Dab', (0, 100),
                                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1,
                                    (0, 0, 255), 3)
                        if np.all(
                                np.array(kliesche_count[-8:]) == 0) and np.all(
                                    np.array(herring_count[-8:]) ==
                                    0) and np.all(
                                        np.array(kliesche_count[-8:]) == 0
                                    ) and np.all(
                                        np.array(steinbutt_count[-8:]) == 0):
                            kliesche_counter += 1
                            kliesche_count.append(kliesche_counter)
                        kliesche_count.append(kliesche_counter)

                    if stack_predections[0] == 2:
                        cv2.putText(img_raw, 'Detected Fish = Turbot',
                                    (0, 100), cv2.FONT_HERSHEY_COMPLEX_SMALL,
                                    1, (0, 0, 255), 3)
                        if np.all(
                                np.array(steinbutt_count[-8:]) == 0
                        ) and np.all(
                                np.array(herring_count[-8:]) == 0) and np.all(
                                    np.array(kliesche_count[-8:]) ==
                                    0) and np.all(
                                        np.array(steinbutt_count[-8:]) == 0):
                            steinbutt_counter += 1
                            steinbutt_count.append(steinbutt_counter)
                        steinbutt_count.append(steinbutt_counter)

                    if np.all(np.array(dummy_nums[-5:]) == 0):
                        counta += 1
                        dummy_nums.append(counta)
                    dummy_nums.append(counta)

                # NOTE: scores between 50 and 80 satisfy both this reset
                # branch and the counting branch above
                if (scores[i] * 100) < 80:
                    fine_class.append('no fish')
                    dummy_nums.append(0)
                    cod_count.append(0)
                    kliesche_count.append(0)
                    herring_count.append(0)
                    steinbutt_count.append(0)
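        # The [-8:] / [-9:] window checks above act as a crude debounce: a
        # species counter is only incremented when no fish has been counted in
        # the last several frames, so one fish passing through the crop is not
        # counted repeatedly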
        print(dummy_nums)
        print(stack_predections)
        cv2.putText(img_raw, 'Total no of Fish = ' + str(counta), (0, 130),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 3)
        cv2.putText(img_raw, 'Total no of Cod = ' + str(dorsch_counter),
                    (0, 160), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255),
                    3)
        cv2.putText(img_raw, 'Total no of Dab = ' + str(kliesche_counter),
                    (0, 180), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255),
                    3)
        cv2.putText(img_raw, 'Total no of Herring = ' + str(herring_counter),
                    (0, 200), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255),
                    3)
        cv2.putText(img_raw, 'Total no of Turbot = ' + str(steinbutt_counter),
                    (0, 220), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255),
                    3)
        if FLAGS.output:
            # the original bare cv2.resize call was a no-op (its result was
            # discarded); frames are written at the VideoWriter's opening size
            out.write(img_raw)
        cv2.imshow('output', img)
        if cv2.waitKey(1) == ord('q'):
            break

    cv2.destroyAllWindows()
Example 7
def get_detections():
    raw_images = []
    images = request.files.getlist("images")
    image_names = []
    for image in images:
        image_name = image.filename
        image_names.append(image_name)
        image.save(os.path.join(os.getcwd(), image_name))
        with open(image_name, 'rb') as f:
            img_raw = tf.image.decode_image(f.read(), channels=3)
        raw_images.append(img_raw)

    num = 0

    # create list for final response
    response = []
    li = []
    for j in range(len(raw_images)):
        # create list of responses for current image
        responses = []
        raw_img = raw_images[j]
        num += 1
        img = tf.expand_dims(raw_img, 0)
        img = transform_images(img, size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo(img)
        t2 = time.time()
        print('time: {}'.format(t2 - t1))
        # print("**",scores)
        print('detections:')
        for i in range(nums[0]):
            # if np.array(scores[0][i])*100>30:
            print('\t{}, {}, {}'.format(class_names[int(classes[0][i])],
                                        np.array(scores[0][i]),
                                        np.array(boxes[0][i])))
            responses.append({
                "class":
                class_names[int(classes[0][i])],
                "confidence":
                float("{0:.2f}".format(np.array(scores[0][i]) * 100)),
                "co ordinates":
                str("{}".format((np.array(boxes[0][i]))))
            })

            # print(tuple(np.array(boxes[0][i])))

            # img = Image.open("C:\\Repos\\object-Detection-API\\detections\\detection.jpg")
            # a,b = img.size
            # print("*****")
            # print(a,b)
            x, y, z, h = np.array(boxes[0][i])
            p = finalList(class_names[int(classes[0][i])], x, y)
            li.append(p)
            # print(x,y,z,h)
            # crop = img.crop((x*a,y*b,z*a,h*b))
            # crop.show()
        response.append({"image": image_names[j], "detections": responses})
        # note the tuple
    img = cv2.cvtColor(raw_img.numpy(), cv2.COLOR_RGB2BGR)
    img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
    cv2.imwrite(output_path + 'detection' + '.jpg', img)
    print('output saved to: {}'.format(output_path + 'detection' + '.jpg'))

    st = """
    <!DOCTYPE html>
<html>
<head>
<meta name="viewport" content="width=device-width" />
<title>HTML Result</title>
<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.1.1/css/bootstrap.min.css" 
      integrity="sha384-WskhaSGFgHYWDcbwN70/dfYBj47jz9qbsMId/iRN3ewGhXQFZCSftd1LZCfmhktB" crossorigin="anonymous">
</head>
<body>
<div class="container body-content">"""

    en = """
</div>
</body>
</html>
    """
    inputf = """ 
    <div class="row justify-content-start" style="padding-top:10px;">
<label>Demo Text: </label>
</div>
<div class="row justify-content-center" style="padding-top:10px;">
<input class="form-control"></input>
            </div>"""

    button = """
    <div class="col" style="padding-top:10px;">
        <button class="btn btn-primary">Submit</button>
    </div>"""

    img = """
    <img src="C:/repos/env/Object-Detection-API/img.png" width="150" height="150" alt="Image Here">"""

    radio = """
    <div class="col" style="padding-top:10px;">
        <input type="radio" id="male" name="Demo text" value="male">
        <label for="male">Demo Text</label><br>
    </div>
"""
    dropdown = """
    <div class="dropdown">
<label for="cars">Dropdown:</label>
<select name="cars" id="cars" class="btn btn-primary dropdown-toggle">
<option value="1">Option 1</option>
<option value="2">Option 2</option>
<option value="3">Option 3</option>
<option value="4">Option 4</option>
</select>
</div>"""
    checkbox = """
    <div class="col" style="padding-top:10px;">
        <input type="checkbox" id="vehicle1" name="vehicle1" value="Bike">
        <label for="vehicle1"> I have a bike</label><br>
    </div>
    """
    text = """<div class="col" style="padding-top:10px;"> <p class="text-black-50"> You’ve probably heard of 
    Lorem Ipsum before – it’s the most-used dummy text excerpt out there. People use it because it has a fairly 
    normal distribution of letters and words (making it look like normal English), but it’s also Latin, 
    which means your average reader won’t get distracted by trying to read it. </p> </div> """
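    # Detections are sorted top-to-bottom by the y coordinate of their boxes,
    # and each detected widget class is mapped to its HTML template below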

    sorted_li = sorted(li, key=operator.attrgetter('y'))
    # print("###########################")
    # for m in sorted_li:
    #     print(m.name, m.y)
    #
    # print("###########################")
    for i in sorted_li:
        if i.name == "check box":
            st += checkbox
        elif i.name == "radio button":
            st += radio
        elif i.name == "dropdown":
            st += dropdown
        elif i.name == "input":
            st += inputf
        elif i.name == "submit":
            st += button
        elif i.name == "text":
            st += text
        else:
            st += img
        print(i.name, i.x, i.y)
    print(st + en)
    f = open("demofile3.html", "w")
    f.write(st + en)
    f.close()
    # remove temporary images
    for name in image_names:
        os.remove(name)
    try:
        return jsonify({"response": response}), 200
    except FileNotFoundError:
        abort(404)
Example 8
def main(_argv):
    # Horovod: initialize Horovod.
    hvd.init()

    # Horovod: pin GPU to be used to process local rank (one GPU per process)
    gpus = tf.config.experimental.list_physical_devices('GPU')
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
    if gpus:
        tf.config.experimental.set_visible_devices(gpus[hvd.local_rank()],
                                                   'GPU')

    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size, training=True)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    train_dataset = dataset.load_fake_dataset()
    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(FLAGS.dataset,
                                                      FLAGS.classes)
    train_dataset = train_dataset.shuffle(buffer_size=1024)  # TODO: not 1024
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(
        lambda x, y: (dataset.transform_images(x, FLAGS.size),
                      dataset.transform_targets(y, anchors, anchor_masks, 80)))
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    val_dataset = dataset.load_fake_dataset()
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(FLAGS.val_dataset,
                                                    FLAGS.classes)
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(
        lambda x, y: (dataset.transform_images(x, FLAGS.size),
                      dataset.transform_targets(y, anchors, anchor_masks, 80)))

    if FLAGS.transfer != 'none':
        model.load_weights(FLAGS.weights)
        if FLAGS.transfer == 'fine_tune':
            # freeze darknet
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        elif FLAGS.transfer == 'frozen':  # fixed: the original compared FLAGS.mode here
            # freeze everything
            freeze_all(model)
        else:
            # reset top layers
            if FLAGS.tiny:  # get initial weights
                init_model = YoloV3Tiny(FLAGS.size, training=True)
            else:
                init_model = YoloV3(FLAGS.size, training=True)

            if FLAGS.transfer == 'darknet':
                for l in model.layers:
                    if l.name != 'yolo_darknet' and l.name.startswith('yolo_'):
                        l.set_weights(
                            init_model.get_layer(l.name).get_weights())
                    else:
                        freeze_all(l)
            elif FLAGS.transfer == 'no_output':
                for l in model.layers:
                    if l.name.startswith('yolo_output'):
                        l.set_weights(
                            init_model.get_layer(l.name).get_weights())
                    else:
                        freeze_all(l)

    # Horovod: adjust learning rate based on number of GPUs.
    optimizer = tf.optimizers.Adam(FLAGS.learning_rate * hvd.size())
    # Horovod: add Horovod DistributedOptimizer.

    ###############################################
    loss = [YoloLoss(anchors[mask]) for mask in anchor_masks]

    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(
                    train_dataset.take(5717 // hvd.size())):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss
                # Horovod: add Horovod Distributed GradientTape.
                tape = hvd.DistributedGradientTape(tape)

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(zip(grads,
                                              model.trainable_variables))
                # Horovod: broadcast initial variable states from rank 0 to all other processes.
                # This is necessary to ensure consistent initialization of all workers when
                # training is started with random weights or restored from a checkpoint.
                #
                # Note: broadcast should be done after the first gradient step to ensure optimizer
                # initialization.
                if batch == 0:
                    hvd.broadcast_variables(model.variables, root_rank=0)
                    hvd.broadcast_variables(optimizer.variables(), root_rank=0)

                #############################
                if hvd.rank() == 0:
                    logging.info("{}_train_{}, {}, {}".format(
                        epoch, batch, total_loss.numpy(),
                        list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                ###########################
                avg_loss.update_state(total_loss)

            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss
                if hvd.rank() == 0:
                    logging.info("{}_val_{}, {}, {}".format(
                        epoch, batch, total_loss.numpy(),
                        list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)
            if hvd.rank() == 0:
                logging.info("{}, train: {}, val: {}".format(
                    epoch,
                    avg_loss.result().numpy(),
                    avg_val_loss.result().numpy()))

            avg_loss.reset_states()
            avg_val_loss.reset_states()
            if hvd.rank() == 0:
                model.save_weights(
                    'checkpoints/horovod_yolov3_train_{}.tf'.format(epoch))
    else:
        model.compile(optimizer=optimizer,
                      loss=loss,
                      run_eagerly=(FLAGS.mode == 'eager_fit'))
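        # NOTE: for graph-mode training Horovod normally expects the optimizer
        # to be wrapped with hvd.DistributedOptimizer and the callback list to
        # include hvd.callbacks.BroadcastGlobalVariablesCallback(0); this
        # snippet omits both, so gradients are not averaged across workers in
        # this branch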

        callbacks = [
            ReduceLROnPlateau(verbose=1),
            EarlyStopping(patience=3, verbose=1),
            ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                            verbose=1,
                            save_weights_only=True),
            TensorBoard(log_dir='logs')
        ]

        history = model.fit(train_dataset,
                            epochs=FLAGS.epochs,
                            callbacks=callbacks,
                            validation_data=val_dataset)
Example 9
def basic():

    # Remove already existing files in the output_frames directory :)
    for old_file in glob.glob('data/output_frames/*'):
        os.remove(old_file)
    # Remove already existing files in the Clipped directory :)
    for old_file in glob.glob('data/Clipped/*'):
        os.remove(old_file)
    # Request video file to act like a live stream
    f = request.files['file']
    print('FILENAME: ', f.filename)
    print('SECURE FILE NAME: ', f.save(secure_filename(f.filename)))
    times = []
    i = 0
    h = 0
    print(f.filename)
    vid = cv2.VideoCapture(f.filename)
    out = None
    fps = 0.0
    count = 0
    while True:
        _, img = vid.read()
        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            count += 1
            if count < 3:
                continue
            else:
                break
        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, 416)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        fps = (fps + (1. / (time.time() - t1))) / 2

        img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
        # Check the detection scores in the first row of the 2D "scores" array
        # (the original compared `.any()`, a boolean, against 0.50, which is
        # true for any non-zero score)
        if scores[0][1] > 0.50:
            cv2.imwrite('data/Clipped/clipp' + str(i) + '.jpg', img)
            storage.child('ClippedCrash/' + str(i) +
                          '/crash.jpg').put('data/Clipped/clipp' + str(i) +
                                            '.jpg')
            link_image = storage.child('ClippedCrash/' + str(i) +
                                       '/crash.jpg').get_url(None)
            doc_ref = db.collection(u'Crash')
            doc_ref.add({
                u'Name': u'Vehicle Crash',
                u'Type': u'Anomaly',
                u'Timestamp': str(datetime.now()),
                u'Image_Url': link_image
            })
        elif scores[0][0] > 0.50:
            cv2.imwrite('data/Clipped/clipp' + str(i) + '.jpg', img)
            # storage.child('LaneClipped/' +str(i)+ '/Lane.jpg').put('data/Clipped/clipp'+str(i)+'.jpg')
            # link_image = storage.child('ClippedLane/' +str(i)+ '/Lane.jpg').get_url(None)
            # doc_ref = db.collection(u'LaneVoilation')
            # doc_ref.add({
            #     u'Name': u'Lane Voilation',
            #     u'Type': u'Anomaly',
            #     u'Timestamp': datetime.now(),
            #     u'Image Url': link_image
            # })

        print(boxes, scores, classes, nums, class_names)
        global displayData
        displayData = {
            "scores": str(scores),
            "classes": str(classes),
            "classes_names": str(class_names)
        }
        # print(displayData)
        # data['boxes'] = i
        img = cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30),
                          cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        cv2.imwrite('data/output_frames/anomaly' + str(i) + '.jpg', img)
        i = i + 1

        cv2.destroyAllWindows()

    # os.remove('data\output-vid\short.mp4')
    vid_array = []
    for img_video in glob.glob('data/output_frames/*.jpg'):
        vid_img = cv2.imread(img_video)
        height, width, layers = vid_img.shape
        SIZE_vid = (width, height)
        vid_array.append(vid_img)

    # 'mp4v' is the MPEG-4 video fourcc; the original 'mp4a' is an audio tag
    out = cv2.VideoWriter('data/output-vid/short.mp4',
                          cv2.VideoWriter_fourcc(*'mp4v'), 15, SIZE_vid)

    for n in range(len(vid_array)):
        out.write(vid_array[n])
    out.release()

    cv2.destroyAllWindows()

    users_ref = db.collection(u'Crash')
    Crashdata = users_ref.stream()
    sasta = []
    print(Crashdata)
    for doc in Crashdata:
        print(f'{doc.id} => {doc.to_dict()}')
        my_dict = doc.to_dict()
        sasta.append(my_dict)
        # print(my_dict)
    print(sasta)
    storage.child("videos/new.mp4").put("data\output-vid\short.mp4")
    links = storage.child('videos/new.mp4').get_url(None)
    return render_template('upload.html', l=(links, sasta))
Example 10
def main(_argv):
    # set present path
    home = os.getcwd()

    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1.0

    #initialize deep sort
    # model_filename = 'weights/mars-small128.pb'
    model_filename = os.path.join(home, "weights", "arcface_weights.h5")
    encoder = gdet.create_box_encoder(model_filename, batch_size=128)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    times = []

    # Build the face database
    face_db = dict()

    db_path = FLAGS.database
    for name in os.listdir(db_path):
        name_path = os.path.join(db_path, name)
        name_db = []
        for i in os.listdir(name_path):
            if not i.endswith(".jpg"):
                continue
            id_path = os.path.join(name_path, i)
            img = cv2.imread(id_path)
            # img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            # img_in = tf.expand_dims(img_in, 0)
            # img_in = transform_images(img_in, FLAGS.size)
            # boxes, scores, classes, nums = yolo.predict(img_in)
            boxes = np.asarray([[0, 0, img.shape[0], img.shape[1]]])
            scores = np.asarray([[1]])
            converted_boxes = convert_boxes(img, boxes, scores)
            features = encoder(img, converted_boxes)

            if features.shape[0] == 0: continue

            for f in range(features.shape[0]):
                name_db.append(features[f, :])
        name_db = np.asarray(name_db)
        face_db[name] = dict({"used": False, "db": name_db})

    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except ValueError:
        vid = cv2.VideoCapture(FLAGS.video)

    out = None

    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
        list_file = open('detection.txt', 'w')  # only used by the commented-out block below

    # frame_index is incremented every frame below, so initialize it whether
    # or not an output file was requested
    frame_index = -1

    fps = 0.0
    count = 0

    detection_list = []

    while True:
        _, img = vid.read()

        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            count += 1
            if count < 3:
                continue
            else:
                break

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)

        # print(boxes, scores, classes, nums)
        # time.sleep(5)
        t2 = time.time()
        times.append(t2 - t1)
        print(f'yolo predict time : {t2-t1}')
        times = times[-20:]

        t3 = time.time()
        #############
        classes = classes[0]
        names = []
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0], scores[0])
        features = encoder(img, converted_boxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                converted_boxes, scores[0], names, features)
        ]

        t4 = time.time()
        print(f'feature generation time : {t4-t3}')

        #initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maximum suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        t5 = time.time()
        # Call the tracker
        tracker.predict()
        # tracker.update(detections)
        tracker.update(detections, face_db, FLAGS.max_face_threshold)
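        # Note: this Tracker variant also takes the face database and a
        # distance threshold; presumably it matches track appearance features
        # against each person's embeddings to assign a face name to the track.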
        t6 = time.time()
        print(f'tracking time : {t6-t5}')

        frame_index = frame_index + 1
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            face_name = track.get_face_name()
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) +
                           (len(class_name) + len(str(track.track_id)) +
                            len(str(face_name))) * 23, int(bbox[1])), color,
                          -1)
            # cv2.putText(img, class_name + face_name + "-" + str(track.track_id),(int(bbox[0]), int(bbox[1]-10)),0, 0.75, (255,255,255),2)
            cv2.putText(
                img, class_name + "-" + str(track.track_id) + "-" + face_name,
                (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2)
            # cv2.putText(img, class_name + "-" + str(track.track_id),(int(bbox[0]), int(bbox[1]-10)),0, 0.75, (255,255,255),2)
            # print(class_name + "-" + str(track.track_id))

            # detection_list.append(dict({"frame_no": str(frame_index), "id": str(track.track_id), "x": str(int(bbox[0])), "y": str(int(bbox[1])), "width": str(int(bbox[2])-int(bbox[0])), "height": str(int(bbox[3])-int(bbox[1]))}))
            if face_name != "":
                detection_list.append(
                    dict({
                        "frame_no": str(frame_index),
                        "id": str(face_name),
                        "x": str(int(bbox[0])),
                        "y": str(int(bbox[1])),
                        "width": str(int(bbox[2]) - int(bbox[0])),
                        "height": str(int(bbox[3]) - int(bbox[1]))
                    }))
        #######
        fps = (fps + (1. / (time.time() - t1))) / 2
        # img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
        # img = cv2.putText(img, "Time: {:.2f}ms".format(sum(times)/len(times)*1000), (0, 30),
        #                   cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        img = cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30),
                          cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (20, 20, 255), 2)
        if FLAGS.output:
            out.write(img)
            # frame_index = frame_index + 1
            # list_file.write(str(frame_index)+' ')
            # if len(converted_boxes) != 0:
            #     for i in range(0,len(converted_boxes)):
            #         list_file.write(str(converted_boxes[i][0]) + ' '+str(converted_boxes[i][1]) + ' '+str(converted_boxes[i][2]) + ' '+str(converted_boxes[i][3]) + ' ')
            # list_file.write('\n')
        cv2.imshow('output', img)
        if cv2.waitKey(1) == ord('q'):
            break

    cv2.destroyAllWindows()

    frame_list = sorted(detection_list,
                        key=lambda x: (int(x["frame_no"]), int(x["id"])))
    # pprint.pprint(frame_list)

    f = open(FLAGS.eval, "w")
    for a in frame_list:
        f.write(a["frame_no"] + " " + a["id"] + " " + a["x"] + " " + a["y"] +
                " " + a["width"] + " " + a["height"] + "\n")
    # 파일 닫기
    f.close()
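
The eval file written above is plain space-separated text, one detection per
line: frame_no, id, x, y, width, height. A minimal sketch for reading it back
in (the file name is whatever FLAGS.eval pointed at):

def read_eval_file(path):
    rows = []
    with open(path) as f:
        for line in f:
            frame_no, face_id, x, y, w, h = line.split()
            rows.append((int(frame_no), face_id, int(x), int(y), int(w), int(h)))
    return rows
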
Exemplo n.º 11
0
def upload_file():
    # check if the post request has the file part
    if 'files[]' not in request.files:
        resp = jsonify({'message': 'No file part in the request'})
        resp.status_code = 400
        return resp

    files = request.files.getlist('files[]')
    errors = {}
    success = False

    for file in files:
        if file and allowed_file(file.filename):
            filename = secure_filename(file.filename)
            print(filename)
            #file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))

            file.save(os.path.join(os.getcwd(), filename))

            # ---------------- YOLO detection ----------------
            img_raw = tf.image.decode_image(open(filename, 'rb').read(),
                                            channels=3)
            img = tf.expand_dims(img_raw, 0)
            img = transform_images(img, size)
            t1 = time.time()
            boxes, scores, classes, nums = yolo(img)
            t2 = time.time()
            print('time: {}'.format(t2 - t1))

            print('detections:')
            for i in range(nums[0]):
                print('\t{}, {}, {}'.format(class_names[int(classes[0][i])],
                                            np.array(scores[0][i]),
                                            np.array(boxes[0][i])))
            img = cv2.cvtColor(img_raw.numpy(), cv2.COLOR_RGB2BGR)
            img = draw_outputs(img, (boxes, scores, classes, nums),
                               class_names)
            cv2.imwrite(output_path + 'detection.jpg', img)
            print('output saved to: {}'.format(output_path + 'detection.jpg'))

            # prepare image for response
            _, img_encoded = cv2.imencode('.png', img)
            response = img_encoded.tobytes()  # tostring() is deprecated

            #remove temporary image
            os.remove(filename)
            success = True
        else:
            errors[file.filename] = 'File type is not allowed'

    if success and errors:
        errors['message'] = 'File(s) successfully uploaded'
        resp = jsonify(errors)
        resp.status_code = 206
        return resp
    if success:
        resp = jsonify({'message': 'File(s) successfully uploaded'})
        resp.status_code = 201
        return resp
    else:
        resp = jsonify(errors)
        resp.status_code = 400
        return resp
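
The endpoint above expects a multipart form field named 'files[]'. A minimal
client sketch using the requests library; the route and port are assumptions
that depend on how the Flask app is registered and run:

import requests

url = 'http://localhost:5000/upload'  # assumed route and port
with open('sample.jpg', 'rb') as f:
    resp = requests.post(url, files=[('files[]', ('sample.jpg', f, 'image/jpeg'))])
print(resp.status_code, resp.json())
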
Exemplo n.º 12
0
def main(_argv):
    region = load_ROI()


    # Definition of the parameters
    max_cosine_distance = 0.3  #Default = 0.5
    nn_budget = None
    nms_max_overlap = 0.8      #Default = 0.5 

    #initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    video_name = os.path.splitext(FLAGS.video)[0]  # file name without its extension

    weights = 'weights/yolov3_sang.tf'
    yolo.load_weights(weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')


    # write tracking results
    result = "tracking_result/{}_track.txt".format(video_name)
    file_out = open(result, 'w')
    path = os.getcwd()
    path = str(os.path.split(os.path.split(path)[0])[0])
    #vid_path = os.path.join(path,"Data/{}/{}.mp4".format(video_name,video_name))
    vid_path = os.path.join(path,"data/test_data/{}.mp4".format(video_name))
    vid = cv2.VideoCapture(vid_path)
    out = None

    
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
    
    frame_index = -1 
    
    fps = 0.0
    count = 0 
    while True:
        _, img = vid.read()

        if img is None:
            break

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        classes = classes[0]
        names = []
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)    
        detections = [Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip(converted_boxes, scores[0], names, features)]
        
        #initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maximum suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]        

        # Call the tracker
        tracker.predict()
        tracker.update(detections)
        frame_index = frame_index + 1
        if frame_index % 100 == 0: 
            print('FRAME: ',frame_index)
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue 
            bbox = track.to_tlbr()
            class_name = track.get_class()
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 1)
            #cv2.rectangle(img, (int(bbox[0]), int(bbox[1]-30)), (int(bbox[0])+(len(class_name)+len(str(track.track_id)))*17, int(bbox[1])), color, -1)
            #cv2.putText(img, class_name + "-" + str(track.track_id),(int(bbox[0]), int(bbox[1]-10)),0, 0.75, (255,255,255),2)
            x_cen = int((int(bbox[2]) + int(bbox[0]))/2)
            y_cen = int((int(bbox[3]) + int(bbox[1]))/2)

            # outside the ROI: drop the track
            if not is_in_region((int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), region):
                track.delete_track()

            cv2.putText(img,"FRAME: "+ str(frame_index),(0,45),cv2.FONT_HERSHEY_COMPLEX_SMALL,1,(0,255,0),2)
            
            # write the tracking result file in the CountMovement format
            bb_width = int(bbox[2]) - int(bbox[0])
            bb_height = int(bbox[3]) - int(bbox[1])
            diagonal = math.sqrt(bb_height**2 + bb_width**2)
            file_out.write("{},{},{},{},{},{},{},{},{}\n".format(frame_index,track.track_id,x_cen,y_cen,diagonal,-1.0,class_to_classNumber(str(class_name)),bb_width,bb_height))

        ### The raw YOLO detections below are drawn every frame; comment this block out to hide them
        for det in detections:
            bbox = det.to_tlbr() 
            cv2.rectangle(img,(int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(0,255,0), 1)
        
        # print fps on screen 
        fps = (fps + (1. / (time.time() - t1))) / 2
        cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30),
                          cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        cv2.imshow('output', img)
        if FLAGS.output:
            out.write(img)

        # press q to quit
        if cv2.waitKey(1) == ord('q'):
            break
    vid.release()
    if FLAGS.output:
        out.release()
    cv2.destroyAllWindows()
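
load_ROI and is_in_region are helpers not shown in this example. A plausible
sketch, assuming the ROI is a polygon of (x, y) vertices and a box counts as
inside when its center falls within the polygon:

import cv2
import numpy as np

def is_in_region(top_left, bottom_right, region):
    # region: polygon as an (N, 2) sequence of (x, y) vertices
    cx = (top_left[0] + bottom_right[0]) / 2.0
    cy = (top_left[1] + bottom_right[1]) / 2.0
    polygon = np.asarray(region, dtype=np.float32)
    # pointPolygonTest returns +1 inside, 0 on the edge, -1 outside
    return cv2.pointPolygonTest(polygon, (cx, cy), False) >= 0
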
Exemplo n.º 13
0
def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights).expect_partial()
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    if FLAGS.tfrecord:
        dataset = load_tfrecord_dataset(FLAGS.tfrecord, FLAGS.classes,
                                        FLAGS.size)
        dataset = dataset.shuffle(512)
        img_raw, _label = next(iter(dataset.take(1)))
    else:
        img_raw = tf.image.decode_image(open(FLAGS.image, 'rb').read(),
                                        channels=3)

    img = tf.expand_dims(img_raw, 0)
    img = transform_images(img, FLAGS.size)

    t1 = time.time()
    boxes, scores, classes, nums = yolo(img)

    t2 = time.time()
    logging.info('time: {}'.format(t2 - t1))

    logging.info('detections:')
    # ------
    detect_dict = {}

    if FLAGS.make_json:
        object_num = 1

    for i in range(nums[0]):
        if not FLAGS.only_cars or class_names[int(
                classes[0][i])] in ('car', 'truck'):
            logging.info('\t{}, {}, {}'.format(class_names[int(classes[0][i])],
                                               np.array(scores[0][i]),
                                               np.array(boxes[0][i])))
            if FLAGS.make_json:
                if FLAGS.only_cars:
                    detect_dict["Car " + str(object_num)] = np.array(
                        boxes[0][i]).tolist()
                else:
                    detect_dict["Object " + str(object_num) + ": " + class_names[int(classes[0][i])]] = \
                        np.array(boxes[0][i]).tolist()
                object_num += 1

    if FLAGS.make_json:
        with open("outputs/output.json", "w") as outfile:
            json.dump(detect_dict, outfile)
        logging.info('output JSON saved to: outputs/output.json')
    # ------

    img = cv2.cvtColor(img_raw.numpy(), cv2.COLOR_RGB2BGR)
    img = draw_outputs(img, (boxes, scores, classes, nums),
                       class_names,
                       only_cars=FLAGS.only_cars)
    cv2.imwrite(FLAGS.output, img)
    logging.info('output picture saved to: {}'.format(FLAGS.output))
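
The JSON written above maps label strings to box-coordinate lists. A minimal
consumer sketch; in this implementation the boxes are normalized
[x1, y1, x2, y2], so they must be scaled by the image size for pixel values:

import json

with open('outputs/output.json') as f:
    detections = json.load(f)

for name, box in detections.items():
    print(name, box)  # box is normalized [x1, y1, x2, y2]
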
Exemplo n.º 14
0
    return


dataImages = [f for f in listdir('./data/polar_car_set/Images')]
polar = ['0', '45', '90']
data9chan = []
for j in range(len(dataImages)):
    tmp = []
    for l in range(len(polar)):

        img = tf.image.decode_image(open(
            './data/polar_car_set/Images/{0}/{1}.jpg'.format(
                dataImages[j], polar[l]), 'rb').read(),
                                    channels=3)

        img = transform_images(img, 416)

        tmp.append(img)
    tmp9chan = np.concatenate((tmp[0], tmp[1], tmp[2]), axis=2)
    data9chan.append(tmp9chan)

data_list = data9chan

data_Array = np.asarray(data9chan, dtype='float32')
data_Array = data_Array.reshape(data_Array.shape[0], data_Array.shape[1],
                                data_Array.shape[2] * 3, 3)

nb_max_box = 100
list_labels = []
for i in labels.values():
    i = np.concatenate((i, np.zeros(
Exemplo n.º 15
0
def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights).expect_partial()
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    logging.info('load cat model')
    model_cat = cnn.get_inception_v2_cat()
    logging.info('cat model loaded')

    logging.info('load dog model')
    model_dog = cnn.get_inception_v2_dog()
    logging.info('dog model loaded')

    if FLAGS.tfrecord:
        dataset = load_tfrecord_dataset(
            FLAGS.tfrecord, FLAGS.classes, FLAGS.size)
        dataset = dataset.shuffle(512)
        img_raw, _label = next(iter(dataset.take(1)))
    else:
        img_raw = tf.image.decode_image(
            open(FLAGS.image, 'rb').read(), channels=3)

    img = tf.expand_dims(img_raw, 0)
    img = transform_images(img, FLAGS.size)

    t1 = time.time()
    boxes, scores, classes, nums = yolo(img)
    t2 = time.time()

    img = cv2.cvtColor(img_raw.numpy(), cv2.COLOR_RGB2BGR)

    t3 = time.time()
    cnn_output = cnn.get_more_data(img, model_cat, model_dog, (boxes, scores, classes, nums), class_names)
    t4 = time.time()

    logging.info('time: {}'.format(t2 - t1))

    logging.info('primary detections:')
    for i in range(nums[0]):
        logging.info('\t{}, {:.2f}'.format(class_names[int(classes[0][i])],
                                           np.array(scores[0][i])))

    img = cv2.cvtColor(img_raw.numpy(), cv2.COLOR_RGB2BGR)

    img, cat_det, dog_det = draw_outputs(img, model_cat, model_dog, (boxes, scores, classes, nums), class_names,
                                         cnn_output)

    cv2.imwrite(FLAGS.output, img)

    if np.size(cat_det) != 0 or np.size(dog_det) != 0:
        logging.info('secondary detections :')
        logging.info('time: {}'.format(t4 - t3))

        if np.size(cat_det) != 0:
            for cat in cat_det:
                logging.info('\t{}, {:.2f}'.format(cat[0], cat[1]))

        if np.size(dog_det) != 0:
            for dog in dog_det:
                logging.info('\t {}, {:.2f}'.format(dog[0], dog[1]))

    logging.info('output saved to: {}'.format(FLAGS.output))
    cv2.imshow(FLAGS.output, img)
    cv2.waitKey(0)
Exemplo n.º 16
0
def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    times = []

    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except ValueError:
        # FLAGS.video is a path/URL rather than a webcam index
        vid = cv2.VideoCapture(FLAGS.video)

    out = None

    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
    countFrame = 0
    while True:
        _, img = vid.read()
        countFrame += 1

        if img is None:
            # stop when the stream ends instead of spinning forever
            logging.warning("Empty Frame")
            break

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        t2 = time.time()
        times.append(t2 - t1)
        times = times[-20:]
        result = []
        if FLAGS.mode != 'basic':
            if FLAGS.mode == 'optical_flow':
                mode = 1
            elif FLAGS.mode == 'final':
                mode = 3
            else:
                mode = 2
            img1 = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            if countFrame >= 2:
                flow = opticalFlow(img1, img2)
                if FLAGS.mode == 'final':
                    img, result = my_draw_flow(img2, flow, 1, boxes[0],
                                               nums[0], mode)
                    print(result)
                else:
                    img = my_draw_flow(img1, flow, 4, boxes[0], nums[0], mode)
            img2 = img1

        img = draw_outputs(img, (boxes, scores, classes, nums), class_names,
                           result)
        img = cv2.putText(
            img, "Time: {:.2f}ms".format(sum(times) / len(times) * 1000),
            (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        if FLAGS.output:
            out.write(img)
        # cv2.imshow('output', img)
        if cv2.waitKey(1) == ord('q'):
            break

    cv2.destroyAllWindows()
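
opticalFlow and my_draw_flow are helpers not shown here. A minimal dense-flow
sketch in the spirit of the call above, using OpenCV's Farneback method; the
parameter values are assumptions:

import cv2

def opticalFlow(curr_gray, prev_gray):
    # dense optical flow between two grayscale frames; returns an (H, W, 2)
    # array of per-pixel (dx, dy) displacements
    return cv2.calcOpticalFlowFarneback(prev_gray, curr_gray, None,
                                        0.5, 3, 15, 3, 5, 1.2, 0)
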
Exemplo n.º 17
0
def main():

    class_names = [c.strip() for c in open('./data/labels/coco.names').readlines()]
    yolo = YoloV3(classes=len(class_names))
    yolo.load_weights('./weights/yolov3.tf')


    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 0.8


    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric('cosine', max_cosine_distance, nn_budget)
    tracker = Tracker(metric)


    vid = cv2.VideoCapture("traffic1.mkv")
    #vid = cv2.VideoCapture("video.webm")
    #vid = VideoCaptureAsync("video.webm")
    #vid = vid.start()


    codec = cv2.VideoWriter_fourcc(*'XVID')
    vid_fps =int(vid.get(cv2.CAP_PROP_FPS))
    vid_width,vid_height = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)), int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    out = cv2.VideoWriter('./data/video/results.avi', codec, vid_fps, (vid_width, vid_height))

    from collections import deque
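    # one fixed-length trail of recent box centers per track id
    # (assumes track ids stay below 1000)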
    pts = [deque(maxlen=30) for _ in range(1000)]

    counter = []

    directory1 = "/home/ecl/Downloads/Limon/Object_Tracking/imgzmq/dataset/"

    result = []

    new_cnt = 0

    while True:
        
        _, img = vid.read()
        if img is None:
            print('Completed')
            break

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, 416)

        t1 = time.time()

        boxes, scores, classes, nums = yolo.predict(img_in)

        classes = classes[0]
        names = []
        
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
            
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)

        detections = [Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in
                    zip(converted_boxes, scores[0], names, features)]


        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]


        tracker.predict()
        tracker.update(detections)

        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0,1,20)]

        #current_count = int(0)
        #count = 0

        for track in tracker.tracks:
            
            if not track.is_confirmed() or track.time_since_update >1:
                continue
            
            bbox = track.to_tlbr()
            class_name= track.get_class()
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
        
            cv2.rectangle(img, (int(bbox[0]),int(bbox[1])), (int(bbox[2]),int(bbox[3])), color, 2)
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1]-30)), (int(bbox[0])+(len(class_name)
                        +len(str(track.track_id)))*17, int(bbox[1])), color, -1)
            
            cv2.putText(img, class_name+"-"+str(track.track_id), (int(bbox[0]), int(bbox[1]-10)), 0, 0.75,
                        (255, 255, 255), 2)

            center = (int(((bbox[0]) + (bbox[2]))/2), int(((bbox[1])+(bbox[3]))/2))
            pts[track.track_id].append(center)

            for j in range(1, len(pts[track.track_id])):
                
                if pts[track.track_id][j-1] is None or pts[track.track_id][j] is None:
                    continue
                thickness = int(np.sqrt(64/float(j+1))*2)
                cv2.line(img, (pts[track.track_id][j-1]), (pts[track.track_id][j]), color, thickness)


            height, width, _ = img.shape
            cv2.line(img, (0, int(3*height/6+height/20)), (width, int(3*height/6+height/20)), (0, 255, 0), thickness=2)
            #cv2.line(img, (0, int(3*height/6-height/20)), (width, int(3*height/6-height/20)), (0, 255, 0), thickness=2)
            cv2.line(img, (220, 460), (1000, 450), (0, 0, 255), 2)
            center_y = int(((bbox[1])+(bbox[3]))/2)

            #count = 0
            if int(3 * height / 6 - height / 20) <= center_y <= int(3 * height / 6 + height / 20):
                
                if class_name in ('car', 'truck', 'person'):
                    counter.append(int(track.track_id))  

                    # find the next free image index in the dataset folder
                    directory = r'/home/ecl/Downloads/Limon/Object_Tracking/imgzmq/dataset'
                    for filename in os.listdir(directory):
                        if filename.endswith(".jpg") or filename.endswith(".png"):
                            match = re.search(r'\d+', filename)
                            if match:
                                result.append(int(match.group()))

                    b1 = max(result) + 1 if result else 0

                    # bbox comes from track.to_tlbr(), i.e. (x1, y1, x2, y2),
                    # so crop rows y1:y2 and columns x1:x2, and save once
                    new_img = img[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])]
                    cv2.imwrite(directory1 + f"image{b1}.jpg", new_img)
                    #current_count += 1


        total_count = len(set(counter))
        #cv2.putText(img, "Current Vehicle Count: " + str(current_count), (0, 80), 0, 1, (0, 0, 255), 2)
        cv2.putText(img, "Total Vehicle Count: " + str(total_count), (0,130), 0, 1, (0,0,255), 2)


        fps = 1./(time.time()-t1)
        cv2.putText(img, "FPS: {:.2f}".format(fps), (0,30), 0, 1, (0,0,255), 2)
        #cv2.resizeWindow('output', 1024, 768)
        cv2.imshow('output', img)
        out.write(img)


        if cv2.waitKey(1) == ord('q'):
            break
        
        
    vid.release()
    out.release()
    cv2.destroyAllWindows()
Exemplo n.º 18
0
    def scanner(self):
        FLAGS(sys.argv)
        self.physical_devices = tf.config.experimental.list_physical_devices(
            'GPU')
        if len(self.physical_devices) > 0:
            tf.config.experimental.set_memory_growth(self.physical_devices[0],
                                                     True)

        if FLAGS.tiny:
            self.yolo = YoloV3Tiny(classes=FLAGS.num_classes)
        else:
            self.yolo = YoloV3(classes=FLAGS.num_classes)

        self.yolo.load_weights(FLAGS.weights)
        logging.info('weights loaded')

        self.class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
        logging.info('classes loaded')

        times = []

        # cv2.VideoCapture does not raise on failure, so check isOpened()
        # and fall back to FLAGS.video if the default webcam is unavailable
        self.vid = cv2.VideoCapture(0)
        if not self.vid.isOpened():
            self.vid = cv2.VideoCapture(FLAGS.video)

        self.out = None

        if FLAGS.output:
            # by default VideoCapture returns float instead of int
            self.width = int(self.vid.get(cv2.CAP_PROP_FRAME_WIDTH))
            self.height = int(self.vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
            self.fps = int(self.vid.get(cv2.CAP_PROP_FPS))
            self.codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
            self.out = cv2.VideoWriter(FLAGS.output, self.codec, self.fps,
                                       (self.width, self.height))
        self.fps = 0.0
        self.count = 0

        a = True

        while a:
            _, self.img = self.vid.read()

            if self.img is None:
                logging.warning("Empty Frame")
                time.sleep(0.1)
                self.count += 1
                if self.count < 3:
                    continue
                else:
                    break

            self.img_in = cv2.cvtColor(self.img, cv2.COLOR_BGR2RGB)
            self.img_in = tf.expand_dims(self.img_in, 0)
            self.img_in = transform_images(self.img_in, FLAGS.size)

            self.t1 = time.time()
            self.boxes, self.scores, self.classes, self.nums = self.yolo.predict(
                self.img_in)

            self.fps = (self.fps + (1. / (time.time() - self.t1))) / 2

            self.img, self.pname = draw_outputs(
                self.img, (self.boxes, self.scores, self.classes, self.nums),
                self.class_names)
            print('in main function : ', self.pname)

            self.img = cv2.putText(self.img, "FPS: {:.2f}".format(self.fps),
                                   (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1,
                                   (0, 0, 255), 2)

            # draw_outputs(img, outputs, class_names)
            if FLAGS.output:
                self.out.write(self.img)

            cv2.namedWindow('Product Scanner')
            cv2.imshow('Product Scanner', self.img)

            if cv2.waitKey(100) & 0xFF == ord('e'):
                self.dbdata()
                print('destroying scanner window')
                cv2.destroyWindow('Product Scanner')
                a = False
Exemplo n.º 19
0
def get_image():
    image = request.files["images"]
    # print("######### IMG", image)
    image_name = image.filename
    image.save(os.path.join(os.getcwd(), image_name))
    img_raw = tf.image.decode_image(open(image_name, 'rb').read(), channels=3)
    img = tf.expand_dims(img_raw, 0)
    img = transform_images(img, size)

    t1 = time.time()
    boxes, scores, classes, nums = yolo(img)
    t2 = time.time()
    print('time: {}'.format(t2 - t1))

    print('detections:')
    for i in range(nums[0]):
        print('\t{}, {}, {}'.format(class_names[int(classes[0][i])],
                                    np.array(scores[0][i]),
                                    np.array(boxes[0][i])))
    img = cv2.cvtColor(img_raw.numpy(), cv2.COLOR_RGB2BGR)
    img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
    cv2.imwrite(output_path + 'detection.jpg', img)
    print('output saved to: {}'.format(output_path + 'detection.jpg'))

    # prepare image for response
    _, img_encoded = cv2.imencode('.png', img)
    response = img_encoded.tobytes()  # tostring() is deprecated

    ######################################################################
    # assumes output_path points at 'detections/', where detection.jpg was just written
    image_path = os.path.join(os.getcwd(), 'detections/detection.jpg')

    image = cv2.imread(image_path)
    print(image_path)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)

    # perform edge detection, find contours in the edge map, and sort the
    # resulting contours from left-to-right
    edged = cv2.Canny(blurred, 30, 150)
    cnts = cv2.findContours(edged.copy(), cv2.RETR_EXTERNAL,
                            cv2.CHAIN_APPROX_SIMPLE)
    cnts = imutils.grab_contours(cnts)
    cnts = sort_contours(cnts, method="left-to-right")[0]

    # initialize the list of contour bounding boxes and associated
    # characters that we'll be OCR'ing
    chars = []

    # loop over the contours
    for c in cnts:
        # compute the bounding box of the contour
        (x, y, w, h) = cv2.boundingRect(c)

        # filter out bounding boxes, ensuring they are neither too small
        # nor too large
        if 5 <= w <= 150 and 15 <= h <= 120:
            # extract the character and threshold it to make the character
            # appear as white (foreground) on a black background, then
            # grab the width and height of the thresholded image
            roi = gray[y:y + h, x:x + w]
            thresh = cv2.threshold(roi, 0, 255,
                                   cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
            (tH, tW) = thresh.shape

            # if the width is greater than the height, resize along the
            # width dimension
            if tW > tH:
                thresh = imutils.resize(thresh, width=32)

            # otherwise, resize along the height
            else:
                thresh = imutils.resize(thresh, height=32)

            # re-grab the image dimensions (now that it's been resized)
            # and then determine how much we need to pad the width and
            # height such that our image will be 32x32
            (tH, tW) = thresh.shape
            dX = int(max(0, 32 - tW) / 2.0)
            dY = int(max(0, 32 - tH) / 2.0)

            # pad the image and force 32x32 dimensions
            padded = cv2.copyMakeBorder(thresh,
                                        top=dY,
                                        bottom=dY,
                                        left=dX,
                                        right=dX,
                                        borderType=cv2.BORDER_CONSTANT,
                                        value=(0, 0, 0))
            padded = cv2.resize(padded, (32, 32))

            # prepare the padded image for classification via our
            # handwriting OCR model
            padded = padded.astype("float32") / 255.0
            padded = np.expand_dims(padded, axis=-1)

            # update our list of characters that will be OCR'd
            chars.append((padded, (x, y, w, h)))

    # extract the bounding box locations and padded characters
    boxes = [b[1] for b in chars]
    chars = np.array([c[0] for c in chars], dtype="float32")

    # OCR the characters using our handwriting recognition model
    # (guard against images that yielded no candidate characters)
    preds = model.predict(chars) if len(chars) > 0 else []

    # define the list of label names
    labelNames = "0123456789"
    labelNames += "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    labelNames = list(labelNames)
    lst = []
    # loop over the predictions and bounding box locations together
    for (pred, (x, y, w, h)) in zip(preds, boxes):
        # find the index of the label with the largest corresponding
        # probability, then extract the probability and label
        i = np.argmax(pred)
        prob = pred[i]
        label = labelNames[i]
        lst.append(label + ":" + str(prob))
        # draw the prediction on the image
        print("[INFO] {} - {:.2f}%".format(label, prob * 100))
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
        cv2.putText(image, label, (x - 10, y - 10), cv2.FONT_HERSHEY_SIMPLEX,
                    1.2, (0, 255, 0), 2)

    # # show the image
    # cv2.imshow("Image", image)
    # cv2.waitKey(0)

    # remove temporary image
    os.remove(image_name)
    cv2.imshow("Image", image)
    cv2.waitKey(0)

    try:
        return jsonify({"response": lst}), 200
    except FileNotFoundError:
        abort(404)
Exemplo n.º 20
0
def main(args):
    tf.config.experimental.list_physical_devices('GPU')
    # tf.device(f'/gpu:{args.gpu_num}')

    train_path = args.train_dataset
    valid_path = args.valid_dataset
    weights_path = args.weights
    # Path to a text file containing all classes, one per line
    classes_file = args.classes
    # Usually fit
    # mode = 'fit'  # Can be 'fit', 'eager_fit', 'eager_tf', 'valid'
    mode = args.mode
    '''
    fit: model.fit,
    eager_fit: model.fit(run_eagerly=True),
    eager_tf: custom GradientTape
    '''

    # Usually darknet
    transfer = args.transfer
    '''
    none: Training from scratch,
    darknet: Transfer darknet,
    no_output: Transfer all but output,
    frozen: Transfer and freeze all,
    fine_tune: Transfer all and freeze darknet only,
    pre: Use a pre-trained model for validation
    '''
    image_size = cfg.IMAGE_SIZE

    num_epochs = args.epochs
    batch_size = args.batch_size
    learning_rate = cfg.LEARNING_RATE
    num_classes = args.num_classes
    # num class for `weights` file if different, useful in transfer learning with different number of classes
    weight_num_classes = args.num_weight_class

    # saved_weights_path = '/Users/justinbutler/Desktop/school/Calgary/ML_Work/yolov3-tf2/weights/'
    # saved_weights_path = '/home/justin/ml_models/yolov3-tf2/weights/trained_{}.tf'.format(num_epochs)
    saved_weights_path = args.saved_weights

    # Original anchors below (kept for reference; overridden by cfg.YOLO_ANCHORS)
    # anchors = np.array([(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
    #                     (59, 119), (116, 90), (156, 198), (373, 326)],
    #                    np.float32) / 608
    anchors = cfg.YOLO_ANCHORS

    anchor_masks = cfg.YOLO_ANCHOR_MASKS

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    if args.no_train:
        print('Skipping training...')
    else:
        start_time = time.time()
        model = YoloV3(image_size, training=True, classes=num_classes)

        train_dataset = dataset.load_tfrecord_dataset(train_path,
                                                      classes_file,
                                                      image_size)
        train_dataset = train_dataset.shuffle(buffer_size=512)
        train_dataset = train_dataset.batch(batch_size)
        train_dataset = train_dataset.map(lambda x, y: (
            dataset.transform_images(x, image_size),
            dataset.transform_targets(y, anchors, anchor_masks, image_size)))
        train_dataset = train_dataset.prefetch(
            buffer_size=tf.data.experimental.AUTOTUNE)

        val_dataset = dataset.load_tfrecord_dataset(valid_path,
                                                    classes_file,
                                                    image_size)
        val_dataset = val_dataset.batch(batch_size)
        val_dataset = val_dataset.map(lambda x, y: (
            dataset.transform_images(x, image_size),
            dataset.transform_targets(y, anchors, anchor_masks, image_size)))

        # Configure the model for transfer learning
        if transfer == 'none':
            pass  # Nothing to do
        elif transfer in ['darknet', 'no_output']:
            # Darknet transfer is a special case that works
            # with incompatible number of classes
            # reset top layers
            model_pretrained = YoloV3(image_size,
                                      training=True,
                                      classes=weight_num_classes or num_classes)
            model_pretrained.load_weights(weights_path)

            if transfer == 'darknet':
                model.get_layer('yolo_darknet').set_weights(
                    model_pretrained.get_layer('yolo_darknet').get_weights())
                freeze_all(model.get_layer('yolo_darknet'))

            elif transfer == 'no_output':
                for layer in model.layers:
                    if not layer.name.startswith('yolo_output'):
                        layer.set_weights(model_pretrained.get_layer(
                            layer.name).get_weights())
                        freeze_all(layer)
        elif transfer == 'pre':
            model = YoloV3(image_size,
                           training=False,
                           classes=num_classes)
            model.load_weights(weights_path)

        else:
            # All other transfer require matching classes
            model.load_weights(weights_path)
            if transfer == 'fine_tune':
                # freeze darknet and fine tune other layers
                darknet = model.get_layer('yolo_darknet')
                freeze_all(darknet)
            elif transfer == 'frozen':
                # freeze everything
                freeze_all(model)
        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
        # Keras pairs a list of losses with model outputs by position; a dict
        # keyed by output name would make the pairing explicit
        loss = [YoloLoss(anchors[mask], classes=num_classes)
                for mask in anchor_masks]

        if mode == 'eager_tf':
            # Eager mode is great for debugging
            # Non eager graph mode is recommended for real training
            avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
            avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)
            for epoch in range(1, num_epochs + 1):
                for batch, (images, labels) in enumerate(train_dataset):
                    with tf.GradientTape() as tape:
                        outputs = model(images, training=True)
                        regularization_loss = tf.reduce_sum(model.losses)
                        pred_loss = []
                        for output, label, loss_fn in zip(outputs, labels, loss):
                            pred_loss.append(loss_fn(label, output))
                        total_loss = tf.reduce_sum(pred_loss) + regularization_loss
                    grads = tape.gradient(total_loss, model.trainable_variables)
                    optimizer.apply_gradients(
                        zip(grads, model.trainable_variables))
                    print("{}_train_{}, {}, {}".format(
                        epoch, batch, total_loss.numpy(),
                        list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                    avg_loss.update_state(total_loss)
                for batch, (images, labels) in enumerate(val_dataset):
                    outputs = model(images)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss
                    print("{}_val_{}, {}, {}".format(
                        epoch, batch, total_loss.numpy(),
                        list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                    avg_val_loss.update_state(total_loss)
                print("{}, train: {}, val: {}".format(
                    epoch,
                    avg_loss.result().numpy(),
                    avg_val_loss.result().numpy()))
                avg_loss.reset_states()
                avg_val_loss.reset_states()

                model.save_weights(
                    'checkpoints/yolov3_train_{}.tf'.format(epoch))
        elif mode == 'valid':
            pass  # Pass this step for validation only
        else:
            model.compile(optimizer=optimizer, loss=loss,
                          run_eagerly=(mode == 'eager_fit'))
            callbacks = [
                ReduceLROnPlateau(verbose=1, min_lr=1e-4, patience=50),
                # EarlyStopping(patience=3, verbose=1),
                ModelCheckpoint('checkpoints/midpoints/yolov3_train_{epoch}.tf',
                                verbose=1, save_weights_only=True),
                TensorBoard(log_dir=f'logs/{saved_weights_path[:-3]}')
            ]

            history = model.fit(train_dataset,
                                epochs=num_epochs,
                                callbacks=callbacks,
                                validation_data=val_dataset)
            print(f'Saving weights to: {saved_weights_path}')
            model.save_weights(saved_weights_path)
        finish_time = time.time()
        train_time = finish_time - start_time
        print('Training time elapsed: {}'.format(train_time))

    # Calculate mAP
    if args.validate:
        print('Validating...')
        model = YoloV3(image_size, training=False, classes=num_classes)
        model.load_weights(saved_weights_path).expect_partial()

        batch_size = 1

        val_dataset = dataset.load_tfrecord_dataset(valid_path,
                                                    classes_file,
                                                    image_size)
        val_dataset = val_dataset.batch(batch_size)

        val_dataset = val_dataset.map(lambda x, y: (
            dataset.transform_images(x, image_size),
            dataset.transform_targets(y, anchors, anchor_masks, image_size)))

        images = []
        for img, labs in val_dataset:
            img = np.squeeze(img)
            images.append(img)

        predictions = []

        evaluator = Evaluator(iou_thresh=args.iou)

        # labels - (N, grid, grid, anchors, [x, y, w, h, obj, class])
        boxes, scores, classes, num_detections = model.predict(val_dataset)
        print(boxes.shape)
        print(boxes[0])
        # boxes -> (num_imgs, num_detections, box coords)

        filtered_labels = []
        for _, label in val_dataset:
            filt_labels = flatten_labels(label)
            filtered_labels.append(filt_labels)

        # img here is an image index, not an image array
        for img in range(len(num_detections)):
            row = []
            for sc in range(len(scores[img])):
                if scores[img][sc] > 0:
                    row.append(np.hstack([boxes[img][sc] * image_size, scores[img][sc], classes[img][sc]]))
            predictions.append(np.asarray(row))

        predictions = np.asarray(predictions)  # numpy array of shape [num_imgs x num_preds x 6]

        if len(predictions) == 0:  # No predictions made
            print('No predictions made - exiting.')
            exit()

        # predictions[:, :, 0:4] = predictions[:, :, 0:4] * image_size
        # Predictions format - [num_imgs x num_preds x [box coords x4, score, classes]]
        # Box coords should be in format x1 y1 x2 y2

        evaluator(predictions, filtered_labels, images, roc=False)  # Check gts box coords

    if args.valid_imgs:  # Predictions
        print('Valid Images...')
        # yolo = YoloV3(classes=num_classes)
        yolo = YoloV3(image_size, training=False, classes=num_classes)
        yolo.load_weights(saved_weights_path).expect_partial()
        print('weights loaded')

        print('Validation Image...')
        # TODO: avoid requiring manual edits to the class dictionary here
        class_dict = cfg.CLASS_DICT

        class_names = list(class_dict.values())
        print('classes loaded')

        val_dataset = dataset.load_tfrecord_dataset(valid_path,
                                                    classes_file,
                                                    image_size)
        val_dataset = val_dataset.batch(1)
        val_dataset = val_dataset.map(lambda x, y: (
            dataset.transform_images(x, image_size),
            dataset.transform_targets(y, anchors, anchor_masks, image_size)))


        # boxes, scores, classes, num_detections
        index = 0
        for img_raw, _label in val_dataset.take(5):
            print(f'Index {index}')

            #img = tf.expand_dims(img_raw, 0)
            img = transform_images(img_raw, image_size)
            img = img * 255

            boxes, scores, classes, nums = yolo(img)

            filt_labels = flatten_labels(_label)

            # NOTE: the predictions above are replaced with the ground-truth
            # labels here, so the image saved below shows labels, not detections
            boxes = tf.expand_dims(filt_labels[:, 0:4], 0)
            scores = tf.expand_dims(filt_labels[:, 4], 0)
            classes = tf.expand_dims(filt_labels[:, 5], 0)
            nums = tf.expand_dims(filt_labels.shape[0], 0)

            output = 'test_images/test_{}.jpg'.format(index)
            # output = '/Users/justinbutler/Desktop/test/test_images/test_{}.jpg'.format(index)

            # print('detections:')
            # for i in range(nums[index]):
            #     print('\t{}, {}, {}'.format(class_names[int(classes[index][i])],
            #                               np.array(scores[index][i]),
            #                               np.array(boxes[index][i])))
            #     if i > 10:
            #         continue

            img = cv2.cvtColor(img_raw[0].numpy(), cv2.COLOR_RGB2BGR)
            img = draw_outputs(img, (boxes, scores, classes, nums), class_names, thresh=0)
            img = img * 255
            cv2.imwrite(output, img)

            index = index + 1

    if args.visual_data:
        print('Visual Data...')
        val_dataset = dataset.load_tfrecord_dataset(valid_path,
                                                    classes_file,
                                                    image_size)
        val_dataset = val_dataset.batch(1)
        val_dataset = val_dataset.map(lambda x, y: (
            dataset.transform_images(x, image_size),
            dataset.transform_targets(y, anchors, anchor_masks, image_size)))

        index = 0
        for img_raw, _label in val_dataset.take(5):
            print(f'Index {index}')
            # img = tf.expand_dims(img_raw, 0)
            img = transform_images(img_raw, image_size)

            output = 'test_images/test_labels_{}.jpg'.format(index)
            # output = '/Users/justinbutler/Desktop/test/test_images/test_labels_{}.jpg'.format(index)

            filt_labels = flatten_labels(_label)

            boxes = tf.expand_dims(filt_labels[:, 0:4], 0)
            scores = tf.expand_dims(filt_labels[:, 4], 0)
            classes = tf.expand_dims(filt_labels[:, 5], 0)
            nums = tf.expand_dims(filt_labels.shape[0], 0)

            img = cv2.cvtColor(img_raw[0].numpy(), cv2.COLOR_RGB2BGR)
            img = draw_outputs(img, (boxes, scores, classes, nums), class_names, thresh=0)
            img = img * 255

            cv2.imwrite(output, img)

            index = index + 1

        return
Exemplo n.º 21
0
def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights).expect_partial()
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    if FLAGS.tfrecord:
        dataset = load_tfrecord_dataset(
            FLAGS.tfrecord, FLAGS.classes, FLAGS.size)
        dataset = dataset.shuffle(512)
        img_raw, _label = next(iter(dataset.take(1)))
    else:
        # OPEN THE IMAGE LIST
        # PARSE THE FIRST IMAGE FILENAME
        # OPEN THE IMAGE AS img_raw

        # STUB -- CHANGE LATER
        image_name = FLAGS.image

        print(image_name)
        img_raw = tf.image.decode_image(
            open(FLAGS.image, 'rb').read(), channels=3)

    
    # IMAGE PROCESSING AFTER WE TAKE THE RAW ONE.
    img = tf.expand_dims(img_raw, 0)
    img = transform_images(img, FLAGS.size)

    t1 = time.time()
    boxes, scores, classes, nums = yolo(img)


    t2 = time.time()
    print("FLAGS.size: " + format(FLAGS.size))
    print("Image RAW size: " + format(img_raw.shape))
    print("Image size: " + format(img.shape))
    logging.info('time: {}'.format(t2 - t1))

    logging.info('detections:')
    for i in range(nums[0]):
        logging.info('\t{}, {}, {}'.format(class_names[int(classes[0][i])],
                                           np.array(scores[0][i]),
                                           np.array(boxes[0][i])))

    img = cv2.cvtColor(img_raw.numpy(), cv2.COLOR_RGB2BGR)
    img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
    cv2.imwrite(FLAGS.output, img)
    logging.info('output saved to: {}'.format(FLAGS.output))

    print("Persons detected:")
    for i in range(nums[0]):
        # Only process class 0 = person.
        if int(classes[0][i]) == 0:
            print("\tPersonID " + format(i))
            print("\t\tSCORE: " + format(scores[0][i]))

            int_box = convert_box_to_img_size(img_raw.shape, np.array(boxes[0][i]))
            print("\t\tBOX: " + format(int_box))
Exemplo n.º 22
0
def main():

    class_names = [
        c.strip() for c in open('./data/labels/coco.names').readlines()
    ]
    yolo = YoloV3(classes=len(class_names))
    yolo.load_weights('./weights/yolov3.tf')

    imageHub = imagezmq.ImageHub()

    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 0.8

    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric('cosine',
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    #vid = cv2.VideoCapture('./data/video/traffic1.mkv')
    #vid = cv2.VideoCapture("video.webm")
    #vid = VideoCaptureAsync("video.webm")
    #vid = vid.start()

    codec = cv2.VideoWriter_fourcc(*'XVID')
    #vid_fps =int(vid.get(cv2.CAP_PROP_FPS))
    #vid_width,vid_height = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)), int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    #out = cv2.VideoWriter('./data/video/results.avi', codec, vid_fps, (vid_width, vid_height))
    # NOTE: the frame size is hardcoded; frames received below must be
    # 480x480 or the writer will produce an unreadable file
    out = cv2.VideoWriter('./data/video/results.avi', codec, 20, (480, 480))

    from collections import deque
    pts = [deque(maxlen=30) for _ in range(1000)]

    counter = []

    while True:

        #_, img = vid.read()

        (rpiName, img) = imageHub.recv_image()
        imageHub.send_reply(b'OK')

        if img is None:
            print('Completed')
            break

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, 416)

        t1 = time.time()

        boxes, scores, classes, nums = yolo.predict(img_in)

        classes = classes[0]
        names = []

        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])

        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)

        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                converted_boxes, scores[0], names, features)
        ]

        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        tracker.predict()
        tracker.update(detections)

        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        #current_count = int(0)

        for track in tracker.tracks:

            if not track.is_confirmed() or track.time_since_update > 1:
                continue

            bbox = track.to_tlbr()
            class_name = track.get_class()
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]

            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) +
                           (len(class_name) + len(str(track.track_id))) * 17,
                           int(bbox[1])), color, -1)

            cv2.putText(img, class_name + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                        (255, 255, 255), 2)

            center = (int(
                ((bbox[0]) + (bbox[2])) / 2), int(((bbox[1]) + (bbox[3])) / 2))
            pts[track.track_id].append(center)

            for j in range(1, len(pts[track.track_id])):

                if pts[track.track_id][j - 1] is None or pts[
                        track.track_id][j] is None:
                    continue
                thickness = int(np.sqrt(64 / float(j + 1)) * 2)
                cv2.line(img, (pts[track.track_id][j - 1]),
                         (pts[track.track_id][j]), color, thickness)

            height, width, _ = img.shape
            #cv2.line(img, (0, int(3*height/6+height/20)), (width, int(3*height/6+height/20)), (0, 255, 0), thickness=2)
            #cv2.line(img, (0, int(3*height/6-height/20)), (width, int(3*height/6-height/20)), (0, 255, 0), thickness=2)

            center_y = int(((bbox[1]) + (bbox[3])) / 2)

            if center_y <= int(3 * height / 6 + height /
                               20) and center_y >= int(3 * height / 6 -
                                                       height / 20):
                if class_name == 'car' or class_name == 'truck' or class_name == 'person':
                    counter.append(int(track.track_id))
                    #current_count += 1

        total_count = len(set(counter))
        #cv2.putText(img, "Current Vehicle Count: " + str(current_count), (0, 80), 0, 1, (0, 0, 255), 2)
        cv2.putText(img, "Total Vehicle Count: " + str(total_count), (0, 130),
                    0, 1, (0, 0, 255), 2)

        fps = 1. / (time.time() - t1)
        cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30), 0, 1, (0, 0, 255),
                    2)
        #cv2.resizeWindow('output', 1024, 768)
        cv2.imshow('output', img)
        out.write(img)

        if cv2.waitKey(1) == ord('q'):
            break

    #vid.release()
    out.release()
    cv2.destroyAllWindows()
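
This example only shows the receiving side: frames arrive through imagezmq's ImageHub. A minimal sender sketch, assuming the hub above runs on the same machine at imagezmq's default port (5555); the video source is arbitrary:

import socket

import cv2
import imagezmq

# Connect to the ImageHub in the example above (default REQ/REP mode).
sender = imagezmq.ImageSender(connect_to='tcp://127.0.0.1:5555')
rpi_name = socket.gethostname()  # identifies this sender at the hub

cap = cv2.VideoCapture(0)  # any OpenCV-readable source works
while True:
    ok, frame = cap.read()
    if not ok:
        break
    sender.send_image(rpi_name, frame)  # blocks until the hub replies b'OK'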
Exemplo n.º 23
0
File: app.py  Project: rasika-v/API
def get_detections():
    raw_images = []
    images = request.files.getlist("images")
    print(images)
    image_names = []
    for image in images:
        image_name = image.filename
        image_names.append(image_name)
        image.save(os.path.join(os.getcwd(), image_name))
        img_raw = tf.image.decode_image(open(image_name, 'rb').read(),
                                        channels=3)
        raw_images.append(img_raw)

    num = 0

    # create list for final response
    response = []

    for j in range(len(raw_images)):
        # create list of responses for current image
        responses = []
        raw_img = raw_images[j]
        num += 1
        img = tf.expand_dims(raw_img, 0)
        img = transform_images(img, size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo(img)
        t2 = time.time()
        print('time: {}'.format(t2 - t1))

        print('detections:')
        for i in range(nums[0]):
            print('\t{}, {}, {}'.format(class_names[int(classes[0][i])],
                                        np.array(scores[0][i]),
                                        np.array(boxes[0][i])))
            '''responses.append({
                "class": 'Pothole',
                "confidence": float("{0:.2f}".format(np.array(scores[0][i])*100))
            })'''

            response.append({
                "class": 'Pothole',
                "confidence": float(
                    "{0:.2f}".format(np.array(scores[0][i]) * 100)),
                #"image": image_names[j],
                #"detections": responses
            })
        img = cv2.cvtColor(raw_img.numpy(), cv2.COLOR_RGB2BGR)
        img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
        cv2.imwrite(output_path + 'detection' + str(num) + '.jpg', img)
        print('output saved to: {}'.format(output_path + 'detection' +
                                           str(num) + '.jpg'))

    #remove temporary images
    for name in image_names:
        os.remove(name)
    try:
        return jsonify({"response": response}), 200
    except FileNotFoundError:
        abort(404)
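
The Flask route decorator for get_detections is not shown, so the URL below is an assumption. A minimal client sketch that posts images the way the handler expects them (one or more files under the "images" form field):

import requests

# Hypothetical endpoint path and port -- adjust to the actual @app.route.
url = 'http://localhost:5000/detections'
files = [('images', open('road1.jpg', 'rb')),
         ('images', open('road2.jpg', 'rb'))]
resp = requests.post(url, files=files)
print(resp.status_code, resp.json())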
Exemplo n.º 24
0
def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    # Anchors are fixed: each of the three output scales has its own set of three anchor boxes.
    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size, training=True,
                           classes=FLAGS.num_classes)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True, classes=FLAGS.num_classes)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    # Start from a fake placeholder dataset; it is replaced below whenever
    # FLAGS.dataset is provided.
    train_dataset = dataset.load_fake_dataset()

    # Load the training tfrecords and build the preprocessing pipeline.
    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(
            FLAGS.dataset, FLAGS.classes, FLAGS.size)

    # Shuffle the training data.
    train_dataset = train_dataset.shuffle(buffer_size=512)
    # Batch the training data.
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    # Resize the images and match the targets to the anchors.
    train_dataset = train_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
    # Prefetch with AUTOTUNE so preprocessing overlaps with training.
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    # Same pipeline as train_dataset.
    val_dataset = dataset.load_fake_dataset()
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(
            FLAGS.val_dataset, FLAGS.classes, FLAGS.size)
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))


    # Configure the model for transfer learning: reuse pretrained YOLO weights as the starting point.
    if FLAGS.transfer == 'none':
        pass  # Nothing to do
    elif FLAGS.transfer in ['darknet', 'no_output']:
        # Darknet transfer is a special case that works
        # with incompatible number of classes

        # Reset top layers: build a pretrained reference model to copy weights from.
        if FLAGS.tiny:
            model_pretrained = YoloV3Tiny(
                FLAGS.size, training=True, classes=FLAGS.weights_num_classes or FLAGS.num_classes)
        else:
            model_pretrained = YoloV3(
                FLAGS.size, training=True, classes=FLAGS.weights_num_classes or FLAGS.num_classes)
        # Load the pretrained weights.
        model_pretrained.load_weights(FLAGS.weights)

        # Copy the darknet backbone weights from the pretrained model and
        # freeze them; all other layers remain trainable.
        if FLAGS.transfer == 'darknet':
            model.get_layer('yolo_darknet').set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
            freeze_all(model.get_layer('yolo_darknet'))

        # Copy everything except the yolo_output layers and freeze it, so
        # only the output layers are trained.
        elif FLAGS.transfer == 'no_output':
            for l in model.layers:
                if not l.name.startswith('yolo_output'):
                    l.set_weights(model_pretrained.get_layer(
                        l.name).get_weights())
                    freeze_all(l)

    else:
        # All other transfer require matching classes
        model.load_weights(FLAGS.weights)
        if FLAGS.transfer == 'fine_tune':
            # Freeze the darknet backbone and fine-tune the other layers.
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        elif FLAGS.transfer == 'frozen':
            # Freeze the entire model; nothing is trained.
            freeze_all(model)

    # Optimizer and one YoloLoss per output scale.
    optimizer = tf.keras.optimizers.Adam(learning_rate=FLAGS.learning_rate)
    loss = [YoloLoss(anchors[mask], classes=FLAGS.num_classes)
            for mask in anchor_masks]

    # Eager mode (dynamic graph) gives immediate feedback, which is useful for watching training behaviour.
    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        # Track running means of the train and val losses.
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                # Forward pass under GradientTape; the gradients then update the model parameters.
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(
                    zip(grads, model.trainable_variables))

                # Log the per-batch training loss.
                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_loss.update_state(total_loss)

            # Validation pass: same as the training loop, without gradient updates.
            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                logging.info("{}_val_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)

            # Log the epoch-level train and val losses.
            logging.info("{}, train: {}, val: {}".format(
                epoch,
                avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))

            # Reset the metrics for the next epoch.
            avg_loss.reset_states()
            avg_val_loss.reset_states()

            # Save this epoch's weights.
            model.save_weights(
                'checkpoints/yolov3_train_{}.tf'.format(epoch))

    # Otherwise train in compiled (graph) mode via model.fit.
    else:
        # Compile the model.
        model.compile(optimizer=optimizer, loss=loss,
                      run_eagerly=(FLAGS.mode == 'eager_fit'),
                      metrics=['accuracy'])

        # Callbacks: LR reduction on plateau, early stopping, per-epoch checkpoints, TensorBoard.
        callbacks = [
            ReduceLROnPlateau(verbose=1),
            EarlyStopping(patience=3, verbose=1),
            ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                            verbose=1, save_weights_only=True),
            TensorBoard(log_dir='logs')
        ]

        # Alternative: pass period=2 to ModelCheckpoint to save a checkpoint every 2 epochs.

        # Train with the callbacks above.
        history = model.fit(train_dataset,
                            epochs=FLAGS.epochs,
                            callbacks=callbacks,
                            validation_data=val_dataset,
                            validation_freq=1)
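
freeze_all is imported from the repo's utils and not shown in these snippets. A sketch consistent with how it is used above: it recursively marks a layer (or a whole nested model) as non-trainable:

def freeze_all(model, frozen=True):
    # Flip the trainable flag, then recurse into nested models so every
    # sub-layer of e.g. yolo_darknet is frozen as well.
    model.trainable = not frozen
    if isinstance(model, tf.keras.Model):
        for layer in model.layers:
            freeze_all(layer, frozen)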
Exemplo n.º 25
0
# The head of this example is missing in the source; the VideoWriter call
# below is reconstructed from the identical (commented) line in Exemplo
# n.º 22 and assumes vid, codec, vid_fps, vid_width and vid_height were
# set up the same way.
out = cv2.VideoWriter('./data/video/results.avi', codec, vid_fps,
                      (vid_width, vid_height))

from collections import deque
pts = [deque(maxlen=30) for _ in range(1000)]

counter = []

while True:
    _, img = vid.read()
    if img is None:
        print('Completed')
        break

    img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img_in = tf.expand_dims(img_in, 0)
    img_in = transform_images(img_in, 416)

    t1 = time.time()

    boxes, scores, classes, nums = yolo.predict(img_in)

    classes = classes[0]
    names = []
    for i in range(len(classes)):
        names.append(class_names[int(classes[i])])
    names = np.array(names)
    converted_boxes = convert_boxes(img, boxes[0])
    features = encoder(img, converted_boxes)

    detections = [
        Detection(bbox, score, class_name, feature)
        for bbox, score, class_name, feature in zip(
            converted_boxes, scores[0], names, features)
    ]
    # (the rest of this example is truncated in the source)
Exemplo n.º 26
0
def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size,
                           training=True,
                           classes=FLAGS.num_classes)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True, classes=FLAGS.num_classes)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    train_dataset = dataset.load_tfrecord_dataset(FLAGS.dataset, FLAGS.classes,
                                                  FLAGS.size)
    train_dataset = train_dataset.shuffle(buffer_size=512)
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    val_dataset = dataset.load_tfrecord_dataset(FLAGS.val_dataset,
                                                FLAGS.classes, FLAGS.size)
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))

    # Configure the model for transfer learning
    if FLAGS.transfer == 'none':
        pass  # Nothing to do
    elif FLAGS.transfer in ['darknet', 'no_output']:
        # Darknet transfer is a special case that works
        # with incompatible number of classes

        # reset top layers
        if FLAGS.tiny:
            model_pretrained = YoloV3Tiny(FLAGS.size,
                                          training=True,
                                          classes=FLAGS.weights_num_classes
                                          or FLAGS.num_classes)
        else:
            model_pretrained = YoloV3(FLAGS.size,
                                      training=True,
                                      classes=FLAGS.weights_num_classes
                                      or FLAGS.num_classes)
        model_pretrained.load_weights(FLAGS.weights)

        if FLAGS.transfer == 'darknet':
            model.get_layer('yolo_darknet').set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
            freeze_all(model.get_layer('yolo_darknet'))

        elif FLAGS.transfer == 'no_output':
            for l in model.layers:
                if not l.name.startswith('yolo_output'):
                    l.set_weights(
                        model_pretrained.get_layer(l.name).get_weights())
                    freeze_all(l)

    else:
        # All other transfer require matching classes
        model.load_weights(FLAGS.weights)
        if FLAGS.transfer == 'fine_tune':
            # freeze darknet and fine tune other layers
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        elif FLAGS.transfer == 'frozen':
            # freeze everything
            freeze_all(model)

    optimizer = tf.keras.optimizers.Adam(learning_rate=FLAGS.learning_rate)
    loss = [
        YoloLoss(anchors[mask], classes=FLAGS.num_classes)
        for mask in anchor_masks
    ]

    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(zip(grads,
                                              model.trainable_variables))

                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_loss.update_state(total_loss)

            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                logging.info("{}_val_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)

            logging.info("{}, train: {}, val: {}".format(
                epoch,
                avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))

            avg_loss.reset_states()
            avg_val_loss.reset_states()
            model.save_weights('checkpoints/yolov3_train_{}.tf'.format(epoch))
            model.save_weights(
                'checkpoints/yolov3_train_{}.ckpt'.format(epoch))
    else:
        model.compile(optimizer=optimizer,
                      loss=loss,
                      run_eagerly=(FLAGS.mode == 'eager_fit'))

        callbacks = [
            tf.keras.callbacks.ReduceLROnPlateau(verbose=1,
                                                 factor=0.2,
                                                 patience=3,
                                                 cooldown=0),
            tf.keras.callbacks.EarlyStopping(patience=3, verbose=1),
            tf.keras.callbacks.ModelCheckpoint(
                filepath='./checkpoints/yolov3-tiny_train.ckpt',
                verbose=1,
                save_weights_only=True,
                save_best_only=True),
            tf.keras.callbacks.ModelCheckpoint(
                filepath='./checkpoints/yolov3-tiny_train.tf',
                verbose=0,
                save_weights_only=True,
                save_best_only=True),
            tf.keras.callbacks.TensorBoard(log_dir='logs')
        ]

        history = model.fit(train_dataset,
                            epochs=FLAGS.epochs,
                            callbacks=callbacks,
                            validation_data=val_dataset)

    saved_model_dir = './model/yolov3-tiny_train'
    model.save(saved_model_dir)
    converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
    tflite_model = converter.convert()
    open('model_tflite.tflite', 'wb').write(tflite_model)
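
Once converted, the exported .tflite file can be smoke-tested with the TFLite interpreter; a minimal sketch (the zero-filled input is only for shape/dtype checking, not a meaningful detection):

import numpy as np
import tensorflow as tf

interpreter = tf.lite.Interpreter(model_path='model_tflite.tflite')
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Read the expected shape and dtype from the model instead of assuming them.
dummy = np.zeros(input_details[0]['shape'], dtype=input_details[0]['dtype'])
interpreter.set_tensor(input_details[0]['index'], dummy)
interpreter.invoke()
print(interpreter.get_tensor(output_details[0]['index']).shape)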
Exemplo n.º 27
0
def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')

    # Setup
    if FLAGS.multi_gpu:
        for physical_device in physical_devices:
            tf.config.experimental.set_memory_growth(physical_device, True)

        strategy = tf.distribute.MirroredStrategy()
        print('Number of devices: {}'.format(strategy.num_replicas_in_sync))
        BATCH_SIZE = FLAGS.batch_size * strategy.num_replicas_in_sync
        FLAGS.batch_size = BATCH_SIZE

        with strategy.scope():
            model, optimizer, loss, anchors, anchor_masks = setup_model()
    else:
        model, optimizer, loss, anchors, anchor_masks = setup_model()

    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(FLAGS.dataset,
                                                      FLAGS.classes,
                                                      FLAGS.size)
    else:
        train_dataset = dataset.load_fake_dataset()
    train_dataset = train_dataset.shuffle(buffer_size=512)
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(FLAGS.val_dataset,
                                                    FLAGS.classes, FLAGS.size)
    else:
        val_dataset = dataset.load_fake_dataset()
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))

    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(zip(grads,
                                              model.trainable_variables))

                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_loss.update_state(total_loss)

            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                logging.info("{}_val_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)

            logging.info("{}, train: {}, val: {}".format(
                epoch,
                avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))

            avg_loss.reset_states()
            avg_val_loss.reset_states()
            model.save_weights('checkpoints/yolov3_train_{}.tf'.format(epoch))
    else:

        callbacks = [
            ReduceLROnPlateau(verbose=1),
            EarlyStopping(patience=3, verbose=1),
            ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                            verbose=1,
                            save_weights_only=True),
            TensorBoard(log_dir='logs')
        ]

        start_time = time.time()
        history = model.fit(train_dataset,
                            epochs=FLAGS.epochs,
                            callbacks=callbacks,
                            validation_data=val_dataset)
        end_time = time.time() - start_time
        print(f'Total Training Time: {end_time}')

        import mlflow
        mlflow.set_experiment("signal_detect")
        epoch = len(history.history['loss'])
        mlflow.log_param("dataset", FLAGS.dataset)
        mlflow.log_param("val_dataset", FLAGS.val_dataset)
        mlflow.log_param("epoch", FLAGS.epochs)
        mlflow.log_param("batch_size", FLAGS.batch_size)
        mlflow.log_param("learning_rate", FLAGS.learning_rate)
        mlflow.log_metric("loss", float(history.history['loss'][epoch - 1]))
        mlflow.log_metric(
            "yolo_output_0_loss",
            float(history.history['yolo_output_0_loss'][epoch - 1]))
        mlflow.log_metric(
            "yolo_output_1_loss",
            float(history.history['yolo_output_1_loss'][epoch - 1]))
        mlflow.log_metric("val_loss",
                          float(history.history['val_loss'][epoch - 1]))
        mlflow.log_metric(
            "val_yolo_output_0_loss",
            float(history.history['val_yolo_output_0_loss'][epoch - 1]))
        mlflow.log_metric(
            "val_yolo_output_1_loss",
            float(history.history['val_yolo_output_1_loss'][epoch - 1]))
        mlflow.log_artifact("checkpoints/yolov3_train_" + str(epoch) +
                            ".tf.data-00000-of-00001")
        mlflow.log_artifact("checkpoints/yolov3_train_" + str(epoch) +
                            ".tf.index")
Exemplo n.º 28
0
def main(_argv):
    img = tf.image.decode_image(open(FLAGS.image, 'rb').read(), channels=3)
    img = tf.expand_dims(img, 0)
    img = transform_images(img, FLAGS.size)
Exemplo n.º 29
0
def main(_argv):
    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size,
                           training=True,
                           classes=FLAGS.num_classes)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True, classes=FLAGS.num_classes)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    train_dataset = dataset.load_fake_dataset()
    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(FLAGS.dataset,
                                                      FLAGS.classes)
    train_dataset = train_dataset.shuffle(buffer_size=1024)  # TODO: not 1024
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(
        lambda x, y: (dataset.transform_images(x, FLAGS.size),
                      dataset.transform_targets(y, anchors, anchor_masks, 80)))
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    val_dataset = dataset.load_fake_dataset()
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(FLAGS.val_dataset,
                                                    FLAGS.classes)
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(
        lambda x, y: (dataset.transform_images(x, FLAGS.size),
                      dataset.transform_targets(y, anchors, anchor_masks, 80)))

    if FLAGS.transfer != 'none':
        model.load_weights(FLAGS.weights)
        if FLAGS.transfer == 'fine_tune':
            # freeze darknet
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        elif FLAGS.transfer == 'frozen':
            # freeze everything
            freeze_all(model)
        else:
            # reset top layers
            if FLAGS.tiny:  # get initial weights
                init_model = YoloV3Tiny(FLAGS.size,
                                        training=True,
                                        classes=FLAGS.num_classes)
            else:
                init_model = YoloV3(FLAGS.size,
                                    training=True,
                                    classes=FLAGS.num_classes)

            if FLAGS.transfer == 'darknet':
                for l in model.layers:
                    if l.name != 'yolo_darknet' and l.name.startswith('yolo_'):
                        l.set_weights(
                            init_model.get_layer(l.name).get_weights())
                    else:
                        freeze_all(l)
            elif FLAGS.transfer == 'no_output':
                for l in model.layers:
                    if l.name.startswith('yolo_output'):
                        l.set_weights(
                            init_model.get_layer(l.name).get_weights())
                    else:
                        freeze_all(l)

    optimizer = tf.keras.optimizers.Adam(learning_rate=FLAGS.learning_rate)
    loss = [
        YoloLoss(anchors[mask], classes=FLAGS.num_classes)
        for mask in anchor_masks
    ]

    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(zip(grads,
                                              model.trainable_variables))

                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_loss.update_state(total_loss)

            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                logging.info("{}_val_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)

            logging.info("{}, train: {}, val: {}".format(
                epoch,
                avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))

            avg_loss.reset_states()
            avg_val_loss.reset_states()
            model.save_weights('checkpoints/yolov3_train_{}.tf'.format(epoch))
    else:
        model.compile(optimizer=optimizer,
                      loss=loss,
                      run_eagerly=(FLAGS.mode == 'eager_fit'))

        callbacks = [
            ReduceLROnPlateau(verbose=1),
            EarlyStopping(patience=3, verbose=1),
            ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                            verbose=1,
                            save_weights_only=True),
            TensorBoard(log_dir='logs')
        ]

        history = model.fit(train_dataset,
                            epochs=FLAGS.epochs,
                            callbacks=callbacks,
                            validation_data=val_dataset)
Exemplo n.º 30
0
def main(_argv):

    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size,
                           training=True,
                           classes=FLAGS.num_classes)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True, classes=FLAGS.num_classes)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    train_dataset = dataset.load_fake_dataset()
    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(FLAGS.dataset,
                                                      FLAGS.classes,
                                                      FLAGS.size)
    tsteps = sum(1 for _ in train_dataset)
    train_dataset = train_dataset.shuffle(buffer_size=256,
                                          reshuffle_each_iteration=True)
    train_dataset = train_dataset.batch(FLAGS.batch_size, drop_remainder=True)
    train_dataset = train_dataset.repeat(FLAGS.epochs)
    train_dataset = train_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    val_dataset = dataset.load_fake_dataset()
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(FLAGS.val_dataset,
                                                    FLAGS.classes, FLAGS.size)
    vsteps = sum(1 for _ in val_dataset)
    val_dataset = val_dataset.batch(FLAGS.batch_size, drop_remainder=True)
    val_dataset = val_dataset.repeat(FLAGS.epochs)
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))

    # Configure the model for transfer learning
    if FLAGS.transfer == 'none':
        pass  # Nothing to do
    elif FLAGS.transfer in ['darknet', 'no_output']:
        # Darknet transfer is a special case that works
        # with incompatible number of classes

        # reset top layers
        if FLAGS.tiny:
            model_pretrained = YoloV3Tiny(FLAGS.size,
                                          training=True,
                                          classes=FLAGS.weights_num_classes
                                          or FLAGS.num_classes)
        else:
            model_pretrained = YoloV3(FLAGS.size,
                                      training=True,
                                      classes=FLAGS.weights_num_classes
                                      or FLAGS.num_classes)
        model_pretrained.load_weights(FLAGS.weights)

        if FLAGS.transfer == 'darknet':
            model.get_layer('yolo_darknet').set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
            freeze_all(model.get_layer('yolo_darknet'))

        elif FLAGS.transfer == 'no_output':
            for l in model.layers:
                if not l.name.startswith('yolo_output'):
                    l.set_weights(
                        model_pretrained.get_layer(l.name).get_weights())
                    freeze_all(l)

    else:
        # All other transfer require matching classes
        model.load_weights(FLAGS.weights)
        if FLAGS.transfer == 'fine_tune':
            # freeze darknet and fine tune other layers
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        elif FLAGS.transfer == 'frozen':
            # freeze everything
            freeze_all(model)

    optimizer = tf.keras.optimizers.Adam(learning_rate=FLAGS.learning_rate)
    loss = [
        YoloLoss(anchors[mask], classes=FLAGS.num_classes)
        for mask in anchor_masks
    ]

    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(zip(grads,
                                              model.trainable_variables))

                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_loss.update_state(total_loss)

            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                logging.info("{}_val_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)

            logging.info("{}, train: {}, val: {}".format(
                epoch,
                avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))

            avg_loss.reset_states()
            avg_val_loss.reset_states()
            model.save_weights('checkpoints/yolov3_train_{}.tf'.format(epoch))
    else:
        model.compile(optimizer=optimizer,
                      loss=loss,
                      run_eagerly=(FLAGS.mode == 'eager_fit'))

        callbacks = [
            ReduceLROnPlateau(verbose=1),
            ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                            verbose=1,
                            save_weights_only=True,
                            period=10),
            TensorBoard(log_dir='logs')
        ]

        model.fit(train_dataset,
                  epochs=FLAGS.epochs,
                  steps_per_epoch=int(tsteps / FLAGS.batch_size),
                  callbacks=callbacks,
                  validation_data=val_dataset,
                  validation_steps=int(vsteps / FLAGS.batch_size))