Example #1
def _main_(args):
    config_path  = args.conf
    weights_path = args.weights
    image_path   = args.input

    with open(config_path) as config_buffer:    
        config = json.load(config_buffer)

    ###############################
    #   Make the model 
    ###############################

    yolo = YOLO(backend             = config['model']['backend'],
                input_size          = config['model']['input_size'], 
                labels              = config['model']['labels'], 
                max_box_per_image   = config['model']['max_box_per_image'],
                anchors             = config['model']['anchors'])

    ###############################
    #   Load trained weights
    ###############################    

    yolo.load_weights(weights_path)

    ###############################
    #   Predict bounding boxes 
    ###############################

    if image_path[-4:] == '.mp4':
        video_out = image_path[:-4] + '_detected' + image_path[-4:]
        video_reader = cv2.VideoCapture(image_path)

        nb_frames = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_h = int(video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frame_w = int(video_reader.get(cv2.CAP_PROP_FRAME_WIDTH))

        video_writer = cv2.VideoWriter(video_out,
                               cv2.VideoWriter_fourcc(*'MPEG'), 
                               50.0, 
                               (frame_w, frame_h))

        for i in tqdm(range(nb_frames)):
            _, image = video_reader.read()
            
            boxes = yolo.predict(image)
            image = draw_boxes(image, boxes, config['model']['labels'])

            video_writer.write(np.uint8(image))

        video_reader.release()
        video_writer.release()  
    else:
        image = cv2.imread(image_path)
        boxes = yolo.predict(image)
        image = draw_boxes(image, boxes, config['model']['labels'])

        print(len(boxes), 'boxes are found')

        cv2.imwrite(image_path[:-4] + '_detected' + image_path[-4:], image)
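
For reference, this entry point is normally driven by an argparse parser along these lines (a sketch; the exact flag names are assumptions inferred from the attributes the function reads):

import argparse

argparser = argparse.ArgumentParser(
    description='Predict with a trained YOLOv2 model')
argparser.add_argument('-c', '--conf', help='path to the configuration file')
argparser.add_argument('-w', '--weights', help='path to the trained weights')
argparser.add_argument('-i', '--input', help='path to an image or an .mp4 video')

if __name__ == '__main__':
    _main_(argparser.parse_args())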
Example #2
def _main_(args):

    config_path = args.conf

    with open(config_path) as config_buffer:    
        config = json.loads(config_buffer.read())

    ###############################
    #   Parse the annotations 
    ###############################

    # parse annotations of the training set
    train_imgs, train_labels = parse_annotation(config['train']['train_annot_folder'], 
                                                config['train']['train_image_folder'], 
                                                config['model']['labels'])

    # parse annotations of the validation set, if any, otherwise split the training set
    if os.path.exists(config['valid']['valid_annot_folder']):
        valid_imgs, valid_labels = parse_annotation(config['valid']['valid_annot_folder'], 
                                                    config['valid']['valid_image_folder'], 
                                                    config['model']['labels'])
    else:
        train_valid_split = int(0.8*len(train_imgs))
        np.random.shuffle(train_imgs)

        valid_imgs = train_imgs[train_valid_split:]
        train_imgs = train_imgs[:train_valid_split]

    if len(config['model']['labels']) > 0:
        overlap_labels = set(config['model']['labels']).intersection(set(train_labels.keys()))

        print('Seen labels:\t', train_labels)
        print('Given labels:\t', config['model']['labels'])
        print('Overlap labels:\t', overlap_labels)

        if len(overlap_labels) < len(config['model']['labels']):
            print('Some labels have no annotations! Please revise the list of labels in the config.json file!')
            return
    else:
        print('No labels are provided. Train on all seen labels.')
        config['model']['labels'] = list(train_labels.keys())
        
    ###############################
    #   Construct the model 
    ###############################

    yolo = YOLO(architecture        = config['model']['architecture'],
                input_size          = config['model']['input_size'], 
                labels              = config['model']['labels'], 
                max_box_per_image   = config['model']['max_box_per_image'],
                anchors             = config['model']['anchors'])

    ###############################
    #   Load the pretrained weights (if any) 
    ###############################    

    if os.path.exists(config['train']['pretrained_weights']):
        print "Loading pre-trained weights in", config['train']['pretrained_weights']
        yolo.load_weights(config['train']['pretrained_weights'])

    ###############################
    #   Start the training process 
    ###############################

    yolo.train(train_imgs         = train_imgs,
               valid_imgs         = valid_imgs,
               train_times        = config['train']['train_times'],
               valid_times        = config['valid']['valid_times'],
               nb_epoch           = config['train']['nb_epoch'], 
               learning_rate      = config['train']['learning_rate'], 
               batch_size         = config['train']['batch_size'],
               warmup_epochs      = config['train']['warmup_epochs'],
               object_scale       = config['train']['object_scale'],
               no_object_scale    = config['train']['no_object_scale'],
               coord_scale        = config['train']['coord_scale'],
               class_scale        = config['train']['class_scale'],
               saved_weights_name = config['train']['saved_weights_name'],
               debug              = config['train']['debug'])
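
The script only reads a fixed set of configuration keys, so a minimal config.json can be sketched as the following Python dict (all values are illustrative placeholders, not recommendations):

minimal_config = {
    "model": {
        "architecture": "Full Yolo",
        "input_size": 416,
        "labels": ["person"],
        "max_box_per_image": 10,
        "anchors": [0.57, 0.68, 1.87, 2.06, 3.34, 5.47,
                    7.88, 3.53, 9.77, 9.17],
    },
    "train": {
        "train_annot_folder": "annotations/",
        "train_image_folder": "images/",
        "pretrained_weights": "",
        "train_times": 8,
        "nb_epoch": 50,
        "learning_rate": 1e-4,
        "batch_size": 16,
        "warmup_epochs": 3,
        "object_scale": 5.0,
        "no_object_scale": 1.0,
        "coord_scale": 1.0,
        "class_scale": 1.0,
        "saved_weights_name": "best_weights.h5",
        "debug": True,
    },
    "valid": {
        "valid_annot_folder": "",
        "valid_image_folder": "",
        "valid_times": 1,
    },
}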
Example #3
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

weights_path = "trained_wts.h5"
image_path = "1.jpg"

yolo = YOLO(backend="Full Yolo",
            input_size=416,
            labels=["Potholes"],
            max_box_per_image=15,
            anchors=[
                0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282,
                3.52778, 9.77052, 9.16828
            ])
yolo.load_weights(weights_path)


@app.route('/', methods=['GET', 'POST'])
def predict():
    import keras.backend.tensorflow_backend as tb
    tb._SYMBOLIC_SCOPE.value = True

    url = request.form.get('url')
    urllib.request.urlretrieve(url, '1.jpg')
    image = cv2.imread("1.jpg")
    boxes = yolo.predict(image)
    image = draw_boxes(image, boxes, "Pothole")

    # print(len(boxes), 'boxes are found')
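    # The example breaks off here; a Flask view must return a response.
    # A minimal, hedged completion (the response format is an assumption):
    cv2.imwrite('detected.jpg', image)
    return '{} boxes found'.format(len(boxes))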
Example #4
def _main_(args):

	config_path = args.conf

	with open(config_path) as config_buffer:    
		config = json.loads(config_buffer.read())

	###############################
	#   Parse the annotations 
	###############################

	# parse annotations of the training set
	train_imgs, train_labels = parse_annotation(config['train']['train_annot_folder'], 
												config['train']['train_image_folder'], 
												config['model']['labels'])

	# parse annotations of the validation set, if any, otherwise split the training set
	if os.path.exists(config['valid']['valid_annot_folder']):
		valid_imgs, valid_labels = parse_annotation(config['valid']['valid_annot_folder'], 
													config['valid']['valid_image_folder'], 
													config['model']['labels'])
	else:
		train_valid_split = int(0.8*len(train_imgs))
		np.random.shuffle(train_imgs)

		valid_imgs = train_imgs[train_valid_split:]
		train_imgs = train_imgs[:train_valid_split]

	if len(config['model']['labels']) > 0:
		overlap_labels = set(config['model']['labels']).intersection(set(train_labels.keys()))

		print('Seen labels:\t', train_labels)
		print('Given labels:\t', config['model']['labels'])
		print('Overlap labels:\t', overlap_labels)

		if len(overlap_labels) < len(config['model']['labels']):
			print('Some labels have no annotations! Please revise the list of labels in the config.json file!')
			return
	else:
		print('No labels are provided. Train on all seen labels.')
		config['model']['labels'] = list(train_labels.keys())
		
	###############################
	#   Construct the model 
	###############################

	yolo = YOLO(architecture        = config['model']['architecture'],
				input_size          = config['model']['input_size'], 
				labels              = config['model']['labels'], 
				max_box_per_image   = config['model']['max_box_per_image'],
				anchors             = config['model']['anchors'])

	###############################
	#   Load the pretrained weights (if any) 
	###############################    

	if os.path.exists(config['train']['pretrained_weights']):
		print "Loading pre-trained weights in", config['train']['pretrained_weights']
		yolo.load_weights(config['train']['pretrained_weights'])

	# NOTE: galaxyModel is not defined in this snippet; it presumably refers to
	# the underlying Keras model being unfrozen here (an assumption).
	for layer in galaxyModel.layers:
		print(layer)
		layer.trainable = True

	###############################
	#   Start the training process 
	###############################

	if args.training:
		yolo.train(train_imgs         = train_imgs,
				   valid_imgs         = valid_imgs,
				   train_times        = config['train']['train_times'],
				   valid_times        = config['valid']['valid_times'],
				   nb_epoch           = config['train']['nb_epoch'], 
				   learning_rate      = config['train']['learning_rate'], 
				   batch_size         = config['train']['batch_size'],
				   warmup_epochs      = config['train']['warmup_epochs'],
				   object_scale       = config['train']['object_scale'],
				   no_object_scale    = config['train']['no_object_scale'],
				   coord_scale        = config['train']['coord_scale'],
				   class_scale        = config['train']['class_scale'],
				   saved_weights_name = config['train']['saved_weights_name'],
				   debug              = config['train']['debug'])
	
	image = cv2.imread(config['valid']['valid_image_folder'] + '/10.png')

	plt.figure(figsize=(10,10))

	boxes = yolo.predict(image)

	image = draw_boxes(image, boxes, labels=config['model']['labels'])

	plt.imshow(image[:,:,::-1]); plt.show()
Example #5
def _main_(args):
    config_path = args.conf

    with open(config_path) as config_buffer:
        config = json.loads(config_buffer.read())

    ###############################
    #   Parse the annotations
    ###############################

    # parse annotations of the training set
    train_imgs, train_labels = parse_annotation(
        config['train']['train_annot_folder'],
        config['train']['train_image_folder'], config['model']['labels'])

    # parse annotations of the validation set, if any, otherwise split the training set
    if os.path.exists(config['valid']['valid_annot_folder']):
        valid_imgs, valid_labels = parse_annotation(
            config['valid']['valid_annot_folder'],
            config['valid']['valid_image_folder'], config['model']['labels'])
    else:
        train_valid_split = int(0.8 * len(train_imgs))
        np.random.shuffle(train_imgs)

        valid_imgs = train_imgs[train_valid_split:]
        train_imgs = train_imgs[:train_valid_split]

    if len(config['model']['labels']) > 0:
        overlap_labels = set(config['model']['labels']).intersection(
            set(train_labels.keys()))

        print('Seen labels:\t', train_labels)
        print('Given labels:\t', config['model']['labels'])
        print('Overlap labels:\t', overlap_labels)

        if len(overlap_labels) < len(config['model']['labels']):
            print(
                'Some labels have no annotations! Please revise the list of labels in the config.json file!'
            )
            return
    else:
        print('No labels are provided. Train on all seen labels.')
        config['model']['labels'] = list(train_labels.keys())

    ###############################
    #   Construct the model
    ###############################

    yolo = YOLO(backend=config['model']['backend'],
                input_size=config['model']['input_size'],
                labels=config['model']['labels'],
                max_box_per_image=config['model']['max_box_per_image'],
                anchors=config['model']['anchors'])

    ###############################
    #   Load the pretrained weights (if any)
    ###############################

    if os.path.exists(config['train']['pretrained_weights']):
        print("Loading pre-trained weights in",
              config['train']['pretrained_weights'])
        yolo.load_weights(config['train']['pretrained_weights'])

    ###############################
    #   Start the training process
    ###############################

    yolo.train(train_imgs=train_imgs,
               valid_imgs=valid_imgs,
               train_times=config['train']['train_times'],
               valid_times=config['valid']['valid_times'],
               nb_epochs=config['train']['nb_epochs'],
               learning_rate=config['train']['learning_rate'],
               batch_size=config['train']['batch_size'],
               warmup_epochs=config['train']['warmup_epochs'],
               object_scale=config['train']['object_scale'],
               no_object_scale=config['train']['no_object_scale'],
               coord_scale=config['train']['coord_scale'],
               class_scale=config['train']['class_scale'],
               saved_weights_name=config['train']['saved_weights_name'],
               debug=config['train']['debug'])
Example #6
def main(args):
    config_path = args.conf

    with open(config_path) as config_buffer:
        config = json.loads(config_buffer.read())

    # parse annotations of the training set
    train_imgs, train_labels = parse_annotation(
        config['train']['train_annot_folder'],
        config['train']['train_image_folder'], config['model']['labels'])

    # parse annotations of the validation set, if any, otherwise split the training set
    if os.path.exists(config['valid']['valid_annot_folder']):
        valid_imgs, valid_labels = parse_annotation(
            config['valid']['valid_annot_folder'],
            config['valid']['valid_image_folder'], config['model']['labels'])
    else:
        train_valid_split = int(0.8 * len(train_imgs))
        np.random.shuffle(train_imgs)

        valid_imgs = train_imgs[train_valid_split:]
        train_imgs = train_imgs[:train_valid_split]

    # detected labels
    overlap_labels = set(config['model']['labels']).intersection(
        set(train_labels.keys()))

    print('Seen labels:\t', train_labels)
    print('Given labels:\t', config['model']['labels'])
    print('Overlap labels:\t', overlap_labels)

    if len(overlap_labels) < len(config['model']['labels']):
        print(
            'Some labels have no images! Please revise the list of labels in the config.json file!'
        )
        return

    # construct models
    yolo = YOLO(architecture=config['model']['architecture'],
                input_size=config['model']['input_size'],
                labels=config['model']['labels'],
                max_box_per_image=config['model']['max_box_per_image'],
                anchors=config['model']['anchors'])

    # load pretrained models
    if os.path.exists(config['train']['pretrained_weights']):
        print("Loading pre-trained weights in",
              config['train']['pretrained_weights'])
        yolo.load_weights(config['train']['pretrained_weights'])

    # start training
    yolo.train(train_imgs=train_imgs,
               valid_imgs=valid_imgs,
               train_times=config['train']['train_times'],
               valid_times=config['valid']['valid_times'],
               nb_epoch=config['train']['nb_epoch'],
               learning_rate=config['train']['learning_rate'],
               batch_size=config['train']['batch_size'],
               warmup_bs=config['train']['warmup_batches'],
               object_scale=config['train']['object_scale'],
               no_object_scale=config['train']['no_object_scale'],
               coord_scale=config['train']['coord_scale'],
               class_scale=config['train']['class_scale'],
               saved_weights_name=config['model']['architecture'] + "_" +
               config['train']['saved_weights_name'],
               debug=config['train']['debug'])
Example #7
def _main_(args):
    config_path = args.conf
    weights_path = args.weights
    image_path = args.input

    with open(config_path) as config_buffer:
        config = json.load(config_buffer)

    if weights_path == '':
        weights_path = config['train']['saved_weights_name']

    ###############################
    #   Make the model
    ###############################

    yolo = YOLO(backend=config['model']['backend'],
                input_size=(config['model']['input_size_h'],
                            config['model']['input_size_w']),
                labels=config['model']['labels'],
                max_box_per_image=config['model']['max_box_per_image'],
                anchors=config['model']['anchors'],
                gray_mode=config['model']['gray_mode'])

    ###############################
    #   Load trained weights
    ###############################

    yolo.load_weights(weights_path)

    ###############################
    #   Predict bounding boxes
    ###############################

    if image_path[-4:] == '.mp4':
        video_out = image_path[:-4] + '_detected' + image_path[-4:]
        video_reader = cv2.VideoCapture(image_path)

        nb_frames = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_h = int(video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frame_w = int(video_reader.get(cv2.CAP_PROP_FRAME_WIDTH))

        video_writer = cv2.VideoWriter(video_out,
                                       cv2.VideoWriter_fourcc(*'MPEG'), 50.0,
                                       (frame_w, frame_h))

        for i in tqdm(range(nb_frames)):
            _, image = video_reader.read()

            boxes = yolo.predict(image)
            image = draw_boxes(image, boxes, config['model']['labels'])

            video_writer.write(np.uint8(image))

        video_reader.release()
        video_writer.release()
    else:
        if os.path.isfile(image_path):
            image = cv2.imread(image_path)
            boxes = yolo.predict(image)
            image = draw_boxes(image, boxes, config['model']['labels'])

            print(len(boxes), 'boxes are found')

            cv2.imwrite(image_path[:-4] + '_detected' + image_path[-4:], image)
        else:
            detected_images_path = os.path.join(image_path, "detected")
            if not os.path.exists(detected_images_path):
                os.mkdir(detected_images_path)
            images = list(list_images(image_path))
            for fname in tqdm(images):
                image = cv2.imread(fname)
                boxes = yolo.predict(image)
                image = draw_boxes(image, boxes, config['model']['labels'])
                fname = os.path.basename(fname)
                cv2.imwrite(os.path.join(image_path, "detected", fname), image)
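
Note: list_images is not defined in this snippet; it matches the directory-walking helper from imutils (an assumption):

from imutils.paths import list_images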
Example #8
class TrackerModel(object):
    def __init__(self,
                 config,
                 is_inference=False,
                 is_inference_ensemble=False):
        self.image_h = config['model']['input_size']
        self.image_w = config['model']['input_size']
        self.batch_size = config['train']['batch_size']
        self.num_samples_in_h5 = config['train']['num_samples_in_h5']
        self.stride = config['model']['stride']
        self.h5_sequence_length = config['model']['h5_sequence_length']
        self.last_sequence_length = config['model']['last_sequence_length']
        self.sequence_length = self.last_sequence_length // self.stride  # integer division: used as a tensor dimension
        self.true_box_buffer = config['model']['max_box_per_image']

        self.yolo_weights_path = config['model']['detector_weights_path']

        self.nb_box = len(config['model']['anchors']) // 2
        self.anchors = config['model']['anchors']
        self.epochs = config['train']['nb_epochs']
        self.warmup_epochs = config['train']['warmup_epochs']
        self.object_scale = config['train']['object_scale']
        self.no_object_scale = config['train']['no_object_scale']
        self.coord_scale = config['train']['coord_scale']
        self.class_scale = config['train']['class_scale']
        self.labels = config['model']['labels']
        self.nb_class = len(config['model']['labels'])
        self.class_wt = np.ones(self.nb_class, dtype='float32')

        #added by HS HSS
        self.full_log_dir = config['train']['full_log_dir']
        self.early_stop_patience = config['train']['early_stop_patience']
        self.early_stop_min_delta = config['train']['early_stop_min_delta']
        self.learning_rate_decay_factor = config['train'][
            'learning_rate_decay_factor']
        self.learning_rate_decay_patience = config['train'][
            'learning_rate_decay_patience']
        self.learning_rate_decay_min_lr = config['train'][
            'learning_rate_decay_min_lr']
        self.lstm_h5_data_path = config['train']['lstm_h5_data_path']
        self.saved_weights_name = config['train']['saved_weights_name']
        ####################

        ############################################
        # Compile the model
        ############################################
        self.detector = YOLO(
            backend=config['model']['backend'],
            input_size=config['model']['input_size'],
            labels=config['model']['labels'],
            max_box_per_image=config['model']['max_box_per_image'],
            anchors=config['model']['anchors'])
        self.detector.load_weights(self.yolo_weights_path)
        self.grid_h, self.grid_w = self.detector.grid_h, self.detector.grid_w

        print('\nhs loaded backbone:')
        pp.pprint(vars(self.detector))

        # self.full_model = self.create_model()
        self.full_model = self.create_model_cnn_rnn_extracted()
        if is_inference:
            if is_inference_ensemble:
                self.full_model_inference = self.create_model_cnn_rnn_extracted_inference_ensemble(
                )
            else:
                self.full_model_inference = self.create_model_cnn_rnn_extracted_inference(
                )

        self.debug = config['train']['debug']
        self.initial_epoch = 0

    def load_weights(self, weights_path):
        self.full_model.load_weights(weights_path)
        print("Successfully loaded weights from %s!" % weights_path)

    def freeze_layers(self, model):
        for layer in model.layers:
            layer.trainable = False
        return model

    def create_model_cnn_rnn_extracted(self):
        input_images = Input(batch_shape=(self.batch_size,
                                          self.sequence_length, self.image_h,
                                          self.image_w, 3),
                             name='images_input')
        self.true_boxes = Input(batch_shape=(self.batch_size, 1, 1, 1, 1,
                                             self.true_box_buffer, 4),
                                name='bbox_input')

        #load the detector
        feature_detector_model = self.detector.get_feature_model(
            is_before_activation=False)

        feature_detector_model = self.freeze_layers(feature_detector_model)

        print("\nSummary of feature detector:")
        feature_detector_model.summary()

        #run the yolo bb on each image, and stack up the results to be fed to RNN
        yolo_feats_seq = TimeDistributed(feature_detector_model,
                                name='each_frame_feats')\
                                        (input_images)

        recurrent_state = ConvLSTM2D(256, (1, 1),
                                     strides=(1, 1),
                                     padding='same',
                                     return_sequences=True,
                                     name='conv_lstm_1')(yolo_feats_seq)
        recurrent_state = ConvLSTM2D(256, (1, 1),
                                     strides=(1, 1),
                                     padding='same',
                                     return_sequences=False,
                                     name='conv_lstm_2')(recurrent_state)
        output_conv = Conv2D(self.nb_box * (4 + 1 + self.nb_class), (1, 1),
                             strides=(1, 1),
                             padding='same',
                             kernel_initializer='lecun_normal',
                             name='track_conv')(recurrent_state)
        output_reshaped = Reshape((self.grid_h, self.grid_w, self.nb_box,
                                   4 + 1 + self.nb_class))(output_conv)

        output_trk = Lambda(lambda args: args[0], name='tracking')(
            [output_reshaped, self.true_boxes])
        model = Model([input_images, self.true_boxes],
                      output_trk,
                      name='cnn_rnn_model')

        # We initialize the last layer (prediction by lstm) here
        layer = model.layers[-4]  # track_conv layer
        weights = layer.get_weights()
        new_kernel = np.random.normal(size=weights[0].shape) / (self.grid_h *
                                                                self.grid_w)
        new_bias = np.random.normal(size=weights[1].shape) / (self.grid_h *
                                                              self.grid_w)
        layer.set_weights([new_kernel, new_bias])

        #model.load_weights(self.rnn_weights_path, by_name=True)

        print("\nFull MODEL:")
        model.summary()
        return model  #, feature_detector_model

    def set_inference_weights(self):
        # copy weights from the trained model into the stateful inference model;
        # the original stray `return` inside the loop aborted after one layer
        for layer in self.full_model.layers:
            for layer_inference in self.full_model_inference.layers:
                if layer_inference.name == layer.name:
                    layer_inference.set_weights(layer.get_weights())
                    break

    def create_model_cnn_rnn_extracted_inference(self):
        K.set_learning_phase(0)
        input_images = Input(batch_shape=(self.batch_size, 1, self.image_h,
                                          self.image_w, 3),
                             name='images_input')
        feature_detector_model = self.detector.get_feature_model(
            is_before_activation=False)

        print("\nSummary of feature detector:")
        feature_detector_model.summary()
        yolo_feats_seq = TimeDistributed(feature_detector_model,
                                name='each_frame_feats')\
                                        (input_images)

        recurrent_state = ConvLSTM2D(256, (1, 1), strides=(1, 1), padding='same',
                return_sequences=True, stateful=True, name='conv_lstm_1')\
                    (yolo_feats_seq)
        recurrent_state = ConvLSTM2D(256, (1, 1), strides=(1, 1), padding='same',
                return_sequences=False, stateful=True, name='conv_lstm_2')\
                    (recurrent_state)
        output_conv = Conv2D(self.nb_box * (4 + 1 + self.nb_class), (1, 1),
                             strides=(1, 1),
                             padding='same',
                             kernel_initializer='lecun_normal',
                             name='track_conv')(recurrent_state)
        output_reshaped = Reshape((self.grid_h, self.grid_w, self.nb_box,
                                   4 + 1 + self.nb_class))(output_conv)
        model = Model(input_images, output_reshaped, name='cnn_rnn_model_inference')

        print("\nFull MODEL:")
        model.summary()
        return model

    def load_data_generators(self, generator_config):
        pickle_train = 'data/MultiObjDetTracker_TrainAnn.pickle'
        pickle_val = 'data/MultiObjDetTracker_ValAnn.pickle'

        if os.path.isfile(pickle_train):
            with open(pickle_train, 'rb') as fp:
                train_imgs = pickle.load(fp)
        else:
            train_imgs, seen_train_labels = parse_annotation(
                self.train_annot_folder,
                self.train_image_folder,
                labels=self.LABELS)
            with open(pickle_train, 'wb') as fp:
                pickle.dump(train_imgs, fp)

        if os.path.isfile(pickle_val):
            with open(pickle_val, 'rb') as fp:
                valid_imgs = pickle.load(fp)
        else:
            valid_imgs, seen_valid_labels = parse_annotation(
                self.valid_annot_folder,
                self.valid_image_folder,
                labels=self.LABELS)
            with open(pickle_val, 'wb') as fp:
                pickle.dump(valid_imgs, fp)

        train_batch = BatchSequenceGenerator(train_imgs,
                                             generator_config,
                                             norm=normalize,
                                             shuffle=True,
                                             augment=False)
        valid_batch = BatchSequenceGenerator(valid_imgs,
                                             generator_config,
                                             norm=normalize,
                                             augment=False)

        return train_batch, valid_batch

    def load_data_generators_seq(self, batch_size):
        #path to folder of H5s is in  self.lstm_h5_data_path
        train_batch = SequenceH5Generator(self.lstm_h5_data_path,
                                          batch_size,
                                          self.num_samples_in_h5,
                                          self.last_sequence_length,
                                          self.labels,
                                          stride=self.stride,
                                          is_yolo_feats=False,
                                          is_augment=True)
        valid_batch = SequenceH5Generator(self.lstm_h5_data_path,
                                          batch_size,
                                          self.num_samples_in_h5,
                                          self.last_sequence_length,
                                          self.labels,
                                          stride=self.stride,
                                          is_validation=True,
                                          is_yolo_feats=False)

        return train_batch, valid_batch

    def custom_loss(self, y_true, y_pred):
        new_shape = self.batch_size
        y_pred = tf.reshape(y_pred, (new_shape, self.grid_h, self.grid_w,
                                     self.nb_box, 4 + 1 + self.nb_class))
        y_true = tf.reshape(y_true, (new_shape, self.grid_h, self.grid_w,
                                     self.nb_box, 4 + 1 + self.nb_class))
        self.true_boxes = tf.reshape(
            self.true_boxes, (new_shape, 1, 1, 1, self.true_box_buffer, 4))

        mask_shape = tf.shape(y_true)[:4]

        cell_x = tf.to_float(
            tf.reshape(tf.tile(tf.range(self.grid_w), [self.grid_h]),
                       (1, self.grid_h, self.grid_w, 1, 1)))
        cell_y = tf.transpose(cell_x, (0, 2, 1, 3, 4))

        cell_grid = tf.tile(tf.concat([cell_x, cell_y], -1),
                            [self.batch_size, 1, 1, self.nb_box, 1])

        coord_mask = tf.zeros(mask_shape)
        conf_mask = tf.zeros(mask_shape)
        class_mask = tf.zeros(mask_shape)

        seen = tf.Variable(0.)
        total_recall = tf.Variable(0.)
        """
        Adjust prediction
        """
        ### adjust x and y
        pred_box_xy = tf.sigmoid(y_pred[..., :2]) + cell_grid
        ### adjust w and h
        pred_box_wh = tf.exp(y_pred[..., 2:4]) * np.reshape(
            self.anchors, [1, 1, 1, self.nb_box, 2])
        ### adjust confidence
        pred_box_conf = tf.sigmoid(y_pred[..., 4])
        ### adjust class probabilities
        pred_box_class = y_pred[..., 5:]
        """
        Adjust ground truth
        """
        ### adjust x and y
        true_box_xy = y_true[...,
                             0:2]  # relative position to the containing cell

        ### adjust w and h
        true_box_wh = y_true[
            ..., 2:4]  # number of cells accross, horizontally and vertically

        ### adjust confidence
        true_wh_half = true_box_wh / 2.
        true_mins = true_box_xy - true_wh_half
        true_maxes = true_box_xy + true_wh_half

        pred_wh_half = pred_box_wh / 2.
        pred_mins = pred_box_xy - pred_wh_half
        pred_maxes = pred_box_xy + pred_wh_half

        intersect_mins = tf.maximum(pred_mins, true_mins)
        intersect_maxes = tf.minimum(pred_maxes, true_maxes)
        intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.)
        intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

        true_areas = true_box_wh[..., 0] * true_box_wh[..., 1]
        pred_areas = pred_box_wh[..., 0] * pred_box_wh[..., 1]

        union_areas = pred_areas + true_areas - intersect_areas
        iou_scores = tf.truediv(intersect_areas, union_areas)

        true_box_conf = iou_scores * y_true[..., 4]

        ### adjust class probabilities
        true_box_class = tf.argmax(y_true[..., 5:], -1)
        """ Determine the masks """
        ### coordinate mask: simply the position of the ground truth boxes (the predictors)
        coord_mask = tf.expand_dims(y_true[..., 4], axis=-1) * self.coord_scale

        ### confidence mask: penelize predictors + penalize boxes with low IOU
        # penalize the confidence of the boxes, which have IOU with some ground truth box < 0.6
        true_xy = self.true_boxes[..., 0:2]
        true_wh = self.true_boxes[..., 2:4]

        true_wh_half = true_wh / 2.
        true_mins = true_xy - true_wh_half
        true_maxes = true_xy + true_wh_half

        pred_xy = tf.expand_dims(pred_box_xy, 4)
        pred_wh = tf.expand_dims(pred_box_wh, 4)

        pred_wh_half = pred_wh / 2.
        pred_mins = pred_xy - pred_wh_half
        pred_maxes = pred_xy + pred_wh_half

        intersect_mins = tf.maximum(pred_mins, true_mins)
        intersect_maxes = tf.minimum(pred_maxes, true_maxes)
        intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.)
        intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

        true_areas = true_wh[..., 0] * true_wh[..., 1]
        pred_areas = pred_wh[..., 0] * pred_wh[..., 1]

        union_areas = pred_areas + true_areas - intersect_areas
        iou_scores = tf.truediv(intersect_areas, union_areas)

        best_ious = tf.reduce_max(iou_scores, axis=4)
        conf_mask = conf_mask + tf.to_float(
            best_ious < 0.6) * (1 - y_true[..., 4]) * self.no_object_scale

        # penalize the confidence of the boxes, which are reponsible for corresponding ground truth box
        conf_mask = conf_mask + y_true[..., 4] * self.object_scale

        ### class mask: simply the position of the ground truth boxes (the predictors)
        class_mask = y_true[..., 4] * tf.gather(
            self.class_wt, true_box_class) * self.class_scale
        """ Warm-up training """
        no_boxes_mask = tf.to_float(coord_mask < self.coord_scale / 2.)
        seen = tf.assign_add(seen, 1.)

        true_box_xy, true_box_wh, coord_mask = tf.cond(tf.less(seen, self.warmup_batches+1),
                              lambda: [true_box_xy + (0.5 + cell_grid) * no_boxes_mask,
                                       true_box_wh + tf.ones_like(true_box_wh) * \
                                       np.reshape(self.anchors, [1,1,1,self.nb_box,2]) * \
                                       no_boxes_mask,
                                       tf.ones_like(coord_mask)],
                              lambda: [true_box_xy,
                                       true_box_wh,
                                       coord_mask])
        """ Finalize the loss """
        nb_coord_box = tf.reduce_sum(tf.to_float(coord_mask > 0.0))
        nb_conf_box = tf.reduce_sum(tf.to_float(conf_mask > 0.0))
        nb_class_box = tf.reduce_sum(tf.to_float(class_mask > 0.0))

        loss_xy = tf.reduce_sum(
            tf.square(true_box_xy - pred_box_xy) *
            coord_mask) / (nb_coord_box + 1e-6) / 2.
        loss_wh = tf.reduce_sum(
            tf.square(true_box_wh - pred_box_wh) *
            coord_mask) / (nb_coord_box + 1e-6) / 2.
        loss_conf = tf.reduce_sum(
            tf.square(true_box_conf - pred_box_conf) *
            conf_mask) / (nb_conf_box + 1e-6) / 2.
        loss_class = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=true_box_class, logits=pred_box_class)
        loss_class = tf.reduce_sum(
            loss_class * class_mask) / (nb_class_box + 1e-6)

        loss = tf.cond(tf.less(seen, self.warmup_batches + 1),
                       lambda: loss_xy + loss_wh + loss_conf + loss_class + 10,
                       lambda: loss_xy + loss_wh + loss_conf + loss_class)

        if self.debug:
            nb_true_box = tf.reduce_sum(y_true[..., 4])
            nb_pred_box = tf.reduce_sum(
                tf.to_float(true_box_conf > 0.5) *
                tf.to_float(pred_box_conf > 0.3))

            current_recall = nb_pred_box / (nb_true_box + 1e-6)
            total_recall = tf.assign_add(total_recall, current_recall)

            loss = tf.Print(loss, [loss_xy],
                            message='Loss XY \t',
                            summarize=1000)
            loss = tf.Print(loss, [loss_wh],
                            message='Loss WH \t',
                            summarize=1000)
            loss = tf.Print(loss, [loss_conf],
                            message='Loss Conf \t',
                            summarize=1000)
            loss = tf.Print(loss, [loss_class],
                            message='Loss Class \t',
                            summarize=1000)
            loss = tf.Print(loss, [loss],
                            message='Total Loss \t',
                            summarize=1000)
            loss = tf.Print(loss, [current_recall],
                            message='Current Recall \t',
                            summarize=1000)
            # loss = tf.Print(loss, [total_recall/seen], message='Average Recall \t', summarize=1000)

        return loss

    def train(self):
        train_batch, valid_batch = self.load_data_generators_seq(
            self.batch_size)
        #print('Length of generators:')
        #print(int(len(train_batch)))
        #print(int(len(valid_batch)))
        print("Length of generators: %d, %d" %
              (len(train_batch), len(valid_batch)))
        [x, b], y = train_batch[0]
        print("Input shapes train: ", x.shape, y.shape, b.shape)
        [x, b], y = valid_batch[0]
        print("Input shapes val: ", x.shape, y.shape, b.shape)
        self.warmup_batches = self.warmup_epochs *\
            (len(train_batch) + len(valid_batch))  / 4
        print("Using %d warmup batches" % self.warmup_batches)

        ############################################
        # Define your callbacks
        ############################################

        #defined by hs for best val
        checkpoint_multi_hs = MultiGPUCheckpoint(
            '{name}_{{epoch:02d}}_hsBbAndLstm_valLoss-{{val_loss:.2f}}.h5'.
            format(name=self.saved_weights_name),
            verbose=1,
            save_best_only=True,
        )

        #defined by HS
        # HS HSS originally i used monitor='val_loss', factor=0.5, patience=20, min_lr=1e-6
        reduce_lr_hs = ReduceLROnPlateau(
            monitor='val_loss',
            factor=self.learning_rate_decay_factor,
            patience=self.learning_rate_decay_patience,
            min_lr=self.learning_rate_decay_min_lr)

        #HS HSS With a patience of 100 you finish in 200 epochs so I changed it to 400
        early_stop = EarlyStopping(monitor='val_loss',
                                   min_delta=self.early_stop_min_delta,
                                   patience=self.early_stop_patience,
                                   verbose=1)

        evaluate_callback_val = EvaluateCallback(valid_batch, self.evaluate)

        decay_lr = DecayLR(32, 40, 0.2)

        optimizer = Adam(lr=1e-4,
                         beta_1=0.9,
                         beta_2=0.999,
                         epsilon=1e-08,
                         decay=0.0)
        # optimizer = SGD(lr=1e-4, momentum=0.9, decay=0.0, nesterov=True)

        print("Compiling a model...")
        with tf.device("/cpu:0"):
            self.full_model.compile(loss=self.custom_loss, optimizer=optimizer)
        print("Successfuly compiled the full model!")

        ############################################
        # Start the training process
        ############################################
        steps_per_epoch = len(train_batch)
        print("Using %d/%d for train/val steps_per_epoch of %d batch_size!" %\
                (steps_per_epoch,
                len(valid_batch),
                self.batch_size))

        parallel_model = multi_gpu_model(self.full_model, gpus=2)
        parallel_model.compile(loss=self.custom_loss, optimizer=optimizer)

        parallel_model.fit_generator(
            generator=train_batch,
            steps_per_epoch=steps_per_epoch,
            epochs=self.epochs,
            verbose=2 if self.debug else 1,
            validation_data=valid_batch,
            validation_steps=len(valid_batch),
            callbacks=[
                early_stop, checkpoint_multi_hs,
                TrainValTensorBoard_HS(self.full_log_dir,
                                       write_graph=False,
                                       write_images=True),
                ValOnlyProgbarLogger(verbose=1, count_mode='steps'),
                reduce_lr_hs
            ],  # reduce_lr
            workers=4,
            max_queue_size=10,
            use_multiprocessing=True,
            shuffle=False,
            initial_epoch=self.initial_epoch)

        self.full_model.save(self.saved_weights_name + "_fullModel_final.h5")

    def evaluate(self,
                 generator,
                 iou_threshold=0.3,
                 score_threshold=0.3,
                 max_detections=100,
                 save_path=None):
        """ Evaluate a given dataset using a given model.
            code originally from https://github.com/fizyr/keras-retinanet

            # Arguments
                generator       : The generator that represents the dataset to evaluate.
                model           : The model to evaluate.
                iou_threshold   : The threshold used to consider when a detection is positive or negative.
                score_threshold : The score confidence threshold to use for detections.
                max_detections  : The maximum number of detections to use per image.
                save_path       : The path to save images with visualized detections to.
            # Returns
                A dict mapping class names to mAP scores.
            """
        print("\nUsing %.2f IOU and %.2f Score thresholds!" %\
                (iou_threshold, score_threshold))
        # gather all detections and annotations
        all_detections = [[None for i in range(generator.num_classes())]
                          for j in range(generator.size())]
        all_annotations = [[None for i in range(generator.num_classes())]
                           for j in range(generator.size())]

        for i in range(generator.size()):
            if i % 100 == 0: print("%d/%d" % (i, generator.size()))
            raw_image = generator.load_image(i)
            raw_height, raw_width, raw_channels = raw_image.shape

            pred_boxes, filtered_boxes = self.predict(
                raw_image,
                obj_threshold=score_threshold,
                is_filter_bboxes=False)

            score = np.array([box.score for box in pred_boxes])
            pred_labels = np.array([box.label for box in pred_boxes])

            if len(pred_boxes) > 0:
                pred_boxes = np.array([[
                    box.xmin * raw_width, box.ymin * raw_height,
                    box.xmax * raw_width, box.ymax * raw_height, box.score
                ] for box in pred_boxes])
            else:
                pred_boxes = np.array([[]])

            # sort the boxes and the labels according to scores
            score_sort = np.argsort(-score)
            pred_labels = pred_labels[score_sort]
            pred_boxes = pred_boxes[score_sort]

            # copy detections to all_detections
            for label in range(generator.num_classes()):
                all_detections[i][label] = pred_boxes[pred_labels == label, :]

            annotations = generator.load_annotation(i)

            # copy detections to all_annotations
            for label in range(generator.num_classes()):
                all_annotations[i][label] = annotations[annotations[:, 4] ==
                                                        label, :4].copy()

        # compute mAP by comparing all detections and all annotations
        average_precisions = {}

        for label in range(generator.num_classes()):
            false_positives = np.zeros((0, ))
            true_positives = np.zeros((0, ))
            scores = np.zeros((0, ))
            num_annotations = 0.0

            for i in range(generator.size()):
                detections = all_detections[i][label]
                annotations = all_annotations[i][label]
                num_annotations += annotations.shape[0]
                detected_annotations = []

                for d in detections:
                    scores = np.append(scores, d[4])

                    if annotations.shape[0] == 0:
                        false_positives = np.append(false_positives, 1)
                        true_positives = np.append(true_positives, 0)
                        continue

                    overlaps = compute_overlap(np.expand_dims(d, axis=0),
                                               annotations)
                    assigned_annotation = np.argmax(overlaps, axis=1)
                    max_overlap = overlaps[0, assigned_annotation]

                    if max_overlap >= iou_threshold and assigned_annotation not in detected_annotations:
                        false_positives = np.append(false_positives, 0)
                        true_positives = np.append(true_positives, 1)
                        detected_annotations.append(assigned_annotation)
                    else:
                        false_positives = np.append(false_positives, 1)
                        true_positives = np.append(true_positives, 0)

            # no annotations -> AP for this class is 0 (is this correct?)
            if num_annotations == 0:
                average_precisions[label] = 0
                continue

            # sort by score
            indices = np.argsort(-scores)
            false_positives = false_positives[indices]
            true_positives = true_positives[indices]

            # compute false positives and true positives
            false_positives = np.cumsum(false_positives)
            true_positives = np.cumsum(true_positives)

            # compute recall and precision
            recall = true_positives / num_annotations
            precision = true_positives / np.maximum(
                true_positives + false_positives,
                np.finfo(np.float64).eps)

            # compute average precision
            average_precision = compute_ap(recall, precision)
            average_precisions[label] = average_precision

        return average_precisions

    def predict(self, image):
        image_h, image_w, _ = image.shape
        # resize to the model's input resolution (attributes set in __init__)
        image = cv2.resize(image, (self.image_w, self.image_h))
        image = np.divide(image, 255., dtype=np.float32)

        input_image = image[:, :, ::-1]
        input_image = np.expand_dims(input_image, 0)
        dummy_array = np.zeros((1, 1, 1, 1, self.true_box_buffer, 4))

        netout = self.full_model.predict([input_image, dummy_array])[0]
        boxes = decode_netout(netout, self.detector.anchors,
                              self.detector.nb_class)

        return boxes

    def predict_on_image(self,
                         image,
                         obj_threshold=0.3,
                         nms_threshold=0.01,
                         is_inference_ensemble=False,
                         is_filter_bboxes=False,
                         shovel_type="Hydraulic"):
        image_h, image_w, _ = image.shape
        image = cv2.resize(image, (self.image_h, self.image_w))
        image = np.divide(image, 255., dtype=np.float32)

        input_image = image[:, :, ::-1]
        input_image = np.expand_dims(input_image, 0)
        input_image = np.expand_dims(input_image, 0)
        netout = self.full_model_inference.predict(input_image)
        boxes = decode_netout(netout[0, ...],
                              self.anchors,
                              self.nb_class,
                              obj_threshold=obj_threshold,
                              nms_threshold=nms_threshold)
        if is_filter_bboxes:
            boxes = filter_teeth_teethline(boxes, obj_threshold, shovel_type)

        return boxes

    def predict_on_h5(self,
                      h5_path,
                      idx,
                      path_to_save,
                      sequence_length=30,
                      stride=1,
                      obj_threshold=0.3,
                      nms_threshold=0.1,
                      is_yolo_pred=False):

        f = h5py.File(h5_path, 'r')
        x_batches = f["x_batches"]
        if is_yolo_pred:
            yolo_outs = f["yolo_out"]
        b_batches = f["b_batches"]
        y_batches = f["y_batches"]

        id_in_h5 = idx % x_batches.shape[0]

        x_batch = x_batches[id_in_h5, ...]  # read from disk
        start_time = time()
        x_batch = np.divide(x_batch, 255., dtype=np.float32)
        print("time: %.3f" % (time() - start_time))
        x_batch = x_batch[..., ::-1]
        if is_yolo_pred:
            yolo_out = yolo_outs[id_in_h5, ...]
        b_batch = b_batches[id_in_h5, ...]
        y_batch = y_batches[id_in_h5, ...]

        #                [reverse  ][:seq_length:skip ][reverse]
        x_batch = x_batch[::-1, ...][:sequence_length:stride][::-1, ...]
        if is_yolo_pred:
            yolo_out = yolo_out[::-1, ...][:sequence_length:stride][::-1, ...]
        image = x_batch[-1, ...].copy()
        image_yolo = image.copy()
        image_first = x_batch[0, ...].copy()
        b_batch = b_batch[::-1, ...][:sequence_length:stride][::-1, ...]
        x_batch = np.expand_dims(x_batch, axis=0)
        if is_yolo_pred:
            yolo_out = np.expand_dims(yolo_out, axis=0)
        b_batch = np.expand_dims(b_batch, axis=0)
        y_batch = np.expand_dims(y_batch, axis=0)

        # x_batch = yolo_out
        start_time = time()
        netouts = self.full_model.predict([x_batch, b_batch])
        print("Time taken ConvLSTM: %.3f" % (time() - start_time))
        print("image.shape: ", image.shape)
        # boxes_yolo = self.detector.predict(image, obj_threshold=obj_threshold-0.2)
        if is_yolo_pred:
            netouts_yolo_last = yolo_out[:, -1, ...]
            netouts_yolo_first = yolo_out[:, 0, ...]
        labels_tensor = y_batch[0, ...].copy()

        for i, netout in enumerate(netouts):
            start_time = time()
            boxes = decode_netout(netout, self.anchors, self.nb_class,
                                  obj_threshold, nms_threshold)
            print("Decoding time: %.3f" % (time() - start_time))
            if is_yolo_pred:
                boxes_yolo_first = decode_netout(netouts_yolo_first[i],
                                                 self.anchors, self.nb_class,
                                                 obj_threshold, nms_threshold)
                boxes_yolo_last = decode_netout(netouts_yolo_last[i],
                                                self.anchors, self.nb_class,
                                                obj_threshold, nms_threshold)
            labels_boxes = decode_netout(labels_tensor, self.anchors,
                                         self.nb_class, obj_threshold,
                                         nms_threshold)

            image_labels = image.copy()
            image_conv_lstm = draw_boxes(image, boxes, self.labels,
                                         obj_threshold)
            if is_yolo_pred:
                image_yolo_first = draw_boxes(image_first, boxes_yolo_first,
                                              self.labels, obj_threshold)
                image_yolo_last = draw_boxes(image_yolo, boxes_yolo_last,
                                             self.labels, obj_threshold)
            image_labels = draw_boxes(image_labels, labels_boxes, self.labels,
                                      obj_threshold)
            print("Bounding boxes found %d/%d" %\
                    (len(boxes), len(labels_boxes)))

        #fig, ax = plt.subplots(1, 3, figsize=(19, 10))
        #fig.tight_layout()
        #if is_yolo_pred:
        #   ax[0].imshow(image_yolo_first)
        #  ax[1].imshow(image_yolo_last)
        #ax[2].imshow(image_conv_lstm)
        #ax[0].set_title("First Frame YOLO pred")
        #ax[1].set_title("Last Frame YOLO pred")
        #ax[2].set_title("Conv LSTM")
        # plt.show()
        #plt.imshow(image_conv_lstm)
        #plt.savefig("/media/hooman/1tb-ssd-hs3-linu/BucketTracking-Project/hydraulic/LSTM/try6__2lstm-256-1b1-30frames/preds_onHardTestSet/pred_" + h5_name + "_" +  str(idx) + ".jpg",
        #format='jpg')

        h5_name = h5_path.split('/')[-1]
        filepath = os.path.join(path_to_save,
                                "pred_" + h5_name + str(idx) + ".jpg")
        temps = image_conv_lstm * 255.  #np.uint8(image_conv_lstm*255)
        cv2.imwrite(filepath, temps)
        # plt.figure(figsize=(10, 10))
        # plt.imshow(image_conv_lstm)
        # plt.savefig("./lstm_preds/pred_" + h5_name + "_" +  str(idx) + ".jpg",
        #             format='jpg')
        # plt.show()

        f.close()

        return
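
A hedged usage sketch for TrackerModel (the config path, weights filename, and test frame are assumptions inferred from the constructor and methods above):

import json

import cv2

with open('config.json') as f:
    config = json.load(f)

# training: builds the CNN + ConvLSTM graph and fits it on the H5 sequences
tracker = TrackerModel(config)
tracker.train()

# inference: build the stateful graph, copy the trained weights into it,
# then feed frames one at a time
tracker = TrackerModel(config, is_inference=True)
tracker.load_weights('tracker_weights.h5')
tracker.set_inference_weights()
frame = cv2.imread('frame.jpg')  # hypothetical test frame
boxes = tracker.predict_on_image(frame)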
Example #9
from keras.preprocessing import image

# imports implied by the snippet below (cv aliases OpenCV; picamera runs on a
# Raspberry Pi); YOLO and draw_boxes come from the surrounding project
import json

import cv2 as cv
import numpy as np
import picamera

with open('config.json') as config_buffer:
    config = json.load(config_buffer)

###############################
#   Make the model
###############################

yolo = YOLO(backend=config['model']['backend'],
            input_size=config['model']['input_size'],
            labels=config['model']['labels'],
            max_box_per_image=config['model']['max_box_per_image'],
            anchors=config['model']['anchors'])

yolo.load_weights('fire_weights.h5')
img_width, img_height = 640, 480
labels = config['model']['labels']

with picamera.PiCamera() as camera:
    camera.resolution = (640, 480)  # (320, 240)
    camera.framerate = 24
    frame = np.empty((480, 640, 3), dtype=np.uint8)  # (240, 320, 3)
    try:
        while True:
            camera.capture(frame, 'rgb', use_video_port=True)
            # convert into a separate array so the preallocated capture
            # buffer `frame` stays intact for the next capture
            bgr = cv.cvtColor(frame, cv.COLOR_RGB2BGR)
            boxes = yolo.predict(bgr)
            bgr = draw_boxes(bgr, boxes, config['model']['labels'])
            cv.imshow('frame', bgr)
            cv.waitKey(2)
    finally:
        # a bare try: needs a handler; ensure the window closes on exit
        cv.destroyAllWindows()
Example #10
def trainer(config_path):
    with open(config_path) as config_buffer:
        config = json.loads(config_buffer.read())

    if 'logdir' in config['train']:
        logdir = config['train']['logdir']
    else:
        raise Exception("The config must specify a folder for logs!")

    logdir = os.path.join(os.path.dirname(config_path), logdir)
    os.makedirs(logdir, exist_ok=True)
    shutil.copy(config_path, os.path.join(logdir, 'config.json'))

    ###############################
    #   Parse the annotations
    ###############################

    # parse annotations of the training set
    train_imgs, train_labels = parse_annotation(config['train']['train_annot_folder'],
                                                config['train']['train_image_folder'],
                                                config['model']['labels'])

    # parse annotations of the validation set, if any, otherwise split the training set
    if os.path.exists(config['valid']['valid_annot_folder']):
        valid_imgs, valid_labels = parse_annotation(config['valid']['valid_annot_folder'],
                                                    config['valid']['valid_image_folder'],
                                                    config['model']['labels'])
    else:
        train_valid_split = int(0.8 * len(train_imgs))
        np.random.shuffle(train_imgs)
        valid_imgs = train_imgs[train_valid_split:]
        train_imgs = train_imgs[:train_valid_split]

    if len(config['model']['labels']) > 0:
        overlap_labels = set(config['model']['labels']).intersection(set(train_labels.keys()))

        print('Seen labels:\t', train_labels)
        print('Given labels:\t', config['model']['labels'])
        print('Overlap labels:\t', overlap_labels)

        if len(overlap_labels) < len(config['model']['labels']):
            print('Some labels have no annotations! Please revise the list of labels in the config.json file!')
            return
    else:
        print('No labels are provided. Train on all seen labels.')
        config['model']['labels'] = list(train_labels.keys())

    ###############################
    #   Construct the model
    ###############################

    yolo = YOLO(architecture      = config['model']['architecture'],
                input_size        = config['model']['input_size'],
                labels            = config['model']['labels'],
                max_box_per_image = config['model']['max_box_per_image'],
                anchors           = config['model']['anchors'])

    ###############################
    #   Load the pretrained weights (if any)
    ###############################

    if os.path.exists(config['train']['pretrained_weights']):
        print("Loading pre-trained weights in {}".format(config['train']['pretrained_weights']))

        yolo.load_weights(config['train']['pretrained_weights'])

    ###############################
    #   Start the training process
    ###############################

    freq = config['train'].get('weights_saving_freq', 0)
    yolo.train(train_imgs         = train_imgs,
               valid_imgs         = valid_imgs,
               train_times        = config['train']['train_times'],
               valid_times        = config['valid']['valid_times'],
               nb_epoch           = config['train']['nb_epoch'],
               learning_rate      = config['train']['learning_rate'],
               batch_size         = config['train']['batch_size'],
               warmup_epochs      = config['train']['warmup_epochs'],
               object_scale       = config['train']['object_scale'],
               no_object_scale    = config['train']['no_object_scale'],
               coord_scale        = config['train']['coord_scale'],
               class_scale        = config['train']['class_scale'],
               saved_weights_name = config['train']['saved_weights_name'],
               saving_freq        = freq,
               debug              = config['train']['debug'],
               logdir             = logdir)
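
For orientation, the config.json consumed by this trainer needs at least the keys read above. The sketch below writes one with illustrative values; the paths and numbers are assumptions, not the original project's settings:

import json

config = {
    "model": {
        "architecture": "Full Yolo",  # backbone name (assumed value)
        "input_size": 416,
        "labels": ["person"],
        "max_box_per_image": 10,
        "anchors": [0.57, 0.68, 1.87, 2.06, 3.34, 5.47,
                    7.88, 3.53, 9.77, 9.17],
    },
    "train": {
        "train_image_folder": "images/train/",
        "train_annot_folder": "annots/train/",
        "pretrained_weights": "",
        "batch_size": 16,
        "learning_rate": 1e-4,
        "nb_epoch": 50,
        "warmup_epochs": 3,
        "object_scale": 5.0,
        "no_object_scale": 1.0,
        "coord_scale": 1.0,
        "class_scale": 1.0,
        "train_times": 8,
        "saved_weights_name": "best_weights.h5",
        "debug": False,
        "logdir": "logs/",
        "weights_saving_freq": 0,  # optional; trainer defaults to 0
    },
    "valid": {
        "valid_image_folder": "",
        "valid_annot_folder": "",
        "valid_times": 1,
    },
}

with open("config.json", "w") as f:
    json.dump(config, f, indent=4)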
Example #11
0
def _main_(args):
    config_path = args.conf
    weights_path = args.weights
    image_path = args.input

    with open(config_path) as config_buffer:
        config = json.load(config_buffer)

    ###############################
    #   Make the model
    ###############################

    yolo = YOLO(backend=config['model']['backend'],
                input_size=config['model']['input_size'],
                labels=config['model']['labels'],
                max_box_per_image=config['model']['max_box_per_image'],
                anchors=config['model']['anchors'])

    ###############################
    #   Load trained weights
    ###############################

    yolo.load_weights(weights_path)

    ###############################
    #   Predict bounding boxes
    ###############################

    # Swap in any test source here: a local video file, the image_path
    # argument, or an RTSP stream URL.
    video_reader = cv2.VideoCapture('maidan.avi')

    nb_frames = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))

    every_nth = 50
    count = 0

    pbar = tqdm(total=nb_frames)
    while video_reader.isOpened():
        _, image = video_reader.read()

        count += 1
        pbar.update(1)

        if image is None:
            break

        if count % every_nth:
            continue  # run detection on every 50th frame only

        boxes = yolo.predict(image)
        image = draw_boxes(image, boxes, config['model']['labels'])

        cv2.imshow('Predicted2', cv2.resize(image, (1280, 720)))
        # cv2.imshow('Predicted', image)
        cv2.waitKey(1)

    video_reader.release()
    pbar.close()
Example #12
0
def _main_(args):
    config_path = args.conf
    weights_path = args.weights
    image_path = args.input
    use_camera = args.real_time
    # Configure the TF session (added by kenny); a sketch of a typical
    # get_session() follows this example.
    keras.backend.tensorflow_backend.set_session(get_session())
    with open(config_path) as config_buffer:
        config = json.load(config_buffer)

    ###############################
    #   Make the model
    ###############################

    yolo = YOLO(backend=config['model']['backend'],
                input_size=config['model']['input_size'],
                labels=config['model']['labels'],
                max_box_per_image=config['model']['max_box_per_image'],
                anchors=config['model']['anchors'])

    ###############################
    #   Load trained weights
    ###############################

    yolo.load_weights(weights_path)

    ###############################
    #   Predict bounding boxes
    ###############################
    if use_camera:
        video_reader = cv2.VideoCapture(int(image_path))
        pbar = tqdm()
        while True:
            pbar.update(1)
            ret, frame = video_reader.read()
            if not ret:
                break
            boxes = yolo.predict(frame)
            frame = draw_boxes(frame, boxes, config['model']['labels'])
            cv2.imshow("frame", frame)
            key = cv2.waitKey(1)
            if key == ord("q") or key == 27:
                break
        pbar.close()
    elif image_path[-4:] == '.mp4':
        video_out = image_path[:-4] + '_detected' + image_path[-4:]
        video_reader = cv2.VideoCapture(image_path)

        nb_frames = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_h = int(video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frame_w = int(video_reader.get(cv2.CAP_PROP_FRAME_WIDTH))

        video_writer = cv2.VideoWriter(video_out,
                                       cv2.VideoWriter_fourcc(*'MPEG'), 50.0,
                                       (frame_w, frame_h))

        for i in tqdm(range(nb_frames)):
            _, image = video_reader.read()

            boxes = yolo.predict(image)
            image = draw_boxes(image, boxes, config['model']['labels'])

            video_writer.write(np.uint8(image))

        video_reader.release()
        video_writer.release()
    else:
        image = cv2.imread(image_path)
        boxes = yolo.predict(image)
        image = draw_boxes(image, boxes, config['model']['labels'])

        print(len(boxes), 'boxes are found')
        path = '/home/creaton/keras-yolo2/detected object'
        cv2.imwrite(os.path.join(path, 'img.jpg'), image)
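
get_session() is not defined in this example; in TF 1.x-era Keras code it is typically a session configured to allocate GPU memory on demand. A plausible definition, offered as an assumption:

import tensorflow as tf

def get_session():
    # Grow GPU memory usage as needed instead of reserving it all up front.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    return tf.Session(config=config)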
Example #13
0
class predictor:
    def __init__(self, config_path, weights_path):
        with open(config_path) as config_buffer:
            config = json.loads(config_buffer.read())

        self.labels = config['model']['labels']

        self.yolo = YOLO(
            architecture=config['model']['architecture'],
            input_size=config['model']['input_size'],
            labels=self.labels,
            max_box_per_image=config['model']['max_box_per_image'],
            anchors=config['model']['anchors'])

        self.yolo.load_weights(weights_path)
        self.timing = [0, 0.]

    def _predict_one(self, image, threshold, decimals, draw_bboxes=True):

        t = timer()
        boxes = self.yolo.predict(image, threshold=threshold)
        image = draw_boxes(image, boxes, self.labels, decimals=decimals)
        t = timer() - t
        self.timing[0] += 1
        self.timing[1] += t
        print('{} boxes found in {:.3f} s'.format(len(boxes), t))
        return image, boxes

    def predict_from_dir(self,
                         path_to_dir,
                         image_format,
                         path_to_outputs=None,
                         threshold=0.5,
                         decimals=8,
                         save_anno=False,
                         draw_truth=False):
        if path_to_outputs and not os.path.exists(path_to_outputs):
            print('Creating output path {}'.format(path_to_outputs))
            os.mkdir(path_to_outputs)

        for image_filename in os.listdir(path_to_dir):
            # TODO: add a proper check that this file is actually an image
            if image_filename.endswith(image_format):
                image = cv2.imread(os.path.join(path_to_dir, image_filename),
                                   cv2.IMREAD_COLOR)
                image_h = image.shape[0]
                image_w = image.shape[1]

                curr_time = timer()

                image, boxes = self._predict_one(image,
                                                 threshold=threshold,
                                                 decimals=decimals)

                curr_time = timer() - curr_time
                print(curr_time)

                boxes = get_annoboxes(image_w=image_w,
                                      image_h=image_h,
                                      boxes=boxes,
                                      labels=self.labels)

                if path_to_outputs:

                    if save_anno:
                        save_anno_xml(
                            dir=os.path.join(path_to_outputs, 'annotations/'),
                            img_name=image_filename[:-len(image_format) - 1],
                            img_format=image_format,
                            img_w=image.shape[1],
                            img_h=image.shape[0],
                            img_d=image.shape[2],
                            boxes=boxes,
                            quiet=False,
                            minConf=threshold)

                    retval = cv2.imwrite(
                        os.path.join(path_to_outputs, 'images', image_filename),
                        image)
                    if retval:
                        print('Image {} saved to {}'.format(
                            image_filename, path_to_outputs))
            else:
                print('Non-image file in folder: {}'.format(image_filename))

        print('All images processed')
        print('Number of images: {}, total time: {}, mean time per image: {}'
              .format(self.timing[0], self.timing[1],
                      self.timing[1] / self.timing[0]))

    def predict_from_webcam(self, threshold=0.5, fps=False, decimals=8):
        vid = cv2.VideoCapture(1)
        if not vid.isOpened():
            raise IOError(
                ("Couldn't open webcam. If you're trying to open a webcam, "
                 "make sure your camera index is correct!"))

        # Compute aspect ratio of video
        vidw = vid.get(cv2.CAP_PROP_FRAME_WIDTH)
        vidh = vid.get(cv2.CAP_PROP_FRAME_HEIGHT)
        vidar = vidw / vidh

        accum_time = 0
        curr_fps = 0
        fps_text = "FPS: ??"  # separate from the boolean `fps` flag argument
        prev_time = timer()

        while True:
            retval, orig_image = vid.read()
            if not retval:
                print("Done!")
                return

            # _predict_one returns (image, boxes); keep only the image here.
            res_image, _ = self._predict_one(orig_image,
                                             threshold=threshold,
                                             decimals=decimals)

            # Calculate FPS
            # This computes FPS for everything, not just the model's execution
            # which may or may not be what you want
            if fps:
                curr_time = timer()
                exec_time = curr_time - prev_time
                prev_time = curr_time
                accum_time = accum_time + exec_time
                curr_fps = curr_fps + 1
                if accum_time > 1:
                    accum_time = accum_time - 1
                    fps_text = "FPS: " + str(curr_fps)
                    curr_fps = 0

                # Draw FPS in top left corner
                cv2.rectangle(res_image, (0, 0), (50, 17), (255, 255, 255), -1)
                cv2.putText(res_image, fps_text, (3, 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0, 0, 0), 1)

            cv2.imshow("YOLOv2 result", res_image)
            pressedKey = cv2.waitKey(10)
            if pressedKey == 27:  # ESC key
                break

    def predict_from_video(self,
                           path_to_video,
                           threshold=0.5,
                           decimals=8,
                           output_file='',
                           crop=True,
                           writeFPS=False,
                           show=False):
        vid = cv2.VideoCapture(path_to_video)
        if not vid.isOpened():
            raise IOError(
                "Couldn't open video file {}.".format(path_to_video))

        # Compute aspect ratio of video
        vidw = vid.get(cv2.CAP_PROP_FRAME_WIDTH)
        vidh = vid.get(cv2.CAP_PROP_FRAME_HEIGHT)

        n = 0
        if crop:
            # Centered square crop: left offset n, new width equals the height.
            n = int((vidw - vidh) * 0.5)
            vidw = vidh

        # Define the codec and create VideoWriter object
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter(output_file, fourcc, 20.0,
                              (int(vidw), int(vidh)))

        accum_time = 0
        curr_fps = 0
        fps = "FPS: ??"
        prev_time = timer()

        while True:
            retval, orig_image = vid.read()
            if not retval:
                print("Done!")
                break

            if crop:
                orig_image = orig_image[:, n:int(n + vidh), :]

            res_image, boxes = self._predict_one(orig_image,
                                                 threshold=threshold,
                                                 decimals=decimals)

            # Calculate FPS
            # This computes FPS for everything, not just the model's execution
            # which may or may not be what you want
            if writeFPS:
                curr_time = timer()
                exec_time = curr_time - prev_time
                prev_time = curr_time
                accum_time = accum_time + exec_time
                curr_fps = curr_fps + 1
                if accum_time > 1:
                    accum_time = accum_time - 1
                    fps = "FPS: " + str(curr_fps)
                    curr_fps = 0

                # Draw FPS in top left corner
                cv2.rectangle(res_image, (0, 0), (50, 17), (255, 255, 255), -1)
                cv2.putText(res_image, fps, (3, 10), cv2.FONT_HERSHEY_SIMPLEX,
                            0.35, (0, 0, 0), 1)

            if show:
                cv2.imshow("YOLOv2 result", res_image)

            if output_file:
                out.write(res_image)

            pressedKey = cv2.waitKey(10)
            if pressedKey == 27:  # ESC key
                break

        out.release()
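
A minimal usage sketch for the predictor class; the file names and thresholds are assumptions:

p = predictor('config.json', 'yolo_weights.h5')

# Batch inference over a folder, saving annotated images and VOC XML files.
p.predict_from_dir('./test_images/', 'jpg',
                   path_to_outputs='./outputs/',
                   threshold=0.4, save_anno=True)

# Or run on a video file, writing the annotated result to disk.
p.predict_from_video('./clip.avi', output_file='./clip_out.avi', show=True)
Example #14
0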
def _main_():
    config_path = 'config.json'
    weights_path = 'model.h5'
    image_path = 'image.mp4'

    with open(config_path) as config_buffer:
        config = json.load(config_buffer)

    ###############################
    #   Make the model
    ###############################

    yolo = YOLO(backend=config['model']['backend'],
                input_size=config['model']['input_size'],
                labels=config['model']['labels'],
                max_box_per_image=config['model']['max_box_per_image'],
                anchors=config['model']['anchors'])

    time_now = 0
    data_head = pd.DataFrame({'Time': [0], 'Head_count': [0]})

    ###############################
    #   Load trained weights
    ###############################

    yolo.load_weights(weights_path)

    ###############################
    #   Predict bounding boxes
    ###############################

    if image_path[-4:] == '.mp4':
        video_out = image_path[:-4] + '_detected' + image_path[-4:]
        video_reader = cv2.VideoCapture(image_path)

        nb_frames = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_h = int(video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frame_w = int(video_reader.get(cv2.CAP_PROP_FRAME_WIDTH))

        video_writer = cv2.VideoWriter(video_out,
                                       cv2.VideoWriter_fourcc(*'MPEG'), 50.0,
                                       (frame_w, frame_h))

        for i in tqdm(range(nb_frames)):
            _, image = video_reader.read()
            time_now += 1
            boxes = yolo.predict(image)
            data_head = data_head.append(
                {
                    'Time': str(time_now // 60) + '/' + str(time_now % 60),
                    'Head_count': len(boxes)
                },
                ignore_index=True)
            image = draw_boxes(image, boxes, config['model']['labels'])

            video_writer.write(np.uint8(image))
            if time_now == 62:  # demo limit: stop after 62 frames
                break

        video_reader.release()
        video_writer.release()
        data_head.to_csv('head_count.csv', index=False)

    else:
        image = cv2.imread(image_path)
        boxes = yolo.predict(image)
        image = draw_boxes(image, boxes, config['model']['labels'])
        data_head = data_head.append({
            'Time': '0/1',
            'Head_count': len(boxes)
        },
                                     ignore_index=True)

        print(len(boxes), 'boxes are found')
        data_head.to_csv('head_count.csv', index=False)
        cv2.imwrite(image_path[:-4] + '_detected' + image_path[-4:], image)
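
Note that DataFrame.append, used above, was deprecated in pandas 1.4 and removed in 2.0. On current pandas the same row accumulation can be written with pd.concat; a sketch:

import pandas as pd

def append_row(df, time_str, head_count):
    # pd.concat replaces the removed DataFrame.append for adding one row.
    row = pd.DataFrame({'Time': [time_str], 'Head_count': [head_count]})
    return pd.concat([df, row], ignore_index=True)

data_head = append_row(pd.DataFrame({'Time': [0], 'Head_count': [0]}), '1/2', 5)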
Example #15
0
def _main_(args):
    config_path = args.conf
    weights_path = args.weights
    image_path = args.input

    with open(config_path) as config_buffer:
        config = json.load(config_buffer)

    ###############################
    #   Make the model
    ###############################

    yolo = YOLO(backend=config['model']['backend'],
                input_size=config['model']['input_size'],
                labels=config['model']['labels'],
                max_box_per_image=config['model']['max_box_per_image'],
                anchors=config['model']['anchors'])

    ###############################
    #   Load trained weights
    ###############################

    yolo.load_weights(weights_path)

    ###############################
    #   Predict bounding boxes
    ###############################

    if image_path[-4:] == '.mp4':
        video_out = image_path[:-4] + '_detected' + image_path[-4:]
        video_reader = cv2.VideoCapture(image_path)

        nb_frames = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_h = int(video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frame_w = int(video_reader.get(cv2.CAP_PROP_FRAME_WIDTH))

        video_writer = cv2.VideoWriter(
            video_out, cv2.VideoWriter_fourcc(*'mp4v'), 30.0,
            (frame_h, frame_w), True)  # (h, w): frames are rotated 90 deg below

        for i in tqdm(range(nb_frames)):
            _, image = video_reader.read()
            image = np.rot90(image, 3)
            image = image.copy()  # make the rotated view contiguous for OpenCV

            boxes = yolo.predict(image)
            #image = draw_boxes(image, boxes, config['model']['labels'], 20, 3.5, -90)
            image = draw_boxes(image, boxes, config['model']['labels'], 2, 1.1,
                               -30)

            video_writer.write(np.uint8(image))

        video_reader.release()
        video_writer.release()
    else:
        image = cv2.imread(image_path)

        if args.annotFile is not None:
            # Draw ground-truth boxes parsed from a Pascal VOC annotation file.
            boxes_ann = []
            tree = ET.parse(args.annotFile)
            for elem in tree.iter():
                if 'object' in elem.tag or 'part' in elem.tag:
                    obj = {}

                    for attr in list(elem):
                        if 'name' in attr.tag:
                            obj['name'] = attr.text

                            boxes_ann.append(obj)

                        if 'bndbox' in attr.tag:
                            for dim in list(attr):
                                if 'xmin' in dim.tag:
                                    obj['xmin'] = int(round(float(dim.text)))
                                if 'ymin' in dim.tag:
                                    obj['ymin'] = int(round(float(dim.text)))
                                if 'xmax' in dim.tag:
                                    obj['xmax'] = int(round(float(dim.text)))
                                if 'ymax' in dim.tag:
                                    obj['ymax'] = int(round(float(dim.text)))

            for box in boxes_ann:
                cv2.rectangle(image, (box['xmin'], box['ymin']),
                              (box['xmax'], box['ymax']), (255, 0, 0), 30)

        boxes = yolo.predict(image)
        image = draw_boxes(image, boxes, config['model']['labels'], 30, 4.5,
                           35)

        print(len(boxes), 'boxes are found')

        cv2.imwrite(image_path[:-4] + '_detected' + image_path[-4:], image)
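
Because np.rot90 swaps height and width, the VideoWriter above is created with size (frame_h, frame_w) rather than (frame_w, frame_h). A quick shape check illustrates why:

import numpy as np

image = np.zeros((1080, 1920, 3), dtype=np.uint8)  # (H, W, 3)
rotated = np.rot90(image, 3)
# rotated.shape == (1920, 1080, 3): H and W swap, so the writer's
# (width, height) argument becomes (frame_h, frame_w).
print(rotated.shape)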
Example #16
0
def detect_videos(annotations_list, video_folders_list, detected_folder):
    """ Detect videos by YOLO, and store the detected bounding boxes
    """
    yolo_config_path = "../config_aerial.json"
    with open(yolo_config_path) as config_buffer:    
        yolo_config = json.load(config_buffer)

    # ##############################
    #   Make the model 
    # ##############################

    yolo = YOLO(architecture        = yolo_config['model']['architecture'],
                input_size          = yolo_config['model']['input_size'], 
                labels              = yolo_config['model']['labels'], 
                max_box_per_image   = yolo_config['model']['max_box_per_image'],
                anchors             = yolo_config['model']['anchors'])

    # ###############################
    # #   Load trained weights
    # ###############################    

    yolo_weights_path = "../yolo_coco_aerial_person.h5"
    print("YOLO weights path:", yolo_weights_path)
    yolo.load_weights(yolo_weights_path)

    if len(annotations_list) != len(video_folders_list):
        raise IOError("Mismatch # videos {} {}.".format(len(annotations_list), len(video_folders_list)))

    for vid, video_folder in enumerate(video_folders_list):
        print(basename(video_folder))
        detected_label_path = os.path.join(detected_folder, basename(video_folder))
        if os.path.exists(detected_label_path + '.npy'):
            continue

        if basename(annotations_list[vid]) != (basename(video_folder) + ".txt"):
            print("Annot: {}".format(basename(annotations_list[vid])))
            print("image: {}".format(basename(video_folder)))
            raise IOError("Mismatch video {}.".format(basename(video_folder)))

        with open(annotations_list[vid], 'r') as annot_file:
            num_frames = sum(1 for line in annot_file)
        image_path_list = sorted(glob.glob(video_folder + "/*"))
        sort_nicely(image_path_list)

        if num_frames != len(image_path_list):
            raise IOError("Number of frames in {} does not match annotations.".format(basename(video_folder)))

        with open(annotations_list[vid], 'r') as annot_file:
            first_box_unnormalized = parse_label(annot_file.readline())

        first_image = cv2.imread(image_path_list[0])
        first_box = normalize_box(first_image.shape, first_box_unnormalized)
        last_box = first_box

        # Write the detected labels into detected/
        detected_boxes = []
        detected_box = [first_box.x, first_box.y, first_box.w, first_box.h]
        detected_boxes.append(detected_box)

        # Write the detected features into features/

        for i, image_path in enumerate(image_path_list):
            print("============ Detecting {} video, {} frame ===============".format(basename(video_folder), basename(image_path)))
            image = cv2.imread(image_path)
            if image is None:
                raise IOError('Cannot find ' + image_path)
            boxes, dummy_feature = yolo.predict_for_rolo(image)
            chosen_box = choose_best_box(boxes, last_box)
            last_box = chosen_box                                

            if i > 0:
                # Write the detected result of target
                detected_box = [chosen_box.x, chosen_box.y, chosen_box.w, chosen_box.h]
                detected_boxes.append(detected_box)

        print("======================= Save detected label result ==========================")
        detected_boxes = np.array(detected_boxes)
        print("Video:{} {} boxes are detected".format(basename(video_folder), detected_boxes.shape[0]))
        np.save(detected_label_path + '.npy', detected_boxes)
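
A usage sketch for detect_videos; the directory layout is an assumption, and each annotation .txt must share its base name with a folder of video frames:

import glob

annotations_list = sorted(glob.glob('../annotations/*.txt'))
video_folders_list = sorted(glob.glob('../videos/*'))
detect_videos(annotations_list, video_folders_list, '../detected/')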