def create_model(input_shape, anchors, num_classes, load_pretrained=True, freeze_body=2,
                 weights_path='model_data/yolo_weights.h5'):
    """Build the YOLOv3 training model: the network body wrapped with a loss layer.

    Args:
        input_shape: ``(height, width)`` pair; both values are integer-divided
            by 32, 16 and 8 to size the three ground-truth inputs.
        anchors: anchor collection; its length is used here and the object is
            forwarded unchanged to ``yolo_loss``.
        num_classes: number of object classes.
        load_pretrained: when true, load ``weights_path`` into the body by
            layer name, skipping mismatching layers.
        freeze_body: 1 marks the first 185 layers as non-trainable, 2 marks
            all but the last three layers; other values freeze nothing.
        weights_path: weights file to load when ``load_pretrained`` is true.

    Returns:
        A Keras ``Model`` whose inputs are ``[image, *y_true]`` and whose
        output is the ``yolo_loss`` Lambda layer.
    """
    K.clear_session()  # get a new session

    image_input = Input(shape=(None, None, 3))
    h, w = input_shape
    num_anchors = len(anchors)
    anchors_per_scale = num_anchors // 3
    strides = (32, 16, 8)  # downsampling factor of each of the three outputs

    # One ground-truth placeholder per output scale.
    y_true = [
        Input(shape=(h // stride, w // stride, anchors_per_scale, num_classes + 5))
        for stride in strides
    ]

    model_body = yolo_body(image_input, anchors_per_scale, num_classes)
    print('Create YOLOv3 model with {} anchors and {} classes.'.format(num_anchors, num_classes))

    if load_pretrained:
        model_body.load_weights(weights_path, by_name=True, skip_mismatch=True)
        print('Load weights {}.'.format(weights_path))
        if freeze_body in [1, 2]:
            # Freeze darknet53 body or freeze all but 3 output layers.
            if freeze_body == 1:
                num = 185
            else:
                num = len(model_body.layers) - 3
            for index in range(num):
                model_body.layers[index].trainable = False
            print('Freeze the first {} layers of total {} layers.'.format(
                num, len(model_body.layers)))

    # Give the three final layers names that encode their grid height.
    # NOTE(review): the source's indentation is lost, so the nesting of this
    # loop relative to the two ``if`` blocks above should be confirmed.
    for output_layer, stride in zip(model_body.layers[-3:], strides):
        output_layer.name = "conv2d_output_" + str(h // stride)

    loss_arguments = {
        'anchors': anchors,
        'num_classes': num_classes,
        'ignore_thresh': 0.5,
    }
    model_loss = Lambda(
        yolo_loss,
        output_shape=(1,),
        name='yolo_loss',
        arguments=loss_arguments,
    )([*model_body.output, *y_true])

    model = Model([model_body.input, *y_true], model_loss)
    return model
def _main_(args):
    """Evaluate saved YOLOv3 weights on the validation set and print AP per class.

    Args:
        args: object with a ``conf`` attribute holding the path of a JSON
            configuration file. The keys read are ``valid`` (annotation and
            image folders, cache name), ``model`` (labels, anchors,
            min/max input size) and ``train`` (batch size, gpus, saved
            weights name).

    Prints one ``label: AP`` line per evaluated class followed by the mean AP.
    """
    config_path = args.conf

    print("load config")
    with open(config_path) as config_buffer:
        config = json.load(config_buffer)

    ###############################
    #   Create the validation generator
    ###############################
    valid_ints, labels = parse_voc_annotation(
        config['valid']['valid_annot_folder'],
        config['valid']['valid_image_folder'],
        config['valid']['cache_name'],
        config['model']['labels'],
    )

    # Fall back to the labels found in the annotations when none are configured.
    configured_labels = config['model']['labels']
    if len(configured_labels) == 0:
        labels = labels.keys()
    else:
        labels = configured_labels
    labels = sorted(labels)

    print("valid generator")
    valid_generator = BatchGenerator(
        instances=valid_ints,
        anchors=config['model']['anchors'],
        labels=labels,
        downsample=32,  # ratio between network input's size and network output's size, 32 for YOLOv3
        max_box_per_image=0,
        batch_size=config['train']['batch_size'],
        min_net_size=config['model']['min_input_size'],
        max_net_size=config['model']['max_input_size'],
        shuffle=True,
        jitter=0.0,
        norm=normalize,
    )

    ###############################
    #   Load the model and do evaluation
    ###############################
    os.environ['CUDA_VISIBLE_DEVICES'] = config['train']['gpus']

    # The architecture is rebuilt and only the weights are loaded, so no
    # training configuration is needed.
    # NOTE(review): the literals 3 and 20 are passed straight to yolo_body;
    # presumably anchors-per-scale and class count — confirm they match the
    # saved weights and ``labels``.
    infer_model = yolo_body(Input(shape=(None, None, 3)), 3, 20)
    infer_model.load_weights(config['train']['saved_weights_name'])
    print(config['train']['saved_weights_name'])

    print("get mAp for All classes")
    # compute mAP for all the classes
    average_precisions = evaluate(infer_model, valid_generator)

    # print the score
    for label, average_precision in average_precisions.items():
        print(labels[label] + ': {:.4f}'.format(average_precision))

    # Guard the mean: an empty result would otherwise divide by zero.
    if average_precisions:
        print('mAP: {:.4f}'.format(sum(average_precisions.values()) / len(average_precisions)))
    else:
        print('mAP: n/a (no classes were evaluated)')
def generate(self):
    """Load the detection model and build the filtered box/score/class tensors.

    Reads ``self.model_path``, ``self.anchors``, ``self.class_names``,
    ``self.gpu_num``, ``self.score`` and ``self.iou``. Sets
    ``self.yolo_model``, ``self.colors`` and ``self.input_image_shape``.

    Returns:
        The ``(boxes, scores, classes)`` triple produced by ``yolo_eval``.

    Raises:
        AssertionError: if the path does not end in ``.h5``, or if a fully
            loaded model's last layer does not match the anchor/class counts.
    """
    model_path = os.path.expanduser(self.model_path)
    assert model_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.'

    # Load model, or construct model and load weights.
    num_anchors = len(self.anchors)
    num_classes = len(self.class_names)
    is_tiny_version = num_anchors == 6  # default setting
    try:
        self.yolo_model = load_model(model_path, compile=False)
    except Exception:
        # The file could not be loaded as a full model: rebuild the
        # architecture and load it as a weights file instead. Catching
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate.
        image_input = Input(shape=(None, None, 3))
        if is_tiny_version:
            self.yolo_model = tiny_yolo_body(image_input, num_anchors // 2, num_classes)
        else:
            self.yolo_model = yolo_body(image_input, num_anchors // 3, num_classes)
        # Use the expanded path validated above, not the raw attribute, so a
        # "~" in the configured path works here as well.
        # make sure model, anchors and classes match
        self.yolo_model.load_weights(model_path)
    else:
        assert self.yolo_model.layers[-1].output_shape[-1] == \
            num_anchors / len(self.yolo_model.output) * (num_classes + 5), \
            'Mismatch between model and given anchor and class sizes'

    print('{} model, anchors, and classes loaded.'.format(model_path))

    # Generate colors for drawing bounding boxes: evenly spaced hues
    # converted to 0-255 RGB tuples.
    hsv_tuples = [(x / num_classes, 1., 1.) for x in range(num_classes)]
    rgb_unit = [colorsys.hsv_to_rgb(*hsv) for hsv in hsv_tuples]
    self.colors = [
        (int(r * 255), int(g * 255), int(b * 255)) for r, g, b in rgb_unit
    ]
    np.random.seed(10101)  # Fixed seed for consistent colors across runs.
    np.random.shuffle(self.colors)  # Shuffle colors to decorrelate adjacent classes.
    np.random.seed(None)  # Reset seed to default.

    # Generate output tensor targets for filtered bounding boxes.
    self.input_image_shape = K.placeholder(shape=(2, ))
    if self.gpu_num >= 2:
        self.yolo_model = multi_gpu_model(self.yolo_model, gpus=self.gpu_num)
    boxes, scores, classes = yolo_eval(
        self.yolo_model.output,
        self.anchors,
        num_classes,
        self.input_image_shape,
        score_threshold=self.score,
        iou_threshold=self.iou,
    )
    return boxes, scores, classes
def data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes):
    """Yield ``[images, *teacher_outputs]`` batches for ``fit_generator``.

    A YOLOv3 body is built once, loaded from
    ``model_data/trained_weights_final.h5`` and used to predict the targets
    for every batch, instead of encoding the annotated boxes.

    Args:
        annotation_lines: list of annotation lines; shuffled in place each
            time iteration wraps around to the first entry.
        batch_size: number of images per yielded batch.
        input_shape: ``(height, width)`` passed to ``get_random_data``; also
            used for the teacher's input so both always agree.
        anchors: anchor collection; only its length is used here.
        num_classes: number of object classes.

    Yields:
        A list whose first element is the image batch array, followed by the
        arrays returned by the teacher's ``predict``.
    """
    num_anchors = len(anchors)

    # Size the teacher input from ``input_shape`` rather than a fixed
    # 416x416, so that the images generated below match the network input.
    # NOTE(review): assumes get_random_data returns images of exactly
    # ``input_shape`` — confirm against its implementation.
    image_input = Input(shape=tuple(input_shape) + (3,))
    model = yolo_body(image_input, num_anchors // 3, num_classes)
    model.load_weights("model_data/trained_weights_final.h5")

    n = len(annotation_lines)
    i = 0
    while True:
        image_data = []
        for _ in range(batch_size):
            if i == 0:
                np.random.shuffle(annotation_lines)
            # The annotated boxes are not needed: targets come from the teacher.
            image, _box = get_random_data(annotation_lines[i], input_shape, random=True)
            image_data.append(image)
            i = (i + 1) % n
        image_data = np.array(image_data)

        y_true = model.predict(image_data)
        yield [image_data, *y_true]
def _store_teacher_logits(h5_path, annotation_lines, batch_size, input_shape, anchors,
                          num_classes, teacher, grid_shapes):
    """Write one image and its three teacher outputs per annotation line to HDF5.

    Creates the datasets ``img_data``, ``big_logits``, ``medium_logits`` and
    ``small_logits`` in ``h5_path`` and fills one row per batch pulled from
    ``data_generator_wrapper`` (only the first sample of each batch is stored).
    With no annotation lines the datasets are created empty and nothing is read.
    """
    num_samples = len(annotation_lines)
    print("total " + str(num_samples) + " loop " + str(num_samples))

    height, width = input_shape
    with h5py.File(h5_path, 'w') as f:
        img = f.create_dataset("img_data", shape=(num_samples, height, width, 3))
        bbox = f.create_dataset("big_logits", shape=(num_samples,) + grid_shapes[0])
        mbox = f.create_dataset("medium_logits", shape=(num_samples,) + grid_shapes[1])
        sbox = f.create_dataset("small_logits", shape=(num_samples,) + grid_shapes[2])

        if num_samples == 0:
            return

        batches = data_generator_wrapper(
            annotation_lines, batch_size, input_shape, anchors, num_classes, teacher)
        for index, logits in enumerate(tqdm(batches)):
            img[index] = logits[0][0]
            bbox[index] = logits[1][0]
            mbox[index] = logits[2][0]
            sbox[index] = logits[3][0]
            # The generator is consumed only until every row is filled.
            if index + 1 >= num_samples:
                break


def _main():
    """Dump teacher images and logits for the train and val splits to HDF5 files.

    Builds a YOLOv3 body, loads ``model_data/trained_weights_final.h5``,
    reshapes the outputs of its last three layers to
    ``(grid_h, grid_w, anchors_per_scale, num_classes + 5)`` and writes
    ``train_logits.h5`` and ``val_logits.h5``.
    """
    train_path = '2007_train.txt'
    val_path = '2007_val.txt'
    classes_path = 'class/voc_classes.txt'
    anchors_path = 'anchors/yolo_anchors.txt'

    class_names = get_classes(classes_path)
    num_classes = len(class_names)
    anchors = get_anchors(anchors_path)
    num_anchors = len(anchors)
    input_shape = (416, 416)  # multiple of 32, hw

    with open(train_path) as f:
        train_lines = f.readlines()
    with open(val_path) as f:
        val_lines = f.readlines()

    height, width = input_shape
    anchors_per_scale = num_anchors // 3
    # Derived shapes; with 416x416 input, 9 anchors and 20 classes these are
    # (13, 13, 3, 25), (26, 26, 3, 25) and (52, 52, 3, 25).
    grid_shapes = [
        (height // stride, width // stride, anchors_per_scale, num_classes + 5)
        for stride in (32, 16, 8)
    ]

    image_input = Input(shape=(height, width, 3))
    model = yolo_body(image_input, anchors_per_scale, num_classes)
    model.load_weights("model_data/trained_weights_final.h5")

    # NOTE(review): relies on the last three layers being the outputs in
    # coarse-to-fine order — confirm against yolo_body.
    reshaped_outputs = [
        Reshape(shape)(layer.output)
        for shape, layer in zip(grid_shapes, model.layers[-3:])
    ]
    model = Model(inputs=model.input, outputs=reshaped_outputs)

    batch_size = 1

    _store_teacher_logits("train_logits.h5", train_lines, batch_size, input_shape,
                          anchors, num_classes, model, grid_shapes)
    _store_teacher_logits("val_logits.h5", val_lines, batch_size, input_shape,
                          anchors, num_classes, model, grid_shapes)
def _main():
    """Train a student YOLO model on targets predicted by a YOLOv3 teacher.

    Stage 1 trains with the layers frozen by ``create_model`` /
    ``create_tiny_model`` (epochs 0-30, lr 1e-3); stage 2 marks every layer
    trainable and continues to epoch 60 with lr 1e-4, learning-rate reduction
    and early stopping. Weights are saved under ``log_dir`` after each stage.
    """
    train_path = '2007_train.txt'
    val_path = '2007_val.txt'
    log_dir = 'logs/logits_only_000/'
    classes_path = 'class/voc_classes.txt'
    anchors_path = 'anchors/yolo_anchors.txt'

    class_names = get_classes(classes_path)
    num_classes = len(class_names)
    anchors = get_anchors(anchors_path)
    num_anchors = len(anchors)

    input_shape = (416, 416)  # multiple of 32, hw

    # Student model.
    is_tiny_version = num_anchors == 6  # default setting
    if is_tiny_version:
        model = create_tiny_model(
            input_shape,
            anchors,
            num_classes,
            freeze_body=2,
            weights_path='model_data/tiny_yolo_weights.h5')
    else:
        model = create_model(
            input_shape,
            anchors,
            num_classes,
            freeze_body=2,
            weights_path='model_data/trained_weights_final_mobilenetv2.h5'
        )  # make sure you know what you freeze

    logging = TensorBoard(log_dir=log_dir)
    checkpoint = ModelCheckpoint(
        log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
        monitor='val_loss',
        save_weights_only=True,
        save_best_only=True,
        period=3)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1)
    early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1)

    with open(train_path) as f:
        train_lines = f.readlines()
    with open(val_path) as f:
        val_lines = f.readlines()

    # Each count is taken from its own split (they were previously swapped,
    # which mis-sized steps_per_epoch, validation_steps and the AP callback).
    num_train = len(train_lines)
    num_val = len(val_lines)

    # Teacher model: YOLOv3 body whose last three layer outputs are reshaped
    # to (grid_h, grid_w, anchors_per_scale, num_classes + 5).
    height, width = input_shape
    anchors_per_scale = num_anchors // 3
    image_input = Input(shape=(height, width, 3))
    teacher = yolo_body(image_input, anchors_per_scale, num_classes)
    teacher.load_weights("model_data/trained_weights_final.h5")

    # NOTE(review): relies on the last three layers being the outputs in
    # coarse-to-fine order — confirm against yolo_body.
    teacher_outputs = [
        Reshape((height // stride, width // stride, anchors_per_scale, num_classes + 5))(layer.output)
        for stride, layer in zip((32, 16, 8), teacher.layers[-3:])
    ]
    teacher = Model(inputs=teacher.input, outputs=teacher_outputs)
    teacher._make_predict_function()

    def make_generator(lines, size):
        """Return a fresh teacher-driven batch generator over ``lines``."""
        return data_generator_wrapper(lines, size, input_shape, anchors, num_classes, teacher)

    # Train with frozen layers first, to get a stable loss.
    # Adjust num epochs to your dataset. This step is enough to obtain a not bad model.
    if True:  # stage toggle kept from the original script
        model.compile(
            optimizer=Adam(lr=1e-3),
            loss={
                # use custom yolo_loss Lambda layer.
                'yolo_loss': lambda y_true, y_pred: y_pred
            })

        batch_size = 2  # 16 or 32 if memory allows

        meanAP = AveragePrecision(
            make_generator(val_lines, 1),
            num_val, input_shape,
            len(anchors) // 3, anchors, num_classes)

        print('Train on {} samples, val on {} samples, with batch size {}.'.
              format(num_train, num_val, batch_size))
        model.fit_generator(
            make_generator(train_lines, batch_size),
            steps_per_epoch=max(1, num_train // batch_size),
            validation_data=make_generator(val_lines, batch_size),
            validation_steps=max(1, num_val // batch_size),
            epochs=30,
            initial_epoch=0,
            callbacks=[logging, checkpoint, meanAP])
        model.save_weights(log_dir + 'distillation_mobilenet_trained_weights_stage_1.h5')

    # Unfreeze and continue training, to fine-tune.
    # Train longer if the result is not good.
    if True:  # stage toggle kept from the original script
        for layer in model.layers:
            layer.trainable = True
        model.compile(
            optimizer=Adam(lr=1e-4),
            loss={
                'yolo_loss': lambda y_true, y_pred: y_pred
            })  # recompile to apply the change
        print('Unfreeze all of the layers.')

        batch_size = 2  # note that more GPU memory is required after unfreezing the body

        meanAP = AveragePrecision(
            make_generator(val_lines, 1),
            num_val, input_shape,
            len(anchors) // 3, anchors, num_classes)

        print('Train on {} samples, val on {} samples, with batch size {}.'.
              format(num_train, num_val, batch_size))
        model.fit_generator(
            make_generator(train_lines, batch_size),
            steps_per_epoch=max(1, num_train // batch_size),
            validation_data=make_generator(val_lines, batch_size),
            validation_steps=max(1, num_val // batch_size),
            epochs=60,
            initial_epoch=30,
            callbacks=[logging, checkpoint, reduce_lr, early_stopping, meanAP])
        model.save_weights(log_dir + 'distillation_mobilenet_trained_weights_final.h5')
def _main(): train_path = '2007_train.txt' val_path = '2007_val.txt' test_path = '2007_test.txt' #log_dir = 'logs/logits_only_000/' classes_path = 'class/voc_classes.txt' anchors_path = 'anchors/yolo_anchors.txt' class_names = get_classes(classes_path) num_classes = len(class_names) anchors = get_anchors(anchors_path) input_shape = (416,416) # multiple of 32, hw with open(train_path) as f: train_lines = f.readlines() with open(val_path) as f: val_lines = f.readlines() with open(test_path) as f: test_lines = f.readlines() num_val = int(len(train_lines)) num_train = int(len(val_lines)) num_test = int(len(test_lines)) #declare model num_anchors = len(anchors) image_input = Input(shape=(416, 416, 3)) eval_model = yolo_body(image_input, num_anchors//3, num_classes) eval_model.load_weights("model_data/trained_weights_final.h5") # return the constructed network architecture # class+5 yolo3 = Reshape((13, 13, 3, 25))(eval_model.layers[-3].output) yolo2 = Reshape((26, 26, 3, 25))(eval_model.layers[-2].output) yolo1 = Reshape((52, 52, 3, 25))(eval_model.layers[-1].output) eval_model = Model( inputs= eval_model.input , outputs=[yolo3,yolo2,yolo1] ) eval_model._make_predict_function() batch_size = 1 all_detections = [ [] for i in range(num_classes) ] all_annotations = [ [] for i in range(num_classes) ] count_detections = [ [0 for i in range(num_classes)] for i in range(3) ] num_layers = len(anchors)//3 datagen = data_generator_wrapper(test_lines, batch_size, input_shape, anchors, num_classes,eval_model) print( "{} test data".format(num_test) ) for n in tqdm( range(num_test) ):#num_test img,flogits,mlogits = next(datagen) for l in range(num_layers): #print( "layer" + str(l) ) arrp = flogits[l] box = np.where(arrp[...,4] > 0 ) box = np.transpose(box) for i in range(len(box)): #print("obj" + str(i) ) #detection_label = np.argmax( flogits[l][tuple(box[i])][5:]) annotation_label = np.argmax( flogits[l][tuple(box[i])][5:]) #print( "{} ({}) {} == ({}) {} ".format(l, detection_label, 
class_names[ detection_label ] ,annotation_label, class_names[ annotation_label ] ) ) all_detections[annotation_label].append( mlogits[l][tuple(box[i])] ) all_annotations[annotation_label].append( flogits[l][tuple(box[i])] ) count_detections[l][annotation_label] +=1 print(len(all_detections) ) print(len(all_annotations) ) print(count_detections) iou_thres = 0.5 conf_thres = 0.5 average_precisions = {} for label in tqdm( range( num_classes ) ) : false_positives = np.zeros((0,)) true_positives = np.zeros((0,)) scores = np.zeros((0,)) num_detect = len( all_detections[label] ) for det in range( num_detect ): detect_box = all_detections[label][det][...,0:4] detect_conf = all_detections[label][det][...,4] detect_label = np.argmax( all_detections[label][det][...,5:] ) annot_box = all_annotations[label][det][...,0:4] annot_conf = all_annotations[label][det][...,4] detect_label = np.argmax( all_detections[label][det][...,5:] ) iou = numpy_box_iou( detect_box , annot_box) scores = np.append(scores, detect_conf ) if( iou > iou_thres and detect_conf > conf_thres and (label == detect_label ) ): #print( best_iou[tuple(box[i])] ) print("pos") false_positives = np.append(false_positives, 0) true_positives = np.append(true_positives, 1) else: print("neg") false_positives = np.append(false_positives, 1) true_positives = np.append(true_positives, 0) indices = np.argsort(-scores) false_positives = false_positives[indices] true_positives = true_positives[indices] print(true_positives) false_positives = np.cumsum(false_positives) true_positives = np.cumsum(true_positives) print(true_positives) recall = true_positives / num_detect print( recall ) precision = true_positives / np.maximum(true_positives + false_positives, np.finfo(np.float64).eps) print( precision ) average_precision = compute_ap(recall, precision) average_precisions[label] = average_precision print(average_precisions) for label, average_precision in average_precisions.items(): print(class_names[label] + ': 
{:.4f}'.format(average_precision)) print('mAP: {:.4f}'.format(sum(average_precisions.values()) / len(average_precisions))) '''
# Profile a YOLO body built on a fresh graph and print its float-operation
# count and trainable-parameter count using the TensorFlow profiler.
from keras.applications.mobilenet import MobileNet
# Alternative bodies that can be profiled instead of model.yolo3.yolo_body:
#   from model.yolo3 import tiny_yolo_body
#   from model.small_mobilenets2 import yolo_body
#   from model.medium_darknet import yolo_body
#   from model.mobilenet import yolo_body
from model.yolo3 import yolo_body

run_meta = tf.RunMetadata()

with tf.Session(graph=tf.Graph()) as sess:
    K.set_session(sess)

    # Network under test. MobileNet or tiny_yolo_body can be swapped in here,
    # e.g. MobileNet(input_tensor=tf.placeholder('float32', shape=(1, 416, 416, 3)),
    # weights='imagenet') or tiny_yolo_body(image_input, 3, 20).
    image_input = Input(shape=(416, 416, 3))
    net = yolo_body(image_input, 3, 20)

    # First pass: floating point operations.
    float_op_options = tf.profiler.ProfileOptionBuilder.float_operation()
    flops = tf.profiler.profile(
        sess.graph,
        run_meta=run_meta,
        cmd='op',
        options=float_op_options,
    )

    # Second pass: trainable variable parameters.
    parameter_options = tf.profiler.ProfileOptionBuilder.trainable_variables_parameter()
    params = tf.profiler.profile(
        sess.graph,
        run_meta=run_meta,
        cmd='op',
        options=parameter_options,
    )

    summary = "floatops _ {:,} totalparams _ {:,}".format(
        flops.total_float_ops, params.total_parameters)
    print(summary)
def _main(): train_path = '2007_train.txt' val_path = '2007_val.txt' # test_path = '2007_test.txt' log_dir = 'logs/000/' classes_path = 'class/voc_classes.txt' anchors_path = 'anchors/yolo_anchors.txt' class_names = get_classes(classes_path) num_classes = len(class_names) anchors = get_anchors(anchors_path) input_shape = (416, 416) # multiple of 32, hw is_tiny_version = len(anchors) == 6 # default setting if is_tiny_version: model = create_tiny_model( input_shape, anchors, num_classes, freeze_body=2, weights_path='model_data/tiny_yolo_weights.h5') else: model = create_model( input_shape, anchors, num_classes, freeze_body=2, weights_path='model_data/trained_weights_final_mobilenetv2.h5' ) # make sure you know what you freeze logging = TensorBoard(log_dir=log_dir) checkpoint = ModelCheckpoint( log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5', monitor='val_loss', save_weights_only=True, save_best_only=True, period=3) reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1) early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1) with open(train_path) as f: train_lines = f.readlines() with open(val_path) as f: val_lines = f.readlines() # with open(test_path) as f: # test_lines = f.readlines() # train_lines = np.load('train_logits.npy')[()] # val_lines = np.load('val_logits.npy')[()] num_val = int(len(train_lines)) num_train = int(len(val_lines)) num_anchors = len(anchors) image_input = Input(shape=(416, 416, 3)) teacher = yolo_body(image_input, num_anchors // 3, num_classes) teacher.load_weights("model_data/trained_weights_final.h5") # return the constructed network architecture # class+5 yolo3 = Reshape((13, 13, 3, 25))(teacher.layers[-3].output) yolo2 = Reshape((26, 26, 3, 25))(teacher.layers[-2].output) yolo1 = Reshape((52, 52, 3, 25))(teacher.layers[-1].output) teacher = Model(inputs=teacher.input, outputs=[yolo3, yolo2, yolo1]) teacher._make_predict_function() batch_size = 2 i = 0 #step 
for logits in data_generator_wrapper(train_lines, batch_size, input_shape, anchors, num_classes, teacher): #x , y = dat #train_logits[i] = logits #trainY[i] = dat #print(x.shape) print(logits[0][1]) print(logits[0][0].shape) print(logits[0][1].shape) #print(logits[0][1]) #print( logits[1] ) #print( train_logits[0][0][1].shape) #print( len( train_logits[0][1] ) ) #print(i) #print(img.shape) #print(dat) i += 1 if i >= 3: #(len(train_lines)//batch_size+1) : break '''
def create_model(input_shape,
                 anchors,
                 num_classes,
                 ignore_thresh,
                 load_pretrained=True,
                 freeze_body=2,
                 weights_path='data/yolo_weights.h5'):
    """
    Create the training model.

    :param input_shape: (height, width) of the input layer
    :param anchors: anchor box coordinates
    :param num_classes: number of classes
    :param ignore_thresh: IoU threshold forwarded to ``yolo_loss``
    :param load_pretrained: whether to load pretrained weights
    :param freeze_body: freeze mode: 1 freezes the first 185 layers, 2 freezes
        all but the last three layers, anything else freezes nothing
    :param weights_path: path of the pretrained weights
    :return: the created model, made of the model body and the loss layer
    """
    K.clear_session()

    image_input = Input(shape=(None, None, 3))
    h, w = input_shape
    num_anchors = len(anchors)
    anchors_per_scale = num_anchors // 3  # anchors are split over three scales

    # Ground-truth inputs at three sizes, e.g. for a 416x416 input:
    # (13, 13, 3, num_classes + 5), (26, 26, 3, ...) and (52, 52, 3, ...).
    y_true = []
    for stride in (32, 16, 8):
        grid_shape = (h // stride, w // stride, anchors_per_scale, num_classes + 5)
        y_true.append(Input(shape=grid_shape))

    model_body = yolo_body(image_input, anchors_per_scale, num_classes)
    print(
        f"Creat YOLOv3 model with {num_anchors} anchors and {num_classes} classes."
    )

    # Load the pretrained weights and freeze the selected layers.
    if load_pretrained:
        model_body.load_weights(weights_path, by_name=True, skip_mismatch=True)
        print(f"Load weights {weights_path}")
        if freeze_body in [1, 2]:
            # Mode 1: first 185 layers; mode 2: everything except the last three.
            if freeze_body == 1:
                num = 185
            else:
                num = len(model_body.layers) - 3
            for index in range(num):
                model_body.layers[index].trainable = False
            print(
                f"Freeze the frist {num} layers of total {len(model_body.layers)} layers"
            )

    # Build the loss layer that computes the loss from predictions and targets.
    loss_arguments = {
        "anchors": anchors,
        "num_classes": num_classes,
        "ignore_thresh": ignore_thresh,
    }
    loss_layer = Lambda(yolo_loss,
                        output_shape=(1, ),
                        name="yolo_loss",
                        arguments=loss_arguments)
    model_loss = loss_layer([*model_body.output, *y_true])

    models = Model([model_body.input, *y_true], model_loss)
    return models