def eval(img_file):
    """Run SqueezeDet on one image and save a copy with the detections drawn.

    The image is resized to the network input size, standardised with the
    mean and standard deviation of the whole image, and passed through the
    model. Every box kept by ``filter_batch`` is drawn on the image at its
    original resolution, its score is printed, and the annotated image is
    written to ``restul.png`` in the current working directory.

    The model configuration, weights path and GPU selection come from the
    module-level names ``CONFIG``, ``checkpoint_dir`` and
    ``CUDA_VISIBLE_DEVICES``.

    Args:
        img_file: Path of the image to run detection on.

    Raises:
        IOError: If ``img_file`` cannot be read as an image.
    """
    #create config object
    config = load_dict(CONFIG)
    config.BATCH_SIZE = 1  # number of files processed per call
    config.FINAL_THRESHOLD = 0.5  # confidence threshold

    #hide the other gpus so tensorflow only uses this one
    os.environ['CUDA_VISIBLE_DEVICES'] = CUDA_VISIBLE_DEVICES

    #tf config and session
    cfg = tf.ConfigProto(allow_soft_placement=True)
    sess = tf.Session(config=cfg)
    K.set_session(sess)

    #instantiate model
    squeeze = SqueezeDet(config)
    squeeze.model.load_weights(checkpoint_dir)

    # cv2.imread reports failure by returning None rather than raising, so
    # check explicitly instead of crashing on `.astype` with an AttributeError.
    raw = cv2.imread(img_file)
    if raw is None:
        raise IOError("Could not read image file: {}".format(img_file))
    img = raw.astype(np.float32, copy=False)
    orig_h, orig_w = img.shape[:2]

    # keep the full-resolution image for drawing; the network gets a resized,
    # standardised copy with a leading batch axis of 1
    draw_img = img
    img = cv2.resize(img, (config.IMAGE_WIDTH, config.IMAGE_HEIGHT))
    img = (img - np.mean(img)) / np.std(img)
    img = np.reshape(img, (1, config.IMAGE_HEIGHT, config.IMAGE_WIDTH, 3))

    y_pred = squeeze.model.predict(img)

    #filter batch with nms
    all_filtered_boxes, all_filtered_classes, all_filtered_scores = filter_batch(
        y_pred, config)

    # filter_batch only ever sees the resized input, so the boxes are
    # presumably expressed in network-input pixels (TODO confirm against
    # filter_batch). Map them back to the original resolution before drawing;
    # this is the identity when the image already has the network input size.
    x_scale = orig_w / float(config.IMAGE_WIDTH)
    y_scale = orig_h / float(config.IMAGE_HEIGHT)

    font = cv2.FONT_HERSHEY_SIMPLEX
    for det_box, score in zip(all_filtered_boxes[0], all_filtered_scores[0]):
        #transform into xmin, ymin, xmax, ymax
        det_box = bbox_transform_single_box(det_box)
        print(score)

        # OpenCV drawing functions require integer pixel coordinates
        top_left = (int(round(det_box[0] * x_scale)),
                    int(round(det_box[1] * y_scale)))
        bottom_right = (int(round(det_box[2] * x_scale)),
                        int(round(det_box[3] * y_scale)))

        #add rectangle and text
        cv2.rectangle(draw_img, top_left, bottom_right, (0, 0, 255), 1)
        cv2.putText(draw_img, 'head' + str(score), top_left, font, 0.5,
                    (0, 255, 0), 1, cv2.LINE_AA)

    # NOTE(review): 'restul.png' looks like a typo for 'result.png'; it is kept
    # unchanged because other tooling may read this exact file name.
    cv2.imwrite('restul.png', draw_img)
def train():
    """Train a Keras SqueezeDet model and store a checkpoint after each epoch.

    The function takes no arguments and returns nothing; it is driven by
    module-level settings (``log_dir_name``, ``img_files``, ``lbl_files``,
    ``img_file``, ``gt_file``, ``CONFIG``, ``init_file``, ``EPOCHS``,
    ``OPTIMIZER``, ``GPUS``, ``STEPS``, ``CUDA_VISIBLE_DEVICES``,
    ``REDUCELRONPLATEAU``, ``VERBOSE``).

    Side effects:
        * ``<log_dir_name>/checkpoints`` and ``<log_dir_name>/tensorboard``
          are deleted if they exist and then recreated.
        * ``CUDA_VISIBLE_DEVICES`` is set in ``os.environ``.
        * The config, extended with the run settings, is pickled to
          ``<log_dir_name>/config.pkl``.
        * Model weights are written to the checkpoint directory every epoch.
    """
    #create subdirs for logging of checkpoints and tensorboard stuff
    checkpoint_dir = log_dir_name + "/checkpoints"
    tb_dir = log_dir_name + "/tensorboard"

    #delete old checkpoints and tensorboard stuff
    if tf.gfile.Exists(checkpoint_dir):
        tf.gfile.DeleteRecursively(checkpoint_dir)
    if tf.gfile.Exists(tb_dir):
        tf.gfile.DeleteRecursively(tb_dir)
    tf.gfile.MakeDirs(tb_dir)
    tf.gfile.MakeDirs(checkpoint_dir)

    # images and labels are matched purely by position after sorting
    img_names = sorted(glob.glob(img_files + '/*.png'))
    gt_names = sorted(glob.glob(lbl_files + '/*.txt'))

    #create config object
    cfg = load_dict(CONFIG)

    #add stuff for documentation to config
    cfg.img_file = img_file
    cfg.gt_file = gt_file
    cfg.images = img_names
    cfg.gts = gt_names
    cfg.init_file = init_file
    cfg.EPOCHS = EPOCHS
    cfg.OPTIMIZER = OPTIMIZER
    cfg.CUDA_VISIBLE_DEVICES = CUDA_VISIBLE_DEVICES
    cfg.GPUS = GPUS
    cfg.REDUCELRONPLATEAU = REDUCELRONPLATEAU

    #set gpu
    if GPUS < 2:
        os.environ['CUDA_VISIBLE_DEVICES'] = CUDA_VISIBLE_DEVICES
    else:
        # expose the first GPUS devices, e.g. "0,1,2"
        os.environ['CUDA_VISIBLE_DEVICES'] = ",".join(
            str(i) for i in range(GPUS))

    #scale batch size to gpus
    cfg.BATCH_SIZE = cfg.BATCH_SIZE * GPUS

    #compute number of batches per epoch (a trailing partial batch is dropped)
    nbatches_train = len(img_names) // cfg.BATCH_SIZE
    if STEPS is not None:
        nbatches_train = STEPS
    cfg.STEPS = nbatches_train

    #print some run info
    print("Number of images: {}".format(len(img_names)))
    print("Number of epochs: {}".format(EPOCHS))
    print("Number of batches: {}".format(nbatches_train))
    print("Batch size: {}".format(cfg.BATCH_SIZE))

    #tf config and session
    config = tf.ConfigProto(allow_soft_placement=True)
    sess = tf.Session(config=config)
    K.set_session(sess)

    #instantiate model
    squeeze = SqueezeDet(cfg)

    #callbacks
    cb = []

    #set optimizer
    #multiply by number of workers to adjust for increased batch size
    # This must be a single if/elif/else chain: with independent `if`
    # statements the trailing `else` pairs only with the "adagrad" test and
    # silently replaces an "adam" or "rmsprop" choice with SGD.
    if OPTIMIZER == "adam":
        cfg.LR = 0.001 * GPUS
        opt = optimizers.Adam(lr=0.001 * GPUS, clipnorm=cfg.MAX_GRAD_NORM)
    elif OPTIMIZER == "rmsprop":
        cfg.LR = 0.001 * GPUS
        opt = optimizers.RMSprop(lr=0.001 * GPUS, clipnorm=cfg.MAX_GRAD_NORM)
    elif OPTIMIZER == "adagrad":
        cfg.LR = 1 * GPUS
        opt = optimizers.Adagrad(lr=1.0 * GPUS, clipnorm=cfg.MAX_GRAD_NORM)
    else:
        #use default if nothing is given:
        # sgd with momentum and gradient clipping
        cfg.LR = cfg.LEARNING_RATE * GPUS
        opt = optimizers.SGD(lr=cfg.LEARNING_RATE * GPUS,
                             decay=0,
                             momentum=cfg.MOMENTUM,
                             nesterov=False,
                             clipnorm=cfg.MAX_GRAD_NORM)

    # report the rate of the optimizer actually selected
    print("Learning rate: {}".format(cfg.LR))

    #save config file to log dir
    with open(log_dir_name + '/config.pkl', 'wb') as f:
        pickle.dump(cfg, f, pickle.HIGHEST_PROTOCOL)

    #add tensorboard callback
    tbCallBack = TensorBoard(log_dir=tb_dir,
                             histogram_freq=0,
                             write_graph=True,
                             write_images=True)
    cb.append(tbCallBack)

    #if flag was given, add reducelronplateau callback
    if REDUCELRONPLATEAU:
        reduce_lr = ReduceLROnPlateau(monitor='loss',
                                      factor=0.1,
                                      verbose=1,
                                      patience=5,
                                      min_lr=0.0)
        cb.append(reduce_lr)

    #print keras model summary
    if VERBOSE:
        # summary() prints by itself and returns None, so do not wrap in print
        squeeze.model.summary()

    if init_file != "none":
        print("Weights initialized by name from {}".format(init_file))
        load_only_possible_weights(squeeze.model, init_file, verbose=VERBOSE)
        # TODO: add a flag to re-initialize the layers that were loaded from
        # the checkpoint (the disabled code targeted fire10, fire11, conv12).

    #create train generator
    train_generator = generator_from_data_path(img_names, gt_names, config=cfg)

    ckpt_pattern = checkpoint_dir + "/model.{epoch:02d}-{loss:.2f}.hdf5"

    #make model parallel if specified
    if GPUS > 1:
        #use multigpu model checkpoint
        ckp_saver = ModelCheckpointMultiGPU(ckpt_pattern,
                                            monitor='loss',
                                            verbose=0,
                                            save_best_only=False,
                                            save_weights_only=True,
                                            mode='auto',
                                            period=1)
        cb.append(ckp_saver)
        print("Using multi gpu support with {} GPUs".format(GPUS))
        # make the model parallel
        model = multi_gpu_model(squeeze.model, gpus=GPUS)
    else:
        # add a checkpoint saver
        ckp_saver = ModelCheckpoint(ckpt_pattern,
                                    monitor='loss',
                                    verbose=0,
                                    save_best_only=False,
                                    save_weights_only=True,
                                    mode='auto',
                                    period=1)
        cb.append(ckp_saver)
        print("Using single GPU")
        model = squeeze.model

    #compile with the loss of the squeeze object, loss is not a function of
    #the model directly
    model.compile(optimizer=opt,
                  loss=[squeeze.loss],
                  metrics=[
                      squeeze.loss_without_regularization,
                      squeeze.bbox_loss,
                      squeeze.class_loss,
                      squeeze.conf_loss
                  ])

    #actually do the training
    model.fit_generator(train_generator,
                        epochs=EPOCHS,
                        steps_per_epoch=nbatches_train,
                        callbacks=cb)

    gc.collect()
def eval():
    """Build the TensorBoard evaluation graph and evaluate SqueezeDet checkpoints.

    Takes no arguments and returns nothing; everything is read from
    module-level names (``CONFIG``, ``img_file``, ``gt_file``, ``GPUS``,
    ``steps``, ``CUDA_VISIBLE_DEVICES``, ``STARTWITH``, ``tensorboard_dir``,
    ``log_dir_name``, ``TESTING``, ``img_file_test``, ``gt_file_test``,
    ``tensorboard_dir_test``, ``checkpoint_dir``).

    The visible code:
      1. loads the config and the validation image / ground-truth lists,
      2. creates TF variables, placeholders, assign ops and summaries for the
         losses, a visualization image batch, and per-class precision, recall,
         AP and f1 plus mAP,
      3. builds and compiles the model (multi-GPU when ``GPUS > 1``),
      4. opens ``<log_dir_name>/metrics.csv`` and writes its header,
      5. when ``TESTING`` is set, builds a second set of "test" summaries and
         evaluates a checkpoint on the test data.

    NOTE(review): this block reads ``ckpts`` and ``ckpt`` without any visible
    assignment, and the "listening for new checkpoints" section has no code;
    parts of the function appear to be missing. See the inline notes.
    """

    #create config object
    cfg = load_dict(CONFIG)

    #open files with images and ground truths files with full path names
    with open(img_file) as imgs:
        img_names = imgs.read().splitlines()
    # redundant: the with-statement has already closed the file
    imgs.close()
    with open(gt_file) as gts:
        gt_names = gts.read().splitlines()
    # redundant: the with-statement has already closed the file
    gts.close()

    #if multigpu support, adjust batch size
    if GPUS > 1:
        cfg.BATCH_SIZE = GPUS * cfg.BATCH_SIZE

    #compute number of batches per epoch
    # (the remainder `mod` is not used; a trailing partial batch is dropped)
    nbatches_valid, mod = divmod(len(gt_names), cfg.BATCH_SIZE)

    #if a number for steps was given
    if steps is not None:
        nbatches_valid = steps

    #set gpu to use if no multigpu
    #hide the other gpus so tensorflow only uses this one
    os.environ['CUDA_VISIBLE_DEVICES'] = CUDA_VISIBLE_DEVICES

    #tf config and session
    config = tf.ConfigProto(allow_soft_placement=True)
    sess = tf.Session(config=config)
    K.set_session(sess)

    #Variables to visualize losses (as metrics) for tensorboard
    loss_var = tf.Variable(
        initial_value=0, trainable=False,
        name='val_loss',
        dtype=tf.float32
    )
    loss_without_regularization_var = tf.Variable(
        initial_value=0, trainable=False,
        name='val_loss_without_regularization',
        dtype=tf.float32
    )
    conf_loss_var = tf.Variable(
        initial_value=0, trainable=False,
        name='val_conf_loss',
        dtype=tf.float32
    )
    class_loss_var = tf.Variable(
        initial_value=0, trainable=False,
        name='val_class_loss',
        dtype=tf.float32
    )
    bbox_loss_var = tf.Variable(
        initial_value=0, trainable=False,
        name='val_bbox_loss',
        dtype=tf.float32
    )

    #create placeholders for metrics. Variables get assigned these.
    loss_placeholder = tf.placeholder(loss_var.dtype, shape=())
    loss_without_regularization_placeholder = tf.placeholder(loss_without_regularization_var.dtype, shape=())
    conf_loss_placeholder = tf.placeholder(conf_loss_var.dtype, shape=())
    class_loss_placeholder = tf.placeholder(class_loss_var.dtype, shape=())
    bbox_loss_placeholder = tf.placeholder(bbox_loss_var.dtype, shape=())

    #we have to create the assign ops here and call the assign ops with a feed dict, otherwise memory leak
    loss_assign_ops = [
        loss_var.assign(loss_placeholder),
        loss_without_regularization_var.assign(loss_without_regularization_placeholder),
        conf_loss_var.assign(conf_loss_placeholder),
        class_loss_var.assign(class_loss_placeholder),
        bbox_loss_var.assign(bbox_loss_placeholder)
    ]

    tf.summary.scalar("loss", loss_var)
    tf.summary.scalar("loss_without_regularization", loss_without_regularization_var)
    tf.summary.scalar("conf_loss", conf_loss_var)
    tf.summary.scalar("class_loss", class_loss_var)
    tf.summary.scalar("bbox_loss", bbox_loss_var)

    #variables for images to visualize
    images_with_boxes = tf.Variable(
        initial_value = np.zeros((cfg.VISUALIZATION_BATCH_SIZE, cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH, 3)),
        name="image", dtype=tf.float32)
    update_placeholder = tf.placeholder(images_with_boxes.dtype, shape=images_with_boxes.get_shape())
    # NOTE(review): update_images is never run in the code visible here
    update_images = images_with_boxes.assign(update_placeholder)
    tf.summary.image("images", images_with_boxes, max_outputs=cfg.VISUALIZATION_BATCH_SIZE )

    #variables for precision recall and mean average precision
    precisions = []
    recalls = []
    APs = []
    f1s= []

    #placeholders as above
    precision_placeholders = []
    recall_placeholders = []
    AP_placeholders = []
    f1_placeholders = []
    prmap_assign_ops = []

    #add variables, placeholders and assign ops for each class
    for i, name in enumerate(cfg.CLASS_NAMES):
        print("Creating tensorboard plots for " + name)
        precisions.append(
            tf.Variable(
                initial_value=0, trainable=False,
                name="precision/" +name ,
                dtype=tf.float32
            ))
        recalls.append(
            tf.Variable(
                initial_value=0, trainable=False,
                name="recall/" +name ,
                dtype=tf.float32
            ))
        f1s.append(
            tf.Variable(
                initial_value=0, trainable=False,
                name="f1/" +name ,
                dtype=tf.float32
            ))
        APs.append(
            tf.Variable(
                initial_value=0, trainable=False,
                name="AP/" +name ,
                dtype=tf.float32
            ))

        precision_placeholders.append(
            tf.placeholder(dtype=precisions[i].dtype,
                           shape=precisions[i].shape))
        recall_placeholders.append(
            tf.placeholder(dtype=recalls[i].dtype,
                           shape=recalls[i].shape))
        AP_placeholders.append(
            tf.placeholder(dtype=APs[i].dtype,
                           shape=APs[i].shape))
        f1_placeholders.append(
            tf.placeholder(dtype=f1s[i].dtype,
                           shape=f1s[i].shape))

        # four assign ops per class, appended in the order
        # precision, recall, AP, f1
        prmap_assign_ops.append( precisions[i].assign(precision_placeholders[i]))
        prmap_assign_ops.append(recalls[i].assign(recall_placeholders[i]))
        prmap_assign_ops.append(APs[i].assign(AP_placeholders[i]))
        prmap_assign_ops.append(f1s[i].assign(f1_placeholders[i]))

    #same for mean average precision
    mAP = tf.Variable(
        initial_value=0, trainable=False,
        name="mAP",
        dtype=tf.float32
    )
    mAP_placeholder = tf.placeholder(mAP.dtype, shape=())
    prmap_assign_ops.append(mAP.assign(mAP_placeholder))
    tf.summary.scalar("mAP", mAP)

    for i, name in enumerate(cfg.CLASS_NAMES):
        tf.summary.scalar("precision/" + name, precisions[i])
        tf.summary.scalar("recall/" + name, recalls[i])
        tf.summary.scalar("AP/" + name, APs[i])
        tf.summary.scalar("f1/" + name, f1s[i])

    # merges every summary registered so far in the default collection
    merged = tf.summary.merge_all()

    # only wipe the tensorboard dir when STARTWITH is None
    # (presumably STARTWITH marks a resumed run — TODO confirm)
    if STARTWITH is None:
        #check for tensorboard dir and delete old stuff
        if tf.gfile.Exists(tensorboard_dir):
            tf.gfile.DeleteRecursively(tensorboard_dir)
        tf.gfile.MakeDirs(tensorboard_dir)

    writer = tf.summary.FileWriter(tensorboard_dir)

    #instantiate model
    squeeze = SqueezeDet(cfg)

    #dummy optimizer for compilation
    sgd = optimizers.SGD(lr=cfg.LEARNING_RATE, decay=0, momentum=cfg.MOMENTUM,
                         nesterov=False, clipnorm=cfg.MAX_GRAD_NORM)

    # The metric order given to compile() fixes the indices used on `losses`
    # further down: [0] loss, [1] bbox, [2] class, [3] conf,
    # [4] loss without regularization.
    if GPUS > 1:
        #parallelize model
        model = multi_gpu_model(squeeze.model, gpus=GPUS)
        model.compile(optimizer=sgd,
                      loss=[squeeze.loss], metrics=[squeeze.bbox_loss, squeeze.class_loss,
                                                    squeeze.conf_loss, squeeze.loss_without_regularization])
    else:
        #compile model from squeeze object, loss is not a function of model directly
        squeeze.model.compile(optimizer=sgd,
                              loss=[squeeze.loss], metrics=[squeeze.bbox_loss, squeeze.class_loss,
                                                            squeeze.conf_loss, squeeze.loss_without_regularization])
        model = squeeze.model

    #models already evaluated
    evaluated_models = set()

    #get the best ckpts for test set
    # NOTE(review): these trackers are initialised but never updated in the
    # code visible here
    best_val_loss_ckpt = None
    best_val_loss = np.inf
    best_mAP_ckpt = None
    best_mAP = -np.inf
    time_out_counter = 0

    #use this for saving metrics to a csv
    # NOTE(review): f is never closed in the code visible here
    f = open( log_dir_name + "/metrics.csv", "w")
    header = "epoch;regularized;loss;bbox;class;conf;"
    for i, name in enumerate(cfg.CLASS_NAMES):
        header += name +"_precision;" + name+"_recall;" + name + "_AP;" + name + "_f1;"
    header += "\n"
    f.write(header)

    #listening for new checkpoints
    # NOTE(review): no code follows this comment; the checkpoint-polling
    # section seems to be missing from this block.

    #evaluate on test set
    if TESTING:
        # NOTE(review): `ckpts` is read here before any visible assignment in
        # this function — confirm where it is meant to come from.
        ckpts = set(ckpts)

        #get test images and gt
        with open(img_file_test) as imgs:
            img_names_test = imgs.read().splitlines()
        imgs.close()
        with open(gt_file_test) as gts:
            gt_names_test = gts.read().splitlines()
        gts.close()

        #compute number of batches per epoch
        nbatches_test, mod = divmod(len(gt_names_test), cfg.BATCH_SIZE)

        #if a number for steps was given
        if steps is not None:
            nbatches_test = steps

        #again create Variables to visualize losses for tensorboard, but this time for test set
        test_loss_var = tf.Variable(
            initial_value=0, trainable=False,
            name='test_loss',
            dtype=tf.float32
        )
        test_loss_without_regularization_var = tf.Variable(
            initial_value=0, trainable=False,
            name='test_loss_without_regularization',
            dtype=tf.float32
        )
        test_conf_loss_var = tf.Variable(
            initial_value=0, trainable=False,
            name='test_conf_loss',
            dtype=tf.float32
        )
        test_class_loss_var = tf.Variable(
            initial_value=0, trainable=False,
            name='test_class_loss',
            dtype=tf.float32
        )
        test_bbox_loss_var = tf.Variable(
            initial_value=0, trainable=False,
            name='test_bbox_loss',
            dtype=tf.float32
        )

        #we have to create the assign ops here and call the assign ops with a feed dict, otherwise memory leak
        test_loss_placeholder = tf.placeholder(loss_var.dtype, shape=())
        test_loss_without_regularization_placeholder = tf.placeholder(loss_without_regularization_var.dtype, shape=())
        test_conf_loss_placeholder = tf.placeholder(conf_loss_var.dtype, shape=())
        test_class_loss_placeholder = tf.placeholder(class_loss_var.dtype, shape=())
        test_bbox_loss_placeholder = tf.placeholder(bbox_loss_var.dtype, shape=())

        # NOTE(review): test_loss_assign_ops is built but never run below
        test_loss_assign_ops = [
            test_loss_var.assign(test_loss_placeholder),
            test_loss_without_regularization_var.assign(test_loss_without_regularization_placeholder),
            test_conf_loss_var.assign(test_conf_loss_placeholder),
            test_class_loss_var.assign(test_class_loss_placeholder),
            test_bbox_loss_var.assign(test_bbox_loss_placeholder)
        ]

        # NOTE(review): these "test/..." summaries read the validation
        # variables (loss_var, ...), not the test_* variables created just
        # above — confirm whether that is intended.
        tf.summary.scalar("test/loss", loss_var, collections=["test"])
        tf.summary.scalar("test/loss_without_regularization", loss_without_regularization_var, collections=["test"])
        tf.summary.scalar("test/conf_loss", conf_loss_var, collections=["test"])
        tf.summary.scalar("test/class_loss", class_loss_var, collections=["test"])
        tf.summary.scalar("test/bbox_loss", bbox_loss_var, collections=["test"])

        #variables for precision recall and mean average precision
        # these rebind the lists created for the validation graph above
        precisions = []
        recalls = []
        APs = []
        f1s= []

        precision_placeholders = []
        recall_placeholders = []
        AP_placeholders = []
        f1_placeholders = []
        prmap_assign_ops = []

        for i, name in enumerate(cfg.CLASS_NAMES):
            precisions.append(
                tf.Variable(
                    initial_value=0, trainable=False,
                    name="test/precision/" +name ,
                    dtype=tf.float32
                ))
            recalls.append(
                tf.Variable(
                    initial_value=0, trainable=False,
                    name="test/recall/" +name ,
                    dtype=tf.float32
                ))
            f1s.append(
                tf.Variable(
                    initial_value=0, trainable=False,
                    name="test/f1/" +name ,
                    dtype=tf.float32
                ))
            APs.append(
                tf.Variable(
                    initial_value=0, trainable=False,
                    name="test/AP/" +name ,
                    dtype=tf.float32
                ))

            precision_placeholders.append(
                tf.placeholder(dtype=precisions[i].dtype,
                               shape=precisions[i].shape))
            recall_placeholders.append(
                tf.placeholder(dtype=recalls[i].dtype,
                               shape=recalls[i].shape))
            AP_placeholders.append(
                tf.placeholder(dtype=APs[i].dtype,
                               shape=APs[i].shape))
            f1_placeholders.append(
                tf.placeholder(dtype=f1s[i].dtype,
                               shape=f1s[i].shape))

            prmap_assign_ops.append(precisions[i].assign(precision_placeholders[i]))
            prmap_assign_ops.append(recalls[i].assign(recall_placeholders[i]))
            prmap_assign_ops.append(APs[i].assign(AP_placeholders[i]))
            prmap_assign_ops.append(f1s[i].assign(f1_placeholders[i]))

        test_mAP = tf.Variable(
            initial_value=0, trainable=False,
            name="mAP",
            dtype=tf.float32
        )
        mAP_placeholder = tf.placeholder(test_mAP.dtype, shape=())
        prmap_assign_ops.append(test_mAP.assign(mAP_placeholder))

        # NOTE(review): this summary reads `mAP` (the validation variable),
        # while the assign op above writes `test_mAP`.
        tf.summary.scalar("test/mAP", mAP, collections=["test"])

        # NOTE(review): these four summaries are not inside a loop; `name` and
        # `i` are whatever the preceding for-loop left behind, so only one
        # class gets test summaries.
        tf.summary.scalar("test/precision/" + name, precisions[i], collections=["test"])
        tf.summary.scalar("test/recall/" + name, recalls[i], collections=["test"])
        tf.summary.scalar("test/AP/" + name, APs[i], collections=["test"])
        tf.summary.scalar("test/f1/" + name, f1s[i], collections=["test"])

        # only the summaries registered in the "test" collection
        merged = tf.summary.merge_all(key="test")

        #check for tensorboard dir and delete old stuff
        if tf.gfile.Exists(tensorboard_dir_test):
            tf.gfile.DeleteRecursively(tensorboard_dir_test)
        tf.gfile.MakeDirs(tensorboard_dir_test)

        writer = tf.summary.FileWriter(tensorboard_dir_test)

        # summary step counter
        i=1

        #go through given checkpoints
        # NOTE(review): `ckpt` is not assigned anywhere in this block;
        # presumably a `for ckpt in ckpts:` loop is missing here — confirm.
        print("Evaluating model {} on test data".format(ckpt) )

        #load this ckpt
        current_model = ckpt
        squeeze.model.load_weights(checkpoint_dir + "/"+ ckpt)

        # create 2 validation generators, one for metrics and one for object detection evaluation
        # we have to reset them each time to have the same data
        val_generator_1 = generator_from_data_path(img_names_test, gt_names_test, config=cfg)
        val_generator_2 = generator_from_data_path(img_names_test, gt_names_test, config=cfg)
        # create a generator for the visualization of bounding boxes

        print(" Evaluate losses...")

        #compute losses of whole val set
        losses = model.evaluate_generator(val_generator_1, steps=nbatches_test, max_queue_size=10,
                                          use_multiprocessing=False)

        #manually add losses to tensorboard
        # NOTE(review): this runs the validation assign ops and placeholders,
        # not test_loss_assign_ops / the test_* placeholders defined above.
        sess.run(loss_assign_ops , {loss_placeholder: losses[0],
                                    loss_without_regularization_placeholder: losses[4],
                                    conf_loss_placeholder: losses[3],
                                    class_loss_placeholder: losses[2],
                                    bbox_loss_placeholder: losses[1]})

        print(" Losses:")
        print(" Loss with regularization: {} val loss:{} \n bbox_loss:{} \n class_loss:{} \n conf_loss:{}".
              format(losses[0], losses[4], losses[1], losses[2], losses[3]) )

        #compute precision recall and mean average precision
        precision, recall, f1, AP = evaluate(model=model, generator=val_generator_2, steps=nbatches_test, config=cfg)

        #create feed dict for visualization
        prmap_feed_dict = {}
        # NOTE(review): this loop rebinds `i`, which was set to 1 above as the
        # summary step; add_summary below therefore receives the last class
        # index rather than that counter.
        for i, name in enumerate(cfg.CLASS_NAMES):
            prmap_feed_dict[precision_placeholders[i]] = precision[i]
            prmap_feed_dict[recall_placeholders[i]] = recall[i]
            # column 1 of AP is used as the per-class value
            # (meaning of the columns is defined by evaluate() — TODO confirm)
            prmap_feed_dict[AP_placeholders[i]] = AP[i,1]
            prmap_feed_dict[f1_placeholders[i]] = f1[i]

        prmap_feed_dict[mAP_placeholder] = np.mean(AP[:,1], axis=0)
        sess.run(prmap_assign_ops, prmap_feed_dict)

        #write everything to tensorboard
        writer.add_summary(merged.eval(session=sess), i)
        writer.flush()

        i+=1
def train():
    """Train a Keras SqueezeDet model and store a checkpoint after each epoch.

    The function takes no arguments and returns nothing; it is driven by
    module-level settings (``log_dir_name``, ``img_file``, ``gt_file``,
    ``CONFIG``, ``init_file``, ``EPOCHS``, ``OPTIMIZER``, ``GPUS``, ``STEPS``,
    ``CUDA_VISIBLE_DEVICES``, ``REDUCELRONPLATEAU``, ``VERBOSE``).
    ``img_file`` and ``gt_file`` are text files listing one image path and
    one ground-truth path per line.

    Side effects:
        * ``<log_dir_name>/checkpoints`` and ``<log_dir_name>/tensorboard``
          are deleted if they exist and then recreated.
        * ``CUDA_VISIBLE_DEVICES`` is set in ``os.environ``.
        * The config, extended with the run settings, is pickled to
          ``<log_dir_name>/config.pkl``.
        * Model weights are written to the checkpoint directory every epoch.
    """
    # sub-directories for checkpoints and tensorboard logs; start clean
    checkpoint_dir = log_dir_name + "/checkpoints"
    tb_dir = log_dir_name + "/tensorboard"
    if tf.gfile.Exists(checkpoint_dir):
        tf.gfile.DeleteRecursively(checkpoint_dir)
    if tf.gfile.Exists(tb_dir):
        tf.gfile.DeleteRecursively(tb_dir)
    tf.gfile.MakeDirs(tb_dir)
    tf.gfile.MakeDirs(checkpoint_dir)

    # the with-statement closes each file; no explicit close() is needed
    with open(img_file) as imgs:
        img_names = imgs.read().splitlines()
    with open(gt_file) as gts:
        gt_names = gts.read().splitlines()

    cfg = load_dict(CONFIG)

    # record the run settings in the config for documentation
    cfg.img_file = img_file
    cfg.gt_file = gt_file
    cfg.images = img_names
    cfg.gts = gt_names
    cfg.init_file = init_file
    cfg.EPOCHS = EPOCHS
    cfg.OPTIMIZER = OPTIMIZER
    cfg.CUDA_VISIBLE_DEVICES = CUDA_VISIBLE_DEVICES
    cfg.GPUS = GPUS
    cfg.REDUCELRONPLATEAU = REDUCELRONPLATEAU

    if GPUS < 2:
        os.environ['CUDA_VISIBLE_DEVICES'] = CUDA_VISIBLE_DEVICES
    else:
        # expose the first GPUS devices, e.g. "0,1,2"
        os.environ['CUDA_VISIBLE_DEVICES'] = ",".join(
            str(i) for i in range(GPUS))

    # scale the batch size with the number of GPUs
    cfg.BATCH_SIZE = cfg.BATCH_SIZE * GPUS

    # batches per epoch; a trailing partial batch is dropped
    nbatches_train = len(img_names) // cfg.BATCH_SIZE
    if STEPS is not None:
        nbatches_train = STEPS
    cfg.STEPS = nbatches_train

    print("Number of images: {}".format(len(img_names)))
    print("Number of epochs: {}".format(EPOCHS))
    print("Number of batches: {}".format(nbatches_train))
    print("Batch size: {}".format(cfg.BATCH_SIZE))

    config = tf.ConfigProto(allow_soft_placement=True)
    sess = tf.Session(config=config)
    K.set_session(sess)

    squeeze = SqueezeDet(cfg)

    cb = []

    # Learning rates are multiplied by the GPU count to match the larger
    # batch. This must be a single if/elif/else chain: with independent `if`
    # statements the trailing `else` pairs only with the "adagrad" test and
    # silently replaces an "adam" or "rmsprop" choice with SGD.
    if OPTIMIZER == "adam":
        cfg.LR = 0.001 * GPUS
        opt = optimizers.Adam(lr=0.001 * GPUS, clipnorm=cfg.MAX_GRAD_NORM)
    elif OPTIMIZER == "rmsprop":
        cfg.LR = 0.001 * GPUS
        opt = optimizers.RMSprop(lr=0.001 * GPUS, clipnorm=cfg.MAX_GRAD_NORM)
    elif OPTIMIZER == "adagrad":
        cfg.LR = 1 * GPUS
        opt = optimizers.Adagrad(lr=1.0 * GPUS, clipnorm=cfg.MAX_GRAD_NORM)
    else:
        # default: SGD with momentum and gradient clipping
        cfg.LR = cfg.LEARNING_RATE * GPUS
        opt = optimizers.SGD(lr=cfg.LEARNING_RATE * GPUS,
                             decay=0,
                             momentum=cfg.MOMENTUM,
                             nesterov=False,
                             clipnorm=cfg.MAX_GRAD_NORM)

    # report the rate of the optimizer actually selected
    print("Learning rate: {}".format(cfg.LR))

    # persist the effective config next to the logs
    with open(log_dir_name + '/config.pkl', 'wb') as f:
        pickle.dump(cfg, f, pickle.HIGHEST_PROTOCOL)

    tbCallBack = TensorBoard(log_dir=tb_dir,
                             histogram_freq=0,
                             write_graph=True,
                             write_images=True)
    cb.append(tbCallBack)

    if REDUCELRONPLATEAU:
        reduce_lr = ReduceLROnPlateau(monitor='loss',
                                      factor=0.1,
                                      verbose=1,
                                      patience=5,
                                      min_lr=0.0)
        cb.append(reduce_lr)

    if VERBOSE:
        # summary() prints by itself and returns None, so do not wrap in print
        squeeze.model.summary()

    if init_file != "none":
        print("Weights initialized by name from {}".format(init_file))
        load_only_possible_weights(squeeze.model, init_file, verbose=VERBOSE)
        # TODO: add a flag to re-initialize the layers that were loaded from
        # the checkpoint (the disabled code targeted fire10, fire11, conv12).

    train_generator = generator_from_data_path(img_names, gt_names, config=cfg)

    ckpt_pattern = checkpoint_dir + "/model.{epoch:02d}-{loss:.2f}.hdf5"

    if GPUS > 1:
        # multi-GPU aware checkpoint saver, then wrap the model
        ckp_saver = ModelCheckpointMultiGPU(ckpt_pattern,
                                            monitor='loss',
                                            verbose=0,
                                            save_best_only=False,
                                            save_weights_only=True,
                                            mode='auto',
                                            period=1)
        cb.append(ckp_saver)
        print("Using multi gpu support with {} GPUs".format(GPUS))
        model = multi_gpu_model(squeeze.model, gpus=GPUS)
    else:
        ckp_saver = ModelCheckpoint(ckpt_pattern,
                                    monitor='loss',
                                    verbose=0,
                                    save_best_only=False,
                                    save_weights_only=True,
                                    mode='auto',
                                    period=1)
        cb.append(ckp_saver)
        print("Using single GPU")
        model = squeeze.model

    # the loss and metrics live on the squeeze object, not on the Keras model
    model.compile(optimizer=opt,
                  loss=[squeeze.loss],
                  metrics=[
                      squeeze.loss_without_regularization,
                      squeeze.bbox_loss,
                      squeeze.class_loss,
                      squeeze.conf_loss
                  ])

    model.fit_generator(train_generator,
                        epochs=EPOCHS,
                        steps_per_epoch=nbatches_train,
                        callbacks=cb)

    gc.collect()
#/_/ \_,_/_/ \_,_/_/_/_/\__/\__/\__/_/ /___/ ################################################# wheights_path = cur_dir+'/data/'+"sqd_1k_anchors_176eps.hdf5" CONFIG = cur_dir+'/data/'+"squeeze.config" ################################################# #wheights_path = 'kitti.hdf5' #wheights_path = "log/checkpoints/kitti.hdf5" #wheights_path= "squeezedet_start_from_kitti_50eps.hdf5" #wheights_path ="model.10-11.06.hdf5" #wheights_path ="sqd_300eps_correct_ann.hdf5" #wheights_path ="sqd_1k_anchors_176eps.hdf5" #wheights_path = "sqd_400eps_lr01_start_from_kitti.hdf5" #wheights_path = "sqd_350eps.hdf5" # create config object cfg = load_dict(CONFIG) squeeze = SqueezeDet(cfg) # dummy optimizer for compilation sgd = optimizers.SGD(lr=cfg.LEARNING_RATE, decay=0, momentum=cfg.MOMENTUM, nesterov=False, clipnorm=cfg.MAX_GRAD_NORM) squeeze.model.compile(optimizer=sgd, loss=[squeeze.loss], metrics=[squeeze.bbox_loss, squeeze.class_loss, squeeze.conf_loss, squeeze.loss_without_regularization]) model = squeeze.model i = 0 squeeze.model.load_weights(wheights_path) def file_len(fname):
def train():
    """Train a Keras SqueezeDet model and store a checkpoint after each epoch.

    Single-device variant: ``CUDA_VISIBLE_DEVICES`` is always taken from the
    module-level setting, while ``GPUS`` still scales the batch size and the
    learning rate.

    The function takes no arguments and returns nothing; it is driven by
    module-level settings (``log_dir_name``, ``img_file``, ``gt_file``,
    ``CONFIG``, ``init_file``, ``EPOCHS``, ``OPTIMIZER``, ``GPUS``, ``STEPS``,
    ``CUDA_VISIBLE_DEVICES``, ``REDUCELRONPLATEAU``, ``VERBOSE``).
    ``img_file`` and ``gt_file`` are text files listing one image path and
    one ground-truth path per line.

    Side effects:
        * ``<log_dir_name>/checkpoints`` and ``<log_dir_name>/tensorboard``
          are deleted if they exist and then recreated.
        * ``CUDA_VISIBLE_DEVICES`` is set in ``os.environ``.
        * The config, extended with the run settings, is pickled to
          ``<log_dir_name>/config.pkl``.
        * Model weights are written to the checkpoint directory every epoch.
    """
    #create subdirs for logging of checkpoints and tensorboard stuff
    checkpoint_dir = log_dir_name + "/checkpoints"
    tb_dir = log_dir_name + "/tensorboard"

    #delete old checkpoints and tensorboard stuff
    if tf.gfile.Exists(checkpoint_dir):
        tf.gfile.DeleteRecursively(checkpoint_dir)
    if tf.gfile.Exists(tb_dir):
        tf.gfile.DeleteRecursively(tb_dir)
    tf.gfile.MakeDirs(tb_dir)
    tf.gfile.MakeDirs(checkpoint_dir)

    #open files with images and ground truths files with full path names
    # (the with-statement closes each file; no explicit close() is needed)
    with open(img_file) as imgs:
        img_names = imgs.read().splitlines()
    with open(gt_file) as gts:
        gt_names = gts.read().splitlines()

    #create config object
    cfg = load_dict(CONFIG)
    print('cfg.ANCHORS:', cfg.ANCHORS)

    #add stuff for documentation to config
    cfg.img_file = img_file
    cfg.gt_file = gt_file
    cfg.images = img_names
    cfg.gts = gt_names
    cfg.init_file = init_file
    cfg.EPOCHS = EPOCHS
    cfg.OPTIMIZER = OPTIMIZER
    cfg.CUDA_VISIBLE_DEVICES = CUDA_VISIBLE_DEVICES
    cfg.GPUS = GPUS
    cfg.REDUCELRONPLATEAU = REDUCELRONPLATEAU

    os.environ['CUDA_VISIBLE_DEVICES'] = CUDA_VISIBLE_DEVICES

    #scale batch size to gpus
    cfg.BATCH_SIZE = cfg.BATCH_SIZE * GPUS

    #compute number of batches per epoch (a trailing partial batch is dropped)
    nbatches_train = len(img_names) // cfg.BATCH_SIZE
    if STEPS is not None:
        nbatches_train = STEPS
    cfg.STEPS = nbatches_train

    #print some run info
    print("Number of images: {}".format(len(img_names)))
    print("Number of epochs: {}".format(EPOCHS))
    print("Number of batches: {}".format(nbatches_train))
    print("Batch size: {}".format(cfg.BATCH_SIZE))

    #tf config and session
    config = tf.ConfigProto(allow_soft_placement=True)
    sess = tf.Session(config=config)
    K.set_session(sess)

    #instantiate model
    squeeze = SqueezeDet(cfg)

    #callbacks
    cb = []

    #set optimizer
    #multiply by number of workers to adjust for increased batch size
    # This must be a single if/elif/else chain: with independent `if`
    # statements the trailing `else` pairs only with the "adagrad" test and
    # silently replaces an "adam" or "rmsprop" choice with SGD.
    if OPTIMIZER == "adam":
        cfg.LR = 0.001 * GPUS
        opt = optimizers.Adam(lr=0.001 * GPUS, clipnorm=cfg.MAX_GRAD_NORM)
    elif OPTIMIZER == "rmsprop":
        cfg.LR = 0.001 * GPUS
        opt = optimizers.RMSprop(lr=0.001 * GPUS, clipnorm=cfg.MAX_GRAD_NORM)
    elif OPTIMIZER == "adagrad":
        cfg.LR = 1 * GPUS
        opt = optimizers.Adagrad(lr=1.0 * GPUS, clipnorm=cfg.MAX_GRAD_NORM)
    else:
        #use default if nothing is given:
        # sgd with momentum and gradient clipping
        cfg.LR = cfg.LEARNING_RATE * GPUS
        opt = optimizers.SGD(lr=cfg.LEARNING_RATE * GPUS,
                             decay=0,
                             momentum=cfg.MOMENTUM,
                             nesterov=False,
                             clipnorm=cfg.MAX_GRAD_NORM)

    # report the rate of the optimizer actually selected
    print("Learning rate: {}".format(cfg.LR))

    #save config file to log dir
    with open(log_dir_name + '/config.pkl', 'wb') as f:
        pickle.dump(cfg, f, pickle.HIGHEST_PROTOCOL)

    #add tensorboard callback
    tbCallBack = TensorBoard(log_dir=tb_dir,
                             histogram_freq=0,
                             write_graph=True,
                             write_images=True)
    cb.append(tbCallBack)

    #if flag was given, add reducelronplateau callback
    # When learning stagnates, reducing the learning rate by a factor of 2 or
    # 10 often helps. This callback monitors the loss and lowers the learning
    # rate when no improvement is seen for 5 epochs.
    if REDUCELRONPLATEAU:
        reduce_lr = ReduceLROnPlateau(monitor='loss',
                                      factor=0.1,
                                      verbose=1,
                                      patience=5,
                                      min_lr=0.0)
        cb.append(reduce_lr)

    #print keras model summary
    if VERBOSE:
        # summary() prints by itself and returns None, so do not wrap in print
        squeeze.model.summary()

    if init_file != "none":
        print("Weights initialized by name from {}".format(init_file))
        load_only_possible_weights(squeeze.model, init_file, verbose=VERBOSE)

    #create train generator
    train_generator = generator_from_data_path(img_names, gt_names, config=cfg)

    # add a checkpoint saver that writes the weights after every epoch
    ckp_saver = ModelCheckpoint(
        checkpoint_dir + "/model.{epoch:02d}-{loss:.2f}.hdf5",
        monitor='loss',
        verbose=0,
        save_best_only=False,
        save_weights_only=True,
        mode='auto',
        period=1)
    cb.append(ckp_saver)

    print("Using single GPU")

    #compile model from squeeze object, loss is not a function of model directly
    squeeze.model.compile(optimizer=opt,
                          loss=[squeeze.loss],
                          metrics=[
                              squeeze.loss_without_regularization,
                              squeeze.bbox_loss,
                              squeeze.class_loss,
                              squeeze.conf_loss
                          ])

    #actually do the training
    squeeze.model.fit_generator(train_generator,
                                epochs=EPOCHS,
                                steps_per_epoch=nbatches_train,
                                callbacks=cb)

    gc.collect()