Example #1
def train():
    """Def trains a Keras model of SqueezeDet and stores the checkpoint after each epoch
    """

    checkpoint_dir = log_dir_name + "/checkpoints"
    tb_dir = log_dir_name + "/tensorboard"

    if tf.gfile.Exists(checkpoint_dir):
        tf.gfile.DeleteRecursively(checkpoint_dir)

    if tf.gfile.Exists(tb_dir):
        tf.gfile.DeleteRecursively(tb_dir)

    tf.gfile.MakeDirs(tb_dir)
    tf.gfile.MakeDirs(checkpoint_dir)

    # read the lists of image and ground truth file paths
    with open(img_file) as imgs:
        img_names = imgs.read().splitlines()
    with open(gt_file) as gts:
        gt_names = gts.read().splitlines()

    cfg = load_dict(CONFIG)

    cfg.img_file = img_file
    cfg.gt_file = gt_file
    cfg.images = img_names
    cfg.gts = gt_names
    cfg.init_file = init_file
    cfg.EPOCHS = EPOCHS
    cfg.OPTIMIZER = OPTIMIZER
    cfg.CUDA_VISIBLE_DEVICES = CUDA_VISIBLE_DEVICES
    cfg.GPUS = GPUS
    cfg.REDUCELRONPLATEAU = REDUCELRONPLATEAU

    if GPUS < 2:

        os.environ['CUDA_VISIBLE_DEVICES'] = CUDA_VISIBLE_DEVICES

    else:

        gpus = ""
        for i in range(GPUS):
            gpus += str(i) + ","
        os.environ['CUDA_VISIBLE_DEVICES'] = gpus

    cfg.BATCH_SIZE = cfg.BATCH_SIZE * GPUS

    nbatches_train, mod = divmod(len(img_names), cfg.BATCH_SIZE)

    if STEPS is not None:
        nbatches_train = STEPS

    cfg.STEPS = nbatches_train

    print("Number of images: {}".format(len(img_names)))
    print("Number of epochs: {}".format(EPOCHS))
    print("Number of batches: {}".format(nbatches_train))
    print("Batch size: {}".format(cfg.BATCH_SIZE))

    config = tf.ConfigProto(allow_soft_placement=True)
    sess = tf.Session(config=config)
    K.set_session(sess)

    squeeze = SqueezeDet(cfg)

    cb = []

    if OPTIMIZER == "adam":
        opt = optimizers.Adam(lr=0.001 * GPUS, clipnorm=cfg.MAX_GRAD_NORM)
        cfg.LR = 0.001 * GPUS
    if OPTIMIZER == "rmsprop":
        opt = optimizers.RMSprop(lr=0.001 * GPUS, clipnorm=cfg.MAX_GRAD_NORM)
        cfg.LR = 0.001 * GPUS

    if OPTIMIZER == "adagrad":
        opt = optimizers.Adagrad(lr=1.0 * GPUS, clipnorm=cfg.MAX_GRAD_NORM)
        cfg.LR = 1 * GPUS

    else:

        opt = optimizers.SGD(lr=cfg.LEARNING_RATE * GPUS,
                             decay=0,
                             momentum=cfg.MOMENTUM,
                             nesterov=False,
                             clipnorm=cfg.MAX_GRAD_NORM)

        cfg.LR = cfg.LEARNING_RATE * GPUS

        print("Learning rate: {}".format(cfg.LEARNING_RATE * GPUS))

    with open(log_dir_name + '/config.pkl', 'wb') as f:
        pickle.dump(cfg, f, pickle.HIGHEST_PROTOCOL)

    tbCallBack = TensorBoard(log_dir=tb_dir,
                             histogram_freq=0,
                             write_graph=True,
                             write_images=True)

    cb.append(tbCallBack)

    if REDUCELRONPLATEAU:

        reduce_lr = ReduceLROnPlateau(monitor='loss',
                                      factor=0.1,
                                      verbose=1,
                                      patience=5,
                                      min_lr=0.0)

        cb.append(reduce_lr)

    if VERBOSE:
        squeeze.model.summary()

    if init_file != "none":

        print("Weights initialized by name from {}".format(init_file))

        load_only_possible_weights(squeeze.model, init_file, verbose=VERBOSE)
        """
        for layer in squeeze.model.layers:
            for v in layer.__dict__:
                v_arg = getattr(layer, v)
                if "fire10" in layer.name or "fire11" in layer.name or "conv12" in layer.name:
                    if hasattr(v_arg, 'initializer'):
                        initializer_method = getattr(v_arg, 'initializer')
                        initializer_method.run(session=sess)
                        #print('reinitializing layer {}.{}'.format(layer.name, v))
        

        """

    train_generator = generator_from_data_path(img_names, gt_names, config=cfg)

    if GPUS > 1:

        ckp_saver = ModelCheckpointMultiGPU(
            checkpoint_dir + "/model.{epoch:02d}-{loss:.2f}.hdf5",
            monitor='loss',
            verbose=0,
            save_best_only=False,
            save_weights_only=True,
            mode='auto',
            period=1)

        cb.append(ckp_saver)

        print("Using multi gpu support with {} GPUs".format(GPUS))

        parallel_model = multi_gpu_model(squeeze.model, gpus=GPUS)
        parallel_model.compile(optimizer=opt,
                               loss=[squeeze.loss],
                               metrics=[
                                   squeeze.loss_without_regularization,
                                   squeeze.bbox_loss, squeeze.class_loss,
                                   squeeze.conf_loss
                               ])

        parallel_model.fit_generator(train_generator,
                                     epochs=EPOCHS,
                                     steps_per_epoch=nbatches_train,
                                     callbacks=cb)

    else:

        ckp_saver = ModelCheckpoint(checkpoint_dir +
                                    "/model.{epoch:02d}-{loss:.2f}.hdf5",
                                    monitor='loss',
                                    verbose=0,
                                    save_best_only=False,
                                    save_weights_only=True,
                                    mode='auto',
                                    period=1)
        cb.append(ckp_saver)

        print("Using single GPU")
        squeeze.model.compile(optimizer=opt,
                              loss=[squeeze.loss],
                              metrics=[
                                  squeeze.loss_without_regularization,
                                  squeeze.bbox_loss, squeeze.class_loss,
                                  squeeze.conf_loss
                              ])

        squeeze.model.fit_generator(train_generator,
                                    epochs=EPOCHS,
                                    steps_per_epoch=nbatches_train,
                                    callbacks=cb)

    gc.collect()
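The function above reads everything it needs from module-level globals rather than arguments. Below is a minimal driver sketch with placeholder values, assuming the script's own imports and helpers (load_dict, SqueezeDet, generator_from_data_path, ...) are already in scope; only the names match what train() actually reads.

# Hypothetical configuration: values are placeholders, names match the globals used above.
log_dir_name = "./log/run1"
CONFIG = "squeeze.config"        # path handed to load_dict()
img_file = "img_train.txt"       # text file with one image path per line
gt_file = "gt_train.txt"         # text file with one ground truth path per line
init_file = "none"               # or a .hdf5 checkpoint to initialize weights by name
EPOCHS = 100
STEPS = None                     # None -> len(img_names) // BATCH_SIZE batches per epoch
OPTIMIZER = "default"            # anything other than adam/rmsprop/adagrad selects SGD
CUDA_VISIBLE_DEVICES = "0"
GPUS = 1
REDUCELRONPLATEAU = True
VERBOSE = False

train()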
Example #2
def train():
    """Trains a Keras model of SqueezeDet and stores a checkpoint after each epoch.
    """

    #create subdirs for logging of checkpoints and tensorboard stuff
    checkpoint_dir = log_dir_name + "/checkpoints"
    tb_dir = log_dir_name + "/tensorboard"

    #delete old checkpoints and tensorboard stuff
    if tf.gfile.Exists(checkpoint_dir):
        tf.gfile.DeleteRecursively(checkpoint_dir)
    if tf.gfile.Exists(tb_dir):
        tf.gfile.DeleteRecursively(tb_dir)

    tf.gfile.MakeDirs(tb_dir)
    tf.gfile.MakeDirs(checkpoint_dir)

    #collect image and ground truth files with full path names
    # with open(img_file) as imgs:
    #     img_names = imgs.read().splitlines()
    # imgs.close()
    # with open(gt_file) as gts:
    #     gt_names = gts.read().splitlines()
    # gts.close()
    img_names = glob.glob(img_files + '/*.png')
    img_names.sort()
    gt_names = glob.glob(lbl_files + '/*.txt')
    gt_names.sort()

    #create config object
    cfg = load_dict(CONFIG)

    #add stuff for documentation to config
    cfg.img_file = img_file
    cfg.gt_file = gt_file
    cfg.images = img_names
    cfg.gts = gt_names
    cfg.init_file = init_file
    cfg.EPOCHS = EPOCHS
    cfg.OPTIMIZER = OPTIMIZER
    cfg.CUDA_VISIBLE_DEVICES = CUDA_VISIBLE_DEVICES
    cfg.GPUS = GPUS
    cfg.REDUCELRONPLATEAU = REDUCELRONPLATEAU

    #set gpu
    if GPUS < 2:
        os.environ['CUDA_VISIBLE_DEVICES'] = CUDA_VISIBLE_DEVICES

    else:

        gpus = ""
        for i in range(GPUS):
            gpus += str(i) + ","
        os.environ['CUDA_VISIBLE_DEVICES'] = gpus

    #scale batch size to gpus
    cfg.BATCH_SIZE = cfg.BATCH_SIZE * GPUS

    #compute number of batches per epoch
    nbatches_train, mod = divmod(len(img_names), cfg.BATCH_SIZE)

    if STEPS is not None:
        nbatches_train = STEPS

    cfg.STEPS = nbatches_train

    #print some run info
    print("Number of images: {}".format(len(img_names)))
    print("Number of epochs: {}".format(EPOCHS))
    print("Number of batches: {}".format(nbatches_train))
    print("Batch size: {}".format(cfg.BATCH_SIZE))

    #tf config and session
    config = tf.ConfigProto(allow_soft_placement=True)
    sess = tf.Session(config=config)
    K.set_session(sess)

    #instantiate model
    squeeze = SqueezeDet(cfg)

    #callbacks
    cb = []

    #set optimizer
    #multiply the learning rate by the number of workers to adjust for the increased batch size
    if OPTIMIZER == "adam":
        opt = optimizers.Adam(lr=0.001 * GPUS, clipnorm=cfg.MAX_GRAD_NORM)
        cfg.LR = 0.001 * GPUS
    elif OPTIMIZER == "rmsprop":
        opt = optimizers.RMSprop(lr=0.001 * GPUS, clipnorm=cfg.MAX_GRAD_NORM)
        cfg.LR = 0.001 * GPUS
    elif OPTIMIZER == "adagrad":
        opt = optimizers.Adagrad(lr=1.0 * GPUS, clipnorm=cfg.MAX_GRAD_NORM)
        cfg.LR = 1.0 * GPUS
    #use the default if nothing else is given
    else:
        # create SGD with momentum and gradient clipping
        opt = optimizers.SGD(lr=cfg.LEARNING_RATE * GPUS,
                             decay=0,
                             momentum=cfg.MOMENTUM,
                             nesterov=False,
                             clipnorm=cfg.MAX_GRAD_NORM)
        cfg.LR = cfg.LEARNING_RATE * GPUS
        print("Learning rate: {}".format(cfg.LEARNING_RATE * GPUS))

        #add manual learning rate decay
        #lrCallback = LearningRateScheduler(schedule)
        #cb.append(lrCallback)

    #save config file to log dir
    with open(log_dir_name + '/config.pkl', 'wb') as f:
        pickle.dump(cfg, f, pickle.HIGHEST_PROTOCOL)

    #add tensorboard callback
    tbCallBack = TensorBoard(log_dir=tb_dir,
                             histogram_freq=0,
                             write_graph=True,
                             write_images=True)

    cb.append(tbCallBack)

    #if the flag was given, add a ReduceLROnPlateau callback
    if REDUCELRONPLATEAU:

        reduce_lr = ReduceLROnPlateau(monitor='loss',
                                      factor=0.1,
                                      verbose=1,
                                      patience=5,
                                      min_lr=0.0)

        cb.append(reduce_lr)

    #print keras model summary
    if VERBOSE:
        squeeze.model.summary()

    if init_file != "none":

        print("Weights initialized by name from {}".format(init_file))

        load_only_possible_weights(squeeze.model, init_file, verbose=VERBOSE)

        #since these layers already existed in the ckpt, they got loaded; you can reinitialize them here. TODO: add a flag for that
        """
        for layer in squeeze.model.layers:
            for v in layer.__dict__:
                v_arg = getattr(layer, v)
                if "fire10" in layer.name or "fire11" in layer.name or "conv12" in layer.name:
                    if hasattr(v_arg, 'initializer'):
                        initializer_method = getattr(v_arg, 'initializer')
                        initializer_method.run(session=sess)
                        #print('reinitializing layer {}.{}'.format(layer.name, v))
        

        """

    #create train generator
    train_generator = generator_from_data_path(img_names, gt_names, config=cfg)

    #make model parallel if specified
    if GPUS > 1:

        #use multigpu model checkpoint
        ckp_saver = ModelCheckpointMultiGPU(
            checkpoint_dir + "/model.{epoch:02d}-{loss:.2f}.hdf5",
            monitor='loss',
            verbose=0,
            save_best_only=False,
            save_weights_only=True,
            mode='auto',
            period=1)

        cb.append(ckp_saver)

        print("Using multi gpu support with {} GPUs".format(GPUS))

        # make the model parallel
        parallel_model = multi_gpu_model(squeeze.model, gpus=GPUS)
        parallel_model.compile(optimizer=opt,
                               loss=[squeeze.loss],
                               metrics=[
                                   squeeze.loss_without_regularization,
                                   squeeze.bbox_loss, squeeze.class_loss,
                                   squeeze.conf_loss
                               ])

        #actually do the training
        parallel_model.fit_generator(train_generator,
                                     epochs=EPOCHS,
                                     steps_per_epoch=nbatches_train,
                                     callbacks=cb)

    else:

        # add a checkpoint saver
        ckp_saver = ModelCheckpoint(checkpoint_dir +
                                    "/model.{epoch:02d}-{loss:.2f}.hdf5",
                                    monitor='loss',
                                    verbose=0,
                                    save_best_only=False,
                                    save_weights_only=True,
                                    mode='auto',
                                    period=1)
        cb.append(ckp_saver)

        print("Using single GPU")
        #compile model from squeeze object, loss is not a function of model directly
        squeeze.model.compile(optimizer=opt,
                              loss=[squeeze.loss],
                              metrics=[
                                  squeeze.loss_without_regularization,
                                  squeeze.bbox_loss, squeeze.class_loss,
                                  squeeze.conf_loss
                              ])

        #actually do the training
        squeeze.model.fit_generator(train_generator,
                                    epochs=EPOCHS,
                                    steps_per_epoch=nbatches_train,
                                    callbacks=cb)

    gc.collect()
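This variant globs .png images from img_files and .txt ground truths from lbl_files and relies on the two sorted lists lining up pairwise. Below is a small sanity check one might run right after the two sort() calls, assuming matching file stems is the intended pairing rule (the snippet itself does not state it).

import os

def check_pairs(img_names, gt_names):
    # Hypothetical helper: flag image/label pairs whose file stems do not match.
    assert len(img_names) == len(gt_names), "image/label count mismatch"
    for img, gt in zip(img_names, gt_names):
        img_stem = os.path.splitext(os.path.basename(img))[0]
        gt_stem = os.path.splitext(os.path.basename(gt))[0]
        if img_stem != gt_stem:
            print("possible mismatch: {} vs {}".format(img, gt))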
Example #3
def train():
    """Def trains a Keras model of SqueezeDet and stores the checkpoint after each epoch
    """

    #create subdirs for logging of checkpoints and tensorboard stuff
    checkpoint_dir = log_dir_name + "/checkpoints"
    tb_dir = log_dir_name + "/tensorboard"

    #delete old checkpoints and tensorboard stuff
    if tf.gfile.Exists(checkpoint_dir):
        tf.gfile.DeleteRecursively(checkpoint_dir)

    if tf.gfile.Exists(tb_dir):
        tf.gfile.DeleteRecursively(tb_dir)

    tf.gfile.MakeDirs(tb_dir)
    tf.gfile.MakeDirs(checkpoint_dir)

    #open files listing the images and ground truths with full path names
    with open(img_file) as imgs:
        img_names = imgs.read().splitlines()
    with open(gt_file) as gts:
        gt_names = gts.read().splitlines()

    #create config object
    cfg = load_dict(CONFIG)
    print('cfg.ANCHORS:', cfg.ANCHORS)
    #add stuff for documentation to config
    cfg.img_file = img_file
    cfg.gt_file = gt_file
    cfg.images = img_names
    cfg.gts = gt_names
    cfg.init_file = init_file
    cfg.EPOCHS = EPOCHS
    cfg.OPTIMIZER = OPTIMIZER
    cfg.CUDA_VISIBLE_DEVICES = CUDA_VISIBLE_DEVICES
    cfg.GPUS = GPUS
    cfg.REDUCELRONPLATEAU = REDUCELRONPLATEAU
    os.environ['CUDA_VISIBLE_DEVICES'] = CUDA_VISIBLE_DEVICES
    #scale batch size to gpus
    cfg.BATCH_SIZE = cfg.BATCH_SIZE * GPUS
    #compute number of batches per epoch
    nbatches_train, mod = divmod(len(img_names), cfg.BATCH_SIZE)

    if STEPS is not None:
        nbatches_train = STEPS

    cfg.STEPS = nbatches_train

    #print some run info
    print("Number of images: {}".format(len(img_names)))
    print("Number of epochs: {}".format(EPOCHS))
    print("Number of batches: {}".format(nbatches_train))
    print("Batch size: {}".format(cfg.BATCH_SIZE))

    #tf config and session
    config = tf.ConfigProto(allow_soft_placement=True)
    sess = tf.Session(config=config)
    K.set_session(sess)

    #instantiate model
    squeeze = SqueezeDet(cfg)

    #callbacks
    cb = []

    #set optimizer
    #multiply the learning rate by the number of workers to adjust for the increased batch size
    if OPTIMIZER == "adam":
        opt = optimizers.Adam(lr=0.001 * GPUS, clipnorm=cfg.MAX_GRAD_NORM)
        cfg.LR = 0.001 * GPUS
    elif OPTIMIZER == "rmsprop":
        opt = optimizers.RMSprop(lr=0.001 * GPUS, clipnorm=cfg.MAX_GRAD_NORM)
        cfg.LR = 0.001 * GPUS
    elif OPTIMIZER == "adagrad":
        opt = optimizers.Adagrad(lr=1.0 * GPUS, clipnorm=cfg.MAX_GRAD_NORM)
        cfg.LR = 1.0 * GPUS
    #use the default if nothing else is given
    else:
        # create SGD with momentum and gradient clipping
        opt = optimizers.SGD(lr=cfg.LEARNING_RATE * GPUS,
                             decay=0,
                             momentum=cfg.MOMENTUM,
                             nesterov=False,
                             clipnorm=cfg.MAX_GRAD_NORM)
        cfg.LR = cfg.LEARNING_RATE * GPUS
        print("Learning rate: {}".format(cfg.LEARNING_RATE * GPUS))

    #save config file to log dir
    with open(log_dir_name + '/config.pkl', 'wb') as f:
        pickle.dump(cfg, f, pickle.HIGHEST_PROTOCOL)

    #add tensorboard callback
    tbCallBack = TensorBoard(log_dir=tb_dir,
                             histogram_freq=0,
                             write_graph=True,
                             write_images=True)
    cb.append(tbCallBack)

    #if the flag was given, add a ReduceLROnPlateau callback
    #when learning stagnates, reducing the learning rate by a factor of 2 or 10 often helps; this callback monitors the metric and reduces the learning rate if no improvement is seen within 5 epochs
    if REDUCELRONPLATEAU:
        reduce_lr = ReduceLROnPlateau(monitor='loss',
                                      factor=0.1,
                                      verbose=1,
                                      patience=5,
                                      min_lr=0.0)
        cb.append(reduce_lr)

    #print keras model summary
    if VERBOSE:
        squeeze.model.summary()

    if init_file != "none":
        print("Weights initialized by name from {}".format(init_file))
        load_only_possible_weights(squeeze.model, init_file, verbose=VERBOSE)
    #create train generator
    train_generator = generator_from_data_path(img_names, gt_names, config=cfg)
    # add a checkpoint saver
    ckp_saver = ModelCheckpoint(checkpoint_dir +
                                "/model.{epoch:02d}-{loss:.2f}.hdf5",
                                monitor='loss',
                                verbose=0,
                                save_best_only=False,
                                save_weights_only=True,
                                mode='auto',
                                period=1)
    cb.append(ckp_saver)
    print("Using single GPU")
    #compile model from squeeze object, loss is not a function of model directly
    squeeze.model.compile(optimizer=opt,
                          loss=[squeeze.loss],
                          metrics=[
                              squeeze.loss_without_regularization,
                              squeeze.bbox_loss, squeeze.class_loss,
                              squeeze.conf_loss
                          ])

    #actually do the training
    squeeze.model.fit_generator(train_generator,
                                epochs=EPOCHS,
                                steps_per_epoch=nbatches_train,
                                callbacks=cb)
    gc.collect()
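Each run pickles its config to config.pkl and writes per-epoch weight files under checkpoints/, so a later evaluation script can rebuild the model from those artifacts. Below is a rough sketch, assuming the same log_dir_name and the SqueezeDet class are available and that the config object unpickles cleanly.

import glob
import pickle

# Hypothetical reload of a finished run; file names follow the patterns used above.
with open(log_dir_name + "/config.pkl", "rb") as f:
    cfg = pickle.load(f)

ckpts = sorted(glob.glob(log_dir_name + "/checkpoints/model.*.hdf5"))
squeeze = SqueezeDet(cfg)
squeeze.model.load_weights(ckpts[-1])   # last checkpoint by epoch number (epochs < 100)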