Example #1
    def find_last(self, verbose=0):
        '''
        Finds the last checkpoint file of the last trained model in the
        model directory.
        
        Returns:
        --------
            dir_name: The directory where events and weights are saved
            checkpoint: The path to the last checkpoint file
        '''
        # Get directory names. Each directory corresponds to a model
        dir_name, checkpoint = None, None
        dir_names = next(os.walk(self.model_dir))[1]
        key = self.config.NAME.lower()
        dir_names = list(filter(lambda f: f.startswith(key), dir_names))

        if verbose:
            print('>>> find_last checkpoint in : ', self.model_dir)
            print('    Dir starting with       : ', key, ' :', dir_names)

        dir_names = sorted(dir_names)
        if not dir_names:
            return None, None

        ## Loop over folders to find the most recent folder with a valid weights file
        for search_dir in reversed(dir_names):
            dir_name = os.path.join(self.model_dir, search_dir)
            # Find the last checkpoint in this dir

            checkpoints = next(os.walk(dir_name))[2]
            checkpoints = filter(lambda f: f.startswith(key), checkpoints)

            checkpoints = sorted(checkpoints)
            if verbose:
                print('    Folder: ', dir_name)
                print('    Checkpoints: ', checkpoints)
            if not checkpoints:
                continue
            checkpoint = os.path.join(dir_name, checkpoints[-1])
            break

        if verbose:
            log("    find_last():   dir_name: {}".format(
                'NotFound' if dir_name is None else dir_name))
            log("    find_last(): checkpoint: {}".format(
                'NotFound' if checkpoint is None else checkpoint))

        return dir_name, checkpoint
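A minimal usage sketch (hypothetical; assumes `model` is an instance of this class with `model_dir` and `config` already set):

    log_dir, checkpoint = model.find_last(verbose=1)
    if checkpoint is None:
        print('No checkpoint found in', model.model_dir)
    else:
        model.load_weights(checkpoint, by_name=True)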
Example #2
    def compile_only(self, learning_rate, layers):
        '''
        Compile the model without adding loss info
        learning_rate:  The learning rate to train with
        
        layers:         Allows selecting which layers to train. It can be:
                        - A regular expression to match layer names to train
                        - One of these predefined values:
                        heads: The RPN, classifier and mask heads of the network
                        all: All the layers
                        3+: Train Resnet stage 3 and up
                        4+: Train Resnet stage 4 and up
                        5+: Train Resnet stage 5 and up
        '''
        # Use Pre-defined layer regular expressions
        if layers in self.layer_regex.keys():
            layers = self.layer_regex[layers]

        # Train
        log("Compile with learing rate; {} Learning Moementum: {} ".format(
            learning_rate, self.config.LEARNING_MOMENTUM))
        log("Checkpoint Folder:  {} ".format(self.checkpoint_path))

        self.set_trainable(layers)
        self.compile(learning_rate, self.config.LEARNING_MOMENTUM)

        out_labels = self.get_deduped_metrics_names()
        callback_metrics = out_labels + ['val_' + n for n in out_labels]
        print(
            'Callback_metrics are:  ( val + _get_deduped_metrics_names() )\n')
        pp.pprint(callback_metrics)
        return
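A usage sketch; the 'heads' key is assumed to be one of the predefined entries in self.layer_regex:

    # Hypothetical usage of compile_only()
    model.compile_only(learning_rate=0.001, layers='heads')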
Example #3
    def set_log_dir(self, model_path=None):
        """Sets the model log directory and epoch counter.

        model_path: If None, or a format different from what this code uses
            then set a new log directory and start epochs from 0. Otherwise,
            extract the log directory and the epoch counter from the file
            name.
        """
        # Set date and epoch counter as if starting a new model
        self.tb_dir = os.path.join(self.model_dir, 'tensorboard')
        last_checkpoint_epoch = 0
        now = datetime.datetime.now()

        # If we have a model path with date and epochs use them

        if model_path:
            # Continue from where we left off. Get epoch and date from the file name
            # A sample model path might look like:
            # /path/to/logs/coco20171029T2315/mask_rcnn_coco_0001.h5
            model_path = model_path.replace('\\', "/")

            regex = r".*/\w+(\d{4})(\d{2})(\d{2})T(\d{2})(\d{2})/mask\_rcnn\_\w+(\d{4})\.h5"
            m = re.match(regex, model_path)
            if m:
                now = datetime.datetime(int(m.group(1)), int(m.group(2)),
                                        int(m.group(3)), int(m.group(4)),
                                        int(m.group(5)))
                last_checkpoint_epoch = int(m.group(6)) + 1

        if last_checkpoint_epoch > 0 and self.config.LAST_EPOCH_RAN > last_checkpoint_epoch:
            self.epoch = self.config.LAST_EPOCH_RAN
        else:
            self.epoch = last_checkpoint_epoch

        # Directory for training logs
        self.log_dir = os.path.join(
            self.model_dir, "{}{:%Y%m%dT%H%M}".format(self.config.NAME.lower(),
                                                      now))

        # Path to save after each epoch. Include placeholders that get filled by Keras.
        self.checkpoint_path = os.path.join(
            self.log_dir,
            "mask_rcnn_{}_*epoch*.h5".format(self.config.NAME.lower()))
        self.checkpoint_path = self.checkpoint_path.replace(
            "*epoch*", "{epoch:04d}")
        log('    set_log_dir: Checkpoint path set to : {}'.format(
            self.checkpoint_path))
        log('    set_log_dir: self.epoch set to {} '.format(self.epoch))
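A quick sanity check of the filename parsing above, run against the sample path from the comment:

    import datetime
    import re

    path = '/path/to/logs/coco20171029T2315/mask_rcnn_coco_0001.h5'
    regex = r".*/\w+(\d{4})(\d{2})(\d{2})T(\d{2})(\d{2})/mask_rcnn_\w+(\d{4})\.h5"
    m = re.match(regex, path)
    print(m.groups())    # ('2017', '10', '29', '23', '15', '0001')
    # now -> datetime.datetime(2017, 10, 29, 23, 15)
    # last_checkpoint_epoch -> int('0001') + 1 == 2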
Example #4
    def run_graph(self, images, outputs):
        '''Runs a subset of the computation graph that computes the given
        outputs.

        outputs: List of tuples (name, tensor) to compute. The tensors are
            symbolic TensorFlow tensors and the names are for easy tracking.

        Returns an ordered dict of results. Keys are the names received in the
        input and values are Numpy arrays.
        '''
        model = self.keras_model

        # Organize desired outputs into an ordered dict
        outputs = OrderedDict(outputs)
        for o in outputs.values():
            assert o is not None

        # Build a Keras function to run parts of the computation graph
        # Copy the input list so model.inputs is not mutated in place
        inputs = list(model.inputs)
        if model.uses_learning_phase and not isinstance(
                KB.learning_phase(), int):
            inputs += [KB.learning_phase()]
        # Pass `inputs` (which may include the learning phase), not model.inputs
        kf = KB.function(inputs, list(outputs.values()))

        # Run inference
        molded_images, image_metas, windows = self.mold_inputs(images)
        # TODO: support training mode?
        # if TEST_MODE == "training":
        #     model_in = [molded_images, image_metas,
        #                 target_rpn_match, target_rpn_bbox,
        #                 input_normalized_gt_boxes, gt_masks]
        #     if not config.USE_RPN_ROIS:
        #         model_in.append(target_rois)
        #     if model.uses_learning_phase and not isinstance(KB.learning_phase(), int):
        #         model_in.append(1.)
        #     outputs_np = kf(model_in)
        # else:

        model_in = [molded_images, image_metas]
        if model.uses_learning_phase and not isinstance(
                KB.learning_phase(), int):
            model_in.append(0.)
        outputs_np = kf(model_in)

        # Pack the generated Numpy arrays into a dict and log the results.
        outputs_np = OrderedDict(zip(outputs.keys(), outputs_np))
        for k, v in outputs_np.items():
            log(k, v)
        return outputs_np
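A usage sketch of run_graph(); the layer name 'ROI' is illustrative and depends on the actual graph:

    # Hypothetical usage -- inspect the proposal tensor for one image
    results = model.run_graph([image], [
        ('proposals', model.keras_model.get_layer('ROI').output),
    ])
    print(results['proposals'].shape)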
Example #5
    def load_weights(self, filepath, by_name=False, exclude=None):
        """
        Modified version of the correspoding Keras function with
        the addition of multi-GPU support and the ability to exclude
        some layers from loading.
        exlude: list of layer names to excluce
        """
        import h5py
        from keras.engine import topology
        print('>>> load_weights()')
        if exclude:
            by_name = True

        if h5py is None:
            raise ImportError('`load_weights` requires h5py.')

        log('    load_weights: Loading weights from: {}'.format(filepath))
        f = h5py.File(filepath, mode='r')
        if 'layer_names' not in f.attrs and 'model_weights' in f:
            f = f['model_weights']

        # In multi-GPU training, we wrap the model. Get layers
        # of the inner model because they have the weights.
        keras_model = self.keras_model
        layers = keras_model.inner_model.layers if hasattr(keras_model, "inner_model")\
            else keras_model.layers

        # Exclude some layers
        if exclude:
            layers = [l for l in layers if l.name not in exclude]


        if by_name:
            topology.load_weights_from_hdf5_group_by_name(f, layers)
        else:
            topology.load_weights_from_hdf5_group(f, layers)
        if hasattr(f, 'close'):
            f.close()

        # Update the log directory
        self.set_log_dir(filepath)
        log('    load_weights: Log directory set to : {}'.format(self.log_dir))
        print('    Load weights complete : ', filepath)
        return filepath
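A usage sketch; the weights path and excluded layer names are illustrative:

    # Hypothetical usage of load_weights()
    model.load_weights('/path/to/mask_rcnn_coco.h5', by_name=True,
                       exclude=['mrcnn_class_logits', 'mrcnn_bbox_fc'])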
Example #6
    def save_model(self, filepath, filename=None, by_name=False, exclude=None):
        '''
        Save the model architecture (without weights) to a JSON file.
        filepath:   destination directory
        filename:   name of the JSON file to write
        '''
        print('>>> save_model_architecture()')

        model_json = self.keras_model.to_json()
        full_filepath = os.path.join(filepath, filename)
        log('    save model to  {}'.format(full_filepath))

        # Write the JSON architecture; the `with` block closes the file automatically
        with open(full_filepath, 'w') as f:
            f.write(model_json)

        print('    save_model: architecture saved to : {}'.format(full_filepath))
        print('    save model architecture complete')
        return filepath
Example #7
    def save_model(self,
                   filepath=None,
                   filename=None,
                   by_name=False,
                   exclude=None):
        '''
        Save the model weights (only) to an .h5 file named `filename` under
        `filepath` (default: self.log_dir).
        '''
        print('>>> save_model() -- Weights only')
        if os.path.splitext(filename)[1] != '.h5':
            filename += '.h5'

        if filepath is None:
            full_filepath = os.path.join(self.log_dir, filename)
        else:
            full_filepath = os.path.join(filepath, filename)

        log('    save model to  {}'.format(full_filepath))
        self.keras_model.save_weights(full_filepath, overwrite=True)

        # The following don't work -- some objects are not JSON serializable:
        # self.keras_model.save(full_filepath, overwrite=True, include_optimizer=True)
        #
        # model_json = self.keras_model.to_json()
        # with open(full_filepath, 'w') as f:
        #     json.dump(model_json, f)
        print('    save_weights: saved weights to : {}'.format(full_filepath))
        print('    save model weights complete')
        return full_filepath
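A usage sketch; the filename is illustrative ('.h5' is appended automatically):

    # Hypothetical usage of save_model() -- weights are written under self.log_dir
    saved_path = model.save_model(filename='my_weights_epoch_0005')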
Example #8
    def set_trainable(self,
                      layer_regex,
                      keras_model=None,
                      indent=0,
                      verbose=0):
        '''
        Sets model layers as trainable if their names match the given
        regular expression.
        '''
        # Print message on the first call (but not on recursive calls)
        if verbose > 0 and keras_model is None:
            log("\nSelecting layers to train")
            log("-------------------------")
            log("{:5}    {:20}     {}".format('Layer', 'Layer Name',
                                              'Layer Type'))

        keras_model = keras_model or self.keras_model

        # In multi-GPU training, we wrap the model. Get layers
        # of the inner model because they have the weights.
        layers = keras_model.inner_model.layers if hasattr(keras_model, "inner_model")\
            else keras_model.layers

        # Go through layers one by one; if a layer's name matches the regex, set it trainable
        for ind, layer in enumerate(layers):
            # Is the layer a model?
            if layer.__class__.__name__ == 'Model':
                if verbose > 0:
                    print("Entering model layer: ", layer.name,
                          '------------------------------')

                self.set_trainable(layer_regex,
                                   keras_model=layer,
                                   indent=indent + 4)

                if verbose > 0:
                    print("Exiting model layer ", layer.name,
                          '--------------------------------')
                continue

            if not layer.weights:
                if verbose > 0:
                    log(" {}{:3}  {:20}   ({:20})   ............................no weights to train ]". \
                    format(" " * indent, ind, layer.name,layer.__class__.__name__))
                continue

            # Is it trainable?
            trainable = bool(re.fullmatch(layer_regex, layer.name))

            # Update layer. If layer is a container, update inner layer.
            if layer.__class__.__name__ == 'TimeDistributed':
                layer.layer.trainable = trainable
            else:
                layer.trainable = trainable
            # Print trainable layer names

            if trainable:
                log(" {}{:3}  {:20}   ({:20})   TRAIN ".\
                    format(" " * indent, ind, layer.name, layer.__class__.__name__))
            elif verbose > 0:
                log(" {}{:3}  {:20}   ({:20})   ............................ not a layer we want to train". \
                    format(" " * indent, ind, layer.name, layer.__class__.__name__))
        return
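A usage sketch; the 'heads' key and the raw regex are illustrative:

    # Hypothetical usage of set_trainable()
    model.set_trainable(model.layer_regex['heads'], verbose=1)     # predefined key
    model.set_trainable(r'(mrcnn\_.*)|(rpn\_.*)', verbose=1)       # or a raw regex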
Example #9
    def set_log_dir(self, model_path=None, new_folder=False):
        '''
        Sets the model log directory and epoch counter.

        model_path: If None, or a format different from what this code uses
            then set a new log directory and start epochs from 0. Otherwise,
            extract the log directory and the epoch counter from the file
            name.
        '''
        # Set date and epoch counter as if starting a new model

        self.tb_dir = os.path.join(self.model_dir, 'tensorboard')
        self.epoch = 0
        regex_match = False
        last_checkpoint_epoch = 0
        now = datetime.datetime.now()

        # If we have a model path with date and epochs use them

        if model_path:
            # Continue from where we left off. Get epoch and date from the file name
            # A sample model path matching the regex below might look like:
            # /path/to/logs/fcn20171029T2315/fcn_coco_0001.h5
            model_path = model_path.replace('\\', "/")

            regex = r".*/\w+(\d{4})(\d{2})(\d{2})T(\d{2})(\d{2})/fcn\w+(\d{4})\.h5"
            regex_match = re.match(regex, model_path)

            if regex_match:
                now = datetime.datetime(int(regex_match.group(1)),
                                        int(regex_match.group(2)),
                                        int(regex_match.group(3)),
                                        int(regex_match.group(4)),
                                        int(regex_match.group(5)))
                last_checkpoint_epoch = int(regex_match.group(6)) + 1
                if last_checkpoint_epoch > 0 and self.config.LAST_EPOCH_RAN > last_checkpoint_epoch:
                    self.epoch = self.config.LAST_EPOCH_RAN
                else:
                    self.epoch = last_checkpoint_epoch

        # Set directory for training logs.
        # If new_folder is True (or config.NEW_LOG_FOLDER is set), reset the timestamp
        # so a fresh log folder is generated even when a checkpoint filename was parsed.
        if new_folder or self.config.NEW_LOG_FOLDER:
            now = datetime.datetime.now()

        self.log_dir = os.path.join(
            self.model_dir, "{}{:%Y%m%dT%H%M}".format(self.config.NAME.lower(),
                                                      now))

        ##--------------------------------------------------------------------------------
        ## Create checkpoint folder if it doesn't exist
        ##--------------------------------------------------------------------------------
        from tensorflow.python.platform import gfile
        print('  set_log_dir(): self.log_dir : {} '.format(self.log_dir),
              file=sys.__stdout__)
        if not gfile.IsDirectory(self.log_dir):
            print('  Creating checkpoint folder : {}'.format(self.log_dir),
                  file=sys.__stdout__)
            gfile.MakeDirs(self.log_dir)
        else:
            print('  Checkpoint folder already exists: {}'.format(
                self.log_dir),
                  file=sys.__stdout__)

        # Path to save after each epoch. Include placeholders that get filled by Keras.
        self.checkpoint_path = os.path.join(
            self.log_dir, "{}_*epoch*.h5".format(self.config.NAME.lower()))
        self.checkpoint_path = self.checkpoint_path.replace(
            "*epoch*", "{epoch:04d}")

        log('  set_log_dir(): self.checkpoint_path: {} '.format(
            self.checkpoint_path))
        log('  set_log_dir(): self.log_dir        : {} '.format(self.log_dir))
        log('  set_log_dir(): Next epoch to run (self.epoch): {} '.format(
            self.epoch))

        return
Example #10
    def load_weights(self,
                     filepath,
                     by_name=False,
                     exclude=None,
                     new_folder=False):
        '''
        Modified version of the corresponding Keras function with
        the addition of multi-GPU support and the ability to exclude
        some layers from loading.
        exclude: list of layer names to exclude
        '''
        import h5py

        from keras.engine import topology
        log('   >>> load_weights() from : {}'.format(filepath))
        if exclude:
            by_name = True

        if h5py is None:
            raise ImportError('`load_weights` requires h5py.')

        f = h5py.File(filepath, mode='r')
        if 'layer_names' not in f.attrs and 'model_weights' in f:
            # full-model HDF5 files keep the weights under a 'model_weights' group
            f = f['model_weights']

        # In multi-GPU training, we wrap the model. Get layers
        # of the inner model because they have the weights.
        keras_model = self.keras_model
        layers = keras_model.inner_model.layers if hasattr(keras_model, "inner_model")\
            else keras_model.layers

        print('\n\n')
        print('--------------------------------')
        print(' List of all Layers in Model    ')
        print('--------------------------------')
        print('\n\n')
        for idx, layer in enumerate(layers):
            print('>layer {} : name : {:40s}  type: {}'.format(
                idx, layer.name, layer))

        # Exclude some layers. Materialize as a list (not a lazy filter) so the
        # layers can be printed below and still passed to the weight loader.
        if exclude:
            layers = [l for l in layers if l.name not in exclude]

        print(' --------------------------------------')
        print('  layers to load (not in exclude list) ')
        print(' --------------------------------------')
        for idx, layer in enumerate(layers):
            print('    >layer {} : name : {:40s}  type: {}'.format(
                idx, layer.name, layer))
        print('\n\n')

        if by_name:
            topology.load_weights_from_hdf5_group_by_name(f, layers)
        else:
            topology.load_weights_from_hdf5_group(f, layers)

        if hasattr(f, 'close'):
            f.close()

        # Update the log directory
        print('   Weights file loaded: {} '.format(filepath))
        print('   Weights file loaded: {} '.format(filepath),
              file=sys.__stdout__)

        if self.mode == 'training':
            self.set_log_dir(filepath, new_folder)

        print(" MODEL Load weight file COMPLETE    ")

        return filepath
Example #11
def build_heatmap_files(mrcnn_model,
                        dataset,
                        iterations=5,
                        start_from=0,
                        dest_path=None):
    '''
    Run the model over `dataset` and write the generated heatmaps to
    compressed .npz files in dest_path.

    dataset:  Dataset object to generate heatmap files for.
    '''
    assert mrcnn_model.mode == "trainfcn", "Create model in training mode."
    # batch_size was previously undefined here; take it from the model config
    batch_size = mrcnn_model.config.BATCH_SIZE
    log("Starting for  {} iterations - batch size of each iteration: {}".
        format(iterations, batch_size))
    log(" Output destination: {}".format(dest_path))
    tr_generator = data_generator(dataset,
                                  mrcnn_model.config,
                                  shuffle=False,
                                  augment=False,
                                  batch_size=mrcnn_model.config.BATCH_SIZE,
                                  image_index=start_from)

    ## Start main loop
    epoch_idx = 0
    for epoch_idx in range(iterations):
        tm_start = time.time()

        train_batch_x, train_batch_y = next(tr_generator)
        print(
            ' ==> mrcnn_model: step {} of {} iterations, image_id: {} '.format(
                epoch_idx, iterations, train_batch_x[1][:, 0]))


        results = mrcnn_model.keras_model.predict(train_batch_x)

        # results[:4] = pr_hm_norm, pr_hm_scores, gt_hm_norm, gt_hm_scores (see np.savez below)

        for i in range(batch_size):
            image_id = train_batch_x[1][i, 0]

            coco_image_id = dataset.image_info[image_id]['id']
            coco_filename = os.path.basename(
                dataset.image_info[image_id]['path'])

            ## To save files with a sequence number 0,1,2,... (the index into dataset.image_info), use:
            # filename = 'hm_{:012d}.npz'.format(image_id)

            ## If we want to use the coco_id as the file name, use the following:
            filename = 'hm_{:012d}.npz'.format(coco_image_id)

            print(
                '  output: {}  image_id: {}  coco_image_id: {} coco_filename: {} output file: {}'
                .format(i, image_id, coco_image_id, coco_filename, filename))
            np.savez_compressed(os.path.join(dest_path, filename),
                                input_image_meta=train_batch_x[1][i],
                                pr_hm_norm=results[0][i],
                                pr_hm_scores=results[1][i],
                                gt_hm_norm=results[2][i],
                                gt_hm_scores=results[3][i],
                                coco_info=np.array(
                                    [coco_image_id, coco_filename]))
        tm_stop = time.time()
        print(' ==> Elapsed time {:.4f}s - number of outputs in results: {} '.
              format(tm_stop - tm_start, len(results)))

    print('Final : mrcnn_model epoch_idx {}   iterations {}'.format(
        epoch_idx, iterations))
    return
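The saved .npz files can be read back with numpy; the file name below is illustrative:

    import numpy as np

    # Hypothetical read-back of one generated heatmap file
    data = np.load('hm_000000000042.npz')
    print(data['pr_hm_norm'].shape, data['gt_hm_norm'].shape)
    print(data['coco_info'])    # [coco_image_id, coco_filename]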
Example #12
    def train_in_batches(self,
                         train_dataset,
                         val_dataset,
                         learning_rate,
                         layers,
                         losses=None,
                         epochs_to_run=1,
                         batch_size=0,
                         steps_per_epoch=0):
        '''
        Train the model.
        train_dataset, 
        val_dataset:    Training and validation Dataset objects.
        
        learning_rate:  The learning rate to train with
        
        epochs_to_run:  Number of additional epochs to run in this call.
                        Training resumes from self.epoch, so the final epoch
                        number is self.epoch + epochs_to_run.
                        
        layers:         Allows selecting which layers to train. It can be:
                        - A regular expression to match layer names to train
                        - One of these predefined values:
                        heads: The RPN, classifier and mask heads of the network
                        all: All the layers
                        3+: Train Resnet stage 3 and up
                        4+: Train Resnet stage 4 and up
                        5+: Train Resnet stage 5 and up
        '''
        assert self.mode == "training", "Create model in training mode."

        # Translate the pre-defined layer keys into their regular expressions
        # and join them into a single regex
        train_regex_list = [self.layer_regex[x] for x in layers]
        layers = '|'.join(train_regex_list)
        print('layers regex :', layers)

        if batch_size == 0:
            batch_size = self.config.BATCH_SIZE

        if steps_per_epoch == 0:
            steps_per_epoch = self.config.STEPS_PER_EPOCH

        # Data generators
        train_generator = data_generator(train_dataset,
                                         self.config,
                                         shuffle=True,
                                         batch_size=batch_size)
        val_generator = data_generator(val_dataset,
                                       self.config,
                                       shuffle=True,
                                       batch_size=batch_size,
                                       augment=False)

        log("    Last epoch completed : {} ".format(self.epoch))
        log("    Starting from epoch  : {} for {} epochs".format(
            self.epoch, epochs_to_run))
        log("    Learning Rate        : {} ".format(learning_rate))
        log("    Steps per epoch      : {} ".format(steps_per_epoch))
        log("    Batchsize            : {} ".format(batch_size))
        log("    Checkpoint Folder    : {} ".format(self.checkpoint_path))
        epochs = self.epoch + epochs_to_run

        from tensorflow.python.platform import gfile
        if not gfile.IsDirectory(self.log_dir):
            log('Creating checkpoint folder')
            gfile.MakeDirs(self.log_dir)
        else:
            log('Checkpoint folder already exists')

        self.set_trainable(layers)
        self.compile(learning_rate, self.config.LEARNING_MOMENTUM, losses)

        # Get the de-duplicated metric names to monitor
        # (adapted from _get_deduped_metrics_names() in keras/engine/training.py)
        out_labels = self.get_deduped_metrics_names()
        print(' ====> out_labels : ', out_labels)

        ## setup Progress Bar callback
        callback_metrics = out_labels + ['val_' + n for n in out_labels]
        print(' Callback metrics monitored by progbar')
        pp.pprint(callback_metrics)

        progbar = keras.callbacks.ProgbarLogger(count_mode='steps')
        progbar.set_model(self.keras_model)
        progbar.set_params({
            'epochs': epochs,
            'steps': steps_per_epoch,
            'verbose': 1,
            'do_validation': False,
            'metrics': callback_metrics,
        })

        ## setup Checkpoint callback
        chkpoint = keras.callbacks.ModelCheckpoint(self.checkpoint_path,
                                                   monitor='val_loss',
                                                   verbose=1,
                                                   save_best_only=True,
                                                   save_weights_only=True)
        chkpoint.set_model(self.keras_model)

        progbar.on_train_begin()
        epoch_idx = self.epoch

        if epoch_idx >= epochs:
            print(
                'Final epoch {} has already completed - Training will not proceed'
                .format(epochs))
        else:
            while epoch_idx < epochs:
                progbar.on_epoch_begin(epoch_idx)

                for steps_index in range(steps_per_epoch):
                    batch_logs = {}
                    # print(' self.epoch {}   epochs {}  step {} '.format(self.epoch, epochs, steps_index))
                    batch_logs['batch'] = steps_index
                    batch_logs['size'] = batch_size
                    progbar.on_batch_begin(steps_index, batch_logs)

                    train_batch_x, train_batch_y = next(train_generator)

                    outs = self.keras_model.train_on_batch(
                        train_batch_x, train_batch_y)

                    if not isinstance(outs, list):
                        outs = [outs]
                    for l, o in zip(out_labels, outs):
                        batch_logs[l] = o

                    progbar.on_batch_end(steps_index, batch_logs)

                    # print(outs)
                progbar.on_epoch_end(epoch_idx, {})
                chkpoint.on_epoch_end(epoch_idx, batch_logs)
                epoch_idx += 1

            self.epoch = max(epoch_idx - 1, epochs)

            print('Final : self.epoch {}   epochs {}'.format(
                self.epoch, epochs))
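A usage sketch; dataset objects and values are illustrative (`layers` is a list of keys into self.layer_regex):

    # Hypothetical usage of train_in_batches()
    model.train_in_batches(train_dataset, val_dataset,
                           learning_rate=0.001,
                           layers=['heads'],
                           epochs_to_run=2)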
Example #13
    def train(self,
              train_dataset,
              val_dataset,
              learning_rate,
              layers=None,
              losses=None,
              epochs=0,
              epochs_to_run=0,
              batch_size=0,
              steps_per_epoch=0):
        '''
        Train the model.
        train_dataset, 
        val_dataset:    Training and validation Dataset objects.
        
        learning_rate:  The learning rate to train with
        
        layers:         Allows selecting which layers to train. It can be:
                        - A regular expression to match layer names to train
                        - One of these predefined values:
                        heads: The RPN, classifier and mask heads of the network
                        all: All the layers
                        3+: Train Resnet stage 3 and up
                        4+: Train Resnet stage 4 and up
                        5+: Train Resnet stage 5 and up
        
        losses:         List of losses to monitor.
        
        epochs:         Number of training epochs. Note that previous training epochs
                        are considered to be done already, so this actually determines
                        the epochs to train in total rather than in this particular
                        call.
        
        epochs_to_run:  Number of additional epochs to run; when > 0 it overrides
                        `epochs` (epochs = self.epoch + epochs_to_run).
                        
        '''
        assert self.mode == "training", "Create model in training mode."

        if batch_size == 0:
            batch_size = self.config.BATCH_SIZE
        if epochs_to_run > 0:
            epochs = self.epoch + epochs_to_run
        if steps_per_epoch == 0:
            steps_per_epoch = self.config.STEPS_PER_EPOCH

        # Translate the pre-defined layer keys into their regular expressions
        # and join them into a single regex
        train_regex_list = [self.layer_regex[x] for x in layers]
        layers = '|'.join(train_regex_list)
        print('layers regex :', layers)

        # Data generators
        train_generator = data_generator(train_dataset,
                                         self.config,
                                         shuffle=True,
                                         batch_size=batch_size)
        val_generator = data_generator(val_dataset,
                                       self.config,
                                       shuffle=True,
                                       batch_size=batch_size,
                                       augment=False)

        # Callbacks
        ## ModelCheckpoint monitor was originally (?) 'loss'; changed to 'val_loss' (the Keras default) 2-5-18
        callbacks = [
            keras.callbacks.TensorBoard(log_dir=self.log_dir,
                                        histogram_freq=0,
                                        batch_size=32,
                                        write_graph=True,
                                        write_grads=False,
                                        write_images=True,
                                        embeddings_freq=0,
                                        embeddings_layer_names=None,
                                        embeddings_metadata=None),
            keras.callbacks.ModelCheckpoint(self.checkpoint_path,
                                            mode='auto',
                                            period=1,
                                            monitor='val_loss',
                                            verbose=1,
                                            save_best_only=True,
                                            save_weights_only=True),
            keras.callbacks.ReduceLROnPlateau(monitor='val_loss',
                                              mode='auto',
                                              factor=0.3,
                                              cooldown=30,
                                              patience=50,
                                              min_lr=0.00001,
                                              verbose=1),
            keras.callbacks.EarlyStopping(monitor='val_loss',
                                          mode='auto',
                                          min_delta=1e-5,
                                          patience=200,
                                          verbose=1)
        ]

        # Train

        self.set_trainable(layers)
        self.compile(learning_rate, self.config.LEARNING_MOMENTUM, losses)

        log("Starting at epoch {} of {} epochs. LR={}\n".format(
            self.epoch, epochs, learning_rate))
        log("Steps per epochs {} ".format(steps_per_epoch))
        log("Batch size       {} ".format(batch_size))
        log("Checkpoint Path: {} ".format(self.checkpoint_path))

        self.keras_model.fit_generator(
            train_generator,
            initial_epoch=self.epoch,
            epochs=epochs,
            steps_per_epoch=steps_per_epoch,
            callbacks=callbacks,
            validation_data=val_generator,
            validation_steps=self.config.VALIDATION_STEPS,
            max_queue_size=100,
            workers=1,  # max(self.config.BATCH_SIZE // 2, 2),
            use_multiprocessing=False)
        self.epoch = max(self.epoch, epochs)

        print('Final : self.epoch {}   epochs {}'.format(self.epoch, epochs))
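A usage sketch; dataset objects and values are illustrative (`layers` is a list of keys into self.layer_regex):

    # Hypothetical usage of train()
    model.train(train_dataset, val_dataset,
                learning_rate=0.001,
                layers=['heads'],
                epochs_to_run=10)       # or pass an absolute `epochs` target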