def use_release(self, gpus=1):
    """Use the latest DeepForest model release from github and load model.
    Downloads the release if it does not already exist locally.

    Args:
        gpus: number of gpus to parallelize, default to 1

    Returns:
        model (object): A trained keras model
    """
    # Download latest model from github release
    release_tag, self.weights = utilities.use_release()

    # load saved model and tag release
    self.__release_version__ = release_tag
    print("Loading pre-built model: {}".format(release_tag))

    if gpus == 1:
        with warnings.catch_warnings():
            # Suppress compile warning, not relevant here
            warnings.filterwarnings("ignore", category=UserWarning)
            self.model = utilities.read_model(self.weights, self.config)

        # Convert model
        self.prediction_model = convert_model(self.model)
    elif gpus > 1:
        backbone = models.backbone(self.config["backbone"])
        n_classes = len(self.labels.keys())
        self.model, self.training_model, self.prediction_model = create_models(
            backbone.retinanet,
            num_classes=n_classes,
            weights=self.weights,
            multi_gpu=gpus)

    # add to config
    self.config["weights"] = self.weights
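
# A minimal usage sketch of use_release (assuming the deepforest package
# layout, where this method lives on the deepforest class): download the
# latest github release weights and load them for prediction.
#
# from deepforest import deepforest
#
# model = deepforest.deepforest()
# model.use_release()  # fetches and caches the release if not already present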
def test_lengths(config):
    """Assert that a csv generator and tfrecords create the same number
    of images in an epoch"""
    created_records = tfrecords.create_tfrecords(
        annotations_file="tests/output/testfile_tfrecords.csv",
        class_file="tests/output/classes.csv",
        image_min_side=config["image-min-side"],
        backbone_model=config["backbone"],
        size=100,
        savedir="tests/output/")

    # tfdata
    tf_filenames = find_tf_filenames(path="tests/output/*.tfrecord")

    # keras generator
    backbone = models.backbone(config["backbone"])
    generator = csv_generator.CSVGenerator(
        csv_data_file="tests/output/testfile_tfrecords.csv",
        csv_class_file="tests/output/classes.csv",
        image_min_side=config["image-min-side"],
        preprocess_image=backbone.preprocess_image,
    )

    fit_generator_length = generator.size()
    assert len(tf_filenames) == fit_generator_length
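
# `find_tf_filenames` is referenced above but not defined in this section.
# A plausible sketch, assuming each serialized example stores its source
# path under a "filename" feature key (the key name is a guess; check
# create_tf_example for the real one):
#
# import glob
# import tensorflow as tf
#
# def find_tf_filenames(path):
#     """Collect one filename per image stored across matching tfrecords."""
#     filenames = []
#     for record_path in glob.glob(path):
#         for example in tf.python_io.tf_record_iterator(record_path):
#             parsed = tf.train.Example.FromString(example)
#             filenames.append(
#                 parsed.features.feature["filename"].bytes_list.value[0])
#     return filenames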
def __init__(self, weights=None, saved_model=None):
    self.weights = weights
    self.saved_model = saved_model

    # Read config file - if a config file exists in local dir use it,
    # if not, use the installed package default.
    if os.path.exists("deepforest_config.yml"):
        config_path = "deepforest_config.yml"
    else:
        try:
            config_path = get_data("deepforest_config.yml")
        except Exception as e:
            raise ValueError(
                "No deepforest_config.yml found either in local "
                "directory or in installed package location. {}".format(e))

    print("Reading config file: {}".format(config_path))
    self.config = utilities.read_config(config_path)

    # Create a label dict, defaults to "Tree"
    self.read_classes()

    # release version id to flag if release is being used
    self.__release_version__ = None

    # Load saved model if needed
    if self.saved_model:
        print("Loading saved model")
        # Suppress user warning, not relevant here
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=UserWarning)
            self.model = models.load_model(saved_model)
            self.prediction_model = convert_model(self.model)
    elif self.weights:
        print("Creating model from weights")
        backbone = models.backbone(self.config["backbone"])
        self.model, self.training_model, self.prediction_model = create_models(
            backbone.retinanet, num_classes=1, weights=self.weights)
    else:
        print("A blank deepforest object created. "
              "To perform prediction, either train or load an existing model.")
        self.model = None
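
# A minimal usage sketch of the constructor above; the weight and model
# paths are placeholders, not files shipped with the package.
#
# from deepforest import deepforest
#
# blank = deepforest.deepforest()                             # no model loaded yet
# from_weights = deepforest.deepforest(weights="snapshot.h5")    # retinanet weights
# from_saved = deepforest.deepforest(saved_model="model.h5")     # full keras model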
def test_equivalence(config, setup_create_tensors):
    # unpack created tensors
    tf_inputs, tf_targets = setup_create_tensors

    # the image going into tensorflow should be equivalent
    # to the image from the fit_generator
    backbone = models.backbone(config["backbone"])

    # CSV generator
    generator = csv_generator.CSVGenerator(
        csv_data_file="tests/output/testfile_tfrecords.csv",
        csv_class_file="tests/data/classes.csv",
        image_min_side=config["image-min-side"],
        preprocess_image=backbone.preprocess_image,
    )

    # find file in randomized generator group
    first_file = generator.groups[0][0]
    gen_filename = os.path.join(generator.base_dir,
                                generator.image_names[first_file])
    original_image = generator.load_image(first_file)
    inputs, targets = generator.__getitem__(0)
    image = inputs[0, ...]
    targets = targets[0][0, ...]

    with tf.Session() as sess:
        # seek the randomized image to match
        tf_inputs, tf_targets = sess.run([tf_inputs, tf_targets])

    # assert filename is the same as generator
    # assert gen_filename == filename

    # tf_image = tf_image[0,...]
    tf_inputs = tf_inputs[0, ...]
    tf_targets = tf_targets[0][0, ...]

    # Same shape
    # assert tf_image.shape == image.shape
    assert tf_inputs.shape == image.shape
    assert tf_targets.shape == targets.shape
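
# Both tests above take a `config` pytest fixture that is not defined in
# this section. A minimal sketch, assuming it simply loads the packaged
# deepforest_config.yml (which provides the "backbone" and
# "image-min-side" keys used above):
#
# import pytest
#
# from deepforest import get_data, utilities
#
# @pytest.fixture()
# def config():
#     return utilities.read_config(get_data("deepforest_config.yml"))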
def create_tfrecords(annotations_file,
                     class_file,
                     backbone_model="resnet50",
                     image_min_side=800,
                     size=1,
                     savedir="./"):
    """
    Args:
        annotations_file: path to 6 column data in form
            image_path, xmin, ymin, xmax, ymax, label
        class_file: path to the retinanet csv class file mapping
            label names to numeric ids
        backbone_model: A keras retinanet backbone
        image_min_side: resized image object minimum size
        size: Number of images per tfrecord
        savedir: dir path to save tfrecords files

    Returns:
        written_files: A list of path names of written tfrecords
    """
    memory_used = []

    # Image preprocess function
    backbone = models.backbone(backbone_model)

    # filebase name
    image_basename = os.path.splitext(os.path.basename(annotations_file))[0]

    # Syntax checks
    # Check annotations file, only JPEG, PNG, GIF, or BMP are allowed.
    # df = pd.read_csv(annotations_file,
    #                  names=["image_path", "xmin", "ymin", "xmax", "ymax", "label"])
    # df['FileType'] = df.image_path.str.split('.').str[-1].str.lower()
    # bad_files = df[~df['FileType'].isin(["jpeg", "jpg", "png", "gif", "bmp"])]
    # if not bad_files.empty:
    #     raise ValueError("Check annotations file, only JPEG, PNG, GIF, or BMP "
    #                      "are allowed, {} incorrect files found \n {}: ".format(
    #                          bad_files.shape[0], bad_files.head()))

    # Check dtypes, cannot use pandas, or will coerce in the presence of NAs
    with open(annotations_file, 'r') as f:
        reader = csv.reader(f, delimiter=',')
        row = next(reader)
        if row[1].count(".") > 0:
            raise ValueError(
                "Annotation files should be headerless with integer box "
                "coordinates, {} is not an int".format(row[1]))

    # Create generator - because of how retinanet yields data,
    # this should always be 1. Shape problems in the future?
    train_generator = CSVGenerator(annotations_file,
                                   class_file,
                                   batch_size=1,
                                   image_min_side=image_min_side,
                                   preprocess_image=backbone.preprocess_image)

    # chunk size
    indices = np.arange(train_generator.size())
    chunks = [
        indices[i * size:(i * size) + size]
        for i in range(ceil(len(indices) / size))
    ]

    written_files = []
    for chunk in chunks:
        # Create tfrecord dataset and save it for output
        fname = savedir + "{}_{}.tfrecord".format(image_basename, chunk[0])
        written_files.append(fname)
        writer = tf.io.TFRecordWriter(fname)
        images = []
        regression_targets = []
        class_targets = []
        filename = []
        original_image = []
        for i in chunk:
            # Original image
            original_image.append(train_generator.load_image(i))
            batch = train_generator.__getitem__(i)

            # split into images and targets
            inputs, targets = batch

            # grab image, assume batch size of 1, squeeze
            images.append(inputs[0, ...])

            # Grab anchor targets
            regression_batch, labels_batch = targets

            # grab regression anchors
            # regression_batch: batch that contains bounding-box regression targets
            # for an image & anchor states (np.array of shape (batch_size, N, 4 + 1),
            # where N is the number of anchors for an image, the first 4 columns
            # define regression targets for (x1, y1, x2, y2) and the
            # last column defines anchor states (-1 for ignore, 0 for bg, 1 for fg).
            regression_anchors = regression_batch[0, ...]
            regression_targets.append(regression_anchors)

            # grab class labels - squeeze out batch size
            # From retinanet: labels_batch: batch that contains labels & anchor states
            # (np.array of shape (batch_size, N, num_classes + 1),
            # where N is the number of anchors for an image and the last column defines
            # the anchor state (-1 for ignore, 0 for bg, 1 for fg).
            labels = labels_batch[0, ...]
            print("Label shape is: {}".format(labels.shape))
            class_targets.append(labels)

            # append filename by looking at group index
            current_index = train_generator.groups[i][0]

            # Grab filename and append to the full path
            fname = train_generator.image_names[current_index]
            fname = os.path.join(train_generator.base_dir, fname)
            filename.append(fname)

        for image, regression_target, class_target, fname, orig_image in zip(
                images, regression_targets, class_targets, filename,
                original_image):
            tf_example = create_tf_example(image, regression_target,
                                           class_target, fname, orig_image)
            writer.write(tf_example.SerializeToString())

        memory_used.append(psutil.virtual_memory().used / 2**30)

    # plt.plot(memory_used)
    # plt.title('Evolution of memory')
    # plt.xlabel('iteration')
    # plt.ylabel('memory used (GB)')
    # plt.savefig(os.path.join(savedir, "memory.png"))

    return written_files
def main(forest_object,
         args=None,
         input_type="fit_generator",
         list_of_tfrecords=None,
         comet_experiment=None):
    """
    Main Training Loop

    Args:
        forest_object: a deepforest class object
        args: Keras retinanet argparse
        input_type: "fit_generator" or "tfrecord" input type
        list_of_tfrecords: list of tfrecords to parse
        comet_experiment: an optional comet_ml experiment object for logging
    """
    # parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # create object that stores backbone information
    backbone = models.backbone(args.backbone)

    # make sure keras is the minimum required version
    check_keras_version()

    # optionally choose specific GPU
    if args.gpu:
        setup_gpu(args.gpu)

    # optionally load config parameters
    if args.config:
        args.config = read_config_file(args.config)

    # data input
    if input_type == "fit_generator":
        # create the generators
        train_generator, validation_generator = create_generators(
            args, backbone.preprocess_image)

        # placeholder target tensor for creating models
        targets = None
    elif input_type == "tfrecord":
        # Create tensorflow iterators
        iterator = tfrecords.create_dataset(list_of_tfrecords, args.batch_size)
        next_element = iterator.get_next()

        # Split into inputs and targets
        inputs = next_element[0]
        targets = [next_element[1], next_element[2]]

        validation_generator = None
    else:
        raise ValueError(
            "{} input type is invalid. Only 'tfrecord' or 'fit_generator' "
            "input types are accepted for model training".format(input_type))

    # create the model
    if args.snapshot is not None:
        print('Loading model, this may take a second...')
        model = models.load_model(args.snapshot, backbone_name=args.backbone)
        training_model = model
        anchor_params = None
        if args.config and 'anchor_parameters' in args.config:
            anchor_params = parse_anchor_parameters(args.config)
        prediction_model = retinanet_bbox(model=model, anchor_params=anchor_params)
    else:
        weights = args.weights
        # default to imagenet if nothing else is specified
        if weights is None and args.imagenet_weights:
            weights = backbone.download_imagenet()

        print('Creating model, this may take a second...')
        if input_type == "fit_generator":
            num_of_classes = train_generator.num_classes()
        else:
            # take the class count from the deepforest object's label dict
            num_of_classes = len(forest_object.labels.keys())

        model, training_model, prediction_model = create_models(
            backbone_retinanet=backbone.retinanet,
            num_classes=num_of_classes,
            weights=weights,
            multi_gpu=args.multi_gpu,
            freeze_backbone=args.freeze_backbone,
            lr=args.lr,
            config=args.config,
            targets=targets,
            freeze_layers=args.freeze_layers)

    # print model summary
    print(model.summary())

    # this lets the generator compute backbone layer shapes using the actual backbone model
    if 'vgg' in args.backbone or 'densenet' in args.backbone:
        train_generator.compute_shapes = make_shapes_callback(model)
        if validation_generator:
            validation_generator.compute_shapes = train_generator.compute_shapes

    # create the callbacks
    callbacks = create_callbacks(model, training_model, prediction_model,
                                 validation_generator, args, comet_experiment)

    if not args.compute_val_loss:
        validation_generator = None

    # start training
    if input_type == "fit_generator":
        history = training_model.fit_generator(
            generator=train_generator,
            steps_per_epoch=args.steps,
            epochs=args.epochs,
            verbose=1,
            callbacks=callbacks,
            workers=args.workers,
            use_multiprocessing=args.multiprocessing,
            max_queue_size=args.max_queue_size,
            validation_data=validation_generator)
    elif input_type == "tfrecord":
        # Fit model
        history = training_model.fit(x=inputs,
                                     steps_per_epoch=args.steps,
                                     epochs=args.epochs,
                                     callbacks=callbacks)
    else:
        raise ValueError(
            "{} input type is invalid. Only 'tfrecord' or 'fit_generator' "
            "input types are accepted for model training".format(input_type))

    # Assign history to deepforest model class
    forest_object.history = history

    # return trained model
    return model, prediction_model, training_model
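
# A minimal usage sketch of the training loop above. `records` is the list
# returned by create_tfrecords, and `retinanet_args` is a placeholder for
# the keras-retinanet style argument list that parse_args expects.
#
# from deepforest import deepforest
#
# forest = deepforest.deepforest()
# model, prediction_model, training_model = main(forest,
#                                                args=retinanet_args,
#                                                input_type="tfrecord",
#                                                list_of_tfrecords=records)
# forest.history  # training history assigned back onto the deepforest object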