def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
    """
    Train the detection model on the stored annotations and save its weights.

    The backbone named by the ``backbone`` hyperparameter is looked up and
    stored on ``self.backbone``. A ``CSVGenerator`` is built from
    ``self.annotations``, ``self.classes`` and ``self.base_dir``, the models
    are created through ``self._create_models`` and the training model is
    fitted with ``fit_generator``. The weights are then written to
    ``<weights_path>model_weights.h5``.

    Parameters
    ----------
    timeout : float
        Not used by this method.
    iterations : int
        Not used by this method.

    Returns
    -------
    CallResult wrapping ``None``.
    """
    hyperparams = self.hyperparams

    # Backbone descriptor; kept on the instance as well as in a local.
    backbone = self.backbone = models.backbone(hyperparams["backbone"])

    # Training data generator (group shuffling is disabled).
    generator_args = (
        self.annotations,
        self.classes,
        self.base_dir,
        hyperparams["batch_size"],
        backbone.preprocess_image,
    )
    batches = CSVGenerator(*generator_args, shuffle_groups=False)

    logger.info("Creating model...")
    model_kwargs = {
        "backbone_retinanet": backbone.retinanet,
        "num_classes": batches.num_classes(),
        "lr": hyperparams["learning_rate"],
    }
    base_model, trainable_model, inference_model = self._create_models(**model_kwargs)

    training_callbacks = self._create_callbacks(
        base_model, trainable_model, inference_model)

    started = time.time()
    logger.info("Starting training...")
    fit_kwargs = {
        "generator": batches,
        "steps_per_epoch": hyperparams["n_steps"],
        "epochs": hyperparams["n_epochs"],
        "verbose": 1,
        "callbacks": training_callbacks,
    }
    trainable_model.fit_generator(**fit_kwargs)

    # The file name is appended directly to the configured path prefix.
    weights_file = hyperparams["weights_path"] + "model_weights.h5"
    trainable_model.save_weights(weights_file)

    elapsed = time.time() - started
    logger.info(f"Training complete. Training took {elapsed} seconds.")

    return CallResult(None)
def _create_generator(self, annotations, classes, shuffle_groups):
    """
    Create a ``CSVGenerator`` for evaluation.

    Previously the three arguments were accepted but ignored: the generator
    was always built from ``self.annotations`` / ``self.classes`` with
    ``shuffle_groups=False``. The arguments are now honoured; passing
    ``None`` for any of them selects the former hard-coded value, so callers
    that passed the instance attributes and ``False`` see no change.

    Parameters
    ----------
    annotations :
        Forwarded as the first positional argument of ``CSVGenerator``.
        ``None`` falls back to ``self.annotations``.
    classes :
        Forwarded as the second positional argument of ``CSVGenerator``.
        ``None`` falls back to ``self.classes``.
    shuffle_groups :
        Forwarded as the ``shuffle_groups`` keyword of ``CSVGenerator``.
        ``None`` falls back to ``False``.

    Returns
    -------
    The constructed ``CSVGenerator``. It also uses ``self.base_dir``, the
    ``batch_size`` hyperparameter and ``self.backbone.preprocess_image``.
    """
    if annotations is None:
        annotations = self.annotations
    if classes is None:
        classes = self.classes
    if shuffle_groups is None:
        shuffle_groups = False

    return CSVGenerator(
        annotations,
        classes,
        self.base_dir,
        self.hyperparams['batch_size'],
        self.backbone.preprocess_image,
        shuffle_groups=shuffle_groups,
    )
def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
    """
    Train the detection model on the stored annotations.

    The backbone named by the ``backbone`` hyperparameter is looked up and
    stored on ``self.backbone``. A ``CSVGenerator`` is built from
    ``self.annotations``, ``self.classes`` and ``self.base_dir``. The models
    are created through ``self._create_models`` and the training model,
    kept on ``self.training_model``, is fitted with ``fit_generator``.

    When the ``weights`` hyperparameter is ``False`` no weights are handed to
    ``_create_models``. Otherwise the entry of ``self.volumes`` keyed by the
    backbone name is used.

    Parameters
    ----------
    timeout : float
        Not used by this method.
    iterations : int
        Not used by this method.

    Returns
    -------
    CallResult wrapping ``None``.
    """
    hyperparams = self.hyperparams

    # Backbone descriptor; kept on the instance as well as in a local.
    backbone = self.backbone = models.backbone(hyperparams['backbone'])

    # Training data generator.
    batches = CSVGenerator(
        self.annotations,
        self.classes,
        self.base_dir,
        hyperparams['batch_size'],
        backbone.preprocess_image,
    )

    # Initial weights: none when explicitly disabled, else the volume
    # registered under the backbone's name.
    weights = None if hyperparams['weights'] is False else self.volumes[hyperparams['backbone']]

    print('Creating model...', file=sys.__stdout__)
    model_kwargs = {
        'backbone_retinanet': backbone.retinanet,
        'num_classes': batches.num_classes(),
        'weights': weights,
        'freeze_backbone': hyperparams['freeze_backbone'],
        'lr': hyperparams['learning_rate'],
    }
    base_model, self.training_model, inference_model = self._create_models(**model_kwargs)

    base_model.summary()

    # NOTE: vgg and densenet backbones are currently not implemented here.
    # The original author's disabled code suggests they would need the
    # generator to compute backbone layer shapes from the actual model.

    training_callbacks = self._create_callbacks(
        base_model, self.training_model, inference_model)

    started = time.time()
    print('Starting training...', file=sys.__stdout__)

    fit_kwargs = {
        'generator': batches,
        'steps_per_epoch': hyperparams['n_steps'],
        'epochs': hyperparams['n_epochs'],
        'verbose': 1,
        'callbacks': training_callbacks,
        'workers': self.workers,
        'use_multiprocessing': self.multiprocessing,
        'max_queue_size': self.max_queue_size,
    }
    self.training_model.fit_generator(**fit_kwargs)

    elapsed = time.time() - started
    print(f'Training complete. Training took {elapsed} seconds.', file=sys.__stdout__)

    return CallResult(None)
def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
    """
    Produce image detection predictions.

    The model is rebuilt with ``self._create_models``, the weights written by
    ``fit`` are loaded from ``<weights_path>model_weights.h5`` and the
    training model is converted to an inference model. Each distinct image
    path is then read, preprocessed, resized and run through the model.

    Parameters
    ----------
    inputs :
        Dataframe-like container. It must expose ``metadata`` (queried for
        columns with the ``FileName`` semantic type and their
        ``location_base_uris``), positional access through ``iloc`` and a
        ``d3mIndex`` column.
    timeout : float
        Not used by this method.
    iterations : int
        Not used by this method.

    Returns
    -------
    outputs :
        CallResult wrapping a d3m dataframe container with the columns
        ``d3mIndex``, ``bounding_box`` (a comma-separated string of 8
        coordinates) and ``confidence``. Images without any detection get
        rows supplied by ``self._fill_empty_predictions``.

    Side effects
    ------------
    ``self.image_paths`` is set to a pandas Series of the full image paths.
    ``self.base_dir`` is set to the base directory of the last image column.
    """
    # Detections scoring below this are discarded. The inner loop stops at
    # the first such detection, so it presumably relies on the model
    # returning detections in descending score order -- TODO confirm.
    # NOTE(review): earlier revisions declared an unused
    # ``score_threshold = 0.05`` next to this hard-coded 0.5; confirm which
    # value is intended. The effective cut-off is unchanged here.
    min_score = 0.5

    # Create object that stores backbone information
    backbone = models.backbone(self.hyperparams["backbone"])

    # The generator is only needed here for its class count.
    train_generator = CSVGenerator(
        self.annotations,
        self.classes,
        self.base_dir,
        self.hyperparams["batch_size"],
        backbone.preprocess_image,
        shuffle_groups=False,
    )

    # Instantiate model; only the training model is used below.
    _, training_model, _ = self._create_models(
        backbone_retinanet=backbone.retinanet,
        num_classes=train_generator.num_classes(),
        lr=self.hyperparams["learning_rate"],
    )

    # Load model weights saved in fit
    training_model.load_weights(self.hyperparams["weights_path"] + "model_weights.h5")

    # Convert training model to inference model
    inference_model = models.convert_model(training_model)

    # Generate image paths: one base directory per file-name column.
    image_cols = inputs.metadata.get_columns_with_semantic_type(
        "https://metadata.datadrivendiscovery.org/types/FileName")
    base_dirs = [
        inputs.metadata.query(
            (metadata_base.ALL_ELEMENTS, col))["location_base_uris"][0].replace("file:///", "/")
        for col in image_cols
    ]
    image_paths = [
        os.path.join(base_dir, filename)
        for base_dir, col in zip(base_dirs, image_cols)
        for filename in inputs.iloc[:, col]
    ]
    self.image_paths = pd.Series(image_paths)

    # The previous implementation used ``self.base_dir`` as the loop
    # variable, which left it holding the last base directory. Local names
    # are used above; the resulting attribute value is kept for
    # backward compatibility.
    if base_dirs:
        self.base_dir = base_dirs[-1]

    # Initialize output objects
    box_list = []
    score_list = []
    image_name_list = []

    # Each distinct image is predicted once, in first-seen order.
    image_list = list(dict.fromkeys(image_paths))

    start_time = time.time()
    logger.info("Starting testing...")

    for image_path in image_list:
        image = read_image_bgr(image_path)

        # preprocess image for network
        image = preprocess_image(image)
        image, scale = resize_image(image)

        boxes, scores, _ = inference_model.predict_on_batch(
            tf.constant(np.expand_dims(image, axis=0), dtype=tf.float32))

        # correct for image scale
        boxes /= scale

        for box, score in zip(boxes[0], scores[0]):
            if score < min_score:
                break

            box_list.append(box.astype(int))
            score_list.append(score)
            # One name per kept detection. This used to append the path
            # repeated ``len(box)`` times, which only worked because
            # ``os.path.basename`` dropped the duplicated prefix.
            image_name_list.append(image_path)

    logger.info(
        f"Testing complete. Testing took {time.time()-start_time} seconds."
    )

    # Convert each predicted box (four values, read as x0, y0, x1, y1) to
    # the 8 coordinate format for D3M, rendered as a comma-separated string.
    boxes = [
        ",".join(map(str, (x0, y0, x0, y1, x1, y1, x1, y0)))
        for x0, y0, x1, y1 in np.array(box_list).tolist()
    ]

    # Create mapping between image names and D3M index
    input_df = pd.DataFrame({
        "d3mIndex": inputs.d3mIndex,
        "image": [os.path.basename(path) for path in self.image_paths],
    })
    d3mIdx_image_mapping = input_df.set_index("image").T.to_dict("list")

    # Look up the index list of every detection's image.
    predicted_names = [os.path.basename(path) for path in image_name_list]
    predicted_ids = [d3mIdx_image_mapping[name] for name in predicted_names]

    # Images whose index list never appears among the detections have no
    # prediction. A set of tuples replaces the repeated list scans.
    seen_ids = {tuple(ids) for ids in predicted_ids}
    empty_predictions_image_names = [
        name for name, ids in d3mIdx_image_mapping.items()
        if tuple(ids) not in seen_ids
    ]

    # Flatten list of lists
    d3mIdx = [item for ids in predicted_ids for item in ids]

    ## Assemble in a Pandas DataFrame
    results = pd.DataFrame({
        "d3mIndex": d3mIdx,
        "bounding_box": boxes,
        "confidence": score_list
    })

    # D3M metrics evaluator needs at least one prediction per image. If
    # RetinaNet does not return predictions for an image, add a dummy empty
    # prediction row for it and re-sort by d3mIndex.
    if empty_predictions_image_names:
        empty_predictions_df = self._fill_empty_predictions(
            empty_predictions_image_names, d3mIdx_image_mapping)
        results_df = pd.concat([results, empty_predictions_df
                                ]).sort_values("d3mIndex")
    else:
        results_df = results

    # Convert to DataFrame container
    results_df = d3m_DataFrame(results_df)

    # Column metadata, by position: (name, semantic types). Every column is
    # declared with structural type ``str``.
    # NOTE(review): 'confidence' is tagged schema.org/Integer although the
    # scores come from the model output -- confirm whether Float was
    # intended. Left unchanged to avoid altering downstream metadata.
    column_specs = (
        ("d3mIndex", (
            "http://schema.org/Integer",
            "https://metadata.datadrivendiscovery.org/types/PrimaryKey",
        )),
        ("bounding_box", (
            "http://schema.org/Text",
            "https://metadata.datadrivendiscovery.org/types/PredictedTarget",
            "https://metadata.datadrivendiscovery.org/types/BoundingPolygon",
        )),
        ("confidence", (
            "http://schema.org/Integer",
            "https://metadata.datadrivendiscovery.org/types/Score",
        )),
    )
    for col_index, (col_name, semantic_types) in enumerate(column_specs):
        selector = (metadata_base.ALL_ELEMENTS, col_index)
        col_dict = dict(results_df.metadata.query(selector))
        col_dict["structural_type"] = str
        col_dict["name"] = col_name
        col_dict["semantic_types"] = semantic_types
        results_df.metadata = results_df.metadata.update(selector, col_dict)

    return CallResult(results_df)