def train_one_epoch(model, optimizer, dataloader, writer, epoch, device,
                    loss_type='bce', write_steps=50):
    model.train()
    for step, ((context, context_len), (answer, answer_len)) in enumerate(
            dataloader, start=epoch * len(dataloader)):
        # print(context.shape, answer.shape)
        optimizer.zero_grad()

        context_embeddings = model(context.to(device))  # [batch_size, emb_size]
        answer_embeddings = model(answer.to(device))    # [batch_size, emb_size]

        if loss_type == 'bce':
            loss = bce(context_embeddings, answer_embeddings)
        elif loss_type == 'triplet':
            loss = triplet_loss(context_embeddings, answer_embeddings)
        else:
            raise NotImplementedError('No such loss')

        if step % write_steps == 0:
            print(f'Epoch = {epoch}, step = {step}, train_loss = {loss.item()}')
            write_metrics(writer, step, loss.item())

        loss.backward()
        optimizer.step()
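# ---------------------------------------------------------------------------
# Hedged sketch: the `bce` and `triplet_loss` helpers used above are not shown
# in this snippet.  For a dual-encoder retrieval setup, a common choice is to
# score every context against every answer in the batch and treat the diagonal
# as positives; the versions below are assumptions, not the original code.
import torch
import torch.nn.functional as F


def bce(context_embeddings, answer_embeddings):
    # [batch_size, batch_size] similarity matrix; the diagonal holds the
    # matching (context, answer) pairs, everything else is an in-batch negative.
    scores = context_embeddings @ answer_embeddings.t()
    targets = torch.eye(scores.size(0), device=scores.device)
    return F.binary_cross_entropy_with_logits(scores, targets)


def triplet_loss(context_embeddings, answer_embeddings, margin=1.0):
    # Rolls the answers by one position to build in-batch negatives.
    negatives = torch.roll(answer_embeddings, shifts=1, dims=0)
    return F.triplet_margin_loss(context_embeddings, answer_embeddings,
                                 negatives, margin=margin)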
def create_models(backbone_retinanet, num_classes, weights, args, num_gpus=0,
                  freeze_backbone=False, lr=1e-5, config=None):
    """ Creates three models (model, training_model, prediction_model).

    Args
        backbone_retinanet: A function to call to create a retinanet model with a given backbone.
        num_classes: The number of classes to train.
        weights: The weights to load into the model.
        args: Parsed command-line arguments (used to select the regression loss and its weight).
        num_gpus: The number of GPUs to use for training.
        freeze_backbone: If True, disables learning for the backbone.
        lr: Learning rate for the optimizer.
        config: Config parameters, None indicates the default configuration.

    Returns
        model: The base model. This is also the model that is saved in snapshots.
        training_model: The training model. If num_gpus=0, this is identical to model.
        prediction_model: The model wrapped with utility functions to perform object detection
            (applies regression values and performs NMS).
    """
    modifier = freeze_model if freeze_backbone else None

    # Keras recommends initialising a multi-gpu model on the CPU to ease weight sharing,
    # and to prevent OOM errors.
    # optionally wrap in a parallel model
    if num_gpus > 1:
        from keras.utils import multi_gpu_model
        with tf.device('/cpu:0'):
            model = model_with_weights(backbone_retinanet(num_classes, modifier=modifier),
                                       weights=weights, skip_mismatch=True)
        training_model = multi_gpu_model(model, gpus=num_gpus)
    else:
        model = model_with_weights(backbone_retinanet(num_classes, modifier=modifier),
                                   weights=weights, skip_mismatch=True)
        training_model = model

    # make prediction model
    prediction_model = retinanet_bbox(model=model)

    # compile model
    training_model.compile(
        loss={
            'regression': losses.iou_loss(args.loss, args.loss_weight),
            'classification': losses.focal(),
            'centerness': losses.bce(),
        },
        optimizer=keras.optimizers.adam(lr=lr))

    return model, training_model, prediction_model
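# ---------------------------------------------------------------------------
# Hedged sketch: `model_with_weights` is not defined in this snippet.  In
# keras-retinanet-style code bases it is usually a thin wrapper that loads
# weights by layer name when a weights path is given; the version below is an
# assumption about that helper, not necessarily the original implementation.
def model_with_weights(model, weights, skip_mismatch):
    """ Load weights into the model if a weights path is provided. """
    if weights is not None:
        model.load_weights(weights, by_name=True, skip_mismatch=skip_mismatch)
    return model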
def __init__(self, backbone):
    # a dictionary mapping custom layer names to the correct classes
    self.custom_objects = {
        'UpsampleLike': layers.UpsampleLike,
        'PriorProbability': initializers.PriorProbability,
        'RegressBoxes': layers.RegressBoxes,
        'FilterDetections': layers.FilterDetections,
        'Anchors': layers.Anchors,
        'ClipBoxes': layers.ClipBoxes,
        '_focal': losses.focal(),
        'bce_': losses.bce(),
        'iou_': losses.iou(),
    }

    self.backbone = backbone
    self.validate()
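# Hedged usage sketch: `custom_objects` is typically forwarded to Keras when a
# saved snapshot is deserialized, so the custom layers and loss closures above
# can be resolved.  The call below is illustrative, not from the original code:
#
#     model = keras.models.load_model(snapshot_path, custom_objects=backbone.custom_objects)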
def evaluate(model, dataloader, writer, epoch, device, loss_type='bce'):
    contexts = []
    answers = []
    model.eval()
    loss_history = []

    for (context, context_len), (answer, answer_len) in dataloader:
        context_embeddings = model(context.to(device))  # [batch_size, emb_size]
        answer_embeddings = model(answer.to(device))    # [batch_size, emb_size]

        if loss_type == 'bce':
            loss = bce(context_embeddings, answer_embeddings)
        elif loss_type == 'triplet':
            loss = triplet_loss(context_embeddings, answer_embeddings)
        else:
            raise NotImplementedError('No such loss')

        loss_history.append(loss.item())
        contexts.append(context_embeddings.cpu().detach().numpy())
        answers.append(answer_embeddings.cpu().detach().numpy())

    loss_value = np.mean(loss_history)
    # Stack the per-batch embeddings; concatenation also handles a smaller final batch.
    contexts = np.concatenate(contexts, axis=0)
    answers = np.concatenate(answers, axis=0)
    emb_size = answers.shape[1]

    faiss_index = faiss.IndexFlat(emb_size)
    faiss_index.verbose = True
    faiss_index.add(answers)

    _, indexes = faiss_index.search(contexts, k=100)

    mrr = calculate_mrr(y_true=np.arange(indexes.shape[0]).reshape(-1, 1),
                        preds=indexes)
    write_metrics(writer, epoch * len(dataloader), loss_value, mrr=mrr, prefix='eval')
    print(f'Epoch = {epoch}, step = {epoch * len(dataloader)}, '
          f'eval_loss = {loss_value}, mrr = {mrr}')
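# ---------------------------------------------------------------------------
# Hedged sketch: `calculate_mrr` is not included in this snippet.  A minimal
# implementation consistent with the call above (y_true of shape [n, 1],
# preds of shape [n, k]) could look like this; it is an assumption.
import numpy as np


def calculate_mrr(y_true, preds):
    reciprocal_ranks = []
    for true_idx, candidates in zip(y_true.ravel(), preds):
        hits = np.where(candidates == true_idx)[0]
        # Reciprocal rank is 0 when the true answer falls outside the top-k.
        reciprocal_ranks.append(1.0 / (hits[0] + 1) if hits.size else 0.0)
    return float(np.mean(reciprocal_ranks))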
def validate(self, epoch, output_save):
    self.model.eval()
    batch_loss_bce = 0.0
    batch_loss_l1 = 0.0
    batch_iou = 0.0

    vis_save = os.path.join(output_save, "epoch%02d" % (epoch + 1))
    n_batches = len(self.dataloader_val)

    with torch.no_grad():
        for idx, sample in enumerate(self.dataloader_val):
            input = sample['occ_grid'].to(self.device)
            target_occ = sample['occ_gt'].to(self.device)
            target_df = sample['occ_df_gt'].to(self.device)
            names = sample['name']

            # ===================forward=====================
            output_occ = self.model(input)
            loss_bce = losses.bce(output_occ, target_occ)

            # Convert occ to df to calculate l1 loss
            output_df = utils.occs_to_dfs(output_occ, trunc=args.truncation, pred=True)
            loss_l1 = losses.l1(output_df, target_df)

            iou = metric.iou_occ(output_occ, target_occ)

            # ===================log========================
            batch_loss_bce += loss_bce.item()
            batch_loss_l1 += loss_l1.item()
            batch_iou += iou

            # save the predictions at the end of the epoch
            # if epoch > args.save_epoch and (idx + 1) == n_batches - 1:
            #     pred_occs = output_occ[:args.n_vis + 1]
            #     target_occs = target_occ[:args.n_vis + 1]
            #     names = names[:args.n_vis + 1]
            #     utils.save_predictions(vis_save, args.model_name, args.gt_type, names,
            #                            pred_dfs=None, target_dfs=None,
            #                            pred_occs=pred_occs, target_occs=target_occs)

        val_loss_bce = batch_loss_bce / (idx + 1)
        val_loss_l1 = batch_loss_l1 / (idx + 1)
        mean_iou = batch_iou / (idx + 1)

    return val_loss_bce, val_loss_l1, mean_iou
def train(self):
    self.model.train()
    batch_loss = 0.0

    for idx, sample in enumerate(self.dataloader_train):
        input = sample['occ_grid'].to(self.device)
        target_occ = sample['occ_gt'].to(self.device)

        # zero the parameter gradients
        self.optimizer.zero_grad()

        # ===================forward=====================
        output_occ = self.model(input)
        loss = losses.bce(output_occ, target_occ)

        # ===================backward + optimize====================
        loss.backward()
        self.optimizer.step()

        # ===================log========================
        batch_loss += loss.item()

    train_loss = batch_loss / (idx + 1)
    return train_loss
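# ---------------------------------------------------------------------------
# Hedged sketch: the `losses` module shared by the two occupancy trainers above
# is not included here.  For an occupancy grid predicted as logits, plausible
# `bce` and `l1` wrappers are shown below; they are assumptions, not the
# project's actual implementation.
import torch.nn.functional as F


def bce(pred_occ, target_occ):
    # Binary cross-entropy on raw occupancy logits against a {0, 1} target grid.
    return F.binary_cross_entropy_with_logits(pred_occ, target_occ)


def l1(pred_df, target_df):
    # Mean absolute error between predicted and ground-truth distance fields.
    return F.l1_loss(pred_df, target_df)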
def train(self, epochs, backbone_name, evaluation):
    # Compile model
    self.model.compile(
        loss={
            'regression': losses.iou(),
            'classification': losses.focal(),
            'centerness': losses.bce(),
        },
        optimizer=keras.optimizers.adam(lr=1e-5)
        # optimizer=keras.optimizers.sgd(lr=1e-5, momentum=0.9, decay=1e-5, nesterov=True)
    )

    # create the generators
    train_generator, validation_generator = create_generators(self.config, self.dataset)

    # create the callbacks
    callbacks = create_callbacks(
        self.config,
        backbone_name,
        self.model,
        self.training_model,
        self.prediction_model,
        validation_generator,
        evaluation,
        self.log_dir,
    )

    # start training
    return self.training_model.fit_generator(
        generator=train_generator,
        initial_epoch=0,
        steps_per_epoch=self.config.STEPS_PER_EPOCH,
        epochs=epochs,
        verbose=1,
        callbacks=callbacks,
        max_queue_size=10,
        validation_data=validation_generator)
def main(args=None):
    # create object that stores backbone information
    backbone = models.backbone(args.backbone)

    # create the generators
    train_generator, validation_generator = create_generators(args, backbone.preprocess_image)

    # create the model
    if args.snapshot is not None:
        print('Loading model, this may take a second...')
        model = models.load_model(args.snapshot, backbone_name=args.backbone)
        training_model = model
        anchor_params = None
        if args.config and 'anchor_parameters' in args.config:
            anchor_params = parse_anchor_parameters(args.config)
        prediction_model = retinanet_bbox(model=model, anchor_params=anchor_params)

        # compile model
        training_model.compile(
            loss={
                'regression': losses.iou_loss(args.loss, args.loss_weight),
                'classification': losses.focal(),
                'centerness': losses.bce(),
            },
            optimizer=keras.optimizers.Adam(lr=1e-5)
            # optimizer=keras.optimizers.sgd(lr=1e-5, momentum=0.9, decay=1e-5, nesterov=True)
        )
    else:
        weights = args.weights
        # default to imagenet if nothing else is specified
        if weights is None and args.imagenet_weights:
            weights = backbone.download_imagenet()

        print('Creating model, this may take a second...')
        model, training_model, prediction_model = create_models(
            backbone_retinanet=backbone.retinanet,
            num_classes=train_generator.num_classes(),
            weights=weights,
            num_gpus=args.num_gpus,
            freeze_backbone=args.freeze_backbone,
            lr=args.lr,
            config=args.config,
            args=args)

    # wrap the training model in a data-parallel copy (done outside the branch
    # above so `parallel_model` is defined on the snapshot path as well)
    parallel_model = multi_gpu_model(training_model, gpus=2)
    parallel_model.compile(
        loss={
            'regression': losses.iou_loss(args.loss, args.loss_weight),
            'classification': losses.focal(),
            'centerness': losses.bce(),
        },
        optimizer=keras.optimizers.Adam(lr=1e-4))

    # print model summary
    # print(model.summary())

    # this lets the generator compute backbone layer shapes using the actual backbone model
    if 'vgg' in args.backbone or 'densenet' in args.backbone:
        train_generator.compute_shapes = make_shapes_callback(model)
        if validation_generator:
            validation_generator.compute_shapes = train_generator.compute_shapes

    # create the callbacks
    callbacks = create_callbacks(
        model,
        training_model,
        prediction_model,
        validation_generator,
        args,
    )

    if not args.compute_val_loss:
        validation_generator = None

    # start training
    parallel_model.fit_generator(generator=train_generator,
                                 steps_per_epoch=len(train_generator),
                                 epochs=args.epochs,
                                 verbose=1,
                                 callbacks=callbacks,
                                 validation_data=validation_generator)
def main(args=None):
    # parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # create object that stores backbone information
    backbone = models.backbone(args.backbone)

    # make sure keras is the minimum required version
    check_keras_version()

    # optionally choose specific GPU
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    keras.backend.tensorflow_backend.set_session(get_session())

    # optionally load config parameters
    if args.config:
        args.config = read_config_file(args.config)

    # create the generators
    train_generator, validation_generator = create_generators(args, backbone.preprocess_image)

    # create the model
    if args.snapshot is not None:
        print('Loading model, this may take a second...')
        model = models.load_model(args.snapshot, backbone_name=args.backbone)
        training_model = model
        anchor_params = None
        if args.config and 'anchor_parameters' in args.config:
            anchor_params = parse_anchor_parameters(args.config)
        prediction_model = retinanet_bbox(model=model, anchor_params=anchor_params)

        # compile model
        training_model.compile(
            loss={
                'regression': losses.iou(),
                'classification': losses.focal(),
                'centerness': losses.bce(),
            },
            optimizer=keras.optimizers.adam(lr=1e-5)
            # optimizer=keras.optimizers.sgd(lr=1e-5, momentum=0.9, decay=1e-5, nesterov=True)
        )
    else:
        weights = args.weights
        # default to imagenet if nothing else is specified
        if weights is None and args.imagenet_weights:
            weights = backbone.download_imagenet()

        print('Creating model, this may take a second...')
        model, training_model, prediction_model = create_models(
            backbone_retinanet=backbone.retinanet,
            num_classes=train_generator.num_classes(),
            weights=weights,
            num_gpus=args.num_gpus,
            freeze_backbone=args.freeze_backbone,
            lr=args.lr,
            config=args.config)

    # print model summary
    # print(model.summary())

    # this lets the generator compute backbone layer shapes using the actual backbone model
    if 'vgg' in args.backbone or 'densenet' in args.backbone:
        train_generator.compute_shapes = make_shapes_callback(model)
        if validation_generator:
            validation_generator.compute_shapes = train_generator.compute_shapes

    # create the callbacks
    callbacks = create_callbacks(
        model,
        training_model,
        prediction_model,
        validation_generator,
        args,
    )

    if not args.compute_val_loss:
        validation_generator = None

    # start training
    return training_model.fit_generator(
        generator=train_generator,
        initial_epoch=0,
        steps_per_epoch=args.steps,
        epochs=args.epochs,
        verbose=1,
        callbacks=callbacks,
        workers=args.workers,
        use_multiprocessing=args.multiprocessing,
        max_queue_size=args.max_queue_size,
        validation_data=validation_generator)
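# Hedged usage sketch: the entry point above is normally run as a script.  The
# flag names follow the attributes accessed on `args` in the code (backbone,
# gpu, snapshot, epochs, steps, workers, ...); the concrete command below is an
# assumption about how `parse_args` exposes them:
#
#     python train.py --backbone resnet50 --gpu 0 --epochs 50 --steps 10000
#
if __name__ == '__main__':
    main()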