class ResNet50_FasterRCNN:
    """Thin wrapper around a Faster R-CNN detector with a ResNet-50 FPN backbone.

    The wrapped detector is stored in ``self.model``; every method simply
    forwards to it.  ``train``, ``eval`` and ``to`` return ``self`` so that
    the wrapper can be chained the same way an ``nn.Module`` can
    (e.g. ``net = ResNet50_FasterRCNN().to(device)``).
    """

    def __init__(self, pretrained=False):
        """Build the detector.

        Args:
            pretrained: forwarded to ``resnet_fpn_backbone`` to select
                pretrained backbone weights.
        """
        # Building our FasterRCNN model for objects detection
        backbone = resnet_fpn_backbone('resnet50', pretrained=pretrained)
        # 4 object classes + 1 for the background class
        num_classes = 4 + 1

        anchor_generator = AnchorGenerator(sizes=(40, 60, 150, 200, 250),
                                           aspect_ratios=(0.7, 1.0, 1.3))
        self.model = FRCNN(backbone,
                           num_classes=num_classes,
                           rpn_anchor_generator=anchor_generator)

    def train(self):
        """Put the wrapped model in training mode and return ``self``."""
        self.model.train()
        return self

    def to(self, device):
        """Move the wrapped model to ``device`` and return ``self``."""
        self.model.to(device)
        return self

    def eval(self):
        """Put the wrapped model in evaluation mode and return ``self``."""
        self.model.eval()
        return self

    def parameters(self):
        """Return the parameter iterator of the wrapped model."""
        return self.model.parameters()

    def get_state_dict(self):
        """Return the state dict of the wrapped model."""
        return self.model.state_dict()

    def set_state_dict(self, state_dict):
        """Load ``state_dict`` into the wrapped model."""
        self.model.load_state_dict(state_dict)

    def fit_batch(self, images, target):
        """Call the wrapped model with images and targets and return its output."""
        return self.model(images, target)

    def predict_batch(self, images):
        """Call the wrapped model with images only and return its output."""
        return self.model(images)
示例#2
0
def train_mask():
    """Build a MobileNetV2-based Faster R-CNN and evaluate it on UNIMIB data.

    The last 50 indices of a seeded random permutation form the test split,
    the remainder the training split.  The training call itself is currently
    commented out, so only ``evaluate`` runs, once per epoch.
    """
    torch.manual_seed(1)
    dataset = unimib_data.UNIMIBDataset(get_transform(train=True))
    dataset_test = unimib_data.UNIMIBDataset(get_transform(train=False))
    indices = torch.randperm(len(dataset)).tolist()
    dataset = torch.utils.data.Subset(dataset, indices[:-50])
    dataset_test = torch.utils.data.Subset(dataset_test, indices[-50:])

    # define training and validation data loaders
    data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=1, shuffle=True,
        collate_fn=utils.collate_fn)

    data_loader_test = torch.utils.data.DataLoader(
        dataset_test, batch_size=1, shuffle=False,
        collate_fn=utils.collate_fn)

    # device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    device = "cpu"
    num_classes = 74

    # The pretrained resnet50-FPN detector that used to be created here was
    # overwritten before it was ever used, so it is no longer built.
    backbone = torchvision.models.mobilenet_v2(pretrained=True).features
    # Iterating the Sequential itself yields sub-modules, and setting
    # ``requires_grad`` on a module freezes nothing; freeze the parameters.
    for param in backbone.parameters():
        param.requires_grad = False
    backbone.out_channels = 1280
    model = FasterRCNN(backbone,
                       num_classes=num_classes)
    # NOTE(review): this keeps only the RPN and the RoI heads in a plain
    # Sequential, dropping the transform and the backbone; confirm that
    # ``evaluate`` can actually call the result.
    model = torch.nn.Sequential(
                                model.rpn,
                                model.roi_heads
                                )
    print(model)
    # model = get_instance_segmentation_model(num_classes)
    model.to(device)

    # construct an optimizer
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005,
                                momentum=0.9, weight_decay=0.0005)

    # and a learning rate scheduler which decreases the learning rate by
    # 10x every 3 epochs
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=3,
                                                   gamma=0.1)
    num_epochs = 1
    for epoch in range(num_epochs):
        # train for one epoch, printing every 10 iterations
        # train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
        # update the learning rate
        # lr_scheduler.step()
        # evaluate on the test dataset
        evaluate(model, data_loader_test, device=device)
示例#3
0
def train(args):
    """Train a ``FasterRCNN`` model with Adam and save it when done.

    Args:
        args: namespace providing ``seed``, ``resize``, ``lr``, ``epochs``,
            ``log_interval``, ``model_directory`` and ``fn``.
    """
    # use_cuda = args.num_gpus > 0
    # logger.debug("Number of gpus available - {}".format(args.num_gpus))
    # kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    # seed every random number generator in use
    torch.manual_seed(args.seed)
    if device.type == 'cuda':
        torch.cuda.manual_seed(args.seed)

    train_loader = _get_train_data_loader(args.resize)
    test_loader = _get_test_data_loader(args.resize)

    sampled_count = len(train_loader.sampler)
    dataset_count = len(train_loader.dataset)
    logger.debug("Processes {}/{} ({:.0f}%) of train data".format(
        sampled_count, dataset_count, 100. * sampled_count / dataset_count))

    model = FasterRCNN().to(device)
    trainable = [weight for weight in model.parameters()
                 if weight.requires_grad]
    optimizer = optim.Adam(trainable, lr=args.lr)

    for epoch in range(1, args.epochs + 1):
        print("Epoch: ", epoch)

        model.train()

        for batch_idx, batch in enumerate(train_loader):
            batch_images, batch_targets = batch

            # move the whole batch onto the training device
            images = [img.to(device) for img in batch_images]
            targets = []
            for target in batch_targets:
                targets.append(
                    {key: value.to(device) for key, value in target.items()})

            # in training mode the model returns a dict of loss terms
            loss_dict = model(images, targets)
            losses = sum(loss_dict.values())

            model.zero_grad()
            losses.backward()
            optimizer.step()

            if batch_idx % args.log_interval != 0:
                continue
            logger.info(
                'Train Epoch: {} [{}/{} ({:.0f}%)] Loss: {:.6f}'.format(
                    epoch, batch_idx * len(images),
                    len(train_loader.sampler),
                    100. * batch_idx / len(train_loader), losses.item()))

    save_model(model, args.model_directory, args.fn)
示例#4
0
class FasterRCNN_Encoder(nn.Module):
    """Faster R-CNN used as a region-feature extractor."""

    def __init__(self, out_dim=None, fine_tune=False):
        """Build the detector and load the COCO checkpoint into it.

        Args:
            out_dim: if given, ``box_head.fc7`` is replaced by a fresh
                ``nn.Linear(1024, out_dim)`` after the checkpoint is loaded.
            fine_tune: when False every detector parameter is frozen.
        """
        super().__init__()
        detector = FasterRCNN(resnet_fpn_backbone('resnet50', False),
                              num_classes=91,
                              rpn_post_nms_top_n_train=200,
                              rpn_post_nms_top_n_test=100)
        self.faster_rcnn = detector
        weights = load_state_dict_from_url(
            model_urls['fasterrcnn_resnet50_fpn_coco'], progress=True)
        detector.load_state_dict(weights)

        # swap the last linear layer of the box head when a specific
        # feature size is requested
        if out_dim is not None:
            detector.roi_heads.box_head.fc7 = nn.Linear(
                in_features=1024, out_features=out_dim)

        # for captioning the detector is usually kept fixed
        if fine_tune:
            return
        for weight in detector.parameters():
            weight.requires_grad = False

    def forward(self, images, targets=None):
        '''
        Run the detector up to (and including) the box head.
        Args:
            images: List[Tensor], a list of image data
            targets: ground-truth data forwarded to the transform and the
                     RPN, used only in fine-tune
        Returns:
            (box_features, proposal_losses): the output of ``box_head`` on
            the RoI-pooled proposal features, and the losses returned by
            the RPN
        '''
        detector = self.faster_rcnn
        image_list, targets = detector.transform(images, targets)

        # base features produced by the backbone network
        feature_maps = detector.backbone(image_list.tensors)
        if isinstance(feature_maps, torch.Tensor):
            # a single-tensor backbone output is wrapped as a one-entry mapping
            feature_maps = OrderedDict([(0, feature_maps)])

        # region proposals (and their losses) from the RPN
        proposals, proposal_losses = detector.rpn(
            image_list, feature_maps, targets)

        # pool the features of every proposal, then run the box head on them
        heads = detector.roi_heads
        pooled = heads.box_roi_pool(
            feature_maps, proposals, image_list.image_sizes)
        return heads.box_head(pooled), proposal_losses
示例#5
0
def model_creation(pretrain=True,
                   num_classes=5,
                   num_epoch=20,
                   device="cuda:0"):
    """Create a MobileNetV2-backed Faster R-CNN with optimizer and scheduler.

    Args:
        pretrain: whether the MobileNetV2 backbone loads pretrained weights.
        num_classes: number of classes given to the detector head.
        num_epoch: returned unchanged for the caller's training loop.
        device: target device.  The default ``"cuda:0"`` falls back to the
            CPU when CUDA is unavailable; any other value is used as given.

    Returns:
        Tuple ``(model, optimizer, lr_scheduler, num_epoch)``.
    """
    # The resnet50-FPN detector that used to be built here was discarded
    # before use, and ``num_classes`` / ``pretrain`` were silently ignored.
    backbone = torchvision.models.mobilenet_v2(pretrained=pretrain).features
    backbone.out_channels = 1280
    # one feature map -> one tuple of sizes and one tuple of aspect ratios
    anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512), ),
                                       aspect_ratios=((0.5, 1.0, 2.0), ))
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                    output_size=7,
                                                    sampling_ratio=2)
    model = FasterRCNN(backbone,
                       num_classes=num_classes,
                       rpn_anchor_generator=anchor_generator,
                       box_roi_pool=roi_pooler)
    if device == 'cuda:0':
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        # compare the device type; a torch.device never equals the str "cpu"
        if device.type == "cpu":
            print("cuda is no available")

    model.to(device)

    # construct an optimizer
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params,
                                lr=0.005,
                                momentum=0.9,
                                weight_decay=0.0005)
    # and a learning rate scheduler
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=3,
                                                   gamma=0.1)

    return model, optimizer, lr_scheduler, num_epoch
示例#6
0
class FasterRCNNFood:
    """Faster R-CNN detector bundled with its optimizer and LR scheduler.

    Provides training with optional checkpointing, single-image prediction
    and checkpoint loading, either to resume training or for inference.
    """

    def __init__(self,
                 backbone_name: str,
                 pretrained: bool = True,
                 finetune: bool = True,
                 num_classes: int = 2):
        """
        Build the detector, an Adam optimizer and a StepLR scheduler.
        Args:
            backbone_name (str): name forwarded to ``build_backbone``
            pretrained (bool = True): forwarded to ``build_backbone``
            finetune (bool = True): forwarded to ``build_backbone``
            num_classes (int = 2): number of classes of the detector head
        """
        self.__pretrained = pretrained
        self.__num_classes = num_classes
        self.__model_name = backbone_name
        backbone = build_backbone(backbone_name, pretrained, finetune)

        anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512), ),
                                           aspect_ratios=((0.5, 1.0, 2.0), ))

        # NOTE(review): some torchvision releases expect string feature-map
        # names ('0'); confirm against the installed version.
        roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
                                                        output_size=7,
                                                        sampling_ratio=2)

        self.model = FasterRCNN(backbone=backbone,
                                num_classes=num_classes,
                                rpn_anchor_generator=anchor_generator,
                                box_roi_pool=roi_pooler)

        self.params = [p for p in self.model.parameters() if p.requires_grad]
        self.optimizer = torch.optim.Adam(params=self.params,
                                          lr=0.005,
                                          weight_decay=0.0005)

        self.lr_scheduler = torch.optim.lr_scheduler.StepLR(
            optimizer=self.optimizer, step_size=3, gamma=0.1)

    @staticmethod
    def _normalize_save_epochs(epoch_save_ckpt, num_epochs):
        """Turn the ``epoch_save_ckpt`` argument into a list of epoch indices."""
        if epoch_save_ckpt is None:
            return []
        if isinstance(epoch_save_ckpt, int):
            if epoch_save_ckpt == -1:
                return [num_epochs - 1]
            return [epoch_save_ckpt]
        return list(epoch_save_ckpt)

    def train(self,
              data_loader: DataLoader,
              data_loader_test: DataLoader,
              num_epochs: int = 10,
              use_cuda: bool = True,
              epoch_save_ckpt: Union[int, list] = None,
              dir: str = None):
        """
        Method to train FasterRCNNFood model.
        Args:
            data_loader (torch.utils.data.DataLoader): data loader to train model on
            data_loader_test (torch.utils.data.DataLoader): data loader to evaluate model on
            num_epochs (int = 10): number of epoch to train model
            use_cuda (bool = True): use cuda or not
            epoch_save_ckpt (list or int): Epoch(s) at which you want to save the model.
                If -1 save only last epoch. If None no checkpoint is saved.
            dir (str = "models/"): Directory where model are saved under the name "{model_name}_{date}_ep{epoch}.pth"
        """
        # Accept None / int / list alike; the membership test below would
        # otherwise raise TypeError for None or a plain int.
        save_epochs = self._normalize_save_epochs(epoch_save_ckpt, num_epochs)
        if not dir:
            dir = "models"
        dir = Path(dir)
        dir.mkdir(parents=True, exist_ok=True)
        # choose device
        if use_cuda and torch.cuda.is_available():
            device = torch.device("cuda")
        else:
            device = torch.device("cpu")

        self.model.to(device)
        writer = SummaryWriter()

        for epoch in range(num_epochs):
            # train for one epoch, printing every 50 iterations
            train_one_epoch(self.model,
                            self.optimizer,
                            data_loader,
                            device,
                            epoch,
                            print_freq=50,
                            writer=writer)
            # update the learning rate
            self.lr_scheduler.step()
            # evaluate on the test dataset
            evaluate(self.model,
                     data_loader_test,
                     device=device,
                     writer=writer,
                     epoch=epoch)
            # save checkpoint
            if epoch in save_epochs:
                self.save_checkpoint(dir.as_posix(), epoch)
        writer.close()
        print("That's it!")

    def save_checkpoint(self, dir: str, epoch: int):
        """
        Save a model checkpoint at a given epoch.
        Args:
            dir: dir folder to save the .pth file
            epoch: epoch the model is
        """
        state = {
            'epoch': epoch + 1,
            'state_dict': self.model.state_dict(),
            'optimizer': self.optimizer.state_dict(),
            'num_classes': self.__num_classes,
            'pretrained': self.__pretrained,
            "model_name": self.__model_name
        }
        now = datetime.now()
        filename = "{model_name}_{date}_ep{epoch}.pth".format(
            model_name=self.__model_name,
            date=now.strftime("%b%d_%H-%M"),
            epoch=epoch)
        target = Path(dir) / filename
        torch.save(state, target)
        # the message used to be a bare string expression and was never shown
        print("Checkpoint saved : {}".format(target))

    def predict(self, dataset, idx):
        """
        Run the model on one dataset item on the CPU.
        Args:
            dataset: indexable dataset yielding ``(image, target)`` pairs
            idx: index of the item to predict on
        Returns:
            (image, prediction) where prediction is the model output for
            that single image
        """
        img, _ = dataset[idx]
        # Tensor.to is not in-place: keep the returned tensor
        img = img.to("cpu")
        self.model.eval()
        self.model.to("cpu")
        # inference only, no autograd graph needed
        with torch.no_grad():
            pred = self.model([img])
        return img, pred[0]

    @staticmethod
    def load_checkpoint(filename: str,
                        cuda: bool = True) -> ("FasterRCNNFood", int):
        """
        Load a model checkpoint to continue training.
        Args:
            filename (str): filename/path of the checkpoint.pth
            cuda (bool = True): use cuda

        Returns:
            (FasterRCNNFood) model
            (int) number of epoch + 1 the model was trained with
            Returns None when the checkpoint file does not exist.
        """
        device = torch.device("cuda") if (
            cuda and torch.cuda.is_available()) else torch.device("cpu")
        if not Path(filename).exists():
            print("=> no checkpoint found at '{}'".format(filename))
            return None

        print("=> loading checkpoint '{}'".format(filename))
        checkpoint = torch.load(filename, map_location=device)
        # Load params
        pretrained = checkpoint['pretrained']
        num_classes = checkpoint["num_classes"]
        start_epoch = checkpoint['epoch']
        model_name = checkpoint['model_name']
        # Build model key/architecture.  num_classes must go by keyword:
        # the third positional parameter of __init__ is ``finetune``.
        model = FasterRCNNFood(model_name,
                               pretrained,
                               num_classes=num_classes)
        # Update model and optimizer
        model.model.load_state_dict(checkpoint['state_dict'])
        model.optimizer.load_state_dict(checkpoint['optimizer'])

        model.model = model.model.to(device)
        # now individually transfer the optimizer parts...
        for state in model.optimizer.state.values():
            for k, v in state.items():
                if isinstance(v, torch.Tensor):
                    state[k] = v.to(device)

        print("=> loaded checkpoint '{}' (epoch {})".format(
            filename, checkpoint['epoch']))
        return model, start_epoch

    @staticmethod
    def load_for_inference(filename: str,
                           cuda: bool = True) -> "FasterRCNNFood":
        """
        Load a model checkpoint to make inference.
        Args:
            filename (str): filename/path of the checkpoint.pth
            cuda (bool = True): use cuda
        Returns:
            (FasterRCNNFood) model in eval mode, or None when the checkpoint
            file does not exist
        """
        device = torch.device("cuda") if (
            cuda and torch.cuda.is_available()) else torch.device("cpu")
        if not Path(filename).exists():
            print("=> no checkpoint found at '{}'".format(filename))
            return None

        print("=> loading checkpoint '{}'".format(filename))
        checkpoint = torch.load(filename, map_location=device)
        # Load params
        pretrained = checkpoint['pretrained']
        num_classes = checkpoint["num_classes"]
        model_name = checkpoint['model_name']
        # Build model key/architecture (num_classes by keyword, see
        # load_checkpoint)
        model = FasterRCNNFood(model_name,
                               pretrained,
                               num_classes=num_classes)
        # Update model
        model.model.load_state_dict(checkpoint['state_dict'])
        model.model = model.model.to(device)
        model.model = model.model.eval()

        print("=> loaded checkpoint '{}'".format(filename))
        return model
示例#7
0
# In[10]:

# Train on the GPU when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# two classes only: background and person
num_classes = 2

# build the model with the helper and move it to the chosen device
model = get_instance_segmentation_model(num_classes)
model.to(device)

# optimise only the parameters that still require gradients
params = []
for p in model.parameters():
    if p.requires_grad:
        params.append(p)
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9,
                            weight_decay=0.0005)

# scheduler that divides the learning rate by 10 every 3 epochs
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3,
                                               gamma=0.1)

# In[11]:

# number of epochs to train for
num_epochs = 10
示例#8
0
    # Fragment of a larger function (its header is outside this view): builds
    # a Faster R-CNN on top of a ResNet-34 trunk and sets up an optimizer and
    # a cyclic learning-rate scheduler.
    model_res34 = torchvision.models.resnet34(pretrained=False)

    # presumably copies the weights of `sim_clr` into the fresh ResNet-34;
    # verify against network_helpers
    network_helpers.copy_weights_between_models(sim_clr, model_res34)

    # keep every child module except the last one and use them as the backbone
    modules = list(model_res34.children())[:-1]
    backbone = nn.Sequential(*modules)
    # FasterRCNN reads `out_channels` from the backbone
    backbone.out_channels = 512
    model_fnn = FasterRCNN(backbone=backbone, num_classes=10)

    # ImageNet
    #in_features = model_fnn.roi_heads.box_predictor.cls_score.in_features
    #model_fnn.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes=10)

    model_fnn.to(device)
    params_fnn = [p for p in model_fnn.parameters() if p.requires_grad]

    # optimise the parameters of both networks together; `params_cnn` is
    # defined outside this fragment
    params_cnn_fnn = list(params_cnn) + list(params_fnn)

    optimizer = optim.SGD(params_cnn_fnn,
                          lr=args.lr,
                          momentum=0.6,
                          weight_decay=0.0005)

    # NOTE(review): CyclicLR drives the learning rate between base_lr and
    # max_lr, so `args.lr` above may have no lasting effect - confirm
    lr_scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer,
                                                     base_lr=1e-3,
                                                     max_lr=6e-3)
    # lr_scheduler = CosineAnnealingLR(optimizer, 50, eta_min=1e-3, last_epoch=-1)

    model_save_path = args.save_final_model_path
示例#9
0
    # Start training Faster R-CNN.
    # Fragment of a larger scope (its header is outside this view): builds the
    # detector from `cfg`, then hands it to `train_model`.
    backbone = backboneNet_efficient()  # use efficientnet as our backbone
    backboneFPN = backboneWithFPN(backbone)  # add FPN

    anchor_generator = AnchorGenerator(cfg.anchor_sizes, cfg.aspect_ratios)

    model_ft = FasterRCNN(backboneFPN,
                          num_classes=cfg.num_classes,
                          rpn_anchor_generator=anchor_generator,
                          min_size=cfg.min_size,
                          max_size=cfg.max_size)

    model_ft.to(device)

    optimizer_ft = optim.SGD(model_ft.parameters(),
                             lr=cfg.learning_rate,
                             momentum=cfg.momentum,
                             weight_decay=cfg.weight_decay)

    # NOTE(review): this rebinds the name `lr_scheduler` (used here as a
    # module) to a scheduler instance.  If this code sits inside a function,
    # the assignment makes the name local and this line would raise
    # UnboundLocalError - confirm the enclosing scope.
    lr_scheduler = lr_scheduler.MultiStepLR(optimizer_ft,
                                            milestones=cfg.milestones,
                                            gamma=cfg.gamma)

    # train_model returns the model that is kept as the result
    model_ft = train_model(model_ft,
                           train_loader,
                           valid_loader,
                           optimizer_ft,
                           lr_scheduler,
                           num_epochs=cfg.epochs)
示例#10
0
def main():
    """Train and evaluate the oyster detector for 10 epochs.

    The model comes from ``get_model_instance_segmentation``.  The two
    detectors that used to be built beforehand (a pretrained resnet50-FPN
    one and a MobileNetV2 one) were overwritten before use and are no
    longer constructed.
    """
    num_classes = 2

    # Train classifier
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')

    dataset = OysterDataset(
        "/Users/darwin/Downloads/Bay project/Training",
        "/Users/darwin/Downloads/Bay project/Training/TrainingDetectionLabels2.csv",
        get_transform(train=True))

    dataset_test = OysterDataset(
        "/Users/darwin/Downloads/Bay project/Training",
        "/Users/darwin/Downloads/Bay project/Training/TrainingDetectionLabels2.csv",
        get_transform(train=False))

    indices = torch.randperm(len(dataset)).tolist()

    # Hold out the last 50 shuffled indices for testing and train on at most
    # 100 of the others.  This equals the former [:100] / [-50:] split when
    # there are at least 150 samples, and keeps the two sets disjoint when
    # there are fewer.
    test_indices = indices[-50:]
    train_indices = indices[:-50][:100]

    dataset = torch.utils.data.Subset(dataset, train_indices)
    dataset_test = torch.utils.data.Subset(dataset_test, test_indices)

    data_loader = torch.utils.data.DataLoader(dataset,
                                              batch_size=2,
                                              shuffle=False,
                                              num_workers=4,
                                              collate_fn=utils.collate_fn)

    data_loader_test = torch.utils.data.DataLoader(dataset_test,
                                                   batch_size=2,
                                                   shuffle=False,
                                                   num_workers=4,
                                                   collate_fn=utils.collate_fn)

    model = get_model_instance_segmentation(num_classes)

    model.to(device)

    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params,
                                lr=0.005,
                                momentum=0.8,
                                weight_decay=0.0005)

    # divide the learning rate by 10 every 3 epochs
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=3,
                                                   gamma=0.1)

    num_epochs = 10

    for epoch in range(num_epochs):
        train_one_epoch(model,
                        optimizer,
                        data_loader,
                        device,
                        epoch,
                        print_freq=10)

        lr_scheduler.step()

        evaluate(model, data_loader_test, device=device)

    print("That's it!")
示例#11
0
# Single-feature-map backbone assembled from the layers of `net`.
backbone = torch.nn.Sequential(net.conv1, net.bn1, net.relu, net.maxpool,
                               net.layer1, net.layer2, net.layer3, net.layer4)

# FasterRCNN reads the number of output channels from the backbone.
backbone.out_channels = 512

# The backbone yields ONE feature map, so `sizes` and `aspect_ratios` must
# each be a tuple holding ONE inner tuple.  Without the trailing comma the
# outer parentheses are plain grouping and the generator receives a flat
# tuple, which it treats as five feature maps with one size each.
anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                   aspect_ratios=((0.5, 1.0, 2.0),))
# NOTE(review): some torchvision releases expect string feature-map names
# ('0'); confirm against the installed version.
roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
                                                output_size=7,
                                                sampling_ratio=2)

model = FasterRCNN(backbone,
                   num_classes=91,
                   rpn_anchor_generator=anchor_generator,
                   box_roi_pool=roi_pooler)
model = model.cuda()

criterion = torch.nn.CrossEntropyLoss().cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9,
                            weight_decay=5e-4)
solver = Solver(model, "./models/rcnn_4.pth", trainLoader, valLoader,
                criterion, optimizer,
                logfile="./logs/rcnn_resnet18.log",
                print_freq=20, save_name="rcnn")

solver.train(4)



示例#12
0
class FasterRCNNMODEL:
    """Helper that builds, trains and evaluates a Faster R-CNN detector.

    Typical use: ``set_backbone(name)``, then ``set_model()``, then
    ``train_model`` or ``train_eval_model``.
    """
    # TODO: Later on enable passing params

    def __init__(self, model_params=None):
        """Store the (currently unused) ``model_params`` and pick the device."""
        self.params = model_params
        self.model = None
        self.optimizer = None
        self.device = torch.device(
            'cuda') if torch.cuda.is_available() else torch.device('cpu')

    def set_backbone(self, backbone):
        """
        Select the feature extractor stored in ``self.backbone``.
        :param backbone: string containing the backbone we want to use in
            the model ('mobilenet_v2' or 'resnet50'). Other names leave
            ``self.backbone`` untouched.
        :return: None
        """
        name = backbone.lower()
        if 'vgg' in name:
            # TODO: VGG support is not implemented yet
            pass
        elif 'mobilenet_v2' in name:
            self.backbone = torchvision.models.mobilenet_v2(
                pretrained=True).features
            self.backbone.out_channels = 1280
        elif 'resnet50' in name:
            # torchvision's ResNet has no ``.features`` attribute: build the
            # trunk from its children, dropping the final pooling and fc
            # layers. The last residual stage of resnet50 has 2048 channels.
            resnet = torchvision.models.resnet50(pretrained=True)
            self.backbone = torch.nn.Sequential(
                *list(resnet.children())[:-2])
            self.backbone.out_channels = 2048

    def set_model(self):
        """
        Set model and determine configuration
        :return: None, generate self.model and self.optimizer to be used for
            training and testing
        """
        num_classes = 7
        # Default values: box_score_thresh = 0.05, box_nms_thresh = 0.5
        kwargs = {
            'box_score_thresh': 0.3,
            'box_nms_thresh': 0.3,
            'box_detections_per_img': 6
        }
        # self.model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False,
        #                                                                   pretrained_backbone=True,
        #                                                                   **kwargs)
        self.model = FasterRCNN(self.backbone,
                                num_classes=num_classes,
                                **kwargs)

        in_features = self.model.roi_heads.box_predictor.cls_score.in_features
        self.model.roi_heads.box_predictor = FastRCNNPredictor(
            in_features, num_classes)

        # Allow Multiple GPUs:
        # if torch.cuda.device_count() > 1:
        #     self.model = nn.DataParallel(self.model)

        # use the device chosen in __init__ so that the model and the batches
        # moved in the training loops always end up on the same device
        self.model = self.model.to(self.device)

        # TODO: Enable user defined model params.  Until then the trainable
        # parameters are always optimised; previously ``params`` was left
        # unbound whenever ``model_params`` had been supplied.
        params = [p for p in self.model.parameters() if p.requires_grad]

        self.optimizer = torch.optim.SGD(params, lr=0.01)

    def _to_device(self, images, targets):
        """Move a batch of images and target dicts to ``self.device``."""
        images = [image.to(self.device) for image in images]
        targets = [{k: v.to(self.device)
                    for k, v in t.items()} for t in targets]
        return images, targets

    def train_model(self, train_loader, num_epochs):
        """
        Train (only!) of the model
        :param train_loader: DataLoader object
        :param num_epochs: int. Number of epochs to train the model
        :return: None,
        """
        self.model.train()  # Set to training mode
        for epoch in range(num_epochs):
            last_loss = None
            for images, targets in train_loader:
                images, targets = self._to_device(images, targets)

                # Zero Gradients
                self.optimizer.zero_grad()

                # in training mode the model returns a dict of loss terms
                loss_dict = self.model(images, targets)
                losses = sum(loss_dict.values())
                losses.backward()

                # Update weights
                self.optimizer.step()
                last_loss = losses.item()

            # report the loss of the last batch; nothing to report when the
            # loader yielded no batch at all
            if last_loss is not None:
                print('Train Loss = {:.4f}'.format(last_loss))

    def train_eval_model(self, train_loader, val_loader, num_epochs):
        """
        Train model and evaluate performance after each epoch
        :param train_loader: DataLoader object. Training images and targets
        :param val_loader: DataLoader object. Yields (image names, images,
            targets) triples
        :param num_epochs: int. Number of epochs for training and validation
        :return: None
        """
        # For evaluation
        imgs_name_list = []
        bbox_list = []
        labels_list = []

        for epoch in range(num_epochs):
            is_last_epoch = epoch == num_epochs - 1
            train_loss = 0
            val_loss = 0
            self.model.train()  # Set to training mode
            with torch.set_grad_enabled(True):
                for images, targets in train_loader:
                    # Pass data to GPU
                    images, targets = self._to_device(images, targets)

                    # Zero Gradients
                    self.optimizer.zero_grad()

                    # Calculate Loss
                    loss_dict = self.model(images, targets)
                    losses = sum(loss_dict.values())
                    train_loss += losses.item() * len(images)

                    # Backward Prop & Update weights
                    losses.backward()
                    self.optimizer.step()

                print('Train Loss = {:.4f}'.format(train_loss /
                                                   len(train_loader.dataset)))

            # TODO: Calculate Dice and IoU loss for it

            with torch.no_grad():
                for idx, (imgs_name, images, targets) in enumerate(val_loader):
                    # training mode is used here because the model is only
                    # called with targets (to obtain losses) in that mode
                    self.model.train()
                    images, targets = self._to_device(images, targets)

                    loss_dict = self.model(images, targets)
                    losses = sum(loss_dict.values())
                    val_loss += losses.item() * len(images)

                    if not is_last_epoch:
                        continue

                    # Last epoch: collect the predictions for the output file
                    self.model.eval()  # Set model to evaluate performance
                    predictions = self.model(images)

                    # Think of moving all this into gen_out_file - Looks nicer
                    imgs_name_list.extend(imgs_name)
                    bbox_list.extend([
                        pred['boxes'].int().cpu().tolist()
                        for pred in predictions
                    ])
                    labels_list.extend([
                        pred['labels'].int().cpu().tolist()
                        for pred in predictions
                    ])

                    # Optional - SEE the performance on the second last batch
                    # (reuses the predictions computed just above)
                    if idx == (len(val_loader) - 2):
                        MiscUtils.view(images,
                                       predictions,
                                       k=len(images),
                                       model_type='faster_rcnn')

                DataUtils.gen_out_file('output_file.txt', imgs_name_list,
                                       bbox_list, labels_list)
                print('Validation Loss = {:.4f}'.format(
                    val_loss / len(val_loader.dataset)))
示例#13
0
def main(network):
    """Train a detector on ``TBdata``, record its losses and save the weights.

    Args:
        network: Architecture selector. ``'resnet50'``, ``'resnet18'`` and
            ``'resnet152'`` build a ``FasterRCNN`` on the matching ResNet-FPN
            backbone; ``'RPNresnet50'`` and ``'RPNresnet152'`` build an
            ``RPN_custom`` model on the matching backbone.

    Returns:
        A tuple ``(Ls, Ls_val, num_epochs)``: the loss histories returned by
        ``record_losses`` for the training and validation loaders, and the
        number of epochs that were run.

    Raises:
        ValueError: If ``network`` is not one of the supported names.
    """
    # network name -> (ResNet backbone name, build RPN_custom instead of
    # FasterRCNN). Only plain values are stored so that the detector class is
    # looked up lazily, exactly like the original if/elif chain did.
    architectures = {
        'resnet50': ('resnet50', False),
        'resnet18': ('resnet18', False),
        'resnet152': ('resnet152', False),
        'RPNresnet50': ('resnet50', True),
        'RPNresnet152': ('resnet152', True),
    }
    # Fail fast: previously an unknown name only surfaced later as an
    # UnboundLocalError on ``model`` after the data had been set up.
    if network not in architectures:
        raise ValueError(
            f'Unknown network {network!r}; expected one of '
            f'{sorted(architectures)}')
    backbone_name, use_custom_rpn = architectures[network]

    # train on the GPU or on the CPU, if a GPU is not available
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')

    # two classes: background plus a single foreground class
    num_classes = 2

    # Wrap both datasets in randomly permuted Subsets (all samples are kept).
    indices = torch.randperm(len(TBdata)).tolist()
    dataset = torch.utils.data.Subset(TBdata, indices[:])
    indices_ = torch.randperm(len(TBdata_test)).tolist()
    dataset_val = torch.utils.data.Subset(TBdata_test, indices_[:])

    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=8,
                                             sampler=None,
                                             num_workers=0,
                                             collate_fn=collate_fn)
    dataloader_val = torch.utils.data.DataLoader(dataset_val,
                                                 batch_size=8,
                                                 sampler=None,
                                                 num_workers=0,
                                                 collate_fn=collate_fn)

    # Transform parameters; per the original note the mean/std statistics
    # were calculated on the training data.
    min_size = 550
    max_size = 700
    image_means = [0.9492, 0.9492, 0.9492]
    image_stds = [0.1158, 0.1158, 0.1158]

    # Every supported architecture is built the same way; only the backbone
    # name and the detector class differ.
    backbone = resnet_fpn_backbone(backbone_name, True)
    detector_cls = RPN_custom if use_custom_rpn else FasterRCNN
    model = detector_cls(backbone,
                         num_classes,
                         min_size=min_size,
                         max_size=max_size,
                         image_mean=image_means,
                         image_std=image_stds)

    # move model to the right device
    model.to(device)

    # construct an optimizer over the trainable parameters only
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params,
                                lr=0.005,
                                momentum=0.9,
                                weight_decay=0.0005)
    # and a learning rate scheduler: multiply the LR by 0.1 every 3 epochs
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=3,
                                                   gamma=0.1)

    num_epochs = 10
    loss_names = ('total loss', 'loss_classifier', 'loss_box_reg',
                  'loss_objectness', 'loss_rpn_box_reg')
    Ls = {name: [] for name in loss_names}
    Ls_val = {name: [] for name in loss_names}

    for epoch in range(num_epochs):
        # train for one epoch, printing every 10 iterations
        train_one_epoch(model,
                        optimizer,
                        dataloader,
                        device,
                        epoch,
                        print_freq=10)
        # update the learning rate
        lr_scheduler.step()

        # record losses: validation loader first, then training loader
        Ls_val = record_losses(model, dataloader_val, device, Ls_val, network)
        Ls = record_losses(model, dataloader, device, Ls, network)

    # Create the per-network output folder if needed; exist_ok avoids the
    # check-then-create race of os.path.exists + os.makedirs.
    output_loc = f'./{network}/'
    os.makedirs(output_loc, exist_ok=True)

    torch.save(model.state_dict(), output_loc + 'model.pt')

    print("That's it!")
    return Ls, Ls_val, num_epochs
示例#14
0
            self.targets.append(d)

    def __len__(self):
        """Return the dataset size, taken from the length of ``self.txt_fld``."""
        sample_count = len(self.txt_fld)
        return sample_count

    def __getitem__(self, idx):
        """Return the ``(image, target)`` pair stored at position ``idx``."""
        image = self.images[idx]
        target = self.targets[idx]
        return image, target


# Script-level setup: build the dataset from a hard-coded local folder, wrap
# it in a single-sample shuffling loader and create a plain SGD optimizer.
# NOTE(review): `model` is not defined in this part of the file; it is
# presumably created earlier - confirm.
root_url = '/home/dung/DocData/cp/145'
dataset = DocDataset(root_url)
data_loader = torch.utils.data.DataLoader(
    dataset, batch_size=1, shuffle=True, num_workers=0)
# criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4)


# Training loop: 1000 passes over `data_loader`, one sample per step.
# NOTE(review): `model` and `device` are not defined in this part of the
# file; they are presumably created earlier - confirm.
for i in range(1000):
    print('Epoch {}\n'.format(i))
    for j, (images, targets) in enumerate(data_loader):
        images = images.to(device)
        # Take element 0 of each target field and hand the model a
        # one-element list of target dicts (the loader uses batch_size=1).
        a = {}
        a['boxes'] = targets['boxes'][0].to(device)
        a['labels'] = targets['labels'][0].to(device)
        # With targets supplied the model returns a dict of loss terms.
        output = model(images, [a])
        losses = sum(loss for loss in output.values())
        if j % 30 == 0:
            print('Step {} -- loss_classifier = {} -- loss_box_reg = {} -- loss_objectness = {} -- loss_rpn_box_reg = {}\n'.format(
                j,
                output['loss_classifier'].item(),
                output['loss_box_reg'].item(),
                output['loss_objectness'].item(),
                output['loss_rpn_box_reg'].item()))
        # Standard update: clear stale gradients, backpropagate the summed
        # loss, then apply the step. The loop previously stopped after
        # zero_grad(), so the parameters were never updated.
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()
示例#15
0
def main(args):
    """Train and evaluate a Faster R-CNN detector on the full SVHN dataset.

    The detector uses the encoder of a ``ResNetDetection`` ('resnet18') model
    as its backbone, optionally initialised from ``args.pretrained``. After
    every epoch the model is evaluated on the test split and a checkpoint is
    written through ``detection_util.save_on_master``.

    Args:
        args: Parsed options. Attributes read here: ``device``,
            ``distributed``, ``batch_size``, ``workers``, ``pretrained``,
            ``gpu``, ``lr``, ``momentum``, ``weight_decay``, ``lr_steps``,
            ``lr_gamma``, ``resume``, ``start_epoch``, ``epochs`` and
            ``print_freq``. ``start_epoch`` is overwritten when resuming.
    """
    detection_util.init_distributed_mode(args)
    print(args)

    device = torch.device(args.device)
    torch.multiprocessing.set_sharing_strategy('file_system')

    # --- data -------------------------------------------------------------
    # Both splits share the same ToTensor + Normalize pipeline.
    normalize = transforms.Normalize(mean=(0.4914, 0.4822, 0.4465),
                                     std=(0.2023, 0.1994, 0.2010))
    shared_transform = transforms.Compose([transforms.ToTensor(), normalize])

    svhn_root = './datasets/SVHN_full'
    train_dataset = SVHNFull(root=svhn_root,
                             transform=shared_transform,
                             download=False)
    test_dataset = SVHNFull(root=svhn_root,
                            split='test',
                            transform=shared_transform,
                            download=False)

    # Distributed runs shard the data with samplers; otherwise the loaders
    # shuffle on their own (shuffle and sampler are mutually exclusive).
    train_sampler = test_sampler = None
    if args.distributed:
        train_sampler = data.distributed.DistributedSampler(train_dataset)
        test_sampler = data.distributed.DistributedSampler(test_dataset)
    print(args.batch_size)

    loader_options = dict(batch_size=args.batch_size,
                          num_workers=args.workers,
                          pin_memory=True)
    train_loader = data.DataLoader(train_dataset,
                                   shuffle=(train_sampler is None),
                                   sampler=train_sampler,
                                   collate_fn=train_dataset.collate_fn,
                                   **loader_options)
    test_loader = data.DataLoader(test_dataset,
                                  shuffle=(test_sampler is None),
                                  sampler=test_sampler,
                                  collate_fn=test_dataset.collate_fn,
                                  **loader_options)

    # --- backbone ---------------------------------------------------------
    encoder_model = ResNetDetection(name='resnet18')

    if args.pretrained:
        ckpt = torch.load(args.pretrained, map_location='cpu')
        # Drop every "module." fragment from the parameter names so that
        # they match the unwrapped model.
        cleaned_state = {
            key.replace("module.", ""): value
            for key, value in ckpt['model'].items()
        }
        encoder_model.load_state_dict(cleaned_state)

    backbone = encoder_model.encoder
    backbone.out_channels = 512

    # --- detector ---------------------------------------------------------
    anchor_generator = AnchorGenerator(sizes=((16, 32, 128), ),
                                       aspect_ratios=((0.5, 1.0), ))
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                    output_size=3,
                                                    sampling_ratio=2)
    model = FasterRCNN(backbone,
                       num_classes=10,
                       rpn_anchor_generator=anchor_generator,
                       box_roi_pool=roi_pooler,
                       rpn_batch_size_per_image=64)
    model.to(device)

    # Keep a handle on the bare module: checkpoints are saved from and
    # loaded into it, never the DDP wrapper.
    model_without_ddp = model
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model, device_ids=[args.gpu])
        model_without_ddp = model.module

    # --- optimisation -----------------------------------------------------
    trainable = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(trainable,
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=args.lr_steps, gamma=args.lr_gamma)

    if args.resume:
        checkpoint = torch.load(args.resume, map_location='cpu')
        model_without_ddp.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        # Continue with the epoch after the one stored in the checkpoint.
        args.start_epoch = checkpoint['epoch'] + 1

    # --- training loop ----------------------------------------------------
    print("Start training")
    start_time = time.time()
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        train(model, optimizer, train_loader, device, epoch, args.print_freq)
        lr_scheduler.step()
        evaluate(model, test_loader, device=device)

        snapshot = {
            'model': model_without_ddp.state_dict(),
            'optimizer': optimizer.state_dict(),
            'lr_scheduler': lr_scheduler.state_dict(),
            'args': args,
            'epoch': epoch
        }
        snapshot_path = os.path.join('save/detector/',
                                     'model_{}.pth'.format(epoch))
        detection_util.save_on_master(snapshot, snapshot_path)

    elapsed_seconds = int(time.time() - start_time)
    print('Training time {}'.format(
        str(datetime.timedelta(seconds=elapsed_seconds))))
示例#16
0
def main():
    """Train the VISUM 2019 baseline detector and save the whole model.

    Command-line options select the data directory, the output model file,
    the number of epochs, the learning rate and the L2 weight decay. The
    detector is a ``FasterRCNN`` on MobileNetV2 features; the last 100
    samples of a seeded random permutation are held out for validation.
    """
    parser = argparse.ArgumentParser(description='VISUM 2019 competition - baseline training script', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-d', '--data_path', default='/home/master/dataset/train', metavar='', help='data directory path')
    parser.add_argument('-m', '--model_path', default='./baseline.pth', metavar='', help='model file (output of training)')
    parser.add_argument('--epochs', default=50, type=int, metavar='', help='number of epochs')
    parser.add_argument('--lr', default=0.005, type=float, metavar='', help='learning rate')
    parser.add_argument('--l2', default=0.0005, type=float, metavar='', help='L-2 regularization')
    args = vars(parser.parse_args())

    def get_transform(train):
        """Build the transform pipeline; augmentation is added for training."""
        # Always start by converting the PIL image into a tensor.
        steps = [T.ToTensor()]
        if train:
            # Training only: random horizontal flip plus contrast and
            # brightness jitter.
            steps.extend([
                T.RandomHorizontalFlip(0.5),
                torchvision.transforms.ColorJitter(contrast=.3),
                torchvision.transforms.ColorJitter(brightness=.4),
            ])
        return T.Compose(steps)

    # MobileNetV2 feature extractor; FasterRCNN reads `out_channels` from it.
    backbone = torchvision.models.mobilenet_v2(pretrained=True).features
    backbone.out_channels = 1280

    anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                       aspect_ratios=((0.5, 1.0, 2.0),))
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
                                                    output_size=7,
                                                    sampling_ratio=2)

    # Assemble the detector from the pieces above and show its architecture.
    model = FasterRCNN(backbone,
                       num_classes=10,
                       rpn_anchor_generator=anchor_generator,
                       box_roi_pool=roi_pooler)
    print(model)

    # Two views of the same data: augmented for training, plain for
    # validation.
    data_path = args['data_path']
    dataset = VisumData(data_path, modality='rgb', transforms=get_transform(train=True))
    dataset_val = VisumData(data_path, modality='rgb', transforms=get_transform(train=False))

    # Seeded permutation: everything but the last 100 indices trains, the
    # last 100 validate.
    torch.manual_seed(1)
    indices = torch.randperm(len(dataset)).tolist()
    dataset = torch.utils.data.Subset(dataset, indices[:-100])
    dataset_val = torch.utils.data.Subset(dataset_val, indices[-100:])

    data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=2, shuffle=True, num_workers=0,
        collate_fn=utils.collate_fn)
    data_loader_val = torch.utils.data.DataLoader(
        dataset_val, batch_size=2, shuffle=False, num_workers=0,
        collate_fn=utils.collate_fn)

    if torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    model.to(device)

    trainable = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(trainable, lr=args['lr'],
                                momentum=0.9, weight_decay=args['l2'])
    # Halve the learning rate every 10 epochs.
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=10,
                                                   gamma=0.5)

    for epoch in range(args['epochs']):
        # NOTE(review): `nvid` is not defined inside this function; it is
        # presumably a module-level value - confirm.
        train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=nvid)
        # update the learning rate
        lr_scheduler.step()
        # evaluate on the held-out validation subset
        evaluate(model, data_loader_val, device=device)

    # Saves the complete model object, not just its state dict.
    torch.save(model, args['model_path'])
示例#17
0
class FasterRCNNResnet50FPN(LightningModule):
    """Lightning wrapper around a torchvision-style ``FasterRCNN`` detector.

    Despite the class name, the backbone built here is a truncated
    ResNeSt-50 loaded from torch.hub; no FPN is constructed in this class.
    """

    def __init__(self, conf=None, *args, **kwargs):
        """Build the detector.

        Args:
            conf: Configuration object stored as ``self.hparams``. The other
                methods read ``Train.*`` attributes from it.
            *args: Accepted but unused.
            **kwargs: Accepted but unused.
        """
        super().__init__()
        self.hparams = conf
        resnest = torch.hub.load('zhanghang1989/ResNeSt',
                                 'resnest50',
                                 pretrained=True)
        # Keep every child module except the last three as the feature
        # extractor; FasterRCNN reads `out_channels` from the backbone.
        backbone = Sequential(*list(resnest.children())[:-3])
        backbone.out_channels = 1024

        anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512), ),
                                           aspect_ratios=((0.5, 1.0, 2.0), ))
        roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                        output_size=[7, 7],
                                                        sampling_ratio=2)
        self.model = FasterRCNN(backbone,
                                num_classes=2,
                                rpn_anchor_generator=anchor_generator,
                                box_roi_pool=roi_pooler)

    def forward(self, x):
        """Run the wrapped detector on ``x`` and return its output."""
        return self.model(x)

    def configure_optimizers(self):
        """Create the RangerLars optimizer and the configured LR scheduler.

        The scheduler type is chosen by ``hparams.Train.scheduler``
        ('OneCycleLR', 'NCycleLR', 'CyclicLR' or 'ReduceLROnPlateau').

        Returns:
            ``([optimizer], [scheduler_config])`` where ``scheduler_config``
            holds the scheduler and its stepping interval.
        """
        # NOTE(review): for any other scheduler name `scheduler` stays None
        # and is returned as such - confirm this is handled by the caller.
        scheduler = None
        params = [p for p in self.model.parameters() if p.requires_grad]
        optimizer = RangerLars(params)
        # noinspection PyUnresolvedReferences
        if self.hparams.Train.scheduler == 'OneCycleLR':
            scheduler = OneCycleLR(
                optimizer,
                max_lr=self.hparams.Train.lr,
                epochs=self.hparams.Train.epochs,
                steps_per_epoch=self.hparams.Train.steps_per_epoch,
                pct_start=self.hparams.Train.Schedulers.OneCycleLR.pct_start,
                anneal_strategy=self.hparams.Train.Schedulers.OneCycleLR.
                anneal_strategy,
                cycle_momentum=False,
                div_factor=self.hparams.Train.Schedulers.OneCycleLR.div_factor)
        elif self.hparams.Train.scheduler == 'NCycleLR':
            scheduler = NCycleLR(
                optimizer,
                max_lr=self.hparams.Train.lr,
                n=self.hparams.Train.Schedulers.NCycleLR.n,
                lr_factor=self.hparams.Train.Schedulers.NCycleLR.lr_factor,
                epochs=self.hparams.Train.epochs,
                steps_per_cycle=self.hparams.Train.Schedulers.NCycleLR.
                steps_per_cycle,
                pct_start=self.hparams.Train.Schedulers.NCycleLR.pct_start,
                anneal_strategy=self.hparams.Train.Schedulers.NCycleLR.
                anneal_strategy,
                cycle_momentum=False,
                div_factor=self.hparams.Train.Schedulers.NCycleLR.div_factor)
        elif self.hparams.Train.scheduler == 'CyclicLR':
            scheduler = CyclicLR(
                optimizer,
                base_lr=self.hparams.Train.lr / 1e5,
                max_lr=self.hparams.Train.lr,
                step_size_up=self.hparams.Train.steps_per_epoch,
                mode=self.hparams.Train.Schedulers.CyclicLR.mode,
                gamma=self.hparams.Train.Schedulers.CyclicLR.gamma,
                cycle_momentum=False)
        elif self.hparams.Train.scheduler == 'ReduceLROnPlateau':
            scheduler = ReduceLROnPlateau(
                optimizer,
                factor=self.hparams.Train.Schedulers.ReduceLROnPlateau.factor,
                patience=self.hparams.Train.Schedulers.ReduceLROnPlateau.
                patience,
                verbose=True)
        schedulers = [{
            'scheduler': scheduler,
            'interval': self.hparams.Train.Schedulers.interval
        }]
        return [optimizer], schedulers

    def training_step(self, batch, batch_idx):
        """Compute the summed detection loss for one training batch.

        Args:
            batch: ``(images, targets, _)``; the third element is ignored.
            batch_idx: Index of the batch (unused).

        Returns:
            Dict with the total ``loss``, the individual loss terms under
            ``log`` and the current learning rate under ``progress_bar``.
        """
        images, targets, _ = batch
        targets = [{k: v for k, v in t.items()} for t in targets]
        # FasterRCNN model returns dict with classification and regression loss
        loss_dict = self.model(images, targets)
        total_loss = sum(loss for loss in loss_dict.values())
        # Learning rate of the first parameter group, for display only.
        lr = self.trainer.lr_schedulers[0]['scheduler'].optimizer.param_groups[
            0]['lr']
        return {
            'loss': total_loss,
            'log': loss_dict,
            'progress_bar': {
                'lr': lr
            }
        }

    def validation_step(self, batch, batch_idx):
        """Score one validation batch with per-image precision.

        Args:
            batch: ``(images, targets, _)``; the third element is ignored.
            batch_idx: Index of the batch (unused).

        Returns:
            Dict with ``val_map`` (per-image precisions) and ``val_loss``
            (``1 - precision``), both of shape
            ``(1, hparams.Train.batch_size)``.
        """
        images, targets, _ = batch
        targets = [{k: v for k, v in t.items()} for t in targets]
        # The outputs are consumed as per-image detections ('scores',
        # 'boxes'); presumably the model is in eval mode here - confirm.
        outputs = self.model(images, targets)
        # One slot per configured batch position; slots beyond the actual
        # number of outputs stay at 0.
        precisions = np.zeros((self.hparams.Train.batch_size, ))
        for i, output in enumerate(outputs):
            # Order the predicted boxes by descending confidence.
            scores = torch.argsort(output['scores'], descending=True)
            boxes = output['boxes'][scores].data.cpu().numpy().astype(np.int32)
            gt = targets[i]['boxes'].data.cpu().numpy().astype(np.int32)
            precision = calculate_image_precision(gt, boxes)
            precisions[i] = precision
        # Put the results on the device the model lives on instead of a
        # hard-coded 'cuda', so CPU runs work and the GPU index matches.
        device = next(self.model.parameters()).device
        return {
            'val_loss':
            torch.tensor([1 - precisions], dtype=torch.float32, device=device),
            'val_map':
            torch.tensor([precisions], dtype=torch.float32, device=device)
        }

    def validation_epoch_end(self, outputs):
        """Average ``val_loss`` and ``val_map`` over all validation batches.

        Args:
            outputs: List of dicts returned by ``validation_step``.

        Returns:
            Dict with ``avg_val_loss`` plus the averaged metrics under
            ``log`` and ``progress_bar``.
        """
        avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
        avg_precision = torch.stack([x['val_map'] for x in outputs]).mean()
        logs = {'val_loss': avg_loss, 'val_map': avg_precision}
        return {'avg_val_loss': avg_loss, 'log': logs, 'progress_bar': logs}

    def test_step(self, batch, batch_idx):
        """Score one test batch; for the first batch also log result images.

        Args:
            batch: ``(images, targets, _)``; the third element is ignored.
            batch_idx: Index of the batch; images are logged only for 0.

        Returns:
            The result of ``validation_step`` for the same batch.
        """
        if batch_idx == 0:
            images, targets, _ = batch
            targets = [{k: v for k, v in t.items()} for t in targets]
            outputs = self.model(images, targets)
            for i, output in enumerate(outputs):
                # Reorder the image axes for the HWC image logger.
                image = images[i].permute(1, 2, 0).cpu().numpy()
                scores = torch.argsort(output['scores'], descending=True)
                boxes = output['boxes'][scores].data.cpu().numpy().astype(
                    np.int32)
                gt = targets[i]['boxes'].data.cpu().numpy().astype(np.int32)
                results = get_image_with_results(image, boxes, gt)
                self.logger.experiment.add_image(
                    f"bb_test(RED: Predicted; BLUE: Ground-truth)/image{i}",
                    results,
                    dataformats='HWC')
        # The metrics themselves come from the validation logic, which runs
        # the model on the batch again.
        return self.validation_step(batch, batch_idx)

    def test_epoch_end(self, outputs):
        """Aggregate test results, renaming ``val*`` metrics to ``test*``.

        Args:
            outputs: List of dicts returned by ``test_step``.

        Returns:
            Dict with the averaged metrics under ``log`` as plain Python
            numbers.
        """
        test_results = self.validation_epoch_end(outputs)
        test_results = {
            k.replace('val', 'test'): v.cpu().numpy().tolist()
            for k, v in test_results['log'].items()
        }
        return {'log': test_results}
示例#18
0
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224,
                                                          0.225]),
    transforms.Resize(300),
    transforms.CenterCrop(300)
])

# Script-level setup: COCO 2017 training set with `myTransform` applied to
# the images.
dataset = CocoDetection('/datasets/Coco/train2017',
                        '/datasets/Coco/annotations/instances_train2017.json',
                        transform=myTransform)
# Sanity check on the first sample: print the image shape and the keys of
# its first annotation.
data = dataset[0]
x, y = data
print(x.shape)
print(y[0].keys())

dataloader = DataLoader(dataset, batch_size=32)
# NOTE(review): the validation set is created without a transform, unlike
# the training set - confirm this is intended.
validset = CocoDetection('/datasets/Coco/val2017',
                         '/datasets/Coco/annotations/instances_val2017.json')
validloader = DataLoader(validset, batch_size=32)
# NOTE(review): `model` is not defined in this part of the file; it is
# presumably created earlier - confirm.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)


def train_one_epoch(dataloader):
    """Iterate over ``dataloader`` once and print the shape of every batch.

    Args:
        dataloader: Iterable yielding ``(x, y)`` batches; both elements must
            expose a ``shape`` attribute.
    """
    # Unpack each batch directly. The loop used to read
    # `for i, data in dataloader`, which split the batch itself into
    # (i, data) and then tried to unpack the target as (x, y).
    for x, y in dataloader:
        print(x.shape)
        print(y.shape)


# Script entry point: make a single pass over the training loader.
if __name__ == '__main__':
    train_one_epoch(dataloader)
示例#19
0
# Run configuration. THRESH and IOU are not referenced in the visible part
# of this script.
BATCH_SIZE = 4
EPOCH = 300
THRESH = 0.9
IOU = 0.3
CLASSES = 3
# NOTE(review): the device is fixed to CUDA with no CPU fallback.
DEVICE = torch.device("cuda")

# Detector: FasterRCNN on randomly initialised VGG19 convolutional features
# with a single anchor level (2 sizes x 3 aspect ratios).
anchor_generator = AnchorGenerator(sizes=((32, 64), ),
                                   aspect_ratios=((0.6, 1.0, 1.6), ))
backbone = torchvision.models.vgg19(pretrained=False).features
# FasterRCNN reads `out_channels` from the backbone.
backbone.out_channels = 512
model = FasterRCNN(backbone,
                   num_classes=CLASSES,
                   rpn_anchor_generator=anchor_generator)
model.to(DEVICE)
optimizer = optim.Adam(model.parameters(), lr=1e-5)
train_dataset = Dataset()
test_dataset = Dataset(training=False)
# The training loader shuffles and drops the last incomplete batch; the
# test loader keeps order and every sample.
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=BATCH_SIZE,
                                           shuffle=True,
                                           pin_memory=True,
                                           drop_last=True)
test_loader = torch.utils.data.DataLoader(test_dataset,
                                          batch_size=BATCH_SIZE,
                                          shuffle=False,
                                          drop_last=False)
# NOTE(review): the visible loop body only switches the model to training
# mode and moves the inputs to DEVICE; no forward/backward pass follows.
for epoch in range(EPOCH):
    for index, (data, target) in enumerate(train_loader):
        model.train()
        data = data.to(DEVICE)
示例#20
0
class VAEFasterRCNN(ObjectDetectionModel):
    def __init__(self, hparams):
        super(VAEFasterRCNN, self).__init__(hparams)

        self.resnet_style = hparams.resnet_style
        self.threshold = hparams.threshold

        self.vae = vae(resnet_style=self.resnet_style, pretrained=False)
        #self.farcnn = fasterrcnn_resnet50_fpn(pretrained_backbone=False, num_classes=1)
        resnet_net = torchvision.models.resnet18(pretrained=False)
        modules = list(resnet_net.children())[:-2]
        backbone = nn.Sequential(*modules)
        backbone.out_channels = 512
        self.farcnn = FasterRCNN(backbone=backbone, num_classes=1)
        #https://stackoverflow.com/questions/58362892/resnet-18-as-backbone-in-faster-r-cnn
        self.vae.load_state_dict(
            torch.load("submission2_object_detection_state_dict.pt"))

        self.criterion = self.loss_function

    def loss_function(self, pred_maps, road_images, mu, logvar):
        criterion = nn.BCELoss()
        CE = criterion(pred_maps.squeeze(), road_images.float().squeeze())
        KLD = -0.5 * torch.mean(1 + logvar - mu.pow(2) - logvar.exp())
        return 0.9 * CE + 0.1 * KLD, CE, KLD

    def process_batch(self, batch):
        samples, targets, tar_sems, road_images = super(
            VAEFasterRCNN, self).process_batch(batch)
        targets_farcnn = []
        for i in range(len(samples)):
            d = {}
            bbs = targets[i]["bounding_box"]
            gt_boxes = torch.zeros((bbs.shape[0], 4)).to(bbs.device)
            for j, bb in enumerate(bbs):
                gt_boxes[j] = torch.FloatTensor([
                    bb[0].min().item() * 10 + 400,
                    -(bb[1].max().item() * 10) + 400,
                    bb[0].max().item() * 10 + 400,
                    -(bb[1].min().item() * 10) + 400
                ])
                #logging.info("gt_box: ", gt_boxes[j])
            d['boxes'] = gt_boxes
            d['labels'] = torch.zeros((bbs.shape[0]),
                                      dtype=torch.int64).to(bbs.device)
            targets_farcnn.append(d)
        return samples, targets, tar_sems, road_images, targets_farcnn

    def training_step(self, batch, batch_idx):
        samples, targets, tar_sems, road_images, targets_farcnn = self.process_batch(
            batch)

        pred_maps, mu, logvar, farcnn_loss = self.forward(
            samples, targets_farcnn)
        train_loss, CE, KLD = self.criterion(pred_maps, tar_sems, mu, logvar)

        f_loss = 0
        for key, value in farcnn_loss.items():
            f_loss += value.item()

        train_loss += f_loss
        self.logger.log_metrics(
            {
                "train_loss": train_loss / len(samples),
                "train_CE": CE / len(samples),
                "train_KLD": KLD / len(samples),
                "train_farcnn_loss": f_loss
            }, self.global_step)

        return {"loss": train_loss, "n": len(samples)}

    def forward(self, imgs, gt_boxes=None):
        pred_maps, mu, logvar = self.vae(imgs)
        if self.training:
            rcn_out = self.farcnn(
                (pred_maps.unsqueeze(1) > self.threshold).float(), gt_boxes)
        else:
            rcn_out = self.farcnn(
                (pred_maps.unsqueeze(1) > self.threshold).float())

        return pred_maps, mu, logvar, rcn_out

    def training_epoch_end(self, outputs):
        avg_training_loss = 0
        n = 0
        for out in outputs:
            avg_training_loss += out["loss"]
            n += out["n"]

        avg_training_loss /= n

        return {"log": {"avg_train_loss": avg_training_loss}}

    def validation_step(self, batch, batch_idx):
        samples, targets, tar_sems, road_images, targets_farcnn = self.process_batch(
            batch)

        pred_maps, mu, logvar, pred_boxes = self.forward(samples)

        threat_score = self.get_threat_score(pred_boxes, targets)

        return {"val_ts": threat_score, "n": len(samples)}

    def validation_epoch_end(self, outputs):
        avg_val_ts = 0
        n = 0
        for out in outputs:
            avg_val_ts += out["val_ts"]
            n += out["n"]

        if n > 0:
            avg_val_ts /= n
        else:
            avg_val_ts = 0

        return {
            "val_ts": avg_val_ts,
            "log": {
                "avg_val_ts": avg_val_ts
            },
            "progress_bar": {
                "avg_val_ts": avg_val_ts
            }
        }

    def get_threat_score(self, pred_boxes, targets):
        threat_score = 0
        for preds, target in zip(pred_boxes, targets):
            bbs_pred = preds["boxes"]
            if bbs_pred.shape[0] > 0:
                actual_boxes = torch.zeros((bbs_pred.shape[0], 2, 4))
                for i, bb in enumerate(bbs_pred):
                    x_min, x_max = (bb[0] - 400) / 10, (bb[2] - 400) / 10
                    y_min, y_max = -(bb[1] - 400) / 10, -(bb[3] - 400) / 10
                    actual_boxes[i] = torch.FloatTensor(
                        [[x_max, x_max, x_min, x_min], y_max, y_min, y_max,
                         y_min])
                logging.info("Predicted boxes:", actual_boxes)
                logging.info("True boxes: ", target["bounding_box"])
            else:
                actual_boxes = torch.zeros((1, 2, 4))

            ts_road_map = compute_ats_bounding_boxes(
                actual_boxes.cpu(), target["bounding_box"].cpu())
            threat_score += ts_road_map

        return threat_score

    def configure_optimizers(self):
        optimizer = optim.Adam(list(self.vae.parameters()) +
                               list(self.farcnn.parameters()),
                               lr=self.learning_rate,
                               weight_decay=self.weight_decay)
        scheduler = optim.lr_scheduler.StepLR(optimizer,
                                              step_size=1,
                                              gamma=0.97)

        return [optimizer], [scheduler]