class ResNet50_FasterRCNN:
    """Thin wrapper around a Faster R-CNN detector with a ResNet-50 FPN backbone.

    The wrapped network is stored in ``self.model``; every method below
    forwards to it.  ``train``, ``eval`` and ``to`` return ``self`` so the
    wrapper can be chained the same way an ``nn.Module`` can
    (``wrapper = ResNet50_FasterRCNN().to(device)``).
    """

    def __init__(self, pretrained=False):
        """Build the detector.

        Args:
            pretrained: forwarded to ``resnet_fpn_backbone`` as ``pretrained``.
        """
        backbone = resnet_fpn_backbone('resnet50', pretrained=pretrained)
        # 4 + 1 classes (presumably 4 object classes plus background -- TODO confirm).
        num_classes = 4 + 1
        anchor_generator = AnchorGenerator(sizes=(40, 60, 150, 200, 250),
                                           aspect_ratios=(0.7, 1.0, 1.3))
        self.model = FRCNN(backbone,
                           num_classes=num_classes,
                           rpn_anchor_generator=anchor_generator)

    def train(self):
        """Switch the wrapped model to training mode and return ``self``."""
        self.model.train()
        return self

    def to(self, device):
        """Move the wrapped model to ``device`` and return ``self``."""
        self.model.to(device)
        return self

    def eval(self):
        """Switch the wrapped model to evaluation mode and return ``self``."""
        self.model.eval()
        return self

    def parameters(self):
        """Return the wrapped model's parameter iterator."""
        return self.model.parameters()

    def get_state_dict(self):
        """Return the wrapped model's ``state_dict``."""
        return self.model.state_dict()

    def set_state_dict(self, state_dict):
        """Load ``state_dict`` into the wrapped model."""
        self.model.load_state_dict(state_dict)

    def fit_batch(self, images, target):
        """Call the wrapped model with images and targets; return its output."""
        return self.model(images, target)

    def predict_batch(self, images):
        """Call the wrapped model with images only; return its output."""
        return self.model(images)
def train_mask():
    """Build a MobileNetV2-backed Faster R-CNN and evaluate it on a held-out split.

    The UNIMIB dataset is permuted once (seed 1); the last 50 samples form the
    evaluation split and the rest the training split.  The training step is
    currently disabled, so only ``evaluate`` runs; the optimizer and scheduler
    are still constructed so that training can be switched back on.

    Fixes relative to the previous version:
      * the backbone is frozen via ``backbone.parameters()`` -- iterating the
        ``nn.Sequential`` itself yields sub-modules, so nothing was frozen;
      * the unused pretrained ``fasterrcnn_resnet50_fpn`` is no longer built;
      * the detector is no longer wrapped in ``nn.Sequential(rpn, roi_heads)``,
        which dropped the transform/backbone and cannot be called with a list
        of images;
      * a single-level anchor generator is passed because the backbone
        returns a single feature map.
    """
    # Imported locally so this function does not depend on a file-level import.
    from torchvision.models.detection.rpn import AnchorGenerator

    torch.manual_seed(1)
    dataset = unimib_data.UNIMIBDataset(get_transform(train=True))
    dataset_test = unimib_data.UNIMIBDataset(get_transform(train=False))

    # One shared permutation keeps the two splits disjoint.
    indices = torch.randperm(len(dataset)).tolist()
    dataset = torch.utils.data.Subset(dataset, indices[:-50])
    dataset_test = torch.utils.data.Subset(dataset_test, indices[-50:])

    # define training and validation data loaders
    data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=1, shuffle=True, collate_fn=utils.collate_fn)
    data_loader_test = torch.utils.data.DataLoader(
        dataset_test, batch_size=1, shuffle=False, collate_fn=utils.collate_fn)

    device = "cpu"
    num_classes = 74

    backbone = torchvision.models.mobilenet_v2(pretrained=True).features
    for param in backbone.parameters():
        param.requires_grad = False
    # FasterRCNN reads the number of feature channels from this attribute.
    backbone.out_channels = 1280

    # One tuple of sizes / ratios per feature map; this backbone has one map.
    anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                       aspect_ratios=((0.5, 1.0, 2.0),))
    model = FasterRCNN(backbone,
                       num_classes=num_classes,
                       rpn_anchor_generator=anchor_generator)
    print(model)
    model.to(device)

    # construct an optimizer over the parameters that are still trainable
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9,
                                weight_decay=0.0005)
    # and a learning rate scheduler which decreases the learning rate by
    # 10x every 3 epochs
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3,
                                                   gamma=0.1)

    num_epochs = 1
    for epoch in range(num_epochs):
        # Training is disabled for now; re-enable the two calls below to train.
        # train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
        # lr_scheduler.step()
        # evaluate on the test dataset
        evaluate(model, data_loader_test, device=device)
def train(args):
    """Train a ``FasterRCNN`` with Adam and save it after the last epoch.

    Reads ``seed``, ``resize``, ``lr``, ``epochs``, ``log_interval``,
    ``model_directory`` and ``fn`` from ``args``.  The model is saved once,
    after all epochs (assumed from the flattened source -- TODO confirm the
    save was not meant to run per epoch).
    """
    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    # Seed the CPU generator, and the CUDA one when a GPU is in use.
    torch.manual_seed(args.seed)
    if device.type == 'cuda':
        torch.cuda.manual_seed(args.seed)

    train_loader = _get_train_data_loader(args.resize)
    # Built for parity with the original; not consumed below.
    test_loader = _get_test_data_loader(args.resize)

    coverage_msg = "Processes {}/{} ({:.0f}%) of train data".format(
        len(train_loader.sampler),
        len(train_loader.dataset),
        100. * len(train_loader.sampler) / len(train_loader.dataset))
    logger.debug(coverage_msg)

    model = FasterRCNN().to(device)
    trainable = [weight for weight in model.parameters() if weight.requires_grad]
    optimizer = optim.Adam(trainable, lr=args.lr)

    for epoch in range(1, args.epochs + 1):
        print("Epoch: ", epoch)
        model.train()
        for step, (raw_images, raw_targets) in enumerate(train_loader):
            images = [picture.to(device) for picture in raw_images]
            targets = []
            for annotation in raw_targets:
                targets.append(
                    {key: value.to(device) for key, value in annotation.items()})

            # In training mode the model returns a dict of loss terms.
            loss_terms = model(images, targets)
            losses = sum(term for term in loss_terms.values())

            model.zero_grad()
            losses.backward()
            optimizer.step()

            if step % args.log_interval != 0:
                continue
            logger.info(
                'Train Epoch: {} [{}/{} ({:.0f}%)] Loss: {:.6f}'.format(
                    epoch,
                    step * len(images),
                    len(train_loader.sampler),
                    100. * step / len(train_loader),
                    losses.item()))

    save_model(model, args.model_directory, args.fn)
class FasterRCNN_Encoder(nn.Module):
    """Faster R-CNN used as a region-feature encoder.

    Runs the detector's transform, backbone, RPN, RoI pooling and box head,
    and returns the box-head features instead of detections.
    """

    def __init__(self, out_dim=None, fine_tune=False):
        """Build the detector and load the COCO weights.

        Args:
            out_dim: when given, ``box_head.fc7`` is replaced by a fresh
                ``nn.Linear(1024, out_dim)``.
            fine_tune: when false, ``requires_grad`` is switched off for every
                detector parameter (including a replaced ``fc7``).
        """
        super().__init__()
        detector = FasterRCNN(resnet_fpn_backbone('resnet50', False),
                              num_classes=91,
                              rpn_post_nms_top_n_train=200,
                              rpn_post_nms_top_n_test=100)
        coco_weights = load_state_dict_from_url(
            model_urls['fasterrcnn_resnet50_fpn_coco'], progress=True)
        detector.load_state_dict(coco_weights)

        # Swap the last linear layer of the box head when a specific
        # output size is requested.
        if out_dim is not None:
            detector.roi_heads.box_head.fc7 = nn.Linear(in_features=1024,
                                                        out_features=out_dim)

        # For captioning the detector is usually kept frozen.
        if not fine_tune:
            detector.requires_grad_(False)

        self.faster_rcnn = detector

    def forward(self, images, targets=None):
        '''
        Encode images into per-proposal features.

        Args:
            images: List[Tensor], a list of image data
            targets: ground-truth data handed to the transform and the RPN;
                used only when fine-tuning

        Returns:
            (box_features, proposal_losses): the box-head output for the
            pooled proposals, and the loss dict returned by the RPN.
        '''
        detector = self.faster_rcnn
        images, targets = detector.transform(images, targets)

        # Base features from the backbone; a bare tensor is wrapped so the
        # downstream modules always receive a mapping.
        features = detector.backbone(images.tensors)
        if isinstance(features, torch.Tensor):
            features = OrderedDict([(0, features)])

        # Region proposals (boxes likely to contain foreground objects).
        proposals, proposal_losses = detector.rpn(images, features, targets)

        # Pool the features under each proposal, then run the box head.
        # NOTE(review): the output is presumably (total_boxes, feature_dim),
        # not batched per image -- confirm against callers.
        pooled = detector.roi_heads.box_roi_pool(features, proposals,
                                                 images.image_sizes)
        box_features = detector.roi_heads.box_head(pooled)
        return box_features, proposal_losses
def model_creation(pretrain=True, num_classes=5, num_epoch=20, device="cuda:0"):
    """Create a MobileNetV2-backed Faster R-CNN with its optimizer and scheduler.

    Fixes relative to the previous version: ``num_classes`` and ``pretrain``
    are now honoured (both used to be overwritten by hard-coded values), the
    pretrained ``fasterrcnn_resnet50_fpn`` that was built and then discarded
    is gone, and the CUDA fallback is detected on the ``torch.device`` type
    instead of comparing a ``torch.device`` with the string ``"cpu"``.

    Args:
        pretrain: forwarded to ``mobilenet_v2`` as ``pretrained``.
        num_classes: number of detector classes.
        num_epoch: returned unchanged for the caller's training loop.
        device: device string or ``torch.device``; CUDA requests fall back to
            the CPU when CUDA is unavailable.

    Returns:
        Tuple ``(model, optimizer, lr_scheduler, num_epoch)``.
    """
    backbone = torchvision.models.mobilenet_v2(pretrained=pretrain).features
    # FasterRCNN reads the number of feature channels from this attribute.
    backbone.out_channels = 1280

    # One tuple of sizes / ratios per feature map; this backbone has one map.
    anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                       aspect_ratios=((0.5, 1.0, 2.0),))
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                    output_size=7,
                                                    sampling_ratio=2)
    model = FasterRCNN(backbone,
                       num_classes=num_classes,
                       rpn_anchor_generator=anchor_generator,
                       box_roi_pool=roi_pooler)

    device = torch.device(device)
    if device.type == "cuda" and not torch.cuda.is_available():
        print("cuda is no available")
        device = torch.device("cpu")
    model.to(device)

    # construct an optimizer over the trainable parameters
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9,
                                weight_decay=0.0005)
    # and a learning rate scheduler (x0.1 every 3 scheduler steps)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3,
                                                   gamma=0.1)
    return model, optimizer, lr_scheduler, num_epoch
class FasterRCNNFood:
    """Faster R-CNN detector bundled with its optimizer, scheduler and checkpoint I/O.

    Fixes relative to the previous version:
      * ``load_checkpoint`` / ``load_for_inference`` pass ``num_classes`` by
        keyword (it used to land in the ``finetune`` slot);
      * ``train`` accepts ``epoch_save_ckpt`` as ``None`` or a single int
        (both used to raise ``TypeError`` on ``epoch in epoch_save_ckpt``);
      * ``save_checkpoint`` actually prints its confirmation message;
      * ``predict`` keeps the result of ``img.to("cpu")`` and runs without
        gradient tracking;
      * the ``SummaryWriter`` is closed even when training raises.
    """

    def __init__(self,
                 backbone_name: str,
                 pretrained: bool = True,
                 finetune: bool = True,
                 num_classes: int = 2):
        """Build the detector, an Adam optimizer and a StepLR scheduler.

        Args:
            backbone_name: forwarded to ``build_backbone``; also used in
                checkpoint file names.
            pretrained: forwarded to ``build_backbone``.
            finetune: forwarded to ``build_backbone``.
            num_classes: number of detector classes.
        """
        self.__pretrained = pretrained
        self.__num_classes = num_classes
        self.__model_name = backbone_name

        backbone = build_backbone(backbone_name, pretrained, finetune)
        anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                           aspect_ratios=((0.5, 1.0, 2.0),))
        # NOTE(review): newer torchvision names the single feature map '0'
        # (a string) -- confirm against the installed version.
        roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
                                                        output_size=7,
                                                        sampling_ratio=2)
        self.model = FasterRCNN(backbone=backbone,
                                num_classes=num_classes,
                                rpn_anchor_generator=anchor_generator,
                                box_roi_pool=roi_pooler)
        self.params = [p for p in self.model.parameters() if p.requires_grad]
        self.optimizer = torch.optim.Adam(params=self.params,
                                          lr=0.005,
                                          weight_decay=0.0005)
        self.lr_scheduler = torch.optim.lr_scheduler.StepLR(
            optimizer=self.optimizer, step_size=3, gamma=0.1)

    @staticmethod
    def _normalize_save_epochs(epoch_save_ckpt, num_epochs):
        """Turn the ``epoch_save_ckpt`` argument into a set of epoch indices."""
        if epoch_save_ckpt is None:
            return set()
        if isinstance(epoch_save_ckpt, int):
            if epoch_save_ckpt == -1:
                return {num_epochs - 1}
            return {epoch_save_ckpt}
        return set(epoch_save_ckpt)

    @staticmethod
    def _select_device(cuda):
        """Return the CUDA device when requested and available, else the CPU."""
        if cuda and torch.cuda.is_available():
            return torch.device("cuda")
        return torch.device("cpu")

    @staticmethod
    def _restore_model(checkpoint, device):
        """Rebuild a ``FasterRCNNFood`` from a checkpoint dict and move it to ``device``."""
        restored = FasterRCNNFood(checkpoint['model_name'],
                                  checkpoint['pretrained'],
                                  num_classes=checkpoint["num_classes"])
        restored.model.load_state_dict(checkpoint['state_dict'])
        restored.model = restored.model.to(device)
        return restored

    def train(self,
              data_loader: DataLoader,
              data_loader_test: DataLoader,
              num_epochs: int = 10,
              use_cuda: bool = True,
              epoch_save_ckpt: Union[int, list] = None,
              dir: str = None):
        """
        Method to train FasterRCNNFood model.

        Args:
            data_loader (torch.utils.data.DataLoader): data loader to train model on
            data_loader_test (torch.utils.data.DataLoader): data loader to evaluate model on
            num_epochs (int = 10): number of epoch to train model
            use_cuda (bool = True): use cuda or not
            epoch_save_ckpt (list or int): Epoch(s) at which you want to save the model.
                If -1 save only last epoch. If None, no checkpoint is saved.
            dir (str = "models/"): Directory where model are saved under the name
                "{model_name}_{date}_ep{epoch}.pth"
        """
        save_epochs = self._normalize_save_epochs(epoch_save_ckpt, num_epochs)
        dir = Path(dir if dir else "models")
        dir.mkdir(parents=True, exist_ok=True)

        device = self._select_device(use_cuda)
        self.model.to(device)

        writer = SummaryWriter()
        try:
            for epoch in range(num_epochs):
                # train for one epoch, printing every 50 iterations
                train_one_epoch(self.model, self.optimizer, data_loader,
                                device, epoch, print_freq=50, writer=writer)
                # update the learning rate
                self.lr_scheduler.step()
                # evaluate on the test dataset
                evaluate(self.model, data_loader_test, device=device,
                         writer=writer, epoch=epoch)
                # save checkpoint
                if epoch in save_epochs:
                    self.save_checkpoint(dir.as_posix(), epoch)
        finally:
            writer.close()
        print("That's it!")

    def save_checkpoint(self, dir: str, epoch: int):
        """
        Save a model checkpoint at a given epoch.

        Args:
            dir: dir folder to save the .pth file
            epoch: epoch the model is (the stored 'epoch' is ``epoch + 1``)
        """
        state = {
            'epoch': epoch + 1,
            'state_dict': self.model.state_dict(),
            'optimizer': self.optimizer.state_dict(),
            'num_classes': self.__num_classes,
            'pretrained': self.__pretrained,
            "model_name": self.__model_name
        }
        now = datetime.now()
        filename = "{model_name}_{date}_ep{epoch}.pth".format(
            model_name=self.__model_name,
            date=now.strftime("%b%d_%H-%M"),
            epoch=epoch)
        target = Path(dir) / filename
        torch.save(state, target)
        print("Checkpoint saved : {}".format(target))

    def predict(self, dataset, idx):
        """Run the model on ``dataset[idx]`` on the CPU.

        Returns:
            ``(img, prediction)`` where ``img`` is the CPU image and
            ``prediction`` is the first element of the model output.
        """
        img, _ = dataset[idx]
        # Tensor.to returns a new tensor; the result has to be kept.
        img = img.to("cpu")
        self.model.eval()
        self.model.to("cpu")
        with torch.no_grad():
            pred = self.model([img])
        return img, pred[0]

    @staticmethod
    def load_checkpoint(filename: str,
                        cuda: bool = True) -> ("FasterRCNNFood", int):
        """
        Load a model checkpoint to continue training.

        Args:
            filename (str): filename/path of the checkpoint.pth
            cuda (bool = True): use cuda

        Returns:
            (FasterRCNNFood) model
            (int) number of epoch + 1 the model was trained with
            ``None`` is returned (after printing a message) when the file
            does not exist.
        """
        device = FasterRCNNFood._select_device(cuda)
        if not Path(filename).exists():
            print("=> no checkpoint found at '{}'".format(filename))
            return None

        print("=> loading checkpoint '{}'".format(filename))
        checkpoint = torch.load(filename, map_location=device)
        start_epoch = checkpoint['epoch']

        model = FasterRCNNFood._restore_model(checkpoint, device)
        model.optimizer.load_state_dict(checkpoint['optimizer'])
        # now individually transfer the optimizer parts...
        for state in model.optimizer.state.values():
            for k, v in state.items():
                if isinstance(v, torch.Tensor):
                    state[k] = v.to(device)

        print("=> loaded checkpoint '{}' (epoch {})".format(
            filename, checkpoint['epoch']))
        return model, start_epoch

    @staticmethod
    def load_for_inference(filename: str,
                           cuda: bool = True) -> "FasterRCNNFood":
        """
        Load a model checkpoint to make inference.

        Args:
            filename (str): filename/path of the checkpoint.pth
            cuda (bool = True): use cuda

        Returns:
            (FasterRCNNFood) model in evaluation mode, or ``None`` (after
            printing a message) when the file does not exist.
        """
        device = FasterRCNNFood._select_device(cuda)
        if not Path(filename).exists():
            print("=> no checkpoint found at '{}'".format(filename))
            return None

        print("=> loading checkpoint '{}'".format(filename))
        checkpoint = torch.load(filename, map_location=device)
        model = FasterRCNNFood._restore_model(checkpoint, device)
        model.model = model.model.eval()
        print("=> loaded checkpoint '{}'".format(filename))
        return model
# In[10]:

# Use the GPU when one is visible, otherwise stay on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Two classes only: background and person.
num_classes = 2

# Build the model through the helper and place it on the chosen device.
model = get_instance_segmentation_model(num_classes)
model.to(device)

# Optimise only the parameters that still require gradients.
params = list(filter(lambda weight: weight.requires_grad, model.parameters()))
optimizer = torch.optim.SGD(
    params,
    lr=0.005,
    momentum=0.9,
    weight_decay=0.0005,
)

# Divide the learning rate by 10 every 3 scheduler steps.
lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=3,
    gamma=0.1,
)

# In[11]:

# Number of epochs to train for.
num_epochs = 10
# Single-feature-map backbones need a single-level anchor generator; the
# import lives here so this section does not rely on a file-level import.
from torchvision.models.detection.rpn import AnchorGenerator

# ResNet-34 initialised from the SimCLR encoder weights.
model_res34 = torchvision.models.resnet34(pretrained=False)
network_helpers.copy_weights_between_models(sim_clr, model_res34)

# Drop the last TWO children (global average pool + fc).  Dropping only the
# fc layer keeps the average pool, which collapses the feature map to 1x1
# and leaves the detector a single spatial location to work with.
modules = list(model_res34.children())[:-2]
backbone = nn.Sequential(*modules)
# FasterRCNN reads the number of feature channels from this attribute.
backbone.out_channels = 512

# One tuple of sizes / ratios per feature map; this backbone has one map.
anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                   aspect_ratios=((0.5, 1.0, 2.0),))
model_fnn = FasterRCNN(backbone=backbone,
                       num_classes=10,
                       rpn_anchor_generator=anchor_generator)

# ImageNet
#in_features = model_fnn.roi_heads.box_predictor.cls_score.in_features
#model_fnn.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes=10)
model_fnn.to(device)

# Optimise the detector's trainable parameters together with params_cnn.
params_fnn = [p for p in model_fnn.parameters() if p.requires_grad]
params_cnn_fnn = list(params_cnn) + list(params_fnn)
optimizer = optim.SGD(params_cnn_fnn,
                      lr=args.lr,
                      momentum=0.6,
                      weight_decay=0.0005)
# NOTE(review): CyclicLR drives the learning rate between base_lr and max_lr,
# so args.lr above is presumably overridden once the scheduler steps -- confirm.
lr_scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer,
                                                 base_lr=1e-3,
                                                 max_lr=6e-3)
# lr_scheduler = CosineAnnealingLR(optimizer, 50, eta_min=1e-3, last_epoch=-1)
model_save_path = args.save_final_model_path
# Start training Faster R-CNN.

# EfficientNet feature extractor, wrapped with an FPN.
backbone = backboneNet_efficient()
backboneFPN = backboneWithFPN(backbone)

# Anchor sizes and aspect ratios come from the config.
anchor_generator = AnchorGenerator(cfg.anchor_sizes, cfg.aspect_ratios)

model_ft = FasterRCNN(
    backboneFPN,
    num_classes=cfg.num_classes,
    rpn_anchor_generator=anchor_generator,
    min_size=cfg.min_size,
    max_size=cfg.max_size,
)
model_ft.to(device)

optimizer_ft = optim.SGD(
    model_ft.parameters(),
    lr=cfg.learning_rate,
    momentum=cfg.momentum,
    weight_decay=cfg.weight_decay,
)

# NOTE(review): this rebinds the name `lr_scheduler` (presumably the
# torch.optim.lr_scheduler module) to a scheduler instance, so the module is
# no longer reachable under that name afterwards -- confirm nothing relies on it.
lr_scheduler = lr_scheduler.MultiStepLR(
    optimizer_ft,
    milestones=cfg.milestones,
    gamma=cfg.gamma,
)

model_ft = train_model(
    model_ft,
    train_loader,
    valid_loader,
    optimizer_ft,
    lr_scheduler,
    num_epochs=cfg.epochs,
)
def main(data_root="/Users/darwin/Downloads/Bay project/Training",
         labels_csv="/Users/darwin/Downloads/Bay project/Training/TrainingDetectionLabels2.csv",
         num_epochs=10):
    """Train and evaluate the oyster detector.

    Changes relative to the previous version:
      * the two detectors that were built and then overwritten by
        ``get_model_instance_segmentation`` are no longer constructed;
      * the training indices are taken from the part of the permutation that
        excludes the 50 evaluation samples, so the splits stay disjoint even
        when the dataset has fewer than 150 samples;
      * the dataset paths and epoch count are parameters (defaults unchanged).

    Args:
        data_root: directory handed to ``OysterDataset``.
        labels_csv: label file handed to ``OysterDataset``.
        num_epochs: number of training epochs.

    Raises:
        ValueError: if no samples are left for training after the split.
    """
    num_classes = 2
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')

    dataset = OysterDataset(data_root, labels_csv, get_transform(train=True))
    dataset_test = OysterDataset(data_root, labels_csv,
                                 get_transform(train=False))

    # Last 50 permuted samples -> evaluation; up to 100 of the rest -> training.
    indices = torch.randperm(len(dataset)).tolist()
    test_indices = indices[-50:]
    train_indices = indices[:-50][:100]
    if not train_indices:
        raise ValueError(
            "dataset has {} samples; none are left for training after "
            "holding out 50 for evaluation".format(len(indices)))
    dataset = torch.utils.data.Subset(dataset, train_indices)
    dataset_test = torch.utils.data.Subset(dataset_test, test_indices)

    data_loader = torch.utils.data.DataLoader(dataset,
                                              batch_size=2,
                                              shuffle=False,
                                              num_workers=4,
                                              collate_fn=utils.collate_fn)
    data_loader_test = torch.utils.data.DataLoader(dataset_test,
                                                   batch_size=2,
                                                   shuffle=False,
                                                   num_workers=4,
                                                   collate_fn=utils.collate_fn)

    model = get_model_instance_segmentation(num_classes)
    model.to(device)

    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params,
                                lr=0.005,
                                momentum=0.8,
                                weight_decay=0.0005)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=3,
                                                   gamma=0.1)

    for epoch in range(num_epochs):
        train_one_epoch(model, optimizer, data_loader, device, epoch,
                        print_freq=10)
        lr_scheduler.step()
        evaluate(model, data_loader_test, device=device)
    print("That's it!")
# Convolutional trunk of `net` (stem + the four residual stages); the
# pooling / classifier layers are left out so a spatial feature map remains.
backbone = torch.nn.Sequential(net.conv1, net.bn1, net.relu, net.maxpool,
                               net.layer1, net.layer2, net.layer3, net.layer4)
# FasterRCNN reads the number of feature channels from this attribute.
backbone.out_channels = 512

# One tuple of sizes / ratios PER FEATURE MAP.  The trailing commas matter:
# without them the outer parentheses are plain grouping, the arguments are
# flat tuples, and the generator is configured for five feature maps while
# this backbone produces only one.
anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                   aspect_ratios=((0.5, 1.0, 2.0),))
# NOTE(review): newer torchvision names the single feature map '0'
# (a string) -- confirm against the installed version.
roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
                                                output_size=7,
                                                sampling_ratio=2)
model = FasterRCNN(backbone,
                   num_classes=91,
                   rpn_anchor_generator=anchor_generator,
                   box_roi_pool=roi_pooler)
model = model.cuda()

criterion = torch.nn.CrossEntropyLoss().cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9,
                            weight_decay=5e-4)
solver = Solver(model, "./models/rcnn_4.pth", trainLoader, valLoader,
                criterion, optimizer,
                logfile="./logs/rcnn_resnet18.log",
                print_freq=20, save_name="rcnn")
solver.train(4)
class FasterRCNNMODEL:
    """Faster R-CNN with a selectable backbone plus simple train / validate loops.

    Fixes relative to the previous version:
      * the ResNet-50 backbone is built from the network's children
        (``resnet50`` has no ``.features``) and reports 2048 output channels;
      * unsupported backbone names raise instead of silently leaving the
        object without a backbone;
      * ``set_model`` no longer hits an unbound ``params`` when
        ``model_params`` is given, and passes a single-level anchor generator
        because the backbones return one feature map;
      * the duplicated training loop is shared, and the last-epoch
        predictions are computed once per batch.
    """

    # TODO: Later on enable passing params
    def __init__(self, model_params=None):
        self.params = model_params
        self.model = None
        self.optimizer = None
        self.backbone = None
        self.device = torch.device(
            'cuda') if torch.cuda.is_available() else torch.device('cpu')

    def set_backbone(self, backbone):
        """
        Select the feature extractor.

        :param backbone: string naming the backbone ('mobilenet_v2' or 'resnet50')
        :raises NotImplementedError: for VGG backbones (not implemented yet)
        :raises ValueError: for any other unsupported name
        """
        name = backbone.lower()
        if 'vgg' in name:
            raise NotImplementedError('VGG backbones are not supported yet')
        if 'mobilenet_v2' in name:
            self.backbone = torchvision.models.mobilenet_v2(
                pretrained=True).features
            self.backbone.out_channels = 1280
        elif 'resnet50' in name:
            resnet = torchvision.models.resnet50(pretrained=True)
            # Everything except the global average pool and the fc layer.
            self.backbone = torch.nn.Sequential(*list(resnet.children())[:-2])
            self.backbone.out_channels = 2048
        else:
            raise ValueError('Unsupported backbone: {!r}'.format(backbone))

    def set_model(self, num_classes=7):
        """
        Set model and determine configuration

        :param num_classes: int. Number of detector classes
        :return: None, generate self.model to be used for training and testing
        :raises NotImplementedError: if model_params was given (not supported yet)
        :raises RuntimeError: if set_backbone has not been called
        """
        if self.params is not None:
            # TODO: Enable user defined model params
            raise NotImplementedError(
                'user defined model params are not supported yet')
        if self.backbone is None:
            raise RuntimeError('call set_backbone() before set_model()')

        # Imported locally so this class does not rely on a file-level import.
        from torchvision.models.detection.rpn import AnchorGenerator

        # Default values: box_score_thresh = 0.05, box_nms_thresh = 0.5
        kwargs = {
            'box_score_thresh': 0.3,
            'box_nms_thresh': 0.3,
            'box_detections_per_img': 6
        }
        # One tuple of sizes / ratios per feature map; these backbones have one.
        anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                           aspect_ratios=((0.5, 1.0, 2.0),))
        self.model = FasterRCNN(self.backbone,
                                num_classes=num_classes,
                                rpn_anchor_generator=anchor_generator,
                                **kwargs)
        in_features = self.model.roi_heads.box_predictor.cls_score.in_features
        self.model.roi_heads.box_predictor = FastRCNNPredictor(
            in_features, num_classes)

        # Allow Multiple GPUs:
        # if torch.cuda.device_count() > 1:
        #     self.model = nn.DataParallel(self.model)
        self.model = self.model.to(self.device)

        params = [p for p in self.model.parameters() if p.requires_grad]
        self.optimizer = torch.optim.SGD(params, lr=0.01)

    def _to_device(self, images, targets):
        """Move a batch of images and target dicts to ``self.device``."""
        images = [image.to(self.device) for image in images]
        targets = [{k: v.to(self.device) for k, v in t.items()}
                   for t in targets]
        return images, targets

    def _run_training_epoch(self, train_loader):
        """Train for one pass over ``train_loader``; return (summed loss, images seen)."""
        self.model.train()  # Set to training mode
        total_loss = 0.0
        seen = 0
        with torch.set_grad_enabled(True):
            for images, targets in train_loader:
                images, targets = self._to_device(images, targets)
                self.optimizer.zero_grad()
                # The model is expected to return a dict of loss terms here.
                loss_dict = self.model(images, targets)
                losses = sum(loss for loss in loss_dict.values())
                losses.backward()
                self.optimizer.step()
                total_loss += losses.item() * len(images)
                seen += len(images)
        return total_loss, seen

    def train_model(self, train_loader, num_epochs):
        """
        Train (only!) of the model

        :param train_loader: DataLoader object yielding (images, targets)
        :param num_epochs: int. Number of epochs to train the model
        :return: None. Prints the mean per-image loss after each epoch
        """
        for epoch in range(num_epochs):
            train_loss, seen = self._run_training_epoch(train_loader)
            print('Train Loss = {:.4f}'.format(train_loss / max(seen, 1)))

    def train_eval_model(self, train_loader, val_loader, num_epochs):
        """
        Train model and evaluate performance after each epoch

        :param train_loader: DataLoader object. Training images and targets
        :param val_loader: DataLoader object yielding (image names, images, targets)
        :param num_epochs: int. Number of epochs for training and validation
        :return: None. In the last epoch the predictions are written to
                 'output_file.txt' through DataUtils.gen_out_file
        """
        # For evaluation
        imgs_name_list = []
        bbox_list = []
        labels_list = []
        for epoch in range(num_epochs):
            is_last_epoch = epoch == num_epochs - 1

            train_loss, n_train = self._run_training_epoch(train_loader)
            print('Train Loss = {:.4f}'.format(train_loss / max(n_train, 1)))

            # TODO: Calculate Dice and IoU loss for it
            val_loss = 0.0
            n_val = 0
            with torch.no_grad():
                for idx, (imgs_name, images, targets) in enumerate(val_loader):
                    images, targets = self._to_device(images, targets)
                    # Training mode is needed to get the loss dict back;
                    # no_grad keeps the weights untouched.
                    # NOTE(review): normalisation layers that track running
                    # statistics may still be updated by this pass -- confirm.
                    self.model.train()
                    loss_dict = self.model(images, targets)
                    losses = sum(loss for loss in loss_dict.values())
                    val_loss += losses.item() * len(images)
                    n_val += len(images)

                    if not is_last_epoch:
                        continue
                    self.model.eval()  # Set model to evaluate performance
                    predictions = self.model(images)
                    imgs_name_list.extend(imgs_name)
                    bbox_list.extend(
                        p['boxes'].int().cpu().tolist() for p in predictions)
                    labels_list.extend(
                        p['labels'].int().cpu().tolist() for p in predictions)
                    # Optional - SEE the performance on the second last batch
                    if idx == len(val_loader) - 2:
                        MiscUtils.view(images, predictions, k=len(images),
                                       model_type='faster_rcnn')

            if is_last_epoch:
                DataUtils.gen_out_file('output_file.txt', imgs_name_list,
                                       bbox_list, labels_list)
            print('Validation Loss = {:.4f}'.format(val_loss / max(n_val, 1)))
def main(network):
    """Train the detector selected by ``network`` and save its weights.

    Fixes relative to the previous version: an unknown ``network`` now raises
    ``ValueError`` up front (it used to reach ``model.to`` with ``model``
    unbound), the five near-identical branches are driven by one table, and
    the output directory is created with ``exist_ok=True``.

    Args:
        network: one of 'resnet50', 'resnet18', 'resnet152', 'RPNresnet50',
            'RPNresnet152'.

    Returns:
        Tuple ``(Ls, Ls_val, num_epochs)``: the training and validation loss
        records returned by ``record_losses`` and the number of epochs run.

    Raises:
        ValueError: if ``network`` is not one of the supported names.
    """
    # network name -> (backbone name, use RPN_custom instead of FasterRCNN)
    specs = {
        'resnet50': ('resnet50', False),
        'resnet18': ('resnet18', False),
        'resnet152': ('resnet152', False),
        'RPNresnet50': ('resnet50', True),
        'RPNresnet152': ('resnet152', True),
    }
    if network not in specs:
        raise ValueError('Unknown network {!r}; expected one of {}'.format(
            network, sorted(specs)))
    backbone_name, rpn_only = specs[network]

    # train on the GPU or on the CPU, if a GPU is not available
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')

    # our dataset has two classes only
    num_classes = 2

    # Randomly permute both datasets once; the loaders below do not shuffle.
    indices = torch.randperm(len(TBdata)).tolist()
    dataset = torch.utils.data.Subset(TBdata, indices)
    indices_ = torch.randperm(len(TBdata_test)).tolist()
    dataset_val = torch.utils.data.Subset(TBdata_test, indices_)

    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=8,
                                             sampler=None,
                                             num_workers=0,
                                             collate_fn=collate_fn)
    dataloader_val = torch.utils.data.DataLoader(dataset_val,
                                                 batch_size=8,
                                                 sampler=None,
                                                 num_workers=0,
                                                 collate_fn=collate_fn)

    # Transform parameters (statistics calculated on the training data)
    min_size = 550
    max_size = 700
    image_means = [0.9492, 0.9492, 0.9492]
    image_stds = [0.1158, 0.1158, 0.1158]

    backbone = resnet_fpn_backbone(backbone_name, True)
    detector_cls = RPN_custom if rpn_only else FasterRCNN
    model = detector_cls(backbone,
                         num_classes,
                         min_size=min_size,
                         max_size=max_size,
                         image_mean=image_means,
                         image_std=image_stds)

    # move model to the right device
    model.to(device)

    # construct an optimizer
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params,
                                lr=0.005,
                                momentum=0.9,
                                weight_decay=0.0005)
    # and a learning rate scheduler
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=3,
                                                   gamma=0.1)

    num_epochs = 10
    loss_keys = ('total loss', 'loss_classifier', 'loss_box_reg',
                 'loss_objectness', 'loss_rpn_box_reg')
    Ls = {key: [] for key in loss_keys}
    Ls_val = {key: [] for key in loss_keys}

    for epoch in range(num_epochs):
        # train for one epoch, printing every 10 iterations
        train_one_epoch(model, optimizer, dataloader, device, epoch,
                        print_freq=10)
        # update the learning rate
        lr_scheduler.step()
        # record losses on the validation and training data
        Ls_val = record_losses(model, dataloader_val, device, Ls_val, network)
        Ls = record_losses(model, dataloader, device, Ls, network)

    # Create the output folder if needed, then save the weights.
    output_loc = f'./{network}/'
    os.makedirs(output_loc, exist_ok=True)
    torch.save(model.state_dict(), output_loc + 'model.pt')

    print("That's it!")
    return Ls, Ls_val, num_epochs
self.targets.append(d) def __len__(self): return len(self.txt_fld) def __getitem__(self, idx): # w,h = self.images[idx] return self.images[idx], self.targets[idx] root_url = '/home/dung/DocData/cp/145' dataset = DocDataset(root_url) data_loader = torch.utils.data.DataLoader( dataset, batch_size=1, shuffle=True, num_workers=0) # criterion = torch.nn.MSELoss(reduction='sum') optimizer = torch.optim.SGD(model.parameters(), lr=1e-4) for i in range(1000): print('Epoch {}\n'.format(i)) for j, (images, targets) in enumerate(data_loader): images = images.to(device) a = {} a['boxes'] = targets['boxes'][0].to(device) a['labels'] = targets['labels'][0].to(device) output = model(images, [a]) losses = sum(loss for loss in output.values()) if j % 30 == 0: print('Step {} -- loss_classifier = {} -- loss_box_reg = {} -- loss_objectness = {} -- loss_rpn_box_reg = {}\n'.format(j, output['loss_classifier'].item(), output['loss_box_reg'].item(), output['loss_objectness'].item(), output['loss_rpn_box_reg'].item())) optimizer.zero_grad()
def main(args):
    """Train a Faster R-CNN digit detector on SVHN with a ResNet-18 encoder.

    Changes relative to the previous version: the checkpoint directory is
    created before the first save (saving used to fail when ``save/detector/``
    did not exist), and the evaluation loader no longer shuffles.

    Args:
        args: parsed options; this function reads ``device``, ``distributed``,
            ``batch_size``, ``workers``, ``pretrained``, ``gpu``, ``lr``,
            ``momentum``, ``weight_decay``, ``lr_steps``, ``lr_gamma``,
            ``resume``, ``start_epoch``, ``epochs`` and ``print_freq``.
    """
    detection_util.init_distributed_mode(args)
    print(args)
    device = torch.device(args.device)
    torch.multiprocessing.set_sharing_strategy('file_system')

    # Data: the same tensor conversion + normalisation for both splits.
    mean = (0.4914, 0.4822, 0.4465)
    std = (0.2023, 0.1994, 0.2010)
    normalize = transforms.Normalize(mean=mean, std=std)
    train_transform = transforms.Compose([transforms.ToTensor(), normalize])
    train_dataset = SVHNFull(root='./datasets/SVHN_full',
                             transform=train_transform,
                             download=False)
    test_dataset = SVHNFull(root='./datasets/SVHN_full',
                            split='test',
                            transform=train_transform,
                            download=False)

    if args.distributed:
        train_sampler = data.distributed.DistributedSampler(train_dataset)
        test_sampler = data.distributed.DistributedSampler(test_dataset)
    else:
        train_sampler = None
        test_sampler = None

    print(args.batch_size)
    train_loader = data.DataLoader(train_dataset,
                                   batch_size=args.batch_size,
                                   shuffle=(train_sampler is None),
                                   num_workers=args.workers,
                                   pin_memory=True,
                                   sampler=train_sampler,
                                   collate_fn=train_dataset.collate_fn)
    test_loader = data.DataLoader(test_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  num_workers=args.workers,
                                  pin_memory=True,
                                  sampler=test_sampler,
                                  collate_fn=test_dataset.collate_fn)

    # Encoder, optionally initialised from a pre-training checkpoint.
    pre_mod = ResNetDetection(name='resnet18')
    #pre_mod = SupResNetDetection(name='resnet18')
    if args.pretrained:
        ckpt = torch.load(args.pretrained, map_location='cpu')
        # Strip the "module." that (Distributed)DataParallel adds to keys.
        state_dict = {
            k.replace("module.", ""): v
            for k, v in ckpt['model'].items()
        }
        pre_mod.load_state_dict(state_dict)

    backbone = pre_mod.encoder
    # FasterRCNN reads the number of feature channels from this attribute.
    backbone.out_channels = 512
    anchor_generator = AnchorGenerator(sizes=((16, 32, 128),),
                                       aspect_ratios=((0.5, 1.0),))
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                    output_size=3,
                                                    sampling_ratio=2)
    # NOTE(review): torchvision counts the background as a class; if the
    # dataset labels digits 1..10, num_classes presumably has to be 11 --
    # confirm against SVHNFull's label encoding.
    model = FasterRCNN(backbone,
                       num_classes=10,
                       rpn_anchor_generator=anchor_generator,
                       box_roi_pool=roi_pooler,
                       rpn_batch_size_per_image=64)
    model.to(device)

    model_without_ddp = model
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model, device_ids=[args.gpu])
        model_without_ddp = model.module

    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params,
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=args.lr_steps, gamma=args.lr_gamma)

    if args.resume:
        checkpoint = torch.load(args.resume, map_location='cpu')
        model_without_ddp.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        args.start_epoch = checkpoint['epoch'] + 1

    # Make sure the checkpoint directory exists before the first save.
    save_dir = 'save/detector/'
    os.makedirs(save_dir, exist_ok=True)

    print("Start training")
    start_time = time.time()
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            # Gives each epoch a different shuffling across processes.
            train_sampler.set_epoch(epoch)
        train(model, optimizer, train_loader, device, epoch, args.print_freq)
        lr_scheduler.step()
        #if epoch > 15:
        evaluate(model, test_loader, device=device)
        detection_util.save_on_master(
            {
                'model': model_without_ddp.state_dict(),
                'optimizer': optimizer.state_dict(),
                'lr_scheduler': lr_scheduler.state_dict(),
                'args': args,
                'epoch': epoch
            }, os.path.join(save_dir, 'model_{}.pth'.format(epoch)))

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str))
def main():
    """Train the VISUM 2019 baseline Faster R-CNN and save it to disk.

    Parses the command-line options, builds a MobileNetV2-backed
    ``FasterRCNN``, holds out the last 100 shuffled samples for validation,
    trains for ``--epochs`` epochs (evaluating after each one) and finally
    serialises the whole model object to ``--model_path``.
    """
    parser = argparse.ArgumentParser(
        description='VISUM 2019 competition - baseline training script',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-d', '--data_path', default='/home/master/dataset/train',
                        metavar='', help='data directory path')
    parser.add_argument('-m', '--model_path', default='./baseline.pth',
                        metavar='', help='model file (output of training)')
    parser.add_argument('--epochs', default=50, type=int, metavar='',
                        help='number of epochs')
    parser.add_argument('--lr', default=0.005, type=float, metavar='',
                        help='learning rate')
    parser.add_argument('--l2', default=0.0005, type=float, metavar='',
                        help='L-2 regularization')
    args = vars(parser.parse_args())

    # Data augmentation
    def get_transform(train):
        """Return the transform pipeline; augmentation only when ``train``."""
        transforms = []
        # converts the image, a PIL image, into a PyTorch Tensor
        transforms.append(T.ToTensor())
        if train:
            # during training, randomly flip the training images
            # and ground-truth for data augmentation
            transforms.append(T.RandomHorizontalFlip(0.5))
            # NOTE(review): these two come from torchvision.transforms and
            # accept only an image; if T.Compose invokes each transform as
            # t(image, target) they will raise - confirm against T.
            transforms.append(torchvision.transforms.ColorJitter(contrast=.3))
            transforms.append(torchvision.transforms.ColorJitter(brightness=.4))
        return T.Compose(transforms)

    backbone = torchvision.models.mobilenet_v2(pretrained=True).features
    # FasterRCNN reads ``out_channels`` from the backbone.
    backbone.out_channels = 1280

    anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                       aspect_ratios=((0.5, 1.0, 2.0),))
    # The feature-map name is the string '0', matching the other detectors
    # in this file.
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                    output_size=7,
                                                    sampling_ratio=2)

    # put the pieces together inside a FasterRCNN model
    model = FasterRCNN(backbone,
                       num_classes=10,
                       rpn_anchor_generator=anchor_generator,
                       box_roi_pool=roi_pooler)

    # See the model architecture
    print(model)

    # use our dataset and defined transformations
    dataset = VisumData(args['data_path'], modality='rgb',
                        transforms=get_transform(train=True))
    dataset_val = VisumData(args['data_path'], modality='rgb',
                            transforms=get_transform(train=False))

    # split the dataset in train and validation set; both subsets index the
    # same permutation so they never overlap
    torch.manual_seed(1)
    indices = torch.randperm(len(dataset)).tolist()
    dataset = torch.utils.data.Subset(dataset, indices[:-100])
    dataset_val = torch.utils.data.Subset(dataset_val, indices[-100:])

    # define training and validation data loaders
    data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=2, shuffle=True, num_workers=0,
        collate_fn=utils.collate_fn)
    data_loader_val = torch.utils.data.DataLoader(
        dataset_val, batch_size=2, shuffle=False, num_workers=0,
        collate_fn=utils.collate_fn)

    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    model.to(device)

    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=args['lr'], momentum=0.9,
                                weight_decay=args['l2'])
    # halves the learning rate every 10 epochs
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=10,
                                                   gamma=0.5)

    for epoch in range(args['epochs']):
        # train for one epoch, printing every 10 iterations
        train_one_epoch(model, optimizer, data_loader, device, epoch,
                        print_freq=10)
        # update the learning rate
        lr_scheduler.step()
        # evaluate on the validation split
        evaluate(model, data_loader_val, device=device)

    torch.save(model, args['model_path'])
class FasterRCNNResnet50FPN(LightningModule):
    """Lightning wrapper around a torchvision ``FasterRCNN`` with a ResNeSt-50 trunk.

    The configuration object passed as ``conf`` is stored in ``self.hparams``
    and is read through its ``Train`` section (scheduler choice, learning
    rate, epochs, per-scheduler options).
    """

    def __init__(self, conf=None, *args, **kwargs):
        """Build the detector.

        Args:
            conf: Configuration object exposing a ``Train`` attribute tree.
            *args: Ignored.
            **kwargs: Ignored.
        """
        super().__init__()
        self.hparams = conf

        resnest = torch.hub.load('zhanghang1989/ResNeSt',
                                 'resnest50',
                                 pretrained=True)
        # Use the network without its last three child modules as a
        # single-feature-map backbone.
        backbone = Sequential(*list(resnest.children())[:-3])
        # FasterRCNN reads ``out_channels`` from the backbone.
        backbone.out_channels = 1024

        anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512), ),
                                           aspect_ratios=((0.5, 1.0, 2.0), ))
        roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                        output_size=[7, 7],
                                                        sampling_ratio=2)
        self.model = FasterRCNN(backbone,
                                num_classes=2,
                                rpn_anchor_generator=anchor_generator,
                                box_roi_pool=roi_pooler)

    def forward(self, x):
        """Run the wrapped detector on ``x`` and return its output."""
        return self.model(x)

    def configure_optimizers(self):
        """Create the RangerLars optimizer and the configured LR scheduler.

        Returns:
            ``([optimizer], [scheduler_dict])`` where ``scheduler_dict`` holds
            the scheduler and its stepping ``interval``.

        Raises:
            ValueError: If ``hparams.Train.scheduler`` is not one of the
                supported names (previously a ``None`` scheduler was handed
                to the trainer and only failed later).
        """
        train_cfg = self.hparams.Train
        params = [p for p in self.model.parameters() if p.requires_grad]
        optimizer = RangerLars(params)

        # noinspection PyUnresolvedReferences
        name = train_cfg.scheduler
        if name == 'OneCycleLR':
            cfg = train_cfg.Schedulers.OneCycleLR
            scheduler = OneCycleLR(optimizer,
                                   max_lr=train_cfg.lr,
                                   epochs=train_cfg.epochs,
                                   steps_per_epoch=train_cfg.steps_per_epoch,
                                   pct_start=cfg.pct_start,
                                   anneal_strategy=cfg.anneal_strategy,
                                   cycle_momentum=False,
                                   div_factor=cfg.div_factor)
        elif name == 'NCycleLR':
            cfg = train_cfg.Schedulers.NCycleLR
            scheduler = NCycleLR(optimizer,
                                 max_lr=train_cfg.lr,
                                 n=cfg.n,
                                 lr_factor=cfg.lr_factor,
                                 epochs=train_cfg.epochs,
                                 steps_per_cycle=cfg.steps_per_cycle,
                                 pct_start=cfg.pct_start,
                                 anneal_strategy=cfg.anneal_strategy,
                                 cycle_momentum=False,
                                 div_factor=cfg.div_factor)
        elif name == 'CyclicLR':
            cfg = train_cfg.Schedulers.CyclicLR
            scheduler = CyclicLR(optimizer,
                                 base_lr=train_cfg.lr / 1e5,
                                 max_lr=train_cfg.lr,
                                 step_size_up=train_cfg.steps_per_epoch,
                                 mode=cfg.mode,
                                 gamma=cfg.gamma,
                                 cycle_momentum=False)
        elif name == 'ReduceLROnPlateau':
            cfg = train_cfg.Schedulers.ReduceLROnPlateau
            scheduler = ReduceLROnPlateau(optimizer,
                                          factor=cfg.factor,
                                          patience=cfg.patience,
                                          verbose=True)
        else:
            raise ValueError(
                'Unsupported scheduler: {!r}'.format(name))

        schedulers = [{
            'scheduler': scheduler,
            'interval': train_cfg.Schedulers.interval
        }]
        return [optimizer], schedulers

    def training_step(self, batch, batch_idx):
        """Compute the summed detection loss for one training batch.

        Returns:
            Dict with the total ``loss``, the individual losses under ``log``
            and the current learning rate for the progress bar.
        """
        images, targets, _ = batch
        targets = [{k: v for k, v in t.items()} for t in targets]

        # FasterRCNN model returns dict with classification and regression loss
        loss_dict = self.model(images, targets)
        total_loss = sum(loss for loss in loss_dict.values())

        scheduler = self.trainer.lr_schedulers[0]['scheduler']
        lr = scheduler.optimizer.param_groups[0]['lr']
        return {
            'loss': total_loss,
            'log': loss_dict,
            'progress_bar': {
                'lr': lr
            }
        }

    def validation_step(self, batch, batch_idx):
        """Score one validation batch with per-image precision.

        Returns:
            Dict with 1-D float tensors ``val_map`` (one precision per image
            in the batch) and ``val_loss`` (``1 - precision``), placed on the
            same device as the input images.
        """
        images, targets, _ = batch
        targets = [{k: v for k, v in t.items()} for t in targets]
        outputs = self.model(images, targets)

        # Size by the actual number of images: a short final batch must not
        # contribute phantom zero precisions.
        precisions = np.zeros((len(outputs), ))
        for i, output in enumerate(outputs):
            # Highest-confidence detections first.
            order = torch.argsort(output['scores'], descending=True)
            boxes = output['boxes'][order].detach().cpu().numpy().astype(
                np.int32)
            gt = targets[i]['boxes'].detach().cpu().numpy().astype(np.int32)
            precisions[i] = calculate_image_precision(gt, boxes)

        # Follow the data instead of hard-coding 'cuda' so CPU runs work too.
        device = images[0].device
        return {
            'val_loss':
            torch.as_tensor(1 - precisions,
                            dtype=torch.float32,
                            device=device),
            'val_map':
            torch.as_tensor(precisions, dtype=torch.float32, device=device)
        }

    def validation_epoch_end(self, outputs):
        """Average the per-image validation metrics over the whole epoch."""
        # Concatenate rather than stack: batches may differ in length.
        avg_loss = torch.cat([x['val_loss'].reshape(-1)
                              for x in outputs]).mean()
        avg_precision = torch.cat(
            [x['val_map'].reshape(-1) for x in outputs]).mean()
        logs = {'val_loss': avg_loss, 'val_map': avg_precision}
        return {'avg_val_loss': avg_loss, 'log': logs, 'progress_bar': logs}

    def test_step(self, batch, batch_idx):
        """Score a test batch; for the first batch also log annotated images."""
        if batch_idx == 0:
            images, targets, _ = batch
            targets = [{k: v for k, v in t.items()} for t in targets]
            outputs = self.model(images, targets)
            for i, output in enumerate(outputs):
                # CHW tensor -> HWC array for the image logger.
                image = images[i].permute(1, 2, 0).cpu().numpy()
                order = torch.argsort(output['scores'], descending=True)
                boxes = output['boxes'][order].detach().cpu().numpy().astype(
                    np.int32)
                gt = targets[i]['boxes'].detach().cpu().numpy().astype(
                    np.int32)
                results = get_image_with_results(image, boxes, gt)
                self.logger.experiment.add_image(
                    f"bb_test(RED: Predicted; BLUE: Ground-truth)/image{i}",
                    results,
                    dataformats='HWC')
        return self.validation_step(batch, batch_idx)

    def test_epoch_end(self, outputs):
        """Aggregate test metrics, renaming ``val*`` keys to ``test*``."""
        test_results = self.validation_epoch_end(outputs)
        test_results = {
            k.replace('val', 'test'): v.cpu().numpy().tolist()
            for k, v in test_results['log'].items()
        }
        return {'log': test_results}
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), transforms.Resize(300), transforms.CenterCrop(300) ]) dataset = CocoDetection('/datasets/Coco/train2017', '/datasets/Coco/annotations/instances_train2017.json', transform=myTransform) data = dataset[0] x, y = data print(x.shape) print(y[0].keys()) dataloader = DataLoader(dataset, batch_size=32) validset = CocoDetection('/datasets/Coco/val2017', '/datasets/Coco/annotations/instances_val2017.json') validloader = DataLoader(validset, batch_size=32) optimizer = torch.optim.Adam(model.parameters(), lr=0.01) def train_one_epoch(dataloader): for i, data in dataloader: x, y = data print(x.shape) print(y.shape) if __name__ == '__main__': train_one_epoch(dataloader)
# Script-level training configuration.
BATCH_SIZE = 4
EPOCH = 300
# NOTE(review): THRESH and IOU are not referenced in the visible part of the
# script; presumably a score threshold and an IoU threshold - confirm.
THRESH = 0.9
IOU = 0.3
CLASSES = 3
DEVICE = torch.device("cuda")

# Faster R-CNN on top of the convolutional part of an untrained VGG19.
anchor_generator = AnchorGenerator(sizes=((32, 64), ),
                                   aspect_ratios=((0.6, 1.0, 1.6), ))
backbone = torchvision.models.vgg19(pretrained=False).features
# FasterRCNN reads ``out_channels`` from the backbone.
backbone.out_channels = 512
model = FasterRCNN(backbone,
                   num_classes=CLASSES,
                   rpn_anchor_generator=anchor_generator)
model.to(DEVICE)
optimizer = optim.Adam(model.parameters(), lr=1e-5)

# Training data is shuffled and incomplete batches are dropped; test data
# keeps its order and every sample.
train_dataset = Dataset()
test_dataset = Dataset(training=False)
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=BATCH_SIZE,
                                           shuffle=True,
                                           pin_memory=True,
                                           drop_last=True)
test_loader = torch.utils.data.DataLoader(test_dataset,
                                          batch_size=BATCH_SIZE,
                                          shuffle=False,
                                          drop_last=False)

for epoch in range(EPOCH):
    for index, (data, target) in enumerate(train_loader):
        # Training mode is (re)selected at the start of every iteration.
        model.train()
        data = data.to(DEVICE)
class VAEFasterRCNN(ObjectDetectionModel):
    """VAE that predicts a map, followed by a Faster R-CNN run on that map.

    The VAE output is binarised with ``hparams.threshold`` and fed to a
    ResNet-18-backed ``FasterRCNN`` whose boxes are converted back to the
    target coordinate system for scoring.
    """

    def __init__(self, hparams):
        """Build both sub-networks and load the saved VAE weights.

        Args:
            hparams: Hyper-parameter object; ``resnet_style`` and
                ``threshold`` are read here.
        """
        super(VAEFasterRCNN, self).__init__(hparams)
        self.resnet_style = hparams.resnet_style
        self.threshold = hparams.threshold
        self.vae = vae(resnet_style=self.resnet_style, pretrained=False)
        #self.farcnn = fasterrcnn_resnet50_fpn(pretrained_backbone=False, num_classes=1)

        # ResNet-18 without its last two child modules as the backbone.
        resnet_net = torchvision.models.resnet18(pretrained=False)
        modules = list(resnet_net.children())[:-2]
        backbone = nn.Sequential(*modules)
        # FasterRCNN reads ``out_channels`` from the backbone.
        backbone.out_channels = 512
        # torchvision counts the background as a class, so a single object
        # category needs ``num_classes=2``; with 1 only background exists and
        # the detection head can never emit a box.
        self.farcnn = FasterRCNN(backbone=backbone, num_classes=2)
        #https://stackoverflow.com/questions/58362892/resnet-18-as-backbone-in-faster-r-cnn

        self.vae.load_state_dict(
            torch.load("submission2_object_detection_state_dict.pt"))
        self.criterion = self.loss_function

    def loss_function(self, pred_maps, road_images, mu, logvar):
        """Return ``(0.9 * BCE + 0.1 * KLD, BCE, KLD)`` for the VAE output."""
        criterion = nn.BCELoss()
        CE = criterion(pred_maps.squeeze(), road_images.float().squeeze())
        KLD = -0.5 * torch.mean(1 + logvar - mu.pow(2) - logvar.exp())
        return 0.9 * CE + 0.1 * KLD, CE, KLD

    def process_batch(self, batch):
        """Extend the parent's batch with Faster R-CNN style targets.

        Returns:
            ``(samples, targets, tar_sems, road_images, targets_farcnn)``;
            ``targets_farcnn`` is a list with one ``{'boxes', 'labels'}``
            dict per sample.
        """
        samples, targets, tar_sems, road_images = super(
            VAEFasterRCNN, self).process_batch(batch)

        targets_farcnn = []
        for _, target in zip(samples, targets):
            bbs = target["bounding_box"]
            gt_boxes = torch.zeros((bbs.shape[0], 4)).to(bbs.device)
            for j, bb in enumerate(bbs):
                # bb[0] / bb[1] hold the x / y coordinates of one box.
                # Scale by 10, shift by 400 and flip y to obtain an
                # axis-aligned [x1, y1, x2, y2] box.
                gt_boxes[j] = torch.FloatTensor([
                    bb[0].min().item() * 10 + 400,
                    -(bb[1].max().item() * 10) + 400,
                    bb[0].max().item() * 10 + 400,
                    -(bb[1].min().item() * 10) + 400
                ])
                #logging.info("gt_box: ", gt_boxes[j])
            targets_farcnn.append({
                'boxes': gt_boxes,
                # Label 0 is reserved for background, so every ground-truth
                # box gets the single foreground label 1.
                'labels': torch.ones((bbs.shape[0]),
                                     dtype=torch.int64).to(bbs.device),
            })
        return samples, targets, tar_sems, road_images, targets_farcnn

    def training_step(self, batch, batch_idx):
        """Combine the VAE loss and the Faster R-CNN losses for one batch.

        Returns:
            Dict with the differentiable total ``loss`` and the batch size
            ``n``.
        """
        samples, targets, tar_sems, road_images, targets_farcnn = self.process_batch(
            batch)
        pred_maps, mu, logvar, farcnn_loss = self.forward(
            samples, targets_farcnn)
        train_loss, CE, KLD = self.criterion(pred_maps, tar_sems, mu, logvar)

        # Keep the detector losses as tensors: converting them with
        # ``.item()`` detaches them, and the detector would never receive a
        # gradient.
        f_loss = sum(farcnn_loss.values())
        train_loss = train_loss + f_loss

        self.logger.log_metrics(
            {
                "train_loss": train_loss / len(samples),
                "train_CE": CE / len(samples),
                "train_KLD": KLD / len(samples),
                "train_farcnn_loss": float(f_loss)
            }, self.global_step)
        return {"loss": train_loss, "n": len(samples)}

    def forward(self, imgs, gt_boxes=None):
        """Run the VAE, binarise its map and pass it through the detector.

        Args:
            imgs: Input batch for the VAE.
            gt_boxes: Detector targets; only used in training mode.

        Returns:
            ``(pred_maps, mu, logvar, rcn_out)`` where ``rcn_out`` is whatever
            the detector returns for the current mode.
        """
        pred_maps, mu, logvar = self.vae(imgs)
        binary_maps = (pred_maps.unsqueeze(1) > self.threshold).float()
        if self.training:
            rcn_out = self.farcnn(binary_maps, gt_boxes)
        else:
            rcn_out = self.farcnn(binary_maps)
        return pred_maps, mu, logvar, rcn_out

    def training_epoch_end(self, outputs):
        """Log the summed step losses divided by the number of samples."""
        avg_training_loss = 0
        n = 0
        for out in outputs:
            avg_training_loss += out["loss"]
            n += out["n"]
        avg_training_loss /= n
        return {"log": {"avg_train_loss": avg_training_loss}}

    def validation_step(self, batch, batch_idx):
        """Return the summed threat score and sample count for one batch."""
        samples, targets, tar_sems, road_images, targets_farcnn = self.process_batch(
            batch)
        pred_maps, mu, logvar, pred_boxes = self.forward(samples)
        threat_score = self.get_threat_score(pred_boxes, targets)
        return {"val_ts": threat_score, "n": len(samples)}

    def validation_epoch_end(self, outputs):
        """Average the threat score over all validation samples (0 if none)."""
        avg_val_ts = 0
        n = 0
        for out in outputs:
            avg_val_ts += out["val_ts"]
            n += out["n"]
        if n > 0:
            avg_val_ts /= n
        else:
            avg_val_ts = 0
        return {
            "val_ts": avg_val_ts,
            "log": {
                "avg_val_ts": avg_val_ts
            },
            "progress_bar": {
                "avg_val_ts": avg_val_ts
            }
        }

    def get_threat_score(self, pred_boxes, targets):
        """Sum ``compute_ats_bounding_boxes`` over the samples of a batch.

        Args:
            pred_boxes: Detector outputs, one dict with a ``"boxes"`` tensor
                per sample.
            targets: Ground-truth dicts with a ``"bounding_box"`` tensor.

        Returns:
            The accumulated threat score (not yet divided by batch size).
        """
        threat_score = 0
        for preds, target in zip(pred_boxes, targets):
            bbs_pred = preds["boxes"]
            if bbs_pred.shape[0] > 0:
                actual_boxes = torch.zeros((bbs_pred.shape[0], 2, 4))
                for i, bb in enumerate(bbs_pred):
                    x1, y1, x2, y2 = bb.tolist()
                    # Inverse of the mapping applied in ``process_batch``.
                    x_min, x_max = (x1 - 400) / 10, (x2 - 400) / 10
                    # y1 <= y2 for a detector box, so after the sign flip
                    # y_upper >= y_lower.
                    y_upper, y_lower = -(y1 - 400) / 10, -(y2 - 400) / 10
                    # One row of four x coordinates, one row of four y
                    # coordinates.
                    actual_boxes[i] = torch.FloatTensor(
                        [[x_max, x_max, x_min, x_min],
                         [y_lower, y_upper, y_lower, y_upper]])
                logging.info("Predicted boxes: %s", actual_boxes)
                logging.info("True boxes: %s", target["bounding_box"])
            else:
                # No detection: score a single all-zero box instead.
                actual_boxes = torch.zeros((1, 2, 4))

            ts_road_map = compute_ats_bounding_boxes(
                actual_boxes.cpu(), target["bounding_box"].cpu())
            threat_score += ts_road_map
        return threat_score

    def configure_optimizers(self):
        """Adam over both sub-networks with a 0.97-per-step exponential decay."""
        optimizer = optim.Adam(list(self.vae.parameters()) +
                               list(self.farcnn.parameters()),
                               lr=self.learning_rate,
                               weight_decay=self.weight_decay)
        scheduler = optim.lr_scheduler.StepLR(optimizer,
                                              step_size=1,
                                              gamma=0.97)
        return [optimizer], [scheduler]