def __init__(self, weight_path, resume, gpu_id, accumulate, fp_16):
    """Assemble the state needed for a YOLOv4 training run.

    Builds, in order: the training dataset and its loader, the model, an
    SGD optimizer, the loss, and a cosine-decay learning-rate schedule.
    Hyper-parameters are read from ``cfg.TRAIN`` and ``cfg.MODEL``.
    Weights are loaded through ``__load_model_weights`` before the
    scheduler is created.

    Args:
        weight_path: Stored as ``self.weight_path`` and forwarded to
            ``__load_model_weights``.
        resume: Forwarded to ``__load_model_weights``.
        gpu_id: Forwarded to ``gpu.select_device``.
        accumulate: Stored as ``self.accumulate``.
        fp_16: Stored as ``self.fp_16``.
    """
    init_seeds(0)

    # Plain bookkeeping attributes.
    self.fp_16 = fp_16
    self.device = gpu.select_device(gpu_id)
    self.start_epoch = 0
    self.best_mAP = 0.
    self.accumulate = accumulate

    train_cfg = cfg.TRAIN
    self.epochs = train_cfg["EPOCHS"]
    self.weight_path = weight_path
    self.multi_scale_train = train_cfg["MULTI_SCALE_TRAIN"]

    # Data pipeline.
    self.train_dataset = data.Build_Dataset(
        anno_file_type="train",
        img_size=train_cfg["TRAIN_IMG_SIZE"],
    )
    print('train img size is {}'.format(train_cfg["TRAIN_IMG_SIZE"]))
    self.train_dataloader = DataLoader(
        self.train_dataset,
        batch_size=train_cfg["BATCH_SIZE"],
        num_workers=train_cfg["NUMBER_WORKERS"],
        shuffle=True,
        pin_memory=True,
    )

    # Network, optimizer and loss.
    self.yolov4 = Build_Model().to(self.device)
    self.optimizer = optim.SGD(
        self.yolov4.parameters(),
        lr=train_cfg["LR_INIT"],
        momentum=train_cfg["MOMENTUM"],
        weight_decay=train_cfg["WEIGHT_DECAY"],
    )
    model_cfg = cfg.MODEL
    self.criterion = YoloV4Loss(
        anchors=model_cfg["ANCHORS"],
        strides=model_cfg["STRIDES"],
        iou_threshold_loss=train_cfg["IOU_THRESHOLD_LOSS"],
    )

    self.__load_model_weights(weight_path, resume)

    # T_max and warmup are given in iterations: epochs * batches per epoch.
    batches_per_epoch = len(self.train_dataloader)
    self.scheduler = cosine_lr_scheduler.CosineDecayLR(
        self.optimizer,
        T_max=self.epochs * batches_per_epoch,
        lr_init=train_cfg["LR_INIT"],
        lr_min=train_cfg["LR_END"],
        warmup=train_cfg["WARMUP_EPOCHS"] * batches_per_epoch,
    )
def __init__(self, weight_path=None, resume=False, gpu_id=0, accumulate=1, fp_16=False):
    """Assemble the state needed for a YOLOv4 / Mobilenet-YOLO training run.

    Reads hyper-parameters from ``cfg.TRAIN``, ``cfg.MODEL`` and
    ``cfg.MODEL_TYPE``, then builds the dataset, loader, model, SGD
    optimizer, loss and cosine-decay learning-rate schedule. When
    ``resume`` is truthy, ``__load_resume_weights`` is called last.

    Args:
        weight_path: Stored as ``self.weight_path``; forwarded to
            ``Build_Model`` and, when resuming, to
            ``__load_resume_weights``.
        resume: Forwarded to ``Build_Model``; also gates the call to
            ``__load_resume_weights``.
        gpu_id: Forwarded to ``gpu.select_device``.
        accumulate: Stored as ``self.accumulate``.
        fp_16: Stored as ``self.fp_16``.
    """
    init_seeds(0)

    self.fp_16 = fp_16
    self.device = gpu.select_device(gpu_id)
    self.start_epoch = 0
    self.best_mAP = 0.0
    self.accumulate = accumulate
    self.weight_path = weight_path

    train_cfg = cfg.TRAIN
    self.multi_scale_train = train_cfg["MULTI_SCALE_TRAIN"]
    self.showatt = train_cfg["showatt"]

    # Report which input-size mode is active.
    if self.multi_scale_train:
        banner = "Using multi scales training"
    else:
        banner = "train img size is {}".format(train_cfg["TRAIN_IMG_SIZE"])
    print(banner)

    self.train_dataset = data.Build_Dataset(
        anno_file_type="train",
        img_size=train_cfg["TRAIN_IMG_SIZE"],
    )

    # Epoch budget and eval_epoch both depend on the configured model type.
    if cfg.MODEL_TYPE["TYPE"] == "YOLOv4":
        self.epochs = train_cfg["YOLO_EPOCHS"]
        self.eval_epoch = 30
    else:
        self.epochs = train_cfg["Mobilenet_YOLO_EPOCHS"]
        self.eval_epoch = 50

    self.train_dataloader = DataLoader(
        self.train_dataset,
        batch_size=train_cfg["BATCH_SIZE"],
        num_workers=train_cfg["NUMBER_WORKERS"],
        shuffle=True,
        pin_memory=True,
    )

    network = Build_Model(
        weight_path=weight_path,
        resume=resume,
        showatt=self.showatt,
    )
    self.yolov4 = network.to(self.device)

    self.optimizer = optim.SGD(
        self.yolov4.parameters(),
        lr=train_cfg["LR_INIT"],
        momentum=train_cfg["MOMENTUM"],
        weight_decay=train_cfg["WEIGHT_DECAY"],
    )

    model_cfg = cfg.MODEL
    self.criterion = YoloV4Loss(
        anchors=model_cfg["ANCHORS"],
        strides=model_cfg["STRIDES"],
        iou_threshold_loss=train_cfg["IOU_THRESHOLD_LOSS"],
    )

    # T_max and warmup are given in iterations: epochs * batches per epoch.
    batches_per_epoch = len(self.train_dataloader)
    self.scheduler = cosine_lr_scheduler.CosineDecayLR(
        self.optimizer,
        T_max=self.epochs * batches_per_epoch,
        lr_init=train_cfg["LR_INIT"],
        lr_min=train_cfg["LR_END"],
        warmup=train_cfg["WARMUP_EPOCHS"] * batches_per_epoch,
    )

    if resume:
        self.__load_resume_weights(weight_path)
def __init__(self, weight_path, resume, gpu_id, vis, mode=None):
    """Assemble the state needed for an SSD training run on PASCAL VOC.

    Builds the train and test datasets, the training loader, the SSD
    network, an SGD optimizer, the loss and a cosine-decay learning-rate
    schedule from attributes of ``cfg``. Weights are loaded through
    ``__load_model_weights`` before the scheduler is created.

    Args:
        weight_path: Stored as ``self.weight_path`` and forwarded to
            ``__load_model_weights``.
        resume: Stored as ``self.resume`` and forwarded to
            ``__load_model_weights``.
        gpu_id: Forwarded to ``gpu.select_device``.
        vis: When truthy, ``ViewDatasets`` is called on the training
            loader before the model is built.
        mode: Stored as ``self.mode`` and forwarded to
            ``__load_model_weights``.
    """
    init_seeds(0)

    self.device = gpu.select_device(gpu_id)
    self.start_epoch = 0
    self.best_mAP = 0.

    # NOTE(review): the epoch count is hard-coded and also written back
    # into cfg.epoch, overriding whatever the config held - confirm intent.
    self.epochs = 100
    cfg.epoch = 100

    self.weight_path = weight_path
    self.resume = resume
    self.mode = mode
    self.multi_scale_train = cfg.MULTI_SCALE_TRAIN

    print('Loading Datasets...')
    self.train_dataset = PASCALVOC(
        img_size=cfg.img_size,
        root=cfg.root,
        image_sets=cfg.train_sets,
        phase='trainval',
        mean=cfg.means,
        std=cfg.std,
    )
    self.val_dataset = PASCALVOC(
        img_size=cfg.img_size,
        root=cfg.root,
        image_sets=cfg.test_sets,
        phase='test',
        mean=cfg.means,
        std=cfg.std,
    )
    self.train_dataloader = DataLoader(
        self.train_dataset,
        batch_size=cfg.batch_size,
        num_workers=cfg.workers,
        collate_fn=detection_collate,
        shuffle=True,
    )

    if vis:
        ViewDatasets(self.train_dataloader)

    detector = SSD(
        num_classes=cfg.num_classes,
        num_blocks=cfg.mbox,
        top_k=cfg.top_k,
        conf_thresh=cfg.conf_thresh,
        nms_thresh=cfg.nms_thresh,
        variance=cfg.variance,
    )
    self.SSD = detector.to(self.device)

    self.optimizer = optim.SGD(
        self.SSD.parameters(),
        lr=cfg.init_lr,
        momentum=cfg.momentum,
        weight_decay=cfg.weight_decay,
    )
    self.criterion = SSD_loss(
        num_classes=cfg.num_classes,
        variances=cfg.variance,
        device=self.device,
    )

    self.__load_model_weights(self.weight_path, self.resume, self.mode)

    # T_max and warmup are given in iterations: epochs * batches per epoch.
    batches_per_epoch = len(self.train_dataloader)
    self.scheduler = cosine_lr_scheduler.CosineDecayLR(
        self.optimizer,
        T_max=self.epochs * batches_per_epoch,
        lr_init=cfg.init_lr,
        lr_min=cfg.end_lr,
        warmup=cfg.warmup_epoch * batches_per_epoch,
    )
def __init__(self, log_dir, resume=False, fine_tune=False,
             weight_path="/content/drive/MyDrive/YOLO/weights/yolov4.weights"):
    """Assemble the state needed for a YOLOv4 / Mobilenet-YOLO training run.

    Builds the training dataset and loader, the model, an SGD optimizer,
    the loss and a cosine-decay learning-rate schedule from ``cfg``.

    The pretrained-weight location used to be a literal repeated in two
    places; it is now a single keyword parameter whose default is the
    previous literal, so existing callers behave exactly as before.

    Args:
        log_dir: Stored as ``self.log_dir``.
        resume: Forwarded to ``Build_Model``; when truthy,
            ``__load_resume_weights`` is called after the scheduler is
            built.
        fine_tune: When truthy, ``__prepare_fine_tune`` runs right after
            seeding and ``__load_best_weights`` runs at the very end.
        weight_path: Weight file stored as ``self.weight_path`` and handed
            to ``Build_Model``.
    """
    init_seeds(0)
    if fine_tune:
        self.__prepare_fine_tune()

    self.fp_16 = cfg.FP16
    self.device = gpu.select_device()
    self.start_epoch = 0
    self.best_mAP = 0.
    self.accumulate = cfg.TRAIN.ACCUMULATE
    self.log_dir = log_dir
    self.weight_path = weight_path
    self.multi_scale_train = cfg.TRAIN.MULTI_SCALE_TRAIN

    if self.multi_scale_train:
        print('Using multi scales training')
    else:
        print('train img size is {}'.format(cfg.TRAIN.TRAIN_IMG_SIZE))

    self.train_dataset = data.Build_Train_Dataset(
        anno_file=cfg.TRAIN.ANNO_FILE,
        anno_file_type="train",
        img_size=cfg.TRAIN.TRAIN_IMG_SIZE,
    )

    if cfg.MODEL.MODEL_TYPE == 'YOLOv4':
        self.epochs = cfg.TRAIN.YOLO_EPOCHS
    else:
        self.epochs = cfg.TRAIN.Mobilenet_YOLO_EPOCHS

    # The loader batch is the configured batch divided by the accumulation
    # factor (integer division).
    self.train_dataloader = DataLoader(
        self.train_dataset,
        batch_size=cfg.TRAIN.BATCH_SIZE // cfg.TRAIN.ACCUMULATE,
        num_workers=cfg.TRAIN.NUMBER_WORKERS,
        shuffle=True,
        pin_memory=True,
    )

    # Single source of truth for the weight file: self.weight_path.
    self.yolov4 = Build_Model(
        weight_path=self.weight_path,
        resume=resume,
    ).to(self.device)

    self.optimizer = optim.SGD(
        self.yolov4.parameters(),
        lr=cfg.TRAIN.LR_INIT,
        momentum=cfg.TRAIN.MOMENTUM,
        weight_decay=cfg.TRAIN.WEIGHT_DECAY,
    )

    self.criterion = YoloV4Loss(
        anchors=cfg.MODEL.ANCHORS,
        strides=cfg.MODEL.STRIDES,
        iou_threshold_loss=cfg.TRAIN.IOU_THRESHOLD_LOSS,
    )

    # T_max and warmup are given in iterations: epochs * batches per epoch.
    batches_per_epoch = len(self.train_dataloader)
    self.scheduler = cosine_lr_scheduler.CosineDecayLR(
        self.optimizer,
        T_max=self.epochs * batches_per_epoch,
        lr_init=cfg.TRAIN.LR_INIT,
        lr_min=cfg.TRAIN.LR_END,
        warmup=cfg.TRAIN.WARMUP_EPOCHS * batches_per_epoch,
    )

    if resume:
        self.__load_resume_weights()
    if fine_tune:
        self.__load_best_weights()
def __init__(self, cfg, train_dir, valid_dir, weight_path, resume, gpu_id):
    """Assemble the state needed for a YOLOv3 training run with validation.

    Creates a fresh, dated output directory for weights, then builds the
    training dataset and loader, the model, an SGD optimizer, the loss, a
    cosine-decay learning-rate schedule, and finally the validation
    dataset, loader and evaluator.

    Args:
        cfg: Configuration object; ``cfg.TRAIN``, ``cfg.MODEL`` and
            ``cfg.EVAL`` are indexed by string keys. Stored as
            ``self.cfg``.
        train_dir: Training data directory, passed to ``YoloDataset``.
        valid_dir: Validation data directory; its ``images`` and
            ``labels`` sub-directories are handed to the evaluator.
        weight_path: Stored as ``self.weight_path`` and forwarded to
            ``__load_model_weights``.
        resume: Forwarded to ``__load_model_weights``.
        gpu_id: Forwarded to ``gpu.select_device``.
    """
    init_seeds(0)
    self.cfg = cfg
    self.train_dir = train_dir
    self.device = gpu.select_device(gpu_id)
    self.start_epoch = 0
    self.best_mAP = 0.
    self.epochs = cfg.TRAIN["EPOCHS"]
    self.weight_path = weight_path

    # Output directory: ./weight/<yymmdd>_<n>. The index starts at
    # (number of existing runs for today) + 1, as before, but is advanced
    # past any directory that already exists: counting alone collides when
    # an earlier run directory was removed. makedirs also creates ./weight
    # itself on a fresh checkout, where os.mkdir would fail.
    self.today = datetime.datetime.today().strftime('%y%m%d')
    run_index = len(glob.glob(f'./weight/{self.today}_*')) + 1
    save_dir = f'./weight/{self.today}_{run_index}'
    while os.path.exists(save_dir):
        run_index += 1
        save_dir = f'./weight/{self.today}_{run_index}'
    os.makedirs(save_dir)
    self.save_weight_dir = save_dir

    self.multi_scale_train = cfg.TRAIN["MULTI_SCALE_TRAIN"]
    self.train_dataset = YoloDataset(
        cfg=cfg,
        data_dir=self.train_dir,
        img_size=cfg.TRAIN["TRAIN_IMG_SIZE"],
    )
    self.train_dataloader = DataLoader(
        self.train_dataset,
        batch_size=cfg.TRAIN["BATCH_SIZE"],
        num_workers=cfg.TRAIN["NUMBER_WORKERS"],
        shuffle=True,
    )

    self.yolov3 = Yolov3(cfg=cfg).to(self.device)

    self.optimizer = optim.SGD(
        self.yolov3.parameters(),
        lr=cfg.TRAIN["LR_INIT"],
        momentum=cfg.TRAIN["MOMENTUM"],
        weight_decay=cfg.TRAIN["WEIGHT_DECAY"],
    )

    self.criterion = YoloV3Loss(
        anchors=cfg.MODEL["ANCHORS"],
        strides=cfg.MODEL["STRIDES"],
        iou_threshold_loss=cfg.TRAIN["IOU_THRESHOLD_LOSS"],
    )

    self.__load_model_weights(weight_path, resume)

    # T_max and warmup are given in iterations: epochs * batches per epoch.
    batches_per_epoch = len(self.train_dataloader)
    self.scheduler = cosine_lr_scheduler.CosineDecayLR(
        self.optimizer,
        T_max=self.epochs * batches_per_epoch,
        lr_init=cfg.TRAIN["LR_INIT"],
        lr_min=cfg.TRAIN["LR_END"],
        warmup=cfg.TRAIN["WARMUP_EPOCHS"] * batches_per_epoch,
    )

    # Validation side: dataset, loader and evaluator.
    self.valid_dir = valid_dir
    self.img_valid_dir = os.path.join(self.valid_dir, 'images')
    self.anno_valid_dir = os.path.join(self.valid_dir, 'labels')
    self.valid_dataset = YoloDataset(
        cfg=cfg,
        data_dir=self.valid_dir,
        img_size=cfg.EVAL["TEST_IMG_SIZE"],
    )
    self.valid_dataloader = DataLoader(
        self.valid_dataset,
        batch_size=cfg.EVAL["BATCH_SIZE"],
        num_workers=cfg.EVAL["NUMBER_WORKERS"],
        shuffle=False,
    )
    self.evaluator = YoloEvaluator(
        self.yolov3,
        self.cfg,
        self.img_valid_dir,
        "eval",
        anno_dir=self.anno_valid_dir,
    )
def __init__(self, weight_path, resume, gpu_id):
    """Assemble the state needed for an NPMMRDet training run.

    Builds the training dataset and loader, the network (wrapped in
    ``torch.nn.DataParallel`` when more than one CUDA device is visible),
    an SGD optimizer, the loss and a cosine-decay learning-rate schedule.
    Weights are loaded through ``__load_model_weights`` before the
    scheduler is created.

    ``self.model`` is now assigned on every path. Previously it was only
    set when ``torch.cuda.device_count()`` was >= 1, so on a host without
    CUDA devices the constructor failed with ``AttributeError`` at
    ``self.model.parameters()``.

    Args:
        weight_path: Stored as ``self.weight_path`` and forwarded to
            ``__load_model_weights``.
        resume: Forwarded to ``__load_model_weights``.
        gpu_id: Forwarded to ``gpu.select_device``.
    """
    init_seeds(0)
    self.device = gpu.select_device(gpu_id)
    print(self.device)
    self.start_epoch = 0
    self.best_mAP = 0.
    self.epochs = cfg.TRAIN["EPOCHS"]
    self.weight_path = weight_path
    self.multi_scale_train = cfg.TRAIN["MULTI_SCALE_TRAIN"]

    if self.multi_scale_train:
        print('Using multi scales training')
    else:
        print('train img size is {}'.format(cfg.TRAIN["TRAIN_IMG_SIZE"]))

    self.train_dataset = data.Construct_Dataset(
        anno_file_type="train",
        img_size=cfg.TRAIN["TRAIN_IMG_SIZE"],
    )
    self.train_dataloader = DataLoader(
        self.train_dataset,
        batch_size=cfg.TRAIN["BATCH_SIZE"],
        num_workers=cfg.TRAIN["NUMBER_WORKERS"],
        shuffle=True,
    )

    net_model = NPMMRDet()
    gpu_count = torch.cuda.device_count()
    if gpu_count > 1:
        # Multi-GPU: replicate the model across the visible devices.
        print("Let's use", gpu_count, "GPUs!")
        net_model = torch.nn.DataParallel(net_model)
    # Single GPU and CPU-only hosts just move the bare model to the device.
    self.model = net_model.to(self.device)

    self.optimizer = optim.SGD(
        self.model.parameters(),
        lr=cfg.TRAIN["LR_INIT"],
        momentum=cfg.TRAIN["MOMENTUM"],
        weight_decay=cfg.TRAIN["WEIGHT_DECAY"],
    )

    self.criterion = Loss(
        anchors=cfg.MODEL["ANCHORS"],
        strides=cfg.MODEL["STRIDES"],
        iou_threshold_loss=cfg.TRAIN["IOU_THRESHOLD_LOSS"],
    )

    self.__load_model_weights(weight_path, resume)

    # T_max and warmup are given in iterations: epochs * batches per epoch.
    batches_per_epoch = len(self.train_dataloader)
    self.scheduler = cosine_lr_scheduler.CosineDecayLR(
        self.optimizer,
        T_max=self.epochs * batches_per_epoch,
        lr_init=cfg.TRAIN["LR_INIT"],
        lr_min=cfg.TRAIN["LR_END"],
        warmup=cfg.TRAIN["WARMUP_EPOCHS"] * batches_per_epoch,
    )
def __init__(self, model, weight_path, resume, gpu_id):
    """Assemble the state needed for a YOLOv3 training run on VOC data.

    Builds the training dataset and loader, one of three YOLOv3 variants,
    an SGD optimizer, the loss and a cosine-decay learning-rate schedule.
    Weights are loaded through ``__load_model_weights`` before the
    scheduler is created.

    Args:
        model: Variant selector stored as ``self.model``. ``'s'`` builds
            ``Yolov3_S`` (postfix ``'_s'``), ``'l'`` builds ``Yolov3_L``
            (postfix ``'_l'``); any other value builds ``Yolov3``
            (postfix ``'_m'``).
        weight_path: Stored as ``self.weight_path`` and forwarded to
            ``__load_model_weights``.
        resume: Forwarded to ``__load_model_weights``.
        gpu_id: Forwarded to ``gpu.select_device``.
    """
    init_seeds(0)

    self.model = model
    self.device = gpu.select_device(gpu_id)
    self.start_epoch = 0
    self.best_mAP = 0.

    train_cfg = cfg.TRAIN
    self.epochs = train_cfg["EPOCHS"]
    self.weight_path = weight_path
    self.multi_scale_train = train_cfg["MULTI_SCALE_TRAIN"]

    self.train_dataset = data.VocDataset(
        anno_file_type="train",
        img_size=train_cfg["TRAIN_IMG_SIZE"],
    )
    self.train_dataloader = DataLoader(
        self.train_dataset,
        batch_size=train_cfg["BATCH_SIZE"],
        num_workers=train_cfg["NUMBER_WORKERS"],
        shuffle=True,
    )

    # Pick the network class and the matching postfix, then build once.
    if self.model == 's':
        net_cls, postfix = Yolov3_S, '_s'
    elif self.model == 'l':
        net_cls, postfix = Yolov3_L, '_l'
    else:
        net_cls, postfix = Yolov3, '_m'
    self.yolov3 = net_cls().to(self.device)
    self.model_postfix = postfix

    self.optimizer = optim.SGD(
        self.yolov3.parameters(),
        lr=train_cfg["LR_INIT"],
        momentum=train_cfg["MOMENTUM"],
        weight_decay=train_cfg["WEIGHT_DECAY"],
    )

    model_cfg = cfg.MODEL
    self.criterion = YoloV3Loss(
        anchors=model_cfg["ANCHORS"],
        strides=model_cfg["STRIDES"],
        iou_threshold_loss=train_cfg["IOU_THRESHOLD_LOSS"],
    )

    self.__load_model_weights(weight_path, resume)

    # T_max and warmup are given in iterations: epochs * batches per epoch.
    batches_per_epoch = len(self.train_dataloader)
    self.scheduler = cosine_lr_scheduler.CosineDecayLR(
        self.optimizer,
        T_max=self.epochs * batches_per_epoch,
        lr_init=train_cfg["LR_INIT"],
        lr_min=train_cfg["LR_END"],
        warmup=train_cfg["WARMUP_EPOCHS"] * batches_per_epoch,
    )
init_method='tcp://127.0.0.1:9999', # distributed training init method world_size=1, # number of nodes for distributed training rank=0) # distributed training node rank model = torch.nn.parallel.DistributedDataParallel(model, find_unused_parameters=True) # clw note: 多卡,在 amp.initialize()之后调用分布式代码 DistributedDataParallel否则报错 model.yolo_layers = model.module.yolo_layers # move yolo layer indices to top level ###### model.nc = nc #### 阶梯学习率 #scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[round(total_epochs * x) for x in [0.8, 0.9]], gamma=0.1) ### 余弦学习率 # lf = lambda x: (1 + math.cos(x * math.pi / total_epochs)) / 2 # scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) scheduler = cosine_lr_scheduler.CosineDecayLR(optimizer, T_max=total_epochs * len(dataloader), lr_init=lr0, lr_min=lr0 * 0.01, warmup=2 * len(dataloader)) # 4、训练 print('') # 换行 print('Starting training for %g epochs...' % total_epochs) nb = len(dataloader) mloss = torch.zeros(4).to(device) # mean losses writer = SummaryWriter() # tensorboard --logdir=runs, view at http://localhost:6006/ prebias = start_epoch == 0 for epoch in range(start_epoch, total_epochs): # epoch ------------------------------ model.train() # 写在这里,是因为在一个epoch结束后,调用test.test()时,会调用 model.eval()
def __init__(self, weight_path=None, resume: bool = False, gpu_id: int = 0, accumulate: bool = True, fp_16: bool = False):
    """Assemble the state needed for a YOLOv4 / Mobilenet-YOLO training run.

    Reads hyper-parameters from ``cfg.TRAIN``, ``cfg.MODEL`` and
    ``cfg.MODEL_TYPE``, then builds the dataset, loader, model, SGD
    optimizer, loss and cosine-decay learning-rate schedule. When
    ``resume`` is truthy, ``__load_resume_weights`` is called last.

    Args:
        weight_path: Stored as ``self.weight_path``; forwarded to
            ``Build_Model`` and, when resuming, to
            ``__load_resume_weights``.
        resume: Forwarded to ``Build_Model``; also gates the call to
            ``__load_resume_weights``.
        gpu_id: Forwarded to ``gpu.select_device``.
        accumulate: Stored as ``self.accumulate``.
        fp_16: Stored as ``self.fp_16``.
    """
    init_seeds(0)

    # Device and bookkeeping.
    self.fp_16: bool = fp_16
    self.device: torch.device = gpu.select_device(gpu_id)
    self.start_epoch: int = 0
    self.best_mAP: float = 0.0
    # NOTE(review): annotated as bool, but presumably a gradient
    # accumulation step count - confirm against the training loop.
    self.accumulate: bool = accumulate
    # NOTE(review): annotated as Path although the default is None - confirm.
    self.weight_path: Path = weight_path

    train_cfg = cfg.TRAIN
    self.multi_scale_train: bool = train_cfg["MULTI_SCALE_TRAIN"]
    self.showatt = train_cfg["showatt"]

    # Report which input-size mode is active.
    if self.multi_scale_train:
        status = "Using multi scales training"
    else:
        status = f"train img size is {train_cfg['TRAIN_IMG_SIZE']}"
    print(status)

    self.train_dataset = data.Build_Dataset(
        anno_file_type="train",
        img_size=train_cfg["TRAIN_IMG_SIZE"],
    )

    # Epoch budget and eval_epoch both depend on the configured model type.
    if cfg.MODEL_TYPE["TYPE"] == "YOLOv4":
        self.epochs = train_cfg["YOLO_EPOCHS"]
        self.eval_epoch = 30
    else:
        self.epochs = train_cfg["Mobilenet_YOLO_EPOCHS"]
        self.eval_epoch = 50

    self.train_dataloader = DataLoader(
        self.train_dataset,
        batch_size=train_cfg["BATCH_SIZE"],
        num_workers=train_cfg["NUMBER_WORKERS"],
        shuffle=True,
        pin_memory=True,
    )

    network = Build_Model(
        weight_path=weight_path,
        resume=resume,
        showatt=self.showatt,
    )
    self.yolov4 = network.to(self.device)

    self.optimizer = optim.SGD(
        self.yolov4.parameters(),
        lr=train_cfg["LR_INIT"],
        momentum=train_cfg["MOMENTUM"],
        weight_decay=train_cfg["WEIGHT_DECAY"],
    )

    model_cfg = cfg.MODEL
    self.criterion = YoloV4Loss(
        anchors=model_cfg["ANCHORS"],
        strides=model_cfg["STRIDES"],
        iou_threshold_loss=train_cfg["IOU_THRESHOLD_LOSS"],
    )

    # T_max and warmup are given in iterations: epochs * batches per epoch.
    batches_per_epoch = len(self.train_dataloader)
    self.scheduler = cosine_lr_scheduler.CosineDecayLR(
        self.optimizer,
        T_max=self.epochs * batches_per_epoch,
        lr_init=train_cfg["LR_INIT"],
        lr_min=train_cfg["LR_END"],
        warmup=train_cfg["WARMUP_EPOCHS"] * batches_per_epoch,
    )

    if resume:
        self.__load_resume_weights(weight_path)
yolov4.parameters(), lr=cfg.TRAIN["LR_INIT"], momentum=cfg.TRAIN["MOMENTUM"], weight_decay=cfg.TRAIN["WEIGHT_DECAY"], ) criterion = YoloV4Loss( anchors=cfg.MODEL["ANCHORS"], strides=cfg.MODEL["STRIDES"], iou_threshold_loss=cfg.TRAIN["IOU_THRESHOLD_LOSS"], ) scheduler = cosine_lr_scheduler.CosineDecayLR( optimizer, T_max=epochs * len(train_dataloader), lr_init=cfg.TRAIN["LR_INIT"], lr_min=cfg.TRAIN["LR_END"], warmup=cfg.TRAIN["WARMUP_EPOCHS"] * len(train_dataloader), ) # Training for epoch in range(start_epoch, epochs): yolov4.train() mloss = torch.zeros(4) for i, data in enumerate(train_dataloader): scheduler.step( len(train_dataloader) / (cfg.TRAIN["BATCH_SIZE"]) * epoch + i) imgs = data[0] label_sbbox = data[1]