def main():
    """Command-line entry point.

    Parses hyper-parameters, builds the model and the train/valid loaders,
    then dispatches to LR search, checkpoint resumption, or a fresh run.
    """
    parser = argparse.ArgumentParser()
    add = parser.add_argument
    add('--batch-size', type=int, default=32)
    add('--lr', type=float, default=2e-3)
    add('--workers', type=int, default=4)
    add('--epochs', type=int, default=5)
    add('--mixup-alpha', type=float, default=0)
    add('--cutmix-alpha', type=float, default=0)
    add('--arch', type=str, default='seresnext50')
    add('--amp', type=str, default='')
    add('--size', type=int, default=192)
    add('--debug', action='store_true')
    add('--radam', action='store_true')
    add('--run-name', type=str, default='')
    add('--lookahead-k', type=int, default=-1)
    add('--lookahead-alpha', type=float, default=0.5)
    add('--from-checkpoint', type=str, default='')
    add('--find-lr', action='store_true')
    args = parser.parse_args()

    model = get_model(args.arch)
    if cuda.is_available():
        model = model.cuda()
    criterion = MixUpSoftmaxLoss(nn.CrossEntropyLoss())

    # Persist the exact run configuration next to the cached artifacts.
    (CACHE_DIR / 'params.json').write_text(
        json.dumps(vars(args), indent=4, sort_keys=True))

    # Build dataframes from the folder layout; validation reuses the
    # class map discovered on the training split.
    df_train, class_map = build_dataframe_from_folder(DATA_ROOT / 'train')
    df_valid = build_dataframe_from_folder(DATA_ROOT / 'val', class_map)

    resize_to = int(args.size * 1.25)
    train_loader = make_loader(args, TrainDataset, df_train,
                               get_train_transform(resize_to, args.size),
                               drop_last=True, shuffle=True)
    valid_loader = make_loader(args, TrainDataset, df_valid,
                               get_test_transform(resize_to, args.size),
                               shuffle=False)
    print(f'{len(train_loader.dataset):,} items in train, '
          f'{len(valid_loader.dataset):,} in valid')

    # Exactly one of the three run modes executes.
    if args.find_lr:
        find_lr(args, model, train_loader, criterion)
    elif args.from_checkpoint:
        resume_training(args, model, train_loader, valid_loader)
    else:
        train_from_scratch(args, model, train_loader, valid_loader, criterion)
def initialize_datasets(self):
    """Build the train/valid FieldsDataset pair (and the optional test
    dataset when a test folder is configured), logging each dataset's size."""
    log = lgblkb_tools.logger
    self._dataset_train = FieldsDataset(self.train_folder,
                                        transforms.get_train_transform())
    self._dataset_valid = FieldsDataset(self.valid_folder,
                                        transforms.get_test_transform())
    log.info(f"Length of Training dataset: {len(self._dataset_train)}")
    log.info(f"Length of Validation dataset: {len(self._dataset_valid)}")
    # The test split is optional; only build it when a folder was given.
    if self.test_folder is not None:
        self._dataset_test = FieldsDataset(self.test_folder,
                                           transforms.get_test_transform())
        log.info(f"Length of Testing dataset: {len(self._dataset_test)}")
def main(data_dir='/Users/Patrick/workspace/wheat/data/global-wheat-detection',
         num_epochs=2):
    """Fine-tune a COCO-pretrained Faster R-CNN on the global wheat data.

    Args:
        data_dir: Root of the global-wheat-detection dataset. Previously a
            hard-coded machine-specific path; now a parameter with the same
            default, so existing callers are unaffected.
        num_epochs: Number of training epochs (default matches original).

    Side effects: trains on CPU and writes the final weights to
    'fasterrcnn_resnet50_fpn.pth' in the working directory.
    """
    # load and split data
    print('>>> splitting data into validation and training datasets')
    annotations = f'{data_dir}/train.csv'
    images_train_dir = f'{data_dir}/train'
    bbox_df = pd.read_csv(annotations)
    bbox_df_train, bbox_df_val = pre_process(bbox_df)

    # create pytorch train and validate datasets
    train_dataset = WheatDataset(bbox_df_train, images_train_dir,
                                 get_train_transform())
    valid_dataset = WheatDataset(bbox_df_val, images_train_dir,
                                 get_valid_transform())
    # NOTE(review): shuffle=False on the *training* loader is unusual —
    # training normally shuffles. Left as-is to preserve behavior; confirm
    # whether deterministic ordering is intentional.
    train_data_loader = DataLoader(train_dataset, batch_size=16, shuffle=False,
                                   num_workers=4, collate_fn=utils.collate_fn)
    valid_data_loader = DataLoader(valid_dataset, batch_size=8, shuffle=False,
                                   num_workers=4, collate_fn=utils.collate_fn)

    # load a model; pre-trained on COCO
    print('>>> loading model')
    device = torch.device('cpu')
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
        pretrained=True)
    model.to(device)
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9,
                                weight_decay=0.0005)
    lr_scheduler = None

    itr = 1
    print('>>> begin training')
    for epoch in range(num_epochs):
        for images, targets, image_ids in train_data_loader:
            images = [image.to(device) for image in images]
            targets = [{k: v.to(device) for k, v in t.items()}
                       for t in targets]
            # In train mode the detection model returns a dict of losses.
            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())
            loss_value = losses.item()
            optimizer.zero_grad()
            losses.backward()
            optimizer.step()
            if itr % 50 == 0:
                print(f"Iteration #{itr} loss: {loss_value}")
            itr += 1
        # update the learning rate
        if lr_scheduler is not None:
            lr_scheduler.step()

    torch.save(model.state_dict(), 'fasterrcnn_resnet50_fpn.pth')
    print('>>> saved model')
def main():
    """Command-line entry point: parse args, construct the requested
    architecture, build loaders, and dispatch to LR search / resume / train."""
    parser = argparse.ArgumentParser()
    add = parser.add_argument
    add('--batch-size', type=int, default=32)
    add('--lr', type=float, default=2e-3)
    add('--workers', type=int, default=4)
    add('--epochs', type=int, default=5)
    add('--mixup-alpha', type=float, default=0)
    add('--arch', type=str, default='seresnext50')
    add('--amp', type=str, default='')
    add('--size', type=int, default=192)
    add('--debug', action='store_true')
    add('--from-checkpoint', type=str, default='')
    add('--find-lr', action='store_true')
    args = parser.parse_args()

    def build_model(arch):
        # Map the CLI architecture name onto the matching model factory.
        if arch == 'seresnext50':
            return get_seresnet_model(arch="se_resnext50_32x4d",
                                      n_classes=N_CLASSES, pretrained=False)
        if arch == 'seresnext101':
            return get_seresnet_model(arch="se_resnext101_32x4d",
                                      n_classes=N_CLASSES, pretrained=False)
        if arch.startswith("densenet"):
            return get_densenet_model(arch=arch)
        if arch.startswith("efficientnet"):
            return get_efficientnet_model(arch=arch, pretrained=False)
        raise ValueError("No such model")

    model = build_model(args.arch)
    if cuda.is_available():
        model = model.cuda()
    criterion = MixUpSoftmaxLoss(nn.CrossEntropyLoss())

    # Persist the exact run configuration next to the cached artifacts.
    (CACHE_DIR / 'params.json').write_text(
        json.dumps(vars(args), indent=4, sort_keys=True))

    # Validation reuses the class map discovered on the training split.
    df_train, class_map = build_dataframe_from_folder(DATA_ROOT / 'train')
    df_valid = build_dataframe_from_folder(DATA_ROOT / 'val', class_map)

    resize_to = int(args.size * 1.25)
    train_loader = make_loader(args, TrainDataset, df_train,
                               get_train_transform(resize_to, args.size),
                               drop_last=True, shuffle=True)
    valid_loader = make_loader(args, TrainDataset, df_valid,
                               get_test_transform(resize_to, args.size),
                               shuffle=False)
    print(f'{len(train_loader.dataset):,} items in train, '
          f'{len(valid_loader.dataset):,} in valid')

    # Exactly one of the three run modes executes.
    if args.find_lr:
        find_lr(args, model, train_loader, criterion)
    elif args.from_checkpoint:
        resume_training(args, model, train_loader, valid_loader)
    else:
        train_from_scratch(args, model, train_loader, valid_loader, criterion)
def main():
    """Train a multi-label classifier (MXNet/Gluon) on one CV fold.

    Parses hyper-parameters, builds fold-split loaders, trains with a warmup +
    multi-factor LR schedule (optionally fp16 with loss scaling), tracks the
    best F2 score over several binarization thresholds, checkpoints the best
    model, and reduces the LR on plateau (up to ``max_lr_changes`` times).

    Fix vs. original: ``raise (f'No such model ...')`` raised a plain str,
    which itself fails with ``TypeError: exceptions must derive from
    BaseException``; it now raises ``ValueError``.
    """
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('--data_path', type=str, default='data')
    arg('--model', type=str, default='pnasnet5large')
    arg('--exp-name', type=str, default='pnasnet5large_2')
    arg('--batch-size', type=int, default=32)
    arg('--lr', type=float, default=1e-2)
    arg('--patience', type=int, default=4)
    arg('--n-epochs', type=int, default=15)
    arg('--n-folds', type=int, default=10)
    arg('--fold', type=int, default=0)
    arg('--random-seed', type=int, default=314159)
    arg('--num-workers', type=int, default=6)
    arg('--gpus', type=str, default='0')
    arg('--resize', type=int, default=331)
    arg('--crop', type=int, default=331)
    arg('--scale', type=str, default='0.4, 1.0')
    arg('--mean', type=str, default='0.485, 0.456, 0.406')
    arg('--std', type=str, default='0.229, 0.224, 0.225')
    args = parser.parse_args()
    print(args)

    os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpus

    random_seed = args.random_seed
    set_random_seed(random_seed)

    # Load labels and assign CV folds; one-hot encode the attribute ids.
    path_to_data = Path(args.data_path)
    labels = pd.read_csv(path_to_data / 'labels.csv')
    num_classes = len(labels)
    train = pd.read_csv(path_to_data / 'train.csv.zip')
    n_folds = args.n_folds
    make_folds(train, n_folds, random_seed)
    mlb = MultiLabelBinarizer([str(i) for i in range(num_classes)])
    s = train['attribute_ids'].str.split()
    res = pd.DataFrame(mlb.fit_transform(s), columns=mlb.classes_,
                       index=train.index)
    train = pd.concat([res, train['id'] + '.png', train['fold']], axis=1)

    gpu_count = len(args.gpus.split(','))
    batch_size = args.batch_size
    resize = args.resize
    crop = args.crop
    scale = tuple(float(x) for x in args.scale.split(','))
    mean = [float(x) for x in args.mean.split(',')]
    std = [float(x) for x in args.std.split(',')]
    labels_ids = [str(i) for i in range(num_classes)]
    num_workers = args.num_workers
    fold = args.fold

    # Rows with fold != args.fold train; rows with fold == args.fold validate.
    train_transformer = get_train_transform(resize=resize, crop=crop,
                                            scale=scale, mean=mean, std=std)
    train_loader = mx.gluon.data.DataLoader(
        MXDataset(path_to_data / 'train',
                  train[train['fold'] != fold].copy(),
                  labels_ids, train_transformer),
        batch_size=batch_size * gpu_count, shuffle=True,
        num_workers=num_workers, pin_memory=True)
    test_transformer = get_test_transform(resize=resize, crop=crop,
                                          mean=mean, std=std)
    dev_loader = mx.gluon.data.DataLoader(
        MXDataset(path_to_data / 'train',
                  train[train['fold'] == fold].copy(),
                  labels_ids, test_transformer),
        batch_size=batch_size * gpu_count, shuffle=False,
        num_workers=num_workers, pin_memory=True)

    fp16 = True
    if args.model == 'pnasnet5large':
        net = get_pnasnet5large(num_classes)
    else:
        # Fixed: previously raised a plain f-string (a str), which is itself
        # a TypeError at raise time.
        raise ValueError(f'No such model {args.model}')
    if fp16:
        net.cast('float16')
    ctx = [mx.gpu(i) for i in range(gpu_count)]
    net.collect_params().reset_ctx(ctx)

    # Linear-warmup + step-decay schedule; LR scales with total batch size.
    epoch_size = len(train_loader)
    lr = args.lr * batch_size / 256
    steps = [step * epoch_size for step in [7, 9]]
    factor = 0.5
    warmup_epochs = 5
    warmup_mode = 'linear'
    schedule = mx.lr_scheduler.MultiFactorScheduler(
        step=steps, factor=factor, base_lr=lr,
        warmup_steps=warmup_epochs * epoch_size, warmup_mode=warmup_mode)
    if fp16:
        # Static loss scaling: gradients are rescaled down by `weight`, and
        # the loss (below) is scaled up by the same factor.
        weight = 128
        opt = mx.optimizer.Adam(
            multi_precision=True,
            learning_rate=lr,
            rescale_grad=1 / weight,
            lr_scheduler=schedule,
        )
    else:
        opt = mx.optimizer.Adam(
            learning_rate=lr,
            lr_scheduler=schedule,
        )
    trainer = mx.gluon.Trainer(net.collect_params(), opt)
    if fp16:
        loss = mx.gluon.loss.SigmoidBinaryCrossEntropyLoss(weight=weight)
    else:
        loss = mx.gluon.loss.SigmoidBinaryCrossEntropyLoss()

    path_to_models = Path('models')
    path_to_model = path_to_models / args.exp_name
    path_to_exp = path_to_model / f'fold_{fold}'
    if not path_to_exp.exists():
        path_to_exp.mkdir(parents=True)

    patience = args.patience
    lr_reset_epoch = 1
    lr_changes = 0
    max_lr_changes = 2
    n_epochs = args.n_epochs
    best_dev_f2 = th2 = 0
    train_losses = []
    dev_losses, dev_f2s, dev_ths = [], [], []
    dev_met1, dev_met2 = [], []
    for epoch in range(1, n_epochs + 1):
        train_loss, all_predictions, all_targets = epoch_step(
            train_loader, desc=f'[ Training {epoch}/{n_epochs}.. ]',
            fp16=fp16, ctx=ctx, net=net, loss=loss, trainer=trainer)
        train_losses.append(train_loss)

        dev_loss, all_predictions, all_targets = epoch_step(
            dev_loader, desc=f'[ Validating {epoch}/{n_epochs}.. ]',
            fp16=fp16, ctx=ctx, net=net, loss=loss)
        dev_losses.append(dev_loss)

        # F2 at several raw-probability thresholds.
        metrics = {}
        argsorted = all_predictions.argsort(axis=1)
        for threshold in [0.01, 0.05, 0.1, 0.15, 0.2]:
            metrics[f'valid_f2_th_{threshold:.2f}'] = get_score(
                binarize_prediction(all_predictions, threshold, argsorted),
                all_targets)
        dev_met1.append(metrics)
        dev_f2 = 0
        for th in dev_met1[-1]:
            if dev_met1[-1][th] > dev_f2:
                dev_f2 = dev_met1[-1][th]
                th2 = th

        # Same sweep on per-sample max-normalized predictions.
        all_predictions = all_predictions / all_predictions.max(1,
                                                                keepdims=True)
        metrics = {}
        argsorted = all_predictions.argsort(axis=1)
        for threshold in [0.05, 0.1, 0.2, 0.3, 0.4]:
            metrics[f'valid_norm_f2_th_{threshold:.2f}'] = get_score(
                binarize_prediction(all_predictions, threshold, argsorted),
                all_targets)
        dev_met2.append(metrics)
        for th in dev_met2[-1]:
            if dev_met2[-1][th] > dev_f2:
                dev_f2 = dev_met2[-1][th]
                th2 = th
        dev_f2s.append(dev_f2)
        dev_ths.append(th2)

        if dev_f2 > best_dev_f2:
            best_dev_f2 = dev_f2
            best_th = th2
            # Save weights in float32 so the checkpoint loads regardless of
            # the fp16 setting used at training time.
            if fp16:
                net.cast('float32')
                net.save_parameters((path_to_exp / 'model').as_posix())
                net.cast('float16')
            else:
                net.save_parameters((path_to_exp / 'model').as_posix())
            save_dict(
                {
                    'dev_loss': dev_loss,
                    'dev_f2': best_dev_f2,
                    'dev_th': best_th,
                    'epoch': epoch,
                    'dev_f2s': dev_f2s,
                    'dev_ths': dev_ths,
                    'dev_losses': dev_losses,
                    'dev_met1': dev_met1,
                    'dev_met2': dev_met2,
                }, path_to_exp / 'meta_data.pkl')
        elif (patience and epoch - lr_reset_epoch > patience
              and max(dev_f2s[-patience:]) < best_dev_f2):
            # "patience" epochs without improvement
            lr_changes += 1
            if lr_changes > max_lr_changes:
                break
            lr *= factor
            print(f'lr updated to {lr}')
            lr_reset_epoch = epoch
            # Rebuild the optimizer at the reduced LR (no scheduler now, so
            # the plateau-reduced LR stays in effect).
            if fp16:
                weight = 128
                opt = mx.optimizer.Adam(multi_precision=True,
                                        learning_rate=lr,
                                        rescale_grad=1 / weight)
            else:
                opt = mx.optimizer.Adam(learning_rate=lr)
            trainer = mx.gluon.Trainer(net.collect_params(), opt)

    plot_all(path_to_exp, train_losses, dev_losses, dev_f2s, dev_ths,
             dev_met1, dev_met2)
# Fixed: the f-string prefix was missing, so the literal text '{DIR_INPUT}'
# was used as the path instead of the DIR_INPUT variable's value.
# (Variable name 'lable_path_train' is a typo for 'label_path_train', but it
# is kept because later, unseen code may reference it.)
lable_path_train = f'{DIR_INPUT}/train/_annotations.txt'

from transforms import get_train_transform, get_valid_transform
from dataset import PKLotDataset, dataframe_from_yolov4_format, expand_bbox, get_data_labeling

# Parse the annotation file into a bounding-box dataframe.
df = get_data_labeling(lable_path_train)
image_ids = df['image_id'].unique()
# Hold out the last 1500 image ids for validation; train on the rest.
valid_df = df[df['image_id'].isin(image_ids[-1500:])]
train_df = df[df['image_id'].isin(image_ids[:-1500])]
train_dataset = PKLotDataset(train_df, DIR_TRAIN, get_train_transform())
valid_dataset = PKLotDataset(valid_df, DIR_TRAIN, get_valid_transform())

# load a model; pre-trained on COCO
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
print("Model loaded")
# (Duplicate earlier `num_classes = 2` assignment removed.)
num_classes = 2  # 1 class (car) + background
# get number of input features for the classifier