def run(config):
    """Train the segmentation model described by *config*.

    Builds an ``XResNet2`` segmenter, resumes from the most recent
    checkpoint in ``config.train_segmenter.dir`` when one exists, then
    delegates the actual training loop to ``train_segmenter``.

    Args:
        config: namespace-style configuration object; must provide
            ``optimizer_segmenter``, ``train_segmenter`` and
            ``train.writer_dir``.
    """
    model_segmenter = XResNet2()
    if torch.cuda.is_available():
        model_segmenter = model_segmenter.cuda()

    optimizer_segmenter = get_optimizer(config.optimizer_segmenter.name,
                                        model_segmenter.parameters(),
                                        config.optimizer_segmenter.params)

    # Resume from the latest checkpoint, if any; otherwise start fresh.
    checkpoint_segmenter = get_initial_checkpoint(config.train_segmenter.dir)
    if checkpoint_segmenter is not None:
        last_epoch, step = load_checkpoint(model_segmenter,
                                           optimizer_segmenter,
                                           checkpoint_segmenter)
    else:
        last_epoch, step = -1, -1
    print('from segmenter checkpoint: {} last epoch:{}'.format(
        checkpoint_segmenter, last_epoch))

    print('config.train ', config.train)
    writer = SummaryWriter(config.train.writer_dir)
    scheduler = 'none'  # sentinel: no LR scheduler is used by train_segmenter

    # FIX: the original first built a criterion via
    # get_loss(config.loss_segmenter) and then unconditionally overwrote it
    # with nn.MSELoss(); the dead first assignment is removed so the
    # effective criterion (MSE) is now stated once, explicitly.
    criterion_segmenter = nn.MSELoss()

    train_segmenter_dataloaders = get_dataloader(
        config.train_segmenter.batch_size, 'train')
    eval_segmenter_dataloaders = get_dataloader(
        config.train_segmenter.batch_size, 'val')
    train_segmenter(config, model_segmenter, train_segmenter_dataloaders,
                    eval_segmenter_dataloaders, criterion_segmenter,
                    optimizer_segmenter, scheduler, writer, last_epoch + 1)
def main():
    """Evaluate a saved segmenter checkpoint on the test set.

    Loads the configuration named on the command line, rebuilds the model
    and optimizer, restores one specific saved checkpoint, and runs
    ``test_segmenter`` over a test dataloader.
    """
    args = parse_args()
    if args.config_file is None:
        raise Exception('no configuration file')
    config = utils.config.load(args.config_file)

    # NOTE(review): training (run) builds XResNet2 while this builds
    # XResNet — confirm the architecture matches the checkpoint restored
    # below before trusting the evaluation numbers.
    model_segmenter = XResNet()
    if torch.cuda.is_available():
        model_segmenter = model_segmenter.cuda()
    # Optimizer is rebuilt only because load_checkpoint expects one.
    optimizer_segmenter = get_optimizer(config.optimizer_segmenter.name,
                                        model_segmenter.parameters(),
                                        config.optimizer_segmenter.params)
    ####
    # 4809 is a hard-coded checkpoint identifier — presumably the chosen
    # epoch/step; TODO confirm and move into the config.
    checkpoint = get_model_saved(config.train_segmenter.dir, 4809)
    best_epoch, step = load_checkpoint(model_segmenter, optimizer_segmenter,
                                       checkpoint)
    # 10 is the hard-coded test batch size — TODO move into the config.
    test_segmenter_dataloaders = get_test_dataloader(10)
    test_segmenter(config, model_segmenter, test_segmenter_dataloaders)
def run(config_file):
    """Train a single multi-output Bengali grapheme classifier.

    Loads the experiment config, builds augmentations and dataloaders for
    the 'train'/'valid' phases, trains ``MODEL_LIST[config.model.version]``
    (out_dim=1295 combined classes) and checkpoints whenever the validation
    metric improves.

    Args:
        config_file: path to the experiment configuration file.
    """
    config = load_config(config_file)
    # All outputs are rooted under this absolute result directory.
    config.work_dir = '/home/koga/workspace/kaggle_bengali/result/' + config.work_dir
    os.makedirs(config.work_dir, exist_ok=True)
    os.makedirs(config.work_dir + "/checkpoints", exist_ok=True)
    print('working directory:', config.work_dir)
    # NOTE(review): no '/' between work_dir and "log.txt" — the log file
    # lands *next to* work_dir (suffix "...log.txt") unless work_dir ends
    # with '/'; confirm this is intended.
    logger = get_logger(config.work_dir + "log.txt")
    # Heavy augmentation for training, resize-only for validation.
    all_transforms = {}
    all_transforms['train'] = Transform(
        size=config.data.image_size,
        affine=config.transforms.affine,
        autoaugment_ratio=config.transforms.autoaugment_ratio,
        threshold=config.transforms.threshold,
        sigma=config.transforms.sigma,
        blur_ratio=config.transforms.blur_ratio,
        noise_ratio=config.transforms.noise_ratio,
        cutout_ratio=config.transforms.cutout_ratio,
        grid_distortion_ratio=config.transforms.grid_distortion_ratio,
        random_brightness_ratio=config.transforms.random_brightness_ratio,
        piece_affine_ratio=config.transforms.piece_affine_ratio,
        ssr_ratio=config.transforms.ssr_ratio,
        grid_mask_ratio=config.transforms.grid_mask_ratio,
        augmix_ratio=config.transforms.augmix_ratio,
    )
    all_transforms['valid'] = Transform(size=config.data.image_size)
    dataloaders = {
        phase: make_loader(
            phase=phase,
            df_path=config.train.dfpath,
            batch_size=config.train.batch_size,
            num_workers=config.num_workers,
            idx_fold=config.data.params.idx,
            fold_csv=config.data.params.fold_csv,
            transforms=all_transforms[phase],
            # debug=config.debug
            crop=config.transforms.crop)
        for phase in ['train', 'valid']
    }
    model = MODEL_LIST[config.model.version](back_bone=config.model.back_bone,
                                             out_dim=1295)
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model)
        # Linear LR scaling with the number of GPUs (larger effective batch).
        config.optimizer.params.lr *= torch.cuda.device_count()
    torch.backends.cudnn.benchmark = True
    # NOTE(review): `device` is not defined in this function — presumably a
    # module-level global; verify.
    model = model.to(device)
    criterion = get_criterion(config)
    optimizer = get_optimizer(config, model)
    scheduler = get_scheduler(optimizer, config)
    # Gradient accumulation: number of batches per optimizer step.
    accumlate_step = 1
    if config.train.accumulation_size > 0:
        accumlate_step = config.train.accumulation_size // config.train.batch_size
    best_valid_recall = 0.0
    if config.train.resume:
        print('resume checkpoints')
        checkpoint = torch.load("/home/koga/workspace/kaggle_bengali/result/" +
                                config.train.path)
        # fix_model_state_dict presumably strips DataParallel 'module.'
        # prefixes — TODO confirm.
        model.load_state_dict(fix_model_state_dict(checkpoint['checkpoint']))
        # model.load_state_dict(checkpoint['checkpoint'])
        # best_valid_recall = checkpoint['best_valid_recall']
    # if config.train.earlyStopping:
    #     early_stopping = EarlyStopping(patience=patience, verbose=True)
    valid_recall = 0.0
    for epoch in range(1, config.train.num_epochs + 1):
        print(f'epoch {epoch} start')
        logger.info(f'epoch {epoch} start ')
        metric_train = do_train(model, dataloaders["train"], criterion,
                                optimizer, device, config, epoch,
                                accumlate_step)
        torch.cuda.empty_cache()
        metrics_eval = do_eval(model, dataloaders["valid"], criterion, device)
        torch.cuda.empty_cache()
        valid_recall = metrics_eval["valid_metric"]
        # Scheduler is stepped on validation loss (ReduceLROnPlateau-style).
        scheduler.step(metrics_eval["valid_loss"])
        print(f'epoch: {epoch} ', metric_train, metrics_eval)
        logger.info(f'epoch: {epoch} {metric_train} {metrics_eval}')
        # Checkpoint only on improvement of the validation metric.
        if valid_recall > best_valid_recall:
            print(f"save checkpoint: best_recall:{valid_recall}")
            logger.info(f"save checkpoint: best_recall:{valid_recall}")
            torch.save(
                {
                    'checkpoint': model.state_dict(),
                    'epoch': epoch,
                    'best_valid_recall': valid_recall,
                }, config.work_dir + "/checkpoints/" + f"{epoch}.pth")
            best_valid_recall = valid_recall
        torch.cuda.empty_cache()
        gc.collect()
def main():
    """EDA / smoke-test driver for the car-pose (PKU autonomous driving) data.

    Sections are toggled with literal ``if 0:`` / ``if 1:`` guards: the large
    exploratory-plotting sections are currently disabled; only the
    dataloader/model/optimizer initialisation at the bottom runs.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    args = get_args()
    cfg = Config.fromfile(args.config)
    cfg.device = device
    train = pd.read_csv(cfg.train_csv)
    # Inverse camera intrinsics; not used by the currently-enabled sections.
    camera_matrix_inv = np.linalg.inv(kaggle.camera_matrix)
    if 0:  # EDA plots (disabled)
        # Flatten every labelled car pose into one row per car.
        points_df = pd.DataFrame()
        for col in ['x', 'y', 'z', 'yaw', 'pitch', 'roll']:
            arr = []
            for ps in train['PredictionString']:
                coords = kaggle.str2coords(ps)
                arr += [c[col] for c in coords]
            points_df[col] = arr
        log.info(f'len(points_df): {len(points_df)}')
        log.info(points_df.head())
        img = imread(opj(cfg.train_images, train.iloc[0]['ImageId'] + '.jpg'))
        # plt.figure(figsize=(15,8))
        # plt.imshow(img)
        # plt.show()
        # log.info(train.head())
        # log.info(kaggle.camera_matrix)
        pred_string = train.iloc[0]['PredictionString']
        coords = kaggle.str2coords(pred_string)
        # log.info(coords)
        # Number of cars per training image.
        lens = [len(kaggle.str2coords(s)) for s in train['PredictionString']]
        ############
        plt.figure(figsize=(15, 6))
        sns.countplot(lens)
        # plt.xlabel('Number of cars in image')
        # plt.show()
        plt.savefig('eda/number_cars_in_image.png')
        ############
        # Marginal distribution of each pose coordinate over all cars.
        plt.figure(figsize=(15, 6))
        sns.distplot(functools.reduce(lambda a, b: a + b,
                                      [[c['x'] for c in kaggle.str2coords(s)]
                                       for s in train['PredictionString']]),
                     bins=500)
        # sns.distplot([kaggle.str2coords(s)[0]['x'] for s in train['PredictionString']]);
        plt.xlabel('x')
        # plt.show()
        plt.savefig('eda/x.png')
        ############
        plt.figure(figsize=(15, 6))
        sns.distplot(functools.reduce(lambda a, b: a + b,
                                      [[c['y'] for c in kaggle.str2coords(s)]
                                       for s in train['PredictionString']]),
                     bins=500)
        plt.xlabel('y')
        # plt.show()
        plt.savefig('eda/y.png')
        ############
        plt.figure(figsize=(15, 6))
        sns.distplot(functools.reduce(lambda a, b: a + b,
                                      [[c['z'] for c in kaggle.str2coords(s)]
                                       for s in train['PredictionString']]),
                     bins=500)
        plt.xlabel('z')
        # plt.show()
        plt.savefig('eda/z.png')
        ############
        plt.figure(figsize=(15, 6))
        sns.distplot(
            functools.reduce(lambda a, b: a + b,
                             [[c['yaw'] for c in kaggle.str2coords(s)]
                              for s in train['PredictionString']]))
        plt.xlabel('yaw')
        # plt.show()
        plt.savefig('eda/yaw.png')
        ############
        plt.figure(figsize=(15, 6))
        sns.distplot(
            functools.reduce(lambda a, b: a + b,
                             [[c['roll'] for c in kaggle.str2coords(s)]
                              for s in train['PredictionString']]))
        plt.xlabel('roll')
        # plt.show()
        plt.savefig('eda/roll.png')
        ############
        plt.figure(figsize=(15, 6))
        sns.distplot(
            functools.reduce(lambda a, b: a + b,
                             [[c['pitch'] for c in kaggle.str2coords(s)]
                              for s in train['PredictionString']]))
        plt.xlabel('pitch')
        # plt.show()
        plt.savefig('eda/pitch.png')
        ############
        # Roll re-centred by pi so the dominant orientation clusters near 0.
        plt.figure(figsize=(15, 6))
        sns.distplot(
            functools.reduce(lambda a, b: a + b, [[
                kaggle.rotate(c['roll'], np.pi) for c in kaggle.str2coords(s)
            ] for s in train['PredictionString']]))
        plt.xlabel('roll rotated by pi')
        # plt.show()
        plt.savefig('eda/roll_rotated_by_pi.png')
        # Project one image's labelled car centres back into image coords.
        plt.figure(figsize=(14, 14))
        plt.imshow(
            imread(opj(cfg.train_images,
                       train.iloc[2217]['ImageId'] + '.jpg')))
        plt.scatter(*kaggle.get_img_coords(
            train.iloc[2217]['PredictionString']),
                    color='red',
                    s=100)
        # plt.show()
        # log.info(kaggle.get_img_coords(train.iloc[2217]['PredictionString']))
        ############
        # All car centres of the whole train set over one sample image.
        xs, ys = [], []
        for ps in train['PredictionString']:
            x, y = kaggle.get_img_coords(ps)
            xs += list(x)
            ys += list(y)
        plt.figure(figsize=(18, 18))
        plt.imshow(imread(
            opj(cfg.train_images, train.iloc[2217]['ImageId'] + '.jpg')),
                   alpha=0.3)
        plt.scatter(xs, ys, color='red', s=10, alpha=0.2)
        # plt.show()
        plt.savefig('eda/xs-ys_distribution.png')
        ############
        # view distribution from the sky
        road_width = 3
        road_xs = [
            -road_width, road_width, road_width, -road_width, -road_width
        ]
        road_ys = [0, 0, 500, 500, 0]
        plt.figure(figsize=(16, 16))
        plt.axes().set_aspect(1)
        plt.xlim(-50, 50)
        plt.ylim(0, 100)
        # View road
        plt.fill(road_xs, road_ys, alpha=0.2, color='gray')
        plt.plot([road_width / 2, road_width / 2], [0, 100],
                 alpha=0.4,
                 linewidth=4,
                 color='white',
                 ls='--')
        plt.plot([-road_width / 2, -road_width / 2], [0, 100],
                 alpha=0.4,
                 linewidth=4,
                 color='white',
                 ls='--')
        # View cars
        # plt.scatter(points_df['x'], np.sqrt(points_df['z']**2 + points_df['y']**2), color='red', s=10, alpha=0.1)
        # plt.savefig('eda/view_from_sky.png')
        ############
        # NOTE(review): `points_df` is only built at the top of this same
        # `if 0:` block — if the 3-D/regression code below is ever re-enabled
        # on its own it will raise NameError.
        fig = px.scatter_3d(points_df,
                            x='x',
                            y='y',
                            z='z',
                            color='pitch',
                            range_x=(-50, 50),
                            range_y=(0, 50),
                            range_z=(0, 250),
                            opacity=0.1)
        # fig.show()
        # Linear fit of y against z (cars sit on a sloped ground plane).
        zy_slope = LinearRegression()
        X = points_df[['z']]
        y = points_df[['y']]
        zy_slope.fit(X, y)
        print('MAE without x:', mean_absolute_error(y, zy_slope.predict(X)))
        # Will use this model later
        xzy_slope = LinearRegression()
        X = points_df[['x', 'z']]
        y = points_df['y']
        xzy_slope.fit(X, y)
        print('MAE with x:', mean_absolute_error(y, xzy_slope.predict(X)))
        print('\ndy/dx = {:.3f} \ndy/dz = {:.3f}'.format(*xzy_slope.coef_))
        plt.figure(figsize=(16, 16))
        plt.xlim(0, 500)
        plt.ylim(0, 100)
        plt.scatter(points_df['z'], points_df['y'], label='Real points')
        X_line = np.linspace(0, 500, 10)
        plt.plot(X_line,
                 zy_slope.predict(X_line.reshape(-1, 1)),
                 color='orange',
                 label='Regression')
        plt.legend()
        plt.xlabel('z coordinate')
        plt.ylabel('y coordinate')
        plt.savefig('eda/linear_regression.png')
        # 3d view
        # Side-by-side: raw image vs. image with the labelled poses drawn in.
        n_rows = 6
        for idx in range(n_rows):
            fig, axes = plt.subplots(1, 2, figsize=(20, 20))
            img = imread(
                opj(cfg.train_images, train['ImageId'].iloc[idx] + '.jpg'))
            axes[0].imshow(img)
            img_vis = kaggle.visualize(
                img, kaggle.str2coords(train['PredictionString'].iloc[idx]))
            axes[1].imshow(img_vis)
            # plt.show()
            plt.savefig(f'eda/img-view_coords_{idx}.png')
    if 0:  # mask / regression-target visualisation (disabled)
        img0 = imread(opj(cfg.train_images,
                          train.iloc[0]['ImageId'] + '.jpg'))
        img = kaggle.preprocess_image(img0)
        print(train.iloc[0]['PredictionString'])
        mask, regr = kaggle.get_mask_and_regr(
            img0, train.iloc[0]['PredictionString'])
        # print('img.shape', img.shape, 'std:', np.std(img))
        # print('mask.shape', mask.shape, 'std:', np.std(mask))
        # print('regr.shape', regr.shape, 'std:', np.std(regr))
        plt.figure(figsize=(16, 16))
        plt.title('Processed image')
        plt.imshow(img)
        # plt.show()
        plt.savefig('eda/processed_image.png')
        plt.figure(figsize=(16, 16))
        plt.title('Detection Mask')
        plt.imshow(mask)
        # plt.show()
        plt.savefig('eda/detection_mask.png')
        plt.figure(figsize=(16, 16))
        plt.title('Yaw values')
        # -2 channel presumably holds yaw — TODO confirm against
        # get_mask_and_regr's channel layout.
        plt.imshow(regr[:, :, -2])
        # plt.show()
        plt.savefig('eda/yaw_values.png')
    #############
    if 0:  # coords -> targets -> coords round trip, incl. horizontal flip (disabled)
        regr_model = kaggle.get_regr_model(train)
        for idx in range(2):
            fig, axes = plt.subplots(1, 2, figsize=(20, 20))
            for ax_i in range(2):
                img0 = imread(
                    opj(cfg.train_images,
                        train['ImageId'].iloc[idx] + '.jpg'))
                if ax_i == 1:
                    img0 = img0[:, ::-1]
                img = kaggle.preprocess_image(img0, ax_i == 1)
                # NOTE(review): positional indexing via
                # train['PredictionString'][idx] (label-based) differs from
                # the .iloc used elsewhere — same result only while the
                # index is the default RangeIndex.
                mask, regr = kaggle.get_mask_and_regr(
                    img0, train['PredictionString'][idx], ax_i == 1)
                regr = np.rollaxis(regr, 2, 0)
                coords = kaggle.extract_coords(
                    np.concatenate([mask[None], regr], 0), regr_model,
                    ax_i == 1)
                axes[ax_i].set_title('Flip = {}'.format(ax_i == 1))
                axes[ax_i].imshow(kaggle.visualize(img0, coords))
                # plt.show()
                plt.savefig(f'eda/{idx}_{ax_i}.png')
    if 0:  # CarDataset sample visualisation (disabled)
        dataset = dataset_factory.CarDataset(cfg.data.train)
        img, mask, regr = dataset[0]
        plt.figure(figsize=(16, 16))
        plt.imshow(np.rollaxis(img, 0, 3))
        # plt.show()
        plt.savefig(f'eda/img.png')
        plt.figure(figsize=(16, 16))
        plt.imshow(mask)
        # plt.show()
        plt.savefig(f'eda/mask.png')
        plt.figure(figsize=(16, 16))
        plt.imshow(regr[:, :, -2])
        # plt.show()
        plt.savefig(f'eda/regr.png')
    #########
    if 1:  # smoke-test dataloaders and build model/optimizer/scheduler
        # initial -----------------------------------
        best = {
            'loss': float('inf'),
            'score': 0.0,
            'epoch': -1,
        }
        train_loader = dataset_factory.get_dataloader(cfg.data.train)
        valid_loader = dataset_factory.get_dataloader(cfg.data.valid)
        test_loader = dataset_factory.get_dataloader(cfg.data.test)
        # Pull a few test batches just to prove the pipeline runs.
        for i, (img, mask, regr) in enumerate(tqdm(test_loader)):
            print(i)
            if i == 3:
                break
        model = model_factory.get_model(cfg)
        optimizer = optimizer_factory.get_optimizer(model, cfg)
        scheduler = scheduler_factory.get_scheduler(cfg, optimizer,
                                                    best['epoch'])
def run(config_file):
    """Train the three-head Bengali grapheme classifiers.

    Builds one ``Resnet34_3model`` per target — grapheme root (168 classes),
    vowel diacritic (11) and consonant diacritic (7) — trains all three
    jointly via ``do_train``/``do_eval``, and writes a combined checkpoint
    whenever validation recall improves.

    Args:
        config_file: path to the experiment configuration file.
    """
    config = load_config(config_file)
    # All outputs are rooted under this absolute result directory.
    config.work_dir = '/home/koga/workspace/kaggle_bengali/result/' + config.work_dir
    os.makedirs(config.work_dir, exist_ok=True)
    os.makedirs(config.work_dir + "/checkpoints", exist_ok=True)
    print('working directory:', config.work_dir)
    # NOTE(review): no '/' between work_dir and "log.txt" — the log lands
    # next to (not inside) work_dir unless work_dir ends with '/'; confirm.
    logger = get_logger(config.work_dir + "log.txt")

    # Heavy augmentation for training, resize-only for validation.
    all_transforms = {}
    all_transforms['train'] = Transform(
        size=config.data.image_size,
        affine=config.transforms.affine,
        autoaugment_ratio=config.transforms.autoaugment_ratio,
        threshold=config.transforms.threshold,
        sigma=config.transforms.sigma,
        blur_ratio=config.transforms.blur_ratio,
        noise_ratio=config.transforms.noise_ratio,
        cutout_ratio=config.transforms.cutout_ratio,
        grid_distortion_ratio=config.transforms.grid_distortion_ratio,
        random_brightness_ratio=config.transforms.random_brightness_ratio,
        piece_affine_ratio=config.transforms.piece_affine_ratio,
        ssr_ratio=config.transforms.ssr_ratio,
        grid_mask_ratio=config.transforms.grid_mask_ratio,
        augmix_ratio=config.transforms.augmix_ratio,
    )
    all_transforms['valid'] = Transform(size=config.data.image_size)
    dataloaders = {
        phase: make_loader(
            phase=phase,
            df_path=config.train.dfpath,
            batch_size=config.train.batch_size,
            num_workers=config.num_workers,
            idx_fold=config.data.params.idx,
            fold_csv=config.data.params.fold_csv,
            transforms=all_transforms[phase],
            # debug=config.debug
            crop=config.transforms.crop)
        for phase in ['train', 'valid']
    }

    # One model / optimizer / scheduler per prediction target.
    model_root = MODEL_LIST['Resnet34_3model'](pretrained=config.model.pretrained,
                                               out_dim=168)
    model_vowel = MODEL_LIST['Resnet34_3model'](pretrained=config.model.pretrained,
                                                out_dim=11)
    model_const = MODEL_LIST['Resnet34_3model'](pretrained=config.model.pretrained,
                                                out_dim=7)
    model_root = model_root.to(device)
    model_vowel = model_vowel.to(device)
    model_const = model_const.to(device)
    model_list = [model_root, model_vowel, model_const]

    criterion = get_criterion(config)
    optimizer_root = get_optimizer(config, model_root)
    optimizer_vowel = get_optimizer(config, model_vowel)
    optimizer_const = get_optimizer(config, model_const)
    optimizer_list = [optimizer_root, optimizer_vowel, optimizer_const]
    scheduler_root = get_scheduler(optimizer_root, config)
    scheduler_vowel = get_scheduler(optimizer_vowel, config)
    scheduler_const = get_scheduler(optimizer_const, config)
    scheduler_list = [scheduler_root, scheduler_vowel, scheduler_const]

    # Gradient accumulation: number of batches per optimizer step.
    accumlate_step = 1
    if config.train.accumulation_size > 0:
        accumlate_step = config.train.accumulation_size // config.train.batch_size

    best_valid_recall = 0.0
    if config.train.resume:
        print('resume checkpoints')
        checkpoint = torch.load("/home/koga/workspace/kaggle_bengali/result/" +
                                config.train.path)
        # BUG FIX: the original called model.load_state_dict(...) here, but no
        # variable `model` exists in this function (NameError on resume), and
        # the checkpoint written below stores three separate state dicts.
        # Restore each head from its own entry instead.
        model_root.load_state_dict(fix_model_state_dict(checkpoint['checkpoint_root']))
        model_vowel.load_state_dict(fix_model_state_dict(checkpoint['checkpoint_vowel']))
        model_const.load_state_dict(fix_model_state_dict(checkpoint['checkpoint_const']))

    valid_recall = 0.0
    for epoch in range(1, config.train.num_epochs+1):
        print(f'epoch {epoch} start')
        logger.info(f'epoch {epoch} start ')
        metric_train = do_train(model_list, dataloaders["train"], criterion,
                                optimizer_list, device, config, epoch,
                                accumlate_step)
        torch.cuda.empty_cache()
        metrics_eval = do_eval(model_list, dataloaders["valid"], criterion,
                               device)
        torch.cuda.empty_cache()
        valid_recall = metrics_eval["valid_recall"]
        # All three schedulers step on the shared validation recall.
        scheduler_list[0].step(metrics_eval["valid_recall"])
        scheduler_list[1].step(metrics_eval["valid_recall"])
        scheduler_list[2].step(metrics_eval["valid_recall"])
        print(f'epoch: {epoch} ', metric_train, metrics_eval)
        logger.info(f'epoch: {epoch} {metric_train} {metrics_eval}')
        # Checkpoint only on improvement of validation recall.
        if valid_recall > best_valid_recall:
            print(f"save checkpoint: best_recall:{valid_recall}")
            logger.info(f"save checkpoint: best_recall:{valid_recall}")
            torch.save({
                'checkpoint_root': model_list[0].state_dict(),
                'checkpoint_vowel': model_list[1].state_dict(),
                'checkpoint_const': model_list[2].state_dict(),
                'epoch': epoch,
                'best_valid_recall': valid_recall,
            }, config.work_dir + "/checkpoints/" + f"{epoch}.pth")
            best_valid_recall = valid_recall
        torch.cuda.empty_cache()
        gc.collect()
def do_train(cfg, model):
    """Full training driver for one fold: resume, train/validate per epoch,
    checkpoint on best validation loss, with optional apex fp16 and early
    stopping.

    Args:
        cfg: experiment config (provides resume_from, apex, data.*, epoch,
            fold, workdir, early_stop, device).
        model: the network to train (mutated in place).
    """
    # get criterion -----------------------------
    criterion = criterion_factory.get_criterion(cfg)
    # get optimization --------------------------
    optimizer = optimizer_factory.get_optimizer(model, cfg)
    # initial -----------------------------------
    best = {
        'loss': float('inf'),
        'score': 0.0,
        'epoch': -1,
    }
    # resume model ------------------------------
    if cfg.resume_from:
        log.info('\n')
        log.info(f're-load model from {cfg.resume_from}')
        detail = util.load_model(cfg.resume_from, model, optimizer,
                                 cfg.device)
        best.update({
            'loss': detail['loss'],
            'score': detail['score'],
            'epoch': detail['epoch'],
        })
    # scheduler --------------------------------- (resumes at best epoch)
    scheduler = scheduler_factory.get_scheduler(cfg, optimizer, best['epoch'])
    # fp16 --------------------------------------
    # NOTE(review): amp.initialize's return value is discarded — apex O1
    # patches in place so this presumably works, but confirm.
    if cfg.apex:
        amp.initialize(model, optimizer, opt_level='O1', verbosity=0)
    # setting dataset ---------------------------
    loader_train = dataset_factory.get_dataloader(cfg.data.train)
    loader_valid = dataset_factory.get_dataloader(cfg.data.valid)
    # start training ----------------------------
    start_time = datetime.now().strftime('%Y/%m/%d %H:%M:%S')
    log.info('\n')
    log.info(f'** start train [fold{cfg.fold}th] {start_time} **\n')
    log.info(
        'epoch iter rate | smooth_loss/score | valid_loss/score | best_epoch/best_score | min'
    )
    log.info(
        '-------------------------------------------------------------------------------------------------'
    )
    # Epochs continue from the resumed epoch (or 0 on a fresh run).
    for epoch in range(best['epoch'] + 1, cfg.epoch):
        end = time.time()  # epoch start timestamp (used for duration below)
        # Reseed per epoch for reproducible shuffling/augmentation.
        util.set_seed(epoch)
        ## train model --------------------------
        train_results = run_nn(cfg.data.train,
                               'train',
                               model,
                               loader_train,
                               criterion=criterion,
                               optimizer=optimizer,
                               apex=cfg.apex,
                               epoch=epoch)
        ## valid model --------------------------
        with torch.no_grad():
            val_results = run_nn(cfg.data.valid,
                                 'valid',
                                 model,
                                 loader_valid,
                                 criterion=criterion,
                                 epoch=epoch)
        detail = {
            'score': val_results['score'],
            'loss': val_results['loss'],
            'epoch': epoch,
        }
        # Checkpoint whenever validation loss matches or beats the best.
        if val_results['loss'] <= best['loss']:
            best.update(detail)
            # NOTE(review): cfg.fold is formatted as a scalar in the log
            # line above but indexed as cfg.fold[0] here — confirm its type.
            util.save_model(model, optimizer, detail, cfg.fold[0],
                            os.path.join(cfg.workdir, 'checkpoint'))
        log.info('%5.1f %5d %0.6f | %0.4f %0.4f | %0.4f %6.4f | %6.1f %6.4f | %3.1f min' % \
            (epoch+1, len(loader_train), util.get_lr(optimizer), train_results['loss'], train_results['score'], val_results['loss'], val_results['score'], best['epoch'], best['score'], (time.time() - end) / 60))
        scheduler.step(
            val_results['loss'])  # if scheduler is reducelronplateau
        # scheduler.step()
        # early stopping-------------------------
        # Stop when validation loss has not improved for early_stop epochs.
        if cfg.early_stop:
            if epoch - best['epoch'] > cfg.early_stop:
                log.info(f'=================================> early stopping!')
                break
        time.sleep(0.01)
def train(config_yml, working_root=str(this_file_dir / '..')):
    """Endpoint for training an image-classification model.

    Args:
        config_yml (str): path to the YAML configuration file.
        working_root (str, optional): root directory that data paths are
            resolved against. Defaults to str(this_file_dir / '..').
            (Note: the default is evaluated once at import time.)
    """
    with open(config_yml, 'r') as f:
        config = yaml.safe_load(f)
    # ====
    # Data preparation
    # ====
    train_loader = get_dataloader_through_dataset(config['data']['train'],
                                                  working_root)
    test_loader = get_dataloader_through_dataset(
        config['data']['eval'],
        working_root,
    )
    # Build the network; optionally warm-start from saved weights.
    net = get_model(config['model'])
    if config['model'].get('model_state_dict'):
        model_state_dict_path = Path(
            working_root) / config['model']['model_state_dict']
        load_weight(net, str(model_state_dict_path))
    # Optimizer definition
    optimizer = get_optimizer(net, config['optimizer'])
    # Loss-function definition
    criterion = get_loss(config['loss'])
    # Object responsible for persisting configs, metrics and models.
    datasaver = DataSaver(config['output_data'])
    datasaver.save_config(config_yml)
    # ======
    # Main loop
    # ======
    # Device selection: auto-detect, unless the config forces it.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    if 'cuda' in config:
        device = 'cuda' if config['cuda'] == True else 'cpu'
    net.to(device)
    num_epochs = config['num_epochs']
    for epoch in range(num_epochs):
        print(epoch)
        metrics_dict = {}
        #train
        print('train phase')
        metrics = run_train(net, train_loader, criterion, optimizer, device)
        metrics_dict.update(metrics)
        # eval
        print('eval phase')
        metrics, result_detail = run_eval(net, test_loader, criterion, device)
        metrics_dict.update(metrics)
        # Record evaluation metrics and a snapshot for this epoch.
        datasaver.save_metrics(metrics_dict, epoch)
        datasaver.save_model(net, epoch)
        datasaver.save_result_detail(result_detail, epoch)