def main():
    # Clear log dir first
    helpers.clear_logs_folder()

    # Hyperparameters
    input_img_resize = (16, 16)   # The resize size of the input images of the neural net
    output_img_resize = (16, 16)  # The resize size of the output images of the neural net
    batch_size = 100
    epochs = 50  # 100
    threshold = 1.5  # mask is 1 background and 2 infarct
    validation_size = 0.1
    sample_size = None

    # -- Optional parameters
    threads = cpu_count()
    use_cuda = torch.cuda.is_available()
    script_dir = os.path.dirname(os.path.abspath(__file__))

    # Fetch the datasets
    ds_fetcher = DatasetFetcher()
    ds_fetcher.fetch_dataset()

    # Get the path to the files for the neural net
    # We don't want to split train/valid for KFold crossval
    X_train, y_train, X_valid, y_valid, z_train, z_valid = ds_fetcher.get_train_files(
        sample_size=sample_size, validation_size=validation_size)
    full_x_test = ds_fetcher.get_test_files(sample_size)

    # -- Computed parameters
    # Get the original images size (assuming they are all the same size)
    origin_img_size = ds_fetcher.get_image_size(X_train[0])
    # The image kept its aspect ratio so we need to recalculate the img size for the nn
    # Seems not necessary
    # img_resize_centercrop = transformer.get_center_crop_size(X_train[0], img_resize)

    # Training callbacks
    tb_viz_cb = TensorboardVisualizerCallback(
        os.path.join(script_dir, '../logs/tb_viz_' + helpers.get_model_timestamp()))
    tb_logs_cb = TensorboardLoggerCallback(
        os.path.join(script_dir, '../logs/tb_logs_' + helpers.get_model_timestamp()))
    model_saver_cb = ModelSaverCallback(
        os.path.join(script_dir, '../output/models/model_' + helpers.get_model_timestamp()),
        verbose=True)

    # Testing callbacks
    pred_thresh = 0.5
    pred_saver_cb = PredictionsSaverCallback(
        os.path.join(script_dir, '../output/submit_' + helpers.get_model_timestamp() + '.csv.gz'),
        origin_img_size, pred_thresh)

    # Define our neural net architecture
    net = unet.UNet1024((1, *input_img_resize))
    classifier = nn.classifier.InfarctClassifier(net, epochs)

    img_aug = random_shift_scale_rotate  # Image augmentation with shift, scaling and rotation

    train_ds = TrainImageDataset(X_train, y_train, z_train, input_img_resize, X_transform=img_aug)
    train_loader = DataLoader(train_ds, batch_size,
                              sampler=RandomSampler(train_ds),
                              num_workers=threads,
                              pin_memory=use_cuda)

    valid_ds = TrainImageDataset(X_valid, y_valid, z_valid, input_img_resize, threshold=threshold)
    valid_loader = DataLoader(valid_ds, batch_size,
                              sampler=SequentialSampler(valid_ds),
                              num_workers=threads,
                              pin_memory=use_cuda)

    print("Training on {} samples and validating on {} samples".format(
        len(train_loader.dataset), len(valid_loader.dataset)))

    # Train the classifier
    classifier.train(train_loader, valid_loader, epochs,
                     callbacks=[tb_viz_cb, tb_logs_cb, model_saver_cb])

    test_ds = TestImageDataset(full_x_test, input_img_resize)
    test_loader = DataLoader(test_ds, batch_size,
                             sampler=SequentialSampler(test_ds),
                             num_workers=threads,
                             pin_memory=use_cuda)

    # Predict & save
    classifier.predict(test_loader, callbacks=[pred_saver_cb])
    pred_saver_cb.close_saver()
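
# The project-specific `random_shift_scale_rotate` helper is not shown above.
# What follows is a minimal sketch of such an augmentation, assuming an
# OpenCV-style image array; the limits, defaults, and border handling are
# illustrative only, and the real helper presumably applies the same
# transform to the segmentation mask as well so image and target stay aligned.
import cv2
import numpy as np

def random_shift_scale_rotate_sketch(image, shift_limit=0.0625,
                                     scale_limit=0.1, rotate_limit=20, u=0.5):
    """Randomly shift, scale and rotate an image with probability u."""
    if np.random.random() < u:
        height, width = image.shape[:2]
        angle = np.random.uniform(-rotate_limit, rotate_limit)
        scale = np.random.uniform(1 - scale_limit, 1 + scale_limit)
        dx = np.random.uniform(-shift_limit, shift_limit) * width
        dy = np.random.uniform(-shift_limit, shift_limit) * height
        # One affine matrix combines rotation/scale about the center with the shift
        mat = cv2.getRotationMatrix2D((width / 2, height / 2), angle, scale)
        mat[:, 2] += (dx, dy)
        image = cv2.warpAffine(image, mat, (width, height),
                               flags=cv2.INTER_LINEAR,
                               borderMode=cv2.BORDER_REFLECT_101)
    return image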
def main():
    # Clear log dir first
    helpers.clear_logs_folder()

    # Hyperparameters
    img_resize = (1024, 1024)
    batch_size = 2
    epochs = 50
    if opt.mode == 'TEST':
        batch_size = 1
        epochs = 1
    threshold = 0.5
    validation_size = 0.2
    sample_size = None  # Put None to work on the full dataset
    # Training on 4576 samples and validating on 512 samples

    # -- Optional parameters
    threads = cpu_count()
    use_cuda = torch.cuda.is_available()
    script_dir = os.path.dirname(os.path.abspath(__file__))

    # Download the datasets
    ds_fetcher = DatasetFetcher(opt)
    ds_fetcher.download_dataset(False)

    # Get the path to the files for the neural net
    # We don't want to split train/valid for KFold crossval
    X_train, y_train, X_valid, y_valid = ds_fetcher.get_train_files(
        sample_size=sample_size, validation_size=validation_size)
    full_x_test = ds_fetcher.get_test_files(sample_size)

    # -- Computed parameters
    # Get the original images size (assuming they are all the same size)
    origin_img_size = ds_fetcher.get_image_size(X_train[0])
    # The image kept its aspect ratio so we need to recalculate the img size for the nn
    img_resize_centercrop = transformer.get_center_crop_size(X_train[0], img_resize)

    # Training callbacks
    tb_viz_cb = TensorboardVisualizerCallback(os.path.join(script_dir, '../logs/tb_viz'))
    tb_logs_cb = TensorboardLoggerCallback(os.path.join(script_dir, '../logs/tb_logs'))
    model_saver_cb = ModelSaverCallback(
        os.path.join(script_dir, '../output/models/model_' + helpers.get_model_timestamp()),
        verbose=True)

    # Testing callbacks
    pred_saver_cb = PredictionsSaverCallback(
        os.path.join(script_dir, '../output/submit.csv.gz'),
        origin_img_size, threshold)

    # Define our neural net architecture
    net = unet.UNet1024((3, *img_resize_centercrop))
    classifier = nn.classifier.CarvanaClassifier(net, epochs, opt)
    if opt.model != "":
        classifier.restore_model(opt.model)
        classifier.net.eval()

    train_ds = TrainImageDataset(X_train, y_train, img_resize, X_transform=aug.augment_img)
    train_loader = DataLoader(train_ds, batch_size,
                              sampler=RandomSampler(train_ds),
                              num_workers=threads,
                              pin_memory=use_cuda)

    valid_ds = TrainImageDataset(X_valid, y_valid, img_resize, threshold=threshold)
    valid_loader = DataLoader(valid_ds, batch_size,
                              sampler=SequentialSampler(valid_ds),
                              num_workers=threads,
                              pin_memory=use_cuda)

    print("Training on {} samples and validating on {} samples".format(
        len(train_loader.dataset), len(valid_loader.dataset)))

    if opt.mode == 'TRAIN':
        classifier.train(train_loader, valid_loader, epochs,
                         callbacks=[tb_viz_cb, tb_logs_cb, model_saver_cb])
    else:
        classifier.train(train_loader, valid_loader, epochs, callbacks=[])

    test_ds = TestImageDataset(full_x_test, img_resize)
    test_loader = DataLoader(test_ds, batch_size,
                             sampler=SequentialSampler(test_ds),
                             num_workers=threads,
                             pin_memory=use_cuda)

    # Predict & save
    classifier.predict(test_loader, callbacks=[pred_saver_cb])
    pred_saver_cb.close_saver()
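
# The script above reads `opt.mode` and `opt.model`, but the CLI definition is
# not shown. This is a minimal sketch of the implied parser, assuming argparse;
# the flag names, choices, and defaults are assumptions, not the project's
# actual command-line interface.
import argparse

def parse_opt():
    parser = argparse.ArgumentParser(description='UNet training / prediction')
    parser.add_argument('--mode', choices=['TRAIN', 'TEST'], default='TRAIN',
                        help='TEST shrinks batch_size and epochs for a quick smoke run')
    parser.add_argument('--model', default='',
                        help='path of a saved model to restore before predicting')
    return parser.parse_args()

# Usage sketch: opt = parse_opt(), then call main() as above.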
def __call__(self):
    self.ds = Dataset(RS=self.RS,
                      proj_folder=self.folder_name,
                      pseudo_file=self.pseudo_file,
                      silence_binary=self.silence_binary_flag,
                      include_double_words=include_double_words)

    test_dataset = self.ds.test
    if self.predict_custom_flag:
        test_dataset = self.custom_dataset.test
    test_loader_list = self.generate_tta_loader_list(test_dataset)

    for fold_num in [4, 3, 2, 1, 0]:  # np.arange(self.num_folds)
        # Persist the train/valid split ids for this fold
        train_ids = self.ds.train_ids_list[fold_num]
        with open(self.splits_folder + 'train_%d.txt' % fold_num, 'w') as outfile:
            for item in train_ids:
                outfile.write("%s\n" % str(item))

        valid_ids = self.ds.val_ids_list[fold_num]
        with open(self.splits_folder + 'val_%d.txt' % fold_num, 'w') as outfile:
            for item in valid_ids:
                outfile.write("%s\n" % str(item))

        print('Train index', train_ids[:15])

        if mode == '31class':
            if not include_double_words:
                train_ds = ImageDataset(self.ds.train[self.ds.train[config.id_col].isin(train_ids)],
                                        include_target=True,
                                        X_transform=aug.data_transformer)
                val_ds = ImageDataset(self.ds.train[self.ds.train[config.id_col].isin(valid_ids)],
                                      include_target=True,
                                      X_transform=None)
                train_loader = DataLoader(train_ds, batch_size,
                                          sampler=RandomSampler(train_ds),
                                          num_workers=config.THREADS,
                                          pin_memory=config.USE_CUDA)
                valid_loader = DataLoader(val_ds, batch_size,
                                          num_workers=config.THREADS,
                                          pin_memory=config.USE_CUDA)
            else:
                # With double words, the datasets cover the full train set and the
                # samplers restrict each loader to the fold's ids
                train_ds = ImageDataset(self.ds.train,
                                        include_target=True,
                                        X_transform=aug.data_transformer)
                val_ds = ImageDataset(self.ds.train,
                                      include_target=True,
                                      X_transform=None)
                train_loader = DataLoader(train_ds, batch_size,
                                          sampler=UnknownsRandomSampler(
                                              self.ds.train[self.ds.train[config.id_col].isin(train_ids)]),
                                          num_workers=config.THREADS,
                                          pin_memory=config.USE_CUDA)
                valid_loader = DataLoader(val_ds, batch_size,
                                          sampler=UnknownsRandomSampler(
                                              self.ds.train[self.ds.train[config.id_col].isin(valid_ids)]),
                                          num_workers=config.THREADS,
                                          pin_memory=config.USE_CUDA)
        elif mode == 'binary':
            train_ds = ImageDataset(self.ds.train,
                                    include_target=True,
                                    X_transform=aug.data_transformer)
            val_ds = ImageDataset(self.ds.train,
                                  include_target=True,
                                  X_transform=None)
            train_loader = DataLoader(train_ds, batch_size,
                                      sampler=SilenceBinaryRandomSampler(
                                          self.ds.train[self.ds.train[config.id_col].isin(train_ids)]),
                                      num_workers=config.THREADS,
                                      pin_memory=config.USE_CUDA)
            valid_loader = DataLoader(val_ds, batch_size,
                                      sampler=SilenceBinaryRandomSampler(
                                          self.ds.train[self.ds.train[config.id_col].isin(valid_ids)]),
                                      num_workers=config.THREADS,
                                      pin_memory=config.USE_CUDA)

        valid_loader_oof_list = self.generate_tta_loader_list(
            self.ds.train[self.ds.train[config.id_col].isin(valid_ids)])

        # train
        if self.train_flag:
            classifier = nn.classifier.Classifier(net_tuple=net_tuple,
                                                  train_loader=train_loader,
                                                  valid_loader_oof_list=valid_loader_oof_list,
                                                  valid_loader=valid_loader,
                                                  test_loader_list=test_loader_list,
                                                  output_folder=self.folder_name,
                                                  fold_num=fold_num,
                                                  load_model_from_file=None,
                                                  mode=self.mode)
            classifier.train(self.epochs)
        else:
            # load model and predict
            classifier = nn.classifier.Classifier(net_tuple=net_tuple,
                                                  train_loader=train_loader,
                                                  valid_loader_oof_list=valid_loader_oof_list,
                                                  valid_loader=valid_loader,
                                                  test_loader_list=test_loader_list,
                                                  output_folder=self.folder_name,
                                                  fold_num=fold_num,
                                                  load_model_from_file=self.model_weights_dict[fold_num],
                                                  mode=self.mode)

        # predict
        oof_train, test_pred_sub, self.aug_col_list = classifier.predict()

        if self.train_flag:
            training_log_info = classifier.training_log_info
            self.best_weights.append(training_log_info.head(1)['weight'].item())
            self.training_logs.append(training_log_info)

        # Free the classifier before the next fold
        del classifier
        gc.collect()

        # Concat oof for train
        if isinstance(self.train_prediction, pd.DataFrame):
            self.train_prediction = pd.concat([self.train_prediction, oof_train])
        else:
            self.train_prediction = oof_train

        # Merge test predictions (joined on id, one prediction set per fold)
        if isinstance(self.test_prediction, pd.DataFrame):
            self.test_prediction = self.test_prediction.merge(test_pred_sub, on='id')
        else:
            self.test_prediction = test_pred_sub

    # Calculate score across folds
    self.errors_dict = {}
    loss_list = np.array([x.head(1)['valid_loss'].item() for x in self.training_logs])
    self.errors_dict['val_std'] = np.std(loss_list)
    self.errors_dict['val_mean'] = np.mean(loss_list)
    self.errors_dict['best_dict'] = self.best_weights
    with open(self.folder_name + 'results.json', 'w') as fp:
        json.dump(self.errors_dict, fp)
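
# Toy illustration of the cross-fold aggregation above: out-of-fold (OOF)
# train predictions are stacked row-wise with pd.concat, while per-fold test
# predictions share an 'id' column and are merged column-wise. The column
# names here are hypothetical.
import pandas as pd

fold0 = pd.DataFrame({'id': ['a', 'b'], 'pred_fold0': [0.1, 0.9]})
fold1 = pd.DataFrame({'id': ['a', 'b'], 'pred_fold1': [0.2, 0.8]})
merged = fold0.merge(fold1, on='id')
print(merged)
#   id  pred_fold0  pred_fold1
# 0  a         0.1         0.2
# 1  b         0.9         0.8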