def __get_metrics_map(self):
    from fastai.metrics import rmse, mse, mae, accuracy, FBeta, RocAucBinary, Precision, Recall, R2Score
    from .fastai_helpers import medae
    from .quantile_helpers import HuberPinballLoss
    metrics_map = {
        # Regression
        'root_mean_squared_error': rmse,
        'mean_squared_error': mse,
        'mean_absolute_error': mae,
        'r2': R2Score(),
        'median_absolute_error': medae,

        # Classification
        'accuracy': accuracy,

        'f1': FBeta(beta=1),
        'f1_macro': FBeta(beta=1, average='macro'),
        'f1_micro': FBeta(beta=1, average='micro'),
        'f1_weighted': FBeta(beta=1, average='weighted'),  # this one has some issues

        'roc_auc': RocAucBinary(),

        'precision': Precision(),
        'precision_macro': Precision(average='macro'),
        'precision_micro': Precision(average='micro'),
        'precision_weighted': Precision(average='weighted'),

        'recall': Recall(),
        'recall_macro': Recall(average='macro'),
        'recall_micro': Recall(average='micro'),
        'recall_weighted': Recall(average='weighted'),
        'log_loss': None,

        'pinball_loss': HuberPinballLoss(quantile_levels=self.quantile_levels)
        # Not supported: pac_score
    }
    return metrics_map
def __get_objective_func_name(self):
    from fastai.metrics import root_mean_squared_error, mean_squared_error, mean_absolute_error, accuracy, FBeta, AUROC, Precision, Recall, r2_score
    metrics_map = {
        # Regression
        'root_mean_squared_error': root_mean_squared_error,
        'mean_squared_error': mean_squared_error,
        'mean_absolute_error': mean_absolute_error,
        'r2': r2_score,
        # Not supported: median_absolute_error

        # Classification
        'accuracy': accuracy,

        'f1': FBeta(beta=1),
        'f1_macro': FBeta(beta=1, average='macro'),
        'f1_micro': FBeta(beta=1, average='micro'),
        'f1_weighted': FBeta(beta=1, average='weighted'),  # this one has some issues

        'roc_auc': AUROC(),

        'precision': Precision(),
        'precision_macro': Precision(average='macro'),
        'precision_micro': Precision(average='micro'),
        'precision_weighted': Precision(average='weighted'),

        'recall': Recall(),
        'recall_macro': Recall(average='macro'),
        'recall_micro': Recall(average='micro'),
        'recall_weighted': Recall(average='weighted'),
        'log_loss': None,
        # Not supported: pac_score
    }

    # Unsupported metrics will be replaced by defaults for a given problem type
    objective_func_name = self.stopping_metric.name
    if objective_func_name not in metrics_map:
        if self.problem_type == REGRESSION:
            objective_func_name = 'mean_squared_error'
        else:
            objective_func_name = 'log_loss'
        logger.warning(f'Metric {self.stopping_metric.name} is not supported by this model - using {objective_func_name} instead')

    # .get() returns None for names that map to no fastai metric (e.g. log_loss)
    nn_metric = metrics_map.get(objective_func_name)

    return nn_metric, objective_func_name
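
# --- Illustration (not part of the source) ---
# A minimal, standalone sketch of the fallback logic above: a metric name
# missing from the supported set degrades to a per-problem-type default.
# The function name and the 'regression' literal are assumptions used only
# for this illustration.
def resolve_objective(metric_name, problem_type, supported_metrics):
    if metric_name in supported_metrics:
        return metric_name
    return 'mean_squared_error' if problem_type == 'regression' else 'log_loss'

assert resolve_objective('pac_score', 'binary', {'accuracy', 'log_loss'}) == 'log_loss'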
def createmodel(self, quantize=True):
    """Creates the model and attaches it to the dataloader.

    By default it sets up the model for quantization aware training.

    Parameters
    ----------
    quantize : bool, optional
        Whether to prepare the model for quantization aware training, by default True
    """
    print("Creating model..")
    vision.learner.create_body = self.create_custom_body
    self.learn = cnn_learner(
        self.data,
        models.mobilenet_v2,
        pretrained=True,
        metrics=[error_rate, FBeta(beta=1), Precision(), Recall(), AUROC()],
        split_on=custom_split,
        model_dir=self.model_dir,
    )
    if quantize:
        self.learn.model[0].qconfig = torch.quantization.default_qat_qconfig
        self.learn.model = torch.quantization.prepare_qat(
            self.learn.model, inplace=True
        )
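
# --- Illustration (assumption, not in the source) ---
# After quantization aware training finishes, the QAT-prepared model is
# typically converted to a truly quantized model. This toy example shows the
# standard PyTorch flow with a throwaway module; the real code would apply
# torch.quantization.convert to self.learn.model after training.
import torch
import torch.nn as nn

qat_model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.ReLU())
qat_model.qconfig = torch.quantization.default_qat_qconfig
torch.quantization.prepare_qat(qat_model, inplace=True)
# ... training loop would run here ...
qat_model.eval()
quantized_model = torch.quantization.convert(qat_model, inplace=False)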
def train(data):
    learn = cnn_learner(data, models.resnet18, metrics=[Precision(), Recall()])
    learn.callback_fns.append(
        partial(LearnerTensorboardWriter,
                base_dir=Path("data/tensorboard/camlytics_fastai"),
                name="deterministic-data"))
    learn.fit_one_cycle(20, 1e-2)
    learn.save("finetune-epoch-20")

    import pdb
    pdb.set_trace()
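
# --- Hedged usage sketch (assumption, not in the source) ---
# Reload the checkpoint saved above for later evaluation; learn.load is the
# fastai v1 counterpart of learn.save. The logged curves can be inspected with
# the standard TensorBoard CLI:
#   tensorboard --logdir data/tensorboard/camlytics_fastai
def evaluate(data):
    learn = cnn_learner(data, models.resnet18, metrics=[Precision(), Recall()])
    learn.load("finetune-epoch-20")
    return learn.validate()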
cat_names = ['eras']
emb_szs = {}
for name in cat_names:
    emb_szs[name] = 2

data = data.drop([], axis=1)
sample = data.sample(50000)
data = TabularDataBunch.from_df(os.getcwd(), data, dep_var, valid_idx=valid_idx,
                                procs=procs, cat_names=cat_names)
print(data.train_ds.cont_names)

precision = Precision()
recall = Recall()
mattcor = MatthewsCorreff()
learn = tabular_learner(data, layers=[100, 300, 300, 300, 100], emb_szs=emb_szs,
                        metrics=[accuracy, precision, recall, mattcor],
                        ps=[0.5, 0.5, 0.5, 0.5, 0.5])
learn.lr_find()
learn.recorder.plot()
learn.fit_one_cycle(10, 1e-2)
learn.recorder.plot_metrics()

preds, y, losses = learn.get_preds(with_loss=True)
interp = ClassificationInterpretation(learn, preds, y, losses)
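
# Hedged follow-up (assumption, not in the source): the interpretation object
# built above supports fastai v1's standard error-analysis helpers.
interp.plot_confusion_matrix(figsize=(6, 6))
print(interp.most_confused(min_val=2))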
# Encode and transform the data.
# Since the target classification is categorical, we need a way to interpret
# the neural network's output. Here we assign each possible output an integer
# 0..n-1; another option would be one-hot encoding, which may be explored further.
encoder = LabelEncoder()
encoder.fit(df_y)
data_y = encoder.transform(df_y)

# Normalize the x data and rename for consistency
data_x = (df - df.mean()) / (df.max() - df.min())
data_x = data_x.values

# Set up the metrics we want to collect. TP/TN/FP/FN were not available, but
# recall and precision are still extremely helpful for evaluating the model.
metrics = [accuracy, Recall(), Precision()]

# Keep track of which fold we are on
fold_num = 1
total_folds = 10

# Get the indices for each fold and train on that fold.
# Our goal here is to implement stratified 10-fold cross validation.
for train_idx, test_idx in StratifiedKFold(n_splits=total_folds, shuffle=True,
                                           random_state=1).split(data_x, data_y):
    # This creates the data fold the way we need to hand it to the tabular learner
    data_fold = (TabularList.from_df(data, path=path, cont_names=cont_names, procs=procs)
                 .split_by_idxs(train_idx, test_idx)
                 .label_from_df(cols=dep_var)
                 .databunch())
    print('Fold {}/{}'.format(fold_num, total_folds))
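    # Hedged sketch (assumption, not in the source): the rest of the fold loop
    # would build a fresh learner on data_fold, train it, and advance the fold
    # counter. Layer sizes and epoch count are placeholders.
    learn = tabular_learner(data_fold, layers=[200, 100], metrics=metrics)
    learn.fit_one_cycle(5, 1e-2)
    fold_num += 1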
        x = F.relu(self.fc1(x))
        x = F.log_softmax(self.fc2(x), dim=1)
        return x


# %%
net = ConvNet()
print(summary(net, torch.zeros((1, 1, 28, 28)), show_input=True))
print(summary(net, torch.zeros((1, 1, 28, 28)), show_input=False))

# %%
learner = Learner(
    mnist_dls,
    ConvNet(),
    loss_func=F.nll_loss,
    metrics=[accuracy, Precision(average="macro"), Recall(average="macro")],
)

# %% [markdown]
# These are too many epochs, but we want to see the behavior of the net when it
# is trained for some time.

# %%
learner.fit(n_epoch=20)

# %%
pprint(list(learner.metrics))

# %%
model = learner.model
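
# %%
# Hedged follow-up (assumption, not in the source): learner.validate() returns
# [val_loss, accuracy, precision, recall] in the order the metrics were
# registered above, which gives a quick numeric summary after training.
print(learner.validate())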
def train(learn):
    import fastprogress
    # import cv2 as cv
    import numpy as np
    import pandas as pd
    import fastai
    from sklearn.model_selection import KFold
    from wandb.fastai import WandbCallback
    import wandb
    from torch import nn
    import torch.nn.functional as F

    ALPHA = 2.0
    BETA = 10000.5
    GAMMA = 10

    import pretrainedmodels

    def resnext50_32x4d(pretrained=False):
        pretrained = 'imagenet' if pretrained else None
        model = pretrainedmodels.se_resnext50_32x4d(pretrained=pretrained)
        return nn.Sequential(*list(model.children()))

    class FocalLoss(nn.Module):
        def __init__(self, alpha=1., gamma=2.):
            super().__init__()
            self.alpha = alpha
            self.gamma = gamma

        def forward(self, inputs, targets, **kwargs):
            CE_loss = nn.CrossEntropyLoss(reduction='none')(inputs, targets)
            pt = torch.exp(-CE_loss)
            F_loss = self.alpha * ((1 - pt) ** self.gamma) * CE_loss
            return F_loss.mean()

    print(1)
    path = '/cs/home/khfy6uat/data/classification_1024/classifier_data'

    # Force console-friendly progress bars in every fastai module that uses them
    fastprogress.fastprogress.NO_BAR = True
    master_bar, progress_bar = fastprogress.fastprogress.force_console_behavior()
    fastai.basic_train.master_bar, fastai.basic_train.progress_bar = master_bar, progress_bar
    fastai.basic_data.master_bar, fastai.basic_data.progress_bar = master_bar, progress_bar
    fastai.core.master_bar, fastai.core.progress_bar = master_bar, progress_bar

    sz = 0
    sz1 = 0

    # Default values for hyper-parameters we're going to sweep over
    config_defaults = {
        'epochs': 2,
        'batch_size': 12,
        'weight_decay': 0.0005,
        'learning_rate': 1e-3,
        'seed': 42,
        'encoder_size': 128,
        'decoder_size': 224,
    }

    # Initialize a new wandb run
    wandb.init(config=config_defaults)

    # Config is a variable that holds and saves hyperparameters and inputs
    config = wandb.config
    sz = config.encoder_size
    sz1 = config.decoder_size

    tfms = get_transforms(do_flip=True, flip_vert=False, max_lighting=0.1,
                          max_zoom=1.05, max_warp=0.,
                          xtra_tfms=[rand_crop(), rand_zoom(1, 1.5),
                                     symmetric_warp(magnitude=(-0.2, 0.2))])

    print('right before data')
    data = (ImageList.from_folder(path)
            .split_by_rand_pct(seed=10)
            .label_from_folder()
            .transform(tfms, size=sz)
            .databunch(bs=12).normalize(imagenet_stats))

    print('right before learner')
    wandbclc = partial(WandbCallback, log="all", input_type='images',
                       monitor='recall', mode='max')
    per = Precision()
    rec = Recall()
    # learn = cnn_learner(data, resnext50_32x4d, pretrained=True, cut=-2,
    #                     split_on=lambda m: (m[0][3], m[1]),
    #                     metrics=[per, rec, error_rate], callback_fns=[wandbclc])
    print(learn.data.train_dl.batch_size)

    base_lr = config.learning_rate
    print(config.epochs)
    learn.fit_one_cycle(config.epochs, max_lr=slice(base_lr), wd=1e-5)
    learn.unfreeze()
    learn = learn.clip_grad()
    # Discriminative learning rates after unfreezing
    lr = [base_lr / 200, base_lr / 20, base_lr / 10]
    learn.fit_one_cycle(config.epochs, max_lr=slice(lr[0], lr[-1]), wd=1e-5)

    if sz1 > 0:
        # Progressive resizing: retrain at the larger decoder size with cutout
        SZ = sz1
        cutout_frac = 0.20
        p_cutout = 0.75
        cutout_sz = round(SZ * cutout_frac)
        cutout_tfm = cutout(n_holes=(1, 1), length=(cutout_sz, cutout_sz), p=p_cutout)

        tfms = get_transforms(do_flip=True, max_rotate=15,
                              flip_vert=False, max_lighting=0.1, max_zoom=1.05,
                              max_warp=0.,
                              xtra_tfms=[rand_crop(), rand_zoom(1, 1.5),
                                         symmetric_warp(magnitude=(-0.2, 0.2)),
                                         cutout_tfm])
        data = (ImageList.from_folder(path)
                .split_by_rand_pct(seed=10)
                .label_from_folder()
                .transform(tfms, size=sz1)
                .databunch(bs=12).normalize(imagenet_stats))
        learn.data = data
        learn.fit_one_cycle(config.epochs, max_lr=slice(base_lr), wd=1e-5)
        learn.unfreeze()
        learn = learn.clip_grad()
        lr = [base_lr / 200, base_lr / 20, base_lr / 10]
        learn.fit_one_cycle(config.epochs, max_lr=slice(lr[0], lr[-1]), wd=1e-5)
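
# --- Hedged sketch (assumption, not in the source) ---
# train() reads wandb.config, so it is shaped like a W&B sweep target. A sweep
# over the same hyper-parameters could be launched like this; the project name
# and value grids are placeholders, and a learner `learn` is assumed to have
# been built beforehand.
import wandb

sweep_config = {
    'method': 'random',
    'metric': {'name': 'recall', 'goal': 'maximize'},
    'parameters': {
        'learning_rate': {'values': [1e-3, 1e-4]},
        'encoder_size': {'values': [128, 224]},
        'decoder_size': {'values': [0, 224]},
    },
}
sweep_id = wandb.sweep(sweep_config, project='xray-classifier')
wandb.agent(sweep_id, function=lambda: train(learn), count=5)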
data.show_batch(rows=3, figsize=(12, 9))  # displays a batch of the training set

# %%
data  # displays the data details

# %%
# class names and number of classes
print(data.classes)
len(data.classes), data.c

# %%
f_score = partial(fbeta, thresh=0.2, beta=0.5)
per = Precision()
rec = Recall()

# %%
import pretrainedmodels  # model library


# %%
def resnext50_32x4d(pretrained=True):
    pretrained = 'imagenet' if pretrained else None
    model = pretrainedmodels.se_resnext50_32x4d(pretrained=pretrained)
    return nn.Sequential(*list(model.children()))
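
# %%
# Hedged sketch (assumption): wiring the custom se_resnext50 body into a
# fastai v1 cnn_learner; the cut/split_on values mirror the commented-out
# call in train() above.
learn = cnn_learner(data, resnext50_32x4d, pretrained=True, cut=-2,
                    split_on=lambda m: (m[0][3], m[1]),
                    metrics=[per, rec, error_rate])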
# %%
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# %%
mnist_dls = ImageDataLoaders.from_folder(
    mnist_dir,
    train="training",
    valid="testing",
    device=device,
    batch_tfms=aug_transforms(mult=2, do_flip=False),
    item_tfms=Resize(224),
)

# %%
resnet_learner = cnn_learner(
    mnist_dls,
    resnet18,
    metrics=[accuracy, Precision(average="macro"), Recall(average="macro")],
)

# %%
with resnet_learner.no_bar():
    resnet_learner.fine_tune(1)

# %%
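# Possible next step (assumption, not in the source): inspect the fine-tuned
# model with fastai v2's interpretation helpers.
interp = ClassificationInterpretation.from_learner(resnet_learner)
interp.plot_confusion_matrix(figsize=(8, 8))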