def describe_parameters(cls, learner, lr=None, wd=None, **kwargs):  # pylint: disable=unused-argument
    # I don't like that I'm copying the fastai code, but I don't have any way to
    # intercept the state that fit computes internally.
    # Also, we should be able to expand with additional parameters as needed.
    lr = ifnone(lr, defaults.lr)
    wd = ifnone(wd, learner.wd)
    bs = learner.data.train_dl.batch_size
    return f"lr={lr} wd={wd} bs={bs}"

def __init__(self, data, scales=None, ratios=None, backbone=None, pretrained_path=None, *args, **kwargs):
    # Set default backbone to be 'resnet50'
    if backbone is None:
        backbone = models.resnet50

    super().__init__(data, backbone)

    n_bands = len(getattr(self._data, '_extract_bands', [0, 1, 2]))
    _backbone = self._backbone
    if hasattr(self, '_orig_backbone'):
        _backbone = self._orig_backbone

    # Check if the backbone provided is compatible, use resnet50 as default
    if not self._check_backbone_support(_backbone):
        raise Exception(
            f"Enter only compatible backbones from {', '.join(self.supported_backbones)}"
        )

    self.name = "RetinaNet"
    self._code = code

    self.scales = ifnone(scales, [1, 2**(-1 / 3), 2**(-2 / 3)])
    self.ratios = ifnone(ratios, [1 / 2, 1, 2])
    self._n_anchors = len(self.scales) * len(self.ratios)

    self._data = data
    self._chip_size = (data.chip_size, data.chip_size)

    # Cut off the backbone before the penultimate layer
    self._encoder = create_body(self._backbone, -2)

    # Initialize the model, the loss function and the Learner object
    self._model = RetinaNetModel(self._encoder,
                                 n_classes=data.c - 1,
                                 final_bias=-4,
                                 chip_size=self._chip_size,
                                 n_anchors=self._n_anchors,
                                 n_bands=n_bands)
    self._loss_f = RetinaNetFocalLoss(sizes=self._model.sizes,
                                      scales=self.scales,
                                      ratios=self.ratios)

    self.learn = Learner(data, self._model, loss_func=self._loss_f)
    self.learn.split([self._model.encoder[6], self._model.c5top5])
    self.learn.freeze()

    if pretrained_path is not None:
        self.load(str(pretrained_path))

    self._arcgis_init_callback()  # make first conv weights learnable

def _svalidate(self, dl=None, callbacks=None, metrics=None):
    "Validate on `dl` with potential `callbacks` and `metrics`."
    dl = ifnone(dl, self.data.valid_dl)
    metrics = ifnone(metrics, self.metrics)
    cb_handler = CallbackHandler(self.callbacks + ifnone(callbacks, []), metrics)
    cb_handler.on_train_begin(1, None, metrics)
    cb_handler.on_epoch_begin()
    val_metrics = silent_validate(self.model, dl, self.loss_func, cb_handler)
    cb_handler.on_epoch_end(val_metrics)
    return cb_handler.state_dict['last_metrics']

def batch_stats(self, funcs: Collection[Callable] = None,
                ds_type: DatasetType = DatasetType.Train) -> Tensor:
    "Grab a batch of data and call reduction function `func` per channel"
    funcs = ifnone(funcs, [torch.mean, torch.std])
    x = self.one_batch(ds_type=ds_type, denorm=False)[0].cpu()
    return [func(channel_view(x), 1) for func in funcs]

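# --- Illustration (not from the source): the per-channel reduction used by `batch_stats`.
# fastai's `channel_view` puts channels first and flattens the rest; an equivalent reshape
# is spelled out here so the sketch runs standalone. Shapes and values are made up.
import torch

x = torch.randn(8, 3, 32, 32)                        # a fake batch: (batch, channels, h, w)
flat = x.transpose(0, 1).contiguous().view(3, -1)    # channel-first, flattened over batch/h/w
per_channel_mean = torch.mean(flat, 1)               # one value per channel
per_channel_std = torch.std(flat, 1)
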
def loss_batch(model:nn.Module, xb:Tensor, yb:Tensor, loss_func:OptLossFunc=None,
               opt:OptOptimizer=None,
               cb_handler:Optional[CallbackHandler]=None)->Tuple[Union[Tensor,int,float,str]]:
    "Calculate loss and metrics for a batch, call out to callbacks as necessary."
    cb_handler = ifnone(cb_handler, CallbackHandler())
    device = xb.device

    # Translate from fastai box format to torchvision.
    batch_sz = len(xb)
    images = xb
    targets = []
    for i in range(batch_sz):
        boxes = yb[0][i]
        labels = yb[1][i]
        boxes = to_box_pixel(boxes, *images[0].shape[1:3])
        targets.append(BoxList(boxes, labels=labels))

    out = None
    loss = torch.Tensor([0.0]).to(device=device)
    if model.training:
        loss_dict = model(images, targets)
        loss = loss_dict['total_loss']
        cb_handler.state_dict['loss_dict'] = loss_dict
    else:
        out = model(images)

    out = cb_handler.on_loss_begin(out)

    if opt is not None:
        loss, skip_bwd = cb_handler.on_backward_begin(loss)
        if not skip_bwd:
            loss.backward()
        if not cb_handler.on_backward_end():
            opt.step()
        if not cb_handler.on_step_end():
            opt.zero_grad()

    return loss.detach().cpu()

def loss_batch(
        model: nn.Module,
        xb: Tensor,
        yb: Tensor,
        loss_func: OptLossFunc = None,
        opt: OptOptimizer = None,
        cb_handler: Optional[CallbackHandler] = None
) -> Tuple[Union[Tensor, int, float, str]]:
    "Calculate loss and metrics for a batch, call out to callbacks as necessary."
    cb_handler = ifnone(cb_handler, CallbackHandler())
    if not is_listy(xb):
        xb = [xb]
    if not is_listy(yb):
        yb = [yb]
    out = model(*xb)
    out = cb_handler.on_loss_begin(out)

    if not loss_func:
        return to_detach(out), yb[0].detach()
    loss = loss_func(out, *yb)

    if opt is not None:
        loss, skip_bwd = cb_handler.on_backward_begin(loss)
        if not skip_bwd:
            loss.backward()
        if not cb_handler.on_backward_end():
            opt.step()
        if not cb_handler.on_step_end():
            opt.zero_grad()

    return loss.detach().cpu()

def __init__(self, emb_szs, n_cont, out_sz, layers, emb_drop=0., window=24,
             filters=[1, 2, 3, 4, 5, 6], y_range=None, use_bn=False, ps=None,
             bn_final=False):
    super().__init__()
    # TODO: Use the filters arg to generate the conv_layers dynamically
    # Wavenet model layers
    self.c1a = conv_layer(window=window // 2, ks=1, dilation=1)
    self.c1b = conv_layer(window=window // 4, ks=1, dilation=2)
    self.c2a = conv_layer(window=window // 2, ks=2, dilation=1)
    self.c2b = conv_layer(window=window // 4, ks=2, dilation=2)
    self.c3a = conv_layer(window=window // 2, ks=3, dilation=1)
    self.c3b = conv_layer(window=window // 4, ks=3, dilation=2)
    self.c4a = conv_layer(window=window // 2, ks=4, dilation=1)
    self.c4b = conv_layer(window=window // 4, ks=4, dilation=2)
    self.c5a = conv_layer(window=window // 2, ks=5, dilation=1)
    self.c5b = conv_layer(window=window // 4, ks=5, dilation=2)
    self.c6a = conv_layer(window=window // 2, ks=6, dilation=1)
    self.c6b = conv_layer(window=window // 4, ks=6, dilation=2)
    num_wave_outputs = (len(filters) * (window // 2)) + (len(filters) * (window // 4))

    # Fastai's Mixed Input model
    ps = ifnone(ps, [0] * len(layers))
    ps = listify(ps, layers)
    self.embeds = nn.ModuleList([embedding(ni, nf) for ni, nf in emb_szs])
    self.emb_drop = nn.Dropout(emb_drop)
    self.bn_cont = nn.BatchNorm1d(n_cont)
    n_emb = sum(e.embedding_dim for e in self.embeds)
    self.n_emb, self.n_cont, self.y_range = n_emb, n_cont, y_range
    sizes = self.get_sizes(layers, out_sz)
    actns = [nn.ReLU(inplace=True)] * (len(sizes) - 2) + [None]
    layers = []
    for i, (n_in, n_out, dp, act) in enumerate(
            zip(sizes[:-2], sizes[1:-1], [0.] + ps, actns)):
        layers += bn_drop_lin(n_in, n_out, bn=use_bn and i != 0, p=dp, actn=act)
    if bn_final:
        layers.append(nn.BatchNorm1d(sizes[-1]))
    self.layers = nn.Sequential(*layers)

    # Final layer
    self.f = Flatten()
    self.lin = nn.Linear(sizes[-2] + num_wave_outputs, out_sz, bias=False)
    self.sizes = sizes
    self.num_wave_outputs = num_wave_outputs

def set_data(self, tr_data, val_data, bs=None):
    """Set data sources for this learner."""
    tr_ds = self.create_dataset(tr_data)
    val_ds = self.create_dataset(val_data)
    bs = ifnone(bs, defaults.batch_size)
    self.data = DataBunch(tr_ds.as_loader(bs=bs), val_ds.as_loader(bs=bs))
    if 'data' in self.parameters:
        del self.parameters['data']  # force recomputation

def trained_learner(model_cls, env, s_format, experience, bs=64, layers=None,
                    render='rgb_array', memory_size=1000000, decay=0.0001,
                    lr=None, actor_lr=None, epochs=450,
                    opt=torch.optim.RMSprop, **kwargs):
    "Build a DDPG learner on `env`, fit it for `epochs` epochs, and return it."
    lr, actor_lr = ifnone(lr, 1e-3), ifnone(actor_lr, 1e-4)
    data = MDPDataBunch.from_env(env, render=render, bs=bs, add_valid=False,
                                 keep_env_open=False, feed_type=s_format,
                                 memory_management_strategy='k_partitions_top',
                                 k=3, **kwargs)
    exploration_method = OrnsteinUhlenbeck(size=data.action.taken_action.shape,
                                           epsilon_start=1, epsilon_end=0.1,
                                           decay=decay)
    memory = experience(memory_size=memory_size, reduce_ram=True)
    model = create_ddpg_model(data=data, base_arch=model_cls, lr=lr,
                              actor_lr=actor_lr, layers=layers, opt=opt)
    learner = ddpg_learner(data=data, model=model, memory=memory,
                           exploration_method=exploration_method,
                           callback_fns=[RewardMetric, EpsilonMetric])
    learner.fit(epochs)
    return learner

def batch_stats(self, funcs: Collection[Callable] = None) -> Tensor:
    "Grab a batch of data and call reduction function `func` per channel"
    funcs = ifnone(funcs, [torch.mean, torch.std])
    # x = self.one_batch(ds_type=DatasetType.Valid, denorm=False)[0].cpu()
    # one_batch gives an (x, y) pair on the first dim; the next dim is the number of images
    # xs = [b.cpu() for b in self.one_batch(ds_type=DatasetType.Valid, denorm=False)[0]]
    # return [[func(channel_view(x), 1) for func in funcs] for x in xs]
    x = self.one_batch(ds_type=DatasetType.Valid, denorm=False)[0][0].cpu()
    return [func(channel_view(x), 1) for func in funcs]

def on_epoch_end(self, epoch: int, smooth_loss: Tensor, last_metrics: MetricsList, **kwargs) -> bool:
    "Add a line with `epoch` number, `smooth_loss` and `last_metrics`."
    msg = ','.join(
        self.learn.recorder.names[:(None if self.add_time else -1)]) + '\n'
    last_metrics = ifnone(last_metrics, [])
    stats = [
        str(stat) if isinstance(stat, int) else
        '#na#' if stat is None else f'{stat:.6f}'
        for name, stat in zip(
            self.learn.recorder.names, [epoch, smooth_loss] + last_metrics)
    ]
    if self.add_time:
        stats.append(format_time(time() - self.start_epoch))
    str_stats = ','.join(stats)
    msg = msg + str_stats + '\n'
    try:
        self.bot.send_message(chat_id=self.chat_id, text=msg)
    except Exception as e:
        warn("Could not deliver message. Error: " + str(e), RuntimeWarning)

def _partition_args(cls, opt_func=None, loss_func=None, metrics=None, true_wd=None,
                    bn_wd=True, wd=None, train_bn=True, path=None, model_dir=None,
                    callback_fns=None, callbacks=None, layer_groups=None,
                    add_time=True, silent=None, **kwargs):
    """Pull out arguments for the learner class and set their defaults.

    Returns a tuple (learner-args, other-args)."""
    # Why we do this seemingly pointless thing: it allows the learner __init__ to accept
    # mixed kwargs that cover both the Learner and the model objects. Here we (a) separate
    # the mixed kwargs into two groups, and (b) standardize the default values for the
    # learner args (which include additional defaults beyond what fastai provides).
    learner_args = {
        'opt_func': ifnone(opt_func, defaults.opt_func),
        'loss_func': ifnone(loss_func, defaults.loss_func),
        'metrics': ifnone(metrics, defaults.metrics),
        'true_wd': ifnone(true_wd, defaults.fastai_wd),
        'bn_wd': bn_wd,
        'wd': ifnone(wd, defaults.wd),
        'train_bn': train_bn,
        'path': ifnone(path, defaults.model_directory),
        # This is a change from the fastai default: we mix learner exports and models in the same dir
        'model_dir': ifnone(model_dir, '.'),
        'callback_fns': ifnone(callback_fns, defaults.callback_fns),
        'callbacks': callbacks,
        'layer_groups': layer_groups,
        'add_time': add_time,
        'silent': silent
    }
    return (learner_args, kwargs)

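# --- Illustration (not from the source): the same "partition mixed kwargs" idea in a
# self-contained form. `known_keys` and the example kwargs are made up; the real method
# above additionally fills in standardized fastai-style defaults for the learner args.
def _partition_demo(known_keys, **kwargs):
    learner_args = {k: kwargs.pop(k) for k in known_keys if k in kwargs}
    return learner_args, kwargs

learner_args, model_args = _partition_demo(['wd', 'metrics'], wd=1e-2, metrics=['acc'], hidden=128)
# learner_args -> {'wd': 0.01, 'metrics': ['acc']}; model_args -> {'hidden': 128}
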
def my_cl_int_plot_top_losses(self, k, largest=True, figsize=(25, 7), heatmap: bool = True,
                              heatmap_thresh: int = 16, return_fig: bool = None) -> Optional[plt.Figure]:
    "Show images in `top_losses` along with their prediction, actual, loss, and probability of actual class."
    tl_val, tl_idx = self.top_losses(k, largest)
    classes = self.data.classes
    cols = math.ceil(math.sqrt(k))
    rows = math.ceil(k / cols)
    fig, axes = plt.subplots(rows, cols, figsize=figsize)
    fig.suptitle('prediction/actual/loss/probability', weight='bold', size=14)
    for i, idx in enumerate(tl_idx):
        audio, cl = self.data.dl(self.ds_type).dataset[idx]
        audio = audio.clone()
        m = self.learn.model.eval()
        x, _ = self.data.one_item(audio)             # Process one audio item into a prediction input
        x_consolidated = x.sum(dim=1, keepdim=True)  # Sum across all channels to ease the interpretation
        im = Image(x_consolidated[0, :, :, :].cpu()) # Extract the processed image (after dl_tfms) and keep it on the CPU
        cl = int(cl)
        title = f'{classes[self.pred_class[idx]]}/{classes[cl]} / {self.losses[idx]:.2f} / {self.probs[idx][cl]:.2f}'
        title = title + f'\n {audio.fn}'
        im.show(ax=axes.flat[i], title=title)
        if heatmap:
            # Related paper: http://openaccess.thecvf.com/content_ICCV_2017/papers/Selvaraju_Grad-CAM_Visual_Explanations_ICCV_2017_paper.pdf
            with hook_output(m[0]) as hook_a:                 # hook activations from the CNN module
                with hook_output(m[0], grad=True) as hook_g:  # hook gradients from the CNN module
                    preds = m(x)                              # Forward pass to get activations
                    preds[0, cl].backward()                   # Backward pass to get gradients
            acts = hook_a.stored[0].cpu()
            if (acts.shape[-1] * acts.shape[-2]) >= heatmap_thresh:
                grad = hook_g.stored[0][0].cpu()  # Gradients from the CNN module; take the first (only one item)
                grad_chan = grad.mean(1).mean(1)  # Mean across the image to keep mean gradients per channel
                mult = F.relu((acts * grad_chan[..., None, None]).sum(0))  # Weight activations by gradients (add dims for height and width)
                sz = list(im.shape[-2:])
                axes.flat[i].imshow(mult, alpha=0.35, extent=(0, *sz[::-1], 0),
                                    interpolation='bilinear', cmap='magma')
    if ifnone(return_fig, defaults.return_fig):
        return fig

def __init__(self, ni: int, nf: int = None, scale: int = 2, blur: bool = False,
             norm_type=NormType.Weight):
    super().__init__()
    nf = ifnone(nf, ni)
    self.conv = conv_layer(ni, nf * (scale**2), ks=1, norm_type=norm_type, use_activ=False)
    icnr(self.conv[0].weight)
    self.shuf = nn.PixelShuffle(scale)
    # Blurring over (h*w) kernel
    # "Super-Resolution using Convolutional Neural Networks without Any Checkerboard Artifacts"
    # - https://arxiv.org/abs/1806.02658
    self.pad = nn.ReplicationPad2d((1, 0, 1, 0))
    self.blur = nn.AvgPool2d(2, stride=1)
    self.relu = nn.ReLU(inplace=True)

def plot_confusion_matrix_thresh(self, normalize: bool = False, title: str = 'Confusion matrix',
                                 cmap: Any = "Blues", slice_size: int = 1, thresh: float = 0.0,
                                 norm_dec: int = 2, plot_txt: bool = True,
                                 return_fig: bool = None, **kwargs) -> Optional[plt.Figure]:
    "Plot the confusion matrix, with `title` and using `cmap`."
    # This function is mainly copied from the sklearn docs
    if thresh == 0:
        cm = self.confusion_matrix(slice_size=slice_size)
    else:
        cm = threshold_confusion_matrix(self, thresh=thresh)
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
    fig = plt.figure(**kwargs)
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    tick_marks = np.arange(self.data.c)
    plt.xticks(tick_marks, self.data.y.classes, rotation=90)
    plt.yticks(tick_marks, self.data.y.classes, rotation=0)

    if plot_txt:
        thresh = cm.max() / 2.
        for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
            coeff = f'{cm[i, j]:.{norm_dec}f}' if normalize else f'{cm[i, j]}'
            plt.text(j, i, coeff,
                     horizontalalignment="center",
                     verticalalignment="center",
                     color="white" if cm[i, j] > thresh else "black")

    plt.ylim(-0.5, self.data.c - 0.5)
    plt.tight_layout()
    plt.ylabel('Actual')
    plt.xlabel('Predicted')
    plt.grid(False)
    if ifnone(return_fig, defaults.return_fig):
        return fig

def loss_batch(
        model: nn.Module,
        xb: Tensor,
        yb: Tensor,
        loss_func: OptLossFunc = None,
        opt: OptOptimizer = None,
        cb_handler: Optional[CallbackHandler] = None,
) -> Tuple[Union[Tensor, int, float, str]]:
    "Calculate loss and metrics for a batch, call out to callbacks as necessary."
    cb_handler = ifnone(cb_handler, CallbackHandler())
    if not is_listy(xb):
        xb = [xb]
    if not is_listy(yb):
        yb = [yb]
    out = [model(x) for x in xb]
    out = cb_handler.on_loss_begin(out)

    if not loss_func:
        return to_detach(out), yb[0].detach()

    loss = loss_func(out)

    if opt is not None:
        loss = cb_handler.on_backward_begin(loss)
        # fastai v1.0.52 introduced the possibility for the backwards step to
        # be optional by returning a tuple here, see
        # https://github.com/fastai/fastai/commit/6fcaad870e0e833d325052b57e72e23a450ebc6f#diff-0730afdfa67f9712e46ad7866b0123f8L32
        if type(loss) == tuple:
            loss, skip_bwd = loss
            if not skip_bwd:
                loss.backward()
        else:
            loss.backward()

        cb_handler.on_backward_end()
        opt.step()
        cb_handler.on_step_end()
        opt.zero_grad()

    return loss.detach().cpu()

def plot2(self, skip_start: int = 10, skip_end: int = 5, suggestion: bool = True,
          return_fig: bool = None, win=3, **kwargs) -> Optional[plt.Figure]:
    "Plot learning rate and losses, trimmed between `skip_start` and `skip_end`. Optionally plot and return min gradient"
    lrs = self._split_list(self.lrs, skip_start, skip_end)
    losses = self._split_list(self.losses, skip_start, skip_end)
    losses = [x.item() for x in losses]
    all_losses = [losses]
    # if 'k' in kwargs: losses = self.smoothen_by_spline(lrs, losses, **kwargs)
    fig, ax = plt.subplots(1, 1)
    ax.plot(lrs, losses)
    if win is not None:
        losses2 = my_smooth(losses, w=win)
        all_losses.append(losses2)
        ax.plot(lrs, losses2, 'g', lw=0.5)
    ax.set_ylabel("Loss")
    ax.set_xlabel("Learning Rate")
    ax.set_xscale('log')
    ax.xaxis.set_major_formatter(plt.FormatStrFormatter('%.0e'))
    if suggestion:
        for i, l in enumerate(all_losses):
            tag = '' if i == 0 else ' (smoothed)'
            try:
                mg = (np.gradient(np.array(l))).argmin()
            except:
                print(f"Failed to compute the gradients{tag}, there might not be enough points.")
                return
            print(f"Min numerical gradient: {lrs[mg]:.2E}{tag}")
            color = 'r' if i == 0 else 'g'
            ax.plot(lrs[mg], losses[mg], markersize=10, marker='o', color=color)
            if i == 0:
                self.min_grad_lr = lrs[mg]
                ml = np.argmin(l)
                ax.plot(lrs[ml], losses[ml], markersize=8, marker='o', color='k')
                print(f"Min loss divided by 10: {lrs[ml] / 10:.2E}")
                ax.plot([lrs[ml] / 10, lrs[ml] / 10], [np.min(l), np.max(l)],
                        'k--', alpha=0.5)
                # print(np.min(l), np.max(l))
            elif i == 1:
                self.min_grad_lr_smoothed = lrs[mg]
    if ifnone(return_fig, defaults.return_fig):
        return fig
    try:
        if not IN_NOTEBOOK:
            plot_sixel(fig)
    except:
        pass

def on_train_begin(self, pbar, metrics_names, **kwargs):
    "Store the progress bar and add 'val_loss' to the reported metric names."
    self.pbar = pbar
    metrics_names = ifnone(metrics_names, [])
    metrics_names.append('val_loss')
    return {'metrics_names': metrics_names}

def __init__(self, f_in=None, f_out=None, f_batch_in=None, f_batch_out=None):
    # Defaults: single items go in as (tensor, no extra state) and come out as NumPy
    # arrays; batches pass through unchanged.
    self.f_in = ifnone(f_in, lambda x: (torch.Tensor(x), None))
    self.f_out = ifnone(f_out, lambda x, y: x.numpy())
    self.f_batch_in = ifnone(f_batch_in, lambda x: (x, None))
    self.f_batch_out = ifnone(f_batch_out, lambda x, y: x)

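# --- Illustration (not from the source): what the default codecs above do, using the
# lambdas directly so the sketch is self-contained (the enclosing class is not shown here).
import numpy as np
import torch

f_in = lambda x: (torch.Tensor(x), None)   # item in: wrap in a float tensor, no side state
f_out = lambda x, y: x.numpy()             # item out: back to a NumPy array, state ignored
tensor, state = f_in(np.arange(4, dtype=np.float32))
round_tripped = f_out(tensor, state)       # array([0., 1., 2., 3.], dtype=float32)
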
def from_file(cls, fpath, key=None, idxs=None, **kwargs):
    "Construct an instance from the HDF5 file at `fpath`, defaulting to its first key and all indices."
    fpath = Path(fpath)
    file = h5py.File(fpath, 'r')
    key = ifnone(key, list(file.keys())[0])
    items = ifnone(idxs, list(range(len(file[key]))))
    return cls(items, path=fpath.parent, file=file, key=key, **kwargs)

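# --- Illustration (not from the source): the defaulting logic in `from_file`, written
# against h5py directly; 'data/train.h5' is an illustrative path, not from the source.
from pathlib import Path
import h5py

fpath = Path('data/train.h5')
with h5py.File(fpath, 'r') as file:
    key = list(file.keys())[0]            # ifnone(key, ...): fall back to the first dataset key
    items = list(range(len(file[key])))   # ifnone(idxs, ...): fall back to every row index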