def download_url(url, dest, overwrite=False, pbar=None, show_progress=True, chunk_size=1024 * 1024, timeout=4, retries=5):
    "Download `url` to `dest` unless it exists and not `overwrite`"
    if os.path.exists(dest) and not overwrite: return
    s = requests.Session()
    # FIX: the retry adapter was mounted only for 'http://', so https URLs
    # (the common case) received no retries at all. Mount it for both schemes.
    adapter = requests.adapters.HTTPAdapter(max_retries=retries)
    s.mount('http://', adapter)
    s.mount('https://', adapter)
    u = s.get(url, stream=True, timeout=timeout)
    try:
        file_size = int(u.headers["Content-Length"])
    except (KeyError, ValueError, TypeError):
        # FIX: narrowed the bare `except:` — only a missing/unparsable
        # Content-Length header should disable the progress bar.
        show_progress = False
    with open(dest, 'wb') as f:
        nbytes = 0
        if show_progress:
            pbar = progress_bar(range(file_size), auto_update=False, leave=False, parent=pbar)
        try:
            for chunk in u.iter_content(chunk_size=chunk_size):
                nbytes += len(chunk)
                if show_progress: pbar.update(nbytes)
                f.write(chunk)
        except requests.exceptions.ConnectionError as e:
            # Best-effort guidance for a manual recovery after retries exhaust
            fname = url.split('/')[-1]
            data_dir = dest.parent
            print(f'\n Download of {url} has failed after {retries} retries\n'
                  f' Fix the download manually:\n'
                  f'$ mkdir -p {data_dir}\n'
                  f'$ cd {data_dir}\n'
                  f'$ wget -c {url}\n'
                  f'$ tar -zxvf {fname}\n'
                  f' And re-run your code once the download is successful\n')
def predict_folds(self, fy: FoldYielder, pred_name: str = 'pred', callbacks: Optional[List[AbsCallback]] = None,
                  verbose: bool = True, bs: Optional[int] = None) -> None:
    r'''
    Apply model to all data accessed by a :class:`~lumin.nn.data.fold_yielder.FoldYielder` and save predictions as a new group in the fold file

    Arguments:
        fy: :class:`~lumin.nn.data.fold_yielder.FoldYielder` interfacing to data
        pred_name: name of group to which to save predictions
        callbacks: list of any callbacks to use during evaluation
        verbose: whether to print average prediction timings
        bs: if not `None`, will run prediction in batches of specified size to save of memory
    '''
    timings = []
    mb = master_bar(range(len(fy)))
    for fold_idx in mb:
        fold_start = timeit.default_timer()
        if fy.test_time_aug:
            # Average predictions over every augmented view of this fold
            aug_preds = []
            pb = progress_bar(range(fy.aug_mult), parent=mb)
            for aug in pb:
                fold = fy.get_test_fold(fold_idx, aug)['inputs']
                aug_preds.append(self.predict_array(fold, callbacks=callbacks, bs=bs))
            pred = np.mean(aug_preds, axis=0)
        else:
            fold = fy.get_fold(fold_idx)['inputs']
            pred = self.predict_array(fold, callbacks=callbacks, bs=bs)
        timings.append((timeit.default_timer() - fold_start) / len(fold))
        # Single-output models are stored as a flat column
        if self.n_out > 1:
            fy.save_fold_pred(pred, fold_idx, pred_name=pred_name)
        else:
            fy.save_fold_pred(pred[:, 0], fold_idx, pred_name=pred_name)
    timings = uncert_round(np.mean(timings), np.std(timings, ddof=1) / np.sqrt(len(timings)))
    if verbose: print(f'Mean time per event = {timings[0]}±{timings[1]}')
def train(self, dataloader):
    """Run one training epoch over `dataloader`; returns (per-batch losses, per-metric value lists)."""
    self.net.train()
    train_loss = []
    # One accumulator list per configured metric, or None when no metrics
    train_metrics = [[] for _ in self.metrics] if self.metrics else None
    for batch in progress_bar(dataloader, parent=self.mb):
        X, y = batch.text.to(device), batch.label.to(device)
        self.optimizer.zero_grad()
        output = self.net(X)
        loss = self.loss(output, y)
        loss.backward()
        self.optimizer.step()
        train_loss.append(loss.item())
        comment = f'train_loss {np.mean(train_loss):.5f}'
        if self.metrics:
            for i, metric in enumerate(self.metrics):
                train_metrics[i].append(metric.call(output, y))
                comment += f' train_{metric.name} {np.mean(train_metrics[i]):.5f}'
        # Running averages shown under the master bar
        self.mb.child.comment = comment
    return train_loss, train_metrics
def train(model, dataloader, optimizer):
    """One epoch of context/target training; returns the mean negated loss per example."""
    model.train()
    total = 0
    for xc, yc, xt, yt in progress_bar(dataloader, parent=args.mb):
        xc, yc = xc.to(args.device), yc.to(args.device)
        xt, yt = xt.to(args.device), yt.to(args.device)
        optimizer.zero_grad()
        # Predictive distribution over targets given the context set
        pred_dist = model(xc, yc, xt)
        loss = -pred_dist.log_prob(yt).sum(-1).mean()
        loss.backward()
        optimizer.step()
        # Accumulate the (negated) loss weighted by batch size
        total -= loss.item() * xc.size(0)
    return total / len(dataloader.dataset)
def save_tiles(tile_size, num_tiles=5, scale=4, threshold=180):
    """Cut paired HR/LR images into `tile_size` tiles under train/valid ROI directories."""
    print(f'\n\nsave {tile_size} tiles')
    hr_ROI = paired_001 / f'roi_hr_{tile_size}'
    lr_ROI = paired_001 / f'roi_lr_{tile_size}'
    lr_ROI_small = paired_001 / f'roi_lr_up_{tile_size}'
    for (id, depth), hr_fn in progress_bar(list(hr_file_map.items())):
        lr_fn = lr_file_map[(id, depth)]
        # Route the image into train or valid based on its id
        sub_dir = 'train' if id in train_ids else 'valid'
        base_name = f'{tile_size}_{id}_{depth}.tif'
        helpers.tif_to_tiles(lr_fn, hr_fn, base_name,
                             hr_ROI / sub_dir, lr_ROI / sub_dir, lr_ROI_small / sub_dir,
                             size=tile_size, num_tiles=num_tiles,
                             scale=scale, threshold=threshold)
def get_features(self, layer_idx: int, ds_name: str, ds_type: str = 'test') -> torch.Tensor:
    # Load every saved batch of layer outputs for one dataset split and
    # concatenate them into a single feature tensor.
    print(f'getting features for {ds_name} {ds_type}..')
    batched_outputs = []
    # Fixed on-disk layout: one directory per layer, one file per batch
    layer_dir = f'/mnt/disks/disk/{ds_name}_{ds_type}_layer_output/{layer_idx}'
    batched_output_files = os.listdir(layer_dir)
    for batch_output_files in progress_bar(batched_output_files):
        batch_filename = f'{layer_dir}/{batch_output_files}'
        with self.load_feature_file(batch_filename) as batched_output:
            if isinstance(self.layers[layer_idx], (Conv2d, BatchNorm2d, ReLU, MaxPool2d)):
                # Spatial layers are reduced to per-channel statistics first
                batched_output = self.get_mean_channels(batched_output)
            else:
                batched_output = torch.tensor(batched_output).to('cpu')
            batched_outputs.append(batched_output)
    # NOTE(review): the preallocated `out` tensor is sized from the *last*
    # loop iteration's `batched_output` (loop-variable leakage) and assumes a
    # feature dim of 64. torch.cat resizes `out` if the size is wrong, but the
    # sizing intent should be confirmed — and it breaks if the directory is empty.
    return torch.cat(batched_outputs, out=torch.Tensor(
        len(batched_output_files) * len(batched_output), 64))
def predict_images(self, image_list, ds_kwargs=None, verbose=1, **kwargs):
    "Predict images in 'image_list' with kwargs and save to zarr"
    # FIX: the original used a mutable default argument (`ds_kwargs={}`),
    # which is shared across calls; use the None-sentinel idiom instead.
    if ds_kwargs is None: ds_kwargs = {}
    for f in progress_bar(image_list, leave=False):
        if verbose > 0: print(f'Predicting {f.name}')
        ds = TileDataset([f], stats=self.stats, return_index=True, **ds_kwargs)
        softmax, stdeviation, energy = self.predict(ds, **kwargs)
        # Save to zarr; std/energy groups are created lazily only when produced
        self.g_smx[f.name] = softmax
        if stdeviation is not None:
            self.g_std = self.root.require_group('std')
            self.g_std[f.name] = stdeviation
        if energy is not None:
            self.g_eng = self.root.require_group('energy')
            self.g_eng[f.name] = energy
    return self.g_smx, self.g_std, self.g_eng
def train_epoch(model, train_loader, criterion, optimizer, mb, cfg):
    """Train `model` for one epoch; returns (model, mean loss over batches)."""
    model.train()
    avg_loss = 0.
    for images, labels in progress_bar(train_loader, parent=mb):
        # FIX: `torch.autograd.Variable` has been a no-op wrapper since
        # torch 0.4 — plain tensors autograd natively, so move to device directly.
        images = images.to(device)
        labels = labels.to(device)
        preds = model(images.float())
        if cfg.model.n_classes > 1:
            loss = criterion(preds, labels)
        else:
            # Single-logit model: reshape prediction to match the label tensor
            loss = criterion(preds.view(labels.shape), labels.float())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        avg_loss += loss.item() / len(train_loader)
        del images, labels; gc.collect()
    return model, avg_loss
def train(model, dataloader, optimizer):
    """One training epoch for the image density model; returns mean negated loss per example."""
    model.train()
    running = 0
    for step, (I, _) in enumerate(progress_bar(dataloader, parent=args.mb)):
        I = I.to(args.device)
        optimizer.zero_grad()
        # Likelihood of the image itself (channels-last) under the model
        pred_dist = model(I)
        loss = -pred_dist.log_prob(channel_last((I))).sum(-1).mean()
        loss.backward()
        optimizer.step()
        running -= loss.item() * I.size(0)
        # Refresh the progress-bar comment every 10 batches
        if step % 10 == 0:
            args.mb.child.comment = 'loss={:.3f}'.format(loss.item())
    return running / len(dataloader.dataset)
def __init__(
    self,
    draws,
    tune,
    chains,
    cores,
    seeds,
    start_points,
    step_method,
    start_chain_num=0,
    progressbar=True,
):
    """Create one ProcessAdapter per chain plus a shared fastprogress bar."""
    # Each chain needs exactly one seed and one starting point
    if len(seeds) != chains or len(start_points) != chains:
        raise ValueError("Number of seeds and start_points must be %s." % chains)

    self._samplers = []
    for chain, (seed, start) in enumerate(zip(seeds, start_points)):
        self._samplers.append(
            ProcessAdapter(
                draws, tune, step_method, chain + start_chain_num, seed, start
            )
        )

    # Scheduling state: samplers move inactive -> active -> finished,
    # with at most `cores` running at once.
    self._inactive = self._samplers.copy()
    self._finished = []
    self._active = []
    self._max_active = cores

    self._in_context = False
    self._start_chain_num = start_chain_num

    self._divergences = 0
    self._total_draws = 0
    self._desc = "Sampling {0._chains:d} chains, {0._divergences:,d} divergences"
    self._chains = chains
    # One bar tick per draw across all chains, tuning included
    self._progress = progress_bar(
        range(chains * (draws + tune)), display=progressbar, auto_update=False
    )
    self._progress.comment = self._desc.format(self)
def train_epoch(model, train_loader, criterion, optimizer, mb, cfg):
    """One training epoch with optional 50%-probability mixup; returns (model, mean batch loss)."""
    model.train()
    avg_loss = 0.
    for images, labels in progress_bar(train_loader, parent=mb):
        images, labels = images.to(device), labels.to(device)
        # Draw unconditionally so the RNG stream matches regardless of config
        r = np.random.rand()
        if cfg.data.train.mixup and r < 0.5:
            images, labels = mixup(images, labels, 1.0)
        preds = model(images.float())
        # Single-logit model: reshape prediction to match the label tensor
        loss = criterion(preds.view(labels.shape), labels.float())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        avg_loss += loss.item() / len(train_loader)
        del images, labels; gc.collect()
    return model, avg_loss
def train_one_epoch(model, optimizer, scheduler, data_loader, device, master_bar):
    """Train the detection `model` for one epoch with gradient clipping."""
    model.train()
    for batch_inputs, batch_targets in progress_bar(data_loader, parent=master_bar):
        batch_inputs = [item.to(device) for item in batch_inputs]
        batch_targets = [{k: v.to(device) for k, v in t.items()} for t in batch_targets]
        loss_dict = model(batch_inputs,
                          [t['boxes'] for t in batch_targets],
                          [t['labels'] for t in batch_targets])
        total_loss = sum(loss_dict.values())
        optimizer.zero_grad()
        total_loss.backward()
        # Clip gradients to guard against explosion
        torch.nn.utils.clip_grad_norm_(model.parameters(), .25)
        optimizer.step()
        # OneCycleLR is a per-batch scheduler, so it steps inside the loop
        if isinstance(scheduler, torch.optim.lr_scheduler.OneCycleLR):
            scheduler.step()
        master_bar.child.comment = ' | '.join(f"{k}: {v.item():.4}"
                                              for k, v in loss_dict.items())
def get_idxs_and_dists(query_features, index_features, BS=32):
    """Batched 100-NN (L2) search of `query_features` against `index_features` on GPU 0 via faiss.

    Returns (indices, distances), each of shape (n_queries, 100).
    """
    import faiss
    flat_config = faiss.GpuIndexFlatConfig()
    flat_config.device = 0
    res = faiss.StandardGpuResources()
    co = faiss.GpuClonerOptions()
    # Build a CPU index first, then clone it onto the GPU
    feat_dim = index_features.shape[1]
    cpu_index = faiss.IndexFlatL2(feat_dim)
    cpu_index.add(index_features)
    index = faiss.index_cpu_to_gpu(res, 0, cpu_index, co)
    n_queries = len(query_features)
    out_dists = np.zeros((n_queries, 100), dtype=np.float32)
    out_idxs = np.zeros((n_queries, 100), dtype=np.int32)
    for start in progress_bar(range(0, n_queries, BS)):
        stop = min(start + BS, n_queries)
        dists, idxs = index.search(query_features[start:stop], 100)
        out_dists[start:stop] = dists
        out_idxs[start:stop] = idxs
    return out_idxs, out_dists
def train(self, num_epochs=30, lr=1e-3):
    """Train the LSTM for `num_epochs` epochs on the stored train/test tensors.

    Arguments:
        num_epochs: number of full-batch training iterations
        lr: initial Adam learning rate (halved on validation plateau)

    Returns:
        (final training loss, final validation loss) as Python floats.
    """
    criterion = RMELoss().to(self.device)
    optimizer = torch.optim.Adam(self.model.parameters(), lr=lr, weight_decay=1e-5)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=500,
                                                           factor=0.5, min_lr=1e-7, eps=1e-08)
    print("SingleLSTM Train")
    # Train the model
    for epoch in progress_bar(range(num_epochs)):
        self.model.train()
        outputs = self.model(self.trainX.to(self.device))
        optimizer.zero_grad()
        loss = criterion(outputs, self.trainY.to(self.device))
        # BUG FIX: the original never called backward()/step(), so the
        # optimizer never updated a single weight — training was a no-op.
        loss.backward()
        optimizer.step()
        # Validation loss drives the plateau scheduler
        valid = self.model(self.testX.to(self.device))
        vali_loss = criterion(valid, self.testY.to(self.device))
        scheduler.step(vali_loss)
    return loss.cpu().item(), vali_loss.cpu().item()
def zmuv(learn: Learner, tol: float = 1e-5, exp_mean: float = 0., exp_std: float = 1., orthonorm: bool = True,
         cond: callable = has_weight_or_bias, verbose: bool = False) -> Learner:
    """Zero-mean/unit-variance initialisation of `learn.model`, layer by layer.

    Optionally orthogonalises weights first, then rescales each layer matching
    `cond` so its activations on one training batch approach `exp_mean`/`exp_std`.
    Returns the same `learn` object.
    """
    print('ZMUV initialization...')
    xb, yb = next(iter(learn.data.train_dl))
    if orthonorm:
        learn.model.apply(orthogonal_weights_init)
    mods = get_layers(learn.model, cond=cond)
    mean_act, std_act = [], []
    from fastprogress import progress_bar
    from time import sleep
    pb = progress_bar(mods)
    for m in pb:
        sleep(0.01)  # give the progress bar time to render
        # Guard clause: skip modules without weights or biases
        if not (has_weight(m) or has_bias(m)):
            continue
        pre_mean, pre_std, mean, std = zmuv_layer(learn.model, m, xb, tol=tol,
                                                  exp_mean=exp_mean, exp_std=exp_std)
        if mean == 0 and std == 0:
            continue
        mean_act.append(mean)
        std_act.append(std)
        if verbose >= 2:
            print(m)
            print(' pre-zmuv activations : mean = {:9.5f} std = {:9.5f}'.format(pre_mean, pre_std))
            print(' post-zmuv activations : mean = {:9.5f} std = {:9.5f}'.format(mean, std))
    print('\noverall post-zmuv activations: mean = {:9.5f} std = {:9.5f}'.format(np.mean(mean_act), np.mean(std_act)))
    print('...ZMUV initialization complete\n')
    return learn
def predict_data(self, model, loader, device, batch_size, target_cols):
    """
    Run prediction.

    Args:
        model (obj): model
        loader (arr): test data
        device (str): choice of gpu or cpu for running model
        batch_size (int): batch size
        target_cols (arr): target features

    Returns:
        arr: list of batches of dict of target features and their
            corresponding predictions
    """
    predictions = []
    for batch in progress_bar(loader):
        predictions += self.predict_batch(model, batch, device, target_cols)
    # Sanity check: each batch must contribute pred_len rows per sample
    expected_length = model.pred_len * len(loader) * batch_size
    assert len(predictions) == expected_length, \
        f"len = {len(predictions)} expected = {expected_length}"
    return predictions
def load(self, name: str) -> None:
    r'''
    Load an instantiated :class:`~lumin.nn.ensemble.ensemble.Ensemble` with weights and :class:`~lumin.nn.models.model.Model` from save.

    Arguments;
        name: base name for saved objects

    Examples::
        >>> ensemble.load('weights/ensemble')
    '''
    # Mandatory artefacts: model builder, all saved models, ensemble weights
    with open(f'{name}_builder.pkl', 'rb') as fin:
        self.model_builder = pickle.load(fin)
    names = glob.glob(f'{name}_*.h5')
    self.models = []
    for n in progress_bar(sorted(names)):
        m = Model(self.model_builder)
        m.load(n)
        self.models.append(m)
    self.size = len(self.models)
    self.n_out = self.models[0].get_out_size()
    with open(f'{name}_weights.pkl', 'rb') as fin:
        self.weights = pickle.load(fin)
    # Optional artefacts: missing files are silently skipped, as before.
    # IMPROVEMENT: the three copy-pasted try/open/except blocks are
    # collapsed into a single loop over attribute names.
    for attr in ('input_pipe', 'output_pipe', 'feats'):
        try:
            with open(f'{name}_{attr}.pkl', 'rb') as fin:
                setattr(self, attr, pickle.load(fin))
        except FileNotFoundError:
            pass
def train_epoch(model, train_loader, optimizer, criterion, master_bar, epoch=0, scheduler=None, device='cpu'):
    """Train a model for one epoch

    Args:
        model (torch.nn.Module): model to train
        train_loader (torch.utils.data.DataLoader): training dataloader
        optimizer (torch.optim.Optimizer): parameter optimizer
        criterion (torch.nn.Module): criterion object
        master_bar (fastprogress.MasterBar): master bar of training progress
        epoch (int): current epoch index
        scheduler (torch.optim._LRScheduler, optional): learning rate scheduler
        device (str): device hosting tensor data

    Returns:
        train_loss (float): mean batch loss over the epoch
    """
    model.train()
    train_loss = 0
    # Iterate the loader directly instead of iter()/next() over range(len(...))
    for x, target in progress_bar(train_loader, parent=master_bar):
        target = target.type(torch.LongTensor).squeeze()
        # Work with tensors on GPU
        if device.startswith('cuda'):
            x, target = x.cuda(), target.cuda()
        batch_loss = train_batch(model, x, target, optimizer, criterion)
        train_loss += batch_loss
        if scheduler:
            scheduler.step()
        master_bar.child.comment = f"Batch loss: {batch_loss:.4}"
    return train_loss / len(train_loader)
def _fit_epoch(self, freeze_until, mb):
    """
    Fit a single epoch

    Args:
        freeze_until (str): last layer to freeze
        mb (fastprogress.master_bar): primary progress bar
    """
    self.train_loss = 0
    self.model.train()
    pb = progress_bar(self.train_loader, parent=mb)
    for x, target in pb:
        x, target = self.to_cuda(x, target)
        self.example_ct += x.shape[0]

        # Forward
        batch_loss = self._get_loss(x, target)
        self.train_loss += batch_loss.item()
        # Backprop
        self._backprop_step(batch_loss)
        # Update LR
        self.scheduler.step()
        pb.comment = f"Training loss: {batch_loss.item():.4}"
        self.step += 1

        # Report metrics to wandb every 5th batch
        if self.step % 5 == 0 and self.wb:
            wandb.log({"epoch": self.epoch, "train_loss": batch_loss.item()},
                      step=self.example_ct)
    self.epoch += 1
    self.train_loss /= len(self.train_loader)
    self.train_loss_recorder.append(self.train_loss)
def _check_val_set_fy(train_fy: FoldYielder, val_fy: FoldYielder, test_fy: Optional[FoldYielder] = None,
                      n_folds: Optional[int] = None) -> None:
    '''Method to check validation set suitability by seeing whether random forests can predict whether events belong to
    one dataset or the other. Trainings are run once per fold and averaged.'''
    # Use the smallest fold count shared by all provided FoldYielders
    n = min(train_fy.n_folds, val_fy.n_folds)
    if test_fy is not None: n = min(n, test_fy.n_folds)
    if n_folds is not None: n = min(n, n_folds)
    train_feats = None
    samples = {'train': train_fy} if test_fy is None else {'train': train_fy, 'test': test_fy}
    for sample in samples:
        aucs, fis = [], []
        for fold_idx in progress_bar(range(n)):
            df_0 = samples[sample].get_df(pred_name='None', inc_inputs=True, deprocess=True, fold_idx=fold_idx,
                                          verbose=False, suppress_warn=True)
            df_1 = val_fy.get_df(pred_name='None', inc_inputs=True, deprocess=True, fold_idx=fold_idx,
                                 verbose=False, suppress_warn=True)
            # Label the two datasets and weight each to unit total weight
            df_0['gen_target'] = 0
            df_1['gen_target'] = 1
            df_0['gen_weight'] = 1/len(df_0)
            df_1['gen_weight'] = 1/len(df_1)
            # FIX: DataFrame.append was removed in pandas 2.0 — use pd.concat
            df = pd.concat([df_0, df_1], ignore_index=True).sample(frac=1)
            df_trn, df_val = df[:len(df)//2], df[len(df)//2:]
            if train_feats is None: train_feats = [f for f in df_trn.columns if 'gen_' not in f]
            m = RandomForestClassifier(n_estimators=40, min_samples_leaf=25, n_jobs=-1)
            m.fit(X=df_trn[train_feats], y=df_trn['gen_target'], sample_weight=df_trn['gen_weight'])
            aucs.append(roc_auc_score(y_true=df_val['gen_target'], y_score=m.predict(df_val[train_feats]),
                                      sample_weight=df_val['gen_weight']))
            # Collect per-fold feature importances; concatenated once below
            fis.append(get_rf_feat_importance(m, df_val[train_feats], df_val['gen_target'], df_val['gen_weight']))
        fi = pd.concat(fis, ignore_index=True)
        mean = uncert_round(np.mean(aucs), np.std(aucs, ddof=1)/np.sqrt(len(aucs)))
        print(f"\nAUC for {sample}-validation discrimination = {mean[0]}±{mean[1]}")
        print("Top 10 most important features are:")
        mean_fi = pd.DataFrame()
        mean_fi['Importance'] = fi['Importance'].groupby(fi['Feature']).mean()
        mean_fi['Uncertainty'] = fi['Importance'].groupby(fi['Feature']).std()/np.sqrt(n)
        mean_fi.sort_values(['Importance'], inplace=True, ascending=False)
        mean_fi.reset_index(inplace=True)
        print(mean_fi[:min(10, len(mean_fi))])
def gt_estimation(self, method='STAPLE', save_dir=None, filetype='.png', **kwargs):
    """Estimate a reference mask per file from the expert annotations and score each expert against it (IoU)."""
    assert method in ['STAPLE', 'majority_voting']
    results = []
    refs = {}
    print(f'Starting ground truth estimation - {method}')
    for m, exps in progress_bar(self.masks.items()):
        masks = [_read_msk(self.mask_fn(exp, m)) for exp in exps]
        # Fuse the expert masks into one reference mask
        if method == 'STAPLE':
            ref = staple(masks, self.staple_fval, self.staple_thres)
        elif method == 'majority_voting':
            ref = m_voting(masks, self.mv_undec)
        refs[m] = ref
        #assert ref.mean() > 0, 'Please try again!'
        results.append(pd.DataFrame({
            'method': method,
            'file': m,
            'exp': exps,
            'iou': [iou(ref, msk) for msk in masks]
        }))
        if save_dir:
            path = self.path / save_dir
            path.mkdir(exist_ok=True, parents=True)
            save_mask(ref, path / Path(m).stem, filetype)
    self.gt[method] = refs
    self.df_res = pd.concat(results)
    self.df_agg = self.df_res.groupby('exp').agg(average_iou=('iou', 'mean'), std_iou=('iou', 'std'))
    if save_dir:
        # `path` was set in the loop above whenever save_dir is truthy
        self.df_res.to_csv(path.parent / f'{method}_vs_experts.csv', index=False)
        self.df_agg.to_csv(path.parent / f'{method}_vs_experts_agg.csv', index=False)
def train_epoch(self, stats, epoch, mb, lrs):
    """Run one training epoch, appending loss/metric to `stats` and refreshing the master-bar graph each batch."""
    it = epoch * len(self.train_batches)
    self.model.train(True)
    # One learning rate per batch, zipped with the batches themselves
    for lr, batch in zip(lrs, progress_bar(self.train_batches, parent=mb)):
        inputs, targets = self.get_batch(batch)
        outputs = self.model(inputs)
        metric_value = self.metric(outputs, targets)
        loss = self.criterion(outputs, targets)
        it += 1
        stats["train_it"].append(it)
        stats["train_loss"].append(loss.item())  # loss.item() * inputs.size(0)
        stats["train_metric"].append(metric_value)
        mb.update_graph([[stats["train_it"], stats["train_loss"]],
                         [stats["train_it"], stats["train_metric"]],
                         [stats["valid_it"], stats["valid_loss"]],
                         [stats["valid_it"], stats["valid_metric"]]])
        self.backward(lr, loss)
    return stats
def generate_tifs(src, dest, learn, size, tag=None, max_imgs=None):
    """Run `learn` over every file in `src`, writing predictions to `dest`; per-file errors are printed, not raised."""
    # Dispatch on file extension instead of an if/elif chain
    handlers = {'.czi': czi_predict_images, '.tif': tif_predict_images}
    for fn in progress_bar(src):
        category = fn.parts[-3]
        predict = handlers.get(fn.suffix)
        try:
            if predict is not None:
                predict(learn, fn, dest, category, size=size, tag=tag, max_imgs=max_imgs)
        except Exception as e:
            # Best-effort batch processing: report and move on
            print(f'exception with {fn.stem}')
            print(e)
def parallel(f, items, *args, n_workers=defaults.cpus, total=None, progress=None, pause=0, timeout=None,
             chunksize=1, **kwargs):
    "Applies `func` in parallel to `items`, using `n_workers`"
    # Default to showing progress only when fastprogress is importable
    if progress is None: progress = progress_bar is not None
    with ProcessPoolExecutor(n_workers, pause=pause) as ex:
        results = ex.map(f, items, *args, timeout=timeout, chunksize=chunksize, **kwargs)
        if progress:
            results = progress_bar(results, total=len(items) if total is None else total, leave=False)
        return L(results)
def _fit_epoch(self, mb: ConsoleMasterBar) -> None:
    """Fit a single epoch

    Args:
        mb (fastprogress.master_bar): primary progress bar
    """
    # Keep batch-norm statistics frozen while training
    self.model = freeze_bn(self.model.train())

    pb = progress_bar(self.train_loader, parent=mb)
    for x, target in pb:
        x, target = self.to_cuda(x, target)

        # Forward, then backprop
        batch_loss = self._get_loss(x, target)
        self._backprop_step(batch_loss)
        # Per-batch LR schedule
        self.scheduler.step()

        pb.comment = f"Training loss: {batch_loss.item():.4}"
        self.step += 1
    self.epoch += 1
def validate(dataloader, model, loss_fn, device, master_bar):
    """Compute VAE losses on the validation set.

    Args:
        dataloader (DataLoader): Torch DataLoader object to load data
        model (nn.Module): Torch model to evaluate
        loss_fn: Torch loss function returning (total, mse, kld) terms
        device (torch.device): Torch device to use for evaluation
        master_bar (fastprogress.master_bar): Will be iterated over to draw
            batches and show validation progress

    Returns:
        float, float, float: Mean total loss, mean MSE loss and mean KLD
        loss over the validation set.
    """
    total_losses, mse_losses, kld_losses = [], [], []
    model.eval()
    with torch.no_grad():
        for x, y in fastprogress.progress_bar(dataloader, parent=master_bar):
            # Reconstruct the batch and score it against the input itself
            y_hat, mu, logvar = model(x.to(device))
            total_loss, mse_loss, kld_loss = loss_fn(y_hat, x.to(device), mu, logvar)
            total_losses.append(total_loss.item())
            mse_losses.append(mse_loss.item())
            kld_losses.append(kld_loss.item())
    return np.mean(total_losses), np.mean(mse_losses), np.mean(kld_losses)
def create_ground_truth(train: pd.DataFrame):
    """Build a multi-hot (n_samples, 264) label frame from primary, secondary and background species.

    Returns:
        pd.DataFrame indexed by the .wav filename, one column per ebird code.
    """
    import ast  # local import: only this function needs it

    labels = np.zeros((len(train), 264), dtype=int)
    for i, row in progress_bar(train.iterrows(), total=len(train)):
        # Primary label
        labels[i, BIRD_CODE[row.ebird_code]] = 1
        # SECURITY FIX: `secondary_labels` is a stringified Python list read
        # from data — parse it with ast.literal_eval instead of eval(), which
        # would execute arbitrary expressions embedded in the CSV.
        for sl in ast.literal_eval(row.secondary_labels):
            if NAME2CODE.get(sl) is not None:
                labels[i, BIRD_CODE[NAME2CODE[sl]]] = 1
        background = row["background"]
        if isinstance(background, str):
            # NOTE(review): this pattern requires a '>' immediately before the
            # closing parenthesis — confirm that matches the background format.
            academic_names = re.findall(r"\((.*>)\)", background)
            academic_names = list(
                filter(lambda x: x is not None,
                       map(lambda x: SCINAME2CODE.get(x), academic_names)))
            for bl in academic_names:
                labels[i, BIRD_CODE[bl]] = 1
    columns = list(BIRD_CODE.keys())
    # Index by the .wav counterpart of each source .mp3 file
    index = train["filename"].map(lambda x: x.replace(".mp3", ".wav")).values
    return pd.DataFrame(labels, index=index, columns=columns)
def parallel(f, items, *args, n_workers=defaults.cpus, total=None, progress=None, pause=0, threadpool=False,
             timeout=None, chunksize=1, **kwargs):
    "Applies `func` in parallel to `items`, using `n_workers`"
    # Threads or processes, depending on `threadpool`
    executor_cls = ThreadPoolExecutor if threadpool else ProcessPoolExecutor
    with executor_cls(n_workers, pause=pause) as ex:
        mapped = ex.map(f, items, *args, timeout=timeout, chunksize=chunksize, **kwargs)
        if progress and progress_bar:
            mapped = progress_bar(mapped, total=len(items) if total is None else total, leave=False)
        return L(mapped)
def epoch_train(self, model, train_iterator, optimizer, loss_criteria, device, mb):
    '''
    Training routine in each epoch

    Returns:
        train_loss
    '''
    model.train()
    training_loss = 0
    for batch, (images, labels, _) in enumerate(progress_bar(train_iterator, parent=mb)):
        # Move inputs and targets to the device
        images, labels = images.to(device), labels.to(device)
        # Clear previous gradients
        optimizer.zero_grad()
        # Forward pass through the model
        pred = model(images)
        loss = loss_criteria(pred, labels)
        # Backpropagation
        loss.backward()
        # Update parameters
        optimizer.step()
        # Update training loss after each batch
        training_loss += loss.item()
        mb.child.comment = f'Training loss: {round(training_loss/(batch + 1), 5)}'
    del images, labels, loss
    return training_loss / len(train_iterator)
def save_tiles(tile_size, untiled_files, num_tiles=5, scale=4, threshold=100):
    """Build HR/LR/LR-upsampled ROI tiles of `tile_size` for the train and valid splits."""
    for sub_dir in ['train', 'valid']:
        print(f'\n\nbuild tiles {sub_dir}/{tile_size}\n\n')
        hr_ROI = dpath / f'roi_hr_{tile_size}' / sub_dir
        lr_ROI = dpath / f'roi_lr_{tile_size}' / sub_dir
        lr_up_ROI = dpath / f'roi_lr_up_{tile_size}' / sub_dir
        print('Creating ROIs with tile size ' + str(tile_size))
        hrdir = hr_path / sub_dir
        lrdir = lr_path / sub_dir
        # LR files share the HR file's name in the parallel directory
        for hr_fn in progress_bar(list(hrdir.iterdir())):
            lr_fn = lrdir / hr_fn.name
            helpers.tif_to_tiles(lr_fn, hr_fn, hr_fn.stem,
                                 hr_ROI, lr_up_ROI, lr_ROI,
                                 size=tile_size, num_tiles=num_tiles,
                                 scale=scale, threshold=threshold,
                                 untiled_ls=untiled_files)