def main(
        model_path=None,
        nms_threshold_rel=0.25,
        cuda_id=0,
        augment=True,
        #root_dir = Path.home() / 'workspace/WormData/screenings/CeNDR/MaskedVideos',
        data_root_dir=Path.home() / 'workspace/WormData/screenings/pesticides_adam/Syngenta/MaskedVideos/',
        save_dir_root=Path.home() / 'workspace/WormData/egg_laying/plates/predictions/syngenta',
        only_full_eggs=False):
    if model_path is None:
        bn = 'worm-eggs-adam-masks+Feggs+roi128+hard-neg-5_clf+unet-simple_maxlikelihood_20190808_151948_adam_lr0.000128_wd0.0_batch64'
        model_path = Path().home() / 'workspace/localization/results/locmax_detection/eggs/' / bn.partition('+F')[0] / bn / 'model_best.pth.tar'
    else:
        model_path = Path(model_path)
    assert model_path.exists()

    bn = model_path.parent.name
    model_args = dict(nms_threshold_abs=0.,
                      nms_threshold_rel=nms_threshold_rel,
                      pad_mode='reflect')

    device = get_device(cuda_id)
    model, epoch = load_model(model_path, **model_args)
    model = model.to(device)

    save_subdir = f'AUG_{bn}' if augment else bn
    save_dir = Path(save_dir_root) / save_subdir
    save_dir.mkdir(parents=True, exist_ok=True)

    mask_files = [x for x in Path(data_root_dir).rglob('*.hdf5')
                  if not x.name.startswith('.')]
    feats_files = [Path(str(x).replace('/MaskedVideos/', '/Results/')[:-5] + '_featuresN.hdf5')
                   for x in mask_files]

    files2check = [d for d in zip(mask_files, feats_files) if d[1].exists()]
    random.shuffle(files2check)

    #mask_file = Path.home() / 'workspace/WormData/screenings/CeNDR/MaskedVideos/CeNDR_Set1_020617/N2_worms10_food1-10_Set3_Pos4_Ch3_02062017_123419.hdf5'
    for mask_file, feats_file in tqdm(files2check):
        extract_eggs_from_file(mask_file,
                               feats_file,
                               model,
                               save_dir,
                               augment=augment,
                               only_full_eggs=only_full_eggs)
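
# A minimal sketch of the MaskedVideos -> Results mapping used above to locate
# Tierpsy feature files next to each masked video; the example path below is
# hypothetical.
from pathlib import Path

_example_mask = Path('/data/MaskedVideos/set1/plate01.hdf5')
_example_feats = Path(str(_example_mask).replace('/MaskedVideos/', '/Results/')[:-5] + '_featuresN.hdf5')
# -> /data/Results/set1/plate01_featuresN.hdf5 ([:-5] strips the '.hdf5' suffix)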
def model2gpus(model, cuda_id):
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model)
    batch_size = max(1, torch.cuda.device_count())
    device = get_device(cuda_id)
    model = model.to(device)
    return model, device, batch_size
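
# Hedged usage sketch for model2gpus: the helper wraps the model in
# nn.DataParallel when several GPUs are visible and returns the GPU count,
# presumably intended as a multiplier for the base batch size. The stand-in
# model and base_batch_size below are illustrative, not part of the original.
import torch.nn as nn

def _model2gpus_example(base_batch_size=64, cuda_id=0):
    model = nn.Conv2d(1, 1, 3)  # stand-in for a real network
    model, device, n_gpus = model2gpus(model, cuda_id)
    effective_batch_size = base_batch_size * n_gpus
    return model, device, effective_batch_size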
def main(
        data_type='limphocytes-20x',
        flow_type='limphocytes',
        root_model_dir=Path.home() / 'workspace/localization/results/locmax_detection/limphocytes/20x/limphocytes-20x',
        checkpoint_name='model_best.pth.tar',
        max_dist=10,
        thresh2check=np.arange(0.05, 1, 0.05),
        cuda_id=0,
        ):
    device = get_device(cuda_id)

    data_args = data_types[data_type]
    root_data_dir = data_args['root_data_dir']
    flow_args = flow_types[flow_type]

    root_model_dir = Path(root_model_dir)

    train_flow = CoordFlow(root_data_dir / 'train', **flow_args, is_preloaded=True)
    val_flow = CoordFlow(root_data_dir / 'validation', **flow_args, is_preloaded=True)

    model_paths = root_model_dir.rglob(checkpoint_name)
    model_paths = list(model_paths)
    #model_paths = [x for x in model_paths if 'maxlikelihood' in x.parent.name]

    results = {}
    for model_path in tqdm.tqdm(model_paths):
        res = get_scores_with_best_threshold(model_path,
                                             device,
                                             thresh2check,
                                             max_dist,
                                             train_flow,
                                             val_flow)
        bn = model_path.parent.name
        results[bn] = res

    save_name = root_model_dir / 'scores.p'
    with open(save_name, 'wb') as fid:
        pickle.dump(results, fid)
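
# Example of reading back the scores pickled by main() above; the keys are the
# checkpoint parent-directory names, one entry per evaluated model.
import pickle
from pathlib import Path

def load_scores(root_model_dir):
    with open(Path(root_model_dir) / 'scores.p', 'rb') as fid:
        return pickle.load(fid)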
def train(data_type='woundhealing-v2-mix',
          flow_type=None,
          model_name='unet-simple',
          use_classifier=False,
          loss_type='l1smooth-G1.5',
          cuda_id=0,
          log_dir=None,
          batch_size=256,
          n_epochs=2000,
          save_frequency=200,
          num_workers=0,
          root_data_dir=None,
          optimizer_name='adam',
          lr_scheduler_name='',
          lr=1e-5,
          weight_decay=0.0,
          momentum=0.9,
          roi_size=64,
          is_preloaded=False,
          hard_mining_freq=None,
          model_path_init=None,
          train_samples_per_epoch=40960):
    data_args = data_types[data_type]
    dflt_root_data_dir = data_args['root_data_dir']
    n_ch_in = data_args['n_ch_in']
    n_ch_out = data_args['n_ch_out']

    if flow_type is None:
        flow_type = data_args['dflt_flow_type']
    flow_args = flow_types[flow_type]

    if log_dir is None:
        if 'log_prefix' in data_args:
            log_dir = LOG_DIR_DFLT / data_args['log_prefix'] / data_type
        else:
            log_dir = LOG_DIR_DFLT / data_type

    if root_data_dir is None:
        root_data_dir = dflt_root_data_dir
    root_data_dir = Path(root_data_dir)

    train_dir = root_data_dir / 'train'
    test_dir = root_data_dir / 'validation'

    if '-merged' in data_type:
        flow_func = CoordFlowMerged
    else:
        flow_func = CoordFlow
    print(flow_func)
    print(root_data_dir)

    train_flow = flow_func(train_dir,
                           samples_per_epoch=train_samples_per_epoch,
                           roi_size=roi_size,
                           **flow_args,
                           is_preloaded=is_preloaded)
    val_flow = flow_func(test_dir,
                         roi_size=roi_size,
                         **flow_args,
                         is_preloaded=is_preloaded)

    model = get_model(model_name, n_ch_in, n_ch_out, loss_type)

    if model_path_init is not None:
        model_name += '-pretrained'
        state = torch.load(model_path_init, map_location='cpu')
        model.load_state_dict(state['state_dict'])

    device = get_device(cuda_id)
    optimizer = get_optimizer(optimizer_name,
                              model,
                              lr=lr,
                              momentum=momentum,
                              weight_decay=weight_decay)
    lr_scheduler = get_scheduler(lr_scheduler_name, optimizer)

    date_str = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
    hard_mining_str = '' if hard_mining_freq is None else f'+hard-neg-{hard_mining_freq}'
    lr_scheduler_name = '+' + lr_scheduler_name if lr_scheduler_name else ''

    save_prefix = f'{data_type}+F{flow_type}+roi{roi_size}{hard_mining_str}_{model_name}_{loss_type}_{date_str}'
    save_prefix = f'{save_prefix}_{optimizer_name}{lr_scheduler_name}_lr{lr}_wd{weight_decay}_batch{batch_size}'

    train_locmax(save_prefix,
                 model,
                 device,
                 train_flow,
                 val_flow,
                 optimizer,
                 lr_scheduler=lr_scheduler,
                 log_dir=log_dir,
                 batch_size=batch_size,
                 num_workers=num_workers,
                 hard_mining_freq=hard_mining_freq,
                 n_epochs=n_epochs,
                 save_frequency=save_frequency)
def main(
        data_type='v3',  #'v1-0.5x',
        model_name='',
        loss_name='BCE',  #MSEp2
        cuda_id=0,
        batch_size=4,
        n_epochs=200,
        samples_per_epoch=2500,
        num_workers=1,
        save_frequency=200,
        lr=1e-5,
        weight_decay=0.0,
        optimizer_name='adam',
        lr_scheduler_name='',
        log_dir=None,
        warmup_epochs=0,
        warmup_factor=1 / 1000,
        resume_path=None):
    root_dir = Path.home() / 'workspace/WormData/egg_laying/single_worm'
    if log_dir is None:
        log_dir = root_dir / 'results'

    data_d = data_type
    flow_train_argkws = {}
    if 'mixed-setups' in data_type:
        flow_train_argkws = dict(snippet_size=7,
                                 max_offset=12,
                                 max_offset_per_frame=5,
                                 zoom_range=(0.9, 1.1),
                                 scale_int=(0, 255),
                                 erosion_kernel_size=5,
                                 int_aug_offset=(-0.1, 0.1),
                                 int_aug_expansion=(0.9, 1.1),
                                 convolve_egg_flag_kernel=[0.1, 0.4, 1., 0.4, 0.1])
    elif data_type.endswith('+hard'):
        data_d = data_type[:-5]
        flow_train_argkws = dict(max_offset_per_frame=50,
                                 motion_blur_range=(5, 45),
                                 zoom_range=(0.9, 1.1),
                                 int_aug_offset=(-0.2, 0.2),
                                 int_aug_expansion=(0.75, 1.2),
                                 convolve_egg_flag_kernel=[0, 0.15, 1., 0.3, 0.15])

    if data_d == 'v1-0.5x':
        data_dir = root_dir / 'data/v1_0.5x/'
    else:
        data_dir = root_dir / 'data' / data_d

    if not data_dir.exists():
        raise ValueError(f'Not implemented `{data_type}`. Directory `{data_dir}` does not exist.')

    mm = model_name.partition('+')[0]
    #model = get_model(mm, n_in = 1, n_out = 2)
    model = get_model(mm, n_in=1, n_out=1)

    if 'pretrained' in model_name:
        bn = 'worm-eggs-adam-masks+Feggs+roi128+hard-neg-5_clf+unet-simple_maxlikelihood_20190808_151948_adam_lr0.000128_wd0.0_batch64'
        pretrained_path = Path().home() / 'workspace/localization/results/locmax_detection/eggs/worm-eggs-adam-masks/' / bn / 'model_best.pth.tar'
        model = from_pretrained(model, pretrained_path)

    if 'frozen' in model_name:
        print('`mapping_network` frozen')
        for p in model.mapping_network.parameters():
            p.requires_grad = False

    if resume_path is not None:
        state = torch.load(resume_path, map_location='cpu')
        model.load_state_dict(state['state_dict'])
        model_name += '+R'

    train_dir = data_dir / 'train'
    test_dir = data_dir / 'test'
    train_flow = SnippetsRandomFlow(train_dir,
                                    samples_per_epoch=samples_per_epoch,
                                    **flow_train_argkws)
    val_flow = SnippetsFullFlow(test_dir)

    if loss_name == 'BCE':
        _criterion = nn.BCEWithLogitsLoss()

        def criterion(prediction, target):
            return _criterion(prediction.squeeze(-1), target)
    elif loss_name == 'BCEp2':
        _criterion = nn.BCEWithLogitsLoss()
        pad = 2

        def criterion(prediction, target):
            return _criterion(prediction[:, pad:-pad].squeeze(-1), target[:, pad:-pad])
    elif loss_name == 'MSEp2':
        pad = 2
        _criterion = nn.MSELoss()

        def criterion(prediction, target):
            return _criterion(prediction[:, pad:-pad].squeeze(-1), target[:, pad:-pad])
    else:
        # Guard added: an unknown loss_name would otherwise raise a NameError
        # only once `criterion` is first used inside the training loop.
        raise ValueError(f'Not implemented `{loss_name}`.')

    optimizer = get_optimizer(optimizer_name,
                              model,
                              lr,
                              weight_decay,
                              weigth_decay_no_bias=True)
    lr_scheduler = get_scheduler(lr_scheduler_name, optimizer)
    device = get_device(cuda_id)

    train_loader = DataLoader(train_flow,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=num_workers)
    val_loader = DataLoader(val_flow,
                            batch_size=1,
                            shuffle=True,
                            num_workers=num_workers)

    model = model.to(device)

    date_str = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
    save_prefix = f'WT+{data_type}_{model_name}_{loss_name}_{date_str}_{optimizer_name}-{lr_scheduler_name}_lr{lr}_wd{weight_decay}_batch{batch_size}'
    if warmup_epochs > 0:
        save_prefix += f'_warmup{warmup_epochs}'

    log_dir = log_dir / save_prefix
    logger = SummaryWriter(log_dir=str(log_dir))

    best_loss = 1e8
    pbar_epoch = tqdm.trange(n_epochs)

    if warmup_epochs > 0:
        warmup_iters = len(train_loader) * warmup_epochs - 1
        warmup_lr_scheduler = get_warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)

    for epoch in pbar_epoch:
        if epoch >= warmup_epochs:
            warmup_lr_scheduler = None

        train_one_epoch(save_prefix,
                        model,
                        criterion,
                        optimizer,
                        warmup_lr_scheduler,
                        train_loader,
                        device,
                        epoch,
                        logger)
        if lr_scheduler is not None and epoch >= warmup_epochs:
            lr_scheduler.step()

        val_loss = evaluate_one_epoch(save_prefix,
                                      model,
                                      criterion,
                                      val_loader,
                                      device,
                                      epoch,
                                      logger)

        desc = f'epoch {epoch} , val_loss={val_loss}'
        pbar_epoch.set_description(desc=desc, refresh=False)

        state = {
            'epoch': epoch,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
        is_best = val_loss < best_loss
        if is_best:
            best_loss = val_loss
        save_checkpoint(state, is_best, save_dir=str(log_dir))

        if (epoch + 1) % save_frequency == 0:
            checkpoint_path = log_dir / f'checkpoint-{epoch}.pth.tar'
            torch.save(state, checkpoint_path)
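
# A minimal sketch of a linear warm-up scheduler compatible with the
# get_warmup_lr_scheduler call above, assuming it follows the common
# torchvision reference-training pattern; the actual project helper may differ.
from torch.optim.lr_scheduler import LambdaLR

def warmup_lr_scheduler_sketch(optimizer, warmup_iters, warmup_factor):
    def f(it):
        # Ramp the LR multiplier linearly from warmup_factor to 1.0
        # over the first warmup_iters optimizer steps.
        if it >= warmup_iters:
            return 1.0
        alpha = float(it) / warmup_iters
        return warmup_factor * (1 - alpha) + alpha
    return LambdaLR(optimizer, f)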
    def __getitem__(self, ind):
        t1, t2 = self.times2read[ind]
        (xl, xr), (yl, yr) = self.valid_size
        imgs = self.masks[t1:t2, xl:xr, yl:yr]
        X = torch.from_numpy(imgs)
        X = X.float() / 255
        return t1, X

    def __len__(self):
        return len(self.times2read)


if __name__ == '__main__':
    cuda_id = 0
    batch_size = 100
    device = get_device(cuda_id)

    bn2check = [
        'WT+v2_unet-v1+pretrained_20190819_170413_adam-_lr1e-05_wd0.0_batch4',
        'WT+v2+hard-neg_unet-v1_20190823_153141_adam-_lr0.0001_wd0.0_batch4',
        'WT+v2+hard-neg-2_unet-v3-bn_20190906_113242_adam-_lr0.001_wd0.0_batch4',
        'WT+v2+hard-neg-2_unet-v3_20190907_135706_adam-_lr0.0001_wd0.0_batch4'
    ]
    epoch2check = 199
    for bn in bn2check:
        #model_path = Path.home() / 'workspace/WormData/egg_laying/results/' / bn / 'model_best.pth.tar'
        model_path = Path.home() / 'workspace/WormData/egg_laying/results/' / bn / f'checkpoint-{epoch2check}.pth.tar'
        root_save_dir = Path.home(
def main(cuda_id=0, screen_type='Drug_Screening'):
    #where the masked files are located
    root_dir = Path.home() / 'workspace/WormData/screenings' / screen_type / 'MaskedVideos/'

    #bn = 'worm-eggs-adam+Feggsonly+roi96+hard-neg-5_unet-simple_maxlikelihood_20190717_224214_adam_lr0.000128_wd0.0_batch128'
    #nms_threshold_rel = 0.2
    #bn = 'worm-eggs-adam+Feggs+roi128+hard-neg-5_clf+unet-simple_maxlikelihood_20190803_225943_adam_lr0.000128_wd0.0_batch64'
    bn = 'worm-eggs-adam-masks+Feggs+roi128+hard-neg-5_clf+unet-simple_maxlikelihood_20190808_151948_adam_lr0.000128_wd0.0_batch64'
    nms_threshold_rel = 0.25
    model_path = Path().home() / 'workspace/localization/results/locmax_detection/eggs/' / bn.partition('+F')[0] / bn / 'model_best.pth.tar'

    #where the predictions are going to be stored
    save_dir = Path.home() / 'workspace/localization/predictions/worm_eggs/' / screen_type / bn

    model_args = dict(nms_threshold_abs=0.,
                      nms_threshold_rel=nms_threshold_rel,
                      pad_mode='reflect')

    device = get_device(cuda_id)
    model, epoch = load_model(model_path, **model_args)
    model = model.to(device)

    gen = DirTierpsyFlow(root_dir)
    loader = DataLoader(gen,
                        batch_size=1,
                        num_workers=4,
                        collate_fn=collate_simple)

    save_dir = Path(save_dir)
    for batch in tqdm.tqdm(loader):
        for (imgs, mask_file, frames) in zip(*batch):
            preds_l = []
            for frame_number, xin in zip(frames, imgs):
                with torch.no_grad():
                    xin = torch.from_numpy(xin[None])
                    xin = xin.to(device)
                    predictions = model(xin)
                predictions = predictions[0]
                res = [predictions[x].detach().cpu().numpy()
                       for x in ['coordinates', 'scores_abs', 'scores_rel']]
                res = [x[:, None] if x.ndim == 1 else x for x in res]
                res = np.concatenate(res, axis=1)
                preds_l += [(frame_number, *cc) for cc in zip(*res.T)]

            preds_df = pd.DataFrame(preds_l,
                                    columns=['frame_number', 'x', 'y', 'score_abs', 'score_rel'])

            save_name = Path(str(mask_file).replace(str(root_dir), str(save_dir)))
            save_name = save_name.parent / (save_name.stem + '_eggs-preds.csv')
            save_name.parent.mkdir(exist_ok=True, parents=True)
            preds_df.to_csv(save_name, index=False)
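
# Example of loading the per-video predictions written above; the columns are
# exactly those written to preds_df. The default threshold mirrors the
# nms_threshold_rel used at inference; csv_path is hypothetical.
import pandas as pd

def load_egg_predictions(csv_path, min_score_rel=0.25):
    preds = pd.read_csv(csv_path)  # frame_number, x, y, score_abs, score_rel
    return preds[preds['score_rel'] >= min_score_rel]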
def train(data_type='woundhealing-contour',
          flow_type=None,
          model_name='unet-flat-48',
          loss_type='BCE',
          cuda_id=0,
          log_dir=None,
          batch_size=56,
          n_epochs=2000,
          save_frequency=200,
          num_workers=0,
          root_data_dir=None,
          optimizer_name='adam',
          lr_scheduler_name='',
          lr=64e-5,
          weight_decay=0.0,
          momentum=0.9,
          roi_size=256,
          model_path_init=None,
          train_samples_per_epoch=16384,
          num_folds=5,
          val_fold_id=1,
          val_dist=5):
    data_args = data_types[data_type]
    dflt_root_data_dir = data_args['root_data_dir']
    n_ch_in = data_args['n_ch_in']
    n_ch_out = data_args['n_ch_out']

    if flow_type is None:
        flow_type = data_args['dflt_flow_type']
    flow_args = flow_types[flow_type]

    if log_dir is None:
        if 'log_prefix' in data_args:
            log_dir = LOG_DIR_DFLT / data_args['log_prefix'] / data_type
        else:
            log_dir = LOG_DIR_DFLT / data_type

    if root_data_dir is None:
        root_data_dir = dflt_root_data_dir
    root_data_dir = Path(root_data_dir)

    data_type += f'fold-{val_fold_id}-{num_folds}'
    train_fold_ids = [x + 1 for x in range(num_folds) if x + 1 != val_fold_id]

    train_flow = MasksFlow(root_data_dir,
                           samples_per_epoch=train_samples_per_epoch,
                           roi_size=roi_size,
                           **flow_args,
                           folds2include=train_fold_ids,
                           num_folds=num_folds)
    val_flow = MasksFlow(root_data_dir,
                         roi_size=roi_size,
                         **flow_args,
                         folds2include=val_fold_id,
                         num_folds=num_folds)

    model = get_mapping_network(n_ch_in,
                                n_ch_out,
                                **model_types[model_name],
                                output_activation='sigmoid')
    criterion = get_criterion(loss_type)

    if model_path_init is not None:
        model_name += '-pretrained'
        state = torch.load(model_path_init, map_location='cpu')
        model.load_state_dict(state['state_dict'])

    device = get_device(cuda_id)
    optimizer = get_optimizer(optimizer_name,
                              model,
                              lr=lr,
                              momentum=momentum,
                              weight_decay=weight_decay)
    lr_scheduler = get_scheduler(lr_scheduler_name, optimizer)

    date_str = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
    lr_scheduler_name = '+' + lr_scheduler_name if lr_scheduler_name else ''

    save_prefix = f'{data_type}+F{flow_type}+roi{roi_size}_{model_name}_{loss_type}_{date_str}'
    save_prefix = f'{save_prefix}_{optimizer_name}{lr_scheduler_name}_lr{lr}_wd{weight_decay}_batch{batch_size}'

    train_mask(save_prefix,
               model,
               device,
               criterion,
               train_flow,
               val_flow,
               optimizer,
               log_dir,
               lr_scheduler,
               batch_size,
               n_epochs,
               num_workers)
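
# Worked example of the fold split above: with the default num_folds=5 and
# val_fold_id=1, fold 1 is held out for validation and the rest are trained on.
_num_folds, _val_fold_id = 5, 1
_train_fold_ids = [x + 1 for x in range(_num_folds) if x + 1 != _val_fold_id]
assert _train_fold_ids == [2, 3, 4, 5]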