def manual_check(self):
    """Pool examples from a toy batch with both ROIAlign implementations and plot them to file."""
    self.ra_ext = utils.import_module("ra_ext", 'custom_extensions/roi_align/roi_align.py')

    # actual mrcnn mask input
    from datasets.toy import configs
    cf = configs.Configs()
    cf.exp_dir = "datasets/toy/experiments/dev/"
    cf.plot_dir = cf.exp_dir + "plots"
    os.makedirs(cf.exp_dir, exist_ok=True)
    cf.fold = 0
    cf.n_workers = 1
    logger = utils.get_logger(cf.exp_dir)
    data_loader = utils.import_module('data_loader', os.path.join("datasets", "toy", 'data_loader.py'))
    batch_gen = data_loader.get_train_generators(cf, logger=logger)
    batch = next(batch_gen['train'])

    # alternatively, a hand-built synthetic batch with a known rectangular mask:
    roi_mask = np.zeros((1, 320, 200))
    bb_target = np.array([50, 40, 90, 120]).astype("int")
    roi_mask[:, bb_target[0] + 1:bb_target[2] + 1, bb_target[1] + 1:bb_target[3] + 1] = 1.
    # batch = {"roi_masks": np.array([np.array([roi_mask, roi_mask]), np.array([roi_mask])]),
    #          "bb_target": [[bb_target, bb_target + 25], [bb_target - 20]]}

    batch_boxes = [torch.tensor(batch_el_boxes).cuda().float() for batch_el_boxes in batch["bb_target"]]

    for b in range(len(batch_boxes)):
        roi_masks = batch["roi_masks"][b]
        if roi_masks.sum() > 0:
            boxes = batch_boxes[b]
            roi_masks = torch.tensor(roi_masks).cuda().type(dtype=torch.float32)
            box_ids = torch.arange(roi_masks.shape[0]).cuda().unsqueeze(1).type(dtype=torch.float32)
            # pool with torchvision's ROIAlign ...
            masks = tv.ops.roi_align(roi_masks, [boxes], cf.mask_shape)
            masks = torch.round(masks.squeeze(1))
            # ... and with the custom extension (expects each box prefixed by its batch id)
            masks_own = self.ra_ext.roi_align_2d(roi_masks, torch.cat((box_ids, boxes), dim=1), cf.mask_shape)
            masks_own = torch.round(masks_own.squeeze(1))
            boxes = boxes.type(torch.int)

            for mix, mask in enumerate(masks):
                fig = plg.plt.figure()
                ax = fig.add_subplot()
                ax.imshow(roi_masks[mix][0].cpu().numpy(), cmap="gray", vmin=0.)
                ax.axis("off")
                # draw the box under both coordinate interpretations to verify orientation:
                # cyan assumes (y1, x1, y2, x2), red assumes (x1, y1, x2, y2).
                y1, x1, y2, x2 = boxes[mix]
                bbox = plg.mpatches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=0.9, edgecolor="c", facecolor='none')
                ax.add_patch(bbox)
                x1, y1, x2, y2 = boxes[mix]
                bbox = plg.mpatches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=0.9, edgecolor="r", facecolor='none')
                ax.add_patch(bbox)

                debug_dir = Path("/home/gregor/Documents/regrcnn/datasets/toy/experiments/debugroial")
                os.makedirs(debug_dir, exist_ok=True)
                plg.plt.savefig(debug_dir / "mask_b{}_{}.png".format(b, mix))
                plg.plt.imsave(debug_dir / "mask_b{}_{}_pooled_tv.png".format(b, mix), mask.cpu().numpy(), cmap="gray", vmin=0.)
                plg.plt.imsave(debug_dir / "mask_b{}_{}_pooled_own.png".format(b, mix), masks_own[mix].cpu().numpy(), cmap="gray", vmin=0.)
    return
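# Hedged sketch (not from the original file): a minimal standalone check of
# torchvision.ops.roi_align on a synthetic mask, analogous to manual_check above.
# torchvision expects boxes as (x1, y1, x2, y2), while this codebase appears to
# store (y1, x1, y2, x2) -- that discrepancy is what the two rectangles above visualize.
import torch
import torchvision as tv

mask = torch.zeros(1, 1, 64, 64)                 # (N, C, H, W)
mask[:, :, 10:30, 20:40] = 1.                    # filled block: rows 10..30, cols 20..40
boxes = torch.tensor([[20., 10., 40., 30.]])     # (x1, y1, x2, y2), torchvision order
pooled = tv.ops.roi_align(mask, [boxes], output_size=(14, 14))
assert pooled.shape == (1, 1, 14, 14)
print(pooled.mean())                             # close to 1.0 when the box matches the block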
def main():
    args = parser.parse_args()
    assert args.ext_len >= 0, 'extended context length must be non-negative'
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Get logger
    logging = get_logger(os.path.join(args.work_dir, 'eval-log.txt'),
                         log_=not args.no_log)

    # Load dataset
    corpus = get_lm_corpus(args.data, args.dataset, use_bpe=args.bpe)
    ntokens = len(corpus.vocab)

    # Load the best saved model.
    with open(os.path.join(args.work_dir, 'model-best.pt'), 'rb') as f:
        model = torch.load(f)
    model_tokens = model.n_token if hasattr(model, 'n_token') else model.module.n_token
    assert model_tokens == ntokens, 'vocab size mismatch, did you mean `--bpe`?'
    model = model.to(device)

    logging('Evaluating with bsz {} tgt_len {} ext_len {} mem_len {} clamp_len {}'.format(
        args.batch_size, args.tgt_len, args.ext_len, args.mem_len, args.clamp_len))

    if hasattr(model, 'reset_length'):
        model.reset_length(args.tgt_len, args.ext_len, args.mem_len)
    else:
        model.module.reset_length(args.tgt_len, args.ext_len, args.mem_len)
    if args.clamp_len > 0:
        model.clamp_len = args.clamp_len
    if args.same_length:
        model.same_length = True

    log_str = ''
    # Run on the requested splits.
    for split in ('valid', 'test'):
        if args.split in (split, 'all'):
            it = corpus.get_iterator(split, args.batch_size, args.tgt_len,
                                     device=device, ext_len=args.ext_len)
            log_str += format_log(args, *evaluate(model, it, split), split)

    logging('=' * 100)
    logging(log_str)
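# Hedged sketch of the format_log helper used above (it is defined elsewhere in
# the repo; name and exact signature here are illustrative only). Transformer-XL
# style reporting converts the mean loss to bits per character for the
# character-level corpora (enwik8, text8) and to perplexity otherwise.
import math

def format_log_sketch(args, loss, split):
    if args.dataset in ('enwik8', 'text8'):
        return '| {0} loss {1:5.2f} | {0} bpc {2:9.5f}\n'.format(split, loss, loss / math.log(2))
    return '| {0} loss {1:5.2f} | {0} ppl {2:9.3f}\n'.format(split, loss, math.exp(loss))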
    copied_files = os.listdir(target_dir)
    t = utils.get_formatted_duration(time.time() - start_time)
    logger.info("\ncopying and unpacking data set finished using {} threads.\n{} files in target dir: {}. Took {}\n".format(
        n_threads, len(copied_files), target_dir, t))


if __name__ == "__main__":
    total_stime = time.time()

    cf_file = utils.import_module("cf", "configs.py")
    cf = cf_file.configs()
    cf.created_fold_id_pickle = False
    cf.exp_dir = "dev/"
    cf.plot_dir = cf.exp_dir + "plots"
    os.makedirs(cf.exp_dir, exist_ok=True)
    cf.fold = 0
    logger = utils.get_logger(cf.exp_dir)

    # batch_gen = get_train_generators(cf, logger)
    # train_batch = next(batch_gen["train"])
    test_gen = get_test_generator(cf, logger)
    test_batch = next(test_gen["test"])

    mins, secs = divmod((time.time() - total_stime), 60)
    h, mins = divmod(mins, 60)
    t = "{:d}h:{:02d}m:{:02d}s".format(int(h), int(mins), int(secs))
    print("{} total runtime: {}".format(os.path.split(__file__)[1], t))
parser.add_argument('--work_dir', type=str, required=True,
                    help='path to the work_dir')
parser.add_argument('--no_log', action='store_true',
                    help='do not log the eval result')
parser.add_argument('--same_length', action='store_true',
                    help='set same length attention with masking')
args = parser.parse_args()
assert args.ext_len >= 0, 'extended context length must be non-negative'

device = torch.device("cuda" if args.cuda else "cpu")

# Get logger
logging = get_logger(os.path.join(args.work_dir, 'log.txt'),
                     log_=not args.no_log)

# Load dataset
corpus = get_lm_corpus(args.data, args.dataset)
ntokens = len(corpus.vocab)

va_iter = corpus.get_iterator('valid', args.batch_size, args.tgt_len,
                              device=device, ext_len=args.ext_len)
te_iter = corpus.get_iterator('test', args.batch_size, args.tgt_len,
                              device=device, ext_len=args.ext_len)
    cf = utils.prep_exp(args.exp_source, args.exp_dir, args.server_env, args.use_stored_settings)
    cf.slurm_job_id = args.slurm_job_id
    model = utils.import_module('model', cf.model_path)
    data_loader = utils.import_module('dl', os.path.join(args.exp_source, 'data_loader.py'))
    if folds is None:
        folds = range(cf.n_cv_splits)

    for fold in folds:
        cf.fold_dir = os.path.join(cf.exp_dir, 'fold_{}'.format(fold))
        cf.fold = fold
        cf.resume_to_checkpoint = resume_to_checkpoint
        if not os.path.exists(cf.fold_dir):
            os.mkdir(cf.fold_dir)
        logger = utils.get_logger(cf.fold_dir)
        train(logger)
        cf.resume_to_checkpoint = None
        if args.mode == 'train_test':
            test(logger)

elif args.mode == 'test':
    cf = utils.prep_exp(args.exp_source, args.exp_dir, args.server_env,
                        is_training=False, use_stored_settings=True)
    cf.slurm_job_id = args.slurm_job_id
    model = utils.import_module('model', cf.model_path)
    data_loader = utils.import_module('dl', os.path.join(args.exp_source, 'data_loader.py'))
parser.add_argument('--dev', default=False, action='store_true',
                    help="development mode: shorten everything")

args = parser.parse_args()
args.dataset_name = os.path.join("datasets", args.dataset_name) \
    if "datasets" not in args.dataset_name else args.dataset_name
folds = args.folds
resume = None if args.resume in ['None', 'none'] else args.resume

if args.mode == 'create_exp':
    cf = utils.prep_exp(args.dataset_name, args.exp_dir, args.server_env, use_stored_settings=False)
    logger = utils.get_logger(cf.exp_dir, cf.server_env, -1)
    logger.info('created experiment directory at {}'.format(args.exp_dir))
elif args.mode == 'train' or args.mode == 'train_test':
    cf = utils.prep_exp(args.dataset_name, args.exp_dir, args.server_env, args.use_stored_settings)
    if args.dev:
        folds = [0, 1]
        # the conditional expression binds to the first element only:
        # batch_size is 3 (2D) or 1 (3D); num_epochs 2, min_save_thresh 0, save_n_models 2.
        cf.batch_size, cf.num_epochs, cf.min_save_thresh, cf.save_n_models = \
            3 if cf.dim == 2 else 1, 2, 0, 2
        cf.num_train_batches, cf.num_val_batches, cf.max_val_patients = 5, 1, 1
        cf.test_n_epochs, cf.max_test_patients = cf.save_n_models, 2
        torch.backends.cudnn.benchmark = cf.dim == 3
    else:
        torch.backends.cudnn.benchmark = cf.cuda_benchmark
    if args.data_dest is not None:
        cf.data_dest = args.data_dest
def main():
    parser = argparse.ArgumentParser(description='PyTorch Transformer Language Model')
    parser.add_argument('--data', type=str, default='../data/wikitext-103',
                        help='location of the data corpus')
    parser.add_argument('--dataset', type=str, default='wt103',
                        choices=['wt103', 'lm1b', 'enwik8', 'text8', 'wt2', 'wiki'],
                        help='dataset name')
    parser.add_argument('--split', type=str, default='all',
                        choices=['all', 'valid', 'test'],
                        help='which split to evaluate')
    parser.add_argument('--batch_size', type=int, default=10,
                        help='batch size')
    parser.add_argument('--tgt_len', type=int, default=5,
                        help='number of tokens to predict')
    parser.add_argument('--ext_len', type=int, default=0,
                        help='length of the extended context')
    parser.add_argument('--mem_len', type=int, default=0,
                        help='length of the retained previous heads')
    parser.add_argument('--clamp_len', type=int, default=-1,
                        help='max positional embedding index')
    parser.add_argument('--work_dir', type=str, required=True,
                        help='path to the work_dir')
    parser.add_argument('--no_log', action='store_true',
                        help='do not log the eval result')
    parser.add_argument('--same_length', action='store_true',
                        help='set same length attention with masking')
    parser.add_argument('--bpe', action='store_true', default=False,
                        help='Use BPE instead of traditional vocabulary.')
    args = parser.parse_args()
    assert args.ext_len >= 0, 'extended context length must be non-negative'

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Get logger
    logging = get_logger(os.path.join(args.work_dir, 'eval-log.txt'),
                         log_=not args.no_log)

    # Load dataset
    corpus = get_lm_corpus(args.data, args.dataset, use_bpe=args.bpe)
    ntokens = len(corpus.vocab)

    # Load the best saved model.
    with open(os.path.join(args.work_dir, 'model-best.pt'), 'rb') as f:
        model = torch.load(f)
    model_tokens = model.n_token if hasattr(model, 'n_token') else model.module.n_token
    assert model_tokens == ntokens, 'vocab size mismatch, did you mean `--bpe`?'
    model = model.to(device)

    logging('Evaluating with bsz {} tgt_len {} ext_len {} mem_len {} clamp_len {}'.format(
        args.batch_size, args.tgt_len, args.ext_len, args.mem_len, args.clamp_len))

    if hasattr(model, 'reset_length'):
        model.reset_length(args.tgt_len, args.ext_len, args.mem_len)
    else:
        model.module.reset_length(args.tgt_len, args.ext_len, args.mem_len)
    if args.clamp_len > 0:
        model.clamp_len = args.clamp_len
    if args.same_length:
        model.same_length = True

    # Run on the requested splits.
    for split in ('valid', 'test'):
        if args.split in (split, 'all'):
            it = corpus.get_iterator(split, args.batch_size, args.tgt_len,
                                     device=device, ext_len=args.ext_len)
            logging(format_log(args, *evaluate(model, it, split), split))
if args.large_model_support:
    # Large Model Support (available in IBM WML-CE / PowerAI PyTorch builds):
    # swaps tensors to host memory so models larger than GPU RAM can train.
    torch.cuda.set_enabled_lms(True)

if args.mode == 'train' or args.mode == 'train_test':
    cf = utils.prep_exp(args.exp_source, args.exp_dir, args.server_env, args.use_stored_settings)
    if args.dev:
        folds = [0, 1]
        cf.batch_size, cf.num_epochs, cf.min_save_thresh, cf.save_n_models = 3 if cf.dim == 2 else 1, 1, 0, 2
        cf.num_train_batches, cf.num_val_batches, cf.max_val_patients = 5, 1, 1
        cf.test_n_epochs = cf.save_n_models
        cf.max_test_patients = 2

    cf.data_dest = args.data_dest
    logger = utils.get_logger(cf.exp_dir, cf.server_env)
    logger.info("cudnn benchmark: {}, deterministic: {}.".format(
        torch.backends.cudnn.benchmark, torch.backends.cudnn.deterministic))
    logger.info("sending tensors to CUDA device: {}.".format(
        torch.cuda.get_device_name(args.cuda_device)))
    data_loader = utils.import_module('dl', os.path.join(args.exp_source, 'data_loader.py'))
    model = utils.import_module('mdt_model', cf.model_path)
    logger.info("loaded model from {}".format(cf.model_path))
    if folds is None:
        folds = range(cf.n_cv_splits)

    # MLflow: set up new experiment
    try:
        if args.mlflow_artifacts_uri is not None:
def test(self):
    print("Testing multithreaded iterator.")

    dataset = "toy"
    exp_dir = Path("datasets/{}/experiments/dev".format(dataset))
    cf_file = utils.import_module("cf_file", exp_dir / "configs.py")
    cf = cf_file.Configs()
    dloader = utils.import_module('data_loader', 'datasets/{}/data_loader.py'.format(dataset))
    cf.exp_dir = Path(exp_dir)
    cf.n_workers = 5
    cf.batch_size = 3
    cf.fold = 0
    cf.plot_dir = cf.exp_dir / "plots"
    logger = utils.get_logger(cf.exp_dir, cf.server_env, cf.sysmetrics_interval)
    cf.num_val_batches = "all"
    cf.val_mode = "val_sampling"
    cf.n_workers = 8

    batch_gens = dloader.get_train_generators(cf, logger, data_statistics=False)
    val_loader = batch_gens["val_sampling"]

    # sampling mode: pids may repeat, but none should occur more than twice.
    for epoch in range(4):
        produced_ids = []
        for i in range(batch_gens['n_val']):
            batch = next(val_loader)
            produced_ids.append(batch["pid"])
        uni, cts = np.unique(np.concatenate(produced_ids), return_counts=True)
        assert np.all(cts < 3), "sampling mode: no pid should occur more than twice.\n uni {}, cts {}".format(
            uni[cts > 2], cts[cts > 2])
        # assert len(np.setdiff1d(val_loader.generator.dataset_pids, uni)) == 0, "not all val pids were shown."
        assert len(np.setdiff1d(uni, val_loader.generator.dataset_pids)) == 0, \
            "pids shown that are not in the val set. impossible?"

    # exhaustive mode: one oversized batch per worker, each pid shown exactly once.
    cf.n_workers = os.cpu_count()
    cf.batch_size = int(val_loader.generator.dataset_length / cf.n_workers) + 2
    val_loader = dloader.create_data_gen_pipeline(cf, val_loader.generator._data, do_aug=False,
                                                  sample_pids_w_replace=False, max_batches=None,
                                                  raise_stop_iteration=True)
    for epoch in range(2):
        produced_ids = []
        for b, batch in enumerate(val_loader):
            produced_ids.append(batch["pid"])
        uni, cts = np.unique(np.concatenate(produced_ids), return_counts=True)
        assert np.all(cts == 1), "sampling without replacement: every pid should occur exactly once.\n uni {}, cts {}".format(
            uni[cts > 1], cts[cts > 1])
        assert len(np.setdiff1d(val_loader.generator.dataset_pids, uni)) == 0, "not all val pids were shown."
        assert len(np.setdiff1d(uni, val_loader.generator.dataset_pids)) == 0, \
            "pids shown that are not in the val set. impossible?"
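# Worked example (illustrative numbers, not from the original test): the batch
# size chosen above oversizes each worker's share so that one batch per worker
# covers the whole validation set; raise_stop_iteration=True then ends the
# epoch after a single full pass.
dataset_length, n_workers = 52, 8
batch_size = int(dataset_length / n_workers) + 2   # 6 + 2 = 8
assert n_workers * batch_size >= dataset_length    # 64 >= 52: every pid fits in one pass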
    with open(os.path.join(fold_dir, 'file_list.txt'), 'w') as handle:
        for pid in pids:
            handle.write('{}.npy\n'.format(pid))

    # --files-from copies only the files listed in file_list.txt from source_dir to target_dir.
    subprocess.call('rsync -av --files-from {} {} {}'.format(
        os.path.join(fold_dir, 'file_list.txt'), source_dir, target_dir), shell=True)
    # dutils.unpack_dataset(target_dir)
    copied_files = os.listdir(target_dir)
    logger.info("copying and unpacking data set finished: {} files in target dir: {}. took {} sec".format(
        len(copied_files), target_dir, np.round(time.time() - start_time, 0)))


if __name__ == "__main__":
    import utils.exp_utils as utils
    # note: the relative import requires running this file as a module (python -m ...)
    from .configs import Configs

    total_stime = time.time()
    cf = Configs()
    logger = utils.get_logger(0)

    batch_gen = get_train_generators(cf, logger)
    train_batch = next(batch_gen["train"])

    mins, secs = divmod((time.time() - total_stime), 60)
    h, mins = divmod(mins, 60)
    t = "{:d}h:{:02d}m:{:02d}s".format(int(h), int(mins), int(secs))
    print("{} total runtime: {}".format(os.path.split(__file__)[1], t))
parser.add_argument('--cuda', action='store_true',
                    help='use CUDA')
parser.add_argument('--work_dir', type=str, required=True,
                    help='path to the work_dir')
parser.add_argument('--out_dir', type=str, required=True,
                    help='path to the out_dir')
parser.add_argument('--no_log', action='store_true',
                    help='do not log the eval result')
parser.add_argument('--same_length', action='store_true',
                    help='set same length attention with masking')
args = parser.parse_args()
assert args.ext_len >= 0, 'extended context length must be non-negative'

device = torch.device("cuda" if args.cuda else "cpu")

# Get logger
logging = get_logger(os.path.join(args.out_dir, 'log_' + str(args.mem_len) + '.txt'),
                     log_=not args.no_log)

# Load dataset
corpus = get_lm_corpus(args.data, args.dataset)
ntokens = len(corpus.vocab)
va_iter = corpus.get_iterator('valid', args.batch_size, args.tgt_len,
                              device=device, ext_len=args.ext_len)
te_iter = corpus.get_iterator('test', args.batch_size, args.tgt_len,
                              device=device, ext_len=args.ext_len)

# Load the best saved model.
with open(os.path.join(args.work_dir, 'model.pt'), 'rb') as f:
    model = torch.load(f)
model.backward_compatible()  # patch attributes missing from older checkpoints
model = model.to(device)
    cf = utils.prep_exp(args.exp_dir, is_training=True)
    cf.resume_to_checkpoint = resume_to_checkpoint  # default: None
    model = utils.import_module('model', cf.model_path)
    data_loader = utils.import_module('dl', os.path.join(args.exp_source, 'data_loader.py'))

    for fold in folds:
        cf.fold_dir = os.path.join(cf.exp_dir, 'fold_{}'.format(fold))  # path to save results
        cf.fold = fold
        if not os.path.exists(cf.fold_dir):
            os.mkdir(cf.fold_dir)
        logger = utils.get_logger(cf.fold_dir)  # log info for this fold
        train(logger)
        cf.resume_to_checkpoint = None
        if args.mode == 'train_test':
            test(logger)
        for hdlr in logger.handlers:
            hdlr.close()
        logger.handlers = []

elif args.mode == 'test':
    cf = utils.prep_exp(args.exp_dir, is_training=False)
    # dutils.unpack_dataset(target_dir)
    copied_files = os.listdir(target_dir)
    logger.info("copying data set finished: {} files in target dir: {}. took {} sec".format(
        len(copied_files), target_dir, np.round(time.time() - start_time, 0)))


if __name__ == "__main__":
    import utils.exp_utils as utils

    total_stime = time.time()
    cf_file = utils.import_module("cf", "configs.py")
    cf = cf_file.configs()
    logger = utils.get_logger("dev")

    batch_gen = get_train_generators(cf, logger)
    train_batch = next(batch_gen["train"])
    pids = []
    total = 100
    for i in range(total):
        print("\rproducing batch {}/{}.".format(i, total), end="", flush=True)
        train_batch = next(batch_gen["train"])
        pids.append(train_batch["pid"])
    print()

    mins, secs = divmod((time.time() - total_stime), 60)
    h, mins = divmod(mins, 60)
    t = "{:d}h:{:02d}m:{:02d}s".format(int(h), int(mins), int(secs))
    print("{} total runtime: {}".format(os.path.split(__file__)[1], t))
    start_time = time.time()
    with open(os.path.join(fold_dir, 'file_list.txt'), 'w') as handle:
        for pid in pids:
            handle.write('{}.npy\n'.format(pid))

    subprocess.call('rsync -av --files-from {} {} {}'.format(
        os.path.join(fold_dir, 'file_list.txt'), source_dir, target_dir), shell=True)
    # dutils.unpack_dataset(target_dir)
    copied_files = os.listdir(target_dir)
    logger.info("copying and unpacking data set finished: {} files in target dir: {}. took {} sec".format(
        len(copied_files), target_dir, np.round(time.time() - start_time, 0)))


if __name__ == "__main__":
    import utils.exp_utils as utils

    cf_file = utils.import_module("cf", "configs.py")
    total_stime = time.time()
    cf = cf_file.configs(server_env=False)
    cf.server_env = False
    logger = utils.get_logger(".")

    batch_gen = get_train_generators(cf, logger)
    train_batch = next(batch_gen["train"])

    mins, secs = divmod((time.time() - total_stime), 60)
    h, mins = divmod(mins, 60)
    t = "{:d}h:{:02d}m:{:02d}s".format(int(h), int(mins), int(secs))
    print("{} total runtime: {}".format(os.path.split(__file__)[1], t))