def train(dataset, features, reg_metric, algo='lightgbm', n_folds=5, config=None):
    models = []
    folds = GroupKFold(n_splits=n_folds)
    groups = dataset['installation_id']
    X = dataset[features].copy()
    y = dataset['accuracy_group']
    oof = np.zeros(X.shape[0], dtype=np.float32)
    cv = OrderedDict()
    model_cls = get_model_class(algo)
    metric = getattr(reg_metric, algo)
    feat_imp = np.zeros(len(features), dtype=np.float32)
    for i, (trn_idx, val_idx) in enumerate(folds.split(X, y, groups), 1):
        U.log(f'Running k-fold {i} of {n_folds}')
        x_trn, y_trn = X.iloc[trn_idx], y.iloc[trn_idx]
        x_val, y_val = X.iloc[val_idx], y.iloc[val_idx]
        model = model_cls(config or get_default_config(algo))
        model.fit(train_data=(x_trn, y_trn), valid_data=(x_val, y_val), metric=metric)
        oof[val_idx] = model.predict(x_val)
        cv[f'cv_cappa_{i}'] = np.mean(reg_metric(y_val, oof[val_idx]))
        models.append(model)
        feat_imp += model.feature_importances.values
    feat_imp /= n_folds
    feat_imp = pd.Series(OrderedDict(zip(features, feat_imp)))
    return U.named_tuple('Result', models=models, cv=cv, oof=oof, fi=feat_imp)
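# Every snippet in this section resolves a model name to a class via some
# project-specific get_model_class. A minimal sketch of the usual registry
# pattern (the registry contents and wrapper class here are illustrative
# assumptions, not any project's actual implementation):
_MODEL_REGISTRY = {}

def register_model(name):
    def wrapper(cls):
        _MODEL_REGISTRY[name] = cls
        return cls
    return wrapper

def get_model_class(name):
    try:
        return _MODEL_REGISTRY[name]
    except KeyError:
        raise ValueError(f'Unknown model: {name!r}')

@register_model('lightgbm')
class LightGBMModel:  # hypothetical wrapper class
    def __init__(self, config):
        self.config = config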
def get_model(C, lab_num):
    model = get_model_class(C, C.model)(out_d=lab_num, **C.__dict__).cuda()
    return model
def train(cfg):
    print(f"Training {cfg.run}")
    run_dir = join(config.get('RESULTS_DIR'), cfg.exp, cfg.run)
    cfg.save(run_dir)

    ModelClass = models.get_model_class(cfg.model)
    model = ModelClass(cfg)

    datasets_dir = config.get('DATASETS_DIR')
    trn_ctx, tst_ctx = build_subsets_contexts(datasets_dir, run_dir, cfg)
    trn_zip = utils.build_tzip(cfg.train_strategy)
    tst_zip = utils.build_tzip('longest')
    loss_fn, opt = build_loss_opt(cfg)

    weights_dir = join(run_dir, 'weights')
    for epoch in trange(cfg.train_epochs, desc=' epochs', ncols=TQDM_NCOLS):
        train_epoch(epoch, trn_zip, model, loss_fn, opt, cfg.opt_alphas, trn_ctx)
        if cfg.eval_tst_freq and ((epoch + 1) % cfg.eval_tst_freq == 0):
            test_epoch(epoch, tst_zip, model, trn_ctx, tst_ctx)
        if cfg.save_freq and ((epoch + 1) % cfg.save_freq == 0):
            model.save_weights(join(weights_dir, f'{epoch:03d}.ckpt'))
def train(cfg):
    print(f"Training {cfg.run}")
    run_dir = join(config.get('RESULTS_DIR'), cfg.exp, cfg.run)
    cfg.save(run_dir)

    extractor = build_extractor()
    ModelClass = models.get_model_class(cfg.model)
    model = ModelClass(cfg)

    datasets_dir = config.get('DATASETS_DIR')
    trn_ctx, tst_ctx = build_subsets_contexts(datasets_dir, run_dir, cfg)
    trn_zip = utils.build_tzip(cfg.train_strategy)
    tst_zip = utils.build_tzip('longest')
    loss_fn, opt = build_loss_opt(cfg)

    weights_dir = join(run_dir, 'weights')
    metrics = defaultdict(list)
    for epoch in trange(cfg.train_epochs, desc=' epochs', ncols=TQDM_NCOLS):
        train_epoch(epoch, trn_zip, extractor, model, loss_fn, opt, cfg.opt_alphas, trn_ctx)
        if cfg.eval_tst_freq and ((epoch + 1) % cfg.eval_tst_freq == 0):
            test_epoch(epoch, tst_zip, extractor, model, trn_ctx, tst_ctx, metrics)
        if cfg.save_freq and ((epoch + 1) % cfg.save_freq == 0):
            model.save_weights(join(weights_dir, f'{epoch:03d}.ckpt'))

    df = pd.DataFrame.from_records(metrics, index='epoch')
    df.to_csv(join(run_dir, 'metrics.csv'))
def load_model(checkpoint):
    hparams = get_hparams(checkpoint)
    model = get_model_class(hparams['model_type']).load_from_checkpoint(
        checkpoint, hparams=hparams)
    model.hparams.root_dir = repeat(lambda x: os.path.dirname(x), checkpoint, 4)
    return model
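# The repeat helper above appears to apply a function n times to a value; here
# it climbs four directory levels up from the checkpoint path. A minimal,
# self-contained sketch under that assumption:
import os
from functools import reduce

def repeat(fn, value, n):
    """Apply fn to value n times: repeat(f, x, 3) == f(f(f(x)))."""
    return reduce(lambda acc, _: fn(acc), range(n), value)

# e.g. repeat(lambda x: os.path.dirname(x), '/runs/exp/ckpt/last/model.ckpt', 4)
# -> '/runs'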
def main(config_file_path: str) -> None:
    """
    Main function that runs OpenCMP.

    Args:
        config_file_path: Filename of the config file to load
    """
    # Load the config file.
    config = ConfigParser(config_file_path)

    # Load run parameters from the config file.
    num_threads = config.get_item(['OTHER', 'num_threads'], int)
    msg_level = config.get_item(['OTHER', 'messaging_level'], int, quiet=True)
    model_name = config.get_item(['OTHER', 'model'], str)

    # Load error analysis parameters from the config file.
    check_error = config.get_item(['ERROR ANALYSIS', 'check_error'], bool)

    # Set parameters for ngsolve.
    ngcore.SetNumThreads(num_threads)
    ngsglobals.msg_level = msg_level

    # Run the model.
    with ngcore.TaskManager():
        model_class = get_model_class(model_name)
        solver_class = get_solver_class(config)
        solver = solver_class(model_class, config)
        sol = solver.solve()

        if check_error:
            calc_error(config, solver.model, sol)

        # Suppressing the warning about using the default value for convergence_test.
        convergence_test = config.get_dict(['ERROR ANALYSIS', 'convergence_test'],
                                           solver.t_param, quiet=True)
        for key, var_lst in convergence_test.items():
            if key == 'h' and var_lst:
                for var in var_lst:
                    h_convergence(config, solver, sol, var)
            elif key == 'p' and var_lst:
                for var in var_lst:
                    p_convergence(config, solver, sol, var)

    save_output = config.get_item(['VISUALIZATION', 'save_to_file'], str, quiet=True)
    if save_output:
        save_type = config.get_item(['VISUALIZATION', 'save_type'], str, quiet=True)

        # Run the post-processor to convert the .sol files to .vtu.
        if save_type == '.vtu':
            print('Converting saved output to VTU.')

            # Path where the output is stored.
            output_dir_path = config.get_item(['OTHER', 'run_dir'], str) + '/output/'

            # Run the conversion.
            sol_to_vtu(output_dir_path, config_file_path, solver.model)

    return
def load_model(run_dir, cfg, epoch):
    weights_dir = join(run_dir, 'weights')
    if epoch is not None:
        checkpoint = join(weights_dir, f'{epoch:03d}.ckpt')
    else:
        checkpoint = tf.train.latest_checkpoint(weights_dir)
    ModelClass = models.get_model_class(cfg.model)
    model = ModelClass(cfg)
    # Build the model's variables with a dummy batch before restoring weights.
    model([np.zeros((1, 16, 512), dtype=np.float32) for _ in cfg._dss])
    model.load_weights(checkpoint)
    return model
def load_model(run_dir, cfg, epoch):
    model_class_name = get_class(run_dir)
    weights_dir = join(run_dir, 'weights')
    if epoch is not None:
        checkpoint = join(weights_dir, f'{epoch:03d}.ckpt')
    else:
        checkpoint = tf.train.latest_checkpoint(weights_dir)
    num_classes = 51 if cfg.ds == 'hmdb51' else 101
    ModelClass = models.get_model_class(model_class_name)
    model = ModelClass(cfg, num_classes)
    # Build the model's variables with a dummy batch before restoring weights.
    model(np.zeros((1, 16, cfg.reps_size), dtype=np.float32))
    model.load_weights(checkpoint)
    return model
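# Both load_model variants above call the model on a dummy batch before
# load_weights. Subclassed tf.keras models create their variables lazily on
# the first call, so a forward pass is needed before weights can be restored.
# A minimal, self-contained sketch of the same pattern (TinyModel and the
# checkpoint path are illustrative, not from the original code):
import numpy as np
import tensorflow as tf

class TinyModel(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.dense = tf.keras.layers.Dense(8)

    def call(self, x):
        return self.dense(x)

model = TinyModel()
model(np.zeros((1, 16), dtype=np.float32))  # builds the variables
model.save_weights('/tmp/tiny.ckpt')        # hypothetical path
model.load_weights('/tmp/tiny.ckpt')        # restoring now succeeds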
def main(args):
    pl.seed_everything(args.seed)
    torch.multiprocessing.set_sharing_strategy('file_system')
    args.multigpu = torch.cuda.device_count() > 1

    train_data = load_data(args.train, args.add_eos, args.cat_sent, args.max_len)
    valid_data = load_data(args.valid, args.add_eos, args.cat_sent, args.max_len)

    os.makedirs(args.root_dir, exist_ok=True)
    vocab_file = os.path.join(args.root_dir, 'vocab.txt')
    if not os.path.isfile(vocab_file):
        max_blank_len = args.max_len if args.model_type == 'lblm' else None
        Vocab.build(train_data, vocab_file, args.vocab_size, max_blank_len)
    vocab = Vocab(vocab_file)
    args.vocab_size = vocab.size

    train_dl = get_train_dataloader(
        train_data, vocab, args.max_tok,
        data_workers=args.data_workers if not args.multigpu else 0,
        model_type=args.model_type)
    val_dl = get_eval_dataloader(
        valid_data, vocab, args.eval_max_tok,
        data_workers=args.data_workers if not args.multigpu else 0,
        model_type=args.model_type)

    model = get_model_class(args.model_type)(args)

    trainer = pl.Trainer(
        accumulate_grad_batches=args.accum_grad,
        max_steps=args.max_steps,
        callbacks=[LearningRateMonitor()] if args.lr_schedule != 'fixed' else None,
        val_check_interval=args.val_check_interval if args.val_check_interval > 0 else 1.0,
        gpus=args.gpus,
        distributed_backend='ddp' if args.multigpu else None,
        amp_level=args.fp16_opt_level,
        precision=16 if args.fp16 else 32,
        default_root_dir=args.root_dir,
        resume_from_checkpoint=args.load_checkpoint)
    trainer.fit(model, train_dataloader=train_dl, val_dataloaders=val_dl)
def restore_model(config):
    model_name = config.get('DEFAULT', 'model')
    Model = models.get_model_class(model_name)
    model = Model(config)
    model.register_trainable()

    # Resume from checkpoint.
    assert model.trainable_registered, (
        "Register the trainable layers to have them restored from the "
        "checkpoint")
    ckpt = tf.train.Checkpoint(net=model)
    ckpt.restore(FLAGS.ckpt).expect_partial()
    return model
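# A short sketch of the object-based checkpoint restore used above. The 'net'
# key must match the name used when the checkpoint was written (an assumption
# about the layout here); expect_partial() suppresses warnings about saved
# objects, such as optimizer slots, that are deliberately not restored.
import tensorflow as tf

net = tf.keras.layers.Dense(4)
ckpt = tf.train.Checkpoint(net=net)
path = tf.train.latest_checkpoint('checkpoints/')  # hypothetical directory
if path:
    ckpt.restore(path).expect_partial()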
def train(self, dataset, features, fold, target='accuracy_group',
          grouping='installation_id', config=None):
    assert target not in features
    assert grouping in dataset or grouping is None
    groups = dataset[grouping]
    X = dataset[features]
    y = dataset[target]
    model_cls = get_model_class(self.algo)
    n_folds = fold.get_n_splits()
    models = []
    feat_imp = np.zeros(len(features), dtype=np.float32)
    oof = np.zeros(X.shape[0], dtype=np.float32)
    cv = OrderedDict()
    for i, (trn_idx, val_idx) in enumerate(fold.split(X, y, groups), 1):
        U.log(f'Running k-fold {i} of {n_folds}')
        x_trn, y_trn = X.iloc[trn_idx], y.iloc[trn_idx]
        x_val, y_val = X.iloc[val_idx], y.iloc[val_idx]
        model = model_cls(config or get_default_config(self.algo))
        model.fit(train_data=(x_trn, y_trn), valid_data=(x_val, y_val),
                  metric=self.eval_metric)
        oof[val_idx] = model.predict(x_val)
        for name, metric in self.cv_metrics.items():
            cv[f'cv_{name}_{i}'] = metric(y_val, oof[val_idx])
        models.append(model)
        if model.has_feature_importance:
            feat_imp += model.feature_importances.values
    if cv:
        U.log('Fold evaluation results:')
        U.log(U.dict_format(cv))
    feat_imp /= n_folds
    feat_imp = pd.Series(OrderedDict(zip(features, feat_imp)))
    return U.named_tuple('Result', models=models, cv=cv, oof=oof, fi=feat_imp)
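# A hedged usage sketch for the fold object passed into train() above: the
# snippets use GroupKFold with installation_id as the group key, which keeps
# all rows of one user in a single fold so sessions never straddle the
# train/validation boundary. Toy data below is illustrative only.
import numpy as np
from sklearn.model_selection import GroupKFold

X = np.arange(12).reshape(6, 2)
y = np.array([0, 1, 0, 1, 0, 1])
groups = np.array(['a', 'a', 'b', 'b', 'c', 'c'])  # toy installation_ids
for trn_idx, val_idx in GroupKFold(n_splits=3).split(X, y, groups):
    # No group ever appears on both sides of the split.
    assert set(groups[trn_idx]).isdisjoint(groups[val_idx])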
def train(cfg):
    model_dir = join(config.get('RESULTS_DIR'), cfg.exp_name, cfg.run)
    print(f"Training {cfg.run}")
    cfg.save_params(model_dir)

    datasets_dir = config.get('DATASETS_DIR')
    trn_dl = build_dataloader(datasets_dir, cfg.ds, cfg.split, 'train', cfg.tbatch_size)
    etrn_dl = build_dataloader(datasets_dir, cfg.ds, cfg.split, 'train', cfg.ebatch_size)
    etst_dl = build_dataloader(datasets_dir, cfg.ds, cfg.split, 'test', cfg.ebatch_size)

    num_classes = 51 if cfg.ds == 'hmdb51' else 101
    ModelClass = models.get_model_class(cfg.model)
    model = ModelClass(cfg, num_classes)

    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
    optimizer = tf.keras.optimizers.SGD(learning_rate=cfg.lr)

    trn_loss_epoch = tf.keras.metrics.SparseCategoricalCrossentropy()
    trn_acc_epoch = tf.keras.metrics.SparseCategoricalAccuracy()
    tst_loss_epoch = tf.keras.metrics.SparseCategoricalCrossentropy()
    tst_acc_epoch = tf.keras.metrics.SparseCategoricalAccuracy()

    trn_writer = tf.summary.create_file_writer(join(model_dir, 'trn'))
    tst_writer = tf.summary.create_file_writer(join(model_dir, 'tst'))

    trn_eval_step = (etrn_dl, trn_loss_epoch, trn_acc_epoch)
    tst_eval_step = (etst_dl, tst_loss_epoch, tst_acc_epoch)
    trn_eval_epoch = (trn_loss_epoch, trn_acc_epoch, trn_writer)
    tst_eval_epoch = (tst_loss_epoch, tst_acc_epoch, tst_writer)

    weights_dir = join(model_dir, 'weights')
    for epoch in trange(cfg.epochs):
        for x, y_true in trn_dl:
            train_step(x, y_true, model, loss_fn, optimizer)
        eval_step(model, trn_eval_step, tst_eval_step)
        eval_epoch(epoch, cfg.ds, trn_eval_epoch, tst_eval_epoch)
        model.save_weights(join(weights_dir, f'{epoch:03d}.ckpt'))
def build_moco_diffloss(self):
    moco_dim = self.cfg.get_int('moco.dim')
    moco_t = self.cfg.get_float('moco.t')
    moco_k = self.cfg.get_int('moco.k')
    moco_m = self.cfg.get_float('moco.m')
    moco_fc_type = self.cfg.get_string('moco.fc_type')
    moco_diff_speed = self.cfg.get_list('moco.diff_speed')
    base_model_class = get_model_class(**self.cfg.get_config('model'))

    def model_class(num_classes=128):
        model = MultiTaskWrapper(
            base_model_class,
            num_classes=num_classes,
            fc_type=moco_fc_type,
            finetune=False,
            groups=1,
        )
        return model

    model = MoCoDiffLossTwoFc(
        model_class,
        dim=moco_dim,
        K=moco_k,
        m=moco_m,
        T=moco_t,
        diff_speed=moco_diff_speed,
    )
    model.cuda()
    model = nn.parallel.DistributedDataParallel(
        model,
        device_ids=[dist.get_rank()],
        find_unused_parameters=True,
    )
    return model
def train_teacher():
    # Get args.
    args = get_args()
    seed = set_seed(args.seed, args.use_cuda)

    trainset, testset, nr_channels, mlp_input_neurons, classes = get_dataset(args)
    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              num_workers=args.num_processes)
    testloader = torch.utils.data.DataLoader(testset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=1)

    # Get the teacher model.
    teacher_model_class = get_model_class(args.teacher_model)
    if "MLP" in args.teacher_model:
        teacher_model = teacher_model_class(mlp_input_neurons, 10, args.dropout)
    else:
        teacher_model = teacher_model_class(nr_channels, 10, 6)

    if torch.cuda.is_available() and args.use_cuda:
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")
    teacher_model.to(device)
    teacher_model.train()

    # Get the loss function.
    criterion = nn.CrossEntropyLoss(reduction='mean')

    # Get the optimizer.
    if args.optimizer == "SGD":
        optimizer = optim.SGD(teacher_model.parameters(), lr=args.lr,
                              momentum=args.momentum, nesterov=args.nesterov,
                              weight_decay=0.0001)
    elif args.optimizer == "Adam":
        optimizer = optim.Adam(teacher_model.parameters(), lr=args.lr,
                               betas=args.beta, eps=args.eps,
                               weight_decay=0.0001)
    else:
        optimizer = optim.RMSprop(teacher_model.parameters(), lr=args.lr,
                                  alpha=args.alpha, eps=args.eps)

    loss_values = []
    total_accuracy = []
    epoch_eval = []
    scheduler = optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=[30, 60, 90, 120, 150, 180], gamma=0.1)

    # Train the teacher network.
    for epoch in range(args.nr_epochs):
        loss_epoch = 0.0
        for i, data in enumerate(trainloader, 0):
            samples, labels = data
            samples = to_cuda(samples, args.use_cuda)
            labels = to_cuda(labels, args.use_cuda)

            # Zero the gradients of the network params.
            optimizer.zero_grad()

            # Compute the loss and update the weights.
            output_logits = teacher_model(samples)
            loss = criterion(output_logits, labels)
            loss.backward()
            optimizer.step()
            loss_epoch += loss.item()

        # Step the LR scheduler once per epoch, after the optimizer updates.
        scheduler.step()

        # Average the loss over the number of batches.
        loss_epoch /= len(trainloader)
        loss_values.append(loss_epoch)
        print("Loss at epoch {} is {}".format(epoch, loss_epoch))

        if epoch % args.eval_interval == 0:
            teacher_model.eval()
            acc = compute_overall_accuracy(testloader, teacher_model, args.use_cuda)
            total_accuracy.append(acc)
            epoch_eval.append(epoch)
            teacher_model.train()
            print("Accuracy at epoch {} is {}".format(epoch, acc))

        if epoch % args.save_interval == 0:
            print("Saving model at {} epoch".format(epoch))
            with open(args.dataset + "_teacher_network_" + args.teacher_model +
                      "_" + str(seed), "wb") as f:
                torch.save(teacher_model.state_dict(), f)

    # Plot loss and total accuracy.
    plt.figure(1)
    plt.plot(loss_values)
    plt.xlabel('Nr Epochs')
    plt.ylabel('Loss function')
    plt.title('Loss function for Teacher on ' + args.dataset + " using " + args.teacher_model)
    plt.savefig('Loss_function_teacher' + args.teacher_model + "_" + args.dataset + str(seed))

    plt.figure(2)
    plt.plot(epoch_eval, total_accuracy)
    plt.xlabel('Nr Epochs')
    plt.ylabel('Total accuracy')
    plt.title('Accuracy for Teacher on ' + args.dataset + " using " + args.teacher_model)
    plt.savefig('Accuracy_teacher' + args.teacher_model + "_" + args.dataset + str(seed))

    with open("params" + args.dataset + '_' + args.teacher_model + '_' + str(seed), "wb") as f:
        params = [loss_values, epoch_eval, total_accuracy]
        pickle.dump(params, f)
def sol_to_vtu(output_dir_path: str, config_file_path: str,
               model: Union[Model, None] = None,
               delete_sol_file: bool = False) -> None:
    """
    Function to take the output .sol files and convert them into .vtu for visualization.

    Args:
        output_dir_path: The path to the folder in which the .sol files are, and where the .vtu files will be saved.
        config_file_path: The path to the config file used by the model.
        model: The model that generated the .sol files.
        delete_sol_file: Bool to indicate whether or not to delete the original .sol files after converting to .vtu.
            Default is False.
    """
    # Create the config parser.
    config = ConfigParser(config_file_path)

    # Being run outside of run.py, so the model has to be created here.
    if model is None:
        # Load the model.
        model_name = config.get_item(['OTHER', 'model'], str)
        model_class = get_model_class(model_name)
        model = model_class(config, Parameter(0.0))

    # Number of subdivisions per element.
    subdivision = config.get_item(['VISUALIZATION', 'subdivision'], int)
    # NOTE: -1 is the value used whenever an int default is needed.
    if subdivision == -1:
        subdivision = model.interp_ord

    # Generate a list of all .sol files.
    sol_path_generator = Path(output_dir_path + 'sol/').rglob('*.sol')
    sol_path_list = [str(sol_path) for sol_path in sol_path_generator]

    # Number of files to convert.
    n_files = len(sol_path_list)

    # Number of cores to use.
    # NOTE: There is no point in starting more threads than files, and this also lets us depend on modulo math later.
    n_threads = min(n_files, cpu_count())

    # Create gridfunctions, one per thread.
    gfus = [model.construct_gfu() for _ in range(n_threads)]

    # Create a list to contain the .pvd entries.
    output_list = ['' for _ in range(n_files)]

    # NOTE: We HAVE to use Pool, and not ThreadPool. ThreadPool causes seg faults on the VTKOutput call.
    with Pool(processes=n_threads) as pool:
        # Create the pool and start it. It will automatically take and run the next entry when it needs it.
        a = [
            pool.apply_async(
                _sol_to_vtu,
                (gfus[i % n_threads], sol_path_list[i], output_dir_path,
                 model.save_names, delete_sol_file, subdivision, model.mesh))
            for i in range(n_files)
        ]

        # Iterate through each thread and get its result when it's done.
        for i in range(len(a)):
            # Grab the result string and insert it in the correct place in the output list.
            output_list[i] = a[i].get()

    # Add the header and footer.
    output_list.insert(
        0,
        '<?xml version=\"1.0\"?>\n<VTKFile type=\"Collection\" version=\"0.1\"\n'
        + 'byte_order=\"LittleEndian\"\ncompressor=\"vtkZLibDataCompressor\">\n<Collection>\n'
    )
    output_list.append('</Collection>\n</VTKFile>')

    # Write each line to the file.
    with open(output_dir_path + 'transient.pvd', 'a+') as file:
        for line in output_list:
            file.write(line)
my_trainer = trainer.GoTrainer(problem, model, hparams, experiment_dir, skip_generate_data)

# Train and evaluate the network on the dev split.
my_trainer.train_and_evaluate(restore_from=restore_dir)

utils.set_logger(os.path.join(experiment_dir, 'test.log'))

# Evaluate the network on the test split.
my_trainer.test(test_dir)


if __name__ == '__main__':
    """Parse command line arguments and start main function."""
    args = parser.parse_args()

    model_name = args.model
    _model = get_model_class(model_name)

    problem_name = args.problem
    _problem = get_problem_class(problem_name)

    hp_name = args.hparams
    _hp = get_hparams(hp_name)

    _experiment_dir = args.experiment_dir
    _restore_dir = args.restore_dir
    _test_dir = args.test_dir
    _overwrite_results = args.overwrite_results
    _skip_generate_data = args.skip_generate_data

    main(_problem, _model, _hp, _experiment_dir, _restore_dir, _test_dir,
         _overwrite_results, _skip_generate_data)
import logging
from typing import Mapping

from aiohttp import web

import config
from models import get_model_class
from authirisations import get_authorisation_class
from cookies_manager import CookiesManager

logging.basicConfig(level=logging.INFO)

routes = web.RouteTableDef()
cookie_manager = CookiesManager(config.SECRET_KEY)
authorisation = get_authorisation_class(config.AUTHORISATION)
headers_model = get_model_class(config.AUTHORISATION)


def parse_headers(response_headers: Mapping):
    try:
        return headers_model(**response_headers)
    except ValueError:
        return None


def is_valid_cookies(cookies: Mapping, ip, user_agent) -> bool:
    session_id = cookies.get(config.COOKIE_KEY)
    if not session_id:
        return False
    if cookie_manager.is_valid(session_id, ip, user_agent):
        return True
    return False
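# parse_headers above relies only on the model class raising ValueError for
# invalid input. A minimal sketch of a class honouring that contract (the
# class name and header fields are assumptions, not the project's real model):
class ForwardedHeaders:
    def __init__(self, **headers):
        ip = headers.get('X-Real-IP')
        if not ip:
            raise ValueError('missing X-Real-IP header')
        self.ip = ip
        self.user_agent = headers.get('User-Agent', '')

# With headers_model bound to such a class:
#   parse_headers({'X-Real-IP': '10.0.0.1'}) -> model instance
#   parse_headers({})                        -> None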
def train_student():
    # Get args.
    args = get_args()
    seed = set_seed(args.seed, args.use_cuda)

    trainset, testset, nr_channels, mlp_input_neurons, classes = get_dataset(args)
    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              num_workers=args.num_processes)
    testloader = torch.utils.data.DataLoader(testset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=1)

    # Get the student and teacher models.
    student_model_class = get_model_class(args.student_model)
    teacher_model_class = get_model_class(args.teacher_model)
    if "MLP" in args.student_model:
        stud_model_simple = student_model_class(mlp_input_neurons, 10, args.dropout)
        stud_model_teacher = student_model_class(mlp_input_neurons, 10, args.dropout)
        teacher_model = teacher_model_class(mlp_input_neurons, 10, args.dropout)
    else:
        stud_model_simple = student_model_class(nr_channels, 10, args.dropout)
        stud_model_teacher = student_model_class(nr_channels, 10, args.dropout)
        teacher_model = teacher_model_class(nr_channels, 10, args.dropout)

    print("Train student with teacher help")
    loss_epoch2, loss_values2, total_accuracy2 = train_student_teacher(
        stud_model_teacher, teacher_model, args, trainloader, testloader, seed)

    print("Train simple student")
    loss_epoch1, loss_values1, total_accuracy1 = train_student_normal(
        stud_model_simple, args, trainloader, testloader, seed)

    with open("params" + args.dataset + '_' + args.teacher_model + '_' + str(seed), "rb") as f:
        _, epoch_eval_teacher, total_accuracy_teacher = pickle.load(f)

    # Plot loss and total accuracy.
    plt.figure(1)
    plt.plot(range(0, args.nr_epochs), loss_values1)
    plt.plot(range(0, args.nr_epochs), loss_values2)
    plt.legend(['student_simple', 'student_teacher'], loc='upper right')
    plt.xlabel('Nr Epochs')
    plt.ylabel('Loss function value')
    plt.title('Loss function comparison between students')
    plt.savefig('Loss_function_' + args.dataset + '_students' + str(seed) + "_" + str(args.id))

    plt.figure(2)
    plt.plot(loss_epoch1, total_accuracy1)
    plt.plot(loss_epoch2, total_accuracy2)
    plt.plot(epoch_eval_teacher, total_accuracy_teacher)
    plt.legend(['student_simple', 'student_teacher', 'teacher'], loc='lower right')
    plt.xlabel('Nr Epochs')
    plt.ylabel('Total accuracy')
    plt.title('Accuracy comparison between students')
    plt.savefig('Accuracy_' + args.dataset + '_students' + str(seed) + "_" + str(args.id))
def pipeline(args):
    ############################# INITIALIZATION #############################
    batch_size = args.batch_size
    shuffle = args.shuffle
    random_seed = args.random_seed
    experiment_name = args.name
    data_path = args.data_path
    log_path = args.log_path

    tensorboard_dir = os.path.join(log_path, 'tensorboard', experiment_name)
    submission_dir = os.path.join(log_path, 'submission')
    submission_path = os.path.join(submission_dir, experiment_name + '_%s.csv')
    model_dir = os.path.join(log_path, './checkpoints')
    model_path = os.path.join(model_dir, experiment_name + '.h5')

    train_path = os.path.join(data_path, 'train')
    test_path = os.path.join(data_path, 'test')
    train_img_path = os.path.join(train_path, 'images')
    train_mask_path = os.path.join(train_path, 'masks')
    test_img_path = os.path.join(test_path, 'images')

    if not os.path.isdir(submission_dir):
        os.makedirs(submission_dir)
    if not os.path.isdir(model_dir):
        os.makedirs(model_dir)
    if not os.path.isdir(tensorboard_dir):
        os.makedirs(tensorboard_dir)

    ############################# GET IMAGE PREPROCESSING INFO #############################
    model_class = get_model_class(args.model_name)
    image_process_func = model_class.get_image_preprocessor()
    image_size = model_class.get_image_size()
    n_channels = model_class.get_number_of_channels()
    if args.use_depth:
        n_channels = 3

    ############################# CREATE TRAINING SET #############################
    train_ids = os.listdir(train_img_path)
    print("Preparing training set")
    sys.stdout.flush()
    images, masks = get_dataset(train_ids, train_img_path, train_mask_path,
                                image_size=image_size, is_test=False,
                                preprocess_func=image_process_func,
                                single_channel=n_channels == 1,
                                use_depth=args.use_depth)

    print("Preparing test set")
    sys.stdout.flush()
    test_ids = os.listdir(test_img_path)
    X_test = get_dataset(test_ids, test_img_path, None,
                         image_size=image_size, is_test=True,
                         preprocess_func=image_process_func,
                         single_channel=n_channels == 1,
                         use_depth=args.use_depth)

    print("Train shape:", images.shape)
    print("Test shape:", X_test.shape)
    sys.stdout.flush()

    ############################# TRAINING #############################
    gc.collect()

    # Preparing data
    X_train, X_val, Y_train, Y_val = train_test_split(
        images, masks, test_size=args.validation_split,
        shuffle=shuffle, random_state=args.random_seed)
    args.validation_split = 0

    train_gen = create_datagen(X_train, Y_train, args, batch_size=batch_size,
                               shuffle=shuffle, random_seed=random_seed)
    val_gen = create_datagen(X_val, Y_val, args, batch_size=batch_size,
                             shuffle=shuffle, random_seed=random_seed)
    train_steps = int(np.ceil(len(X_train) / batch_size))
    val_steps = int(np.ceil(len(X_val) / batch_size))
    gc.collect()

    ############################# Creating model #############################
    print("Stage 1: binary crossentropy loss")
    training_stage(train_gen, val_gen, train_steps, val_steps, model_path,
                   tensorboard_dir, args, "sigmoid", [my_iou_metric],
                   "binary_crossentropy", X_train, Y_train, X_val, Y_val,
                   weights_path=args.weights_path)

    print("Stage 2: lovasz loss")
    model = training_stage(train_gen, val_gen, train_steps, val_steps,
                           model_path, tensorboard_dir, args, "linear",
                           [my_iou_metric_2], lovasz_loss,
                           X_train, Y_train, X_val, Y_val, model_path)
    model.load_weights(model_path)

    print("Validation prediction. Estimating optimal threshold.")
    preds_valid = predict_result(model, X_val)

    # Scoring for the last model; choose the threshold using validation data.
    thresholds_ori = np.linspace(0.3, 0.7, 31)
    # Reverse sigmoid function: needed because the sigmoid activation was removed.
    thresholds = np.log(thresholds_ori / (1 - thresholds_ori))

    ious = np.array([
        iou_metric_batch(Y_val, preds_valid > threshold)
        for threshold in tqdm(thresholds)
    ])
    print("IOUS:", ious)

    # Instead of using the default threshold of 0, use validation data to find the best threshold.
    threshold_best_index = np.argmax(ious)
    iou_best = ious[threshold_best_index]
    threshold_best = thresholds[threshold_best_index]
    print("Best threshold: %f. Best iou: %f." % (threshold_best, iou_best))

    print("Test prediction")
    preds_test = predict_result(model, X_test)

    print("Making submission")
    make_submission(test_ids, preds_test, submission_path % '_thr',
                    mode="threshold", threshold=threshold_best)
    make_submission(test_ids, preds_test, submission_path % '_round',
                    mode="threshold", threshold=0.0)
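# Standalone demonstration of the reverse-sigmoid trick used above: after the
# final sigmoid is removed, the model outputs logits, so probability-space
# thresholds must be mapped through the logit function before comparison.
import numpy as np

probs = np.linspace(0.3, 0.7, 5)
logits = np.log(probs / (1 - probs))  # logit(p) = log(p / (1 - p))
# p = 0.5 maps to logit 0, which is why 0 is the default cutoff for raw logits.
print(dict(zip(probs.round(2), logits.round(3))))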
def eval_networks():
    # Get args.
    args = get_args()
    seed = set_seed(args.seed, args.use_cuda)

    _, testset, nr_channels, mlp_input_neurons, classes = get_dataset(args)
    testloader = torch.utils.data.DataLoader(testset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=1)

    # Get the student and teacher models.
    student_model_class = get_model_class(args.student_model)
    teacher_model_class = get_model_class(args.teacher_model)
    if "MLP" in args.student_model:
        stud_model_simple = student_model_class(mlp_input_neurons, 10, args.dropout)
        stud_model_teacher = student_model_class(mlp_input_neurons, 10, args.dropout)
        teacher_model = teacher_model_class(mlp_input_neurons, 10, args.dropout)
    else:
        stud_model_simple = student_model_class(nr_channels, 10, args.dropout)
        stud_model_teacher = student_model_class(nr_channels, 10, args.dropout)
        teacher_model = teacher_model_class(nr_channels, 10, args.dropout)

    if torch.cuda.is_available() and args.use_cuda:
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")

    with open(args.dataset + "_teacher_network_" + args.teacher_model + "_" +
              str(seed), "rb") as f:
        teacher_model.load_state_dict(torch.load(f))
    with open(args.dataset + "_student_network_simple" + args.student_model +
              str(seed) + "_10", "rb") as f:
        stud_model_simple.load_state_dict(torch.load(f))
    with open(args.dataset + "_student_network_teacher" + args.student_model +
              str(seed) + "_10", "rb") as f:
        stud_model_teacher.load_state_dict(torch.load(f))

    stud_model_simple.to(device)
    stud_model_teacher.to(device)
    teacher_model.to(device)
    stud_model_simple.eval()
    stud_model_teacher.eval()
    teacher_model.eval()

    print("Eval teacher model")
    compute_class_accuracy(testloader, teacher_model,
                           "ConfusionMatrixTeacherCIFAR110", use_cuda=True)
    print("Eval student model simple")
    compute_class_accuracy(testloader, stud_model_simple,
                           "ConfusionMatrixStudentSCIFAR110", use_cuda=True)
    print("Eval student model teacher")
    compute_class_accuracy(testloader, stud_model_teacher,
                           "ConfusionMatrixStudentTCIFAR110", use_cuda=True)
def pipeline(args):
    ############################# INITIALIZATION #############################
    args.validation_split = 0
    batch_size = args.batch_size
    shuffle = args.shuffle
    random_seed = args.random_seed
    experiment_name = args.name
    data_path = args.data_path
    log_path = args.log_path

    tensorboard_dir_base = os.path.join(log_path, 'tensorboard', experiment_name)
    submission_dir = os.path.join(log_path, 'submission')
    submission_path_template = os.path.join(submission_dir, experiment_name + '%s.csv')
    model_dir = os.path.join(log_path, './checkpoints')
    model_path_template = os.path.join(model_dir, experiment_name + '_fold%d.h5')

    train_path = os.path.join(data_path, 'train')
    test_path = os.path.join(data_path, 'test')
    train_img_path = os.path.join(train_path, 'images')
    train_mask_path = os.path.join(train_path, 'masks')
    test_img_path = os.path.join(test_path, 'images')

    if not os.path.isdir(submission_dir):
        os.makedirs(submission_dir)
    if not os.path.isdir(model_dir):
        os.makedirs(model_dir)

    ############################# GET IMAGE PREPROCESSING INFO #############################
    model_class = get_model_class(args.model_name)
    image_process_func = model_class.get_image_preprocessor()
    image_size = model_class.get_image_size()
    single_channel = model_class.get_number_of_channels() == 1
    if args.use_depth:
        single_channel = False

    ############################# CREATE TRAINING SET #############################
    images_ids = os.listdir(train_img_path)
    print("Preparing training set")
    sys.stdout.flush()
    images, masks = get_dataset(images_ids, train_img_path, train_mask_path,
                                image_size=image_size, is_test=False,
                                preprocess_func=image_process_func,
                                single_channel=single_channel,
                                use_depth=args.use_depth)
    mask_types = get_mask_types(images_ids, train_mask_path)

    print("Preparing test set")
    sys.stdout.flush()
    test_ids = os.listdir(test_img_path)
    X_test = get_dataset(test_ids, test_img_path, None,
                         image_size=image_size, is_test=True,
                         preprocess_func=image_process_func,
                         single_channel=single_channel,
                         use_depth=args.use_depth)

    print("Train shape:", images.shape)
    print("Test shape:", X_test.shape)
    sys.stdout.flush()

    ############################# K-FOLD TRAINING #############################
    kf = StratifiedKFold(n_splits=args.nfolds, random_state=args.random_seed, shuffle=True)
    predictions = np.zeros(shape=(args.nfolds, len(test_ids), image_size, image_size, 1),
                           dtype=np.float32)

    for fold, (train_index, test_index) in enumerate(kf.split(images, mask_types)):
        print("\n\nFold: ", fold + 1)
        tensorboard_dir = tensorboard_dir_base + "_fold" + str(fold + 1)
        model_path = model_path_template % (fold + 1)
        submission_path_round = submission_path_template % ("_fold" + str(fold + 1) + "_round")
        submission_path_thr = submission_path_template % ("_fold" + str(fold + 1) + "_thr")
        submission_path_all_round = submission_path_template % ("_fold" + str(fold + 1) + "_all_round")

        if not os.path.isdir(tensorboard_dir):
            os.makedirs(tensorboard_dir)

        gc.collect()

        # Preparing data
        X_train, X_val = images[train_index], images[test_index]
        Y_train, Y_val = masks[train_index], masks[test_index]

        train_gen = create_datagen(X_train, Y_train, args, batch_size=batch_size,
                                   shuffle=shuffle, random_seed=random_seed)
        val_gen = create_datagen(X_val, Y_val, args, batch_size=batch_size,
                                 shuffle=shuffle, random_seed=random_seed)
        train_steps = int(np.ceil(len(X_train) / batch_size))
        val_steps = int(np.ceil(len(X_val) / batch_size))
        gc.collect()

        # Check for a pretrained model.
        pretrained_weights = None
        if fold > 0:
            pretrained_weights = model_path_template % fold  # Model from the previous fold
        elif args.weights_path is not None:
            pretrained_weights = args.weights_path

        ############################# Training model #############################
        # There is no pretrained model. Need to train with binary crossentropy first.
        print("Stage 1: binary crossentropy loss")
        training_stage(train_gen, val_gen, train_steps, val_steps, model_path,
                       tensorboard_dir, args, "sigmoid", [my_iou_metric],
                       "binary_crossentropy", X_train, Y_train, X_val, Y_val,
                       weights_path=pretrained_weights)
        pretrained_weights = model_path

        print("Stage 2: lovasz loss")
        model = training_stage(train_gen, val_gen, train_steps, val_steps,
                               model_path, tensorboard_dir, args, "linear",
                               [my_iou_metric_2], lovasz_loss,
                               X_train, Y_train, X_val, Y_val, pretrained_weights)
        model.load_weights(model_path)

        print("Validation prediction. Estimating optimal threshold.")
        preds_valid = predict_result(model, X_val)

        # Scoring for the last model; choose the threshold using validation data.
        thresholds_ori = np.linspace(0.3, 0.7, 31)
        # Reverse sigmoid function: needed because the sigmoid activation was removed.
        thresholds = np.log(thresholds_ori / (1 - thresholds_ori))

        ious = np.array([iou_metric_batch(Y_val, preds_valid > threshold)
                         for threshold in tqdm(thresholds)])
        print("IOUS:", ious)

        # Instead of using the default threshold of 0, use validation data to find the best threshold.
        threshold_best_index = np.argmax(ious)
        iou_best = ious[threshold_best_index]
        threshold_best = thresholds[threshold_best_index]
        print("Best threshold: %f. Best iou: %f." % (threshold_best, iou_best))

        print("Test prediction")
        preds_test = predict_result(model, X_test)
        predictions[fold] = preds_test

        print("Making submission")
        make_submission(test_ids, preds_test, submission_path_round,
                        mode="threshold", threshold=threshold_best)
        make_submission(test_ids, preds_test, submission_path_thr,
                        mode="threshold", threshold=0.0)

        temp_pred = np.mean(predictions, axis=0)
        make_submission(test_ids, temp_pred, submission_path_all_round,
                        mode="threshold", threshold=0.0)

    predictions = np.mean(predictions, axis=0)
    # np.save(os.path.join(submission_dir, args.model_name + "_prediction.npy"), predictions)

    print("\n\nMaking final submission")
    # TODO ???? > 0 ????
    make_submission(test_ids, predictions, submission_path_template % "_final",
                    mode="threshold", threshold=0.0)