def _run_stage(self, stage: str): self._prepare_for_stage(stage) # checkpoint loading self._run_event("stage", moment="start") while self.state.stage_epoch < self.state.num_epochs: self._run_event("epoch", moment="start") utils.set_global_seed(self.experiment.initial_seed + self.state.epoch + 1) self._run_epoch(stage=stage, epoch=self.state.stage_epoch) self._run_event("epoch", moment="end") if self._check_run and self.state.stage_epoch >= 1: break if self.state.early_stop: self.state.early_stop = False break self.state.epoch += 1 self.state.stage_epoch += 1 self._run_event("stage", moment="end")
def make_classifier():
    """Build a freshly seeded BERT sequence classifier.

    Returns a dict bundling the model together with its loss, optimizer and
    scheduler, ready for the training loop.
    """
    # Reproducibility: seed every RNG source and force deterministic cuDNN.
    set_global_seed(SEED)
    prepare_cudnn(deterministic=True)
    np.random.seed(SEED)
    torch.manual_seed(SEED)

    classifier = BertForSequenceClassification(PRETRAINED_MODEL_NAME, NUM_LABELS)
    classifier.to(device)
    print(f'Loaded model: {PRETRAINED_MODEL_NAME}')
    print(f'Trainable parameters: {classifier.n_trainable()}')

    # The model object knows how to configure its own training components.
    loss_fn = classifier.configure_loss()
    opt = classifier.configure_optimizers(1)
    sched = classifier.configure_scheduler(opt)

    return {
        'criterion': loss_fn,
        'optimizer': opt,
        'scheduler': sched,
        'model': classifier,
    }
def _run_epoch(self, stage: str, epoch: int):
    """Run every loader once for the given epoch of the given stage.

    Validates the loader dict (training stages must contain the valid loader;
    inference stages must contain no train loaders), then iterates loaders,
    toggling train/eval mode and gradient tracking per loader.
    """
    self._prepare_for_epoch(stage=stage, epoch=epoch)
    state: _State = self.state
    assert state.loaders is not None
    loaders = state.loaders
    # @TODO: better solution with train/inference handling ?
    is_infer_stage = state.stage_name.startswith("infer")
    if not is_infer_stage:
        assert state.valid_loader in loaders.keys(), \
            f"'{state.valid_loader}' " \
            f"should be in provided loaders: {list(loaders.keys())}"
    else:
        # @TODO: add check for non distributed run for inference
        assert not any(x.startswith("train") for x in loaders.keys()), \
            "for inference no train loader should be passed"
    for loader_name, loader in loaders.items():
        # a loader is a training loader iff its name starts with "train"
        is_train_loader = loader_name.startswith("train")
        state.loader_name = loader_name
        state.loader_len = len(loader)
        state.need_backward_pass = is_train_loader
        # train() toggles between train/eval mode based on the flag
        self.model.train(state.need_backward_pass)
        if isinstance(loader.sampler, DistributedSampler) \
                and not is_infer_stage:
            # keep per-process shuffling in sync across distributed workers
            loader.sampler.set_epoch(state.epoch)
        # reseed before each loader so loader order does not change results
        utils.set_global_seed(
            self.experiment.initial_seed + state.global_epoch + 1
        )
        self._run_event("on_loader_start")
        # gradients only for training loaders
        with torch.set_grad_enabled(state.need_backward_pass):
            self._run_loader(loader)
        self._run_event("on_loader_end")
def _get_experiment_components(
    self, stage: str = None
) -> Tuple[Model, Criterion, Optimizer, Scheduler, Device]:
    """
    Inner method for children's classes for model specific initialization.

    Seeds the global RNGs, asks the experiment for the stage's model and
    training components, then lets ``utils.process_components`` check device
    support and place everything on the right device.

    :param stage: name of the stage to build components for
    :return: tuple of (model, criterion, optimizer, scheduler, device)
    """
    utils.set_global_seed(self.experiment.initial_seed)
    model = self.experiment.get_model(stage)
    criterion, optimizer, scheduler = (
        self.experiment.get_experiment_components(model, stage)
    )
    # process_components returns the same 5-tuple we promise to callers,
    # so the result can be returned directly.
    return utils.process_components(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        distributed_params=self.experiment.distributed_params,
        device=self.device,
    )
def _prepare_for_stage(self, stage: str):
    """Initialize model, callbacks and runner state for one stage.

    The global seed is re-applied before each step so every stage starts
    from an identical RNG state regardless of what ran before it.
    """
    utils.set_global_seed(self.experiment.initial_seed)
    (self.model, criterion, optimizer, scheduler, self.device) = (
        self._get_experiment_components(stage=stage)
    )

    utils.set_global_seed(self.experiment.initial_seed)
    stage_callbacks = self._get_callbacks(stage)

    utils.set_global_seed(self.experiment.initial_seed)
    self.state = self._get_state(
        stage=stage,
        model=self.model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        device=self.device,
        callbacks=stage_callbacks,
    )
# FIX: argparse's ``type=bool`` is broken for flags — bool("False") is True,
# so ANY non-empty value parsed to True. Parse true/false spellings instead.
def _cli_bool(value):
    # Convert common CLI spellings ("True", "false", "1", ...) to bool.
    if isinstance(value, bool):
        return value
    return str(value).strip().lower() in ("1", "true", "t", "yes", "y")

parser.add_argument("--optimize_postprocess", help="to optimize postprocess", type=_cli_bool, default=False)
parser.add_argument("--train", help="train", type=_cli_bool, default=False)
parser.add_argument("--make_prediction", help="to make prediction", type=_cli_bool, default=False)
parser.add_argument("--preload", help="save processed data", type=_cli_bool, default=False)
# FIX: help text was a copy-paste of "number of epochs"
parser.add_argument("--separate_decoder", help="use separate decoder", type=_cli_bool, default=False)
parser.add_argument("--multigpu", help="use multi-gpu", type=_cli_bool, default=False)
parser.add_argument("--lookahead", help="use lookahead", type=_cli_bool, default=False)
args, unknown = parser.parse_known_args()
# args.train = False
args.optimize_postprocess = False
print(args)

# classification runs are pinned to GPU 0
if args.task == 'classification': os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# reproducibility
set_global_seed(args.seed)
prepare_cudnn(deterministic=True)

sub_name = f'Model_{args.task}_{args.model_type}_{args.encoder}_bs_{args.bs}_{str(datetime.datetime.now().date())}'
logdir = f"./logs/{sub_name}" if args.logdir is None else args.logdir

preprocessing_fn = smp.encoders.get_preprocessing_fn(args.encoder, args.encoder_weights)
loaders = prepare_loaders(path=args.path, bs=args.bs,
                          num_workers=args.num_workers,
                          preprocessing_fn=preprocessing_fn,
                          preload=args.preload,
                          image_size=(args.height, args.width),
                          augmentation=args.augmentation,
                          task=args.task)
test_loader = loaders['test']
del loaders['test']

model = get_model(model_type=args.segm_type, encoder=args.encoder,
                  encoder_weights=args.encoder_weights,
                  activation=None, task=args.task)
def main(args):
    """Train PreActResNet18 on CIFAR-10, optionally distilling from a teacher.

    When ``args.teacher_model`` is given, a knowledge-distillation setup is
    built (student + teacher models, extra loss callbacks, weighted loss
    aggregation on the train loader); otherwise a plain supervised run.
    Logs go to W&B and to ``logs/<run-name>``.
    """
    wandb.init(project="teacher-pruning", config=vars(args))
    set_global_seed(42)
    # dataloader initialization
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
    train_dataset = Wrp(
        datasets.CIFAR10(root=os.getcwd(), train=True, transform=transform_train, download=True))
    valid_dataset = Wrp(
        datasets.CIFAR10(root=os.getcwd(), train=False, transform=transform_test))
    train_dataloader = DataLoader(dataset=train_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=2)
    valid_dataloader = DataLoader(dataset=valid_dataset, batch_size=128, num_workers=2)
    loaders = {
        "train": train_dataloader,
        "valid": valid_dataloader,
    }
    # model initialization
    model = PreActResNet18()
    model.fc = nn.Linear(512, 10)  # 10-class CIFAR-10 head
    if args.teacher_model is not None:
        is_kd = True
        teacher_model = NAME2MODEL[args.teacher_model]()
        load_model_from_path(model=teacher_model, path=args.teacher_path)
        # Catalyst accepts a dict of named models for multi-model runs.
        model = {
            "student": model,
            "teacher": teacher_model,
        }
        # NOTE(review): the hidden-states KD path is taken when beta is None —
        # confirm this condition isn't inverted.
        output_hiddens = args.beta is None
        is_kd_on_hiddens = output_hiddens
        runner = KDRunner(device=args.device, output_hiddens=output_hiddens)
        # only the student is optimized; the teacher stays frozen
        parameters = model["student"].parameters()
    else:
        is_kd = False
        runner = dl.SupervisedRunner(device=args.device)
        parameters = model.parameters()
    # optimizer
    optimizer_cls = NAME2OPTIM[args.optimizer]
    optimizer_kwargs = {"params": parameters, "lr": args.lr}
    if args.optimizer == "sgd":
        optimizer_kwargs["momentum"] = args.momentum
    else:
        # non-SGD optimizers here take Adam-style betas
        optimizer_kwargs["betas"] = (args.beta1, args.beta2)
    optimizer = optimizer_cls(**optimizer_kwargs)
    scheduler = MultiStepLR(optimizer, milestones=[80, 120], gamma=args.gamma)
    logdir = f"logs/{wandb.run.name}"
    # callbacks
    callbacks = [dl.AccuracyCallback(num_classes=10), WandbCallback()]
    if is_kd:
        # weighted sum of classification + distillation losses, train only
        metrics = {}
        callbacks.append(dl.CriterionCallback(output_key="cls_loss"))
        callbacks.append(DiffOutputCallback())
        coefs = get_loss_coefs(args.alpha, args.beta)
        metrics["cls_loss"] = coefs[0]
        metrics["diff_output_loss"] = coefs[1]
        if is_kd_on_hiddens:
            callbacks.append(DiffHiddenCallback())
            metrics["diff_hidden_loss"] = coefs[2]
        aggregator_callback = dl.MetricAggregationCallback(prefix="loss",
                                                          metrics=metrics,
                                                          mode="weighted_sum")
        wrapped_agg_callback = dl.ControlFlowCallback(aggregator_callback,
                                                     loaders=["train"])
        callbacks.append(wrapped_agg_callback)
    runner.train(
        model=model,
        optimizer=optimizer,
        scheduler=scheduler,
        criterion=nn.CrossEntropyLoss(),
        loaders=loaders,
        callbacks=callbacks,
        num_epochs=args.epoch,
        logdir=logdir,
        verbose=True,
    )
def main():
    """Fine-tune BERT on Google Quest with GroupKFold cross-validation.

    For each fold: builds datasets/loaders, trains with a Catalyst runner,
    reloads the best checkpoint and records the Spearman CV score.
    Returns True on completion.
    """
    # hyper param
    # TODO: set your params
    num_folds = 5
    seed = 1234
    base_dataset_path = '/content/drive/My Drive/kaggle/google-quest-challenge/dataset'
    batch_size = 4
    num_epochs = 4
    bert_model = 'bert-base-uncased'
    base_logdir = '/kaggle/google_quest/bert'

    # fix seed
    set_global_seed(seed)
    device = get_device()

    # set up logdir (timestamped per run)
    now = datetime.now()
    base_logdir = os.path.join(base_logdir, now.strftime("%Y%m%d%H%M%S"))
    os.makedirs(base_logdir, exist_ok=True)

    # dump this script for reproducibility
    my_file_path = os.path.abspath(__file__)
    # FIX: copyfile() requires a file destination — copying onto the directory
    # itself raises IsADirectoryError.
    shutil.copyfile(
        my_file_path, os.path.join(base_logdir, os.path.basename(my_file_path)))

    # load dataset
    # TODO: set your dataset
    train, test, sample_submission = read_data(base_dataset_path)
    input_cols = list(train.columns[[1, 2, 5]])
    target_cols = list(train.columns[11:])
    num_labels = len(target_cols)

    # init Bert
    tokenizer = BertTokenizer.from_pretrained(bert_model)

    # execute CV; group by question body so duplicate questions never span folds
    # TODO: set your CV method
    kf = GroupKFold(n_splits=num_folds)
    ids = kf.split(train['question_body'], groups=train['question_body'])

    fold_scores = []
    for fold, (train_idx, valid_idx) in enumerate(ids):
        print("Current Fold: ", fold + 1)
        logdir = os.path.join(base_logdir, 'fold_{}'.format(fold + 1))
        os.makedirs(logdir, exist_ok=True)

        # create dataloader
        train_df, val_df = train.iloc[train_idx], train.iloc[valid_idx]
        print("Train and Valid Shapes are", train_df.shape, val_df.shape)

        print("Preparing train datasets....")
        inputs_train = compute_input_arrays(train_df, input_cols, tokenizer,
                                            max_sequence_length=512)
        outputs_train = compute_output_arrays(train_df, columns=target_cols)
        # sequence length = index of first padding token (0); rows without
        # padding get the full max length
        lengths_train = np.argmax(inputs_train[0] == 0, axis=1)
        lengths_train[lengths_train == 0] = inputs_train[0].shape[1]

        print("Preparing valid datasets....")
        inputs_valid = compute_input_arrays(val_df, input_cols, tokenizer,
                                            max_sequence_length=512)
        outputs_valid = compute_output_arrays(val_df, columns=target_cols)
        lengths_valid = np.argmax(inputs_valid[0] == 0, axis=1)
        lengths_valid[lengths_valid == 0] = inputs_valid[0].shape[1]

        print("Preparing dataloaders datasets....")
        train_set = QuestDataset(inputs=inputs_train, lengths=lengths_train,
                                 labels=outputs_train)
        train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
        valid_set = QuestDataset(inputs=inputs_valid, lengths=lengths_valid,
                                 labels=outputs_valid)
        valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=False)

        # init models
        model = CustomBertForSequenceClassification.from_pretrained(
            bert_model, num_labels=num_labels, output_hidden_states=True)
        criterion = nn.BCEWithLogitsLoss()
        optimizer = AdamW(model.parameters(), lr=2e-5, eps=1e-8)
        num_training_steps = num_epochs * len(train_loader)
        # FIX: num_warmup_steps must be an integer step count — 0.05 was
        # presumably meant as a 5% warmup fraction of the total steps.
        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=int(0.05 * num_training_steps),
            num_training_steps=num_training_steps)

        # model training
        runner = BertRunner(device=device)
        loaders = {'train': train_loader, 'valid': valid_loader}
        print("Model Training....")
        runner.train(model=model, criterion=criterion, optimizer=optimizer,
                     scheduler=scheduler, loaders=loaders, logdir=logdir,
                     num_epochs=num_epochs,
                     score_func=mean_spearmanr_correlation_score)

        # calc valid score from the best checkpoint
        best_model_path = os.path.join(logdir, 'best_model.pth')
        val_preds = runner.predict_loader(model, loaders['valid'],
                                          resume=best_model_path)
        val_truth = train[target_cols].iloc[valid_idx].values
        # TODO: set your score function
        cv_score = mean_spearmanr_correlation_score(val_truth, val_preds)
        print('Fold {} CV score : {}'.format(fold + 1, cv_score))
        fold_scores.append(cv_score)
    return True
test_df = pd.DataFrame({'ImageFileName': list(
    test_filenames)}, columns=['ImageFileName'])
# NOTE(review): despite the name, this is a DataLoader over the test set,
# not a Dataset — consider renaming where it's consumed.
test_dataset = DataLoader(
    ALASKATestData(test_df, augmentations=albu.Compose([
        # albu.CenterCrop()
        albu.Normalize(),
        ToTensorV2()
    ])
    ), batch_size=1, shuffle=False, num_workers=args.nw)
print(len(train_data))
print(len(val_data))

# reproducibility: fixed seed + deterministic cuDNN
SEED = 2020
utils.set_global_seed(SEED)
utils.prepare_cudnn(deterministic=True)

loaders = {'train': train_data, 'valid': val_data}
criterion = nn.CrossEntropyLoss()
model = ENet('efficientnet-b0')
print(model)
# RAdam wrapped in Lookahead for more stable updates
optimizer = Lookahead(RAdam(
    model.parameters(), lr=args.lr, weight_decay=args.wd))
# quarter the LR after 3 epochs without improvement
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, factor=0.25, patience=3)
num_epochs = args.e
logdir = "./logs/effnet-b0"
fp16_params = None  # dict(opt_level="O1") — mixed precision disabled for now
runner = SupervisedRunner(device='cuda')
# Visualize the two input features colored by class label.
plt.scatter(X[:, 0], X[:, 1], c=y, cmap="viridis")
plt.show()

X_train, X_val, y_train, y_val = train_test_split(X, y, random_state=42)

# Exercise placeholders: the numpy splits must be converted to torch tensors
# here (file will not parse until they are filled in).
X_train_t = # YOUR CODE GOES HERE
y_train_t = # YOUR CODE GOES HERE
X_val_t = # YOUR CODE GOES HERE
y_val_t = # YOUR CODE GOES HERE

train_dataset = TensorDataset(X_train_t, y_train_t)
val_dataset = TensorDataset(X_val_t, y_val_t)
train_dataloader = DataLoader(train_dataset, batch_size=128)
val_dataloader = DataLoader(val_dataset, batch_size=128)

utils.set_global_seed(42)
linear_regression = LinearRegression(2, 1)
# BCE-with-logits: the model outputs raw logits for binary classification
loss_function = nn.BCEWithLogitsLoss()
optimizer = torch.optim.SGD(linear_regression.parameters(), lr=0.05)

tol = 1e-3  # weight-change tolerance used as the stopping criterion
losses = []
max_epochs = 100
prev_weights = torch.zeros_like(linear_regression.weights)
stop_it = False
for epoch in range(max_epochs):
    utils.set_global_seed(42 + epoch)  # per-epoch reseed for reproducibility
    for it, (X_batch, y_batch) in enumerate(train_dataloader):
        optimizer.zero_grad()
        # Exercise placeholders: forward pass and loss computation.
        outp = # YOUR CODE GOES HERE
        loss = # YOUR CODE GOES HERE
return [dictionary.doc2bow(d) for d in corpus] def evaluate_tfidf(index, tokenized_candidates, tfidf_corpus, tokenized_names): metrics = [] for i, example in tqdm(enumerate(tfidf_corpus)): top_5_idx = np.argsort(index.get_similarities(example))[-1:-5:-1] candidates = [tokenized_candidates[j] for j in top_5_idx] metrics.append(compute_metrics(tokenized_names[i], candidates)) return pd.DataFrame(metrics) if __name__ == "__main__": args = parse_args() set_global_seed(33) DATA_FOLDER = Path("data") train = read_jsonl(DATA_FOLDER / "train_preprocessed.jsonl") test = read_jsonl(DATA_FOLDER / "test_preprocessed.jsonl") body_key = 'function_body_tokenized' train_sentences = get_and_flatten(train, body_key) test_sentences = get_and_flatten(test, body_key) name_key = 'function_name_tokenized' train_names = get_and_flatten(train, name_key) test_names = get_and_flatten(test, name_key) EOS_TOKEN = '\\u\\u\\uNEWLINE\\u\\u\\u_' train_names = [name[:name.index(EOS_TOKEN)] for name in train_names] test_names = [name[:name.index(EOS_TOKEN)] for name in test_names]
from torch.autograd import Variable
from torch.nn import functional as F
import torchvision
from collections import OrderedDict
import os
import argparse
import time
import numpy as np
import pandas as pd

np.set_printoptions(threshold=np.inf)  # print arrays in full, never truncated
pd.options.display.width = 0  # let pandas auto-detect the terminal width

# reproduce
SEED = 15
set_global_seed(SEED)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# read csv folder
# read csv files
def data_together(filepath):
    # Walk `filepath` recursively, collecting paths of every .csv file.
    # (Definition continues past this chunk.)
    csvs = []
    dfs = []
    for subdir, dirs, files in os.walk(filepath):
        for file in files:
            # print os.path.join(subdir, file)
            # NOTE(review): this rebinding shadows the `filepath` parameter —
            # harmless here since os.walk already captured it, but confusing.
            filepath = subdir + os.sep + file
            if filepath.endswith(".csv"):
                csvs.append(filepath)
train_val_loaders, test_loaders = read_data(params) # initialize the model model = BertForSequenceClassification( pretrained_model_name=params["model"]["model_name"], num_classes=params["model"]["num_classes"], ) # specify criterion for the multi-class classification task, optimizer and scheduler criterion = torch.nn.CrossEntropyLoss() optimizer = torch.optim.Adam(model.parameters(), lr=float(params["training"]["learn_rate"])) scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer) # reproducibility set_global_seed(params["general"]["seed"]) prepare_cudnn(deterministic=True) # here we specify that we pass masks to the runner. So model's forward method will be called with # these arguments passed to it. runner = SupervisedRunner(input_key=("features", "attention_mask")) # finally, training the model with Catalyst runner.train( model=model, criterion=criterion, optimizer=optimizer, scheduler=scheduler, loaders=train_val_loaders, callbacks=[ AccuracyCallback(num_classes=int(params["model"]["num_classes"])),
import torch
from torch.nn.modules.loss import _Loss
import torch.nn as nn
from pytorch_toolbelt import losses as L

# create the log directory on first run
if not os.path.exists('logs_segmentation/'):
    os.mkdir('logs_segmentation/')

# c_kefir ent_cloacae kleiella_pneumoniae moraxella_catarrhalis staphylococcus_aureus staphylococcus_epidermidis
Experiment = 'efficientb7_60_250_unet'
DATA_DIR = 'train_classes_with_json/'

from catalyst.utils import set_global_seed

# reproducibility: seed every RNG source and force deterministic cuDNN.
# (set_global_seed presumably already covers torch/numpy — the explicit
# calls below look like belt-and-braces duplicates; verify before removing.)
set_global_seed(345)
torch.manual_seed(345)
torch.cuda.manual_seed_all(345)
np.random.seed(345)
torch.cuda.manual_seed(345)
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

import imgaug
from dataset import *


def _init_fn(worker_id):
    # DataLoader worker_init_fn: reseed numpy inside each worker process
    np.random.seed(int(345))


ENCODER = 'efficientnet-b7'
def seed_all(SEED):
    """Make the run reproducible: force deterministic cuDNN and seed all RNGs."""
    prepare_cudnn(deterministic=True)
    set_global_seed(SEED)
type=int, help="Starting learning rate", default=3e-4) parser.add_argument("--log_path", type=str, help="Path to logs", default="logs") parser.add_argument("--fold", type=int, help="Fold to validate on", default=0) parser.add_argument("--model", type=str, help="Model to train", default='efficientnet-b6') args = parser.parse_args() set_global_seed(42) DATA_FOLDER = args.data_folder FOLD = args.fold dataset = pd.read_csv(f"{DATA_FOLDER}/data.csv") transforms_train = A.Compose([ A.HorizontalFlip(p=0.5), A.VerticalFlip(p=0.5), A.Resize(height=args.image_height, width=args.image_width, p=1.0), A.Normalize(p=1.0), ], p=1.0) transforms_val = A.Compose([
def train_experiment(device, engine=None):
    """Metric-learning smoke test on MNIST.

    Trains a tiny embedding net with in-batch hard triplets, fits a sklearn
    RandomForest on the embeddings via a callback, and asserts the resulting
    accuracy on the held-out loader exceeds 0.8.
    """
    with TemporaryDirectory() as logdir:
        from catalyst import utils
        utils.set_global_seed(RANDOM_STATE)
        # 1. train, valid and test loaders
        transforms = Compose([ToTensor(), Normalize((0.1307, ), (0.3081, ))])
        train_data = MNIST(os.getcwd(), train=True, download=True, transform=transforms)
        train_labels = train_data.targets.cpu().numpy().tolist()
        # balanced batches: 10 classes x 4 samples per batch (triplet mining
        # needs several samples of each class in every batch)
        train_sampler = data.BatchBalanceClassSampler(train_labels,
                                                     num_classes=10,
                                                     num_samples=4)
        train_loader = DataLoader(train_data, batch_sampler=train_sampler)
        valid_dataset = MNIST(root=os.getcwd(), transform=transforms,
                              train=False, download=True)
        valid_loader = DataLoader(dataset=valid_dataset, batch_size=32)
        test_dataset = MNIST(root=os.getcwd(), transform=transforms,
                             train=False, download=True)
        test_loader = DataLoader(dataset=test_dataset, batch_size=32)
        # 2. model and optimizer: 16-dim embedding net
        model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 16),
                              nn.LeakyReLU(inplace=True))
        optimizer = Adam(model.parameters(), lr=LR)
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [2])
        # 3. criterion with triplets sampling
        sampler_inbatch = data.HardTripletsSampler(norm_required=False)
        criterion = nn.TripletMarginLossWithSampler(
            margin=0.5, sampler_inbatch=sampler_inbatch)

        # 4. training with catalyst Runner
        class CustomRunner(dl.SupervisedRunner):
            def handle_batch(self, batch) -> None:
                # expose embeddings + targets for the callbacks below
                images, targets = batch["features"].float(
                ), batch["targets"].long()
                features = self.model(images)
                self.batch = {
                    "embeddings": features,
                    "targets": targets,
                }

        callbacks = [
            # triplet loss only on the train loader
            dl.ControlFlowCallback(
                dl.CriterionCallback(input_key="embeddings",
                                     target_key="targets",
                                     metric_key="loss"),
                loaders="train",
            ),
            # fit a RandomForest on train embeddings, predict on valid/infer
            dl.SklearnModelCallback(
                feature_key="embeddings",
                target_key="targets",
                train_loader="train",
                valid_loaders=["valid", "infer"],
                model_fn=RandomForestClassifier,
                predict_method="predict_proba",
                predict_key="sklearn_predict",
                random_state=RANDOM_STATE,
                n_estimators=50,
            ),
            # accuracy measured on the sklearn predictions, not the net
            dl.ControlFlowCallback(
                dl.AccuracyCallback(target_key="targets",
                                    input_key="sklearn_predict",
                                    topk_args=(1, 3)),
                loaders=["valid", "infer"],
            ),
        ]
        runner = CustomRunner(input_key="features", output_key="embeddings")
        runner.train(
            engine=engine or dl.DeviceEngine(device),
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            scheduler=scheduler,
            callbacks=callbacks,
            loaders={
                "train": train_loader,
                "valid": valid_loader,
                "infer": test_loader
            },
            verbose=False,
            valid_loader="valid",
            valid_metric="accuracy",
            minimize_valid_metric=False,
            num_epochs=TRAIN_EPOCH,
            logdir=logdir,
        )
        valid_path = Path(logdir) / "logs/infer.csv"
        best_accuracy = max(
            float(row["accuracy"]) for row in read_csv(valid_path))
        assert best_accuracy > 0.8
def main(train, test, features, target):
    """Generic tabular-NN training template with KFold CV.

    Reads params from YAML, trains one model per fold with Catalyst, records
    the per-fold Spearman score and writes the averaged test predictions to
    ``submission.csv``. Returns True on completion.
    """
    # get args
    args = parse_arguments()
    params = yaml_to_json(args.yaml_path)

    # hyper param
    num_folds = params.fold
    seed = params.seed
    base_path = params.base_path
    target_cols = params.target
    features_cols = params.features
    preprocessed_data_path = params.preprocessed_data
    batch_size = params.batch_size
    num_epochs = params.epochs
    # ex) '/hoge/logs'
    base_logdir = params.base_logdir

    # fix seed
    set_global_seed(seed)
    device = get_device()

    # set up logdir
    now = datetime.now()
    # FIX: was os.path.join(base_logdir + ...) — string concatenation inside
    # join produced a path without a separator.
    base_logdir = os.path.join(base_logdir, now.strftime("%Y%m%d%H%M%S"))
    os.makedirs(base_logdir, exist_ok=True)

    # dump yaml contents
    with open(os.path.join(base_logdir, 'params.json'), mode="w") as f:
        json.dump(params, f, indent=4)

    # dump this scripts
    my_file_path = os.path.abspath(__file__)
    # FIX: copyfile needs a file destination, not a directory
    shutil.copyfile(
        my_file_path, os.path.join(base_logdir, os.path.basename(my_file_path)))

    # load dataset
    if preprocessed_data_path == '':
        train, test, sample_submission = read_data(base_path)  # noqa
        # TODO: You should implement these function!!
        train, test = preprocess(train, test)  # noqa
        train, test = build_feature(train, test)  # noqa
    else:
        train = pd.read_csv(preprocessed_data_path + 'train.csv')
        test = pd.read_csv(preprocessed_data_path + 'test.csv')
        sample_submission = pd.read_csv(
            preprocessed_data_path + 'sample_submission.csv')

    # execute CV
    # TODO: set your CV method
    # FIX: random_state is only meaningful (and only legal in recent sklearn)
    # together with shuffle=True.
    kf = KFold(n_splits=num_folds, shuffle=True, random_state=seed)
    ids = kf.split(train)

    fold_scores = []
    test_preds = []
    for fold, (train_idx, valid_idx) in enumerate(ids):
        print('Fold {}'.format(fold + 1))
        # FIX: same os.path.join concatenation bug as above
        logdir = os.path.join(base_logdir, 'fold_{}'.format(fold + 1))
        os.makedirs(logdir, exist_ok=True)

        # data
        X_train = train[features_cols]  # what about normalizing the target variable...?
        Y_train = train[target_cols]
        # FIX: test features must come from `test`, not `train`
        X_test = test[features_cols]

        # create dataloaders
        train_dls, test_dl = create_data_loader(
            X_train.iloc[train_idx].to_numpy(),
            Y_train.iloc[train_idx].to_numpy(),
            X_train.iloc[valid_idx].to_numpy(),
            Y_train.iloc[valid_idx].to_numpy(),
            X_test.to_numpy(),
            batch_size=batch_size)

        # init models
        # TODO: set your model and learning condition
        # (could be made more generic by looking models up by keyword)
        model = SampleNN(input_dim=1000, out_dim=1)
        criterion = nn.BCELoss()
        optimizer = torch.optim.AdamW(model.parameters())
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer)

        # init catalyst runner
        runner = SupervisedRunner(device=device)

        # model training
        runner.train(
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            scheduler=scheduler,
            loaders=train_dls,
            logdir=logdir,
            num_epochs=num_epochs,
            callbacks=[EarlyStoppingCallback(patience=15, min_delta=0)],
            verbose=False)

        # calculate valid score
        best_model_path = logdir + '/checkpoints/best.pth'
        val_preds = runner.predict_loader(model,
                                          train_dls['valid'],
                                          resume=best_model_path,
                                          verbose=False)
        val_truth = Y_train.iloc[valid_idx].values
        # TODO: set your score function
        cv_score = mean_spearmanr_correlation_score(val_truth, val_preds)
        print('Fold {} CV score : {}'.format(fold + 1, cv_score))
        fold_scores.append(cv_score)

        # test prediction
        # FIX: predictions were divided by num_folds here AND averaged with
        # np.mean below, scaling the ensemble by 1/num_folds twice.
        test_pred = runner.predict_loader(
            model, test_dl, resume=best_model_path, verbose=False)
        test_preds.append(test_pred)

    # submit
    # TODO: set your submit process
    sample_submission[target_cols] = np.mean(test_preds, axis=0)
    sample_submission.to_csv('submission.csv')
    return True
def main(args, unknown_args):
    """Entry point for RL sampler orchestration.

    Resolves the environment and algorithm from the config, then spawns one
    process per requested sampler: visualization, inference, validation and
    training samplers (training samplers get a linearly increasing
    exploration power). In ``--check`` mode a single sampler runs in-process.
    """
    args, config = parse_args_uargs(args, unknown_args)
    set_global_seed(args.seed)
    prepare_cudnn(args.deterministic, args.benchmark)

    # normalize optional counters (None -> 0)
    args.vis = args.vis or 0
    args.infer = args.infer or 0
    args.valid = args.valid or 0
    args.train = args.train or 0

    if args.expdir is not None:
        # importing the expdir registers user-defined components
        module = import_module(expdir=args.expdir)  # noqa: F841

    environment_name = config["environment"].pop("environment")
    environment_fn = ENVIRONMENTS.get(environment_name)

    algorithm_name = config["algorithm"].pop("algorithm")

    # pick the registry (and epoch-sync policy) matching the algorithm family
    if algorithm_name in OFFPOLICY_ALGORITHMS_NAMES:
        ALGORITHMS = OFFPOLICY_ALGORITHMS
        sync_epoch = False
    elif algorithm_name in ONPOLICY_ALGORITHMS_NAMES:
        ALGORITHMS = ONPOLICY_ALGORITHMS
        sync_epoch = True
    else:
        raise NotImplementedError()

    algorithm_fn = ALGORITHMS.get(algorithm_name)

    processes = []
    sampler_id = args.sampler_id

    def on_exit():
        # make sure no child sampler outlives the main process
        for p in processes:
            p.terminate()

    atexit.register(on_exit)

    # parameters shared by every spawned sampler
    params = dict(
        seed=args.seed,
        logdir=args.logdir,
        algorithm_fn=algorithm_fn,
        environment_fn=environment_fn,
        config=config,
        resume=args.resume,
        db=args.db,
        sync_epoch=sync_epoch
    )

    if args.check:
        # single in-process run; mode precedence: infer > valid > train
        mode = "train"
        mode = "valid" if (args.valid is not None and args.valid > 0) else mode
        mode = "infer" if (args.infer is not None and args.infer > 0) else mode
        params_ = dict(
            visualize=(args.vis is not None and args.vis > 0),
            mode=mode,
            id=sampler_id
        )
        run_sampler(**params, **params_)
        return

    # visualization samplers (inference mode, rendering on)
    for i in range(args.vis):
        params_ = dict(
            visualize=True, mode="infer", id=sampler_id, exploration_power=0.0
        )
        p = mp.Process(
            target=run_sampler,
            kwargs=dict(**params, **params_),
            daemon=args.daemon,
        )
        p.start()
        processes.append(p)
        sampler_id += 1
        # stagger process startup
        time.sleep(args.run_delay)

    # pure inference samplers (no exploration)
    for i in range(args.infer):
        params_ = dict(
            visualize=False, mode="infer", id=sampler_id, exploration_power=0.0
        )
        p = mp.Process(
            target=run_sampler,
            kwargs=dict(**params, **params_),
            daemon=args.daemon,
        )
        p.start()
        processes.append(p)
        sampler_id += 1
        time.sleep(args.run_delay)

    # validation samplers
    for i in range(args.valid):
        params_ = dict(
            visualize=False, mode="valid", id=sampler_id, exploration_power=0.0
        )
        p = mp.Process(
            target=run_sampler,
            kwargs=dict(**params, **params_),
            daemon=args.daemon,
        )
        p.start()
        processes.append(p)
        sampler_id += 1
        time.sleep(args.run_delay)

    # training samplers with exploration power ramping from 1/n up to 1.0
    for i in range(1, args.train + 1):
        exploration_power = i / args.train
        params_ = dict(
            visualize=False,
            mode="train",
            id=sampler_id,
            exploration_power=exploration_power
        )
        p = mp.Process(
            target=run_sampler,
            kwargs=dict(**params, **params_),
            daemon=args.daemon,
        )
        p.start()
        processes.append(p)
        sampler_id += 1
        time.sleep(args.run_delay)

    for p in processes:
        p.join()
def read_data(params: dict) -> Tuple[dict, dict]:
    """
    A custom function that reads data from CSV files, creates PyTorch datasets
    and data loaders. The output is provided to be easily used with Catalyst

    :param params: a dictionary read from the config.yml file
    :return: a tuple with 2 dictionaries
    """
    data_cfg = params["data"]
    model_cfg = params["model"]
    data_dir = Path(data_cfg["path_to_data"])

    def _dataset_from_csv(filename):
        # Read one split's CSV and wrap it into a text-classification dataset.
        frame = pd.read_csv(data_dir / filename)
        return TextClassificationDataset(
            texts=frame[data_cfg["text_field_name"]].values.tolist(),
            labels=frame[data_cfg["label_field_name"]].values,
            max_seq_length=model_cfg["max_seq_length"],
            model_name=model_cfg["model_name"],
        )

    train_dataset = _dataset_from_csv(data_cfg["train_filename"])
    valid_dataset = _dataset_from_csv(data_cfg["validation_filename"])
    test_dataset = _dataset_from_csv(data_cfg["test_filename"])

    # seed before loader creation so train shuffling is reproducible
    set_global_seed(params["general"]["seed"])

    batch_size = params["training"]["batch_size"]
    # loaders grouped into dictionaries for Catalyst
    train_val_loaders = {
        "train": DataLoader(
            dataset=train_dataset,
            batch_size=batch_size,
            shuffle=True,
        ),
        "valid": DataLoader(
            dataset=valid_dataset,
            batch_size=batch_size,
            shuffle=False,
        ),
    }
    test_loaders = {
        "test": DataLoader(
            dataset=test_dataset,
            batch_size=batch_size,
            shuffle=False,
        )
    }
    return train_val_loaders, test_loaders
def test_mnist(self):
    """Smoke-test SupervisedRunner on random MNIST-shaped data.

    Trains a small net for 3 epochs and checks (a) the train loss improved
    between epoch 0 and epoch 2 and (b) the best loss is below 0.35.
    """
    utils.set_global_seed(42)
    # random images + random one-hot labels standing in for MNIST
    x_train = np.random.random((100, 1, 28, 28)).astype(np.float32)
    y_train = _to_categorical(
        np.random.randint(10, size=(100, 1)), num_classes=10
    ).astype(np.float32)
    x_valid = np.random.random((20, 1, 28, 28)).astype(np.float32)
    y_valid = _to_categorical(
        np.random.randint(10, size=(20, 1)), num_classes=10
    ).astype(np.float32)

    x_train, y_train, x_valid, y_valid = \
        list(map(torch.tensor, [x_train, y_train, x_valid, y_valid]))

    bs = 32
    num_workers = 4
    # (removed unused local `data_transform = transforms.ToTensor()` —
    # tensors are built directly above, no transform is ever applied)

    loaders = collections.OrderedDict()
    trainset = torch.utils.data.TensorDataset(x_train, y_train)
    trainloader = torch.utils.data.DataLoader(
        trainset, batch_size=bs, shuffle=True, num_workers=num_workers)
    validset = torch.utils.data.TensorDataset(x_valid, y_valid)
    validloader = torch.utils.data.DataLoader(
        validset, batch_size=bs, shuffle=False, num_workers=num_workers)
    loaders["train"] = trainloader
    loaders["valid"] = validloader

    # experiment setup
    num_epochs = 3
    logdir = "./logs"

    # model, criterion, optimizer
    model = Net()
    criterion = nn.BCEWithLogitsLoss()  # one-hot targets -> BCE per class
    optimizer = torch.optim.Adam(model.parameters())

    # model runner
    runner = SupervisedRunner()

    # model training
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        logdir=logdir,
        num_epochs=num_epochs,
        verbose=False,
        callbacks=[CheckpointCallback(save_n_best=3)]
    )

    metrics = Safict.load("./logs/checkpoints/_metrics.json")
    # loss should decrease over training and reach a reasonable level
    metrics_flag1 = \
        metrics.get("train.2", "loss") < metrics.get("train.0", "loss")
    metrics_flag2 = metrics.get("best", "loss") < 0.35
    self.assertTrue(metrics_flag1)
    self.assertTrue(metrics_flag2)
def main(args):
    """Distill a CIFAR-100 teacher into a student model.

    Loads teacher weights (pretrained URL or local checkpoint), then trains
    the student with a weighted sum of cross-entropy, KL-divergence and
    attention/hidden-state losses; the distillation losses apply only on the
    train loader.
    """
    set_global_seed(42)
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
    datasets = {
        "train": Wrp(
            CIFAR100(root=".", train=True, download=True, transform=transform_train)),
        "valid": Wrp(CIFAR100(root=".", train=False, transform=transform_test)),
    }
    # shuffle only the training split
    loaders = {
        k: DataLoader(v, batch_size=args.batch_size, shuffle=k == "train", num_workers=2)
        for k, v in datasets.items()
    }
    teacher_model = NAME2MODEL[args.teacher](num_classes=100)
    # teacher weights: pretrained URL by default, local checkpoint if given
    if args.teacher_path is None:
        teacher_sd = load_state_dict_from_url(NAME2URL[args.teacher])
        teacher_model.load_state_dict(teacher_sd)
    else:
        unpack_checkpoint(torch.load(args.teacher_path), model=teacher_model)

    student_model = NAME2MODEL[args.student](num_classes=100)
    # only the student is optimized (see OptimizerCallback below)
    optimizer = torch.optim.SGD(student_model.parameters(),
                                lr=args.lr,
                                momentum=0.9,
                                weight_decay=5e-4)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [150, 180, 210],
                                                     gamma=0.1)
    runner = DistilRunner(apply_probability_shift=args.probability_shift)
    runner.train(model={
        "teacher": teacher_model,
        "student": student_model
    },
                 loaders=loaders,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 valid_metric="accuracy",
                 minimize_valid_metric=False,
                 logdir=args.logdir,
                 callbacks=[
                     # distillation losses computed on the train loader only
                     ControlFlowCallback(AttentionHiddenStatesCallback(),
                                         loaders="train"),
                     ControlFlowCallback(KLDivCallback(temperature=4),
                                         loaders="train"),
                     CriterionCallback(input_key="s_logits",
                                       target_key="targets",
                                       metric_key="cls_loss"),
                     # total loss = beta*attention + alpha*kl + (1-alpha)*cls
                     ControlFlowCallback(
                         MetricAggregationCallback(
                             prefix="loss",
                             metrics={
                                 "attention_loss": args.beta,
                                 "kl_div_loss": args.alpha,
                                 "cls_loss": 1 - args.alpha,
                             },
                             mode="weighted_sum",
                         ),
                         loaders="train",
                     ),
                     AccuracyCallback(input_key="s_logits",
                                      target_key="targets"),
                     # step the optimizer for the student model only
                     OptimizerCallback(metric_key="loss", model_key="student"),
                     SchedulerCallback(),
                 ],
                 valid_loader="valid",
                 num_epochs=args.num_epochs,
                 criterion=torch.nn.CrossEntropyLoss(),
                 seed=args.seed)
def get_loaders_from_params(
    batch_size: int = 1,
    num_workers: int = 0,
    drop_last: bool = False,
    per_gpu_scaling: bool = False,
    loaders_params: Dict[str, Any] = None,
    samplers_params: Dict[str, Any] = None,
    initial_seed: int = 42,
    get_datasets_fn: Callable = None,
    **data_params,
) -> "OrderedDict[str, DataLoader]":
    """
    Creates pytorch dataloaders from datasets and additional parameters.

    Args:
        batch_size (int): ``batch_size`` parameter
            from ``torch.utils.data.DataLoader``
        num_workers (int): ``num_workers`` parameter
            from ``torch.utils.data.DataLoader``
        drop_last (bool): ``drop_last`` parameter
            from ``torch.utils.data.DataLoader``
        per_gpu_scaling (bool): boolean flag,
            if ``True``, uses ``batch_size=batch_size*num_available_gpus``
        loaders_params (Dict[str, Any]): additional loaders parameters
        samplers_params (Dict[str, Any]): additional sampler parameters
        initial_seed (int): initial seed for ``torch.utils.data.DataLoader``
            workers
        get_datasets_fn(Callable): callable function to get dictionary with
            ``torch.utils.data.Datasets``
        **data_params: additional data parameters
            or dictionary with ``torch.utils.data.Datasets`` to use for
            pytorch dataloaders creation

    Returns:
        OrderedDict[str, DataLoader]: dictionary with
        ``torch.utils.data.DataLoader``

    Raises:
        NotImplementedError: if datasource is out of `Dataset` or dict
        ValueError: if batch_sampler option is mutually
            exclusive with distributed
    """
    default_batch_size = batch_size
    default_num_workers = num_workers

    loaders_params = loaders_params or {}
    assert isinstance(loaders_params, dict), (
        f"`loaders_params` should be a Dict. Got: {loaders_params}")
    samplers_params = samplers_params or {}
    assert isinstance(samplers_params, dict), (
        f"`samplers_params` should be a Dict. Got: {samplers_params}")
    # FIX: work on shallow copies — the loop below ``pop``s per-loader
    # entries, and mutating the caller-supplied dicts would corrupt a
    # config object that is reused across several calls/stages.
    loaders_params = dict(loaders_params)
    samplers_params = dict(samplers_params)

    distributed_rank = get_rank()
    distributed = distributed_rank > -1

    # Datasets either come from a factory callable or directly as kwargs.
    if get_datasets_fn is not None:
        datasets = get_datasets_fn(**data_params)
    else:
        datasets = dict(**data_params)

    loaders = OrderedDict()
    for name, datasource in datasets.items():  # noqa: WPS426
        assert isinstance(
            datasource,
            (Dataset,
             dict)), f"{datasource} should be Dataset or Dict. Got: {datasource}"

        loader_params = loaders_params.pop(name, {})
        assert isinstance(loader_params, dict), f"{loader_params} should be Dict"

        # Sampler priority: explicit samplers_params entry wins over a
        # "sampler" key embedded in a dict-style datasource.
        sampler_params = samplers_params.pop(name, None)
        if sampler_params is None:
            if isinstance(datasource, dict) and "sampler" in datasource:
                sampler = datasource.pop("sampler", None)
            else:
                sampler = None
        else:
            sampler = SAMPLER.get_from_params(**sampler_params)
            if isinstance(datasource, dict) and "sampler" in datasource:
                datasource.pop("sampler", None)

        batch_size = loader_params.pop("batch_size", default_batch_size)
        num_workers = loader_params.pop("num_workers", default_num_workers)

        # Scale batch/workers by GPU count only for non-distributed runs;
        # in distributed mode each process already owns one device.
        if per_gpu_scaling and not distributed:
            num_gpus = max(1, torch.cuda.device_count())
            batch_size *= num_gpus
            num_workers *= num_gpus

        loader_params = {
            "batch_size": batch_size,
            "num_workers": num_workers,
            "pin_memory": torch.cuda.is_available(),
            "drop_last": drop_last,
            **loader_params,
        }

        if isinstance(datasource, Dataset):
            loader_params["dataset"] = datasource
        elif isinstance(datasource, dict):
            assert ("dataset"
                    in datasource), "You need to specify dataset for dataloader"
            loader_params = merge_dicts(datasource, loader_params)
        else:
            raise NotImplementedError

        if distributed:
            # Wrap a user sampler for DDP, or create a default one.
            if sampler is not None:
                if not isinstance(sampler, DistributedSampler):
                    sampler = DistributedSamplerWrapper(sampler=sampler)
            else:
                sampler = DistributedSampler(dataset=loader_params["dataset"])

        # Shuffle only the train loader and only when no sampler is used —
        # DataLoader forbids shuffle together with a sampler.
        loader_params["shuffle"] = name.startswith("train") and sampler is None
        loader_params["sampler"] = sampler

        if "batch_sampler" in loader_params:
            if distributed:
                raise ValueError("batch_sampler option is mutually "
                                 "exclusive with distributed")
            # batch_sampler is mutually exclusive with these DataLoader args.
            for k in ("batch_size", "shuffle", "sampler", "drop_last"):
                loader_params.pop(k, None)

        if "worker_init_fn" not in loader_params:
            # Deterministic per-worker seeding (x is the worker id).
            loader_params["worker_init_fn"] = lambda x: set_global_seed(
                initial_seed + x)

        loaders[name] = DataLoader(**loader_params)

    return loaders
from torch import nn
from torch.optim import Adam
from torch.optim.lr_scheduler import ReduceLROnPlateau
from utils.dataset import get_train_val_dataloaders
from utils.callbacks import DiceCallback as MyDiceCallbak, IouCallback as MyIouCallback
from utils.coord_conv import CoordConv
from catalyst.dl import SupervisedRunner, DiceCallback, IouCallback
from catalyst.utils import set_global_seed, prepare_cudnn
import segmentation_models_pytorch as smp

# Reproducibility setup at import time: deterministic cudnn and a fixed seed.
prepare_cudnn(True, True)
set_global_seed(0)


class Model(nn.Module):
    """U-Net segmentation model (segmentation_models_pytorch) augmented with
    CoordConv layers.

    NOTE(review): ``forward`` is not visible in this chunk — presumably it
    applies ``coord_conv`` to the input before ``self.model`` and
    ``coord_conv_decoder`` somewhere in the decoder path; confirm against
    the full file.
    """

    def __init__(self, encoder):
        # encoder: smp encoder name (e.g. "resnet34"), pretrained on ImageNet.
        super().__init__()
        # CoordConv over the 3-channel RGB input (3 -> 3, with coord channels).
        self.coord_conv = CoordConv(3, 3, True, kernel_size=3, padding=1)
        # CoordConv over 16-channel feature maps — assumed to be the decoder
        # output; TODO confirm where this is used (forward not shown here).
        self.coord_conv_decoder = CoordConv(16, 16, True, kernel_size=3, padding=1)
        # 4-class U-Net with raw logits output (activation=None).
        self.model = smp.Unet(encoder, encoder_weights='imagenet', classes=4, activation=None)
def train_experiment(engine=None):
    """Metric-learning smoke test.

    Trains an embedding network with in-batch hard triplet mining, fits a
    RandomForest on the learned embeddings via ``SklearnModelCallback``,
    and requires top-1 validation accuracy above 0.9.
    """
    with TemporaryDirectory() as logdir:
        utils.set_global_seed(RANDOM_STATE)

        # 1. synthetic, fully-informative classification data
        n_samples, n_features, n_classes = int(1e4), int(30), 3
        features, labels = make_classification(
            n_samples=n_samples,
            n_features=n_features,
            n_informative=n_features,
            n_repeated=0,
            n_redundant=0,
            n_classes=n_classes,
            n_clusters_per_class=1,
        )
        dataset = TensorDataset(torch.tensor(features), torch.tensor(labels))
        loader = DataLoader(dataset, batch_size=64, num_workers=1, shuffle=True)

        # 2. embedding model, optimizer and LR schedule
        hidden_dim, embedding_dim = 20, 16
        model = nn.Sequential(
            nn.Linear(n_features, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, embedding_dim),
        )
        optimizer = Adam(model.parameters(), lr=LR)
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [2])

        # 3. triplet loss with in-batch hard negative/positive mining
        criterion = TripletMarginLossWithSampler(
            margin=0.5,
            sampler_inbatch=HardTripletsSampler(norm_required=False),
        )

        # 4. runner mapping (features, targets) -> (embeddings, targets)
        class EmbeddingRunner(dl.SupervisedRunner):
            def handle_batch(self, batch) -> None:
                embeddings = self.model(batch["features"].float())
                self.batch = {
                    "embeddings": embeddings,
                    "targets": batch["targets"].long(),
                }

        # Fit sklearn model on train embeddings, predict on valid.
        sklearn_callback = dl.SklearnModelCallback(
            feature_key="embeddings",
            target_key="targets",
            train_loader="train",
            valid_loaders="valid",
            model_fn=RandomForestClassifier,
            predict_method="predict_proba",
            predict_key="sklearn_predict",
            random_state=RANDOM_STATE,
            n_estimators=100,
        )
        # Accuracy over the sklearn predictions, valid loader only.
        accuracy_callback = dl.ControlFlowCallbackWrapper(
            dl.AccuracyCallback(
                target_key="targets",
                input_key="sklearn_predict",
                topk=(1, 3),
            ),
            loaders="valid",
        )

        runner = EmbeddingRunner(input_key="features", output_key="embeddings")
        runner.train(
            engine=engine,
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            callbacks=[sklearn_callback, accuracy_callback],
            scheduler=scheduler,
            loaders={"train": loader, "valid": loader},
            verbose=False,
            valid_loader="valid",
            valid_metric="accuracy01",
            minimize_valid_metric=False,
            num_epochs=TRAIN_EPOCH,
            logdir=logdir,
        )

        best_accuracy = max(
            epoch_metrics["valid"]["accuracy01"]
            for epoch_metrics in runner.experiment_metrics.values()
        )
        assert best_accuracy > 0.9
def main():
    """Train a U-Net segmentation model from a YAML config, then run
    inference on the test set and save the predictions.

    Reads train/test array paths from CLI args and everything else
    (encoder, hyperparameters, loader settings) from ``config/igvc.yaml``
    located next to this file.
    """
    # Enable argument parsing for file paths
    args = vars(get_args())
    train_images_path = args["train_images"]
    train_masks_path = args["train_masks"]
    test_images_path = args["test_images"]
    test_masks_path = args["test_masks"]

    # Load the yaml configuration that lives next to this file.
    # FIX: use a context manager — the original ``open()`` inside
    # ``yaml.safe_load`` leaked the file handle.
    dir_path = os.path.dirname(os.path.realpath(__file__))
    yaml_path = os.path.join(dir_path, "config/igvc.yaml")
    with open(yaml_path, "r") as yaml_file:
        ARCH = yaml.safe_load(yaml_file)

    # Set a seed for reproducibility
    utils.set_global_seed(ARCH["train"]["seed"])
    utils.prepare_cudnn(deterministic=ARCH["train"]["cudnn"])

    # Set up U-Net with pretrained EfficientNet backbone
    model = smp.Unet(
        encoder_name=ARCH["encoder"]["name"],
        encoder_weights=ARCH["encoder"]["weight"],
        classes=ARCH["train"]["classes"],
        activation=ARCH["encoder"]["activation"],
    )

    # Torch loaders built from the numpy arrays on disk
    loaders = get_loaders(
        images=np.load(train_images_path),
        masks=np.load(train_masks_path),
        image_arr_path=train_images_path,
        mask_arr_path=train_masks_path,
        random_state=ARCH["train"]["random_state"],
        valid_size=ARCH["train"]["valid_size"],
        batch_size=ARCH["train"]["batch_size"],
        num_workers=ARCH["train"]["num_workers"],
    )

    # Optimize for cross entropy using AdamW
    criterion = {
        "CE": CrossentropyND(),
    }
    optimizer = AdamW(
        model.parameters(),
        lr=ARCH["train"]["lr"],
        betas=(ARCH["train"]["betas_min"], ARCH["train"]["betas_max"]),
        eps=float(ARCH["train"]["eps"]),
        weight_decay=ARCH["train"]["w_decay"],
        amsgrad=ARCH["train"]["amsgrad"],
    )
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        factor=ARCH["train"]["optim_factor"],
        patience=ARCH["train"]["optim_patience"],
    )

    device = utils.get_device()
    print("Using device: {}".format(device))
    print(f"torch: {torch.__version__}, catalyst: {catalyst.__version__}")

    runner = SupervisedRunner(device=device,
                              input_key="image",
                              input_target_key="mask")

    # Use Catalyst callbacks for metric calculations during training
    callbacks = [
        CriterionCallback(input_key="mask", prefix="loss", criterion_key="CE"),
        MulticlassDiceMetricCallback(input_key="mask"),
    ]

    # Train and print model training logs
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        callbacks=callbacks,
        logdir=ARCH["train"]["logdir"],
        num_epochs=ARCH["train"]["epochs"],
        main_metric="loss",
        minimize_metric=ARCH["train"]["minimize_metric"],
        fp16=ARCH["train"]["fp16"],
        verbose=ARCH["train"]["verbose"],
    )

    # Test model on test dataset
    test_data = SegmentationDataset(test_images_path, test_masks_path)
    infer_loader = DataLoader(
        test_data,
        batch_size=ARCH["test"]["batch_size"],
        shuffle=ARCH["test"]["shuffle"],
        num_workers=ARCH["test"]["num_workers"],
    )

    # Get model predictions on test dataset.
    # FIX: dropped the no-op ``f`` prefix on the constant resume path.
    # NOTE(review): this checkpoint path is hard-coded — consider moving
    # it into the YAML config.
    predictions = np.vstack(
        list(
            map(
                lambda x: x["logits"].cpu().numpy(),
                runner.predict_loader(
                    loader=infer_loader,
                    resume="content/full_model2/checkpoints/best.pth",
                ),
            )))
    save_result(predictions, test_data)
def work(self):
    """Run a Catalyst experiment inside this executor.

    Loads the experiment/runner classes from ``args.expdir``, patches the
    experiment's callback factory to merge in executor-level callbacks and
    checkpoint fixes, handles distributed resume bookkeeping, runs the
    experiment, and optionally traces the final model.

    Returns:
        dict: ``{'stage': <last stage name>, 'stages': <all stage names>}``.

    NOTE(review): the body below is reconstructed from a collapsed source
    line; statement indentation inside the two ``if`` blocks should be
    confirmed against the original file.
    """
    args, config = self.parse_args_uargs()
    set_global_seed(args.seed)

    # Dynamically import the Experiment and Runner classes from the
    # experiment directory.
    Experiment, R = import_experiment_and_runner(Path(args.expdir))
    runner_params = config.pop('runner_params', {})
    experiment = Experiment(config)
    runner: Runner = R(**runner_params)
    register()

    self.experiment = experiment
    self.runner = runner
    # Copy the stage list before the stages_config may be narrowed below.
    stages = experiment.stages[:]

    if self.master:
        # On a child task, progress steps are tracked on the parent.
        task = self.task if not self.task.parent \
            else self.task_provider.by_id(self.task.parent)
        task.steps = len(stages)
        self.task_provider.commit()
        self._checkpoint_fix_config(experiment)

    # Monkey-patch the experiment's callback factory: executor callbacks
    # are the base, experiment callbacks override by key, then the
    # checkpoint callback is fixed up.
    _get_callbacks = experiment.get_callbacks

    def get_callbacks(stage):
        res = self.callbacks()
        for k, v in _get_callbacks(stage).items():
            res[k] = v
        self._checkpoint_fix_callback(res)
        return res

    experiment.get_callbacks = get_callbacks

    if experiment.logdir is not None:
        dump_environment(config, experiment.logdir, args.configs)

    if self.distr_info:
        # Record resume info so distributed workers can locate the
        # master's checkpoints (master task id = own id minus rank).
        info = yaml_load(self.task.additional_info)
        info['resume'] = {
            'master_computer': self.distr_info['master_computer'],
            'master_task_id': self.task.id - self.distr_info['rank'],
            'load_best': True
        }
        self.task.additional_info = yaml_dump(info)
        self.task_provider.commit()

        # Distributed workers run only the first stage.
        experiment.stages_config = {
            k: v
            for k, v in experiment.stages_config.items()
            if k == experiment.stages[0]
        }

    runner.run_experiment(experiment, check=args.check)

    if self.master and self.trace:
        # Trace the best checkpoint to TorchScript and save it.
        traced = trace_model_from_checkpoint(self.experiment.logdir, self)
        torch.jit.save(traced, self.trace)

    return {'stage': experiment.stages[-1], 'stages': stages}
# flake8: noqa import torch from torch.utils.data import DataLoader, TensorDataset from catalyst import dl, utils # data utils.set_global_seed(42) num_samples, num_features = int(32e1), int(1e1) X, y = torch.rand(num_samples, num_features), torch.rand(num_samples) dataset = TensorDataset(X, y) loader = DataLoader(dataset, batch_size=32, num_workers=1) loaders = {"train": loader, "valid": loader} # model, criterion, optimizer, scheduler model = torch.nn.Linear(num_features, 1) criterion = torch.nn.MSELoss() optimizer = torch.optim.Adam(model.parameters()) scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [3, 6]) class BatchOverfitCallbackCheck(dl.Callback): def __init__(self): super().__init__(order=dl.CallbackOrder.external) def on_loader_start(self, runner): # 320 samples with 32 batch size # -> 1 batch size = 32 # -> 0.1 portion = 32 assert len(runner.loaders[runner.loader_key]) == 32
def run_sampler(*,
                config,
                logdir,
                algorithm_fn,
                environment_fn,
                visualize,
                mode,
                seed=42,
                id=None,
                resume=None,
                db=True,
                exploration_power=1.0,
                sync_epoch=False):
    """Build and run a single RL sampler process.

    Instantiates the environment, the agent (from ``algorithm_fn``), an
    optional exploration handler and an optional database client, then
    starts the sampling loop.  ``seed`` is offset by the sampler ``id`` so
    parallel samplers explore different trajectories.
    """
    config_ = copy.deepcopy(config)

    # Per-sampler seeding: sampler 0 keeps the base seed.
    id = 0 if id is None else id
    seed = seed + id
    set_global_seed(seed)

    # Optional database connection for trajectory/weights exchange.
    db_server = None
    if db:
        db_server = DATABASES.get_from_params(
            **config.get("db", {}), sync_epoch=sync_epoch)

    env = environment_fn(
        **config_["environment"],
        visualize=visualize,
        mode=mode,
        sampler_id=id,
    )
    agent = algorithm_fn.prepare_for_sampler(env_spec=env, config=config_)

    # Exploration: a list of exploration specs from the sampler config.
    exploration_params = config_["sampler"].pop("exploration_params", None)
    if exploration_params is None:
        exploration_handler = None
    else:
        exploration_handler = ExplorationHandler(env=env, *exploration_params)
        exploration_handler.set_power(exploration_power)

    # Pop all three mode-specific seed lists (side effect: they must not
    # remain in the sampler kwargs below), then pick the one for ``mode``.
    seeds = {
        k: config_["sampler"].pop(f"{k}_seeds", None)
        for k in ["train", "valid", "infer"]
    }[mode]

    # Which network's weights to sync from the trainer.
    if algorithm_fn in OFFPOLICY_ALGORITHMS.values():
        weights_sync_mode = "critic" if env.discrete_actions else "actor"
    elif algorithm_fn in ONPOLICY_ALGORITHMS.values():
        weights_sync_mode = "actor"
    else:
        # @TODO: add registry for algorithms, trainers, samplers
        raise NotImplementedError()

    sampler_fn = ValidSampler if mode in ["valid"] else Sampler

    monitoring_params = config.get("monitoring_params", None)

    sampler = sampler_fn(
        agent=agent,
        env=env,
        db_server=db_server,
        exploration_handler=exploration_handler,
        logdir=logdir,
        id=id,
        mode=mode,
        weights_sync_mode=weights_sync_mode,
        seeds=seeds,
        monitoring_params=monitoring_params,
        **config_["sampler"],
    )

    if resume is not None:
        sampler.load_checkpoint(filepath=resume)

    sampler.run()
def main(args):
    """Distil a HuggingFace teacher classifier into a student.

    Trains with a KL-divergence loss on logits, an optional MSE loss on
    selected hidden states (when ``args.beta > 0``), and the task loss;
    optionally logs the best accuracy to wandb.
    """
    if args.wandb:
        import wandb

        wandb.init()
        logdir = args.logdir + "/" + wandb.run.name
    else:
        logdir = args.logdir
    set_global_seed(args.seed)

    # Tokenize the dataset and expose labels as torch tensors.
    datasets = load_dataset(args.dataset)
    tokenizer = AutoTokenizer.from_pretrained(args.teacher_model)
    datasets = datasets.map(
        lambda e: tokenizer(
            e["text"], truncation=True, padding="max_length", max_length=128),
        batched=True,
    )
    datasets = datasets.map(lambda e: {"labels": e["label"]}, batched=True)
    datasets.set_format(
        type="torch",
        columns=["input_ids", "token_type_ids", "attention_mask", "labels"],
    )
    loaders = {
        "train": DataLoader(datasets["train"],
                            batch_size=args.batch_size,
                            shuffle=True),
        "valid": DataLoader(datasets["test"], batch_size=args.batch_size),
    }

    # Teacher (with fine-tuned checkpoint) and student models.
    teacher_model = AutoModelForSequenceClassification.from_pretrained(
        args.teacher_model, num_labels=args.num_labels)
    unpack_checkpoint(torch.load(args.teacher_path), model=teacher_model)
    student_model = AutoModelForSequenceClassification.from_pretrained(
        args.student_model, num_labels=args.num_labels)

    metric_callback = LoaderMetricCallback(
        metric=HFMetric(metric=load_metric("accuracy")),
        input_key="s_logits",
        target_key="labels",
    )
    # Which teacher hidden-state layers participate in distillation.
    layers = [int(layer) for layer in args.layers.split(",")]
    slct_callback = ControlFlowCallback(
        HiddenStatesSelectCallback(hiddens_key="t_hidden_states",
                                   layers=layers),
        loaders="train",
    )
    lambda_hiddens_callback = ControlFlowCallback(
        LambdaPreprocessCallback(lambda s_hiddens, t_hiddens: (
            [c_s[:, 0] for c_s in s_hiddens],
            [t_s[:, 0] for t_s in t_hiddens],  # tooks only CLS token
        )),
        loaders="train",
    )
    kl_div = ControlFlowCallback(
        KLDivCallback(temperature=args.kl_temperature), loaders="train")

    callbacks = [
        metric_callback,
        slct_callback,
        lambda_hiddens_callback,
        kl_div,
        OptimizerCallback(metric_key="loss"),
        CheckpointCallback(logdir=logdir,
                           loader_key="valid",
                           mode="model",
                           metric_key="accuracy",
                           minimize=False),
    ]

    # FIX(DRY): the original duplicated the MetricAggregationCallback
    # construction in both branches of ``args.beta > 0``, differing only
    # in whether the MSE hidden-state loss participates. Build the
    # weights dict once and append the MSE callback conditionally.
    loss_metrics = {
        "kl_div_loss": args.alpha,
        "task_loss": 1 - args.alpha,
    }
    if args.beta > 0:
        loss_metrics["mse_loss"] = args.beta
        callbacks.append(
            ControlFlowCallback(MSEHiddenStatesCallback(), loaders="train"))
    callbacks.append(
        ControlFlowCallback(
            MetricAggregationCallback(
                prefix="loss",
                metrics=loss_metrics,
                mode="weighted_sum",
            ),
            loaders="train",
        ))

    runner = HFDistilRunner()
    runner.train(model=torch.nn.ModuleDict({
        "teacher": teacher_model,
        "student": student_model
    }),
                 loaders=loaders,
                 optimizer=torch.optim.Adam(student_model.parameters(),
                                            lr=args.lr),
                 callbacks=callbacks,
                 num_epochs=args.num_epochs,
                 valid_metric="accuracy",
                 logdir=logdir,
                 minimize_valid_metric=False,
                 valid_loader="valid",
                 verbose=args.verbose,
                 seed=args.seed)

    if args.wandb:
        import csv
        import shutil

        # Report the best accuracy over the last run's epochs, then drop
        # the local logdir (wandb keeps the record).
        with open(logdir + "/valid.csv") as fi:
            reader = csv.DictReader(fi)
            # Skip repeated header rows that the logger interleaves.
            accuracy = [
                float(row["accuracy"])
                for row in reader
                if row["accuracy"] != "accuracy"
            ]
        wandb.log({"accuracy": max(accuracy[-args.num_epochs:])})
        shutil.rmtree(logdir)