def test_experiment_restoration(nested_dict_config, tmpdir):
    """An experiment can be restored by identifier or by directory path."""
    experiments_dir = tmpdir.join("experiments").strpath

    # Create the initial experiment and register an extra directory.
    experiment = Experiment(nested_dict_config, experiments_dir=experiments_dir)
    experiment.register_directory("temp")

    # A second experiment built from the same config produces the same
    # identifier, so creation must fail.
    with pytest.raises(ValueError):
        experiment = Experiment(
            nested_dict_config, experiments_dir=experiments_dir)

    # Restore using the identifier alone.
    experiment = Experiment(
        resume_from=experiment.config.identifier,
        experiments_dir=experiments_dir)
    assert experiment.config.to_dict() == nested_dict_config
    # The previously registered `temp` directory is available again.
    assert os.path.isdir(experiment.temp)

    # Restore using the full experiment directory path.
    experiment = Experiment(resume_from=os.path.join(
        experiments_dir, experiment.config.identifier))
    assert experiment.config.to_dict() == nested_dict_config
    # `temp` is registered after this form of restoration as well.
    assert os.path.isdir(experiment.temp)
def test_experiment_register_directory(nested_dict_config, tmpdir):
    """`register_directory` creates the directory and exposes it as an attribute."""
    experiments_dir = tmpdir.join("experiments").strpath
    experiment = Experiment(nested_dict_config, experiments_dir=experiments_dir)

    experiment.register_directory("temp")

    expected_path = os.path.join(experiment.experiment_dir, "temp")
    assert os.path.isdir(expected_path)
    assert experiment.temp == expected_path
def do_exp(model_name, params, _dir, preproc_name, padding):
    """Perform an experiment using the specified parameters.

    Args:
        model_name (str): name of the model, resolved via ``get_model``.
        params (dict): specific hyperparameter set to use.
        _dir (str): directory in which the mag experiment is stored.
        preproc_name (str): name of the preprocessing filter, resolved
            via ``get_filter``.
        padding: padding setting forwarded to ``get_data``.

    Returns:
        (float): score found using the specified hyperparameters, or 0
        (the worst possible score) if the experiment raises ValueError.
    """
    model = get_model(model_name)
    preproc = get_filter(preproc_name)
    # Train on TIMIT 'train', evaluate on 'val'; the TIMIT root path is
    # relative to where this script runs.
    X_train, y_train = get_data(dataset='train', preprocessor=preproc,
                                TIMIT_root='../../TIMIT/TIMIT', padding=padding)
    X_test, y_test = get_data(dataset='val', preprocessor=preproc,
                              TIMIT_root='../../TIMIT/TIMIT', padding=padding)
    try:
        with Experiment(config=params, experiments_dir=_dir) as experiment:
            score = run_model(model, X_train, y_train, X_test, y_test, params)
            experiment.register_result('score', score)
    except ValueError:
        # if something breaks, return the worst score possible
        return 0
    return score
def test_experiment_commit_hash_saving(nested_dict_config, tmpdir):
    """A `commit_hash` file is written into the experiment directory on creation."""
    experiments_dir = tmpdir.join("experiments").strpath
    experiment = Experiment(nested_dict_config, experiments_dir=experiments_dir)
    commit_hash_file = os.path.join(experiment.experiment_dir, "commit_hash")
    assert os.path.isfile(commit_hash_file)
def test_experiment_initialization(nested_dict_config, tmpdir):
    """Creating an experiment persists its config as config.json on disk."""
    experiments_dir = tmpdir.join("experiments").strpath
    experiment = Experiment(nested_dict_config, experiments_dir=experiments_dir)

    saved_config_path = os.path.join(
        experiments_dir, experiment.config.identifier, "config.json")
    restored = Config.from_json(saved_config_path)
    assert restored.to_dict() == nested_dict_config
def test_experiment_logging(nested_dict_config, tmpdir):
    """stdout is mirrored to the log file only inside the `with` block."""
    experiments_dir = tmpdir.join("experiments").strpath

    with Experiment(nested_dict_config,
                    experiments_dir=experiments_dir) as experiment:
        print("test")
        with open(experiment.log_file, "r") as f:
            assert f.readlines()[-1].strip() == "test"

    print("test2")
    # Output produced outside the `with` block must not reach the log file:
    # the last logged line is still the one written inside the block.
    with open(experiment.log_file, "r") as f:
        assert f.readlines()[-1].strip() == "test"
def single_experiment(model, data, params):
    """Apply the model to the data and store the results using mag.

    Args:
        model (str): name of callable model constructor in the current
            namespace, resolved via ``get_model``.
        data (str): specify the TIMIT data sets to use. If specified,
            must be one of {'full', 'toy'}; ``None`` defaults to 'full'.
        params (dict): dictionary with parameters for model.

    Raises:
        ValueError: if ``data`` is neither 'toy' nor 'full'.
    """
    # prepare the experiment directory: timestamped, with a TOY_ prefix
    # for toy runs. (The original code also had a dead, immediately
    # overwritten assignment to `_dir` here — removed.)
    ts = time.time()
    st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d-%H-%M-%S')
    if data is not None and data.lower() == 'toy':
        _dir = "./results/TOY_" + model + "_" + st + '/'
    else:
        _dir = "./results/" + model + "_" + st + '/'

    # select the model to be used (rebinds `model` from name to callable)
    model = get_model(model)

    # get the specified dataset
    if data is None:
        data = "full"
    if data.lower() == "toy":
        # Toy runs validate on the training data itself.
        X_train, y_train = get_data(dataset='toy', preprocessor=mel,
                                    TIMIT_root='../TIMIT/TIMIT',
                                    use_cache=True)
        X_test, y_test = X_train, y_train
    elif data == "full":
        X_train, y_train = get_data(dataset='train', preprocessor=mel,
                                    TIMIT_root='../TIMIT/TIMIT',
                                    use_cache=True)
        X_test, y_test = get_data(dataset='val', preprocessor=mel,
                                  TIMIT_root='../TIMIT/TIMIT',
                                  use_cache=True)
    else:
        raise ValueError("data must be one of {'toy', 'full'}")

    with Experiment(config=params, experiments_dir=_dir) as experiment:
        score = run_model(model, X_train, y_train, X_test, y_test, params)
        experiment.register_result('score', score)
def test_experiment_register_result(simple_dict_config, tmpdir):
    """Dotted result names are stored as nested dictionaries."""
    experiments_dir = tmpdir.join("experiments").strpath
    experiment = Experiment(simple_dict_config, experiments_dir=experiments_dir)

    # Register in the same order as values would naturally be produced.
    recorded = [
        ("fold1.accuracy", 0.97),
        ("fold2.accuracy", 0.99),
        ("fold1.loss", 0.03),
        ("fold2.loss", 0.01),
        ("overall_accuracy", 0.98),
    ]
    for name, value in recorded:
        experiment.register_result(name, value)

    results = experiment.results.to_dict()
    # Dotted names become nested keys; plain names stay top-level.
    assert results["fold1"]["accuracy"] == 0.97
    assert results["fold2"]["accuracy"] == 0.99
    assert results["fold1"]["loss"] == 0.03
    assert results["fold2"]["loss"] == 0.01
    assert results["overall_accuracy"] == 0.98
def do_exp(params, _dir, X_train, y_train, X_test, y_test, result_dict):
    """Perform an experiment using the specified parameters.

    Args:
        params (dict): specific hyperparameter set to use.
        _dir (str): directory in which the mag experiment is stored.
        X_train, y_train: training features/labels passed to ``run_model``.
        X_test, y_test: evaluation features/labels passed to ``run_model``.
        result_dict (dict): mapping of hyperparameter name -> list; every
            value in ``params`` is appended to it (mutated in place).

    Returns:
        (float): the negated score (suitable for a minimizer), or
        ``np.inf`` (the worst value) if the experiment raises ValueError.
    """
    # NOTE(review): `model` is not a parameter of this function — it
    # appears to come from an enclosing/global scope. Confirm it is
    # bound before this function is called.
    try:
        with Experiment(config=params, experiments_dir=_dir) as experiment:
            score = run_model(model, X_train, y_train, X_test, y_test, params)
            # save the params and score
            for k in params.keys():
                result_dict[k].append(params[k])
            experiment.register_result('score', score)
    except ValueError:
        # if something breaks, return the worst score possible
        return np.inf
    return -score  # pylint: disable=invalid-unary-operand-type
"--num_workers", type=int, default=4, help="number of workers for data loader", ) parser.add_argument( "--label", type=str, default="finetuned_hierarchical_cnn_classifier", help="optional label", ) args = parser.parse_args() class_map = load_json(args.classmap) pretrained = Experiment(resume_from=args.pretrained_model) with Experiment({ "network": { "num_conv_blocks": pretrained.config.network.num_conv_blocks, "start_deep_supervision_on": pretrained.config.network.start_deep_supervision_on, "conv_base_depth": pretrained.config.network.conv_base_depth, "growth_rate": pretrained.config.network.growth_rate, "dropout": args.dropout, "output_dropout": args.output_dropout, }, "data": { "_n_folds": args.n_folds, "_kfold_seed": args.kfold_seed, "n_fft": pretrained.config.data.n_fft,
"bert_model": args.bert_model.replace("-", "_"), "batch_accumulation": args.batch_accumulation, "batch_size": args.batch_size, "warmup": args.warmup, "lr": args.lr, "folds": args.folds, "max_sequence_length": args.max_sequence_length, "max_title_length": args.max_title_length, "max_question_length": args.max_question_length, "max_answer_length": args.max_answer_length, "head_tail": args.head_tail, "label": args.label, "_pseudo_file": args.pseudo_file, "model_type": args.model_type, } experiment = Experiment(config, implicit_resuming=args.use_folds is not None) experiment.register_directory("checkpoints") experiment.register_directory("predictions") def seed_everything(seed: int): random.seed(seed) os.environ["PYTHONHASHSEED"] = str(seed) np.random.seed(seed) torch.manual_seed(seed) torch.cuda.manual_seed(seed) torch.backends.cudnn.deterministic = True logging.getLogger("transformers").setLevel(logging.ERROR) seed_everything(args.seed)
"_bert_model": args.bert_model, "batch_accumulation": args.batch_accumulation, "batch_size": args.batch_size, "warmup": args.warmup, "lr": args.lr, "folds": args.folds, "max_sequence_length": args.max_sequence_length, "max_title_length": args.max_title_length, "max_question_length": args.max_question_length, "max_answer_length": args.max_answer_length, "head_tail": args.head_tail, "label": args.label, "split_pseudo": args.split_pseudo, "_pseudo_file": args.pseudo_file, } experiment = Experiment(config) experiment.register_directory("checkpoints") experiment.register_directory("predictions") def seed_everything(seed: int): random.seed(seed) os.environ["PYTHONHASHSEED"] = str(seed) np.random.seed(seed) torch.manual_seed(seed) torch.cuda.manual_seed(seed) torch.backends.cudnn.deterministic = True logging.getLogger("transformers").setLevel(logging.ERROR) seed_everything(args.seed)
with Experiment( { "network": { "num_conv_blocks": args.num_conv_blocks, "start_deep_supervision_on": args.start_deep_supervision_on, "conv_base_depth": args.conv_base_depth, "growth_rate": args.growth_rate, "output_dropout": args.output_dropout, "aggregation_type": args.aggregation_type }, "data": { "features": args.features, "_n_folds": args.n_folds, "_kfold_seed": args.kfold_seed, "_input_dim": audio_transform.n_features, "p_mixup": args.p_mixup, "p_aug": args.p_aug, "max_audio_length": args.max_audio_length, "_train_df": args.train_df, "_train_data_dir": args.train_data_dir }, "train": { "accumulation_steps": args.accumulation_steps, "batch_size": args.batch_size, "learning_rate": args.lr, "scheduler": args.scheduler, "optimizer": args.optimizer, "epochs": args.epochs, "_save_every": args.save_every, "weight_decay": args.weight_decay, "switch_off_augmentations_on": args.switch_off_augmentations_on }, "label": args.label }, implicit_resuming=args.resume) as experiment:
help="How frequenlty to plot samples from current distribution." ) parser.add_argument( "--plot_points", type=int, default=1000, help="How many to points to generate for one plot." ) args = parser.parse_args() torch.manual_seed(42) with Experiment({ "batch_size": 40, "iterations": 10000, "initial_lr": 0.01, "lr_decay": 0.999, "flow_length": 16, "name": "planar" }) as experiment: config = experiment.config experiment.register_directory("samples") experiment.register_directory("distributions") flow = NormalizingFlow(dim=2, flow_length=config.flow_length) bound = FreeEnergyBound(density=p_z) optimizer = optim.RMSprop(flow.parameters(), lr=config.initial_lr) scheduler = optim.lr_scheduler.ExponentialLR(optimizer, config.lr_decay) plot_density(p_z, directory=experiment.distributions)
help="whether to train on cuda or cpu", choices=("cuda", "cpu")) parser.add_argument( "--num_workers", type=int, default=4, help="number of workers for data loader", ) args = parser.parse_args() class_map = load_json(args.classmap) train_df = pd.read_csv(args.train_df) with Experiment(resume_from=args.experiment) as experiment: config = experiment.config audio_transform = AudioFeatures(config.data.features) splits = list( train_validation_data_stratified(train_df.fname, train_df.labels, class_map, config.data._n_folds, config.data._kfold_seed)) all_labels = np.zeros(shape=(len(train_df), len(class_map)), dtype=np.float32) all_predictions = np.zeros(shape=(len(train_df), len(class_map)), dtype=np.float32)
audio_transform = AudioFeatures(args.features) with Experiment({ "network": { "prediction_steps": args.prediction_steps, "rnn_size": args.rnn_size, "rnn_layers": args.rnn_layers }, "data": { "features": args.features, "_n_folds": args.n_folds, "_kfold_seed": args.kfold_seed, "_input_dim": audio_transform.n_features, "p_aug": args.p_aug, "max_audio_length": args.max_audio_length }, "train": { "_proj_interval": args.proj_interval, "accumulation_steps": args.accumulation_steps, "batch_size": args.batch_size, "learning_rate": args.lr, "scheduler": args.scheduler, "optimizer": args.optimizer, "epochs": args.epochs, "_save_every": args.save_every, "weight_decay": args.weight_decay, "switch_off_augmentations_on": args.switch_off_augmentations_on }, "label": args.label }) as experiment: config = experiment.config
from torch.utils.data import DataLoader from transformers import BertTokenizer, RobertaTokenizer mag.use_custom_separator("-") parser = argparse.ArgumentParser() parser.add_argument("--experiment", type=str, required=True) parser.add_argument("--checkpoint", type=str, required=True) parser.add_argument("--bert_model", type=str, required=True) parser.add_argument("--dataframe", type=str, required=True) parser.add_argument("--output_dir", type=str, required=True) args = parser.parse_args() experiment = Experiment(resume_from=args.experiment) config = experiment.config logging.getLogger("transformers").setLevel(logging.ERROR) test_df = pd.read_csv(args.dataframe) original_args = argparse.Namespace( folds=config.folds, lr=config.lr, batch_size=config.batch_size, seed=config._seed, bert_model=args.bert_model, num_classes=30, target_columns=target_columns, input_columns=input_columns,
args = parser.parse_args()

# Hyperparameters for the run; by mag's convention, keys starting with an
# underscore (e.g. `_random_seed`) are excluded from the experiment
# identifier.
svm_config = {
    "model": {
        "C": args.C,
        "gamma": args.gamma
    },
    "crossval": {
        "n_folds": args.cv,
        "_random_seed": args.cv_random_seed
    }
}

iris = load_iris()

with Experiment(config=svm_config) as experiment:
    # Read parameters back through the experiment's Config object.
    config = experiment.config

    model = SVC(C=config.model.C, gamma=config.model.gamma)

    # Mean accuracy over stratified k-fold cross-validation.
    score = cross_val_score(
        model, X=iris.data, y=iris.target,
        scoring="accuracy",
        cv=StratifiedKFold(
            config.crossval.n_folds,
            shuffle=True,
            random_state=config.crossval._random_seed),
    ).mean()

    print("Accuracy is", round(score, 4))
    experiment.register_result("accuracy", score)
default=0.9, help="Momentum value used in optimizer.") args = parser.parse_args() with Experiment({ "_n_classes": 10, "network": { "n_layers": args.n_layers, "hidden_units": args.hidden_units, "activation": args.activation }, "train": { "batch_size": args.batch_size, "n_epochs": args.n_epochs, # to exclude the parameter from the identifier, # start its name from the underscore "_buffer_size": 128, "learning_rate": args.lr, "momentum": args.momentum }, "validation": { "_batch_size": 128 } }) as experiment: classifier = MnistClassifier(mnist, experiment) classifier.fit() print("Finished!") # will be logged to a file
with Experiment({ "data": { "_input_dim": 64, "_kfold_seed": 42, "_n_folds": 5, "_train_data_dir": "data/Training_Data/", "_train_df": "data/train_df.csv", "features": "mel_1024_512_64", "max_audio_length": 3, "p_aug": 0.3, "p_mixup": 0.0 }, "label": "2d_cnn", "network": { "aggregation_type": "max", "conv_base_depth": 32, "growth_rate": 1.3, "num_conv_blocks": 5, "output_dropout": 0.5, "start_deep_supervision_on": 2 }, "train": { "_save_every": 5, "accumulation_steps": 1, "batch_size": 50, "epochs": 7, "learning_rate": 0.001, "optimizer": "adam", "scheduler": "1cycle_0.0001_0.005", "switch_off_augmentations_on": 6, "weight_decay": 0.0 } }) as experiment:
with Experiment( { "network": { "backbone": args.backbone, "output_dropout": args.output_dropout, }, "data": { "features": args.features, "_n_folds": args.n_folds, "_kfold_seed": args.kfold_seed, "_input_dim": audio_transform.n_features, "_n_classes": len(class_map), "_holdout_size": args.holdout_size, "p_mixup": args.p_mixup, "p_aug": args.p_aug, "max_audio_length": args.max_audio_length, "noisy": args.noisy_train_df is not None, "_train_df": args.train_df, "_train_data_dir": args.train_data_dir, "_noisy_train_df": args.noisy_train_df, "_noisy_train_data_dir": args.noisy_train_data_dir, "_share_noisy": args.share_noisy }, "train": { "accumulation_steps": args.accumulation_steps, "batch_size": args.batch_size, "learning_rate": args.lr, "scheduler": args.scheduler, "optimizer": args.optimizer, "epochs": args.epochs, "_save_every": args.save_every, "weight_decay": args.weight_decay, "switch_off_augmentations_on": args.switch_off_augmentations_on }, "label": args.label }, implicit_resuming=args.resume) as experiment: