def config():
    """Declare the default experiment settings and register a file observer.

    NOTE(review): presumably registered as a Sacred config scope (the
    decorator is not visible here). If so, every local name below is a
    config entry, so do not rename locals without checking callers.

    Side effects visible in this block:
      * ``trainer`` is handed to ``pt.Trainer.get_config`` (its return value
        is ignored, so it presumably completes the dict in place -- confirm).
      * ``trainer['storage_dir']`` is filled via ``get_new_folder`` when it is
        still ``None``.
      * A ``FileStorageObserver`` rooted at ``<storage_dir>/sacred`` is
        appended to ``ex.observers``.
    """
    debug = False
    batch_size = 6
    train_dataset = "mix_2_spk_min_tr"
    validate_dataset = "mix_2_spk_min_cv"

    # Start with an empty dict to allow tracking by Sacred
    trainer = {
        "model": {
            "factory": pt.models.bss.PermutationInvariantTrainingModel,
            "dropout_input": 0.,
            "dropout_hidden": 0.,
            "dropout_linear": 0.
        },
        "storage_dir": None,
        "optimizer": {
            "factory": pt.optimizer.Adam,
            "gradient_clipping": 1
        },
        "summary_trigger": (1000, "iteration"),
        "stop_trigger": (300_000, "iteration"),
        "loss_weights": {
            "pit_ips_loss": 1.0,
            "pit_mse_loss": 0.0,
        }
    }
    pt.Trainer.get_config(trainer)
    if trainer['storage_dir'] is None:
        trainer['storage_dir'] = get_new_folder(path_template, mkdir=False)

    ex.observers.append(
        FileStorageObserver.create(Path(trainer['storage_dir']) / 'sacred'))
def test_fs_observer_equality(dir_obs):
    """Observers created for the same basedir are equal; a str never is."""
    _, first = dir_obs
    twin = FileStorageObserver.create(first.basedir)

    # Both operators are exercised on purpose (``==`` and ``!=`` are checked
    # separately), so the ``not ... !=`` / ``not ... ==`` forms must stay.
    assert first == twin
    assert not first != twin

    unrelated = 'foo'
    assert not first == unrelated
    assert first != unrelated
def test_fs_observer_equality(dir_obs):
    """Check ``==`` / ``!=`` between observers and against a foreign type."""
    observer = dir_obs[1]
    same_dir_observer = FileStorageObserver.create(observer.basedir)

    # Equality and inequality are asserted independently so that both
    # comparison operators of the observer are covered.
    assert observer == same_dir_observer
    assert not observer != same_dir_observer

    # Comparing against a plain string must be "not equal" both ways.
    assert not observer == "foo"
    assert observer != "foo"
def config():
    """Declare the default TasNet experiment settings and register an observer.

    NOTE(review): presumably registered as a Sacred config scope (the
    decorator is not visible here). If so, every local name below is a
    config entry, so do not rename locals without checking callers.

    Raises:
        MissingConfigError: if ``database_json`` is still ``None`` after the
            ``JSON_BASE`` fallback.
        InvalidConfigError: if the ``database_json`` path does not exist.

    Side effects visible in this block: ``trainer`` is handed to
    ``pt.Trainer.get_config``, ``trainer['storage_dir']`` is filled via
    ``pt.io.get_new_storage_dir`` when it is ``None``, and a
    ``FileStorageObserver`` rooted at ``<storage_dir>/sacred`` is appended to
    ``ex.observers``.
    """
    debug = False
    batch_size = 4  # Runs on 4GB GPU mem. Can safely be set to 12 on 12 GB (e.g., GTX1080)
    chunk_size = 32000  # 4s chunks @8kHz
    train_dataset = "mix_2_spk_min_tr"
    validate_dataset = "mix_2_spk_min_cv"
    target = 'speech_source'
    lr_scheduler_step = 2
    lr_scheduler_gamma = 0.98
    load_model_from = None
    database_json = None
    # NOTE(review): this check is only meaningful if ``database_json`` can be
    # overridden from outside (e.g. by Sacred config updates) -- confirm.
    if database_json is None and JSON_BASE:
        database_json = Path(JSON_BASE) / 'wsj0_2mix_8k.json'

    if database_json is None:
        raise MissingConfigError(
            'You have to set the path to the database JSON!', 'database_json')
    if not Path(database_json).exists():
        raise InvalidConfigError('The database JSON does not exist!',
                                 'database_json')

    feat_size = 64
    encoder_window_size = 16
    trainer = {
        "model": {
            "factory": padertorch.contrib.examples.source_separation.tasnet.TasNet,
            'encoder': {
                'factory': padertorch.contrib.examples.source_separation.tasnet.tas_coders.TasEncoder,
                'window_length': encoder_window_size,
                'feature_size': feat_size,
            },
            'decoder': {
                'factory': padertorch.contrib.examples.source_separation.tasnet.tas_coders.TasDecoder,
                'window_length': encoder_window_size,
                'feature_size': feat_size,
            },
        },
        "storage_dir": None,
        "optimizer": {
            "factory": pt.optimizer.Adam,
            "gradient_clipping": 1
        },
        "summary_trigger": (1000, "iteration"),
        "stop_trigger": (100, "epoch"),
        "loss_weights": {
            "si-sdr": 1.0,
            "log-mse": 0.0,
            "log1p-mse": 0.0,
        }
    }
    pt.Trainer.get_config(trainer)
    if trainer['storage_dir'] is None:
        trainer['storage_dir'] = pt.io.get_new_storage_dir(experiment_name)

    ex.observers.append(FileStorageObserver(
        Path(trainer['storage_dir']) / 'sacred')
    )
def test_no_sources(tmpdir, tmpfile, sample_run):
    """With ``copy_sources=False`` no ``_sources`` directory is created."""
    sources_dir = tmpdir / "_sources"
    sample_run["ex_info"]["sources"] = [[tmpfile.name, tmpfile.md5sum]]

    non_copying = FileStorageObserver(tmpdir, copy_sources=False)
    non_copying.started_event(**sample_run)
    assert not os.path.exists(sources_dir)

    # Test the test: that the source would otherwise have been created.
    copying = FileStorageObserver(tmpdir, copy_sources=True)
    sample_run["_id"] += "_2"
    copying.started_event(**sample_run)

    stem = os.path.splitext(os.path.basename(tmpfile.name))[0]
    assert os.path.exists(sources_dir)
    copied = [entry for entry in os.listdir(sources_dir)
              if entry.startswith(stem)]
    assert copied
def test_fs_observer_resource_event_does_not_duplicate(dir_obs, sample_run, tmpfile):
    """The same resource reported by two observers is stored only once."""
    basedir, first_observer = dir_obs
    second_observer = FileStorageObserver(first_observer.basedir)

    first_observer.started_event(**sample_run)
    first_observer.resource_event(tmpfile.name)

    # let's have another run from a different observer
    sample_run["_id"] = None
    second_id = second_observer.started_event(**sample_run)
    second_observer.resource_event(tmpfile.name)

    # Exactly one stored copy, holding the original file content.
    resources_dir = basedir.join("_resources")
    assert resources_dir.exists()
    stored = resources_dir.listdir()
    assert len(stored) == 1
    assert stored[0].read() == tmpfile.content

    # The second run's run.json references that single stored copy.
    run_json = basedir.join(str(second_id)).join("run.json")
    recorded = json.loads(run_json.read())["resources"]
    assert len(recorded) == 1
    assert recorded[0] == [tmpfile.name, stored[0].strpath]
def config():
    """Declare the default OR-PIT experiment settings and register an observer.

    NOTE(review): presumably registered as a Sacred config scope (the
    decorator is not visible here). If so, every local name below is a
    config entry, so do not rename locals without checking callers.

    In this variant the model factories are given as dotted import strings
    rather than as objects. ``database_jsons`` defaults to an empty list and
    is not validated (the check below is commented out).

    Side effects visible in this block: ``trainer`` is handed to
    ``pt.Trainer.get_config``, ``trainer['storage_dir']`` is filled via
    ``get_storage_dir`` when it is ``None``, and a ``FileStorageObserver``
    rooted at ``<storage_dir>/sacred`` is appended to ``ex.observers``.
    """
    debug = False
    batch_size = 4  # Runs on 4GB GPU mem. Can safely be set to 12 on 12 GB (e.g., GTX1080)
    chunk_size = 32000  # 4s chunks @8kHz
    train_datasets = ["mix_2_spk_min_tr", "mix_3_spk_min_tr"]
    validate_datasets = ["mix_2_spk_min_cv", "mix_3_spk_min_cv"]
    target = 'speech_source'
    lr_scheduler_step = 2
    lr_scheduler_gamma = 0.98
    load_model_from = None
    database_jsons = []
    # if not database_jsons:
    #     raise MissingConfigError(
    #         'You have to set the path to the database JSON!', 'database_jsons')

    # Start with an empty dict to allow tracking by Sacred
    trainer = {
        "model": {
            "factory": 'padertorch.contrib.examples.or_pit.or_pit.OneAndRestPIT',
            "separator": {
                "factory": 'padertorch.contrib.examples.tasnet.tasnet.TasNet'
            }
        },
        "storage_dir": None,
        "optimizer": {
            "factory": pt.optimizer.Adam,
            "gradient_clipping": 1
        },
        "summary_trigger": (1000, "iteration"),
        "stop_trigger": (100_000, "iteration"),
        "loss_weights": {
            "si-sdr": 0.0,
            "log-mse": 1.0,
            "si-sdr-grad-stop": 0.0,
        }
    }
    pt.Trainer.get_config(trainer)
    if trainer['storage_dir'] is None:
        trainer['storage_dir'] = get_storage_dir()

    ex.observers.append(
        FileStorageObserver(Path(trainer['storage_dir']) / 'sacred'))
def config():
    """Declare the default PIT experiment settings and register an observer.

    NOTE(review): presumably registered as a Sacred config scope (the
    decorator is not visible here). If so, every local name below is a
    config entry, so do not rename locals without checking callers.

    Raises:
        MissingConfigError: if ``database_json`` is still ``None`` after the
            ``JSON_BASE`` fallback.
        InvalidConfigError: if the ``database_json`` path does not exist.

    Side effects visible in this block: ``trainer`` is handed to
    ``pt.Trainer.get_config``, ``trainer['storage_dir']`` is filled via
    ``pt.io.get_new_storage_dir`` when it is ``None``, and a
    ``FileStorageObserver`` rooted at ``<storage_dir>/sacred`` is appended to
    ``ex.observers``.
    """
    debug = False
    batch_size = 6
    database_json = None  # Path to WSJ0_2mix .json
    # NOTE(review): this check is only meaningful if ``database_json`` can be
    # overridden from outside (e.g. by Sacred config updates) -- confirm.
    if database_json is None and JSON_BASE:
        database_json = Path(JSON_BASE) / 'wsj0_2mix_8k.json'

    if database_json is None:
        raise MissingConfigError(
            'You have to set the path to the database JSON!', 'database_json')
    if not Path(database_json).exists():
        raise InvalidConfigError('The database JSON does not exist!',
                                 'database_json')
    train_dataset = "mix_2_spk_min_tr"
    validate_dataset = "mix_2_spk_min_cv"

    # Dict describing the model parameters, to allow changing the parameters from the command line.
    # Configurable automatically inserts the default values of not mentioned parameters to the config.json
    trainer = {
        "model": {
            "factory": pt.contrib.examples.source_separation.pit.model.
                PermutationInvariantTrainingModel,
            "dropout_input": 0.,
            "dropout_hidden": 0.,
            "dropout_linear": 0.
        },
        "storage_dir": None,
        "optimizer": {
            "factory": pt.optimizer.Adam,
            "gradient_clipping": 1
        },
        "summary_trigger": (1000, "iteration"),
        "stop_trigger": (300_000, "iteration"),
        "loss_weights": {
            "pit_ips_loss": 1.0,
            "pit_mse_loss": 0.0,
        }
    }
    pt.Trainer.get_config(trainer)
    if trainer['storage_dir'] is None:
        trainer['storage_dir'] = pt.io.get_new_storage_dir(experiment_name)

    ex.observers.append(
        FileStorageObserver(Path(trainer['storage_dir']) / 'sacred'))
def test_fs_observer_resource_event_does_not_duplicate(dir_obs, sample_run, tmpfile):
    """A resource already stored by one observer is reused by another."""
    basedir, original_observer = dir_obs
    other_observer = FileStorageObserver.create(original_observer.basedir)

    original_observer.started_event(**sample_run)
    original_observer.resource_event(tmpfile.name)

    # let's have another run from a different observer
    sample_run['_id'] = None
    other_run_id = other_observer.started_event(**sample_run)
    other_observer.resource_event(tmpfile.name)

    # Only one file may exist in the shared resource directory.
    shared_resources = basedir.join('_resources')
    assert shared_resources.exists()
    entries = shared_resources.listdir()
    assert len(entries) == 1
    assert entries[0].read() == tmpfile.content

    # The second run records the resource as pointing at that one file.
    run_file = basedir.join(str(other_run_id)).join('run.json')
    listed = json.loads(run_file.read())['resources']
    assert len(listed) == 1
    assert listed[0] == [tmpfile.name, entries[0].strpath]
def config():
    """Declare the default PIT experiment settings and register an observer.

    NOTE(review): presumably registered as a Sacred config scope (the
    decorator is not visible here). If so, every local name below is a
    config entry, so do not rename locals without checking callers.

    ``database_json`` defaults to the ``WSJ0_2MIX`` environment variable when
    that variable is set.

    Raises:
        ValueError: if ``database_json`` is empty. This used to be an
            ``assert``, which is skipped when Python runs with ``-O`` and
            would let a missing path slip through.

    Side effects visible in this block: ``trainer`` is handed to
    ``pt.Trainer.get_config``, ``trainer['storage_dir']`` is filled via
    ``get_new_folder`` when it is ``None``, and a ``FileStorageObserver``
    rooted at ``<storage_dir>/sacred`` is appended to ``ex.observers``.
    """
    debug = False
    batch_size = 6
    database_json = ""  # Path to WSJ0_2mix .json
    if "WSJ0_2MIX" in os.environ:
        database_json = os.environ.get("WSJ0_2MIX")
    # Explicit raise instead of assert: the check must survive ``python -O``.
    if not database_json:
        raise ValueError(
            'Set path to database Json on the command line or set environment variable WSJ0_2MIX')
    train_dataset = "mix_2_spk_min_tr"
    validate_dataset = "mix_2_spk_min_cv"

    # Dict describing the model parameters, to allow changing the parameters from the command line.
    # Configurable automatically inserts default values of not mentioned parameters to the config.json
    trainer = {
        "model": {
            "factory": pt.contrib.examples.pit.model.PermutationInvariantTrainingModel,
            "dropout_input": 0.,
            "dropout_hidden": 0.,
            "dropout_linear": 0.
        },
        "storage_dir": None,
        "optimizer": {
            "factory": pt.optimizer.Adam,
            "gradient_clipping": 1
        },
        "summary_trigger": (1000, "iteration"),
        "stop_trigger": (300_000, "iteration"),
        "loss_weights": {
            "pit_ips_loss": 1.0,
            "pit_mse_loss": 0.0,
        }
    }
    pt.Trainer.get_config(trainer)
    if trainer['storage_dir'] is None:
        trainer['storage_dir'] = get_new_folder(path_template, mkdir=False)

    ex.observers.append(FileStorageObserver.create(
        Path(trainer['storage_dir']) / 'sacred')
    )
def test_observer_equality(tmpdir):
    """Observers compare equal when, and only when, their directories match."""
    # Created in the order "a", "b", "a": the first and last share a directory.
    on_a, on_b, on_a_again = (
        FileStorageObserver.create(str(tmpdir / subdir))
        for subdir in ("a", "b", "a")
    )

    assert on_a == on_a_again
    assert on_a != on_b
def dir_obs(tmpdir):
    """Return ``(basedir, observer)`` for a ``file_storage`` dir under tmpdir."""
    storage_root = tmpdir.join("file_storage")
    observer = FileStorageObserver.create(storage_root.strpath)
    return storage_root, observer
def dir_obs(tmpdir):
    """Return ``(tmpdir, observer)`` with the observer rooted at tmpdir itself."""
    observer = FileStorageObserver.create(tmpdir.strpath)
    return tmpdir, observer
def dir_obs(tmpdir):
    """Build an observer in ``tmpdir/file_storage``; return ``(dir, observer)``."""
    target_dir = tmpdir.join('file_storage')
    storage_observer = FileStorageObserver.create(target_dir.strpath)
    return target_dir, storage_observer
from zeiss_umbrella.fundus.setting_parser import get_baseline, get_loss
from zeiss_umbrella.fundus.data import get_fundus_train
from zeiss_umbrella.fundus.train import test_model
import torch
import json
import os
from zeiss_umbrella.config import FILE_OBSERVER_BASE_PATH, FILE_OBSERVER_RESOURCE_PATH, FILE_OBSERVER_SOURCE_PATH
import sacred
from sacred import Experiment
from sacred.observers.file_storage import FileStorageObserver

# Sacred experiment object; the config scope below is attached to it.
ex = Experiment('fundus training')
# Passed as the fourth positional argument of the observer below.
template = "efficientnetb0"
# Register a file-based observer at import time, using the project-wide
# base/resource/source paths imported from zeiss_umbrella.config.
ex.observers.append(
    FileStorageObserver(FILE_OBSERVER_BASE_PATH, FILE_OBSERVER_RESOURCE_PATH, FILE_OBSERVER_SOURCE_PATH, template))


@ex.config
def my_config():
    """Sacred config scope: experiment paths, device, and the loaded config.

    ``exp_dir`` and ``weights_dir`` are first given relative to
    ``experiments_path`` and then rebound to the joined paths. ``config`` is
    the parsed content of ``<exp_dir>/config.json``, read when this scope is
    evaluated.

    NOTE(review): ``experiments_path`` is a machine-specific absolute path.
    NOTE(review): this excerpt may be truncated after ``valid_corruption``.
    """
    experiments_path = '/home/jiwu/interpretable-fundus/fundus_experiments'
    exp_dir = 'corruption_experiments/efficientnetb0_corruption_imbalance_3'
    weights_dir = 'corruption_experiments/efficientnetb0_corruption_imbalance_3/train_efficientnetb0_normalize_baseline_unfreezed_crossentropy_parallel_corrupted'
    device = 'cuda:0'
    exp_dir = os.path.join(experiments_path, exp_dir)
    weights_dir = os.path.join(experiments_path, weights_dir)
    with open(os.path.join(exp_dir, 'config.json')) as f:
        config = json.load(f)
    valid_corruption = True
# NOTE(review): ``os`` is used below but no ``import os`` is visible in this
# excerpt -- confirm it is imported earlier in the file.
import sacred
from sacred.utils import apply_backspaces_and_linefeeds
from sacred.observers.file_storage import FileStorageObserver

# NOTE(review): machine-specific absolute install location.
GRAMTOOLS_INSTALL_PATH = '/home/robyn/Documents/gramtools'
# Locations of the helper scripts and the binary inside the install tree.
GENERATE_PRG_SCRIPT_PATH = os.path.join(GRAMTOOLS_INSTALL_PATH,
                                        'utils/vcf_to_linear_prg.pl')
GENERATE_KMERS_SCRIPT_PATH = os.path.join(GRAMTOOLS_INSTALL_PATH,
                                          'utils/variantKmers.py')
MAP_READS_PATH = os.path.join(GRAMTOOLS_INSTALL_PATH, 'bin', 'gramtools')

experiment = sacred.Experiment()
experiment.captured_out_filter = apply_backspaces_and_linefeeds

file_observer = FileStorageObserver.create('gramtools_runs')
# Replace the observer's save_sources with a no-op (presumably to keep the
# observer from saving source files -- confirm against the Sacred version).
file_observer.save_sources = lambda x: None
experiment.observers.append(file_observer)


def generate_paths(vcf_path, fasta_path):
    """Generate and return all file paths associated with experiment.

    Both input paths are made absolute and appended to the observer's
    ``run_entry['artifacts']`` list; further paths are derived from the
    observer's run directory (``file_observer.dir``).

    NOTE(review): no ``return`` statement is visible in this excerpt; the
    remainder of the function may be cut off.
    """
    vcf_path = os.path.abspath(vcf_path)
    file_observer.run_entry['artifacts'].append(vcf_path)
    fasta_path = os.path.abspath(fasta_path)
    file_observer.run_entry['artifacts'].append(fasta_path)
    run_path = os.path.abspath(file_observer.dir)
    data_path = os.path.join(run_path, 'data')
def test_no_duplicate(tmpdir, sample_run):
    """With ``copy_artifacts=False`` no ``_resources`` directory is created."""
    resources_dir = tmpdir / "_resources"
    resource = Path(str(tmpdir / "koko.txt"))
    resource.touch()
    resource_name = str(resource)

    non_copying = FileStorageObserver(tmpdir, copy_artifacts=False)
    non_copying.started_event(**sample_run)
    non_copying.resource_event(resource_name)
    assert not os.path.exists(resources_dir)

    # Test the test: that the resource would otherwise have been created.
    copying = FileStorageObserver(tmpdir, copy_artifacts=True)
    sample_run["_id"] += "_2"
    copying.started_event(**sample_run)
    copying.resource_event(resource_name)

    assert os.path.exists(resources_dir)
    copied = [entry for entry in os.listdir(resources_dir)
              if entry.startswith("koko")]
    assert copied
def config():
    """Declare the default OR-PIT/DPRNN settings and register an observer.

    NOTE(review): presumably registered as a Sacred config scope (the
    decorator is not visible here). If so, every local name below is a
    config entry, so do not rename locals without checking callers.

    ``database_jsons`` falls back to the 2-mix and 3-mix JSON files under
    ``JSON_BASE`` when it is empty and ``JSON_BASE`` is truthy; an empty list
    is otherwise accepted (the validation below is commented out).

    Side effects visible in this block: ``trainer`` is handed to
    ``pt.Trainer.get_config``, ``trainer['storage_dir']`` is filled via
    ``get_new_storage_dir`` when it is ``None``, and a
    ``FileStorageObserver`` rooted at ``<storage_dir>/sacred`` is appended to
    ``ex.observers``.
    """
    debug = False
    batch_size = 4  # Runs on 4GB GPU mem. Can safely be set to 12 on 12 GB (e.g., GTX1080)
    chunk_size = 32000  # 4s chunks @8kHz
    train_datasets = ["mix_2_spk_min_tr", "mix_3_spk_min_tr"]
    validate_datasets = ["mix_2_spk_min_cv", "mix_3_spk_min_cv"]
    target = 'speech_source'
    lr_scheduler_step = 2
    lr_scheduler_gamma = 0.98
    load_model_from = None
    database_jsons = []
    # NOTE(review): this check is only meaningful if ``database_jsons`` can be
    # overridden from outside (e.g. by Sacred config updates) -- confirm.
    if len(database_jsons) == 0 and JSON_BASE:
        database_jsons = [
            Path(JSON_BASE) / 'wsj0_2mix_8k.json',
            Path(JSON_BASE) / 'wsj0_3mix_8k.json',
        ]
    # if not database_jsons:
    #     raise MissingConfigError(
    #         'You have to set the path to the database JSON!', 'database_jsons')

    # Start with an empty dict to allow tracking by Sacred
    trainer = {
        "model": {
            "factory": pt.contrib.examples.source_separation.or_pit.OneAndRestPIT,
            "separator": {
                "factory": pt.contrib.examples.source_separation.tasnet.TasNet,
                'encoder': {
                    'factory': pt.contrib.examples.source_separation.tasnet.tas_coders.
                        TasEncoder,
                    'window_length': 16,
                    'feature_size': 64,
                },
                'separator': {
                    'factory': pt.modules.dual_path_rnn.DPRNN,
                    'input_size': 64,
                    'rnn_size': 128,
                    'window_length': 100,
                    'hop_size': 50,
                    'num_blocks': 6,
                },
                'decoder': {
                    'factory': pt.contrib.examples.source_separation.tasnet.tas_coders.
                        TasDecoder,
                    'window_length': 16,
                    'feature_size': 64,
                },
            }
        },
        "storage_dir": None,
        "optimizer": {
            "factory": pt.optimizer.Adam,
            "gradient_clipping": 1
        },
        "summary_trigger": (1000, "iteration"),
        "stop_trigger": (100_000, "iteration"),
        "loss_weights": {
            "si-sdr": 0.0,
            "log-mse": 1.0,
            "si-sdr-grad-stop": 0.0,
        }
    }
    pt.Trainer.get_config(trainer)
    if trainer['storage_dir'] is None:
        trainer['storage_dir'] = get_new_storage_dir(experiment_name)

    ex.observers.append(
        FileStorageObserver(Path(trainer['storage_dir']) / 'sacred'))