def main():
    """Run two random W&B sweeps of 30 trials each, one per loss function.

    The first sweep trains with categorical cross-entropy, the second with
    focal loss. Both share one search space and the same W&B project. After
    each sweep the Keras session is cleared and garbage is collected.
    """
    print("START")
    # Enable memory growth on every GPU TensorFlow can see.
    for device in tf.config.experimental.list_physical_devices('GPU'):
        tf.config.experimental.set_memory_growth(device, True)

    # Load the splits; the training frame is shuffled one more time.
    train, dev, test = load_NoXi_data_all_languages()
    train = train.sample(frac=1).reset_index(drop=True)

    project_name = 'VGGFace2_FtF_training'
    search_space = {
        'optimizer': {'values': ['Adam', 'SGD', 'Nadam']},
        'learning_rate_max': {
            'distribution': 'uniform',
            'max': 0.001,
            'min': 0.0001,
        },
        'learning_rate_min': {
            'distribution': 'uniform',
            'max': 0.00001,
            'min': 0.000001,
        },
        'lr_scheduller': {'values': ['Cyclic', 'reduceLRonPlateau']},
        'augmentation_rate': {'values': [0.1, 0.2, 0.3]},
    }
    sweep_config = {
        'method': 'random',
        'metric': {'name': 'val_loss', 'goal': 'minimize'},
        'parameters': search_space,
    }

    def _sweep_with_loss(loss_name):
        # One complete sweep: register it, run 30 trials, then free memory.
        sweep_id = wandb.sweep(sweep_config, project=project_name)
        wandb.agent(
            sweep_id,
            function=lambda: train_model(train, dev, loss_name),
            count=30,
            project=project_name,
        )
        tf.keras.backend.clear_session()
        gc.collect()

    _sweep_with_loss('categorical_crossentropy')
    print("Wandb with focal loss")
    _sweep_with_loss('focal_loss')
def sweep(count=10):
    """Create a random sweep in the "lunar-lander" project and run an agent.

    The sweep maximizes the ``avg_reward`` metric; each trial calls
    ``wandb_train``.

    Args:
        count: Number of trials passed to ``wandb.agent``.
    """
    search_space = dict(
        batch_size=dict(
            distribution='q_log_uniform',
            q=1.0,
            min=math.log(800),
            max=math.log(33000),
        ),
        epochs=dict(values=[30, 40, 50, 80, 100]),
        n_layers=dict(values=[1, 2, 3, 5]),
        hidden_size=dict(
            distribution='q_log_uniform',
            q=1.0,
            min=math.log(4),
            max=math.log(128),
        ),
        clip_ratio=dict(values=[0.02, 0.06, 0.1, 0.2, 0.3, 0.4]),
    )
    sweep_config = dict(
        method='random',
        metric=dict(name='avg_reward', goal='maximize'),
        parameters=search_space,
    )
    sweep_id = wandb.sweep(sweep_config, project="lunar-lander")
    wandb.agent(sweep_id, function=wandb_train, count=count)
def main():
    """Create a sweep from a YAML file and start one agent per node via ``srun``.

    Command-line positionals: ``sweep_config`` (YAML path), ``train_script``
    (stored as the sweep's ``program``) and ``project`` (W&B project name).

    Returns:
        list: Exit code of each launched ``srun`` process, in ``node_list`` order.
    """
    cli = argparse.ArgumentParser()
    for positional in ("sweep_config", "train_script", "project"):
        cli.add_argument(positional, type=str)
    args = cli.parse_args()

    wandb.init(project=args.project)

    # NOTE(review): FullLoader is not the safe loader; only feed trusted YAML.
    with open(args.sweep_config) as handle:
        config_dict = yaml.load(handle, Loader=yaml.FullLoader)
    config_dict["program"] = args.train_script
    sweep_id = wandb.sweep(config_dict, project=args.project)

    launched = [
        subprocess.Popen([
            "srun",
            "--nodes=1",
            "--ntasks=1",
            "-w",
            node,
            "start-agent.sh",
            sweep_id,
            args.project,
        ])
        for node in node_list
    ]
    # Block until every agent process has finished.
    return [proc.wait() for proc in launched]
def __init__(self, config_defaults, sweep_config, sweeps_project_name, env, test_name, training=False, testing=True, games_to_avg=50):
    """Register the sweep and prepare the output directories for this test.

    The sweep is created in the W&B project
    ``<sweeps_project_name>-<test_name>``. Models are stored under
    ``tests/<sweeps_project_name>/models/<test_name>`` and plots under
    ``tests/<sweeps_project_name>/plots/<test_name>``.

    Args:
        config_defaults: Stored as ``self.config_defaults``.
        sweep_config: Sweep configuration passed to ``wandb.sweep``.
        sweeps_project_name: Prefix of the W&B project name and name of the
            directory below ``tests``.
        env: Stored as ``self.env``.
        test_name: Suffix of the W&B project name and name of the leaf
            output directories.
        training: Stored as ``self.training``.
        testing: Stored as ``self.testing``.
        games_to_avg: Accepted but not stored by this constructor.
            NOTE(review): confirm whether it should be kept on ``self``.
    """
    self.config_defaults = config_defaults
    self.env = env
    self.test_name = test_name
    self.training = training
    self.testing = testing
    self.sweep_id = wandb.sweep(sweep_config, project=sweeps_project_name + '-' + test_name)

    project_root = os.path.join('tests', sweeps_project_name)
    self.models_path = os.path.join(project_root, 'models')
    self.plots_path = os.path.join(project_root, 'plots')
    # makedirs creates every missing parent and tolerates existing
    # directories, so there is no check-then-create race and nested
    # project/test names work.
    for parent in (self.models_path, self.plots_path):
        os.makedirs(os.path.join(parent, test_name), exist_ok=True)
def papersweep_exec(input_nb:Param("Input notebook", str), sweep_config:Param("YAML file with the sweep config", str), entity:Param("wandb entity", str), project:Param("wandb project", str), pm_params:Param("YAML file with papermill parameters", str)=None, sweep_id:Param("Sweep ID. This option omits `sweep_config`", str)=None, login_key:Param("Login key for wandb", str)=None):
    """
    Executes the notebook `input_nb` with the sweep configuration given in
    `sweep_config`. Optionally, in case the notebook has one cell tagged as
    'parameters', those will be injected from the file `pm_params`.

    When `sweep_id` is given, no new sweep is created and `sweep_config` is
    not read; the agent attaches to the existing sweep instead. Each trial
    writes the executed notebook to '__.ipynb'.

    Returns the ID of the sweep the agent ran against.
    """
    if login_key:
        wandb.login(key=login_key)

    # Only read the sweep configuration when a new sweep must be created.
    sc = None
    if not sweep_id:
        with maybe_open(sweep_config, 'r') as f:
            sc = yaml.safe_load(f)

    if pm_params:
        with maybe_open(pm_params, 'r') as f:
            _pm_params = yaml.safe_load(f)
    else:
        _pm_params = None

    sid = sweep_id if sweep_id else wandb.sweep(sweep=sc, entity=entity, project=project)
    # Pass entity/project explicitly so an existing sweep_id is resolved
    # in the requested project, not the default one.
    wandb.agent(sid,
                function=partial(pm.execute_notebook,
                                 input_path=input_nb,
                                 output_path='__.ipynb',
                                 parameters=_pm_params),
                entity=entity,
                project=project)
    return sid
def sweep(sweep_config):
    """Register ``sweep_config`` in the 'network_from_scratch' project and run an agent.

    Each trial of the agent calls ``train``.
    """
    wandb.agent(
        wandb.sweep(sweep_config, project='network_from_scratch'),
        function=train,
    )
def finetuneBertOnMultiClassClsTask(self):
    """Fine-tune and evaluate the BERT model on the multi-class classification task.

    Builds the training and eval datasets, logs in to W&B, registers the
    sweep described by ``self.wandbConfig`` and runs an agent. Each trial
    trains a ``ClassificationModel`` using the epochs and learning rate
    picked by the sweep.

    Returns:
        None. Returns early, after logging an error, when the datasets
        cannot be built.

    Raises:
        Exception: Wraps any error raised while preparing or running the
            sweep. The original error is attached as ``__cause__``.
    """
    global SECTOR_LABELS, WAND_PROJECT_NAME, WAND_API_KEY
    try:
        # Build training and eval datasets
        if self.__buildTrainingAndEvalDatasets() is False:
            log.error("Error building training / eval dataset to train / eval finetuned BERT embeddings on multi-classification task! Cannot continue with evaluation.")
            return
        time.sleep(60)

        # Check if CUDA is available for doing training on a GPU system.
        # NOTE(review): this only logs; training still proceeds with
        # use_cuda=False below. Confirm whether it should abort instead.
        if torch.cuda.is_available() is False:
            log.error("CUDA libs not found. A new language model can be trained from scratch only on a GPU system with CUDA libs!")

        # Log in and register the sweep whose hyper-params are picked up during training.
        # NOTE(review): the API key is passed on the command line here.
        subprocess.run(["wandb", "login", WAND_API_KEY])
        time.sleep(1)
        sweep_id = wandb.sweep(self.wandbConfig, project=WAND_PROJECT_NAME)

        # Start training
        startTime = time.time()

        def train():
            # NOTE(review): the project name is passed positionally; confirm it
            # maps to the intended wandb.init parameter for the installed version.
            wandb.init(WAND_PROJECT_NAME)
            modelArgs = {
                "max_seq_length": self.maxSeqLength,
                "output_dir": self.modelOutputDir,
                "overwrite_output_dir": True,
                "best_model_dir": self.bestModelOutputDir,
                "wandb_project": WAND_PROJECT_NAME,
                "num_training_epochs": wandb.config.epochs,
                "learning_rate": wandb.config.learning_rate,
                "do_lower_case": True,
                "cache_dir": self.modelCacheDir,
                "encoding": "utf-8",
                "train_batch_size": 5,
                "eval_batch_size": 5,
                "evaluate_during_training_steps": 50,
                "evaluate_during_training_verbose": True,
                "logging_steps": 5,
                "sliding_window": True,
                "reprocess_input_data": True,
                "evaluate_during_training": True,
                "use_multiprocessing": True,
                "labels_list": SECTOR_LABELS,
            }
            model = ClassificationModel(
                self.modelType,
                self.modelNameOrPath,
                args=modelArgs,
                sweep_config=wandb.config,
                use_cuda=torch.cuda.is_available(),
                num_labels=len(SECTOR_LABELS),
            )

            # Training and evaluation. A failing trial is logged and the
            # sweep continues; interrupts are no longer swallowed.
            try:
                log.info("Started training/finetuning BERT on multi-class classification task..")
                model.train_model(
                    train_df=self.trainDataset,
                    eval_df=self.evalDataset,
                    show_running_loss=True,
                    output_dir=self.modelOutputDir,
                    mcc=sklearn.metrics.matthews_corrcoef,
                    acc=sklearn.metrics.balanced_accuracy_score,
                )
                log.info(f"Finished finetuning and evaluating our fine-tuned model on multi-class classification task. Check the folder '{self.modelOutputDir}' for finetuned weights.")
                log.info(f"It took {round((time.time() - startTime) / 3600, 1)} hours to finetune and evaluate our fine-tuned model on multi-class classification task.")
            except Exception as exc:
                err = f"Error occurred while training and evaluating the finetuned model on multi-class classification task. Error is: {type(exc)}; {exc}."
                log.error(err)
            wandb.join()

        wandb.agent(sweep_id, function=train)
    except Exception as exc:
        err = f"** ERROR ** occurred while finetuning our BERT model on multi-classification task and evaluating it. Error is: {type(exc)}; {exc}."
        # Chain the cause so the original traceback is preserved.
        raise Exception(err) from exc
def test_create_sweep(live_mock_server, test_settings):
    """Creating a grid sweep returns the id "test"."""
    live_mock_server.set_ctx({"resume": True})
    grid_values = {"values": [1, 2, 3]}
    created_id = wandb.sweep({
        "name": "My Sweep",
        "method": "grid",
        "parameters": {"parameter1": grid_values},
    })
    assert created_id == "test"
def run_sweep(config_yaml):
    """
    Set up and run Weights and Biases hyperparameter sweep from config file.

    Args:
        config_yaml: Passed straight to the YAML parser, so it must be YAML
            text or an open stream. NOTE(review): a bare file path would be
            parsed as a scalar string; confirm what callers pass.
    """
    print("Setting sweep")
    # safe_load: plain yaml.load without a Loader is rejected by PyYAML >= 6
    # and can construct arbitrary objects on older versions.
    sweep_id = wandb.sweep(yaml.safe_load(config_yaml))
    print("Setting agent")
    wandb.agent(sweep_id, wandb_run)
def __init__(self, config_file: str, base_config: str, id: Optional[str], gpus: str) -> None:
    """Load the sweep config, create or reuse a sweep, and set up its controller.

    Args:
        config_file: Passed to ``Config``.
        base_config: Stored as ``self.base_config``.
        id: Existing sweep id to reuse. When falsy, a new sweep is created
            from the loaded config.
        gpus: Comma-separated GPU list; must be non-empty.
    """
    assert gpus, 'No GPUs specified, specify with --gpus or by setting the CUDA_VISIBLE_DEVICES env var.'
    self.gpus = gpus.split(',')
    self.config = Config(config_file)
    self.base_config = base_config

    if id:
        self.sweep_id = id
    else:
        self.sweep_id = wandb.sweep(self.config.dict)

    # The controller is configured with the same config dict.
    self.tuner = wandb.controller(self.sweep_id)
    self.tuner.configure(self.config.dict)

    self.processes: Dict[str, Process] = {}
def sweep(self, tags=None, saveroot=None):
    '''
    run sweep

    Parameters
    ----------
    tags : list, optional
        list of strings to add as tags to sweep runs; the current date is
        added to every run's tags. The caller's list is not modified.
    saveroot : str, optional
        root name to use for saving; each run saves into a sub-directory
        named after its wandb run id. When omitted, nothing is saved.
    '''
    # Copy so neither a shared default nor the caller's list is mutated;
    # previously the date tag was appended again on every run.
    base_tags = [] if tags is None else list(tags)

    # setup root saving directory (parents included, existing dir tolerated)
    if saveroot is not None:
        os.makedirs(saveroot, exist_ok=True)

    # define internal train function to wrap Train
    def train():
        # setup wandb
        config_defaults = {
            'kernel': 5,
            'filters': 8,
            'fc_size': 32,
            'drop_rate': 0.1,
            'batch_size': 16,
            'lr': 1e-3,
            'weight_decay': 1e-4,
        }
        run_tags = base_tags + [datetime.today().strftime('%Y-%m-%d')]
        wandb.init(config=config_defaults, tags=run_tags)
        config = wandb.config

        # create run results directory; runpath stays None when not saving
        # (it was unbound before, which raised UnboundLocalError below).
        # NOTE(review): assumes Train ignores savedir when save is False.
        runpath = None
        save = False
        if saveroot is not None:
            runpath = os.path.join(saveroot, wandb.run.id)
            os.mkdir(runpath)
            save = True

        # instantiate trainer and run
        trainer = Train(self.trainX, self.trainY,
                        self.valX, self.valY,
                        testX=self.testX,
                        testY=self.testY,
                        Ylim=self.Ylim,
                        kernel=config.kernel,
                        filters=config.filters,
                        drop_rate=config.drop_rate,
                        epochs=self.epochs,
                        early_stop=self.early_stop,
                        fc_size=config.fc_size,
                        batch_size=config.batch_size,
                        lr=config.lr,
                        weight_decay=config.weight_decay,
                        verbose=False,
                        mcnum=self.mcnum,
                        regression=self.regression,
                        seed=self.seed,
                        wandb=wandb,
                        save=save,
                        savedir=runpath)
        trainer.train()

    # run sweep
    sweep_id = wandb.sweep(self.sweep_config, entity=self.entity, project=self.project)
    wandb.agent(sweep_id, train)
def sweep_bayes_nested(args):
    """Run 9 trials of a bayes sweep maximizing ``feat1.val_acc`` and check the result."""
    search_space = {
        "param0": {"values": [2]},
        "param1": {"values": [0, 1, 4]},
        "param2": {"values": [0, 0.5, 1.5]},
    }
    config = {
        "method": "bayes",
        "metric": {"name": "feat1.val_acc", "goal": "maximize"},
        "parameters": search_space,
    }
    sweep_id = wandb.sweep(config, project=PROJECT)
    print("sweep:", sweep_id)
    wandb.agent(sweep_id, function=train_nested, count=9)
    expected = 2 + 4 * L + 1.5 * L * L
    check(sweep_id, num=9, result=expected)
def sweep_grid(args):
    """Run a full grid sweep (no trial limit) and check for 9 runs and the expected result."""
    search_space = {
        "param0": {"values": [2]},
        "param1": {"values": [0, 1, 4]},
        "param2": {"values": [0, 0.5, 1.5]},
        "epochs": {"value": 4},
    }
    config = {"method": "grid", "parameters": search_space}
    sweep_id = wandb.sweep(config, project=PROJECT)
    print("sweep:", sweep_id)
    wandb.agent(sweep_id, function=train)
    expected = 2 + 4 * L + 1.5 * L * L
    check(sweep_id, num=9, result=expected)
def run_sweep(sweep_name: str, window_length: int):
    """Run a 195-trial random sweep with focal loss in 'NoXi_Seq_emb_training'.

    Args:
        sweep_name: Name given to the sweep.
        window_length: Offered to the sweep as the single allowed value of
            the ``window_length`` parameter.
    """
    print("START OF SCRIPT111111111")

    # Load the data and labels.
    train, dev, test = load_data()
    gc.collect()

    project_name = 'NoXi_Seq_emb_training'
    search_space = {
        'optimizer': {'values': ['Adam', 'SGD', 'Nadam']},
        'learning_rate_max': {
            'distribution': 'uniform',
            'max': 0.01,
            'min': 0.0001,
        },
        'learning_rate_min': {
            'distribution': 'uniform',
            'max': 0.0001,
            'min': 0.000001,
        },
        'lr_scheduller': {'values': ['Cyclic', 'reduceLRonPlateau']},
        'num_layers': {'values': [1, 2, 3]},
        'num_neurons': {'values': [64, 128, 256, 512]},
        'window_length': {'values': [window_length]},
    }
    sweep_config = {
        'name': sweep_name,
        'method': 'random',
        'metric': {'name': 'val_loss', 'goal': 'minimize'},
        'parameters': search_space,
    }

    # Focal-loss sweep, followed by session and memory cleanup.
    sweep_id = wandb.sweep(sweep_config, project=project_name)
    wandb.agent(
        sweep_id,
        function=lambda: train_model(train, dev, 'focal_loss'),
        count=195,
        project=project_name,
    )
    tf.keras.backend.clear_session()
    gc.collect()
def sweep_quick(args):
    """Run a single trial of a random sweep and check that exactly one run exists."""
    search_space = {
        "param0": {"values": [2]},
        "param1": {"values": [0, 1, 4]},
        "param2": {"values": [0, 0.5, 1.5]},
        "epochs": {"value": 4},
    }
    config = {"method": "random", "parameters": search_space}
    sweep_id = wandb.sweep(config, project=PROJECT)
    print("sweep:", sweep_id)
    wandb.agent(sweep_id, function=train, count=1)
    check(sweep_id, num=1)
def main():
    """Create a sweep from the YAML config named on the command line and run an agent.

    The sweep is registered under entity "murnanedaniel" in the project
    "node_regression_sweep"; each trial calls ``train``.
    """
    cli_args = parse_args()

    # NOTE(review): FullLoader is not the safe loader; only feed trusted YAML.
    with open(cli_args.config) as config_file:
        sweep_config = yaml.load(config_file, Loader=yaml.FullLoader)

    # Register the sweep, then run the sweep agent against it.
    sweep_id = wandb.sweep(
        sweep_config,
        entity="murnanedaniel",
        project="node_regression_sweep",
    )
    wandb.agent(sweep_id, function=train)
def main(args):
    """Start a new sweep, or continue ``args.sweep_id``, in the "fact2021" project.

    A new sweep takes its configuration from ``get_config(args.dataset)``.
    Every trial calls ``sweep_iteration(args)``.
    """
    wandb.init(project="fact2021")

    sweep_id = args.sweep_id
    if sweep_id is None:
        sweep_id = wandb.sweep(get_config(args.dataset), project="fact2021")
        print(f"new sweep. sweep_id: {sweep_id}")
    else:
        print(f"continuing sweep. sweep_id: {sweep_id}")

    # The agent calls its function without arguments, so bind args here.
    wandb.agent(sweep_id, function=lambda: sweep_iteration(args))
def test_sweep_entity_project_callable(live_mock_server, test_settings):
    """A sweep given as a callable sets the entity/project env vars and returns "test"."""

    def sweep_callable():
        return {
            "name": "My Sweep",
            "method": "grid",
            "parameters": {"parameter1": {"values": [1, 2, 3]}},
        }

    sweep_id = wandb.sweep(sweep_callable, project="test", entity="test")

    for env_var in ("WANDB_ENTITY", "WANDB_PROJECT"):
        assert os.environ[env_var] == "test"
    assert sweep_id == "test"
def sweep_chdir(args):
    """Run two grid-sweep trials with ``train_and_check_chdir``, then remove ``test_chdir``.

    The current working directory is recorded in the sweep config under
    ``root``.
    """
    search_space = {
        "param0": {"values": [2]},
        "param1": {"values": [0, 1, 4]},
        "param2": {"values": [0, 0.5, 1.5]},
        "epochs": {"value": 4},
    }
    config = {
        "method": "grid",
        "parameters": search_space,
        "root": os.getcwd(),
    }
    sweep_id = wandb.sweep(config, project=PROJECT)
    wandb.agent(sweep_id, function=train_and_check_chdir, count=2)

    # Clean up: step one directory up and remove the test directory.
    os.chdir('../')
    os.removedirs('./test_chdir')
def main():
    """Create a sweep from ``defs.SWEEP_CONFIG_FILE`` in project "test" and run an agent.

    The W&B entity is the single command-line positional argument.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("entity", type=str)
    entity = cli.parse_args().entity
    project = "test"

    with open(defs.SWEEP_CONFIG_FILE, "r") as config_file:
        sweep_config = yaml.safe_load(config_file)
    sweep_id = wandb.sweep(sweep=sweep_config, project=project)

    print("Run agent")
    wandb.agent(sweep_id, entity=entity, project=project)
    print("[ Done ]")
def wandb_agent(script_path, sweep, entity=None, project=None, count=None, run=True):
    """Create a W&B sweep, print the agent command, and optionally run the agent.

    Args:
        script_path: Stored as the sweep's ``program`` when the config has none.
        sweep: Sweep configuration dict. It is updated in place when
            ``program`` is missing.
        entity: W&B entity. When None, the ``WANDB_ENTITY`` environment
            variable is used for the printed command.
        project: W&B project. When None, the ``WANDB_PROJECT`` environment
            variable is used for the printed command.
        count: Number of runs passed to ``wandb.agent``.
        run: When true, start the agent after printing the command.

    Raises:
        ImportError: If ``wandb`` is not installed.
    """
    try:
        import wandb
    except ImportError as err:
        # Chain the cause so the underlying import failure stays visible.
        raise ImportError('You need to install wandb to run sweeps!') from err

    if 'program' not in sweep:
        sweep["program"] = script_path
    sweep_id = wandb.sweep(sweep, entity=entity, project=project)

    # .get(): the fallbacks are evaluated even when entity/project are given,
    # so an unset variable must not raise KeyError just to print a message.
    entity = ifnone(entity, os.environ.get('WANDB_ENTITY'))
    project = ifnone(project, os.environ.get('WANDB_PROJECT'))
    print(f"\nwandb agent {entity}/{project}/{sweep_id}\n")
    if run:
        wandb.agent(sweep_id, function=None, count=count)
def main():
    """Create a sweep from ``simple_sweep.yaml`` in project "test" and run an agent.

    The config file is looked up in ``defs.SOURCE_DIR``; the W&B entity is
    the single command-line positional argument.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("entity", type=str)
    entity = cli.parse_args().entity
    project = "test"

    config_path = os.path.join(defs.SOURCE_DIR, "simple_sweep.yaml")
    with open(config_path, "r") as config_file:
        sweep_config = yaml.safe_load(config_file)

    sweep_id = wandb.sweep(sweep=sweep_config, project=project)
    wandb.agent(sweep_id, entity=entity, project=project)
    print("[ Done ]")
def main():
    """Handle the sweep-management options, or train when no sweep is named.

    Reads the module-level ``args``. When ``args.sweep`` is set, the entry
    is looked up in ``sweep_registry`` and the process exits with status 1
    if it is missing. Then, depending on the flags:

    * ``args.file``: the registry entry is called to construct the config,
      which is saved to that file;
    * ``args.create``: the sweep is created with ``wandb.sweep``;
    * ``args.controller``: a controller is created for the sweep and run.

    In sweep mode the function returns without training. Without
    ``args.sweep`` it calls ``train()``.
    """
    if args.sweep:
        sweep_config = sweep_registry.get(args.sweep)
        if not sweep_config:
            print("[ERROR] can not find sweep: {}".format(args.sweep))
            sys.exit(1)
        if args.file:
            # construct config
            sweep_config = sweep_config()
            sweep_config.save(args.file)
        if args.create:
            # Without args.file, sweep_config is still the registry entry itself.
            sweep_id = wandb.sweep(sweep_config)
        if args.controller:
            # NOTE(review): sweep_id appears to be bound only when args.create
            # is set -- confirm that --controller is never used without --create.
            sweep = wandb.controller(sweep_id)
            sweep.run()
        return
    train()
def generate_vargp_sweep(project='continual_gp', submit=False, method='random', dataset=None, epochs=500, M=60, lr=3e-3, beta=10.0):
    """Build the sweep config for ``experiments/vargp.py`` and optionally submit it.

    Every hyper-parameter is pinned to the single value given here. The
    sweep name combines today's month/day with the dataset, method, ``M``,
    ``lr`` and ``beta``. The dataset is passed to the program as a
    positional command-line argument.

    Args:
        project: W&B project used when ``submit`` is true.
        submit: When true, register the sweep and return its id.
        method: Sweep search method.
        dataset: Must be 's_mnist' or 'p_mnist'.
        epochs: Value of the ``epochs`` parameter.
        M: Value of the ``M`` parameter.
        lr: Value of the ``lr`` parameter.
        beta: Value of the ``beta`` parameter.

    Returns:
        The sweep id when ``submit`` is true, otherwise the sweep config dict.

    Raises:
        AssertionError: If ``dataset`` is not one of the supported names.
    """
    assert dataset in ['s_mnist', 'p_mnist']

    # %b is the documented abbreviated-month directive; the previous %h is a
    # platform-specific alias that is not accepted everywhere.
    md = datetime.now().strftime('%b%d')
    name = f'[{md}] {dataset}-{method}-{M}-{lr}-{beta}'

    sweep_config = {
        'name': name,
        'method': method,
        'parameters': {
            'epochs': {'value': epochs},
            'M': {'value': M},
            'lr': {'value': lr},
            'beta': {'value': beta},
        },
        'program': 'experiments/vargp.py',
        'command': [
            '${env}',
            '${interpreter}',
            '${program}',
            dataset,
            '${args}',
        ],
    }

    if submit:
        return wandb.sweep(sweep_config, project=project)
    return sweep_config
def sweep_grid_hyperband(args):
    """Run 9 trials of a grid sweep with hyperband early termination and check the outcome.

    ``args.grid_hyper_delay`` (or 1 when falsy) is passed to every trial as
    the ``delay`` parameter.
    """
    search_space = {
        "param0": {"values": [2]},
        "param1": {"values": [4, 1, 0]},
        "param2": {"values": [1.5, 0.5, 0]},
        "delay": {"value": args.grid_hyper_delay or 1},
        "epochs": {"value": 27},
    }
    config = {
        "method": "grid",
        "metric": {"name": "val_acc", "goal": "maximize"},
        "parameters": search_space,
        "early_terminate": {"type": "hyperband", "max_iter": 27, "s": 2, "eta": 3},
    }
    sweep_id = wandb.sweep(config, project=PROJECT)
    print("sweep:", sweep_id)
    wandb.agent(sweep_id, function=train, count=9)
    # TODO(check stopped)
    expected = 2 + 4 * L + 1.5 * L * L
    check(sweep_id, num=9, result=expected, stopped=3)
def test_sweep_pause(runner, mock_server, test_settings, stop_method):
    """Pause, resume, then stop or cancel a sweep through the CLI; each must exit 0."""
    sweep_id = wandb.sweep({
        "name": "My Sweep",
        "method": "grid",
        "parameters": {"parameter1": {"values": [1, 2, 3]}},
    })
    assert sweep_id == "test"

    # Any stop_method other than "stop" exercises --cancel.
    final_flag = "--stop" if stop_method == "stop" else "--cancel"
    for flag in ("--pause", "--resume", final_flag):
        assert runner.invoke(cli.sweep, [flag, sweep_id]).exit_code == 0
def setup_wandb_sweep(self):
    """Create a W&B sweep from the configured YAML file and record it on disk.

    The config path is read from the ``wandb.sweep_config`` argument. The
    new sweep id is stored on ``self.wandb_sweep_id``. The id and the sweep
    config are written to ``wandb_sweep_info.yml`` and
    ``wandb_sweep_config.yml`` in the configuration directory.

    Returns:
        The id of the created sweep.

    Raises:
        FileNotFoundError: If the sweep config file does not exist.
    """
    config_path = self.get_arg('wandb.sweep_config', ensure_exists=True)
    if not os.path.exists(config_path):
        raise FileNotFoundError(f"The file {config_path} does not exist.")
    sweep_config = read_yaml(config_path)

    sweep_id = wandb.sweep(
        sweep_config,
        project=self.WANDB_PROJECT,
        entity=self.WANDB_ENTITY,
    )
    self.wandb_sweep_id = sweep_id

    # Persist the sweep id and the config it was created from.
    info_path = os.path.join(self.configuration_directory, 'wandb_sweep_info.yml')
    dump_yaml({'wandb_sweep_id': sweep_id}, info_path)
    config_copy_path = os.path.join(self.configuration_directory, 'wandb_sweep_config.yml')
    dump_yaml(sweep_config, config_copy_path)
    return sweep_id
def get_sweep_id(method):
    """Create a sweep in 'dispersant_screener' and return its id.

    The sweep minimizes ``cv_mean``, uses hyperband early termination and
    searches the module-level ``config`` parameter space.

    Args:
        method: Sweep search method.
    """
    objective = {'name': 'cv_mean', 'goal': 'minimize'}
    hyperband = {'type': 'hyperband', 's': 2, 'eta': 3, 'max_iter': 30}
    return wandb.sweep(
        {
            'method': method,
            'metric': objective,
            'early_terminate': hyperband,
            'parameters': config,
        },
        project='dispersant_screener',
    )
def run(config_yaml, train_file, project_name):
    """Create a sweep from a YAML file and start one agent per node via ``srun``.

    Args:
        config_yaml: Path of the sweep configuration file.
        train_file: Stored as the sweep's ``program``.
        project_name: W&B project of the run and the sweep.

    Returns:
        list: Exit code of each launched ``srun`` process, in ``node_list`` order.
    """
    wandb.init(project=project_name)

    # NOTE(review): FullLoader is not the safe loader; only feed trusted YAML.
    with open(config_yaml) as handle:
        config_dict = yaml.load(handle, Loader=yaml.FullLoader)
    config_dict['program'] = train_file
    sweep_id = wandb.sweep(config_dict, project=project_name)

    launched = [
        subprocess.Popen([
            'srun',
            '--nodes=1',
            '--ntasks=1',
            '-w',
            node,
            'start-agent.sh',
            sweep_id,
            project_name,
        ])
        for node in node_list
    ]
    # Block until every agent process has finished.
    return [proc.wait() for proc in launched]
def main():
    """Exercise the recovery path of a sweep whose first agent run is made to fail.

    Steps, as written below:

    1. Create a sweep from ``defs.SWEEP_CONFIG_FILE`` in project "test".
    2. Run an agent with ``defs.set_should_fail(True)``.
    3. Reset the failure flag and call ``resume_main`` for the sweep.
    4. Run the agent again. On ``wandb.errors.CommError`` the message is
       checked, the operator is asked to resume the sweep manually, and the
       agent is run once more.

    The W&B entity is the single command-line positional argument.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("entity", type=str)
    args = parser.parse_args()
    entity = args.entity
    project = "test"
    with open(defs.SWEEP_CONFIG_FILE, "r") as f:
        sweep_config = yaml.safe_load(f)
    sweep_id = wandb.sweep(sweep=sweep_config, project=project)

    def run_agent():
        # Run one agent against the sweep created above.
        wandb.agent(sweep_id, entity=entity, project=project)

    print("Run failing agent")
    defs.set_should_fail(True)
    run_agent()
    print("Reset for success")
    defs.set_should_fail(False)
    print("restart sweep")
    resume_main(["-e", entity, "-p", project, "-y", sweep_id])
    # revive agent
    try:
        run_agent()
    except wandb.errors.CommError as e:
        # this will most likely happen?
        # Only the "Sweep ... is not running" error is expected here.
        assert "Sweep" in str(e) and "is not running" in str(e)
        print("Please open sweep url and manually resume")
        print("Press ENTER when done")
        # Block until the operator confirms the manual resume.
        input()
        print("Rerunning")
        run_agent()
    print("[ Done ]")