def sweep(count=10):
    sweep_config = {
        'method': 'random',
        'metric': {
            'name': 'avg_reward',
            'goal': 'maximize'
        },
        'parameters': {
            'batch_size': {
                'distribution': 'q_log_uniform',
                'q': 1.0,
                'min': math.log(800),
                'max': math.log(33000)
            },
            'epochs': {
                'values': [30, 40, 50, 80, 100]
            },
            'n_layers': {
                'values': [1, 2, 3, 5]
            },
            'hidden_size': {
                'distribution': 'q_log_uniform',
                'q': 1.0,
                'min': math.log(4),
                'max': math.log(128)
            },
            'clip_ratio': {
                'values': [0.02, 0.06, 0.1, 0.2, 0.3, 0.4]
            }
        }
    }
    sweep_id = wandb.sweep(sweep_config, project="lunar-lander")
    wandb.agent(sweep_id, function=wandb_train, count=count)
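# `wandb_train` is referenced above but not defined in this file. Below is a
# minimal sketch of what the agent expects -- the dummy reward loop is an
# illustration, not the original training code. The hard requirements are that
# the function calls wandb.init(), reads hyper-parameters from wandb.config,
# and logs the sweep metric ('avg_reward').
import random

import wandb

def wandb_train():
    wandb.init()
    cfg = wandb.config  # populated by the sweep controller
    rewards = []
    for epoch in range(cfg.epochs):
        rewards.append(random.random())  # stand-in for one real training epoch
        wandb.log({'epoch': epoch})
    wandb.log({'avg_reward': sum(rewards) / len(rewards)})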
def run_agent(sweep_id: str, gpu: str, config: str, verbosity: str = 'INFO') -> None:
    wandb.wandb_agent.logger.setLevel = lambda _: None  # make wandb_agent quiet

    def train() -> None:
        try:
            os.environ['CUDA_VISIBLE_DEVICES'] = gpu
            os.environ[wandb.env.SILENT] = 'true'
            wandb.init()
            extra_options = tuple(f'{name}={val}' for name, val in wandb.config.user_items())
            print(f'Job on GPU {gpu} starting with options:\n' + '\n'.join(extra_options))
            with main_configure(config, extra_options, verbosity,
                                catch_exceptions=False, extension=gpu) as main:
                main.train()
        except Exception as e:
            # An exception in this function would cause an infinite hang
            print(f'Job on GPU {gpu} failed with exception of type {type(e).__name__}')

    wandb.agent(sweep_id, function=train)
    print('Agent finished')
def sweep(sweep_config):
    # load data
    # data = pd.read_csv(data_path, sep=',', header=0)
    # train, test, label_encoder = train_test_from_df_categorical(data, 'cls', 0.9, seed)
    sweep_id = wandb.sweep(sweep_config, project='network_from_scratch')
    wandb.agent(sweep_id, function=train)
def finetuneBertOnMultiClassClsTask(self):
    #####################################################################################
    # This method finetunes and evaluates the BERT model on a multi-class
    # classification task.
    #####################################################################################
    global SECTOR_LABELS, WAND_PROJECT_NAME, WAND_API_KEY
    try:
        # Build training and eval datasets
        if self.__buildTrainingAndEvalDatasets() is False:
            log.error("Error building training / eval dataset to train / eval finetuned BERT "
                      "embeddings on multi-class classification task! Cannot continue with evaluation.")
            return
        time.sleep(60)

        # Check if CUDA is available for doing training on a GPU system
        if torch.cuda.is_available() is False:
            log.error("CUDA libs not found. Finetuning will fall back to the CPU, which is very slow!")

        # Build WandB sweep params that are used to automatically pick up the hyper-params during training
        subprocess.run(["wandb", "login", WAND_API_KEY])
        time.sleep(1)
        sweep_defaults = self.wandbDefaults
        sweep_id = wandb.sweep(self.wandbConfig, project=WAND_PROJECT_NAME)

        # Start training
        startTime = time.time()

        def train():
            # pass the project via keyword; the first positional arg of wandb.init is not the project
            wandb.init(project=WAND_PROJECT_NAME)
            modelArgs = {
                "max_seq_length": self.maxSeqLength,
                "output_dir": self.modelOutputDir,
                "overwrite_output_dir": True,
                "best_model_dir": self.bestModelOutputDir,
                "wandb_project": WAND_PROJECT_NAME,
                "num_train_epochs": wandb.config.epochs,
                "learning_rate": wandb.config.learning_rate,
                "do_lower_case": True,
                "cache_dir": self.modelCacheDir,
                "encoding": "utf-8",
                "train_batch_size": 5,
                "eval_batch_size": 5,
                "evaluate_during_training_steps": 50,
                "evaluate_during_training_verbose": True,
                "logging_steps": 5,
                "sliding_window": True,
                "reprocess_input_data": True,
                "evaluate_during_training": True,
                "use_multiprocessing": True,
                "labels_list": SECTOR_LABELS
            }
            model = ClassificationModel(self.modelType,
                                        self.modelNameOrPath,
                                        args=modelArgs,
                                        sweep_config=wandb.config,
                                        use_cuda=torch.cuda.is_available(),
                                        num_labels=len(SECTOR_LABELS))

            # Training and evaluation
            try:
                log.info("Started training/finetuning BERT on multi-class classification task..")
                model.train_model(train_df=self.trainDataset,
                                  eval_df=self.evalDataset,
                                  show_running_loss=True,
                                  output_dir=self.modelOutputDir,
                                  mcc=sklearn.metrics.matthews_corrcoef,
                                  acc=sklearn.metrics.balanced_accuracy_score)
                log.info(f"Finished finetuning and evaluating our fine-tuned model on multi-class "
                         f"classification task. Check the folder '{self.modelOutputDir}' for finetuned weights.")
                log.info(f"It took {round((time.time() - startTime) / 3600, 1)} hours to finetune and "
                         f"evaluate our fine-tuned model on multi-class classification task.")
            except Exception:
                exc_type, exc_value, exc_traceback = sys.exc_info()
                log.error(f"Error occurred while training and evaluating the finetuned model on "
                          f"multi-class classification task. Error is: {exc_type}; {exc_value}.")
            wandb.join()

        wandb.agent(sweep_id, function=train)
    except Exception:
        exc_type, exc_value, exc_traceback = sys.exc_info()
        err = (f"** ERROR ** occurred while finetuning our BERT model on multi-class classification "
               f"task and evaluating it. Error is: {exc_type}; {exc_value}.")
        raise Exception(err)
def main(): print("START") gpus = tf.config.experimental.list_physical_devices('GPU') for gpu in gpus: tf.config.experimental.set_memory_growth(gpu, True) # load the data and labels train, dev, test = load_NoXi_data_all_languages() # shuffle one more time train data train = train.sample(frac=1).reset_index(drop=True) sweep_config = { 'method': 'random', 'metric': { 'name': 'val_loss', 'goal': 'minimize' }, 'parameters': { 'optimizer': { 'values': ['Adam', 'SGD', 'Nadam'] }, 'learning_rate_max': { 'distribution': 'uniform', 'max': 0.001, 'min': 0.0001 }, 'learning_rate_min': { 'distribution': 'uniform', 'max': 0.00001, 'min': 0.000001 }, 'lr_scheduller': { 'values': ['Cyclic', 'reduceLRonPlateau'] }, 'augmentation_rate': { 'values': [0.1, 0.2, 0.3] } } } # categorical crossentropy sweep_id = wandb.sweep(sweep_config, project='VGGFace2_FtF_training') wandb.agent( sweep_id, function=lambda: train_model(train, dev, 'categorical_crossentropy'), count=30, project='VGGFace2_FtF_training') tf.keras.backend.clear_session() gc.collect() # focal loss print("Wandb with focal loss") sweep_id = wandb.sweep(sweep_config, project='VGGFace2_FtF_training') wandb.agent(sweep_id, function=lambda: train_model(train, dev, 'focal_loss'), count=30, project='VGGFace2_FtF_training') tf.keras.backend.clear_session() gc.collect()
def run_sweep(config_yaml):
    """
    Set up and run a Weights & Biases hyperparameter sweep from a config file.

    `config_yaml` is the YAML text (or an open file object) describing the sweep.
    """
    print("Setting sweep")
    # yaml.load without a Loader is an error in recent PyYAML; safe_load is equivalent here
    sweep_id = wandb.sweep(yaml.safe_load(config_yaml))
    print("Setting agent")
    wandb.agent(sweep_id, wandb_run)
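# A minimal sweep YAML that run_sweep() could consume; the parameter names
# below are illustrative placeholders, not from the original project:
#
#   method: random
#   metric:
#     name: val_loss
#     goal: minimize
#   parameters:
#     lr:
#       values: [0.01, 0.001, 0.0001]
#
# Hypothetical invocation, passing the opened file object:
with open('sweep.yaml') as f:
    run_sweep(f)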
def test_agent_ignore_runid(live_mock_server):
    sweep_run_ids = []

    def train():
        run = wandb.init(id="ignored")
        sweep_run_ids.append(run.id)

    wandb.agent("test-sweep-id-3", function=train, count=1)
    assert len(sweep_run_ids) == 1
    assert sweep_run_ids[0] == "mocker-sweep-run-x9"
def sweep(self, tags=None, saveroot=None):
    '''
    run sweep

    Parameters
    ----------
    tags : list, optional
        list of strings to add as tags to sweep runs
    saveroot : str, optional
        root name to use for saving
    '''
    # avoid a mutable default argument; a shared default list would
    # accumulate tags across calls
    tags = list(tags) if tags is not None else []

    # set up root saving directory
    if saveroot is not None:
        if not os.path.exists(saveroot):
            os.mkdir(saveroot)

    # define internal train function to wrap Train
    def train():
        # setup wandb
        config_defaults = {'kernel': 5,
                           'filters': 8,
                           'fc_size': 32,
                           'drop_rate': 0.1,
                           'batch_size': 16,
                           'lr': 1e-3,
                           'weight_decay': 1e-4}
        # build per-run tags instead of appending to the closure list,
        # which would add one date entry per agent run
        run_tags = tags + [datetime.today().strftime('%Y-%m-%d')]
        wandb.init(config=config_defaults, tags=run_tags)
        config = wandb.config

        # create run results directory
        save = False
        runpath = None  # previously undefined when saveroot is None
        if saveroot is not None:
            runpath = os.path.join(saveroot, wandb.run.id)
            os.mkdir(runpath)
            save = True

        # instantiate trainer and run
        trainer = Train(self.trainX, self.trainY, self.valX, self.valY,
                        testX=self.testX, testY=self.testY, Ylim=self.Ylim,
                        kernel=config.kernel, filters=config.filters,
                        drop_rate=config.drop_rate, epochs=self.epochs,
                        early_stop=self.early_stop, fc_size=config.fc_size,
                        batch_size=config.batch_size, lr=config.lr,
                        weight_decay=config.weight_decay, verbose=False,
                        mcnum=self.mcnum, regression=self.regression,
                        seed=self.seed, wandb=wandb, save=save, savedir=runpath)
        trainer.train()

    # run sweep
    sweep_id = wandb.sweep(self.sweep_config, entity=self.entity, project=self.project)
    wandb.agent(sweep_id, train)
def test_agent_config_ignore(live_mock_server):
    sweep_configs = []

    def train():
        run = wandb.init(config={"learning_rate": "ignored", "extra": 2})
        sweep_configs.append(dict(run.config))

    wandb.agent("test-sweep-id-3", function=train, count=1)
    assert len(sweep_configs) == 1
    assert sweep_configs[0] == {"learning_rate": 0.99124, "extra": 2}
def sweep_quick(args):
    config = dict(method="random",
                  parameters=dict(
                      param0=dict(values=[2]),
                      param1=dict(values=[0, 1, 4]),
                      param2=dict(values=[0, 0.5, 1.5]),
                      epochs=dict(value=4),
                  ))
    sweep_id = wandb.sweep(config, project=PROJECT)
    print("sweep:", sweep_id)
    wandb.agent(sweep_id, function=train, count=1)
    check(sweep_id, num=1)
def sweep_grid(args):
    config = dict(method="grid",
                  parameters=dict(
                      param0=dict(values=[2]),
                      param1=dict(values=[0, 1, 4]),
                      param2=dict(values=[0, 0.5, 1.5]),
                      epochs=dict(value=4),
                  ))
    sweep_id = wandb.sweep(config, project=PROJECT)
    print("sweep:", sweep_id)
    wandb.agent(sweep_id, function=train)
    check(sweep_id, num=9, result=2 + 4 * L + 1.5 * L * L)
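# `train`, `check`, PROJECT, and the constant L are defined elsewhere in this
# test module. The expected best result 2 + 4 * L + 1.5 * L * L (that is,
# param0 + param1 * L + param2 * L * L at the best grid point) suggests a
# train function along the following lines; treat this as an inferred sketch,
# not the original, and the metric name 'val_acc' as an assumption:
import wandb

L = 4  # assumed value; the real constant lives in the test module

def train():
    run = wandb.init()
    c = run.config
    val_acc = c.param0 + c.param1 * L + c.param2 * L * L
    for epoch in range(c.epochs):
        wandb.log({'val_acc': val_acc, 'epoch': epoch})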
def run_sweep(sweep_name: str, window_length: int):
    print("START OF SCRIPT")
    # gpus = tf.config.experimental.list_physical_devices('GPU')
    # for gpu in gpus:
    #     tf.config.experimental.set_memory_growth(gpu, True)

    # load the data and labels
    train, dev, test = load_data()
    gc.collect()

    sweep_config = {
        'name': sweep_name,
        'method': 'random',
        'metric': {
            'name': 'val_loss',
            'goal': 'minimize'
        },
        'parameters': {
            'optimizer': {
                'values': ['Adam', 'SGD', 'Nadam']
            },
            'learning_rate_max': {
                'distribution': 'uniform',
                'max': 0.01,
                'min': 0.0001
            },
            'learning_rate_min': {
                'distribution': 'uniform',
                'max': 0.0001,
                'min': 0.000001
            },
            'lr_scheduller': {
                'values': ['Cyclic', 'reduceLRonPlateau']
            },
            'num_layers': {
                'values': [1, 2, 3]
            },
            'num_neurons': {
                'values': [64, 128, 256, 512]
            },
            'window_length': {
                'values': [window_length]
            }
        }
    }

    # focal loss
    sweep_id = wandb.sweep(sweep_config, project='NoXi_Seq_emb_training')
    wandb.agent(sweep_id,
                function=lambda: train_model(train, dev, 'focal_loss'),
                count=195,
                project='NoXi_Seq_emb_training')
    tf.keras.backend.clear_session()
    gc.collect()
def sweep_bayes_nested(args):
    config = dict(method="bayes",
                  metric=dict(name="feat1.val_acc", goal="maximize"),
                  parameters=dict(
                      param0=dict(values=[2]),
                      param1=dict(values=[0, 1, 4]),
                      param2=dict(values=[0, 0.5, 1.5]),
                  ))
    sweep_id = wandb.sweep(config, project=PROJECT)
    print("sweep:", sweep_id)
    wandb.agent(sweep_id, function=train_nested, count=9)
    check(sweep_id, num=9, result=2 + 4 * L + 1.5 * L * L)
def test_agent_config_merge(live_mock_server):
    sweep_configs = []
    os.environ["WANDB_CONSOLE"] = "off"

    def train():
        run = wandb.init(config={"extra": 2})
        sweep_configs.append(dict(run.config))

    wandb.agent("test-sweep-id-2", function=train, count=1)
    assert len(sweep_configs) == 1
    assert sweep_configs[0] == {"learning_rate": 0.99124, "extra": 2}
def test_agent_ignore(live_mock_server):
    sweep_entities = []
    sweep_projects = []

    def train():
        run = wandb.init(entity="ign", project="ignored")
        sweep_projects.append(run.project)
        sweep_entities.append(run.entity)

    wandb.agent("test-sweep-id-3", function=train, count=1)
    assert len(sweep_projects) == len(sweep_entities) == 1
    assert sweep_projects[0] == "test"
    assert sweep_entities[0] == "mock_server_entity"
def main():
    # Parse the command line
    args = parse_args()

    # Load config YAML
    with open(args.config) as file:
        sweep_config = yaml.load(file, Loader=yaml.FullLoader)

    # Instantiate WandB sweep ID
    sweep_id = wandb.sweep(sweep_config, entity="murnanedaniel", project="node_regression_sweep")

    # Run WandB sweep agent
    wandb.agent(sweep_id, function=train)
def test_agent_basic(live_mock_server):
    sweep_ids = []
    sweep_configs = []

    def train():
        run = wandb.init()
        sweep_ids.append(run.sweep_id)
        sweep_configs.append(dict(run.config))

    wandb.agent("test-sweep-id", function=train, count=1)
    assert len(sweep_ids) == len(sweep_configs) == 1
    assert sweep_ids[0] == "test-sweep-id"
    assert sweep_configs[0] == {"learning_rate": 0.99124}
def main(args):
    wandb.init(project="fact2021")

    def sweep_iteration_with_args():
        sweep_iteration(args)

    if args.sweep_id is None:
        sweep_config = get_config(args.dataset)
        sweep_id = wandb.sweep(sweep_config, project="fact2021")
        print(f"new sweep. sweep_id: {sweep_id}")
    else:
        sweep_id = args.sweep_id
        print(f"continuing sweep. sweep_id: {sweep_id}")

    wandb.agent(sweep_id, function=sweep_iteration_with_args)
def train_inception_opp():
    print("Running Opportunity")
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = InceptionModel(num_blocks=1, in_channels=113, out_channels=32,
                           bottleneck_channels=2, kernel_sizes=20,
                           use_residuals=True, num_pred_classes=17)
    model.to(device)
    trainer = OPPTrainer(model=model)
    # trainer.fit()
    wandb.agent(sweep_id, function=trainer.fit, count=15)
def sweep_chdir(args):
    config = dict(method="grid",
                  parameters=dict(
                      param0=dict(values=[2]),
                      param1=dict(values=[0, 1, 4]),
                      param2=dict(values=[0, 0.5, 1.5]),
                      epochs=dict(value=4),
                  ),
                  root=os.getcwd())
    sweep_id = wandb.sweep(config, project=PROJECT)
    wandb.agent(sweep_id, function=train_and_check_chdir, count=2)
    # clean up
    os.chdir('../')
    os.removedirs('./test_chdir')
def wandb_agent(script_path, sweep, entity=None, project=None, count=None, run=True):
    try:
        import wandb
    except ImportError:
        raise ImportError('You need to install wandb to run sweeps!')
    if 'program' not in sweep.keys():
        sweep["program"] = script_path
    sweep_id = wandb.sweep(sweep, entity=entity, project=project)
    entity = ifnone(entity, os.environ['WANDB_ENTITY'])
    project = ifnone(project, os.environ['WANDB_PROJECT'])
    print(f"\nwandb agent {entity}/{project}/{sweep_id}\n")
    if run:
        wandb.agent(sweep_id, function=None, count=count)
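# Hypothetical usage of the helper above; the script path and hyper-parameter
# names are placeholders. Because no `function` is passed to wandb.agent, the
# agent launches the sweep's `program` (the script) once per trial:
sweep = {
    'method': 'random',
    'metric': {'name': 'loss', 'goal': 'minimize'},
    'parameters': {'lr': {'values': [1e-2, 1e-3, 1e-4]}},
}
wandb_agent('train.py', sweep, entity='my-entity', project='my-project', count=5)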
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("entity", type=str)
    args = parser.parse_args()

    entity = args.entity
    project = "test"
    sweep_config_file = os.path.join(defs.SOURCE_DIR, "simple_sweep.yaml")
    with open(sweep_config_file, "r") as f:
        sweep_config = yaml.safe_load(f)

    sweep_id = wandb.sweep(sweep=sweep_config, project=project)
    wandb.agent(sweep_id, entity=entity, project=project)
    print("[ Done ]")
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("entity", type=str)
    args = parser.parse_args()

    entity = args.entity
    project = "test"
    with open(defs.SWEEP_CONFIG_FILE, "r") as f:
        sweep_config = yaml.safe_load(f)

    sweep_id = wandb.sweep(sweep=sweep_config, project=project)
    print("Run agent")
    wandb.agent(sweep_id, entity=entity, project=project)
    print("[ Done ]")
def papersweep_exec(input_nb: Param("Input notebook", str),
                    sweep_config: Param("YAML file with the sweep config", str),
                    entity: Param("wandb entity", str),
                    project: Param("wandb project", str),
                    pm_params: Param("YAML file with papermill parameters", str) = None,
                    sweep_id: Param("Sweep ID. This option omits `sweep_config`", str) = None,
                    login_key: Param("Login key for wandb", str) = None):
    """
    Executes the notebook `input_nb` with the sweep configuration given in `sweep_config`.

    Optionally, in case the notebook has one cell tagged as 'parameters', those will be
    injected from the file `pm_params`.
    """
    if login_key:
        wandb.login(key=login_key)
    with maybe_open(sweep_config, 'r') as f:
        sc = yaml.safe_load(f)
    if pm_params:
        with maybe_open(pm_params, 'r') as f:
            _pm_params = yaml.safe_load(f)
    else:
        _pm_params = None
    sid = wandb.sweep(sweep=sc, entity=entity, project=project) if not sweep_id else sweep_id
    wandb.agent(sid, function=partial(pm.execute_notebook,
                                      input_path=input_nb,
                                      output_path='__.ipynb',
                                      parameters=_pm_params))
    return sid
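# Hypothetical programmatic invocation of papersweep_exec; the file names,
# entity, and project below are placeholders:
sid = papersweep_exec('train.ipynb', 'sweep.yaml',
                      entity='my-entity', project='my-project',
                      pm_params='params.yaml')
print(f'sweep id: {sid}')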
def train_inception_har():
    print("Running HAR")
    data_folder = Path('../data/UCI_HAR_Dataset')
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(device)
    model = InceptionModel(num_blocks=1, in_channels=9, out_channels=32,
                           bottleneck_channels=2, kernel_sizes=20,
                           use_residuals=True, num_pred_classes=6)
    model.to(device)
    trainer = HARTrainer(model=model, data_folder=data_folder)
    # trainer.fit()
    wandb.agent(sweep_id, function=trainer.fit, count=648)
def sweep_grid_hyperband(args):
    config = dict(
        method="grid",
        metric=dict(name="val_acc", goal="maximize"),
        parameters=dict(
            param0=dict(values=[2]),
            param1=dict(values=[4, 1, 0]),
            param2=dict(values=[1.5, 0.5, 0]),
            delay=dict(value=args.grid_hyper_delay or 1),
            epochs=dict(value=27),
        ),
        early_terminate=dict(type="hyperband", max_iter=27, s=2, eta=3),
    )
    sweep_id = wandb.sweep(config, project=PROJECT)
    print("sweep:", sweep_id)
    wandb.agent(sweep_id, function=train, count=9)
    # TODO(check stopped)
    check(sweep_id, num=9, result=2 + 4 * L + 1.5 * L * L, stopped=3)
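# With max_iter=27, eta=3, and s=2, hyperband evaluates runs at the bracket
# boundaries 27 / 3**2 = 3 and 27 / 3 = 9 iterations and stops
# under-performers there, which is what the `stopped=3` check above expects.
# Early termination only works if the metric is logged once per iteration; a
# hedged sketch of a compatible train function follows (same inferred metric
# and assumed L constant as in the earlier sketch):
import time

import wandb

L = 4  # assumed value; the real constant lives in the test module

def train():
    run = wandb.init()
    c = run.config
    for epoch in range(c.epochs):
        time.sleep(c.delay)  # gives the controller time to issue stop requests
        wandb.log({'val_acc': c.param0 + c.param1 * L + c.param2 * L * L})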
def test_agent(live_mock_server, dummy_api_key):
    assert True

    def train():
        # Here we're in a different process. It's hard to communicate
        # back to the main process for assertions.
        settings = wandb.Settings(base_url="http://localhost", api_key=dummy_api_key)
        # TODO: Fix this.
        # There is an issue here, the agent sets the environment variable
        # WANDB_SWEEP_ID and wandb.init() should pick that up. But it doesn't,
        # I think because the settings object has been frozen at some other time.
        run = wandb.init(settings=settings)
        # If this assertion fails, the test will timeout (because we
        # never complete 1 agent run)
        assert run.sweep_id == 'test-sweep-id'

    wandb.agent('test-sweep-id', function=train, count=1)
def run(self, *args, **kwargs):
    self._sweep_experiment_run_args = args
    self._sweep_experiment_run_kwargs = kwargs
    if self._wandb_sweep_id is not None and self.get_arg('wandb.sweep', False):
        return wandb.agent(self._wandb_sweep_id,
                           self.run_sweep_experiment,
                           project=self.wandb_project,
                           entity=self.wandb_entity,
                           count=1)
    else:
        return self.run_sweep_experiment()
def sweep(self):
    ##########
    # Sweeps #
    ##########
    sweep_config = {
        'method': 'random',  # grid, random
        'metric': {
            'name': 'accuracy_score',
            'goal': 'maximize'
        },
        'parameters': {
            'model': {
                'values': ['randomForest', 'logistic', 'xgboost']
            }
        }
    }
    config_defaults = {
        'model': 'logistic'
    }
    sweep_id = wandb.sweep(sweep_config)
    wandb.agent(sweep_id, function=self.train)
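# `self.train` is not shown here. Below is a hedged sketch of the kind of
# target this agent expects -- the toy dataset, model mapping, and default
# config are illustrative assumptions, not the original code. The essentials
# are reading wandb.config.model and logging 'accuracy_score', the metric the
# sweep maximizes:
import wandb
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

def train():
    wandb.init(config={'model': 'logistic'})  # default when run outside a sweep
    X, y = make_classification(n_samples=500, random_state=0)  # toy stand-in data
    X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=0)
    models = {
        'logistic': LogisticRegression(max_iter=1000),
        'randomForest': RandomForestClassifier(),
        # 'xgboost': xgboost.XGBClassifier(),  # if xgboost is installed
    }
    clf = models[wandb.config.model]
    clf.fit(X_tr, y_tr)
    wandb.log({'accuracy_score': accuracy_score(y_te, clf.predict(X_te))})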
def wandb_sweep(dataset, config):
    import wandb
    config_defaults = {
        'n_epochs': 150,
        'batch_size': 32,
        'weight_decay': 1e-04,
        'lr': 1e-4,
        'optimizer_name': 'adam',
    }
    wandb.init(config=config_defaults)
    # note: this shadows the `config` argument with the sweep-provided values
    config = wandb.config
    # deep_SVDD and device come from the enclosing module scope
    deep_SVDD.pretrain(dataset,
                       optimizer_name='adam',
                       lr=config.lr,
                       n_epochs=config.n_epochs,  # 250, 1e-4, 32, 1e-04
                       lr_milestones=(100,),
                       batch_size=config.batch_size,
                       weight_decay=config.weight_decay,  # cfg.settings['ae_weight_decay']
                       device=device,
                       n_jobs_dataloader=0,
                       use_wandb=True)
    wandb.log({"auc": deep_SVDD.ae_trainer.test(dataset, deep_SVDD.ae_net)})

# kick off the sweep; sweep_id and train are defined at module level
wandb.agent(sweep_id, train)