def wandb_init_run(tmpdir):
    """Fixture that calls wandb.init(), yields the run that gets created,
    then cleans up afterward.
    """
    # Save the environment so we can restore it later. pytest
    # may actually do this itself; didn't check.
    orig_environ = dict(os.environ)
    try:
        os.environ['WANDB_MODE'] = 'clirun'  # no i/o wrapping - it breaks pytest
        os.environ['WANDB_PROJECT'] = 'unit-test-project'
        os.environ['WANDB_RUN_DIR'] = str(tmpdir)

        assert wandb.run is None
        assert wandb.config is None
        orig_namespace = vars(wandb)

        run = wandb.init()
        assert run is wandb.run
        assert run.config is wandb.config
        yield run

        wandb.uninit()
        assert vars(wandb) == orig_namespace
    finally:
        # Restore the original environment
        os.environ.clear()
        os.environ.update(orig_environ)
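
# A minimal sketch of how a test might consume the fixture above, assuming
# it is registered with @pytest.fixture (the decorator does not appear in
# this dump). The test body and the logged key are illustrative only.
def test_wandb_init_run_logs(wandb_init_run):
    assert wandb.run is wandb_init_run
    wandb.log({"loss": 0.5})  # hypothetical metric
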
def pytest_runtest_setup(item):
    wandb.reset_env()
    wandb.uninit()
    global_settings = os.path.expanduser("~/.config/wandb/settings")
    if os.path.exists(global_settings):
        os.remove(global_settings)
    # This is used to find tests that are leaking outside of tmp directories
    os.environ["WANDB_DESCRIPTION"] = item.parent.name + "#" + item.name
def dryrun():
    orig_environ = dict(os.environ)
    try:
        with CliRunner().isolated_filesystem():
            os.environ["WANDB_MODE"] = "dryrun"
            yield os.environ
    finally:
        os.environ.clear()
        os.environ.update(orig_environ)
        wandb.uninit()
def loggedin():
    orig_environ = dict(os.environ)
    try:
        with CliRunner().isolated_filesystem():
            os.environ["WANDB_API_KEY"] = "X" * 40
            yield os.environ
    finally:
        os.environ.clear()
        os.environ.update(orig_environ)
        wandb.uninit()
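
# Hypothetical usage of the two environment fixtures above (again assuming
# they are registered with @pytest.fixture). Each fixture yields os.environ,
# so the assertions just mirror what the fixture sets.
def test_dryrun_env(dryrun):
    assert dryrun["WANDB_MODE"] == "dryrun"


def test_loggedin_env(loggedin):
    assert loggedin["WANDB_API_KEY"] == "X" * 40
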
def run_manager(mocker, mock_server):
    """This fixture emulates the run_manager headless mode in a single process.

    Just call run_manager.test_shutdown() to join the threads.
    """
    # Reset the tensorboard state
    wandb.tensorboard.reset_state()
    with CliRunner().isolated_filesystem():
        run_manager = fake_run_manager(mocker)
        yield run_manager
        wandb.uninit()
def run_manager(mocker, request_mocker, upsert_run, query_viewer):
    """This fixture emulates the run_manager headless mode in a single process.

    Just call run_manager.test_shutdown() to join the threads.
    """
    with CliRunner().isolated_filesystem():
        query_viewer(request_mocker)
        upsert_run(request_mocker)
        run_manager = fake_run_manager(mocker)
        yield run_manager
        wandb.uninit()
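
# As the docstrings of both run_manager fixtures note, a consuming test is
# expected to join the emulated headless threads when it is done — a
# hypothetical sketch:
def test_run_manager_shutdown(run_manager):
    # ... exercise run_manager state here ...
    run_manager.test_shutdown()  # joins the worker threads
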
def wandb_init_run(request, tmpdir, request_mocker, upsert_run,
                   query_run_resume_status, upload_logs, monkeypatch):
    """Fixture that calls wandb.init(), yields the run that gets created,
    then cleans up afterward.
    """
    # Save the environment so we can restore it later. pytest
    # may actually do this itself; didn't check.
    orig_environ = dict(os.environ)
    try:
        if request.node.get_marker('jupyter'):
            upsert_run(request_mocker)
            query_run_resume_status(request_mocker)

            def get_ipython():
                class Jupyter():
                    __module__ = "jupyter"
                return Jupyter()
            wandb.get_ipython = get_ipython
        # no i/o wrapping - it breaks pytest
        os.environ['WANDB_MODE'] = 'clirun'
        if not request.node.get_marker('unconfigured'):
            os.environ['WANDB_API_KEY'] = 'test'
            os.environ['WANDB_ENTITY'] = 'test'
            os.environ['WANDB_PROJECT'] = 'unit-test-project'
        os.environ['WANDB_RUN_DIR'] = str(tmpdir)
        # Re-initialize the Api
        monkeypatch.setattr(wandb, "http_api", wandb.api.Api())

        assert wandb.run is None
        assert wandb.config is None
        orig_namespace = vars(wandb)

        run = wandb.init()
        upload_logs(request_mocker, run)
        assert run is wandb.run
        assert run.config is wandb.config
        yield run

        wandb.uninit()
        assert vars(wandb) == orig_namespace
    finally:
        # Restore the original environment
        os.environ.clear()
        os.environ.update(orig_environ)
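
# The fixture above branches on pytest markers ('jupyter', 'unconfigured').
# A hypothetical test exercising the jupyter path, assuming the marker is
# registered with pytest; the assertion follows from the fake get_ipython
# the fixture installs.
@pytest.mark.jupyter
def test_init_in_jupyter(wandb_init_run):
    assert wandb.get_ipython().__module__ == "jupyter"
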
def run(self):
    import os
    import wandb
    wandb.uninit()
    wandb.reset_env()
    run = wandb.init(group=self.group_name, job_type='rollout')
    print('Worker started. wandb run ID: %s' % run.id)
    ep_rew = 0
    ep_len = 0
    while True:
        command, data = self.remote.recv()
        if command == 'step':
            observation, reward, done, info = (
                self.empty_step() if self.done else self.env.step(data))
            ep_rew += reward
            ep_len += 1
            if done:
                wandb.log({'ep_rew': ep_rew, 'ep_len': ep_len})
                ep_rew = 0
                ep_len = 0
            # print('Worker step: ', observation, data, reward, done, info)
            if done and (not self.done):
                observation = self.try_reset()
            self.remote.send(
                (observation, reward, done, self.task_id, info))
        elif command == 'reset':
            observation = self.try_reset()
            self.remote.send((observation, self.task_id))
        elif command == 'reset_task':
            self.env.unwrapped.reset_task(data)
            self.remote.send(True)
        elif command == 'close':
            self.remote.close()
            break
        elif command == 'get_spaces':
            self.remote.send(
                (self.env.observation_space, self.env.action_space))
        else:
            raise NotImplementedError()
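
# The worker loop above implements a simple (command, data) protocol over a
# multiprocessing Pipe. A hypothetical parent-side exchange, with process
# startup and env construction omitted; `action` is illustrative.
import multiprocessing

parent_conn, child_conn = multiprocessing.Pipe()
# ... start the worker process with child_conn bound as self.remote ...
parent_conn.send(('reset', None))
observation, task_id = parent_conn.recv()
parent_conn.send(('step', action))
observation, reward, done, task_id, info = parent_conn.recv()
parent_conn.send(('close', None))
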
def wandb_init_run(request, tmpdir, request_mocker, mock_server, monkeypatch,
                   mocker, capsys, local_netrc):
    """Fixture that calls wandb.init(), yields a run (or an exception)
    that gets created, then cleans up afterward.  This is meant to test
    the logic in wandb.init; it should generally not spawn a run_manager.
    If you need to test run_manager logic use that fixture.
    """
    # Save the environment so we can restore it later. pytest
    # may actually do this itself; didn't check.
    orig_environ = dict(os.environ)
    orig_namespace = None
    run = None
    # Reset the tensorboard and pytest state
    wandb.tensorboard.reset_state()
    wandb._global_watch_idx = 0
    try:
        with CliRunner().isolated_filesystem():
            if request.node.get_closest_marker('jupyter'):

                def fake_ipython():
                    class Jupyter(object):
                        __module__ = "jupyter"

                        def __init__(self):
                            class Hook(object):
                                def register(self, what, where):
                                    pass

                            class Pub(object):
                                def publish(self, **kwargs):
                                    pass

                            class Hist(object):
                                def get_range(self, **kwargs):
                                    return [[None, 1, ('#source code', None)]]
                            self.events = Hook()
                            self.display_pub = Pub()
                            self.history_manager = Hist()

                        def register_magics(self, magic):
                            pass
                    return Jupyter()
                wandb.get_ipython = fake_ipython
                wandb.jupyter.get_ipython = fake_ipython
            # no i/o wrapping - it breaks pytest
            os.environ['WANDB_MODE'] = 'clirun'
            if request.node.get_closest_marker('headless'):
                mocker.patch('subprocess.Popen')
            else:
                def mock_headless(run, cloud=True):
                    print("_init_headless called with cloud=%s" % cloud)
                mocker.patch('wandb._init_headless', mock_headless)
            if not request.node.get_closest_marker('unconfigured'):
                os.environ['WANDB_API_KEY'] = 'test'
                os.environ['WANDB_ENTITY'] = 'test'
                os.environ['WANDB_PROJECT'] = 'unit-test-project'
            else:
                # when unconfigured we enable run mode to test missing creds
                os.environ['WANDB_MODE'] = 'run'
                monkeypatch.setattr('wandb.apis.InternalApi.api_key', None)
                monkeypatch.setattr(
                    'getpass.getpass',
                    lambda x: "0123456789012345678901234567890123456789")
                assert InternalApi().api_key is None
            os.environ['WANDB_RUN_DIR'] = str(tmpdir)
            if request.node.get_closest_marker('silent'):
                os.environ['WANDB_SILENT'] = "true"

            orig_namespace = vars(wandb)
            assert wandb.run is None
            # Mock out run_manager; we add it to run to access state in tests
            orig_rm = wandb.run_manager.RunManager
            mock = mocker.patch('wandb.run_manager.RunManager')

            def fake_init(run, port=None, output=None, cloud=True):
                print("Initialized fake run manager")
                rm = fake_run_manager(mocker, run, cloud=cloud,
                                      rm_class=orig_rm)
                rm._block_file_observer()
                run.run_manager = rm
                return rm
            mock.side_effect = fake_init
            if request.node.get_closest_marker('args'):
                kwargs = request.node.get_closest_marker('args').kwargs
                # Unfortunate, but needed to enable the test to work
                if kwargs.get("dir"):
                    del os.environ['WANDB_RUN_DIR']
                if kwargs.get("tensorboard"):
                    # The test uses tensorboardX, so we need to be sure it's
                    # imported; we use get_module because tensorboardX isn't
                    # available in py2
                    wandb.util.get_module("tensorboardX")
                if kwargs.get("error"):
                    err = kwargs["error"]
                    del kwargs['error']
                    if err == "io":
                        @classmethod
                        def error(cls):
                            raise IOError
                        monkeypatch.setattr(
                            'wandb.wandb_run.Run.from_environment_or_defaults',
                            error)
                    elif err == "socket":
                        class Error(object):
                            @property
                            def port(self):
                                return 123

                            def listen(self, secs):
                                return False, None
                        monkeypatch.setattr("wandb.wandb_socket.Server", Error)
                if kwargs.get('k8s') is not None:
                    token_path = "/var/run/secrets/kubernetes.io/serviceaccount/token"
                    crt_path = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
                    orig_exist = os.path.exists

                    # Bug fix: the original tested `path in token_path`,
                    # a substring check; membership in the path tuple is
                    # what the mock needs.
                    def exists(path):
                        return (True if path in (token_path, crt_path)
                                else orig_exist(path))

                    def magic(path, *args, **kwargs):
                        if path == token_path:
                            return six.StringIO('token')
                    mocker.patch('wandb.util.open', magic, create=True)
                    mocker.patch('wandb.util.os.path.exists', exists)
                    os.environ["KUBERNETES_SERVICE_HOST"] = "k8s"
                    os.environ["KUBERNETES_PORT_443_TCP_PORT"] = "123"
                    os.environ["HOSTNAME"] = "test"
                    if kwargs["k8s"]:
                        # NOTE: the mocked URL and JSON body were redacted in
                        # the source ("*****"); the fragment is kept verbatim.
                        request_mocker.register_uri(
                            "GET", "https://*****:*****@sha256:1234"}]}}'
                        )
                    else:
                        request_mocker.register_uri(
                            "GET",
                            "https://k8s:123/api/v1/namespaces/default/pods/test",
                            content=b'{}', status_code=500)
                    del kwargs["k8s"]
                if kwargs.get('sagemaker'):
                    del kwargs['sagemaker']
                    config_path = "/opt/ml/input/config/hyperparameters.json"
                    resource_path = "/opt/ml/input/config/resourceconfig.json"
                    secrets_path = "secrets.env"
                    os.environ['TRAINING_JOB_NAME'] = 'sage'
                    os.environ['CURRENT_HOST'] = 'maker'
                    orig_exist = os.path.exists

                    def exists(path):
                        return (True if path in (config_path, secrets_path,
                                                 resource_path)
                                else orig_exist(path))
                    mocker.patch('wandb.os.path.exists', exists)

                    def magic(path, *args, **kwargs):
                        if path == config_path:
                            return six.StringIO('{"f****n": "A"}')
                        elif path == resource_path:
                            return six.StringIO('{"hosts":["a", "b"]}')
                        elif path == secrets_path:
                            return six.StringIO('WANDB_TEST_SECRET=TRUE')
                        else:
                            return six.StringIO()
                    mocker.patch('wandb.open', magic, create=True)
                    mocker.patch('wandb.util.open', magic, create=True)
                elif kwargs.get("tf_config"):
                    os.environ['TF_CONFIG'] = json.dumps(kwargs['tf_config'])
                    del kwargs['tf_config']
                elif kwargs.get("env"):
                    for k, v in six.iteritems(kwargs["env"]):
                        os.environ[k] = v
                    del kwargs["env"]
            else:
                kwargs = {}

            if request.node.get_closest_marker('resume'):
                # env was leaking when running the whole suite...
                if os.getenv(env.RUN_ID):
                    del os.environ[env.RUN_ID]
                os.mkdir(wandb.wandb_dir())
                with open(os.path.join(wandb.wandb_dir(),
                                       wandb_run.RESUME_FNAME), "w") as f:
                    f.write(json.dumps({"run_id": "test"}))
            try:
                print("Initializing with", kwargs)
                run = wandb.init(**kwargs)
                if request.node.get_closest_marker('resume') or \
                        request.node.get_closest_marker('mocked_run_manager'):
                    # Reset history
                    run._history = None
                    rm = wandb.run_manager.RunManager(run)
                    rm.init_run(os.environ)
                if request.node.get_closest_marker('mock_socket'):
                    run.socket = mocker.MagicMock()
                assert run is wandb.run
                assert run.config is wandb.config
            except wandb.LaunchError as e:
                print("!!! wandb LaunchError raised")
                run = e
            yield run
            if hasattr(run, "run_manager"):
                print("Shutting down run manager")
                run.run_manager.test_shutdown()
    finally:
        # Restore the original environment
        os.environ.clear()
        os.environ.update(orig_environ)
        wandb.uninit()
        wandb.get_ipython = lambda: None
        assert vars(wandb) == orig_namespace
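
# Tests drive the fixture above entirely through pytest markers ('args',
# 'resume', 'headless', 'silent', ...). A hypothetical example using the
# 'args' marker; the kwarg value and the assertion on run.dir are
# illustrative, not from the source.
@pytest.mark.args(dir="/tmp/custom-run-dir")
def test_init_custom_dir(wandb_init_run):
    assert wandb_init_run.dir.startswith("/tmp")
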
def lr_grid_search(self, min_pow=-5, max_pow=-1, resolution=20, n_epochs=5,
                   random_lr=False, report_intermediate_steps=False):
    self.is_grid_search = True
    self.save_best_model = False
    self.epochs = n_epochs
    self.scheduler = None
    pref_m = self.model_name
    self.model_name = 'grid_search'
    self.save_every = float("inf")
    self.report_intermediate_steps = report_intermediate_steps
    if self.report_intermediate_steps:
        self.val_every = 1
    else:
        self.log_every = float("inf")
        self.val_every = float("inf")
    v_losses = []
    v_accs = []
    if not random_lr:
        e = np.linspace(min_pow, max_pow, resolution)
        lr_points = 10 ** e
    else:
        # Bug fix: the original exponentiated the undefined `e` here;
        # sample the exponents, then exponentiate them.
        e = np.random.uniform(min_pow, max_pow, resolution)
        lr_points = 10 ** e
    out_name = pref_m + "_grid_search_out.txt"
    with open(out_name, "w") as text_file:
        print('learning rate \t val_loss \t val_AUC', file=text_file)
    for lr in tqdm(lr_points, desc='Grid search cycles', leave=False):
        wandb.init(project=pref_m + '_grid_search', name=str(lr), reinit=True)
        self.optimizer.param_groups[0]['lr'] = lr
        self.train()
        self.evaluate()
        v_losses.append(self.val_loss)
        v_accs.append(self.val_acc)
        self.logging({'val_loss': self.val_loss, 'val_acc': self.val_acc})
        with open(out_name, "a") as text_file:
            print('{} \t {} \t {}'.format(lr, self.val_loss, self.val_acc),
                  file=text_file)
        self.reset()
        self.val_loss = float("inf")
        self.best_val_loss = float("inf")
        self.val_acc = 0.
        self.best_val_acc = 0.
        self.iters = 0
        self.epoch0 = 0
        self.epoch = 0
        wandb.uninit()
    arg_best_acc = np.argmax(v_accs)
    best_acc = v_accs[arg_best_acc]
    best_lr_acc = lr_points[arg_best_acc]
    arg_best_vloss = np.argmin(v_losses)
    best_vloss = v_losses[arg_best_vloss]
    best_lr_vloss = lr_points[arg_best_vloss]
    print("The best val_AUC is {} for lr = {}".format(
        best_acc, best_lr_acc))
    print("The best val_loss is {} for lr = {}".format(
        best_vloss, best_lr_vloss))
    fig, axs = plt.subplots(1, 2, figsize=(15, 6))
    axs = axs.ravel()
    fig.suptitle('Grid search results')
    axs[0].plot(lr_points, v_losses)
    axs[0].scatter(best_lr_vloss, best_vloss, marker='*', c='r', s=100)
    axs[0].plot([best_lr_vloss] * 2, [0, best_vloss],
                linestyle='--', c='r', alpha=0.5)
    axs[0].plot([lr_points[0], best_lr_vloss], [best_vloss] * 2,
                linestyle='--', c='r', alpha=0.5)
    axs[0].set_xlabel('Learning rate')
    axs[0].set_ylabel('Validation loss')
    axs[0].set_xscale('log')
    axs[1].plot(lr_points, v_accs)
    axs[1].scatter(best_lr_acc, best_acc, marker='*', c='r', s=100)
    axs[1].plot([best_lr_acc] * 2, [0, best_acc],
                linestyle='--', c='r', alpha=0.5)
    axs[1].plot([lr_points[0], best_lr_acc], [best_acc] * 2,
                linestyle='--', c='r', alpha=0.5)
    axs[1].set_xlabel('Learning rate')
    axs[1].set_ylabel('Validation AUC')
    axs[1].set_xscale('log')
    plt.savefig(pref_m + '_grid_search_out.png')
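
# A hypothetical invocation of the grid search above, assuming a trainer
# object that exposes this method plus the attributes it touches
# (optimizer, train, evaluate, reset, logging, model_name, ...).
# `Trainer` is illustrative, not from the source.
trainer = Trainer(...)
trainer.lr_grid_search(min_pow=-5, max_pow=-1, resolution=20, n_epochs=5)
# Results land in '<model_name>_grid_search_out.txt' and
# '<model_name>_grid_search_out.png'.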