def no_retry_4xx(e):
    """Return True unless ``e`` is an HTTP error with a 4xx status.

    Args:
        e: The exception to inspect.

    Returns:
        True when ``e`` is not a ``requests.HTTPError``, or when its
        response status code is outside the range [400, 500).

    Raises:
        UsageError: For a 4xx ``requests.HTTPError``; the message is taken
            from ``errors[0].message`` of the JSON-decoded response body.
    """
    is_http_error = isinstance(e, requests.HTTPError)
    if is_http_error and 400 <= e.response.status_code < 500:
        # The response body is decoded as JSON and its first error surfaced.
        payload = json.loads(e.response.content)
        raise UsageError(payload["errors"][0]["message"])
    return True
def prompt_api_key(self):
    """Prompt for an API key and record it on this object.

    The key is obtained from ``apikey.prompt_api_key``; the ``force``
    setting is passed as both ``no_offline`` and ``no_create``. On success
    the key is handed to ``update_session`` and stored in ``self._key``.

    Raises:
        UsageError: If ``apikey.prompt_api_key`` returns ``False``.
    """
    settings = self._settings
    api_client = Api(settings)
    force = settings.force
    entered_key = apikey.prompt_api_key(
        settings,
        api=api_client,
        no_offline=force,
        no_create=force,
    )
    if entered_key is False:
        raise UsageError("api_key not configured (no-tty). Run wandb login")
    self.update_session(entered_key)
    self._key = entered_key
def parse_config(params, exclude=None, include=None):
    """Convert ``params`` to a dict, optionally filtered by key.

    Args:
        params: Object converted to a dict via ``_to_dict``.
        exclude: Optional container of keys to drop from the result.
        include: Optional container of keys to keep; all others are dropped.

    Returns:
        The converted dict, filtered by ``include`` or ``exclude`` if given.

    Raises:
        UsageError: If both ``exclude`` and ``include`` are truthy.
    """
    if exclude and include:
        raise UsageError("Expected at most only one of exclude or include")

    config = _to_dict(params)
    if include:
        # Allow-list: keep only the requested keys.
        return {k: v for k, v in six.iteritems(config) if k in include}
    if exclude:
        # Deny-list: keep everything except the listed keys.
        return {k: v for k, v in six.iteritems(config) if k not in exclude}
    return config
def parse_config(params, exclude=None, include=None):
    """Convert ``params`` (or a config file path) to a filtered dict.

    Args:
        params: A string, which is loaded through
            ``config_util.dict_from_config_file`` with ``must_exist=True``,
            or any object accepted by ``_to_dict``.
        exclude: Optional container of keys to drop from the result.
        include: Optional container of keys to keep; all others are dropped.

    Returns:
        The converted dict, filtered by ``include`` or ``exclude`` if given.

    Raises:
        UsageError: If both ``exclude`` and ``include`` are truthy.
    """
    if exclude and include:
        raise UsageError("Expected at most only one of exclude or include")

    source = params
    if isinstance(source, six.string_types):
        # A string is treated as a config file to load.
        source = config_util.dict_from_config_file(source, must_exist=True)

    config = _to_dict(source)
    if include:
        config = {
            name: val for name, val in six.iteritems(config) if name in include
        }
    elif exclude:
        config = {
            name: val
            for name, val in six.iteritems(config)
            if name not in exclude
        }
    return config
def init(self):  # noqa: C901
    """Create (or reuse) a run and publish it as the module-level run.

    After firing the "on_init" trigger, one of three things happens:

    * ``settings._noop`` is set: a ``Dummy`` stand-in run is built,
      registered via ``module.set_global`` and returned.
    * No re-initialisation is requested and ``wandb.run`` is already a
      ``Run``: that existing run is returned unchanged.
    * Otherwise: any run on top of the global run stack is finished (when
      re-initialising), a ``Backend`` is launched, a new ``Run`` is created,
      communicated to the backend (or only published, when offline), pushed
      on the global run stack and returned.

    Returns:
        The dummy run, the already-active run, or the newly created run.

    Raises:
        UsageError: If ``communicate_run`` returns a falsy result or a
            result carrying an error.
    """
    trigger.call("on_init", **self.kwargs)
    s = self.settings
    config = self.config
    if s._noop:
        # Disabled mode: hand back a stand-in object instead of a real run.
        run = Dummy()
        run.config = wandb.wandb_sdk.wandb_config.Config()
        run.config.update(config)
        run.summary = DummyDict()
        # log() only forwards the data into the summary object.
        run.log = lambda data, *_, **__: run.summary.update(data)
        run.finish = lambda *_, **__: module.unset_globals()
        run.step = 0
        run.resumed = False
        run.disabled = True
        run.id = shortuuid.uuid()
        run.name = "dummy-" + run.id
        run.dir = "/"
        module.set_global(
            run=run,
            config=run.config,
            log=run.log,
            summary=run.summary,
            save=run.save,
            use_artifact=run.use_artifact,
            log_artifact=run.log_artifact,
            plot_table=run.plot_table,
            alert=run.alert,
        )
        return run
    # Re-init when explicitly requested, or in Jupyter unless reinit was
    # explicitly set to False.
    if s.reinit or (s._jupyter and s.reinit is not False):
        if len(self._wl._global_run_stack) > 0:
            if len(self._wl._global_run_stack) > 1:
                wandb.termwarn(
                    "If you want to track multiple runs concurrently in wandb you should use multi-processing not threads"  # noqa: E501
                )
            # Remember the id before finishing so both messages can show it.
            last_id = self._wl._global_run_stack[-1]._run_id
            if s._jupyter and not s._silent:
                ipython.display_html(
                    "Finishing last run (ID:{}) before initializing another...".format(
                        last_id
                    )
                )
            self._wl._global_run_stack[-1].finish()
            if s._jupyter and not s._silent:
                ipython.display_html(
                    "...Successfully finished last run (ID:{}). Initializing new run:<br/><br/>".format(
                        last_id
                    )
                )
    elif isinstance(wandb.run, Run):
        # No re-init requested: reuse the run that is already active.
        logger.info("wandb.init() called when a run is still active")
        return wandb.run
    use_redirect = True
    # No file descriptors are created here; all of them are passed as None.
    stdout_master_fd, stderr_master_fd = None, None
    stdout_slave_fd, stderr_slave_fd = None, None
    backend = Backend()
    backend.ensure_launched(
        settings=s,
        stdout_fd=stdout_master_fd,
        stderr_fd=stderr_master_fd,
        use_redirect=use_redirect,
    )
    backend.server_connect()
    # Make sure we are logged in
    # wandb_login._login(_backend=backend, _settings=self.settings)
    # resuming needs access to the server, check server_status()?
    run = Run(config=config, settings=s)
    run._set_console(
        use_redirect=use_redirect,
        stdout_slave_fd=stdout_slave_fd,
        stderr_slave_fd=stderr_slave_fd,
    )
    run._set_library(self._wl)
    run._set_backend(backend)
    run._set_reporter(self._reporter)
    run._set_teardown_hooks(self._teardown_hooks)
    # TODO: pass mode to backend
    # run_synced = None
    backend._hack_set_run(run)
    backend.interface.publish_header()
    if s._offline:
        # Offline: publish the run record without waiting for a response.
        run_proto = backend.interface._make_run(run)
        backend.interface._publish_run(run_proto)
        run._set_run_obj_offline(run_proto)
    else:
        ret = backend.interface.communicate_check_version(
            current_version=wandb.__version__
        )
        if ret:
            # Store whichever version notices came back on the run.
            if ret.upgrade_message:
                run._set_upgraded_version_message(ret.upgrade_message)
            if ret.delete_message:
                run._set_deleted_version_message(ret.delete_message)
            if ret.yank_message:
                run._set_yanked_version_message(ret.yank_message)
        run._on_init()
        # `ret` is reused: from here on it holds the run response.
        ret = backend.interface.communicate_run(run, timeout=30)
        error_message = None
        if not ret:
            error_message = "Error communicating with backend"
        if ret and ret.error:
            error_message = ret.error.message
        if error_message:
            # Shutdown the backend and get rid of the logger
            # we don't need to do console cleanup at this point
            backend.cleanup()
            self.teardown()
            raise UsageError(error_message)
        run._set_run_obj(ret.run)
    # initiate run (stats and metadata probing)
    _ = backend.interface.communicate_run_start()
    self._wl._global_run_stack.append(run)
    self.run = run
    self.backend = backend
    # Expose the new run and its bound methods as module-level globals.
    module.set_global(
        run=run,
        config=run.config,
        log=run.log,
        summary=run.summary,
        save=run.save,
        use_artifact=run.use_artifact,
        log_artifact=run.log_artifact,
        plot_table=run.plot_table,
        alert=run.alert,
    )
    self._reporter.set_context(run=run)
    run._on_start()
    run._freeze()
    return run
def init(self):  # noqa: C901
    """Create (or reuse) a run, record telemetry, and publish it globally.

    After firing the "on_init" trigger, one of three things happens:

    * ``settings._noop`` is set: the result of ``_make_run_disabled()`` is
      returned.
    * No re-initialisation is requested and ``wandb.run`` is already a
      ``Run``: that existing run is returned unchanged.
    * Otherwise: any run on top of the global run stack is finished (when
      re-initialising), a ``Backend`` is launched, a new ``Run`` is created,
      initial telemetry is filled in, the run is communicated to the backend
      (or only published, when offline), pushed on the global run stack and
      returned.

    Returns:
        The disabled run, the already-active run, or the newly created run.

    Raises:
        UsageError: If ``communicate_run`` returns a falsy result or a
            result carrying an error.
    """
    # Fail fast if `logger` is unset/falsy; it is used throughout below.
    assert logger
    logger.info("calling init triggers")
    trigger.call("on_init", **self.kwargs)
    s = self.settings
    sweep_config = self.sweep_config
    config = self.config
    logger.info(
        "wandb.init called with sweep_config: {}\nconfig: {}".format(
            sweep_config, config))
    if s._noop:
        return self._make_run_disabled()
    # Re-init when explicitly requested, or in Jupyter unless reinit was
    # explicitly set to False.
    if s.reinit or (s._jupyter and s.reinit is not False):
        if len(self._wl._global_run_stack) > 0:
            if len(self._wl._global_run_stack) > 1:
                wandb.termwarn(
                    "If you want to track multiple runs concurrently in wandb you should use multi-processing not threads"  # noqa: E501
                )
            # Remember the id before finishing so the messages can show it.
            last_id = self._wl._global_run_stack[-1]._run_id
            logger.info(
                "re-initializing run, found existing run on stack: {}".
                format(last_id))
            # HTML is only displayed when not silenced and the detected
            # python type is "jupyter".
            jupyter = (s._jupyter and not s._silent
                       and ipython._get_python_type() == "jupyter")
            if jupyter:
                ipython.display_html(
                    "Finishing last run (ID:{}) before initializing another..."
                    .format(last_id))
            self._wl._global_run_stack[-1].finish()
            if jupyter:
                ipython.display_html(
                    "...Successfully finished last run (ID:{}). Initializing new run:<br/><br/>"
                    .format(last_id))
    elif isinstance(wandb.run, Run):
        # No re-init requested: reuse the run that is already active.
        logger.info("wandb.init() called when a run is still active")
        return wandb.run
    logger.info("starting backend")
    backend = Backend(settings=s)
    backend.ensure_launched()
    backend.server_connect()
    logger.info("backend started and connected")
    # Make sure we are logged in
    # wandb_login._login(_backend=backend, _settings=self.settings)
    # resuming needs access to the server, check server_status()?
    run = Run(config=config, settings=s, sweep_config=sweep_config)
    # probe the active start method
    active_start_method = None
    if s.start_method == "thread":
        active_start_method = s.start_method
    else:
        # `get_start_method` may be absent on the backend's multiprocessing
        # object; in that case the method stays None.
        get_start_fn = getattr(backend._multiprocessing, "get_start_method", None)
        active_start_method = get_start_fn() if get_start_fn else None
    # Populate initial telemetry
    with telemetry.context(run=run) as tel:
        tel.cli_version = wandb.__version__
        tel.python_version = platform.python_version()
        hf_version = _huggingface_version()
        if hf_version:
            tel.huggingface_version = hf_version
        if s._jupyter:
            tel.env.jupyter = True
        if s._kaggle:
            tel.env.kaggle = True
        if s._windows:
            tel.env.windows = True
        run._telemetry_imports(tel.imports_init)
        # Record which start method is in effect.
        if active_start_method == "spawn":
            tel.env.start_spawn = True
        elif active_start_method == "fork":
            tel.env.start_fork = True
        elif active_start_method == "forkserver":
            tel.env.start_forkserver = True
        elif active_start_method == "thread":
            tel.env.start_thread = True
    logger.info("updated telemetry")
    run._set_library(self._wl)
    run._set_backend(backend)
    run._set_reporter(self._reporter)
    run._set_teardown_hooks(self._teardown_hooks)
    # TODO: pass mode to backend
    # run_synced = None
    backend._hack_set_run(run)
    backend.interface.publish_header()
    if s._offline:
        with telemetry.context(run=run) as tel:
            tel.feature.offline = True
        # Offline: publish the run record without waiting for a response.
        run_proto = backend.interface._make_run(run)
        backend.interface._publish_run(run_proto)
        run._set_run_obj_offline(run_proto)
    else:
        logger.info("communicating current version")
        ret = backend.interface.communicate_check_version(
            current_version=wandb.__version__)
        if ret:
            logger.info("got version response {}".format(ret))
            if ret.upgrade_message:
                run._set_upgraded_version_message(ret.upgrade_message)
            if ret.delete_message:
                run._set_deleted_version_message(ret.delete_message)
            if ret.yank_message:
                run._set_yanked_version_message(ret.yank_message)
        run._on_init()
        logger.info("communicating run to backend with 30 second timeout")
        # `ret` is reused: from here on it holds the run response.
        ret = backend.interface.communicate_run(run, timeout=30)
        error_message = None
        if not ret:
            logger.error("backend process timed out")
            error_message = "Error communicating with wandb process"
            # Suggest alternative start methods unless "fork" is already used.
            if active_start_method != "fork":
                error_message += "\ntry: wandb.init(settings=wandb.Settings(start_method='fork'))"
                error_message += "\nor:  wandb.init(settings=wandb.Settings(start_method='thread'))"
                error_message += "\nFor more info see: https://docs.wandb.ai/library/init#init-start-error"
        if ret and ret.error:
            error_message = ret.error.message
        if error_message:
            logger.error("encountered error: {}".format(error_message))
            # Shutdown the backend and get rid of the logger
            # we don't need to do console cleanup at this point
            backend.cleanup()
            self.teardown()
            raise UsageError(error_message)
        if ret.run.resumed:
            logger.info("run resumed")
            with telemetry.context(run=run) as tel:
                tel.feature.resumed = True
        run._set_run_obj(ret.run)
    logger.info("starting run threads in backend")
    # initiate run (stats and metadata probing)
    # Use whichever run object was set above (online or offline).
    run_obj = run._run_obj or run._run_obj_offline
    _ = backend.interface.communicate_run_start(run_obj)
    self._wl._global_run_stack.append(run)
    self.run = run
    self.backend = backend
    # Expose the new run and its bound methods as module-level globals.
    module.set_global(
        run=run,
        config=run.config,
        log=run.log,
        summary=run.summary,
        save=run.save,
        use_artifact=run.use_artifact,
        log_artifact=run.log_artifact,
        define_metric=run._define_metric,
        plot_table=run.plot_table,
        alert=run.alert,
    )
    self._reporter.set_context(run=run)
    run._on_start()
    run._freeze()
    logger.info("run started, returning control to user process")
    return run
def _login(
    anonymous=None,
    key=None,
    relogin=None,
    force=None,
    host=None,
    _backend=None,
    _disable_warning=None,
    _settings=None,
):
    """Log in to W&B.

    Args:
        anonymous (string, optional): Can be "must", "allow", or "never".
            If set to "must" we'll always login anonymously, if set to
            "allow" we'll only create an anonymous user if the user
            isn't already logged in.
        key (string, optional): API key to use. When given it is written
            with ``apikey.write_key`` instead of prompting.
        relogin (bool, optional): If true, will re-prompt for API key even
            when an active entity is found.
        force (bool, optional): Passed as both ``no_offline`` and
            ``no_create`` to ``apikey.prompt_api_key``.
        host (string, optional): The host to connect to (stored as the
            ``base_url`` setting).
        _backend (optional): Backend whose interface is asked to perform
            the login via ``communicate_login``.
        _disable_warning (bool, optional): If true, suppress the warning
            printed when a run is already active.
        _settings (optional): Settings applied on top of those returned by
            ``wandb.setup``.

    Returns:
        ``True`` if a run is already active or an entity is already logged
        in (and ``relogin`` is not set); ``False`` if ``anonymous`` is
        invalid, the settings are offline, or no key was obtained;
        otherwise the API key itself.

    Raises:
        UsageError - if ``apikey.prompt_api_key`` returns ``False``
            (api_key not configured and no tty)
    """
    if wandb.run is not None:
        if not _disable_warning:
            wandb.termwarn(
                "Calling wandb.login() after wandb.init() is a no-op.")
        return True
    settings_dict = {}
    api = Api()
    if anonymous is not None:
        # TODO: Move this check into wandb_settings probably.
        if not _validate_anonymous_setting(anonymous):
            wandb.termwarn(
                "Invalid value passed for argument `anonymous` to "
                "wandb.login(). Can be 'must', 'allow', or 'never'.")
            return False
        settings_dict.update({"anonymous": anonymous})
    if host is not None:
        settings_dict.update({"base_url": host})
    if key:
        settings_dict.update({"api_key": key})
    # Note: This won't actually do anything if called from a codepath where
    # wandb.setup was previously called. If wandb.setup is called further up,
    # you must make sure the anonymous setting (and any other settings) are
    # already properly set up there.
    wl = wandb.setup(settings=wandb.Settings(**settings_dict))
    wl_settings = wl.settings()
    if _settings:
        wl_settings._apply_settings(settings=_settings)
    settings = wl_settings
    if settings._offline:
        return False
    active_entity = None
    logged_in = is_logged_in(settings=settings)
    if logged_in:
        # TODO: do we want to move all login logic to the backend?
        if _backend:
            res = _backend.interface.communicate_login(key, anonymous)
            active_entity = res.active_entity
        else:
            active_entity = wl._get_entity()
    # Already logged in and no relogin requested: report it and stop here.
    if active_entity and not relogin:
        login_state_str = "Currently logged in as:"
        login_info_str = "(use `wandb login --relogin` to force relogin)"
        wandb.termlog(
            "{} {} {}".format(login_state_str,
                              click.style(active_entity, fg="yellow"),
                              login_info_str),
            repeat=False,
        )
        return True
    jupyter = settings._jupyter or False
    if key:
        if jupyter:
            wandb.termwarn(
                ("If you're specifying your api key in code, ensure this "
                 "code is not shared publically.\nConsider setting the "
                 "WANDB_API_KEY environment variable, or running "
                 "`wandb login` from the command line."))
        apikey.write_key(settings, key)
    else:
        key = apikey.prompt_api_key(settings,
                                    api=api,
                                    no_offline=force,
                                    no_create=force)
        # Exactly False (not merely falsy) raises the no-tty error below.
        if key is False:
            raise UsageError(
                "api_key not configured (no-tty). Run wandb login")
    if _backend and not logged_in:
        # TODO: calling this twice is gross, this deserves a refactor
        # Make sure our backend picks up the new creds
        _ = _backend.interface.communicate_login(key, anonymous)
    return key or False
def init(self):
    """Create (or reuse) a run and publish it as the module-level run.

    After firing the "on_init" trigger, one of three things happens:

    * ``settings._noop`` is set: a ``RunDummy`` is registered via
      ``module.set_global`` and returned.
    * No re-initialisation is requested and ``wandb.run`` is truthy: that
      existing run is returned unchanged.
    * Otherwise: any run on top of the global run stack is finished (when
      re-initialising), a ``Backend`` is launched, a new ``Run`` is created,
      communicated to the backend (or only published, when offline), pushed
      on the global run stack and returned.

    Returns:
        The dummy run, the already-active run, or the newly created run.

    Raises:
        UsageError: If ``communicate_run`` returns a falsy result or a
            result carrying an error.
    """
    trigger.call("on_init", **self.kwargs)
    s = self.settings
    config = self.config
    if s._noop:
        # Disabled mode: hand back a stand-in object instead of a real run.
        run = RunDummy()
        module.set_global(
            run=run,
            config=run.config,
            log=run.log,
            summary=run.summary,
            save=run.save,
            restore=run.restore,
            use_artifact=run.use_artifact,
            log_artifact=run.log_artifact,
            plot_table=run.plot_table,
        )
        return run
    # Re-init when explicitly requested, or in Jupyter unless reinit was
    # explicitly set to False.
    if s.reinit or (s._jupyter and s.reinit is not False):
        if len(self._wl._global_run_stack) > 0:
            if len(self._wl._global_run_stack) > 1:
                wandb.termwarn(
                    "If you want to track multiple runs concurrently in wandb you should use multi-processing not threads"  # noqa: E501
                )
            # Finish the most recently started run before creating a new one.
            self._wl._global_run_stack[-1].finish()
    elif wandb.run:
        # No re-init requested: reuse the run that is already active.
        logger.info("wandb.init() called when a run is still active")
        return wandb.run
    use_redirect = True
    # No file descriptors are created here; all of them are passed as None.
    stdout_master_fd, stderr_master_fd = None, None
    stdout_slave_fd, stderr_slave_fd = None, None
    backend = Backend()
    backend.ensure_launched(
        settings=s,
        stdout_fd=stdout_master_fd,
        stderr_fd=stderr_master_fd,
        use_redirect=use_redirect,
    )
    backend.server_connect()
    # Make sure we are logged in
    # wandb_login._login(_backend=backend, _settings=self.settings)
    # resuming needs access to the server, check server_status()?
    run = Run(config=config, settings=s)
    run._set_console(
        use_redirect=use_redirect,
        stdout_slave_fd=stdout_slave_fd,
        stderr_slave_fd=stderr_slave_fd,
    )
    run._set_library(self._wl)
    run._set_backend(backend)
    run._set_reporter(self._reporter)
    run._set_teardown_hooks(self._teardown_hooks)
    # TODO: pass mode to backend
    # run_synced = None
    backend._hack_set_run(run)
    backend.interface.publish_header()
    if s._offline:
        # Offline: publish the run record without waiting for a response.
        run_proto = backend.interface._make_run(run)
        backend.interface._publish_run(run_proto)
        run._set_run_obj_offline(run_proto)
    else:
        ret = backend.interface.communicate_check_version(
            current_version=wandb.__version__)
        if ret:
            # Store whichever version notices came back on the run.
            if ret.upgrade_message:
                run._set_upgraded_version_message(ret.upgrade_message)
            if ret.delete_message:
                run._set_deleted_version_message(ret.delete_message)
            if ret.yank_message:
                run._set_yanked_version_message(ret.yank_message)
        run._on_init()
        # `ret` is reused: from here on it holds the run response.
        ret = backend.interface.communicate_run(run, timeout=30)
        error_message = None
        if not ret:
            error_message = "Error communicating with backend"
        if ret and ret.error:
            error_message = ret.error.message
        if error_message:
            # Shutdown the backend and get rid of the logger
            # we don't need to do console cleanup at this point
            backend.cleanup()
            self.teardown()
            raise UsageError(error_message)
        run._set_run_obj(ret.run)
    # initiate run (stats and metadata probing)
    _ = backend.interface.communicate_run_start()
    self._wl._global_run_stack.append(run)
    self.run = run
    self.backend = backend
    # Expose the new run and its bound methods as module-level globals.
    module.set_global(
        run=run,
        config=run.config,
        log=run.log,
        summary=run.summary,
        save=run.save,
        restore=run.restore,
        use_artifact=run.use_artifact,
        log_artifact=run.log_artifact,
        plot_table=run.plot_table,
    )
    self._reporter.set_context(run=run)
    run._on_start()
    run._freeze()
    return run
def init(self):
    """Create a managed run, send it to the backend, and publish it globally.

    When ``settings.reinit`` is set and a run is on the global run stack,
    that run is joined first. In "noop" mode nothing is created and ``None``
    is returned. Otherwise console pipes are set up according to
    ``settings.console``, a ``Backend`` is launched, ``wandb.login`` is
    called, and a ``RunManaged`` is created and sent to the backend in a way
    that depends on ``settings.mode``.

    Returns:
        The new run, or ``None`` when ``settings.mode`` is "noop".

    Raises:
        UsageError: In "online" mode, if the backend response has its
            ``error`` field set.
    """
    s = self.settings
    config = self.config
    if s.reinit:
        if len(self._wl._global_run_stack) > 0:
            if len(self._wl._global_run_stack) > 1:
                wandb.termwarn(
                    "If you want to track multiple runs concurrently in wandb you should use multi-processing not threads"  # noqa: E501
                )
            # Join the most recently started run before creating a new one.
            self._wl._global_run_stack[-1].join()
    if s.mode == "noop":
        # TODO(jhr): return dummy object
        return None
    console = s.console
    use_redirect = True
    # The fds stay None unless one of the console modes below creates them.
    stdout_master_fd, stderr_master_fd = None, None
    stdout_slave_fd, stderr_slave_fd = None, None
    if console == "iowrap":
        stdout_master_fd, stdout_slave_fd = io_wrap.wandb_pty(resize=False)
        stderr_master_fd, stderr_slave_fd = io_wrap.wandb_pty(resize=False)
    elif console == "_win32":
        # Not used right now
        stdout_master_fd, stdout_slave_fd = lib_console.win32_create_pipe()
        stderr_master_fd, stderr_slave_fd = lib_console.win32_create_pipe()
    backend = Backend(mode=s.mode)
    backend.ensure_launched(
        settings=s,
        stdout_fd=stdout_master_fd,
        stderr_fd=stderr_master_fd,
        use_redirect=use_redirect,
    )
    backend.server_connect()
    # Make sure we are logged in
    wandb.login(backend=backend)
    # resuming needs access to the server, check server_status()?
    run = RunManaged(config=config, settings=s)
    run._set_console(
        use_redirect=use_redirect,
        stdout_slave_fd=stdout_slave_fd,
        stderr_slave_fd=stderr_slave_fd,
    )
    run._set_library(self._wl)
    run._set_backend(backend)
    run._set_reporter(self._reporter)
    # TODO: pass mode to backend
    # run_synced = None
    backend._hack_set_run(run)
    if s.mode == "online":
        ret = backend.interface.send_run_sync(run, timeout=30)
        # TODO: fail on more errors, check return type
        # TODO: make the backend log stacktraces on catastrophic failure
        if ret.HasField("error"):
            # Shutdown the backend and get rid of the logger
            # we don't need to do console cleanup at this point
            backend.cleanup()
            self._wl.on_finish()
            raise UsageError(ret.error.message)
        run._set_run_obj(ret.run)
    elif s.mode in ("offline", "dryrun"):
        backend.interface.send_run(run)
    elif s.mode in ("async", "run"):
        # NOTE(review): `ret` is not inspected in this branch; the run is
        # sent again unconditionally right after the sync attempt.
        ret = backend.interface.send_run_sync(run, timeout=10)
        # TODO: on network error, do async run save
        backend.interface.send_run(run)
    self._wl._global_run_stack.append(run)
    self.run = run
    self.backend = backend
    # Expose the new run and its bound methods as module-level globals.
    module.set_global(
        run=run,
        config=run.config,
        log=run.log,
        join=run.join,
        summary=run.summary,
        save=run.save,
        restore=run.restore,
        use_artifact=run.use_artifact,
        log_artifact=run.log_artifact,
    )
    self._reporter.set_context(run=run)
    run._on_start()
    return run