def run(ctx, program, args, id, resume, dir, configs, message, name, notes, show, tags, run_group, job_type):
    """Create a 'clirun' mode run and launch the user's program under it.

    Run parameters that were not supplied on the command line fall back to
    the corresponding environment variables.  If no API key is available the
    user is prompted for one.  When the run manager fails during setup the
    error is reported, the traceback is logged, and the process exits with
    status 1.

    Args:
        ctx: The command context (not used in the body).
        program: The program to execute.
        args: Arguments forwarded to the program.
        id: Run id; falls back to env.RUN_ID.
        resume: Resume setting; falls back to env.RESUME.
        dir: Directory passed to Config as wandb_dir; falls back to
            wandb.wandb_dir().
        configs: Comma separated config paths.
        message: Run description; falls back to env.DESCRIPTION.
        name: Run name; falls back to env.NAME.
        notes: Run notes; falls back to env.NOTES.
        show: When truthy, env.SHOW_RUN is set to 'True' for the child.
        tags: Comma separated tags; falls back to env.get_tags().
        run_group: Run group; falls back to env.RUN_GROUP.
        job_type: Job type; falls back to env.JOB_TYPE.
    """
    wandb.ensure_configured()

    config_paths = configs.split(',') if configs else []
    config = Config(config_paths=config_paths,
                    wandb_dir=dir or wandb.wandb_dir())

    if tags:
        # Drop empty entries produced by stray commas.
        tags = [tag for tag in tags.split(",") if tag]
    else:
        tags = None

    # populate run parameters from env if not specified
    from_env = os.environ.get
    id = id or from_env(env.RUN_ID)
    message = message or from_env(env.DESCRIPTION)
    tags = tags or env.get_tags()
    run_group = run_group or from_env(env.RUN_GROUP)
    job_type = job_type or from_env(env.JOB_TYPE)
    name = name or from_env(env.NAME)
    notes = notes or from_env(env.NOTES)
    resume = resume or from_env(env.RESUME)

    cli_run = wandb_run.Run(
        run_id=id,
        mode='clirun',
        config=config,
        description=message,
        program=program,
        tags=tags,
        group=run_group,
        job_type=job_type,
        name=name,
        notes=notes,
        resume=resume,
    )
    cli_run.enable_logging()

    # Environment handed to the run manager and the user process.
    environ = dict(os.environ)
    if configs:
        environ[env.CONFIG_PATHS] = configs
    if show:
        environ[env.SHOW_RUN] = 'True'

    if not cli_run.api.api_key:
        util.prompt_api_key(cli_run.api, input_callback=click.prompt)

    try:
        manager = run_manager.RunManager(cli_run)
        manager.init_run(environ)
    except run_manager.Error:
        exc_type, exc_value, exc_traceback = sys.exc_info()
        reason = str(exc_value)
        wandb.termerror(
            'An Exception was raised during setup, see %s for full traceback.'
            % util.get_log_file_path())
        wandb.termerror(reason)
        if 'permission' in reason:
            wandb.termerror(
                'Are you sure you provided the correct API key to "wandb login"?'
            )
        formatted = traceback.format_exception(
            exc_type, exc_value, exc_traceback)
        logger.error('\n'.join(formatted))
        sys.exit(1)

    manager.run_user_process(program, args, environ)
def jupyter_login(force=True, api=None):
    """Try to obtain an API key while running inside a notebook.

    With force=False only the automatic login paths are attempted; with
    force=True the user is asked to paste a key when those paths yield
    nothing.

    Args:
        force: Whether to fall back to an interactive key prompt.
        api: The api object to log in with; defaults to the api of the
            module-level run when one exists.

    Returns:
        Whatever util.prompt_api_key returns for the resolved api.

    Raises:
        LaunchError: If no api object is available, or (from the browser
            callback) when a Databricks environment is detected.
    """
    if not api:
        api = run.api if run else None
    if not api:
        raise LaunchError("Internal error: api required for jupyter login")

    def get_api_key_from_browser():
        # Returns a (key, anonymous) pair.
        key = None
        anonymous = False
        loaded = sys.modules
        if 'google.colab' in loaded:
            key = jupyter.attempt_colab_login(api.app_url)
        elif 'databricks_cli' in loaded and 'dbutils' in loaded:
            # Databricks does not seem to support getpass() so we need to fail
            # early and prompt the user to configure the key manually for now.
            termerror(
                "Databricks requires api_key to be configured manually, instructions at: http://docs.wandb.com/integrations/databricks"
            )
            raise LaunchError(
                "Databricks integration requires api_key to be configured.")

        if not key and os.environ.get(env.ALLOW_ANONYMOUS) == "true":
            key = api.create_anonymous_api_key()
            anonymous = True

        if force and not key:
            termerror(
                "Not authenticated. Copy a key from https://app.wandb.ai/authorize"
            )
            key = getpass.getpass("API Key: ").strip()

        return key, anonymous

    return util.prompt_api_key(api, browser_callback=get_api_key_from_browser)
def _jupyter_login(force=True, api=None):
    """Try to obtain an API key while running inside a notebook.

    With force=False only the automatic login paths are attempted; with
    force=True the user is asked to paste a key when those paths yield
    nothing.

    Args:
        force: Whether to fall back to an interactive key prompt.
        api: The api object to log in with; defaults to the api of the
            module-level run when one exists.

    Returns:
        Whatever util.prompt_api_key returns for the resolved api.

    Raises:
        LaunchError: If no api object is available, or (from the browser
            callback) when a Databricks environment is detected.
    """
    if not api:
        api = run.api if run else None
    if not api:
        raise LaunchError("Internal error: api required for jupyter login")

    def get_api_key_from_browser(signup=False):
        # Returns a (key, anonymous) pair; `signup` is accepted but unused.
        key = None
        anonymous = False
        loaded = sys.modules
        if 'google.colab' in loaded:
            key = jupyter.attempt_colab_login(api.app_url)
        elif 'databricks_cli' in loaded and 'dbutils' in loaded:
            # Databricks does not seem to support getpass() so we need to fail
            # early and prompt the user to configure the key manually for now.
            termerror(
                "Databricks requires api_key to be configured manually, instructions at: http://docs.wandb.com/integrations/databricks")
            raise LaunchError(
                "Databricks integration requires api_key to be configured.")

        # For jupyter we default to not allowing anonymous
        anonymous_mode = os.environ.get(env.ANONYMOUS, "never")
        if not key and anonymous_mode != "never":
            key = api.create_anonymous_api_key()
            anonymous = True

        if force and not key:
            try:
                termerror(
                    "Not authenticated. Copy a key from https://app.wandb.ai/authorize")
                key = getpass.getpass("API Key: ").strip()
            except NotImplementedError:
                # The environment cannot read input; leave the key unset.
                termerror(
                    "Can't accept input in this environment, you should set WANDB_API_KEY or call wandb.login(key='YOUR_API_KEY')")

        return key, anonymous

    return util.prompt_api_key(api, browser_callback=get_api_key_from_browser)
def login(anonymous=None, key=None):
    """Make sure this machine is logged in.

    A key may be passed explicitly, but the function is primarily meant to
    prompt the user for one.

    Args:
        anonymous: One of "never", "must", or "allow".  "must" always logs
            in anonymously; "allow" only creates an anonymous user when the
            user isn't already logged in.  A falsy value is treated as
            "never" and leaves the environment untouched.
        key: An API key to store instead of prompting.

    Returns:
        True if login was successful
        False on failure
    """
    # This ensures we have a global api object
    ensure_configured()

    if anonymous:
        os.environ[env.ANONYMOUS] = anonymous
    else:
        anonymous = "never"

    in_jupyter = _get_python_type() != "python"

    if key:
        termwarn("If you're specifying your api key in code, ensure this code is not shared publically.\nConsider setting the WANDB_API_KEY environment variable, or running `wandb login` from the command line.")
        if in_jupyter:
            termwarn("Calling wandb.login() without arguments from jupyter should prompt you for an api key.")
        util.set_api_key(api, key)
    elif api.api_key and anonymous != "must":
        # Already logged in and anonymous login is not being forced.
        key = api.api_key
    elif not in_jupyter:
        key = util.prompt_api_key(api)
    else:
        os.environ[env.JUPYTER] = "true"
        # Don't return key to ensure it's not displayed in the notebook.
        key = _jupyter_login(api=api)

    return bool(key)
def login(key, host, anonymously, server=LocalServer(), browser=True, no_offline=False):
    """Log in to W&B from the command line, optionally targeting a host.

    Args:
        key: Sequence of key arguments; the first element, if any, is used
            as the API key and stored without prompting.
        host: Server URL.  "https://api.wandb.ai" clears any globally stored
            base_url; any other non-empty value must start with "http" and
            is stored globally as base_url.
        anonymously: When truthy (and no key was given) env.ANONYMOUS is set
            to "must" before prompting.
        server: Unused for now (the local callback server is disabled).
        browser: Passed through util.launch_browser to decide whether the
            authorize page may be opened in a browser.
        no_offline: Forwarded to util.prompt_api_key; when truthy W&B is not
            disabled if no key is obtained.

    Returns:
        The API key that was set or obtained, or a falsy value if none.

    Raises:
        ClickException: If a custom host does not start with "http".
    """
    global api

    cloud_host = "https://api.wandb.ai"
    if host == cloud_host:
        api.clear_setting("base_url", globally=True)
    elif host:
        if not host.startswith("http"):
            raise ClickException("host must start with http(s)://")
        api.set_setting("base_url", host, globally=True)

    key = key[0] if len(key) > 0 else None

    # Import in here for performance reasons
    import webbrowser
    browser = util.launch_browser(browser)

    def get_api_key_from_browser(signup=False):
        if not browser:
            return None
        query = '?signup=true' if signup else ''
        webbrowser.open_new_tab('{}/authorize{}'.format(api.app_url, query))
        # Getting rid of the server for now. We would need to catch Abort
        # from server.stop and deal accordingly, so no key is read back here.
        return None

    if key:
        util.set_api_key(api, key)
    else:
        if anonymously:
            os.environ[env.ANONYMOUS] = "must"
        # Don't allow signups or dryrun for local.
        # The previous expression (`host != None or host != cloud`) was true
        # for every input; only an explicit non-cloud host counts as local.
        # NOTE(review): a local base_url saved in settings from an earlier
        # login is not detected here when `host` is omitted - confirm whether
        # that case should also be treated as local.
        local = bool(host) and host != cloud_host
        key = util.prompt_api_key(api,
                                  input_callback=click.prompt,
                                  browser_callback=get_api_key_from_browser,
                                  no_offline=no_offline,
                                  local=local)

    if key:
        api.clear_setting('disabled')
        click.secho("Successfully logged in to Weights & Biases!", fg="green")
    elif not no_offline:
        api.set_setting('disabled', 'true')
        click.echo(
            "Disabling Weights & Biases. Run 'wandb login' again to re-enable."
        )

    # reinitialize API to create the new client
    api = InternalApi()

    return key
def login(key, server=LocalServer(), browser=True, anonymous=False):
    """Log in to W&B from the command line.

    Args:
        key: Sequence of key arguments; the first element, if any, is used
            as the API key and stored without prompting.
        server: Local server used to receive the key from the browser flow.
        browser: Passed through util.launch_browser to decide whether the
            authorize page may be opened in a browser.
        anonymous: Forwarded to util.prompt_api_key.

    Returns:
        The API key that was set or obtained, or a falsy value if none.
    """
    global api

    if len(key) > 0:
        key = key[0]
    else:
        key = None

    # Import in here for performance reasons
    import webbrowser
    browser = util.launch_browser(browser)

    def get_api_key_from_browser():
        # Open the authorize page, then block until the local server has a
        # result; any failure along the way yields None.
        if browser:
            authorize_url = '{}/authorize?{}'.format(api.app_url, server.qs())
            if webbrowser.open_new_tab(authorize_url):
                server.start(blocking=True)
                if server.result.get("key"):
                    return server.result["key"][0]
        return None

    if not key:
        key = util.prompt_api_key(api,
                                  browser_callback=get_api_key_from_browser,
                                  anonymous=anonymous)
    else:
        util.set_api_key(api, key)

    if not key:
        api.set_setting('disabled', 'true')
        click.echo(
            "Disabling Weights & Biases. Run 'wandb login' again to re-enable."
        )
    else:
        api.clear_setting('disabled')
        click.secho("Successfully logged in to Weights & Biases!", fg="green")

    # reinitialize API to create the new client
    api = InternalApi()

    return key
def login(key, anonymously, server=LocalServer(), browser=True):
    """Log in to W&B from the command line.

    Args:
        key: Sequence of key arguments; the first element, if any, is used
            as the API key and stored without prompting.
        anonymously: When truthy (and no key was given) env.ANONYMOUS is set
            to "must" before prompting.
        server: Unused for now (the local callback server is disabled).
        browser: Passed through util.launch_browser to decide whether the
            authorize page may be opened in a browser.

    Returns:
        The API key that was set or obtained, or a falsy value if none.
    """
    global api

    if len(key) > 0:
        key = key[0]
    else:
        key = None

    # Import in here for performance reasons
    import webbrowser
    browser = util.launch_browser(browser)

    def get_api_key_from_browser(signup=False):
        if browser:
            suffix = '?signup=true' if signup else ''
            webbrowser.open_new_tab(
                '{}/authorize{}'.format(api.app_url, suffix))
            #Getting rid of the server for now. We would need to catch Abort from server.stop and deal accordingly
            #server.start(blocking=False)
            #if server.result.get("key"):
            #    return server.result["key"][0]
        return None

    if not key:
        if anonymously:
            os.environ[env.ANONYMOUS] = "must"
        key = util.prompt_api_key(api,
                                  input_callback=click.prompt,
                                  browser_callback=get_api_key_from_browser)
    else:
        util.set_api_key(api, key)

    if not key:
        api.set_setting('disabled', 'true')
        click.echo(
            "Disabling Weights & Biases. Run 'wandb login' again to re-enable."
        )
    else:
        api.clear_setting('disabled')
        click.secho("Successfully logged in to Weights & Biases!", fg="green")

    # reinitialize API to create the new client
    api = InternalApi()

    return key
def init(job_type=None, dir=None, config=None, project=None, entity=None, reinit=None, tags=None,
         group=None, allow_val_change=False, resume=False, force=False, tensorboard=False,
         sync_tensorboard=False, name=None, notes=None, id=None, magic=None, allow_anonymous=False):
    """Initialize W&B

    If called from within Jupyter, initializes a new run and waits for a call to
    `wandb.log` to begin pushing metrics.  Otherwise, spawns a new process
    to communicate with W&B.

    Most arguments are published as environment variables before the run is
    built with `Run.from_environment_or_defaults()`.

    Args:
        job_type (str, optional): The type of job running; falls back to the
            env.JOB_TYPE environment variable, or to the TF_CONFIG task type
            when a TFJob config is detected
        config (dict, argparse, or tf.FLAGS, optional): The hyper parameters to store with the run
        project (str, optional): The project to push metrics to
        entity (str, optional): The entity to push metrics to
        dir (str, optional): An absolute path to a directory where metadata will be stored
        group (str, optional): A unique string shared by all runs in a given group
        tags (list, optional): A list of tags to apply to the run
        id (str, optional): A globally unique (per project) identifier for the run
        name (str, optional): A display name which does not have to be unique
        notes (str, optional): A multiline string associated with the run
        reinit (bool, optional): Allow multiple calls to init in the same process
        resume (bool, str, optional): Automatically resume this run if run from the same machine,
            you can also pass a unique run_id
        allow_val_change (bool, optional): Forwarded to `run.config.update` when applying
            `config`; forced to True when a SageMaker config is present
        sync_tensorboard (bool, optional): Synchronize wandb logs to tensorboard or tensorboardX
        tensorboard (bool, optional): Older spelling of sync_tensorboard (slated for deprecation)
        force (bool, optional): Force authentication with wandb, defaults to False
        magic (bool, dict, or str, optional): magic configuration as bool, dict, json string,
            yaml filename
        allow_anonymous (bool, optional): When truthy, env.ALLOW_ANONYMOUS is set to its
            lowercased string form

    Returns:
        A wandb.run object for metric and config logging.  If a run already
        exists, or env.INITED is set (e.g. in a child process), the existing
        module-level run is returned as is, which may be None.

    Raises:
        LaunchError: If the run directory cannot be created or the run mode
            is not one of the supported values.
    """
    # Must stay the first statement: locals() is expected to hold only the
    # call arguments at this point.
    trigger.call('on_init', **locals())
    global run
    global __stage_dir__
    global summary

    # We allow re-initialization when we're in Jupyter or explicitly opt-in to it.
    in_jupyter = _get_python_type() != "python"
    # `!= False` (not `is not False`) is kept deliberately so values such as
    # 0 keep behaving like False.
    if reinit or (in_jupyter and reinit != False):
        reset_env(exclude=env.immutable_keys())
        run = None

    # TODO: deprecate tensorboard
    # Parenthesised so the "already patched" guard applies to both flags;
    # previously `tensorboard=True` bypassed it because `and` binds tighter
    # than `or`.
    if (tensorboard or sync_tensorboard) and len(patched["tensorboard"]) == 0:
        util.get_module("wandb.tensorboard").patch()

    sagemaker_config = util.parse_sm_config()
    tf_config = util.parse_tfjob_config()
    if group is None:
        group = os.getenv(env.RUN_GROUP)
    if job_type is None:
        job_type = os.getenv(env.JOB_TYPE)
    if sagemaker_config:
        # Set run_id and potentially grouping if we're in SageMaker
        run_id = os.getenv('TRAINING_JOB_NAME')
        if run_id:
            os.environ[env.RUN_ID] = '-'.join(
                [run_id, os.getenv('CURRENT_HOST', socket.gethostname())])
        with open("/opt/ml/input/config/resourceconfig.json") as resource_file:
            conf = json.load(resource_file)
        if group is None and len(conf["hosts"]) > 1:
            group = os.getenv('TRAINING_JOB_NAME')
        # Set secret variables
        if os.path.exists("secrets.env"):
            with open("secrets.env", "r") as secrets_file:
                for line in secrets_file:
                    line = line.strip()
                    if '=' not in line:
                        # Blank or malformed line: skip it instead of
                        # failing on the tuple unpack below.
                        continue
                    key, val = line.split('=', 1)
                    os.environ[key] = val
    elif tf_config:
        cluster = tf_config.get('cluster')
        job_name = tf_config.get('task', {}).get('type')
        task_index = tf_config.get('task', {}).get('index')
        if job_name is not None and task_index is not None:
            # TODO: set run_id for resuming?
            run_id = cluster[job_name][task_index].rsplit(":")[0]
            if job_type is None:
                job_type = job_name
            if group is None and len(cluster.get("worker", [])) > 0:
                group = cluster[job_name][0].rsplit("-" + job_name, 1)[0]
    image = util.image_id_from_k8s()
    if image:
        os.environ[env.DOCKER] = image
    if project:
        os.environ[env.PROJECT] = project
    if entity:
        os.environ[env.ENTITY] = entity
    if group:
        os.environ[env.RUN_GROUP] = group
    if job_type:
        os.environ[env.JOB_TYPE] = job_type
    if tags:
        os.environ[env.TAGS] = ",".join(tags)
    if id:
        os.environ[env.RUN_ID] = id
        if name is None:
            # We do this because of https://github.com/wandb/core/issues/2170
            # to ensure that the run's name is explicitly set to match its
            # id. If we don't do this and the id is eight characters long, the
            # backend will set the name to a generated human-friendly value.
            #
            # In any case, if the user is explicitly setting `id` but not
            # `name`, their id is probably a meaningful string that we can
            # use to label the run.
            name = os.environ.get(
                env.NAME,
                id)  # environment variable takes precedence over this.
    if name:
        os.environ[env.NAME] = name
    if notes:
        os.environ[env.NOTES] = notes
    if magic is not None and magic is not False:
        if isinstance(magic, dict):
            os.environ[env.MAGIC] = json.dumps(magic)
        elif isinstance(magic, str):
            os.environ[env.MAGIC] = magic
        elif isinstance(magic, bool):
            pass
        else:
            termwarn("wandb.init called with invalid magic parameter type",
                     repeat=False)
        from wandb import magic_impl
        magic_impl.magic_install()
    if dir:
        os.environ[env.DIR] = dir
        util.mkdir_exists_ok(wandb_dir())
    if allow_anonymous:
        os.environ[env.ALLOW_ANONYMOUS] = str(allow_anonymous).lower()
    resume_path = os.path.join(wandb_dir(), wandb_run.RESUME_FNAME)
    # `== True` is kept deliberately: `resume` may also be a run id string,
    # which is handled by the next branch.
    if resume == True:
        os.environ[env.RESUME] = "auto"
    elif resume:
        os.environ[env.RESUME] = os.environ.get(env.RESUME, "allow")
        # TODO: remove allowing resume as a string in the future
        os.environ[env.RUN_ID] = id or resume
    elif os.path.exists(resume_path):
        # Not resuming: discard the stale resume file.
        os.remove(resume_path)
    if os.environ.get(env.RESUME) == 'auto' and os.path.exists(resume_path):
        if not os.environ.get(env.RUN_ID):
            with open(resume_path) as resume_file:
                os.environ[env.RUN_ID] = json.load(resume_file)["run_id"]

    # the following line is useful to ensure that no W&B logging happens in the user
    # process that might interfere with what they do
    # logging.basicConfig(format='user process %(asctime)s - %(name)s - %(levelname)s - %(message)s')

    # If a thread calls wandb.init() it will get the same Run object as
    # the parent. If a child process with distinct memory space calls
    # wandb.init(), it won't get an error, but it will get a result of
    # None.
    # This check ensures that a child process can safely call wandb.init()
    # after a parent has (only the parent will create the Run object).
    # This doesn't protect against the case where the parent doesn't call
    # wandb.init but two children do.
    if run or os.getenv(env.INITED):
        return run

    if __stage_dir__ is None:
        __stage_dir__ = "wandb"
        util.mkdir_exists_ok(wandb_dir())

    try:
        signal.signal(signal.SIGQUIT, _debugger)
    except AttributeError:
        # signal.SIGQUIT is not defined on every platform.
        pass

    try:
        run = wandb_run.Run.from_environment_or_defaults()
    except IOError as e:
        termerror('Failed to create run directory: {}'.format(e))
        raise LaunchError("Could not write to filesystem.")

    run.set_environment()

    def set_global_config(run):
        global config  # because we already have a local config
        config = run.config

    set_global_config(run)
    summary = run.summary

    # set this immediately after setting the run and the config. if there is an
    # exception after this it'll probably break the user script anyway
    os.environ[env.INITED] = '1'

    # we do these checks after setting the run and the config because users scripts
    # may depend on those things
    if sys.platform == 'win32' and run.mode != 'clirun':
        termerror(
            'To use wandb on Windows, you need to run the command "wandb run python <your_train_script>.py"'
        )
        return run

    if in_jupyter:
        _init_jupyter(run)
    elif run.mode == 'clirun':
        pass
    elif run.mode == 'run':
        api = InternalApi()
        # let init_jupyter handle this itself
        if not in_jupyter and not api.api_key:
            termlog(
                "W&B is a tool that helps track and visualize machine learning experiments"
            )
            if force:
                termerror(
                    "No credentials found. Run \"wandb login\" or \"wandb off\" to disable wandb"
                )
            else:
                if util.prompt_api_key(api):
                    _init_headless(run)
                else:
                    termlog(
                        "No credentials found. Run \"wandb login\" to visualize your metrics"
                    )
                    run.mode = "dryrun"
                    _init_headless(run, False)
        else:
            _init_headless(run)
    elif run.mode == 'dryrun':
        termlog('Dry run mode, not syncing to the cloud.')
        _init_headless(run, False)
    else:
        termerror('Invalid run mode "%s". Please unset WANDB_MODE.' %
                  run.mode)
        raise LaunchError("The WANDB_MODE environment variable is invalid.")

    # set the run directory in the config so it actually gets persisted
    run.config.set_run_dir(run.dir)

    if sagemaker_config:
        run.config.update(sagemaker_config)
        allow_val_change = True
    if config:
        run.config.update(config, allow_val_change=allow_val_change)

    # Access history to ensure resumed is set when resuming
    run.history
    # Load the summary to support resuming
    run.summary.load()

    atexit.register(run.close_files)

    return run