Example #1
def jupyter_login(force=True, api=None):
    """Attempt to login from a jupyter environment

    If force=False, we'll only attempt to auto-login, otherwise we'll prompt the user
    """
    def get_api_key_from_browser():
        key, anonymous = None, False
        if 'google.colab' in sys.modules:
            key = jupyter.attempt_colab_login(api.app_url)
        elif 'databricks_cli' in sys.modules and 'dbutils' in sys.modules:
            # Databricks does not seem to support getpass() so we need to fail
            # early and prompt the user to configure the key manually for now.
            termerror(
                "Databricks requires api_key to be configured manually, instructions at: http://docs.wandb.com/integrations/databricks"
            )
            raise LaunchError(
                "Databricks integration requires api_key to be configured.")
        if not key and os.environ.get(env.ALLOW_ANONYMOUS) == "true":
            key = api.create_anonymous_api_key()
            anonymous = True
        if not key and force:
            termerror(
                "Not authenticated.  Copy a key from https://app.wandb.ai/authorize"
            )
            key = getpass.getpass("API Key: ").strip()
        return key, anonymous

    api = api or (run.api if run else None)
    if not api:
        raise LaunchError("Internal error: api required for jupyter login")
    return util.prompt_api_key(api, browser_callback=get_api_key_from_browser)
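
A standalone sketch of the callback contract assumed by the browser_callback hook above: the callback must return a (key, anonymous) tuple. The ci_browser_callback name and the MY_CI_WANDB_KEY variable are hypothetical illustrations, not part of wandb.

import os

# Hypothetical callback honouring the same contract as get_api_key_from_browser:
# return (key, anonymous), where `anonymous` marks a throwaway identity.
def ci_browser_callback(signup=False):
    key = os.environ.get("MY_CI_WANDB_KEY")  # hypothetical secret source
    return key, False

# In the source above, the callback is wired in via:
#   util.prompt_api_key(api, browser_callback=get_api_key_from_browser)
# so a drop-in replacement only needs to preserve that return shape.
print(ci_browser_callback())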
Example #2
def get_api_key_from_browser(signup=False):
    key, anonymous = None, False
    if 'google.colab' in sys.modules:
        key = jupyter.attempt_colab_login(api.app_url)
    elif 'databricks_cli' in sys.modules and 'dbutils' in sys.modules:
        # Databricks does not seem to support getpass() so we need to fail
        # early and prompt the user to configure the key manually for now.
        termerror(
            "Databricks requires api_key to be configured manually, instructions at: http://docs.wandb.com/integrations/databricks"
        )
        raise LaunchError(
            "Databricks integration requires api_key to be configured.")
    # For jupyter we default to not allowing anonymous
    if not key and os.environ.get(env.ANONYMOUS, "never") != "never":
        key = api.create_anonymous_api_key()
        anonymous = True
    if not key and force:
        try:
            termerror(
                "Not authenticated.  Copy a key from https://app.wandb.ai/authorize"
            )
            key = getpass.getpass("API Key: ").strip()
        except NotImplementedError:
            termerror(
                "Can't accept input in this environment, you should set WANDB_API_KEY or call wandb.login(key='YOUR_API_KEY')"
            )
    return key, anonymous
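
The try/except around getpass is what distinguishes this variant: some notebook kernels raise NotImplementedError when stdin is unavailable. A minimal, dependency-free sketch of that prompt-with-fallback pattern (prompt_for_key is a hypothetical helper, not part of wandb):

import getpass

def prompt_for_key(prompt="API Key: "):
    # Interactive prompt that degrades gracefully instead of crashing when the
    # environment cannot accept input, as the callback above anticipates.
    try:
        return getpass.getpass(prompt).strip() or None
    except NotImplementedError:
        print("Can't accept input in this environment; set WANDB_API_KEY instead.")
        return None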
Example #3
def get_api_key_from_browser():
    key, anonymous = None, False
    if 'google.colab' in sys.modules:
        key = jupyter.attempt_colab_login(api.app_url)
    elif 'databricks_cli' in sys.modules and 'dbutils' in sys.modules:
        # Databricks does not seem to support getpass() so we need to fail
        # early and prompt the user to configure the key manually for now.
        termerror(
            "Databricks requires api_key to be configured manually, instructions at: http://docs.wandb.com/integrations/databricks"
        )
        raise LaunchError(
            "Databricks integration requires api_key to be configured.")
    if not key and os.environ.get(env.ALLOW_ANONYMOUS) == "true":
        key = api.create_anonymous_api_key()
        anonymous = True
    if not key and force:
        termerror(
            "Not authenticated.  Copy a key from https://app.wandb.ai/authorize"
        )
        key = getpass.getpass("API Key: ").strip()
    return key, anonymous
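
This variant gates anonymous-key creation on an explicit opt-in flag (env.ALLOW_ANONYMOUS == "true"). A tiny, generic sketch of the same opt-in gate, using a hypothetical EXAMPLE_ALLOW_ANON variable rather than wandb's real env module:

import os

def maybe_anonymous_key(create_key):
    # Only mint an anonymous key when the environment explicitly opts in.
    if os.environ.get("EXAMPLE_ALLOW_ANON") == "true":  # hypothetical flag
        return create_key(), True
    return None, False

key, anonymous = maybe_anonymous_key(lambda: "anon-key-123")  # stand-in factory
print(key, anonymous)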
Example #4
def init(job_type=None, dir=None, config=None, project=None, entity=None, reinit=None, tags=None,
         group=None, allow_val_change=False, resume=False, force=False, tensorboard=False,
         sync_tensorboard=False, name=None, notes=None, id=None, magic=None):
    """Initialize W&B

    If called from within Jupyter, initializes a new run and waits for a call to
    `wandb.log` to begin pushing metrics.  Otherwise, spawns a new process
    to communicate with W&B.

    Args:
        job_type (str, optional): The type of job running, defaults to 'train'
        config (dict, argparse, or tf.FLAGS, optional): The hyperparameters to store with the run
        project (str, optional): The project to push metrics to
        entity (str, optional): The entity to push metrics to
        dir (str, optional): An absolute path to a directory where metadata will be stored
        group (str, optional): A unique string shared by all runs in a given group
        tags (list, optional): A list of tags to apply to the run
        id (str, optional): A globally unique (per project) identifier for the run
        name (str, optional): A display name which does not have to be unique
        notes (str, optional): A multiline string associated with the run
        reinit (bool, optional): Allow multiple calls to init in the same process
        resume (bool, str, optional): Automatically resume this run if run from the same machine;
            you can also pass a unique run_id
        sync_tensorboard (bool, optional): Synchronize wandb logs to tensorboard or tensorboardX
        force (bool, optional): Force authentication with wandb, defaults to False
        magic (bool, dict, or str, optional): magic configuration as bool, dict, json string,
            yaml filename

    Returns:
        A wandb.run object for metric and config logging.
    """
    trigger.call('on_init', **locals())
    global run
    global __stage_dir__

    # We allow re-initialization when we're in Jupyter or explicitly opt in to it.
    in_jupyter = _get_python_type() != "python"
    if reinit or (in_jupyter and reinit != False):
        reset_env(exclude=env.immutable_keys())
        run = None

    # TODO: deprecate tensorboard
    if tensorboard or sync_tensorboard and len(patched["tensorboard"]) == 0:
        util.get_module("wandb.tensorboard").patch()

    sagemaker_config = util.parse_sm_config()
    tf_config = util.parse_tfjob_config()
    if group == None:
        group = os.getenv(env.RUN_GROUP)
    if job_type == None:
        job_type = os.getenv(env.JOB_TYPE)
    if sagemaker_config:
        # Set run_id and potentially grouping if we're in SageMaker
        run_id = os.getenv('TRAINING_JOB_NAME')
        if run_id:
            os.environ[env.RUN_ID] = '-'.join([
                run_id,
                os.getenv('CURRENT_HOST', socket.gethostname())])
        conf = json.load(
            open("/opt/ml/input/config/resourceconfig.json"))
        if group == None and len(conf["hosts"]) > 1:
            group = os.getenv('TRAINING_JOB_NAME')
        # Set secret variables
        if os.path.exists("secrets.env"):
            for line in open("secrets.env", "r"):
                key, val = line.strip().split('=', 1)
                os.environ[key] = val
    elif tf_config:
        cluster = tf_config.get('cluster')
        job_name = tf_config.get('task', {}).get('type')
        task_index = tf_config.get('task', {}).get('index')
        if job_name is not None and task_index is not None:
            # TODO: set run_id for resuming?
            run_id = cluster[job_name][task_index].rsplit(":")[0]
            if job_type == None:
                job_type = job_name
            if group == None and len(cluster.get("worker", [])) > 0:
                group = cluster[job_name][0].rsplit("-"+job_name, 1)[0]
    image = util.image_id_from_k8s()
    if image:
        os.environ[env.DOCKER] = image
    if project:
        os.environ[env.PROJECT] = project
    if entity:
        os.environ[env.ENTITY] = entity
    if group:
        os.environ[env.RUN_GROUP] = group
    if job_type:
        os.environ[env.JOB_TYPE] = job_type
    if tags:
        os.environ[env.TAGS] = ",".join(tags)
    if id:
        os.environ[env.RUN_ID] = id
        if name is None:
            # We do this because of https://github.com/wandb/core/issues/2170
            # to ensure that the run's name is explicitly set to match its
            # id. If we don't do this and the id is eight characters long, the
            # backend will set the name to a generated human-friendly value.
            #
            # In any case, if the user is explicitly setting `id` but not
            # `name`, their id is probably a meaningful string that we can
            # use to label the run.
            name = os.environ.get(env.NAME, id)  # environment variable takes precedence over this.
    if name:
        os.environ[env.NAME] = name
    if notes:
        os.environ[env.NOTES] = notes
    if magic is not None and magic is not False:
        if isinstance(magic, dict):
            os.environ[env.MAGIC] = json.dumps(magic)
        elif isinstance(magic, str):
            os.environ[env.MAGIC] = magic
        elif isinstance(magic, bool):
            pass
        else:
            termwarn("wandb.init called with invalid magic parameter type", repeat=False)
        from wandb import magic_impl
        magic_impl.magic_install()
    if dir:
        os.environ[env.DIR] = dir
        util.mkdir_exists_ok(wandb_dir())
    resume_path = os.path.join(wandb_dir(), wandb_run.RESUME_FNAME)
    if resume == True:
        os.environ[env.RESUME] = "auto"
    elif resume:
        os.environ[env.RESUME] = os.environ.get(env.RESUME, "allow")
        # TODO: remove allowing resume as a string in the future
        os.environ[env.RUN_ID] = id or resume
    elif os.path.exists(resume_path):
        os.remove(resume_path)
    if os.environ.get(env.RESUME) == 'auto' and os.path.exists(resume_path):
        if not os.environ.get(env.RUN_ID):
            os.environ[env.RUN_ID] = json.load(open(resume_path))["run_id"]

    # the following line is useful to ensure that no W&B logging happens in the user
    # process that might interfere with what they do
    # logging.basicConfig(format='user process %(asctime)s - %(name)s - %(levelname)s - %(message)s')

    # If a thread calls wandb.init() it will get the same Run object as
    # the parent. If a child process with distinct memory space calls
    # wandb.init(), it won't get an error, but it will get a result of
    # None.
    # This check ensures that a child process can safely call wandb.init()
    # after a parent has (only the parent will create the Run object).
    # This doesn't protect against the case where the parent doesn't call
    # wandb.init but two children do.
    if run or os.getenv(env.INITED):
        return run

    if __stage_dir__ is None:
        __stage_dir__ = "wandb"
        util.mkdir_exists_ok(wandb_dir())

    try:
        signal.signal(signal.SIGQUIT, _debugger)
    except AttributeError:
        pass

    try:
        run = wandb_run.Run.from_environment_or_defaults()
    except IOError as e:
        termerror('Failed to create run directory: {}'.format(e))
        raise LaunchError("Could not write to filesystem.")

    run.set_environment()

    def set_global_config(run):
        global config  # because we already have a local config
        config = run.config
    set_global_config(run)
    global summary
    summary = run.summary

    # set this immediately after setting the run and the config. if there is an
    # exception after this it'll probably break the user script anyway
    os.environ[env.INITED] = '1'

    # we do these checks after setting the run and the config because user scripts
    # may depend on those things
    if sys.platform == 'win32' and run.mode != 'clirun':
        termerror(
            'To use wandb on Windows, you need to run the command "wandb run python <your_train_script>.py"')
        return run

    if in_jupyter:
        _init_jupyter(run)
    elif run.mode == 'clirun':
        pass
    elif run.mode == 'run':
        api = InternalApi()
        # let init_jupyter handle this itself
        if not in_jupyter and not api.api_key:
            termlog(
                "W&B is a tool that helps track and visualize machine learning experiments")
            if force:
                termerror(
                    "No credentials found.  Run \"wandb login\" or \"wandb off\" to disable wandb")
            else:
                if run.check_anonymous():
                    _init_headless(run)
                else:
                    termlog(
                        "No credentials found.  Run \"wandb login\" to visualize your metrics")
                    run.mode = "dryrun"
                    _init_headless(run, False)
        else:
            _init_headless(run)
    elif run.mode == 'dryrun':
        termlog(
            'Dry run mode, not syncing to the cloud.')
        _init_headless(run, False)
    else:
        termerror(
            'Invalid run mode "%s". Please unset WANDB_MODE.' % run.mode)
        raise LaunchError("The WANDB_MODE environment variable is invalid.")

    # set the run directory in the config so it actually gets persisted
    run.config.set_run_dir(run.dir)

    if sagemaker_config:
        run.config.update(sagemaker_config)
        allow_val_change = True
    if config:
        run.config.update(config, allow_val_change=allow_val_change)

    # Access history to ensure resumed is set when resuming
    run.history
    # Load the summary to support resuming
    run.summary.load()

    atexit.register(run.close_files)

    return run
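
A usage sketch built from the docstring above. The project name, group, tags, and logged metrics are illustrative placeholders; wandb.log is the call the docstring says a Jupyter run waits for.

import wandb

run = wandb.init(
    project="my-project",               # illustrative project name
    job_type="train",
    group="experiment-1",               # shared by all runs in the group
    tags=["baseline", "demo"],
    config={"lr": 0.01, "epochs": 10},  # stored as the run's hyperparameters
)

for epoch in range(run.config.epochs):
    # Placeholder metric; in a real script this would come from training.
    wandb.log({"epoch": epoch, "loss": 1.0 / (epoch + 1)})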
Example #5
def _init_headless(run, cloud=True):
    global join
    global _user_process_finished_called

    environ = dict(os.environ)
    run.set_environment(environ)

    server = wandb_socket.Server()
    run.socket = server
    hooks = ExitHooks()
    hooks.hook()

    if sys.platform == "win32":
        # PTYs don't work on Windows so we use pipes.
        stdout_master_fd, stdout_slave_fd = os.pipe()
        stderr_master_fd, stderr_slave_fd = os.pipe()
    else:
        stdout_master_fd, stdout_slave_fd = io_wrap.wandb_pty(resize=False)
        stderr_master_fd, stderr_slave_fd = io_wrap.wandb_pty(resize=False)

    headless_args = {
        'command': 'headless',
        'pid': os.getpid(),
        'stdout_master_fd': stdout_master_fd,
        'stderr_master_fd': stderr_master_fd,
        'cloud': cloud,
        'port': server.port
    }
    internal_cli_path = os.path.join(
        os.path.dirname(__file__), 'internal_cli.py')

    if six.PY2:
        # TODO(adrian): close_fds=False is bad for security. we set
        # it so we can pass the PTY FDs to the wandb process. We
        # should use subprocess32, which has pass_fds.
        popen_kwargs = {'close_fds': False}
    else:
        popen_kwargs = {'pass_fds': [stdout_master_fd, stderr_master_fd]}

    # TODO(adrian): ensure we use *exactly* the same python interpreter
    # TODO(adrian): make wandb the foreground process so we don't give
    # up terminal control until syncing is finished.
    # https://stackoverflow.com/questions/30476971/is-the-child-process-in-foreground-or-background-on-fork-in-c
    wandb_process = subprocess.Popen([sys.executable, internal_cli_path, json.dumps(
        headless_args)], env=environ, **popen_kwargs)
    termlog('Started W&B process version {} with PID {}'.format(
        __version__, wandb_process.pid))
    os.close(stdout_master_fd)
    os.close(stderr_master_fd)
    # Listen on the socket waiting for the wandb process to be ready
    try:
        success, message = server.listen(30)
    except KeyboardInterrupt:
        success = False
    else:
        if not success:
            termerror('W&B process (PID {}) did not respond'.format(
                wandb_process.pid))
    if not success:
        wandb_process.kill()
        for i in range(20):
            time.sleep(0.1)
            if wandb_process.poll() is not None:
                break
        if wandb_process.poll() is None:
            termerror('Failed to kill wandb process, PID {}'.format(
                wandb_process.pid))
        # TODO attempt to upload a debug log
        path = GLOBAL_LOG_FNAME.replace(os.getcwd()+os.sep, "")
        raise LaunchError(
            "W&B process failed to launch, see: {}".format(path))

    stdout_slave = os.fdopen(stdout_slave_fd, 'wb')
    stderr_slave = os.fdopen(stderr_slave_fd, 'wb')

    stdout_redirector = io_wrap.FileRedirector(sys.stdout, stdout_slave)
    stderr_redirector = io_wrap.FileRedirector(sys.stderr, stderr_slave)

    # TODO(adrian): we should register this right after starting the wandb process to
    # make sure we shut down the W&B process eg. if there's an exception in the code
    # above
    atexit.register(_user_process_finished, server, hooks,
                    wandb_process, stdout_redirector, stderr_redirector)

    def _wandb_join():
        _user_process_finished(server, hooks,
                               wandb_process, stdout_redirector, stderr_redirector)
    join = _wandb_join
    _user_process_finished_called = False

    # redirect output last of all so we don't miss out on error messages
    stdout_redirector.redirect()
    if not env.is_debug():
        stderr_redirector.redirect()
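
A self-contained, POSIX-only sketch of the FD-handoff technique used above: create a pipe in the parent and keep the relevant descriptor open in the child via pass_fds on Python 3 (close_fds=False being the Python 2 fallback). The inline python -c child is a stand-in for internal_cli.py.

import os
import subprocess
import sys

read_fd, write_fd = os.pipe()

# The child inherits read_fd at the same numeric value thanks to pass_fds.
child = subprocess.Popen(
    [sys.executable, "-c",
     "import os, sys; fd = int(sys.argv[1]); print(os.read(fd, 1024).decode())",
     str(read_fd)],
    pass_fds=[read_fd],
)

os.write(write_fd, b"hello from the parent\n")
os.close(write_fd)  # signals EOF to the child once it has read
child.wait()
os.close(read_fd)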
Example #6
def _init_headless(run, cloud=True):
    global join
    global _user_process_finished_called

    program = util.get_program()
    if program:
        os.environ[env.PROGRAM] = os.getenv(env.PROGRAM) or program

    environ = dict(os.environ)
    run.set_environment(environ)

    server = wandb_socket.Server()
    run.socket = server
    hooks = ExitHooks()
    hooks.hook()

    if platform.system() == "Windows":
        try:
            import win32api
            # Make sure we are not ignoring CTRL_C_EVENT
            # https://docs.microsoft.com/en-us/windows/console/setconsolectrlhandler
            # https://stackoverflow.com/questions/1364173/stopping-python-using-ctrlc
            win32api.SetConsoleCtrlHandler(None, False)
        except ImportError:
            termerror(
                "Install the win32api library with `pip install pypiwin32`")

        # PTYs don't work on Windows so we create these unused pipes and
        # mirror stdout to run.dir/output.log.  There should be a way to make
        # pipes work, but I haven't figured it out.  See links in compat/windows
        stdout_master_fd, stdout_slave_fd = os.pipe()
        stderr_master_fd, stderr_slave_fd = os.pipe()
    else:
        stdout_master_fd, stdout_slave_fd = io_wrap.wandb_pty(resize=False)
        stderr_master_fd, stderr_slave_fd = io_wrap.wandb_pty(resize=False)

    headless_args = {
        'command': 'headless',
        'pid': os.getpid(),
        'stdout_master_fd': stdout_master_fd,
        'stderr_master_fd': stderr_master_fd,
        'cloud': cloud,
        'port': server.port
    }
    internal_cli_path = os.path.join(os.path.dirname(__file__),
                                     'internal_cli.py')

    if six.PY2 or platform.system() == "Windows":
        # TODO(adrian): close_fds=False is bad for security. we set
        # it so we can pass the PTY FDs to the wandb process. We
        # should use subprocess32, which has pass_fds.
        popen_kwargs = {'close_fds': False}
    else:
        popen_kwargs = {'pass_fds': [stdout_master_fd, stderr_master_fd]}

    # TODO(adrian): ensure we use *exactly* the same python interpreter
    # TODO(adrian): make wandb the foreground process so we don't give
    # up terminal control until syncing is finished.
    # https://stackoverflow.com/questions/30476971/is-the-child-process-in-foreground-or-background-on-fork-in-c
    wandb_process = subprocess.Popen(
        [sys.executable, internal_cli_path,
         json.dumps(headless_args)],
        env=environ,
        **popen_kwargs)
    termlog('Tracking run with wandb version {}'.format(__version__))
    os.close(stdout_master_fd)
    os.close(stderr_master_fd)
    # Listen on the socket waiting for the wandb process to be ready
    try:
        success, _ = server.listen(30)
    except KeyboardInterrupt:
        success = False
    else:
        if not success:
            termerror('W&B process (PID {}) did not respond'.format(
                wandb_process.pid))
    if not success:
        wandb_process.kill()
        for _ in range(20):
            time.sleep(0.1)
            if wandb_process.poll() is not None:
                break
        if wandb_process.poll() is None:
            termerror('Failed to kill wandb process, PID {}'.format(
                wandb_process.pid))
        # TODO attempt to upload a debug log
        path = GLOBAL_LOG_FNAME.replace(os.getcwd() + os.sep, "")
        raise LaunchError("W&B process failed to launch, see: {}".format(path))

    if platform.system() == "Windows":
        output = open(os.path.join(run.dir, "output.log"), "wb")
        stdout_redirector = io_wrap.WindowsRedirector(sys.stdout, output)
        stderr_redirector = io_wrap.WindowsRedirector(sys.stderr, output)
    else:
        stdout_slave = os.fdopen(stdout_slave_fd, 'wb')
        stderr_slave = os.fdopen(stderr_slave_fd, 'wb')
        try:
            stdout_redirector = io_wrap.FileRedirector(sys.stdout,
                                                       stdout_slave)
            stderr_redirector = io_wrap.FileRedirector(sys.stderr,
                                                       stderr_slave)
        except (ValueError, AttributeError):
            # stdout / err aren't files
            output = open(os.path.join(run.dir, "output.log"), "wb")
            stdout_redirector = io_wrap.WindowsRedirector(sys.stdout, output)
            stderr_redirector = io_wrap.WindowsRedirector(sys.stderr, output)

    # TODO(adrian): we should register this right after starting the wandb process to
    # make sure we shut down the W&B process eg. if there's an exception in the code
    # above
    atexit.register(_user_process_finished, server, hooks, wandb_process,
                    stdout_redirector, stderr_redirector)

    def _wandb_join(exit_code=None):
        global _global_run_stack
        shutdown_async_log_thread()
        run.close_files()
        if exit_code is not None:
            hooks.exit_code = exit_code
        _user_process_finished(server, hooks, wandb_process, stdout_redirector,
                               stderr_redirector)
        if len(_global_run_stack) > 0:
            _global_run_stack.pop()

    join = _wandb_join
    _user_process_finished_called = False

    # redirect output last of all so we don't miss out on error messages
    stdout_redirector.redirect()
    if not env.is_debug():
        stderr_redirector.redirect()
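
On Windows the code above cannot use PTYs, so it mirrors console output into run.dir/output.log through io_wrap.WindowsRedirector. Below is a generic, dependency-free sketch of that tee idea; the Tee class is a hypothetical stand-in, not the wandb implementation.

import sys

class Tee:
    # Forward writes to the real stream and mirror them into a log file.
    def __init__(self, stream, logfile):
        self._stream = stream
        self._logfile = logfile

    def write(self, data):
        self._stream.write(data)
        self._logfile.write(data)

    def flush(self):
        self._stream.flush()
        self._logfile.flush()

with open("output.log", "a", encoding="utf-8") as log:
    sys.stdout = Tee(sys.__stdout__, log)
    try:
        print("this line goes to the console and to output.log")
    finally:
        sys.stdout = sys.__stdout__  # restore, mirroring the redirector's cleanup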