示例#1
0
 def listen(self, max_seconds=30):
     """Wait for one NUL-terminated JSON status message on the connection.

     Connects first when no connection exists yet. Data is then read in
     chunks of up to 1024 bytes until a NUL byte arrives; everything before
     the NUL is decoded as UTF-8 JSON and its ``status`` field decides the
     result. Bytes received after the NUL in the same chunk are discarded.

     Args:
         max_seconds: Upper bound, in seconds, for the initial ``select``
             wait and for the read loop as a whole (checked before each
             ``recv``).

     Returns:
         A ``(success, message)`` tuple:
         ``(True, None)`` for status ``'done'``,
         ``(True, message)`` for status ``'ready'``,
         ``(False, None)`` for status ``'launch_error'`` and for any
         ``socket.error`` or ``ValueError`` (socket error, timeout, closed
         connection, invalid JSON, unknown status), which is reported via
         ``util.sentry_exc`` first.
     """
     if not self.connection:
         self.connect()
     start = time.time()
     # Only the error list is inspected; the readable list is ignored.
     _, _, err = select([self.connection], [], [self.connection],
                        max_seconds)
     try:
         if err:
             raise socket.error("Couldn't open socket")
         chunks = []
         while True:
             if time.time() - start > max_seconds:
                 raise socket.error(
                     "Timeout of %s seconds waiting for W&B process" %
                     max_seconds)
             res = self.connection.recv(1024)
             if not res:
                 # An empty read means the peer closed the connection, so no
                 # terminator can arrive any more. Fail right away instead
                 # of spinning on recv() until the timeout expires.
                 raise socket.error("Connection closed by W&B process")
             head, terminator, _ = res.partition(b'\0')
             chunks.append(head)
             if terminator:
                 break
         message = json.loads(b''.join(chunks).decode('utf8'))
         if message['status'] == 'done':
             return True, None
         elif message['status'] == 'ready':
             return True, message
         elif message['status'] == 'launch_error':
             return False, None
         else:
             raise socket.error("Invalid status: %s" % message['status'])
     except (socket.error, ValueError) as e:
         util.sentry_exc(e)
         return False, None
 def _thread_except_body(self):
     # TODO: Consolidate with internal_util.ExceptionThread
     try:
         self._thread_body()
     except Exception as e:
         exc_info = sys.exc_info()
         self._exc_info = exc_info
         logger.exception("generic exception in filestream thread")
         util.sentry_exc(exc_info, delay=True)
         raise e
示例#3
0
def wandb_stream_read(fd):
    """Read from file descriptor ``fd`` until end-of-file, discarding the data.

    Data is pulled in chunks of at most 200 bytes. An ``OSError`` raised by
    ``os.read`` is passed to ``sentry_exc`` and ends the function early.
    """
    drained = False
    while not drained:
        try:
            chunk = os.read(fd, 200)
        except OSError as err:
            sentry_exc(err)
            return
        # An empty read signals end-of-file.
        drained = not chunk
示例#4
0
def init(
    job_type=None,
    dir=None,
    config=None,
    project=None,
    entity=None,
    reinit=None,
    tags=None,
    group=None,
    name=None,
    notes=None,
    magic=None,
    config_exclude_keys=None,
    config_include_keys=None,
    anonymous=None,
    mode=None,
    allow_val_change=None,
    resume=None,
    force=None,
    tensorboard=None,  # alias for sync_tensorboard
    sync_tensorboard=None,
    monitor_gym=None,
    save_code=None,
    id=None,
    settings=None,
):
    """Initialize W&B and start or resume a run.

    Spawns a new process that starts or resumes a run locally and talks to a
    wandb server. Call this before any call to wandb.log.

    Arguments:
        job_type (str, optional): Type of job being run, defaults to 'train'.
        dir (str, optional): Absolute path of the directory in which metadata
            is stored.
        config (dict, argparse, or absl.flags, str, optional):
            Config parameters (typically hyperparameters) stored with the
            run. See also wandb.config.
            If dict, argparse or absl.flags: the key value pairs are loaded
                into the run's config object.
            If str: names a yaml file whose config parameters are loaded
                into the run's config object.
        project (str, optional): W&B Project.
        entity (str, optional): W&B Entity.
        reinit (bool, optional): Permit several calls to init in one process.
        tags (list, optional): Tags to apply to the run.
        group (str, optional): Unique string shared by every run of a group.
        name (str, optional): Display name of the run; need not be unique.
        notes (str, optional): Multiline string associated with the run.
        magic (bool, dict, or str, optional): Magic configuration given as a
            bool, dict, json string or yaml filename.
        config_exclude_keys (list, optional): String keys left out of what is
            stored in W&B when config is given.
        config_include_keys (list, optional): String keys stored in W&B when
            config is given.
        anonymous (str, optional): One of "allow", "must" or "never".
            Controls whether anonymous logging is allowed. Defaults to never.
        mode (str, optional): One of "online", "offline" or "disabled".
            Defaults to online.
        allow_val_change (bool, optional): Whether config values may change
            after being set. Defaults to true in jupyter, false otherwise.
        resume (bool, str, optional): Resuming behavior, one of "allow",
            "must", "never", "auto" or None. Defaults to None.
            Cases:
            - "auto" (or True): resume the previous run on the same machine
                if that run crashed, otherwise start a new run.
            - "allow": if id is set with init(id="UNIQUE_ID") or
                WANDB_RUN_ID="UNIQUE_ID" and matches a previous run, wandb
                resumes the run with that id. Otherwise a new run starts.
            - "never": if id is set with init(id="UNIQUE_ID") or
                WANDB_RUN_ID="UNIQUE_ID" and matches a previous run, wandb
                will crash.
            - "must": if id is set with init(id="UNIQUE_ID") or
                WANDB_RUN_ID="UNIQUE_ID" and matches a previous run, wandb
                resumes the run with that id. Otherwise wandb will crash.
            - None: never resumes; if a run has a duplicate run_id the
                previous run is overwritten.
            See https://docs.wandb.com/library/advanced/resuming for more
            detail.
        force (bool, optional): If true, the script crashes when the user
            can't or isn't logged in to a wandb server. If false, the script
            runs in offline mode in that situation. Defaults to false.
        tensorboard (optional): Alias for sync_tensorboard.
        sync_tensorboard (bool, optional): Synchronize wandb logs from
            tensorboard or tensorboardX and save the relevant events file.
            Defaults to false.
        monitor_gym: (bool, optional): Automatically log videos of the
            environment when using OpenAI Gym
            (see https://docs.wandb.com/library/integrations/openai-gym).
            Defaults to false.
        save_code (bool, optional): Save the entrypoint or jupyter session
            history source code.
        id (str, optional): Globally unique (per project) identifier of the
            run, primarily used for resuming.
        settings (optional): Passed to `_WandbInit.setup` together with all
            other arguments.

    Examples:
        Basic usage
        ```
        wandb.init()
        ```

        Launch multiple runs from the same script
        ```
        for x in range(10):
            with wandb.init(project="my-projo") as run:
                for y in range(100):
                    run.log({"metric": x+y})
        ```

    Raises:
        UsageError: re-raised unchanged.
        KeyboardInterrupt: re-raised after being logged.
        Exception: "problem", chained from any other error, unless the
            settings request a hard exit (`os._exit(-1)`) instead.

    Returns:
        A `Run` object, or None when the run could not be initialized and
        the `problem` setting is not "fatal".
    """
    assert not wandb._IS_INTERNAL_PROCESS
    # Snapshot the call arguments before any other local name is bound.
    kwargs = dict(locals())
    error_seen = None
    except_exit = None
    try:
        initializer = _WandbInit()
        initializer.setup(kwargs)
        except_exit = initializer.settings._except_exit
        try:
            run = initializer.init()
            except_exit = initializer.settings._except_exit
        except (KeyboardInterrupt, Exception) as exc:
            interrupted = isinstance(exc, KeyboardInterrupt)
            if not interrupted:
                sentry_exc(exc)
            # Skip getcaller() only for an interrupt received while the
            # agent is running.
            if not wandb.wandb_agent._is_running() or not interrupted:
                getcaller()
            assert logger
            if initializer.settings.problem == "fatal":
                raise
            if initializer.settings.problem == "warn":
                # Nothing extra happens for "warn" yet.
                pass
            # TODO(jhr): figure out how to make this RunDummy
            run = None
    except UsageError:
        raise
    except KeyboardInterrupt as exc:
        assert logger
        logger.warning("interrupted", exc_info=exc)
        raise exc
    except Exception as exc:
        error_seen = exc
        traceback.print_exc()
        assert logger
        logger.error("error", exc_info=exc)
        # The sentry capture is delayed because our exit hooks interfere
        # with sentry's ability to send errors before the program ends.
        sentry_exc(exc, delay=True)
    finally:
        if error_seen:
            wandb.termerror("Abnormal program exit")
            if except_exit:
                os._exit(-1)
            six.raise_from(Exception("problem"), error_seen)
    return run
示例#5
0
def init(
    job_type=None,
    dir=None,
    config=None,
    project=None,
    entity=None,
    reinit=None,
    tags=None,
    group=None,
    name=None,
    notes=None,
    magic=None,
    config_exclude_keys=None,
    config_include_keys=None,
    anonymous=None,
    mode=None,
    allow_val_change=None,
    resume=None,
    force=None,
    tensorboard=None,  # alias for sync_tensorboard
    sync_tensorboard=None,
    monitor_gym=None,
    save_code=None,
    id=None,
    settings=None,
):
    """
    Start a new tracked run with `wandb.init()`.

    In an ML training pipeline you can call `wandb.init()` at the top of both
    the training script and the evaluation script; each piece is then tracked
    as its own run in W&B.

    `wandb.init()` spawns a new background process that logs data to a run
    and, by default, also syncs the data to wandb.ai so visualizations are
    live. Call `wandb.init()` to start a run before logging data with
    `wandb.log()`.

    The run object is returned by `wandb.init()` and is also available as
    wandb.run.

    Arguments:
        project: (str, optional) Name of the project the new run is sent to.
            When no project is given the run is put in an "Uncategorized"
            project.
        entity: (str, optional) Username or team name the runs are sent to.
            The entity must exist before runs can be sent there, so create
            the account or team in the UI before starting to log runs.
            Without an entity the run goes to your default entity, which is
            usually your username. Change the default entity in
            [Settings](wandb.ai/settings) under "default location to create
            new projects".
        config: (dict, argparse, absl.flags, str, optional)
            Sets wandb.config, a dictionary-like object for saving the inputs
            of your job, such as hyperparameters for a model or settings for
            a data preprocessing job. The config shows up in a table in the
            UI that can be used to group, filter, and sort runs. Keys should
            not contain `.` in their names, and values should be under 10 MB.
            If dict, argparse or absl.flags: the key value pairs are loaded
                into the wandb.config object.
            If str: a yaml file by that name is looked up and its config is
                loaded into the wandb.config object.
        save_code: (bool, optional) Save the main script or notebook to W&B.
            This helps experiment reproducibility and lets you diff code
            across experiments in the UI. Off by default; the default can be
            flipped to "on" in [Settings](wandb.ai/settings).
        group: (str, optional) Group used to organize individual runs into a
            larger experiment, for example cross validation, or several jobs
            that train and evaluate a model against different test sets.
            Grouping can be toggled on and off in the UI. For more details,
            see [Grouping](docs.wandb.com/library/grouping).
        job_type: (str, optional) Type of the run, useful when runs are
            grouped into larger experiments using group. A group might hold
            jobs with types like train and eval; setting this makes it easy
            to filter and group similar runs in the UI so you can compare
            apples to apples.
        tags: (list, optional) List of strings that populates the tags of
            this run in the UI. Tags help organize runs together or apply
            temporary labels like "baseline" or "production". Tags are easy
            to add and remove in the UI, and runs can be filtered down to a
            specific tag.
        name: (str, optional) Short display name that identifies this run in
            the UI. By default a random two-word name is generated so runs
            can be cross-referenced from the table to charts. Short names
            keep chart legends and tables readable. Hyperparameters are
            better saved in config.
        notes: (str, optional) Longer description of the run, like a -m
            commit message in git, to help you remember what you were doing
            when you ran this run.
        dir: (str, optional) Absolute path of the directory where metadata is
            stored. When download() is called on an artifact, downloaded
            files are saved in this directory. Defaults to the ./wandb
            directory.
        resume (bool, str, optional): Sets the resuming behavior. Options:
            "allow", "must", "never", "auto" or None. Defaults to None.
            Cases:
            - None (default): If the new run has the same ID as a previous
                run, this run overwrites that data.
            - "auto" (or True): if the previous run on this machine crashed,
                automatically resume it. Otherwise, start a new run.
            - "allow": if id is set with init(id="UNIQUE_ID") or
                WANDB_RUN_ID="UNIQUE_ID" and it is identical to a previous
                run, wandb automatically resumes the run with that id.
                Otherwise, wandb starts a new run.
            - "never": if id is set with init(id="UNIQUE_ID") or
                WANDB_RUN_ID="UNIQUE_ID" and it is identical to a previous
                run, wandb will crash.
            - "must": if id is set with init(id="UNIQUE_ID") or
                WANDB_RUN_ID="UNIQUE_ID" and it is identical to a previous
                run, wandb automatically resumes the run with the id.
                Otherwise wandb will crash.
            See https://docs.wandb.com/library/advanced/resuming for more.
        reinit: (bool, optional) Allow multiple wandb.init() calls in the
            same process. (default: False)
        magic: (bool, dict, or str, optional) The bool controls whether we
            try to auto-instrument your script, capturing basic details of
            your run without extra wandb code. (default: False)
            A dict, json string, or yaml filename may also be passed.
        config_exclude_keys: (list, optional) String keys to exclude from
            `wandb.config`.
        config_include_keys: (list, optional) String keys to include in
            wandb.config.
        anonymous: (str, optional) Controls anonymous data logging. Options:
            - "never" (default): requires you to link your W&B account before
                tracking the run so an anonymous run is not created by
                accident.
            - "allow": lets a logged-in user track runs with their account,
                but lets someone running the script without a W&B account
                see the charts in the UI.
            - "must": sends the run to an anonymous account instead of to a
                signed-up user account.
        mode: (str, optional) Can be "online", "offline" or "disabled".
            Defaults to online.
        allow_val_change: (bool, optional) Whether config values may change
            after the keys have been set once. By default an exception is
            thrown if a config value is overwritten. To track something like
            a varying learning_rate at multiple times during training, use
            wandb.log() instead. (default: False in scripts, True in Jupyter)
        force: (bool, optional) If True, the script crashes when a user isn't
            logged in to W&B. If False, the script runs in offline mode when
            a user isn't logged in to W&B. (default: False)
        tensorboard: (optional) Alias for sync_tensorboard.
        sync_tensorboard: (bool, optional) Synchronize wandb logs from
            tensorboard or tensorboardX and save the relevant events file.
            (default: False)
        monitor_gym: (bool, optional) Automatically log videos of the
            environment when using OpenAI Gym. (default: False)
            See https://docs.wandb.com/library/integrations/openai-gym
        id: (str, optional) Unique ID for this run, used for Resuming. It
            must be unique in the project, and the ID of a deleted run can't
            be reused. Use the name field for a short descriptive name, or
            config for saving hyperparameters to compare across runs. The ID
            cannot contain special characters.
            See https://docs.wandb.com/library/resuming
        settings: (optional) Passed to `_WandbInit.setup` together with all
            other arguments.


    Examples:
        Basic usage
        ```
        wandb.init()
        ```

        Launch multiple runs from the same script
        ```
        for x in range(10):
            with wandb.init(project="my-projo") as run:
                for y in range(100):
                    run.log({"metric": x+y})
        ```

    Raises:
        UsageError: re-raised unchanged.
        KeyboardInterrupt: re-raised after being logged.
        Exception: "problem", chained from any other error, unless the
            settings request a hard exit (`os._exit(-1)`) instead.

    Returns:
        A `Run` object, or None when the run could not be initialized and
        the `problem` setting is not "fatal".
    """
    wandb._assert_is_user_process()
    # Snapshot the call arguments before any other local name is bound.
    kwargs = dict(locals())
    error_seen = None
    except_exit = None
    try:
        initializer = _WandbInit()
        initializer.setup(kwargs)
        except_exit = initializer.settings._except_exit
        try:
            run = initializer.init()
            except_exit = initializer.settings._except_exit
        except (KeyboardInterrupt, Exception) as exc:
            interrupted = isinstance(exc, KeyboardInterrupt)
            if not interrupted:
                sentry_exc(exc)
            # Skip getcaller() only for an interrupt received while the
            # agent is running.
            if not wandb.wandb_agent._is_running() or not interrupted:
                getcaller()
            assert logger
            if initializer.settings.problem == "fatal":
                raise
            if initializer.settings.problem == "warn":
                # Nothing extra happens for "warn" yet.
                pass
            # TODO(jhr): figure out how to make this RunDummy
            run = None
    except UsageError:
        raise
    except KeyboardInterrupt as exc:
        assert logger
        logger.warning("interrupted", exc_info=exc)
        raise exc
    except Exception as exc:
        error_seen = exc
        traceback.print_exc()
        assert logger
        logger.error("error", exc_info=exc)
        # The sentry capture is delayed because our exit hooks interfere
        # with sentry's ability to send errors before the program ends.
        sentry_exc(exc, delay=True)
    finally:
        if error_seen:
            wandb.termerror("Abnormal program exit")
            if except_exit:
                os._exit(-1)
            six.raise_from(Exception("problem"), error_seen)
    return run
示例#6
0
文件: internal.py 项目: nbardy/client
def wandb_internal(
    settings,
    record_q,
    result_q,
):
    """Entry point of the internal process.

    Marks the process as internal, starts a sender, a writer and a handler
    thread, then blocks until the shared stop event is set. The event is set
    here when the parent process check reports the parent dead or after two
    keyboard interrupts. Afterwards all threads are joined, and the first
    thread that recorded an exception causes the process to exit with -1.

    Arguments:
        settings: dictionary of configuration parameters.
        record_q: records to be handled
        result_q: for sending results back

    """
    # mark this process as internal
    wandb._set_internal_process()
    started = time.time()

    # register the exit handler only when wandb_internal is called, not on import
    def _log_exit(*args):
        logger.info("Internal process exited")

    atexit.register(_log_exit)

    # Work on a static copy so the caller's settings are never modified.
    static_settings = settings_static.SettingsStatic(settings)
    if static_settings.log_internal:
        configure_logging(static_settings.log_internal, static_settings._log_level)

    parent_pid = os.getppid()
    pid = os.getpid()

    logger.info(
        "W&B internal server running at pid: %s, started at: %s",
        pid,
        datetime.fromtimestamp(started),
    )

    publisher = interface.BackendSender(record_q=record_q)
    stopped = threading.Event()

    # Keyword arguments shared by all three worker threads.
    shared = dict(settings=static_settings, result_q=result_q, stopped=stopped)

    send_q = queue.Queue()
    sender = SenderThread(
        record_q=send_q,
        interface=publisher,
        debounce_interval_ms=30000,
        **shared
    )

    write_q = queue.Queue()
    writer = WriterThread(record_q=write_q, writer_q=write_q, **shared)

    handler = HandlerThread(
        record_q=record_q,
        sender_q=send_q,
        writer_q=write_q,
        interface=publisher,
        **shared
    )

    workers = [sender, writer, handler]

    process_check = ProcessCheck(settings=static_settings, pid=parent_pid)

    for worker in workers:
        worker.start()

    interrupt_count = 0
    while not stopped.is_set():
        try:
            # Poll once per second until something sets the stop event.
            while not stopped.is_set():
                time.sleep(1)
                if process_check.is_dead():
                    logger.error("Internal process shutdown.")
                    stopped.set()
        except KeyboardInterrupt:
            interrupt_count += 1
            logger.warning(
                "Internal process interrupt: {}".format(interrupt_count))
        finally:
            # The first interrupt is only logged; the second one stops us.
            if interrupt_count >= 2:
                logger.error("Internal process interrupted.")
                stopped.set()

    for worker in workers:
        worker.join()

    for worker in workers:
        exc_info = worker.get_exception()
        if not exc_info:
            continue
        header = "Thread {}:".format(worker.name)
        logger.error(header, exc_info=exc_info)
        print(header, file=sys.stderr)
        traceback.print_exception(*exc_info)
        sentry_exc(exc_info, delay=True)
        wandb.termerror("Internal wandb error: file data was not synced")
        sys.exit(-1)
示例#7
0
def wandb_internal(settings, record_q, result_q):
    """Internal process function entrypoint.

    Read from record queue and dispatch work to various threads. A sender,
    a writer and a handler thread are started; the handler thread is given
    ``record_q`` along with the sender and writer queues. The function then
    blocks until the shared stop event is set, which happens here when the
    parent process check reports the parent dead or after two keyboard
    interrupts. Finally all threads are joined, and the first thread that
    recorded an exception causes the process to exit with status -1.

    Args:
        settings: dictionary of configuration parameters.
        record_q: records to be handled
        result_q: for sending results back

    """
    # mark this process as internal
    wandb._IS_INTERNAL_PROCESS = True

    # Lets make sure we dont modify settings so use a static object
    settings = settings_static.SettingsStatic(settings)
    if settings.log_internal:
        configure_logging(settings.log_internal, settings._log_level)

    parent_pid = os.getppid()
    pid = os.getpid()

    logger.info("W&B internal server running at pid: %s", pid)

    publish_interface = interface.BackendSender(record_q=record_q)

    stopped = threading.Event()
    send_record_q = queue.Queue()
    record_sender_thread = SenderThread(
        settings=settings,
        record_q=send_record_q,
        result_q=result_q,
        stopped=stopped,
        interface=publish_interface,
    )
    threads = [record_sender_thread]
    write_record_q = queue.Queue()
    record_writer_thread = WriterThread(
        settings=settings,
        record_q=write_record_q,
        result_q=result_q,
        stopped=stopped,
        writer_q=write_record_q,
    )
    threads.append(record_writer_thread)

    record_handler_thread = HandlerThread(
        settings=settings,
        record_q=record_q,
        result_q=result_q,
        stopped=stopped,
        sender_q=send_record_q,
        writer_q=write_record_q,
        interface=publish_interface,
    )
    threads.append(record_handler_thread)

    process_check = ProcessCheck(settings=settings, pid=parent_pid)

    for thread in threads:
        thread.start()

    interrupt_count = 0
    # Event.is_set() replaces the deprecated camelCase alias isSet().
    while not stopped.is_set():
        try:
            # wait for stop event
            while not stopped.is_set():
                time.sleep(1)
                if process_check.is_dead():
                    logger.error("Internal process shutdown.")
                    stopped.set()
        except KeyboardInterrupt:
            interrupt_count += 1
            logger.warning(
                "Internal process interrupt: {}".format(interrupt_count))
        finally:
            # The first interrupt is only logged; the second one stops us.
            if interrupt_count >= 2:
                logger.error("Internal process interrupted.")
                stopped.set()

    for thread in threads:
        thread.join()

    for thread in threads:
        if exc_info := thread.get_exception():
            logger.error("Thread {}:".format(thread.name), exc_info=exc_info)
            print("Thread {}:".format(thread.name), file=sys.stderr)
            traceback.print_exception(*exc_info)
            sentry_exc(exc_info, delay=True)
            sys.exit(-1)
示例#8
0
def init(
    job_type=None,
    dir=None,
    config=None,  # TODO(jhr): type is a union for argparse/absl
    project=None,
    entity=None,
    reinit=None,
    tags=None,
    group=None,
    name=None,
    notes=None,
    magic=None,  # TODO(jhr): type is union
    config_exclude_keys=None,
    config_include_keys=None,
    anonymous=None,
    mode=None,
    allow_val_change=None,
    resume=None,
    force=None,
    tensorboard=None,  # alias for sync_tensorboard
    sync_tensorboard=None,
    monitor_gym=None,
    save_code=None,
    id=None,
    settings=None,
):
    """Initialize a wandb Run.

    All arguments are captured by name and handed to `_WandbInit.setup`.

    Args:
        entity: personal user or team to use for Run.
        project: project name for the Run.
        tensorboard: alias for sync_tensorboard.

    Raises:
        UsageError: re-raised unchanged.
        KeyboardInterrupt: re-raised after being logged.
        Exception: "problem", chained from any other error, unless the
            settings request a hard exit (`os._exit(-1)`) instead.

    Returns:
        wandb Run object, or None when the run could not be initialized and
        the `problem` setting is not "fatal".

    """
    assert not wandb._IS_INTERNAL_PROCESS
    # Snapshot the call arguments before any other local name is bound.
    kwargs = dict(locals())
    error_seen = None
    except_exit = None
    try:
        initializer = _WandbInit()
        initializer.setup(kwargs)
        except_exit = initializer.settings._except_exit
        try:
            run = initializer.init()
            except_exit = initializer.settings._except_exit
        except (KeyboardInterrupt, Exception) as exc:
            interrupted = isinstance(exc, KeyboardInterrupt)
            if not interrupted:
                sentry_exc(exc)
            # Skip getcaller() only for an interrupt received while the
            # agent is running.
            if not wandb.wandb_agent._is_running() or not interrupted:
                getcaller()
            assert logger
            if initializer.settings.problem == "fatal":
                raise
            if initializer.settings.problem == "warn":
                # Nothing extra happens for "warn" yet.
                pass
            # TODO(jhr): figure out how to make this RunDummy
            run = None
    except UsageError:
        raise
    except KeyboardInterrupt as exc:
        assert logger
        logger.warning("interrupted", exc_info=exc)
        raise exc
    except Exception as exc:
        error_seen = exc
        traceback.print_exc()
        assert logger
        logger.error("error", exc_info=exc)
        # The sentry capture is delayed because our exit hooks interfere
        # with sentry's ability to send errors before the program ends.
        sentry_exc(exc, delay=True)
    finally:
        if error_seen:
            wandb.termerror("Abnormal program exit")
            if except_exit:
                os._exit(-1)
            six.raise_from(Exception("problem"), error_seen)
    return run
示例#9
0
def init(
    settings=None,
    entity=None,
    team=None,
    project=None,
    mode=None,
    group=None,
    job_type=None,
    tags=None,
    name=None,
    config=None,  # TODO(jhr): type is a union for argparse/absl
    notes=None,
    magic=None,  # TODO(jhr): type is union
    config_exclude_keys=None,
    config_include_keys=None,
    reinit=None,
    anonymous=None,
    dir=None,
    allow_val_change=None,
    resume=None,
    force=None,
    tensorboard=None,
    sync_tensorboard=None,
    monitor_gym=None,
    id=None,
):
    """Initialize a wandb Run.

    All arguments are captured by name and handed to ``_WandbInit.setup``.

    Args:
        entity: alias for team.
        team: personal user or team to use for Run.
        project: project name for the Run.

    Raises:
        Exception: "interrupted", chained from a KeyboardInterrupt, or
            "problem", chained from any other error that reaches the outer
            handler (a failure in setup, or a failure in init when the
            ``problem`` setting is "fatal").

    Returns:
        wandb Run object, or a ``RunDummy`` when initialization failed and
        the ``problem`` setting is not "fatal".

    """
    assert not wandb._IS_INTERNAL_PROCESS
    # Take a snapshot: the mapping returned by locals() can be refreshed in
    # place by later locals()/frame.f_locals access, which would leak
    # unrelated locals (wi, run, ...) into the arguments.
    kwargs = dict(locals())
    try:
        wi = _WandbInit()
        wi.setup(kwargs)
        try:
            run = wi.init()
        except (KeyboardInterrupt, Exception) as e:
            # Interrupts are not reported to sentry.
            if not isinstance(e, KeyboardInterrupt):
                sentry_exc(e)
            getcaller()
            assert logger
            logger.exception("we got issues")
            wi._atexit_cleanup()
            if wi.settings.problem == "fatal":
                raise
            if wi.settings.problem == "warn":
                # Nothing extra happens for "warn" yet.
                pass
            run = RunDummy()
    except KeyboardInterrupt as e:
        assert logger
        logger.warning("interupted", exc_info=e)
        raise_from(Exception("interrupted"), e)
    except Exception as e:
        assert logger
        logger.error("error", exc_info=e)
        # Need to build delay into this sentry capture because our exit hooks
        # mess with sentry's ability to send out errors before the program ends.
        sentry_exc(e, delay=True)
        raise_from(Exception("problem"), e)

    return run
示例#10
0
def init(
    job_type: Optional[str] = None,
    dir=None,
    config: Union[Dict,
                  None] = None,  # TODO(jhr): type is a union for argparse/absl
    project: Optional[str] = None,
    entity: Optional[str] = None,
    reinit: Optional[bool] = None,
    tags: Optional[List] = None,
    team: Optional[str] = None,
    group: Optional[str] = None,
    name: Optional[str] = None,
    notes: Optional[str] = None,
    magic: Optional[bool] = None,  # TODO(jhr): type is union
    config_exclude_keys=None,
    config_include_keys=None,
    anonymous: Optional[str] = None,
    disable: Optional[bool] = None,
    offline: Optional[bool] = None,
    allow_val_change: Optional[bool] = None,
    resume: Optional[Union[bool, str]] = None,
    force=None,
    tensorboard=None,  # alias for sync_tensorboard
    sync_tensorboard=None,
    monitor_gym=None,
    id=None,
    settings: Union[Settings, Dict[str, Any], str, None] = None,
) -> Run:
    """Initialize a wandb Run.

    All arguments are captured by name and handed to ``_WandbInit.setup``.

    Args:
        entity: alias for team.
        team: personal user or team to use for Run.
        project: project name for the Run.
        tensorboard: alias for sync_tensorboard.

    Raises:
        UsageError: re-raised unchanged.
        Exception: "interrupted", chained from a KeyboardInterrupt; any
            other error that reaches the outer handler is logged, reported
            and re-raised as is.

    Returns:
        wandb Run object, or a ``RunDummy`` when initialization failed and
        the ``problem`` setting is not "fatal".

    """
    assert not wandb._IS_INTERNAL_PROCESS
    # Take a snapshot: the mapping returned by locals() can be refreshed in
    # place by later locals()/frame.f_locals access, which would leak
    # unrelated locals (wi, run, ...) into the arguments.
    kwargs = dict(locals())
    try:
        wi = _WandbInit()
        wi.setup(kwargs)
        try:
            run = wi.init()
        except (KeyboardInterrupt, Exception) as e:
            # Interrupts are not reported to sentry.
            if not isinstance(e, KeyboardInterrupt):
                sentry_exc(e)
            getcaller()
            assert logger
            if wi.settings.problem == "fatal":
                raise
            if wi.settings.problem == "warn":
                # Nothing extra happens for "warn" yet.
                pass
            run = RunDummy()
    except UsageError:
        raise
    except KeyboardInterrupt as e:
        assert logger
        logger.warning("interrupted", exc_info=e)
        raise_from(Exception("interrupted"), e)
    except Exception as e:
        assert logger
        logger.error("error", exc_info=e)
        # Need to build delay into this sentry capture because our exit hooks
        # mess with sentry's ability to send out errors before the program ends.
        sentry_exc(e, delay=True)
        reraise(*sys.exc_info())
    return run
示例#11
0
def init(
    job_type: Optional[str] = None,
    dir=None,
    config: Union[Dict, str, None] = None,
    project: Optional[str] = None,
    entity: Optional[str] = None,
    reinit: Optional[bool] = None,
    tags: Optional[Sequence] = None,
    group: Optional[str] = None,
    name: Optional[str] = None,
    notes: Optional[str] = None,
    magic: Optional[Union[dict, str, bool]] = None,
    config_exclude_keys=None,
    config_include_keys=None,
    anonymous: Optional[str] = None,
    mode: Optional[str] = None,
    allow_val_change: Optional[bool] = None,
    resume: Optional[Union[bool, str]] = None,
    force: Optional[bool] = None,
    tensorboard=None,  # alias for sync_tensorboard
    sync_tensorboard=None,
    monitor_gym=None,
    save_code=None,
    id=None,
    settings: Union[Settings, Dict[str, Any], None] = None,
) -> Union[Run, RunDisabled, None]:
    """Starts a new run to track and log to W&B.

    In an ML training pipeline, you could add `wandb.init()`
    to the beginning of your training script as well as your evaluation
    script, and each piece would be tracked as a run in W&B.

    `wandb.init()` spawns a new background process to log data to a run, and it
    also syncs data to wandb.ai by default so you can see live visualizations.

    Call `wandb.init()` to start a run before logging data with `wandb.log()`:
    <!--yeadoc-test:init-method-log-->
    ```python
    import wandb

    wandb.init()
    # ... calculate metrics, generate media
    wandb.log({"accuracy": 0.9})
    ```

    `wandb.init()` returns a run object, and you can also access the run object
    via `wandb.run`:
    <!--yeadoc-test:init-and-assert-global-->
    ```python
    import wandb

    run = wandb.init()

    assert run is wandb.run
    ```

    At the end of your script, we will automatically call `wandb.finish` to
    finalize and cleanup the run. However, if you call `wandb.init` from a
    child process, you must explicitly call `wandb.finish` at the end of the
    child process.

    For more on using `wandb.init()`, including detailed examples, check out our
    [guide and FAQs](https://docs.wandb.ai/guides/track/launch).

    Arguments:
        project: (str, optional) The name of the project where you're sending
            the new run. If the project is not specified, the run is put in an
            "Uncategorized" project.
        entity: (str, optional) An entity is a username or team name where
            you're sending runs. This entity must exist before you can send runs
            there, so make sure to create your account or team in the UI before
            starting to log runs.
            If you don't specify an entity, the run will be sent to your default
            entity, which is usually your username. Change your default entity
            in [your settings](https://wandb.ai/settings) under "default location
            to create new projects".
        config: (dict, argparse, absl.flags, str, optional)
            This sets `wandb.config`, a dictionary-like object for saving inputs
            to your job, like hyperparameters for a model or settings for a data
            preprocessing job. The config will show up in a table in the UI that
            you can use to group, filter, and sort runs. Keys should not contain
            `.` in their names, and values should be under 10 MB.
            If dict, argparse or absl.flags: will load the key value pairs into
                the `wandb.config` object.
            If str: will look for a yaml file by that name, and load config from
                that file into the `wandb.config` object.
        save_code: (bool, optional) Turn this on to save the main script or
            notebook to W&B. This is valuable for improving experiment
            reproducibility and to diff code across experiments in the UI. By
            default this is off, but you can flip the default behavior to on
            in [your settings page](https://wandb.ai/settings).
        group: (str, optional) Specify a group to organize individual runs into
            a larger experiment. For example, you might be doing cross
            validation, or you might have multiple jobs that train and evaluate
            a model against different test sets. Group gives you a way to
            organize runs together into a larger whole, and you can toggle this
            on and off in the UI. For more details, see our
            [guide to grouping runs](https://docs.wandb.com/library/grouping).
        job_type: (str, optional) Specify the type of run, which is useful when
            you're grouping runs together into larger experiments using group.
            For example, you might have multiple jobs in a group, with job types
            like train and eval. Setting this makes it easy to filter and group
            similar runs together in the UI so you can compare apples to apples.
        tags: (list, optional) A list of strings, which will populate the list
            of tags on this run in the UI. Tags are useful for organizing runs
            together, or applying temporary labels like "baseline" or
            "production". It's easy to add and remove tags in the UI, or filter
            down to just runs with a specific tag.
        name: (str, optional) A short display name for this run, which is how
            you'll identify this run in the UI. By default we generate a random
            two-word name that lets you easily cross-reference runs from the
            table to charts. Keeping these run names short makes the chart
            legends and tables easier to read. If you're looking for a place to
            save your hyperparameters, we recommend saving those in config.
        notes: (str, optional) A longer description of the run, like a `-m` commit
            message in git. This helps you remember what you were doing when you
            ran this run.
        dir: (str, optional) An absolute path to a directory where metadata will
            be stored. When you call `download()` on an artifact, this is the
            directory where downloaded files will be saved. By default this is
            the `./wandb` directory.
        resume: (bool, str, optional) Sets the resuming behavior. Options:
            `"allow"`, `"must"`, `"never"`, `"auto"` or `None`. Defaults to `None`.
            Cases:
            - `None` (default): If the new run has the same ID as a previous run,
                this run overwrites that data.
            - `"auto"` (or `True`): if the previous run on this machine crashed,
                automatically resume it. Otherwise, start a new run.
            - `"allow"`: if id is set with `init(id="UNIQUE_ID")` or
                `WANDB_RUN_ID="UNIQUE_ID"` and it is identical to a previous run,
                wandb will automatically resume the run with that id. Otherwise,
                wandb will start a new run.
            - `"never"`: if id is set with `init(id="UNIQUE_ID")` or
                `WANDB_RUN_ID="UNIQUE_ID"` and it is identical to a previous run,
                wandb will crash.
            - `"must"`: if id is set with `init(id="UNIQUE_ID")` or
                `WANDB_RUN_ID="UNIQUE_ID"` and it is identical to a previous run,
                wandb will automatically resume the run with the id. Otherwise
                wandb will crash.
            See [our guide to resuming runs](https://docs.wandb.com/library/advanced/resuming)
            for more.
        reinit: (bool, optional) Allow multiple `wandb.init()` calls in the same
            process. (default: `False`)
        magic: (bool, dict, or str, optional) The bool controls whether we try to
            auto-instrument your script, capturing basic details of your run
            without you having to add more wandb code. (default: `False`)
            You can also pass a dict, json string, or yaml filename.
        config_exclude_keys: (list, optional) string keys to exclude from
            `wandb.config`.
        config_include_keys: (list, optional) string keys to include in
            `wandb.config`.
        anonymous: (str, optional) Controls anonymous data logging. Options:
            - `"never"` (default): requires you to link your W&B account before
                tracking the run so you don't accidentally create an anonymous
                run.
            - `"allow"`: lets a logged-in user track runs with their account, but
                lets someone who is running the script without a W&B account see
                the charts in the UI.
            - `"must"`: sends the run to an anonymous account instead of to a
                signed-up user account.
        mode: (str, optional) Can be `"online"`, `"offline"` or `"disabled"`. Defaults to
            online.
        allow_val_change: (bool, optional) Whether to allow config values to
            change after setting the keys once. By default we throw an exception
            if a config value is overwritten. If you want to track something
            like a varying learning rate at multiple times during training, use
            `wandb.log()` instead. (default: `False` in scripts, `True` in Jupyter)
        force: (bool, optional) If `True`, this crashes the script if a user isn't
            logged in to W&B. If `False`, this will let the script run in offline
            mode if a user isn't logged in to W&B. (default: `False`)
        sync_tensorboard: (bool, optional) Synchronize wandb logs from tensorboard or
            tensorboardX and save the relevant events file. (default: `False`)
        tensorboard: alias for `sync_tensorboard`.
        monitor_gym: (bool, optional) Automatically log videos of environment when
            using OpenAI Gym. (default: `False`)
            See [our guide to this integration](https://docs.wandb.com/library/integrations/openai-gym).
        id: (str, optional) A unique ID for this run, used for resuming. It must
            be unique in the project, and if you delete a run you can't reuse
            the ID. Use the name field for a short descriptive name, or config
            for saving hyperparameters to compare across runs. The ID cannot
            contain special characters.
            See [our guide to resuming runs](https://docs.wandb.com/library/resuming).
        settings: (Settings, dict, optional) Forwarded, together with every
            other argument, to `_WandbInit.setup()`.

    Examples:
    ### Set where the run is logged

    You can change where the run is logged, just like changing
    the organization, repository, and branch in git:
    ```python
    import wandb

    user = "******"
    project = "capsules"
    display_name = "experiment-2021-10-31"

    wandb.init(entity=user, project=project, name=display_name)
    ```

    ### Add metadata about the run to the config

    Pass a dictionary-style object as the `config` keyword argument to add
    metadata, like hyperparameters, to your run.
    <!--yeadoc-test:init-set-config--->
    ```python
    import wandb

    config = {"lr": 3e-4, "batch_size": 32}
    config.update({"architecture": "resnet", "depth": 34})
    wandb.init(config=config)
    ```

    Raises:
        UsageError: re-raised unchanged after its message is printed with
            `wandb.termerror`.
        KeyboardInterrupt: re-raised after being logged, when it escapes
            setup, or escapes run creation while `settings.problem` is
            `"fatal"`.
        Exception: `Exception("problem")`, chained from the original error,
            for any other failure that escapes. If `settings._except_exit`
            is set, the process is terminated with `os._exit(-1)` instead.

    Returns:
        A `Run` object (the annotation also allows `RunDisabled`), or `None`
        when run creation fails and `settings.problem` is not `"fatal"`.
    """
    wandb._assert_is_user_process()

    if resume is True:
        resume = "auto"  # account for changing resume interface, True and auto should behave the same

    # Snapshot the call arguments before any other local is defined, so only
    # the parameters (with the normalized `resume`) are forwarded to setup().
    kwargs = dict(locals())
    error_seen = None
    except_exit = None
    try:
        wi = _WandbInit()
        wi.setup(kwargs)
        except_exit = wi.settings._except_exit
        try:
            run = wi.init()
            # Read again after init().
            # NOTE(review): presumably init() can change this setting - confirm.
            except_exit = wi.settings._except_exit
        except (KeyboardInterrupt, Exception) as e:
            # Interrupts are not reported to sentry; every other error is.
            if not isinstance(e, KeyboardInterrupt):
                sentry_exc(e)
            # Skip getcaller() only for an interrupt raised while the agent
            # is running.
            if not (wandb.wandb_agent._is_running()
                    and isinstance(e, KeyboardInterrupt)):
                getcaller()
            assert logger
            if wi.settings.problem == "fatal":
                raise
            # Any other `problem` value (including "warn") falls through and
            # the caller receives no run.
            # TODO(jhr): figure out how to make this RunDummy
            run = None
    except UsageError as e:
        wandb.termerror(str(e))
        raise
    except KeyboardInterrupt as e:
        assert logger
        logger.warning("interrupted", exc_info=e)
        raise
    except Exception as e:
        # Remember the error; the `finally` clause below decides whether to
        # exit the process or raise a wrapped exception.
        error_seen = e
        traceback.print_exc()
        assert logger
        logger.error("error", exc_info=e)
        # Need to build delay into this sentry capture because our exit hooks
        # mess with sentry's ability to send out errors before the program ends.
        sentry_exc(e, delay=True)
    finally:
        # Compare against None rather than relying on truthiness: an
        # exception type may define __bool__/__len__ and evaluate as falsy,
        # which would skip this path and hit `return run` with `run` unbound.
        if error_seen is not None:
            wandb.termerror("Abnormal program exit")
            if except_exit:
                os._exit(-1)
            six.raise_from(Exception("problem"), error_seen)
    return run