示例#1
0
    def load_path(self, artifact, manifest_entry, local=False):
        """Resolve a GCS manifest entry, optionally downloading it locally.

        Args:
            artifact: Artifact whose ``cache_dir`` receives local downloads.
            manifest_entry: Entry with ``.ref`` (gs:// URI), ``.extra`` (may
                hold a ``versionID``), ``.digest`` (md5) and ``.path``.
            local (bool): If True, download the object and return the local
                path; otherwise just validate and return the reference.

        Returns:
            str: ``manifest_entry.ref`` when ``local`` is False, else the
            local file path the object was downloaded to.

        Raises:
            ValueError: If the object cannot be found, or (when versioning
                is unavailable) its MD5 does not match the manifest digest.
        """
        self.init_gcs()
        bucket, key = self._parse_uri(manifest_entry.ref)
        version = manifest_entry.extra.get('versionID')

        # (The previous revision created an unused `extra_args` dict here.)
        obj = None
        # First attempt to get the generation specified; this returns None
        # if versioning is not enabled on the bucket.
        if version is not None:
            obj = self._client.bucket(bucket).get_blob(key, generation=version)

        if obj is None:
            # Object versioning is disabled on the bucket, so just get
            # the latest version and make sure the MD5 matches.
            obj = self._client.bucket(bucket).get_blob(key)
            if obj is None:
                raise ValueError('Unable to download object %s with generation %s' % (manifest_entry.ref, version))
            md5 = obj.md5_hash
            if md5 != manifest_entry.digest:
                raise ValueError('Digest mismatch for object %s: expected %s but found %s' %
                    (manifest_entry.ref, manifest_entry.digest, md5))

        if not local:
            return manifest_entry.ref

        path = '%s/%s' % (artifact.cache_dir, manifest_entry.path)

        # TODO: We only have etag for this file, so we can't compare to an md5 to skip
        # downloading. Switching to object caching (caching files by their digest instead
        # of file name), this would work. Or we can store a list of known etags for local
        # files.

        util.mkdir_exists_ok(os.path.dirname(path))
        obj.download_to_filename(path)
        return path
示例#2
0
 def transform(images, out_dir, fname):
     """
     Combines a list of images into a single sprite returning meta information

     Args:
         images: list of wandb Image objects; the first defines the tile size.
         out_dir: output root; the sprite is written under media/images.
         fname: file name for the saved sprite.

     Returns:
         dict: tile width/height, total image count, and optional captions.
     """
     from PIL import Image as PILImage
     base = os.path.join(out_dir, "media", "images")
     width, height = images[0].image.size
     if len(images) > MAX_IMAGES:
         # logging.warn is a deprecated alias; logging.warning is the
         # supported spelling.
         logging.warning(
             "The maximum number of images to store per step is %i." %
             MAX_IMAGES)
     # NOTE(review): the sprite is sized for *all* images but only the first
     # MAX_IMAGES are pasted, leaving an unused black region when truncated —
     # confirm whether the width should use min(len(images), MAX_IMAGES).
     sprite = PILImage.new(mode='RGB',
                           size=(width * len(images), height),
                           color=(0, 0, 0, 0))
     # Paste each image side by side, left to right.
     for i, image in enumerate(images[:MAX_IMAGES]):
         sprite.paste(image.image, (width * i, 0))
     util.mkdir_exists_ok(base)
     sprite.save(os.path.join(base, fname), transparency=0)
     meta = {
         "width": width,
         "height": height,
         "count": len(images),
         "_type": "images"
     }
     captions = Image.captions(images[:MAX_IMAGES])
     if captions:
         meta["captions"] = captions
     return meta
示例#3
0
 def check_etag_obj_path(self, etag, size):
     """Return (path, hit, opener) for the cache slot addressed by *etag*.

     The first two hex chars of the etag shard the cache directory. ``hit``
     is True only when a file of exactly *size* bytes is already cached;
     otherwise the parent directory is created so the caller can write it.
     """
     path = os.path.join(self._cache_dir, "obj", "etag", etag[:2], etag[2:])
     # Dispatch through self (not the ArtifactsCache class directly) so a
     # subclass overriding _cache_opener is honored; this also matches the
     # typed variant of this method.
     opener = self._cache_opener(path)
     if os.path.isfile(path) and os.path.getsize(path) == size:
         return path, True, opener
     util.mkdir_exists_ok(os.path.dirname(path))
     return path, False, opener
示例#4
0
    def seq_to_json(cls, seq, run, key, step):
        """Serialize a sequence of Audio objects into a JSON-friendly dict.

        Args:
            seq: Iterable of Audio objects.
            run: Run the audio objects are (or become) bound to.
            key: Media key used when binding.
            step: Step used when binding.

        Returns:
            dict: ``_type``/``count``/``audio`` plus optional
            ``sampleRates``, ``durations`` and ``captions`` keys.
        """
        audio_list = list(seq)
        # Bind any unbound items so to_json can resolve their media paths.
        for audio in audio_list:
            if not audio.is_bound():
                audio.bind_to_run(run, key, step)

        # Called only for its side effect: raises a helpful error when the
        # soundfile dependency is missing. The returned module was unused.
        util.get_module(
            "soundfile",
            required=
            "wandb.Audio requires the soundfile package. To get it, run: pip install soundfile"
        )
        base_path = os.path.join(run.dir, "media", "audio")
        util.mkdir_exists_ok(base_path)
        meta = {
            "_type": "audio",
            "count": len(audio_list),
            "audio": [a.to_json(run) for a in audio_list],
        }
        sample_rates = cls.sample_rates(audio_list)
        if sample_rates:
            meta["sampleRates"] = sample_rates
        durations = cls.durations(audio_list)
        if durations:
            meta["durations"] = durations
        captions = cls.captions(audio_list)
        if captions:
            meta["captions"] = captions

        return meta
示例#5
0
def test_save_live_glob_multi_write(
    mocked_run,
    mock_server,
    sender,
    start_backend,
    stop_backend,
):
    """Live-policy glob: every change to a watched file triggers an upload."""
    start_backend()
    sender.publish_files({"files": [("checkpoints/*", "live")]})
    ckpt_dir = os.path.join(mocked_run.dir, "checkpoints")
    mkdir_exists_ok(ckpt_dir)
    file_one = os.path.join(ckpt_dir, "test_1.txt")
    file_two = os.path.join(ckpt_dir, "test_2.txt")

    def write(path, text):
        with open(path, "w") as fh:
            fh.write(text)

    write(file_one, "TEST TEST")
    time.sleep(1.5)
    write(file_one, "TEST TEST TEST TEST")
    # File system polling happens every second
    time.sleep(1.5)
    write(file_two, "TEST TEST TEST TEST")
    write(file_one, "TEST TEST TEST TEST TEST TEST")
    stop_backend()
    # file 1 changed three times, file 2 once.
    assert len(mock_server.ctx["storage?file=checkpoints/test_1.txt"]) == 3
    assert len(mock_server.ctx["storage?file=checkpoints/test_2.txt"]) == 1
示例#6
0
def test_settings(test_dir, mocker, live_mock_server):
    """Settings object for tests"""
    #  TODO: likely not the right thing to do, we shouldn't be setting this
    wandb._IS_INTERNAL_PROCESS = False
    wandb.wandb_sdk.wandb_run.EXIT_TIMEOUT = 15
    wandb.wandb_sdk.wandb_setup._WandbSetup.instance = None
    mkdir_exists_ok(os.path.join(test_dir, "wandb"))
    # root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
    yield wandb.Settings(
        api_key=DUMMY_API_KEY,
        base_url=live_mock_server.base_url,
        console="off",
        host="test",
        project="test",
        root_dir=test_dir,
        run_id=wandb.util.generate_id(),
        save_code=False,
        _start_datetime=datetime.datetime.now(),
        _start_time=time.time(),
    )
    # Just in case someone forgets to join in tests. ...well, please don't!
    if wandb.run is not None:
        wandb.run.finish()
示例#7
0
    def download(self, replace=False, root="."):
        """Downloads a file previously saved by a run from the wandb server.

        Args:
            replace (boolean): If `True`, download will overwrite a local file
                if it exists. Defaults to `False`.
            root (str): Local directory to save the file. Defaults to ".".

        Returns:
            An open read handle on the downloaded file; the caller is
            responsible for closing it.

        Raises:
            `ValueError` if file already exists and replace=False
        """
        path = os.path.join(root, self._attrs["name"])
        # Fail before issuing the request so we never download bytes we
        # would refuse to write anyway.
        if os.path.exists(path) and not replace:
            raise ValueError(
                "File already exists, pass replace=True to overwrite")
        response = requests.get(self._attrs["url"],
                                auth=("api", Api().api_key),
                                stream=True,
                                timeout=5)
        response.raise_for_status()
        # os.path.dirname is portable across path separators, unlike the
        # previous manual split on "/" (and `dir` shadowed a builtin).
        parent = os.path.dirname(path)
        if parent:
            util.mkdir_exists_ok(parent)
        with open(path, "wb") as file:
            for data in response.iter_content(chunk_size=1024):
                file.write(data)
        return open(path, "r")
示例#8
0
 def check_md5_obj_path(self, b64_md5, size):
     """Map a base64 md5 digest to its cache path; report whether it's cached.

     Returns (path, hit) where hit is True only when a file of exactly
     *size* bytes already sits at the sharded path; on a miss the parent
     directory is created so the caller can write the object.
     """
     hex_md5 = util.bytes_to_hex(base64.b64decode(b64_md5))
     path = os.path.join(
         self._cache_dir, "obj", "md5", hex_md5[:2], hex_md5[2:])
     hit = os.path.isfile(path) and os.path.getsize(path) == size
     if not hit:
         util.mkdir_exists_ok(os.path.dirname(path))
     return path, hit
示例#9
0
def test_save_live_glob_multi_write(mocked_run, mock_server, internal_sender,
                                    start_backend, stop_backend):
    """Live glob policy: three writes to file 1 and one to file 2 upload 3x/1x."""
    start_backend()
    internal_sender.publish_files({"files": [("checkpoints/*", "live")]})
    ckpt_dir = os.path.join(mocked_run.dir, "checkpoints")
    mkdir_exists_ok(ckpt_dir)
    test_file_1 = os.path.join(ckpt_dir, "test_1.txt")
    test_file_2 = os.path.join(ckpt_dir, "test_2.txt")
    # To debug this test adds some prints to the dir_watcher.py _on_file_* handlers
    print("Wrote file 1")
    with open(test_file_1, "w") as fh:
        fh.write("TEST TEST")
    time.sleep(2)
    print("Wrote file 1 2nd time")
    with open(test_file_1, "w") as fh:
        fh.write("TEST TEST TEST TEST")
    # File system polling happens every second
    time.sleep(1.5)
    print("Wrote file 2")
    with open(test_file_2, "w") as fh:
        fh.write("TEST TEST TEST TEST")
    print("Wrote file 1 3rd time")
    with open(test_file_1, "w") as fh:
        fh.write("TEST TEST TEST TEST TEST TEST")
    print("Stopping backend")
    stop_backend()
    print("Backend stopped")
    storage_items = [(k, v) for k, v in mock_server.ctx.items()
                     if k.startswith("storage")]
    print("CTX:", storage_items)
    assert len(mock_server.ctx["storage?file=checkpoints/test_1.txt"]) == 3
    assert len(mock_server.ctx["storage?file=checkpoints/test_2.txt"]) == 1
示例#10
0
def test_save_glob_multi_write(
    mocked_run,
    mock_server,
    sender,
    start_backend,
    stop_backend,
):
    """'now' glob policy: each matching file is uploaded exactly once."""
    start_backend()
    sender.publish_files({"files": [("checkpoints/*", "now")]})
    ckpt_dir = os.path.join(mocked_run.dir, "checkpoints")
    mkdir_exists_ok(ckpt_dir)
    test_file_1 = os.path.join(ckpt_dir, "test_1.txt")
    test_file_2 = os.path.join(ckpt_dir, "test_2.txt")
    print("Wrote file 1")
    with open(test_file_1, "w") as fh:
        fh.write("TEST TEST")
    # File system polling happens every second
    time.sleep(1.5)
    print("Wrote file 2")
    with open(test_file_2, "w") as fh:
        fh.write("TEST TEST TEST TEST")
    time.sleep(1.5)
    print("Stopping backend")
    stop_backend()
    print("Backend stopped")
    storage_items = [(k, v) for k, v in mock_server.ctx.items()
                     if k.startswith("storage")]
    print("CTX", storage_items)
    assert len(mock_server.ctx["storage?file=checkpoints/test_1.txt"]) == 1
    assert len(mock_server.ctx["storage?file=checkpoints/test_2.txt"]) == 1
示例#11
0
 def transform(audio_list, out_dir, key, step):
     """Write up to MAX_AUDIO_COUNT wav files and return their metadata.

     Args:
         audio_list: list of Audio objects with .audio_data and .sample_rate.
         out_dir: output root; files land under media/audio.
         key: media key used in the generated file names.
         step: step number used in the generated file names.

     Returns:
         dict: ``_type``/``count`` plus optional sampleRates, durations
         and captions keys (computed over the stored subset only).
     """
     if len(audio_list) > Audio.MAX_AUDIO_COUNT:
         # logging.warn is a deprecated alias of logging.warning.
         logging.warning(
             "The maximum number of audio files to store per step is %i." %
             Audio.MAX_AUDIO_COUNT)
     # get_module raises a helpful error when soundfile is missing.
     sf = util.get_module(
         "soundfile",
         required=
         "wandb.Audio requires the soundfile package. To get it, run: pip install soundfile"
     )
     base_path = os.path.join(out_dir, "media", "audio")
     util.mkdir_exists_ok(base_path)
     # Slice once instead of re-slicing for every metadata field below.
     clipped = audio_list[:Audio.MAX_AUDIO_COUNT]
     for i, audio in enumerate(clipped):
         sf.write(
             os.path.join(base_path, "{}_{}_{}.wav".format(key, step, i)),
             audio.audio_data, audio.sample_rate)
     meta = {
         "_type": "audio",
         "count": min(len(audio_list), Audio.MAX_AUDIO_COUNT)
     }
     sample_rates = Audio.sample_rates(clipped)
     if sample_rates:
         meta["sampleRates"] = sample_rates
     durations = Audio.durations(clipped)
     if durations:
         meta["durations"] = durations
     captions = Audio.captions(clipped)
     if captions:
         meta["captions"] = captions
     return meta
示例#12
0
def test_settings(test_dir, mocker):
    """ Settings object for tests"""
    #  TODO: likely not the right thing to do, we shouldn't be setting this
    wandb._IS_INTERNAL_PROCESS = False
    wandb.wandb_sdk.wandb_run.EXIT_TIMEOUT = 15
    wandb.wandb_sdk.wandb_setup._WandbSetup.instance = None
    mkdir_exists_ok(os.path.join(os.getcwd(), "wandb"))
    # root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
    # TODO: consider making a debugable directory that stays around...
    settings = wandb.Settings(
        api_key=DUMMY_API_KEY,
        base_url="http://localhost",
        console="off",
        host="test",
        project="test",
        root_dir=os.getcwd(),
        run_id=wandb.util.generate_id(),
        save_code=True,
        _start_datetime=datetime.datetime.now(),
        _start_time=time.time(),
    )
    settings.setdefaults()
    yield settings
    # Clean up when a test forgot to call join on its run.
    if wandb.run is not None:
        wandb.run.join()
示例#13
0
def local_settings(mocker):
    """Place global settings in an isolated dir"""
    with CliRunner().isolated_filesystem():
        config_dir = os.path.join(".config", "wandb")
        mkdir_exists_ok(config_dir)
        # Point the global-settings lookup at the isolated file.
        cfg_path = os.path.join(os.getcwd(), config_dir, "settings")
        mocker.patch("wandb.old.settings.Settings._global_path", return_value=cfg_path)
        yield
示例#14
0
 def check_etag_obj_path(self, etag: str, size: int) -> Tuple[str, bool, Callable]:
     """Locate the sharded cache slot for *etag*.

     Returns (path, cached, opener); cached is True only when a file of
     exactly *size* bytes is already present. On a miss the parent
     directory is created so the caller can populate the slot.
     """
     path = os.path.join(self._cache_dir, "obj", "etag", etag[:2], etag[2:])
     opener = self._cache_opener(path)
     cached = os.path.isfile(path) and os.path.getsize(path) == size
     if not cached:
         util.mkdir_exists_ok(os.path.dirname(path))
     return path, cached, opener
示例#15
0
File: core.py — Project: zbpjlc/client
def termlog(string='', newline=True, repeat=True):
    """Log to standard error with formatting.

    Args:
            string (str, optional): The string to print
            newline (bool, optional): Print a newline at the end of the string
            repeat (bool, optional): If set to False only prints the string once per process
    """
    if string:
        # Prefix every line of a multi-line message with LOG_STRING.
        line = '\n'.join(
            ['{}: {}'.format(LOG_STRING, s) for s in string.split('\n')])
    else:
        line = ''
    if not repeat and line in PRINTED_MESSAGES:
        return
    # Repeated line tracking limited to 1k messages
    if len(PRINTED_MESSAGES) < 1000:
        PRINTED_MESSAGES.add(line)
    if os.getenv(env.SILENT):
        # Local import avoids a circular import at module load time.
        from wandb import util
        util.mkdir_exists_ok(os.path.dirname(util.get_log_file_path()))
        # Append rather than 'w': opening with 'w' truncated the log on
        # every call, so only the most recent message survived.
        with open(util.get_log_file_path(), 'a') as log:
            click.echo(line, file=log, nl=newline)
    else:
        click.echo(line, file=sys.stderr, nl=newline)
示例#16
0
 def __init__(self, cache_dir):
     """Create an artifacts cache rooted at *cache_dir* (created if absent)."""
     self._cache_dir = cache_dir
     util.mkdir_exists_ok(cache_dir)
     obj_root = os.path.join(cache_dir, "obj")
     self._md5_obj_dir = os.path.join(obj_root, "md5")
     self._etag_obj_dir = os.path.join(obj_root, "etag")
     self._artifacts_by_id = {}
     # Per-instance RNG, seeded from OS entropy.
     self._random = random.Random()
     self._random.seed()
示例#17
0
 def check_md5_obj_path(self, b64_md5: str, size: int) -> Tuple[str, bool, Callable]:
     """Locate the sharded cache slot for a base64-encoded md5 digest.

     Returns (path, cached, opener); cached is True only when a file of
     exactly *size* bytes is already present. On a miss the parent
     directory is created so the caller can populate the slot.
     """
     hex_md5 = util.bytes_to_hex(base64.b64decode(b64_md5))
     path = os.path.join(
         self._cache_dir, "obj", "md5", hex_md5[:2], hex_md5[2:])
     opener = self._cache_opener(path)
     cached = os.path.isfile(path) and os.path.getsize(path) == size
     if not cached:
         util.mkdir_exists_ok(os.path.dirname(path))
     return path, cached, opener
示例#18
0
 def new_file(self, name, mode="w"):
     """Open a brand-new file inside the artifact directory.

     Raises ValueError when *name* already exists; returns the open
     handle, which the caller is responsible for closing.
     """
     self._ensure_can_add()
     relative = name.lstrip("/")
     target = os.path.join(self._artifact_dir.name, relative)
     if os.path.exists(target):
         raise ValueError('File with name "%s" already exists' % name)
     util.mkdir_exists_ok(os.path.dirname(target))
     # Record that the artifact gained new content.
     self._added_new = True
     return open(target, mode)
示例#19
0
def test_log_artifact_simple(runner, wandb_init_run):
    """log_artifact needs a type on first use; artifact is named after the run."""
    util.mkdir_exists_ok("artsy")
    # Use context managers so the handles are closed deterministically
    # (the original left both files open, leaking handles).
    with open("artsy/file1.txt", "w") as f:
        f.write("hello")
    with open("artsy/file2.txt", "w") as f:
        f.write("goodbye")
    with pytest.raises(ValueError):
        wandb.log_artifact("artsy")
    art = wandb.log_artifact("artsy", type="dataset")
    assert art.name == "run-" + wandb_init_run.id + "-artsy"
示例#20
0
def test_save_now_relative_path(mocked_run, mock_server, sender, sm, process_q):
    """'now' policy uploads a file in a relative subdirectory exactly once."""
    sender.send_files({"files": [("foo/test.txt", "now")]})
    sm.send(process_q.get())
    target = os.path.join(mocked_run.dir, "foo", "test.txt")
    mkdir_exists_ok(os.path.dirname(target))
    with open(target, "w") as fh:
        fh.write("TEST TEST")
    sm.finish()
    print("DAMN DUDE", mock_server.ctx)
    assert len(mock_server.ctx["storage?file=foo/test.txt"]) == 1
示例#21
0
def test_dir(request):
    """Fixture: chdir into a fresh tests/logs/<test-name> dir; restore cwd after."""
    orig_dir = os.getcwd()
    root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
    test_dir = os.path.join(root, "tests", "logs", request.node.name)
    # Start each test from a clean directory.
    if os.path.exists(test_dir):
        shutil.rmtree(test_dir)
    mkdir_exists_ok(test_dir)
    os.chdir(test_dir)
    # The original yielded `runner`, a name not defined in this fixture
    # (NameError at setup unless a module global exists). Yield the
    # directory this fixture is named for instead.
    yield test_dir
    os.chdir(orig_dir)
示例#22
0
def test_save_now_relative_path(mocked_run, mock_server, sender, start_backend, stop_backend,):
    """'now' policy with a running backend uploads the file exactly once."""
    start_backend()
    sender.publish_files({"files": [("foo/test.txt", "now")]})
    target = os.path.join(mocked_run.dir, "foo", "test.txt")
    mkdir_exists_ok(os.path.dirname(target))
    with open(target, "w") as fh:
        fh.write("TEST TEST")
    stop_backend()
    print("DAMN DUDE", mock_server.ctx)
    assert len(mock_server.ctx["storage?file=foo/test.txt"]) == 1
示例#23
0
def git_repo(runner):
    """Yield a GitRepo wrapping a fresh repo seeded with a wandb settings file."""
    with runner.isolated_filesystem():
        repo = git.Repo.init(".")
        mkdir_exists_ok("wandb")
        # Because the forked process doesn't use my monkey patch above
        with open("wandb/settings", "w") as f:
            f.write("[default]\nproject: test")
        open("README", "wb").close()
        repo.index.add(["README"])
        repo.index.commit("Initial commit")
        yield GitRepo(lazy=False)
示例#24
0
File: test_meta.py — Project: gampx/client
def test_git_untracked_notebook_env_subdir(monkeypatch, git_repo, mocker):
    """An untracked notebook in a subdir is recorded and copied under code/."""
    mocker.patch('wandb._get_python_type', lambda: "jupyter")
    util.mkdir_exists_ok("sub")
    with open("sub/test.ipynb", "w") as f:
        f.write("{}")
    # monkeypatch restores the env var even when an assertion fails; the
    # original's manual os.environ set/del leaked it on failure.
    monkeypatch.setenv(env.NOTEBOOK_NAME, "sub/test.ipynb")
    meta = Meta(InternalApi())
    assert meta.data["program"] == "sub/test.ipynb"
    assert meta.data["codePath"] == "sub/test.ipynb"
    assert os.path.exists("code/sub/test.ipynb")
示例#25
0
 def seq_to_json(cls, html_list, run, key, step):
     """Bind each Html object to the run and return the JSON metadata dict."""
     media_dir = os.path.join(run.dir, cls.get_media_subdir())
     util.mkdir_exists_ok(media_dir)
     for idx, html in enumerate(html_list):
         if not html.is_bound():
             html.bind_to_run(run, key, step, id_=idx)
     return {
         "_type": "html",
         "count": len(html_list),
         "html": [h.to_json(run) for h in html_list],
     }
示例#26
0
    def new_file(self, name: str, mode: str = "w"):
        """Context manager yielding a writable handle for a new artifact file.

        The file is written via a synced open, then registered with the
        artifact under *name*. Raises ValueError when *name* is taken.
        """
        self._ensure_can_add()
        target = os.path.join(self._artifact_dir.name, name.lstrip("/"))
        if os.path.exists(target):
            raise ValueError('File with name "%s" already exists at "%s"' %
                             (name, target))

        util.mkdir_exists_ok(os.path.dirname(target))
        with util.fsync_open(target, mode) as handle:
            yield handle

        # Register the finished file once the caller's block exits.
        self.add_file(target, name=name)
示例#27
0
File: meta.py — Project: wnov/client
 def _setup_code_program(self):
     """Copy the running program into out_dir/code, keeping its path relative to root."""
     logger.debug("save program starting")
     program = os.path.join(self.data["root"], os.path.relpath(os.getcwd(), start=self.data["root"]), self.data["program"])
     logger.debug("save program starting: {}".format(program))
     if not os.path.exists(program):
         return
     relative_path = os.path.relpath(program, start=self.data["root"])
     util.mkdir_exists_ok(os.path.join(self.out_dir, "code", os.path.dirname(relative_path)))
     saved_program = os.path.join(self.out_dir, "code", relative_path)
     logger.debug("save program saved: {}".format(saved_program))
     # Skip the copy when a previous run already saved this program.
     if os.path.exists(saved_program):
         return
     logger.debug("save program")
     copyfile(program, saved_program)
     self.data["codePath"] = relative_path
示例#28
0
 def seq_to_json(cls, videos, run, key, step):
     """Bind each video to the run and return the JSON-ready metadata dict."""
     media_dir = os.path.join(run.dir, cls.get_media_subdir())
     util.mkdir_exists_ok(media_dir)
     for idx, video in enumerate(videos):
         if not video.is_bound():
             video.bind_to_run(run, key, step, id_=idx)
     return {
         "_type": "videos",
         "count": len(videos),
         "videos": [v.to_json(run) for v in videos],
         "captions": Video.captions(videos),
     }
示例#29
0
 def _setup_code_program(self):
     """Save the program file under out_dir/code when git reports it untracked."""
     logger.debug("scan for untracked program")
     program = os.path.join(self.data["root"], self.data["program"])
     # Only act on files that exist and are untracked by git.
     if not (os.path.exists(program)
             and self._api.git.is_untracked(self.data["program"])):
         return
     util.mkdir_exists_ok(
         os.path.join(self.out_dir, "code",
                      os.path.dirname(self.data["program"])))
     saved_program = os.path.join(self.out_dir, "code", self.data["program"])
     if not os.path.exists(saved_program):
         logger.debug("save untracked program")
         copyfile(program, saved_program)
         self.data["codeSaved"] = True
示例#30
0
def save(glob_str, base_path=None, policy="live"):
    """ Ensure all files matching *glob_str* are synced to wandb with the policy specified.

    base_path: the base path to run the glob relative to
    policy:
        live: upload the file as it changes, overwriting the previous version
        end: only upload file when the run ends

    Returns a list of the symlinks created under run.dir (empty when the
    glob was already registered or points at cloud storage).

    Raises ValueError if wandb.init has not been called, the policy is
    unknown, glob_str is not a string, or the glob walks above base_path.
    """
    global _saved_files
    if run is None:
        raise ValueError("You must call `wandb.init` before calling save")
    if policy not in ("live", "end"):
        raise ValueError(
            'Only "live" and "end" policies are currently supported.')
    # Accept bytes input; normalize to text before further checks.
    if isinstance(glob_str, bytes):
        glob_str = glob_str.decode('utf-8')
    if not isinstance(glob_str, string_types):
        raise ValueError("Must call wandb.save(glob_str) with glob_str a str")

    if base_path is None:
        base_path = os.path.dirname(glob_str)
    # The glob sent to the backend is expressed relative to base_path.
    wandb_glob_str = os.path.relpath(glob_str, base_path)
    if "../" in wandb_glob_str:
        raise ValueError("globs can't walk above base_path")
    # Already registered with the same base/policy: nothing new to do.
    if (glob_str, base_path, policy) in _saved_files:
        return []
    if glob_str.startswith("gs://") or glob_str.startswith("s3://"):
        termlog("%s is a cloud storage url, can't save file to wandb." %
                glob_str)
        return []
    # Tell the backend process to watch files matching this glob.
    run.send_message(
        {"save_policy": {
            "glob": wandb_glob_str,
            "policy": policy
        }})
    files = []
    # Symlink every currently-matching file into the run directory so the
    # file pusher picks it up; later-created matches are handled by the
    # save_policy registered above.
    for path in glob.glob(glob_str):
        file_name = os.path.relpath(path, base_path)
        abs_path = os.path.abspath(path)
        wandb_path = os.path.join(run.dir, file_name)
        util.mkdir_exists_ok(os.path.dirname(wandb_path))
        # We overwrite existing symlinks because namespaces can change in Tensorboard
        if os.path.islink(wandb_path) and abs_path != os.readlink(wandb_path):
            os.remove(wandb_path)
            os.symlink(abs_path, wandb_path)
        elif not os.path.exists(wandb_path):
            os.symlink(abs_path, wandb_path)
        files.append(wandb_path)
    # Record the registration so repeated save() calls are no-ops.
    _saved_files.add((glob_str, base_path, policy))
    return files