def from_directory(cls, directory, project=None, entity=None, run_id=None, api=None, ignore_globs=None):
    """Create a run from an existing run directory and upload its contents.

    Scans ``directory`` for the metadata, history, events, config and summary
    files (matched by substring against the well-known file names), upserts a
    run through ``api``, streams the history/events files (or converts
    tfevents files when no history file exists) and finally pushes every
    remaining file.

    Args:
        directory: Path of the run directory to sync.
        project: Project name. Falls back to the project recorded in the
            metadata file, then ``api.settings("project")``, then
            ``run.auto_project_name``.
        entity: Optional entity; when given it is stored as the "entity"
            setting on ``api``.
        run_id: Run id to use; generated with ``util.generate_id()`` when
            omitted.
        api: API client; a new ``InternalApi`` is created when omitted.
        ignore_globs: Optional iterable of ``fnmatch`` patterns; relative
            paths matching any of them are not uploaded.

    Returns:
        The ``Run`` object created for ``directory``.

    Raises:
        ValueError: If no project could be determined.
    """
    api = api or InternalApi()
    run_id = run_id or util.generate_id()
    run = Run(run_id=run_id, dir=directory)
    run_name = None
    project_from_meta = None
    snap = DirectorySnapshot(directory)
    meta = next((p for p in snap.paths if METADATA_FNAME in p), None)
    if meta:
        # Replace the metadata path with its parsed contents; the context
        # manager makes sure the file handle is released.
        with open(meta) as meta_file:
            meta = json.load(meta_file)
        run_name = meta.get("name")
        project_from_meta = meta.get("project")
    project = (
        project
        or project_from_meta
        or api.settings("project")
        or run.auto_project_name(api=api)
    )
    if project is None:
        raise ValueError("You must specify project")
    api.set_current_run_id(run_id)
    api.set_setting("project", project)
    if entity:
        api.set_setting("entity", entity)
    res = api.upsert_run(name=run_id, project=project, entity=entity, display_name=run_name)
    # Use the entity the server resolved, which may differ from the argument.
    entity = res["project"]["entity"]["name"]
    wandb.termlog("Syncing {} to:".format(directory))
    wandb.termlog(res["displayName"] + " " + run.get_url(api))

    file_api = api.get_file_stream_api()
    file_api.start()
    # Upload names are kept relative to the run directory.
    paths = [
        os.path.relpath(abs_path, directory)
        for abs_path in snap.paths
        if os.path.isfile(abs_path)
    ]
    if ignore_globs:
        paths = set(paths)
        for g in ignore_globs:
            paths = paths - set(fnmatch.filter(paths, g))
        paths = list(paths)
    run_update = {"id": res["id"]}
    tfevents = sorted([p for p in snap.paths if ".tfevents." in p])
    history = next((p for p in snap.paths if HISTORY_FNAME in p), None)
    event = next((p for p in snap.paths if EVENTS_FNAME in p), None)
    config = next((p for p in snap.paths if CONFIG_FNAME in p), None)
    user_config = next((p for p in snap.paths if USER_CONFIG_FNAME in p), None)
    summary = next((p for p in snap.paths if SUMMARY_FNAME in p), None)

    if history:
        wandb.termlog("Uploading history metrics")
        file_api.stream_file(history)
        snap.paths.remove(history)
    elif tfevents:
        from wandb import tensorflow as wbtf
        wandb.termlog("Found tfevents file, converting...")
        # From here on ``summary`` is an in-memory dict, not a file path.
        summary = {}
        for path in tfevents:
            filename = os.path.basename(path)
            # Namespace is the tfevents file's directory relative to the run dir.
            namespace = path.replace(filename, "").replace(directory, "").strip(os.sep)
            summary.update(wbtf.stream_tfevents(path, file_api, run, namespace=namespace))
        for path in glob.glob(os.path.join(directory, "media/**/*"), recursive=True):
            if os.path.isfile(path):
                # glob returns paths prefixed with ``directory``; every other
                # entry in ``paths`` is relative and is re-joined with
                # ``directory`` below, so store the relative form here too.
                paths.append(os.path.relpath(path, directory))
    else:
        wandb.termerror(
            "No history or tfevents files found, only syncing files")

    if event:
        file_api.stream_file(event)
        snap.paths.remove(event)

    if config:
        with open(config) as config_file:
            run_update["config"] = util.load_yaml(config_file)
    elif user_config:
        # TODO: half baked support for config.json
        # ``user_config`` is a path, so its contents must be parsed before
        # the key/value pairs can be iterated.
        with open(user_config) as user_config_file:
            user_config_values = json.load(user_config_file)
        run_update["config"] = {
            k: {"value": v} for k, v in six.iteritems(user_config_values)
        }

    if isinstance(summary, dict):
        # TODO: summary should already have data_types converted here...
        run_update["summary_metrics"] = util.json_dumps_safer(summary)
    elif summary:
        with open(summary) as summary_file:
            run_update["summary_metrics"] = summary_file.read()

    if meta:
        if meta.get("git"):
            run_update["commit"] = meta["git"].get("commit")
            run_update["repo"] = meta["git"].get("remote")
        run_update["host"] = meta["host"]
        run_update["program_path"] = meta["program"]
        run_update["job_type"] = meta.get("jobType")
        run_update["notes"] = meta.get("notes")
    else:
        run_update["host"] = run.host

    wandb.termlog("Updating run and uploading files")
    api.upsert_run(**run_update)
    pusher = FilePusher(api)
    for k in paths:
        path = os.path.abspath(os.path.join(directory, k))
        pusher.update_file(k, path)
        pusher.file_changed(k, path)
    pusher.finish()
    pusher.print_status()
    file_api.finish(0)
    # Remove temporary media images generated from tfevents
    if history is None and os.path.exists(os.path.join(directory, "media")):
        shutil.rmtree(os.path.join(directory, "media"))
    wandb.termlog("Finished!")
    return run
def sweep(ctx, project, entity, controller, verbose, name, program, settings, update, config_yaml):
    """Create a sweep, or update an existing one, from a YAML configuration file.

    Loads ``config_yaml``, applies the command-line overrides (``name``,
    ``program``, ``settings``, ``controller``), upserts the sweep and prints
    the sweep id, URL and the agent command. When ``controller`` is set, a
    local controller is started afterwards.

    Errors (unknown sweep, unreadable or empty config, invalid local
    controller config) are reported with ``wandb.termerror`` and the function
    returns without raising.

    Note: ``api`` is not assigned inside this function; it is resolved from
    the enclosing module scope.

    Args:
        ctx: Command context; used to invoke ``login`` when no API key is set.
        project: Project name override.
        entity: Entity name override.
        controller: Truthy to force and run a local controller.
        verbose: Passed through to the controller's ``run``.
        name: Overrides ``config["name"]`` when set.
        program: Overrides ``config["program"]`` when set.
        settings: Comma-separated ``key=value`` pairs merged into
            ``config["settings"]``.
        update: Identifier of an existing sweep to update instead of creating
            a new one.
        config_yaml: Path to the sweep configuration file.
    """
    def _parse_settings(settings):
        """Parse comma-separated ``key=value`` assignments into a dict.

        Input without a usable ``=`` produces a warning and an empty dict.
        """
        ret = {}
        # TODO(jhr): merge with magic_impl:_parse_magic
        if settings.find('=') > 0:
            for item in settings.split(","):
                kv = item.split("=")
                if len(kv) != 2:
                    wandb.termwarn("Unable to parse sweep settings key value pair", repeat=False)
                    # Skip the malformed item; building a dict entry from it
                    # would raise ValueError right after the warning.
                    continue
                key, value = kv
                ret[key] = value
            return ret
        wandb.termwarn("Unable to parse settings parameter", repeat=False)
        return ret

    if api.api_key is None:
        wandb.termlog("Login to W&B to use the sweep feature")
        ctx.invoke(login, no_offline=True)

    sweep_obj_id = None
    if update:
        # ``update`` may carry entity/project; parse_sweep_id fills ``parts``.
        parts = dict(entity=entity, project=project, name=update)
        err = util.parse_sweep_id(parts)
        if err:
            wandb.termerror(err)
            return
        entity = parts.get("entity") or entity
        project = parts.get("project") or project
        sweep_id = parts.get("name") or update
        found = api.sweep(sweep_id, '{}', entity=entity, project=project)
        if not found:
            wandb.termerror('Could not find sweep {}/{}/{}'.format(entity, project, sweep_id))
            return
        sweep_obj_id = found['id']

    wandb.termlog('{} sweep from: {}'.format(
        'Updating' if sweep_obj_id else 'Creating', config_yaml))
    try:
        yaml_file = open(config_yaml)
    except (OSError, IOError):
        wandb.termerror('Couldn\'t open sweep file: %s' % config_yaml)
        return
    try:
        config = util.load_yaml(yaml_file)
    except yaml.YAMLError as err:
        wandb.termerror('Error in configuration file: %s' % err)
        return
    finally:
        # Release the file handle whether or not parsing succeeded.
        yaml_file.close()
    if config is None:
        wandb.termerror('Configuration file is empty')
        return

    # Set or override parameters
    if name:
        config["name"] = name
    if program:
        config["program"] = program
    if settings:
        settings = _parse_settings(settings)
        if settings:
            config.setdefault("settings", {})
            config["settings"].update(settings)
    if controller:
        config.setdefault("controller", {})
        config["controller"]["type"] = "local"

    is_local = config.get('controller', {}).get('type') == 'local'
    if is_local:
        # Validate locally-controlled sweeps before sending them upstream.
        tuner = wandb_controller.controller()
        err = tuner._validate(config)
        if err:
            wandb.termerror('Error in sweep file: %s' % err)
            return

    entity = entity or env.get_entity() or config.get('entity')
    project = project or env.get_project() or config.get('project') or util.auto_project_name(
        config.get("program"), api)
    sweep_id = api.upsert_sweep(config, project=project, entity=entity, obj_id=sweep_obj_id)
    wandb.termlog('{} sweep with ID: {}'.format(
        'Updated' if sweep_obj_id else 'Created',
        click.style(sweep_id, fg="yellow")))
    sweep_url = wandb_controller._get_sweep_url(api, sweep_id)
    if sweep_url:
        wandb.termlog("View sweep at: {}".format(
            click.style(sweep_url, underline=True, fg='blue')))

    # reprobe entity and project if it was autodetected by upsert_sweep
    entity = entity or env.get_entity()
    project = project or env.get_project()

    if entity and project:
        sweep_path = "{}/{}/{}".format(entity, project, sweep_id)
    elif project:
        sweep_path = "{}/{}".format(project, sweep_id)
    else:
        sweep_path = sweep_id
    wandb.termlog("Run sweep agent with: {}".format(
        click.style("wandb agent %s" % sweep_path, fg="yellow")))
    if controller:
        wandb.termlog('Starting wandb controller...')
        tuner = wandb_controller.controller(sweep_id)
        tuner.run(verbose=verbose)
def sweep(
    ctx,
    project,
    entity,
    controller,
    verbose,
    name,
    program,
    settings,
    update,
    config_yaml,
):  # noqa: C901
    """Create a sweep, or update an existing one, from a YAML configuration file.

    Loads ``config_yaml``, applies the command-line overrides (``name``,
    ``program``, ``settings``, ``controller``), upserts the sweep through a
    fresh ``InternalApi`` and prints the sweep id, URL and the agent command.
    When ``controller`` is set, a local controller is started afterwards.

    Errors (unknown sweep, unreadable or empty config, invalid local
    controller config) are reported with ``wandb.termerror`` and the function
    returns without raising.

    Args:
        ctx: Command context; used to invoke ``login`` when no API key is set.
        project: Project name override; falls back to ``WANDB_PROJECT``, the
            config's ``project`` and ``util.auto_project_name``.
        entity: Entity name override; falls back to ``WANDB_ENTITY`` and the
            config's ``entity``.
        controller: Truthy to force and run a local controller.
        verbose: Passed through to the controller's ``run``.
        name: Overrides ``config["name"]`` when set.
        program: Overrides ``config["program"]`` when set.
        settings: Comma-separated ``key=value`` pairs merged into
            ``config["settings"]``.
        update: Identifier of an existing sweep to update instead of creating
            a new one.
        config_yaml: Path to the sweep configuration file.
    """

    def _parse_settings(settings):
        """Parse comma-separated ``key=value`` assignments into a dict.

        Input without a usable ``=`` produces a warning and an empty dict.
        """
        ret = {}
        # TODO(jhr): merge with magic:_parse_magic
        if settings.find("=") > 0:
            for item in settings.split(","):
                kv = item.split("=")
                if len(kv) != 2:
                    wandb.termwarn(
                        "Unable to parse sweep settings key value pair", repeat=False
                    )
                    # Skip the malformed item; building a dict entry from it
                    # would raise ValueError right after the warning.
                    continue
                key, value = kv
                ret[key] = value
            return ret
        wandb.termwarn("Unable to parse settings parameter", repeat=False)
        return ret

    api = InternalApi()
    if api.api_key is None:
        wandb.termlog("Login to W&B to use the sweep feature")
        ctx.invoke(login, no_offline=True)

    sweep_obj_id = None
    if update:
        # ``update`` may carry entity/project; parse_sweep_id fills ``parts``.
        parts = dict(entity=entity, project=project, name=update)
        err = util.parse_sweep_id(parts)
        if err:
            wandb.termerror(err)
            return
        entity = parts.get("entity") or entity
        project = parts.get("project") or project
        sweep_id = parts.get("name") or update
        found = api.sweep(sweep_id, "{}", entity=entity, project=project)
        if not found:
            wandb.termerror(
                "Could not find sweep {}/{}/{}".format(entity, project, sweep_id)
            )
            return
        sweep_obj_id = found["id"]

    wandb.termlog(
        "{} sweep from: {}".format(
            "Updating" if sweep_obj_id else "Creating", config_yaml
        )
    )
    try:
        yaml_file = open(config_yaml)
    except OSError:
        wandb.termerror("Couldn't open sweep file: %s" % config_yaml)
        return
    try:
        config = util.load_yaml(yaml_file)
    except yaml.YAMLError as err:
        wandb.termerror("Error in configuration file: %s" % err)
        return
    finally:
        # Release the file handle whether or not parsing succeeded.
        yaml_file.close()
    if config is None:
        wandb.termerror("Configuration file is empty")
        return

    # Set or override parameters
    if name:
        config["name"] = name
    if program:
        config["program"] = program
    if settings:
        settings = _parse_settings(settings)
        if settings:
            config.setdefault("settings", {})
            config["settings"].update(settings)
    if controller:
        config.setdefault("controller", {})
        config["controller"]["type"] = "local"

    is_local = config.get("controller", {}).get("type") == "local"
    if is_local:
        # Validate locally-controlled sweeps before sending them upstream.
        tuner = wandb_controller.controller()
        err = tuner._validate(config)
        if err:
            wandb.termerror("Error in sweep file: %s" % err)
            return

    env = os.environ
    entity = entity or env.get("WANDB_ENTITY") or config.get("entity")
    project = (
        project
        or env.get("WANDB_PROJECT")
        or config.get("project")
        or util.auto_project_name(config.get("program"))
    )
    sweep_id = api.upsert_sweep(
        config, project=project, entity=entity, obj_id=sweep_obj_id
    )
    wandb.termlog(
        "{} sweep with ID: {}".format(
            "Updated" if sweep_obj_id else "Created", click.style(sweep_id, fg="yellow")
        )
    )
    sweep_url = wandb_controller._get_sweep_url(api, sweep_id)
    if sweep_url:
        wandb.termlog(
            "View sweep at: {}".format(
                click.style(sweep_url, underline=True, fg="blue")
            )
        )

    # reprobe entity and project if it was autodetected by upsert_sweep
    entity = entity or env.get("WANDB_ENTITY")
    project = project or env.get("WANDB_PROJECT")

    if entity and project:
        sweep_path = "{}/{}/{}".format(entity, project, sweep_id)
    elif project:
        sweep_path = "{}/{}".format(project, sweep_id)
    else:
        sweep_path = sweep_id

    # Quote the path so the printed command stays a single shell argument.
    if sweep_path.find(" ") >= 0:
        sweep_path = '"{}"'.format(sweep_path)

    wandb.termlog(
        "Run sweep agent with: {}".format(
            click.style("wandb agent %s" % sweep_path, fg="yellow")
        )
    )
    if controller:
        wandb.termlog("Starting wandb controller...")
        tuner = wandb_controller.controller(sweep_id)
        tuner.run(verbose=verbose)
def from_directory(cls, directory, project=None, entity=None, run_id=None, api=None):
    """Create a run from an existing run directory and upload its contents.

    Upserts a run through ``api``, scans ``directory`` for the history,
    events, config, summary and metadata files (matched by substring against
    the well-known file names), streams history/events (or converts tfevents
    files when no history file exists) and pushes every file in the
    directory.

    Args:
        directory: Path of the run directory to sync.
        project: Project name. Falls back to ``api.settings("project")`` and
            then ``run.auto_project_name``.
        entity: Optional entity; when given it is stored as the "entity"
            setting on ``api``.
        run_id: Run id to use; generated with ``util.generate_id()`` when
            omitted.
        api: API client; a new ``InternalApi`` is created when omitted.

    Returns:
        The ``Run`` object created for ``directory``.

    Raises:
        ValueError: If no project could be determined.
    """
    api = api or InternalApi()
    run_id = run_id or util.generate_id()
    run = Run(run_id=run_id, dir=directory)
    project = project or api.settings("project") or run.auto_project_name(
        api=api)
    if project is None:
        raise ValueError("You must specify project")
    api.set_current_run_id(run_id)
    api.set_setting("project", project)
    if entity:
        api.set_setting("entity", entity)
    res = api.upsert_run(name=run_id, project=project, entity=entity)
    # Use the entity the server resolved, which may differ from the argument.
    entity = res["project"]["entity"]["name"]
    wandb.termlog("Syncing {} to:".format(directory))
    wandb.termlog(run.get_url(api))

    file_api = api.get_file_stream_api()
    snap = DirectorySnapshot(directory)
    # Upload names are kept relative to the run directory.
    paths = [
        os.path.relpath(abs_path, directory) for abs_path in snap.paths
        if os.path.isfile(abs_path)
    ]
    run_update = {"id": res["id"]}
    tfevents = sorted([p for p in snap.paths if ".tfevents." in p])
    history = next((p for p in snap.paths if HISTORY_FNAME in p), None)
    event = next((p for p in snap.paths if EVENTS_FNAME in p), None)
    config = next((p for p in snap.paths if CONFIG_FNAME in p), None)
    user_config = next((p for p in snap.paths if USER_CONFIG_FNAME in p), None)
    summary = next((p for p in snap.paths if SUMMARY_FNAME in p), None)
    meta = next((p for p in snap.paths if METADATA_FNAME in p), None)

    if history:
        wandb.termlog("Uploading history metrics")
        file_api.stream_file(history)
        snap.paths.remove(history)
    elif tfevents:
        from wandb import tensorflow as wbtf
        wandb.termlog("Found tfevents file, converting.")
        for tfevents_path in tfevents:
            # The result of the last converted file replaces ``summary``.
            summary = wbtf.stream_tfevents(tfevents_path, file_api)
    else:
        wandb.termerror(
            "No history or tfevents files found, only syncing files")

    if event:
        file_api.stream_file(event)
        snap.paths.remove(event)

    if config:
        with open(config) as config_file:
            run_update["config"] = util.load_yaml(config_file)
    elif user_config:
        # TODO: half baked support for config.json
        # ``user_config`` is a path, so its contents must be parsed before
        # the key/value pairs can be iterated.
        with open(user_config) as user_config_file:
            user_config_values = json.load(user_config_file)
        run_update["config"] = {
            k: {
                "value": v
            }
            for k, v in six.iteritems(user_config_values)
        }

    if summary:
        if isinstance(summary, dict):
            # NOTE(review): after tfevents conversion ``summary`` appears to
            # be an in-memory mapping rather than a file path, which open()
            # cannot read - confirm against wbtf.stream_tfevents and that
            # util.json_dumps_safer is available in this module's ``util``.
            run_update["summary_metrics"] = util.json_dumps_safer(summary)
        else:
            with open(summary) as summary_file:
                run_update["summary_metrics"] = summary_file.read()

    if meta:
        with open(meta) as meta_file:
            meta = json.load(meta_file)
        if meta.get("git"):
            run_update["commit"] = meta["git"].get("commit")
            run_update["repo"] = meta["git"].get("remote")
        run_update["host"] = meta["host"]
        run_update["program_path"] = meta["program"]
        run_update["job_type"] = meta.get("jobType")
    else:
        run_update["host"] = socket.gethostname()

    wandb.termlog("Updating run and uploading files")
    api.upsert_run(**run_update)
    pusher = FilePusher(api)
    for k in paths:
        path = os.path.abspath(os.path.join(directory, k))
        pusher.update_file(k, path)
        pusher.file_changed(k, path)
    pusher.finish()
    pusher.print_status()
    wandb.termlog("Finished!")
    return run