def log_env(self, rel_path: str = None, content: Dict = None):
    """Logs information about the environment.

    Called automatically if track_env is set to True.

    Can be called manually, and can accept a custom content as a form
    of a dictionary.

    Args:
        rel_path: str, optional, default "env.json".
        content: Dict, optional, default to current system information.
    """
    # Nothing to do if the run's outputs directory was never created.
    if not os.path.exists(self._outputs_path):
        return
    if not content:
        content = get_run_env()
    rel_path = rel_path or "env.json"
    # rel_path always has a value here, so the dead `if rel_path:` check
    # and the single-argument os.path.join wrapper were removed.
    path = os.path.join(self._outputs_path, rel_path)
    with open(path, "w") as env_file:
        env_file.write(ujson.dumps(content))
    artifact_run = V1RunArtifact(
        name="env",
        kind=V1ArtifactKind.ENV,
        path=self.get_rel_asset_path(path=path),
        summary={"path": path},
        is_input=False,
    )
    self.log_artifact_lineage(body=artifact_run)
def log_tensorboard_ref(
    self,
    path: str,
    name: str = "tensorboard",
    is_input: bool = False,
    rel_path: str = None,
):
    """Logs dir reference.

    Args:
        path: str, path to the tensorboard logdir.
        name: str, if the name is passed it will be used instead of the
              dirname from the path.
        is_input: bool, if the tensorboard reference is an input or outputs
        rel_path: str, optional relative path to run the artifacts path.
    """
    asset_rel_path = self.get_rel_asset_path(path=path, rel_path=rel_path)
    self.log_artifact_lineage(
        body=V1RunArtifact(
            name=name,
            kind=V1ArtifactKind.TENSORBOARD,
            path=asset_rel_path,
            summary={"path": path},
            is_input=is_input,
        )
    )
    # Flag the run so UIs know a tensorboard logdir exists.
    self._log_meta(has_tensorboard=True)
def log_data_ref(
    self,
    name: str,
    hash: str = None,
    path: str = None,
    content=None,
    is_input: bool = True,
):
    """Logs data reference.

    Args:
        name: str, name of the data.
        hash: str, optional, default = None, the hash version of the data,
              if not provided it will be calculated based on the data in
              the content.
        path: str, optional, path of where the data is coming from.
        is_input: bool, if the data reference is an input or outputs.
        content: the data content.
    """
    summary = {}
    # An explicit hash wins; otherwise derive one from the content.
    if hash:
        summary["hash"] = hash
    elif content is not None:
        summary["hash"] = hash_value(content)
    if path is not None:
        summary["path"] = path
    if name:
        self.log_artifact_lineage(
            body=V1RunArtifact(
                name=name,
                kind=V1ArtifactKind.DATA,
                path=path,
                summary=summary,
                is_input=is_input,
            )
        )
def log_file_ref(self, path: str, name: str = None, hash: str = None, content=None):
    """Logs file reference.

    Args:
        path: str, filepath, the name is extracted from the filepath.
        name: str, if the name is passed it will be used instead of the
              filename from the path.
        hash: str, optional, default = None, the hash version of the file,
              if not provided it will be calculated based on the file content.
        content: the file content.
    """
    summary = {"path": path}
    if hash:
        summary["hash"] = hash
    elif content is not None:
        summary["hash"] = hash_value(content)
    # Fall back to the filename when no explicit name is given.
    if not name:
        name = os.path.basename(path)
    if name:
        self.log_artifact_lineage(
            body=V1RunArtifact(
                name=name,
                kind=V1ArtifactKind.FILE,
                summary=summary,
                is_input=True,
            )
        )
def runs_set_artifacts(run_id: int, run: Optional[BaseRun], artifacts: List[Dict]):
    """Resolves the run and attaches the given artifact lineage records to it."""
    resolved_run = get_run(run_id=run_id, run=run)
    if not resolved_run:
        return
    lineage = [V1RunArtifact.from_dict(a) for a in artifacts]
    set_artifacts(run=resolved_run, artifacts=lineage)
def handler():
    """Collects configs/metrics of matching runs and logs an iteration lineage.

    Returns:
        Tuple of (run_uuids, configs, metrics) for runs that reported the
        optimization metric.
    """
    response = client.list(
        query=search.query,
        sort=search.sort,
        limit=search.limit,
        offset=search.offset,
    )
    runs = response.results or []
    configs = []
    metrics = []
    run_uuids = []
    for current_run in runs:
        # Only keep runs that actually reported the optimization metric.
        if optimization_metric not in current_run.outputs:
            continue
        run_uuids.append(current_run.uuid)
        configs.append(current_run.inputs)
        metrics.append(current_run.outputs[optimization_metric])
    if configs or metrics or run_uuids:
        client.log_artifact_lineage(
            V1RunArtifact(
                name=name or "in-iteration-{}".format(iteration),
                kind=V1ArtifactKind.ITERATION,
                summary={
                    "iteration": iteration,
                    "configs": [sanitize_dict(s) for s in configs],
                    "metrics": [sanitize_np_types(s) for s in metrics],
                    "uuid": run_uuids,
                },
                is_input=True,
            )
        )
    return run_uuids, configs, metrics
def log_dir_ref(
    self,
    path: str,
    name: str = None,
    is_input: bool = False,
    rel_path: str = None,
):
    """Logs dir reference.

    Args:
        path: str, dir path, the name is extracted from the path.
        name: str, if the name is passed it will be used instead of the
              dirname from the path.
        is_input: bool, if the dir reference is an input or outputs.
        rel_path: str, optional relative path to the run artifacts path.
    """
    dir_name = name or os.path.basename(path)
    asset_rel_path = self.get_rel_asset_path(path=path, rel_path=rel_path)
    if dir_name:
        self.log_artifact_lineage(
            body=V1RunArtifact(
                name=dir_name,
                kind=V1ArtifactKind.DIR,
                path=asset_rel_path,
                summary={"path": path},
                is_input=is_input,
            )
        )
def create_code_repo(repo_path: str, url: str, revision: str, connection: str = None):
    """Clones a git repo, checks out a revision, and logs a code-ref lineage.

    Args:
        repo_path: str, local path where the repo is cloned.
        url: str, git url to clone.
        revision: str, optional revision (commit/branch/tag) to checkout.
        connection: str, optional connection name to attach to the lineage.

    Raises:
        PolyaxonContainerException: if the url cannot be parsed or the run
            info cannot be resolved.
    """
    try:
        clone_url = get_clone_url(url)
    except Exception as e:
        raise PolyaxonContainerException("Error parsing url: {}.".format(url)) from e

    clone_git_repo(repo_path=repo_path, url=clone_url)
    set_remote(repo_path=repo_path, url=url)
    if revision:
        checkout_revision(repo_path=repo_path, revision=revision)

    if not settings.CLIENT_CONFIG.no_api:
        try:
            owner, project, run_uuid = get_run_info()
        except PolyaxonClientException as e:
            # Explicitly chain the cause, consistent with the raise above.
            raise PolyaxonContainerException(e) from e
        code_ref = get_code_reference(path=repo_path, url=url)
        artifact_run = V1RunArtifact(
            name=code_ref.get("commit"),
            kind=V1ArtifactKind.CODEREF,
            connection=connection,
            summary=code_ref,
            is_input=True,
        )
        RunClient(
            owner=owner, project=project, run_uuid=run_uuid
        ).log_artifact_lineage(artifact_run)
def test_histograms_summaries(self):
    """Histogram events yield one lineage summary and no last values."""
    summaries, last_values = sync_events_summaries(
        events_path="tests/fixtures/polyboard",
        events_kind="histogram",
        last_check=None,
    )
    data_path = os.path.abspath(
        "tests/fixtures/polyboard/histogram/histogram_events.plx"
    )
    events = V1Events.read(name="histogram_events", kind="histogram", data=data_path)
    assert events.name == "histogram_events"
    expected = V1RunArtifact(
        name="histogram_events",
        kind="histogram",
        connection=None,
        summary=events.get_summary(),
        path=os.path.relpath(
            "tests/fixtures/polyboard/histogram/histogram_events.plx",
            CONTEXT_MOUNT_ARTIFACTS,
        ),
        is_input=False,
    )
    assert summaries == [expected]
    assert last_values == {}
def test_models_summaries(self):
    """Model events (with and without step) each yield a lineage summary."""
    summaries, last_values = sync_events_summaries(
        events_path="tests/fixtures/polyboard",
        events_kind="model",
        last_check=None,
    )
    by_name = {s.name: s for s in summaries}

    # Both fixtures follow the same layout; verify each against its events file.
    for event_name in ("model_events", "model_events_without_step"):
        data_path = os.path.abspath(
            "tests/fixtures/polyboard/model/{}.plx".format(event_name)
        )
        events = V1Events.read(name=event_name, kind="model", data=data_path)
        assert events.name == event_name
        assert by_name[event_name] == V1RunArtifact(
            name=event_name,
            kind="model",
            connection=None,
            summary=events.get_summary(),
            path=os.path.relpath(data_path, CONTEXT_MOUNT_ARTIFACTS),
            is_input=False,
        )
    assert last_values == {}
def log_tensorboard_ref(self, path: str):
    """Logs a tensorboard logdir reference and flags the run accordingly."""
    self.log_artifact_lineage(
        body=V1RunArtifact(
            name="tensorboard",
            kind=V1ArtifactKind.TENSORBOARD,
            summary={"path": path},
            is_input=True,
        )
    )
    self._log_meta(has_tensorboard=True)
def log_data_ref(self, name: str, data):
    """Logs a data reference keyed by a content hash of the data."""
    if not name:
        return
    self.log_artifact_lineage(
        body=V1RunArtifact(
            name=name,
            kind=V1ArtifactKind.DATA,
            summary={"hash": hash_value(data)},
            is_input=True,
        )
    )
def collect_lineage_artifacts_path(artifact_path: str) -> Optional[V1RunArtifact]:
    """Builds a dir-kind input lineage artifact for the given path."""
    base_name = os.path.basename(artifact_path)
    return V1RunArtifact(
        name=to_fqn_name(base_name),
        kind=V1ArtifactKind.DIR,
        path=artifact_path,
        summary={"path": artifact_path},
        is_input=True,
    )
def log_dir_ref(self, path: str):
    """Logs a dir reference named after the directory's basename."""
    dir_name = os.path.basename(path)
    if not dir_name:
        return
    self.log_artifact_lineage(
        body=V1RunArtifact(
            name=dir_name,
            kind=V1ArtifactKind.DIR,
            summary={"path": path},
            is_input=True,
        )
    )
def log_code_ref(self):
    """Logs a code reference detected from the current git repo, if any."""
    code_ref = get_code_reference()
    if not code_ref:
        return
    self.log_artifact_lineage(
        body=V1RunArtifact(
            # NOTE(review): name may be None if the code ref has no commit.
            name=code_ref.get("commit"),
            kind=V1ArtifactKind.CODEREF,
            summary=code_ref,
            is_input=True,
        )
    )
def handler():
    """Logs new suggestions as an iteration artifact, or marks success."""
    if not suggestions:
        client.log_succeeded(message="Iterative operation has succeeded")
        Printer.print_success("Iterative optimization succeeded")
        return
    artifact_run = V1RunArtifact(
        name=name or "out-iteration-{}".format(iteration),
        kind=V1ArtifactKind.ITERATION,
        summary=summary,
        is_input=False,
    )
    client.log_artifact_lineage(artifact_run)
    Printer.print_success("Tuner generated new suggestions.")
def log_file_ref(self, path: str, hash: str = None, content=None):
    """Logs a file reference; the hash is explicit or derived from content."""
    summary = {"path": path}
    if hash:
        summary["hash"] = hash
    elif content is not None:
        summary["hash"] = hash_value(content)
    file_name = os.path.basename(path)
    if not file_name:
        return
    self.log_artifact_lineage(
        body=V1RunArtifact(
            name=file_name,
            kind=V1ArtifactKind.FILE,
            summary=summary,
            is_input=True,
        )
    )
def log_tensorboard_ref(self, path: str, name: str = "tensorboard"):
    """Logs dir reference.

    Args:
        path: str, path to the tensorboard logdir.
        name: str, if the name is passed it will be used instead of the
              dirname from the path.
    """
    self.log_artifact_lineage(
        body=V1RunArtifact(
            name=name,
            kind=V1ArtifactKind.TENSORBOARD,
            summary={"path": path},
            is_input=True,
        )
    )
    self._log_meta(has_tensorboard=True)
def log_dir_ref(self, path: str, name: str = None):
    """Logs dir reference.

    Args:
        path: str, dir path, the name is extracted from the path.
        name: str, if the name is passed it will be used instead of the
              dirname from the path.
    """
    dir_name = name if name else os.path.basename(path)
    if not dir_name:
        return
    self.log_artifact_lineage(
        body=V1RunArtifact(
            name=dir_name,
            kind=V1ArtifactKind.DIR,
            summary={"path": path},
            is_input=True,
        )
    )
def log_code_ref(self, code_ref: Dict = None):
    """Logs code reference.

    Args:
        code_ref: dict, optional, if not provided, Polyaxon will detect the
                  code reference from the git repo in the current path.
    """
    code_ref = code_ref or get_code_reference()
    # Only log when a commit is available to name the artifact.
    if not code_ref or "commit" not in code_ref:
        return
    self.log_artifact_lineage(
        body=V1RunArtifact(
            name=code_ref.get("commit"),
            kind=V1ArtifactKind.CODEREF,
            summary=code_ref,
            is_input=True,
        )
    )
def collect_artifacts_from_io(
    io: V1IO,
    connection_by_names: Dict[str, V1ConnectionType],
    is_input: bool,
) -> Optional[V1RunArtifact]:
    """Builds a docker-image lineage artifact from an IO spec, if applicable.

    Returns None for io types that are not tracked as lineage.
    """
    if io.iotype not in LINEAGE_VALUES:
        return None

    if io.iotype == IMAGE:
        connection = connection_by_names.get(io.value["connection"])
        # Prefix the image with the registry url when the connection exposes one.
        if connection and connection.schema and connection.schema.url:
            image = "{}/{}".format(connection.schema.url, io.value["name"])
        else:
            image = io.value["name"]
        return V1RunArtifact(
            name=io.name,
            kind=V1ArtifactKind.DOCKER_IMAGE,
            connection=io.value["connection"],
            summary={"image": image},
            is_input=is_input,
        )
def create_code_repo(
    repo_path: str,
    url: str,
    revision: str,
    connection: str = None,
    flags: List[str] = None,
):
    """Fetches or clones a git repo and logs a code-ref lineage for the run.

    Args:
        repo_path: str, local path where the repo is cloned.
        url: str, git url to clone.
        revision: str, optional revision (commit/branch/tag) to checkout.
        connection: str, optional connection name to attach to the lineage.
        flags: List[str], optional git flags; "--experimental-fetch" selects
            the fetch-based strategy and is consumed before being passed on.

    Raises:
        PolyaxonContainerException: if the url cannot be parsed or the run
            client cannot be created.
    """
    try:
        clone_url = get_clone_url(url)
    except Exception as e:
        raise PolyaxonContainerException("Error parsing url: {}.".format(url)) from e

    if flags and "--experimental-fetch" in flags:
        # The marker flag is consumed here; the remaining flags go to git.
        flags.remove("--experimental-fetch")
        fetch_git_repo(
            repo_path=repo_path, clone_url=clone_url, revision=revision, flags=flags
        )
    else:
        clone_and_checkout_git_repo(
            repo_path=repo_path, clone_url=clone_url, revision=revision, flags=flags
        )

    # Update remote
    set_remote(repo_path=repo_path, url=url)

    if settings.CLIENT_CONFIG.no_api:
        return

    try:
        run_client = RunClient()
    except PolyaxonClientException as e:
        # Explicitly chain the cause, consistent with the raise above.
        raise PolyaxonContainerException(e) from e

    code_ref = get_code_reference(path=repo_path, url=url)
    artifact_run = V1RunArtifact(
        name=code_ref.get("commit"),
        kind=V1ArtifactKind.CODEREF,
        connection=connection,
        summary=code_ref,
        is_input=True,
    )
    run_client.log_artifact_lineage(artifact_run)
def log_data_ref(self, name: str, hash: str = None, path: str = None, content=None):
    """Logs a data reference with an optional hash and source path."""
    summary = {}
    if hash:
        summary["hash"] = hash
    elif content is not None:
        summary["hash"] = hash_value(content)
    if path is not None:
        summary["path"] = path
    if name:
        self.log_artifact_lineage(
            body=V1RunArtifact(
                name=name,
                kind=V1ArtifactKind.DATA,
                summary=summary,
                is_input=True,
            )
        )
def create_dockerfile_lineage(dockerfile_path: str, summary: Dict):
    """Logs a dockerfile lineage artifact for the given path.

    No-op when the path is empty or the client is configured with no_api.

    Args:
        dockerfile_path: str, path to the generated dockerfile.
        summary: Dict, summary payload to attach to the lineage.

    Raises:
        PolyaxonContainerException: if the run client cannot be created.
    """
    if not dockerfile_path:
        return
    filename = os.path.basename(dockerfile_path)

    if settings.CLIENT_CONFIG.no_api:
        return

    try:
        run_client = RunClient()
    except PolyaxonClientException as e:
        # Explicitly chain the cause, consistent with similar helpers.
        raise PolyaxonContainerException(e) from e

    artifact_run = V1RunArtifact(
        name=filename,
        kind=V1ArtifactKind.DOCKERFILE,
        path=get_rel_asset_path(dockerfile_path),
        summary=summary,
        is_input=True,
    )
    run_client.log_artifact_lineage(artifact_run)
def sync_events_summaries(
    events_path: str,
    events_kind: str,
    last_check: Optional[datetime],
    connection_name: str = None,
) -> Tuple[List, Dict]:
    """Scans an events directory and builds lineage summaries for changed files.

    Returns:
        Tuple of (artifact summaries, last metric values keyed by event name).
    """
    events_dir = get_path(events_path, events_kind)
    summaries = []
    last_values = {}
    with get_files_in_path_context(events_dir) as files:
        for filepath in files:
            # Skip files that have not changed since the last sync.
            if last_check and not file_modified_since(
                filepath=filepath, last_time=last_check
            ):
                continue
            event_name = os.path.basename(filepath).split(".plx")[0]
            event = V1Events.read(kind=events_kind, name=event_name, data=filepath)
            if event.df.empty:
                continue
            # Get only the relpath from run uuid
            event_rel_path = os.path.relpath(filepath, CONTEXT_MOUNT_ARTIFACTS)
            summary = event.get_summary()
            summaries.append(
                V1RunArtifact(
                    name=event_name,
                    kind=events_kind,
                    connection=connection_name,
                    summary=summary,
                    path=event_rel_path,
                    is_input=False,
                )
            )
            if events_kind == V1ArtifactKind.METRIC:
                last_values[event_name] = summary[V1ArtifactKind.METRIC]["last"]
    return summaries, last_values
def create_file_lineage(filepath: str, summary: Dict, kind: str):
    """Logs a file lineage artifact for the given path.

    No-op when the path is empty or the client is configured with no_api.

    Args:
        filepath: str, path to the file to reference.
        summary: Dict, summary payload to attach to the lineage.
        kind: str, artifact kind; falls back to V1ArtifactKind.FILE when falsy.

    Raises:
        PolyaxonContainerException: if the run client cannot be created.
    """
    kind = kind or V1ArtifactKind.FILE
    if not filepath:
        return
    filename = os.path.basename(filepath)

    if settings.CLIENT_CONFIG.no_api:
        return

    try:
        run_client = RunClient()
    except PolyaxonClientException as e:
        # Explicitly chain the cause, consistent with similar helpers.
        raise PolyaxonContainerException(e) from e

    artifact_run = V1RunArtifact(
        name=get_base_filename(filename),
        kind=kind,
        path=get_rel_asset_path(filepath),
        summary=summary,
        is_input=True,
    )
    run_client.log_artifact_lineage(artifact_run)
def log_file_ref(
    self,
    path: str,
    name: str = None,
    hash: str = None,
    content=None,
    is_input: bool = False,
    rel_path: str = None,
):
    """Logs file reference.

    Args:
        path: str, filepath, the name is extracted from the filepath.
        name: str, if the name is passed it will be used instead of the
              filename from the path.
        hash: str, optional, default = None, the hash version of the file,
              if not provided it will be calculated based on the file content.
        content: the file content.
        is_input: bool, if the file reference is an input or outputs.
        rel_path: str, optional relative path to the run artifacts path.
    """
    summary = {"path": path}
    if hash:
        summary["hash"] = hash
    elif content is not None:
        summary["hash"] = hash_value(content)
    file_name = name or os.path.basename(path)
    asset_rel_path = self._get_rel_asset_path(path=path, rel_path=rel_path)
    if file_name:
        self.log_artifact_lineage(
            body=V1RunArtifact(
                name=file_name,
                kind=V1ArtifactKind.FILE,
                path=asset_rel_path,
                summary=summary,
                is_input=is_input,
            )
        )
def test_images_summaries(self):
    """Image events yield one summary carrying the fixture's relative path."""
    summaries, last_values = self.run._sync_events_summaries(
        events_path="tests/fixtures/polyboard",
        events_kind="image",
        last_check=None,
    )
    data_path = os.path.abspath("tests/fixtures/polyboard/image/image_events.plx")
    events = V1Events.read(name="image_events", kind="image", data=data_path)
    assert events.name == "image_events"
    expected = V1RunArtifact(
        name="image_events",
        kind="image",
        connection=None,
        summary=events.get_summary(),
        path="tests/fixtures/polyboard/image/image_events.plx",
        is_input=False,
    )
    assert summaries == [expected]
    assert last_values == {}