def register_pipeline(self, config: Dict[Text, Any]):
    """
    Registers a pipeline in the artifact store as a YAML file.

    Args:
        config: dict representation of ZenML config.
    """
    Repository.get_instance().register_pipeline(
        file_name=self.file_name, config=config)

def __init__(self, name: Text, schema: Dict = None, _id: Text = None,
             *args, **kwargs):
    """
    Construct the datasource.

    Args:
        name (str): name of datasource.
        schema (dict): schema of datasource.
        _id: unique ID (for internal use).
    """
    if _id:
        # It's loaded from config
        self._id = _id
        logger.debug(f'Datasource {name} loaded.')
    else:
        # If None, then this is assumed to be 'new'. Check for duplicates.
        all_names = Repository.get_instance().get_datasource_names()
        if any(d == name for d in all_names):
            raise AlreadyExistsException(
                name=name, resource_type='datasource')
        self._id = str(uuid4())
        track(event=CREATE_DATASOURCE)
        logger.info(f'Datasource {name} created.')

    self.name = name
    self.schema = schema
    self._immutable = False
    self._source = source_utils.resolve_source_path(
        self.__class__.__module__ + '.' + self.__class__.__name__
    )

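# A minimal usage sketch of the duplicate check above, mirroring how the
# quickstart example later in this file handles it. The datasource name and
# path here are illustrative only: creating a datasource whose name already
# exists in the repository raises AlreadyExistsException, which callers can
# catch to fetch the existing instance instead.
try:
    ds = CSVDatasource(name='my_data', path='/tmp/my_data.csv')
except AlreadyExistsException:
    ds = Repository.get_instance().get_datasource_by_name('my_data')
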
def run(self, config: Dict[Text, Any]):
    # Extract the paths to create the tar
    logger.info('Orchestrating pipeline on Kubernetes..')

    repo: Repository = Repository.get_instance()
    repo_path = repo.path
    config_dir = repo.zenml_config.config_dir
    tar_file_name = \
        f'{EXTRACTED_TAR_DIR_NAME}_{str(int(time.time()))}.tar.gz'
    path_to_tar = os.path.join(config_dir, tar_file_name)

    # Create tarfile but exclude the .zenml folder if it exists
    path_utils.create_tarfile(repo_path, path_to_tar)
    logger.info(f'Created tar of current repository at: {path_to_tar}')

    # Upload tar to artifact store
    store_path = config[keys.GlobalKeys.ARTIFACT_STORE]
    store_staging_area = os.path.join(store_path, STAGING_AREA)
    store_path_to_tar = os.path.join(store_staging_area, tar_file_name)
    path_utils.copy(path_to_tar, store_path_to_tar)
    logger.info(f'Copied tar to artifact store at: {store_path_to_tar}')

    # Remove the local tar (a file, so rm_file rather than rm_dir)
    path_utils.rm_file(path_to_tar)
    logger.info(f'Removed tar at: {path_to_tar}')

    # Append path of tar in config orchestrator args
    config[keys.GlobalKeys.BACKEND][
        keys.BackendKeys.ARGS][TAR_PATH_ARG] = store_path_to_tar

    # Launch the job
    self.launch_job(config)

def _check_registered(self):
    if self.file_name in \
            Repository.get_instance().get_pipeline_file_paths(
                only_file_names=True):
        raise AssertionError(
            f'Pipeline names must be unique in the repository. There '
            f'is already a pipeline called {self.name}')

def wrapper():
    repo: Repository = Repository.get_instance()
    pipelines_dir = repo.zenml_config.get_pipelines_dir()
    for p_config in path_utils.list_dir(pipelines_dir):
        try:
            os.remove(p_config)
        except Exception as e:
            print(e)

def wrapper():
    repo: Repository = Repository.get_instance()
    repo.zenml_config.set_pipelines_dir(pipeline_root)
    for p_config in path_utils.list_dir(pipeline_root):
        y = yaml_utils.read_yaml(p_config)
        p: TrainingPipeline = TrainingPipeline.from_config(y)
        p.run()

def _get_one_pipeline(self):
    """Gets a representative pipeline from all associated pipelines."""
    pipelines = \
        Repository.get_instance().get_pipelines_by_datasource(self)
    if len(pipelines) == 0:
        raise EmptyDatasourceException
    return pipelines[0]

def _get_one_pipeline(self):
    """Gets a representative pipeline from all associated pipelines."""
    pipelines = \
        Repository.get_instance().get_pipelines_by_datasource(self)
    if len(pipelines) == 0:
        raise Exception('This datasource is not associated with any '
                        'pipelines, therefore there is no data!')
    return pipelines[0]

def list_config():
    """Print the current ZenML config to the command line."""
    try:
        repo: Repository = Repository.get_instance()
    except Exception as e:
        error(e)
        return
    click.echo(to_pretty_string(repo.zenml_config))

def load_source_path_class(source_path: Text) -> Type:
    """
    Loads a Python class from the path provided.

    Args:
        source_path (str): relative module path e.g. this.module.Class[@sha]
    """
    source = source_path.split('@')[0]
    pin = source_path.split('@')[-1]
    is_standard = is_standard_pin(pin)

    if '@' in source_path and not is_standard:
        logger.debug('Pinned step found with git sha. '
                     'Loading class from git history.')
        wrapper: GitWrapper = Repository.get_instance().get_git_wrapper()

        module_path = get_module_path_from_source(source_path)
        relative_module_path = get_relative_path_from_module(module_path)

        logger.warning('Found source with a pinned sha. Will now checkout '
                       f'module: {module_path}')

        # critical step
        if not wrapper.check_module_clean(source_path):
            raise Exception(f'One of the files at {relative_module_path} '
                            f'is not committed and we '
                            f'are trying to load that directory from git '
                            f'history due to a pinned step in the pipeline. '
                            f'Please commit the file and then run the '
                            f'pipeline.')

        # Check out the directory at that sha
        wrapper.checkout(sha_or_branch=pin, directory=relative_module_path)

        # After this point, all exceptions must first undo the checkout above
        try:
            class_ = import_class_by_path(source)
            wrapper.reset(relative_module_path)
            wrapper.checkout(directory=relative_module_path)
        except Exception:
            wrapper.reset(relative_module_path)
            wrapper.checkout(directory=relative_module_path)
            # re-raise the original exception rather than a bare Exception,
            # so the traceback is preserved
            raise
    elif '@' in source_path and is_standard:
        logger.debug(f'Default {APP_NAME} class used. Loading directly.')
        # TODO: [LOW] Check if ZenML version is installed before loading.
        class_ = import_class_by_path(source)
    else:
        logger.debug('Unpinned step found with no git sha. Attempting to '
                     'load class from current repository state.')
        class_ = import_class_by_path(source)

    return class_

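# A brief usage sketch of the two source-path formats the docstring above
# describes. The module path and sha below are illustrative only: an unpinned
# path loads the class from the current repository state, while a path pinned
# with a git sha triggers the checkout/restore flow above.
cls = load_source_path_class('my_project.steps.MyStep')
pinned_cls = load_source_path_class(
    'my_project.steps.MyStep@1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d7e8f9a0b')
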
def run_pipeline(self, config_b64: str):
    # Load config from base64
    config = json.loads(base64.b64decode(config_b64))

    # Remove tar_path arg from config
    tar_path = config[keys.GlobalKeys.BACKEND][keys.BackendKeys.ARGS].pop(
        TAR_PATH_ARG)

    # Copy it over locally because it will be remote
    path_utils.copy(tar_path, EXTRACTED_TAR_FILE_PATH)

    # Extract it to EXTRACTED_TAR_DIR
    path_utils.extract_tarfile(EXTRACTED_TAR_FILE_PATH, EXTRACTED_TAR_DIR)

    # Append to sys.path to make user code discoverable
    sys.path.append(EXTRACTED_TAR_DIR)

    # Make sure the Repository is initialized at the right path
    Repository.get_instance(EXTRACTED_TAR_DIR)

    # Change orchestrator of pipeline to local
    OrchestratorBaseBackend().run(config)

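# Counterpart encoding sketch: how a caller might produce the base64 payload
# that run_pipeline expects, given that it decodes with
# json.loads(base64.b64decode(...)). The config dict below is an illustrative
# placeholder; any JSON-serializable ZenML config dict would work.
import base64
import json

config = {'backend': {'args': {}}}  # illustrative placeholder config
config_b64 = base64.b64encode(
    json.dumps(config).encode('utf-8')).decode('ascii')
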
def resolve_source_path(source_path: Text) -> Text:
    """
    Resolves source path with an optional sha using Git.

    Args:
        source_path (str): relative module path e.g. this.module.Class
    """
    if is_standard_step(source_path):
        # use the standard version
        return resolve_standard_source_path(source_path)

    # otherwise resolve via Git
    wrapper: GitWrapper = Repository.get_instance().get_git_wrapper()
    source_path = wrapper.resolve_source_path(source_path)
    return source_path

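# Usage sketch, under the assumption (suggested by the 'this.module.Class[@sha]'
# format in load_source_path_class above) that Git resolution pins the path to
# the current commit. The module path is illustrative only.
resolved = resolve_source_path('my_project.steps.MyStep')
# e.g. 'my_project.steps.MyStep@<sha-of-current-commit>'
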
def set_metadata_store(store_type, args):
    """Set the metadata store for the local config."""
    try:
        parsed_args = parse_unknown_options(args)
    except AssertionError as e:
        click.echo(str(e))
        return

    # TODO: [LOW] Hard-coded
    config = {'type': store_type, 'args': parsed_args}

    from zenml.core.metadata.metadata_wrapper import ZenMLMetadataStore
    store = ZenMLMetadataStore.from_config(config)

    repo: Repository = Repository.get_instance()
    repo.zenml_config.set_metadata_store(store)
    click.echo(f'Metadata store set to: {store.to_config()}')

def get_config(self):
    predictor_path = self.predictor.__module__ + '.' + \
                     self.predictor.__name__
    p_file_path = \
        get_path_from_source(get_class_path_from_source(predictor_path))
    repo: Repository = Repository.get_instance()
    return {
        "cortex_serving_args": {
            "env": self.env,
            "api_config": self.api_config,
            "predictor_path": os.path.join(repo.path, p_file_path),
            "requirements": self.requirements,
            "conda_packages": self.conda_packages,
            "force": self.force,
            "wait": self.wait,
        }
    }

def __init__(self, **params):
    super(Application, self).__init__(**params)

    # lists
    result_list = []
    hparam_list = []

    repo: Repository = Repository.get_instance()

    # get all pipelines in this workspace
    all_pipelines: List[TrainingPipeline] = repo.get_pipelines_by_type(
        [TrainingPipeline.PIPELINE_TYPE])

    # get a dataframe of all results + all hyperparameter combinations
    for p in all_pipelines:
        # This slows the comparison down, but is necessary to
        # update the status of each run.
        if p.get_status() == PipelineStatusTypes.Succeeded.name:
            eval_path = p.get_artifacts_uri_by_component(
                GDPComponent.Evaluator.name)[0]
            evaluation = tfma.load_eval_result(eval_path)
            for s, m in evaluation.slicing_metrics:
                result_list.append(
                    dict([('pipeline_name', '{}'.format(p.name)),
                          ('slice_name', s[0][0] if s else ''),
                          ('slice_value', s[0][1] if s else '')]))
                result_list[-1].update(
                    {f'metric_{k}': v[''] for k, v in m.items()})

            h_dict = p.get_hyperparameters()
            h_dict['pipeline_name'] = p.name
            hparam_list.append(h_dict)

    self.results = pd.DataFrame([parse_metrics(r) for r in result_list])
    self.hparam_info = pd.DataFrame(hparam_list)

    # set params
    self.param.pipeline_run_selector.objects = self.results[
        'pipeline_name'].unique()

def run(self, config: Dict[Text, Any]):
    """
    This run function essentially calls an underlying TFX orchestrator run.
    However, it is meant as a higher-level abstraction with some opinionated
    decisions built in.

    Args:
        config: a ZenML config dict
    """
    # Extract the paths to create the tar
    logger.info('Orchestrating pipeline on GCP..')

    repo: Repository = Repository.get_instance()
    repo_path = repo.path
    config_dir = repo.zenml_config.config_dir
    tar_file_name = \
        f'{EXTRACTED_TAR_DIR_NAME}_{str(int(time.time()))}.tar.gz'
    path_to_tar = os.path.join(config_dir, tar_file_name)

    # Create tarfile but exclude the .zenml folder if it exists
    path_utils.create_tarfile(repo_path, path_to_tar)
    logger.info(f'Created tar of current repository at: {path_to_tar}')

    # Upload tar to artifact store
    store_path = config[keys.GlobalKeys.ARTIFACT_STORE]
    store_staging_area = os.path.join(store_path, STAGING_AREA)
    store_path_to_tar = os.path.join(store_staging_area, tar_file_name)
    path_utils.copy(path_to_tar, store_path_to_tar)
    logger.info(f'Copied tar to artifact store at: {store_path_to_tar}')

    # Remove the local tar (a file, so rm_file rather than rm_dir)
    path_utils.rm_file(path_to_tar)
    logger.info(f'Removed tar at: {path_to_tar}')

    # Append path of tar in config orchestrator args
    config[keys.GlobalKeys.BACKEND][
        keys.BackendKeys.ARGS][TAR_PATH_ARG] = store_path_to_tar

    # Launch the instance
    self.launch_instance(config)

def __init__(self,
             name: Text = None,
             enable_cache: Optional[bool] = True,
             steps_dict: Dict[Text, BaseStep] = None,
             backend: OrchestratorBaseBackend = None,
             metadata_store: Optional[ZenMLMetadataStore] = None,
             artifact_store: Optional[ArtifactStore] = None,
             datasource: Optional[BaseDatasource] = None,
             pipeline_name: Optional[Text] = None,
             *args,
             **kwargs):
    """
    Construct a base pipeline. This is a base interface that is meant
    to be overridden in multiple other pipeline use cases.

    Args:
        name: Outward-facing name of the pipeline.
        pipeline_name: A unique name that identifies the pipeline after
         it is run.
        enable_cache: Boolean, indicates whether or not caching
         should be used.
        steps_dict: Optional dict of steps.
        backend: Orchestrator backend.
        metadata_store: Configured metadata store. If None, the default
         metadata store is used.
        artifact_store: Configured artifact store. If None, the default
         artifact store is used.
        datasource: Configured datasource. If None, the pipeline has no
         datasource attached.
    """
    # Generate a name if not given
    if name is None:
        name = str(round(time.time() * 1000))
    self.name = name
    self._immutable = False

    # Metadata store
    if metadata_store:
        self.metadata_store: ZenMLMetadataStore = metadata_store
    else:
        # use default
        self.metadata_store: ZenMLMetadataStore = \
            Repository.get_instance().get_default_metadata_store()

    if pipeline_name:
        # It has been loaded from YAML, so pick up its context
        self.pipeline_name = pipeline_name
        self.file_name = self.pipeline_name + '.yaml'
    else:
        # pipeline_name is None, so this is a new pipeline
        self.pipeline_name = self.create_pipeline_name_from_name()
        self.file_name = self.pipeline_name + '.yaml'
        # check for duplicates here, as it is a 'new' pipeline
        self._check_registered()
        track(event=CREATE_PIPELINE)
        logger.info(f'Pipeline {name} created.')

    self.enable_cache = enable_cache

    if steps_dict is None:
        self.steps_dict: Dict[Text, BaseStep] = {}
    else:
        self.steps_dict = steps_dict

    # Default to local
    if backend is None:
        self.backend = OrchestratorBaseBackend()
    else:
        self.backend = backend

    # Artifact store
    if artifact_store:
        self.artifact_store = artifact_store
    else:
        # use default
        self.artifact_store = \
            Repository.get_instance().get_default_artifact_store()

    # Datasource
    if datasource:
        self.datasource = datasource
    else:
        self.datasource = None

    self._source = source_utils.resolve_source_path(
        self.__class__.__module__ + '.' + self.__class__.__name__)
    self._kwargs = {
        keys.PipelineDetailKeys.NAME: self.pipeline_name,
        keys.PipelineDetailKeys.ENABLE_CACHE: self.enable_cache,
    }
    if kwargs:
        self._kwargs.update(kwargs)

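# A minimal construction sketch, assuming a TrainingPipeline subclass of this
# base class (as used elsewhere in this repository). The pipeline name is
# illustrative only; omitting the backend, metadata store, and artifact store
# falls back to the repository defaults resolved above.
pipeline = TrainingPipeline(name='my-experiment', enable_cache=True)
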
CORTEX_MODEL_NAME = os.getenv('CORTEX_MODEL_NAME', 'zenml-classifier')

# For this example, the ArtifactStore must be a GCP bucket, as the
# CortexDeployer step is using the GCP env.
from zenml.core.repo.repo import Repository

# Define the training pipeline
training_pipeline = TrainingPipeline()

# Add a datasource. This will automatically track and version it.
try:
    ds = CSVDatasource(name='Pima Indians Diabetes',
                       path='gs://zenml_quickstart/diabetes.csv')
except AlreadyExistsException:
    ds = Repository.get_instance().get_datasource_by_name(
        'Pima Indians Diabetes')

training_pipeline.add_datasource(ds)

# Add a split
training_pipeline.add_split(RandomSplit(split_map={'eval': 0.3,
                                                   'train': 0.7}))

# Add a preprocessing unit
training_pipeline.add_preprocesser(
    StandardPreprocesser(features=[
        'times_pregnant', 'pgc', 'dbp', 'tst', 'insulin', 'bmi',
        'pedigree', 'age'
    ],
        labels=['has_diabetes'],
        overwrite={
            'has_diabetes': {
                'transform': [{

def get_pipelines_dir():
    """Print the pipelines dir from the local config."""
    repo: Repository = Repository.get_instance()
    click.echo(f'Default pipelines dir points to: '
               f'{repo.get_default_pipelines_dir()}')

def __init__(self,
             name: Text,
             enable_cache: Optional[bool] = True,
             steps_dict: Dict[Text, BaseStep] = None,
             backends_dict: Dict[Text, BaseBackend] = None,
             metadata_store: Optional[ZenMLMetadataStore] = None,
             artifact_store: Optional[ArtifactStore] = None,
             datasource: Optional[BaseDatasource] = None,
             pipeline_name: Optional[Text] = None,
             *args,
             **kwargs):
    """
    Construct a base pipeline. This is a base interface that is meant
    to be overridden in multiple other pipeline use cases.

    Args:
        name: Outward-facing name of the pipeline.
        pipeline_name: A unique name that identifies the pipeline after
         it is run.
        enable_cache: Boolean, indicates whether or not caching
         should be used.
        steps_dict: Optional dict of steps.
        backends_dict: Optional dict of backends.
        metadata_store: Configured metadata store. If None, the default
         metadata store is used.
        artifact_store: Configured artifact store. If None, the default
         artifact store is used.
        datasource: Configured datasource. If None, the pipeline has no
         datasource attached.
    """
    self.name = name

    # Metadata store
    if metadata_store:
        self.metadata_store: ZenMLMetadataStore = metadata_store
    else:
        # use default
        self.metadata_store: ZenMLMetadataStore = \
            Repository.get_instance().get_default_metadata_store()

    if pipeline_name:
        # It has been loaded from YAML, so pick up its context
        if self.is_executed_in_metadata_store:
            self._immutable = True
            logger.debug(f'Pipeline {name} loaded and is immutable.')
        else:
            # if the metadata store does not have the pipeline_name, then
            # we can safely execute this again.
            self._immutable = False
            logger.debug(f'Pipeline {name} loaded and can be run.')
        self.pipeline_name = pipeline_name
        self.file_name = self.pipeline_name + '.yaml'
    else:
        # pipeline_name is None, so this is a new pipeline
        self._immutable = False
        self.pipeline_name = self.create_pipeline_name_from_name()
        self.file_name = self.pipeline_name + '.yaml'
        # check for duplicates here, as it is a 'new' pipeline
        if self.file_name in \
                Repository.get_instance().get_pipeline_file_paths(
                    only_file_names=True):
            raise AssertionError(
                f'Pipeline names must be unique in the repository. There '
                f'is already a pipeline called {self.name}')
        track(event=CREATE_PIPELINE)
        logger.info(f'Pipeline {name} created.')

    self.enable_cache = enable_cache

    if steps_dict is None:
        self.steps_dict: Dict[Text, BaseStep] = {}
    else:
        self.steps_dict = steps_dict

    # Backends
    if backends_dict is None:
        self.backends_dict: Dict[Text, BaseBackend] = \
            self.get_default_backends()
    else:
        self.backends_dict = backends_dict

    # Artifact store
    if artifact_store:
        self.artifact_store = artifact_store
    else:
        # use default
        self.artifact_store = \
            Repository.get_instance().get_default_artifact_store()

    # Datasource
    if datasource:
        self.datasource = datasource
    else:
        self.datasource = None

import os
import shutil

import pytest

import zenml
from zenml.core.repo.repo import Repository
from zenml.core.repo.zenml_config import ZenMLConfig, PIPELINES_DIR_KEY
from zenml.utils.exceptions import InitializationException
from zenml.utils import yaml_utils
from zenml.core.standards import standard_keys as keys
from zenml.core.repo.constants import ARTIFACT_STORE_DEFAULT_DIR, \
    ZENML_DIR_NAME, ML_METADATA_SQLITE_DEFAULT_NAME
from zenml.core.metadata.mock_metadata_wrapper import MockMetadataStore

ZENML_ROOT = zenml.__path__[0]
TEST_ROOT = os.path.join(ZENML_ROOT, "testing")

pipelines_dir = os.path.join(TEST_ROOT, "test_pipelines")
repo: Repository = Repository.get_instance()
repo.zenml_config.set_pipelines_dir(pipelines_dir)

config_root = os.path.dirname(ZENML_ROOT)
artifact_store_path = os.path.join(config_root, ZENML_DIR_NAME,
                                   ARTIFACT_STORE_DEFAULT_DIR)
sqlite_uri = os.path.join(artifact_store_path,
                          ML_METADATA_SQLITE_DEFAULT_NAME)


def test_zenml_config_init():
    # in the root, initialization should work
    _ = ZenMLConfig(config_root)

    # outside of an initialized repo path
    with pytest.raises(InitializationException):

def compare_pipelines():
    """Compares pipelines in the current repo."""
    click.echo('Comparing pipelines in repo: Starting app..')
    repo: Repository = Repository.get_instance()
    repo.compare_pipelines()

def get_metadata_store():
    """Print the metadata store from the local config."""
    repo: Repository = Repository.get_instance()
    click.echo(f'Metadata store: '
               f'{repo.get_default_metadata_store().to_config()}')

def set_artifact_store(path: Text = None):
    """Change the artifact store for the local config."""
    repo: Repository = Repository.get_instance()
    repo.zenml_config.set_artifact_store(path)
    click.echo(f'Default artifact store updated to {path}')

def get_artifact_store():
    """Print the artifact store from the local config."""
    repo: Repository = Repository.get_instance()
    click.echo(f'Default artifact store points to: '
               f'{repo.get_default_artifact_store().path}')

def _check_registered(self):
    if Repository.get_instance().get_pipeline_by_name(
            self.name) is not None:
        raise AlreadyExistsException(name=self.name,
                                     resource_type='pipeline')

def load_config(self) -> Dict[Text, Any]:
    """Loads a config dict from the pipeline's YAML file."""
    return Repository.get_instance().load_pipeline_config(
        file_name=self.file_name)

def wrapper(filename):
    repo: Repository = Repository.get_instance()
    repo.zenml_config.set_pipelines_dir(pipeline_root)
    cfg = os.path.join(pipeline_root, filename)
    path_utils.rm_file(cfg)

def repo():
    return Repository.get_instance()

def set_pipelines_dir(path: Text = None):
    """Change the pipelines dir for the local config."""
    repo: Repository = Repository.get_instance()
    repo.zenml_config.set_pipelines_dir(path)
    click.echo(f'Default pipelines dir updated to {path}')