예제 #1
0
def run_pipeline(path_to_config: Text):
    """Run the pipeline described by the given YAML config file.

    Args:
        path_to_config: Path to the designated pipeline's config; has to
            match the YAML file name.
    """
    # The config file already declares the metadata store, backends and
    # artifact store, so nothing else needs to be wired up here.
    print(path_to_config)
    try:
        pipeline: TrainingPipeline = TrainingPipeline.from_config(
            read_yaml(path_to_config))
        pipeline.run()
    except Exception as exc:
        error(exc)
예제 #2
0
from zenml.datasources import CSVDatasource
from zenml.pipelines import TrainingPipeline
from zenml.repo import Repository
from zenml.steps.evaluator import TFMAEvaluator
from zenml.steps.preprocesser import StandardPreprocesser
from zenml.steps.split import RandomSplit
from zenml.steps.trainer import TFFeedForwardTrainer
from zenml.exceptions import AlreadyExistsException

# Assemble the training pipeline.
training_pipeline = TrainingPipeline()

# Register the datasource; ZenML tracks and versions it automatically.
# On repeated runs the name already exists, so reuse the stored one.
try:
    ds = CSVDatasource(
        name='Pima Indians Diabetes',
        path='gs://zenml_quickstart/diabetes.csv')
except AlreadyExistsException:
    repo = Repository.get_instance()
    ds = repo.get_datasource_by_name('Pima Indians Diabetes')
training_pipeline.add_datasource(ds)

# Random split: 70% train / 30% eval.
training_pipeline.add_split(
    RandomSplit(split_map={'train': 0.7, 'eval': 0.3}))

# Add a preprocessing unit
training_pipeline.add_preprocesser(
    StandardPreprocesser(features=[
        'times_pregnant', 'pgc', 'dbp', 'tst', 'insulin', 'bmi', 'pedigree',
        'age'
    ],
                         labels=['has_diabetes'],
예제 #3
0
파일: run.py 프로젝트: Federicowengi/zenml
from zenml.steps.preprocesser import StandardPreprocesser
from zenml.steps.split import RandomSplit
from zenml.steps.trainer import TFFeedForwardTrainer
from zenml.utils.naming_utils import transformed_label_name

# Both environment variables are required for the GCAIP deployment below.
GCP_PROJECT = os.environ.get('GCP_PROJECT')
MODEL_NAME = os.environ.get('MODEL_NAME')

assert GCP_PROJECT
assert MODEL_NAME

# Deploys a TensorFlow model on Google Cloud AI Platform. No trainer
# type other than the one used in this example works with this deployer.

# Assemble the training pipeline.
training_pipeline = TrainingPipeline()

# Register the datasource; ZenML tracks and versions it automatically.
# If it was created by a previous run, look up the stored one instead.
try:
    ds = CSVDatasource(
        name='Pima Indians Diabetes',
        path='gs://zenml_quickstart/diabetes.csv')
except AlreadyExistsException:
    ds = Repository.get_instance().get_datasource_by_name(
        'Pima Indians Diabetes')
training_pipeline.add_datasource(ds)

# Random split: 70% train / 30% eval.
training_pipeline.add_split(
    RandomSplit(split_map={'train': 0.7, 'eval': 0.3}))

# Add a preprocessing unit
training_pipeline.add_preprocesser(
# Initialize an isolated ZenML repository under tests/ so test runs do
# not touch the user's real repo; telemetry disabled for test runs.
TEST_ROOT = os.path.join(ZENML_ROOT, "tests")
Repository.init_repo(TEST_ROOT, analytics_opt_in=False)

pipeline_root = os.path.join(TEST_ROOT, "pipelines")  # pipeline configs live here
csv_root = os.path.join(TEST_ROOT, "test_data")  # CSV fixtures
image_root = os.path.join(csv_root, "images")  # image fixtures (under test_data)


repo: Repository = Repository.get_instance()
# Start from a clean slate: remove pipelines left behind by a previous
# test run, then point the repo at the (now empty) pipelines directory.
if path_utils.is_dir(pipeline_root):
    path_utils.rm_dir(pipeline_root)
repo.zenml_config.set_pipelines_dir(pipeline_root)

try:
    for i in range(1, 6):
        training_pipeline = TrainingPipeline(name='csvtest{0}'.format(i))

        try:
            # Add a datasource. This will automatically track and version it.
            ds = CSVDatasource(name='my_csv_datasource',
                               path=os.path.join(csv_root, "my_dataframe.csv"))
        except AlreadyExistsException:
            ds = repo.get_datasource_by_name("my_csv_datasource")

        training_pipeline.add_datasource(ds)

        # Add a split
        training_pipeline.add_split(CategoricalDomainSplit(
            categorical_column="name",
            split_map={'train': ["arnold", "nicholas"], 'eval': ["lülük"]}))
예제 #5
0
from zenml.steps.split import RandomSplit
from zenml.steps.trainer import TFFeedForwardTrainer
from zenml.exceptions import AlreadyExistsException

# The GCP bucket is mandatory; the Cortex settings fall back to defaults.
GCP_BUCKET = os.getenv('GCP_BUCKET')
assert GCP_BUCKET
CORTEX_ENV = os.getenv('CORTEX_ENV', 'env')
CORTEX_MODEL_NAME = os.getenv('CORTEX_MODEL_NAME', 'zenml-classifier')

# The ArtifactStore must be a GCP bucket in this example, since the
# CortexDeployer step runs in the GCP environment.

from zenml.repo.repo import Repository

# Assemble the training pipeline.
training_pipeline = TrainingPipeline()

# Register the datasource; ZenML tracks and versions it automatically.
# Reuse the stored datasource when the name already exists.
try:
    ds = CSVDatasource(
        name='Pima Indians Diabetes',
        path='gs://zenml_quickstart/diabetes.csv')
except AlreadyExistsException:
    repo = Repository.get_instance()
    ds = repo.get_datasource_by_name('Pima Indians Diabetes')
training_pipeline.add_datasource(ds)

# Random split: 30% eval / 70% train.
training_pipeline.add_split(
    RandomSplit(split_map={'eval': 0.3, 'train': 0.7}))

# Add a preprocessing unit
training_pipeline.add_preprocesser(
예제 #6
0
from step.trainer import MyScikitTrainer
from zenml.datasources import CSVDatasource
from zenml.exceptions import AlreadyExistsException
from zenml.pipelines import TrainingPipeline
from zenml.repo import Repository
from zenml.steps.evaluator import AgnosticEvaluator
from zenml.steps.preprocesser import StandardPreprocesser
from zenml.steps.split import RandomSplit
from zenml.utils import naming_utils

# Assemble the training pipeline.
training_pipeline = TrainingPipeline()

# Register the datasource (automatically tracked and versioned);
# on reruns the name already exists, so fetch the stored one.
try:
    ds = CSVDatasource(
        name='Pima Indians Diabetes',
        path='gs://zenml_quickstart/diabetes.csv')
except AlreadyExistsException:
    ds = Repository.get_instance().get_datasource_by_name(
        'Pima Indians Diabetes')
training_pipeline.add_datasource(ds)

# Random split: 70% train / 30% eval.
training_pipeline.add_split(
    RandomSplit(split_map={'train': 0.7, 'eval': 0.3}))

# Add a preprocessing unit
training_pipeline.add_preprocesser(
    StandardPreprocesser(features=[
        'times_pregnant', 'pgc', 'dbp', 'tst', 'insulin', 'bmi', 'pedigree',
        'age'
    ],
예제 #7
0
from zenml.datasources import CSVDatasource
from zenml.exceptions import AlreadyExistsException
from zenml.pipelines import TrainingPipeline
from zenml.repo import Repository
from zenml.steps.evaluator import AgnosticEvaluator
from zenml.steps.preprocesser import StandardPreprocesser
from zenml.steps.split import RandomSplit
from zenml.steps.trainer import TorchFeedForwardTrainer
from zenml.utils import naming_utils

# Assemble the training pipeline.
training_pipeline = TrainingPipeline()

# Register the datasource (automatically tracked and versioned);
# fall back to the already-registered one on repeated runs.
try:
    ds = CSVDatasource(
        name='Pima Indians Diabetes',
        path='gs://zenml_quickstart/diabetes.csv')
except AlreadyExistsException:
    repo = Repository.get_instance()
    ds = repo.get_datasource_by_name('Pima Indians Diabetes')
training_pipeline.add_datasource(ds)

# Random split: 30% eval / 70% train.
training_pipeline.add_split(
    RandomSplit(split_map={'eval': 0.3, 'train': 0.7}))

# Add a preprocessing unit
training_pipeline.add_preprocesser(
    StandardPreprocesser(features=[
        'times_pregnant', 'pgc', 'dbp', 'tst', 'insulin', 'bmi', 'pedigree',
        'age'
    ],
예제 #8
0
파일: run.py 프로젝트: Federicowengi/zenml
assert GCP_REGION
assert MYSQL_DB
assert MYSQL_USER
assert MYSQL_PWD

# Orchestrate on a Google Cloud VM and train on GCP as well. This only
# works when the orchestrator and the backend live in the same GCP
# project, and when both the orchestrator VM and the GCAIP worker VM can
# reach the metadata store and the artifact store.

# Note: a custom Trainer requires building a new Docker image on top of
# the ZenML Trainer image and passing it via the `image` parameter of
# the SingleGPUTrainingGCAIPBackend.

# Assemble the training pipeline.
training_pipeline = TrainingPipeline()

# Register the datasource (automatically tracked and versioned);
# reuse the stored datasource when the name already exists.
try:
    ds = CSVDatasource(
        name='Pima Indians Diabetes',
        path='gs://zenml_quickstart/diabetes.csv')
except AlreadyExistsException:
    ds = Repository.get_instance().get_datasource_by_name(
        'Pima Indians Diabetes')
training_pipeline.add_datasource(ds)

# Random split: 70% train / 30% eval.
training_pipeline.add_split(
    RandomSplit(split_map={'train': 0.7, 'eval': 0.3}))

# Add a preprocessing unit
training_pipeline.add_preprocesser(
예제 #9
0
from examples.pytorch_lightning.step.trainer import MyPyTorchLightningTrainer
from zenml.datasources import CSVDatasource
from zenml.pipelines import TrainingPipeline
from zenml.repo import Repository
from zenml.steps.preprocesser import StandardPreprocesser
from zenml.steps.split import RandomSplit
from zenml.exceptions import AlreadyExistsException

# Assemble the training pipeline.
training_pipeline = TrainingPipeline()

# Register the datasource; ZenML tracks and versions it automatically.
# A second run raises AlreadyExistsException, so look it up instead.
try:
    ds = CSVDatasource(
        name='Pima Indians Diabetes',
        path='gs://zenml_quickstart/diabetes.csv')
except AlreadyExistsException:
    repo = Repository.get_instance()
    ds = repo.get_datasource_by_name('Pima Indians Diabetes')
training_pipeline.add_datasource(ds)

# Random split: 30% eval / 70% train.
training_pipeline.add_split(
    RandomSplit(split_map={'eval': 0.3, 'train': 0.7}))

# Add a preprocessing unit
training_pipeline.add_preprocesser(
    StandardPreprocesser(features=[
        'times_pregnant', 'pgc', 'dbp', 'tst', 'insulin', 'bmi', 'pedigree',
        'age'
    ],
                         labels=['has_diabetes'],
                         overwrite={
예제 #10
0
파일: run.py 프로젝트: sjoerdteunisse/zenml
from zenml.datasources import CSVDatasource
from zenml.pipelines import TrainingPipeline
from zenml.repo import Repository
from zenml.steps.preprocesser import StandardPreprocesser
from zenml.steps.split import RandomSplit
from zenml.exceptions import AlreadyExistsException
from examples.scikit.step.trainer import MyScikitTrainer

# Assemble the training pipeline.
training_pipeline = TrainingPipeline()

# Register the datasource (automatically tracked and versioned);
# if it already exists from a prior run, fetch the stored one.
try:
    ds = CSVDatasource(
        name='Pima Indians Diabetes',
        path='gs://zenml_quickstart/diabetes.csv')
except AlreadyExistsException:
    ds = Repository.get_instance().get_datasource_by_name(
        'Pima Indians Diabetes')
training_pipeline.add_datasource(ds)

# Random split: 70% train / 30% eval.
training_pipeline.add_split(
    RandomSplit(split_map={'train': 0.7, 'eval': 0.3}))

# Add a preprocessing unit
training_pipeline.add_preprocesser(
    StandardPreprocesser(features=[
        'times_pregnant', 'pgc', 'dbp', 'tst', 'insulin', 'bmi', 'pedigree',
        'age'
    ],
                         labels=['has_diabetes'],
                         overwrite={
예제 #11
0
파일: run.py 프로젝트: Federicowengi/zenml
from step.trainer import MyPyTorchLightningTrainer
from zenml.datasources import CSVDatasource
from zenml.exceptions import AlreadyExistsException
from zenml.pipelines import TrainingPipeline
from zenml.repo import Repository
from zenml.steps.evaluator import AgnosticEvaluator
from zenml.steps.preprocesser import StandardPreprocesser
from zenml.steps.split import RandomSplit
from zenml.utils import naming_utils

# Assemble the training pipeline.
training_pipeline = TrainingPipeline()

# Register the datasource; ZenML tracks and versions it automatically.
# Reuse the already-registered datasource on repeated runs.
try:
    ds = CSVDatasource(
        name='Pima Indians Diabetes',
        path='gs://zenml_quickstart/diabetes.csv')
except AlreadyExistsException:
    repo = Repository.get_instance()
    ds = repo.get_datasource_by_name('Pima Indians Diabetes')
training_pipeline.add_datasource(ds)

# Random split: 30% eval / 70% train.
training_pipeline.add_split(
    RandomSplit(split_map={'eval': 0.3, 'train': 0.7}))

# Add a preprocessing unit
training_pipeline.add_preprocesser(
    StandardPreprocesser(features=[
        'times_pregnant', 'pgc', 'dbp', 'tst', 'insulin', 'bmi', 'pedigree',
        'age'
    ],