project = 'PROJECT' # the project to launch the VM in cloudsql_connection_name = f'{project}:REGION:INSTANCE' mysql_db = 'DATABASE' mysql_user = '******' mysql_pw = 'PASSWORD' training_job_dir = artifact_store_path + '/gcaiptrainer/' training_pipeline = TrainingPipeline(name='GCP Orchestrated') # Add a datasource. This will automatically track and version it. ds = CSVDatasource(name='Pima Indians Diabetes', path='gs://zenml_quickstart/diabetes.csv') training_pipeline.add_datasource(ds) # Add a split training_pipeline.add_split(RandomSplit( split_map={'train': 0.7, 'eval': 0.3})) # Add a preprocessing unit training_pipeline.add_preprocesser( StandardPreprocesser( features=['times_pregnant', 'pgc', 'dbp', 'tst', 'insulin', 'bmi', 'pedigree', 'age'], labels=['has_diabetes'], overwrite={'has_diabetes': { 'transform': [{'method': 'no_transform', 'parameters': {}}]}} )) # Add a trainer training_pipeline.add_trainer(FeedForwardTrainer( loss='binary_crossentropy', last_activation='sigmoid',
from examples.gan.preprocessing import GANPreprocessor

# Singleton repository used to look up previously registered datasources.
repo: Repository = Repository().get_instance()

# Caching is disabled so every run re-executes all steps.
gan_pipeline = TrainingPipeline(name="whynotletitfly", enable_cache=False)

# EAFP: try to register the image datasource; if creation fails (e.g. it was
# already registered on a previous run), fall back to fetching it by name.
try:
    ds = ImageDatasource(
        name="gan_images",
        base_path="/Users/nicholasjunge/workspaces/maiot/ce_project/images_mini"
    )
except Exception:
    # FIX: was a bare `except:`, which also swallows SystemExit and
    # KeyboardInterrupt. `Exception` keeps the fallback behavior while
    # letting process-control exceptions propagate.
    ds = repo.get_datasource_by_name('gan_images')

gan_pipeline.add_datasource(ds)

# Split on the integer `label` column: 0 -> train, 1 -> eval.
gan_pipeline.add_split(
    CategoricalDomainSplit(categorical_column="label",
                           split_map={
                               "train": [0],
                               "eval": [1]
                           }))

gan_pipeline.add_preprocesser(GANPreprocessor())
# gan_pipeline.add_preprocesser(transform_step)

gan_pipeline.add_trainer(CycleGANTrainer(epochs=5))

gan_pipeline.run()
# Run five independently named pipelines (csvtest1 .. csvtest5) over the same
# CSV datasource so each run is tracked separately.
for i in range(1, 6):
    training_pipeline = TrainingPipeline(name='csvtest{0}'.format(i))

    # EAFP: creating the datasource fails once it already exists (e.g. from a
    # previous loop iteration or run); fall back to fetching it by name.
    try:
        # Add a datasource. This will automatically track and version it.
        ds = CSVDatasource(name='my_csv_datasource',
                           path=os.path.join(csv_root, "my_dataframe.csv"))
    except Exception:
        # FIX: was a bare `except:`, which also swallows SystemExit and
        # KeyboardInterrupt. `Exception` keeps the fallback behavior while
        # letting process-control exceptions propagate.
        ds = repo.get_datasource_by_name("my_csv_datasource")

    training_pipeline.add_datasource(ds)

    # Add a split: partition rows by the `name` column into fixed groups.
    training_pipeline.add_split(
        CategoricalDomainSplit(categorical_column="name",
                               split_map={
                                   'train': ["arnold", "nicholas"],
                                   'eval': ["lülük"]
                               }))

    # Add a preprocessing unit; the `gpa` label passes through untransformed.
    training_pipeline.add_preprocesser(
        StandardPreprocesser(features=["name", "age"],
                             labels=['gpa'],
                             overwrite={
                                 'gpa': {
                                     'transform': [{
                                         'method': 'no_transform',
                                         'parameters': {}
                                     }]
                                 }
                             }))
# Define the training pipeline training_pipeline = TrainingPipeline() # Add a datasource. This will automatically track and version it. try: ds = CSVDatasource(name='Pima Indians Diabetes', path='gs://zenml_quickstart/diabetes.csv') except AlreadyExistsException: ds = Repository.get_instance().get_datasource_by_name( 'Pima Indians Diabetes') training_pipeline.add_datasource(ds) # Add a split training_pipeline.add_split( RandomSplit(split_map={'train': 0.7, 'eval': 0.3}).with_backend( processing_backend) ) # Add a preprocessing unit training_pipeline.add_preprocesser( StandardPreprocesser( features=['times_pregnant', 'pgc', 'dbp', 'tst', 'insulin', 'bmi', 'pedigree', 'age'], labels=['has_diabetes'], overwrite={'has_diabetes': { 'transform': [{'method': 'no_transform', 'parameters': {}}]}} ).with_backend(processing_backend) ) # Add a trainer training_pipeline.add_trainer(FeedForwardTrainer(