def train_model(experiment_name, groups=1, time_window=pio.TimeWindow(-90, -30, 1, 15)):
    """Fit a quick time-series regression experiment on the ``ts.csv`` data.

    The frame returned by ``get_data`` is dumped to a CSV file (relative
    path, named after the experiment and the joined group list), uploaded
    as a dataset of the project identified by ``PROJECT_ID`` and then used
    to fit a time-series regression.

    Args:
        experiment_name: Name used for the CSV dump, the dataset and the
            experiment.
        groups: Forwarded to ``get_data`` as its second argument.
        time_window: Time window handed to ``fit_timeseries_regression``.

    Returns:
        Whatever ``project.fit_timeseries_regression`` returns.
    """
    data, group_list = get_data(os.path.join(DATA_PATH, 'ts.csv'), groups)

    # Dump the training frame to disk before uploading it.
    csv_name = '{}_{}.csv'.format(experiment_name, '-'.join(group_list))
    data.to_csv(csv_name, index=False)

    project = pio.Project.from_id(PROJECT_ID)
    dataset = project.create_dataset(name=experiment_name, dataframe=data)

    fit_kwargs = dict(
        time_window=time_window,
        training_config=pio.TrainingConfig(
            advanced_models=[pio.AdvancedModel.LinReg],
            normal_models=[pio.NormalModel.LinReg],
            features=[pio.Feature.Counts],
            profile=pio.Profile.Quick,
        ),
        column_config=pio.ColumnConfig(
            target_column='target',
            time_column='time',
            # group_columns=group_list  (left disabled, as in the original)
        ),
    )
    return project.fit_timeseries_regression(experiment_name, dataset, **fit_kwargs)
def setup_module(module):
    """Build the project, dataset and predictions shared at module level.

    Sets the module globals ``project``, ``dataset``,
    ``validation_prediction`` and ``deployment_prediction``.

    Args:
        module: Unused by this function.

    Raises:
        RuntimeError: If the experiment version reports the ``failed`` state
            once the wait is over.
    """
    global project, dataset, validation_prediction, deployment_prediction

    def _has_failed(version):
        # The state is read from the version's private status mapping.
        return version._status['state'] == 'failed'

    # Project and dataset the whole module works on.
    project = pio.Project.new(name=PROJECT_NAME,
                              description="description test sdk")
    dataset = project.create_dataset('test_exporter',
                                     file_name='data_exporter/titanic.csv')

    # Train a single decision-tree model with the quick profile.
    quick_config = pio.TrainingConfig(
        advanced_models=[],
        normal_models=[],
        simple_models=[pio.SimpleModel.DecisionTree],
        features=[],
        profile=pio.Profile.Quick,
    )
    columns = pio.ColumnConfig(target_column='Survived',
                               id_column='PassengerId')
    version = project.fit_classification(
        'test_exporter_classif',
        dataset,
        columns,
        metric=pio.metrics.Classification.AUC,
        training_config=quick_config,
    )

    # Block until at least one model exists or the training has failed.
    version.wait_until(lambda v: len(v.models) > 0 or _has_failed(v))
    if _has_failed(version):
        raise RuntimeError('Could not train experiment')

    # Validation prediction straight from the experiment version.
    validation_prediction = version.predict_from_dataset(dataset)

    # Deploy the best model and wait for the deployment to be done.
    best_model = version.best_model
    deployment = project.create_experiment_deployment(
        'test_sdk_' + TESTING_ID, best_model)
    deployment.wait_until(lambda d: d.run_state == 'done')

    # Deployment prediction on the same dataset.
    deployment_prediction = deployment.predict_from_dataset(dataset)
def train_model(uc_name, groups=1, time_window=pio.TimeWindow(-90, -30, 1, 15)):
    """Fit a quick time-series usecase on the ``ts.csv`` data.

    The frame returned by ``get_data`` is dumped to a CSV file (relative
    path, named after the usecase and the joined group list), uploaded
    through ``pio.Dataset.new`` and then passed to ``pio.TimeSeries.fit``.

    Args:
        uc_name: Name used for the CSV dump, the dataset and the usecase.
        groups: Forwarded to ``get_data`` as its second argument.
        time_window: Time window handed to ``pio.TimeSeries.fit``.

    Returns:
        Whatever ``pio.TimeSeries.fit`` returns.
    """
    data, group_list = get_data(os.path.join(DATA_PATH, 'ts.csv'), groups)

    # Dump the training frame to disk before uploading it.
    csv_name = '{}_{}.csv'.format(uc_name, '-'.join(group_list))
    data.to_csv(csv_name, index=False)

    dataset = pio.Dataset.new(name=uc_name, dataframe=data)

    fit_kwargs = dict(
        time_window=time_window,
        training_config=pio.TrainingConfig(
            normal_models=[pio.Model.LinReg],
            lite_models=[pio.Model.LinReg],
            features=[pio.Feature.Counts],
            profile=pio.Profile.Quick,
        ),
        column_config=pio.ColumnConfig(
            target_column='target',
            time_column='time',
            # group_columns=group_list  (left disabled, as in the original)
        ),
    )
    return pio.TimeSeries.fit(uc_name, dataset, **fit_kwargs)
import os import pandas as pd import previsionio as pio from .utils import get_testing_id TESTING_ID = get_testing_id() pio.config.default_timeout = 120 col_config = pio.ColumnConfig(target_column='class', filename_column='filename') uc_config = pio.TrainingConfig(normal_models=[pio.Model.LinReg], lite_models=[], simple_models=[], features=[pio.Feature.Counts], profile=pio.Profile.Quick) test_datasets = {} dataset_name = 'cats_and_dogs_train' dataset_test_name = TESTING_ID + '-' + dataset_name def upload_datasets(): datapath = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data_img/{}'.format(dataset_name)) # upload CSV reference file dataset_csv = pio.Dataset.new( name=dataset_test_name, dataframe=pd.read_csv(os.path.join(datapath, '{}.csv'.format(dataset_name))) ) # upload ZIP images folder dataset_zip = pio.DatasetImages.new( name=dataset_test_name,
import os import pandas as pd import pytest import previsionio as pio from .datasets import make_supervised_datasets, remove_datasets from . import DATA_PATH from .utils import train_model, get_testing_id, DROP_COLS TESTING_ID = get_testing_id() pio.config.zip_files = False pio.config.default_timeout = 80 uc_config = pio.TrainingConfig(models=[pio.Model.LinReg], simple_models=[pio.SimpleModel.DecisionTree], features=[pio.Feature.Counts], profile=pio.Profile.Quick) test_datasets = {} type_problem_2_pio_class = { 'regression': pio.Regression, 'classification': pio.Classification, 'multiclassification': pio.MultiClassification, } type_problems = type_problem_2_pio_class.keys() def make_pio_datasets(paths): for problem_type, p in paths.items(): dataset = pio.Dataset.new(p.split('/')[-1].replace( '.csv',
# --- Client initialisation ---------------------------------------
# Replace both placeholders with the instance URL and the API token.
url = """https://<your instance>.prevision.io"""
token = """<your token>"""
pio.client.init_client(url, token)

# --- Data loading ------------------------------------------------
# Read the training CSV locally, then register it on the platform.
dataframe = pd.read_csv('helloworld_train.csv')
dataset = pio.Dataset.new(
    name='helloworld_train',
    dataframe=dataframe,
)

# --- Usecase training --------------------------------------------
# Training setup: XGBoost only, full feature set, quick profile.
uc_config = pio.TrainingConfig(
    models=[pio.Model.XGBoost],
    features=pio.Feature.Full,
    profile=pio.Profile.Quick,
)

# Launch the classification usecase, scored with AUC.
uc = pio.Classification.fit(
    'helloworld_classif',
    dataset,
    metric=pio.metrics.Classification.AUC,
    training_config=uc_config,
)

# Block until the usecase reports a non-zero length (at least one model).
uc.wait_until(lambda current: len(current) > 0)

# Show the usecase status, the model count and the current score.
uc.print_info()
print('Current number of models:', len(uc))
print('Current (best model) score:', uc.score)
import pandas as pd import previsionio as pio from . import DATA_PATH from .datasets import make_supervised_datasets, remove_datasets from .utils import train_model, get_testing_id TESTING_ID = get_testing_id() PROJECT_NAME = "sdk_test_experiment_deployment" + str(TESTING_ID) PROJECT_ID = "" experiment_version_config = pio.TrainingConfig( advanced_models=[pio.AdvancedModel.LinReg], normal_models=[pio.NormalModel.LinReg], simple_models=[pio.SimpleModel.DecisionTree], features=[pio.Feature.Counts], profile=pio.Profile.Quick, ) training_type_2_pio_class = { 'regression': "fit_regression", 'classification': "fit_classification", 'multiclassification': "fit_multiclassification", } training_types = training_type_2_pio_class.keys() test_datasets = {} def make_pio_datasets(paths):
# Fetch the train and test datasets already stored on the platform.
train = pio.dataset.Dataset.get_by_name(dataset_name='regression_house_80')
test = pio.dataset.Dataset.get_by_name(dataset_name='regression_house_20')

# Transform one variable: cast 'bathrooms' to int, then round each value.
train._data['bathrooms'] = (
    train._data['bathrooms']
    .astype('int')
    .apply(lambda value: round(value))
)
test._data['bathrooms'] = (
    test._data['bathrooms']
    .astype('int')
    .apply(lambda value: round(value))
)

# Register the transformed frames as new datasets.
train_fe = pio.Dataset.new('regression_house_80_fe', dataframe=train._data)
test_fe = pio.Dataset.new('regression_house_20_fe', dataframe=test._data)

# Start the AutoML usecase: two model families, full features, no blend.
uc_config = pio.TrainingConfig(
    models=[pio.Model.XGBoost, pio.Model.RandomForest],
    features=pio.Feature.Full,
    profile=pio.Profile.Quick,
    with_blend=False,
)
col_config = pio.ColumnConfig(target_column='TARGET', id_column='ID')
uc = pio.Regression.fit(
    'housing_from_sdk',
    dataset=train_fe,
    holdout_dataset=test_fe,
    column_config=col_config,
    training_config=uc_config,
)

# Block until the usecase reports a length greater than one.
uc.wait_until(lambda current: len(current) > 1)

# Retrieve some information derived from the usecase:
# correlation matrix
import os import pandas as pd import previsionio as pio from .utils import get_testing_id TESTING_ID = get_testing_id() pio.config.default_timeout = 120 col_config = pio.ColumnConfig(target_column='class', filename_column='filename') uc_config = pio.TrainingConfig(models=[pio.Model.LinReg], features=[pio.Feature.Counts], profile=pio.Profile.Quick) test_datasets = {} dataset_name = 'cats_and_dogs_train' dataset_test_name = TESTING_ID + '-' + dataset_name def upload_datasets(): datapath = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data_img/{}'.format(dataset_name)) # upload CSV reference file dataset_csv = pio.Dataset.new(name=dataset_test_name, dataframe=pd.read_csv( os.path.join( datapath, '{}.csv'.format(dataset_name)))) # upload ZIP images folder dataset_zip = pio.DatasetImages.new(name=dataset_test_name,