示例#1
0
def train_model(uc_name, dataset, type_problem, type_problem_class,
                training_config):
    """Start a training through ``type_problem_class.fit``.

    Args:
        uc_name: Passed to ``fit`` as its first positional argument.
        dataset: Passed to ``fit`` as its second positional argument.
        type_problem: Not read by this function; kept in the signature so
            existing callers keep working.
        type_problem_class: Object exposing a ``fit`` callable.
        training_config: Forwarded to ``fit`` as ``training_config``.

    Returns:
        Whatever ``type_problem_class.fit`` returns.
    """
    fit = type_problem_class.fit
    # The target column is always 'target'; DROP_COLS is the module-level
    # list handed to ColumnConfig as ``drop_list``.
    column_config = pio.ColumnConfig(target_column='target',
                                     drop_list=DROP_COLS)
    return fit(uc_name, dataset, column_config,
               training_config=training_config)
示例#2
0
def train_model(experiment_name, groups=1, time_window=pio.TimeWindow(-90, -30, 1, 15)):
    """Create a dataset from ``ts.csv`` and launch a time series regression.

    The data returned by ``get_data`` is also written to a CSV file named
    after the experiment and the joined group list.

    Args:
        experiment_name: Name used for the CSV file, the dataset and the
            experiment.
        groups: Forwarded to ``get_data`` as its second argument.
        time_window: Forwarded to ``fit_timeseries_regression``. The default
            instance is built once, when the function is defined.

    Returns:
        The object returned by ``project.fit_timeseries_regression``.
    """
    data, group_list = get_data(os.path.join(DATA_PATH, 'ts.csv'), groups)

    # Keep a CSV copy of the data that is about to be uploaded.
    csv_name = '{}_{}.csv'.format(experiment_name, '-'.join(group_list))
    data.to_csv(csv_name, index=False)

    project = pio.Project.from_id(PROJECT_ID)
    dataset = project.create_dataset(name=experiment_name, dataframe=data)

    training_config = pio.TrainingConfig(
        advanced_models=[pio.AdvancedModel.LinReg],
        normal_models=[pio.NormalModel.LinReg],
        features=[pio.Feature.Counts],
        profile=pio.Profile.Quick,
    )

    # group_list is only used for the file name above; no group columns are
    # passed to the column configuration.
    column_config = pio.ColumnConfig(target_column='target',
                                     time_column='time')

    return project.fit_timeseries_regression(
        experiment_name,
        dataset,
        time_window=time_window,
        training_config=training_config,
        column_config=column_config,
    )
示例#3
0
def train_model(project_id, experiment_name, dataset, training_type, training_type_func, training_config):
    """Launch a training by calling a project method selected by name.

    Args:
        project_id: Identifier handed to ``pio.Project.from_id``.
        experiment_name: First positional argument of the training call.
        dataset: Second positional argument of the training call.
        training_type: Not read by this function; kept in the signature so
            existing callers keep working.
        training_type_func: Name of the attribute looked up on the project
            and then called to start the training.
        training_config: Forwarded as the ``training_config`` keyword.

    Returns:
        Whatever the selected project method returns.
    """
    fit = getattr(pio.Project.from_id(project_id), training_type_func)
    column_config = pio.ColumnConfig(target_column='target',
                                     drop_list=DROP_COLS)
    return fit(experiment_name, dataset, column_config,
               training_config=training_config)
示例#4
0
def setup_module(module):
    """Build the module-level objects shared by the tests of this module.

    Presumably invoked by pytest as the xunit-style module setup hook; the
    ``module`` argument is not read.

    Sets the globals ``project``, ``dataset``, ``validation_prediction`` and
    ``deployment_prediction``.

    Raises:
        RuntimeError: If the experiment version reports the state 'failed'.
    """
    global project, dataset, validation_prediction, deployment_prediction

    def has_failed(version):
        return version._status['state'] == 'failed'

    def has_model_or_failed(version):
        return len(version.models) > 0 or has_failed(version)

    def deployment_done(deployment):
        return deployment.run_state == 'done'

    # Project and dataset
    project = pio.Project.new(name=PROJECT_NAME,
                              description="description test sdk")
    dataset = project.create_dataset('test_exporter',
                                     file_name='data_exporter/titanic.csv')

    # Train a single decision tree with the quick profile
    quick_config = pio.TrainingConfig(
        advanced_models=[],
        normal_models=[],
        simple_models=[pio.SimpleModel.DecisionTree],
        features=[],
        profile=pio.Profile.Quick,
    )
    titanic_columns = pio.ColumnConfig(target_column='Survived',
                                       id_column='PassengerId')
    version = project.fit_classification(
        'test_exporter_classif',
        dataset,
        titanic_columns,
        metric=pio.metrics.Classification.AUC,
        training_config=quick_config,
    )

    # Stop waiting as soon as a model exists or the training has failed,
    # then turn a failure into an explicit error.
    version.wait_until(has_model_or_failed)
    if has_failed(version):
        raise RuntimeError('Could not train experiment')
    validation_prediction = version.predict_from_dataset(dataset)

    # Deploy the best model and predict through the deployment once it is done
    best_model = version.best_model
    deployment = project.create_experiment_deployment(
        'test_sdk_' + TESTING_ID, best_model)
    deployment.wait_until(deployment_done)
    deployment_prediction = deployment.predict_from_dataset(dataset)
示例#5
0
def train_model(uc_name, groups=1, time_window=pio.TimeWindow(-90, -30, 1, 15)):
    """Create a dataset from ``ts.csv`` and launch a time series use case.

    The data returned by ``get_data`` is also written to a CSV file named
    after the use case and the joined group list.

    Args:
        uc_name: Name used for the CSV file, the dataset and the use case.
        groups: Forwarded to ``get_data`` as its second argument.
        time_window: Forwarded to ``pio.TimeSeries.fit``. The default
            instance is built once, when the function is defined.

    Returns:
        The object returned by ``pio.TimeSeries.fit``.
    """
    data, group_list = get_data(os.path.join(DATA_PATH, 'ts.csv'), groups)

    # Keep a CSV copy of the data that is about to be uploaded.
    csv_name = '{}_{}.csv'.format(uc_name, '-'.join(group_list))
    data.to_csv(csv_name, index=False)

    dataset = pio.Dataset.new(name=uc_name, dataframe=data)

    training_config = pio.TrainingConfig(
        normal_models=[pio.Model.LinReg],
        lite_models=[pio.Model.LinReg],
        features=[pio.Feature.Counts],
        profile=pio.Profile.Quick,
    )

    # group_list is only used for the file name above; no group columns are
    # passed to the column configuration.
    column_config = pio.ColumnConfig(target_column='target',
                                     time_column='time')

    return pio.TimeSeries.fit(
        uc_name,
        dataset,
        time_window=time_window,
        training_config=training_config,
        column_config=column_config,
    )
示例#6
0
import os
import pandas as pd
import previsionio as pio
from .utils import get_testing_id

# Identifier obtained from the test utilities; used below to prefix the
# dataset name.
TESTING_ID = get_testing_id()

# Default timeout applied through the SDK configuration object.
pio.config.default_timeout = 120

# Column roles: 'class' is the target, 'filename' holds the file names.
col_config = pio.ColumnConfig(
    target_column='class',
    filename_column='filename',
)

# Quick-profile training restricted to a single normal model.
uc_config = pio.TrainingConfig(
    normal_models=[pio.Model.LinReg],
    lite_models=[],
    simple_models=[],
    features=[pio.Feature.Counts],
    profile=pio.Profile.Quick,
)

test_datasets = dict()
dataset_name = 'cats_and_dogs_train'
dataset_test_name = '-'.join((TESTING_ID, dataset_name))


def upload_datasets():
    datapath = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data_img/{}'.format(dataset_name))
    # upload CSV reference file
    dataset_csv = pio.Dataset.new(
        name=dataset_test_name,
        dataframe=pd.read_csv(os.path.join(datapath, '{}.csv'.format(dataset_name)))
    )
    # upload ZIP images folder
    dataset_zip = pio.DatasetImages.new(
        name=dataset_test_name,
示例#7
0
# Cast the 'bathrooms' column of both splits to integers.
# NOTE(review): round() on an already-integer value returns it unchanged, so
# the apply step does not alter the values after the cast -- confirm whether
# rounding before the cast was intended.
for _split in (train, test):
    _split._data['bathrooms'] = (
        _split._data['bathrooms'].astype('int').apply(lambda value: round(value)))

# Register the transformed frames as new datasets.
train_fe = pio.Dataset.new('regression_house_80_fe', dataframe=train._data)
test_fe = pio.Dataset.new('regression_house_20_fe', dataframe=test._data)

# Start the AutoML use case.
uc_config = pio.TrainingConfig(models=[pio.Model.XGBoost,
                                       pio.Model.RandomForest],
                               features=pio.Feature.Full,
                               profile=pio.Profile.Quick,
                               with_blend=False)

col_config = pio.ColumnConfig(target_column='TARGET',
                              id_column='ID')

uc = pio.Regression.fit(
    'housing_from_sdk',
    dataset=train_fe,
    holdout_dataset=test_fe,
    column_config=col_config,
    training_config=uc_config,
)

# Block until len(uc) exceeds 1 (presumably the number of trained models --
# TODO confirm).
uc.wait_until(lambda use_case: len(use_case) > 1)

# Retrieve information derived from the use case:
# correlation matrix
print('*************************************')
print('***         GET CORR MATRIX       ***')
CM = uc.get_correlation_matrix()
print(CM)
示例#8
0
          'smart_242_normalized', 'smart_242_raw', 'smart_250_normalized',
          'smart_250_raw', 'smart_251_normalized', 'smart_251_raw',
          'smart_252_normalized', 'smart_252_raw', 'smart_254_normalized',
          'smart_254_raw', 'smart_255_normalized', 'smart_255_raw']
    dset = pd.read_csv(
        '/Users/gpistre/Prevision/prevision-python/examples/data/mclass.csv'
    ).sample(n=101).rename(columns={'failure': 'target'}).drop(cs, axis=1)

    uc_config = pio.TrainingConfig(models=[pio.Model.LinReg],

                                   features=[pio.Feature.Counts],

                                   profile=pio.Profile.Quick,
                                   with_blend=False)

    col_config = pio.ColumnConfig(target_column='target')

    train_dset = pio.Dataset.new(name='events_test' + '_train',
                                 dataframe=dset)

    uc = pio.MultiClassification.fit('events_test',
                                     dataset=train_dset,
                                     column_config=col_config,
                                     training_config=uc_config)

    uc.save()

    uc = pio.Supervised.load('events_test.pio')
    uc.wait_until(lambda u: len(u) > 0, timeout=None)

    preds = uc.predict(dset.drop('target', axis=1))