def data_regression(
    self,
    column_names: list = None,
    column_types: dict = None,
    output_dim: int = None,
    **kwargs,
) -> ak.StructuredDataRegressor:
    """Data Regression.

    Args:
        column_names (list, optional): Names of the columns. Defaults to None.
        column_types (dict, optional): Types of the columns. Defaults to None.
        output_dim (int, optional): Number of output dimensions. Defaults to None.

    Returns:
        ak.StructuredDataRegressor: AutoKeras structured data regression class.
    """
    return ak.StructuredDataRegressor(
        column_names=column_names,
        column_types=column_types,
        output_dim=output_dim,
        loss=self.loss,
        metrics=self.metrics,
        project_name=self.project_name,
        max_trials=self.max_trials,
        directory=self.directory,
        objective=self.objective,
        tuner=self.tuner,
        overwrite=self.overwrite,
        seed=self.seed,
        max_model_size=self.max_model_size,
        **kwargs,
    )
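# A minimal, self-contained sketch of how the regressor configured by the method
# above is typically used. The synthetic data and column names here are
# illustrative assumptions, not values taken from the original code.
import numpy as np
import autokeras as ak

x = np.random.rand(100, 2)
y = np.random.rand(100, 1)
reg = ak.StructuredDataRegressor(
    column_names=["a", "b"],
    column_types={"a": "numerical", "b": "numerical"},
    output_dim=1,
    max_trials=1,
    overwrite=True,
)
reg.fit(x, y, epochs=1)
print(reg.predict(x).shape)  # expected: (100, 1)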
def test_structured_reg_fit_call_auto_model_fit(fit, tmp_path):
    auto_model = ak.StructuredDataRegressor(directory=tmp_path, seed=utils.SEED)

    auto_model.fit(
        x=utils.generate_structured_data(num_instances=100),
        y=utils.generate_data(num_instances=100, shape=(1,)),
    )

    assert fit.is_called
def test_structured_data_from_numpy_regressor(tmp_dir):
    num_data = 500
    data = common.structured_data(num_data)
    x_train = data
    y = np.random.rand(num_data, 1)
    y_train = y
    clf = ak.StructuredDataRegressor(directory=tmp_dir, max_trials=1)
    clf.fit(x_train, y_train, epochs=2, validation_data=(x_train, y_train))
def test_structured_data_from_csv_regressor(tmp_dir):
    clf = ak.StructuredDataRegressor(directory=tmp_dir, max_trials=1)
    clf.fit(x=common.TRAIN_FILE_PATH, y='fare', epochs=2,
            validation_data=common.TEST_FILE_PATH)
    x_test = common.csv_test('regression')
    assert clf.predict(x_test).shape == (len(x_test), 1)
def run():
    import numpy as np
    import autokeras as ak
    from sklearn.model_selection import train_test_split
    import pickle
    from tensorflow.keras.utils import plot_model
    from tensorflow.keras.models import model_from_json

    data = s.open_data()
    score = s.open_score()

    print('pn, c1, c2, c3.\nSelect!')
    flag = True
    while flag:
        print('study_type = ', end='', flush=True)
        study_type = input()
        if study_type == 'pn':
            flag = False
        elif study_type == 'c1':
            data = s.c1(data)
            flag = False
        elif study_type == 'c2':
            data = s.c2(data)
            flag = False
        elif study_type == 'c3':
            data = s.c3(data)
            flag = False
        else:
            flag = True

    score = score / 100
    x_train, x_test, y_train, y_test = train_test_split(data, score,
                                                        test_size=0.5)

    reg = ak.StructuredDataRegressor(max_trials=3)
    reg.fit(x_train, y_train, epochs=5)

    eva = reg.evaluate(x_test, y_test)
    eva_name = './result/' + study_type + '_3.txt'
    np.savetxt(eva_name, eva)

    model = reg.export_model()
    # json_string = model.to_json()
    # json_name = './result/' + study_type + '_3.json'
    # with open(json_name, 'w', encoding='utf-8') as f:
    #     f.write(json_string)

    model_name = './result/' + study_type + '_3.h5'
    model.save(model_name, save_format='tf')

    pdf_name = './result/' + study_type + '_3.pdf'
    plot_model(model, to_file=pdf_name)

    # weights_name = study_type + '.hdf5'
    # model.save_weights(weights_name)

    return model, x_test, y_test
def build_pipeline(self):
    """Makes a pipeline based on data_config."""
    if self.problem_type == "classification":
        automl_pipeline = ak.StructuredDataClassifier(**self.automl_settings)
    elif self.problem_type == "regression":
        automl_pipeline = ak.StructuredDataRegressor(**self.automl_settings)
    return automl_pipeline
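# A short usage sketch for build_pipeline. The config object below is a
# hypothetical stand-in (the real owning class and how it populates
# problem_type / automl_settings are not shown in this snippet); it only
# illustrates that problem_type selects the AutoKeras task class and that
# automl_settings is forwarded as keyword arguments.
import types

config = types.SimpleNamespace(
    problem_type="regression",
    automl_settings={"max_trials": 2, "overwrite": True},
)
# Calling the function above directly with the stand-in config:
pipeline = build_pipeline(config)
print(type(pipeline).__name__)  # StructuredDataRegressor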
def test_structured_data_from_numpy_regressor(tmp_dir):
    num_data = 500
    num_train = 400
    data = common.structured_data(num_data)
    x_train, x_test = data[:num_train], data[num_train:]
    y = np.random.rand(num_data, 1)
    y_train, y_test = y[:num_train], y[num_train:]
    clf = ak.StructuredDataRegressor(directory=tmp_dir, max_trials=1)
    clf.fit(x_train, y_train, epochs=2, validation_data=(x_train, y_train))
    assert clf.predict(x_test).shape == (len(y_test), 1)
def test_structured_reg_fit_call_auto_model_fit(fit, tmp_path):
    auto_model = ak.StructuredDataRegressor(directory=tmp_path, seed=utils.SEED)

    auto_model.fit(
        x=pd.read_csv(utils.TRAIN_CSV_PATH).to_numpy().astype(np.unicode)[:100],
        y=utils.generate_data(num_instances=100, shape=(1,)),
    )

    assert fit.is_called
def test_structured_data_from_numpy_regressor(tmp_dir):
    num_data = 500
    num_train = 400
    data = common.generate_structured_data(num_data)
    x_train, x_test = data[:num_train], data[num_train:]
    y = common.generate_data(num_instances=num_data, shape=(1,))
    y_train, y_test = y[:num_train], y[num_train:]
    clf = ak.StructuredDataRegressor(directory=tmp_dir, max_trials=1,
                                     seed=common.SEED)
    clf.fit(x_train, y_train, epochs=2, validation_data=(x_train, y_train))
    assert clf.predict(x_test).shape == (len(y_test), 1)
def run_example(self):
    # Initialize the regressor.
    regressor = ak.StructuredDataRegressor(max_trials=10,
                                           loss="mean_absolute_error")
    # x is the path to the csv file. y is the name of the column to predict.
    regressor.fit(x='./data/churn-train.csv', y='churn_probability')
    # Evaluate the error (MAE) of the found model.
    print('MAE: {mae}'.format(mae=regressor.evaluate(
        x='./data/churn-test.csv', y='churn_probability')))
def test_structured_data_from_numpy_regressor(tmp_path):
    num_data = 500
    num_train = 400
    data = utils.generate_data(num_data, shape=(10,))
    x_train, x_test = data[:num_train], data[num_train:]
    y = utils.generate_data(num_instances=num_data, shape=(1,))
    y_train, y_test = y[:num_train], y[num_train:]
    clf = ak.StructuredDataRegressor(directory=tmp_path, max_trials=2,
                                     seed=utils.SEED)
    clf.fit(x_train, y_train, epochs=20, validation_data=(x_train, y_train))
    clf.export_model()
    assert clf.predict(x_test).shape == (len(y_test), 1)
def test_structured_data_regressor(tmp_path):
    num_data = 500
    num_train = 400
    data = pd.read_csv(utils.TRAIN_CSV_PATH).to_numpy().astype(
        np.unicode)[:num_data]
    x_train, x_test = data[:num_train], data[num_train:]
    y = utils.generate_data(num_instances=num_data, shape=tuple())
    y_train, y_test = y[:num_train], y[num_train:]
    clf = ak.StructuredDataRegressor(
        directory=tmp_path, max_trials=2, seed=utils.SEED
    )
    clf.fit(x_train, y_train, epochs=11, validation_data=(x_train, y_train))
    clf.export_model()
    assert clf.predict(x_test).shape == (len(y_test), 1)
def test_structured_regressor(init, fit, tmp_dir):
    num_data = 500
    train_x = common.generate_structured_data(num_data)
    train_y = common.generate_data(num_instances=num_data, shape=(1,))
    clf = ak.StructuredDataRegressor(
        column_names=common.COLUMN_NAMES_FROM_NUMPY,
        directory=tmp_dir,
        max_trials=1,
        seed=common.SEED)
    clf.fit(train_x, train_y, epochs=2, validation_data=(train_x, train_y))
    assert init.called
    assert fit.called
def build_model(self) -> ak.AutoModel:
    model = None
    if self.data_type == 'image':
        if self.task_type == 'regression':
            model = ak.ImageRegressor()
        elif self.task_type == 'classification':
            model = ak.ImageClassifier()
    elif self.data_type == 'text':
        if self.task_type == 'regression':
            model = ak.TextRegressor()
        elif self.task_type == 'classification':
            model = ak.TextClassifier()
    elif self.data_type == 'csv':
        if self.task_type == 'regression':
            model = ak.StructuredDataRegressor()
        elif self.task_type == 'classification':
            model = ak.StructuredDataClassifier()
    return model
def test_sd_reg_init_hp0_equals_hp_of_a_model(tmp_path):
    clf = ak.StructuredDataRegressor(
        directory=tmp_path,
        column_names=["a", "b"],
        column_types={"a": "numerical", "b": "numerical"},
    )
    clf.inputs[0].shape = (2,)
    clf.outputs[0].in_blocks[0].output_shape = (10,)
    init_hp = task_specific.STRUCTURED_DATA_REGRESSOR[0]
    hp = kerastuner.HyperParameters()
    hp.values = copy.copy(init_hp)

    clf.tuner.hypermodel.build(hp)

    assert set(init_hp.keys()) == set(hp._hps.keys())
def main():
    house_dataset = fetch_california_housing()
    data = house_dataset.data
    target = np.array(house_dataset.target)
    x_train, x_test, y_train, y_test = train_test_split(data, target,
                                                        test_size=0.2,
                                                        random_state=42)
    clf = ak.StructuredDataRegressor(max_trials=10, directory='tmp_dir',
                                     overwrite=True)

    start_time = timeit.default_timer()
    clf.fit(x_train, y_train)
    stop_time = timeit.default_timer()

    mse = clf.evaluate(x_test, y_test)[1]
    print('RMSE: {rmse}'.format(rmse=round(math.sqrt(mse), 2)))
    print('Total time: {time} seconds.'.format(
        time=round(stop_time - start_time, 2)))
def train_autokeras(X_train, X_test, y_train, y_test, mtype, common_name_model,
                    problemtype, classes, default_featurenames, transform_model,
                    settings, model_session):
    # create file names
    files = list()
    model_name = common_name_model + '.pickle'

    # remove folder if it exists
    if mtype == 'c':
        if 'structured_data_classifier' in os.listdir():
            shutil.rmtree('structured_data_classifier')
        model = ak.StructuredDataClassifier(max_trials=100)
        model.fit(X_train, y_train)
        files.append('structured_data_classifier')
    elif mtype == 'r':
        if 'structured_data_regressor' in os.listdir():
            shutil.rmtree('structured_data_regressor')
        model = ak.StructuredDataRegressor(max_trials=100)
        model.fit(X_train, y_train)
        files.append('structured_data_regressor')

    # show predictions
    predictions = model.predict(X_test).flatten()
    print(predictions)

    # pickle the model
    picklefile = open(common_name_model + '.pickle', 'wb')
    pickle.dump(model, picklefile)
    picklefile.close()

    # get variables
    files.append(model_name)
    model_dir = os.getcwd()

    return model_name, model_dir, files
def main():
    # paths
    # path to time series
    time_series_path = '../output/dataframes/BTCUSDT/BTCUSDT_m_15.csv'
    # output paths
    model_dir = '../output/models/model'
    model_path = f'{model_dir}/model.h5'

    # data settings
    # number of candles to use as features
    feature_length = 50
    # number of candles to predict
    output_length = 1
    # share of training data from all samples
    train_size = 0.8
    # label function
    label = calculate_next_closing_price

    # model settings
    # number of models to test
    max_trials = 300

    # load data sets
    print('Loading data sets...')
    x_train, x_test, y_train, y_test = get_datasets(time_series_path,
                                                    feature_length,
                                                    output_length,
                                                    train_size, label)

    # normalize data
    x_train, mean, std = normalize_data(x_train)
    # store mean and std
    np.save(f'{model_dir}/mean', mean)
    np.save(f'{model_dir}/std', std)
    x_test = normalize_data(x_test, mean, std)[0]

    # flatten features
    x_train = np.array([x_train[i].flatten() for i in range(len(x_train))])
    x_test = np.array([x_test[i].flatten() for i in range(len(x_test))])

    start_time = timer()

    # get model
    if label == calculate_up_down_label:
        search = ak.StructuredDataClassifier(max_trials=max_trials,
                                             overwrite=True,
                                             metrics=['accuracy'])
    else:
        search = ak.StructuredDataRegressor(max_trials=max_trials,
                                            overwrite=True,
                                            metrics=['mean_absolute_error'])
    search.fit(x=x_train, y=y_train, validation_data=(x_test, y_test))
    print(f'Done getting model after {timer() - start_time}s!')

    model = search.export_model()
    model.summary()
    print(f'Evaluation: {model.evaluate(x_test, y_test)}')

    model.save(model_path)
    print(f'Model saved in {model_path}')
def test_structured_data_from_csv_regressor(tmp_dir):
    clf = ak.StructuredDataRegressor(directory=tmp_dir, max_trials=1)
    clf.fit(x=common.TRAIN_FILE_PATH, y='fare', epochs=2,
            validation_data=common.TEST_FILE_PATH)
x_train = x_train[:40]
x_test = x_test[:40]
y_train = y_train[:40]
y_test = y_test[:40]

x_train = x_train.reshape(40, 13).astype('float32') / 255.
x_test = x_test.reshape(40, 13).astype('float32') / 255.

from tensorflow.keras.utils import to_categorical
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

model = ak.StructuredDataRegressor(
    overwrite=False,
    max_trials=1,
    loss='mse',
    metrics=['mae'],
)

from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.callbacks import ReduceLROnPlateau
es = EarlyStopping(monitor='val_loss', mode='min', patience=6)
lr = ReduceLROnPlateau(monitor='val_loss', patience=3, factor=0.5, verbose=2)
ck = ModelCheckpoint('C:/data/modelcheckpoint', save_weights_only=True,
                     save_best_only=True, monitor='val_loss', verbose=1)

# validation_split assumed so the val_loss-monitored callbacks have something to track
model.fit(x_train, y_train, validation_split=0.2, callbacks=[es, lr, ck])
    columns=house_dataset.feature_names + ["Price"],
)
train_size = int(df.shape[0] * 0.9)
df[:train_size].to_csv("train.csv", index=False)
df[train_size:].to_csv("eval.csv", index=False)
train_file_path = "train.csv"
test_file_path = "eval.csv"

"""
The second step is to run the
[StructuredDataRegressor](/structured_data_regressor).
As a quick demo, we set epochs to 10.
You can also leave the epochs unspecified for an adaptive number of epochs.
"""

# Initialize the structured data regressor.
reg = ak.StructuredDataRegressor(
    overwrite=True, max_trials=3
)  # It tries 3 different models.
# Feed the structured data regressor with training data.
reg.fit(
    # The path to the train.csv file.
    train_file_path,
    # The name of the label column.
    "Price",
    epochs=10,
)
# Predict with the best model.
predicted_y = reg.predict(test_file_path)
# Evaluate the best model with testing data.
print(reg.evaluate(test_file_path, "Price"))

"""
## Data Format
The AutoKeras StructuredDataRegressor is quite flexible for the data format.
"""
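"""
As a quick illustration of that flexibility, the same regressor also accepts
in-memory data: a pandas DataFrame with the label passed separately as a Series,
or plain NumPy arrays. The sketch below assumes pandas is imported as pd earlier
in the script and reuses the train.csv file written above.
"""
# Read the training split back into memory.
x_train = pd.read_csv(train_file_path)
# Pop the label column out of the feature frame.
y_train = x_train.pop("Price")
# DataFrame features with a Series label ...
reg.fit(x_train, y_train, epochs=10)
# ... or plain NumPy arrays work just as well.
reg.fit(x_train.to_numpy(), y_train.to_numpy(), epochs=10)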
train_size = int(df.shape[0] * 0.9)
df[:train_size].to_csv('train.csv', index=False)
df[train_size:].to_csv('eval.csv', index=False)
train_file_path = 'train.csv'
test_file_path = 'eval.csv'

"""
The second step is to run the
[StructuredDataRegressor](/structured_data_regressor).
As a quick demo, we set epochs to 10.
You can also leave the epochs unspecified for an adaptive number of epochs.
"""

# Initialize the structured data regressor.
reg = ak.StructuredDataRegressor(
    overwrite=True,
    max_trials=3)  # It tries 3 different models.
# Feed the structured data regressor with training data.
reg.fit(
    # The path to the train.csv file.
    train_file_path,
    # The name of the label column.
    'Price',
    epochs=10)
# Predict with the best model.
predicted_y = reg.predict(test_file_path)
# Evaluate the best model with testing data.
print(reg.evaluate(test_file_path, 'Price'))

"""
## Data Format
values = values.astype('float32')

# Normalize
scaler = MinMaxScaler(feature_range=(0, 1))
scaled = scaler.fit_transform(values)
scaler_Y = MinMaxScaler(feature_range=(0, 1))

# Sliding window
reframed = series_to_supervised(data=scaled, col_names=dataset.columns,
                                n_in=18, n_out=1)

n_train_hours = int(reframed.shape[0] * 0.8)
train = reframed.iloc[:n_train_hours, :]
valid = reframed.iloc[n_train_hours:16779, :]

column_names = reframed.columns
column_names = column_names.drop('Energy(t)')
data_type = len(column_names) * ['numerical']
data_type = dict(zip(column_names, data_type))

# IPython.embed()
regressor = ak.StructuredDataRegressor(max_trials=2, column_types=data_type)
regressor.fit(x=train.drop(columns=['Energy(t)']), y=train['Energy(t)'])

IPython.embed()
# # delete
# dataset.drop(features_drop, axis=1, inplace=True)

# # save to csv
# df_train.to_csv("df_train.csv", mode='w')
# df_test.to_csv("df_test.csv", mode='w')

for dataset in train_test_data:
    dataset.drop('id', axis=1, inplace=True)

df_train.to_csv("df_train.csv", mode='w')
df_test.to_csv("df_test.csv", mode='w')

# # It tries n different models.
# clf = ak.StructuredDataClassifier(max_trials=100)
# # Feed the structured data classifier with training data.
# train_y = df_train.pop('price')
# clf.fit(x=df_train, y=train_y)

# It tries n different models.
clf = ak.StructuredDataRegressor()
# Feed the structured data regressor with training data.
train_y = df_train.pop('price')
clf.fit(x=df_train, y=train_y)

preds = clf.predict(df_test)

df_pred = pd.read_csv("./data/sample_submission.csv")
df_pred['price'] = preds
df_pred.to_csv("./houseSubmission.csv", index=None)
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, Dropout
import autokeras as ak
from tensorflow.keras.datasets import boston_housing

(x_train, y_train), (x_test, y_test) = boston_housing.load_data()
print(x_train.shape, y_train.shape)  # (404, 13) (404,)
print(x_test.shape, y_test.shape)    # (102, 13) (102,)

model = ak.StructuredDataRegressor(overwrite=True, max_trials=3)
model.fit(x_train, y_train, epochs=10, validation_split=0.2)

results = model.evaluate(x_test, y_test)

model2 = model.export_model()
best_model = model.tuner.get_best_model()
best_model.save('C:/data/h5/best_boston.h5')

# best_model = load_model('C:/data/h5/best_boston.h5')
# results = best_model.evaluate(x_test, y_test)
# print('results: ', results)
# best_model.summary()
import autokeras as ak
from sklearn.datasets import load_boston

dataset = load_boston()
x = dataset.data
y = dataset.target

from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=0.8,
                                                     shuffle=True,
                                                     random_state=42)

# 1. Data / preprocessing
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
scaler.fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

model = ak.StructuredDataRegressor(loss='mse', metrics=['mae'], max_trials=2,
                                   overwrite=True)

from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
es = EarlyStopping(patience=20, verbose=1)
lr = ReduceLROnPlateau(factor=0.5, patience=10, verbose=1)

model.fit(x_train, y_train, epochs=300, validation_split=0.2, callbacks=[es, lr])

results = model.evaluate(x_test, y_test)

from sklearn.metrics import r2_score
y_pred = model.predict(x_test)
r2 = r2_score(y_test, y_pred)

model2 = model.export_model()
try:
    model2.save('ak_save_boston', save_format='tf')
except Exception:
    # assumed fallback to HDF5 if the SavedModel export fails
    model2.save('ak_save_boston.h5')
def run_example(self, train_path, test_path, target):
    metrics = {}
    train = pd.read_csv(train_path)

    # Auto-Keras
    regressor = ak.StructuredDataRegressor(max_trials=10,
                                           loss="mean_absolute_error")
    regressor.fit(x=train, y=target)
    metrics["auto-keras"] = regressor.evaluate(x=train, y=target)[0]

    # AutoGluon
    train_data = task.Dataset(file_path=train_path)
    label_column = target
    predictor = task.fit(train_data=train_data, label=label_column,
                         eval_metric="mean_absolute_error")
    test_data = task.Dataset(file_path=test_path)
    y_test = test_data[label_column]  # values to predict
    # delete label column to prove we're not cheating
    test_data_nolab = test_data.drop(labels=[label_column], axis=1)
    y_pred = predictor.predict(test_data_nolab)
    metrics["auto-gluon"] = predictor.evaluate_predictions(
        y_true=y_test, y_pred=y_pred,
        auxiliary_metrics=True)["mean_absolute_error"]

    # auto-sklearn
    categorical_feature_mask = train.dtypes == object
    categorical_cols = train.columns[categorical_feature_mask].tolist()
    le = LabelEncoder()
    train[categorical_cols] = train[categorical_cols].apply(
        lambda col: le.fit_transform(col))
    X_train = train.drop(columns=[target]).to_numpy()
    y_train = train[target].to_numpy()
    test = pd.read_csv(test_path)
    test[categorical_cols] = test[categorical_cols].apply(
        lambda col: le.fit_transform(col))
    X_test = test.drop(columns=[target]).to_numpy()
    y_test = test[target].to_numpy()
    automl = autosklearn.regression.AutoSklearnRegressor(
        time_left_for_this_task=120,
        per_run_time_limit=30,
        resampling_strategy='cv',
        resampling_strategy_arguments={'folds': 5},
    )
    automl.fit(X_train.copy(), y_train.copy(),
               metric=autosklearn.metrics.mean_absolute_error)
    automl.refit(X_train.copy(), y_train.copy())
    predictions = automl.predict(X_test)
    metrics["auto-sklearn"] = sklearn.metrics.mean_absolute_error(
        y_test, predictions)

    # H2O AutoML
    h2o.init()
    train = h2o.import_file(train_path)
    test = h2o.import_file(test_path)
    x = train.columns
    y = target
    x.remove(y)
    aml = H2OAutoML(max_runtime_secs=20, seed=1, sort_metric="mae")
    aml.train(x=x, y=y, training_frame=train)
    metrics["h2o-automl"] = aml.leader.model_performance(test).mae()
    h2o.shutdown()

    # TPOT
    tpot = TPOTRegressor(generations=5, population_size=50, verbosity=2,
                         random_state=42, scoring='neg_mean_absolute_error',
                         cv=5)
    tpot.fit(X_train, y_train)
    metrics["tpot"] = -tpot.score(X_test, y_test)

    best_metric = float("inf")
    best_model = "MODEL"
    for metric in metrics:
        if metrics[metric] < best_metric:
            best_metric = metrics[metric]
            best_model = metric
    print("THE BEST AUTOML TOOL IS " + str(best_model) + ", WITH A MAE OF " +
          str(best_metric) + " ACHIEVED BY THE BEST MODEL.")
    return metrics
def prepare_and_test(X, y, task, timelife):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,
                                                        random_state=1)
    if isinstance(y_train, pd.Series):
        y_train = y_train.to_frame()
    if isinstance(y_test, pd.Series):
        y_test = y_test.to_frame()
    print(y)
    # print(X_train)
    # print(y_train, "let's see: y_train " + str(len(np.unique(y_train))))
    # print(y_test, "let's see: y_test " + str(len(np.unique(y_test))))

    if task == 'classification':
        clf = ak.StructuredDataClassifier(overwrite=True, max_trials=3,
                                          metrics=['accuracy', f1_score])
        custom_obj = {'f1_score': f1_score}
    else:
        clf = ak.StructuredDataRegressor(
            overwrite=True, max_trials=3,
            metrics=['mean_squared_error', r2_score])
        custom_obj = {'r2_score': r2_score}

    clf.fit(X_train, y_train, validation_split=0.15, epochs=timelife)

    model = clf.export_model(custom_objects=custom_obj)
    model.summary()
    summary = []
    model.summary(print_fn=lambda x: summary.append(x))
    model_summary = '\n'.join(summary)

    y_pred = clf.predict(X_test, custom_objects=custom_obj)
    # le = LabelEncoder()  # maybe it is better to remove everything related to the label encoder
    # print('not sure what I am doing here ', accuracy_score(y_test, y_pred))
    # y_test = le.fit_transform(y_test).to_numpy()
    # y_pred = le.fit_transform(y_pred).to_numpy()
    y_test = y_test.to_numpy()
    print(y_test)
    print(y_pred)
    # print(clf.evaluate(X_test, y_test))

    if task == 'classification':
        shutil.rmtree('./structured_data_classifier')
        if len(np.unique(y)) > 2:
            print('multiclass')
            print(sklearn.metrics.accuracy_score(y_test, y_pred),
                  sklearn.metrics.f1_score(y_test, y_pred, average='weighted'))
            # print(clf.evaluate(X_test, y_test, custom_objects=custom_obj))
            # return (clf.evaluate(X_test, y_test)[0],
            #         f1_score(y_test, y_pred, average='weighted'), model_summary)
            return (sklearn.metrics.accuracy_score(y_test, y_pred),
                    sklearn.metrics.f1_score(y_test, y_pred, average='weighted'),
                    model_summary)
        else:
            print('binary')
            # print(clf.evaluate(X_test, y_test))
            # return (clf.evaluate(X_test, y_test)[0],
            #         f1_score(y_test, y_pred), model_summary)
            return (sklearn.metrics.accuracy_score(y_test, y_pred),
                    sklearn.metrics.f1_score(y_test, y_pred),
                    model_summary)
    else:
        shutil.rmtree('./structured_data_regressor')
        # print(clf.evaluate(X_test, y_test))
        # print(clf.evaluate(X_test, y_test)[0], r2_score(y_test, y_pred))
        # return (clf.evaluate(X_test, y_test)[0],
        #         r2_score(y_test, y_pred), model_summary)
        return (np.sqrt(sklearn.metrics.mean_squared_error(y_test, y_pred)),
                sklearn.metrics.r2_score(y_test, y_pred),
                model_summary)
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
import numpy as np
import tensorflow as tf
import autokeras as ak

datasets = load_boston()
x = datasets.data
y = datasets.target
print(x.shape)
print(y.shape)

x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=0.8,
                                                     random_state=104)

model = ak.StructuredDataRegressor(
    overwrite=True,
    max_trials=2,  # how many trials to run
    loss='mse',
    metrics=['mae']
)

model.fit(x_train, y_train, epochs=10, validation_split=0.2)

results = model.evaluate(x_test, y_test)
print(results)

model2 = model.export_model()
try:
    model2.save('./ak_test/boston', save_format='tf')
except:
    model2.save('./ak_test/boston.h5')

best_model = model.tuner.get_best_model()
def get_auto_model(self):
    return ak.StructuredDataRegressor(max_trials=10,
                                      directory=self.tmp_dir,
                                      overwrite=True)