Пример #1
0
def train(config):
    """Train an Auto-PyTorch classifier on data loaded from a pickle file.

    Args:
        config: object with attributes ``pkl_file`` (path to a pickle
            holding a dict with keys "array" and "cluster"),
            ``test_ratio`` (fraction held out for the test split) and
            ``num_iter`` (number of optimizer iterations).
    """
    # NOTE(review): pickle.load is unsafe on untrusted files — only use
    # with trusted input.
    with open(config.pkl_file, "rb") as fp:
        data = pickle.load(fp)
    # (the `with` block already closed the file; the old explicit
    # fp.close() was redundant and has been removed)

    X = data["array"]
    y = data["cluster"]

    print("Encoder cluster -> label ")
    encoder = LabelEncoder()
    label = encoder.fit_transform(y)
    num_classes = len(np.unique(label))
    # BUG FIX: compute_class_weight's parameters are keyword-only in
    # modern scikit-learn; the old positional call raises a TypeError.
    weight = compute_class_weight(class_weight="balanced",
                                  classes=np.unique(label),
                                  y=label)

    X_train, X_test, y_train, y_test = train_test_split(
        X, label, test_size=config.test_ratio, shuffle=True)
    print("X train : ({},{})".format(X_train.shape[0], X_train.shape[1]))
    print("X test : ({},{})".format(X_test.shape[0], X_test.shape[1]))

    print("Build Model")
    model = AutoNetClassification(log_level='info',
            cuda=False,
            dataset_name="VKH_10X",
            shuffle=True,
            num_iterations=config.num_iter,
            budget_type='epochs',
            min_budget=100,
            max_budget=10000,
            result_logger_dir="./logger",
            cross_validator="k_fold",
            cross_validator_args={"n_splits": 5})
    print("Training")
    model.fit(X_train, y_train, validation_split=0.2)
    return
Пример #2
0
def train_autopytorch(X_train,X_test,y_train,y_test,mtype,common_name_model,problemtype,classes,default_featurenames,transform_model,settings,model_session):
	"""Train an Auto-PyTorch model and save it to disk with torch.save.

	Args:
		mtype: 'c' for classification, 'r' for regression.
		common_name_model: basename for the saved model file.
		(remaining arguments are passed through / unused here but kept
		for interface compatibility with sibling train_* helpers)

	Returns:
		(model_name, model_dir, files) — saved filename, cwd, and the
		list of artifact files produced by the run.

	Raises:
		ValueError: if mtype is neither 'c' nor 'r'.
	"""
	# name model
	model_name = common_name_model + '.pickle'
	files = list()

	if mtype == 'c':
		from autoPyTorch import AutoNetClassification
		autonet = AutoNetClassification(log_level='debug', max_runtime=900, min_budget=50, max_budget=150)
		autonet.fit(X_train, y_train, validation_split=0.30)
		print(autonet.predict(X_test).flatten())
	elif mtype == 'r':
		from autoPyTorch import AutoNetRegression
		autonet = AutoNetRegression(log_level='debug', max_runtime=900, min_budget=50, max_budget=150)
		autonet.fit(X_train, y_train)
		print(autonet.predict(X_test).flatten())
	else:
		# BUG FIX: previously an unknown mtype fell through to torch.save
		# with `autonet` undefined, raising an opaque NameError.
		raise ValueError("mtype must be 'c' (classification) or 'r' (regression), got %r" % (mtype,))

	print('saving model -->')
	torch.save(autonet, model_name)

	# get model directory
	files.append(model_name)
	files.append('configs.json')
	files.append('results.json')
	model_dir = os.getcwd()

	return model_name, model_dir, files
Пример #3
0
def create_model(max_batch):
    """Build an AutoNetClassification configured for a hyperband time-budget
    search, with the dataloader batch size capped at ``max_batch``.

    Relies on module-level ``preset`` and ``save_output_to``; the result
    directory is wiped before the run so old logs don't accumulate.
    """
    search_space_updates = HyperparameterSearchSpaceUpdates()
    # TODO: this still runs out of memory and wastes resources
    search_space_updates.append(node_name="CreateImageDataLoader",
                                hyperparameter="batch_size",
                                log=False,
                                value_range=[2, max_batch])
    # start from a clean result-logger directory
    shutil.rmtree(save_output_to)
    # NOTE: search_space_updates is currently not passed to the constructor
    # (the hyperparameter_search_space_updates kwarg stays disabled).
    autonet = AutoNetClassification(
        preset,
        min_workers=2,
        log_level="info",
        budget_type="time",
        result_logger_dir=save_output_to,
        min_budget=200,
        max_budget=600,
        num_iterations=1,
        optimizer=["adam", "adamw", "sgd", "rmsprop"],
        algorithm="hyperband",
        optimize_metric="balanced_accuracy",
        additional_metrics=["pac_metric"],
        lr_scheduler=["cosine_annealing", "cyclic", "step", "adapt",
                      "plateau", "alternating_cosine", "exponential"],
        networks=["mlpnet", "shapedmlpnet", "resnet", "shapedresnet"],
        use_tensorboard_logger=True,
        cuda=True,
    )
    return autonet
    def fit(self, data_manager):
        """Instantiate the AutoNet variant matching the data manager's
        problem type and register the standard metrics plus a test-result
        log function on its pipeline.

        Returns:
            dict with the single key 'autonet' holding the configured net.

        Raises:
            ValueError: for problem types other than feature regression,
                multilabel or classification.
        """
        constructors = {
            ProblemType.FeatureRegression: AutoNetRegression,
            ProblemType.FeatureMultilabel: AutoNetMultilabel,
            ProblemType.FeatureClassification: AutoNetClassification,
        }
        constructor = constructors.get(data_manager.problem_type)
        if constructor is None:
            raise ValueError('Problem type ' + str(data_manager.problem_type) +
                             ' is not defined')
        autonet = constructor()

        # Log the test-set result after each evaluation.
        log_selector = autonet.pipeline[
            autonet_nodes.LogFunctionsSelector.get_name()]
        log_selector.add_log_function(
            'test_result',
            test_result(autonet, data_manager.X_test, data_manager.Y_test))

        # Register every supported metric; each autonet_metrics attribute
        # shares its metric's name.
        metrics = autonet.pipeline[autonet_nodes.MetricSelector.get_name()]
        for metric_name in ('pac_metric', 'balanced_accuracy',
                            'mean_distance', 'multilabel_accuracy',
                            'auc_metric', 'accuracy'):
            metrics.add_metric(metric_name,
                               getattr(autonet_metrics, metric_name))

        return {'autonet': autonet}
Пример #5
0
    additional_metrices = []
# NOTE(review): this snippet is truncated by extraction — the indented line
# above belongs to an if-branch that starts before this excerpt, and the
# fit() call at the bottom is cut off mid-argument-list.

""" TEST CASE 4: Openml, missing values """
if TEST_CASE == 4:
    dm.read_data("openml:188", is_classification=True)
    metric = "accuracy"
    additional_metrices = []

""" TEST CASE 5: MNIST """
if TEST_CASE == 5:
    dm.read_data(os.path.join(dataset_dir, "classification/phpnBqZGZ.csv"), is_classification=True)
    metric = "accuracy"
    additional_metrices = []

# Generate autonet
# (test case 3 is the multilabel case; every other case uses the plain
# classification net)
autonet = AutoNetClassification() if TEST_CASE != 3 else AutoNetMultilabel()

# add metrics and test_result to pipeline
autonet.pipeline[autonet_nodes.LogFunctionsSelector.get_name()].add_log_function('test_result', test_result(autonet, dm.X_test, dm.Y_test))
metrics = autonet.pipeline[autonet_nodes.MetricSelector.get_name()]
metrics.add_metric('pac_metric', autonet_metrics.pac_metric)
metrics.add_metric('auc_metric', autonet_metrics.auc_metric)
metrics.add_metric('accuracy', autonet_metrics.accuracy)

# Fit autonet using train data
# NOTE(review): the call below is truncated here — its closing arguments
# and parenthesis lie outside this excerpt.
res = autonet.fit(min_budget=300,
                  max_budget=900, max_runtime=1800, budget_type='time',
                  normalization_strategies=['maxabs'],
                  train_metric=metric,
                  additional_metrics=additional_metrices,
                  cv_splits=3,
Пример #6
0
 # NOTE(review): excerpt from a larger script — `scaler`, the train/test
 # arrays and the one-space indentation come from surrounding context that
 # is not visible here. Assumes `scaler` was already fitted — TODO confirm.
 X_train = scaler.transform(X_train)
 X_test = scaler.transform(X_test)
 #Init custom search space
 # search_space_updates = HyperparameterSearchSpaceUpdates()
 # search_space_updates.append(node_name="CreateDataLoader",
 #                             hyperparameter="batch_size",
 #                             value_range=[32],
 #                             log=False)
 #Init autonet
 # autoPyTorch = AutoNetClassification(hyperparameter_search_space_updates=search_space_updates,  # config
 # Restrict the search to resnet architectures; small epoch budgets with
 # many iterations.
 autoPyTorch = AutoNetClassification(
     # "full_cs",  # config
     networks=["resnet"],
     # torch_num_threads=2,
     log_level='info',
     budget_type='epochs',
     min_budget=5,
     max_budget=20,
     num_iterations=100,
     cuda=True,
     use_pynisher=False)
 #fit
 # NOTE(review): the test split is passed as the validation set here, so
 # later "test" evaluation is not on unseen data — confirm intent.
 autoPyTorch.fit(X_train=X_train,
                 Y_train=y_train,
                 X_valid=X_test,
                 Y_valid=y_test,
                 optimize_metric="auc_metric",
                 loss_modules=["cross_entropy", "cross_entropy_weighted"])
 #predict
 y_pred = autoPyTorch.predict(X_test)
 #check
Пример #7
0
# Load the optdigits dataset from the repository's datasets directory.
dm = DataManager(verbose=1)
dataset_dir = os.path.abspath(os.path.join(os.path.dirname( __file__ ), '..', 'datasets'))


dm.read_data(os.path.join(dataset_dir, "classification/dataset_28_optdigits.csv"), is_classification=True)
# 5620 samples, 10 classes, 65 features      --->    98% validation accuracy







# Track memory usage for the duration of the fit.
mem_logger = MemoryLogger()
mem_logger.start()

try:
    autonet = AutoNetClassification(early_stopping_patience=15, budget_type='epochs', min_budget=1, max_budget=9, num_iterations=1, log_level='error')

    # NOTE(review): X_valid/Y_valid are dm.X_train/dm.Y_train while the fit
    # trains on the full dm.X/dm.Y — the validation data overlaps the
    # training data, so validation scores are optimistic. Confirm whether
    # this is intentional for this smoke test.
    res = autonet.fit(X_train=dm.X,
                        Y_train=dm.Y,
                        X_valid=dm.X_train,
                        Y_valid=dm.Y_train,
                        categorical_features=dm.categorical_features)
    print(res)
    
finally:
    # Always stop the logger, even if fit raises.
    mem_logger.stop()

Пример #8
0
__author__ = "Max Dippel, Michael Burkart and Matthias Urban"
__version__ = "0.0.1"
__license__ = "BSD"

import os
import sys

# Make the repository root importable when running this example in place.
sys.path.append(os.path.abspath(os.path.join(__file__, "..", "..", "..")))
from autoPyTorch import AutoNetClassification
from autoPyTorch.data_management.data_manager import DataManager

# A synthetic classification problem; any train/valid numpy matrices work
# here, so you can plug in your own data manager instead.
dm = DataManager()
dm.generate_classification(num_classes=3, num_features=21, num_samples=1500)

# Every parameter has a default; these tiny budgets just keep the run fast.
autonet = AutoNetClassification(
    budget_type='epochs',
    min_budget=1,
    max_budget=9,
    num_iterations=1,
    log_level='info',
)

res = autonet.fit(
    X_train=dm.X,
    Y_train=dm.Y,
    X_valid=dm.X_train,
    Y_valid=dm.Y_train,
)

print(res)
print("Score:", autonet.score(X_test=dm.X_train, Y_test=dm.Y_train))
Пример #9
0
__author__ = "Max Dippel, Michael Burkart and Matthias Urban"
__version__ = "0.0.1"
__license__ = "BSD"

import os
import sys

# Make the repository root importable when running this example in place.
sys.path.append(os.path.abspath(os.path.join(__file__, "..", "..", "..")))
from autoPyTorch import AutoNetClassification
from autoPyTorch.data_management.data_manager import DataManager

# Synthetic data; swap in your own train/valid numpy matrices if desired.
dm = DataManager()
dm.generate_classification(num_classes=3, num_features=21, num_samples=1500)

# "tiny_cs" preset plus tiny budgets: a fast smoke test of the pipeline.
autonet = AutoNetClassification(
    "tiny_cs",
    budget_type='epochs',
    min_budget=1,
    max_budget=9,
    num_iterations=1,
    log_level='debug',
    use_pynisher=False,
)

# 3-fold cross-validation on the training data.
res = autonet.fit(
    X_train=dm.X,
    Y_train=dm.Y,
    cross_validator="k_fold",
    cross_validator_args={"n_splits": 3},
)

print(res)
print("Score:", autonet.score(X_test=dm.X_train, Y_test=dm.Y_train))
Пример #10
0
                 is_classification=True)
    # NOTE(review): the line above is the tail of a dm.read_data(...) call
    # whose beginning lies outside this excerpt; the autonet.fit(...) call
    # at the bottom is likewise cut off mid-argument-list.
    metric = "pac_metric"
    additional_metrices = []
""" TEST CASE 4: Openml, missing values """
if TEST_CASE == 4:
    dm.read_data("openml:188", is_classification=True)
    metric = "accuracy"
    additional_metrices = []
""" TEST CASE 5: MNIST """
if TEST_CASE == 5:
    dm.read_data("openml:40996", is_classification=True)
    metric = "accuracy"
    additional_metrices = []

# Generate autonet
# (test case 3 is the multilabel case; every other case uses the plain
# classification net)
autonet = AutoNetClassification() if TEST_CASE != 3 else AutoNetMultilabel()

# add metrics and test_result to pipeline
autonet.pipeline[
    autonet_nodes.LogFunctionsSelector.get_name()].add_log_function(
        'test_result', test_result(autonet, dm.X_test, dm.Y_test))

# Fit autonet using train data
# NOTE(review): truncated — closing arguments and parenthesis are missing.
res = autonet.fit(min_budget=300,
                  max_budget=900,
                  max_runtime=1800,
                  budget_type='time',
                  normalization_strategies=['maxabs'],
                  train_metric=metric,
                  additional_metrics=additional_metrices,
                  cross_validator='stratified_k_fold',
Пример #11
0
from autoPyTorch import AutoNetClassification
# Other imports for later usage
import openml
import json

# Instantiate once just to inspect the default configuration and the
# hyperparameter search space.
autonet = AutoNetClassification(config_preset="tiny_cs",
                                result_logger_dir="logs/")
current_configuration = autonet.get_current_autonet_config()
hyperparameter_search_space = autonet.get_hyperparameter_search_space()

# Fetch an OpenML task together with its predefined train/test split.
task = openml.tasks.get_task(task_id=31)
X, y = task.get_X_and_y()
ind_train, ind_test = task.get_train_test_split_indices()
X_train, Y_train = X[ind_train], y[ind_train]
X_test, Y_test = X[ind_test], y[ind_test]

# Fresh instance for the actual fit.
autonet = AutoNetClassification(config_preset="tiny_cs",
                                result_logger_dir="logs/")
# Fit (note that the settings are for demonstration, you might need larger budgets)
results_fit = autonet.fit(X_train=X_train,
                          Y_train=Y_train,
                          validation_split=0.3,
                          max_runtime=300,
                          min_budget=60,
                          max_budget=100,
                          refit=True)

# Persist the fit results as JSON.
with open("logs/results_fit.json", "w") as results_file:
    json.dump(results_fit, results_file)

# See how the found configuration performs (often it just predicts 0).
score = autonet.score(X_test=X_test, Y_test=Y_test)
pred = autonet.predict(X=X_test)
Пример #12
0
    # Test logging
    # Register extra per-epoch log functions (ensemble test predictions and
    # the ensemble test result) by name.
    autonet_config["additional_logs"] = [
        test_predictions_for_ensemble.__name__, test_result_ens.__name__
    ]

    # Initialize (ensemble)
    if args.ensemble_setting == "ensemble":
        print("Using ensembles!")
        ensemble_config = get_ensemble_config()
        # ensemble settings override/extend the base autonet config
        autonet_config = {**autonet_config, **ensemble_config}
        autonet = AutoNetEnsemble(AutoNetClassification,
                                  config_preset="full_cs",
                                  **autonet_config)
    elif args.ensemble_setting == "normal":
        autonet = AutoNetClassification(config_preset="full_cs",
                                        **autonet_config)
    # NOTE(review): any other ensemble_setting leaves `autonet` undefined
    # and the next statement raises NameError — confirm the argument parser
    # restricts the choices to these two values.

    # Test logging cont.
    autonet.pipeline[LogFunctionsSelector.get_name()].add_log_function(
        name=test_predictions_for_ensemble.__name__,
        log_function=test_predictions_for_ensemble(autonet, X_test, y_test),
        loss_transform=False)
    autonet.pipeline[LogFunctionsSelector.get_name()].add_log_function(
        name=test_result_ens.__name__,
        log_function=test_result_ens(autonet, X_test, y_test))

    autonet.pipeline[BaselineTrainer.get_name()].add_test_data(X_test)

    print(autonet.get_current_autonet_config())

    # NOTE(review): truncated here — the fit() argument list continues
    # beyond this excerpt.
    fit_results = autonet.fit(X_train, y_train,
Пример #13
0
# The Iris flower dataset from sklearn
# https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_iris.html#sklearn.datasets.load_iris
import sklearn.model_selection
import sklearn.datasets
# BUG FIX: sklearn.metrics is used below but was never imported — it only
# worked by accident when another sklearn import pulled it in transitively.
import sklearn.metrics

X, y = sklearn.datasets.load_iris(return_X_y=True)
x_train, x_test, y_train, y_test = \
        sklearn.model_selection.train_test_split(X, y, random_state=1)
# Use automated machine learning
from autoPyTorch import AutoNetClassification
# initialize autopytorch
autoPyTorch = AutoNetClassification("tiny_cs",
                                    log_level='info',
                                    max_runtime=900,
                                    min_budget=30,
                                    max_budget=90,
                                    cuda=True,
                                    use_pynisher=False)
# Train on the dataset (30% of the train set held out for validation)
autoPyTorch.fit(x_train, y_train, validation_split=0.3)

# Check accuracy on the held-out test data
y_pred = autoPyTorch.predict(x_test)
print("Accuracy score", sklearn.metrics.accuracy_score(y_test, y_pred))
Пример #14
0
import openml
from pprint import pprint
from autoPyTorch import AutoNetClassification
from sklearn.metrics import accuracy_score

# Fetch an OpenML task together with its predefined train/test split.
task = openml.tasks.get_task(task_id=32)
X, y = task.get_X_and_y()
ind_train, ind_test = task.get_train_test_split_indices()

# Run Auto-PyTorch with the "tiny_cs" preset and small demo budgets.
autoPyTorch = AutoNetClassification(
    "tiny_cs",  # config preset
    log_level='info',
    max_runtime=300,
    min_budget=30,
    max_budget=90,
)

# Fit on the training split, holding out 30% for validation.
autoPyTorch.fit(X[ind_train], y[ind_train], validation_split=0.3)

# Evaluate on the held-out test split.
y_pred = autoPyTorch.predict(X[ind_test])
print("Accuracy score", accuracy_score(y[ind_test], y_pred))

# Show the hyperparameter configuration the search selected.
pprint(autoPyTorch.fit_result["optimized_hyperparameter_config"])
                 is_classification=True)
    # 4601 samples, 2 classes, 58 features
    # NOTE(review): the call ending above is truncated — its opening
    # dm.read_data( line lies outside this excerpt; `dm`, `dataset_dir` and
    # `TEST_CASE` are defined in that missing context.

if TEST_CASE == 8:
    dm.read_data(os.path.join(dataset_dir,
                              "classification/dataset_32_pendigits.csv"),
                 is_classification=True)

if TEST_CASE == 9:
    dm.read_data(os.path.join(dataset_dir, "classification/php4fATLZ.csv"),
                 is_classification=True)

if TEST_CASE == 10:
    dm.read_data(os.path.join(dataset_dir, "classification/phpnBqZGZ.csv"),
                 is_classification=True)

# A quick smoke-test configuration: tiny epoch budgets, one iteration.
autonet = AutoNetClassification(budget_type='epochs',
                                min_budget=1,
                                max_budget=9,
                                num_iterations=1,
                                log_level='info')

res = autonet.fit(
    X_train=dm.X_train,
    Y_train=dm.Y_train,
    early_stopping_patience=3,
    # validation_split=0.3,
    categorical_features=dm.categorical_features)

print(res)