Example #1
    def test_classification(self, as_frame=False):

        automl_experiment = AutoML()
        automl_settings = {
            "time_budget": 4,
            "metric": 'accuracy',
            "task": 'classification',
            "log_file_name": "test/iris.log",
            "log_training_metric": True,
            "model_history": True
        }
        X_train, y_train = load_iris(return_X_y=True, as_frame=as_frame)
        automl_experiment.fit(X_train=X_train,
                              y_train=y_train,
                              **automl_settings)
        print(automl_experiment.classes_)
        print(automl_experiment.predict_proba(X_train)[:5])
        print(automl_experiment.model)
        print(automl_experiment.config_history)
        print(automl_experiment.model_history)
        print(automl_experiment.best_iteration)
        print(automl_experiment.best_estimator)
        del automl_settings["metric"]
        del automl_settings["model_history"]
        del automl_settings["log_training_metric"]
        automl_experiment = AutoML()
        duration = automl_experiment.retrain_from_log(
            log_file_name=automl_settings["log_file_name"],
            X_train=X_train,
            y_train=y_train,
            train_full=True,
            record_id=0)
        print(duration)
        print(automl_experiment.model)
        print(automl_experiment.predict_proba(X_train)[:5])
Example #2
    def test_custom_metric(self):

        automl_experiment = AutoML()
        automl_settings = {
            "time_budget": 10,
            'eval_method': 'holdout',
            "metric": custom_metric,
            "task": 'classification',
            "log_file_name": "test/iris_custom.log",
            "log_training_metric": True,
            'log_type': 'all',
            "model_history": True
        }
        X_train, y_train = load_iris(return_X_y=True)
        automl_experiment.fit(X_train=X_train,
                              y_train=y_train,
                              **automl_settings)
        print(automl_experiment.classes_)
        print(automl_experiment.predict_proba(X_train))
        print(automl_experiment.model)
        print(automl_experiment.config_history)
        print(automl_experiment.model_history)
        print(automl_experiment.best_iteration)
        print(automl_experiment.best_estimator)
        automl_experiment = AutoML()
        estimator = automl_experiment.get_estimator_from_log(
            automl_settings["log_file_name"], record_id=0, objective='multi')
        print(estimator)
        time_history, best_valid_loss_history, valid_loss_history, \
            config_history, train_loss_history = get_output_from_log(
                filename=automl_settings['log_file_name'], time_budget=6)
        print(train_loss_history)
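
Example #2 passes a custom_metric callable that is not defined in the snippet itself. The sketch below shows what such a function could look like, assuming FLAML's convention that a custom metric returns a loss to minimize plus a dict of values to log; the exact argument list can differ between FLAML releases, so treat the signature as an assumption rather than the original definition.

def custom_metric(X_val, y_val, estimator, labels,
                  X_train, y_train, weight_val=None, weight_train=None,
                  *args):
    # Sketch of a custom metric: log_loss on the validation fold is the
    # value FLAML minimizes; the training loss is returned only for logging.
    from sklearn.metrics import log_loss
    y_val_pred = estimator.predict_proba(X_val)
    val_loss = log_loss(y_val, y_val_pred, labels=labels, sample_weight=weight_val)
    y_train_pred = estimator.predict_proba(X_train)
    train_loss = log_loss(y_train, y_train_pred, labels=labels, sample_weight=weight_train)
    return val_loss, {"val_loss": val_loss, "train_loss": train_loss}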
Example #3
def run(dataset, config):
    # Benchmark integration entry point; log, __version__, output_subdir, Timer,
    # and result are assumed to be provided by the surrounding benchmark harness.
    log.info(f"\n**** FLAML [v{__version__}] ****\n")

    X_train, y_train = dataset.train.X, dataset.train.y.squeeze()
    X_test, y_test = dataset.test.X, dataset.test.y.squeeze()

    is_classification = config.type == 'classification'
    time_budget = config.max_runtime_seconds
    n_jobs = config.framework_params.get('_n_jobs', config.cores)
    log.info("Running FLAML with {} number of cores".format(config.cores))
    aml = AutoML()

    # Mapping of benchmark metrics to flaml metrics
    metrics_mapping = dict(
        acc='accuracy',
        auc='roc_auc',
        f1='f1',
        logloss='log_loss',
        mae='mae',
        mse='mse',
        rmse='rmse',
        r2='r2',
    )
    perf_metric = metrics_mapping.get(config.metric, 'auto')
    if config.metric not in metrics_mapping:
        log.warning("Performance metric %s not supported; defaulting to 'auto'.",
                    config.metric)

    training_params = {
        k: v
        for k, v in config.framework_params.items() if not k.startswith('_')
    }

    log_dir = output_subdir("logs", config)
    flaml_log_file_name = os.path.join(log_dir, "flaml.log")
    with Timer() as training:
        aml.fit(X_train,
                y_train,
                metric=perf_metric,
                task=config.type,
                n_jobs=n_jobs,
                log_file_name=flaml_log_file_name,
                time_budget=time_budget,
                **training_params)

    with Timer() as predict:
        predictions = aml.predict(X_test)
    probabilities = aml.predict_proba(X_test) if is_classification else None
    labels = aml.classes_ if is_classification else None
    return result(
        output_file=config.output_predictions_file,
        probabilities=probabilities,
        predictions=predictions,
        truth=y_test,
        models_count=len(aml.config_history),
        training_duration=training.duration,
        predict_duration=predict.duration,
        probabilities_labels=labels,
    )
Example #4
def test_mlflow():
    import subprocess
    import sys
    from openml.exceptions import OpenMLServerException  # caught below when loading the task
    from requests.exceptions import ChunkedEncodingError  # caught below when loading the task

    subprocess.check_call([sys.executable, "-m", "pip", "install", "mlflow"])
    import mlflow
    from flaml.data import load_openml_task

    try:
        X_train, X_test, y_train, y_test = load_openml_task(
            task_id=7592, data_dir="test/"
        )
    except (OpenMLServerException, ChunkedEncodingError) as e:
        print(e)
        return
    """ import AutoML class from flaml package """
    from flaml import AutoML

    automl = AutoML()
    settings = {
        "time_budget": 5,  # total running time in seconds
        "metric": "accuracy",  # primary metrics can be chosen from: ['accuracy','roc_auc','roc_auc_ovr','roc_auc_ovo','f1','log_loss','mae','mse','r2']
        "estimator_list": ["lgbm", "rf", "xgboost"],  # list of ML learners
        "task": "classification",  # task type
        "sample": False,  # whether to subsample training data
        "log_file_name": "adult.log",  # flaml log file
    }
    mlflow.set_experiment("flaml")
    with mlflow.start_run() as run:
        automl.fit(X_train=X_train, y_train=y_train, **settings)
        mlflow.sklearn.log_model(automl, "automl")
    loaded_model = mlflow.pyfunc.load_model(f"{run.info.artifact_uri}/automl")
    print(loaded_model.predict(X_test))
    automl._mem_thres = 0
    print(automl.trainable(automl.points_to_evaluate[0]))

    settings["use_ray"] = True
    try:
        with mlflow.start_run() as run:
            automl.fit(X_train=X_train, y_train=y_train, **settings)
            mlflow.sklearn.log_model(automl, "automl")
        automl = mlflow.sklearn.load_model(f"{run.info.artifact_uri}/automl")
        print(automl.predict_proba(X_test))
    except ImportError:
        pass
Example #5
 def test_sparse_matrix_classification(self):
     automl_experiment = AutoML()
     automl_settings = {
         "time_budget": 2,
         "metric": "auto",
         "task": "classification",
         "log_file_name": "test/sparse_classification.log",
         "split_type": "uniform",
         "n_jobs": 1,
         "model_history": True,
     }
     X_train = scipy.sparse.random(1554, 21, dtype=int)
     y_train = np.random.randint(3, size=1554)
     automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
     print(automl_experiment.classes_)
     print(automl_experiment.predict_proba(X_train))
     print(automl_experiment.model)
     print(automl_experiment.config_history)
     print(automl_experiment.best_model_for_estimator("extra_tree"))
     print(automl_experiment.best_iteration)
     print(automl_experiment.best_estimator)
Example #6
def test_package_minimum():
    # Initialize an AutoML instance
    automl = AutoML()
    # Specify automl goal and constraint
    automl_settings = {
        "time_budget": 10,  # in seconds
        "metric": "accuracy",
        "task": "classification",
        "log_file_name": "iris.log",
    }
    X_train, y_train = load_iris(return_X_y=True)
    # Train with labeled input data
    automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
    # Check that `best_config` is created, the log was created and best model is accessible
    assert hasattr(automl, "best_config")
    assert Path("iris.log").exists()
    assert automl.model is not None
    print(automl.model)
    # Predict and check that the prediction shape is as expected
    preds = automl.predict_proba(X_train)
    assert preds.shape == (150, 3)
    print(preds)
Example #7
 def test_classification(self, as_frame=False):
     automl_experiment = AutoML()
     automl_settings = {
         "time_budget": 4,
         "metric": "accuracy",
         "task": "classification",
         "log_file_name": "test/iris.log",
         "log_training_metric": True,
         "n_jobs": 1,
         "model_history": True,
     }
     X_train, y_train = load_iris(return_X_y=True, as_frame=as_frame)
     if as_frame:
         # test drop column
         X_train.columns = range(X_train.shape[1])
         X_train[X_train.shape[1]] = np.zeros(len(y_train))
     automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
     print(automl_experiment.classes_)
     print(automl_experiment.predict(X_train)[:5])
     print(automl_experiment.model)
     print(automl_experiment.config_history)
     print(automl_experiment.best_model_for_estimator("catboost"))
     print(automl_experiment.best_iteration)
     print(automl_experiment.best_estimator)
     del automl_settings["metric"]
     del automl_settings["model_history"]
     del automl_settings["log_training_metric"]
     automl_experiment = AutoML(task="classification")
     duration = automl_experiment.retrain_from_log(
         log_file_name=automl_settings["log_file_name"],
         X_train=X_train,
         y_train=y_train,
         train_full=True,
         record_id=0,
     )
     print(duration)
     print(automl_experiment.model)
     print(automl_experiment.predict_proba(X_train)[:5])
Example #8
def test_hf_data():
    from flaml import AutoML
    import pandas as pd
    import pickle  # used below to round-trip the fitted AutoML object
    import requests  # requests.exceptions.HTTPError is caught around automl.fit
    import shutil  # used below to remove the checkpoint output directory

    train_data = {
        "sentence1": [
            'Amrozi accused his brother , whom he called " the witness " , of deliberately distorting his evidence .',
            "Yucaipa owned Dominick 's before selling the chain to Safeway in 1998 for $ 2.5 billion .",
            "They had published an advertisement on the Internet on June 10 , offering the cargo for sale , he added .",
            "Around 0335 GMT , Tab shares were up 19 cents , or 4.4 % , at A $ 4.56 , having earlier set a record high of A $ 4.57 .",
        ],
        "sentence2": [
            'Referring to him as only " the witness " , Amrozi accused his brother of deliberately distorting his evidence .',
            "Yucaipa bought Dominick 's in 1995 for $ 693 million and sold it to Safeway for $ 1.8 billion in 1998 .",
            "On June 10 , the ship 's owners had published an advertisement on the Internet , offering the explosives for sale .",
            "Tab shares jumped 20 cents , or 4.6 % , to set a record closing high at A $ 4.57 .",
        ],
        "label": [1, 0, 1, 0],
        "idx": [0, 1, 2, 3],
    }
    train_dataset = pd.DataFrame(train_data)

    dev_data = {
        "sentence1": [
            "The stock rose $ 2.11 , or about 11 percent , to close Friday at $ 21.51 on the New York Stock Exchange .",
            "Revenue in the first quarter of the year dropped 15 percent from the same period a year earlier .",
            "The Nasdaq had a weekly gain of 17.27 , or 1.2 percent , closing at 1,520.15 on Friday .",
            "The DVD-CCA then appealed to the state Supreme Court .",
        ],
        "sentence2": [
            "PG & E Corp. shares jumped $ 1.63 or 8 percent to $ 21.03 on the New York Stock Exchange on Friday .",
            "With the scandal hanging over Stewart 's company , revenue the first quarter of the year dropped 15 percent from the same period a year earlier .",
            "The tech-laced Nasdaq Composite .IXIC rallied 30.46 points , or 2.04 percent , to 1,520.15 .",
            "The DVD CCA appealed that decision to the U.S. Supreme Court .",
        ],
        "label": [1, 1, 0, 1],
        "idx": [4, 5, 6, 7],
    }
    dev_dataset = pd.DataFrame(dev_data)

    test_data = {
        "sentence1": [
            "That compared with $ 35.18 million , or 24 cents per share , in the year-ago period .",
            "Shares of Genentech , a much larger company with several products on the market , rose more than 2 percent .",
            "Legislation making it harder for consumers to erase their debts in bankruptcy court won overwhelming House approval in March .",
            "The Nasdaq composite index increased 10.73 , or 0.7 percent , to 1,514.77 .",
        ],
        "sentence2": [
            "Earnings were affected by a non-recurring $ 8 million tax benefit in the year-ago period .",
            "Shares of Xoma fell 16 percent in early trade , while shares of Genentech , a much larger company with several products on the market , were up 2 percent .",
            "Legislation making it harder for consumers to erase their debts in bankruptcy court won speedy , House approval in March and was endorsed by the White House .",
            "The Nasdaq Composite index , full of technology stocks , was lately up around 18 points .",
        ],
        "label": [0, 0, 0, 0],
        "idx": [8, 10, 11, 12],
    }
    test_dataset = pd.DataFrame(test_data)

    custom_sent_keys = ["sentence1", "sentence2"]
    label_key = "label"

    X_train = train_dataset[custom_sent_keys]
    y_train = train_dataset[label_key]

    X_val = dev_dataset[custom_sent_keys]
    y_val = dev_dataset[label_key]

    X_test = test_dataset[custom_sent_keys]

    automl = AutoML()

    automl_settings = {
        "gpu_per_trial": 0,
        "max_iter": 3,
        "time_budget": 10,
        "task": "seq-classification",
        "metric": "accuracy",
        "log_file_name": "seqclass.log",
    }

    automl_settings["custom_hpo_args"] = {
        "model_path": "google/electra-small-discriminator",
        "output_dir": "test/data/output/",
        "ckpt_per_epoch": 5,
        "fp16": False,
    }

    try:
        automl.fit(X_train=X_train,
                   y_train=y_train,
                   X_val=X_val,
                   y_val=y_val,
                   **automl_settings)
    except requests.exceptions.HTTPError:
        return

    automl = AutoML()
    automl.retrain_from_log(X_train=X_train,
                            y_train=y_train,
                            train_full=True,
                            record_id=0,
                            **automl_settings)
    with open("automl.pkl", "wb") as f:
        pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)
    with open("automl.pkl", "rb") as f:
        automl = pickle.load(f)
    shutil.rmtree("test/data/output/")
    automl.predict(X_test)
    automl.predict(["test test", "test test"])
    automl.predict([
        ["test test", "test test"],
        ["test test", "test test"],
        ["test test", "test test"],
    ])

    automl.predict_proba(X_test)
    print(automl.classes_)
Example #9
def test_automl(budget=5, dataset_format="dataframe", hpo_method=None):
    from flaml.data import load_openml_dataset
    from openml.exceptions import OpenMLServerException  # caught below when loading the dataset
    from requests.exceptions import ChunkedEncodingError  # caught below when loading the dataset

    try:
        X_train, X_test, y_train, y_test = load_openml_dataset(
            dataset_id=1169, data_dir="test/", dataset_format=dataset_format
        )
    except (OpenMLServerException, ChunkedEncodingError) as e:
        print(e)
        return
    """ import AutoML class from flaml package """
    from flaml import AutoML

    automl = AutoML()
    settings = {
        "time_budget": budget,  # total running time in seconds
        "metric": "accuracy",  # primary metrics can be chosen from: ['accuracy','roc_auc','roc_auc_ovr','roc_auc_ovo','f1','log_loss','mae','mse','r2']
        "task": "classification",  # task type
        "log_file_name": "airlines_experiment.log",  # flaml log file
        "seed": 7654321,  # random seed
        "hpo_method": hpo_method,
    }
    """The main flaml automl API"""
    automl.fit(X_train=X_train, y_train=y_train, **settings)
    """ retrieve best config and best learner """
    print("Best ML leaner:", automl.best_estimator)
    print("Best hyperparmeter config:", automl.best_config)
    print("Best accuracy on validation data: {0:.4g}".format(1 - automl.best_loss))
    print(
        "Training duration of best run: {0:.4g} s".format(automl.best_config_train_time)
    )
    print(automl.model.estimator)
    print("time taken to find best model:", automl.time_to_find_best_model)
    """ pickle and save the automl object """
    import pickle

    with open("automl.pkl", "wb") as f:
        pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)
    """ compute predictions of testing dataset """
    y_pred = automl.predict(X_test)
    print("Predicted labels", y_pred)
    print("True labels", y_test)
    y_pred_proba = automl.predict_proba(X_test)[:, 1]
    """ compute different metric values on testing dataset """
    from flaml.ml import sklearn_metric_loss_score

    print("accuracy", "=", 1 - sklearn_metric_loss_score("accuracy", y_pred, y_test))
    print(
        "roc_auc", "=", 1 - sklearn_metric_loss_score("roc_auc", y_pred_proba, y_test)
    )
    print("log_loss", "=", sklearn_metric_loss_score("log_loss", y_pred_proba, y_test))
    from flaml.data import get_output_from_log

    (
        time_history,
        best_valid_loss_history,
        valid_loss_history,
        config_history,
        metric_history,
    ) = get_output_from_log(filename=settings["log_file_name"], time_budget=6)
    for config in config_history:
        print(config)
    print(automl.resource_attr)
    print(automl.max_resource)
    print(automl.min_resource)
    automl.fit(X_train=X_train, y_train=y_train, ensemble=True, **settings)
Example #10
            time_history, best_valid_loss_history, valid_loss_history, config_history, train_loss_history = \
                get_output_from_log(filename=settings['log_file_name'], time_budget=TIME_BUDGET)
            plt.title(f'Learning Curve - {m}')
            plt.xlabel('Wall Clock Time (s)')
            plt.ylabel('Validation Accuracy')
            plt.scatter(time_history, 1 - np.array(valid_loss_history))
            plt.step(time_history,
                     1 - np.array(best_valid_loss_history),
                     where='post')
            file_path = f'{PLOTS_ROOT}/automl_learning_curve_{m}.png'
            plt.savefig(file_path)
            plt.close()
            mlflow.log_artifact(file_path)

            y_pred = automl.predict(X_test)
            y_prob = automl.predict_proba(X_test)[:, 1]

            model_scores = calculate_model_score(y_test, y_pred)
            mlflow.log_metrics({
                'accuracy': model_scores['accuracy'],
                'f1': model_scores['f1'],
                'f1_micro': model_scores['f1_micro'],
                'f1_macro': model_scores['f1_macro'],
                'precision': model_scores['precision'],
                'recall': model_scores['recall'],
                'roc_auc': model_scores['roc_auc']
            })

            f1_timestep = calc_score_and_std_per_timestep(
                X_test_df, y_test, y_pred)
            fig, ax = plt.subplots()
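
The fragment above calls a calculate_model_score helper that is not shown in this excerpt. Below is a hypothetical stand-in, consistent with the metric keys logged to MLflow above; it is an illustration only, not the original helper.

from sklearn import metrics

def calculate_model_score(y_true, y_pred):
    # Hypothetical helper: compute the classification scores that the
    # excerpt above passes to mlflow.log_metrics (binary classification assumed).
    return {
        "accuracy": metrics.accuracy_score(y_true, y_pred),
        "f1": metrics.f1_score(y_true, y_pred),
        "f1_micro": metrics.f1_score(y_true, y_pred, average="micro"),
        "f1_macro": metrics.f1_score(y_true, y_pred, average="macro"),
        "precision": metrics.precision_score(y_true, y_pred),
        "recall": metrics.recall_score(y_true, y_pred),
        "roc_auc": metrics.roc_auc_score(y_true, y_pred),
    }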
Example #11
def test_mcc():
    from flaml import AutoML
    import requests
    import pandas as pd

    train_data = {
        "video-id": [
            "anetv_fruimvo90vA",
            "anetv_fruimvo90vA",
            "anetv_fruimvo90vA",
            "anetv_MldEr60j33M",
            "lsmdc0049_Hannah_and_her_sisters-69438",
        ],
        "fold-ind": ["10030", "10030", "10030", "5488", "17405"],
        "startphrase": [
            "A woman is seen running down a long track and jumping into a pit. The camera",
            "A woman is seen running down a long track and jumping into a pit. The camera",
            "A woman is seen running down a long track and jumping into a pit. The camera",
            "A man in a white shirt bends over and picks up a large weight. He",
            "Someone furiously shakes someone away. He",
        ],
        "sent1": [
            "A woman is seen running down a long track and jumping into a pit.",
            "A woman is seen running down a long track and jumping into a pit.",
            "A woman is seen running down a long track and jumping into a pit.",
            "A man in a white shirt bends over and picks up a large weight.",
            "Someone furiously shakes someone away.",
        ],
        "sent2": ["The camera", "The camera", "The camera", "He", "He"],
        "gold-source": ["gen", "gen", "gold", "gen", "gold"],
        "ending0": [
            "captures her as well as lifting weights down in place.",
            "follows her spinning her body around and ends by walking down a lane.",
            "watches her as she walks away and sticks her tongue out to another person.",
            "lifts the weights over his head.",
            "runs to a woman standing waiting.",
        ],
        "ending1": [
            "pans up to show another woman running down the track.",
            "pans around the two.",
            "captures her as well as lifting weights down in place.",
            "also lifts it onto his chest before hanging it back out again.",
            "tackles him into the passenger seat.",
        ],
        "ending2": [
            "follows her movements as the group members follow her instructions.",
            "captures her as well as lifting weights down in place.",
            "follows her spinning her body around and ends by walking down a lane.",
            "spins around and lifts a barbell onto the floor.",
            "pounds his fist against a cupboard.",
        ],
        "ending3": [
            "follows her spinning her body around and ends by walking down a lane.",
            "follows her movements as the group members follow her instructions.",
            "pans around the two.",
            "bends down and lifts the weight over his head.",
            "offers someone the cup on his elbow and strides out.",
        ],
        "label": [1, 3, 0, 0, 2],
    }
    dev_data = {
        "video-id": [
            "lsmdc3001_21_JUMP_STREET-422",
            "lsmdc0001_American_Beauty-45991",
            "lsmdc0001_American_Beauty-45991",
            "lsmdc0001_American_Beauty-45991",
        ],
        "fold-ind": ["11783", "10977", "10970", "10968"],
        "startphrase": [
            "Firing wildly he shoots holes through the tanker. He",
            "He puts his spatula down. The Mercedes",
            "He stands and looks around, his eyes finally landing on: "
            "The digicam and a stack of cassettes on a shelf. Someone",
            "He starts going through someone's bureau. He opens the drawer "
            "in which we know someone keeps his marijuana, but he",
        ],
        "sent1": [
            "Firing wildly he shoots holes through the tanker.",
            "He puts his spatula down.",
            "He stands and looks around, his eyes finally landing on: "
            "The digicam and a stack of cassettes on a shelf.",
            "He starts going through someone's bureau.",
        ],
        "sent2": [
            "He",
            "The Mercedes",
            "Someone",
            "He opens the drawer in which we know someone keeps his marijuana, but he",
        ],
        "gold-source": ["gold", "gold", "gold", "gold"],
        "ending0": [
            "overtakes the rig and falls off his bike.",
            "fly open and drinks.",
            "looks at someone's papers.",
            "stops one down and rubs a piece of the gift out.",
        ],
        "ending1": [
            "squeezes relentlessly on the peanut jelly as well.",
            "walks off followed driveway again.",
            "feels around it and falls in the seat once more.",
            "cuts the mangled parts.",
        ],
        "ending2": [
            "scrambles behind himself and comes in other directions.",
            "slots them into a separate green.",
            "sprints back from the wreck and drops onto his back.",
            "hides it under his hat to watch.",
        ],
        "ending3": [
            "sweeps a explodes and knocks someone off.",
            "pulls around to the drive - thru window.",
            "sits at the kitchen table, staring off into space.",
            "does n't discover its false bottom.",
        ],
        "label": [0, 3, 3, 3],
    }
    test_data = {
        "video-id": [
            "lsmdc0001_American_Beauty-45991",
            "lsmdc0001_American_Beauty-45991",
            "lsmdc0001_American_Beauty-45991",
            "lsmdc0001_American_Beauty-45991",
        ],
        "fold-ind": ["10980", "10976", "10978", "10969"],
        "startphrase": [
            "Someone leans out of the drive - thru window, "
            "grinning at her, holding bags filled with fast food. The Counter Girl",
            "Someone looks up suddenly when he hears. He",
            "Someone drives; someone sits beside her. They",
            "He opens the drawer in which we know someone "
            "keeps his marijuana, but he does n't discover"
            " its false bottom. He stands and looks around, his eyes",
        ],
        "sent1": [
            "Someone leans out of the drive - thru "
            "window, grinning at her, holding bags filled with fast food.",
            "Someone looks up suddenly when he hears.",
            "Someone drives; someone sits beside her.",
            "He opens the drawer in which we know"
            " someone keeps his marijuana, but he does n't discover its false bottom.",
        ],
        "sent2": [
            "The Counter Girl",
            "He",
            "They",
            "He stands and looks around, his eyes",
        ],
        "gold-source": ["gold", "gold", "gold", "gold"],
        "ending0": [
            "stands next to him, staring blankly.",
            "puts his spatula down.",
            "rise someone's feet up.",
            "moving to the side, the houses rapidly stained.",
        ],
        "ending1": [
            "with auditorium, filmed, singers the club.",
            "bumps into a revolver and drops surreptitiously into his weapon.",
            "lift her and they are alarmed.",
            "focused as the sight of someone making his way down a trail.",
        ],
        "ending2": [
            "attempts to block her ransacked.",
            "talks using the phone and walks away for a few seconds.",
            "are too involved with each other to "
            "notice someone watching them from the drive - thru window.",
            "finally landing on: the digicam and a stack of cassettes on a shelf.",
        ],
        "ending3": [
            "is eating solid and stinky.",
            "bundles the flaxen powder beneath the car.",
            "sit at a table with a beer from a table.",
            "deep and continuing, its bleed - length sideburns pressing on him.",
        ],
        "label": [0, 0, 2, 2],
    }

    train_dataset = pd.DataFrame(train_data)
    dev_dataset = pd.DataFrame(dev_data)
    test_dataset = pd.DataFrame(test_data)

    custom_sent_keys = [
        "sent1",
        "sent2",
        "ending0",
        "ending1",
        "ending2",
        "ending3",
        "gold-source",
        "video-id",
        "startphrase",
        "fold-ind",
    ]
    label_key = "label"

    X_train = train_dataset[custom_sent_keys]
    y_train = train_dataset[label_key]

    X_val = dev_dataset[custom_sent_keys]
    y_val = dev_dataset[label_key]

    X_test = test_dataset[custom_sent_keys]
    X_true = test_dataset[label_key]
    automl = AutoML()

    automl_settings = {
        "gpu_per_trial": 0,
        "max_iter": 2,
        "time_budget": 5,
        "task": "multichoice-classification",
        "metric": "accuracy",
        "log_file_name": "seqclass.log",
    }

    automl_settings["custom_hpo_args"] = {
        "model_path": "google/electra-small-discriminator",
        "output_dir": "test/data/output/",
        "ckpt_per_epoch": 1,
        "fp16": False,
    }

    try:
        automl.fit(X_train=X_train,
                   y_train=y_train,
                   X_val=X_val,
                   y_val=y_val,
                   **automl_settings)
    except requests.exceptions.HTTPError:
        return

    y_pred = automl.predict(X_test)
    proba = automl.predict_proba(X_test)
    print(str(len(automl.classes_)) + " classes")
    print(y_pred)
    print(X_true)
    print(proba)
    true_count = 0
    for i, v in X_true.items():
        if y_pred[i] == v:
            true_count += 1
    accuracy = round(true_count / len(y_pred), 5)
    print("Accuracy: " + str(accuracy))
Example #12
def _test_hf_data():
    from flaml import AutoML
    import requests
    from datasets import load_dataset
    import pickle  # used below to round-trip the fitted AutoML object
    import shutil  # used below to remove the checkpoint output directory

    try:
        train_dataset = load_dataset("glue", "mrpc", split="train[:1%]").to_pandas()
        dev_dataset = load_dataset("glue", "mrpc", split="validation[:1%]").to_pandas()
        test_dataset = load_dataset("glue", "mrpc", split="test[:1%]").to_pandas()
    except requests.exceptions.ConnectionError:
        return

    custom_sent_keys = ["sentence1", "sentence2"]
    label_key = "label"

    X_train = train_dataset[custom_sent_keys]
    y_train = train_dataset[label_key]

    X_val = dev_dataset[custom_sent_keys]
    y_val = dev_dataset[label_key]

    X_test = test_dataset[custom_sent_keys]

    automl = AutoML()

    automl_settings = {
        "gpu_per_trial": 1,
        "max_iter": 2,
        "time_budget": 5000,
        "task": "seq-classification",
        "metric": "accuracy",
        "log_file_name": "seqclass.log",
        "use_ray": True,
    }

    automl_settings["custom_hpo_args"] = {
        "model_path": "facebook/muppet-roberta-base",
        "output_dir": "test/data/output/",
        "ckpt_per_epoch": 5,
        "fp16": True,
    }

    automl.fit(
        X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings
    )

    automl = AutoML()
    automl.retrain_from_log(
        X_train=X_train,
        y_train=y_train,
        train_full=True,
        record_id=0,
        **automl_settings
    )
    with open("automl.pkl", "wb") as f:
        pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)
    with open("automl.pkl", "rb") as f:
        automl = pickle.load(f)
    shutil.rmtree("test/data/output/")
    automl.predict(X_test)
    automl.predict(["test test", "test test"])
    automl.predict(
        [
            ["test test", "test test"],
            ["test test", "test test"],
            ["test test", "test test"],
        ]
    )

    automl.predict_proba(X_test)
    print(automl.classes_)