Пример #1
0
def run(dataset: Dataset, config: TaskConfig):
    """Prepare encoded train/test data and hand it to ``exec.py`` via ``run_in_venv``.

    The feature matrices are passed through ``impute_array`` (presumably to
    fill missing values -- confirm against its definition) and bundled with
    the encoded targets under the ``X``/``y`` keys of a ``train``/``test``
    payload.

    A ``process_results`` hook is supplied to ``run_in_venv``. When the
    returned ``probabilities`` is a shapeless (0-d) array, its single item is
    read as a format marker: the marker ``"predictions"`` means the
    probabilities are rebuilt as a one-hot encoding of the predictions; any
    other marker raises ``ValueError``.

    :param dataset: benchmark dataset exposing ``train``, ``test`` and ``target``.
    :param config: task configuration forwarded unchanged to ``run_in_venv``.
    :return: whatever ``run_in_venv`` returns.
    """
    from frameworks.shared.caller import run_in_venv

    train_features, test_features = impute_array(dataset.train.X_enc,
                                                 dataset.test.X_enc)
    payload = {
        "train": {"X": train_features, "y": dataset.train.y_enc},
        "test": {"X": test_features, "y": dataset.test.y_enc},
    }

    def process_results(results):
        probs = results.probabilities
        # A value loaded through numpy is always an array, so a scalar marker
        # shows up as an array with an empty shape; anything else is left as is.
        if probs is None or probs.shape:
            return results

        marker = probs.item()
        if marker != "predictions":
            raise ValueError(
                f"Unknown probabilities format: {marker}")

        # Fit the one-hot encoder on the encoded target values, then use it
        # to turn the predictions into a probabilities matrix.
        encoded_targets = dataset.target.label_encoder.transform(
            dataset.target.values)
        one_hot = Encoder('one-hot', target=False, encoded_type=float)
        results.probabilities = one_hot.fit(encoded_targets).transform(
            results.predictions)
        return results

    return run_in_venv(
        __file__,
        "exec.py",
        input_data=payload,
        dataset=dataset,
        config=config,
        process_results=process_results,
    )
Пример #2
0
def run(dataset: Dataset, config: TaskConfig):
    """Train and evaluate a scikit-learn decision tree on the given task.

    A ``DecisionTreeClassifier`` is used when ``config.type`` equals
    ``'classification'``, otherwise a ``DecisionTreeRegressor``. The estimator
    is seeded with ``config.seed`` and receives ``config.framework_params`` as
    keyword arguments. Fitting and predicting are timed separately; class
    probabilities are computed outside of both timers and only for
    classification. Predictions are written with ``save_predictions``.

    :param dataset: benchmark dataset exposing encoded ``train``/``test`` splits.
    :param config: task configuration (type, seed, framework params, output file).
    :return: dict with ``models_count``, ``training_duration`` and
        ``predict_duration``.
    """
    log.info(f"\n**** Decision Tree [sklearn v{sklearn.__version__}] ****\n")

    classifying = config.type == 'classification'

    # Features are converted with unsparsify(fmt='array') before imputation.
    dense_features = unsparsify(dataset.train.X_enc, dataset.test.X_enc, fmt='array')
    X_train, X_test = impute_array(*dense_features)
    y_train, y_test = unsparsify(dataset.train.y_enc, dataset.test.y_enc, fmt='array')

    if classifying:
        model_cls = DecisionTreeClassifier
    else:
        model_cls = DecisionTreeRegressor
    model = model_cls(random_state=config.seed, **config.framework_params)

    with Timer() as fit_timer:
        model.fit(X_train, y_train)
    with Timer() as predict_timer:
        predictions = model.predict(X_test)

    # Not included in the prediction timing.
    probabilities = None
    if classifying:
        probabilities = model.predict_proba(X_test)

    save_predictions(
        dataset=dataset,
        output_file=config.output_predictions_file,
        probabilities=probabilities,
        predictions=predictions,
        truth=y_test,
        target_is_encoded=classifying,
    )

    return {
        "models_count": 1,
        "training_duration": fit_timer.duration,
        "predict_duration": predict_timer.duration,
    }
Пример #3
0
def run(dataset: Dataset, config: TaskConfig):
    """Train and evaluate a scikit-learn gradient boosting model on the given task.

    A ``GradientBoostingClassifier`` is used when ``config.type`` equals
    ``'classification'``, otherwise a ``GradientBoostingRegressor``. The
    estimator is seeded with ``config.seed`` and receives
    ``config.framework_params`` as keyword arguments. Features come from the
    encoded splits (passed through ``impute_array``), while the targets are
    taken from the non-encoded ``y`` attributes. Only the fit step is timed.

    :param dataset: benchmark dataset exposing ``train``/``test`` splits.
    :param config: task configuration (type, seed, framework params, output file).
    :return: dict with ``models_count`` and ``training_duration``.
    """
    log.info(
        f"\n**** Gradient Boosting [sklearn v{sklearn.__version__}] ****\n")

    classifying = config.type == 'classification'

    X_train, X_test = impute_array(dataset.train.X_enc, dataset.test.X_enc)
    y_train = dataset.train.y
    y_test = dataset.test.y

    if classifying:
        model_cls = GradientBoostingClassifier
    else:
        model_cls = GradientBoostingRegressor
    model = model_cls(random_state=config.seed, **config.framework_params)

    with Timer() as fit_timer:
        model.fit(X_train, y_train)

    predictions = model.predict(X_test)
    probabilities = None
    if classifying:
        probabilities = model.predict_proba(X_test)

    save_predictions(
        dataset=dataset,
        output_file=config.output_predictions_file,
        probabilities=probabilities,
        predictions=predictions,
        truth=y_test,
    )

    return {
        "models_count": 1,
        "training_duration": fit_timer.duration,
    }
Пример #4
0
def run(dataset: Dataset, config: TaskConfig):
    """Bundle the encoded train/test data and hand it to ``exec.py`` via ``run_in_venv``.

    The encoded feature matrices are passed through ``impute_array``
    (presumably to fill missing values -- confirm against
    ``amlb.datautils``) and stored with the encoded targets under the
    ``X_enc``/``y_enc`` keys of a ``train``/``test`` payload.

    :param dataset: benchmark dataset exposing encoded ``train``/``test`` splits.
    :param config: task configuration forwarded unchanged to ``run_in_venv``.
    :return: whatever ``run_in_venv`` returns.
    """
    from amlb.datautils import impute_array
    from frameworks.shared.caller import run_in_venv

    imputed_train, imputed_test = impute_array(dataset.train.X_enc,
                                               dataset.test.X_enc)
    payload = {
        "train": {"X_enc": imputed_train, "y_enc": dataset.train.y_enc},
        "test": {"X_enc": imputed_test, "y_enc": dataset.test.y_enc},
    }

    return run_in_venv(
        __file__,
        "exec.py",
        input_data=payload,
        dataset=dataset,
        config=config,
    )
Пример #5
0
def run(dataset: Dataset, config: TaskConfig):
    """Bundle train/test data, encoded or raw, and hand it to ``exec.py`` via ``run_in_venv``.

    The ``_encode`` entry of ``config.framework_params`` (default ``True``)
    selects the data representation: when truthy, the encoded features are
    passed through ``impute_array`` and paired with the encoded targets;
    otherwise the non-encoded ``X``/``y`` attributes are used as they are.

    :param dataset: benchmark dataset exposing ``train``/``test`` splits.
    :param config: task configuration; also forwarded unchanged to ``run_in_venv``.
    :return: whatever ``run_in_venv`` returns.
    """
    from amlb.datautils import impute_array
    from frameworks.shared.caller import run_in_venv

    encode = config.framework_params.get('_encode', True)
    if encode:
        X_train, X_test = impute_array(dataset.train.X_enc,
                                       dataset.test.X_enc)
        y_train, y_test = dataset.train.y_enc, dataset.test.y_enc
    else:
        X_train, X_test = dataset.train.X, dataset.test.X
        y_train, y_test = dataset.train.y, dataset.test.y

    payload = {
        "train": {"X": X_train, "y": y_train},
        "test": {"X": X_test, "y": y_test},
    }

    return run_in_venv(
        __file__,
        "exec.py",
        input_data=payload,
        dataset=dataset,
        config=config,
    )