Example #1: train and persist a fake-news classification pipeline
def run_training():
    """Train the model."""

    # read in the data
    data = load_dataset()

    # fit the training pipeline on the text column and the fake/real label
    pipeline.fake_news_pipeline.fit(data["traducción"].values,
                                    data["fake"].values)

    # persist the fitted pipeline
    save_pipeline(pipeline_to_save=pipeline.fake_news_pipeline)
Example #2: test make_prediction on a single row
def test_make_single_prediction():
    # Given
    test_data = load_dataset(file_name='test.csv')
    single_test_input = test_data[0:1]

    # When
    subject = make_prediction(input_data=single_test_input)

    # Then
    assert subject is not None
    assert isinstance(subject.get('predictions')[0], float)
    assert math.ceil(subject.get('predictions')[0]) == 112476
Example #3: train and persist an interest-rate model
def run_training():
    """Train the model."""
    # read in data
    data = load_dataset(file_name=config.TRAINING_DATA_FILE)
    # clean the target variable
    data = clean_target_variable(data)
    # split training & validation data
    X_train, X_val, y_train, y_val = split_dataset(data)
    _logger.info('...fitting model')
    pipeline.interest_rate_pipe.fit(X_train[config.FEATURES], y_train)

    _logger.info(f"...saving model version: {_version}")
    save_pipeline(pipeline.interest_rate_pipe)
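
save_pipeline is called in these snippets but never shown. A minimal sketch of what it might look like, assuming joblib persistence under a versioned file name; TRAINED_MODEL_DIR and _version are illustrative stand-ins for whatever the real project keeps in its config and version modules:

import logging
from pathlib import Path

import joblib

_logger = logging.getLogger(__name__)

# Hypothetical locations; a real project would keep these in its config module.
TRAINED_MODEL_DIR = Path(__file__).resolve().parent / "trained_models"
_version = "0.1.0"


def save_pipeline(pipeline_to_persist) -> None:
    """Persist the fitted pipeline under a versioned file name."""
    TRAINED_MODEL_DIR.mkdir(parents=True, exist_ok=True)
    save_path = TRAINED_MODEL_DIR / f"pipeline_v{_version}.pkl"
    joblib.dump(pipeline_to_persist, save_path)
    _logger.info(f"saved pipeline to {save_path}")

At prediction time, joblib.load(save_path) returns the fitted pipeline.
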
Example #4: test make_prediction on a full test set, with input filtering
def test_make_multiple_predictions():
    # Given
    test_data = load_dataset(file_name='test.csv')
    original_data_length = len(test_data)
    multiple_test_input = test_data

    # When
    subject = make_prediction(input_data=multiple_test_input)

    # Then
    assert subject is not None
    assert len(subject.get('predictions')) == 1451

    # We expect some rows to be filtered out
    assert len(subject.get('predictions')) != original_data_length
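
The last two assertions only hold if make_prediction drops rows it cannot score. That filtering step is not shown here; a minimal sketch of what it could look like, assuming a dropna-based validator and a placeholder list of required columns (neither name comes from the snippets above):

import pandas as pd

# Placeholder; the real project would presumably take this from its config.
REQUIRED_FEATURES = ["feature_a", "feature_b"]


def validate_inputs(input_data: pd.DataFrame) -> pd.DataFrame:
    """Drop rows with missing values in the columns the pipeline needs."""
    return input_data.dropna(subset=REQUIRED_FEATURES)
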
Example #5: train a loan-status classifier and track the run with MLflow
def run_training() -> None:
    """Train the model."""

    # read training data
    data = load_dataset(file_name=config.TRAINING_DATA_FILE)

    # transform the target
    data[config.TARGET] = data[config.PRE_TARGET].map({
        'Fully Paid': 1,
        'Charged Off': 0
    })
    data = data.drop(config.PRE_TARGET, axis=1)

    # divide train and test, set random_state
    X_train, X_test, y_train, y_test = train_test_split(data[config.FEATURES],
                                                        data[config.TARGET],
                                                        test_size=0.1,
                                                        random_state=0)

    # Set MLflow experiment
    mlflow.set_experiment(config.EXPERIMENT_NAME)

    with mlflow.start_run():
        loan_pipe.fit(X_train[config.FEATURES], y_train)

        # Log dataset
        dataset_full_path_name = dataset_location()
        mlflow.set_tag("dataset", dataset_full_path_name)

        # Log parameters
        mlflow.log_param("penalty", model_config.PENALTY)
        mlflow.log_param("dual", model_config.DUAL)
        mlflow.log_param("C", model_config.C)
        mlflow.log_param("fit_intercept", model_config.FIT_INTERCEPT)
        mlflow.log_param("random_state", model_config.RANDOM_STATE)
        mlflow.log_param("class_weight", model_config.CLASS_WEIGHT)
        mlflow.log_param("max_iter", model_config.MAX_ITER)
        mlflow.log_param("multi_class", model_config.MULTI_CLASS)

        # Log the metrics
        score = loan_pipe.score(X_train, y_train)
        mlflow.log_metric("score", score)

        # Save the sklearn pipeline as mlflow model
        conda_env = mlflow.sklearn.get_default_conda_env()
        mlflow.sklearn.log_model(loan_pipe,
                                 "sklearn_pipeline",
                                 conda_env=conda_env)
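
For completeness, a brief usage sketch of how the logged pipeline could be retrieved later: mlflow.sklearn.load_model is the counterpart to log_model, and "sklearn_pipeline" matches the artifact path used above; the run ID is a placeholder taken from the tracking UI or mlflow.active_run().info.run_id.

import mlflow.sklearn

RUN_ID = "<run_id>"  # placeholder
loaded_pipe = mlflow.sklearn.load_model(f"runs:/{RUN_ID}/sklearn_pipeline")
# loaded_pipe.predict(X_test[config.FEATURES])
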
Example #6: train a price regression pipeline with a log-transformed target
def run_training() -> None:
    """Train the model."""

    # read training data
    data = load_dataset(file_name=config.TRAINING_DATA_FILE)

    # divide train and test
    X_train, X_test, y_train, y_test = train_test_split(
        data[config.FEATURES],
        data[config.TARGET],
        test_size=0.1,
        random_state=0)  # we are setting the seed here

    # transform the target
    y_train = np.log(y_train)
    y_test = np.log(y_test)

    pipeline.price_pipe.fit(X_train[config.FEATURES], y_train)

    save_pipeline(pipeline_to_persist=pipeline.price_pipe)
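
Because the target is fitted on np.log(y_train), price_pipe predicts in log space. A minimal sketch of how a make_prediction helper might invert that at scoring time, assuming the same pipeline, config.FEATURES, and a _version string are importable; this is illustrative, not the project's actual implementation:

import numpy as np
import pandas as pd


def make_prediction(*, input_data: pd.DataFrame) -> dict:
    """Score new data and map predictions back to the original price scale."""
    log_preds = pipeline.price_pipe.predict(input_data[config.FEATURES])
    return {"predictions": np.exp(log_preds), "version": _version}
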